mirror of
https://github.com/EDCD/EDDN.git
synced 2025-04-12 07:00:04 +03:00
* Comparison had an off-by-one error, accidentally still including the "second before". * Print out the compiled regex for matching apache lines, in case of custom request text usage. * Output the timestamp at the end of the largest window. This enables easy checking of logs for possible anomalies around then. * Comment about how apache logs *aren't* actually strictly in time-order, but we assume they are.
120 lines
4.0 KiB
Python
Executable File
120 lines
4.0 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
# vim: wrapmargin=0 textwidth=0 smarttab expandtab tabstop=2 shiftwidth=2
|
|
"""Process Apache access.log lines to find highest rate of /upload/."""
|
|
|
|
import argparse
|
|
from collections import deque
|
|
import datetime
|
|
import dateutil.parser
|
|
import fileinput
|
|
import re
|
|
|
|
|
|
def process_log_file(
  input_file: str = '-',
  request_text: str = '/upload/',
  window_size: int = 1,
) -> None:
  """
  Process the indicated log file to determine peak rate of interesting lines.

  Each line that looks like an Apache access.log entry and contains
  `request_text` has its timestamp pushed onto a sliding window covering
  the last `window_size` seconds.  The largest number of entries ever held
  by that window, and the timestamp at which that happened, are printed
  once the whole input has been read.

  :param input_file: Name of input file, `-` for stdin
  :param request_text: The text that denotes an interesting line.  It is
    inserted verbatim into a regular expression, so regex syntax in it is
    honoured (the compiled expression is printed for checking).
  :param window_size: Time, in seconds, for the window to assess
  :raises ValueError: If `window_size` is not a positive number.
  """
  if window_size <= 0:
    # A zero/negative window would empty the deque on every line and then
    # divide by zero when reporting the rate.
    raise ValueError(f'window_size must be a positive number of seconds, got {window_size!r}')

  print(f'With:\n\tinput_file: "{input_file}"\n\trequest_text: "{request_text}"')

  apache_re = re.compile(
    r'^(?P<host>[.:0-9a-fA-F]{3,39}) - - \[(?P<datetime>[^\]]+)\] (?P<logtext>.*' + request_text + '.*)$'
  )
  print(f'Apache RE:\n{apache_re}\n')

  # Layout of the bracketed timestamp, e.g. `12/Apr/2025:07:00:04 +0300`.
  # `%z` accepts both positive and negative UTC offsets.
  apache_datetime_format = '%d/%b/%Y:%H:%M:%S %z'

  window_time_delta = datetime.timedelta(seconds=window_size)
  window_count = 0
  window_end_longest_count = None
  window_dts = deque()
  line_count = 0

  with fileinput.FileInput(files=(input_file)) as f:
    for line in f:
      matches = apache_re.search(line)
      if not matches:
        continue

      line_count += 1

      ###############################################################
      # This code absolutely assumes that the apache log lines are
      # in strictly increasing time sequence order.
      #
      # That's not necessarily true.  It has been observed that e.g.
      # a long line for 00:24:39 can occur in the middle of lines for
      # 00:24:40.
      #
      # Hopefully this doesn't happen too much.
      ###############################################################
      try:
        this_dt = datetime.datetime.strptime(matches.group('datetime'), apache_datetime_format)

      except ValueError:
        # Not a timestamp we understand; it cannot be placed in a window.
        continue

      window_dts.append(this_dt)

      # Drop everything that is a full window length (or more) older than
      # the current line.  `this_dt` itself is always newer than the
      # cut-off, so the deque can never be emptied here.
      oldest_of_interest = this_dt - window_time_delta
      while window_dts[0] <= oldest_of_interest:
        window_dts.popleft()

      if len(window_dts) > window_count:
        window_count = len(window_dts)
        # The window that just set the record ends at the current line.
        window_end_longest_count = this_dt

  print(f'With window size : {window_size:>9}')
  print(f'Total line matching lines: {line_count:>9}')
  print(f'Largest window count : {window_count:>9} ({window_count / window_size:>9}/s)')
  if window_end_longest_count is None:
    print('Busiest window ended at: n/a (no matching lines)')

  else:
    print(f'Busiest window ended at: {window_end_longest_count.strftime("%d/%b/%Y:%H:%M:%S")}')
|
|
|
|
|
|
def main() -> None:
  """
  Parse the command line and report the peak request rate of a log file.

  Positional arguments are the input file name (`-` for standard input)
  and, optionally, the text selecting the log lines of interest.  The
  `--window-length` option gives the window size in whole seconds; a
  value that is not an integer is rejected by argparse with a usage
  message.
  """
  parser = argparse.ArgumentParser(
    description='Process Apache web server access.log lines, counting the number of a specific request per a unit of time.',
  )

  parser.add_argument(
    '--window-length',
    metavar='<window size in seconds>',
    required=False,
    # Convert here so bad input produces a usage error, not a traceback.
    type=int,
    default=1,
    help='The time period in which the max rate will be.',
  )

  parser.add_argument(
    'input_file',
    metavar='<input file name>',
    help='Name of an Apache access.log file. You may use "-" for standard input.',
  )

  parser.add_argument(
    'request_text',
    metavar='<per-request text selector>',
    help='Text that appears in the log lines of interest. Defaults to "/upload/"',
    nargs='?',
    default='/upload/',
  )

  args = parser.parse_args()

  process_log_file(
    input_file=args.input_file,
    request_text=args.request_text,
    window_size=args.window_length,
  )
|
|
|
|
|
|
# Only run the command-line entry point when executed as a script, so the
# module can be imported without side effects.
if __name__ == '__main__':
  main()
|