diff --git a/.github/workflows/docker-publish.yaml b/.github/workflows/docker-publish.yaml new file mode 100644 index 0000000..6021696 --- /dev/null +++ b/.github/workflows/docker-publish.yaml @@ -0,0 +1,19 @@ +name: Publish Docker Images +on: + release: + types: [published] +jobs: + push_to_registry: + name: Push Docker image to Docker Hub + runs-on: ubuntu-latest + steps: + - name: Check out the repo + uses: actions/checkout@v2 + - name: Push to Docker Hub + uses: docker/build-push-action@v1 + with: + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_PASSWORD }} + repository: molu8bits/modsecurity-parser + tags: latest + tag_with_ref: true diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..901f836 --- /dev/null +++ b/.gitignore @@ -0,0 +1,139 @@ +################################ +########### FILES ############ +################################ +*.exe + +################################ +########### FOLDERS ############ +################################ +build/ +html/ +.benchmarks/ +reports/ +lectures/ + +################################ +########### PYTHON ############# +################################ + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.mypy_cache* + +*.isorted + +################################ +########### VS CODE ############ +################################ +.vscode +*.code-workspace +.history + + +################################ +########### molu ############ +################################ +.molu +sample_audit_log/modsec_output diff --git a/Dockerfile b/Dockerfile index cebd477..830b3a9 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,7 +1,8 @@ -FROM ubuntu:18.04 +FROM ubuntu:20.04 + LABEL maintainer "molu8bits@gmail.com" LABEL description "modsecurity parse and charts via Docker" -LABEL version "2020.09 v0.3" +LABEL version "2023.03 v0.4" ENV DEBIAN_FRONTEND=noninteractive @@ -15,7 +16,7 @@ RUN apt-get update && apt-get install -y git && \ RUN pip3 install -r /opt/mparser/requirements.txt -COPY modsecurity-parser.py /opt/mparser/ +COPY modsecurity_parser.py /opt/mparser/ COPY run.sh /opt/mparser/ RUN chmod +x /opt/mparser/run.sh diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..0c8d685 --- /dev/null +++ b/Makefile @@ -0,0 +1,59 @@ +MODULE := modsecurity_parser +BLUE='\033[0;34m' +RED='\033[0;31m' +GREEN='\033[0;32m' 
+YELLOW='\033[0;33m' +BYELLOW='\033[1;33m' +UYELLOW='\033[4;33m' +OYELLOW='\033[43m' +NC='\033[0m' # No color + +run: + @python -m $(MODULE) + +integration-v2: + @python -m ${MODULE} -f sample_audit_log/modsec_audit_v2.log -j out-v2.json -x out-v2.xlsx -g out-v2.png + +integration-v3: + @python -m ${MODULE} -f sample_audit_log/modsec_audit_v3.log -j out-v3.json -x out-v3.xlsx -g out-v3.png --version3 + +integration-v2-json: + @python -m ${MODULE} -f sample_audit_log/modsec_audit_v2_json.log -j out-v2-json.json -x out-v2-json.xlsx -g out-v2-json.png --jsonaudit + +integration-v2-timems: + @python -m ${MODULE} -f sample_audit_log/modsec_audit_v2_timems.log -j out-v2-timems.json -x out-v2-timems.xlsx -g out-v2-timems.png + +test-e2e: + @echo "\n${BLUE}Running E2E tests on sample_audit_log folder ${NC}\n" + @python -m ${MODULE} -f sample_audit_log/modsec_audit_v2.log -j out-v2.json -x out-v2.xlsx -g out-v2.png + @python -m ${MODULE} -f sample_audit_log/modsec_audit_v3.log -j out-v3.json -x out-v3.xlsx -g out-v3.png --version3 + @python -m ${MODULE} -f sample_audit_log/modsec_audit_v2_json.log -j out-v2-json.json -x out-v2-json.xlsx -g out-v2-json.png --jsonaudit + @python -m ${MODULE} -f sample_audit_log/modsec_audit_v2_utc_minus.log -j out-v2-utc-minus -x out-v2-utc-minus.xlsx -g out-v2-utc-minus.png + @python -m ${MODULE} -f sample_audit_log/modsec_audit_v2_timems.log -j out-v2-timems.json -x out-v2-timems.xlsx -g out-v2-timems.png + +test: + @echo "\n${OYELLOW}Running Pylint against source and test files...${NC}\n" + @pytest + +lint: + # test comments + @echo "\n${BLUE}Running Pylint against source and test files...${NC}\n" + # @pylint --rcfile=setup.cfg **/*.py *.py + @pylint --rcfile=setup.cfg *.py + @echo "\n${BLUE}Running PyDocStyle against source files...${NC}\n" + @pydocstyle --config=setup.cfg modsecurity_parser.py + @echo "\n${BLUE}Running Flake8 against source and test files...${NC}\n" + @flake8 + @echo "\n${BLUE}Running Bandit against source 
files...${NC}\n" + # @bandit -r --ini setup.cfg + @bandit -r --ini setup.cfg + @echo "\n${BLUE}Running pycodestyle against source files...${NC}\n" + @pycodestyle modsecurity_parser.py + @echo "\n${BLUE}Running Code Coverage against source files...${NC}\n" + @pytest --cov=modsecurity_parser tests --cov-report=html + + +clean: + rm -rf .pytest_cache .coverage .pytest_cache coverage.xml sample_audit_log/modsec_output + +.PHONY: clean test diff --git a/README.md b/README.md index ee503cd..654fad0 100644 --- a/README.md +++ b/README.md @@ -1,113 +1,111 @@ # modsecurity audit log parser, analyser and chart maker - ![Docker Build Status](https://img.shields.io/docker/automated/molu8bits/modsecurity-parser.svg) ![Docker Cloud Build Status](https://img.shields.io/docker/cloud/build/molu8bits/modsecurity-parser.svg) ![Docker Image Size](https://img.shields.io/docker/image-size/molu8bits/modsecurity-parser.svg?sort=date) ![Docker Image Version (latest by date):](https://img.shields.io/docker/v/molu8bits/modsecurity-parser.svg?sort=date) ![Docker Pulls](https://img.shields.io/docker/pulls/molu8bits/modsecurity-parser.svg) -# TL;DR +## TL;DR + Get the overview of security incidents reported by modsecurity module from modsec_audit.log file. -# 2020.09.20 update +## 2022.01.01 update + +- renamed to modsecurity_parser +- fix for timezone with miliseconds +- linting, testing added +- requirements vulnerabilities fixed + +## 2020.09.20 update + - added support for logs from timezone "UTC-..." - updated plotting to matplotlib.3.1 - added dockerhub autobuild - added requirements.txt -# 2019.04.17 update +## 2019.04.17 update + - added support for Modsecurity3 log (Nginx/Apache) - added feature to read Modsecurity log in JSON format +## Description -# Description -modsecurity parser is a python program to read modsecurity.org modsec_audit.log, transform read events into more human and machine readable formats (xlsx/json) and make basic charts. 
- +modsecurity parser is a python program to read [https://www.modsecurity.org/](https://www.modsecurity.org/) modsec_audit.log, transform read events into more human and machine readable formats (xlsx/json) and make basic charts. -

Functionality list: -

  • JSON output file with formatting conformed to JSON logging added into Modsecurity 2.9
  • -
  • XLSX output file which can be analysed further with desktop tools
  • -
  • PNG file with some basic charts - Timeline nonblocked vs intercepted events, TOP10 IP source address, TOP20 Rule IDs hit, TOP10 Attacks intercepted
  • +- JSON output file with formatting conformed to JSON logging added into Modsecurity 2.9 +- XLSX output file which can be analysed further with desktop tools +- PNG file with some basic charts - Timeline nonblocked vs intercepted events, TOP10 IP source address, TOP20 Rule IDs hit, TOP10 Attacks intercepted +## Graph analysis examples -# Graph analysis examples

    -

    -
    +

    +

    - -
    +

    +## Installation +Software needs at least Python 3.8.10 with additional libraries: -# Installation - Software needs at least Python 3.5.2 with additional libraries: -
  • Pandas 0.22
  • -
  • Pillow
  • -
  • matplotlib 2.1.2
  • -
  • numpy 1.13.1
  • -
  • openpyxl 2.4.0
  • +- pandas 1.1.3 +- Pillow 9.2.0 +- matplotlib 3.3.2 +- numpy 1.22.4 +- openpyxl 2.4.2 Install them with command -``` +```bash pip3 install -r requirements.txt ``` - -# Basic usage +## Basic usage -``` -python3 modsecurity-parser.py -f /home/user/logs/modsec_audit.log +```bash +python3 modsecurity_parser.py -f /home/user/logs/modsec_audit.log ``` for that case results will be recorded into subdirectory "modsec_output" where the log to analyse is placed. +## More options -# More options - -

    - -``` -python3 modsecurity-parser.py -h +```bash +python3 modsecurity_parser.py -h ``` - Filters INCLUDE and EXCLUDE are available for IP source addresses. -

    + --exclude option (e.g. "--exclude 192.168.0.1 10.0.0.1") just skips events with the given source IP addresses -

    + --include (e.g. "--include 10.0.5.6") takes precedence over EXCLUDE. INCLUDE processes only events with the given source IP addresses. -

    ---jsononeperline - option recommended for big number of events where e.g. produced JSON is supposed to be read by other SIEM tool. Uses the very same format as modsecurity software when type of logging is set to "JSON". +--jsononeperline - option recommended for a large number of events, e.g. when the produced JSON is meant to be read by a SIEM tool. Uses the very same format as the modsecurity software when its type of logging is set to "JSON". Processing Modsecurity3 log -

    ---version3 (e.g. "modsecurity-parser.py -f modsec_audit.log --version3" -

    + +--version3 (e.g. "modsecurity_parser.py -f modsec_audit.log --version3") Processing Modsecurity log in JSON format: -

    ---jsonaudit (e.g. "modsecurity-parser.py -f modsec_audit.log --jsonaudit" +--jsonaudit (e.g. "modsecurity_parser.py -f modsec_audit.log --jsonaudit") +## Limitations -# Limitations: -

  • The biggest tested modsec_audit.log was 1GB size with around 70000 records. It took more or less 5 minutes on an 8 years old workstation and memory usage temporarily raised to 2GB of RAM.
  • -
  • modsec_audit.log were taken from Apache web servers with locale set to en-US. Software can except some errors if datatime format is different in the audited log. Adjust LOG_TIMESTAMP_FORMAT and LOG_TIMESTAMP_FORMAT_SHORT accordingly
  • -
  • To process more than 90000 events just adjust MAXEVENTS
  • -
  • Tested with modsec_audit.log from version 2.8/2.9/3.0. Anyway Modsecurity3 for some cases produces empty H section and not all information is available to be properly presented in all graphs
  • +- The biggest tested modsec_audit.log was 1GB size with around 70000 records. It took more or less 5 minutes on an 8 years old workstation and memory usage temporarily raised to 2GB of RAM +- modsec_audit.log were taken from Apache web servers with locale set to en-US. Software can except some errors if datatime format is different in the audited log. Adjust LOG_TIMESTAMP_FORMAT and LOG_TIMESTAMP_FORMAT_SHORT accordingly +- To process more than 90000 events just adjust MAXEVENTS +- Tested with modsec_audit.log from version 2.8/2.9/3.0. Anyway Modsecurity3 for some cases produces empty H section and not all information is available to be properly presented in all graphs -# run via Docker +## run via Docker Create a subfolder (e.g. "modseclogs") and put into some modsecurity audit logs (by default modsec_audit.log name is processed only). Output files will be created inside of ${subfolder}/modsec_output @@ -119,8 +117,7 @@ docker run --rm -ti --mount type=bind,source="$(pwd)"/modseclogs,target=/opt/mou ``` Get some more docker options: + ```bash docker run --rm -ti -e HELP=Yes molu8bits/modsecurity-parser:latest ``` - - diff --git a/modsecurity-parser.py b/modsecurity-parser.py deleted file mode 100644 index 9e41754..0000000 --- a/modsecurity-parser.py +++ /dev/null @@ -1,690 +0,0 @@ -# 2019.01 - molu8bits (at) gmail (dot) com -# modsecurity-parser.py -# Script to analyze modsecurity audit log and present outputs as: -# - json file (compatible with default JSON logging) -# - xlsx report -# - png with graphs -import matplotlib -matplotlib.use('Agg') -import os, argparse, re, json -from collections import OrderedDict, Counter -from time import localtime,strftime,strptime -from datetime import datetime -import matplotlib.pyplot as plt -import numpy as np -import pandas as pd -import openpyxl - - -DEBUG = False -MAXEVENTS = 90000 -SAVEOUTPUTJSON = True -JSON_ONE_PER_LINE = False -FILTER_INCLUDE = True -FILTER_EXCLUDE = True -LOG_TIMESTAMP_FORMAT = '%d/%b/%Y:%H:%M:%S 
%z' # e.g. "01/Mar/2018:05:26:41 +0100" -LOG_TIMESTAMP_FORMAT_SHORT = '%Y-%m-%d_%H:%M' - - -# modsec_patterns -a_pattern = re.compile('^--\w{6,10}-A--$') -z_pattern = re.compile('^--\w{6,10}-Z--$') -modsec_event_types = ['A','B','C','E','F','H','I','J','K'] -modsec_message_file_pattern = r'(?<=\[file\s\").*?(?="\])' -modsec_message_msg_pattern = r'(?<=\[msg\s\").*?(?=\"\])' -modsec_message_id_pattern = r'(?<=\[id\s\").*?(?=\"\])' -modsec_message_severity_pattern = r'(?<=\[severity\s\").*?(?=\"\])' -modsec_message_maturity_pattern = r'(?<=\[maturity\s\").*?(?=\"\])' -modsec_message_accuracy_pattern = r'(?<=\[accuracy\s\").*?(?=\"\])' -modsec_message_message_pattern = r'(?<=Message:).*?(?=\.\ \[)' -modsec_v3_message_phase_pattern = r'(?<=\(phase).*?(?=\))' -#modsec_v3_message_phase_pattern = r'(?:\(phase).*?(?:\))' # (phase 2) -#modsec_v3_message_phase_pattern = r'(?:\(phase).*?(?=\))' -#modsec_v3_message_message_pattern = r'(?<=\Message:).*?(?=\[)' -modsec_v3_message_message_pattern = r'(?<=\Matched).*?(?=\[)' -modsec_v3_message_msg_pattern = r'(?<=\[msg\s\").*?(?=\"\])' - -# parse the command line arguments -argParser = argparse.ArgumentParser() -argParser.add_argument('-f', type=str, help='input file with the ModSecurity audit log', required=False) -argParser.add_argument('-j', type=str, help='output file name for JSON format', required=False) -argParser.add_argument('-x', type=str, help='output file name for Excel format', required=False) -argParser.add_argument('-g', type=str, help='output file name for Graphs - PNG format', required=False) -argParser.add_argument('-e','--exclude', type=str, nargs='+', help='source IP addresses to exclude from the results as a list (e.g. -exclude 127.0.0.1 192.168.0.1)', required=False) -argParser.add_argument('-i','--include', type=str, nargs='+', help='source IP addresses to include only into the results as a list (e.g. 
-include 1.2.3.4 5.5.5.5)', required=False) -argParser.add_argument('-l', type=str, help='output file name for logging purposes', required=False) -argParser.add_argument('--jsononeperline', action="store_true", help='events in output JSON will be enlisted one per line, otherwise by default JSON is humanreadable', default="False") -argParser.add_argument('--version3', action="store_true", help='required if modsec_audit.log is produced by ModSecurity3', default="False") -argParser.add_argument('--jsonaudit', action='store_true', help='required if modsec_audit.log is JSON') -passedArgs = vars(argParser.parse_args()) - -inputFileName = passedArgs['f'] -jsonOutputFilename = passedArgs['j'] -JSON_ONE_PER_LINE = True if passedArgs['jsononeperline'] is True else False -version3 = True if passedArgs['version3'] is True else False -jsonaudit = True if passedArgs['jsonaudit'] is True else False - -# Modsecurity JSON output for message doesn't comprise 'Message:' at the beggining of the string. -if jsonaudit: - modsec_message_message_pattern = r'(?<=^).*(?=\.\s\[)' - -# Modsecurity3 message information (if exists) starts with 'ModSecurity' string. 
-if version3: - a_pattern = re.compile('^---\w{8,10}---A--$') - z_pattern = re.compile('^---\w{8,10}---Z--$') - modsec_message_message_pattern = r'(?<=\ModSecurity:).*?(?=\[)' - -xlsxOutputFilename = passedArgs['x'] -logOutputFilename = passedArgs['l'] -graphOutputFilename = passedArgs['g'] -if passedArgs['include'] is not None: - filter_include_table = passedArgs['include'] - FILTER_INCLUDE = True - FILTER_EXCLUDE = False -elif passedArgs['exclude'] is not None: - filter_exclude_table = passedArgs['exclude'] - FILTER_INCLUDE = False - FILTER_EXCLUDE = True -else: - FILTER_INCLUDE = False - FILTER_EXCLUDE = False - -datetimenow = strftime('%Y-%m-%d_%H-%M-%S', localtime()) - -records_total = 0 -records_skipped_cnt = 0 -records_processed_cnt = 0 - -def safedictkey(dictname, keyname, default='None'): - """ - Returns value of nested keynames from dict. If no such key (or nested keys) exist then returns default value - """ - try: - dictname_temp = dictname - for value in keyname: - dictname_temp = d = dictname_temp[value] - return d - except Exception: - return default - -def get_params(string_in, separator=' ', defaultmissing='-', params_to_get=3): - """ - Split string using 'separator' into required number of parameters. 
Fulfill missing parameters with 'defaultmissing' - Current limitation: hardcoded return always 3 of them - """ - rtr = str(string_in).split(separator) - if len(rtr) > params_to_get: - rtr = [] - rtr.append(str(string_in)) - for x in range(0, (params_to_get - len(rtr))): - rtr.append(defaultmissing) - return rtr[0],rtr[1],rtr[2] - -def regular_expression_evaluate(string_in, regular_expression, group=True, to_split=False, to_split_value='/', to_split_column=-1): - try: - if group and not to_split: - re_value = re.search(regular_expression, string_in).group() - elif group and to_split: - re_value = re.search(regular_expression, string_in).group().split(to_split_value)[to_split_column] - else: - re_value = re.search(regular_expression, string_in) - except Exception as e5: - re_value = '?' - return re_value - - -if inputFileName == None: - print('No parameter inputFileName, looking for modsec_audit.log in current directory ...') - inputFileName = os.path.join(os.getcwd(), 'modsec_audit.log') -else: - print('inputFileName :', inputFileName) - -fileBaseName = str(os.path.splitext(os.path.split(inputFileName)[-1])[0]) + '_' + str(datetimenow) -fileBaseOutputDir = os.path.join(os.path.dirname(inputFileName), 'modsec_output') -if jsonOutputFilename == None: - jsonOutputFilename = fileBaseName + '.json' -if xlsxOutputFilename == None: - xlsxOutputFilename = fileBaseName + '.xlsx' -if logOutputFilename == None: - logOutputFilename = fileBaseName + '.log' -if graphOutputFilename == None: - graphOutputFilename = fileBaseName + '.png' - - -def modsecSaveJson(dictToSave, fileToSave, onePerLine): - """ - Exports modsec_audit events to *.json file. 
- onePerLine True -> file formatted likewise when logging set to JSON in modsecurity.conf, - onePerLine False -> human readable JSON output - """ - try: - if not os.path.isdir(fileBaseOutputDir): - os.mkdir(fileBaseOutputDir) - fOut = open(os.path.join(fileBaseOutputDir, fileToSave), 'w') - if onePerLine: - for line in dictToSave: - fOut.write(json.dumps(line)) - fOut.write('\n') - fOut.close() - else: - for line in dictToSave: - fOut.write(json.dumps(line, indent=4, sort_keys=False)) - fOut.write('\n') - fOut.close() - except Exception as e: - print('modsecSaveJson() thrown exception: %s', e) - pass - -def modsecSaveXLSX(modsecDict, outputXLSXFileName, outputWithGraphs): - """ - Exports processed modsec_audit events into XLSX formatted file. - :param modsecDict: List of audit events as JSON - :param outputXLSXFileName: file to save the report - :return: - """ - modsec_header_xlsx = ['transaction_id', 'event_time', 'remote_address', 'request_host', - 'request_useragent','request_line', 'request_line_method', 'request_line_url', 'request_line_protocol', - 'response_protocol', 'response_status', - 'action','action_phase', 'action_message', - 'message_type', 'message_description', 'message_rule_id', 'message_rule_file', - 'message_msg', 'message_severity', 'message_accuracy', 'message_maturity', 'full_message_line' - ] - wb = openpyxl.Workbook() - ws1 = wb.active - ws1.title = 'Modsec_entries' - ws1.append(modsec_header_xlsx) - - for entry_mod in modsecDict: - try: - transaction_id = entry_mod['transaction']['transaction_id'] - event_time = entry_mod['transaction']['time'] - remote_address = entry_mod['transaction']['remote_address'] - request_line = entry_mod['request']['request_line'] - request_line_method, request_line_url, request_line_protocol = get_params(string_in=request_line, defaultmissing='-', params_to_get=3) - request_headers_useragent = safedictkey(entry_mod, ['request','headers','User-Agent'], '-') - request_headers_host = safedictkey(entry_mod, 
['request','headers','Host'], '-') - response_protocol = safedictkey(entry_mod, ['response', 'protocol'], '-') - response_status = safedictkey(entry_mod, ['response','status'], '-') - audit_data_producer = safedictkey(entry_mod, ['audit_data','producer'], '-') - audit_data_server = safedictkey(entry_mod, ['audit_data', 'server'], '-') - audit_data_enginemode = safedictkey(entry_mod, ['audit_data','Engine-Mode'], '-') - audit_data_action_intercepted = 'intercepted' if (safedictkey(entry_mod, ['audit_data','action','intercepted'], '-') == True) else '-' - audit_data_action_message = safedictkey(entry_mod, ['audit_data','action','message'], '-') - audit_data_action_phase = safedictkey(entry_mod, ['audit_data','action','phase'], '-') - - if ('messages' in entry_mod['audit_data']) and (len(entry_mod['audit_data']) > 0): - if len(entry_mod['audit_data']['messages']) > 1: - audit_data_message_type = 'multiple' - else: - audit_data_message_type = 'single' - for each in entry_mod['audit_data']['messages']: - audit_data_message_message = regular_expression_evaluate(each, modsec_message_message_pattern) - audit_data_message_file = regular_expression_evaluate(each, modsec_message_file_pattern, to_split=True, to_split_value='/', to_split_column=-1) - audit_data_message_id = regular_expression_evaluate(each, modsec_message_id_pattern) - audit_data_message_msg = regular_expression_evaluate(each, modsec_message_msg_pattern) - audit_data_message_severity = regular_expression_evaluate(each, modsec_message_severity_pattern) - audit_data_message_maturity = regular_expression_evaluate(each, modsec_message_maturity_pattern) - audit_data_message_accuracy = regular_expression_evaluate(each, modsec_message_accuracy_pattern) - #audit_data_message_tags = [] # TAGS not in use currently - ws1.append([transaction_id, event_time, remote_address, request_headers_host, request_headers_useragent, - request_line, request_line_method, request_line_url, request_line_protocol, - response_protocol, 
response_status, - audit_data_action_intercepted, audit_data_action_phase, audit_data_action_message, - audit_data_message_type,audit_data_message_message, audit_data_message_id, audit_data_message_file, - audit_data_message_msg, audit_data_message_severity, audit_data_message_accuracy, audit_data_message_maturity, - each - ]) - else: - audit_data_message_type = 'None' - each = 'None' - #print('M error - message not found for transaction_id :', transaction_id) - audit_data_message_message = audit_data_message_file = audit_data_message_id = audit_data_message_msg = \ - audit_data_message_severity = audit_data_message_maturity = audit_data_message_accuracy = '-' - ws1.append([transaction_id, event_time, remote_address, request_headers_host, request_headers_useragent, - request_line, request_line_method, request_line_url, request_line_protocol, - response_protocol, response_status, - audit_data_action_intercepted, audit_data_action_phase, audit_data_action_message, - audit_data_message_type, audit_data_message_message, audit_data_message_id, audit_data_message_file, - audit_data_message_msg, audit_data_message_severity, audit_data_message_accuracy, audit_data_message_maturity, - each - ]) - except Exception as e: - print('Exception at modsecSaveXLSX() :', e , ' , transaction_id :', transaction_id) - - if not 'error' in outputWithGraphs: - img = openpyxl.drawing.image.Image(outputWithGraphs) - ws2 = wb.create_sheet('Graphs') - ws2.add_image(img) - - try: - if not os.path.isdir(fileBaseOutputDir): - os.mkdir(fileBaseOutputDir) - fOut = os.path.join(fileBaseOutputDir, outputXLSXFileName) - wb.save(filename=fOut) - except Exception as e: - print('modsecSaveXLSX() has thrown exception: %s', e) - - pass - -def modsecViewGraphs(modsecDict): - """ - Module to visualize audit log as graphs - :param modsecDict: list of modsec_audit events given as a dictionary - :return: png file output or string 'error' in case no valid image created - """ - if len(modsecDict) < 1: - 
exit('Error: No logs to visualize. Check log and Include/Exclude filters') - ''' - GRAPHS PART I - Collect information into lists/dicts to make particular graphs - ''' - src_ip_tab = [] - event_time_action = [] - event_messages = [] - intercepted_reason = [] - event_rules = [] - for entry_mod in modsecDict: - try: - ''' Graph data for "TOP 10 IP source addresses" ''' - src_ip_tab.append(entry_mod['transaction']['remote_address']) - - ''' Graph data for "Modsecurity Events reported vs intercepted" ''' - if (version3 is False) and ('action' in entry_mod['audit_data'].keys() and 'intercepted' in entry_mod['audit_data']['action'].keys()): - event_time_action.append([entry_mod['transaction']['time'], True]) - - elif (version3 is True) and len(entry_mod['audit_data']) > 0: - for each_msg in entry_mod['audit_data']['messages']: - #print('each_msg :', each_msg) - if each_msg.startswith("ModSecurity: Access denied"): - event_time_action.append([entry_mod['transaction']['time'], True]) - else: - event_time_action.append([entry_mod['transaction']['time'], False]) - #print('Nobody expect the Spanish Inquisition for ModSecurity v3') - #print('each_msg :', each_msg) - else: - # No 'intercepted' - event_time_action.append([entry_mod['transaction']['time'], False]) - except Exception as e2: - print('Exception in Graph TOP 10 IP source addresses', e2) - - ''' Graph data for "TOP 20 rule hits"''' - try: - if 'messages' in entry_mod['audit_data'].keys(): - messages = safedictkey(entry_mod, ['audit_data','messages'], '-') - for each in messages: - event_messages.append(each) - rule_id = regular_expression_evaluate(each, modsec_message_id_pattern) - rule_msg = regular_expression_evaluate(each, modsec_message_msg_pattern) - rule_severity = regular_expression_evaluate(each, modsec_message_severity_pattern) - rule_file = regular_expression_evaluate(each, modsec_message_file_pattern) - """ - Cut the [msg] to 27 chars if it is longer than 30 chars. 
- If [msg] and [id] not found then treat message description as the [msg] - """ - if len(rule_msg) > 30: - rule_msg = rule_msg[:27] + '...' - if rule_msg == '?' and rule_id == '-': - rule_msg = str(each)[:30] - rule_descr = 'id: ' + str(rule_id) + ', sev: ' + str(rule_severity) + ', msg: ' + str(rule_msg) - event_rules.append([rule_id, rule_msg, rule_severity, rule_file, rule_descr]) - else: - ''' Skip modsec_audit entries without [message] part''' - pass - except Exception as e3: - print('Exception in TOP 20 rule hits', e3) - print('for transaction_id :', safedictkey(entry_mod, ['transaction','transaction_id'], '-')) - - ''' Graph data for "TOP 10 Attacks intercepted" ''' - try: - if (version3 is False) and ('action' in entry_mod['audit_data']): - msg = entry_mod['audit_data']['action']['message'] - if len(msg) > 60: - msg = msg[:50] + '...' - intercepted_reason.append([entry_mod['audit_data']['action']['phase'], msg, 'phase ' + str(entry_mod['audit_data']['action']['phase']) + ': ' + msg]) - elif (version3 is True) and len(entry_mod['audit_data']) > 0: - for each_msg in entry_mod['audit_data']['messages']: - if each_msg.startswith("ModSecurity: Access denied"): - msg = regular_expression_evaluate(each_msg, modsec_v3_message_msg_pattern) - if len(msg) > 60: - msg = msg[:50] + '...' 
- phase = regular_expression_evaluate(each_msg, modsec_v3_message_phase_pattern) - intercepted_reason.append([phase, msg, 'phase ' + phase + ': ' + msg]) - - except Exception as e: - print('Exception in Graph TOP 10 Attacks intercepted', e) - """ - Modsecurity events Passed vs Intercepted - """ - np_event_time_action = np.array(event_time_action) - event_times1 = np_event_time_action[:, 0] - event_times = list(map(lambda x: datetime.strptime(x.replace('--','-'), LOG_TIMESTAMP_FORMAT).replace(tzinfo=None), event_times1)) - event_action = np_event_time_action[:, 1] - event_times_min = min(event_times); event_times_max = max(event_times); event_times_range = event_times_max - event_times_min - event_times_range_seconds = int(event_times_range.total_seconds()) - event_times_range_minutes = int(event_times_range.total_seconds() / 60) - if event_times_range_minutes < 60: - PERIODS = str(int(event_times_range_seconds / 1)) + 's' - else: - PERIODS = str(int(event_times_range_minutes / 30)) + 'min' - events_df = pd.DataFrame({ - 'date': pd.to_datetime(event_times), - 'action': event_action - }) - intercepted = [] ; passed = []; passed_cnt2 = 0; intercepted_cnt2 = 0 - for row in events_df['action']: - if (row == 'True'): - intercepted.append(1); passed.append(0); intercepted_cnt2 += 1 - else: - intercepted.append(0); passed.append(1); passed_cnt2 += 1 - events_df['intercepted'] = intercepted; events_df['passed'] = passed - ''' - GRAPHS PART II - ''' - ''' TOP 10 IP addresses Graph - data preparation ''' - ipaddr_cnt = Counter() - for word in src_ip_tab: - ipaddr_cnt[word] += 1 - ipaddr_cnt_top10 = dict(ipaddr_cnt.most_common(10)) - - ''' TOP 10 Interception Reason - data preparation''' - intercepted_cnt = Counter() - for word in intercepted_reason: - intercepted_cnt[word[2]] += 1 - intercepted_cnt_top10 = dict(intercepted_cnt.most_common(10)) - ''' TOP 20 Rule IDs hit - data preparation''' - event_messages_ids = Counter() - for word in event_rules: - 
event_messages_ids[word[4]] += 1 - event_messages_ids_top20 = dict(event_messages_ids.most_common(20)) - - ''' GRIDS VERSION BEGIN ''' - fig = plt.figure(0) - grid = plt.GridSpec(3, 3, wspace=1.1, hspace=1.1) - ax1 = plt.subplot(grid[0, 0:3]) - ax21 = plt.subplot(grid[1, 0]) - ax22 = plt.subplot(grid[2, 0]) - ax31 = plt.subplot(grid[1, 1]) - ax32 = plt.subplot(grid[2, 1]) - ax41 = plt.subplot(grid[1, 2]) - ax42 = plt.subplot(grid[2, 2]) - - # Graph Included or Excluded - modsec_inc_exc_str = '' - if FILTER_INCLUDE: - modsec_inc_exc_str = 'Filter INCLUDE active. Skipped the rest of ' + str(records_skipped_cnt) + \ - ' events where source IP address NOT in: ' + str(filter_include_table) - elif FILTER_EXCLUDE: - modsec_inc_exc_str = 'Filter EXCLUDE active. Skipped the rest of ' + str(records_skipped_cnt) + \ - ' events where source IP address in: ' + str(filter_exclude_table) - else: - modsec_inc_exc_str = 'Filter INCLUDE/EXCLUDE non-active.' - - title_timespan = 'Analysis of ' + str(records_processed_cnt) + ' modsecurity events in timespan: ' + \ - str(event_times_min.strftime("%Y-%m-%d_%H:%M")) + ' - ' + str(event_times_max.strftime("%Y-%m-%d_%H:%M")) + '\n' - title_total = 'Total number of events found in logfile ' + str(records_total) + ' (output always trimmed to variable MAXEVENTS = ' + str(MAXEVENTS) + ' )\n' - title_reported_intercepted = 'events passed: ' + str(passed_cnt2) + ' , events intercepted: ' + str(intercepted_cnt2) - plot_title = title_timespan + title_total + modsec_inc_exc_str + '\n\n' + title_reported_intercepted - if event_times_range_seconds < 1800: - short_time_range_message = 'Creating timeline graph is not available for timespan ' + str(event_times_range_seconds) + ' seconds, skipping ...' 
- plt.subplot(ax1) - plt.text(0.5, 0.5, short_time_range_message, horizontalalignment='center', verticalalignment='center') - plt.title(plot_title) - else: - ex = events_df.groupby(pd.Grouper(key='date', freq=PERIODS)).sum() - ex.plot(ax=ax1, kind='bar', title=plot_title, stacked=True, color={'purple', 'red'}, fontsize=7, rot=45) - - ''' Bar chart "TOP 10 IP addresses" ''' - plt.subplot(ax21) - patches, texts, autotexts = plt.pie(ipaddr_cnt_top10.values(), autopct='%1.1f%%', shadow=True, startangle=90,radius=1.0) - plt.title(' TOP %s IP addresses (out of total %s) ' % (len(ipaddr_cnt_top10), len(ipaddr_cnt)), bbox={'facecolor': '0.8', 'pad': 5}) - - ''' Legend for chart "TOP 10 IP addresses" ''' - x = np.char.array(list(ipaddr_cnt_top10.keys())) - y = np.array(list(ipaddr_cnt_top10.values())) - labels = ['{0} --> {1} hits'.format(i, j) for i, j in - zip(ipaddr_cnt_top10.keys(), ipaddr_cnt_top10.values())] - if len(ipaddr_cnt_top10.keys()) >= 1: - patches, labels, dummy = zip(*sorted(zip(patches, labels, y), key=lambda x: x[2], reverse=True)) - plt.subplot(ax22) - plt.axis('off') - plt.legend(patches, labels, loc='center left', bbox_to_anchor=(-0.1, 1.), fontsize=7) - - ''' Bar chart "TOP 10 Attacks intercepted" ''' - plt.subplot(ax31) - patches, texts, autotexts = plt.pie(intercepted_cnt_top10.values(), autopct='%1.1f%%', shadow=True, startangle=90, radius=1.0, normalize=True) - [_.set_fontsize(7) for _ in texts] - plt.title('TOP 10 Attacks intercepted', bbox={'facecolor': '0.8', 'pad': 5}) - - ''' Legend for chart "TOP 10 Attacks intercepted" ''' - x = np.char.array(list(intercepted_cnt_top10.keys())) - y = np.array(list(intercepted_cnt_top10.values())) - labels = ['{0} --> {1} hits'.format(i,j) for i,j in zip(intercepted_cnt_top10.keys(), intercepted_cnt_top10.values())] - if len(intercepted_cnt_top10.values()) >= 1: - patches, labels, dummy = zip(*sorted(zip(patches, labels, y), key=lambda x: x[2], reverse=True)) - plt.subplot(ax32) - plt.axis('off') - 
plt.legend(patches, labels, loc='center left', bbox_to_anchor=(-0.1, 1.), fontsize=7) - else: - plt.subplot(ax32) - plt.axis('off') - plt.text(0.5, 0.5, 'No intercepted events found for given data set', horizontalalignment='center', verticalalignment='center') - - ''' Bar chart "TOP 20 Rule IDs hit" ''' - plt.subplot(ax41) - patches, texts, autotexts = plt.pie(event_messages_ids_top20.values(), autopct='%1.1f%%', shadow=True, startangle=90, radius=1.0, normalize=True) - plt.title('TOP 20 Rule IDs hit', bbox={'facecolor': '0.8', 'pad': 5}) - - ''' Legend for chart "TOP 20 Rule IDs hit" ''' - x = np.char.array(list(event_messages_ids_top20.keys())) - y = np.array(list(event_messages_ids_top20.values())) - labels = ['{0} --> {1} hits'.format(i, j) for i, j in zip(event_messages_ids_top20.keys(), event_messages_ids_top20.values())] - if len(event_messages_ids_top20.keys()) >= 1: - patches, labels, dummy = zip(*sorted(zip(patches, labels, y), key=lambda x: x[2], reverse=True)) - plt.subplot(ax42, axis='off') - plt.axis('off') - plt.legend(patches, labels, loc='center left', bbox_to_anchor=(-0.1, 1.), fontsize=7) - - ''' - GRID VERSION END - ''' - graph_title = 'Modsecurity events ' + str(datetimenow) + ' from file: ' + inputFileName + ' first ' + str(MAXEVENTS) + ' analyzed' - fig.canvas.set_window_title(graph_title) - fig.set_size_inches(18,11) - #plt.get_current_fig_manager().window.wm_geometry("+10+10") - try: - if not os.path.isdir(fileBaseOutputDir): - os.mkdir(fileBaseOutputDir) - fOut = os.path.join(fileBaseOutputDir, graphOutputFilename) - plt.savefig(fOut) - return(fOut) - except Exception as e: - print('modsecViewGraphs.savefig() thrown exception: %s', e) - return('error') - -def modsecLog2Info(singleEntry): - """ - Module gets piece of log for single modsecurity event and transform into dict (JSON) according to standard JSON logging - :param text consisted of many lines for single modsecurity event. 
- Expected it starting with section 'A' and ending with section 'Z' - :return: dict type with modsec_audit entry converted into JSON - """ - modsec_dict = OrderedDict() - a_header = singleEntry[0] - if version3: - e_separator = a_header[a_header.find('^---')+ 4:a_header.find('---A--')] - else: - e_separator = a_header[a_header.find('^--')+3:a_header.find('-A-')] - itemNumber = 0 - itemKV = OrderedDict() - try: - for item in singleEntry: - if item.__contains__(e_separator): - itemKV[item.rstrip()[-3:-2:]] = itemNumber - itemNumber+=1 - item_keys = list(itemKV.keys()) - itemKVFull = OrderedDict() - for item_letter in item_keys: - if item_letter in modsec_event_types: - i = int(itemKV[item_letter]) + 1 - j = itemKV[item_keys[item_keys.index(item_letter) + 1 ] ] - itemKVFull[item_letter] = singleEntry[i:j] - - modsec_a = itemKVFull['A'][0] - modsec_b = itemKVFull['B'] - modsec_f = itemKVFull['F'] - modsec_h = itemKVFull['H'] - - modsec_b_headers = dict(map(lambda s: [s[0:s.find(': ')],s[s.find(': ')+2:]], modsec_b[1:-1])) - modsec_f_headers = dict(map(lambda s: [s, '-'] if len(s.split(': ')) == 1 else [s[0:s.find(': ')], s[s.find(': ') + 2:]], modsec_f[1:-1])) - - modsec_h_dict = OrderedDict() - for elem in modsec_h: - if elem.startswith('Message:') or elem.startswith('ModSecurity:'): - if 'messages' not in modsec_h_dict: - modsec_h_dict['messages'] = [elem] - else: - modsec_h_dict['messages'].append(elem) - elif elem.startswith('Apache-Handler:'): - if 'handlers_messages' not in modsec_h_dict: - modsec_h_dict['handlers_messages'] = [elem] - else: - modsec_h_dict['handlers_messages'].append(elem) - elif elem.startswith('Apache-Error:'): - if 'error_messages' not in modsec_h_dict: - modsec_h_dict['error_messages'] = [elem] - else: - modsec_h_dict['error_messages'].append(elem) - elif elem.startswith('Producer:'): - modsec_h_dict['producer'] = elem.split(': ')[1].strip(' .').split('; ') - elif elem.startswith('Engine-Mode:'): - modsec_h_dict['Engine-Mode'] = 
elem.split(': ')[1].strip('"') - elif elem.startswith('Server:'): - modsec_h_dict['server'] = elem.split(': ')[1] - elif elem.startswith('Action: '): - modsec_h_dict['action'] = {} - if 'ntercepted' in elem: - modsec_h_dict['action']['intercepted'] = True - modsec_h_dict['action']['phase'] = int(elem[elem.find('phase')+6]) - modsec_h_dict['action']['message'] = modsec_h_dict['messages'][-1].split('.')[1].strip() - elif elem.startswith('Stopwatch2'): - modsec_h_dict['stopwatch'] = {} - for stopw in elem.split(' '): - if '=' in stopw: - modsec_h_dict['stopwatch'][stopw.split('=')[0]] = int(stopw.split('=')[1].strip(',')) - - else: - pass - modsec_a_split = modsec_a.split() - modsec_dict['transaction'] = {'time' : modsec_a_split[0].replace('[','') + ' ' + modsec_a_split[1].replace(']',''), 'transaction_id': modsec_a_split[2], 'remote_address' : modsec_a_split[3], - 'remote_port': modsec_a_split[4], 'local_address': modsec_a_split[5], 'local_port': modsec_a_split[6] } - if len(modsec_b) > 0: - modsec_dict['request'] = {'request_line': modsec_b[0], 'headers': modsec_b_headers} - else: - modsec_dict['request'] = 'None' - - if len(modsec_f_headers) > 3: - modsec_dict['response'] = OrderedDict() - try: - modsec_dict['response'] = {'protocol': modsec_f[0].split(' ')[0], 'status': modsec_f[0].split(' ')[1], 'status_text': ' '.join(modsec_f[0].split(' ')[2:]), 'headers': modsec_f_headers} - except Exception as e: - print('Exception at modsec_dict["response"] :', e) - modsec_dict['response'] = 'None' - else: - modsec_dict['response'] = 'None' - modsec_dict['audit_data'] = OrderedDict() - modsec_dict['audit_data'] = modsec_h_dict - except Exception as e: - print('modsecLog2Info() error found :', e, ' when processing :', singleEntry) - modsec_dict = 'ERROR' - - return modsec_dict - -def processModsecAudit(inputFileName): - try: - with open(inputFileName, 'r', encoding='cp437') as modsecFHandler: - modsec_Table = [] - for logLine in modsecFHandler: - if a_pattern.search(logLine): 
- modsec_Entry = [logLine] - for entryLog in modsecFHandler: - if z_pattern.search(entryLog): - modsec_Entry.append(entryLog.rstrip()) - modsec_Table.append(modsec_Entry) - break - else: - modsec_Entry.append(entryLog.rstrip()) - return modsec_Table - except FileNotFoundError: - print('File "', inputFileName, '" not found') - return 'error' - except Exception as e: - print('Error found during read file ', inputFileName) - return 'error' - -def processModsecAudit3(inputFileName): - lineNumber = 0 - modsec_Table = [] - try: - with open(inputFileName, 'r', encoding='utf-8', errors='ignore') as modsecFHandler: - for logLine in modsecFHandler: - lineNumber += 1 - try: - p = json.loads(logLine) - modsec_Table.append(p) - except Exception as e_logline: - print('Error {0} found during reading file {1} at line {2}'.format(e_logline, inputFileName, lineNumber)) - return modsec_Table - except FileNotFoundError: - print('File "', inputFileName, '" not found') - return 'error' - except Exception as e: - #print('Error found during read file ', inputFileName) - print('Error {0} found during read file {1} at line {2}'.format(e, inputFileName, lineNumber)) - return 'error' - -if __name__ == "__main__": - if jsonaudit is True: - modsec_Table = processModsecAudit3(inputFileName) - else: - modsec_Table = processModsecAudit(inputFileName) - if isinstance(modsec_Table, str) and modsec_Table in 'error': - print('No modsecurity audit log found') - elif isinstance(modsec_Table, list) and len(modsec_Table) == 0: - print('No modsecurity events found in the specified file') - else: - records_total = len(modsec_Table) - modsec_entries = [] - for modsec_entry in modsec_Table: - if jsonaudit is False: - json_modsec_entry = modsecLog2Info(modsec_entry) - else: - json_modsec_entry = modsec_entry - if FILTER_INCLUDE: - if dict(json_modsec_entry)['transaction']['remote_address'] in filter_include_table: - modsec_entries.append(json_modsec_entry) - records_processed_cnt +=1 - else: - 
records_skipped_cnt +=1 - elif FILTER_EXCLUDE: - if dict(json_modsec_entry)['transaction']['remote_address'] not in filter_exclude_table: - modsec_entries.append(json_modsec_entry) - records_processed_cnt +=1 - else: - records_skipped_cnt +=1 - elif (isinstance(json_modsec_entry, str)) and ('ERROR' in json_modsec_entry): - records_skipped_cnt += 1 - else: - modsec_entries.append(json_modsec_entry) - records_processed_cnt +=1 - if records_processed_cnt >= MAXEVENTS: - print('-' * 10, 'Limit ', MAXEVENTS, ' of events has been reached. ') - print('-' * 10, 'The rest of ', str(records_total - MAXEVENTS - records_skipped_cnt), ' events will be skipped ...', '-' * 10) - break - print('-' * 10, 'modsec_audit events processed: %s ' % records_processed_cnt, '-' * 10) - print('-' * 10, 'modsec_audit events skipped by INCLUDED/EXCLUDED options or INVALID :', records_skipped_cnt, '-' * 10) - if len(modsec_entries) < 1: - print('ERROR : modsec_audit entries to analyze not found with used filters') - else: - modsecSaveJson(modsec_entries, jsonOutputFilename, JSON_ONE_PER_LINE) - outputWithGraphs = modsecViewGraphs(modsec_entries) - modsecSaveXLSX(modsec_entries, xlsxOutputFilename, outputWithGraphs) diff --git a/modsecurity_parser.py b/modsecurity_parser.py new file mode 100644 index 0000000..18d81f8 --- /dev/null +++ b/modsecurity_parser.py @@ -0,0 +1,865 @@ +"""modsecurity_parser.py. 
"""modsecurity_parser.py.
Module to analyze modsecurity audit log and present output as:
 - json file (compatible with default JSON logging)
 - xlsx report
 - png with graphs
2019.01 - molu8bits (at) gmail (dot) com
"""

from collections import OrderedDict, Counter
from time import localtime, strftime
from datetime import datetime

import os
import sys
import argparse
import re
import json
import openpyxl

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import matplotlib
matplotlib.use('Agg')


DEBUG = False
MAXEVENTS = 90000
SAVEOUTPUTJSON = True
JSON_ONE_PER_LINE = False
# Both filter flags are re-derived from the command line further down.
FILTER_INCLUDE = True
FILTER_EXCLUDE = True
LOG_TIMESTAMP_FORMAT = '%d/%b/%Y:%H:%M:%S %z'  # e.g. "01/Mar/2018:05:26:41 +0100"
LOG_TIMESTAMP_FORMAT_SHORT = '%Y-%m-%d_%H:%M'
LOG_TIMESTAMP_FORMAT_TIMEMS = '%d/%b/%Y:%H:%M:%S.%f %z'  # e.g. "01/Mar/2018:05:26:41.341644 +0100"


# modsec_patterns
# Section delimiters of the native (non-JSON) audit log: "--<id>-A--" opens an
# event and "--<id>-Z--" closes it.
a_pattern = re.compile(r'^--\w{6,10}-A--$')
z_pattern = re.compile(r'^--\w{6,10}-Z--$')
modsec_event_types = ['A', 'B', 'C', 'E', 'F', 'H', 'I', 'J', 'K']
# Each pattern extracts the text between a look-behind prefix and a look-ahead suffix.
MODSEC_MESSAGE_FILE_PATTERN = r'(?<=\[file\s\").*?(?="\])'
MODSEC_MESSAGE_MSG_PATTERN = r'(?<=\[msg\s\").*?(?=\"\])'
MODSEC_MESSAGE_ID_PATTERN = r'(?<=\[id\s\").*?(?=\"\])'
MODSEC_MESSAGE_SEVERITY_PATTERN = r'(?<=\[severity\s\").*?(?=\"\])'
MODSEC_MESSAGE_MATURITY_PATTERN = r'(?<=\[maturity\s\").*?(?=\"\])'
MODSEC_MESSAGE_ACCURACY_PATTERN = r'(?<=\[accuracy\s\").*?(?=\"\])'
MODSEC_MESSAGE_MESSAGE_PATTERN = r'(?<=Message:).*?(?=\.\ \[)'
MODSEC_V3_MESSAGE_PHASE_PATTERN = r'(?<=\(phase).*?(?=\))'
# NOTE: this used to read r'(?<=\Matched)...'. "\M" is an unknown escape and
# makes `re` raise "bad escape" on Python 3.7+, so the pattern could never match.
MODSEC_V3_MESSAGE_MESSAGE_PATTERN = r'(?<=Matched).*?(?=\[)'
MODSEC_V3_MESSAGE_MSG_PATTERN = r'(?<=\[msg\s\").*?(?=\"\])'

# parse the command line arguments
argParser = argparse.ArgumentParser()
argParser.add_argument('-f', type=str, help='input file with the ModSecurity audit log', required=False)
argParser.add_argument('-j', type=str, help='output file name for JSON format', required=False)
argParser.add_argument('-x', type=str, help='output file name for Excel format', required=False)
argParser.add_argument('-g', type=str, help='output file name for Graphs - PNG format', required=False)
argParser.add_argument(
    '-e',
    '--exclude',
    type=str,
    nargs='+',
    help='source IP addresses to exclude from the results as a list (e.g. --exclude 127.0.0.1 192.168.0.1)',
    required=False)
argParser.add_argument(
    '-i',
    '--include',
    type=str,
    nargs='+',
    help='source IP addresses to include only into the results as a list (e.g. --include 1.2.3.4 5.5.5.5)',
    required=False)
argParser.add_argument('-l', type=str, help='output file name for logging purposes', required=False)
# store_true flags default to the boolean False; the former default was the
# (truthy) string "False", which only worked thanks to the `is True` checks below.
argParser.add_argument(
    '--jsononeperline',
    action="store_true",
    help='events in output JSON will be written one per line, otherwise by default JSON is human-readable',
    default=False)
argParser.add_argument(
    '--version3',
    action="store_true",
    help='required if modsec_audit.log is produced by ModSecurity3',
    default=False)
argParser.add_argument('--jsonaudit', action='store_true', help='required if modsec_audit.log is JSON')
passedArgs = vars(argParser.parse_args())


input_filename = passedArgs['f']
JSON_OUTPUT_FILENAME = passedArgs['j']
JSON_ONE_PER_LINE = passedArgs['jsononeperline'] is True
VERSION3 = passedArgs['version3'] is True
JSONAUDIT = passedArgs['jsonaudit'] is True

# Modsecurity JSON output for message doesn't comprise 'Message:' at the beginning of the string.
if JSONAUDIT:
    MODSEC_MESSAGE_MESSAGE_PATTERN = r'(?<=^).*(?=\.\s\[)'

# Modsecurity3 message information (if exists) starts with 'ModSecurity' string.
if VERSION3:
    a_pattern = re.compile(r'^---\w{8,10}---A--$')
    z_pattern = re.compile(r'^---\w{8,10}---Z--$')
    # Was r'(?<=\ModSecurity:)...': the stray backslash ("\M") is rejected by
    # `re`, which turned every v3 message description into the '?' fallback.
    MODSEC_MESSAGE_MESSAGE_PATTERN = r'(?<=ModSecurity:).*?(?=\[)'

XLSX_OUTPUT_FILENAME = passedArgs['x']
LOG_OUTPUT_FILENAME = passedArgs['l']
GRAPH_OUTPUT_FILENAME = passedArgs['g']

# Always define both tables so later code can reference them regardless of
# which filter (if any) was requested. --include wins when both are given.
filter_include_table = []
filter_exclude_table = []
if passedArgs['include'] is not None:
    filter_include_table = passedArgs['include']
    FILTER_INCLUDE = True
    FILTER_EXCLUDE = False
elif passedArgs['exclude'] is not None:
    filter_exclude_table = passedArgs['exclude']
    FILTER_INCLUDE = False
    FILTER_EXCLUDE = True
else:
    FILTER_INCLUDE = False
    FILTER_EXCLUDE = False

datetimenow = strftime('%Y-%m-%d_%H-%M-%S', localtime())

RECORDS_TOTAL = 0
RECORDS_SKIPPED_CNT = 0
RECORDS_PROCESSED_CNT = 0


if input_filename is None:
    print('No parameter input_filename, looking for modsec_audit.log in current directory ...')
    input_filename = os.path.join(os.getcwd(), 'modsec_audit.log')
else:
    print(f'input_filename: {input_filename}')

# Output files default to "<input basename>_<timestamp>.<ext>" and are written
# into a "modsec_output" directory next to the input file.
FILE_BASENAME = str(os.path.splitext(os.path.split(input_filename)[-1])[0]) + '_' + str(datetimenow)
fileBaseOutputDir = os.path.join(os.path.dirname(input_filename), 'modsec_output')
if JSON_OUTPUT_FILENAME is None:
    JSON_OUTPUT_FILENAME = FILE_BASENAME + '.json'
if XLSX_OUTPUT_FILENAME is None:
    XLSX_OUTPUT_FILENAME = FILE_BASENAME + '.xlsx'
if LOG_OUTPUT_FILENAME is None:
    LOG_OUTPUT_FILENAME = FILE_BASENAME + '.log'
if GRAPH_OUTPUT_FILENAME is None:
    GRAPH_OUTPUT_FILENAME = FILE_BASENAME + '.png'
def safedictkey(dictname, keyname, default='None'):
    """Return the value reached by following a chain of nested keys.

    Args:
        dictname (dict): (possibly nested) container to look into.
        keyname (list): keys/indexes applied one after another, outermost
            first, e.g. ``['request', 'headers', 'Host']``.
        default: value returned when the chain is empty or any step cannot be
            resolved. Defaults to the string ``'None'``.

    Returns:
        The nested value, or ``default`` when it cannot be reached.
    """
    current = dictname
    resolved = False
    try:
        for key in keyname:
            current = current[key]
            resolved = True
    except (LookupError, TypeError):
        # Missing key/index, a non-subscriptable intermediate value, an
        # unhashable key or a non-iterable ``keyname``.
        return default
    # An empty chain selects nothing, which has always meant "use the default".
    return current if resolved else default


def get_params(string_in, separator=' ', defaultmissing='-', params_to_get=3):
    """Split a string into exactly three parameters.

    The input is split on ``separator``. If that yields more than
    ``params_to_get`` pieces the string is considered malformed and is kept
    whole as the first parameter. Missing positions are filled with
    ``defaultmissing``.

    Args:
        string_in: input value; converted with ``str()`` before splitting.
        separator (str): separator used to split the input. Default ``' '``.
        defaultmissing (str): filler for missing elements. Default ``'-'``.
        params_to_get (int): maximum number of pieces accepted from the split.

    Returns:
        tuple: always three values ``(first, second, third)``.
    """
    text = str(string_in)
    parts = text.split(separator)
    if len(parts) > params_to_get:
        parts = [text]
    # Three values are always returned, so pad to at least three even when the
    # caller asks for fewer (this used to raise IndexError).
    wanted = max(params_to_get, 3)
    parts.extend([defaultmissing] * (wanted - len(parts)))
    return parts[0], parts[1], parts[2]


def regular_expression_evaluate(
        string_in, regular_expression,
        group=True, to_split=False, to_split_value='/', to_split_column=-1):
    """Search ``string_in`` for ``regular_expression`` and return the hit.

    Args:
        string_in (str): text to search.
        regular_expression (str): pattern handed to ``re.search``.
        group (bool, optional): return the matched text when True, the raw
            result of ``re.search`` (match object or None) when False.
            Defaults to True.
        to_split (bool, optional): with ``group`` set, split the matched text
            and return a single piece of it. Defaults to False.
        to_split_value (str, optional): separator for the split. Defaults to '/'.
        to_split_column (int, optional): index of the piece to return.
            Defaults to -1 (the last one).

    Returns:
        The matched text, the selected piece of it, or the ``re.search``
        result; the string ``'?'`` when nothing matched or the pattern, input
        or split arguments are unusable.
    """
    try:
        match = re.search(regular_expression, string_in)
        if not group:
            return match
        if match is None:
            return '?'
        found = match.group()
        if to_split:
            return found.split(to_split_value)[to_split_column]
        return found
    except (re.error, TypeError, ValueError, IndexError):
        # Invalid pattern, non-string input, empty separator or a split index
        # that does not exist: report "unknown" instead of failing the export.
        return '?'


def modsec_save_json(dict_to_save, file_to_save, one_per_line):
    """Export modsec_audit events to a ``*.json`` file in ``fileBaseOutputDir``.

    Args:
        dict_to_save (list): events, each one JSON serialisable.
        file_to_save (str): output file name (joined to ``fileBaseOutputDir``).
        one_per_line (bool): True writes one compact JSON document per line
            (the layout of JSON audit logging); False writes human readable,
            4-space indented documents.

    Failures are reported on stdout and never raised.
    """
    dump_options = {} if one_per_line else {'indent': 4, 'sort_keys': False}
    try:
        os.makedirs(fileBaseOutputDir, exist_ok=True)
        # The context manager closes the file even when serialisation fails.
        with open(os.path.join(fileBaseOutputDir, file_to_save), 'w', encoding='utf-8') as file_out:
            for line in dict_to_save:
                file_out.write(json.dumps(line, **dump_options))
                file_out.write('\n')
    except (OSError, TypeError, ValueError) as exception:
        print(f'modsec_save_json() thrown exception: {exception}')


def modsec_save_xlsx(modsec_dict, output_xlsx_filename, output_with_graphs):
    """Export processed modsec_audit events into an XLSX formatted file.

    One row is written per audit message of every event. An event without any
    message still gets a single row, with the message columns set to '-'.
    Message fields are extracted with the module-level ``MODSEC_MESSAGE_*``
    patterns.

    Args:
        modsec_dict (list): audit events as (JSON-like) dictionaries.
        output_xlsx_filename (str): report file name (joined to
            ``fileBaseOutputDir``).
        output_with_graphs (str): path of the PNG to embed on a 'Graphs'
            sheet, or the string 'error' when no image is available.

    Returns:
        bool: always True; problems are reported on stdout.
    """
    modsec_header_xlsx = ['transaction_id', 'event_time', 'remote_address', 'request_host',
                          'request_useragent', 'request_line', 'request_line_method', 'request_line_url',
                          'request_line_protocol', 'response_protocol', 'response_status',
                          'action', 'action_phase', 'action_message',
                          'message_type', 'message_description', 'message_rule_id', 'message_rule_file',
                          'message_msg', 'message_severity', 'message_accuracy', 'message_maturity',
                          'full_message_line'
                          ]
    workbook = openpyxl.Workbook()
    ws1 = workbook.active
    ws1.title = 'Modsec_entries'
    ws1.append(modsec_header_xlsx)

    for entry_mod in modsec_dict:
        # Reset per event so the error report below never fails on an unbound
        # name nor shows the id of the previous event.
        transaction_id = '-'
        try:
            transaction_id = entry_mod['transaction']['transaction_id']
            event_time = entry_mod['transaction']['time']
            remote_address = entry_mod['transaction']['remote_address']
            request_line = entry_mod['request']['request_line']
            request_line_method, request_line_url, request_line_protocol = get_params(
                string_in=request_line, defaultmissing='-', params_to_get=3)
            intercepted = safedictkey(entry_mod, ['audit_data', 'action', 'intercepted'], '-') is True

            # Columns shared by every row of this event (header order).
            event_columns = [
                transaction_id, event_time, remote_address,
                safedictkey(entry_mod, ['request', 'headers', 'Host'], '-'),
                safedictkey(entry_mod, ['request', 'headers', 'User-Agent'], '-'),
                request_line, request_line_method, request_line_url, request_line_protocol,
                safedictkey(entry_mod, ['response', 'protocol'], '-'),
                safedictkey(entry_mod, ['response', 'status'], '-'),
                'intercepted' if intercepted else '-',
                safedictkey(entry_mod, ['audit_data', 'action', 'phase'], '-'),
                safedictkey(entry_mod, ['audit_data', 'action', 'message'], '-'),
            ]

            audit_data = entry_mod['audit_data']
            messages = audit_data['messages'] if 'messages' in audit_data else None
            if messages:
                message_type = 'multiple' if len(messages) > 1 else 'single'
                for each in messages:
                    ws1.append(event_columns + [
                        message_type,
                        regular_expression_evaluate(each, MODSEC_MESSAGE_MESSAGE_PATTERN),
                        regular_expression_evaluate(each, MODSEC_MESSAGE_ID_PATTERN),
                        regular_expression_evaluate(
                            each, MODSEC_MESSAGE_FILE_PATTERN,
                            to_split=True, to_split_value='/', to_split_column=-1),
                        regular_expression_evaluate(each, MODSEC_MESSAGE_MSG_PATTERN),
                        regular_expression_evaluate(each, MODSEC_MESSAGE_SEVERITY_PATTERN),
                        regular_expression_evaluate(each, MODSEC_MESSAGE_ACCURACY_PATTERN),
                        regular_expression_evaluate(each, MODSEC_MESSAGE_MATURITY_PATTERN),
                        each,
                    ])
            else:
                # No (or an empty) message list: keep the event visible in the
                # report instead of silently dropping it.
                ws1.append(event_columns + ['None'] + ['-'] * 7 + ['None'])
        except Exception as exception:
            # Row-level boundary: malformed events and values the workbook
            # refuses (e.g. illegal characters coming from attack payloads)
            # must not abort the whole report.
            print(f'Exception at modsec_save_xlsx() :{exception}, transaction_id :{transaction_id}')

    # Compare for equality: a substring test would also drop the graph for any
    # PNG path that merely contains the word "error".
    if output_with_graphs and output_with_graphs != 'error':
        try:
            from openpyxl.drawing.image import Image
            img = Image(output_with_graphs)
            ws2 = workbook.create_sheet('Graphs')
            ws2.add_image(img)
        except (ImportError, OSError) as exception:
            # Missing imaging backend or unreadable PNG: still save the table.
            print(f'modsec_save_xlsx() could not embed graphs: {exception}')

    try:
        os.makedirs(fileBaseOutputDir, exist_ok=True)
        file_out = os.path.join(fileBaseOutputDir, output_xlsx_filename)
        workbook.save(filename=file_out)
    except Exception as exception:
        print(f'modsec_save_xlsx() has thrown exception: {exception}')

    return True
Check log and Include/Exclude filters') + + # GRAPHS PART I + # Collect information into lists/dicts to make particular graphs + + src_ip_tab = [] + event_time_action = [] + event_messages = [] + intercepted_reason = [] + event_rules = [] + for entry_mod in modsec_dict: + try: + # Graph data for "TOP 10 IP source addresses" + src_ip_tab.append(entry_mod['transaction']['remote_address']) + + # Graph data for "Modsecurity Events reported vs intercepted" + if (VERSION3 is False) and \ + ('action' in entry_mod['audit_data'].keys() and + 'intercepted' in entry_mod['audit_data']['action'].keys()): + event_time_action.append([entry_mod['transaction']['time'], True]) + + elif (VERSION3 is True) and len(entry_mod['audit_data']) > 0: + for each_msg in entry_mod['audit_data']['messages']: + if each_msg.startswith("ModSecurity: Access denied"): + event_time_action.append([entry_mod['transaction']['time'], True]) + else: + event_time_action.append([entry_mod['transaction']['time'], False]) + else: + # No 'intercepted' + event_time_action.append([entry_mod['transaction']['time'], False]) + except Exception as exception2: + print(f'Exception in Graph TOP 10 IP source addresses: {exception2}') + + # Graph data for "TOP 20 rule hits" + try: + if 'messages' in entry_mod['audit_data'].keys(): + messages = safedictkey(entry_mod, ['audit_data', 'messages'], '-') + for each in messages: + event_messages.append(each) + rule_id = regular_expression_evaluate(each, MODSEC_MESSAGE_ID_PATTERN) + rule_msg = regular_expression_evaluate(each, MODSEC_MESSAGE_MSG_PATTERN) + rule_severity = regular_expression_evaluate(each, MODSEC_MESSAGE_SEVERITY_PATTERN) + rule_file = regular_expression_evaluate(each, MODSEC_MESSAGE_FILE_PATTERN) + + # Cut the [msg] to 27 chars if it is longer than 30 chars. + # If [msg] and [id] not found then treat message description as the [msg] + if len(rule_msg) > 30: + rule_msg = rule_msg[:27] + '...' + if rule_msg == '?' 
and rule_id == '-': + rule_msg = str(each)[:30] + rule_descr = 'id: ' + str(rule_id) + ', sev: ' + str(rule_severity) + ', msg: ' + str(rule_msg) + event_rules.append([rule_id, rule_msg, rule_severity, rule_file, rule_descr]) + else: + # Skip modsec_audit entries without [message] part + pass + except Exception as exception3: + print(f'Exception in TOP 20 rule hits: {exception3}') + print('for transaction_id :', safedictkey(entry_mod, ['transaction', 'transaction_id'], '-')) + + # Graph data for "TOP 10 Attacks intercepted" + try: + if (VERSION3 is False) and ('action' in entry_mod['audit_data']): + msg = entry_mod['audit_data']['action']['message'] + if len(msg) > 60: + msg = msg[:50] + '...' + intercepted_reason.append( + [entry_mod['audit_data']['action']['phase'], msg, + 'phase ' + str(entry_mod['audit_data']['action']['phase']) + ': ' + msg]) + elif (VERSION3 is True) and len(entry_mod['audit_data']) > 0: + for each_msg in entry_mod['audit_data']['messages']: + if each_msg.startswith("ModSecurity: Access denied"): + msg = regular_expression_evaluate(each_msg, MODSEC_V3_MESSAGE_MSG_PATTERN) + if len(msg) > 60: + msg = msg[:50] + '...' 
+ phase = regular_expression_evaluate(each_msg, MODSEC_V3_MESSAGE_PHASE_PATTERN) + intercepted_reason.append([phase, msg, 'phase ' + phase + ': ' + msg]) + + except Exception as exception: + print(f'Exception in Graph TOP 10 Attacks intercepted {exception}') + + # Modsecurity events Passed vs Intercepted + np_event_time_action = np.array(event_time_action) + event_times1 = np_event_time_action[:, 0] + try: + event_times = list(map(lambda x: datetime.strptime(x.replace('--', '-'), + LOG_TIMESTAMP_FORMAT).replace(tzinfo=None), event_times1)) + except ValueError: + event_times = list(map(lambda x: datetime.strptime(x.replace('--', '-'), + LOG_TIMESTAMP_FORMAT_TIMEMS).replace(tzinfo=None), event_times1)) + except Exception as exception: + print(f'Exception timestamp extraction in Passed vs Intercepted {exception}') + event_action = np_event_time_action[:, 1] + event_times_min = min(event_times) + event_times_max = max(event_times) + event_times_range = event_times_max - event_times_min + event_times_range_seconds = int(event_times_range.total_seconds()) + event_times_range_minutes = int(event_times_range.total_seconds() / 60) + if event_times_range_minutes < 60: + periods = str(int(event_times_range_seconds / 1)) + 's' + else: + periods = str(int(event_times_range_minutes / 30)) + 'min' + events_df = pd.DataFrame({ + 'date': pd.to_datetime(event_times), + 'action': event_action + }) + intercepted = [] + passed = [] + passed_cnt2 = 0 + intercepted_cnt2 = 0 + for row in events_df['action']: + if row == 'True': + intercepted.append(1) + passed.append(0) + intercepted_cnt2 += 1 + else: + intercepted.append(0) + passed.append(1) + passed_cnt2 += 1 + events_df['intercepted'] = intercepted + events_df['passed'] = passed + + # GRAPHS PART II + + # TOP 10 IP addresses Graph - data preparation + ipaddr_cnt = Counter() + for word in src_ip_tab: + ipaddr_cnt[word] += 1 + ipaddr_cnt_top10 = dict(ipaddr_cnt.most_common(10)) + + # TOP 10 Interception Reason - data preparation + 
intercepted_cnt = Counter() + for word in intercepted_reason: + intercepted_cnt[word[2]] += 1 + intercepted_cnt_top10 = dict(intercepted_cnt.most_common(10)) + # TOP 20 Rule IDs hit - data preparation + event_messages_ids = Counter() + for word in event_rules: + event_messages_ids[word[4]] += 1 + event_messages_ids_top20 = dict(event_messages_ids.most_common(20)) + + # GRIDS VERSION BEGIN + fig = plt.figure(0) + grid = plt.GridSpec(3, 3, wspace=1.1, hspace=1.1) + ax1 = plt.subplot(grid[0, 0:3]) + ax21 = plt.subplot(grid[1, 0]) + ax22 = plt.subplot(grid[2, 0]) + ax31 = plt.subplot(grid[1, 1]) + ax32 = plt.subplot(grid[2, 1]) + ax41 = plt.subplot(grid[1, 2]) + ax42 = plt.subplot(grid[2, 2]) + + # Graph Included or Excluded + modsec_inc_exc_str = '' + if FILTER_INCLUDE: + modsec_inc_exc_str = 'Filter INCLUDE active. Skipped the rest of ' + str(RECORDS_SKIPPED_CNT) + \ + ' events where source IP address NOT in: ' + str(filter_include_table) + elif FILTER_EXCLUDE: + modsec_inc_exc_str = 'Filter EXCLUDE active. Skipped the rest of ' + str(RECORDS_SKIPPED_CNT) + \ + ' events where source IP address in: ' + str(filter_exclude_table) + else: + modsec_inc_exc_str = 'Filter INCLUDE/EXCLUDE non-active.' 
+ + title_timespan = 'Analysis of ' + str(RECORDS_PROCESSED_CNT) + ' modsecurity events in timespan: ' + \ + str(event_times_min.strftime("%Y-%m-%d_%H:%M")) + ' - ' + \ + str(event_times_max.strftime("%Y-%m-%d_%H:%M")) + '\n' + title_total = 'Total number of events found in logfile ' + str(RECORDS_TOTAL) + \ + ' (output always trimmed to variable MAXEVENTS = ' + str(MAXEVENTS) + ' )\n' + title_reported_intercepted = 'events passed: ' + str(passed_cnt2) + \ + ' , events intercepted: ' + str(intercepted_cnt2) + plot_title = title_timespan + title_total + modsec_inc_exc_str + '\n\n' + title_reported_intercepted + if event_times_range_seconds < 1800: + short_time_range_message = 'Creating timeline graph is not available for timespan ' + \ + str(event_times_range_seconds) + ' seconds, skipping ...' + plt.subplot(ax1) + plt.text(0.5, 0.5, short_time_range_message, horizontalalignment='center', verticalalignment='center') + plt.title(plot_title) + else: + ex = events_df.groupby(pd.Grouper(key='date', freq=periods)).sum() + ex.plot(ax=ax1, kind='bar', title=plot_title, stacked=True, color={'purple', 'red'}, fontsize=7, rot=45) + + # Bar chart "TOP 10 IP addresses" + plt.subplot(ax21) + patches, texts, autotexts = plt.pie(ipaddr_cnt_top10.values(), autopct='%1.1f%%', + shadow=True, startangle=90, radius=1.0) + plt.title(f'TOP {len(ipaddr_cnt_top10)} IP addresses (out of total {len(ipaddr_cnt)}) ', + bbox={'facecolor': '0.8', 'pad': 5}) + + # Legend for chart "TOP 10 IP addresses" + # x_value = np.char.array(list(ipaddr_cnt_top10.keys())) + y_value = np.array(list(ipaddr_cnt_top10.values())) + labels = [f'{i} --> {j} hits' for i, j in + zip(ipaddr_cnt_top10.keys(), ipaddr_cnt_top10.values())] + if len(ipaddr_cnt_top10.keys()) >= 1: + patches, labels, dummy = zip(*sorted(zip(patches, labels, y_value), key=lambda x: x[2], reverse=True)) + plt.subplot(ax22) + plt.axis('off') + plt.legend(patches, labels, loc='center left', bbox_to_anchor=(-0.1, 1.), fontsize=7) + + # Bar chart 
"TOP 10 Attacks intercepted" + plt.subplot(ax31) + patches, texts, autotexts = plt.pie(intercepted_cnt_top10.values(), + autopct='%1.1f%%', shadow=True, startangle=90, radius=1.0, normalize=True) + [_.set_fontsize(7) for _ in texts] + plt.title('TOP 10 Attacks intercepted', bbox={'facecolor': '0.8', 'pad': 5}) + + # Legend for chart "TOP 10 Attacks intercepted" + # x_value = np.char.array(list(intercepted_cnt_top10.keys())) + y_value = np.array(list(intercepted_cnt_top10.values())) + labels = [f'{i} --> {j} hits' + for i, j in zip(intercepted_cnt_top10.keys(), intercepted_cnt_top10.values())] + if len(intercepted_cnt_top10.values()) >= 1: + patches, labels, dummy = zip(*sorted(zip(patches, labels, y_value), key=lambda x: x[2], reverse=True)) + plt.subplot(ax32) + plt.axis('off') + plt.legend(patches, labels, loc='center left', bbox_to_anchor=(-0.1, 1.), fontsize=7) + else: + plt.subplot(ax32) + plt.axis('off') + plt.text( + 0.5, 0.5, 'No intercepted events found for given data set', + horizontalalignment='center', verticalalignment='center') + + # Bar chart "TOP 20 Rule IDs hit" + plt.subplot(ax41) + patches, texts, autotexts = plt.pie( + event_messages_ids_top20.values(), + autopct='%1.1f%%', shadow=True, startangle=90, radius=1.0, normalize=True) + _ = autotexts + plt.title('TOP 20 Rule IDs hit', bbox={'facecolor': '0.8', 'pad': 5}) + + # Legend for chart "TOP 20 Rule IDs hit" + # x_value = np.char.array(list(event_messages_ids_top20.keys())) + y_value = np.array(list(event_messages_ids_top20.values())) + labels = [ + 'f{i} --> {j} hits' for i, j in zip(event_messages_ids_top20.keys(), + event_messages_ids_top20.values())] + if len(event_messages_ids_top20.keys()) >= 1: + patches, labels, dummy = zip(*sorted(zip(patches, labels, y_value), + key=lambda x_value: x_value[2], reverse=True)) + plt.subplot(ax42, axis='off') + plt.axis('off') + plt.legend(patches, labels, loc='center left', bbox_to_anchor=(-0.1, 1.), fontsize=7) + + # GRID VERSION END + + graph_title = 
def _split_header(line):
    """Split a ``Name: value`` header line into a ``(name, value)`` pair.

    A line that does not contain the ``': '`` separator is returned whole as
    the name, paired with the placeholder value ``'-'``.
    """
    name, separator, value = line.partition(': ')
    if not separator:
        return line, '-'
    return name, value


def _remote_address(entry):
    """Return ``entry['transaction']['remote_address']`` or ``None``.

    ``None`` is returned when the entry is not a mapping or when either key
    is missing, so callers can filter events without raising.
    """
    if not isinstance(entry, dict):
        return None
    transaction = entry.get('transaction')
    if not isinstance(transaction, dict):
        return None
    return transaction.get('remote_address')


def modsec_log_to_info(single_entry):
    """Convert the raw lines of one audit-log event into a nested dict.

    The lines are cut into sections at the boundary lines that carry the
    event's separator id; sections A, B, F and H are then parsed into the
    ``transaction``, ``request``, ``response`` and ``audit_data`` keys.

    Args:
        single_entry: list of text lines for a single event, starting with
            the section ``A`` boundary line and ending with the section ``Z``
            boundary line.

    Returns:
        An ``OrderedDict`` with the keys ``transaction``, ``request``,
        ``response`` and ``audit_data``, or the string ``'ERROR'`` when the
        entry could not be parsed (the failure is printed).
    """
    modsec_dict = OrderedDict()
    a_header = single_entry[0]
    # The separator id sits between the leading dashes and the "-A-" marker.
    # NOTE(review): assumes the boundary line has no literal '^'; the previous
    # `find('^--') + 3` / `find('^---') + 4` offsets evaluated to these same
    # constants only because find() returned -1.
    if VERSION3:
        e_separator = a_header[3:a_header.find('---A--')]
    else:
        e_separator = a_header[2:a_header.find('-A-')]
    try:
        # Map every section letter to the index of its boundary line.
        item_kv = OrderedDict()
        for item_number, item in enumerate(single_entry):
            if e_separator in item:
                item_kv[item.rstrip()[-3:-2]] = item_number

        # A section's content runs from the line after its own boundary up to
        # (not including) the next boundary line.
        item_keys = list(item_kv)
        item_kv_full = OrderedDict()
        for position, item_letter in enumerate(item_keys):
            if item_letter in modsec_event_types:
                first = item_kv[item_letter] + 1
                last = item_kv[item_keys[position + 1]]
                item_kv_full[item_letter] = single_entry[first:last]

        # A missing section raises KeyError, which marks the whole entry 'ERROR'.
        modsec_a = item_kv_full['A'][0]
        modsec_b = item_kv_full['B']
        modsec_f = item_kv_full['F']
        modsec_h = item_kv_full['H']

        # First line is the request/status line and the last line is dropped
        # too; everything in between is treated as a header. Both sections
        # use the same splitting rule, including the '-' placeholder for
        # lines that have no ': ' separator.
        modsec_b_headers = dict(_split_header(line) for line in modsec_b[1:-1])
        modsec_f_headers = dict(_split_header(line) for line in modsec_f[1:-1])

        modsec_h_dict = OrderedDict()
        for elem in modsec_h:
            if elem.startswith(('Message:', 'ModSecurity:')):
                modsec_h_dict.setdefault('messages', []).append(elem)
            elif elem.startswith('Apache-Handler:'):
                modsec_h_dict.setdefault('handlers_messages', []).append(elem)
            elif elem.startswith('Apache-Error:'):
                modsec_h_dict.setdefault('error_messages', []).append(elem)
            elif elem.startswith('Producer:'):
                modsec_h_dict['producer'] = elem.split(': ')[1].strip(' .').split('; ')
            elif elem.startswith('Engine-Mode:'):
                modsec_h_dict['Engine-Mode'] = elem.split(': ')[1].strip('"')
            elif elem.startswith('Server:'):
                modsec_h_dict['server'] = elem.split(': ')[1]
            elif elem.startswith('Action: '):
                action = {}
                modsec_h_dict['action'] = action
                if 'ntercepted' in elem:
                    action['intercepted'] = True
                    # Single digit that follows the word "phase" and one separator character.
                    action['phase'] = int(elem[elem.find('phase') + 6])
                    # Uses the most recent message line seen so far; if none was
                    # collected the KeyError marks the whole entry as 'ERROR'.
                    action['message'] = modsec_h_dict['messages'][-1].split('.')[1].strip()
            elif elem.startswith('Stopwatch2'):
                stopwatch = {}
                modsec_h_dict['stopwatch'] = stopwatch
                for stopw in elem.split(' '):
                    if '=' in stopw:
                        parts = stopw.split('=')
                        stopwatch[parts[0]] = int(parts[1].strip(','))

        modsec_a_split = modsec_a.split()
        modsec_dict['transaction'] = {
            'time': modsec_a_split[0].replace('[', '') + ' ' + modsec_a_split[1].replace(']', ''),
            'transaction_id': modsec_a_split[2],
            'remote_address': modsec_a_split[3],
            'remote_port': modsec_a_split[4],
            'local_address': modsec_a_split[5],
            'local_port': modsec_a_split[6]}

        if modsec_b:
            modsec_dict['request'] = {'request_line': modsec_b[0], 'headers': modsec_b_headers}
        else:
            modsec_dict['request'] = 'None'

        # A response is only reported when more than three headers were parsed.
        if len(modsec_f_headers) > 3:
            try:
                status_line = modsec_f[0].split(' ')
                modsec_dict['response'] = {
                    'protocol': status_line[0],
                    'status': status_line[1],
                    'status_text': ' '.join(status_line[2:]),
                    'headers': modsec_f_headers}
            except IndexError as exception:
                print(f'Exception at modsec_dict["response"]: {exception}')
                modsec_dict['response'] = 'None'
        else:
            modsec_dict['response'] = 'None'
        modsec_dict['audit_data'] = modsec_h_dict
    except Exception as exception:
        # One malformed event must not abort the run: report it and let the caller skip it.
        print(f'modsec_log_to_info() error found: {exception} when processing: {single_entry}')
        modsec_dict = 'ERROR'

    return modsec_dict


def process_modsec_audit_std(audit_input_file):
    """Read an audit log file and group its lines into events.

    An event starts at a line matching ``a_pattern`` and ends at the next
    line matching ``z_pattern``. The opening line is stored as read; all
    following lines are stored right-stripped. An event that is still open
    when the file ends is dropped.

    Args:
        audit_input_file: path of the audit log, read with ``cp437`` encoding.

    Returns:
        A list of events (each a list of lines), or the string ``'error'``
        when the file is missing or cannot be read.
    """
    try:
        with open(audit_input_file, 'r', encoding='cp437') as modsec_f_handler:
            pmas_modsec_table = []
            for log_line in modsec_f_handler:
                if not a_pattern.search(log_line):
                    continue
                modsec_entry = [log_line]
                # Keep consuming the same file iterator until the closing boundary.
                for entry_log in modsec_f_handler:
                    modsec_entry.append(entry_log.rstrip())
                    if z_pattern.search(entry_log):
                        pmas_modsec_table.append(modsec_entry)
                        break
        return pmas_modsec_table
    except FileNotFoundError:
        print(f'File "{audit_input_file}" not found')
        return 'error'
    except Exception as exception:
        print(f'Error found {exception} during read file {audit_input_file}')
        return 'error'


def process_modsec_audit_json(audit_input_file):
    """Read an audit log that stores one JSON document per line.

    Blank lines are skipped. A line that cannot be decoded is reported with
    its line number and skipped; the remaining lines are still read.

    Args:
        audit_input_file: path of the log, read as UTF-8 with undecodable
            bytes ignored.

    Returns:
        A list with the decoded value of every valid line, or the string
        ``'error'`` when the file is missing or cannot be read.
    """
    line_number = 0
    pmaj_modsec_table = []
    try:
        with open(audit_input_file, 'r', encoding='utf-8', errors='ignore') as modsec_f_handler:
            for line_number, log_line in enumerate(modsec_f_handler, start=1):
                if not log_line.strip():
                    continue
                try:
                    pmaj_modsec_table.append(json.loads(log_line))
                except (ValueError, RecursionError) as e_logline:
                    print(f'Error {e_logline} found during reading file {audit_input_file} at line {line_number}')
        return pmaj_modsec_table
    except FileNotFoundError:
        print(f'File "{audit_input_file}" not found')
        return 'error'
    except Exception as exception:
        print(f'Error {exception} found during read file {audit_input_file} at line {line_number}')
        return 'error'


if __name__ == "__main__":
    # Decide the input format once so that the reader and the per-entry
    # conversion below can never disagree.
    use_json_audit = JSONAUDIT is True
    if use_json_audit:
        main_modsec_table = process_modsec_audit_json(input_filename)
    else:
        main_modsec_table = process_modsec_audit_std(input_filename)

    if isinstance(main_modsec_table, str) and main_modsec_table == 'error':
        print('No modsecurity audit log found')
    elif isinstance(main_modsec_table, list) and len(main_modsec_table) == 0:
        print('No modsecurity events found in the specified file')
    else:
        RECORDS_TOTAL = len(main_modsec_table)
        modsec_entries = []
        for modsec_entry in main_modsec_table:
            if use_json_audit:
                json_modsec_entry = modsec_entry
            else:
                json_modsec_entry = modsec_log_to_info(modsec_entry)

            # Unparseable events (the 'ERROR' marker or any other non-mapping)
            # are rejected before the address filters look inside the entry.
            remote_address = _remote_address(json_modsec_entry)
            if not isinstance(json_modsec_entry, dict):
                keep_entry = False
            elif FILTER_INCLUDE:
                keep_entry = remote_address is not None and remote_address in filter_include_table
            elif FILTER_EXCLUDE:
                keep_entry = remote_address is None or remote_address not in filter_exclude_table
            else:
                keep_entry = True

            if keep_entry:
                modsec_entries.append(json_modsec_entry)
                RECORDS_PROCESSED_CNT += 1
            else:
                RECORDS_SKIPPED_CNT += 1

            if RECORDS_PROCESSED_CNT >= MAXEVENTS:
                print(f'----- Limit {MAXEVENTS} of events has been reached. -----')
                print(f'----- The rest of {str(RECORDS_TOTAL - MAXEVENTS - RECORDS_SKIPPED_CNT)}'
                      ' events will be skipped ... -----')
                break
        print(f'----- modsec_audit events processed: {RECORDS_PROCESSED_CNT} -----')
        print(f'----- modsec_audit events skipped by INCLUDED/EXCLUDED options or INVALID: {RECORDS_SKIPPED_CNT} -----')
        if not modsec_entries:
            print('ERROR : modsec_audit entries to analyze not found with used filters')
        else:
            modsec_save_json(modsec_entries, JSON_OUTPUT_FILENAME, JSON_ONE_PER_LINE)
            output_with_graphs = modsec_view_graphs(modsec_entries)
            modsec_save_xlsx(modsec_entries, XLSX_OUTPUT_FILENAME, output_with_graphs)
-z "$HELP" ]]; then echo " VERSION3=Yes - to parse Modsecurity3 audit logs" echo " JSONAUDIT=Yes - to parse JSON type of Modsecurity2/3 logs" else - /usr/bin/python3 /opt/mparser/modsecurity-parser.py $PARAMS + /usr/bin/python3 /opt/mparser/modsecurity_parser.py $PARAMS fi - diff --git a/sample_audit_log/modsec_audit_utc-4.log b/sample_audit_log/modsec_audit_utc-4.log deleted file mode 100644 index 5210731..0000000 --- a/sample_audit_log/modsec_audit_utc-4.log +++ /dev/null @@ -1,84 +0,0 @@ ---c82b023d-A-- -[10/Mar/2020:12:13:30 --0400] Xme8qvZyuuIZU0265B9DWwAAAAc 200.200.200.200 59134 200.200.200.200 80 ---c82b023d-B-- -GET /test HTTP/1.1 -User-Agent: client -Host: test -Accept: */* -Cookie: AccessCard=; UserCard= - ---c82b023d-F-- -HTTP/1.1 400 Bad Request -X-Content-Type-Options: nosniff -X-Frame-Options: SAMEORIGIN -X-XSS-Protection: 1; mode=block -Strict-Transport-Security: max-age=31536000; includeSubDomains -Content-Length: 86 -Connection: close -Content-Type: application/json - ---c82b023d-H-- -Stopwatch: 1583856810084172 20194 (- - -) -Stopwatch2: 1583856810084172 20194; combined=315, p1=3, p2=308, p3=0, p4=0, p5=3, sr=0, sw=1, l=0, gc=0 -Producer: ModSecurity for Apache/2.9.3 (http://www.modsecurity.org/). 
-Server: Apache -Engine-Mode: "DETECTION_ONLY" - ---c82b023d-Z-- - ---d394963e-A-- -[10/Mar/2020:22:13:30 --0400] Xme8qiff04bQ7c8r9KTz@wAAAAI 200.200.200.200 59140 200.200.200.200 80 ---d394963e-B-- -GET /test HTTP/1.1 -User-Agent: client -Host: test -Accept: */* -Cookie: AccessCard=; UserCard= - ---d394963e-F-- -HTTP/1.1 400 Bad Request -X-Content-Type-Options: nosniff -X-Frame-Options: SAMEORIGIN -X-XSS-Protection: 1; mode=block -Strict-Transport-Security: max-age=31536000; includeSubDomains -Content-Length: 86 -Connection: close -Content-Type: application/json - ---d394963e-H-- -Stopwatch: 1583856810234093 16256 (- - -) -Stopwatch2: 1583856810234093 16256; combined=368, p1=6, p2=359, p3=0, p4=0, p5=3, sr=0, sw=0, l=0, gc=0 -Producer: ModSecurity for Apache/2.9.3 (http://www.modsecurity.org/). -Server: Apache -Engine-Mode: "DETECTION_ONLY" - ---d394963e-Z-- - ---646ede1b-A-- -[11/Mar/2020:12:13:30 --0400] Xme8qqHFvi108A74u@QKRQAAAAY 200.200.200.200 59146 200.200.200.200 80 ---646ede1b-B-- -GET /test HTTP/1.1 -User-Agent: client -Host: test -Accept: */* -Cookie: AccessCard=; UserCard= - ---646ede1b-F-- -HTTP/1.1 400 Bad Request -X-Content-Type-Options: nosniff -X-Frame-Options: SAMEORIGIN -X-XSS-Protection: 1; mode=block -Strict-Transport-Security: max-age=31536000; includeSubDomains -Content-Length: 86 -Connection: close -Content-Type: application/json - ---646ede1b-H-- -Stopwatch: 1583856810380195 44102 (- - -) -Stopwatch2: 1583856810380195 44102; combined=369, p1=4, p2=361, p3=0, p4=0, p5=4, sr=0, sw=0, l=0, gc=0 -Producer: ModSecurity for Apache/2.9.3 (http://www.modsecurity.org/). 
-Server: Apache -Engine-Mode: "DETECTION_ONLY" - ---646ede1b-Z-- - diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..4ac05f6 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,120 @@ +[pydocstyle] +inherit = false +ignore = D213,D100,D104 +match = .*/py + + +[pycodestyle] +max-line-length = 120 + + +[isort] +sections = + FUTURE, + STDLIB, + THIRDPARTY, + FIRSTPARTY, + LOCALFOLDER +default_section = LOCALFOLDER +known_third_party = + numpy, + pandas, + keras, + tensorflow, + sklearn, + matplotlib, + scipy +multi_line_output = 3 +lines_after_imports = 2 +force_single_line = True +use_parentheses = True +ensure_newline_before_comments = True +line_length = 80 + + +[flake8] +exclude = + .git + __pycache__, + docs/source/conf.py, + old, + build, + dist + .venv +max-complexity = 30 +max-line-length = 120 +ignore=W504 +# ignore=W504,F401,E402,E266,E203,W503,C408,C416,B001 + + +[bandit] +# targets: ['*.py'] +# targets= .,modsecurity_parser.py +# targets: modsecurity_parser +targets = . 
+# exclude_dirs = ['tests','__pytest_cache__','.venv'] +# exclude_dirs = ["*/tests/*", "*/.venv/*"] +exclude = ./.venv,./.pytest_cache,./tests,./.mypy_cache,__pycache__ + + +[coverage:run] +branch = True +omit = + */__main__.py + */tests/* + */venv/* + +[coverage:report] +exclude_lines = + pragma: no cover + if __name__ == .__main__.: + +[coverage:html] +directory = reports + + +[pylint.config] +[MASTER] + +extension-pkg-whitelist= + numpy, + pandas, + keras, + tensorflow, + sklearn, + matplotlib, + scipy + +[MESSAGES CONTROL] + +#disable=R,C +#enable=E,W +jobs=1 +confidence=HIGH +# confidence= + +disable=raw-checker-failed, + bad-inline-option, + locally-disabled, + file-ignored, + suppressed-message, + useless-suppression, + deprecated-pragma, + use-symbolic-message-instead, + # USER DEFINED + broad-except + simplifiable-if-expression + too-many-statements + #no-else-break + #too-many-branches + +[FORMAT] + +max-line-length=120 +max-module-lines = 2000 + +# [tool:pytest] +# minversion = 6.0 +# addopts = -ra -q +# testpaths = +# tests diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_modsecurity_parser.py b/tests/test_modsecurity_parser.py new file mode 100644 index 0000000..fc0859e --- /dev/null +++ b/tests/test_modsecurity_parser.py @@ -0,0 +1,2 @@ +def test_always_pass(): + pass diff --git a/tests/test_todo_list.py b/tests/test_todo_list.py new file mode 100644 index 0000000..feec750 --- /dev/null +++ b/tests/test_todo_list.py @@ -0,0 +1,62 @@ +# flake8: noqa F401 +# from typing import Any +# from typing import SupportsFloat + +# import sys +# import os +# import path +# from modsecurity_parser import safedictkey # noqa: E402, F401 + +from modsecurity_parser import get_params, safedictkey + +import pytest + + +def test_safedictkey(): + dict = {'audit_data': {"server": "Nginx"}} + keyname = ['audit_data', 'server'] + assert safedictkey(dict, keyname, '-') == "Nginx" + + +def test_get_params(): 
+ string_in = 'GET /verifylogin.do HTTP/1.1' + separator = ' ' + default_missing = '-' + params_to_get = 3 + output = ['GET', '/verifylogin.do', 'HTTP/1.1'] + # assert get_params(string_in, separator, default_missing, params_to_get) == set([output[1], output[0], output[2]]) + assert get_params(string_in, separator, default_missing, params_to_get)[0] == output[0] + assert get_params(string_in, separator, default_missing, params_to_get)[1] == output[1] + assert get_params(string_in, separator, default_missing, params_to_get)[2] == output[2] + + +def test_regular_expression_evaluate(): + pass + + +def test_modsec_save_json(): + pass + + +def test_modsec_save_xlsx(): + pass + + +def test_modsec_view_graphs(): + pass + + +def test_modsec_log_to_info(): + pass + + +def test_process_modsec_audit_std(): + pass + + +def test_process_modsec_audit_json(): + pass + + +def test_all_params(): + pass