From bf09e9bc043e1048e57256e86b0af25a8f1d895e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lex=20Ruiz?= Date: Mon, 9 Oct 2023 12:48:08 +0200 Subject: [PATCH] Add ECS mappings generator (#36) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Add ECS mappings generator, documentation and files for vulnerability detector * Add event generator script * Update template settings --------- Signed-off-by: Álex Ruiz --- ecs/.gitignore | 3 + ecs/README.md | 104 ++++++++ ecs/generate.sh | 83 +++++++ .../event-generator/event_generator.py | 235 ++++++++++++++++++ ecs/vulnerability-detector/fields/subset.yml | 19 ++ .../fields/template-settings-legacy.json | 19 ++ .../fields/template-settings.json | 21 ++ 7 files changed, 484 insertions(+) create mode 100644 ecs/.gitignore create mode 100644 ecs/README.md create mode 100755 ecs/generate.sh create mode 100755 ecs/vulnerability-detector/event-generator/event_generator.py create mode 100644 ecs/vulnerability-detector/fields/subset.yml create mode 100644 ecs/vulnerability-detector/fields/template-settings-legacy.json create mode 100644 ecs/vulnerability-detector/fields/template-settings.json diff --git a/ecs/.gitignore b/ecs/.gitignore new file mode 100644 index 0000000000000..a8047fcd2d67d --- /dev/null +++ b/ecs/.gitignore @@ -0,0 +1,3 @@ +**/mappings +*.log +generatedData.json \ No newline at end of file diff --git a/ecs/README.md b/ecs/README.md new file mode 100644 index 0000000000000..a2d353d245c81 --- /dev/null +++ b/ecs/README.md @@ -0,0 +1,104 @@ +## ECS mappings generator + +This script generates the ECS mappings for the Wazuh indices. + +### Requirements + +- ECS repository clone. The script is meant to be launched from the root level of that repository. +- Python 3.6 or higher +- jq + +### Folder structure + +There is a folder for each module. Inside each folder, there is a `fields` folder with the required +files to generate the mappings. These are the inputs for the ECS generator. + +### Usage + +**Copy the `generate.sh` script to the root level of the ECS repository.** + +Use the `generate.sh` script to generate the mappings for a module. The script takes 3 arguments, +plus 2 optional arguments to upload the mappings to the Wazuh indexer (using **composable** indexes). + +```plaintext +Usage: ./generate.sh [--upload ] + * ECS_VERSION: ECS version to generate mappings for + * INDEXER_SRC: Path to the wazuh-indexer repository + * MODULE: Module to generate mappings for + * --upload : Upload generated index template to the OpenSearch cluster. Defaults to https://localhost:9200 +Example: ./generate.sh v8.10.0 ~/wazuh-indexer vulnerability-detector --upload https://indexer:9200 +``` + +For example, to generate the mappings for the `vulnerability-detector` module using the +ECS version `v8.10.0` and the Wazuh indexer in path `~/wazuh/wazuh-indexer`: + +```bash +./generate.sh v8.10.0 ~/wazuh/wazuh-indexer vulnerability-detector +``` + +### Output + +A new `mappings` folder will be created inside the module folder, containing all the generated files. +The files are versioned using the ECS version, so different versions of the same module can be generated. +For our use case, the most important files are under `mappings//generated/elasticsearch/legacy/`: + +- `template.json`: Elasticsearch compatible index template for the module +- `opensearch-template.json`: OpenSearch compatible index template for the module + +The original output is `template.json`, which is not compatible with OpenSearch by default. In order +to make this template compatible with OpenSearch, the following changes are made: + +- the `order` property is renamed to `priority`. +- the `mappings` and `settings` properties are nested under the `template` property. + +The script takes care of these changes automatically, generating the `opensearch-template.json` file as a result. + +### Upload + +You can either upload the index template using cURL or the UI (dev tools). + +```bash +curl -u admin:admin -k -X PUT "https://indexer:9200/_index_template/wazuh-vulnerability-detector" -H "Content-Type: application/json" -d @opensearch-template.json +``` + +Notes: +- PUT and POST are interchangeable. +- The name of the index template does not matter. Any name can be used. +- Adjust credentials and URL accordingly. + +### Adding new mappings + +The easiest way to create mappings for a new module is to take a previous one as a base. +Copy a folder and rename it to the new module name. Then, edit the `fields` files to +match the new module fields. + +The name of the folder will be the name of the module to be passed to the script. All 3 files +are required. + +- `fields/subset.yml`: This file contains the subset of ECS fields to be used for the module. +- `fields/template-settings-legacy.json`: This file contains the legacy template settings for the module. +- `fields/template-settings.json`: This file contains the composable template settings for the module. + +### Event generator + +For testing purposes, the script `generate_events.py` can be used to generate events for a given module. +Currently, it is only able to generate events for the `vulnerability-detector` module. To support other +modules, please extend or refactor the script. + +The script prompts for the required parameters, so it can be launched without arguments: + +```bash +./event_generator.py +``` + +The script will generate a JSON file with the events, and will also ask whether to upload them to the +indexer. If the upload option is selected, the script will ask for the indexer URL and port, credentials, +and index name. + +The script uses a log file. Check it out for debugging or additional information. + +#### References + +- [ECS repository](https://github.com/elastic/ecs) +- [ECS usage](https://github.com/elastic/ecs/blob/main/USAGE.md) +- [ECS field reference](https://www.elastic.co/guide/en/ecs/current/ecs-field-reference.html) diff --git a/ecs/generate.sh b/ecs/generate.sh new file mode 100755 index 0000000000000..b618bb5e97fd6 --- /dev/null +++ b/ecs/generate.sh @@ -0,0 +1,83 @@ +#!/bin/bash + +# Function to display usage information +show_usage() { + echo "Usage: $0 [--upload ]" + echo " * ECS_VERSION: ECS version to generate mappings for" + echo " * INDEXER_SRC: Path to the wazuh-indexer repository" + echo " * MODULE: Module to generate mappings for" + echo " * --upload : Upload generated index template to the OpenSearch cluster. Defaults to https://localhost:9200" + echo "Example: $0 v8.10.0 ~/wazuh-indexer vulnerability-detector --upload https://indexer:9200" +} + +# Function to generate mappings +generate_mappings() { + ECS_VERSION="$1" + INDEXER_SRC="$2" + MODULE="$3" + UPLOAD="$4" + URL="$5" + + IN_FILES_DIR="$INDEXER_SRC/ecs/$MODULE/fields" + OUT_DIR="$INDEXER_SRC/ecs/$MODULE/mappings/$ECS_VERSION" + + # Ensure the output directory exists + mkdir -p "$OUT_DIR" || exit 1 + + # Generate mappings + python scripts/generator.py --strict --ref "$ECS_VERSION" \ + --subset "$IN_FILES_DIR/subset.yml" \ + --template-settings "$IN_FILES_DIR/template-settings.json" \ + --template-settings-legacy "$IN_FILES_DIR/template-settings-legacy.json" \ + --out "$OUT_DIR" || exit 1 + + # Replace "match_only_text" type (not supported by OpenSearch) with "text" + echo "Replacing \"match_only_text\" type with \"text\"" + find "$OUT_DIR" -type f -exec sed -i 's/match_only_text/text/g' {} \; + + # Transform legacy index template for OpenSearch compatibility + cat "$OUT_DIR/generated/elasticsearch/legacy/template.json" | jq '{ + "index_patterns": .index_patterns, + "priority": .order, + "template": { + "settings": .settings, + "mappings": .mappings + } + }' >"$OUT_DIR/generated/elasticsearch/legacy/opensearch-template.json" + + # Check if the --upload flag has been provided + if [ "$UPLOAD" == "--upload" ]; then + upload_mappings "$OUT_DIR" "$URL" || exit 1 + fi + + echo "Mappings saved to $OUT_DIR" +} + +# Function to upload generated composable index template to the OpenSearch cluster +upload_mappings() { + OUT_DIR="$1" + URL="$2" + + echo "Uploading index template to the OpenSearch cluster" + for file in "$OUT_DIR/generated/elasticsearch/composable/component"/*.json; do + component_name=$(basename "$file" .json) + echo "Uploading $component_name" + curl -u admin:admin -X PUT "$URL/_component_template/$component_name?pretty" -H 'Content-Type: application/json' -d@"$file" || exit 1 + done +} + +# Check if the minimum required arguments have been provided +if [ $# -lt 3 ]; then + show_usage + exit 1 +fi + +# Parse command line arguments +ECS_VERSION="$1" +INDEXER_SRC="$2" +MODULE="$3" +UPLOAD="${4:-false}" +URL="${5:-https://localhost:9200}" + +# Generate mappings +generate_mappings "$ECS_VERSION" "$INDEXER_SRC" "$MODULE" "$UPLOAD" "$URL" diff --git a/ecs/vulnerability-detector/event-generator/event_generator.py b/ecs/vulnerability-detector/event-generator/event_generator.py new file mode 100755 index 0000000000000..9cbc0efc44f92 --- /dev/null +++ b/ecs/vulnerability-detector/event-generator/event_generator.py @@ -0,0 +1,235 @@ +#!/bin/python3 + +# This script generates sample events and injects them into the Wazuh Indexer. +# The events follow the Elastic Common Schema (ECS) format, and contains the following fields: +# - ecs +# - base +# - event +# - agent +# - package +# - host +# - vulnerability +# +# This is an ad-hoc script for the vulnearbility module. Extend to support other modules. + +import datetime +import random +import json +import requests +import warnings +import logging + +# Constants and Configuration +LOG_FILE = 'generate_data.log' +GENERATED_DATA_FILE = 'generatedData.json' +DATE_FORMAT = "%Y-%m-%dT%H:%M:%S.%fZ" + +# Configure logging +logging.basicConfig(filename=LOG_FILE, level=logging.INFO) + +# Suppress warnings +warnings.filterwarnings("ignore") + + +def generate_random_date(): + start_date = datetime.datetime.now() + end_date = start_date - datetime.timedelta(days=10) + random_date = start_date + (end_date - start_date) * random.random() + return random_date.strftime(DATE_FORMAT) + + +def generate_random_agent(): + agent = { + 'build': {'original': f'build{random.randint(0, 9999)}'}, + 'id': f'agent{random.randint(0, 99)}', + 'name': f'Agent{random.randint(0, 99)}', + 'version': f'v{random.randint(0, 9)}-stable', + 'ephemeral_id': f'{random.randint(0, 99999)}', + 'type': random.choice(['filebeat', 'windows', 'linux', 'macos']) + } + return agent + + +def generate_random_event(): + event = { + 'action': random.choice(['login', 'logout', 'create', 'delete', 'modify', 'read', 'write', 'upload', 'download', + 'copy', 'paste', 'cut', 'move', 'rename', 'open', 'close', 'execute', 'run', 'install', + 'uninstall', 'start', 'stop', 'kill', 'suspend', 'resume', 'sleep', 'wake', 'lock', + 'unlock', 'encrypt', 'decrypt', 'compress', 'decompress', 'archive', 'unarchive', + 'mount', 'unmount', 'eject', 'connect', 'disconnect', 'send', 'receive']), + 'agent_id_status': random.choice(['verified', 'mismatch', 'missing', 'auth_metadata_missing']), + 'category': random.choice(['authentication', 'authorization', 'configuration', 'communication', 'file', + 'network', 'process', 'registry', 'storage', 'system', 'web']), + 'code': f'{random.randint(0, 99999)}', + 'created': generate_random_date(), + 'dataset': random.choice(['process', 'file', 'registry', 'socket', 'dns', 'http', 'tls', 'alert', + 'authentication', 'authorization', 'configuration', 'communication', 'file', + 'network', 'process', 'registry', 'storage', 'system', 'web']), + 'duration': random.randint(0, 99999), + 'end': generate_random_date(), + 'hash': str(hash(f'hash{random.randint(0, 99999)}')), + 'id': f'{random.randint(0, 99999)}', + 'ingested': generate_random_date(), + 'kind': random.choice(['alert', 'asset', 'enrichment', 'event', 'metric', + 'state', 'pipeline_error', 'signal']), + 'module': random.choice(['process', 'file', 'registry', 'socket', 'dns', 'http', 'tls', 'alert', + 'authentication', 'authorization', 'configuration', 'communication', 'file', + 'network', 'process', 'registry', 'storage', 'system', 'web']), + 'original': f'original{random.randint(0, 99999)}', + 'outcome': random.choice(['success', 'failure', 'unknown']), + 'provider': random.choice(['process', 'file', 'registry', 'socket', 'dns', 'http', 'tls', 'alert', + 'authentication', 'authorization', 'configuration', 'communication', 'file', + 'network', 'process', 'registry', 'storage', 'system', 'web']), + 'reason': f'This event happened due to reason{random.randint(0, 99999)}', + 'reference': f'https://system.example.com/event/#{random.randint(0, 99999)}', + 'risk_score': round(random.uniform(0, 10), 1), + 'risk_score_norm': round(random.uniform(0, 10), 1), + 'sequence': random.randint(0, 10), + 'severity': random.randint(0, 10), + 'start': generate_random_date(), + 'timezone': random.choice(['UTC', 'GMT', 'PST', 'EST', 'CST', 'MST', 'PDT', 'EDT', 'CDT', 'MDT']), + 'type': random.choice(['access', 'admin', 'allowed', 'change', 'connection', 'creation', 'deletion', + 'denied', 'end', 'error', 'group', 'indicator', 'info', 'installation', 'protocol', + 'start', 'user']), + 'url': f'http://mysystem.example.com/alert/{random.randint(0, 99999)}' + } + return event + + +def generate_random_host(): + family = random.choice(['debian', 'ubuntu', 'macos', 'ios', 'android', 'RHEL']) + version = f'{random.randint(0, 99)}.{random.randint(0, 99)}' + host = { + 'os': { + 'family': family, + 'full': f'{family} {version}', + 'kernel': f'{version}kernel{random.randint(0, 99)}', + 'name': f'{family} {version}', + 'platform': family, + 'type': random.choice(['windows', 'linux', 'macos', 'ios', 'android', 'unix']), + 'version': version + } + } + return host + + +def generate_random_labels(): + labels = {'label1': f'label{random.randint(0, 99)}', 'label2': f'label{random.randint(0, 99)}'} + return labels + + +def generate_random_package(): + package = { + 'architecture': random.choice(['x86', 'x64', 'arm', 'arm64']), + 'build_version': f'build{random.randint(0, 9999)}', + 'checksum': f'checksum{random.randint(0, 9999)}', + 'description': f'description{random.randint(0, 9999)}', + 'install_scope': random.choice(['user', 'system']), + 'installed': generate_random_date(), + 'license': f'license{random.randint(0, 9)}', + 'name': f'name{random.randint(0, 99)}', + 'path': f'/path/to/package{random.randint(0, 99)}', + 'reference': f'package-reference-{random.randint(0, 99)}', + 'size': random.randint(0, 99999), + 'type': random.choice(['deb', 'rpm', 'msi', 'pkg', 'app', 'apk', 'exe', 'zip', 'tar', 'gz', '7z', + 'rar', 'cab', 'iso', 'dmg', 'tar.gz', 'tar.bz2', 'tar.xz', 'tar.Z', 'tar.lz4', + 'tar.sz', 'tar.zst']), + 'version': f'v{random.randint(0, 9)}-stable' + } + return package + + +def generate_random_tags(): + tags = [f'tag{random.randint(0, 99)}' for _ in range(random.randint(0, 9))] + return tags + + +def generate_random_vulnerability(): + id = random.randint(0, 9999) + vulnerability = { + 'category': random.choice(['security', 'config', 'os', 'package', 'custom']), + 'classification': [f'classification{random.randint(0, 9999)}'], + 'description': f'description{random.randint(0, 9999)}', + 'enumeration': 'CVE', + 'id': f'CVE-{id}', + 'reference': f'https://mycve.test.org/cgi-bin/cvename.cgi?name={id}', + 'report_id': f'report-{random.randint(0, 9999)}', + 'scanner': {'vendor': f'vendor-{random.randint(0, 9)}'}, + 'score': { + 'base': round(random.uniform(0, 10), 1), + 'environmental': round(random.uniform(0, 10), 1), + 'temporal': round(random.uniform(0, 10), 1), + 'version': round(random.uniform(0, 10), 1) + }, + 'severity': random.choice(['low', 'medium', 'high', 'critical']) + } + return vulnerability + + +def generate_random_data(number): + data = [] + for _ in range(number): + event_data = { + '@timestamp': generate_random_date(), + 'agent': generate_random_agent(), + 'ecs': {'version': '1.7.0'}, + 'event': generate_random_event(), + 'host': generate_random_host(), + 'labels': generate_random_labels(), + 'message': f'message{random.randint(0, 99999)}', + 'package': generate_random_package(), + 'tags': generate_random_tags(), + 'vulnerability': generate_random_vulnerability() + } + data.append(event_data) + return data + + +def inject_events(ip, port, index, username, password, data): + url = f'https://{ip}:{port}/{index}/_doc' + session = requests.Session() + session.auth = (username, password) + session.verify = False + headers = {'Content-Type': 'application/json'} + + try: + for event_data in data: + response = session.post(url, json=event_data, headers=headers) + if response.status_code != 201: + logging.error(f'Error: {response.status_code}') + logging.error(response.text) + break + logging.info('Data injection completed successfully.') + except Exception as e: + logging.error(f'Error: {str(e)}') + + +def main(): + try: + number = int(input("How many events do you want to generate? ")) + except ValueError: + logging.error("Invalid input. Please enter a valid number.") + return + + logging.info(f"Generating {number} events...") + data = generate_random_data(number) + + with open(GENERATED_DATA_FILE, 'a') as outfile: + for event_data in data: + json.dump(event_data, outfile) + outfile.write('\n') + + logging.info('Data generation completed.') + + inject = input("Do you want to inject the generated data into your indexer? (y/n) ").strip().lower() + if inject == 'y': + ip = input("Enter the IP of your Indexer: ") + port = input("Enter the port of your Indexer: ") + index = input("Enter the index name: ") + username = input("Username: ") + password = input("Password: ") + inject_events(ip, port, index, username, password, data) + + +if __name__ == "__main__": + main() diff --git a/ecs/vulnerability-detector/fields/subset.yml b/ecs/vulnerability-detector/fields/subset.yml new file mode 100644 index 0000000000000..2c8dc0ca3b30f --- /dev/null +++ b/ecs/vulnerability-detector/fields/subset.yml @@ -0,0 +1,19 @@ +--- +name: vulnerability_detector +fields: + base: + fields: "*" + agent: + fields: "*" + ecs: + fields: "*" + event: + fields: "*" + package: + fields: "*" + host: + fields: + os: + fields: "*" + vulnerability: + fields: "*" diff --git a/ecs/vulnerability-detector/fields/template-settings-legacy.json b/ecs/vulnerability-detector/fields/template-settings-legacy.json new file mode 100644 index 0000000000000..21ee9e7a850fc --- /dev/null +++ b/ecs/vulnerability-detector/fields/template-settings-legacy.json @@ -0,0 +1,19 @@ +{ + "index_patterns": [ + "wazuh-states-vulnerabilities" + ], + "order": 1, + "settings": { + "index": { + "codec": "best_compression", + "mapping": { + "total_fields": { + "limit": 1000 + } + }, + "number_of_shards": "1", + "number_of_replicas": "0", + "refresh_interval": "2s" + } + } +} \ No newline at end of file diff --git a/ecs/vulnerability-detector/fields/template-settings.json b/ecs/vulnerability-detector/fields/template-settings.json new file mode 100644 index 0000000000000..bf2dcb4216aff --- /dev/null +++ b/ecs/vulnerability-detector/fields/template-settings.json @@ -0,0 +1,21 @@ +{ + "index_patterns": [ + "wazuh-states-vulnerabilities" + ], + "priority": 1, + "template": { + "settings": { + "index": { + "codec": "best_compression", + "mapping": { + "total_fields": { + "limit": 2000 + } + }, + "number_of_shards": "1", + "number_of_replicas": "0", + "refresh_interval": "2s" + } + } + } +}