---
# =============================================================================
# Honeynet — Fluent Bit Log Forwarder
#
# Installs and configures Fluent Bit on honeypot nodes to ship Cowrie JSON
# logs directly to the centralized S3 log sink.
#
# Fluent Bit has a native S3 output plugin — lightweight, cloud-native,
# industry standard for exactly this use case.
#
# Usage:
#   ansible-playbook -i inventory.ini ansible/playbooks/install_log_forwarder.yml \
#     -e "s3_bucket=honeynet-threat-log-sink aws_region=us-east-1 sensor_id=aws-us-east-1"
# =============================================================================

- name: Install and configure Fluent Bit log forwarder
  hosts: honeypots
  become: true

  vars:
    cowrie_log_path: "/home/cowrie/cowrie/var/log/cowrie/cowrie.json"
    sensor_id: "{{ sensor_id | default('unknown-sensor') }}"
    aws_region: "{{ aws_region | default('us-east-1') }}"
    s3_bucket: "{{ s3_bucket }}"
    fluent_bit_version: "3.0"
    # FIX: the apt repo suite was hard-coded to "focal", which installs the
    # wrong (or no) packages on Jammy/Noble hosts. Derive it from the managed
    # node's own release; override with -e fluent_bit_release=... if needed.
    fluent_bit_release: "{{ ansible_distribution_release | default('focal') }}"

  tasks:
    - name: Install prerequisites
      apt:
        name:
          - curl
          - gnupg
          - apt-transport-https
        state: present
        update_cache: true

    - name: Add Fluent Bit GPG key
      apt_key:
        url: https://packages.fluentbit.io/fluentbit.key
        state: present

    - name: Add Fluent Bit apt repository
      apt_repository:
        # NOTE(review): the distribution path segment and suite must match the
        # node's Ubuntu codename — both derived from fluent_bit_release above.
        repo: "deb https://packages.fluentbit.io/ubuntu/{{ fluent_bit_release }} {{ fluent_bit_release }} main"
        state: present
        filename: fluent-bit

    - name: Install Fluent Bit
      apt:
        name: fluent-bit
        state: present
        update_cache: true

    - name: Deploy Fluent Bit configuration
      template:
        src: ../templates/fluent-bit.conf.j2
        dest: /etc/fluent-bit/fluent-bit.conf
        owner: root
        group: root
        mode: "0644"
      notify: Restart Fluent Bit

    - name: Enable and start Fluent Bit
      systemd:
        name: fluent-bit
        enabled: true
        state: started

  handlers:
    - name: Restart Fluent Bit
      systemd:
        name: fluent-bit
        state: restarted
# Fluent Bit configuration — ships Cowrie JSON logs to S3
# Generated by Ansible for sensor: {{ sensor_id }}

[SERVICE]
    Flush             5
    Daemon            Off
    Log_Level         info
    Parsers_File      parsers.conf

[INPUT]
    Name              tail
    Path              {{ cowrie_log_path }}
    Tag               cowrie.attacks
    Parser            json
    DB                /var/log/flb_cowrie.db
    Mem_Buf_Limit     5MB
    Skip_Long_Lines   On
    Refresh_Interval  10

[FILTER]
    # Stamp every record with which sensor/cloud it came from before upload.
    Name    record_modifier
    Match   cowrie.*
    Record  sensor_id {{ sensor_id }}
    Record  cloud_provider {{ cloud_provider | default('unknown') }}

[OUTPUT]
    Name              s3
    Match             cowrie.*
    bucket            {{ s3_bucket }}
    region            {{ aws_region }}
    store_dir         /tmp/fluent-bit-s3
    s3_key_format     /raw/{{ sensor_id }}/%Y/%m/%d/%H-%M-%S.json
    total_file_size   10M
    upload_timeout    60s
    use_put_object    On
    # FIX: removed "compression gzip". With gzip enabled, Fluent Bit appends
    # ".gz" to the object key, so the Terraform S3 notification's ".json"
    # suffix filter never matches and the enrichment Lambda is never invoked;
    # the Lambda also decodes the body as plain UTF-8 NDJSON. Re-enable only
    # together with gzip-aware handling downstream.
    content_type      application/x-ndjson
+# ------------------------------------------------------------------ +_api_key_cache = None +_ip_cache = {} # { ip: enrichment_dict } +_rate_limited = False # If we hit 429, stop calling API for this container lifetime + + +def get_api_key() -> str: + """Fetch AbuseIPDB key from Secrets Manager (cached per Lambda container).""" + global _api_key_cache + if _api_key_cache: + return _api_key_cache + response = secrets.get_secret_value(SecretId=SECRET_NAME) + _api_key_cache = response["SecretString"].strip() + return _api_key_cache + + +def query_abuseipdb(ip: str, api_key: str) -> dict: + """ + Query AbuseIPDB v2 API for IP reputation data. + + Handles: + - HTTP 429: sets global rate_limited flag, stops further calls this session + - Any other failure: returns safe defaults, log still stored unenriched + - Results cached in _ip_cache to avoid duplicate API calls + """ + global _rate_limited + + # Check cache first — same IP seen multiple times costs only 1 API call + if ip in _ip_cache: + logger.info(f"Cache hit for {ip}") + return _ip_cache[ip] + + # If we already hit rate limit this container session, skip API call + if _rate_limited: + logger.warning(f"Rate limited — skipping AbuseIPDB lookup for {ip}") + return _default_enrichment(rate_limited=True) + + url = f"https://api.abuseipdb.com/api/v2/check?ipAddress={ip}&maxAgeInDays=90" + req = urllib.request.Request( + url, + headers={ + "Key": api_key, + "Accept": "application/json", + }, + ) + + try: + with urllib.request.urlopen(req, timeout=5) as resp: + data = json.loads(resp.read().decode())["data"] + result = { + "abuse_score": data.get("abuseConfidenceScore", 0), + "country_code": data.get("countryCode", "UNKNOWN"), + "isp": data.get("isp", "UNKNOWN"), + "domain": data.get("domain", "UNKNOWN"), + "is_tor": data.get("isTor", False), + "total_reports": data.get("totalReports", 0), + "last_reported": data.get("lastReportedAt", None), + "is_whitelisted": data.get("isWhitelisted", False), + "enrichment_source": 
"abuseipdb", + } + # Cache the result + _ip_cache[ip] = result + return result + + except urllib.error.HTTPError as e: + if e.code == 429: + # Rate limit hit — stop all API calls for this Lambda container lifetime + _rate_limited = True + logger.warning(f"AbuseIPDB rate limit (429) hit on {ip}. " + f"Disabling enrichment for remaining events this invocation.") + else: + logger.warning(f"AbuseIPDB HTTP {e.code} for {ip}") + except urllib.error.URLError as e: + logger.warning(f"AbuseIPDB network error for {ip}: {e.reason}") + except Exception as e: + logger.warning(f"AbuseIPDB unexpected error for {ip}: {e}") + + return _default_enrichment() + + +def _default_enrichment(rate_limited: bool = False) -> dict: + """Safe defaults when enrichment fails — log is still stored.""" + return { + "abuse_score": -1, + "country_code": "UNKNOWN", + "isp": "UNKNOWN", + "domain": "UNKNOWN", + "is_tor": False, + "total_reports": 0, + "last_reported": None, + "is_whitelisted": False, + "enrichment_source": "rate_limited" if rate_limited else "failed", + } + + +def _is_private_ip(ip: str) -> bool: + """Skip enrichment for private/loopback IPs — they're not attackers.""" + private_prefixes = ("10.", "172.16.", "172.17.", "172.18.", "172.19.", + "172.20.", "172.21.", "172.22.", "172.23.", "172.24.", + "172.25.", "172.26.", "172.27.", "172.28.", "172.29.", + "172.30.", "172.31.", "192.168.", "127.", "::1", "fc", "fd") + return ip.startswith(private_prefixes) + + +def enrich_event(event: dict, api_key: str) -> dict: + """Merge AbuseIPDB enrichment fields into a Cowrie log event.""" + src_ip = event.get("src_ip", "") + enrichment = {} + + if src_ip and not _is_private_ip(src_ip): + enrichment = query_abuseipdb(src_ip, api_key) + else: + logger.info(f"Skipping private/missing IP: {src_ip!r}") + + return { + **event, + **enrichment, + "enriched_at": datetime.now(timezone.utc).isoformat(), + "pipeline": "honeynet-lambda-v1", + } + + +def lambda_handler(event, context): + """ + Main Lambda entry 
point. + Processes each S3 PutObject event, enriches Cowrie log lines, + and writes results to the enriched bucket. + """ + api_key = get_api_key() + processed = 0 + errors = 0 + + for record in event.get("Records", []): + source_bucket = record["s3"]["bucket"]["name"] + source_key = record["s3"]["object"]["key"] + + logger.info(f"Processing: s3://{source_bucket}/{source_key}") + + try: + obj = s3.get_object(Bucket=source_bucket, Key=source_key) + raw_content = obj["Body"].read().decode("utf-8") + except Exception as e: + logger.error(f"Failed to read s3://{source_bucket}/{source_key}: {e}") + errors += 1 + continue + + enriched_lines = [] + for line in raw_content.strip().splitlines(): + line = line.strip() + if not line: + continue + try: + log_event = json.loads(line) + enriched = enrich_event(log_event, api_key) + enriched_lines.append(json.dumps(enriched)) + processed += 1 + except json.JSONDecodeError: + logger.warning(f"Skipping non-JSON line: {line[:80]}") + except Exception as e: + logger.error(f"Failed to enrich event: {e}") + errors += 1 + + if enriched_lines: + output_key = f"enriched/{source_key}" + enriched_body = "\n".join(enriched_lines) + s3.put_object( + Bucket=ENRICHED_BUCKET, + Key=output_key, + Body=enriched_body.encode("utf-8"), + ContentType="application/x-ndjson", + ServerSideEncryption="AES256", + ) + logger.info( + f"Written {len(enriched_lines)} records to " + f"s3://{ENRICHED_BUCKET}/{output_key}" + ) + + logger.info(f"Done. 
Processed: {processed}, Errors: {errors}") return {"processed": processed, "errors": errors} \ No newline at end of file diff --git a/terraform/modules/telemetry/README.md b/terraform/modules/telemetry/README.md new file mode 100644 index 0000000..7de76d6 --- /dev/null +++ b/terraform/modules/telemetry/README.md @@ -0,0 +1,80 @@ +# Telemetry Module — Threat Intelligence Data Pipeline + +This module provisions a fully automated, serverless threat intelligence pipeline +that transforms raw honeypot logs into enriched, queryable attack data. + +## Architecture + +``` +Honeypot Node (any cloud) + ↓ Fluent Bit over TLS +S3 Log Sink (encrypted, append-only) + ↓ S3 event trigger +Lambda (Python) — parses Cowrie JSON, enriches IP via AbuseIPDB + ↓ writes enriched JSON +S3 Enriched Logs + ↓ hourly crawl +Glue Catalog (auto schema discovery) + ↓ SQL +Athena Workgroup — query attack data directly +``` + +## Resources Provisioned + +| Resource | Purpose | +| -------------------------------- | ------------------------------------------------------ | +| `aws_s3_bucket.log_sink` | Receives raw Cowrie/Dionaea logs | +| `aws_s3_bucket.enriched_logs` | Stores Lambda-enriched attack records | +| `aws_lambda_function.enrichment` | Python function: parses IP → AbuseIPDB → enriched JSON | +| `aws_secretsmanager_secret` | Stores AbuseIPDB API key securely | +| `aws_glue_crawler` | Auto-discovers schema from enriched logs hourly | +| `aws_athena_workgroup` | SQL query interface for attack analysis | + +## Usage + +```hcl +module "telemetry" { + source = "./modules/telemetry" + name_prefix = "honeynet" + aws_region = "us-east-1" + abuseipdb_api_key = var.abuseipdb_api_key +} +``` + +## Example Athena Queries + +```sql +-- Top attacking IPs +SELECT src_ip, COUNT(*) as attempts +FROM honeynet_attacks.enriched +GROUP BY src_ip +ORDER BY attempts DESC LIMIT 20; + +-- High-risk IPs (abuse score > 80) +SELECT src_ip, abuse_score, country_code, isp +FROM honeynet_attacks.enriched +WHERE abuse_score > 80 +ORDER BY 
# =============================================================================
# Honeynet — Centralized Threat Intelligence Data Pipeline
#
# This module provisions the complete telemetry stack:
#   1. S3 Log Sink        — receives Cowrie/Dionaea logs from all regions
#   2. Lambda Enrichment  — auto-enriches attacker IPs via AbuseIPDB
#   3. Glue Crawler       — auto-discovers log schema
#   4. Athena Workgroup   — enables SQL queries on raw attack data
# =============================================================================

terraform {
  required_version = ">= 1.5.0"
  required_providers {
    aws = {
      source  = "hashicorp/aws"
      version = "~> 5.0"
    }
  }
}

# -----------------------------------------------------------------------------
# S3 Log Sink
# -----------------------------------------------------------------------------

resource "aws_s3_bucket" "log_sink" {
  # NOTE: bucket names are globally unique — a shared default name_prefix can
  # collide across accounts; override name_prefix per deployment.
  bucket        = "${var.name_prefix}-threat-log-sink"
  force_destroy = false

  tags = {
    Project     = "honeynet"
    Purpose     = "threat-log-sink"
    ManagedBy   = "terraform"
    Environment = var.environment
  }
}

resource "aws_s3_bucket_versioning" "log_sink" {
  bucket = aws_s3_bucket.log_sink.id
  versioning_configuration {
    status = "Enabled"
  }
}

resource "aws_s3_bucket_server_side_encryption_configuration" "log_sink" {
  bucket = aws_s3_bucket.log_sink.id
  rule {
    apply_server_side_encryption_by_default {
      sse_algorithm = "AES256"
    }
    bucket_key_enabled = true
  }
}

resource "aws_s3_bucket_public_access_block" "log_sink" {
  bucket                  = aws_s3_bucket.log_sink.id
  block_public_acls       = true
  block_public_policy     = true
  ignore_public_acls      = true
  restrict_public_buckets = true
}

# Lifecycle: move old logs to cheaper storage automatically
resource "aws_s3_bucket_lifecycle_configuration" "log_sink" {
  bucket = aws_s3_bucket.log_sink.id

  rule {
    id     = "archive-raw-logs"
    status = "Enabled"

    # FIX: AWS provider v5 requires each rule to declare a filter (or prefix).
    # An empty filter applies the rule to every object in the bucket.
    filter {}

    transition {
      days          = 30
      storage_class = "STANDARD_IA"
    }

    transition {
      days          = 90
      storage_class = "GLACIER"
    }
  }
}

# S3 bucket for enriched logs output
resource "aws_s3_bucket" "enriched_logs" {
  bucket        = "${var.name_prefix}-enriched-threat-logs"
  force_destroy = false

  tags = {
    Project     = "honeynet"
    Purpose     = "enriched-logs"
    ManagedBy   = "terraform"
    Environment = var.environment
  }
}

resource "aws_s3_bucket_server_side_encryption_configuration" "enriched_logs" {
  bucket = aws_s3_bucket.enriched_logs.id
  rule {
    apply_server_side_encryption_by_default {
      sse_algorithm = "AES256"
    }
  }
}

resource "aws_s3_bucket_public_access_block" "enriched_logs" {
  bucket                  = aws_s3_bucket.enriched_logs.id
  block_public_acls       = true
  block_public_policy     = true
  ignore_public_acls      = true
  restrict_public_buckets = true
}

# Athena query results bucket
resource "aws_s3_bucket" "athena_results" {
  bucket        = "${var.name_prefix}-athena-query-results"
  force_destroy = true

  tags = {
    Project   = "honeynet"
    Purpose   = "athena-results"
    ManagedBy = "terraform"
  }
}

resource "aws_s3_bucket_public_access_block" "athena_results" {
  bucket                  = aws_s3_bucket.athena_results.id
  block_public_acls       = true
  block_public_policy     = true
  ignore_public_acls      = true
  restrict_public_buckets = true
}

# -----------------------------------------------------------------------------
# Secrets Manager — AbuseIPDB API Key
# -----------------------------------------------------------------------------

resource "aws_secretsmanager_secret" "abuseipdb_key" {
  name                    = "${var.name_prefix}/abuseipdb-api-key"
  description             = "AbuseIPDB API key for threat intelligence enrichment"
  recovery_window_in_days = 7

  tags = {
    Project   = "honeynet"
    ManagedBy = "terraform"
  }
}

resource "aws_secretsmanager_secret_version" "abuseipdb_key" {
  secret_id     = aws_secretsmanager_secret.abuseipdb_key.id
  secret_string = var.abuseipdb_api_key
}

# -----------------------------------------------------------------------------
# IAM Role for Lambda
# -----------------------------------------------------------------------------

data "aws_iam_policy_document" "lambda_assume_role" {
  statement {
    effect  = "Allow"
    actions = ["sts:AssumeRole"]
    principals {
      type        = "Service"
      identifiers = ["lambda.amazonaws.com"]
    }
  }
}

resource "aws_iam_role" "lambda_enrichment" {
  name               = "${var.name_prefix}-lambda-enrichment-role"
  assume_role_policy = data.aws_iam_policy_document.lambda_assume_role.json

  tags = {
    Project   = "honeynet"
    ManagedBy = "terraform"
  }
}

data "aws_iam_policy_document" "lambda_permissions" {
  # Read raw logs from S3 sink
  statement {
    effect    = "Allow"
    actions   = ["s3:GetObject"]
    resources = ["${aws_s3_bucket.log_sink.arn}/*"]
  }

  # Write enriched logs to output bucket
  statement {
    effect    = "Allow"
    actions   = ["s3:PutObject"]
    resources = ["${aws_s3_bucket.enriched_logs.arn}/*"]
  }

  # Read AbuseIPDB key from Secrets Manager
  statement {
    effect    = "Allow"
    actions   = ["secretsmanager:GetSecretValue"]
    resources = [aws_secretsmanager_secret.abuseipdb_key.arn]
  }

  # CloudWatch Logs for Lambda execution logs
  statement {
    effect = "Allow"
    actions = [
      "logs:CreateLogGroup",
      "logs:CreateLogStream",
      "logs:PutLogEvents"
    ]
    resources = ["arn:aws:logs:*:*:*"]
  }
}

resource "aws_iam_role_policy" "lambda_enrichment" {
  name   = "${var.name_prefix}-lambda-enrichment-policy"
  role   = aws_iam_role.lambda_enrichment.id
  policy = data.aws_iam_policy_document.lambda_permissions.json
}

# -----------------------------------------------------------------------------
# Lambda Function — IP Enrichment
# -----------------------------------------------------------------------------

data "archive_file" "lambda_package" {
  type        = "zip"
  source_dir  = "${path.module}/../../../lambda/enrichment"
  output_path = "${path.module}/lambda_enrichment.zip"
}

resource "aws_lambda_function" "enrichment" {
  filename         = data.archive_file.lambda_package.output_path
  function_name    = "${var.name_prefix}-ip-enrichment"
  role             = aws_iam_role.lambda_enrichment.arn
  handler          = "handler.lambda_handler"
  runtime          = "python3.12"
  source_code_hash = data.archive_file.lambda_package.output_base64sha256
  timeout          = 30
  memory_size      = 256

  environment {
    variables = {
      ENRICHED_BUCKET = aws_s3_bucket.enriched_logs.bucket
      SECRET_NAME     = aws_secretsmanager_secret.abuseipdb_key.name
      # "AWS_REGION" is reserved by the Lambda runtime, hence the _NAME suffix.
      AWS_REGION_NAME = var.aws_region
    }
  }

  tags = {
    Project   = "honeynet"
    ManagedBy = "terraform"
  }
}

# Allow S3 to invoke Lambda
resource "aws_lambda_permission" "allow_s3" {
  statement_id  = "AllowS3Invoke"
  action        = "lambda:InvokeFunction"
  function_name = aws_lambda_function.enrichment.function_name
  principal     = "s3.amazonaws.com"
  source_arn    = aws_s3_bucket.log_sink.arn
}

# S3 trigger — fires Lambda on every new log file
resource "aws_s3_bucket_notification" "log_trigger" {
  bucket = aws_s3_bucket.log_sink.id

  lambda_function {
    lambda_function_arn = aws_lambda_function.enrichment.arn
    events              = ["s3:ObjectCreated:*"]
    # NOTE(review): this suffix must match what the forwarder actually writes —
    # gzip-compressed Fluent Bit uploads get a ".gz" suffix and would never
    # trigger. Verify against the fluent-bit.conf output settings.
    filter_suffix = ".json"
  }

  depends_on = [aws_lambda_permission.allow_s3]
}

# -----------------------------------------------------------------------------
# AWS Glue — Schema Discovery
# -----------------------------------------------------------------------------

resource "aws_glue_catalog_database" "honeynet" {
  name        = "${replace(var.name_prefix, "-", "_")}_attacks"
  description = "Honeynet enriched attack log database for Athena queries"
}

resource "aws_iam_role" "glue_crawler" {
  name = "${var.name_prefix}-glue-crawler-role"

  assume_role_policy = jsonencode({
    Version = "2012-10-17"
    Statement = [{
      Effect    = "Allow"
      Action    = "sts:AssumeRole"
      Principal = { Service = "glue.amazonaws.com" }
    }]
  })
}

resource "aws_iam_role_policy_attachment" "glue_service" {
  role       = aws_iam_role.glue_crawler.name
  policy_arn = "arn:aws:iam::aws:policy/service-role/AWSGlueServiceRole"
}

data "aws_iam_policy_document" "glue_s3_access" {
  statement {
    effect  = "Allow"
    actions = ["s3:GetObject", "s3:ListBucket"]
    resources = [
      aws_s3_bucket.enriched_logs.arn,
      "${aws_s3_bucket.enriched_logs.arn}/*"
    ]
  }
}

resource "aws_iam_role_policy" "glue_s3" {
  name   = "${var.name_prefix}-glue-s3-policy"
  role   = aws_iam_role.glue_crawler.id
  policy = data.aws_iam_policy_document.glue_s3_access.json
}

resource "aws_glue_crawler" "enriched_logs" {
  name          = "${var.name_prefix}-attack-log-crawler"
  role          = aws_iam_role.glue_crawler.arn
  database_name = aws_glue_catalog_database.honeynet.name
  description   = "Crawls enriched Cowrie attack logs and infers schema for Athena"

  s3_target {
    path = "s3://${aws_s3_bucket.enriched_logs.bucket}/enriched/"
  }

  schedule = "cron(0 * * * ? *)" # Run every hour

  schema_change_policy {
    update_behavior = "UPDATE_IN_DATABASE"
    delete_behavior = "LOG"
  }

  tags = {
    Project   = "honeynet"
    ManagedBy = "terraform"
  }
}

# -----------------------------------------------------------------------------
# Athena Workgroup
# -----------------------------------------------------------------------------

resource "aws_athena_workgroup" "honeynet" {
  name        = "${var.name_prefix}-attack-analysis"
  description = "Athena workgroup for querying honeynet attack data"

  configuration {
    enforce_workgroup_configuration    = true
    publish_cloudwatch_metrics_enabled = true

    result_configuration {
      output_location = "s3://${aws_s3_bucket.athena_results.bucket}/results/"

      encryption_configuration {
        encryption_option = "SSE_S3"
      }
    }
  }

  tags = {
    Project   = "honeynet"
    ManagedBy = "terraform"
  }
}
output "log_sink_bucket" {
  description = "Name of the S3 bucket receiving raw honeypot logs."
  value       = aws_s3_bucket.log_sink.bucket
}

output "log_sink_arn" {
  description = "ARN of the raw log sink S3 bucket."
  value       = aws_s3_bucket.log_sink.arn
}

output "enriched_logs_bucket" {
  description = "Name of the S3 bucket storing enriched attack logs."
  value       = aws_s3_bucket.enriched_logs.bucket
}

output "lambda_function_name" {
  description = "Name of the IP enrichment Lambda function."
  value       = aws_lambda_function.enrichment.function_name
}

output "athena_workgroup" {
  description = "Athena workgroup name for querying attack data."
  value       = aws_athena_workgroup.honeynet.name
}

output "glue_database" {
  description = "Glue catalog database name."
  value       = aws_glue_catalog_database.honeynet.name
}

output "example_athena_queries" {
  description = "Example SQL queries to run in Athena."
  value = {
    # FIX: the aggregate needs GROUP BY src_ip — without it Athena rejects
    # the query ("src_ip must appear in the GROUP BY clause").
    top_attacking_ips   = "SELECT src_ip, COUNT(*) as attempts FROM ${aws_glue_catalog_database.honeynet.name}.enriched GROUP BY src_ip ORDER BY attempts DESC LIMIT 20;"
    high_abuse_scores   = "SELECT src_ip, abuse_score, country_code FROM ${aws_glue_catalog_database.honeynet.name}.enriched WHERE abuse_score > 80 ORDER BY abuse_score DESC;"
    attacks_by_country  = "SELECT country_code, COUNT(*) as attacks FROM ${aws_glue_catalog_database.honeynet.name}.enriched GROUP BY country_code ORDER BY attacks DESC;"
    credential_attempts = "SELECT username, password, COUNT(*) as tries FROM ${aws_glue_catalog_database.honeynet.name}.enriched WHERE eventid='cowrie.login.failed' GROUP BY username, password ORDER BY tries DESC LIMIT 20;"
  }
}
variable "name_prefix" {
  description = "Prefix applied to all resource names. Must be lowercase alphanumeric and hyphens."
  type        = string
  default     = "honeynet"

  validation {
    # FIX: {1,20} after the leading letter allowed 21 characters total while
    # the error message promised a max of 20 — tightened to {1,19}.
    condition     = can(regex("^[a-z][a-z0-9-]{1,19}$", var.name_prefix))
    error_message = "name_prefix must be lowercase alphanumeric/hyphens, start with a letter, max 20 chars."
  }
}

variable "aws_region" {
  description = "AWS region where the telemetry pipeline will be deployed."
  type        = string
  default     = "us-east-1"
}

variable "environment" {
  description = "Deployment environment label."
  type        = string
  default     = "production"
}

variable "abuseipdb_api_key" {
  description = "AbuseIPDB API key for threat intelligence enrichment. Get a free key at https://www.abuseipdb.com"
  type        = string
  sensitive   = true
}