#!/usr/bin/env bash
# =============================================================================
# onx-modsec-status — Report ModSecurity engine health and recent blocks.
#
# Input (stdin JSON, all optional):
#   {
#     "log_file":  "/var/log/modsec_audit.log",  -- override log location
#     "tail_lines": 5000,                        -- lines to scan, default 5000
#     "window_h":   24                           -- aggregation window (hours)
#   }
#
# Output (stdout JSON):
#   {
#     "loaded":         true,
#     "version":        "2.9.6",
#     "engine_default": "On",
#     "log_file":       "/var/log/modsec_audit.log",
#     "window_h":       24,
#     "blocks_24h":     42,
#     "top_blocked_ips":[{"ip":"1.2.3.4","hits":17}, ...],
#     "top_rules":      [{"rule_id":"949110","hits":11}, ...],
#     "recent":         [{"timestamp":"...","ip":"...","rule_id":"...","uri":"..."}, ...]
#   }
#
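# Exit codes: 0=ok 2=preflight (apachectl missing in non-mock mode)
# NOTE(review): no code path visible in this script exits 2; when apachectl is
# missing the script still exits 0 with loaded=false and version/engine_default
# set to "unknown". Confirm whether _lib/common.sh enforces the preflight,
# otherwise correct the line above.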
# =============================================================================

# Strict mode: abort on unhandled errors, on unset variables, and when any
# stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Directory containing this script (symlinks resolved), used to locate the
# shared helper library that sits next to it.
SCRIPT_DIR="$(dirname "$(readlink -f "$0")")"
# shellcheck source=_lib/common.sh
. "${SCRIPT_DIR}/_lib/common.sh"

# Non-root callable: read-only inspection
# Avoid require_root so this can be polled from a low-priv sysapi runner.

# Read the optional JSON request from stdin.
# Input is documented as optional, so never block on an interactive terminal:
# when stdin is a TTY there is nothing piped in and we fall back to `{}`.
INPUT_RAW=''
if [[ ! -t 0 ]]; then
    INPUT_RAW="$(cat 2>/dev/null || true)"
fi
[[ -n "${INPUT_RAW}" ]] || INPUT_RAW='{}'

# Anything that is not a single JSON object (garbage, arrays, scalars) is
# treated as "no options given". printf is used instead of echo so that input
# beginning with a dash is not interpreted as an echo option.
printf '%s\n' "${INPUT_RAW}" | jq -e 'type == "object"' >/dev/null 2>&1 || INPUT_RAW='{}'

# onx_json_get comes from _lib/common.sh; it is called as
# (json, key, default) — presumably returning the key's value or the default.
LOG_FILE="$(onx_json_get   "${INPUT_RAW}" "log_file" "/var/log/modsec_audit.log")"
TAIL_LINES="$(onx_json_get "${INPUT_RAW}" "tail_lines" "5000")"
WINDOW_H="$(onx_json_get   "${INPUT_RAW}" "window_h" "24")"

# Both numeric options must be plain non-negative integers; anything else
# silently reverts to the documented default.
[[ "${TAIL_LINES}" =~ ^[0-9]+$ ]] || TAIL_LINES=5000
[[ "${WINDOW_H}"   =~ ^[0-9]+$ ]] || WINDOW_H=24

# ── Module presence + version ────────────────────────────────────────────────
LOADED="false"
VERSION="unknown"
ENGINE_DEFAULT="unknown"

#######################################
# Find the first uncommented `SecRuleEngine` directive under the given paths.
# Arguments: $@ - config directories/files to search recursively
# Outputs:   the directive's value (e.g. On, Off, DetectionOnly) on stdout,
#            or "unknown" when nothing matches or no path is readable
# Returns:   always 0
#######################################
_onx_modsec_engine_default() {
    local value=""
    if (( $# > 0 )); then
        # Missing directories are expected (RHEL vs Debian layouts), so grep
        # errors are discarded. awk consumes the whole stream (NR == 1 instead
        # of an early exit) so grep never sees a closed pipe under pipefail.
        value="$(grep -RhIE '^[[:space:]]*SecRuleEngine[[:space:]]+' "$@" 2>/dev/null \
            | awk 'NR == 1 { print $2 }' || true)"
    fi
    printf '%s\n' "${value:-unknown}"
    return 0
}

#######################################
# Detect whether the ModSecurity Apache module is loaded, which package
# version is installed, and what the configured engine mode is.
# Globals:   LOADED, VERSION, ENGINE_DEFAULT (written)
# Arguments: none
# Returns:   always 0; when apachectl is absent the globals keep their
#            "false"/"unknown" defaults
#######################################
_onx_modsec_probe_module() {
    LOADED="false"
    VERSION="unknown"
    ENGINE_DEFAULT="unknown"

    command -v apachectl >/dev/null 2>&1 || return 0

    # Capture the module list first and match afterwards. Piping straight into
    # `grep -q` lets grep exit on the first hit; under pipefail a SIGPIPE in
    # the producer would then turn a positive match into a failed pipeline.
    local modules
    modules="$(apachectl -M 2>/dev/null || true)"
    if grep -qiE 'security2_module|security3_module' <<<"${modules}"; then
        LOADED="true"
    fi

    # Version comes from the package manager database (RPM first, then dpkg).
    if rpm -q mod_security >/dev/null 2>&1; then
        VERSION="$(rpm -q --qf '%{VERSION}' mod_security 2>/dev/null || true)"
    elif dpkg -s libapache2-mod-security2 >/dev/null 2>&1; then
        VERSION="$(dpkg -s libapache2-mod-security2 2>/dev/null \
            | awk -F': ' '/^Version:/{print $2}' || true)"
    fi
    VERSION="${VERSION:-unknown}"

    # Engine default — read straight from the usual config directories. This
    # is a read-only inspection, so no apachectl -D invocation is made here:
    # -D only defines a parameter and may be forwarded to httpd as a start
    # request by some apachectl wrappers.
    ENGINE_DEFAULT="$(_onx_modsec_engine_default \
        /etc/httpd/conf.d /etc/httpd/conf.modules.d /etc/modsecurity \
        /etc/apache2/mods-enabled /etc/apache2/conf-enabled)"
    return 0
}

_onx_modsec_probe_module

# ── Block aggregation from the audit log ─────────────────────────────────────
BLOCKS_WINDOW=0
TOP_IPS_JSON='[]'
TOP_RULES_JSON='[]'
RECENT_JSON='[]'

#######################################
# Extract "Access denied" events from ModSecurity serial audit-log text.
#
# Section A supplies the timestamp and client IP, the first request line seen
# afterwards supplies the URI, and every `Access denied ... [id "<n>"]` line
# yields one event. Written in portable awk (no gawk-only match() arrays, no
# regex interval expressions) so it behaves the same under gawk and mawk.
#
# Arguments: $1 - cutoff in epoch seconds; events older than this are
#                 dropped. 0 (or a non-numeric value) disables the filter.
# Inputs:    audit-log text on stdin
# Outputs:   one TSV row per event: timestamp, client ip, rule id, uri
#######################################
_onx_modsec_parse_events() {
    local since="${1:-0}"
    [[ "${since}" =~ ^[0-9]+$ ]] || since=0

    awk -v since="${since}" '
        # Convert "DD/Mon/YYYY:HH:MM:SS[.frac] +ZZZZ" to epoch seconds using
        # the days-from-civil calculation. Returns -1 when the month name is
        # not recognised, so the caller can keep the event instead of
        # dropping it on a parse failure.
        function to_epoch(stamp,    mon, d, y, hh, mi, ss, zone, off, era, yoe, doy, doe) {
            mon = index("JanFebMarAprMayJunJulAugSepOctNovDec", substr(stamp, 4, 3));
            if (mon == 0 || (mon - 1) % 3 != 0) return -1;
            mon = (mon + 2) / 3;
            d  = substr(stamp, 1, 2) + 0;
            y  = substr(stamp, 8, 4) + 0;
            hh = substr(stamp, 13, 2) + 0;
            mi = substr(stamp, 16, 2) + 0;
            ss = substr(stamp, 19, 2) + 0;

            # Trailing token is the UTC offset; anything malformed counts as +0000.
            zone = stamp;
            sub(/^.* /, "", zone);
            off = 0;
            if (zone ~ /^[+-][0-9][0-9][0-9][0-9]$/) {
                off = substr(zone, 2, 2) * 3600 + substr(zone, 4, 2) * 60;
                if (substr(zone, 1, 1) == "-") off = -off;
            }

            # Shift the year to start in March so leap days fall at the end.
            if (mon <= 2) { y -= 1; mon += 9 } else { mon -= 3 }
            era = int(y / 400);
            yoe = y - era * 400;
            doy = int((153 * mon + 2) / 5) + d - 1;
            doe = yoe * 365 + int(yoe / 4) - int(yoe / 100) + doy;
            return (era * 146097 + doe - 719468) * 86400 + hh * 3600 + mi * 60 + ss - off;
        }

        BEGIN { ts = ""; ip = ""; uri = ""; epoch = -1; cutoff = since + 0; }

        # Section A boundary: a new transaction starts, forget the previous one.
        /^--[0-9a-f]+-A--$/ { ts = ""; ip = ""; uri = ""; epoch = -1; next; }

        # Section A first line:
        #   [DD/Mon/YYYY:HH:MM:SS +ZZZZ] <unique> <client_ip> <client_port> <server_ip> <server_port>
        # A dot is accepted in the time part in case the build logs
        # fractional seconds.
        /^\[[0-9][0-9]\/[A-Za-z][A-Za-z][A-Za-z]\/[0-9][0-9][0-9][0-9]:[0-9:.]+ / {
            ts = $1 " " $2;
            sub(/^\[/, "", ts);
            sub(/\]$/, "", ts);
            ip = $4;
            epoch = to_epoch(ts);
            next;
        }

        # First request line of the transaction (e.g. "GET /path HTTP/1.1").
        /^[A-Z]+ \// && uri == "" {
            uri = $2;
        }

        /Access denied/ && /\[id "/ {
            # Locate the [id "<n>"] tag; strip the 5-char prefix and 2-char suffix.
            if (match($0, /\[id "[0-9]+"\]/) && ts != "" && ip != "") {
                # Outside the window: skip. Unparseable timestamps are kept.
                if (cutoff > 0 && epoch >= 0 && epoch < cutoff) next;
                printf "%s\t%s\t%s\t%s\n", ts, ip, substr($0, RSTART + 5, RLENGTH - 7), uri;
            }
        }
    '
}

#######################################
# Turn `uniq -c` style "<count> <key>" lines into a JSON array, keeping at
# most the first 10 lines. jq does the encoding so keys are always escaped.
# Arguments: $1 - name of the JSON field that receives the key
# Inputs:    count/key lines on stdin, already ordered
# Outputs:   compact JSON array of {"<field>": key, "hits": count}
#######################################
_onx_modsec_counts_json() {
    local field="$1"
    # awk reads all input (no early exit) so upstream sort never hits SIGPIPE.
    awk 'NR <= 10 { print $1 "\t" $2 }' \
        | jq -Rnc --arg field "${field}" \
            '[inputs | select(length > 0) | split("\t")
              | {($field): .[1], hits: (.[0] | tonumber)}]'
}

#######################################
# Render the last 20 event rows as a JSON array. The URI is attacker
# controlled, so it must go through a real JSON encoder rather than printf.
# Inputs:    TSV event rows on stdin (timestamp, ip, rule id, uri)
# Outputs:   compact JSON array of event objects
#######################################
_onx_modsec_recent_json() {
    tail -n 20 \
        | jq -Rnc \
            '[inputs | select(length > 0) | split("\t")
              | {timestamp: .[0], ip: .[1], rule_id: .[2], uri: (.[3:] | join("\t"))}]'
}

if [[ -r "${LOG_FILE:-}" ]]; then
    # Start of the aggregation window. If date cannot compute it the cutoff
    # becomes 0, which disables filtering instead of hiding every event.
    SINCE_EPOCH="$(date -d "${WINDOW_H} hours ago" +%s 2>/dev/null || echo 0)"

    # Stream the tail straight into the parser; the audit log may contain raw
    # request bodies that do not survive a round trip through a shell variable.
    EVENTS="$(tail -n "${TAIL_LINES}" "${LOG_FILE}" 2>/dev/null \
        | _onx_modsec_parse_events "${SINCE_EPOCH}" || true)"

    if [[ -n "${EVENTS}" ]]; then
        BLOCKS_WINDOW="$(printf '%s\n' "${EVENTS}" | awk 'END { print NR }')"

        # Top IPs
        TOP_IPS_JSON="$(printf '%s\n' "${EVENTS}" \
            | cut -f2 | sort | uniq -c | sort -rn \
            | _onx_modsec_counts_json ip || true)"
        [[ -n "${TOP_IPS_JSON}" ]] || TOP_IPS_JSON='[]'

        # Top rule ids
        TOP_RULES_JSON="$(printf '%s\n' "${EVENTS}" \
            | cut -f3 | sort | uniq -c | sort -rn \
            | _onx_modsec_counts_json rule_id || true)"
        [[ -n "${TOP_RULES_JSON}" ]] || TOP_RULES_JSON='[]'

        # Recent 20 events
        RECENT_JSON="$(printf '%s\n' "${EVENTS}" | _onx_modsec_recent_json || true)"
        [[ -n "${RECENT_JSON}" ]] || RECENT_JSON='[]'
    fi
fi

# ── Emit final JSON via jq ───────────────────────────────────────────────────
# Values that are already JSON (boolean, numbers, arrays) are handed over with
# --argjson so they keep their type; plain strings use --arg so jq performs the
# escaping. The argument list is assembled first, then applied to the filter.
REPORT_ARGS=(
    --argjson loaded  "${LOADED}"
    --arg     version "${VERSION}"
    --arg     engine  "${ENGINE_DEFAULT}"
    --arg     logfile "${LOG_FILE}"
    --argjson blocks  "${BLOCKS_WINDOW}"
    --argjson tops    "${TOP_IPS_JSON}"
    --argjson topr    "${TOP_RULES_JSON}"
    --argjson recent  "${RECENT_JSON}"
    --argjson window  "${WINDOW_H}"
)

jq -n "${REPORT_ARGS[@]}" '{
    loaded:          $loaded,
    version:         $version,
    engine_default:  $engine,
    log_file:        $logfile,
    window_h:        $window,
    blocks_24h:      $blocks,
    top_blocked_ips: $tops,
    top_rules:       $topr,
    recent:          $recent
}'
