#!/usr/bin/env bash
# =============================================================================
# onx-error-log-parse — Apache/PHP-FPM error log parser
#
# Input (stdin JSON):
#   { "username": "onx_xxxx", "domain": "example.com", "lines": 500 }
#
# Output:
#   {
#     "domain": "...", "log_file": "...", "lines_scanned": 500,
#     "by_severity": {"error": 12, "warn": 87, "notice": 145, "critical": 0},
#     "grouped": [
#       {"signature": "...", "severity": "...", "count": N,
#        "first_seen": "...", "last_seen": "...", "sample_ip": "..."}
#     ],
#     "recent": [{"timestamp":"...","level":"...","client":"...","message":"..."}, ...]
#   }
#
# Exit codes: 0=ok 1=invalid-input 2=preflight-fail
# Deployed to: /usr/local/onoxsoft/bin/onx-error-log-parse
# =============================================================================

set -euo pipefail

# Resolve the real location of this script (following symlinks) so the shared
# helper library is found no matter how the script was invoked.
SCRIPT_DIR="$(dirname "$(readlink -f "$0")")"
source "${SCRIPT_DIR}/_lib/common.sh"

require_cmd jq
require_cmd awk

# ── Request parsing ─────────────────────────────────────────────────────────
# The whole request is one JSON document on stdin.
INPUT=$(cat)
onx_require_json "${INPUT}"

USERNAME=$(onx_json_get "${INPUT}" "username")
DOMAIN=$(onx_json_get   "${INPUT}" "domain")
# Deliberately NOT stored in a variable called LINES: bash assigns LINES itself
# (terminal height) after external commands when `checkwinsize` is enabled,
# which would silently replace the caller's limit when run from a terminal.
# NOTE(review): the third argument looks like a default value — confirm in common.sh.
MAX_LINES=$(onx_json_get "${INPUT}" "lines" "500")

onx_validate_username "${USERNAME}"
onx_validate_domain   "${DOMAIN}"

# Sanity-cap the line count to avoid OOM if the caller passes nonsense.
if [[ "${MAX_LINES}" =~ ^[0-9]+$ ]]; then
    # Drop leading zeros so "(( ))" never parses the value as octal ("08").
    MAX_LINES="${MAX_LINES#"${MAX_LINES%%[!0]*}"}"
    MAX_LINES="${MAX_LINES:-0}"
    # Anything longer than 5 digits is necessarily > 50000; testing the length
    # first also keeps absurdly long numbers from wrapping around in arithmetic.
    if (( ${#MAX_LINES} > 5 || MAX_LINES > 50000 )); then
        MAX_LINES=50000
    fi
else
    MAX_LINES=500
fi

# ── Locate the error log ────────────────────────────────────────────────────
# First readable candidate wins; the order below is the lookup priority.
LOG_FILE=""
for candidate in \
    "/home/${USERNAME}/logs/${DOMAIN}-error.log" \
    "/var/log/httpd/${DOMAIN}-error.log" \
    "/var/log/nginx/${DOMAIN}-error.log" \
    "/home/${USERNAME}/logs/${DOMAIN}_error_log"; do
    if [[ -r "${candidate}" ]]; then
        LOG_FILE="${candidate}"
        break
    fi
done

# A missing log is not an error: report an empty result and exit 0.
if [[ -z "${LOG_FILE}" ]]; then
    printf '{"domain":"%s","log_file":null,"lines_scanned":0,"by_severity":{"critical":0,"error":0,"warn":0,"notice":0},"grouped":[],"recent":[],"note":"error log not found"}\n' "${DOMAIN}"
    exit 0
fi

# ── Snapshot the tail of the log ────────────────────────────────────────────
# All later passes read this private copy, so they see the same lines even if
# the live log keeps growing. The trap removes it on every exit path.
TMPSCAN="$(mktemp -t onx-elog.XXXXXX)"
trap 'rm -f "${TMPSCAN}" 2>/dev/null || true' EXIT

# Best-effort: a failing tail leaves an empty snapshot instead of aborting.
tail -n "${MAX_LINES}" -- "${LOG_FILE}" > "${TMPSCAN}" 2>/dev/null || true
SCANNED=$(wc -l < "${TMPSCAN}" 2>/dev/null || echo 0)
# Some wc implementations pad the count with blanks; keep only the digits.
SCANNED="${SCANNED//[[:space:]]/}"

# ── Severity counts ─────────────────────────────────────────────────────────
# _onx_count_level LEVELS FILE
#   Prints the number of lines in FILE that carry a bracketed level tag from
#   LEVELS (an ERE alternation such as 'error|err'), matched case-insensitively.
#   Both the bare form "[error]" and the module-qualified form "[core:error]"
#   are counted. Always prints exactly one non-negative integer, including
#   when FILE is unreadable.
_onx_count_level() {
    local levels="$1" file="$2" n
    # grep -c already prints "0" (and exits 1) when nothing matches, so the
    # exit status is ignored rather than appending a second "0" to the output.
    n=$(grep -ciE "\[([a-z0-9_]+:)?(${levels})\]" -- "${file}" 2>/dev/null) || true
    [[ "${n}" =~ ^[0-9]+$ ]] || n=0
    printf '%s\n' "${n}"
}

COUNT_CRIT=$(_onx_count_level 'crit|alert|emerg|fatal' "${TMPSCAN}")
COUNT_ERR=$(_onx_count_level  'error|err'              "${TMPSCAN}")
COUNT_WARN=$(_onx_count_level 'warn|warning'           "${TMPSCAN}")
COUNT_NOT=$(_onx_count_level  'notice|info|debug'      "${TMPSCAN}")

# ── Group identical messages (collapse IPs/timestamps) ──────────────────────
# Apache 2.4 error log line:
#   [Sun May 14 08:42:12.123456 2026] [core:error] [pid 1234] [client 1.2.3.4:5678] AH00128: ...
#
# _onx_group_messages FILE
#   Collapses the lines of FILE into message signatures and prints the 20 most
#   frequent groups, highest count first, one per line, tab-separated:
#       count <TAB> severity <TAB> signature <TAB> first line <TAB> last line
#   Every field is guaranteed non-empty and tab-free so the output can be read
#   back with `IFS=$'\t' read`. The awk program sticks to POSIX awk (no
#   three-argument match(), no \s) so it also runs on mawk and busybox awk.
_onx_group_messages() {
    local file="$1"
    awk '
    {
        # Tabs delimit the output records, so neutralise any in the input.
        gsub(/\t/, " ")
    }
    NF == 0 { next }    # blank lines carry no message
    {
        # Level tag may be bare ("[error]") or module-qualified ("[core:error]").
        sev = "info"
        if      ($0 ~ /\[([a-z0-9_]+:)?(crit|alert|emerg|fatal)\]/) sev = "critical"
        else if ($0 ~ /\[([a-z0-9_]+:)?(error|err)\]/)              sev = "error"
        else if ($0 ~ /\[([a-z0-9_]+:)?(warn|warning)\]/)           sev = "warn"
        else if ($0 ~ /\[([a-z0-9_]+:)?notice\]/)                   sev = "notice"

        # Message body: strip the leading timestamp and the bookkeeping tags.
        body = $0
        sub(/^\[[^]]*\][ \t]*/, "", body)
        gsub(/\[[a-z0-9_]+:[a-z0-9]+\]/, "", body)
        gsub(/\[pid [0-9]+(:tid [0-9]+)?\]/, "", body)
        gsub(/\[client [^]]*\]/, "", body)
        sub(/^[ \t]+/, "", body)

        # Signature: digit runs become "N" (foo-123.log -> foo-N.log), then the
        # result is cut to 120 characters.
        sig = body
        gsub(/[0-9]+/, "N", sig)
        if (length(sig) > 120) sig = substr(sig, 1, 120)
        # An empty field would shift the columns when the record is read back.
        if (sig == "") sig = "(no message)"

        count[sig]++
        sev_of[sig]  = sev          # severity of the most recent occurrence
        last_of[sig] = $0
        if (!(sig in first_of)) first_of[sig] = $0
    }
    END {
        # Array iteration order is unspecified; sort(1) below does the ranking.
        for (s in count) {
            printf "%d\t%s\t%s\t%s\t%s\n", count[s], sev_of[s], s, first_of[s], last_of[s]
        }
    }
    ' "${file}" | sort -rn | head -n 20
}

# Best-effort: with pipefail, `head` closing the pipe early makes `sort` report
# SIGPIPE, so the status is ignored. awk's stderr is left visible on purpose so
# a broken awk no longer yields a silently empty "grouped" list.
GROUPED_RAW=$(_onx_group_messages "${TMPSCAN}" || true)

# ── Field extraction helpers (pure bash, no forks) ──────────────────────────
# _onx_line_timestamp LINE
#   Prints the timestamp that starts LINE: the content of a leading "[...]"
#   field, or a leading "YYYY/MM/DD HH:MM:SS" prefix. Prints nothing when LINE
#   starts with neither.
_onx_line_timestamp() {
    local line="$1"
    local re_bracketed='^\[([^]]+)\]'
    local re_plain='^([0-9]{4}/[0-9]{2}/[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2})'
    if [[ "${line}" =~ ${re_bracketed} ]]; then
        printf '%s' "${BASH_REMATCH[1]}"
    elif [[ "${line}" =~ ${re_plain} ]]; then
        printf '%s' "${BASH_REMATCH[1]}"
    fi
}

# _onx_line_client LINE
#   Prints the client address of LINE, taken from a "[client ADDR]" field or a
#   "client: ADDR" field (first occurrence only). An IPv4 address has its
#   ":port" suffix removed. Any other address (e.g. IPv6) is printed verbatim
#   because a trailing ":port" cannot be told apart from the last address
#   group. Prints nothing when LINE has no client field.
_onx_line_client() {
    local line="$1" addr=""
    local re_tagged='\[client ([^] ]+)\]'
    local re_keyed='client: ([0-9A-Fa-f:.]+)'
    local re_ipv4='^([0-9]{1,3}(\.[0-9]{1,3}){3})(:[0-9]+)?$'
    if [[ "${line}" =~ ${re_tagged} ]]; then
        addr="${BASH_REMATCH[1]}"
    elif [[ "${line}" =~ ${re_keyed} ]]; then
        addr="${BASH_REMATCH[1]}"
    fi
    if [[ "${addr}" =~ ${re_ipv4} ]]; then
        addr="${BASH_REMATCH[1]}"
    fi
    printf '%s' "${addr}"
}

# _onx_line_level LINE
#   Prints the normalised severity of LINE: critical, error, warn, notice, or
#   info when no known level tag is present. Accepts both "[error]" and the
#   module-qualified "[core:error]" form.
_onx_line_level() {
    local line="$1"
    local re_crit='\[([a-z0-9_]+:)?(crit|alert|emerg|fatal)\]'
    local re_err='\[([a-z0-9_]+:)?(error|err)\]'
    local re_warn='\[([a-z0-9_]+:)?(warn|warning)\]'
    local re_notice='\[([a-z0-9_]+:)?notice\]'
    if   [[ "${line}" =~ ${re_crit} ]];   then printf 'critical'
    elif [[ "${line}" =~ ${re_err} ]];    then printf 'error'
    elif [[ "${line}" =~ ${re_warn} ]];   then printf 'warn'
    elif [[ "${line}" =~ ${re_notice} ]]; then printf 'notice'
    else                                       printf 'info'
    fi
}

# ── Grouped JSON ────────────────────────────────────────────────────────────
# GROUPED_RAW holds one group per line: count, severity, signature, first raw
# line, last raw line (tab-separated). Each record becomes one JSON object,
# with jq doing all string escaping.
GROUPED_JSON='['
SEP=""
while IFS=$'\t' read -r cnt sev sig first last; do
    # Skips the empty record produced by an empty GROUPED_RAW and anything
    # that does not start with a numeric count.
    [[ "${cnt:-}" =~ ^[0-9]+$ ]] || continue
    entry=$(jq -cn \
        --arg     sig   "${sig}" \
        --arg     sev   "${sev}" \
        --argjson count "$(( 10#${cnt} ))" \
        --arg     fts   "$(_onx_line_timestamp "${first}")" \
        --arg     lts   "$(_onx_line_timestamp "${last}")" \
        --arg     ip    "$(_onx_line_client "${first}")" \
        '{signature:$sig,severity:$sev,count:$count,first_seen:$fts,last_seen:$lts,sample_ip:$ip}')
    GROUPED_JSON+="${SEP}${entry}"
    SEP=","
done <<< "${GROUPED_RAW}"
GROUPED_JSON+="]"

# ── Recent N lines (last 50) ────────────────────────────────────────────────
RECENT_JSON='['
SEP=""
while IFS= read -r ln; do
    [[ -z "${ln}" ]] && continue
    # The raw line goes through stdin rather than --arg so that a very long
    # log line cannot exceed the kernel's per-argument size limit.
    entry=$(printf '%s' "${ln}" | jq -Rsc \
        --arg ts     "$(_onx_line_timestamp "${ln}")" \
        --arg level  "$(_onx_line_level "${ln}")" \
        --arg client "$(_onx_line_client "${ln}")" \
        '{timestamp:$ts,level:$level,client:$client,message:.}')
    RECENT_JSON+="${SEP}${entry}"
    SEP=","
done < <(tail -n 50 -- "${TMPSCAN}" 2>/dev/null || true)
RECENT_JSON+="]"

# ── Final report ────────────────────────────────────────────────────────────
# _onx_int_or_zero VALUE
#   Prints VALUE as a plain decimal integer. Only the first line of VALUE is
#   considered, surrounding whitespace and leading zeros are dropped, and
#   anything that is not purely digits becomes 0. This keeps a malformed
#   counter (e.g. "0<newline>0") from producing invalid JSON.
_onx_int_or_zero() {
    local value="${1-}"
    value=${value%%$'\n'*}
    value="${value//[[:space:]]/}"
    if [[ "${value}" =~ ^[0-9]+$ ]]; then
        # 10# forces base 10 so a zero-padded value is not read as octal.
        printf '%d' "$(( 10#${value} ))"
    else
        printf '0'
    fi
}

SCANNED=$(_onx_int_or_zero "${SCANNED:-}")
COUNT_CRIT=$(_onx_int_or_zero "${COUNT_CRIT:-}")
COUNT_ERR=$(_onx_int_or_zero "${COUNT_ERR:-}")
COUNT_WARN=$(_onx_int_or_zero "${COUNT_WARN:-}")
COUNT_NOT=$(_onx_int_or_zero "${COUNT_NOT:-}")

onx_log "error-log-parse: ${DOMAIN} lines=${SCANNED} err=${COUNT_ERR} warn=${COUNT_WARN}"

# Assemble the document with jq so every string is escaped properly. The two
# pre-built arrays are fed through stdin (printf is a builtin, so their size is
# not bound by the kernel's argument-length limit) and read back in order with
# `input`. If either array is not valid JSON, jq fails and, under `set -e`,
# the script exits non-zero instead of printing a corrupt document.
printf '%s\n' "${GROUPED_JSON}" "${RECENT_JSON}" | jq -cn \
    --arg     domain   "${DOMAIN}" \
    --arg     log_file "${LOG_FILE}" \
    --argjson scanned  "${SCANNED}" \
    --argjson crit     "${COUNT_CRIT}" \
    --argjson err      "${COUNT_ERR}" \
    --argjson warn     "${COUNT_WARN}" \
    --argjson notice   "${COUNT_NOT}" \
    'input as $grouped | input as $recent |
     {domain:$domain,log_file:$log_file,lines_scanned:$scanned,
      by_severity:{critical:$crit,error:$err,warn:$warn,notice:$notice},
      grouped:$grouped,recent:$recent}'
