#!/usr/bin/env bash
# =============================================================================
# onx-log-tail — Whitelisted log file tail/grep/since reader.
#
# Input (stdin JSON):
#   {
#     "path":   "/var/log/httpd/error_log",
#     "lines":  200,        // 50..5000
#     "filter": "AH02032",  // optional literal substring (case-insensitive, not a regex)
#     "since":  "5m"        // optional: 5m / 1h / 1d / iso8601
#   }
#
# Output (stdout JSON):
#   {
#     "path":  "...",
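#     "lines": [{"time":"YYYY-MM-DD HH:MM:SS","level":"error","message":"..."}],
#              // "time" is the timestamp text extracted from the line (format
#              // varies by log type); the current time in the format shown
#              // above is used when no timestamp is recognized.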
#     "count": 42
#   }
#
# Exit codes:
#   0  — success
#   1  — invalid input (bad path / lines out of range)
#   2  — preflight fail (path not readable)
#   3  — execution fail
#
# Security:
#   • Path must match the ALLOWED_PATHS whitelist (glob patterns ok).
#   • No shell injection — we never pass user input to eval-ish constructs.
#   • Apache user runs as non-root; system logs may need ACL or sudoers.
#
# Sudoers:
#   apache ALL=(root) NOPASSWD: /usr/local/onoxsoft/bin/onx-log-tail
#
# Deployed to: /usr/local/onoxsoft/bin/onx-log-tail
# =============================================================================

set -euo pipefail

# Bash's `checkwinsize` option (enabled by default in current bash) rewrites
# the special variables LINES and COLUMNS after external commands when stderr
# is a terminal. This script keeps the requested line count in LINES, so the
# option is switched off to stop that value being replaced by the terminal
# height when the tool is run by hand.
shopt -u checkwinsize

# Resolve the real location of this script so the shared library is found
# even when the script is invoked through a symlink.
SCRIPT_DIR="$(dirname "$(readlink -f "$0")")"
# shellcheck source=_lib/common.sh
source "${SCRIPT_DIR}/_lib/common.sh"

# onx_json_input / onx_json_field / onx_die are provided by _lib/common.sh
# (not visible here). NOTE(review): the second argument to onx_json_field
# looks like the default used when the key is absent — confirm in common.sh.
onx_json_input

PATH_ARG=$(onx_json_field 'path' '')
LINES=$(onx_json_field 'lines' '200')
FILTER=$(onx_json_field 'filter' '')
SINCE=$(onx_json_field 'since' '')

# "path" is the only mandatory field.
[[ -n "$PATH_ARG" ]] || onx_die 1 "path is required"

# ── Path whitelist (plain bash glob match) ───────────────────────────────────
# Patterns are compared with [[ string == pattern ]]; extglob is not required.
# In that context `*` also matches "/", so a pattern may span directories.
ALLOWED_PATHS=(
    "/var/log/httpd/access_log"
    "/var/log/httpd/error_log"
    "/var/log/httpd/*-access_log"
    "/var/log/httpd/*-error_log"
    "/var/log/php-fpm/error.log"
    "/var/log/php-fpm/*.log"
    "/var/log/mysql/error.log"
    "/var/log/mariadb/mariadb.log"
    "/var/log/mariadb/slow-queries.log"
    "/var/log/maillog"
    "/var/log/secure"
    "/var/log/messages"
    "/var/log/fail2ban.log"
    "/var/log/pure-ftpd/pure-ftpd.log"
    "/var/log/onox/sysapi.log"
    "/var/log/onox/*.log"
    "/home/*/logs/*-access.log"
    "/home/*/logs/*-error.log"
    "/var/www/laravel/storage/logs/laravel.log"
    "/var/www/laravel/storage/logs/laravel-*.log"
)

# Return 0 when $1 matches at least one pattern in ALLOWED_PATHS.
_onx_path_whitelisted() {
    local candidate="$1"
    local pattern
    for pattern in "${ALLOWED_PATHS[@]}"; do
        # shellcheck disable=SC2053  # RHS stays unquoted so it acts as a glob
        if [[ "$candidate" == $pattern ]]; then
            return 0
        fi
    done
    return 1
}

# Return 0 when $1 is whitelisted both as given and after symlink resolution.
#
# The script may run as root via sudoers, and some whitelisted locations
# (e.g. /home/*/logs/) are writable by ordinary users. Without the second
# check a user could plant a symlink with a whitelisted name that points at
# an arbitrary file. Deployments that intentionally symlink a log location
# must add the real target location to ALLOWED_PATHS.
_onx_path_allowed() {
    local requested="$1"
    local resolved

    _onx_path_whitelisted "$requested" || return 1

    # readlink -f fails when an intermediate directory is missing; in that
    # case there is nothing to follow, so fall back to the literal path.
    resolved=$(readlink -f -- "$requested" 2>/dev/null) || resolved="$requested"
    [[ -n "$resolved" ]] || resolved="$requested"

    _onx_path_whitelisted "$resolved"
}

allowed=false
if _onx_path_allowed "$PATH_ARG"; then
    allowed=true
fi

[[ "$allowed" == "true" ]] || onx_die 1 "path_not_allowed: $PATH_ARG"

# Validate lines (50..5000)
#
# Return 0 when $1 is a decimal integer within 50..5000.
# The digit-count cap keeps the arithmetic below from wrapping on 64-bit
# overflow (a huge digit string could otherwise wrap into the valid range),
# and the explicit 10# base stops a leading zero from being read as octal.
_onx_lines_in_range() {
    local n="$1"
    [[ "$n" =~ ^[0-9]{1,9}$ ]] || return 1
    (( 10#$n >= 50 && 10#$n <= 5000 ))
}

if [[ ! "$LINES" =~ ^[0-9]+$ ]]; then
    onx_die 1 "lines must be integer"
elif ! _onx_lines_in_range "$LINES"; then
    onx_die 1 "lines out of range (50..5000)"
else
    # Normalize (e.g. "0200" -> "200") so later consumers see the same
    # number that was range-checked.
    LINES=$(( 10#$LINES ))
fi

# Reject path-traversal sneakiness: any ".." anywhere in the path is refused.
if [[ "$PATH_ARG" == *".."* ]]; then
    onx_die 1 "path traversal not allowed"
fi

# ── Filter sanity check ──────────────────────────────────────────────────────
# The filter is later handed to `grep -F`, i.e. matched as a literal string,
# so no regex metacharacter needs to be rejected here. The only restriction
# is a 200-character length cap; an empty filter (length 0) always passes.
if (( ${#FILTER} > 200 )); then
    onx_die 1 "filter too long"
fi

# Convert the optional "since" value into an epoch-seconds cutoff.
# Known shorthands are mapped to a relative date expression; anything else is
# passed to `date -d` unchanged. SINCE_EPOCH stays 0 (meaning "no cutoff")
# when "since" is empty or `date` cannot parse the value.
SINCE_EPOCH=0
if [[ -n "$SINCE" ]]; then
    case "$SINCE" in
        5m|5min) since_spec='-5 minutes' ;;
        15m)     since_spec='-15 minutes' ;;
        1h)      since_spec='-1 hour' ;;
        24h|1d)  since_spec='-1 day' ;;
        *)       since_spec="$SINCE" ;;
    esac
    SINCE_EPOCH=$(date -d "$since_spec" +%s 2>/dev/null || echo 0)
fi

# Tool preflight comes first: jq is needed even for the empty-result path
# below, so its presence must be verified before any JSON is emitted.
require_cmd jq
require_cmd tail

# Existence + readability check (preflight; soft-fail if not yet rotated)
if [[ ! -r "$PATH_ARG" ]]; then
    # The file exists but cannot be read: report a permissions problem.
    if [[ -f "$PATH_ARG" ]]; then
        onx_die 2 "log file not readable (permissions)"
    fi
    # Empty result for missing files (rotated or never created yet).
    # jq builds the object so the path is always correctly JSON-escaped.
    jq -nc --arg path "$PATH_ARG" '{path: $path, lines: [], count: 0}'
    exit 0
fi

# ── Read tail with optional literal filter ───────────────────────────────────
# Print the last $LINES lines of $PATH_ARG; when $FILTER is non-empty, keep
# only lines containing it (literal, case-insensitive).
# Globals:  LINES, PATH_ARG, FILTER (read)
# Outputs:  selected lines on stdout
# Returns:  always 0 — a tail failure or "no match" yields empty output
# Note:     the filter is applied after tail, so fewer than $LINES lines may
#           be returned.
read_log() {
    if [[ -n "$FILTER" ]]; then
        # -a: treat input as text. Without it grep prints a "binary file
        # matches" notice in place of the matching lines as soon as the tail
        # contains a NUL byte or otherwise undecodable data.
        tail -n "$LINES" "$PATH_ARG" 2>/dev/null | grep -a -F -i -- "$FILTER" || true
    else
        tail -n "$LINES" "$PATH_ARG" 2>/dev/null || true
    fi
}

# Level classifier — best-effort substring match over each raw line.
# Arguments: $1 - raw log line
# Outputs:   one of crit | error | warn | notice | debug | info on stdout
#            (no trailing newline)
#
# Matching is case-insensitive (the line is lower-cased first). The first
# case block looks for free-text keywords. Only when none of those match, a
# second pass looks for structured level tags:
#   • Apache 2.4 error log:  [module:level]   e.g. "[core:error]"
#   • Laravel/Monolog:       channel.LEVEL:   e.g. "production.ERROR:"
# Keeping the tags as a fallback means lines the keyword pass already
# classified keep their level. Unknown lines are bucketed as "info" so the UI
# always has something to colorize.
classify_level() {
    local line="$1"
    local lower
    lower="${line,,}"
    case "$lower" in
        *" emerg"*|*"emergency"*) printf 'crit' ;;
        *" alert"*) printf 'crit' ;;
        *" crit"*|*"critical"*) printf 'crit' ;;
        *" error"*|*" err "*|*"[error]"*|*"[err]"*|*"php fatal error"*|*"php parse error"*) printf 'error' ;;
        *" warn"*|*"warning"*|*"[warn]"*|*"php notice"*) printf 'warn' ;;
        *" notice"*|*"[notice]"*) printf 'notice' ;;
        *" debug"*|*"[debug]"*) printf 'debug' ;;
        *)
            # Fallback: structured level tags.
            case "$lower" in
                *":emerg]"*|*":alert]"*|*":crit]"*|*".alert:"*) printf 'crit' ;;
                *":error]"*|*".error:"*) printf 'error' ;;
                *":warn]"*) printf 'warn' ;;
                *":notice]"*|*".notice:"*) printf 'notice' ;;
                *":debug]"*|*".debug:"*) printf 'debug' ;;
                *) printf 'info' ;;
            esac
            ;;
    esac
}

# Extract the first recognizable timestamp from a raw log line (best-effort).
# Arguments: $1 - raw log line
# Outputs:   timestamp text on stdout (no trailing newline); nothing when no
#            known format matches
# Returns:   always 0
#
# Formats, tried in this order:
#   1. Apache error log:   [Tue Sep 24 10:11:12.123456 2024]
#   2. syslog / maillog:   Sep 24 10:11:12            (start of line only)
#   3. Laravel:            [2024-09-24 10:11:12]
#   4. Apache access log:  [24/Sep/2024:10:11:12 +0000]
# Formats 1–3 are printed exactly as found. Format 4 is re-ordered to
# "24 Sep 2024 10:11:12 +0000" because the raw "dd/Mon/yyyy:" form is not
# accepted by `date -d`, which callers use to compare against a cutoff.
extract_ts() {
    local line="$1"
    local -r apache_error_re='\[([A-Z][a-z]{2} [A-Z][a-z]{2} +[0-9]{1,2} [0-9:]+(\.[0-9]+)? [0-9]{4})\]'
    local -r syslog_re='^([A-Z][a-z]{2} +[0-9]{1,2} [0-9:]{8})'
    local -r laravel_re='\[([0-9]{4}-[0-9]{2}-[0-9]{2} [0-9:]+)\]'
    local -r apache_access_re='\[([0-9]{1,2})/([A-Z][a-z]{2})/([0-9]{4}):([0-9]{2}:[0-9]{2}:[0-9]{2}) ([+-][0-9]{4})\]'

    if [[ "$line" =~ $apache_error_re ]]; then
        printf '%s' "${BASH_REMATCH[1]}"
    elif [[ "$line" =~ $syslog_re ]]; then
        printf '%s' "${BASH_REMATCH[1]}"
    elif [[ "$line" =~ $laravel_re ]]; then
        printf '%s' "${BASH_REMATCH[1]}"
    elif [[ "$line" =~ $apache_access_re ]]; then
        # day month year time zone
        printf '%s %s %s %s %s' \
            "${BASH_REMATCH[1]}" "${BASH_REMATCH[2]}" "${BASH_REMATCH[3]}" \
            "${BASH_REMATCH[4]}" "${BASH_REMATCH[5]}"
    fi
}

# Build JSON array of parsed entries
LINES_JSON='[]'

# Timestamp used for lines in which no timestamp is recognized. Computed once
# so the loop does not fork `date` for every such line.
NOW_TS=$(date +"%Y-%m-%d %H:%M:%S")

# Emit three text lines per kept log entry: time, level, raw message.
# None of the three can contain a newline (the message is read line-wise, the
# other two are derived from it or fixed), so a single jq pass can regroup
# them. Entries older than SINCE_EPOCH are skipped; entries whose timestamp is
# missing or cannot be parsed by `date -d` are kept.
emit_entry_fields() {
    local raw ts lvl line_epoch
    # `|| [[ -n "$raw" ]]` keeps a final line that has no trailing newline.
    while IFS= read -r raw || [[ -n "$raw" ]]; do
        [[ -z "$raw" ]] && continue
        ts=$(extract_ts "$raw")
        if (( SINCE_EPOCH > 0 )) && [[ -n "$ts" ]]; then
            line_epoch=$(date -d "$ts" +%s 2>/dev/null || echo 0)
            if (( line_epoch > 0 && line_epoch < SINCE_EPOCH )); then
                continue
            fi
        fi
        lvl=$(classify_level "$raw")
        printf '%s\n%s\n%s\n' "${ts:-$NOW_TS}" "$lvl" "$raw"
    done < <(read_log)
}

# One jq process turns the flat field stream into the array of objects
# (previously one jq process was forked per log line).
LINES_JSON=$(
    emit_entry_fields | jq -Rnc '
        [inputs] as $f
        | [ range(0; $f | length; 3)
            | {time: $f[.], level: $f[. + 1], message: $f[. + 2]} ]'
) || onx_die 3 "failed to encode log entries"

jq -nc \
    --arg path "$PATH_ARG" \
    --argjson lines "$LINES_JSON" \
    '{path: $path, lines: $lines, count: ($lines | length)}'

exit 0
