#!/usr/bin/env bash
# =============================================================================
# onx-file-search — Find files by name and/or content under user's home
#
# Purpose:
#   Recursive search for filenames (find -name) and optionally content (rg/grep).
#   Results capped at MAX_RESULTS to prevent DoS. Used by File Manager search
#   panel.
#
# Input (stdin JSON):
#   {
#     "username":   "onx_xxxx",
#     "path":       "public_html",    -- optional; default home root
#     "pattern":    "*.php",          -- optional; shell glob (find -iname)
#     "content":    "wp_options",     -- optional; substring to grep in files
#     "case_sensitive": false,        -- optional; default false
#     "max_results": 100,             -- optional; default 100, hard cap 500
#     "max_depth":   8                -- optional; default 8
#   }
#
# Output (stdout JSON):
#   {
#     "username":    "onx_xxxx",
#     "path":        "/home/onx_xxxx/public_html",
#     "pattern":     "*.php",
#     "content":     "wp_options",
#     "result_count": 27,
#     "truncated":    false,
#     "matches": [
#       { "path": "public_html/wp-config.php", "line": 42, "snippet": "...wp_options..." },
#       { "path": "public_html/wp-admin/options.php", "line": 0, "snippet": null },
#       ...                           -- "path" is relative to /home/<username>
#     ]
#   }
#
# Exit codes: 0=ok 1=invalid-input 2=preflight-fail 3=execution-fail
# Deployed to: /usr/local/onoxsoft/bin/onx-file-search
# =============================================================================

# Strict mode: stop on command failure, on use of an unset variable, and when
# any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# The shared helper library lives next to the real (symlink-resolved) location
# of this script.
SCRIPT_DIR="$(dirname "$(readlink -f "$0")")"
# shellcheck source=_lib/common.sh
source "${SCRIPT_DIR}/_lib/common.sh"

# Preflight for the external tools this script depends on (require_cmd comes
# from common.sh; presumably it aborts when the tool is missing — confirm there).
for REQUIRED_CMD in jq find; do
    require_cmd "${REQUIRED_CMD}"
done

# Load the request through the shared helper. NOTE(review): it appears to read
# the stdin JSON into INPUT, which is referenced further down — confirm in
# common.sh.
onx_json_input

# ── Request fields & validation ─────────────────────────────────────────────

# Normalise an untrusted numeric field to a plain base-10 integer string.
#   $1  raw value (may be empty, non-numeric, zero-padded or absurdly long)
#   $2  fallback printed when $1 is not made of decimal digits only
# Leading zeros are removed so that later (( )) arithmetic never tries to read
# the value as octal ("08" is an arithmetic error in bash). Values longer than
# nine digits saturate at 999999999 so they cannot overflow; the caller clamps
# them to its own maximum anyway.
_onx_fs_uint() {
    local raw="${1-}" fallback="${2-0}"
    if [[ ! "${raw}" =~ ^[0-9]+$ ]]; then
        printf '%s' "${fallback}"
        return 0
    fi
    # "${raw%%[!0]*}" is the run of leading zeros; strip exactly that prefix.
    raw="${raw#"${raw%%[!0]*}"}"
    if [[ -z "${raw}" ]]; then
        raw=0
    elif (( ${#raw} > 9 )); then
        raw=999999999
    fi
    printf '%s' "${raw}"
}

# Field extraction is delegated to common.sh. NOTE(review): the second argument
# of onx_json_field looks like the default for a missing key — confirm there.
USERNAME="$(onx_json_field username)"
REL_PATH="$(onx_json_field path "")"
PATTERN="$(onx_json_field pattern "")"
CONTENT="$(onx_json_field content "")"
CASE_SENSITIVE="$(onx_json_get_bool "$INPUT" case_sensitive false)"
MAX_RESULTS="$(onx_json_field max_results 100)"
MAX_DEPTH="$(onx_json_field max_depth 8)"

onx_validate_username "$USERNAME"

# max_results: non-numeric or < 1 falls back to 100; hard cap 500.
MAX_RESULTS="$(_onx_fs_uint "${MAX_RESULTS}" 100)"
if (( MAX_RESULTS < 1 )); then
    MAX_RESULTS=100
elif (( MAX_RESULTS > 500 )); then
    MAX_RESULTS=500
fi

# max_depth: non-numeric falls back to 8; clamped to the range 1..16.
MAX_DEPTH="$(_onx_fs_uint "${MAX_DEPTH}" 8)"
if (( MAX_DEPTH < 1 )); then
    MAX_DEPTH=1
elif (( MAX_DEPTH > 16 )); then
    MAX_DEPTH=16
fi

# A search needs at least one criterion.
if [[ -z "${PATTERN}" && -z "${CONTENT}" ]]; then
    onx_die 1 "Either 'pattern' or 'content' must be provided"
fi

# Reject (not strip) patterns carrying shell metacharacters; glob wildcards
# such as * ? [ ] stay allowed.
if [[ "${PATTERN}" =~ [\;\|\&\$\`] ]]; then
    onx_die 1 "pattern contains forbidden shell chars"
fi

# ── Search root resolution ──────────────────────────────────────────────────

# Resolve a caller-supplied relative path under BASE and confirm the canonical
# result is BASE itself or lies beneath it.
#   $1  base directory (absolute)
#   $2  relative path from the request; empty means BASE itself. One leading
#       "/" is tolerated and dropped.
# Prints the canonical path on success.
# Returns: 0 ok · 1 resolved path is outside BASE · 2 path could not be resolved
_onx_fs_resolve_under() {
    local base="$1" rel="${2-}" candidate resolved
    candidate="${base}"
    if [[ -n "${rel}" ]]; then
        candidate="${base}/${rel#/}"
    fi
    # Fail closed: if realpath is missing or errors out we must NOT fall back
    # to the raw string — an unresolved "base/../../etc" would still match the
    # "base/*" prefix test below and slip through the guard.
    resolved="$(realpath -m -- "${candidate}" 2>/dev/null)" || return 2
    [[ -n "${resolved}" ]] || return 2
    case "${resolved}" in
        "${base}"|"${base}"/*) ;;
        *) return 1 ;;
    esac
    printf '%s' "${resolved}"
}

HOME_DIR="/home/${USERNAME}"
[[ -d "$HOME_DIR" ]] || onx_die 2 "home directory missing: ${HOME_DIR}"

# Resolve path, guard /home/<user> escape.
RESOLVE_RC=0
TARGET="$(_onx_fs_resolve_under "${HOME_DIR}" "${REL_PATH}")" || RESOLVE_RC=$?
case "${RESOLVE_RC}" in
    0) ;;
    1) onx_die 1 "path escapes /home/${USERNAME}: ${REL_PATH}" ;;
    *) onx_die 2 "cannot resolve search path: ${REL_PATH}" ;;
esac
[[ -d "${TARGET}" ]] || onx_die 2 "search root not a directory: ${REL_PATH}"

# ── Phase 1: filename search ───────────────────────────────────────────────
# Writes one candidate path per line into NAME_LIST_TMP; the file is removed
# again when the script exits.
NAME_LIST_TMP="$(mktemp -t onx-search-names.XXXXXX)"

_onx_fs_cleanup() {
    if [[ -f "${NAME_LIST_TMP}" ]]; then
        rm -f "${NAME_LIST_TMP}"
    fi
}
trap _onx_fs_cleanup EXIT

# Name test used when a pattern is given: case-insensitive unless requested.
if [[ "${CASE_SENSITIVE}" == "true" ]]; then
    NAME_FLAG=( -name )
else
    NAME_FLAG=( -iname )
fi

# Regular files only, symlinks never followed (-P), bounded recursion depth.
FIND_ARGS=( -P "${TARGET}" -maxdepth "${MAX_DEPTH}" -type f )
if [[ -n "${PATTERN}" ]]; then
    FIND_ARGS+=( "${NAME_FLAG[@]}" "${PATTERN}" )
    CANDIDATE_CAP="${MAX_RESULTS}"
else
    # No pattern → still keep the candidate list bounded (4 × MAX_RESULTS) so
    # the content sweep cannot end up traversing the entire home.
    CANDIDATE_CAP=$(( MAX_RESULTS * 4 ))
fi

# find's diagnostics are discarded and a non-zero pipeline status is tolerated
# on purpose: head closing the pipe early must not abort the script.
find "${FIND_ARGS[@]}" 2>/dev/null \
    | head -n "${CANDIDATE_CAP}" > "${NAME_LIST_TMP}" || true

# ── Phase 2: content search (optional) ──────────────────────────────────────
# Reads the candidate paths in NAME_LIST_TMP (one per line) and fills
# MATCHES_JSON / RESULT_COUNT / TRUNCATED for the final response.
MATCHES_JSON="[]"
RESULT_COUNT=0
TRUNCATED="false"
# One compact JSON object per match. Deliberately not called LINES: bash
# reserves that name (terminal height) and may assign to it itself.
MATCH_ROWS=()

# Line matcher, grep by default. Both variants
#   * treat CONTENT as a literal substring (-F / --fixed-strings), as the
#     request contract documents, instead of a tool-specific regex, and
#   * stop after 3 matching lines per file on their own, so no `| head` is
#     needed — under pipefail a matcher killed by SIGPIPE made the whole
#     pipeline "fail" and its hits were silently dropped.
CONTENT_CMD=(grep -n -I -F -m 3)
# Prefer ripgrep when present for speed.
if command -v rg >/dev/null 2>&1; then
    CONTENT_CMD=(rg --no-heading --line-number --fixed-strings --max-count=3 --no-messages)
fi
if [[ "${CASE_SENSITIVE:-false}" != "true" ]]; then
    CONTENT_CMD+=(-i)
fi

# Print up to 3 "LINE:TEXT" hits for the literal NEEDLE in FILE; prints nothing
# when there is no match or the file cannot be read.
#   $1  needle   $2  file path
_onx_fs_file_hits() {
    local needle="$1" file="$2"
    # A non-zero exit here means "no match" or "unreadable"; neither should
    # abort the sweep, so the status is intentionally discarded.
    "${CONTENT_CMD[@]}" -- "${needle}" "${file}" 2>/dev/null || true
}

# Print the collected MATCH_ROWS as a single JSON array ("[]" when empty).
_onx_fs_rows_json() {
    if (( ${#MATCH_ROWS[@]} == 0 )); then
        printf '[]'
        return 0
    fi
    printf '%s\n' "${MATCH_ROWS[@]}" | jq -s '.'
}

if [[ -n "${CONTENT}" && -s "${NAME_LIST_TMP}" ]]; then
    while IFS= read -r FPATH; do
        if [[ -z "${FPATH}" || ! -f "${FPATH}" ]]; then
            continue
        fi
        # Skip files >5MB to keep scan fast.
        SIZE="$(stat -c '%s' "${FPATH}" 2>/dev/null || echo 0)"
        if (( SIZE > 5242880 )); then
            continue
        fi

        HITS="$(_onx_fs_file_hits "${CONTENT}" "${FPATH}")"
        if [[ -z "${HITS}" ]]; then
            continue
        fi

        # Reported paths are relative to the user's home directory.
        REL="${FPATH#"${HOME_DIR}"/}"
        while IFS=: read -r LINE_NO SNIPPET; do
            # Only "<number>:<text>" lines are hits; anything else (e.g. a
            # matcher notice about binary data) would not be valid for
            # --argjson below, so it is skipped.
            if [[ ! "${LINE_NO}" =~ ^[0-9]+$ ]]; then
                continue
            fi
            # Trim snippet to 240 chars to keep payload bounded.
            SNIPPET="${SNIPPET:0:240}"
            MATCH_ROWS+=("$(jq -nc \
                --arg path "${REL}" \
                --argjson line "${LINE_NO}" \
                --arg snippet "${SNIPPET}" \
                '{path:$path,line:$line,snippet:$snippet}')")
            RESULT_COUNT=$((RESULT_COUNT + 1))
            if (( RESULT_COUNT >= MAX_RESULTS )); then
                TRUNCATED="true"
                break 2
            fi
        done <<< "${HITS}"
    done < "${NAME_LIST_TMP}"
elif [[ -s "${NAME_LIST_TMP}" ]]; then
    # Filename-only: emit matches with line 0 and a null snippet.
    while IFS= read -r FPATH; do
        if [[ -z "${FPATH}" ]]; then
            continue
        fi
        REL="${FPATH#"${HOME_DIR}"/}"
        MATCH_ROWS+=("$(jq -nc --arg path "${REL}" '{path:$path,line:0,snippet:null}')")
        RESULT_COUNT=$((RESULT_COUNT + 1))
        if (( RESULT_COUNT >= MAX_RESULTS )); then
            TRUNCATED="true"
            break
        fi
    done < "${NAME_LIST_TMP}"
fi

MATCHES_JSON="$(_onx_fs_rows_json)"

# ── Report ──────────────────────────────────────────────────────────────────
# Log line for this search. The content needle itself is not written out, only
# whether one was supplied ("set" or empty).
onx_log "file-search: user=${USERNAME} pattern='${PATTERN}' content='${CONTENT:+set}' count=${RESULT_COUNT} truncated=${TRUNCATED}"

# Coerce the TRUNCATED flag to a strict JSON boolean literal.
if [[ "${TRUNCATED}" == "true" ]]; then
    TRUNCATED_JSON=true
else
    TRUNCATED_JSON=false
fi

# --arg binds strings, --argjson binds values that are already JSON
# (the count, the boolean and the pre-assembled matches array).
RESPONSE_ARGS=(
    --arg username "${USERNAME}"
    --arg path "${TARGET}"
    --arg pattern "${PATTERN}"
    --arg content "${CONTENT}"
    --argjson result_count "${RESULT_COUNT}"
    --argjson matches "${MATCHES_JSON}"
    --argjson truncated "${TRUNCATED_JSON}"
)

# Single compact JSON document on stdout, as described in the file header.
jq -nc "${RESPONSE_ARGS[@]}" \
    '{
        username: $username,
        path: $path,
        pattern: $pattern,
        content: $content,
        result_count: $result_count,
        truncated: $truncated,
        matches: $matches
     }'
