#!/usr/bin/env bash
# =============================================================================
# onx-backup-prune — Garbage-collect old backup archives
#
# Purpose:
#   Applies a retention policy per account: the N most-recent backups are
#   always kept; a backup beyond those N is deleted only when it is also older
#   than keep_days. Respects DRY_RUN=1 (no deletes, list only). Removes the
#   matching .sha256 sidecar when removing a .tar.gz.
#
# Input (stdin JSON):
#   {
#     "path":                   "/var/backups/onoxsoft",
#     "keep_days":              30,
#     "keep_count_per_account": 5
#   }
#
# Output (stdout JSON):
#   {
#     "path":         "/var/backups/onoxsoft",
#     "dry_run":      false,
#     "kept":         ["...", "..."],
#     "deleted":      ["...", "..."],
#     "freed_bytes":  N
#   }
#
# Exit codes: 0=ok 1=invalid 2=preflight 3=exec
#
# Deployed to: /usr/local/onoxsoft/bin/onx-backup-prune
# =============================================================================

set -euo pipefail

# The shared helper library lives in _lib/ next to the real script file;
# readlink -f follows symlinks so an invocation through a link still finds it.
SCRIPT_DIR="$(dirname "$(readlink -f "$0")")"
# shellcheck source=_lib/common.sh
. "${SCRIPT_DIR}/_lib/common.sh"

# Fixed policy values: the only tree a request may point into, and the
# fallbacks used for request fields that are not supplied.
declare -r BACKUP_ROOT_ALLOWED="/var/backups"
declare -r DEFAULT_BACKUP_DIR="/var/backups/onoxsoft"
declare -r DEFAULT_KEEP_DAYS=30
declare -r DEFAULT_KEEP_COUNT=5

# require_root / onx_json_input / onx_json_field are provided by common.sh
# (not visible here); presumably they enforce uid 0, load the stdin JSON and
# return a field or the given default — confirm against the library.
require_root

onx_json_input

BACKUP_DIR="$(onx_json_field "path" "${DEFAULT_BACKUP_DIR}")"
KEEP_DAYS="$(onx_json_field "keep_days" "${DEFAULT_KEEP_DAYS}")"
KEEP_COUNT="$(onx_json_field "keep_count_per_account" "${DEFAULT_KEEP_COUNT}")"

# An empty "path" is treated the same as an absent one.
if [[ -z "${BACKUP_DIR}" ]]; then
    BACKUP_DIR="${DEFAULT_BACKUP_DIR}"
fi

# ── Validation ───────────────────────────────────────────────────────────────

# Validate a retention number and print it as canonical base-10 digits.
#   $1 - raw value taken from the request
# Prints the value with leading zeros stripped and returns 0. Returns 1 and
# prints nothing when the value is not a plain string of digits, or when it has
# more than 9 significant digits.
_onx_prune_uint() {
    local raw=$1 digits
    [[ "${raw}" =~ ^[0-9]+$ ]] || return 1
    # Strip leading zeros with string operations only: bash arithmetic reads a
    # leading 0 as octal, so "010" would silently mean 8 and "08" would abort.
    digits="${raw#"${raw%%[!0]*}"}"
    digits="${digits:-0}"
    # Bound the magnitude so that later 64-bit arithmetic (days * 86400, index
    # comparisons) cannot wrap around into a negative value.
    (( ${#digits} <= 9 )) || return 1
    printf '%s\n' "${digits}"
}

KEEP_DAYS=$(_onx_prune_uint "${KEEP_DAYS}") \
    || onx_die 1 "keep_days must be non-negative integer"
KEEP_COUNT=$(_onx_prune_uint "${KEEP_COUNT}") \
    || onx_die 1 "keep_count_per_account must be non-negative integer"

# Canonicalise before the containment check. Fail closed when the path cannot
# be resolved: testing the raw string instead would let a value such as
# "/var/backups/../../etc" pass the prefix match below.
BACKUP_DIR_REAL="$(realpath -m -- "${BACKUP_DIR}" 2>/dev/null)" \
    || onx_die 1 "path '${BACKUP_DIR}' could not be resolved"
case "${BACKUP_DIR_REAL}" in
    "${BACKUP_ROOT_ALLOWED}"|"${BACKUP_ROOT_ALLOWED}"/*) : ;;
    *) onx_die 1 "path '${BACKUP_DIR}' must be under ${BACKUP_ROOT_ALLOWED}/" ;;
esac

# Translate the DRY_RUN environment switch into a JSON boolean literal:
# exactly "1" means dry run, anything else (or unset) means a real run.
DRY_RUN_FLAG="${DRY_RUN:-0}"
case "${DRY_RUN_FLAG}" in
    1) DRY_RUN="true" ;;
    *) DRY_RUN="false" ;;
esac

# Nothing to prune when the directory does not exist: report an empty result
# and finish successfully.
if ! [[ -d "${BACKUP_DIR_REAL}" ]]; then
    jq -nc --argjson d "${DRY_RUN}" --arg p "${BACKUP_DIR_REAL}" \
        '{path:$p,dry_run:$d,kept:[],deleted:[],freed_bytes:0}'
    exit 0
fi

# ── Enumerate + group by username ────────────────────────────────────────────

# Derive the account name from an archive file name.
#   $1 - file name without directory, e.g. "onx_demo01-20260501.tar.gz"
# Strips the text after the last '-' (or, when there is no '-', the ".tar.gz"
# suffix) and prints the remainder if it matches the onx_ username convention.
# Returns 1 and prints nothing otherwise.
_onx_prune_account_of() {
    local name=$1 account
    account="${name%-*}"
    account="${account%.tar.gz}"
    [[ "${account}" =~ ^onx_[a-z0-9]{4,12}$ ]] || return 1
    printf '%s\n' "${account}"
}

# Record every recognised *.tar.gz directly inside a directory.
#   $1 - directory to scan (not recursive)
# Globals written: FILES_BY_USER (account -> '|'-terminated list of paths),
#                  MTIMES (path -> mtime in epoch seconds),
#                  SIZES (path -> size in bytes)
_onx_prune_scan_dir() {
    local dir=$1 f account meta
    shopt -s nullglob
    for f in "${dir}"/*.tar.gz; do
        [[ -f "${f}" ]] || continue
        # Only group filenames that match our username convention
        account=$(_onx_prune_account_of "${f##*/}") || continue
        # '|' separates list entries and later stages are newline-delimited, so
        # a path containing either would be split into bogus paths that could
        # reach rm. Leave such files alone.
        if [[ "${f}" == *"|"* || "${f}" == *$'\n'* ]]; then
            onx_log "WARNING: skipping unsupported file name: ${f}"
            continue
        fi
        # A file that cannot be inspected must not be given mtime 0: that would
        # make it look like the oldest archive and a deletion candidate.
        if ! meta=$(stat -c '%Y %s' -- "${f}" 2>/dev/null); then
            onx_log "WARNING: cannot stat ${f}; skipping"
            continue
        fi
        FILES_BY_USER["${account}"]+="${f}|"
        MTIMES["${f}"]="${meta%% *}"
        SIZES["${f}"]="${meta##* }"
    done
    shopt -u nullglob
}

NOW_SEC=$(date +%s)
# Files with an mtime strictly below CUTOFF count as older than keep_days.
CUTOFF=$(( NOW_SEC - KEEP_DAYS * 86400 ))

declare -A FILES_BY_USER=()
declare -A MTIMES=()
declare -A SIZES=()

# MOCK_MODE is defaulted here so that an unset variable cannot abort the script
# under `set -u`.
if [[ "${MOCK_MODE:-0}" == "1" ]]; then
    # Synthesise 5 backups across 2 accounts to exercise both policies
    for i in 1 2 3 4 5; do
        USR="onx_demo01"
        [[ $i -gt 3 ]] && USR="onx_demo02"
        DAYS_AGO=$(( i * 10 ))
        FAKE="${BACKUP_DIR_REAL}/${USR}-2026050$(printf '%01d' "${i}").tar.gz"
        FILES_BY_USER["${USR}"]+="${FAKE}|"
        MTIMES["${FAKE}"]=$(( NOW_SEC - DAYS_AGO * 86400 ))
        SIZES["${FAKE}"]=$(( (RANDOM % 300 + 50) * 1024 * 1024 ))
    done
else
    _onx_prune_scan_dir "${BACKUP_DIR_REAL}"
fi

# ── Apply policy ─────────────────────────────────────────────────────────────

# Decide whether one archive should be deleted.
#   $1 - rank within its account, 0 = newest
#   $2 - archive mtime (epoch seconds)
#   $3 - cutoff (epoch seconds); an mtime strictly below it is "too old"
#   $4 - number of newest archives that are always kept (decimal digits)
# Returns 0 when the archive should be deleted, 1 when it should be kept.
_onx_prune_should_delete() {
    local rank=$1 mtime=$2 cutoff=$3 keep_count=$4
    # Rule 1: the newest keep_count entries always stay. 10# forces base 10 so
    # a value such as "08" is not parsed as (invalid) octal.
    (( rank < 10#${keep_count} )) && return 1
    # Rule 2: beyond that, only entries older than the cutoff go.
    (( mtime < cutoff ))
}

KEPT=()
DELETED=()
FREED=0

for USR in "${!FILES_BY_USER[@]}"; do
    # Split user's pipe-separated list
    IFS='|' read -ra USER_FILES <<< "${FILES_BY_USER[$USR]}"

    # Build "<mtime> <path>" lines then sort desc by mtime so newest is first
    declare -a TUPLES=()
    for f in "${USER_FILES[@]}"; do
        [[ -z "${f}" ]] && continue
        TUPLES+=("${MTIMES[$f]:-0} ${f}")
    done

    # Sort numerically desc by first column (mtime)
    SORTED=$(printf '%s\n' "${TUPLES[@]}" | sort -k1,1 -nr)

    IDX=0
    while IFS= read -r LINE; do
        [[ -z "${LINE}" ]] && continue
        # The mtime never contains a space, so the first space is the separator
        # even when the path itself contains spaces.
        FPATH="${LINE#* }"
        FMTIME="${LINE%% *}"

        if _onx_prune_should_delete "${IDX}" "${FMTIME}" "${CUTOFF}" "${KEEP_COUNT}"; then
            REMOVED=1
            # Dry runs and mock mode only report what would be deleted.
            if [[ "${DRY_RUN}" != "true" && "${MOCK_MODE:-0}" != "1" ]]; then
                rm -f -- "${FPATH}" "${FPATH}.sha256" 2>/dev/null \
                    || onx_log "WARNING: failed to delete ${FPATH}"
                # Trust the filesystem rather than rm's exit status: rm also
                # fails when only the sidecar could not be removed.
                if [[ -e "${FPATH}" ]]; then
                    REMOVED=0
                else
                    onx_log "pruned: ${FPATH}"
                fi
            fi

            if [[ "${REMOVED}" == "1" ]]; then
                DELETED+=("${FPATH}")
                FREED=$(( FREED + ${SIZES[$FPATH]:-0} ))
            else
                # Still on disk, so it must not be reported as deleted or
                # counted towards freed_bytes.
                KEPT+=("${FPATH}")
            fi
        else
            KEPT+=("${FPATH}")
        fi

        IDX=$(( IDX + 1 ))
    done <<< "${SORTED}"

    unset TUPLES
done

# ── Build JSON output ────────────────────────────────────────────────────────

# Print the arguments as one compact JSON array of strings. Arguments are
# passed to jq one per line, each line becoming one array element.
_onx_prune_json_array() {
    printf '%s\n' "$@" | jq -R . | jq -sc .
}

# Empty lists are emitted as a literal [] without invoking jq.
KEPT_JSON="[]"
DELETED_JSON="[]"

if (( ${#KEPT[@]} > 0 )); then
    KEPT_JSON=$(_onx_prune_json_array "${KEPT[@]}")
fi
if (( ${#DELETED[@]} > 0 )); then
    DELETED_JSON=$(_onx_prune_json_array "${DELETED[@]}")
fi

# One audit record per run, written before the result is printed.
onx_audit "onx-backup" "prune path=${BACKUP_DIR_REAL} dry_run=${DRY_RUN} kept=${#KEPT[@]} deleted=${#DELETED[@]} freed=${FREED}"

# Final result document on stdout; lists and numbers are injected as JSON
# values, the path as a string.
jq -nc \
    --argjson freed "${FREED}" \
    --argjson deleted "${DELETED_JSON}" \
    --argjson kept "${KEPT_JSON}" \
    --argjson dry "${DRY_RUN}" \
    --arg path "${BACKUP_DIR_REAL}" \
    '{path:$path,dry_run:$dry,kept:$kept,deleted:$deleted,freed_bytes:$freed}'
