#!/bin/bash
#
# Runs the demo makefile across the whole execution matrix and collects the
# results in one run directory:
#   - one serial pass,
#   - one session-mode pass per entry of SESSION_SLAVES,
#   - one attach pass and one profile pass per entry of MPI_RANKS,
# followed by the demo root's `timing` script.
#
# Usage:
#   <this script>              run the matrix
#   <this script> --cleanup    exec the demo root's `cleanup` script instead
#   <this script> cleanup      same as --cleanup
#
# Environment overrides (defaults in parentheses):
#   RESULTS_ROOT (<demo root>/results), RUN_ID (npRmpi_<timestamp>),
#   SESSION_SLAVES ("1 2 3"), MPI_RANKS ("2 3 4"), DEMO_SET (all), DEMOS,
#   NP_DEMO_TIER (smoke), NP_DEMO_CASES, NP_DEMO_MATRIX, NP_DEMO_N,
#   NP_DEMO_N_FRAC, TIMEOUT_SEC (600), CONTINUE_ON_ERROR (false),
#   CPU_MONITOR (false), CPU_MONITOR_INTERVAL (10),
#   RUN_LOG (<run dir>/runall_progress.log).
set -euo pipefail

# Absolute directory containing this script, and its parent (the demo root).
# CDPATH is cleared for each `cd`: with an inherited CDPATH a relative script
# path could be resolved against some other directory, and `cd` would then
# also print that directory into the command substitution.
TOOLS_DIR="$(CDPATH='' cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
DEMO_ROOT="$(CDPATH='' cd -- "${TOOLS_DIR}/.." && pwd)"

# cleanup_slaves: best-effort termination of leftover worker processes.
# Looks up every process whose full command line matches `slavedaemon.R` or
# `Rslaves.sh` and sends each the default `kill` signal via xargs.
# Globals:   none
# Arguments: none
# Returns:   always 0; lookup and kill failures are deliberately ignored.
cleanup_slaves() {
  local worker_pids
  # pgrep exits non-zero when nothing matches; that is not an error here.
  worker_pids="$(pgrep -f 'slavedaemon\.R|Rslaves\.sh' || true)"
  if [ -z "${worker_pids}" ]; then
    return 0
  fi
  printf '%s\n' "${worker_pids}" | xargs kill >/dev/null 2>&1 || true
}

MONITOR_PID=""

cleanup_all() {
  if [ -n "${MONITOR_PID}" ]; then
    kill "${MONITOR_PID}" >/dev/null 2>&1 || true
  fi
  cleanup_slaves
}

trap cleanup_all EXIT INT TERM

# `--cleanup` or `cleanup` as the first argument replaces this process with
# the demo root's cleanup script; nothing below runs in that case.
case "${1:-}" in
  --cleanup | cleanup)
    exec "${DEMO_ROOT}/cleanup"
    ;;
esac

# Run configuration. Every value can be preset through the environment; an
# unset or empty variable receives the default shown here.
: "${RESULTS_ROOT:=${DEMO_ROOT}/results}"
if [ -z "${RUN_ID:-}" ]; then
  # Timestamped id, generated only when the caller did not supply one.
  RUN_ID="npRmpi_$(date +%Y%m%d_%H%M%S)"
fi
RUN_DIR="${RESULTS_ROOT}/${RUN_ID}"
: "${SESSION_SLAVES:=1 2 3}"
: "${MPI_RANKS:=2 3 4}"
: "${DEMO_SET:=all}"
: "${NP_DEMO_TIER:=smoke}"
: "${TIMEOUT_SEC:=600}"
: "${CONTINUE_ON_ERROR:=false}"
: "${CPU_MONITOR:=false}"
: "${CPU_MONITOR_INTERVAL:=10}"
: "${RUN_LOG:=${RUN_DIR}/runall_progress.log}"

# Translate the named DEMO_SET into an explicit DEMOS list, unless the caller
# already supplied DEMOS. The "all" set leaves DEMOS untouched, so no DEMOS=
# override reaches make. An unrecognised set name aborts with exit status 2.
if [ -z "${DEMOS:-}" ]; then
  case "${DEMO_SET}" in
    all) ;;
    pilot | conditional-density) DEMOS="npcdens" ;;
    conditional-core) DEMOS="npcdens npcdist" ;;
    core-scaling) DEMOS="npreg npcdens npcdist npudens npudist" ;;
    nomad) DEMOS="npreg npcdens npcdist npindex npscoef npplreg" ;;
    semiparametric) DEMOS="npindex npscoef npplreg" ;;
    unconditional) DEMOS="npudens npudist" ;;
    tests) DEMOS="npdeneqtest npdeptest npsdeptest npsigtest npsymtest npunitest" ;;
    auxiliary) DEMOS="npcmstest npconmode npcopula npqreg npregiv" ;;
    *)
      echo "unknown DEMO_SET=${DEMO_SET}" >&2
      exit 2
      ;;
  esac
fi

# write_metadata: create the run directory and (over)write RUN_METADATA.txt
# in it with one key=value line per setting of this run, followed by version
# information printed by Rscript and the location of mpiexec.
# Globals:   RUN_DIR, RUN_ID, DEMO_ROOT, TOOLS_DIR, RESULTS_ROOT, NP_DEMO_*,
#            SESSION_SLAVES, MPI_RANKS, TIMEOUT_SEC, CONTINUE_ON_ERROR,
#            CPU_MONITOR, CPU_MONITOR_INTERVAL, DEMO_SET, DEMOS (all read)
# Arguments: none
# Outputs:   ${RUN_DIR}/RUN_METADATA.txt
write_metadata() {
  local meta_file="${RUN_DIR}/RUN_METADATA.txt"
  local repo_dir="${DEMO_ROOT}/.."
  local mpiexec_path

  mkdir -p "${RUN_DIR}"
  {
    printf 'run_id=%s\n' "${RUN_ID}"
    printf 'start_time=%s\n' "$(date -u +%Y-%m-%dT%H:%M:%SZ)"
    printf 'host=%s\n' "$(hostname)"
    printf 'demo_src=%s\n' "${DEMO_ROOT}"
    printf 'tools_dir=%s\n' "${TOOLS_DIR}"
    printf 'results_root=%s\n' "${RESULTS_ROOT}"
    printf 'np_demo_n=%s\n' "${NP_DEMO_N:-}"
    printf 'np_demo_n_frac=%s\n' "${NP_DEMO_N_FRAC:-}"
    printf 'np_demo_tier=%s\n' "${NP_DEMO_TIER}"
    printf 'np_demo_cases=%s\n' "${NP_DEMO_CASES:-}"
    printf 'np_demo_matrix=%s\n' "${NP_DEMO_MATRIX:-}"
    printf 'session_slaves=%s\n' "${SESSION_SLAVES}"
    printf 'mpi_ranks=%s\n' "${MPI_RANKS}"
    printf 'timeout_sec=%s\n' "${TIMEOUT_SEC}"
    printf 'continue_on_error=%s\n' "${CONTINUE_ON_ERROR}"
    printf 'cpu_monitor=%s\n' "${CPU_MONITOR}"
    printf 'cpu_monitor_interval=%s\n' "${CPU_MONITOR_INTERVAL}"
    printf 'demo_set=%s\n' "${DEMO_SET}"
    printf 'demos=%s\n' "${DEMOS:-<makefile-default>}"
    # Git details are best effort: both values stay empty when git fails.
    printf 'git_commit=%s\n' \
      "$(git -C "${repo_dir}" rev-parse --short HEAD 2>/dev/null || true)"
    # Short status flattened onto one line, entries separated by ';'.
    printf 'git_status=%s\n' \
      "$(git -C "${repo_dir}" status --short 2>/dev/null | tr '\n' ';' || true)"
    # R, BLAS and npRmpi versions; the lines are simply absent if Rscript fails.
    Rscript --vanilla -e 'cat("R.version.string=", R.version.string, "\n", sep=""); blas <- sessionInfo()$BLAS; if (is.null(blas) || length(blas) == 0L || is.na(blas[1L]) || !nzchar(blas[1L])) blas <- "unknown"; cat("BLAS=", as.character(blas[1L]), "\n", sep=""); cat("npRmpi.version=", as.character(utils::packageVersion("npRmpi")), "\n", sep="")' 2>/dev/null || true
    # Empty value when mpiexec cannot be found.
    mpiexec_path="$(command -v mpiexec 2>/dev/null || true)"
    printf 'mpiexec=%s\n' "${mpiexec_path}"
  } > "${meta_file}"
}

# run_make: run the demo makefile inside a dedicated working directory and
# append everything it prints (stdout and stderr) to the progress log while
# still echoing it to stdout.
# Globals:   TOOLS_DIR, DEMO_ROOT, NP_DEMO_TIER, NP_DEMO_CASES,
#            NP_DEMO_MATRIX, TIMEOUT_SEC, CONTINUE_ON_ERROR, DEMOS, RUN_LOG
#            (all read)
# Arguments: $1   - working directory; created if missing
#            $2.. - extra make arguments (e.g. MODE=serial, NP=4), passed last
# Returns:   make's exit status (not tee's); non-zero without running make
#            when the working directory cannot be created or entered.
run_make() {
  local work_dir="$1"
  shift

  # One argument list for both cases; DEMOS= is only added when a demo list
  # was selected, so that otherwise the makefile's own default applies.
  local -a make_args=(
    -f "${TOOLS_DIR}/makefile"
    "DEMO_SRC=${DEMO_ROOT}"
    "NP_DEMO_TIER=${NP_DEMO_TIER}"
    "NP_DEMO_CASES=${NP_DEMO_CASES:-}"
    "NP_DEMO_MATRIX=${NP_DEMO_MATRIX:-}"
    "TIMEOUT_SEC=${TIMEOUT_SEC}"
    "CONTINUE_ON_ERROR=${CONTINUE_ON_ERROR}"
  )
  if [ -n "${DEMOS:-}" ]; then
    make_args+=("DEMOS=${DEMOS}")
  fi

  # Checked explicitly: errexit is not in effect when the caller uses this
  # function in a condition or an `||` list, and make must never run in the
  # caller's directory by accident.
  mkdir -p "${work_dir}" || return
  (
    cd "${work_dir}" || exit
    make "${make_args[@]}" "$@"
  ) 2>&1 | tee -a "${RUN_LOG}"
  # Report the subshell's (make's) status rather than tee's.
  return "${PIPESTATUS[0]}"
}

# ---- Main flow ------------------------------------------------------------

write_metadata
# Begin with an empty progress log (created or truncated here).
: > "${RUN_LOG}"

# Optional background CPU monitor. It is handed this script's PID, the
# progress log, the demo root, its output CSV and the sampling interval.
case "${CPU_MONITOR}" in
  true)
    "${TOOLS_DIR}/monitor_demo_cpu.sh" "$$" "${RUN_LOG}" "${DEMO_ROOT}" \
      "${RUN_DIR}/cpu_monitor.csv" "${CPU_MONITOR_INTERVAL}" &
    MONITOR_PID="$!"
    ;;
esac

# Serial pass.
run_make "${RUN_DIR}/serial" MODE=serial

# Session passes: one per slave count, each in its own zero-padded directory.
# SESSION_SLAVES is intentionally left unquoted so it splits into words.
for n_slaves in ${SESSION_SLAVES}; do
  printf -v session_label "slaves_%02d" "${n_slaves}"
  run_make "${RUN_DIR}/session/${session_label}" MODE=session NSLAVES="${n_slaves}"
done

# MPI-launch passes: attach first, then profile, for every rank count.
# MPI_RANKS is intentionally left unquoted so it splits into words.
for n_ranks in ${MPI_RANKS}; do
  printf -v ranks_label "ranks_%02d" "${n_ranks}"
  for launch_mode in attach profile; do
    run_make "${RUN_DIR}/mpi_launch/${ranks_label}/${launch_mode}" \
      MODE="${launch_mode}" NP="${n_ranks}"
  done
done

# Hand the finished run directory to the demo root's timing script.
mkdir -p "${RUN_DIR}/timing"
"${DEMO_ROOT}/timing" "${RUN_DIR}" "${RUN_DIR}/timing"

# Stop the CPU monitor, if one is running, and summarise what it recorded.
if [ -n "${MONITOR_PID}" ]; then
  kill "${MONITOR_PID}" >/dev/null 2>&1 || true
  wait "${MONITOR_PID}" >/dev/null 2>&1 || true
  # Cleared so the exit-time cleanup does not signal the same PID again.
  MONITOR_PID=""
  cpu_csv="${RUN_DIR}/cpu_monitor.csv"
  if [ -s "${cpu_csv}" ]; then
    # The summary is optional; a failure here does not fail the run.
    Rscript --vanilla "${TOOLS_DIR}/summarize_cpu_monitor.R" \
      "${cpu_csv}" "${RUN_DIR}/cpu_monitor_summary.csv" || true
  fi
fi

printf 'demo run complete: %s\n' "${RUN_DIR}"
