#!/bin/bash
# Watchdog externe pour dashboard_api_server.py
# À exécuter via cron toutes les 5 minutes :
#   */5 * * * * /home/ubuntu/crypto_trading_bot/watchdog_dashboard.sh >> /home/ubuntu/crypto_trading_bot/watchdog.log 2>&1

# Project root; the virtualenv interpreter below lives under it.
SCRIPT_DIR=/home/ubuntu/crypto_trading_bot
# Python interpreter taken from the project's .venv.
VENV_PYTHON="${SCRIPT_DIR}/.venv/bin/python3"
# Watchdog log path. NOTE(review): the cron entry in the header redirects
# output to a different file name (watchdog.log) — confirm which is intended.
LOG="${SCRIPT_DIR}/watchdog_dashboard.log"
# Resident-memory ceiling in MB: restart when usage exceeds 2 GB.
MAX_RAM_MB=2048
# Local TCP port on which the dashboard API is expected to answer.
PORT=8889

# Print the current local time as "YYYY-MM-DD HH:MM:SS" (used to prefix log lines).
timestamp() {
    # %F expands to %Y-%m-%d and %T to %H:%M:%S.
    date '+%F %T'
}

#######################################
# Launch the dashboard in the background, detached from this shell.
# Globals:   SCRIPT_DIR, VENV_PYTHON (read)
# Arguments: $1 - verb used in the log line ("Démarré" / "Redémarré")
# Outputs:   one log line on stdout
# Returns:   1 if SCRIPT_DIR cannot be entered (nothing is launched), else 0
#######################################
start_dashboard() {
    local verb=$1
    # The server is started by relative name, so a failed cd would launch it
    # from the wrong directory; refuse instead of logging a bogus success.
    if ! cd "$SCRIPT_DIR"; then
        echo "$(timestamp) [WATCHDOG] ❌ Impossible d'accéder à $SCRIPT_DIR — démarrage annulé"
        return 1
    fi
    nohup "$VENV_PYTHON" dashboard_api_server.py > /dev/null 2>&1 &
    echo "$(timestamp) [WATCHDOG] $verb PID=$!"
}

#######################################
# Stop a process: SIGTERM first, SIGKILL only if it is still alive after the
# grace period.
# Arguments: $1 - PID to stop
#            $2 - grace period in seconds before SIGKILL (default 5)
#######################################
stop_dashboard() {
    local pid=$1
    local grace=${2:-5}
    local waited=0

    # If the signal cannot be delivered the process is gone (or not ours);
    # either way there is nothing left to wait for.
    kill "$pid" 2>/dev/null || return 0

    while kill -0 "$pid" 2>/dev/null && (( waited < grace )); do
        sleep 1
        waited=$((waited + 1))
    done

    if kill -0 "$pid" 2>/dev/null; then
        kill -9 "$pid" 2>/dev/null
        sleep 2   # give the kernel time to tear the process down
    fi
    return 0
}

#######################################
# Stop the given dashboard PID and start a fresh instance.
# Arguments: $1 - PID of the instance to replace
# Returns:   status of start_dashboard
#######################################
restart_dashboard() {
    stop_dashboard "$1"
    start_dashboard "Redémarré"
}

# 1. Find every dashboard PID. An array gives an exact count even when
#    nothing matches (empty array, count 0).
mapfile -t PIDS < <(pgrep -f 'dashboard_api_server\.py')
PID_COUNT=${#PIDS[@]}
PID=${PIDS[0]:-}

if [[ -z "$PID" ]]; then
    echo "$(timestamp) [WATCHDOG] Dashboard non trouvé — démarrage..."
    start_dashboard "Démarré" || exit 1
    exit 0
fi

# Kill any duplicates, keeping only the first PID reported by pgrep.
if (( PID_COUNT > 1 )); then
    EXTRA_PIDS=("${PIDS[@]:1}")
    echo "$(timestamp) [WATCHDOG] ⚠️ $PID_COUNT instances détectées — suppression des doublons: ${EXTRA_PIDS[*]}"
    kill "${EXTRA_PIDS[@]}" 2>/dev/null
fi

# 2. Read the resident set size (VmRSS, in kB) of the kept instance.
#    If the process vanished in the meantime the value defaults to 0.
RSS_KB=$(awk '/^VmRSS:/ {print $2}' "/proc/$PID/status" 2>/dev/null)
RSS_MB=$(( ${RSS_KB:-0} / 1024 ))

# 3. Health check: the API must answer within 5 seconds.
HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" --max-time 5 "http://localhost:$PORT/api/health" 2>/dev/null)

# 200 and 401 both count as healthy; anything else triggers a restart.
if [[ "$HTTP_CODE" != "200" && "$HTTP_CODE" != "401" ]]; then
    echo "$(timestamp) [WATCHDOG] ❌ API ne répond pas (HTTP $HTTP_CODE, RAM ${RSS_MB}MB) — redémarrage"
    restart_dashboard "$PID" || exit 1
    exit 0
fi

# 4. Memory ceiling: restart when RSS exceeds MAX_RAM_MB.
if (( RSS_MB > MAX_RAM_MB )); then
    echo "$(timestamp) [WATCHDOG] ⚠️ RAM trop élevée: ${RSS_MB}MB > ${MAX_RAM_MB}MB — redémarrage"
    restart_dashboard "$PID" || exit 1
    exit 0
fi

echo "$(timestamp) [WATCHDOG] OK — PID=$PID RAM=${RSS_MB}MB API=HTTP${HTTP_CODE}"
