"""
🔬 SPY EXIT ANALYZER — Analyse contrefactuelle des sorties du bot spy

Objectif: Pour chaque trade perdu ou sous-optimal, récupérer les klines Binance
APRÈS la sortie et mesurer ce qui s'est passé. Identifier quelles règles de sortie
sont responsables des pertes évitables.

Logique contrefactuelle:
  - INSTANT_REVERSAL court (< 60s, max_pnl~0): était-ce une mèche? Prix a-t-il récupéré?
  - EARLY_SL: même analyse
  - MOMENTUM_EXIT / REVERSAL: prix a-t-il continué après notre vente?
  - TRAILING trop serré: combien laissé sur la table?

Données utilisées:
  - espion_history.json (trades réels prod)
  - API Binance klines (klines 1m après exit_time)

Auteur: Trading Bot AI System
Date: 2026-04-29
"""

import json
import time
import requests
from datetime import datetime, timezone
from collections import defaultdict
from pathlib import Path

# ─── CONFIG ────────────────────────────────────────────────────────────────────
# Trade history read by SpyExitAnalyzer (loaded with json.load).
HISTORY_FILE  = Path("/home/ubuntu/crypto_trading_prod/data/espion_history.json")
# Base URL of the Binance REST API; "/api/v3/klines" is appended when fetching.
BINANCE_API   = "https://api.binance.com"
POST_EXIT_MINUTES = 30       # Analysis window after the exit (minutes, i.e. number of 1m klines requested)
WICK_RECOVERY_PCT = 0.30     # Price climbs back > 0.3% above the sell price after exit → probable wick
API_DELAY     = 0.25         # Pause between Binance calls, passed to time.sleep (avoid rate limiting)

# Per-rule thresholds used to flag a "suspected premature exit".
# A trade is a candidate when its "hold_seconds" is <= max_hold_s AND its
# "max_pnl" is <= max_pnl_at_exit. Rules absent from this mapping are never
# analysed counterfactually.
# NOTE(review): max_pnl is printed with a "%" suffix elsewhere in this file, so
# max_pnl_at_exit is presumably expressed in percent — confirm against the bot.
PREMATURE_CRITERIA = {
    "INSTANT_REVERSAL": {"max_hold_s": 90,  "max_pnl_at_exit": 0.05},
    "EARLY_SL":         {"max_hold_s": 180, "max_pnl_at_exit": 0.20},
    "MOMENTUM_EXIT":    {"max_hold_s": 600, "max_pnl_at_exit": 1.00},
    "REVERSAL":         {"max_hold_s": 600, "max_pnl_at_exit": 5.00},
    "STAGNATION":       {"max_hold_s": 900, "max_pnl_at_exit": 0.50},
}

# ─── HELPERS ───────────────────────────────────────────────────────────────────

def _parse_exit_rule(exit_reason: str) -> str:
    """Return the name of the exit rule mentioned in *exit_reason*.

    The known rule names are tried in a fixed priority order and the first one
    found as a substring wins; "OTHER" is returned when none matches. The order
    matters: "INSTANT_REVERSAL" is tried before "REVERSAL" because the latter
    is a substring of the former.
    """
    known_rules = (
        "INSTANT_REVERSAL",
        "EARLY_SL",
        "EARLY_FLAT",
        "STAGNATION",
        "MOMENTUM_EXIT",
        "REVERSAL",
        "VOLUME_ROUGE",
        "TRAILING",
        "EMA7_DOWNTREND",
        "HARD_SL",
        "MAX_HOLD",
    )
    return next((name for name in known_rules if name in exit_reason), "OTHER")


def _exit_ts_ms(exit_time_str: str) -> int:
    """Convert an ISO-8601 exit time into an epoch timestamp in milliseconds.

    A trailing "Z" is rewritten to "+00:00" before parsing. Any input that
    cannot be parsed (including a non-string value) yields 0, which callers
    treat as "no usable timestamp".

    NOTE(review): a string without a UTC offset produces a naive datetime,
    which ``timestamp()`` interprets in the machine's local timezone — confirm
    this matches how the bot writes ``exit_time``.
    """
    try:
        normalized = exit_time_str.replace("Z", "+00:00")
        epoch_seconds = datetime.fromisoformat(normalized).timestamp()
    except Exception:
        return 0
    return int(epoch_seconds * 1000)


def _fetch_post_exit_klines(symbol: str, exit_ts_ms: int, minutes: int = 30) -> list:
    """
    Fetch the 1-minute klines that follow an exit from the Binance REST API.

    Args:
        symbol: Trading pair sent as the ``symbol`` query parameter.
        exit_ts_ms: Exit time in epoch milliseconds, sent as ``startTime``.
        minutes: Number of 1m klines requested (sent as ``limit``); clamped to
            the 1..1000 range accepted by the endpoint.

    Returns:
        A list of dicts with keys ``ts``, ``open``, ``high``, ``low``,
        ``close`` and ``buy_vol_ratio`` (taker-buy quote volume divided by
        total quote volume, 0.5 when the kline has no volume).
        An empty list is returned on any failure (network error, non-200
        status, malformed payload); the reason is printed so that a rate limit
        or outage is not mistaken for "no data".
    """
    # Binance rejects limit values outside 1..1000; clamp instead of failing.
    limit = max(1, min(minutes, 1000))

    try:
        resp = requests.get(
            f"{BINANCE_API}/api/v3/klines",
            params={
                "symbol": symbol,
                "interval": "1m",
                "startTime": exit_ts_ms,
                "limit": limit,
            },
            timeout=5,
        )
    except requests.RequestException as exc:
        print(f"⚠️  Klines {symbol}: requête échouée ({exc})")
        return []

    if resp.status_code != 200:
        # Typically 429/418 (rate limit / ban) or 400 (unknown symbol).
        print(f"⚠️  Klines {symbol}: HTTP {resp.status_code}")
        return []

    try:
        parsed = []
        for k in resp.json():
            # Kline array layout: 0=open time, 1=open, 2=high, 3=low, 4=close,
            # 7=quote asset volume, 10=taker buy quote asset volume.
            quote_volume = float(k[7])
            parsed.append({
                "ts":    int(k[0]),
                "open":  float(k[1]),
                "high":  float(k[2]),
                "low":   float(k[3]),
                "close": float(k[4]),
                "buy_vol_ratio": float(k[10]) / quote_volume if quote_volume > 0 else 0.5,
            })
        return parsed
    except (ValueError, TypeError, IndexError, KeyError) as exc:
        # Invalid JSON or a payload that is not a list of kline arrays.
        print(f"⚠️  Klines {symbol}: réponse invalide ({exc})")
        return []


# ─── ANALYSE PRINCIPALE ────────────────────────────────────────────────────────

class SpyExitAnalyzer:
    """Counterfactual analysis of the spy bot's exits.

    Loads the trade history once at construction time. ``run()`` prints
    per-rule statistics and, unless offline mode is requested, fetches the
    post-exit Binance klines of every "suspected premature exit" to measure how
    much gain was missed and whether the exit happened on a wick.
    """

    # Default destination of the JSON report written by run().
    DEFAULT_REPORT_FILE = Path("/home/ubuntu/crypto_trading_bot/spy_exit_analysis.json")
    # Number of post-exit 1m klines inspected when looking for a wick recovery.
    WICK_WINDOW_MINUTES = 10

    def __init__(self, history_file: Path = HISTORY_FILE):
        """Remember the history file location and load its trades.

        Args:
            history_file: Path (or path string) of the JSON trade history.
        """
        self.history_file = Path(history_file)
        self.trades = self._load_trades()

    def _load_trades(self) -> list:
        """Read the trade history and return it as a list of trade dicts.

        A missing, unreadable or malformed file is reported on stdout and
        yields an empty list, so the analysis degrades to "0 trades" instead
        of crashing. Entries that are not JSON objects are dropped.
        """
        if not self.history_file.exists():
            print(f"❌ Fichier introuvable: {self.history_file}")
            return []
        try:
            with open(self.history_file, encoding="utf-8") as f:
                data = json.load(f)
        except (OSError, ValueError) as exc:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            print(f"❌ Fichier illisible: {self.history_file} ({exc})")
            return []
        if not isinstance(data, list):
            print(f"❌ Format inattendu (liste de trades attendue): {self.history_file}")
            return []
        trades = [t for t in data if isinstance(t, dict)]
        if len(trades) < len(data):
            print(f"⚠️  {len(data) - len(trades)} entrées ignorées (pas des objets trade)")
        return trades

    # ── Small shared helpers ───────────────────────────────────────────────────

    @staticmethod
    def _num(trade: dict, key: str):
        """Return ``trade[key]``, treating a missing or JSON-null value as 0."""
        value = trade.get(key)
        return 0 if value is None else value

    @staticmethod
    def _rule_of(trade: dict) -> str:
        """Return the exit rule of *trade* (a null exit_reason maps to "OTHER")."""
        return _parse_exit_rule(trade.get("exit_reason") or "")

    @staticmethod
    def _is_premature_candidate(exit_rule: str, hold_s, max_pnl) -> bool:
        """Tell whether a trade meets PREMATURE_CRITERIA for its exit rule.

        Rules without an entry in PREMATURE_CRITERIA are never candidates.
        """
        criteria = PREMATURE_CRITERIA.get(exit_rule)
        if not criteria:
            return False
        return hold_s <= criteria["max_hold_s"] and max_pnl <= criteria["max_pnl_at_exit"]

    @staticmethod
    def _wick_stats(results: list) -> tuple:
        """Return ``(wick_rate, avg_gain_missed)`` for a list of analysis results.

        Only results that actually have kline data (``max_gain_missed`` set)
        enter the denominators: a failed fetch is not evidence that the exit
        was justified. Both values are 0.0 when no result has data.
        """
        gains = [r["max_gain_missed"] for r in results if r["max_gain_missed"] is not None]
        if not gains:
            return 0.0, 0.0
        # wick_confirmed is only ever set together with max_gain_missed.
        wicks = sum(1 for r in results if r["wick_confirmed"])
        return wicks / len(gains), sum(gains) / len(gains)

    # ── Single-trade analysis ──────────────────────────────────────────────────

    def _analyze_trade(self, trade: dict) -> dict:
        """Analyse one trade and return its counterfactual result dict.

        Steps:
        1. Classify the exit rule.
        2. Check whether the trade is a "premature exit" candidate.
        3. For candidates with a symbol and a parsable exit time, fetch the
           post-exit klines and measure the missed opportunity.

        The kline-derived fields (``max_price_after``, ``max_gain_missed``,
        ``time_to_recover_min``) stay ``None`` when no kline data is available
        or the sell price is not positive.
        """
        exit_rule  = self._rule_of(trade)
        sell_price = self._num(trade, "sell_price")
        max_pnl    = self._num(trade, "max_pnl")
        hold_s     = self._num(trade, "hold_seconds")
        symbol     = trade.get("symbol") or ""
        exit_time  = trade.get("exit_time") or ""

        result = {
            "symbol":     symbol,
            "exit_rule":  exit_rule,
            "pnl_pct":    self._num(trade, "pnl_pct"),
            "max_pnl":    max_pnl,
            "hold_s":     hold_s,
            "exit_time":  exit_time,
            "premature_candidate": self._is_premature_candidate(exit_rule, hold_s, max_pnl),
            "wick_confirmed":      False,
            "price_recovered":     False,
            "max_price_after":     None,
            "max_gain_missed":     None,
            "time_to_recover_min": None,
            "ema7_bullish_at_entry": trade.get("ema7_bullish_at_entry", None),
        }

        # Only candidates trigger a Binance call (saves API budget).
        if not (result["premature_candidate"] and exit_time and symbol):
            return result

        ts_ms = _exit_ts_ms(exit_time)
        if ts_ms <= 0:
            return result

        klines = _fetch_post_exit_klines(symbol, ts_ms, POST_EXIT_MINUTES)
        time.sleep(API_DELAY)

        if klines and sell_price > 0:
            max_price = max(k["high"] for k in klines)
            max_gain_missed = (max_price - sell_price) / sell_price * 100

            result["max_price_after"] = round(max_price, 8)
            result["max_gain_missed"] = round(max_gain_missed, 3)

            # Wick confirmed when a high within the first WICK_WINDOW_MINUTES
            # klines climbs at least WICK_RECOVERY_PCT above the sell price.
            for minute, k in enumerate(klines[:self.WICK_WINDOW_MINUTES], start=1):
                gain = (k["high"] - sell_price) / sell_price * 100
                if gain >= WICK_RECOVERY_PCT:
                    result["wick_confirmed"]      = True
                    result["price_recovered"]     = True
                    result["time_to_recover_min"] = minute
                    break

        return result

    # ── Global analysis ────────────────────────────────────────────────────────

    def run(self, fetch_klines: bool = True, max_api_calls: int = 60,
            output_file: "Path | None" = None) -> dict:
        """Run the full analysis, print it, and save the JSON report.

        Args:
            fetch_klines: ``False`` selects offline mode: only the per-rule
                distribution is printed and no Binance call is made.
            max_api_calls: Maximum number of candidates analysed; further
                candidates are counted as skipped.
            output_file: Where to write the JSON report; defaults to
                ``DEFAULT_REPORT_FILE``. The parent directory is created when
                missing.

        Returns:
            In offline mode ``{"by_rule": {rule: [trades]}}``; otherwise the
            report dict that was written to *output_file*.
        """
        print(f"\n🔬 SPY EXIT ANALYZER — {len(self.trades)} trades chargés")
        print(f"   Fichier: {self.history_file}")
        print(f"   Fenêtre post-exit: {POST_EXIT_MINUTES}min\n")

        # ── Global statistics per rule ──
        by_rule = defaultdict(list)
        for t in self.trades:
            by_rule[self._rule_of(t)].append(t)

        print("📊 DISTRIBUTION DES SORTIES:")
        print(f"   {'Règle':<20} {'Trades':>6} {'WinRate':>8} {'Avg PnL':>9} {'Avg Max':>9}")
        print("   " + "─" * 56)
        for rule, trades in sorted(by_rule.items(), key=lambda x: -len(x[1])):
            pnls = [self._num(t, "pnl_pct") for t in trades]
            maxs = [self._num(t, "max_pnl") for t in trades]
            wins = sum(1 for p in pnls if p > 0)
            n = len(trades)  # never 0: a group only exists once a trade was appended
            print(f"   {rule:<20} {n:>6} {wins / n:>7.0%}  {sum(pnls) / n:>+8.2f}%  {sum(maxs) / n:>+8.2f}%")

        if not fetch_klines:
            return {"by_rule": dict(by_rule)}

        # ── Counterfactual analysis (with Binance calls) ──
        print(f"\n🔍 ANALYSE CONTREFACTUELLE (max {max_api_calls} appels Binance)...")
        api_calls = 0
        analyzed  = []
        skipped   = 0

        for trade in self.trades:
            # Rules without criteria (HARD_SL, EMA7_DOWNTREND, TRAILING, ...) and
            # trades outside the thresholds are not analysed.
            is_candidate = self._is_premature_candidate(
                self._rule_of(trade),
                self._num(trade, "hold_seconds"),
                self._num(trade, "max_pnl"),
            )
            if not is_candidate:
                continue

            if api_calls >= max_api_calls:
                skipped += 1
                continue

            # The budget counts analysed candidates (an upper bound on real calls).
            analyzed.append(self._analyze_trade(trade))
            api_calls += 1

        # ── Counterfactual summary ──
        print(f"\n   Candidats analysés: {len(analyzed)} ({skipped} skippés, limite API)")

        # Rates and averages are computed over candidates that have kline data.
        with_data       = [r for r in analyzed if r["max_gain_missed"] is not None]
        confirmed_wicks = [r for r in analyzed if r["wick_confirmed"]]
        missed_gains    = [r["max_gain_missed"] for r in with_data]

        print(f"\n⚠️  SORTIES PRÉMATURÉES SUSPECTÉES: {len(analyzed)}")
        if len(with_data) < len(analyzed):
            print(f"   Sans données klines (exclus des taux): {len(analyzed) - len(with_data)}")
        if with_data:
            wick_pct = len(confirmed_wicks) / len(with_data) * 100
            print(f"   Mèches confirmées (récupération >{WICK_RECOVERY_PCT}% en {self.WICK_WINDOW_MINUTES}min): {len(confirmed_wicks)} ({wick_pct:.0f}%)")
            print(f"   Gain moyen manqué ({POST_EXIT_MINUTES}min post-exit): {sum(missed_gains)/len(missed_gains):+.2f}%")
            print(f"   Gain MAX manqué: {max(missed_gains):+.2f}%")

        # ── Detail per rule ──
        by_rule_cf = defaultdict(list)
        for r in analyzed:
            by_rule_cf[r["exit_rule"]].append(r)

        print(f"\n📋 DÉTAIL PAR RÈGLE:")
        print(f"   {'Règle':<20} {'Candidats':>9} {'Wicks':>6} {'Gain moy manqué':>17} {'Avec EMA7✓':>11}")
        print("   " + "─" * 68)
        for rule, results in sorted(by_rule_cf.items(), key=lambda x: -len(x[1])):
            wicks = sum(1 for r in results if r["wick_confirmed"])
            _, avg_g = self._wick_stats(results)
            # "connus" = trades whose EMA7 state at entry was recorded.
            ema7_known = sum(1 for r in results if r["ema7_bullish_at_entry"] is not None)
            ema7_bull  = sum(1 for r in results if r["ema7_bullish_at_entry"] is True)
            print(f"   {rule:<20} {len(results):>9} {wicks:>6} {avg_g:>+16.2f}%  {ema7_bull}/{ema7_known} connus")

        # ── Top 10 worst avoidable exits ──
        top_missed = sorted(with_data, key=lambda x: -x["max_gain_missed"])[:10]
        if top_missed:
            gain_header = f"Gain {POST_EXIT_MINUTES}m"
            print(f"\n🔥 TOP 10 GAINS MANQUÉS ({POST_EXIT_MINUTES}min post-exit):")
            print(f"   {'Symbol':<14} {'Règle':<20} {'PnL exit':>9} {gain_header:>10} {'Récup?':>7} {'EMA7✓':>7}")
            print("   " + "─" * 72)
            for r in top_missed:
                ema7_tag = "✅" if r["ema7_bullish_at_entry"] else ("❓" if r["ema7_bullish_at_entry"] is None else "❌")
                rec_tag  = f"✅ {r['time_to_recover_min']}min" if r["wick_confirmed"] else "❌"
                print(f"   {r['symbol']:<14} {r['exit_rule']:<20} {r['pnl_pct']:>+8.2f}%  {r['max_gain_missed']:>+9.2f}%  {rec_tag:>7}  {ema7_tag:>5}")

        # ── Automatic recommendations ──
        print(f"\n💡 RECOMMANDATIONS:")
        recommendations = self._generate_recommendations(analyzed, by_rule_cf)
        for i, rec in enumerate(recommendations, 1):
            print(f"   {i}. [{rec['priority']}] {rec['rule']}: {rec['message']}")
            if rec.get("action"):
                print(f"      → {rec['action']}")

        # ── JSON result ──
        report = {
            "generated_at":    datetime.now(timezone.utc).isoformat(),
            "total_trades":    len(self.trades),
            "candidates_analyzed": len(analyzed),
            "candidates_with_data": len(with_data),
            "wicks_confirmed": len(confirmed_wicks),
            "avg_gain_missed": round(sum(missed_gains) / len(missed_gains), 3) if missed_gains else 0,
            "by_rule_summary": {
                rule: {
                    "candidates": len(res),
                    "wicks":      sum(1 for r in res if r["wick_confirmed"]),
                    "avg_gain_missed": round(self._wick_stats(res)[1], 3),
                }
                for rule, res in by_rule_cf.items()
            },
            "top_missed":     top_missed,
            "recommendations": recommendations,
        }

        out_path = Path(output_file) if output_file is not None else self.DEFAULT_REPORT_FILE
        # Create the destination directory so a long analysis is not lost at the very end.
        out_path.parent.mkdir(parents=True, exist_ok=True)
        with open(out_path, "w", encoding="utf-8") as f:
            json.dump(report, f, indent=2, ensure_ascii=False)
        print(f"\n✅ Rapport JSON sauvegardé: {out_path}")

        return report

    def _generate_recommendations(self, analyzed: list, by_rule: dict) -> list:
        """Build prioritised recommendations from the counterfactual results.

        Args:
            analyzed: All analysis results (kept for interface compatibility;
                the per-rule grouping in *by_rule* is what is used).
            by_rule: Mapping ``exit_rule -> [analysis results]``.

        Returns:
            A list of dicts with keys ``priority``, ``rule``, ``message`` and
            ``action``. Wick rates are computed over results with kline data.
        """
        recs = []

        # INSTANT_REVERSAL
        ir_results = by_rule.get("INSTANT_REVERSAL", [])
        if ir_results:
            wick_rate, avg_g = self._wick_stats(ir_results)
            if wick_rate > 0.40:
                recs.append({
                    "priority": "🔴 HIGH",
                    "rule":     "INSTANT_REVERSAL",
                    "message":  f"{wick_rate:.0%} des IR étaient des mèches (gain moy manqué: {avg_g:+.2f}%)",
                    "action":   "Élargir seuil IR à -1.0% si EMA7>EMA25 à l'entrée (déjà implémenté 29/04)",
                })
            elif wick_rate > 0.20:
                recs.append({
                    "priority": "🟡 MED",
                    "rule":     "INSTANT_REVERSAL",
                    "message":  f"{wick_rate:.0%} des IR étaient des mèches (gain moy manqué: {avg_g:+.2f}%)",
                    "action":   "Vérifier que ema7_bullish_at_entry est correctement stocké et utilisé",
                })

        # EARLY_SL
        esl_results = by_rule.get("EARLY_SL", [])
        if esl_results:
            wick_rate, avg_g = self._wick_stats(esl_results)
            if wick_rate > 0.30:
                recs.append({
                    "priority": "🔴 HIGH",
                    "rule":     "EARLY_SL",
                    "message":  f"{wick_rate:.0%} des EARLY_SL étaient des creux temporaires ({avg_g:+.2f}% manqué)",
                    "action":   "Renforcer protection _ema7_protect avec ema7_bullish_at_entry",
                })

        # MOMENTUM_EXIT
        mom_results = by_rule.get("MOMENTUM_EXIT", [])
        if mom_results:
            _, avg_g = self._wick_stats(mom_results)
            if avg_g > 0.5:
                recs.append({
                    "priority": "🟡 MED",
                    "rule":     "MOMENTUM_EXIT",
                    "message":  f"Gain moyen manqué: {avg_g:+.2f}% — drops consécutifs en tendance haussière",
                    "action":   "Utiliser _ema7_bullish (EMA7>EMA25) au lieu de _ema7_rising (slope seule)",
                })

        # TRAILING too tight: works on the raw trades, no kline data needed.
        trail_trades = [t for t in self.trades if "TRAILING" in (t.get("exit_reason") or "")]
        if trail_trades:
            # Trades whose peak PnL exceeded the realised PnL by more than 2 points.
            big_gap = [
                t for t in trail_trades
                if self._num(t, "max_pnl") - self._num(t, "pnl_pct") > 2.0
            ]
            if len(big_gap) > len(trail_trades) * 0.3:
                recs.append({
                    "priority": "🟡 MED",
                    "rule":     "TRAILING",
                    "message":  f"{len(big_gap)}/{len(trail_trades)} trades TRAILING ont laissé >2% sur la table",
                    "action":   "Vérifier que EMA7↑ multiplie bien le trail (×1.4-1.6)",
                })

        return recs


# ─── POINT D'ENTRÉE ────────────────────────────────────────────────────────────

if __name__ == "__main__":
    import argparse

    # Command-line front end: --no-api selects offline statistics only,
    # --max-calls caps the number of candidates sent to Binance.
    cli = argparse.ArgumentParser(description="Analyse contrefactuelle des sorties spy")
    cli.add_argument(
        "--no-api",
        action="store_true",
        help="Mode offline (stats seules, pas de klines Binance)",
    )
    cli.add_argument(
        "--max-calls",
        type=int,
        default=60,
        help="Limite d'appels Binance (défaut: 60)",
    )
    options = cli.parse_args()

    use_api = not options.no_api
    SpyExitAnalyzer().run(fetch_klines=use_api, max_api_calls=options.max_calls)
