#!/usr/bin/env python3
"""
compare_spy_behavior.py — Comparateur de comportement SPY testnet vs prod

Lit les deux logs et détecte :
  1. Surges vus par un bot mais pas l'autre (fenêtre 60s)
  2. Même surge → décision différente (acheté vs rejeté)
  3. Différences d'état (régime, coins bloqués, scores)
  4. Vue live : tail -f des deux logs côte à côte

Usage:
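  python3 compare_spy_behavior.py             # analyse les logs existants
  python3 compare_spy_behavior.py --live      # mode live (tail)
  python3 compare_spy_behavior.py --state     # état actuel uniquement
  python3 compare_spy_behavior.py --surges    # comparaison des surges uniquement
  python3 compare_spy_behavior.py --perf      # performances uniquement
  python3 compare_spy_behavior.py --lines N   # nb de lignes analysées (défaut 3000)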
"""

import sys, os, re, json, time, argparse
from datetime import datetime, timedelta
from collections import defaultdict

# Log files read for the testnet and prod sides of the comparison
# (parsed by parse_log and followed by live_mode).
TESTNET_LOG  = '/home/ubuntu/crypto_trading_bot/market_spy_daemon.log'
PROD_LOG     = '/home/ubuntu/crypto_trading_prod/logs/market_spy_prod.log'
# Base directories under which print_state_diff looks up the JSON state files.
TESTNET_DIR  = '/home/ubuntu/crypto_trading_bot'
PROD_DIR     = '/home/ubuntu/crypto_trading_prod'

# ─── Couleurs terminal ───────────────────────────────────────────────────────
# ANSI escape sequences keyed by a short colour code; 'X' resets the terminal
# attributes and 'BOLD' switches to bold.
C = {
    'R': '\033[91m',
    'G': '\033[92m',
    'Y': '\033[93m',
    'B': '\033[94m',
    'M': '\033[95m',
    'C': '\033[96m',
    'W': '\033[97m',
    'X': '\033[0m',
    'BOLD': '\033[1m',
}


def color(c, s):
    """Wrap *s* in the escape sequence for colour key *c*, then reset.

    Raises KeyError when *c* is not a key of ``C``.
    """
    prefix = C[c]
    body = format(s)
    return prefix + body + C['X']

# ─── Parser de log ──────────────────────────────────────────────────────────
# Surge announcement: captures the pair symbol (…USDT / …USDC) and the
# percentage that follows a '+' sign.
RE_SURGE = re.compile(
    r'⚡ SURGE: (\w+USDT|\w+USDC).*?\+([\d.]+)%'
)
RE_SURGE_TS = re.compile(r'^(\d{2}:\d{2}:\d{2})')   # timestamp at start of line
# Generic rejection line (timestamp, optional symbol, trailing text).
# Not referenced by parse_log; kept for other users of this module.
RE_REJECT = re.compile(
    r'(\d{2}:\d{2}:\d{2}).*Rejeté.*?(\w+USDT|\w+USDC)?.*?:(.*)'
)
# Rejection line in the '❌ Rejeté: <reason>' form; group 2 is the reason.
RE_REJECT2 = re.compile(r'(\d{2}:\d{2}:\d{2}).*❌ Rejeté: (.+)')
# Buy line: timestamp and bought symbol.
RE_BUY  = re.compile(r'(\d{2}:\d{2}:\d{2}).*ACHAT.*?de (\w+USDT|\w+USDC)')
# Sell line: timestamp, ✅/❌ marker, symbol, PnL in percent, hold time in minutes.
RE_SELL = re.compile(r'(\d{2}:\d{2}:\d{2}).*(✅|❌) VENDU (\w+USDT|\w+USDC) @ .* PnL: ([+-]?[\d.]+)%.*Hold: ([\d.]+)min')
# Regime change line: timestamp, previous regime, new regime.
RE_REGIME = re.compile(r'(\d{2}:\d{2}:\d{2}).*CHANGEMENT: (\w+) → (\w+)')
# Heartbeat and blocked-coin lines; not referenced by parse_log.
RE_HEART  = re.compile(r'(\d{2}:\d{2}:\d{2}).*💓 #(\d+).* (\d+) paires.*Surges total: (\d+)')
RE_BLOCKED = re.compile(r'(\d{2}:\d{2}:\d{2}).*(\w+USDT|\w+USDC).*(?:bloqué|BLOCK|CB|cooldown).*')

# Number of lines after a surge line that are scanned for its decision.
_SURGE_LOOKAHEAD = 15


def _surge_decision(lines, idx, coin):
    """Return ``(decision, reason)`` for the surge logged at ``lines[idx]``.

    Scans up to ``_SURGE_LOOKAHEAD`` following lines and stops at the first
    rejection, buy of *coin*, block/cooldown/SKIP marker, or at the next
    surge line. Falls back to ``('unknown', '')`` when nothing is found.
    """
    for raw in lines[idx + 1: idx + 1 + _SURGE_LOOKAHEAD]:
        follow = raw.strip()
        rejected = RE_REJECT2.search(follow)
        if rejected:
            return 'rejected', rejected.group(2).strip()
        if 'ACHAT' in follow and coin in follow:
            return 'bought', ''
        lowered = follow.lower()
        if 'bloqué' in lowered or 'cooldown' in lowered or 'SKIP' in follow:
            # Keep the last '|'-separated field, or the line's tail otherwise.
            reason = follow.split('|')[-1].strip() if '|' in follow else follow[-50:]
            return 'blocked', reason
        if '⚡ SURGE' in follow:
            # The next surge starts here: its lines are not ours to inspect.
            break
    return 'unknown', ''


def parse_log(path, max_lines=5000):
    """Parse the last *max_lines* lines of the log at *path* into events.

    Returns a list of dicts in log order. Every event has ``type``, ``ts``
    (``HH:MM:SS``) and ``line`` (index within the parsed tail), plus:

    * ``surge``: ``coin``, ``decision`` (``bought`` / ``rejected`` /
      ``blocked`` / ``unknown``) and ``reason``;
    * ``buy``: ``coin``;
    * ``sell``: ``coin``, ``ok`` (True for a ✅ line), ``pnl``, ``hold``;
    * ``regime``: ``from`` and ``to``.

    A missing file or a non-positive *max_lines* yields an empty list.
    """
    from collections import deque

    events = []
    if max_lines <= 0:
        # A plain ``lines[-0:]`` slice would return the whole file.
        return events
    try:
        with open(path, 'r', encoding='utf-8', errors='replace') as f:
            # Bounded deque: keeps only the tail instead of the whole file.
            lines = list(deque(f, maxlen=max_lines))
    except FileNotFoundError:
        return events

    last_ts = '00:00:00'
    for i, raw in enumerate(lines):
        line = raw.strip()

        # Remember the most recent timestamp seen at the start of a line.
        m_ts = RE_SURGE_TS.match(line)
        if m_ts:
            last_ts = m_ts.group(1)

        # Surge detected (the ⚡ SURGE line does not always carry a timestamp,
        # hence the fallback on the last timestamp seen).
        m = RE_SURGE.search(line)
        if m:
            coin = m.group(1)
            decision, reason = _surge_decision(lines, i, coin)
            events.append({'type': 'surge', 'ts': last_ts, 'coin': coin,
                           'decision': decision, 'reason': reason, 'line': i})

        # Buy (lines that also mention SURGE are not counted as buys).
        m = RE_BUY.search(line)
        if m and 'SURGE' not in line:
            events.append({'type': 'buy', 'ts': m.group(1), 'coin': m.group(2), 'line': i})

        # Sell
        m = RE_SELL.search(line)
        if m:
            events.append({'type': 'sell', 'ts': m.group(1), 'coin': m.group(3),
                           'ok': m.group(2) == '✅', 'pnl': float(m.group(4)),
                           'hold': float(m.group(5)), 'line': i})

        # Regime change
        m = RE_REGIME.search(line)
        if m:
            events.append({'type': 'regime', 'ts': m.group(1),
                           'from': m.group(2), 'to': m.group(3), 'line': i})

    return events


def ts_to_min(ts):
    """Convert an ``HH:MM:SS`` string to minutes since midnight (may be fractional)."""
    hours, minutes, seconds = (int(part) for part in ts.split(':'))
    return hours * 60 + minutes + seconds / 60


def compare_surges(t_events, p_events, window_min=1.5):
    """Compare the latest surge per coin between testnet and prod events.

    Only events whose ``type`` is ``'surge'`` are considered; when a coin
    appears several times on one side, its last event wins.

    NOTE(review): ``window_min`` is accepted but not used by this
    implementation — surges are matched by coin only, whatever their
    timestamps.

    Returns ``(divergences, common)``:

    * ``divergences``: list of dicts with keys ``coin``, ``testnet``,
      ``prod`` and ``type`` (``'different_decision'``, ``'only_testnet'``
      or ``'only_prod'``), ordered by coin;
    * ``common``: sorted list of coins seen on both sides with the same
      decision.
    """
    def _latest_surge_per_coin(events):
        latest = {}
        for ev in events:
            if ev['type'] == 'surge':
                latest[ev['coin']] = ev
        return latest

    testnet_map = _latest_surge_per_coin(t_events)
    prod_map = _latest_surge_per_coin(p_events)

    divergences, common = [], []
    for coin in sorted(testnet_map.keys() | prod_map.keys()):
        tn = testnet_map.get(coin)
        pr = prod_map.get(coin)

        if tn is None:
            # Coin comes from the union, so prod must have it.
            divergences.append({'coin': coin, 'testnet': None, 'prod': pr,
                                'type': 'only_prod'})
        elif pr is None:
            divergences.append({'coin': coin, 'testnet': tn, 'prod': None,
                                'type': 'only_testnet'})
        elif tn['decision'] == pr['decision']:
            common.append(coin)
        else:
            divergences.append({'coin': coin, 'testnet': tn, 'prod': pr,
                                'type': 'different_decision'})

    return divergences, common


def _load_state_json(base, filename, subdirs=('', 'data')):
    """Load *filename* from the first of ``base/<subdir>`` where it exists.

    Returns the parsed JSON, or None when the file is in none of the
    candidate directories. The file is closed before returning.
    """
    for sub in subdirs:
        candidate = os.path.join(base, sub, filename)
        if os.path.exists(candidate):
            with open(candidate, 'r', encoding='utf-8') as fh:
                return json.load(fh)
    return None


def print_state_diff():
    """Print a side-by-side view of the testnet and prod JSON state files.

    Four sections are printed: current market regime, coins blocked by
    consecutive losses, active time-blocks, and score divergence on the
    coins present on both sides. A missing file is reported as
    'fichier absent' for the side concerned.
    """
    envs = [('TESTNET', TESTNET_DIR), ('PROD', PROD_DIR)]

    print(color('BOLD', "\n══════════════════════════════════════════════════"))
    print(color('BOLD', " COMPARAISON D'ÉTAT TESTNET ↔ PROD"))
    print(color('BOLD', "══════════════════════════════════════════════════"))

    # 1. Regime (only looked up under <base>/data)
    print(color('C', "\n▶ RÉGIME MARCHÉ"))
    for label, base in envs:
        d = _load_state_json(base, 'spy_regime_state.json', subdirs=('data',))
        if d is None:
            print(f"  {label:<10} {color('Y', 'fichier absent')}")
            continue
        age = time.time() - d.get('last_update_ts', 0)
        regime = d.get('current_regime', '?')
        col = 'R' if 'BEAR' in regime else ('Y' if 'EARLY' in regime or 'CORRECTION' in regime else 'G')
        print(f"  {label:<10} {color(col, regime):<25} sauvé: {d.get('saved_at','?')[11:19]}  âge: {age:.0f}s")

    # 2. Coins blocked by consecutive losses. The scores are loaded once and
    #    reused by section 4 so both sections read the same file.
    print(color('C', "\n▶ COINS BLOQUÉS PAR CONSEC_LOSSES (>= SPY_CB_MAX_CONSEC_LOSSES=7)"))
    scores = {}
    for label, base in envs:
        d = scores[label] = _load_state_json(base, 'spy_coin_scores.json')
        if d is None:
            print(f"  {label:<10} {color('Y', 'fichier absent')}")
            continue
        blocked = {s: v for s, v in d.items() if v.get('consec_losses', 0) >= 7}
        near    = {s: v for s, v in d.items() if 3 <= v.get('consec_losses', 0) < 7}
        print(f"  {label:<10} bloqués({len(blocked)}): {color('R', str(sorted(blocked.keys())))}")
        print(f"             proches ({len(near)}): {color('Y', str(sorted(near.keys())))}")

    # 3. Active time-blocks: entries whose 'blocked_until' is still in the future
    print(color('C', "\n▶ TIME-BLOCKS ACTIFS (spy_loss_state.json)"))
    now = time.time()
    for label, base in envs:
        d = _load_state_json(base, 'spy_loss_state.json')
        if d is None:
            print(f"  {label:<10} {color('Y', 'fichier absent')}")
            continue
        active = {s: v for s, v in d.items() if v.get('blocked_until', 0) > now}
        if not active:
            print(f"  {label:<10} {color('G', '0 coin bloqué')}")
            continue
        print(f"  {label:<10} {len(active)} bloqué(s):")
        for s, v in sorted(active.items(), key=lambda x: x[1].get('blocked_until', 0)):
            exp = datetime.fromtimestamp(v['blocked_until']).strftime('%H:%M:%S')
            left = (v['blocked_until'] - now) / 60
            print(f"    {color('R', s):<30} jusqu'à {exp} ({left:.0f}min)")

    # 4. Score divergence on coins present on both sides
    print(color('C', "\n▶ COINS COMMUNS — DIVERGENCE DE SCORES"))
    t, p = scores['TESTNET'], scores['PROD']
    if t is None or p is None:
        # Say which side is missing instead of printing an empty section.
        for label, _ in envs:
            if scores[label] is None:
                print(f"  {label:<10} {color('Y', 'fichier absent')}")
        return

    common = set(t) & set(p)
    diverged = []
    for s in sorted(common):
        tv, pv = t[s], p[s]
        t_cl, p_cl = tv.get('consec_losses', 0), pv.get('consec_losses', 0)
        if abs(t_cl - p_cl) >= 2:  # significant divergence
            diverged.append((s, t_cl, p_cl, tv.get('wins',0), tv.get('losses',0),
                               pv.get('wins',0), pv.get('losses',0)))
    if diverged:
        print(f"  {len(diverged)} coins avec divergence CL ≥ 2 :")
        for s, tcl, pcl, tw, tl, pw, pl in diverged:
            print(f"    {s:<22} TN: CL={tcl} W={tw}/L={tl}  |  PROD: CL={pcl} W={pw}/L={pl}")
    else:
        print(f"  {color('G', 'Aucune divergence significative sur les coins communs')}")
    print(f"  (Testnet: {len(t)} coins  |  Prod: {len(p)} coins  |  Communs: {len(common)})")


def print_surge_comparison(n_lines=3000):
    """Print a per-coin table of surge decisions taken by testnet and prod.

    Parses the last *n_lines* lines of both logs, keeps the most recent
    surge of each coin on each side, and prints one row per coin with a
    divergence marker. The last regime change of each log is printed after
    the table when there is one.
    """
    cell_width = 30

    def _cell(event):
        # Pad BEFORE colouring: the ANSI escape bytes would otherwise count
        # toward the field width and shift the following column.
        if not event:
            return color('Y', f"{'absent':<{cell_width}}")
        text = f"{event['ts']} {event['decision'][:8]}"
        return f"{text:<{cell_width}}"

    print(color('BOLD', "\n══════════════════════════════════════════════════"))
    print(color('BOLD', " COMPARAISON SURGES (dernières lignes)"))
    print(color('BOLD', "══════════════════════════════════════════════════"))

    t_ev = parse_log(TESTNET_LOG, n_lines)
    p_ev = parse_log(PROD_LOG, n_lines)

    t_surges = [e for e in t_ev if e['type'] == 'surge']
    p_surges = [e for e in p_ev if e['type'] == 'surge']

    print(f"\n  Testnet: {len(t_surges)} surges détectés dans les {n_lines} dernières lignes")
    print(f"  Prod:    {len(p_surges)} surges détectés dans les {n_lines} dernières lignes")

    # Group by coin; later events overwrite earlier ones, so the most recent
    # surge of each coin is kept.
    t_by_coin = {e['coin']: e for e in t_surges}
    p_by_coin = {e['coin']: e for e in p_surges}

    all_coins = set(t_by_coin) | set(p_by_coin)
    if not all_coins:
        print(color('Y', "\n  Aucun surge dans les logs analysés."))
        return

    print(f"\n{'Coin':<22} {'TESTNET':<30} {'PROD':<30} Divergence")
    print("─" * 90)
    for coin in sorted(all_coins):
        t = t_by_coin.get(coin)
        p = p_by_coin.get(coin)

        if t and p and t['decision'] != p['decision']:
            diverge = color('R', f"⚠ TN={t['decision']} PROD={p['decision']}")
        elif t and not p:
            diverge = color('Y', '← TN only')
        elif p and not t:
            diverge = color('Y', '→ PROD only')
        else:
            diverge = color('G', '✓ sync')

        print(f"{coin:<22} {_cell(t)} {_cell(p)} {diverge}")

    # Regime summary: last change seen in each log
    t_regimes = [e for e in t_ev if e['type'] == 'regime']
    p_regimes = [e for e in p_ev if e['type'] == 'regime']
    if t_regimes or p_regimes:
        print(color('C', "\n▶ CHANGEMENTS DE RÉGIME"))
        if t_regimes:
            r = t_regimes[-1]
            print(f"  Testnet dernier: {r['ts']} {r['from']} → {r['to']}")
        if p_regimes:
            r = p_regimes[-1]
            print(f"  Prod    dernier: {r['ts']} {r['from']} → {r['to']}")


def print_performance_comparison():
    """Print trade statistics computed from the sell events of each log.

    For each side, up to the last 50000 log lines are parsed. The summary
    shows the trade count, win rate, average win and loss, the ratio of
    average win to average loss (labelled "PF", 999 when there is no loss
    to divide by), the mean hold time, and the three best and worst coins
    by average PnL among coins with at least two sells.
    """
    print(color('BOLD', "\n══════════════════════════════════════════════════"))
    print(color('BOLD', " COMPARAISON PERFORMANCES"))
    print(color('BOLD', "══════════════════════════════════════════════════"))

    for label, log_path in (('TESTNET', TESTNET_LOG), ('PROD', PROD_LOG)):
        closed = [ev for ev in parse_log(log_path, max_lines=50000)
                  if ev['type'] == 'sell']
        if not closed:
            print(f"  {label}: aucun trade dans le log")
            continue

        total = len(closed)
        winners, losers = [], []
        for ev in closed:
            (winners if ev['ok'] else losers).append(ev)
        n_win, n_loss = len(winners), len(losers)

        avg_win = sum(ev['pnl'] for ev in winners) / n_win if winners else 0
        avg_loss = sum(ev['pnl'] for ev in losers) / n_loss if losers else 0
        avg_hold = sum(ev['hold'] for ev in closed) / total
        if avg_loss != 0:
            ratio = abs(avg_win / avg_loss)
        else:
            ratio = 999

        win_rate = n_win / total
        wr_text = color('G' if win_rate >= 0.5 else 'R', f'{win_rate*100:.0f}%')
        win_text = color('G', f'{avg_win:+.2f}%')
        loss_text = color('R', f'{avg_loss:+.2f}%')

        print(f"\n  {color('BOLD', label)}")
        print(f"    Trades: {total}  WR: {wr_text}  ({n_win}W/{n_loss}L)")
        print(f"    Avg Win: {win_text}   Avg Loss: {loss_text}   PF: {ratio:.2f}x")
        print(f"    Hold moyen: {avg_hold:.1f}min")

        # Three best and three worst coins, among those sold at least twice
        pnl_by_coin = {}
        for ev in closed:
            pnl_by_coin.setdefault(ev['coin'], []).append(ev['pnl'])
        coin_avg = {coin: sum(pnls) / len(pnls)
                    for coin, pnls in pnl_by_coin.items() if len(pnls) >= 2}
        if not coin_avg:
            continue
        ascending = sorted(coin_avg.items(), key=lambda kv: kv[1])
        descending = sorted(coin_avg.items(), key=lambda kv: kv[1], reverse=True)
        best_txt = ', '.join(f'{c}({v:+.1f}%)' for c, v in descending[:3])
        worst_txt = ', '.join(f'{c}({v:+.1f}%)' for c, v in ascending[:3])
        print(f"    Meilleurs: {best_txt}")
        print(f"    Pires:     {worst_txt}")


def _live_line_color(line):
    """Return the colour key used to display one log line in live mode."""
    if 'ACHAT' in line:
        return 'G'
    if 'VENDU' in line and '✅' in line:
        return 'G'
    if 'VENDU' in line and '❌' in line:
        return 'R'
    if 'SURGE' in line:
        return 'Y'
    if 'Rejeté' in line or 'bloqué' in line.lower():
        return 'R'
    if 'BEAR' in line:
        return 'R'
    if 'BULL' in line:
        return 'G'
    return 'W'


def _read_new_lines(path, offset):
    """Read the complete lines appended to *path* since byte *offset*.

    Returns ``(lines, new_offset)``. Only data up to the last newline is
    consumed, so a line (or a multi-byte character) still being written is
    left for the next call. When the file is shorter than *offset* it was
    truncated or replaced, and reading restarts from the beginning. A
    missing or unreadable file yields no lines.
    """
    try:
        size = os.path.getsize(path)
    except OSError:
        return [], offset
    if size < offset:
        offset = 0
    if size == offset:
        return [], offset
    try:
        with open(path, 'rb') as f:
            f.seek(offset)
            chunk = f.read(size - offset)
    except OSError:
        return [], offset
    end = chunk.rfind(b'\n')
    if end < 0:
        # No complete line yet; wait for the newline.
        return [], offset
    complete = chunk[:end + 1]
    text = complete.decode('utf-8', errors='replace')
    return text.splitlines(), offset + len(complete)


def live_mode():
    """Follow both logs and print newly appended lines until Ctrl+C.

    Starts at the current end of each log, polls once per second, and
    prints every new non-empty line prefixed with its source (TN / PR)
    and coloured according to its content.
    """
    print(color('BOLD', "\n═══ MODE LIVE — Ctrl+C pour quitter ═══"))
    print(f"  {color('B', 'TESTNET')} ← {TESTNET_LOG}")
    print(f"  {color('M', 'PROD')}    ← {PROD_LOG}\n")

    sources = [('TN', TESTNET_LOG, 'B'), ('PR', PROD_LOG, 'M')]
    # Start from the current file sizes so only new output is shown.
    offsets = {
        label: os.path.getsize(path) if os.path.exists(path) else 0
        for label, path, _ in sources
    }

    try:
        while True:
            for label, path, col in sources:
                new_lines, offsets[label] = _read_new_lines(path, offsets[label])
                for raw in new_lines:
                    line = raw.strip()
                    if not line:
                        continue
                    print(f"[{color(col, label)}] {color(_live_line_color(line), line)}")
            time.sleep(1)
    except KeyboardInterrupt:
        print("\nMode live terminé.")


# ─── Main ───────────────────────────────────────────────────────────────────
if __name__ == '__main__':
    # Command-line entry point: the first selected mode wins, in the order
    # live, state, surges, perf; with no mode flag every report is printed.
    parser = argparse.ArgumentParser(description='Comparateur SPY testnet vs prod')
    for flag, help_text in (
        ('--live', 'Mode live (tail)'),
        ('--state', 'État seul'),
        ('--surges', 'Surges seul'),
        ('--perf', 'Performances seul'),
    ):
        parser.add_argument(flag, action='store_true', help=help_text)
    parser.add_argument('--lines', type=int, default=3000,
                        help='Nb lignes analysées (défaut 3000)')
    args = parser.parse_args()

    modes = (
        (args.live, live_mode),
        (args.state, print_state_diff),
        (args.surges, lambda: print_surge_comparison(args.lines)),
        (args.perf, print_performance_comparison),
    )
    for selected, run in modes:
        if selected:
            run()
            break
    else:
        # No mode flag given: print everything.
        print_state_diff()
        print_surge_comparison(args.lines)
        print_performance_comparison()