#!/usr/bin/env python3
"""
compare_spies.py — Real-time comparative monitoring of the testnet and prod spies.
Detects behavioural differences between the two spies and logs them.
Usage: python3 compare_spies.py [--interval SECONDS] [--summary-every SECONDS] [--replay MINUTES]
"""
import re, time, json, os, sys, argparse
from datetime import datetime
from collections import defaultdict

# Logs tailed by the comparator (one per spy) and the report file it writes.
TESTNET_LOG = "/home/ubuntu/crypto_trading_bot/market_spy.log"
PROD_LOG    = "/home/ubuntu/crypto_trading_prod/logs/market_spy_prod.log"
REPORT_FILE = "/home/ubuntu/crypto_trading_bot/spy_comparison_report.log"

# Regex patterns, one per log event.
# In every pattern group 1 is the HH:MM:SS timestamp and group 2 the symbol.
# The symbol class accepts word characters plus CJK ideographs (U+4E00-U+9FFF)
# and must end with USD, USDC, USDT or USDCT (`USDC?T?`).
# "ACHAT OK" line: time, symbol, price.
RE_ACHAT   = re.compile(r'(\d{2}:\d{2}:\d{2}).*ACHAT OK.*?([\w\u4e00-\u9fff]+USDC?T?) @ ([\d.]+)')
# "VENDU" line: time, symbol, price, signed PnL in percent, hold time in minutes.
RE_VENDU   = re.compile(r'(\d{2}:\d{2}:\d{2}).*VENDU ([\w\u4e00-\u9fff]+USDC?T?) @ ([\d.]+).*PnL: ([+-][\d.]+)%.*Hold: ([\d.]+)min')
# "SURGE" line: time, symbol, signed percentage.
# NOTE(review): not referenced by any code visible in this file.
RE_SURGE   = re.compile(r'(\d{2}:\d{2}:\d{2}).*SURGE.*?([\w\u4e00-\u9fff]+USDC?T?)\s*\|.*?([+-][\d.]+)%')
# "🔒 ... bloqué N min" line: time, symbol, block duration in whole minutes.
RE_BLOCK   = re.compile(r'(\d{2}:\d{2}:\d{2}).*🔒 ([\w\u4e00-\u9fff]+USDC?T?).*bloqué (\d+) min')
# "📡 ... INJECTÉ" line: time, symbol.
RE_INJECT  = re.compile(r'(\d{2}:\d{2}:\d{2}).*📡 ([\w\u4e00-\u9fff]+USDC?T?) INJECTÉ')

# Normalize USDT→USDC for cross-comparison (testnet uses USDT, prod uses USDC same base)
def normalize(sym):
    """Return *sym* with a trailing ``USDT`` quote rewritten to ``USDC``.

    Only the suffix is rewritten: a ``USDT`` substring elsewhere in the
    symbol (e.g. in the base asset) is left untouched. Symbols that do not
    end with ``USDT`` are returned unchanged.
    """
    suffix = 'USDT'
    if sym.endswith(suffix):
        # Slice instead of str.replace(), which would rewrite every occurrence.
        return sym[:-len(suffix)] + 'USDC'
    return sym

def read_new_lines(fpath, pos):
    """Return the complete lines appended to *fpath* since byte offset *pos*.

    The file is read in binary mode so that *pos* is a true byte offset, and
    only data up to the last newline is consumed. A line the writer has not
    finished yet (or a multi-byte UTF-8 character cut in half) stays in the
    file and is returned whole by a later call, once its newline is written.

    Args:
        fpath: path of the log file to tail.
        pos: byte offset returned by the previous call (0 to read from start).

    Returns:
        ``(lines, new_pos)``: the decoded lines without line terminators, and
        the offset to pass to the next call. If the file shrank below *pos*
        (truncation/rotation) reading restarts from offset 0. If the file
        cannot be read, or holds no complete new line, ``lines`` is empty and
        the offset is not advanced.
    """
    try:
        file_size = os.path.getsize(fpath)
        if file_size < pos:
            # File was truncated/rotated — reset to start
            pos = 0
        if file_size == pos:
            return [], pos
        with open(fpath, 'rb') as f:
            f.seek(pos)
            raw = f.read()
    except OSError:
        # Best effort: a missing or unreadable log just yields nothing this round.
        return [], pos

    last_newline = raw.rfind(b'\n')
    if last_newline == -1:
        # Only a partial line so far; wait until the writer terminates it.
        return [], pos
    complete = raw[:last_newline + 1]
    text = complete.decode('utf-8', errors='replace')
    return text.splitlines(), pos + len(complete)

def _event_from_line(line):
    """Return the event dict (without the 'spy' key) for one log line, or None.

    Patterns are tried in the order ACHAT, VENDU, INJECT, BLOCK; the first
    one that matches decides the event type.

    Raises:
        ValueError: a captured numeric field is not a valid number
            (the price/PnL character class ``[\\d.]+`` also accepts text such
            as ``1.2.3`` or ``.``).
    """
    m = RE_ACHAT.search(line)
    if m:
        return {'type': 'BUY', 'time': m.group(1), 'symbol': m.group(2),
                'price': float(m.group(3))}
    m = RE_VENDU.search(line)
    if m:
        return {'type': 'SELL', 'time': m.group(1), 'symbol': m.group(2),
                'price': float(m.group(3)), 'pnl_pct': float(m.group(4)),
                'hold': float(m.group(5))}
    m = RE_INJECT.search(line)
    if m:
        return {'type': 'INJECT', 'time': m.group(1), 'symbol': m.group(2)}
    m = RE_BLOCK.search(line)
    if m:
        return {'type': 'BLOCK', 'time': m.group(1), 'symbol': m.group(2),
                'duration': int(m.group(3))}
    return None


def parse_events(lines, spy_name):
    """Extract BUY / SELL / INJECT / BLOCK events from raw log lines.

    Args:
        lines: iterable of log lines (strings).
        spy_name: label stored under the 'spy' key of every event
            ('TESTNET' or 'PROD' in this file).

    Returns:
        A list of event dicts in input order. Every dict has 'type', 'time',
        'symbol' and 'spy'; BUY adds 'price'; SELL adds 'price', 'pnl_pct'
        and 'hold'; BLOCK adds 'duration'. Lines that match no pattern, or
        whose numeric fields cannot be converted, are skipped.
    """
    events = []
    for line in lines:
        try:
            event = _event_from_line(line)
        except ValueError:
            # Malformed number in an otherwise matching line: drop the line
            # rather than let the exception abort the whole polling loop.
            continue
        if event is not None:
            event['spy'] = spy_name
            events.append(event)
    return events

class SpyComparator:
    """Compare the trades of the testnet and prod spies by tailing their logs.

    State kept per instance:
      * ``tn_pos`` / ``pr_pos``: byte offsets already consumed in each log.
      * ``tn_recents`` / ``pr_recents``: last buy per normalized symbol, as
        ``{'time', 'price', 'ts'}``. ``ts`` is the wall-clock time at which
        the line was *processed*, not the time printed in the log. An
        optional ``'checked'`` key marks entries the orphan check has
        already settled.
      * ``stats``: running counters and lists used by ``print_summary``.

    NOTE(review): because ``ts`` is the processing time, all buys replayed at
    start-up receive almost the same ``ts``, so common-buy and orphan
    detection are only meaningful for lines read in real time.
    NOTE(review): ``_seen_sells`` and the lists in ``stats`` grow for the
    whole session; only sells are de-duplicated, buys are not.
    """

    # Buys of the same symbol on both spies closer than this many seconds
    # are logged as a common buy.
    COMMON_BUY_WINDOW = 90
    # A buy is judged by check_orphans() once it is older than ORPHAN_WINDOW
    # seconds; the counterpart must have bought within the same span.
    ORPHAN_WINDOW = 120
    # Buys older than this are never examined by check_orphans().
    ORPHAN_MAX_AGE = 300

    def __init__(self, replay_minutes=0):
        """Set the starting offsets, reset the statistics and log a banner.

        replay_minutes: re-read roughly the last N minutes of logs at start-up.
        0 = only new events (real time).
        """
        self.tn_pos = self._start_offset(TESTNET_LOG, replay_minutes)
        self.pr_pos = self._start_offset(PROD_LOG, replay_minutes)
        self.tn_events = []
        self.pr_events = []
        self.stats = {
            'tn_buys': 0, 'pr_buys': 0,
            'tn_sells': 0, 'pr_sells': 0,
            'tn_wins': 0, 'pr_wins': 0,
            'tn_pnl': 0.0, 'pr_pnl': 0.0,
            'only_tn': [],
            'only_pr': [],
            'both': [],
            'price_diffs': [],
            'pnl_diffs': [],
        }
        self.tn_recents = {}
        self.pr_recents = {}
        self.report_lines = []
        self._seen_sells = set()  # deduplicate duplicate log lines (was 2 instances)
        print(f"[{now()}] 🔍 Surveillance démarrée (replay={replay_minutes}min)")
        print(f"   TN pos={self.tn_pos:,}  PROD pos={self.pr_pos:,}")
        self._log(f"=== Surveillance démarrée {datetime.now().strftime('%Y-%m-%d %H:%M:%S')} (replay={replay_minutes}min) ===")

    @staticmethod
    def _start_offset(fpath, minutes):
        """Return the byte offset at which tailing of *fpath* should start.

        The lookback is a size-based approximation, not a timestamp search:
        0 minutes starts at the end of the file, more than 120 minutes starts
        at 60% of the file, anything else starts 200,000 bytes before the
        end. A missing or unreadable file yields 0. The offset may fall in
        the middle of a line.
        """
        try:
            size = os.path.getsize(fpath)
        except OSError:
            return 0
        if minutes == 0:
            return size
        if minutes > 120:
            return int(size * 0.6)
        return max(0, size - 200_000)

    def _log(self, msg):
        """Print *msg* with a timestamp and append the same line to REPORT_FILE."""
        # Format once so the console and the report carry the same timestamp.
        line = f"[{now()}] {msg}"
        print(line)
        with open(REPORT_FILE, 'a', encoding='utf-8') as f:
            f.write(line + "\n")

    def poll(self):
        """Read the new lines of both logs and process every event found.

        All testnet events of the batch are processed before the prod ones.
        """
        tn_lines, self.tn_pos = read_new_lines(TESTNET_LOG, self.tn_pos)
        pr_lines, self.pr_pos = read_new_lines(PROD_LOG, self.pr_pos)

        for e in parse_events(tn_lines, 'TESTNET'):
            self._process(e)
        for e in parse_events(pr_lines, 'PROD'):
            self._process(e)

    def _process(self, e):
        """Update statistics and log output for a single parsed event.

        Any 'spy' value other than 'TESTNET' is treated as prod. INJECT
        events are accepted but currently ignored.
        """
        sym_n = normalize(e['symbol'])
        kind = e['type']
        is_testnet = e['spy'] == 'TESTNET'

        if kind == 'BUY':
            self._process_buy(e, sym_n, is_testnet)
        elif kind == 'SELL':
            self._process_sell(e, sym_n, is_testnet)
        elif kind == 'BLOCK':
            self._log(f"🔒 BLOCAGE {'TN' if is_testnet else 'PROD'}: {sym_n} → {e['duration']}min")

    def _process_buy(self, e, sym_n, is_testnet):
        """Record a buy and log a common buy if the other spy bought recently."""
        entry = {'time': e['time'], 'price': e['price'], 'ts': time.time()}
        if is_testnet:
            self.stats['tn_buys'] += 1
            self.tn_recents[sym_n] = entry
            other = self.pr_recents.get(sym_n)
        else:
            self.stats['pr_buys'] += 1
            self.pr_recents[sym_n] = entry
            other = self.tn_recents.get(sym_n)

        if not other or (entry['ts'] - other['ts']) >= self.COMMON_BUY_WINDOW:
            # The other spy has not bought (yet); check_orphans() decides later.
            return

        if is_testnet:
            tn_price, pr_price = entry['price'], other['price']
        else:
            tn_price, pr_price = other['price'], entry['price']

        # Both sides are matched: neither can be an orphan any more.
        entry['checked'] = True
        other['checked'] = True

        if tn_price:
            # Always PROD relative to TN, whichever spy bought first, so the
            # average printed by print_summary() ("PROD vs TN") is meaningful.
            price_diff = (pr_price - tn_price) / tn_price * 100
            self._log(f"✅ ACHAT COMMUN: {sym_n} | TN={tn_price} PROD={pr_price} | Δprix={price_diff:+.2f}%")
            self.stats['price_diffs'].append({'sym': sym_n, 'diff_pct': price_diff, 'time': e['time']})
        else:
            # A zero reference price would divide by zero; report without a delta.
            self._log(f"✅ ACHAT COMMUN: {sym_n} | TN={tn_price} PROD={pr_price} | Δprix=N/A")
        self.stats['both'].append(sym_n)

    def _process_sell(self, e, sym_n, is_testnet):
        """Count a sell once (duplicate log lines are dropped) and log it."""
        # Deduplicate: testnet had 2 instances writing same line
        dedup_key = f"{e['spy']}:{sym_n}:{e['time']}:{e['pnl_pct']}"
        if dedup_key in self._seen_sells:
            return
        self._seen_sells.add(dedup_key)

        win = e['pnl_pct'] > 0
        side = 'tn' if is_testnet else 'pr'
        self.stats[f'{side}_sells'] += 1
        if win:
            self.stats[f'{side}_wins'] += 1
        self.stats[f'{side}_pnl'] += e['pnl_pct']

        icon = '✅' if win else '❌'
        # Labels are padded so the symbols line up in the report.
        label = 'VENTE TN:  ' if is_testnet else 'VENTE PROD:'
        self._log(f"{icon} {label} {sym_n} {e['pnl_pct']:+.2f}% hold={e['hold']:.1f}min")

    def check_orphans(self):
        """Detect coins bought by one spy but not the other (after a 2-min window).

        Each buy is judged exactly once, at the first call that finds it
        between ORPHAN_WINDOW and ORPHAN_MAX_AGE seconds old. It is an orphan
        unless the other spy's last buy of the same symbol lies within
        ORPHAN_WINDOW seconds of it. The comparison uses the gap between the
        two buys, so a matched pair is never reported later merely because
        the older of the two buys has aged.
        """
        now_ts = time.time()
        self._report_orphans(now_ts, self.tn_recents, self.pr_recents,
                             'only_tn', 'TN', 'PROD')
        self._report_orphans(now_ts, self.pr_recents, self.tn_recents,
                             'only_pr', 'PROD', 'TN')

    def _report_orphans(self, now_ts, mine, theirs, stat_key, mine_label, theirs_label):
        """Log and record the buys in *mine* that have no counterpart in *theirs*."""
        for sym, e in mine.items():
            if e.get('checked'):
                continue
            age = now_ts - e['ts']
            if not (self.ORPHAN_WINDOW < age < self.ORPHAN_MAX_AGE):
                continue
            e['checked'] = True  # settle the entry whatever the outcome
            other = theirs.get(sym)
            if other is not None and abs(other['ts'] - e['ts']) <= self.ORPHAN_WINDOW:
                continue
            self._log(f"⚠️  MANQUÉ {theirs_label}: {sym} acheté {mine_label}@{e['time']} mais PAS en {theirs_label}")
            self.stats[stat_key].append({'sym': sym, 'time': e['time']})

    def print_summary(self):
        """Log the session totals: buys, sells, win rates, PnL sums and misses."""
        s = self.stats

        def wr(wins, sells):
            # Win rate as a whole percentage, or N/A before the first sell.
            return f"{wins/sells*100:.0f}%" if sells > 0 else "N/A"

        self._log("─" * 55)
        self._log("📊 RÉSUMÉ COMPARATIF SESSION")
        self._log(f"   Achats   : TN={s['tn_buys']}  PROD={s['pr_buys']}")
        self._log(f"   Ventes   : TN={s['tn_sells']}  PROD={s['pr_sells']}")
        self._log(f"   Win rate : TN={wr(s['tn_wins'],s['tn_sells'])}  PROD={wr(s['pr_wins'],s['pr_sells'])}")
        self._log(f"   ΣPnL%    : TN={s['tn_pnl']:+.2f}%  PROD={s['pr_pnl']:+.2f}%")
        if s['price_diffs']:
            avg_diff = sum(d['diff_pct'] for d in s['price_diffs']) / len(s['price_diffs'])
            self._log(f"   Δprix moy: {avg_diff:+.3f}% (PROD vs TN, {len(s['price_diffs'])} trades communs)")
        if s['only_tn']:
            self._log(f"   MANQUÉS PROD: {len(s['only_tn'])} trades — {[x['sym'] for x in s['only_tn'][-5:]]}")
        if s['only_pr']:
            self._log(f"   MANQUÉS TN:   {len(s['only_pr'])} trades — {[x['sym'] for x in s['only_pr'][-5:]]}")
        self._log("─" * 55)

def now():
    """Return the current local wall-clock time formatted as ``HH:MM:SS``."""
    return f"{datetime.now():%H:%M:%S}"

def main():
    """Parse the command line, empty the report file and run the monitor loop.

    Options: ``--interval`` (seconds slept between polls, default 7),
    ``--summary-every`` (seconds between summaries, default 300) and
    ``--replay`` (minutes of history to re-read at start-up, default 480).
    The loop runs until Ctrl-C, after which a final summary is printed.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument('--interval', type=int, default=7)
    cli.add_argument('--summary-every', type=int, default=300)
    cli.add_argument('--replay', type=int, default=480,
                     help='Relire les N dernières minutes de logs (défaut: 480 = 8h)')
    opts = cli.parse_args()

    # Every session starts from an empty report file.
    with open(REPORT_FILE, 'w'):
        pass

    comparator = SpyComparator(replay_minutes=opts.replay)
    summary_mark = time.time()
    orphan_mark = time.time()

    try:
        while True:
            comparator.poll()
            tick = time.time()

            # Orphan detection runs at most once every 30 seconds.
            orphan_due = (tick - orphan_mark) >= 30
            if orphan_due:
                comparator.check_orphans()
                orphan_mark = tick

            # The periodic summary follows the --summary-every cadence.
            summary_due = (tick - summary_mark) >= opts.summary_every
            if summary_due:
                comparator.print_summary()
                summary_mark = tick

            time.sleep(opts.interval)

    except KeyboardInterrupt:
        print("\n[STOP] Surveillance arrêtée.")
        comparator.print_summary()

# Start the monitoring loop only when this file is executed as a script.
if __name__ == '__main__':
    main()
