{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "523a7e7a",
   "metadata": {},
   "source": [
    "# 🔍 Test Optimisation — Audit Compression de Prix\n",
    "\n",
    "> **Spy Pro mis en veille le 07/05/2026** — cet onglet centralise tous les tests d'optimisation.\n",
    ">\n",
    "> **Hypothèse à valider :** la compression de prix > 1% dans les 30min avant une entrée est-elle un prédicteur de performance supérieure ?\n",
    ">\n",
    "> Source des trades : `/home/ubuntu/crypto_trading_bot/espion_history.json` (1381 trades réels testnet)\n",
    "\n",
    "---\n",
    "\n",
    "**Statut Spy Prod :** ⏸️ EN VEILLE  \n",
    "**Date début audit :** 07/05/2026  \n",
    "**Fichier de sortie :** `./audit_output/`"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4f4bddc0",
   "metadata": {},
   "source": [
    "## 1. Import Libraries & Configuration"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d91b00f5",
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "import sys\n",
    "import time\n",
    "import logging\n",
    "from datetime import datetime, timedelta, timezone\n",
    "from pathlib import Path\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import requests\n",
    "import matplotlib.pyplot as plt\n",
    "import matplotlib.ticker as mticker\n",
    "from scipy.stats import mannwhitneyu\n",
    "\n",
    "logging.basicConfig(level=logging.INFO, format=\"%(asctime)s [%(levelname)s] %(message)s\")\n",
    "log = logging.getLogger(\"audit\")\n",
    "\n",
    "# ── CONFIG ──────────────────────────────────────────────────────────────────\n",
    "TRADES_FILE   = Path(\"/home/ubuntu/crypto_trading_bot/espion_history.json\")\n",
    "OUTPUT_DIR    = Path(\"/home/ubuntu/crypto_trading_bot/test_optimisation/audit_output\")\n",
    "BINANCE_BASE  = \"https://api.binance.com\"   # fallback: \"https://data-api.binance.vision\"\n",
    "\n",
    "PRE_WINDOW_KLINES = 6       # 6 × 5min = 30 min avant l'entrée\n",
    "KLINE_INTERVAL    = \"5m\"\n",
    "HTTP_TIMEOUT      = 15\n",
    "\n",
    "COMPRESSION_BUCKETS = [\n",
    "    (0.0, 0.3,  \"très faible\"),\n",
    "    (0.3, 0.7,  \"faible\"),\n",
    "    (0.7, 1.0,  \"modérée\"),\n",
    "    (1.0, 1.5,  \"élevée\"),\n",
    "    (1.5, 99.0, \"extrême\"),\n",
    "]\n",
    "\n",
    "REQUIRED_FIELDS = {\n",
    "    \"symbol\":     [\"symbol\", \"coin\", \"pair\", \"asset\"],\n",
    "    \"entry_time\": [\"entry_time\", \"entry_timestamp\", \"open_time\", \"buy_time\", \"timestamp\", \"time\"],\n",
    "    \"pnl_pct\":    [\"pnl_pct\", \"pnl_percent\", \"profit_pct\", \"return_pct\", \"pnl\"],\n",
    "}\n",
    "\n",
    "OUTPUT_DIR.mkdir(parents=True, exist_ok=True)\n",
    "print(\"✅ Libs chargées — configuration OK\")\n",
    "print(f\"   Source trades : {TRADES_FILE}\")\n",
    "print(f\"   Sortie        : {OUTPUT_DIR}\")\n",
    "print(f\"   Binance API   : {BINANCE_BASE}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0377ad6a",
   "metadata": {},
   "source": [
    "## 2. Chargement & Nettoyage des Trades"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3f8fb87f",
   "metadata": {},
   "outputs": [],
   "source": [
    "def load_trades(path: Path) -> pd.DataFrame:\n",
    "    log.info(f\"Chargement depuis : {path}\")\n",
    "    if path.suffix.lower() == \".csv\":\n",
    "        df = pd.read_csv(path)\n",
    "    elif path.suffix.lower() in (\".json\", \".jsonl\"):\n",
    "        try:\n",
    "            df = pd.read_json(path)\n",
    "        except ValueError:\n",
    "            df = pd.read_json(path, lines=True)\n",
    "    elif path.suffix.lower() in (\".parquet\", \".pq\"):\n",
    "        df = pd.read_parquet(path)\n",
    "    else:\n",
    "        raise ValueError(f\"Format non reconnu : {path.suffix}\")\n",
    "\n",
    "    # Auto-mapping des colonnes\n",
    "    mapping = {}\n",
    "    for canonical, aliases in REQUIRED_FIELDS.items():\n",
    "        found = next(\n",
    "            (col for alias in aliases for col in df.columns if col.lower() == alias.lower()),\n",
    "            None\n",
    "        )\n",
    "        if not found:\n",
    "            raise ValueError(f\"Colonne '{canonical}' introuvable. Colonnes dispo : {list(df.columns)}\")\n",
    "        mapping[found] = canonical\n",
    "    df = df.rename(columns=mapping)\n",
    "\n",
    "    df[\"entry_time\"] = pd.to_datetime(df[\"entry_time\"], utc=True, errors=\"coerce\")\n",
    "    df = df.dropna(subset=[\"entry_time\"])\n",
    "    df[\"pnl_pct\"] = pd.to_numeric(df[\"pnl_pct\"], errors=\"coerce\")\n",
    "    df = df.dropna(subset=[\"pnl_pct\"])\n",
    "    df[\"symbol\"] = df[\"symbol\"].astype(str).str.upper().apply(\n",
    "        lambda s: s if s.endswith(\"USDT\") else (s[:-4] + \"USDT\" if s.endswith(\"USDC\") else f\"{s}USDT\")\n",
    "    )\n",
    "    return df.reset_index(drop=True)\n",
    "\n",
    "\n",
    "trades = load_trades(TRADES_FILE)\n",
    "\n",
    "print(f\"\\n{'='*60}\")\n",
    "print(f\"TRADES CHARGÉS : {len(trades)}\")\n",
    "print(f\"Période        : {trades['entry_time'].min().strftime('%Y-%m-%d')} → {trades['entry_time'].max().strftime('%Y-%m-%d')}\")\n",
    "print(f\"Win rate brut  : {(trades['pnl_pct'] > 0).mean()*100:.1f}%\")\n",
    "print(f\"PnL moyen brut : {trades['pnl_pct'].mean():+.3f}%\")\n",
    "print(f\"PnL total brut : {trades['pnl_pct'].sum():+.1f}%\")\n",
    "print(f\"{'='*60}\")\n",
    "trades[[\"symbol\", \"entry_time\", \"pnl_pct\"]].head(10)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "6d7775e8",
   "metadata": {},
   "source": [
    "## 3. Récupération des Klines Binance (avec cache)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e8bdfb45",
   "metadata": {},
   "outputs": [],
   "source": [
    "def fetch_klines_around(symbol: str, entry_time: datetime, pre_minutes: int = 35) -> pd.DataFrame:\n",
    "    \"\"\"Klines 5m sur la fenêtre [entry - pre_minutes, entry]. Endpoint Binance spot public.\"\"\"\n",
    "    end_ts   = int(entry_time.timestamp() * 1000)\n",
    "    start_ts = int((entry_time - timedelta(minutes=pre_minutes)).timestamp() * 1000)\n",
    "    try:\n",
    "        r = requests.get(\n",
    "            f\"{BINANCE_BASE}/api/v3/klines\",\n",
    "            params={\"symbol\": symbol, \"interval\": KLINE_INTERVAL,\n",
    "                    \"startTime\": start_ts, \"endTime\": end_ts, \"limit\": 50},\n",
    "            timeout=HTTP_TIMEOUT,\n",
    "        )\n",
    "        r.raise_for_status()\n",
    "        data = r.json()\n",
    "    except Exception as e:\n",
    "        log.warning(f\"  Klines KO {symbol} @ {entry_time}: {e}\")\n",
    "        return pd.DataFrame()\n",
    "\n",
    "    if not data:\n",
    "        return pd.DataFrame()\n",
    "\n",
    "    cols = [\"open_time\",\"open\",\"high\",\"low\",\"close\",\"volume\",\"close_time\",\n",
    "            \"quote_volume\",\"trades\",\"taker_buy_base\",\"taker_buy_quote\",\"ignore\"]\n",
    "    df = pd.DataFrame(data, columns=cols)\n",
    "    for c in [\"open\",\"high\",\"low\",\"close\",\"volume\",\"quote_volume\",\"taker_buy_base\",\"taker_buy_quote\"]:\n",
    "        df[c] = df[c].astype(float)\n",
    "    df[\"open_time\"] = pd.to_datetime(df[\"open_time\"], unit=\"ms\", utc=True)\n",
    "    return df\n",
    "\n",
    "\n",
    "# Test rapide de connectivité\n",
    "test_sym = trades[\"symbol\"].iloc[0]\n",
    "test_time = trades[\"entry_time\"].iloc[0]\n",
    "test_klines = fetch_klines_around(test_sym, test_time)\n",
    "if test_klines.empty:\n",
    "    print(f\"❌ API inaccessible — changer BINANCE_BASE = 'https://data-api.binance.vision'\")\n",
    "else:\n",
    "    print(f\"✅ API OK — {len(test_klines)} klines récupérées pour {test_sym} @ {test_time.strftime('%Y-%m-%d %H:%M')}\")\n",
    "    display(test_klines[[\"open_time\",\"open\",\"high\",\"low\",\"close\"]].tail(7))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "64dac26c",
   "metadata": {},
   "source": [
    "## 4 & 5. Calcul Compression + Enrichissement des Trades\n",
    "\n",
    "> ⏱️ **~1min pour 1381 trades** (50ms throttle + cache JSON). Relancer = instantané grâce au cache."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1691611e",
   "metadata": {},
   "outputs": [],
   "source": [
    "def compute_compression(klines: pd.DataFrame) -> dict:\n",
    "    \"\"\"Compression = range moyen (high-low)/close sur les 6 dernières klines.\"\"\"\n",
    "    if len(klines) < PRE_WINDOW_KLINES:\n",
    "        return {}\n",
    "    pre = klines.iloc[-PRE_WINDOW_KLINES:]\n",
    "    range_pct   = ((pre[\"high\"] - pre[\"low\"]) / pre[\"close\"]).mean() * 100\n",
    "    spread_max  = ((pre[\"high\"] - pre[\"low\"]) / pre[\"close\"]).max() * 100\n",
    "    return {\"compression_pct\": float(range_pct), \"spread_max_pct\": float(spread_max), \"n_klines_used\": len(pre)}\n",
    "\n",
    "\n",
    "def enrich_trades(df: pd.DataFrame, max_trades: int = None, cache_dir: Path = OUTPUT_DIR) -> pd.DataFrame:\n",
    "    if max_trades:\n",
    "        df = df.head(max_trades).copy()\n",
    "        print(f\"⚠️  Mode test : limité à {max_trades} trades\")\n",
    "\n",
    "    # Cache JSON local\n",
    "    cache, cache_path = {}, cache_dir / \"klines_cache.json\"\n",
    "    if cache_path.exists():\n",
    "        with open(cache_path) as f:\n",
    "            cache = json.load(f)\n",
    "        print(f\"📦 Cache chargé : {len(cache)} entrées existantes\")\n",
    "\n",
    "    enriched_rows = []\n",
    "    n_ok, n_fail = 0, 0\n",
    "    n_total = len(df)\n",
    "\n",
    "    for i, row in df.iterrows():\n",
    "        key = f\"{row['symbol']}_{int(row['entry_time'].timestamp())}\"\n",
    "        if key in cache:\n",
    "            features = cache[key]\n",
    "        else:\n",
    "            klines   = fetch_klines_around(row[\"symbol\"], row[\"entry_time\"])\n",
    "            features = compute_compression(klines) if not klines.empty else {}\n",
    "            cache[key] = features\n",
    "            time.sleep(0.05)\n",
    "            if (i + 1) % 100 == 0:\n",
    "                with open(cache_path, \"w\") as f:\n",
    "                    json.dump(cache, f)\n",
    "\n",
    "        n_ok += bool(features)\n",
    "        n_fail += not bool(features)\n",
    "        merged = row.to_dict()\n",
    "        merged.update(features)\n",
    "        enriched_rows.append(merged)\n",
    "\n",
    "        if (i + 1) % 200 == 0 or (i + 1) == n_total:\n",
    "            pct = (i + 1) / n_total * 100\n",
    "            print(f\"  [{pct:5.1f}%] {i+1}/{n_total} trades — {n_ok} OK, {n_fail} fails\")\n",
    "\n",
    "    with open(cache_path, \"w\") as f:\n",
    "        json.dump(cache, f)\n",
    "\n",
    "    result = pd.DataFrame(enriched_rows)\n",
    "    print(f\"\\n✅ Enrichissement terminé : {n_ok}/{n_total} trades avec compression calculable\")\n",
    "    return result\n",
    "\n",
    "\n",
    "# ── LANCEMENT (FULL ou TEST) ─────────────────────────────────────────────────\n",
    "# Décommenter la ligne souhaitée :\n",
    "# enriched = enrich_trades(trades, max_trades=50)   # test rapide\n",
    "enriched = enrich_trades(trades)                    # audit complet\n",
    "\n",
    "enriched[[\"symbol\",\"entry_time\",\"pnl_pct\",\"compression_pct\",\"spread_max_pct\"]].dropna().head(10)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "6a676b5d",
   "metadata": {},
   "source": [
    "## 6. Stratification par Bucket de Compression"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "95689838",
   "metadata": {},
   "outputs": [],
   "source": [
    "valid = enriched.dropna(subset=[\"compression_pct\"]).copy()\n",
    "print(f\"Trades avec compression calculable : {len(valid)} / {len(enriched)}\\n\")\n",
    "\n",
    "strat_rows = []\n",
    "for lo, hi, label in COMPRESSION_BUCKETS:\n",
    "    mask   = (valid[\"compression_pct\"] >= lo) & (valid[\"compression_pct\"] < hi)\n",
    "    bucket = valid[mask]\n",
    "    wr     = (bucket[\"pnl_pct\"] > 0).mean() * 100 if len(bucket) > 0 else 0.0\n",
    "    strat_rows.append({\n",
    "        \"Bucket\":    f\"[{lo:.1f} – {hi:.1f}[\",\n",
    "        \"Label\":     label,\n",
    "        \"N trades\":  len(bucket),\n",
    "        \"Win rate\":  round(wr, 1),\n",
    "        \"PnL moyen\": round(bucket[\"pnl_pct\"].mean(), 3) if len(bucket) > 0 else 0.0,\n",
    "        \"PnL méd.\":  round(bucket[\"pnl_pct\"].median(), 3) if len(bucket) > 0 else 0.0,\n",
    "        \"PnL total\": round(bucket[\"pnl_pct\"].sum(), 1) if len(bucket) > 0 else 0.0,\n",
    "    })\n",
    "strat_df = pd.DataFrame(strat_rows)\n",
    "display(strat_df.style.format({\n",
    "    \"Win rate\": \"{:.1f}%\", \"PnL moyen\": \"{:+.3f}%\",\n",
    "    \"PnL méd.\": \"{:+.3f}%\", \"PnL total\": \"{:+.1f}%\"\n",
    "}).bar(subset=[\"Win rate\"], color=\"#4caf50\", vmin=0, vmax=100)\n",
    " .bar(subset=[\"PnL moyen\"], color=[\"#f44336\",\"#4caf50\"], align=\"zero\"))\n",
    "\n",
    "# Graphique\n",
    "fig, axes = plt.subplots(1, 2, figsize=(13, 4))\n",
    "fig.suptitle(\"Stratification par Bucket de Compression de Prix\", fontsize=13, fontweight=\"bold\")\n",
    "\n",
    "labels_short = [f\"{lo:.1f}-{hi:.1f}\" for lo, hi, _ in COMPRESSION_BUCKETS]\n",
    "colors = [\"#e53935\",\"#fb8c00\",\"#fdd835\",\"#66bb6a\",\"#26a69a\"]\n",
    "\n",
    "axes[0].bar(labels_short, strat_df[\"Win rate\"], color=colors, edgecolor=\"white\")\n",
    "axes[0].axhline(50, color=\"gray\", linestyle=\"--\", linewidth=0.8, label=\"50% (neutre)\")\n",
    "axes[0].set_title(\"Win Rate (%)\")\n",
    "axes[0].set_ylabel(\"Win rate (%)\")\n",
    "axes[0].set_xlabel(\"Compression (% range/close)\")\n",
    "axes[0].legend()\n",
    "\n",
    "bar_colors = [\"#f44336\" if v < 0 else \"#4caf50\" for v in strat_df[\"PnL moyen\"]]\n",
    "axes[1].bar(labels_short, strat_df[\"PnL moyen\"], color=bar_colors, edgecolor=\"white\")\n",
    "axes[1].axhline(0, color=\"gray\", linestyle=\"--\", linewidth=0.8)\n",
    "axes[1].set_title(\"PnL Moyen par Trade (%)\")\n",
    "axes[1].set_ylabel(\"PnL moyen (%)\")\n",
    "axes[1].set_xlabel(\"Compression (% range/close)\")\n",
    "\n",
    "plt.tight_layout()\n",
    "plt.savefig(OUTPUT_DIR / \"stratification_chart.png\", dpi=150, bbox_inches=\"tight\")\n",
    "plt.show()\n",
    "print(\"📊 Graphique sauvegardé\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b11ad0fa",
   "metadata": {},
   "source": [
    "## 7. Tests Statistiques (Mann-Whitney & Cliff's Delta)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ea37eef2",
   "metadata": {},
   "outputs": [],
   "source": [
    "def test_threshold(df: pd.DataFrame, threshold: float) -> dict:\n",
    "    v    = df.dropna(subset=[\"compression_pct\"])\n",
    "    high = v[v[\"compression_pct\"] >= threshold]\n",
    "    low  = v[v[\"compression_pct\"] <  threshold]\n",
    "    if len(high) < 10 or len(low) < 10:\n",
    "        return {\"threshold\": threshold, \"error\": \"échantillons trop petits\"}\n",
    "    try:\n",
    "        _, pvalue = mannwhitneyu(high[\"pnl_pct\"].values, low[\"pnl_pct\"].values, alternative=\"greater\")\n",
    "    except ValueError:\n",
    "        pvalue = 1.0\n",
    "    h, l = high[\"pnl_pct\"].values, low[\"pnl_pct\"].values\n",
    "    gt   = (h[:, None] > l[None, :]).sum()\n",
    "    lt   = (h[:, None] < l[None, :]).sum()\n",
    "    cliff = (gt - lt) / (len(h) * len(l))\n",
    "    return {\n",
    "        \"threshold\": threshold,\n",
    "        \"n_high\": len(high), \"n_low\": len(low),\n",
    "        \"wr_high\": round((high[\"pnl_pct\"] > 0).mean() * 100, 1),\n",
    "        \"wr_low\":  round((low[\"pnl_pct\"]  > 0).mean() * 100, 1),\n",
    "        \"pnl_high\": round(high[\"pnl_pct\"].mean(), 3),\n",
    "        \"pnl_low\":  round(low[\"pnl_pct\"].mean(), 3),\n",
    "        \"pvalue\":  round(pvalue, 4),\n",
    "        \"cliff\":   round(cliff, 3),\n",
    "    }\n",
    "\n",
    "print(f\"{'Seuil':>8} {'N_high':>7} {'N_low':>6} {'WR_high':>9} {'WR_low':>8} {'PnL_high':>10} {'PnL_low':>9} {'p-value':>9} {'Cliff':>7} {'Verdict'}\")\n",
    "print(\"-\" * 110)\n",
    "for thr in [0.5, 0.7, 1.0, 1.5]:\n",
    "    r = test_threshold(enriched, thr)\n",
    "    if \"error\" in r:\n",
    "        print(f\"  {thr:.1f}%  →  {r['error']}\")\n",
    "        continue\n",
    "    delta_pnl = r[\"pnl_high\"] - r[\"pnl_low\"]\n",
    "    if r[\"pvalue\"] < 0.05 and r[\"cliff\"] > 0.10:\n",
    "        verdict = \"✅ CONFIRMÉ\"\n",
    "    elif r[\"pvalue\"] < 0.05:\n",
    "        verdict = \"~ Significatif (effet faible)\"\n",
    "    else:\n",
    "        verdict = \"❌ Non significatif\"\n",
    "    print(f\"  {thr:.1f}%  {r['n_high']:>7} {r['n_low']:>6} {r['wr_high']:>8.1f}% {r['wr_low']:>7.1f}% \"\n",
    "          f\"{r['pnl_high']:>+9.3f}% {r['pnl_low']:>+8.3f}%  {r['pvalue']:>8.4f}  {r['cliff']:>+6.3f}  {verdict}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "9f1d09a8",
   "metadata": {},
   "source": [
    "## 8. Simulation Économique — Filtre Compression >= 1.0%"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "86851527",
   "metadata": {},
   "outputs": [],
   "source": [
    "valid = enriched.dropna(subset=[\"compression_pct\"]).copy()\n",
    "FILTER_THR = 1.0\n",
    "\n",
    "kept     = valid[valid[\"compression_pct\"] >= FILTER_THR]\n",
    "excluded = valid[valid[\"compression_pct\"] <  FILTER_THR]\n",
    "\n",
    "print(f\"{'='*60}\")\n",
    "print(f\"SIMULATION : filtre compression >= {FILTER_THR}%\")\n",
    "print(f\"{'='*60}\")\n",
    "print(f\"Réel observé          : {len(valid):>5} trades | PnL total {valid['pnl_pct'].sum():+.1f}%  | WR {(valid['pnl_pct']>0).mean()*100:.1f}%\")\n",
    "print(f\"Avec filtre {FILTER_THR}%     : {len(kept):>5} trades | PnL total {kept['pnl_pct'].sum():+.1f}%  | WR {(kept['pnl_pct']>0).mean()*100:.1f}%\")\n",
    "print(f\"Trades exclus         : {len(excluded):>5} trades | PnL total {excluded['pnl_pct'].sum():+.1f}%\")\n",
    "print(f\"\")\n",
    "gain_net = -excluded['pnl_pct'].sum()\n",
    "print(f\"→ Éviter ces trades aurait {'GAGNÉ' if gain_net > 0 else 'PERDU'} {abs(gain_net):+.1f}% de PnL cumulé\")\n",
    "print(f\"→ Trades évités : {len(excluded)} ({len(excluded)/len(valid)*100:.1f}% du volume total)\")\n",
    "\n",
    "# Graphique comparatif\n",
    "fig, axes = plt.subplots(1, 3, figsize=(14, 4))\n",
    "fig.suptitle(f\"Impact du filtre Compression >= {FILTER_THR}%\", fontsize=13, fontweight=\"bold\")\n",
    "\n",
    "# WR comparé\n",
    "groups = [\"Sans filtre\", f\"Compression\\n>= {FILTER_THR}%\", f\"Compression\\n< {FILTER_THR}%\"]\n",
    "wrs = [(valid[\"pnl_pct\"] > 0).mean()*100, (kept[\"pnl_pct\"] > 0).mean()*100, (excluded[\"pnl_pct\"] > 0).mean()*100]\n",
    "bar_c = [\"#90a4ae\", \"#4caf50\", \"#f44336\"]\n",
    "axes[0].bar(groups, wrs, color=bar_c, edgecolor=\"white\")\n",
    "axes[0].axhline(50, color=\"gray\", linestyle=\"--\", linewidth=0.8)\n",
    "axes[0].set_title(\"Win Rate (%)\")\n",
    "axes[0].set_ylabel(\"%\")\n",
    "\n",
    "# PnL moyen comparé\n",
    "pnls = [valid[\"pnl_pct\"].mean(), kept[\"pnl_pct\"].mean(), excluded[\"pnl_pct\"].mean()]\n",
    "bar_c2 = [(\"#f44336\" if v < 0 else \"#4caf50\") for v in pnls]\n",
    "axes[1].bar(groups, pnls, color=bar_c2, edgecolor=\"white\")\n",
    "axes[1].axhline(0, color=\"gray\", linestyle=\"--\", linewidth=0.8)\n",
    "axes[1].set_title(\"PnL Moyen / trade (%)\")\n",
    "axes[1].set_ylabel(\"%\")\n",
    "\n",
    "# Distribution PnL high vs low\n",
    "axes[2].hist(kept[\"pnl_pct\"],     bins=30, alpha=0.6, color=\"#4caf50\", label=f\">= {FILTER_THR}%\")\n",
    "axes[2].hist(excluded[\"pnl_pct\"], bins=30, alpha=0.6, color=\"#f44336\", label=f\"< {FILTER_THR}%\")\n",
    "axes[2].axvline(0, color=\"black\", linewidth=0.8)\n",
    "axes[2].set_title(\"Distribution PnL\")\n",
    "axes[2].set_xlabel(\"PnL %\")\n",
    "axes[2].legend()\n",
    "\n",
    "plt.tight_layout()\n",
    "plt.savefig(OUTPUT_DIR / \"simulation_filtre.png\", dpi=150, bbox_inches=\"tight\")\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5574929b",
   "metadata": {},
   "source": [
    "## 9. Export des Résultats"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e82523fa",
   "metadata": {},
   "outputs": [],
   "source": [
    "from datetime import timezone\n",
    "\n",
    "valid = enriched.dropna(subset=[\"compression_pct\"])\n",
    "\n",
    "# ── Parquet (données complètes pour analyses ad hoc)\n",
    "enriched.to_parquet(OUTPUT_DIR / \"trades_enriched.parquet\")\n",
    "\n",
    "# ── CSV stratification\n",
    "strat_df.to_csv(OUTPUT_DIR / \"stratification.csv\", index=False)\n",
    "\n",
    "# ── Rapport texte\n",
    "lines = [\n",
    "    \"=\" * 78,\n",
    "    \"RAPPORT D'AUDIT — IMPACT DE LA COMPRESSION SUR LES TRADES\",\n",
    "    \"=\" * 78,\n",
    "    f\"Date         : {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M UTC')}\",\n",
    "    f\"Source       : {TRADES_FILE}\",\n",
    "    f\"Trades total : {len(enriched)} ({len(valid)} avec compression calculable)\",\n",
    "    f\"Win rate     : {(valid['pnl_pct'] > 0).mean()*100:.1f}%\",\n",
    "    f\"PnL moyen    : {valid['pnl_pct'].mean():+.3f}%\",\n",
    "    f\"PnL total    : {valid['pnl_pct'].sum():+.1f}%\",\n",
    "    \"\",\n",
    "    \"-\" * 78,\n",
    "    \"STRATIFICATION\",\n",
    "    \"-\" * 78,\n",
    "]\n",
    "lines.append(f\"{'Bucket':<14}{'Label':<14}{'N':>6}{'WR':>9}{'PnL moy':>10}{'PnL tot':>10}\")\n",
    "lines.append(\"-\" * 63)\n",
    "for _, row in strat_df.iterrows():\n",
    "    lines.append(f\"{row['Bucket']:<14}{row['Label']:<14}{row['N trades']:>6}\"\n",
    "                 f\"{row['Win rate']:>8.1f}%{row['PnL moyen']:>+9.3f}%{row['PnL total']:>+9.1f}%\")\n",
    "lines += [\"\", \"-\" * 78, \"TESTS STATISTIQUES\", \"-\" * 78]\n",
    "for thr in [0.5, 0.7, 1.0, 1.5]:\n",
    "    r = test_threshold(enriched, thr)\n",
    "    if \"error\" in r:\n",
    "        lines.append(f\"Seuil {thr}: {r['error']}\")\n",
    "        continue\n",
    "    verdict = (\"✅ CONFIRMÉ\" if r[\"pvalue\"] < 0.05 and r[\"cliff\"] > 0.10\n",
    "               else \"~ effet faible\" if r[\"pvalue\"] < 0.05 else \"❌ Non significatif\")\n",
    "    lines.append(f\"Seuil {thr}% → high WR={r['wr_high']:.1f}% PnL={r['pnl_high']:+.3f}% | \"\n",
    "                 f\"low WR={r['wr_low']:.1f}% PnL={r['pnl_low']:+.3f}% | \"\n",
    "                 f\"p={r['pvalue']:.4f} cliff={r['cliff']:+.3f} → {verdict}\")\n",
    "lines += [\"\", \"-\" * 78, \"SIMULATION FILTRE >= 1.0%\", \"-\" * 78]\n",
    "kept = valid[valid[\"compression_pct\"] >= 1.0]\n",
    "excl = valid[valid[\"compression_pct\"] <  1.0]\n",
    "lines.append(f\"Sans filtre : {len(valid)} trades, PnL {valid['pnl_pct'].sum():+.1f}%\")\n",
    "lines.append(f\"Avec filtre : {len(kept)} trades, PnL {kept['pnl_pct'].sum():+.1f}%\")\n",
    "lines.append(f\"Exclus      : {len(excl)} trades, PnL {excl['pnl_pct'].sum():+.1f}%\")\n",
    "lines.append(f\"Gain net (éviter exclus) : {-excl['pnl_pct'].sum():+.1f}%\")\n",
    "\n",
    "report_text = \"\\n\".join(lines)\n",
    "(OUTPUT_DIR / \"report.txt\").write_text(report_text, encoding=\"utf-8\")\n",
    "\n",
    "print(report_text)\n",
    "print(f\"\\n📁 Fichiers exportés dans : {OUTPUT_DIR}\")\n",
    "print(\"   - trades_enriched.parquet\")\n",
    "print(\"   - stratification.csv\")\n",
    "print(\"   - report.txt\")\n",
    "print(\"   - stratification_chart.png\")\n",
    "print(\"   - simulation_filtre.png\")\n",
    "print(\"   - klines_cache.json\")"
   ]
  }
 ],
 "metadata": {
  "language_info": {
   "name": "python"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}