"""
SPY Optimizer — Signal Classifier
Modèle ML qui prédit si un signal de surge détecté par le SPY sera rentable.

Architecture:
  - Ensemble LightGBM + XGBoost avec stacking
  - Optuna pour l'auto-tuning des hyperparamètres
  - Walk-forward validation (entraîne sur passé, valide sur futur)
  - Recalibration quotidienne adaptative

Le modèle apprend à filtrer les faux signaux (INSTANT_REVERSAL, HARD_SL)
en se basant sur les conditions de marché au moment de la détection.
"""
import json
import os
import pickle
import warnings
from datetime import datetime, timezone
from pathlib import Path
from typing import Optional

import numpy as np
import pandas as pd
import lightgbm as lgb
import xgboost as xgb
import optuna
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    roc_auc_score, classification_report, confusion_matrix,
)
from sklearn.preprocessing import LabelEncoder

from feature_engineering import FEATURE_COLUMNS, SURGE_TYPES, build_dataset_from_trades

warnings.filterwarnings("ignore", category=UserWarning)
optuna.logging.set_verbosity(optuna.logging.WARNING)

# ─── Paths ───
PROJECT_DIR = Path(__file__).parent
MODELS_DIR = PROJECT_DIR / "models"
DATA_DIR = PROJECT_DIR / "data"
KLINES_DIR = DATA_DIR / "klines_1m"


class SignalClassifier:
    """
    Ensemble classifier that predicts the probability that a SPY signal is profitable.

    Combines LightGBM (fast, strong on tabular data) and XGBoost (robust, good
    generalization), averaged 50/50 at prediction time.  A GPU-trained LSTM
    model is blended in when available on disk (see predict()).  The decision
    threshold is tuned on the validation window to maximize simulated profit,
    not raw accuracy.
    """

    def __init__(self):
        # Boosters are populated by train() or load().
        self.lgb_model = None
        self.xgb_model = None
        # Raw tabular feature names (before one-hot expansion of surge_type).
        self.feature_columns = FEATURE_COLUMNS.copy()
        # Fitted for persistence/compatibility; the actual surge_type encoding
        # used by the models is the manual one-hot in _prepare_features().
        self.surge_encoder = LabelEncoder()
        self.is_trained = False
        self.training_stats = {}
        self.optimal_threshold = 0.5  # decision threshold (optimized during training)
        self.feature_importance = {}
        # Feature names after one-hot expansion; initialized here so that
        # save() on an untrained instance does not raise AttributeError.
        self.final_feature_names: list[str] = []
        self.deep_predictor = None  # GPU-trained LSTM model (loaded lazily)

    def _load_deep_predictor(self):
        """Load the LSTM model if available (trained on GPU, CPU inference).

        Best-effort: any import/load failure leaves deep_predictor as None,
        in which case predict() falls back to the tabular-only ensemble.
        """
        if self.deep_predictor is not None:
            return
        try:
            from deep_inference import DeepPredictor
            self.deep_predictor = DeepPredictor()
            if not self.deep_predictor.is_loaded:
                self.deep_predictor = None
        except Exception:
            self.deep_predictor = None

    def _prepare_features(self, df: pd.DataFrame, fit_encoder: bool = False) -> tuple[np.ndarray, list[str]]:
        """Build the feature matrix for the model.

        Args:
            df: DataFrame holding at least `self.feature_columns` and `surge_type`.
            fit_encoder: fit the (persisted) LabelEncoder on the known surge types.

        Returns:
            (X, column_names): feature matrix and its column names — the raw
            feature columns followed by one-hot surge_type indicator columns.
        """
        X = df[self.feature_columns].copy()
        X = X.fillna(0)

        # NOTE: the encoder is fitted only so the pickled state stays complete;
        # encoding below is manual one-hot (unknown types get all-zero columns).
        if fit_encoder:
            self.surge_encoder.fit(SURGE_TYPES + ["UNKNOWN"])

        surge_col = df["surge_type"].fillna("UNKNOWN")
        # One-hot encode surge_type
        for st in SURGE_TYPES:
            X[f"surge_{st}"] = (surge_col == st).astype(int)

        return X.values, X.columns.tolist()

    def train(
        self,
        dataset: pd.DataFrame,
        optimize_hyperparams: bool = True,
        n_optuna_trials: int = 100,
        verbose: bool = True,
    ) -> dict:
        """
        Train the ensemble with walk-forward validation.

        Args:
            dataset: DataFrame produced by build_dataset_from_trades()
            optimize_hyperparams: tune hyperparameters with Optuna
            n_optuna_trials: number of Optuna trials per model
            verbose: print progress

        Returns:
            dict of performance metrics (see _compute_metrics)

        Raises:
            ValueError: if the dataset holds fewer than 50 trades
        """
        if len(dataset) < 50:
            raise ValueError(f"Pas assez de données: {len(dataset)} trades (min: 50)")

        # Sort chronologically so the split below is a true walk-forward split.
        dataset = dataset.sort_values("entry_time").reset_index(drop=True)

        y = dataset["target_profitable"].values
        X, col_names = self._prepare_features(dataset, fit_encoder=True)
        self.final_feature_names = col_names

        if verbose:
            pos = y.sum()
            neg = len(y) - pos
            print(f"\n{'='*60}")
            print(f"  Signal Classifier — Training")
            print(f"{'='*60}")
            print(f"  Samples: {len(y)} ({pos} profitable, {neg} perdants)")
            print(f"  Features: {X.shape[1]}")
            print(f"  Balance: {pos/len(y)*100:.1f}% positif")

        # ─── Walk-Forward Split ───
        # Train on the oldest 75%, validate on the most recent 25%.
        split_idx = int(len(dataset) * 0.75)
        X_train, X_val = X[:split_idx], X[split_idx:]
        y_train, y_val = y[:split_idx], y[split_idx:]

        if verbose:
            print(f"  Train: {len(y_train)} | Validation: {len(y_val)}")
            train_dates = dataset["entry_time"].iloc[:split_idx]
            val_dates = dataset["entry_time"].iloc[split_idx:]
            print(f"  Train period: {str(train_dates.iloc[0])[:10]} → {str(train_dates.iloc[-1])[:10]}")
            print(f"  Valid period: {str(val_dates.iloc[0])[:10]} → {str(val_dates.iloc[-1])[:10]}")

        # ─── Hyperparameter Optimization ───
        # GPU-optimized params (written by an offline tuning job) are preferred
        # when present, unless the caller explicitly asked for fresh Optuna tuning.
        gpu_params_path = MODELS_DIR / "optimized_params.json"
        if gpu_params_path.exists() and not optimize_hyperparams:
            gpu_params = json.loads(gpu_params_path.read_text())
            best_lgb_params = gpu_params["lgb_params"]
            best_xgb_params = gpu_params["xgb_params"]
            if verbose:
                print(f"\n  🎮 Using GPU-optimized params (AUC: {gpu_params.get('ensemble_auc', '?')})")
        elif optimize_hyperparams:
            if verbose:
                print(f"\n  🔍 Optuna optimization ({n_optuna_trials} trials)...")

            best_lgb_params = self._optimize_lgb(X_train, y_train, X_val, y_val, n_optuna_trials)
            best_xgb_params = self._optimize_xgb(X_train, y_train, X_val, y_val, n_optuna_trials)
        else:
            best_lgb_params = self._default_lgb_params()
            best_xgb_params = self._default_xgb_params()

        # ─── Train Final Models ───
        if verbose:
            print(f"\n  🏋️ Training final models...")

        # LightGBM — scale_pos_weight compensates class imbalance (neg/pos ratio).
        pos_weight = (len(y_train) - y_train.sum()) / max(y_train.sum(), 1)
        lgb_train = lgb.Dataset(X_train, y_train)
        lgb_val = lgb.Dataset(X_val, y_val, reference=lgb_train)

        lgb_params = {
            **best_lgb_params,
            "objective": "binary",
            "metric": "auc",
            "verbosity": -1,
            "scale_pos_weight": pos_weight,
            "seed": 42,
        }
        callbacks = [lgb.log_evaluation(period=0)]
        self.lgb_model = lgb.train(
            lgb_params, lgb_train,
            num_boost_round=1000,
            valid_sets=[lgb_val],
            callbacks=[lgb.early_stopping(50), *callbacks],
        )

        # XGBoost
        dtrain = xgb.DMatrix(X_train, label=y_train, feature_names=col_names)
        dval = xgb.DMatrix(X_val, label=y_val, feature_names=col_names)

        xgb_params = {
            **best_xgb_params,
            "objective": "binary:logistic",
            "eval_metric": "auc",
            "scale_pos_weight": pos_weight,
            "seed": 42,
            "verbosity": 0,
        }
        self.xgb_model = xgb.train(
            xgb_params, dtrain,
            num_boost_round=1000,
            evals=[(dval, "val")],
            early_stopping_rounds=50,
            verbose_eval=False,
        )

        # ─── Ensemble Predictions ───
        lgb_pred = self.lgb_model.predict(X_val)
        xgb_pred = self.xgb_model.predict(dval)
        ensemble_pred = 0.5 * lgb_pred + 0.5 * xgb_pred

        # ─── Optimize Threshold ───
        # Search for the threshold maximizing expected PROFIT, not accuracy.
        self.optimal_threshold = self._optimize_threshold(ensemble_pred, y_val, dataset.iloc[split_idx:])

        # ─── Metrics ───
        y_pred = (ensemble_pred >= self.optimal_threshold).astype(int)
        metrics = self._compute_metrics(y_val, y_pred, ensemble_pred, dataset.iloc[split_idx:])

        # Feature importance (gain-based, LightGBM only)
        lgb_imp = self.lgb_model.feature_importance(importance_type="gain")
        self.feature_importance = dict(zip(col_names, lgb_imp))

        self.is_trained = True
        self.training_stats = {
            "trained_at": datetime.now(timezone.utc).isoformat(),
            "train_samples": int(len(y_train)),
            "val_samples": int(len(y_val)),
            "metrics": metrics,
            "optimal_threshold": float(self.optimal_threshold),
            "lgb_params": best_lgb_params,
            "xgb_params": best_xgb_params,
        }

        if verbose:
            self._print_results(metrics, col_names)

        return metrics

    def predict(self, features: dict, klines_df=None, timestamp_ms: int = 0) -> dict:
        """
        Predict whether a signal is profitable.

        If the GPU LSTM model is available AND klines_df is provided, blends
        LGB+XGB (tabular) with the LSTM (sequential) as a hybrid ensemble.

        Args:
            features: feature dict (output of compute_features_at_timestamp)
            klines_df: 1m klines DataFrame (optional, for the LSTM model)
            timestamp_ms: timestamp in ms (required when klines_df is provided)

        Returns:
            {
                "probability": float 0-1,
                "signal": "BUY" | "SKIP",
                "confidence": float 0-100,
                "threshold": float,
                "model_type": str,
            }
        """
        if not self.is_trained:
            # Untrained model: neutral probability, never filter a trade.
            return {"probability": 0.5, "signal": "BUY", "confidence": 0, "threshold": 0.5, "model_type": "none"}

        df = pd.DataFrame([features])
        X, _ = self._prepare_features(df)

        lgb_pred = self.lgb_model.predict(X)[0]
        dmatrix = xgb.DMatrix(X, feature_names=self.final_feature_names)
        xgb_pred = self.xgb_model.predict(dmatrix)[0]

        tabular_prob = 0.5 * lgb_pred + 0.5 * xgb_pred

        # Hybrid ensemble with deep model if available
        self._load_deep_predictor()
        deep_result = None
        if self.deep_predictor and klines_df is not None and timestamp_ms > 0:
            surge_type = features.get("surge_type", "UNKNOWN")
            deep_result = self.deep_predictor.predict(klines_df, timestamp_ms, surge_type)

        if deep_result and deep_result.get("model_type") == "lstm_attention":
            # Weighted ensemble: 60% tabular (proven) + 40% deep (captures temporal patterns)
            deep_prob = deep_result["probability"]
            prob = 0.6 * tabular_prob + 0.4 * deep_prob
            model_type = "hybrid_lgbxgb_lstm"
        else:
            prob = tabular_prob
            model_type = "lgbxgb"

        signal = "BUY" if prob >= self.optimal_threshold else "SKIP"
        # Confidence = distance to the threshold, normalized by the threshold.
        confidence = abs(prob - self.optimal_threshold) / self.optimal_threshold * 100
        confidence = min(confidence, 100)

        return {
            "probability": float(prob),
            "signal": signal,
            "confidence": float(confidence),
            "threshold": float(self.optimal_threshold),
            "model_type": model_type,
        }

    def predict_batch(self, features_list: list[dict]) -> list[dict]:
        """Predict for a list of signals (batch)."""
        return [self.predict(f) for f in features_list]

    # ─── Optuna Hyperparameter Optimization ───

    def _optimize_lgb(self, X_train, y_train, X_val, y_val, n_trials: int) -> dict:
        """Tune LightGBM hyperparameters with Optuna, maximizing validation AUC."""
        pos_weight = (len(y_train) - y_train.sum()) / max(y_train.sum(), 1)

        def objective(trial):
            params = {
                "objective": "binary",
                "metric": "auc",
                "verbosity": -1,
                "scale_pos_weight": pos_weight,
                "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.3, log=True),
                "num_leaves": trial.suggest_int("num_leaves", 15, 127),
                "max_depth": trial.suggest_int("max_depth", 3, 12),
                "min_child_samples": trial.suggest_int("min_child_samples", 5, 50),
                "subsample": trial.suggest_float("subsample", 0.5, 1.0),
                "colsample_bytree": trial.suggest_float("colsample_bytree", 0.5, 1.0),
                "reg_alpha": trial.suggest_float("reg_alpha", 1e-8, 10.0, log=True),
                "reg_lambda": trial.suggest_float("reg_lambda", 1e-8, 10.0, log=True),
                "min_split_gain": trial.suggest_float("min_split_gain", 0.0, 1.0),
            }

            train_set = lgb.Dataset(X_train, y_train)
            val_set = lgb.Dataset(X_val, y_val, reference=train_set)

            model = lgb.train(
                params, train_set,
                num_boost_round=500,
                valid_sets=[val_set],
                callbacks=[lgb.early_stopping(30), lgb.log_evaluation(period=0)],
            )
            pred = model.predict(X_val)
            return roc_auc_score(y_val, pred)

        study = optuna.create_study(direction="maximize")
        study.optimize(objective, n_trials=n_trials, n_jobs=4, show_progress_bar=False)
        return study.best_params

    def _optimize_xgb(self, X_train, y_train, X_val, y_val, n_trials: int) -> dict:
        """Tune XGBoost hyperparameters with Optuna, maximizing validation AUC."""
        pos_weight = (len(y_train) - y_train.sum()) / max(y_train.sum(), 1)

        def objective(trial):
            params = {
                "objective": "binary:logistic",
                "eval_metric": "auc",
                "scale_pos_weight": pos_weight,
                "verbosity": 0,
                "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.3, log=True),
                "max_depth": trial.suggest_int("max_depth", 3, 10),
                "min_child_weight": trial.suggest_int("min_child_weight", 1, 20),
                "subsample": trial.suggest_float("subsample", 0.5, 1.0),
                "colsample_bytree": trial.suggest_float("colsample_bytree", 0.5, 1.0),
                "reg_alpha": trial.suggest_float("reg_alpha", 1e-8, 10.0, log=True),
                "reg_lambda": trial.suggest_float("reg_lambda", 1e-8, 10.0, log=True),
                "gamma": trial.suggest_float("gamma", 0.0, 5.0),
            }

            dtrain = xgb.DMatrix(X_train, label=y_train)
            dval = xgb.DMatrix(X_val, label=y_val)

            model = xgb.train(
                params, dtrain,
                num_boost_round=500,
                evals=[(dval, "val")],
                early_stopping_rounds=30,
                verbose_eval=False,
            )
            pred = model.predict(dval)
            return roc_auc_score(y_val, pred)

        study = optuna.create_study(direction="maximize")
        study.optimize(objective, n_trials=n_trials, n_jobs=4, show_progress_bar=False)
        return study.best_params

    def _optimize_threshold(
        self, pred_proba: np.ndarray, y_true: np.ndarray, val_df: pd.DataFrame
    ) -> float:
        """
        Optimize the decision threshold to maximize PROFIT, not accuracy.
        A filtered trade (SKIP) = $0 gain; a passed trade = its realized PnL.

        Args:
            pred_proba: ensemble probabilities on the validation set
            y_true: validation labels (unused here; kept for interface symmetry)
            val_df: validation rows, must contain `target_pnl_pct`

        Returns:
            threshold in [0.30, 0.74] maximizing the trade-rate-weighted profit
            (falls back to 0.5 when every candidate is rejected).
        """
        pnl_values = val_df["target_pnl_pct"].values

        best_threshold = 0.5
        best_profit = -float("inf")

        for threshold in np.arange(0.30, 0.75, 0.01):
            passed = pred_proba >= threshold
            if passed.sum() == 0:
                continue
            # Profit = sum of the PnL of trades that pass the filter
            profit = pnl_values[passed].sum()
            # Penalty: we still want to trade (not filter everything out)
            trade_rate = passed.sum() / len(passed)
            if trade_rate < 0.15:  # filter too aggressive
                continue
            # Score = profit weighted by the trade rate
            score = profit * (0.5 + 0.5 * trade_rate)

            if score > best_profit:
                best_profit = score
                best_threshold = threshold

        return best_threshold

    @staticmethod
    def _default_lgb_params() -> dict:
        """Sane LightGBM defaults used when Optuna tuning is disabled."""
        return {
            "learning_rate": 0.05,
            "num_leaves": 31,
            "max_depth": 6,
            "min_child_samples": 20,
            "subsample": 0.8,
            "colsample_bytree": 0.8,
            "reg_alpha": 0.1,
            "reg_lambda": 1.0,
        }

    @staticmethod
    def _default_xgb_params() -> dict:
        """Sane XGBoost defaults used when Optuna tuning is disabled."""
        return {
            "learning_rate": 0.05,
            "max_depth": 6,
            "min_child_weight": 5,
            "subsample": 0.8,
            "colsample_bytree": 0.8,
            "reg_alpha": 0.1,
            "reg_lambda": 1.0,
            "gamma": 0.1,
        }

    # ─── Metrics & Reporting ───

    @staticmethod
    def _compute_metrics(
        y_true: np.ndarray, y_pred: np.ndarray, y_proba: np.ndarray,
        val_df: pd.DataFrame,
    ) -> dict:
        """Classification metrics plus a PnL simulation of the filter's impact."""
        pnl_values = val_df["target_pnl_pct"].values
        passed = y_pred == 1
        filtered = y_pred == 0

        # PnL simulation
        pnl_passed = pnl_values[passed].sum() if passed.any() else 0
        pnl_filtered = pnl_values[filtered].sum() if filtered.any() else 0
        pnl_all = pnl_values.sum()

        # How many bad trades did the filter remove?
        bad_trades_total = (y_true == 0).sum()
        bad_trades_filtered = ((y_true == 0) & filtered).sum()
        good_trades_total = (y_true == 1).sum()
        good_trades_kept = ((y_true == 1) & passed).sum()

        return {
            "accuracy": float(accuracy_score(y_true, y_pred)),
            "precision": float(precision_score(y_true, y_pred, zero_division=0)),
            "recall": float(recall_score(y_true, y_pred, zero_division=0)),
            "f1": float(f1_score(y_true, y_pred, zero_division=0)),
            "auc_roc": float(roc_auc_score(y_true, y_proba)),
            "trades_passed": int(passed.sum()),
            "trades_filtered": int(filtered.sum()),
            "trade_rate": float(passed.sum() / len(y_true)),
            "pnl_with_filter": float(pnl_passed),
            "pnl_without_filter": float(pnl_all),
            "pnl_improvement": float(pnl_passed - pnl_all),
            "bad_trades_filtered_pct": float(bad_trades_filtered / max(bad_trades_total, 1) * 100),
            "good_trades_kept_pct": float(good_trades_kept / max(good_trades_total, 1) * 100),
            "win_rate_filtered": float(
                good_trades_kept / max(passed.sum(), 1) * 100
            ),
        }

    def _print_results(self, metrics: dict, feature_names: list):
        """Pretty-print validation metrics, trading impact, and top features."""
        print(f"\n{'─'*60}")
        print(f"  📊 Résultats Validation (walk-forward)")
        print(f"{'─'*60}")
        print(f"  Seuil optimal:        {self.optimal_threshold:.2f}")
        print(f"  AUC-ROC:              {metrics['auc_roc']:.3f}")
        print(f"  Accuracy:             {metrics['accuracy']:.1%}")
        print(f"  Precision:            {metrics['precision']:.1%}")
        print(f"  Recall:               {metrics['recall']:.1%}")
        print(f"  F1-Score:             {metrics['f1']:.3f}")
        print()
        print(f"  📈 Impact Trading:")
        print(f"  Trades passés:        {metrics['trades_passed']} / {metrics['trades_passed'] + metrics['trades_filtered']}")
        print(f"  Trade rate:           {metrics['trade_rate']:.1%}")
        print(f"  Mauvais trades filtrés: {metrics['bad_trades_filtered_pct']:.0f}%")
        print(f"  Bons trades conservés:  {metrics['good_trades_kept_pct']:.0f}%")
        print(f"  Win rate filtré:      {metrics['win_rate_filtered']:.1f}%")
        print()
        print(f"  💰 PnL Simulation:")
        print(f"  Sans filtre:          {metrics['pnl_without_filter']:+.2f}%")
        print(f"  Avec filtre IA:       {metrics['pnl_with_filter']:+.2f}%")
        print(f"  Amélioration:         {metrics['pnl_improvement']:+.2f}%")

        # Top features (guard against a zero top importance → division by zero)
        print(f"\n  🔑 Top 15 Features:")
        sorted_imp = sorted(self.feature_importance.items(), key=lambda x: x[1], reverse=True)
        max_imp = sorted_imp[0][1] if sorted_imp and sorted_imp[0][1] > 0 else 1.0
        for name, imp in sorted_imp[:15]:
            bar = "█" * int(imp / max_imp * 20)
            print(f"    {name:30s} {bar} {imp:.0f}")

    # ─── Save/Load ───

    def save(self, path: Optional[str] = None):
        """Persist the trained model state (pickle) to `path` or the default location."""
        if path is None:
            MODELS_DIR.mkdir(parents=True, exist_ok=True)
            path = str(MODELS_DIR / "signal_classifier.pkl")

        state = {
            "lgb_model": self.lgb_model,
            "xgb_model": self.xgb_model,
            "feature_columns": self.feature_columns,
            "final_feature_names": self.final_feature_names,
            "surge_encoder": self.surge_encoder,
            "optimal_threshold": self.optimal_threshold,
            "training_stats": self.training_stats,
            "feature_importance": self.feature_importance,
            "is_trained": self.is_trained,
        }
        with open(path, "wb") as f:
            pickle.dump(state, f)
        print(f"  💾 Modèle sauvegardé: {path}")

    @classmethod
    def load(cls, path: Optional[str] = None) -> "SignalClassifier":
        """Load a saved model.

        Raises:
            FileNotFoundError: when no pickle exists at `path`.
        """
        if path is None:
            path = str(MODELS_DIR / "signal_classifier.pkl")
        if not os.path.exists(path):
            raise FileNotFoundError(f"Modèle non trouvé: {path}")

        # NOTE: pickle is only safe on files we produced ourselves.
        with open(path, "rb") as f:
            state = pickle.load(f)

        model = cls()
        model.lgb_model = state["lgb_model"]
        model.xgb_model = state["xgb_model"]
        model.feature_columns = state["feature_columns"]
        model.final_feature_names = state["final_feature_names"]
        model.surge_encoder = state["surge_encoder"]
        model.optimal_threshold = state["optimal_threshold"]
        model.training_stats = state["training_stats"]
        model.feature_importance = state["feature_importance"]
        model.is_trained = state["is_trained"]
        return model


# ─── Walk-Forward Backtester ───

def walk_forward_backtest(
    dataset: pd.DataFrame,
    train_days: int = 7,
    val_days: int = 2,
    step_days: int = 1,
    n_optuna_trials: int = 50,
    verbose: bool = True,
) -> dict:
    """
    Backtest the model with a walk-forward rolling window.

    Simulates daily recalibration: train on the last `train_days` days,
    predict the following `val_days` days, then step forward `step_days`.

    Args:
        dataset: DataFrame produced by build_dataset_from_trades()
        train_days: size of the training window, in days
        val_days: size of the validation window, in days
        step_days: how many days to advance between windows
        n_optuna_trials: Optuna trials per window retrain
        verbose: print per-window and aggregate results

    Returns:
        dict with aggregated PnL/trade counts and per-window details

    Raises:
        ValueError: if the dataset spans fewer than train_days + val_days days
    """
    # sort_values returns a copy, so adding entry_date does not mutate the caller's frame
    dataset = dataset.sort_values("entry_time").reset_index(drop=True)
    dataset["entry_date"] = pd.to_datetime(dataset["entry_time"]).dt.date

    dates = sorted(dataset["entry_date"].unique())
    if len(dates) < train_days + val_days:
        raise ValueError(f"Pas assez de jours ({len(dates)}) pour train={train_days}+val={val_days}")

    window_results = []

    if verbose:
        print(f"\n{'='*60}")
        print(f"  Walk-Forward Backtest")
        print(f"  Train: {train_days}j | Val: {val_days}j | Step: {step_days}j")
        print(f"  Période: {dates[0]} → {dates[-1]} ({len(dates)} jours)")
        print(f"{'='*60}")

    for i in range(train_days, len(dates) - val_days + 1, step_days):
        train_end_date = dates[i - 1]
        val_start_date = dates[i]
        val_end_date = dates[min(i + val_days - 1, len(dates) - 1)]

        train_start_date = dates[max(0, i - train_days)]

        # Split by calendar day (inclusive bounds on both windows)
        train_mask = (dataset["entry_date"] >= train_start_date) & (dataset["entry_date"] <= train_end_date)
        val_mask = (dataset["entry_date"] >= val_start_date) & (dataset["entry_date"] <= val_end_date)

        train_df = dataset[train_mask]
        val_df = dataset[val_mask]

        # Skip windows too small to train or evaluate meaningfully
        if len(train_df) < 30 or len(val_df) < 3:
            continue

        # Train a fresh classifier per window; a failing window is skipped
        # (best-effort) rather than aborting the whole backtest.
        clf = SignalClassifier()
        try:
            clf.train(train_df, optimize_hyperparams=True, n_optuna_trials=n_optuna_trials, verbose=False)
        except Exception as e:
            if verbose:
                print(f"  ⚠️ Window {val_start_date}: train failed ({e})")
            continue

        # Predict on the validation window (same 50/50 blend as SignalClassifier.predict)
        X_val, _ = clf._prepare_features(val_df)
        lgb_pred = clf.lgb_model.predict(X_val)
        dval = xgb.DMatrix(X_val, feature_names=clf.final_feature_names)
        xgb_pred = clf.xgb_model.predict(dval)
        ensemble_pred = 0.5 * lgb_pred + 0.5 * xgb_pred

        y_val = val_df["target_profitable"].values
        y_pred = (ensemble_pred >= clf.optimal_threshold).astype(int)

        passed = y_pred == 1
        pnl_vals = val_df["target_pnl_pct"].values

        pnl_with = pnl_vals[passed].sum() if passed.any() else 0
        pnl_without = pnl_vals.sum()
        wr_with = y_val[passed].mean() * 100 if passed.any() else 0

        window_results.append({
            "val_date": str(val_start_date),
            "n_trades": len(val_df),
            "n_passed": int(passed.sum()),
            "pnl_with_filter": float(pnl_with),
            "pnl_without_filter": float(pnl_without),
            "win_rate_filtered": float(wr_with),
            "threshold": float(clf.optimal_threshold),
        })

        if verbose:
            delta = pnl_with - pnl_without
            sign = "✅" if delta >= 0 else "❌"
            print(f"  {val_start_date} | {len(val_df):3d} trades → {int(passed.sum()):3d} passés"
                  f" | WR: {wr_with:5.1f}% | PnL: {pnl_with:+6.1f}% vs {pnl_without:+6.1f}%"
                  f" | Δ={delta:+5.1f}% {sign}")

    # ─── Aggregate Results ───
    total_pnl_without = sum(w["pnl_without_filter"] for w in window_results)
    total_pnl_with = sum(w["pnl_with_filter"] for w in window_results)
    total_trades = sum(w["n_trades"] for w in window_results)
    total_passed = sum(w["n_passed"] for w in window_results)
    positive_windows = sum(1 for w in window_results if w["pnl_with_filter"] >= w["pnl_without_filter"])

    results = {
        "windows": len(window_results),
        "total_trades": total_trades,
        "total_passed": total_passed,
        "total_pnl_without_filter": float(total_pnl_without),
        "total_pnl_with_filter": float(total_pnl_with),
        "pnl_improvement": float(total_pnl_with - total_pnl_without),
        "positive_windows_pct": float(positive_windows / max(len(window_results), 1) * 100),
        "window_details": window_results,
    }

    if verbose:
        print(f"\n{'─'*60}")
        print(f"  📊 Résultats Walk-Forward Globaux")
        print(f"{'─'*60}")
        print(f"  Windows testées:    {len(window_results)}")
        print(f"  Trades total:       {total_trades} → {total_passed} passés ({total_passed/max(total_trades,1)*100:.0f}%)")
        print(f"  PnL sans filtre:    {total_pnl_without:+.2f}%")
        print(f"  PnL avec filtre IA: {total_pnl_with:+.2f}%")
        print(f"  Amélioration:       {total_pnl_with - total_pnl_without:+.2f}%")
        print(f"  Windows positives:  {positive_windows}/{len(window_results)} ({results['positive_windows_pct']:.0f}%)")

    return results
