feat: add LSTM, hybrid ensemble, PCA, scaler, ATR stops, rolling window

Major upgrade to the ML engine: - LSTM model type: 2-layer PyTorch LSTM with early stopping, GPU support - Hybrid mode: LSTM (60%) + XGBoost (40%) with agreement gating - StandardScaler normalization (critical for LSTM) - PCA dimensionality reduction (configurable variance retention) - ATR-based dynamic stop-loss/take-profit adapting to volatility - Rolling window retraining for more realistic time series validation - Updated LLM system prompt with docs for all new parameters - All backward compatible (xgboost/lightgbm/catboost still work) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-19 23:02:11 +00:00 · 2026-03-19 23:02:11 +00:00 · a21e635d9f
commit a21e635d9f
parent e24b6605d7
3 changed files with 534 additions and 79 deletions
--- a/config/initial_config.json
+++ b/config/initial_config.json
@ -1,43 +1,40 @@
 {
-  "model_type": "xgboost",
+  "model_type": "hybrid",
  "features": {
-    "technical_indicators": [
+    "technical_indicators": ["RSI_14", "RSI_7", "MACD_line", "MACD_signal", "MACD_hist", "BB_upper", "BB_lower", "BB_width", "ATR_14", "SMA_20", "SMA_50", "EMA_10", "EMA_20", "OBV", "stoch_k", "stoch_d", "williams_r", "CCI_20", "ROC_10"],
      "RSI_14", "RSI_7", "RSI_21",
      "MACD_line", "MACD_signal", "MACD_hist",
      "BB_upper", "BB_lower", "BB_width",
      "ATR_14",
      "SMA_5", "SMA_10", "SMA_20", "SMA_50", "SMA_200",
      "EMA_5", "EMA_10", "EMA_20", "EMA_50",
      "OBV",
      "stoch_k", "stoch_d",
      "williams_r",
      "CCI_20",
      "ROC_10",
      "keltner_upper", "keltner_lower"
    ],
    "lookback_periods": [3, 5, 10, 20],
    "use_volume_features": true,
    "use_volatility_features": true,
-    "use_candle_patterns": true,
+    "use_candle_patterns": false,
    "use_lag_features": true,
-    "lag_periods": [1, 2, 3, 5]
+    "lag_periods": [1, 2, 3, 5],
    "use_pca": true,
    "pca_variance": 0.95,
    "use_scaler": true
  },
  "target": {
    "type": "classification",
-    "direction": "long",
+    "direction": "both",
-    "horizon_candles": 6,
+    "horizon_candles": 8,
-    "threshold_pct": 1.0
+    "threshold_pct": 1.5
  },
  "hyperparameters": {
-    "learning_rate": 0.05,
+    "learning_rate": 0.001,
-    "max_depth": 6,
+    "max_depth": 5,
-    "n_estimators": 500,
+    "n_estimators": 300,
    "subsample": 0.8,
    "colsample_bytree": 0.8,
    "min_child_weight": 5,
-    "gamma": 0.1,
+    "gamma": 0.3,
    "reg_alpha": 0.1,
-    "reg_lambda": 1.0
+    "reg_lambda": 2.0,
    "lstm_hidden_size": 128,
    "lstm_num_layers": 2,
    "lstm_dropout": 0.3,
    "lstm_epochs": 100,
    "lstm_batch_size": 64,
    "lstm_sequence_length": 20,
    "lstm_patience": 10
  },
  "strategy": {
    "entry_threshold": 0.60,
@ -47,13 +44,19 @@
    "trailing_stop_pct": 1.5,
    "position_sizing": "confidence_scaled",
    "max_position_pct": 100,
-    "min_confidence_to_trade": 0.55
+    "min_confidence_to_trade": 0.55,
    "dynamic_sl_tp": true,
    "atr_sl_multiplier": 1.5,
    "atr_tp_multiplier": 3.0
  },
  "training": {
    "walk_forward_windows": 5,
    "train_pct": 0.7,
    "validation_pct": 0.15,
-    "test_pct": 0.15
+    "test_pct": 0.15,
    "rolling_window": true,
    "rolling_train_size": 2000,
    "rolling_test_size": 200
  },
  "timeframe": "4h"
 }
--- a/llm_client/analyzer.py
+++ b/llm_client/analyzer.py
@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 """
-LLM Strategy Analyzer — Calls Ollama on Mac Mini to analyze results
+LLM Strategy Analyzer -- Calls Ollama on Mac Mini to analyze results
 and suggest config modifications for the next iteration.
 """
@ -14,17 +14,19 @@ MODEL = "qwen3.5:27b"
 SYSTEM_PROMPT = """You are a quantitative trading strategy optimizer. You analyze ML model backtesting results for a BTC/USDT trading strategy and suggest precise modifications to improve performance.
 ## Your Task
-Given the current configuration and results, suggest 1-3 specific, justified changes to the configuration for the next iteration. Be methodical and scientific — change one thing at a time when possible.
+Given the current configuration and results, suggest 1-3 specific, justified changes to the configuration for the next iteration. Be methodical and scientific -- change one thing at a time when possible.
 ## Config Parameters You Can Modify
-**model_type**: "xgboost", "lightgbm", "catboost", or "ensemble"
+**model_type**: "xgboost", "lightgbm", "catboost", "ensemble", "lstm", or "hybrid"
  - xgboost: Generally best for structured data, fast GPU training
  - lightgbm: Faster training, good with large feature sets
  - catboost: Handles feature interactions well, less tuning needed
-  - ensemble: Combines all three, reduces variance but slower
+  - ensemble: Combines xgboost+lightgbm+catboost, reduces variance but slower
  - lstm: PyTorch LSTM neural network, captures temporal/sequential patterns in price data
  - hybrid: Combines LSTM (60% weight) + XGBoost (40% weight). Only enters trades when BOTH models agree on direction. The hybrid model typically outperforms single models -- LSTM captures temporal patterns while XGBoost handles feature interactions. Recommended as default.
-**hyperparameters**:
+**hyperparameters** (gradient boosting):
  - learning_rate (0.001-0.3): Lower = more robust but slower. If overfitting, decrease.
  - max_depth (3-10): Controls model complexity. Deeper = more overfitting risk.
  - n_estimators (100-2000): More trees = better fit but diminishing returns.
@ -35,18 +37,30 @@ Given the current configuration and results, suggest 1-3 specific, justified cha
  - reg_alpha (0-10): L1 regularization. Encourages sparsity.
  - reg_lambda (0-10): L2 regularization. Prevents large weights.
 **hyperparameters** (LSTM-specific, used by lstm and hybrid model_types):
  - lstm_hidden_size (32-256): LSTM hidden units. Larger = more capacity but overfitting risk. Default 128.
  - lstm_num_layers (1-4): Stacked LSTM layers. 2 is usually optimal. More layers need more data.
  - lstm_dropout (0.1-0.5): Dropout between LSTM layers and before output. Higher = more regularization.
  - lstm_epochs (50-200): Max training epochs. Early stopping usually triggers before this.
  - lstm_batch_size (32-128): Training batch size. Smaller = noisier gradients but better generalization.
  - lstm_sequence_length (10-50): How many past candles the LSTM sees per prediction. Longer = more context but more memory. Default 20.
  - lstm_patience (5-20): Early stopping patience on validation loss. Lower = stop sooner.
 **target**:
-  - direction: "long" or "both"
+  - direction: "long", "short", or "both"
  - horizon_candles (1-20): How far ahead to predict. Longer = smoother but lagging.
  - threshold_pct (0.3-3.0): Minimum move % to label as positive. Higher = fewer but clearer signals.
 **strategy**:
  - entry_threshold (0.5-0.8): Min prediction probability to enter trade. Higher = fewer trades, higher quality.
-  - stop_loss_pct (0.5-5.0): Max loss before exit. Tighter = more stopped out.
+  - stop_loss_pct (0.5-5.0): Max loss before exit (used when dynamic_sl_tp is false).
-  - take_profit_pct (1.0-10.0): Target profit. Should be > stop_loss for positive expectancy.
+  - take_profit_pct (1.0-10.0): Target profit (used when dynamic_sl_tp is false). Should be > stop_loss for positive expectancy.
  - trailing_stop_pct (0.5-3.0): Trailing stop distance. Tighter = locks profit faster but exits early.
  - min_confidence_to_trade (0.5-0.9): Absolute minimum confidence to consider.
  - exit_type: "trailing_stop" or "fixed" (just SL/TP)
  - dynamic_sl_tp (true/false): Use ATR-based dynamic stop-loss and take-profit instead of fixed percentages. Adapts to current volatility. Recommended: true.
  - atr_sl_multiplier (1.0-3.0): ATR multiplier for stop-loss. E.g., 1.5 means SL = 1.5 * ATR(14). Lower = tighter stops.
  - atr_tp_multiplier (2.0-5.0): ATR multiplier for take-profit. E.g., 3.0 means TP = 3.0 * ATR(14). Should be > atr_sl_multiplier.
 **features**:
  - use_volume_features (true/false): Volume features can be noisy in crypto.
@ -54,9 +68,15 @@ Given the current configuration and results, suggest 1-3 specific, justified cha
  - use_lag_features (true/false): Lagged features capture momentum.
  - lag_periods: List of lag periods [1,2,3,5,10]
  - lookback_periods: List of lookback windows [3,5,10,20]
  - use_scaler (true/false): Apply StandardScaler normalization to all features. Critical for LSTM, also helps gradient boosting. Recommended: true.
  - use_pca (true/false): Apply PCA dimensionality reduction after scaling. Reduces noise and multicollinearity. Recommended with many features.
  - pca_variance (0.80-0.99): Fraction of variance to retain with PCA. 0.95 keeps 95% of information. Lower = fewer dimensions, more noise removed.
 **training**:
-  - walk_forward_windows (3-10): More windows = more robust but less data per window.
+  - walk_forward_windows (3-10): More windows = more robust but less data per window. Used when rolling_window is false.
  - rolling_window (true/false): Use rolling window instead of static walk-forward splits. Trains on last N candles, tests on next M, slides forward. More realistic for time series. Recommended: true.
  - rolling_train_size (1000-5000): Number of candles in the rolling training window. Larger = more data but older patterns.
  - rolling_test_size (100-500): Number of candles in the rolling test window. Smaller = more retraining, better adaptation.
 ## Key Metrics to Optimize (in priority order)
 1. **Sharpe Ratio** (target: > 2.0): Risk-adjusted return. Most important metric.
@ -66,16 +86,18 @@ Given the current configuration and results, suggest 1-3 specific, justified cha
 5. **Trade Count**: Need enough trades for statistical significance (>50).
 ## Decision Guidelines
- If Sharpe < 1.0: The strategy is not working well. Consider larger changes.
+- If Sharpe < 1.0: The strategy is not working well. Consider larger changes (switch to hybrid, enable PCA/scaler, adjust target).
 - If Sharpe 1.0-1.5: Decent. Fine-tune hyperparameters and thresholds.
 - If Sharpe 1.5-2.0: Good. Make small, targeted improvements.
 - If Sharpe > 2.0: Very good. Be careful not to overfit.
- If win_rate < 0.50 but profit_factor > 1.5: Strategy relies on big wins — ok, tighten SL.
+- If win_rate < 0.50 but profit_factor > 1.5: Strategy relies on big wins -- ok, tighten SL.
- If win_rate > 0.60 but profit_factor < 1.2: Many small wins but losses are too big — widen TP or tighten SL.
+- If win_rate > 0.60 but profit_factor < 1.2: Many small wins but losses are too big -- widen TP or tighten SL.
 - If trade_count < 30: Not enough trades. Lower entry_threshold or min_confidence.
- If max_drawdown < -20%: Too risky. Increase regularization, tighten stop loss.
+- If max_drawdown < -20%: Too risky. Increase regularization, tighten stop loss, enable dynamic_sl_tp.
- If per_window_sharpe has high variance: Model is not stable. More regularization or simpler model.
+- If per_window_sharpe has high variance: Model is not stable. More regularization, enable PCA, or try hybrid.
- Check feature_importances: If top features make financial sense, good. If random features dominate, possible overfitting.
+- Check feature_importances: If top features make financial sense, good. If random features dominate, possible overfitting -- enable PCA or reduce features.
 - For LSTM/hybrid: if underfitting, increase lstm_hidden_size or lstm_num_layers. If overfitting, increase lstm_dropout or decrease lstm_sequence_length.
 - The hybrid model combining LSTM + XGBoost typically outperforms single models. LSTM captures temporal patterns while XGBoost handles feature interactions. Use hybrid as the default unless you have a specific reason not to.
 ## Response Format
 You MUST respond with ONLY a JSON object (no markdown, no explanation outside the JSON):
--- a/ml_engine/train_and_backtest.py
+++ b/ml_engine/train_and_backtest.py
@ -21,6 +21,9 @@ from ta.trend import MACD, CCIIndicator, SMAIndicator, EMAIndicator
 from ta.volatility import BollingerBands, AverageTrueRange, KeltnerChannel
 from ta.volume import OnBalanceVolumeIndicator
 from sklearn.preprocessing import StandardScaler
 from sklearn.decomposition import PCA
 warnings.filterwarnings("ignore")
 # ---------------------------------------------------------------------------
@ -140,15 +143,179 @@ def create_target(df: pd.DataFrame, config: dict) -> pd.Series:
    return target
 # ---------------------------------------------------------------------------
 # LSTM Model (PyTorch)
 # ---------------------------------------------------------------------------
 def get_device():
    """Return the torch device to run on: CUDA when available, otherwise CPU."""
    import torch

    device_name = "cuda" if torch.cuda.is_available() else "cpu"
    return torch.device(device_name)
 class LSTMClassifier:
    """Binary classifier for trading signals built on a stacked PyTorch LSTM.

    Offers ``fit`` / ``predict_proba`` / ``predict``. The prediction for row
    ``i`` is computed from the ``sequence_length`` rows *before* it (rows
    ``i - sequence_length`` .. ``i - 1``), so the first ``sequence_length``
    rows of any input yield no output: results are shorter than the input by
    ``sequence_length`` rows and callers must trim their frames accordingly.
    """

    def __init__(self, input_size, hp):
        """Create the network and move it to the device chosen by ``get_device``.

        Args:
            input_size: Number of features per time step.
            hp: Hyperparameter dict. Keys read here (default in parentheses):
                ``lstm_sequence_length`` (20), ``lstm_hidden_size`` (128),
                ``lstm_num_layers`` (2), ``lstm_dropout`` (0.3). ``fit`` reads
                further keys from the same dict.
        """
        import torch.nn as nn

        self.hp = hp
        self.device = get_device()
        self.sequence_length = hp.get("lstm_sequence_length", 20)
        hidden_size = hp.get("lstm_hidden_size", 128)
        num_layers = hp.get("lstm_num_layers", 2)
        dropout = hp.get("lstm_dropout", 0.3)

        class _LSTMNet(nn.Module):
            def __init__(self_net):
                super().__init__()
                self_net.lstm = nn.LSTM(
                    input_size=input_size,
                    hidden_size=hidden_size,
                    num_layers=num_layers,
                    batch_first=True,
                    # nn.LSTM only applies dropout between stacked layers,
                    # so it is disabled for a single-layer network.
                    dropout=dropout if num_layers > 1 else 0.0,
                )
                self_net.dropout = nn.Dropout(dropout)
                self_net.fc = nn.Linear(hidden_size, 1)
                self_net.sigmoid = nn.Sigmoid()

            def forward(self_net, x):
                # x: (batch, seq_len, features)
                lstm_out, _ = self_net.lstm(x)
                # Classify from the output at the last time step only.
                last_hidden = lstm_out[:, -1, :]
                out = self_net.dropout(last_hidden)
                out = self_net.fc(out)
                out = self_net.sigmoid(out)
                return out.squeeze(-1)

        self.model = _LSTMNet().to(self.device)
        # No per-feature importances are computed for this model.
        self.feature_importances_ = None

    def _make_sequences(self, X, y=None):
        """Convert flat feature rows into overlapping look-back windows.

        Window ``k`` holds rows ``k`` .. ``k + sequence_length - 1`` and is
        paired with the target of row ``k + sequence_length``.

        Args:
            X: 2-D array-like of shape (rows, features).
            y: Optional 1-D array-like of targets, positionally aligned with X.

        Returns:
            A float32 tensor of shape (rows - sequence_length, sequence_length,
            features) on ``self.device``; a ``(X_seq, y_seq)`` pair when ``y``
            is given. When there are not enough rows for a single window the
            tensors are empty but keep their trailing dimensions.
        """
        import torch

        seq_len = self.sequence_length
        X = np.asarray(X, dtype=np.float32)
        n_rows = len(X)
        if n_rows > seq_len:
            windows = np.stack([X[i - seq_len:i] for i in range(seq_len, n_rows)])
        else:
            # Too short for even one window: keep a well-formed empty batch
            # instead of the shapeless 1-D tensor an empty list would produce.
            windows = np.empty((0, seq_len) + X.shape[1:], dtype=np.float32)
        X_seq = torch.tensor(windows, dtype=torch.float32, device=self.device)
        if y is None:
            return X_seq
        targets = np.asarray(y, dtype=np.float32)[seq_len:n_rows]
        y_seq = torch.tensor(targets, dtype=torch.float32, device=self.device)
        return X_seq, y_seq

    def fit(self, X_train, y_train, X_val=None, y_val=None):
        """Train with Adam on binary cross-entropy, with optional early stopping.

        Keys read from ``hp`` (default in parentheses): ``learning_rate``
        (0.001), ``lstm_epochs`` (100), ``lstm_batch_size`` (64),
        ``lstm_patience`` (10).

        When a validation set long enough to form at least one sequence is
        supplied, training stops after ``lstm_patience`` epochs without a
        validation-loss improvement and the best weights seen are restored.

        Args:
            X_train: 2-D array of training features.
            y_train: 1-D array of 0/1 training targets.
            X_val: Optional 2-D array of validation features.
            y_val: Optional 1-D array of validation targets.

        Raises:
            ValueError: If ``X_train`` has too few rows to build one sequence.
                Without this check the loop would run zero batches and leave a
                randomly initialised model behind without any warning.
        """
        import torch
        import torch.nn as nn

        lr = self.hp.get("learning_rate", 0.001)
        epochs = self.hp.get("lstm_epochs", 100)
        batch_size = self.hp.get("lstm_batch_size", 64)
        patience = self.hp.get("lstm_patience", 10)

        X_seq, y_seq = self._make_sequences(X_train, y_train)
        n_samples = len(X_seq)
        if n_samples == 0:
            raise ValueError(
                f"LSTM needs more than {self.sequence_length} training rows "
                f"to build a sequence, got {len(X_train)}"
            )

        has_val = False
        if X_val is not None and y_val is not None:
            X_val_seq, y_val_seq = self._make_sequences(X_val, y_val)
            has_val = len(X_val_seq) > 0

        optimizer = torch.optim.Adam(self.model.parameters(), lr=lr)
        criterion = nn.BCELoss()

        best_val_loss = float("inf")
        patience_counter = 0
        best_state = None
        self.model.train()
        for epoch in range(epochs):
            # Reshuffle the training windows every epoch.
            perm = torch.randperm(n_samples)
            X_seq = X_seq[perm]
            y_seq = y_seq[perm]
            epoch_loss = 0.0
            n_batches = 0
            for start in range(0, n_samples, batch_size):
                end = min(start + batch_size, n_samples)
                X_batch = X_seq[start:end]
                y_batch = y_seq[start:end]
                optimizer.zero_grad()
                preds = self.model(X_batch)
                loss = criterion(preds, y_batch)
                loss.backward()
                # Clip the gradient norm to 1.0 before the optimizer step.
                torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1.0)
                optimizer.step()
                epoch_loss += loss.item()
                n_batches += 1
            avg_loss = epoch_loss / max(n_batches, 1)
            # Validation
            if has_val:
                self.model.eval()
                with torch.no_grad():
                    val_preds = self.model(X_val_seq)
                    val_loss = criterion(val_preds, y_val_seq).item()
                self.model.train()
                if val_loss < best_val_loss:
                    best_val_loss = val_loss
                    patience_counter = 0
                    # Snapshot the weights so the best epoch can be restored.
                    best_state = {k: v.clone() for k, v in self.model.state_dict().items()}
                else:
                    patience_counter += 1
                    if patience_counter >= patience:
                        print(f"    LSTM early stop at epoch {epoch+1}, val_loss={val_loss:.4f}")
                        break
                if (epoch + 1) % 20 == 0:
                    print(f"    Epoch {epoch+1}/{epochs}: loss={avg_loss:.4f}, val_loss={val_loss:.4f}")
            else:
                if (epoch + 1) % 20 == 0:
                    print(f"    Epoch {epoch+1}/{epochs}: loss={avg_loss:.4f}")
        if best_state is not None:
            self.model.load_state_dict(best_state)

    def predict_proba(self, X):
        """Return class probabilities as an array of shape (rows - sequence_length, 2).

        Column 0 is the negative-class probability and column 1 the positive
        one. An input with ``sequence_length`` rows or fewer gives an empty
        ``(0, 2)`` array rather than failing inside the network.
        """
        import torch

        self.model.eval()
        X_seq = self._make_sequences(X)
        if len(X_seq) == 0:
            return np.empty((0, 2), dtype=np.float32)
        with torch.no_grad():
            proba_pos = self.model(X_seq).cpu().numpy()
        proba_neg = 1.0 - proba_pos
        return np.column_stack([proba_neg, proba_pos])

    def predict(self, X):
        """Return hard 0/1 labels: 1 where the positive probability is >= 0.5."""
        proba = self.predict_proba(X)
        return (proba[:, 1] >= 0.5).astype(int)
 # ---------------------------------------------------------------------------
 # Model Building
 # ---------------------------------------------------------------------------
-def build_model(config: dict):
+def build_model(config: dict, input_size: int = 0):
    """Build the ML model based on config."""
    model_type = config.get("model_type", "xgboost")
    hp = config.get("hyperparameters", {})
    if model_type == "lstm":
        return LSTMClassifier(input_size, hp)
    if model_type == "xgboost":
        import xgboost as xgb
        # Detect GPU
@ -231,24 +398,61 @@ def build_model(config: dict):
 # ---------------------------------------------------------------------------
-# Walk-Forward Validation + Backtesting
+# Scaling & PCA
 # ---------------------------------------------------------------------------
 def apply_scaling_pca(X_train, X_val, X_test, config):
    """Standardise and/or PCA-reduce the three feature splits.

    Both steps are controlled by ``config["features"]``: ``use_scaler`` turns
    on ``StandardScaler`` and ``use_pca`` turns on ``PCA`` with
    ``pca_variance`` (default 0.95) as ``n_components``. Each transformer is
    fitted on ``X_train`` only and then applied to ``X_val`` (when it is not
    None) and ``X_test``.

    Returns:
        ``(X_train, X_val, X_test, scaler, pca)``; ``scaler`` / ``pca`` are
        None when the corresponding step is disabled.
    """
    def _fit_on_train_then_apply(transformer, train, val, test):
        # Fit on the training split, then reuse the fitted transformer.
        train = transformer.fit_transform(train)
        if val is not None:
            val = transformer.transform(val)
        test = transformer.transform(test)
        return train, val, test

    feature_settings = config.get("features", {})

    scaler = None
    if feature_settings.get("use_scaler", False):
        scaler = StandardScaler()
        X_train, X_val, X_test = _fit_on_train_then_apply(scaler, X_train, X_val, X_test)

    pca = None
    if feature_settings.get("use_pca", False):
        variance = feature_settings.get("pca_variance", 0.95)
        pca = PCA(n_components=variance, svd_solver="full")
        X_train, X_val, X_test = _fit_on_train_then_apply(pca, X_train, X_val, X_test)
        print(f"    PCA: {pca.n_components_} components (retaining {variance*100:.0f}% variance)")

    return X_train, X_val, X_test, scaler, pca
 # ---------------------------------------------------------------------------
 # Walk-Forward / Rolling Window Validation + Backtesting
 # ---------------------------------------------------------------------------
 def walk_forward_train_test(df: pd.DataFrame, feature_cols: list, config: dict) -> dict:
-    """Walk-forward validation with backtesting on each window."""
+    """Walk-forward or rolling window validation with backtesting."""
    training_cfg = config.get("training", {})
    strategy = config.get("strategy", {})
    model_type = config.get("model_type", "xgboost")
    use_rolling = training_cfg.get("rolling_window", False)
    if use_rolling:
        return rolling_window_train_test(df, feature_cols, config)
    # Standard walk-forward
    n_windows = training_cfg.get("walk_forward_windows", 5)
    train_pct = training_cfg.get("train_pct", 0.7)
    val_pct = training_cfg.get("validation_pct", 0.15)
    n = len(df)
    window_size = n // n_windows
    strategy = config.get("strategy", {})
    all_trades = []
    per_window_sharpe = []
-    feature_importances_sum = np.zeros(len(feature_cols))
+    feature_importances_sum = None
    fi_count = 0
    effective_n_features = len(feature_cols)
    for w in range(n_windows):
        start = w * window_size
@ -275,37 +479,23 @@ def walk_forward_train_test(df: pd.DataFrame, feature_cols: list, config: dict)
        y_val = val_df["target"].values
        X_test = test_df[feature_cols].values
-        # Train model
+        # Scale and PCA
-        model = build_model(config)
+        X_train, X_val, X_test, scaler, pca = apply_scaling_pca(X_train, X_val, X_test, config)
-        try:
+        current_n_features = X_train.shape[1]
-            model.fit(X_train, y_train)
+        if feature_importances_sum is None:
-        except Exception as e:
+            effective_n_features = current_n_features
-            print(f"  Window {w+1}: training failed -- {e}", file=sys.stderr)
+            feature_importances_sum = np.zeros(effective_n_features)
            continue
-        # Get predictions on test set
+        # Build and train
-        try:
+        trades, fi = _train_and_backtest_window(
-            proba = model.predict_proba(X_test)[:, 1]
+            X_train, y_train, X_val, y_val, X_test, test_df,
-        except Exception:
+            config, strategy, current_n_features, w, n_windows
-            preds = model.predict(X_test)
+        )
            proba = preds.astype(float)
-        # Extract feature importances
+        if fi is not None and len(fi) == effective_n_features:
-        try:
+            feature_importances_sum += fi
            if hasattr(model, "feature_importances_"):
                fi = model.feature_importances_
            elif hasattr(model, "get_booster"):
                fi_dict = model.get_booster().get_score(importance_type="gain")
                fi = np.array([fi_dict.get(f"f{i}", 0) for i in range(len(feature_cols))])
            else:
                fi = np.zeros(len(feature_cols))
            feature_importances_sum += fi / (fi.sum() + 1e-10)
            fi_count += 1
        except Exception:
            pass
        # Backtest on test set
        trades = backtest(test_df, proba, strategy)
        all_trades.extend(trades)
        # Window sharpe
@ -320,26 +510,253 @@ def walk_forward_train_test(df: pd.DataFrame, feature_cols: list, config: dict)
        print(f"  Window {w+1}/{n_windows}: {len(trades)} trades, sharpe={per_window_sharpe[-1]}")
-    return compile_results(all_trades, per_window_sharpe, feature_importances_sum, fi_count, feature_cols, df)
+    # Build feature names for output
    if pca is not None and config.get("features", {}).get("use_pca", False):
        effective_feature_names = [f"PC_{i+1}" for i in range(effective_n_features)]
    else:
        effective_feature_names = feature_cols
    if feature_importances_sum is None:
        feature_importances_sum = np.zeros(len(effective_feature_names))
    return compile_results(all_trades, per_window_sharpe, feature_importances_sum, fi_count, effective_feature_names, df)
 def rolling_window_train_test(df: pd.DataFrame, feature_cols: list, config: dict) -> dict:
    """Train and backtest on a window that slides forward through ``df``.

    Each step trains on ``rolling_train_size`` rows (the last
    ``validation_pct`` share of them is held out for validation), backtests on
    the following ``rolling_test_size`` rows, then advances by
    ``rolling_test_size``. Windows whose training slice has a single target
    class, or whose test slice has fewer than 10 rows, are skipped.

    Args:
        df: Frame holding the feature columns and a ``target`` column.
        feature_cols: Names of the feature columns to train on.
        config: Full run configuration; reads the ``training``, ``strategy``
            and ``features`` sections.

    Returns:
        The result of ``compile_results`` over all windows.

    Raises:
        ValueError: If ``rolling_test_size`` is not positive; the window would
            never advance and the loop would not terminate.
    """
    training_cfg = config.get("training", {})
    strategy = config.get("strategy", {})
    train_size = training_cfg.get("rolling_train_size", 2000)
    test_size = training_cfg.get("rolling_test_size", 200)
    val_pct = training_cfg.get("validation_pct", 0.15)
    if test_size <= 0:
        raise ValueError("rolling_test_size must be positive")
    n = len(df)
    # Upper bound on the number of windows; passed on for reporting only.
    total_possible = (n - train_size) // test_size
    all_trades = []
    per_window_sharpe = []
    feature_importances_sum = None
    fi_count = 0
    effective_n_features = len(feature_cols)
    window_count = 0
    # Bound up front: it is read after the loop, which may process no window
    # at all (too little data, or every window skipped).
    pca = None
    start = 0
    while start + train_size + test_size <= n:
        train_end = start + train_size
        test_end = train_end + test_size
        train_full = df.iloc[start:train_end]
        test_df = df.iloc[train_end:test_end]
        if len(test_df) < 10 or train_full["target"].nunique() < 2:
            start += test_size
            continue
        # Split train into train/val
        val_split = int(len(train_full) * (1.0 - val_pct))
        train_df = train_full.iloc[:val_split]
        val_df = train_full.iloc[val_split:]
        X_train = train_df[feature_cols].values
        y_train = train_df["target"].values
        X_val = val_df[feature_cols].values
        y_val = val_df["target"].values
        X_test = test_df[feature_cols].values
        # Scale and PCA
        X_train, X_val, X_test, _scaler, pca = apply_scaling_pca(X_train, X_val, X_test, config)
        current_n_features = X_train.shape[1]
        if feature_importances_sum is None:
            # The first processed window fixes the importance-vector length.
            effective_n_features = current_n_features
            feature_importances_sum = np.zeros(effective_n_features)
        window_count += 1
        trades, fi = _train_and_backtest_window(
            X_train, y_train, X_val, y_val, X_test, test_df,
            config, strategy, current_n_features, window_count - 1, total_possible
        )
        # Importances are only accumulated when their length matches the
        # first window's (PCA may keep a different component count later).
        if fi is not None and len(fi) == effective_n_features:
            feature_importances_sum += fi
            fi_count += 1
        all_trades.extend(trades)
        if trades:
            returns = [t["return_pct"] for t in trades]
            mean_r = np.mean(returns)
            std_r = np.std(returns) if len(returns) > 1 else 1.0
            sharpe = (mean_r / std_r) * np.sqrt(252 / max(1, len(trades))) if std_r > 0 else 0
            per_window_sharpe.append(round(sharpe, 3))
        else:
            per_window_sharpe.append(0.0)
        print(f"  Rolling window {window_count}: {len(trades)} trades, sharpe={per_window_sharpe[-1]}")
        start += test_size
    # After PCA the columns are principal components, not the input features.
    if pca is not None and config.get("features", {}).get("use_pca", False):
        effective_feature_names = [f"PC_{i+1}" for i in range(effective_n_features)]
    else:
        effective_feature_names = feature_cols
    if feature_importances_sum is None:
        feature_importances_sum = np.zeros(len(effective_feature_names))
    return compile_results(all_trades, per_window_sharpe, feature_importances_sum, fi_count, effective_feature_names, df)
 def _train_and_backtest_window(X_train, y_train, X_val, y_val, X_test, test_df,
                                config, strategy, n_features, w_idx, n_windows):
    """Train the configured model on one window and backtest its predictions.

    ``model_type == "hybrid"`` is delegated to ``_hybrid_train_and_backtest``.
    The LSTM is fitted with the validation split; every other model is fitted
    on the training split alone.

    Args:
        X_train, y_train: Training features and targets.
        X_val, y_val: Validation features and targets (used by the LSTM only).
        X_test: Test features, row-aligned with ``test_df``.
        test_df: Test frame handed to ``backtest``.
        config: Full run configuration.
        strategy: The ``strategy`` section of the configuration.
        n_features: Number of feature columns after scaling/PCA.
        w_idx: Zero-based window index, used in failure messages.
        n_windows: Total window count; not used by this function.

    Returns:
        ``(trades, feature_importances)``; ``([], None)`` when training fails.
    """
    model_type = config.get("model_type", "xgboost")
    if model_type == "hybrid":
        return _hybrid_train_and_backtest(
            X_train, y_train, X_val, y_val, X_test, test_df,
            config, strategy, n_features
        )
    # Single model path
    is_lstm = model_type == "lstm"
    if is_lstm:
        model = build_model(config, input_size=n_features)
    else:
        model = build_model(config)
    try:
        if is_lstm:
            model.fit(X_train, y_train, X_val, y_val)
        else:
            model.fit(X_train, y_train)
    except Exception as e:
        print(f"  Window {w_idx+1}: training failed -- {e}", file=sys.stderr)
        return [], None
    # LSTM output is shorter than its input by the sequence length, so the
    # test frame is trimmed to match. This is decided before predicting so the
    # fallback below uses the same alignment as the normal path.
    if is_lstm:
        seq_len = config.get("hyperparameters", {}).get("lstm_sequence_length", 20)
        test_df_aligned = test_df.iloc[seq_len:]
    else:
        test_df_aligned = test_df
    # Get predictions
    try:
        proba = model.predict_proba(X_test)[:, 1]
    except Exception as e:
        # Report the failure instead of hiding it, then fall back to hard
        # 0/1 predictions used as probabilities.
        print(f"  Window {w_idx+1}: predict_proba failed -- {e}; using predict()", file=sys.stderr)
        preds = model.predict(X_test)
        proba = preds.astype(float)
    # Feature importances
    fi = _extract_feature_importances(model, n_features)
    # Backtest
    trades = backtest(test_df_aligned, proba, strategy)
    return trades, fi
 def _hybrid_train_and_backtest(X_train, y_train, X_val, y_val, X_test, test_df,
                                config, strategy, n_features):
    """Blend an LSTM (weight 0.6) with XGBoost (weight 0.4) and backtest.

    Both models are trained on the same window. A model that fails to train
    or predict is replaced by a constant 0.5 score. Rows where the two models
    fall on different sides of 0.5 get a blended score of exactly 0.5.

    Returns:
        ``(trades, feature_importances)``; the importances come from the
        XGBoost model.
    """
    seq_len = config.get("hyperparameters", {}).get("lstm_sequence_length", 20)

    # --- LSTM leg ---------------------------------------------------------
    lstm_model = build_model({**config, "model_type": "lstm"}, input_size=n_features)
    try:
        lstm_model.fit(X_train, y_train, X_val, y_val)
        lstm_scores = lstm_model.predict_proba(X_test)[:, 1]
    except Exception as e:
        print(f"    Hybrid LSTM failed: {e}", file=sys.stderr)
        lstm_scores = np.full(max(0, len(X_test) - seq_len), 0.5)

    # --- XGBoost leg ------------------------------------------------------
    xgb_model = build_model({**config, "model_type": "xgboost"})
    try:
        xgb_model.fit(X_train, y_train)
        xgb_scores = xgb_model.predict_proba(X_test)[:, 1]
    except Exception as e:
        print(f"    Hybrid XGBoost failed: {e}", file=sys.stderr)
        xgb_scores = np.full(len(X_test), 0.5)

    # The LSTM yields nothing for the first seq_len rows, so drop those rows
    # from the XGBoost scores and cut everything to the common length.
    xgb_scores = xgb_scores[seq_len:]
    n_common = min(len(lstm_scores), len(xgb_scores))
    lstm_scores = lstm_scores[:n_common]
    xgb_scores = xgb_scores[:n_common]
    aligned_df = test_df.iloc[seq_len:seq_len + n_common]

    blended = 0.6 * lstm_scores + 0.4 * xgb_scores
    # Where the models disagree on direction, force the neutral score 0.5.
    disagree = (lstm_scores > 0.5) != (xgb_scores > 0.5)
    blended[disagree] = 0.5

    importances = _extract_feature_importances(xgb_model, n_features)
    trades = backtest(aligned_df, blended, strategy)
    return trades, importances
 def _extract_feature_importances(model, n_features):
    """Return the model's feature importances scaled to sum to ~1, or None.

    Reads ``model.feature_importances_`` when the attribute exists; otherwise
    builds a vector of length ``n_features`` from the booster's "gain" scores
    (keys ``f0``, ``f1``, ...; missing keys count as 0). Returns None when the
    model offers neither, or when anything raises along the way.
    """
    normalized = None
    try:
        if hasattr(model, "feature_importances_"):
            raw = model.feature_importances_
        elif not hasattr(model, "get_booster"):
            return None
        else:
            gain_by_name = model.get_booster().get_score(importance_type="gain")
            raw = np.array([gain_by_name.get(f"f{i}", 0) for i in range(n_features)])
        # The epsilon keeps the division defined when every importance is 0.
        total = raw.sum() + 1e-10
        normalized = raw / total
    except Exception:
        normalized = None
    return normalized
 # ---------------------------------------------------------------------------
 # Backtesting
 # ---------------------------------------------------------------------------
 def backtest(test_df: pd.DataFrame, proba: np.ndarray, strategy: dict) -> list:
-    """Simulate trades using model predictions."""
+    """Simulate trades using model predictions. Supports ATR-based dynamic SL/TP."""
    entry_threshold = strategy.get("entry_threshold", 0.6)
-    stop_loss = strategy.get("stop_loss_pct", 2.0) / 100
+    stop_loss_fixed = strategy.get("stop_loss_pct", 2.0) / 100
-    take_profit = strategy.get("take_profit_pct", 4.0) / 100
+    take_profit_fixed = strategy.get("take_profit_pct", 4.0) / 100
    trailing_stop = strategy.get("trailing_stop_pct", 1.5) / 100
    exit_type = strategy.get("exit_type", "trailing_stop")
    min_confidence = strategy.get("min_confidence_to_trade", 0.55)
    use_dynamic = strategy.get("dynamic_sl_tp", False)
    atr_sl_mult = strategy.get("atr_sl_multiplier", 1.5)
    atr_tp_mult = strategy.get("atr_tp_multiplier", 3.0)
    fee = 0.001  # 0.1% per trade
    closes = test_df["close"].values
    highs = test_df["high"].values
    lows = test_df["low"].values
    # ATR for dynamic SL/TP
    if use_dynamic and "ATR_14" in test_df.columns:
        atr_values = test_df["ATR_14"].values
    else:
        atr_values = None
    trades = []
    i = 0
    while i < len(closes) - 1:
        if i >= len(proba):
            break
        if proba[i] < min_confidence or proba[i] < entry_threshold:
            i += 1
            continue
@ -347,6 +764,16 @@ def backtest(test_df: pd.DataFrame, proba: np.ndarray, strategy: dict) -> list:
        # Enter trade
        entry_price = closes[i]
        confidence = proba[i]
        # Compute dynamic SL/TP if enabled
        if use_dynamic and atr_values is not None and not np.isnan(atr_values[i]):
            atr = atr_values[i]
            stop_loss = (atr * atr_sl_mult) / entry_price
            take_profit = (atr * atr_tp_mult) / entry_price
        else:
            stop_loss = stop_loss_fixed
            take_profit = take_profit_fixed
        # Position sizing based on confidence
        if strategy.get("position_sizing") == "confidence_scaled":
            if confidence > 0.8:
@ -522,7 +949,10 @@ def main():
    print(f"  Target distribution: {df['target'].value_counts().to_dict()}")
    # Run walk-forward training + backtesting
-    print("\nRunning walk-forward validation...")
+    model_type = config.get("model_type", "xgboost")
    rolling = config.get("training", {}).get("rolling_window", False)
    print(f"\nModel: {model_type}, Rolling: {rolling}")
    print("Running validation...")
    results = walk_forward_train_test(df, feature_cols, config)
    # Save results