feat: add LSTM, hybrid ensemble, PCA, scaler, ATR stops, rolling window

Major upgrade to the ML engine:

- LSTM model type: 2-layer PyTorch LSTM with early stopping, GPU support
- Hybrid mode: LSTM (60%) + XGBoost (40%) with agreement gating
- StandardScaler normalization (critical for LSTM)
- PCA dimensionality reduction (configurable variance retention)
- ATR-based dynamic stop-loss/take-profit adapting to volatility
- Rolling window retraining for more realistic time series validation
- Updated LLM system prompt with docs for all new parameters
- All backward compatible (xgboost/lightgbm/catboost still work)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-19 23:02:11 +00:00 · 2026-03-19 23:02:11 +00:00 · a21e635d9f
commit a21e635d9f
parent e24b6605d7
3 changed files with 534 additions and 79 deletions
--- a/config/initial_config.json
+++ b/config/initial_config.json
@ -1,43 +1,40 @@
 {
-  "model_type": "xgboost",
+  "model_type": "hybrid",
  "features": {
-    "technical_indicators": [
-      "RSI_14", "RSI_7", "RSI_21",
-      "MACD_line", "MACD_signal", "MACD_hist",
-      "BB_upper", "BB_lower", "BB_width",
-      "ATR_14",
-      "SMA_5", "SMA_10", "SMA_20", "SMA_50", "SMA_200",
-      "EMA_5", "EMA_10", "EMA_20", "EMA_50",
-      "OBV",
-      "stoch_k", "stoch_d",
-      "williams_r",
-      "CCI_20",
-      "ROC_10",
-      "keltner_upper", "keltner_lower"
-    ],
+    "technical_indicators": ["RSI_14", "RSI_7", "MACD_line", "MACD_signal", "MACD_hist", "BB_upper", "BB_lower", "BB_width", "ATR_14", "SMA_20", "SMA_50", "EMA_10", "EMA_20", "OBV", "stoch_k", "stoch_d", "williams_r", "CCI_20", "ROC_10"],
    "lookback_periods": [3, 5, 10, 20],
    "use_volume_features": true,
    "use_volatility_features": true,
-    "use_candle_patterns": true,
+    "use_candle_patterns": false,
    "use_lag_features": true,
-    "lag_periods": [1, 2, 3, 5]
+    "lag_periods": [1, 2, 3, 5],
+    "use_pca": true,
+    "pca_variance": 0.95,
+    "use_scaler": true
  },
  "target": {
    "type": "classification",
-    "direction": "long",
-    "horizon_candles": 6,
-    "threshold_pct": 1.0
+    "direction": "both",
+    "horizon_candles": 8,
+    "threshold_pct": 1.5
  },
  "hyperparameters": {
-    "learning_rate": 0.05,
-    "max_depth": 6,
-    "n_estimators": 500,
+    "learning_rate": 0.001,
+    "max_depth": 5,
+    "n_estimators": 300,
    "subsample": 0.8,
    "colsample_bytree": 0.8,
    "min_child_weight": 5,
-    "gamma": 0.1,
+    "gamma": 0.3,
    "reg_alpha": 0.1,
-    "reg_lambda": 1.0
+    "reg_lambda": 2.0,
+    "lstm_hidden_size": 128,
+    "lstm_num_layers": 2,
+    "lstm_dropout": 0.3,
+    "lstm_epochs": 100,
+    "lstm_batch_size": 64,
+    "lstm_sequence_length": 20,
+    "lstm_patience": 10
  },
  "strategy": {
    "entry_threshold": 0.60,
@ -47,13 +44,19 @@
    "trailing_stop_pct": 1.5,
    "position_sizing": "confidence_scaled",
    "max_position_pct": 100,
-    "min_confidence_to_trade": 0.55
+    "min_confidence_to_trade": 0.55,
+    "dynamic_sl_tp": true,
+    "atr_sl_multiplier": 1.5,
+    "atr_tp_multiplier": 3.0
  },
  "training": {
    "walk_forward_windows": 5,
    "train_pct": 0.7,
    "validation_pct": 0.15,
-    "test_pct": 0.15
+    "test_pct": 0.15,
+    "rolling_window": true,
+    "rolling_train_size": 2000,
+    "rolling_test_size": 200
  },
  "timeframe": "4h"
 }
--- a/llm_client/analyzer.py
+++ b/llm_client/analyzer.py
@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 """
-LLM Strategy Analyzer — Calls Ollama on Mac Mini to analyze results
+LLM Strategy Analyzer -- Calls Ollama on Mac Mini to analyze results
 and suggest config modifications for the next iteration.
 """

@ -14,17 +14,19 @@ MODEL = "qwen3.5:27b"
 SYSTEM_PROMPT = """You are a quantitative trading strategy optimizer. You analyze ML model backtesting results for a BTC/USDT trading strategy and suggest precise modifications to improve performance.

 ## Your Task
-Given the current configuration and results, suggest 1-3 specific, justified changes to the configuration for the next iteration. Be methodical and scientific — change one thing at a time when possible.
+Given the current configuration and results, suggest 1-3 specific, justified changes to the configuration for the next iteration. Be methodical and scientific -- change one thing at a time when possible.

 ## Config Parameters You Can Modify

-**model_type**: "xgboost", "lightgbm", "catboost", or "ensemble"
+**model_type**: "xgboost", "lightgbm", "catboost", "ensemble", "lstm", or "hybrid"
  - xgboost: Generally best for structured data, fast GPU training
  - lightgbm: Faster training, good with large feature sets
  - catboost: Handles feature interactions well, less tuning needed
-  - ensemble: Combines all three, reduces variance but slower
+  - ensemble: Combines xgboost+lightgbm+catboost, reduces variance but slower
+  - lstm: PyTorch LSTM neural network, captures temporal/sequential patterns in price data
+  - hybrid: Combines LSTM (60% weight) + XGBoost (40% weight). Only enters trades when BOTH models agree on direction. The hybrid model typically outperforms single models -- LSTM captures temporal patterns while XGBoost handles feature interactions. Recommended as default.

-**hyperparameters**:
+**hyperparameters** (gradient boosting):
  - learning_rate (0.001-0.3): Lower = more robust but slower. If overfitting, decrease.
  - max_depth (3-10): Controls model complexity. Deeper = more overfitting risk.
  - n_estimators (100-2000): More trees = better fit but diminishing returns.
@ -35,18 +37,30 @@ Given the current configuration and results, suggest 1-3 specific, justified cha
  - reg_alpha (0-10): L1 regularization. Encourages sparsity.
  - reg_lambda (0-10): L2 regularization. Prevents large weights.

+**hyperparameters** (LSTM-specific, used by lstm and hybrid model_types):
+  - lstm_hidden_size (32-256): LSTM hidden units. Larger = more capacity but overfitting risk. Default 128.
+  - lstm_num_layers (1-4): Stacked LSTM layers. 2 is usually optimal. More layers need more data.
+  - lstm_dropout (0.1-0.5): Dropout between LSTM layers and before output. Higher = more regularization.
+  - lstm_epochs (50-200): Max training epochs. Early stopping usually triggers before this.
+  - lstm_batch_size (32-128): Training batch size. Smaller = noisier gradients but better generalization.
+  - lstm_sequence_length (10-50): How many past candles the LSTM sees per prediction. Longer = more context but more memory. Default 20.
+  - lstm_patience (5-20): Early stopping patience on validation loss. Lower = stop sooner.
+
 **target**:
-  - direction: "long" or "both"
+  - direction: "long", "short", or "both"
  - horizon_candles (1-20): How far ahead to predict. Longer = smoother but lagging.
  - threshold_pct (0.3-3.0): Minimum move % to label as positive. Higher = fewer but clearer signals.

 **strategy**:
  - entry_threshold (0.5-0.8): Min prediction probability to enter trade. Higher = fewer trades, higher quality.
-  - stop_loss_pct (0.5-5.0): Max loss before exit. Tighter = more stopped out.
-  - take_profit_pct (1.0-10.0): Target profit. Should be > stop_loss for positive expectancy.
+  - stop_loss_pct (0.5-5.0): Max loss before exit (used when dynamic_sl_tp is false).
+  - take_profit_pct (1.0-10.0): Target profit (used when dynamic_sl_tp is false). Should be > stop_loss for positive expectancy.
  - trailing_stop_pct (0.5-3.0): Trailing stop distance. Tighter = locks profit faster but exits early.
  - min_confidence_to_trade (0.5-0.9): Absolute minimum confidence to consider.
  - exit_type: "trailing_stop" or "fixed" (just SL/TP)
+  - dynamic_sl_tp (true/false): Use ATR-based dynamic stop-loss and take-profit instead of fixed percentages. Adapts to current volatility. Recommended: true.
+  - atr_sl_multiplier (1.0-3.0): ATR multiplier for stop-loss. E.g., 1.5 means SL = 1.5 * ATR(14). Lower = tighter stops.
+  - atr_tp_multiplier (2.0-5.0): ATR multiplier for take-profit. E.g., 3.0 means TP = 3.0 * ATR(14). Should be > atr_sl_multiplier.

 **features**:
  - use_volume_features (true/false): Volume features can be noisy in crypto.
@ -54,9 +68,15 @@ Given the current configuration and results, suggest 1-3 specific, justified cha
  - use_lag_features (true/false): Lagged features capture momentum.
  - lag_periods: List of lag periods [1,2,3,5,10]
  - lookback_periods: List of lookback windows [3,5,10,20]
+  - use_scaler (true/false): Apply StandardScaler normalization to all features. Critical for LSTM, also helps gradient boosting. Recommended: true.
+  - use_pca (true/false): Apply PCA dimensionality reduction after scaling. Reduces noise and multicollinearity. Recommended with many features.
+  - pca_variance (0.80-0.99): Fraction of variance to retain with PCA. 0.95 keeps 95% of information. Lower = fewer dimensions, more noise removed.

 **training**:
-  - walk_forward_windows (3-10): More windows = more robust but less data per window.
+  - walk_forward_windows (3-10): More windows = more robust but less data per window. Used when rolling_window is false.
+  - rolling_window (true/false): Use rolling window instead of static walk-forward splits. Trains on last N candles, tests on next M, slides forward. More realistic for time series. Recommended: true.
+  - rolling_train_size (1000-5000): Number of candles in the rolling training window. Larger = more data but older patterns.
+  - rolling_test_size (100-500): Number of candles in the rolling test window. Smaller = more retraining, better adaptation.

 ## Key Metrics to Optimize (in priority order)
 1. **Sharpe Ratio** (target: > 2.0): Risk-adjusted return. Most important metric.
@ -66,16 +86,18 @@ Given the current configuration and results, suggest 1-3 specific, justified cha
 5. **Trade Count**: Need enough trades for statistical significance (>50).

 ## Decision Guidelines
- If Sharpe < 1.0: The strategy is not working well. Consider larger changes.
+- If Sharpe < 1.0: The strategy is not working well. Consider larger changes (switch to hybrid, enable PCA/scaler, adjust target).
 - If Sharpe 1.0-1.5: Decent. Fine-tune hyperparameters and thresholds.
 - If Sharpe 1.5-2.0: Good. Make small, targeted improvements.
 - If Sharpe > 2.0: Very good. Be careful not to overfit.
- If win_rate < 0.50 but profit_factor > 1.5: Strategy relies on big wins — ok, tighten SL.
- If win_rate > 0.60 but profit_factor < 1.2: Many small wins but losses are too big — widen TP or tighten SL.
+- If win_rate < 0.50 but profit_factor > 1.5: Strategy relies on big wins -- ok, tighten SL.
+- If win_rate > 0.60 but profit_factor < 1.2: Many small wins but losses are too big -- widen TP or tighten SL.
 - If trade_count < 30: Not enough trades. Lower entry_threshold or min_confidence.
- If max_drawdown < -20%: Too risky. Increase regularization, tighten stop loss.
- If per_window_sharpe has high variance: Model is not stable. More regularization or simpler model.
- Check feature_importances: If top features make financial sense, good. If random features dominate, possible overfitting.
+- If max_drawdown < -20%: Too risky. Increase regularization, tighten stop loss, enable dynamic_sl_tp.
+- If per_window_sharpe has high variance: Model is not stable. More regularization, enable PCA, or try hybrid.
+- Check feature_importances: If top features make financial sense, good. If random features dominate, possible overfitting -- enable PCA or reduce features.
+- For LSTM/hybrid: if underfitting, increase lstm_hidden_size or lstm_num_layers. If overfitting, increase lstm_dropout or decrease lstm_sequence_length.
+- The hybrid model combining LSTM + XGBoost typically outperforms single models. LSTM captures temporal patterns while XGBoost handles feature interactions. Use hybrid as the default unless you have a specific reason not to.

 ## Response Format
 You MUST respond with ONLY a JSON object (no markdown, no explanation outside the JSON):
--- a/ml_engine/train_and_backtest.py
+++ b/ml_engine/train_and_backtest.py
@ -21,6 +21,9 @@ from ta.trend import MACD, CCIIndicator, SMAIndicator, EMAIndicator
 from ta.volatility import BollingerBands, AverageTrueRange, KeltnerChannel
 from ta.volume import OnBalanceVolumeIndicator

+from sklearn.preprocessing import StandardScaler
+from sklearn.decomposition import PCA
+
 warnings.filterwarnings("ignore")

 # ---------------------------------------------------------------------------
@ -140,15 +143,179 @@ def create_target(df: pd.DataFrame, config: dict) -> pd.Series:
    return target


+# ---------------------------------------------------------------------------
+# LSTM Model (PyTorch)
+# ---------------------------------------------------------------------------
+
+def get_device():
+    """Return the torch device to run on: CUDA when available, otherwise CPU."""
+    # Function-scope import: torch is only loaded when this is called.
+    import torch
+
+    device_name = "cuda" if torch.cuda.is_available() else "cpu"
+    return torch.device(device_name)
+
+
+class LSTMClassifier:
+    """PyTorch LSTM for binary classification of trading signals.
+
+    Exposes ``fit`` / ``predict_proba`` / ``predict`` over a stacked LSTM
+    followed by dropout, a linear layer and a sigmoid.
+
+    Input rows are turned into overlapping windows of ``lstm_sequence_length``
+    rows. The window paired with row ``i`` covers rows ``i - seq_len`` up to
+    ``i - 1``, so every output is ``seq_len`` entries shorter than its input
+    and callers must drop the first ``seq_len`` rows to line the two up.
+
+    Hyperparameters read from ``hp`` (defaults in parentheses):
+    ``lstm_sequence_length`` (20), ``lstm_hidden_size`` (128),
+    ``lstm_num_layers`` (2), ``lstm_dropout`` (0.3), ``lstm_epochs`` (100),
+    ``lstm_batch_size`` (64), ``lstm_patience`` (10), and the Adam learning
+    rate, taken from ``lstm_learning_rate`` when present and otherwise from
+    ``learning_rate`` (0.001).
+    """
+
+    def __init__(self, input_size, hp):
+        """Build the network for ``input_size`` features per time step."""
+        import torch.nn as nn
+
+        self.hp = hp
+        self.device = get_device()
+        self.sequence_length = hp.get("lstm_sequence_length", 20)
+
+        hidden_size = hp.get("lstm_hidden_size", 128)
+        num_layers = hp.get("lstm_num_layers", 2)
+        dropout = hp.get("lstm_dropout", 0.3)
+
+        class _LSTMNet(nn.Module):
+            def __init__(self_net):
+                super().__init__()
+                self_net.lstm = nn.LSTM(
+                    input_size=input_size,
+                    hidden_size=hidden_size,
+                    num_layers=num_layers,
+                    batch_first=True,
+                    # Inter-layer dropout is only passed for stacked LSTMs.
+                    dropout=dropout if num_layers > 1 else 0.0,
+                )
+                self_net.dropout = nn.Dropout(dropout)
+                self_net.fc = nn.Linear(hidden_size, 1)
+                self_net.sigmoid = nn.Sigmoid()
+
+            def forward(self_net, x):
+                # x: (batch, seq_len, features)
+                lstm_out, _ = self_net.lstm(x)
+                # Classify from the output of the last time step only.
+                last_hidden = lstm_out[:, -1, :]
+                out = self_net.dropout(last_hidden)
+                out = self_net.fc(out)
+                out = self_net.sigmoid(out)
+                return out.squeeze(-1)
+
+        self.model = _LSTMNet().to(self.device)
+        # Always None: this model does not compute feature importances.
+        self.feature_importances_ = None
+
+    def _make_sequences(self, X, y=None):
+        """Convert flat feature rows into overlapping sequences.
+
+        Returns a float32 tensor of shape ``(len(X) - seq_len, seq_len, ...)``
+        on ``self.device`` and, when ``y`` is given, the matching targets
+        ``y[seq_len:]``. Inputs with ``len(X) <= seq_len`` yield correctly
+        shaped tensors with zero sequences instead of a shapeless empty one.
+        """
+        import torch
+
+        seq_len = self.sequence_length
+        X = np.asarray(X, dtype=np.float32)
+        n_rows = len(X)
+
+        if n_rows > seq_len:
+            windows = np.stack([X[i - seq_len:i] for i in range(seq_len, n_rows)])
+        else:
+            windows = np.empty((0, seq_len) + X.shape[1:], dtype=np.float32)
+        X_seq = torch.as_tensor(windows, dtype=torch.float32, device=self.device)
+
+        if y is None:
+            return X_seq
+
+        # Target for the window ending just before row i is y[i].
+        targets = np.array(np.asarray(y, dtype=np.float32)[seq_len:n_rows])
+        y_seq = torch.as_tensor(targets, dtype=torch.float32, device=self.device)
+        return X_seq, y_seq
+
+    def fit(self, X_train, y_train, X_val=None, y_val=None):
+        """Train with Adam and BCE loss, early-stopping on validation loss.
+
+        When a validation set long enough to form at least one sequence is
+        given, training stops after ``lstm_patience`` epochs without
+        improvement and the best-scoring weights are restored. Otherwise all
+        ``lstm_epochs`` epochs are run.
+
+        Raises:
+            ValueError: if ``X_train`` is too short to form a single
+                sequence. Without this check the loop would run zero batches
+                and leave a randomly initialised network behind.
+        """
+        import torch
+        import torch.nn as nn
+
+        # A dedicated key lets the LSTM use a different step size than the
+        # gradient-boosting models that also read ``learning_rate``.
+        lr = self.hp.get("lstm_learning_rate", self.hp.get("learning_rate", 0.001))
+        epochs = self.hp.get("lstm_epochs", 100)
+        batch_size = self.hp.get("lstm_batch_size", 64)
+        patience = self.hp.get("lstm_patience", 10)
+
+        X_seq, y_seq = self._make_sequences(X_train, y_train)
+        n_samples = len(X_seq)
+        if n_samples == 0:
+            raise ValueError(
+                f"LSTM needs more than {self.sequence_length} training rows, "
+                f"got {len(X_train)}"
+            )
+
+        optimizer = torch.optim.Adam(self.model.parameters(), lr=lr)
+        criterion = nn.BCELoss()
+
+        if X_val is not None and y_val is not None:
+            X_val_seq, y_val_seq = self._make_sequences(X_val, y_val)
+            has_val = len(X_val_seq) > 0
+        else:
+            has_val = False
+
+        best_val_loss = float("inf")
+        patience_counter = 0
+        best_state = None
+
+        self.model.train()
+
+        for epoch in range(epochs):
+            # Reshuffle the sequences (not the rows inside them) every epoch.
+            perm = torch.randperm(n_samples)
+            X_seq = X_seq[perm]
+            y_seq = y_seq[perm]
+
+            epoch_loss = 0.0
+            n_batches = 0
+            for start in range(0, n_samples, batch_size):
+                end = min(start + batch_size, n_samples)
+                X_batch = X_seq[start:end]
+                y_batch = y_seq[start:end]
+
+                optimizer.zero_grad()
+                preds = self.model(X_batch)
+                loss = criterion(preds, y_batch)
+                loss.backward()
+                torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1.0)
+                optimizer.step()
+                epoch_loss += loss.item()
+                n_batches += 1
+
+            avg_loss = epoch_loss / max(n_batches, 1)
+
+            # Validation
+            if has_val:
+                self.model.eval()
+                with torch.no_grad():
+                    val_preds = self.model(X_val_seq)
+                    val_loss = criterion(val_preds, y_val_seq).item()
+                self.model.train()
+
+                if val_loss < best_val_loss:
+                    best_val_loss = val_loss
+                    patience_counter = 0
+                    # Clone: state_dict() tensors keep changing as training goes on.
+                    best_state = {k: v.clone() for k, v in self.model.state_dict().items()}
+                else:
+                    patience_counter += 1
+                    if patience_counter >= patience:
+                        print(f"    LSTM early stop at epoch {epoch+1}, val_loss={val_loss:.4f}")
+                        break
+
+                if (epoch + 1) % 20 == 0:
+                    print(f"    Epoch {epoch+1}/{epochs}: loss={avg_loss:.4f}, val_loss={val_loss:.4f}")
+            else:
+                if (epoch + 1) % 20 == 0:
+                    print(f"    Epoch {epoch+1}/{epochs}: loss={avg_loss:.4f}")
+
+        if best_state is not None:
+            self.model.load_state_dict(best_state)
+
+    def predict_proba(self, X):
+        """Return an ``(len(X) - seq_len, 2)`` array of ``[P(0), P(1)]`` rows.
+
+        Inputs too short to form a sequence give an empty ``(0, 2)`` array
+        rather than an error from the network's forward pass.
+        """
+        import torch
+
+        X_seq = self._make_sequences(X)
+        if len(X_seq) == 0:
+            return np.empty((0, 2), dtype=np.float32)
+
+        self.model.eval()
+        with torch.no_grad():
+            proba_pos = self.model(X_seq).cpu().numpy()
+        proba_neg = 1.0 - proba_pos
+        return np.column_stack([proba_neg, proba_pos])
+
+    def predict(self, X):
+        """Return hard 0/1 labels using a 0.5 cut-off on the positive class."""
+        proba = self.predict_proba(X)
+        return (proba[:, 1] >= 0.5).astype(int)
+
+
 # ---------------------------------------------------------------------------
 # Model Building
 # ---------------------------------------------------------------------------

-def build_model(config: dict):
+def build_model(config: dict, input_size: int = 0):
    """Build the ML model based on config."""
    model_type = config.get("model_type", "xgboost")
    hp = config.get("hyperparameters", {})

+    if model_type == "lstm":
+        return LSTMClassifier(input_size, hp)
+
    if model_type == "xgboost":
        import xgboost as xgb
        # Detect GPU
@ -231,24 +398,61 @@ def build_model(config: dict):


 # ---------------------------------------------------------------------------
-# Walk-Forward Validation + Backtesting
+# Scaling & PCA
+# ---------------------------------------------------------------------------
+
+def apply_scaling_pca(X_train, X_val, X_test, config):
+    """Standardize and/or PCA-project the feature matrices of one window.
+
+    Both steps are driven by ``config["features"]``: ``use_scaler`` enables
+    ``StandardScaler`` and ``use_pca`` enables ``PCA`` keeping the fraction of
+    variance given by ``pca_variance`` (default 0.95). Each transformer is
+    fitted on ``X_train`` only and then applied to ``X_val`` (when it is not
+    None) and ``X_test``.
+
+    Returns ``(X_train, X_val, X_test, scaler, pca)``; ``scaler`` / ``pca``
+    are None for a step that was disabled.
+    """
+    feature_settings = config.get("features", {})
+    wants_scaler = feature_settings.get("use_scaler", False)
+    wants_pca = feature_settings.get("use_pca", False)
+
+    scaler = StandardScaler() if wants_scaler else None
+    if scaler is not None:
+        X_train = scaler.fit_transform(X_train)
+        X_val = X_val if X_val is None else scaler.transform(X_val)
+        X_test = scaler.transform(X_test)
+
+    pca = None
+    if wants_pca:
+        variance = feature_settings.get("pca_variance", 0.95)
+        pca = PCA(n_components=variance, svd_solver="full")
+        X_train = pca.fit_transform(X_train)
+        X_val = X_val if X_val is None else pca.transform(X_val)
+        X_test = pca.transform(X_test)
+        print(f"    PCA: {pca.n_components_} components (retaining {variance*100:.0f}% variance)")
+
+    return X_train, X_val, X_test, scaler, pca
+
+
+# ---------------------------------------------------------------------------
+# Walk-Forward / Rolling Window Validation + Backtesting
 # ---------------------------------------------------------------------------

 def walk_forward_train_test(df: pd.DataFrame, feature_cols: list, config: dict) -> dict:
-    """Walk-forward validation with backtesting on each window."""
+    """Walk-forward or rolling window validation with backtesting."""
    training_cfg = config.get("training", {})
+    strategy = config.get("strategy", {})
+    model_type = config.get("model_type", "xgboost")
+    use_rolling = training_cfg.get("rolling_window", False)
+
+    if use_rolling:
+        return rolling_window_train_test(df, feature_cols, config)
+
+    # Standard walk-forward
    n_windows = training_cfg.get("walk_forward_windows", 5)
    train_pct = training_cfg.get("train_pct", 0.7)
    val_pct = training_cfg.get("validation_pct", 0.15)

    n = len(df)
    window_size = n // n_windows
-    strategy = config.get("strategy", {})

    all_trades = []
    per_window_sharpe = []
-    feature_importances_sum = np.zeros(len(feature_cols))
+    feature_importances_sum = None
    fi_count = 0
+    effective_n_features = len(feature_cols)

    for w in range(n_windows):
        start = w * window_size
@ -275,37 +479,23 @@ def walk_forward_train_test(df: pd.DataFrame, feature_cols: list, config: dict)
        y_val = val_df["target"].values
        X_test = test_df[feature_cols].values

-        # Train model
-        model = build_model(config)
-        try:
-            model.fit(X_train, y_train)
-        except Exception as e:
-            print(f"  Window {w+1}: training failed -- {e}", file=sys.stderr)
-            continue
+        # Scale and PCA
+        X_train, X_val, X_test, scaler, pca = apply_scaling_pca(X_train, X_val, X_test, config)
+        current_n_features = X_train.shape[1]
+        if feature_importances_sum is None:
+            effective_n_features = current_n_features
+            feature_importances_sum = np.zeros(effective_n_features)

-        # Get predictions on test set
-        try:
-            proba = model.predict_proba(X_test)[:, 1]
-        except Exception:
-            preds = model.predict(X_test)
-            proba = preds.astype(float)
+        # Build and train
+        trades, fi = _train_and_backtest_window(
+            X_train, y_train, X_val, y_val, X_test, test_df,
+            config, strategy, current_n_features, w, n_windows
+        )

-        # Extract feature importances
-        try:
-            if hasattr(model, "feature_importances_"):
-                fi = model.feature_importances_
-            elif hasattr(model, "get_booster"):
-                fi_dict = model.get_booster().get_score(importance_type="gain")
-                fi = np.array([fi_dict.get(f"f{i}", 0) for i in range(len(feature_cols))])
-            else:
-                fi = np.zeros(len(feature_cols))
-            feature_importances_sum += fi / (fi.sum() + 1e-10)
+        if fi is not None and len(fi) == effective_n_features:
+            feature_importances_sum += fi
            fi_count += 1
-        except Exception:
-            pass

-        # Backtest on test set
-        trades = backtest(test_df, proba, strategy)
        all_trades.extend(trades)

        # Window sharpe
@ -320,26 +510,253 @@ def walk_forward_train_test(df: pd.DataFrame, feature_cols: list, config: dict)

        print(f"  Window {w+1}/{n_windows}: {len(trades)} trades, sharpe={per_window_sharpe[-1]}")

-    return compile_results(all_trades, per_window_sharpe, feature_importances_sum, fi_count, feature_cols, df)
+    # Build feature names for output
+    if pca is not None and config.get("features", {}).get("use_pca", False):
+        effective_feature_names = [f"PC_{i+1}" for i in range(effective_n_features)]
+    else:
+        effective_feature_names = feature_cols

+    if feature_importances_sum is None:
+        feature_importances_sum = np.zeros(len(effective_feature_names))
+
+    return compile_results(all_trades, per_window_sharpe, feature_importances_sum, fi_count, effective_feature_names, df)
+
+
+def rolling_window_train_test(df: pd.DataFrame, feature_cols: list, config: dict) -> dict:
+    """Rolling-window validation: train, backtest, slide forward, repeat.
+
+    Each window trains on ``rolling_train_size`` rows (default 2000) and
+    backtests on the following ``rolling_test_size`` rows (default 200); the
+    window then advances by the test size. The last ``validation_pct`` of
+    every training window is held out as the validation split. Windows whose
+    test slice has fewer than 10 rows, or whose training target contains a
+    single class, are skipped.
+
+    Returns:
+        The result of ``compile_results`` over the pooled trades, the
+        per-window Sharpe values and the summed feature importances.
+
+    Raises:
+        ValueError: if ``rolling_train_size`` or ``rolling_test_size`` is not
+            positive (a non-positive test size would never advance the window).
+    """
+    training_cfg = config.get("training", {})
+    strategy = config.get("strategy", {})
+    use_pca = config.get("features", {}).get("use_pca", False)
+    train_size = training_cfg.get("rolling_train_size", 2000)
+    test_size = training_cfg.get("rolling_test_size", 200)
+    val_pct = training_cfg.get("validation_pct", 0.15)
+
+    if train_size <= 0 or test_size <= 0:
+        raise ValueError(
+            "rolling_train_size and rolling_test_size must be positive, "
+            f"got {train_size} and {test_size}"
+        )
+
+    n = len(df)
+    # Upper bound on the number of windows; only used for progress reporting.
+    total_possible = (n - train_size) // test_size
+
+    all_trades = []
+    per_window_sharpe = []
+    feature_importances_sum = None
+    fi_count = 0
+    effective_n_features = len(feature_cols)
+    window_count = 0
+    # Stays None when no window is processed, so the naming step below
+    # cannot hit an unbound local.
+    pca = None
+
+    start = 0
+    while start + train_size + test_size <= n:
+        train_end = start + train_size
+        test_end = train_end + test_size
+
+        train_full = df.iloc[start:train_end]
+        test_df = df.iloc[train_end:test_end]
+
+        if len(test_df) < 10 or train_full["target"].nunique() < 2:
+            start += test_size
+            continue
+
+        # Split train into train/val (validation is the most recent part)
+        val_split = int(len(train_full) * (1.0 - val_pct))
+        train_df = train_full.iloc[:val_split]
+        val_df = train_full.iloc[val_split:]
+
+        X_train = train_df[feature_cols].values
+        y_train = train_df["target"].values
+        X_val = val_df[feature_cols].values
+        y_val = val_df["target"].values
+        X_test = test_df[feature_cols].values
+
+        # Scale and PCA (fitted on this window's training rows only)
+        X_train, X_val, X_test, _scaler, pca = apply_scaling_pca(X_train, X_val, X_test, config)
+        current_n_features = X_train.shape[1]
+        if feature_importances_sum is None:
+            # The first processed window fixes the importance vector length;
+            # later windows with a different width are left out of the sum.
+            effective_n_features = current_n_features
+            feature_importances_sum = np.zeros(effective_n_features)
+
+        window_count += 1
+
+        trades, fi = _train_and_backtest_window(
+            X_train, y_train, X_val, y_val, X_test, test_df,
+            config, strategy, current_n_features, window_count - 1, total_possible
+        )
+
+        if fi is not None and len(fi) == effective_n_features:
+            feature_importances_sum += fi
+            fi_count += 1
+
+        all_trades.extend(trades)
+
+        if trades:
+            returns = [t["return_pct"] for t in trades]
+            mean_r = np.mean(returns)
+            std_r = np.std(returns) if len(returns) > 1 else 1.0
+            sharpe = (mean_r / std_r) * np.sqrt(252 / max(1, len(trades))) if std_r > 0 else 0
+            per_window_sharpe.append(round(sharpe, 3))
+        else:
+            per_window_sharpe.append(0.0)
+
+        print(f"  Rolling window {window_count}: {len(trades)} trades, sharpe={per_window_sharpe[-1]}")
+
+        start += test_size
+
+    if window_count == 0:
+        print(
+            f"  Rolling window: no usable window (rows={n}, "
+            f"train_size={train_size}, test_size={test_size})",
+            file=sys.stderr,
+        )
+
+    # Importances refer to principal components once PCA has been applied.
+    if pca is not None and use_pca:
+        effective_feature_names = [f"PC_{i+1}" for i in range(effective_n_features)]
+    else:
+        effective_feature_names = feature_cols
+
+    if feature_importances_sum is None:
+        feature_importances_sum = np.zeros(len(effective_feature_names))
+
+    return compile_results(all_trades, per_window_sharpe, feature_importances_sum, fi_count, effective_feature_names, df)
+
+
+def _train_and_backtest_window(X_train, y_train, X_val, y_val, X_test, test_df,
+                                config, strategy, n_features, w_idx, n_windows):
+    """Train one model on a single window and backtest it.
+
+    ``model_type == "hybrid"`` is delegated to ``_hybrid_train_and_backtest``.
+    For ``"lstm"`` the validation split is passed to ``fit``; every other
+    model type is fitted on the training split alone.
+
+    Args:
+        w_idx: zero-based window index, used in the failure messages.
+        n_windows: accepted for call-site symmetry; not used here.
+
+    Returns:
+        ``(trades, feature_importances)``. ``([], None)`` when training or
+        prediction fails; ``feature_importances`` may also be None when the
+        model exposes none.
+    """
+    model_type = config.get("model_type", "xgboost")
+
+    if model_type == "hybrid":
+        return _hybrid_train_and_backtest(
+            X_train, y_train, X_val, y_val, X_test, test_df,
+            config, strategy, n_features
+        )
+
+    # Single model path
+    is_lstm = model_type == "lstm"
+    if is_lstm:
+        model = build_model(config, input_size=n_features)
+    else:
+        model = build_model(config)
+
+    try:
+        if is_lstm:
+            model.fit(X_train, y_train, X_val, y_val)
+        else:
+            model.fit(X_train, y_train)
+    except Exception as e:
+        print(f"  Window {w_idx+1}: training failed -- {e}", file=sys.stderr)
+        return [], None
+
+    # Get predictions: probabilities when available, hard labels as fallback.
+    try:
+        proba = model.predict_proba(X_test)[:, 1]
+    except Exception as e:
+        print(f"  Window {w_idx+1}: predict_proba failed ({e}), using predict", file=sys.stderr)
+        try:
+            proba = model.predict(X_test).astype(float)
+        except Exception as e2:
+            print(f"  Window {w_idx+1}: prediction failed -- {e2}", file=sys.stderr)
+            return [], None
+
+    # Sequence models return fewer predictions than test rows (the first
+    # seq_len rows have no complete window). Predictions always belong to the
+    # trailing rows, so align on length; this also keeps the fallback path
+    # aligned instead of pairing shortened predictions with the full frame.
+    proba = np.asarray(proba)
+    offset = max(0, len(test_df) - len(proba))
+    test_df_aligned = test_df.iloc[offset:]
+
+    # Feature importances
+    fi = _extract_feature_importances(model, n_features)
+
+    # Backtest
+    trades = backtest(test_df_aligned, proba, strategy)
+    return trades, fi
+
+
+def _hybrid_train_and_backtest(X_train, y_train, X_val, y_val, X_test, test_df,
+                                config, strategy, n_features):
+    """Backtest a 60/40 LSTM + XGBoost blend gated on model agreement.
+
+    Both models are trained on the same window. A model that fails to train
+    or predict is replaced by a constant 0.5 probability. The blended
+    probability is kept only where both models fall on the same side of 0.5;
+    elsewhere it is reset to 0.5.
+
+    Returns ``(trades, feature_importances)`` with the importances taken from
+    the XGBoost model.
+    """
+    seq_len = config.get("hyperparameters", {}).get("lstm_sequence_length", 20)
+
+    # --- LSTM leg (uses the validation split for early stopping) ---
+    lstm_model = build_model({**config, "model_type": "lstm"}, input_size=n_features)
+    try:
+        lstm_model.fit(X_train, y_train, X_val, y_val)
+        p_lstm = lstm_model.predict_proba(X_test)[:, 1]
+    except Exception as e:
+        print(f"    Hybrid LSTM failed: {e}", file=sys.stderr)
+        p_lstm = np.full(max(0, len(X_test) - seq_len), 0.5)
+
+    # --- XGBoost leg ---
+    xgb_model = build_model({**config, "model_type": "xgboost"})
+    try:
+        xgb_model.fit(X_train, y_train)
+        p_xgb_all = xgb_model.predict_proba(X_test)[:, 1]
+    except Exception as e:
+        print(f"    Hybrid XGBoost failed: {e}", file=sys.stderr)
+        p_xgb_all = np.full(len(X_test), 0.5)
+
+    # The LSTM has no prediction for the first seq_len test rows, so drop
+    # them from the XGBoost output and the price frame, then trim everything
+    # to the shared length.
+    p_xgb = p_xgb_all[seq_len:]
+    n_common = min(len(p_lstm), len(p_xgb))
+    p_lstm = p_lstm[:n_common]
+    p_xgb = p_xgb[:n_common]
+    aligned_df = test_df.iloc[seq_len:seq_len + n_common]
+
+    # Weighted blend: 60% LSTM + 40% XGBoost.
+    w_lstm, w_xgb = 0.6, 0.4
+    blended = w_lstm * p_lstm + w_xgb * p_xgb
+
+    # Keep the blend only where both models are on the same side of 0.5.
+    # NOTE(review): disagreements become 0.5, which blocks entry in backtest()
+    # only while entry_threshold or min_confidence_to_trade is above 0.5.
+    same_side = (p_lstm > 0.5) == (p_xgb > 0.5)
+    blended[~same_side] = 0.5
+
+    # Feature importances come from the XGBoost leg.
+    importances = _extract_feature_importances(xgb_model, n_features)
+
+    trades = backtest(aligned_df, blended, strategy)
+    return trades, importances
+
+
+def _extract_feature_importances(model, n_features):
+    """Return the model's feature importances normalized to sum to ~1.
+
+    Reads ``model.feature_importances_`` when the attribute exists; otherwise
+    falls back to per-feature gain from ``model.get_booster()`` for features
+    named ``f0`` .. ``f{n_features-1}``.
+
+    Returns:
+        A float array, or None when the model exposes no importances (for
+        example an attribute set to None) or extraction fails.
+    """
+    try:
+        if hasattr(model, "feature_importances_"):
+            raw = model.feature_importances_
+            if raw is None:
+                # Model declares the attribute but has nothing to report.
+                return None
+        elif hasattr(model, "get_booster"):
+            fi_dict = model.get_booster().get_score(importance_type="gain")
+            raw = [fi_dict.get(f"f{i}", 0) for i in range(n_features)]
+        else:
+            return None
+        # asarray so list-valued importances normalize like arrays do.
+        fi = np.asarray(raw, dtype=float)
+        # Epsilon keeps an all-zero vector from dividing by zero.
+        return fi / (fi.sum() + 1e-10)
+    except Exception:
+        # Best effort: importances are diagnostic output only.
+        return None
+
+
+# ---------------------------------------------------------------------------
+# Backtesting
+# ---------------------------------------------------------------------------

 def backtest(test_df: pd.DataFrame, proba: np.ndarray, strategy: dict) -> list:
-    """Simulate trades using model predictions."""
+    """Simulate trades using model predictions. Supports ATR-based dynamic SL/TP."""
    entry_threshold = strategy.get("entry_threshold", 0.6)
-    stop_loss = strategy.get("stop_loss_pct", 2.0) / 100
-    take_profit = strategy.get("take_profit_pct", 4.0) / 100
+    stop_loss_fixed = strategy.get("stop_loss_pct", 2.0) / 100
+    take_profit_fixed = strategy.get("take_profit_pct", 4.0) / 100
    trailing_stop = strategy.get("trailing_stop_pct", 1.5) / 100
    exit_type = strategy.get("exit_type", "trailing_stop")
    min_confidence = strategy.get("min_confidence_to_trade", 0.55)
+    use_dynamic = strategy.get("dynamic_sl_tp", False)
+    atr_sl_mult = strategy.get("atr_sl_multiplier", 1.5)
+    atr_tp_mult = strategy.get("atr_tp_multiplier", 3.0)
    fee = 0.001  # 0.1% per trade

    closes = test_df["close"].values
    highs = test_df["high"].values
    lows = test_df["low"].values
+
+    # ATR for dynamic SL/TP
+    if use_dynamic and "ATR_14" in test_df.columns:
+        atr_values = test_df["ATR_14"].values
+    else:
+        atr_values = None
+
    trades = []
    i = 0

    while i < len(closes) - 1:
+        if i >= len(proba):
+            break
        if proba[i] < min_confidence or proba[i] < entry_threshold:
            i += 1
            continue
@ -347,6 +764,16 @@ def backtest(test_df: pd.DataFrame, proba: np.ndarray, strategy: dict) -> list:
        # Enter trade
        entry_price = closes[i]
        confidence = proba[i]
+
+        # Compute dynamic SL/TP if enabled
+        if use_dynamic and atr_values is not None and not np.isnan(atr_values[i]):
+            atr = atr_values[i]
+            stop_loss = (atr * atr_sl_mult) / entry_price
+            take_profit = (atr * atr_tp_mult) / entry_price
+        else:
+            stop_loss = stop_loss_fixed
+            take_profit = take_profit_fixed
+
        # Position sizing based on confidence
        if strategy.get("position_sizing") == "confidence_scaled":
            if confidence > 0.8:
@ -522,7 +949,10 @@ def main():
    print(f"  Target distribution: {df['target'].value_counts().to_dict()}")

    # Run walk-forward training + backtesting
-    print("\nRunning walk-forward validation...")
+    model_type = config.get("model_type", "xgboost")
+    rolling = config.get("training", {}).get("rolling_window", False)
+    print(f"\nModel: {model_type}, Rolling: {rolling}")
+    print("Running validation...")
    results = walk_forward_train_test(df, feature_cols, config)

    # Save results