feat: add LSTM, hybrid ensemble, PCA, scaler, ATR stops, rolling window

Major upgrade to the ML engine:
- LSTM model type: 2-layer PyTorch LSTM with early stopping, GPU support
- Hybrid mode: LSTM (60%) + XGBoost (40%) with agreement gating
- StandardScaler normalization (critical for LSTM)
- PCA dimensionality reduction (configurable variance retention)
- ATR-based dynamic stop-loss/take-profit adapting to volatility
- Rolling window retraining for more realistic time series validation
- Updated LLM system prompt with docs for all new parameters
- All backward compatible (xgboost/lightgbm/catboost still work)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
BizzleBot 2026-03-19 23:02:11 +00:00
parent e24b6605d7
commit a21e635d9f
3 changed files with 534 additions and 79 deletions

View File

@ -1,43 +1,40 @@
{ {
"model_type": "xgboost", "model_type": "hybrid",
"features": { "features": {
"technical_indicators": [ "technical_indicators": ["RSI_14", "RSI_7", "MACD_line", "MACD_signal", "MACD_hist", "BB_upper", "BB_lower", "BB_width", "ATR_14", "SMA_20", "SMA_50", "EMA_10", "EMA_20", "OBV", "stoch_k", "stoch_d", "williams_r", "CCI_20", "ROC_10"],
"RSI_14", "RSI_7", "RSI_21",
"MACD_line", "MACD_signal", "MACD_hist",
"BB_upper", "BB_lower", "BB_width",
"ATR_14",
"SMA_5", "SMA_10", "SMA_20", "SMA_50", "SMA_200",
"EMA_5", "EMA_10", "EMA_20", "EMA_50",
"OBV",
"stoch_k", "stoch_d",
"williams_r",
"CCI_20",
"ROC_10",
"keltner_upper", "keltner_lower"
],
"lookback_periods": [3, 5, 10, 20], "lookback_periods": [3, 5, 10, 20],
"use_volume_features": true, "use_volume_features": true,
"use_volatility_features": true, "use_volatility_features": true,
"use_candle_patterns": true, "use_candle_patterns": false,
"use_lag_features": true, "use_lag_features": true,
"lag_periods": [1, 2, 3, 5] "lag_periods": [1, 2, 3, 5],
"use_pca": true,
"pca_variance": 0.95,
"use_scaler": true
}, },
"target": { "target": {
"type": "classification", "type": "classification",
"direction": "long", "direction": "both",
"horizon_candles": 6, "horizon_candles": 8,
"threshold_pct": 1.0 "threshold_pct": 1.5
}, },
"hyperparameters": { "hyperparameters": {
"learning_rate": 0.05, "learning_rate": 0.001,
"max_depth": 6, "max_depth": 5,
"n_estimators": 500, "n_estimators": 300,
"subsample": 0.8, "subsample": 0.8,
"colsample_bytree": 0.8, "colsample_bytree": 0.8,
"min_child_weight": 5, "min_child_weight": 5,
"gamma": 0.1, "gamma": 0.3,
"reg_alpha": 0.1, "reg_alpha": 0.1,
"reg_lambda": 1.0 "reg_lambda": 2.0,
"lstm_hidden_size": 128,
"lstm_num_layers": 2,
"lstm_dropout": 0.3,
"lstm_epochs": 100,
"lstm_batch_size": 64,
"lstm_sequence_length": 20,
"lstm_patience": 10
}, },
"strategy": { "strategy": {
"entry_threshold": 0.60, "entry_threshold": 0.60,
@ -47,13 +44,19 @@
"trailing_stop_pct": 1.5, "trailing_stop_pct": 1.5,
"position_sizing": "confidence_scaled", "position_sizing": "confidence_scaled",
"max_position_pct": 100, "max_position_pct": 100,
"min_confidence_to_trade": 0.55 "min_confidence_to_trade": 0.55,
"dynamic_sl_tp": true,
"atr_sl_multiplier": 1.5,
"atr_tp_multiplier": 3.0
}, },
"training": { "training": {
"walk_forward_windows": 5, "walk_forward_windows": 5,
"train_pct": 0.7, "train_pct": 0.7,
"validation_pct": 0.15, "validation_pct": 0.15,
"test_pct": 0.15 "test_pct": 0.15,
"rolling_window": true,
"rolling_train_size": 2000,
"rolling_test_size": 200
}, },
"timeframe": "4h" "timeframe": "4h"
} }

View File

@ -1,6 +1,6 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
""" """
LLM Strategy Analyzer Calls Ollama on Mac Mini to analyze results LLM Strategy Analyzer -- Calls Ollama on Mac Mini to analyze results
and suggest config modifications for the next iteration. and suggest config modifications for the next iteration.
""" """
@ -14,17 +14,19 @@ MODEL = "qwen3.5:27b"
SYSTEM_PROMPT = """You are a quantitative trading strategy optimizer. You analyze ML model backtesting results for a BTC/USDT trading strategy and suggest precise modifications to improve performance. SYSTEM_PROMPT = """You are a quantitative trading strategy optimizer. You analyze ML model backtesting results for a BTC/USDT trading strategy and suggest precise modifications to improve performance.
## Your Task ## Your Task
Given the current configuration and results, suggest 1-3 specific, justified changes to the configuration for the next iteration. Be methodical and scientific change one thing at a time when possible. Given the current configuration and results, suggest 1-3 specific, justified changes to the configuration for the next iteration. Be methodical and scientific -- change one thing at a time when possible.
## Config Parameters You Can Modify ## Config Parameters You Can Modify
**model_type**: "xgboost", "lightgbm", "catboost", or "ensemble" **model_type**: "xgboost", "lightgbm", "catboost", "ensemble", "lstm", or "hybrid"
- xgboost: Generally best for structured data, fast GPU training - xgboost: Generally best for structured data, fast GPU training
- lightgbm: Faster training, good with large feature sets - lightgbm: Faster training, good with large feature sets
- catboost: Handles feature interactions well, less tuning needed - catboost: Handles feature interactions well, less tuning needed
- ensemble: Combines all three, reduces variance but slower - ensemble: Combines xgboost+lightgbm+catboost, reduces variance but slower
- lstm: PyTorch LSTM neural network, captures temporal/sequential patterns in price data
- hybrid: Combines LSTM (60% weight) + XGBoost (40% weight). Only enters trades when BOTH models agree on direction. The hybrid model typically outperforms single models -- LSTM captures temporal patterns while XGBoost handles feature interactions. Recommended as default.
**hyperparameters**: **hyperparameters** (gradient boosting):
- learning_rate (0.001-0.3): Lower = more robust but slower. If overfitting, decrease. - learning_rate (0.001-0.3): Lower = more robust but slower. If overfitting, decrease.
- max_depth (3-10): Controls model complexity. Deeper = more overfitting risk. - max_depth (3-10): Controls model complexity. Deeper = more overfitting risk.
- n_estimators (100-2000): More trees = better fit but diminishing returns. - n_estimators (100-2000): More trees = better fit but diminishing returns.
@ -35,18 +37,30 @@ Given the current configuration and results, suggest 1-3 specific, justified cha
- reg_alpha (0-10): L1 regularization. Encourages sparsity. - reg_alpha (0-10): L1 regularization. Encourages sparsity.
- reg_lambda (0-10): L2 regularization. Prevents large weights. - reg_lambda (0-10): L2 regularization. Prevents large weights.
**hyperparameters** (LSTM-specific, used by lstm and hybrid model_types):
- lstm_hidden_size (32-256): LSTM hidden units. Larger = more capacity but overfitting risk. Default 128.
- lstm_num_layers (1-4): Stacked LSTM layers. 2 is usually optimal. More layers need more data.
- lstm_dropout (0.1-0.5): Dropout between LSTM layers and before output. Higher = more regularization.
- lstm_epochs (50-200): Max training epochs. Early stopping usually triggers before this.
- lstm_batch_size (32-128): Training batch size. Smaller = noisier gradients but better generalization.
- lstm_sequence_length (10-50): How many past candles the LSTM sees per prediction. Longer = more context but more memory. Default 20.
- lstm_patience (5-20): Early stopping patience on validation loss. Lower = stop sooner.
**target**: **target**:
- direction: "long" or "both" - direction: "long", "short", or "both"
- horizon_candles (1-20): How far ahead to predict. Longer = smoother but lagging. - horizon_candles (1-20): How far ahead to predict. Longer = smoother but lagging.
- threshold_pct (0.3-3.0): Minimum move % to label as positive. Higher = fewer but clearer signals. - threshold_pct (0.3-3.0): Minimum move % to label as positive. Higher = fewer but clearer signals.
**strategy**: **strategy**:
- entry_threshold (0.5-0.8): Min prediction probability to enter trade. Higher = fewer trades, higher quality. - entry_threshold (0.5-0.8): Min prediction probability to enter trade. Higher = fewer trades, higher quality.
- stop_loss_pct (0.5-5.0): Max loss before exit. Tighter = more stopped out. - stop_loss_pct (0.5-5.0): Max loss before exit (used when dynamic_sl_tp is false).
- take_profit_pct (1.0-10.0): Target profit. Should be > stop_loss for positive expectancy. - take_profit_pct (1.0-10.0): Target profit (used when dynamic_sl_tp is false). Should be > stop_loss for positive expectancy.
- trailing_stop_pct (0.5-3.0): Trailing stop distance. Tighter = locks profit faster but exits early. - trailing_stop_pct (0.5-3.0): Trailing stop distance. Tighter = locks profit faster but exits early.
- min_confidence_to_trade (0.5-0.9): Absolute minimum confidence to consider. - min_confidence_to_trade (0.5-0.9): Absolute minimum confidence to consider.
- exit_type: "trailing_stop" or "fixed" (just SL/TP) - exit_type: "trailing_stop" or "fixed" (just SL/TP)
- dynamic_sl_tp (true/false): Use ATR-based dynamic stop-loss and take-profit instead of fixed percentages. Adapts to current volatility. Recommended: true.
- atr_sl_multiplier (1.0-3.0): ATR multiplier for stop-loss. E.g., 1.5 means SL = 1.5 * ATR(14). Lower = tighter stops.
- atr_tp_multiplier (2.0-5.0): ATR multiplier for take-profit. E.g., 3.0 means TP = 3.0 * ATR(14). Should be > atr_sl_multiplier.
**features**: **features**:
- use_volume_features (true/false): Volume features can be noisy in crypto. - use_volume_features (true/false): Volume features can be noisy in crypto.
@ -54,9 +68,15 @@ Given the current configuration and results, suggest 1-3 specific, justified cha
- use_lag_features (true/false): Lagged features capture momentum. - use_lag_features (true/false): Lagged features capture momentum.
- lag_periods: List of lag periods [1,2,3,5,10] - lag_periods: List of lag periods [1,2,3,5,10]
- lookback_periods: List of lookback windows [3,5,10,20] - lookback_periods: List of lookback windows [3,5,10,20]
- use_scaler (true/false): Apply StandardScaler normalization to all features. Critical for LSTM, also helps gradient boosting. Recommended: true.
- use_pca (true/false): Apply PCA dimensionality reduction after scaling. Reduces noise and multicollinearity. Recommended with many features.
- pca_variance (0.80-0.99): Fraction of variance to retain with PCA. 0.95 keeps 95% of information. Lower = fewer dimensions, more noise removed.
**training**: **training**:
- walk_forward_windows (3-10): More windows = more robust but less data per window. - walk_forward_windows (3-10): More windows = more robust but less data per window. Used when rolling_window is false.
- rolling_window (true/false): Use rolling window instead of static walk-forward splits. Trains on last N candles, tests on next M, slides forward. More realistic for time series. Recommended: true.
- rolling_train_size (1000-5000): Number of candles in the rolling training window. Larger = more data but older patterns.
- rolling_test_size (100-500): Number of candles in the rolling test window. Smaller = more retraining, better adaptation.
## Key Metrics to Optimize (in priority order) ## Key Metrics to Optimize (in priority order)
1. **Sharpe Ratio** (target: > 2.0): Risk-adjusted return. Most important metric. 1. **Sharpe Ratio** (target: > 2.0): Risk-adjusted return. Most important metric.
@ -66,16 +86,18 @@ Given the current configuration and results, suggest 1-3 specific, justified cha
5. **Trade Count**: Need enough trades for statistical significance (>50). 5. **Trade Count**: Need enough trades for statistical significance (>50).
## Decision Guidelines ## Decision Guidelines
- If Sharpe < 1.0: The strategy is not working well. Consider larger changes. - If Sharpe < 1.0: The strategy is not working well. Consider larger changes (switch to hybrid, enable PCA/scaler, adjust target).
- If Sharpe 1.0-1.5: Decent. Fine-tune hyperparameters and thresholds. - If Sharpe 1.0-1.5: Decent. Fine-tune hyperparameters and thresholds.
- If Sharpe 1.5-2.0: Good. Make small, targeted improvements. - If Sharpe 1.5-2.0: Good. Make small, targeted improvements.
- If Sharpe > 2.0: Very good. Be careful not to overfit. - If Sharpe > 2.0: Very good. Be careful not to overfit.
- If win_rate < 0.50 but profit_factor > 1.5: Strategy relies on big wins ok, tighten SL. - If win_rate < 0.50 but profit_factor > 1.5: Strategy relies on big wins -- ok, tighten SL.
- If win_rate > 0.60 but profit_factor < 1.2: Many small wins but losses are too big widen TP or tighten SL. - If win_rate > 0.60 but profit_factor < 1.2: Many small wins but losses are too big -- widen TP or tighten SL.
- If trade_count < 30: Not enough trades. Lower entry_threshold or min_confidence. - If trade_count < 30: Not enough trades. Lower entry_threshold or min_confidence.
- If max_drawdown < -20%: Too risky. Increase regularization, tighten stop loss. - If max_drawdown < -20%: Too risky. Increase regularization, tighten stop loss, enable dynamic_sl_tp.
- If per_window_sharpe has high variance: Model is not stable. More regularization or simpler model. - If per_window_sharpe has high variance: Model is not stable. More regularization, enable PCA, or try hybrid.
- Check feature_importances: If top features make financial sense, good. If random features dominate, possible overfitting. - Check feature_importances: If top features make financial sense, good. If random features dominate, possible overfitting -- enable PCA or reduce features.
- For LSTM/hybrid: if underfitting, increase lstm_hidden_size or lstm_num_layers. If overfitting, increase lstm_dropout or decrease lstm_sequence_length.
- The hybrid model combining LSTM + XGBoost typically outperforms single models. LSTM captures temporal patterns while XGBoost handles feature interactions. Use hybrid as the default unless you have a specific reason not to.
## Response Format ## Response Format
You MUST respond with ONLY a JSON object (no markdown, no explanation outside the JSON): You MUST respond with ONLY a JSON object (no markdown, no explanation outside the JSON):

View File

@ -21,6 +21,9 @@ from ta.trend import MACD, CCIIndicator, SMAIndicator, EMAIndicator
from ta.volatility import BollingerBands, AverageTrueRange, KeltnerChannel from ta.volatility import BollingerBands, AverageTrueRange, KeltnerChannel
from ta.volume import OnBalanceVolumeIndicator from ta.volume import OnBalanceVolumeIndicator
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
warnings.filterwarnings("ignore") warnings.filterwarnings("ignore")
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
@ -140,15 +143,179 @@ def create_target(df: pd.DataFrame, config: dict) -> pd.Series:
return target return target
# ---------------------------------------------------------------------------
# LSTM Model (PyTorch)
# ---------------------------------------------------------------------------
def get_device():
    """Return the torch device to run on: CUDA when available, otherwise CPU."""
    # torch is imported lazily so the module can be loaded without PyTorch
    # when only the gradient-boosting models are used.
    import torch

    backend = "cuda" if torch.cuda.is_available() else "cpu"
    return torch.device(backend)
class LSTMClassifier:
    """PyTorch LSTM for binary classification of trading signals.

    The classifier consumes a flat ``(n_rows, n_features)`` array and turns it
    into overlapping windows of ``lstm_sequence_length`` rows. The window used
    for row ``i`` is ``X[i - seq_len:i]``, so the first ``seq_len`` rows of any
    input produce no prediction: ``predict_proba`` returns
    ``max(0, len(X) - seq_len)`` rows and callers must trim their frames
    accordingly.

    Hyperparameters read from ``hp`` (with defaults): ``lstm_sequence_length``
    (20), ``lstm_hidden_size`` (128), ``lstm_num_layers`` (2),
    ``lstm_dropout`` (0.3), ``learning_rate`` (0.001), ``lstm_epochs`` (100),
    ``lstm_batch_size`` (64), ``lstm_patience`` (10).
    """

    def __init__(self, input_size, hp):
        """Build the network for ``input_size`` features per time step.

        Raises:
            ValueError: if ``lstm_sequence_length`` is smaller than 1.
        """
        import torch
        import torch.nn as nn

        self.hp = hp
        self.device = get_device()
        self.sequence_length = hp.get("lstm_sequence_length", 20)
        if self.sequence_length < 1:
            raise ValueError("lstm_sequence_length must be >= 1")

        hidden_size = hp.get("lstm_hidden_size", 128)
        num_layers = hp.get("lstm_num_layers", 2)
        dropout = hp.get("lstm_dropout", 0.3)

        class _LSTMNet(nn.Module):
            def __init__(self_net):
                super().__init__()
                self_net.lstm = nn.LSTM(
                    input_size=input_size,
                    hidden_size=hidden_size,
                    num_layers=num_layers,
                    batch_first=True,
                    # nn.LSTM only applies dropout between stacked layers.
                    dropout=dropout if num_layers > 1 else 0.0,
                )
                self_net.dropout = nn.Dropout(dropout)
                self_net.fc = nn.Linear(hidden_size, 1)
                self_net.sigmoid = nn.Sigmoid()

            def forward(self_net, x):
                # x: (batch, seq_len, features)
                lstm_out, _ = self_net.lstm(x)
                # Take last time step
                last_hidden = lstm_out[:, -1, :]
                out = self_net.dropout(last_hidden)
                out = self_net.fc(out)
                out = self_net.sigmoid(out)
                return out.squeeze(-1)

        self.model = _LSTMNet().to(self.device)
        # Kept for interface parity with the tree models; an LSTM has no
        # per-feature importances, so this stays None.
        self.feature_importances_ = None

    def _make_sequences(self, X, y=None):
        """Convert flat feature arrays into overlapping sequences.

        Returns a float32 tensor of shape ``(n - seq_len, seq_len, features)``
        on ``self.device``, plus the matching targets ``y[seq_len:n]`` when
        ``y`` is given. Inputs with ``n <= seq_len`` yield correctly shaped
        empty tensors instead of a 1-D placeholder.
        """
        import torch

        seq_len = self.sequence_length
        X = np.asarray(X, dtype=np.float32)
        n_rows = len(X)

        if n_rows > seq_len:
            windows = np.stack([X[i - seq_len:i] for i in range(seq_len, n_rows)])
        else:
            n_features = X.shape[1] if X.ndim == 2 else 0
            windows = np.empty((0, seq_len, n_features), dtype=np.float32)

        X_seq = torch.as_tensor(windows, dtype=torch.float32).to(self.device)
        if y is None:
            return X_seq

        labels = np.asarray(y, dtype=np.float32)[seq_len:n_rows]
        y_seq = torch.as_tensor(labels, dtype=torch.float32).to(self.device)
        return X_seq, y_seq

    def fit(self, X_train, y_train, X_val=None, y_val=None):
        """Train with Adam + BCE loss, early-stopping on validation loss.

        When a non-empty validation set is supplied, the weights with the
        lowest validation loss are restored at the end. Without one, training
        runs for the full ``lstm_epochs``.

        Raises:
            ValueError: if the training data has no more rows than the
                sequence length, i.e. not a single sequence can be built.
        """
        import torch
        import torch.nn as nn

        lr = self.hp.get("learning_rate", 0.001)
        epochs = self.hp.get("lstm_epochs", 100)
        batch_size = self.hp.get("lstm_batch_size", 64)
        patience = self.hp.get("lstm_patience", 10)

        X_seq, y_seq = self._make_sequences(X_train, y_train)
        n_samples = len(X_seq)
        if n_samples == 0:
            # Previously this was a silent no-op that left the network with
            # its random initial weights.
            raise ValueError(
                f"need more than {self.sequence_length} training rows to build "
                f"LSTM sequences, got {len(X_train)}"
            )

        has_val = False
        if X_val is not None and y_val is not None:
            X_val_seq, y_val_seq = self._make_sequences(X_val, y_val)
            has_val = len(X_val_seq) > 0

        optimizer = torch.optim.Adam(self.model.parameters(), lr=lr)
        criterion = nn.BCELoss()

        best_val_loss = float("inf")
        patience_counter = 0
        best_state = None

        self.model.train()
        for epoch in range(epochs):
            # Shuffle by index so only one batch is materialised at a time,
            # rather than copying the whole dataset every epoch.
            perm = torch.randperm(n_samples).to(X_seq.device)

            epoch_loss = 0.0
            n_batches = 0
            for start in range(0, n_samples, batch_size):
                batch_idx = perm[start:start + batch_size]
                X_batch = X_seq[batch_idx]
                y_batch = y_seq[batch_idx]

                optimizer.zero_grad()
                preds = self.model(X_batch)
                loss = criterion(preds, y_batch)
                loss.backward()
                torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1.0)
                optimizer.step()

                epoch_loss += loss.item()
                n_batches += 1

            avg_loss = epoch_loss / max(n_batches, 1)

            # Validation
            if has_val:
                self.model.eval()
                with torch.no_grad():
                    val_preds = self.model(X_val_seq)
                    val_loss = criterion(val_preds, y_val_seq).item()
                self.model.train()

                if val_loss < best_val_loss:
                    best_val_loss = val_loss
                    patience_counter = 0
                    # Clone: state_dict() returns live references.
                    best_state = {k: v.clone() for k, v in self.model.state_dict().items()}
                else:
                    patience_counter += 1
                    if patience_counter >= patience:
                        print(f" LSTM early stop at epoch {epoch+1}, val_loss={val_loss:.4f}")
                        break

                if (epoch + 1) % 20 == 0:
                    print(f" Epoch {epoch+1}/{epochs}: loss={avg_loss:.4f}, val_loss={val_loss:.4f}")
            else:
                if (epoch + 1) % 20 == 0:
                    print(f" Epoch {epoch+1}/{epochs}: loss={avg_loss:.4f}")

        if best_state is not None:
            self.model.load_state_dict(best_state)

    def predict_proba(self, X):
        """Return an ``(n - seq_len, 2)`` array of ``[P(0), P(1)]`` rows.

        Inputs too short to form a single sequence return an empty ``(0, 2)``
        array instead of raising from inside the network.
        """
        import torch

        X_seq = self._make_sequences(X)
        if len(X_seq) == 0:
            return np.empty((0, 2), dtype=np.float32)

        self.model.eval()
        with torch.no_grad():
            proba_pos = self.model(X_seq).cpu().numpy()
        proba_neg = 1.0 - proba_pos
        return np.column_stack([proba_neg, proba_pos])

    def predict(self, X):
        """Return hard 0/1 labels using a 0.5 cut on the positive probability."""
        proba = self.predict_proba(X)
        return (proba[:, 1] >= 0.5).astype(int)
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# Model Building # Model Building
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
def build_model(config: dict): def build_model(config: dict, input_size: int = 0):
"""Build the ML model based on config.""" """Build the ML model based on config."""
model_type = config.get("model_type", "xgboost") model_type = config.get("model_type", "xgboost")
hp = config.get("hyperparameters", {}) hp = config.get("hyperparameters", {})
if model_type == "lstm":
return LSTMClassifier(input_size, hp)
if model_type == "xgboost": if model_type == "xgboost":
import xgboost as xgb import xgboost as xgb
# Detect GPU # Detect GPU
@ -231,24 +398,61 @@ def build_model(config: dict):
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# Walk-Forward Validation + Backtesting # Scaling & PCA
# ---------------------------------------------------------------------------
def apply_scaling_pca(X_train, X_val, X_test, config):
    """Optionally standardise and PCA-reduce the three feature splits.

    Both steps are controlled by ``config["features"]``: ``use_scaler``
    enables a StandardScaler, ``use_pca`` enables PCA with ``pca_variance``
    (default 0.95) as the retained-variance target. Each transformer is fitted
    on ``X_train`` only and then applied to the other splits. ``X_val`` may be
    None and is passed through untouched in that case.

    Returns:
        ``(X_train, X_val, X_test, scaler, pca)``; ``scaler`` / ``pca`` are
        None when the corresponding step is disabled.
    """
    feature_opts = config.get("features", {})
    scaler = None
    pca = None

    if feature_opts.get("use_scaler", False):
        scaler = StandardScaler()
        X_train = scaler.fit_transform(X_train)
        X_val = None if X_val is None else scaler.transform(X_val)
        X_test = scaler.transform(X_test)

    if feature_opts.get("use_pca", False):
        variance = feature_opts.get("pca_variance", 0.95)
        # A float n_components asks PCA to keep enough components to explain
        # that fraction of variance; this needs the "full" SVD solver.
        pca = PCA(n_components=variance, svd_solver="full")
        X_train = pca.fit_transform(X_train)
        X_val = None if X_val is None else pca.transform(X_val)
        X_test = pca.transform(X_test)
        print(f" PCA: {pca.n_components_} components (retaining {variance*100:.0f}% variance)")

    return X_train, X_val, X_test, scaler, pca
# ---------------------------------------------------------------------------
# Walk-Forward / Rolling Window Validation + Backtesting
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
def walk_forward_train_test(df: pd.DataFrame, feature_cols: list, config: dict) -> dict: def walk_forward_train_test(df: pd.DataFrame, feature_cols: list, config: dict) -> dict:
"""Walk-forward validation with backtesting on each window.""" """Walk-forward or rolling window validation with backtesting."""
training_cfg = config.get("training", {}) training_cfg = config.get("training", {})
strategy = config.get("strategy", {})
model_type = config.get("model_type", "xgboost")
use_rolling = training_cfg.get("rolling_window", False)
if use_rolling:
return rolling_window_train_test(df, feature_cols, config)
# Standard walk-forward
n_windows = training_cfg.get("walk_forward_windows", 5) n_windows = training_cfg.get("walk_forward_windows", 5)
train_pct = training_cfg.get("train_pct", 0.7) train_pct = training_cfg.get("train_pct", 0.7)
val_pct = training_cfg.get("validation_pct", 0.15) val_pct = training_cfg.get("validation_pct", 0.15)
n = len(df) n = len(df)
window_size = n // n_windows window_size = n // n_windows
strategy = config.get("strategy", {})
all_trades = [] all_trades = []
per_window_sharpe = [] per_window_sharpe = []
feature_importances_sum = np.zeros(len(feature_cols)) feature_importances_sum = None
fi_count = 0 fi_count = 0
effective_n_features = len(feature_cols)
for w in range(n_windows): for w in range(n_windows):
start = w * window_size start = w * window_size
@ -275,37 +479,23 @@ def walk_forward_train_test(df: pd.DataFrame, feature_cols: list, config: dict)
y_val = val_df["target"].values y_val = val_df["target"].values
X_test = test_df[feature_cols].values X_test = test_df[feature_cols].values
# Train model # Scale and PCA
model = build_model(config) X_train, X_val, X_test, scaler, pca = apply_scaling_pca(X_train, X_val, X_test, config)
try: current_n_features = X_train.shape[1]
model.fit(X_train, y_train) if feature_importances_sum is None:
except Exception as e: effective_n_features = current_n_features
print(f" Window {w+1}: training failed -- {e}", file=sys.stderr) feature_importances_sum = np.zeros(effective_n_features)
continue
# Get predictions on test set # Build and train
try: trades, fi = _train_and_backtest_window(
proba = model.predict_proba(X_test)[:, 1] X_train, y_train, X_val, y_val, X_test, test_df,
except Exception: config, strategy, current_n_features, w, n_windows
preds = model.predict(X_test) )
proba = preds.astype(float)
# Extract feature importances if fi is not None and len(fi) == effective_n_features:
try: feature_importances_sum += fi
if hasattr(model, "feature_importances_"):
fi = model.feature_importances_
elif hasattr(model, "get_booster"):
fi_dict = model.get_booster().get_score(importance_type="gain")
fi = np.array([fi_dict.get(f"f{i}", 0) for i in range(len(feature_cols))])
else:
fi = np.zeros(len(feature_cols))
feature_importances_sum += fi / (fi.sum() + 1e-10)
fi_count += 1 fi_count += 1
except Exception:
pass
# Backtest on test set
trades = backtest(test_df, proba, strategy)
all_trades.extend(trades) all_trades.extend(trades)
# Window sharpe # Window sharpe
@ -320,26 +510,253 @@ def walk_forward_train_test(df: pd.DataFrame, feature_cols: list, config: dict)
print(f" Window {w+1}/{n_windows}: {len(trades)} trades, sharpe={per_window_sharpe[-1]}") print(f" Window {w+1}/{n_windows}: {len(trades)} trades, sharpe={per_window_sharpe[-1]}")
return compile_results(all_trades, per_window_sharpe, feature_importances_sum, fi_count, feature_cols, df) # Build feature names for output
if pca is not None and config.get("features", {}).get("use_pca", False):
effective_feature_names = [f"PC_{i+1}" for i in range(effective_n_features)]
else:
effective_feature_names = feature_cols
if feature_importances_sum is None:
feature_importances_sum = np.zeros(len(effective_feature_names))
return compile_results(all_trades, per_window_sharpe, feature_importances_sum, fi_count, effective_feature_names, df)
def rolling_window_train_test(df: pd.DataFrame, feature_cols: list, config: dict) -> dict:
    """Rolling window train/test with sliding forward.

    Trains on ``rolling_train_size`` consecutive rows, backtests on the next
    ``rolling_test_size`` rows, then slides the start forward by
    ``rolling_test_size``. The tail ``validation_pct`` of each training slice
    is held out as a validation set. Windows whose test slice has fewer than
    10 rows, or whose training target has a single class, are skipped.

    Args:
        df: Frame containing ``feature_cols`` and a ``target`` column.
        feature_cols: Names of the feature columns to feed the model.
        config: Full run config; ``training``, ``strategy`` and ``features``
            sections are read here.

    Returns:
        Whatever ``compile_results`` returns for the accumulated trades.

    Raises:
        ValueError: if ``rolling_train_size`` or ``rolling_test_size`` is not
            positive (a non-positive step would never advance the window).
    """
    training_cfg = config.get("training", {})
    strategy = config.get("strategy", {})
    train_size = training_cfg.get("rolling_train_size", 2000)
    test_size = training_cfg.get("rolling_test_size", 200)
    val_pct = training_cfg.get("validation_pct", 0.15)

    if train_size <= 0 or test_size <= 0:
        raise ValueError("rolling_train_size and rolling_test_size must be positive")

    n = len(df)
    # Loop-invariant; only passed through to the per-window helper.
    total_possible = (n - train_size) // test_size

    all_trades = []
    per_window_sharpe = []
    feature_importances_sum = None
    fi_count = 0
    effective_n_features = len(feature_cols)
    window_count = 0
    # Must exist even when no window runs (short data, or every window
    # skipped); it is read after the loop to choose the feature names.
    pca = None

    start = 0
    while start + train_size + test_size <= n:
        train_end = start + train_size
        test_end = min(train_end + test_size, n)

        train_full = df.iloc[start:train_end]
        test_df = df.iloc[train_end:test_end]

        if len(test_df) < 10 or train_full["target"].nunique() < 2:
            start += test_size
            continue

        # Split train into train/val
        val_split = int(len(train_full) * (1.0 - val_pct))
        train_df = train_full.iloc[:val_split]
        val_df = train_full.iloc[val_split:]

        X_train = train_df[feature_cols].values
        y_train = train_df["target"].values
        X_val = val_df[feature_cols].values
        y_val = val_df["target"].values
        X_test = test_df[feature_cols].values

        # Scale and PCA
        X_train, X_val, X_test, scaler, pca = apply_scaling_pca(X_train, X_val, X_test, config)

        current_n_features = X_train.shape[1]
        if feature_importances_sum is None:
            # The first window fixes the importance vector width; later
            # windows with a different PCA component count are not summed.
            effective_n_features = current_n_features
            feature_importances_sum = np.zeros(effective_n_features)

        window_count += 1

        trades, fi = _train_and_backtest_window(
            X_train, y_train, X_val, y_val, X_test, test_df,
            config, strategy, current_n_features, window_count - 1, total_possible
        )

        if fi is not None and len(fi) == effective_n_features:
            feature_importances_sum += fi
            fi_count += 1

        all_trades.extend(trades)

        if trades:
            returns = [t["return_pct"] for t in trades]
            mean_r = np.mean(returns)
            std_r = np.std(returns) if len(returns) > 1 else 1.0
            sharpe = (mean_r / std_r) * np.sqrt(252 / max(1, len(trades))) if std_r > 0 else 0
            per_window_sharpe.append(round(sharpe, 3))
        else:
            per_window_sharpe.append(0.0)

        print(f" Rolling window {window_count}: {len(trades)} trades, sharpe={per_window_sharpe[-1]}")

        start += test_size

    if pca is not None and config.get("features", {}).get("use_pca", False):
        effective_feature_names = [f"PC_{i+1}" for i in range(effective_n_features)]
    else:
        effective_feature_names = feature_cols

    if feature_importances_sum is None:
        feature_importances_sum = np.zeros(len(effective_feature_names))

    return compile_results(all_trades, per_window_sharpe, feature_importances_sum, fi_count, effective_feature_names, df)
def _train_and_backtest_window(X_train, y_train, X_val, y_val, X_test, test_df,
                               config, strategy, n_features, w_idx, n_windows):
    """Train model on one window and backtest. Returns (trades, feature_importances).

    ``model_type == "hybrid"`` is delegated to ``_hybrid_train_and_backtest``.
    Otherwise a single model is built; only the LSTM receives the validation
    split (for early stopping). Any training or prediction failure is reported
    on stderr and yields ``([], None)`` so one bad window does not abort the
    whole run.

    Args:
        X_train, y_train, X_val, y_val, X_test: Already scaled/PCA-transformed
            arrays for this window.
        test_df: Frame the backtest reads prices from; same row order as
            ``X_test``.
        config, strategy: Run config and its ``strategy`` section.
        n_features: Column count of ``X_train`` (LSTM input size).
        w_idx: Zero-based window index, used in log messages only.
        n_windows: Accepted for call-site compatibility; unused here.
    """
    model_type = config.get("model_type", "xgboost")

    if model_type == "hybrid":
        return _hybrid_train_and_backtest(
            X_train, y_train, X_val, y_val, X_test, test_df,
            config, strategy, n_features
        )

    # Single model path
    if model_type == "lstm":
        model = build_model(config, input_size=n_features)
    else:
        model = build_model(config)

    try:
        if model_type == "lstm":
            model.fit(X_train, y_train, X_val, y_val)
        else:
            model.fit(X_train, y_train)
    except Exception as e:
        print(f" Window {w_idx+1}: training failed -- {e}", file=sys.stderr)
        return [], None

    # Get predictions; fall back to hard labels for models without
    # predict_proba, and give up on the window if that fails as well.
    try:
        proba = model.predict_proba(X_test)[:, 1]
    except Exception:
        try:
            proba = np.asarray(model.predict(X_test), dtype=float)
        except Exception as e:
            print(f" Window {w_idx+1}: prediction failed -- {e}", file=sys.stderr)
            return [], None

    # Sequence models emit fewer predictions than test rows because the first
    # rows only serve as context. Trim the head of test_df by that shortfall
    # so proba[i] always lines up with row i, on either prediction path.
    shortfall = len(test_df) - len(proba)
    test_df_aligned = test_df.iloc[shortfall:] if shortfall > 0 else test_df

    # Feature importances
    fi = _extract_feature_importances(model, n_features)

    # Backtest
    trades = backtest(test_df_aligned, proba, strategy)

    return trades, fi
def _hybrid_train_and_backtest(X_train, y_train, X_val, y_val, X_test, test_df,
                               config, strategy, n_features):
    """Hybrid: LSTM (60%) + XGBoost (40%), only enter when both agree.

    Both models are trained on the same window. If either one fails, its
    probabilities are replaced by a constant 0.5 and the failure is logged to
    stderr. Rows where the two models fall on different sides of 0.5 get a
    blended probability of exactly 0.5. Feature importances are taken from the
    XGBoost model only.

    Returns:
        ``(trades, feature_importances)``.
    """
    window = config.get("hyperparameters", {}).get("lstm_sequence_length", 20)

    # LSTM leg: emits len(X_test) - window probabilities.
    lstm = build_model({**config, "model_type": "lstm"}, input_size=n_features)
    try:
        lstm.fit(X_train, y_train, X_val, y_val)
        p_lstm = lstm.predict_proba(X_test)[:, 1]
    except Exception as e:
        print(f" Hybrid LSTM failed: {e}", file=sys.stderr)
        p_lstm = np.full(max(0, len(X_test) - window), 0.5)

    # XGBoost leg: emits one probability per test row.
    booster = build_model({**config, "model_type": "xgboost"})
    try:
        booster.fit(X_train, y_train)
        p_xgb = booster.predict_proba(X_test)[:, 1]
    except Exception as e:
        print(f" Hybrid XGBoost failed: {e}", file=sys.stderr)
        p_xgb = np.full(len(X_test), 0.5)

    # Drop the rows the LSTM used purely as context, then clip both series
    # (and the price frame) to their common length.
    p_xgb = p_xgb[window:]
    usable = min(len(p_lstm), len(p_xgb))
    p_lstm, p_xgb = p_lstm[:usable], p_xgb[:usable]
    aligned_df = test_df.iloc[window:window + usable]

    # Weighted blend, neutralised to 0.5 wherever the two models disagree
    # about which side of 0.5 the row falls on.
    blended = 0.6 * p_lstm + 0.4 * p_xgb
    same_side = (p_lstm > 0.5) == (p_xgb > 0.5)
    blended = np.where(same_side, blended, 0.5)

    importances = _extract_feature_importances(booster, n_features)
    return backtest(aligned_df, blended, strategy), importances
def _extract_feature_importances(model, n_features):
"""Extract normalized feature importances from a model."""
try:
if hasattr(model, "feature_importances_"):
fi = model.feature_importances_
elif hasattr(model, "get_booster"):
fi_dict = model.get_booster().get_score(importance_type="gain")
fi = np.array([fi_dict.get(f"f{i}", 0) for i in range(n_features)])
else:
return None
return fi / (fi.sum() + 1e-10)
except Exception:
return None
# ---------------------------------------------------------------------------
# Backtesting
# ---------------------------------------------------------------------------
def backtest(test_df: pd.DataFrame, proba: np.ndarray, strategy: dict) -> list: def backtest(test_df: pd.DataFrame, proba: np.ndarray, strategy: dict) -> list:
"""Simulate trades using model predictions.""" """Simulate trades using model predictions. Supports ATR-based dynamic SL/TP."""
entry_threshold = strategy.get("entry_threshold", 0.6) entry_threshold = strategy.get("entry_threshold", 0.6)
stop_loss = strategy.get("stop_loss_pct", 2.0) / 100 stop_loss_fixed = strategy.get("stop_loss_pct", 2.0) / 100
take_profit = strategy.get("take_profit_pct", 4.0) / 100 take_profit_fixed = strategy.get("take_profit_pct", 4.0) / 100
trailing_stop = strategy.get("trailing_stop_pct", 1.5) / 100 trailing_stop = strategy.get("trailing_stop_pct", 1.5) / 100
exit_type = strategy.get("exit_type", "trailing_stop") exit_type = strategy.get("exit_type", "trailing_stop")
min_confidence = strategy.get("min_confidence_to_trade", 0.55) min_confidence = strategy.get("min_confidence_to_trade", 0.55)
use_dynamic = strategy.get("dynamic_sl_tp", False)
atr_sl_mult = strategy.get("atr_sl_multiplier", 1.5)
atr_tp_mult = strategy.get("atr_tp_multiplier", 3.0)
fee = 0.001 # 0.1% per trade fee = 0.001 # 0.1% per trade
closes = test_df["close"].values closes = test_df["close"].values
highs = test_df["high"].values highs = test_df["high"].values
lows = test_df["low"].values lows = test_df["low"].values
# ATR for dynamic SL/TP
if use_dynamic and "ATR_14" in test_df.columns:
atr_values = test_df["ATR_14"].values
else:
atr_values = None
trades = [] trades = []
i = 0 i = 0
while i < len(closes) - 1: while i < len(closes) - 1:
if i >= len(proba):
break
if proba[i] < min_confidence or proba[i] < entry_threshold: if proba[i] < min_confidence or proba[i] < entry_threshold:
i += 1 i += 1
continue continue
@ -347,6 +764,16 @@ def backtest(test_df: pd.DataFrame, proba: np.ndarray, strategy: dict) -> list:
# Enter trade # Enter trade
entry_price = closes[i] entry_price = closes[i]
confidence = proba[i] confidence = proba[i]
# Compute dynamic SL/TP if enabled
if use_dynamic and atr_values is not None and not np.isnan(atr_values[i]):
atr = atr_values[i]
stop_loss = (atr * atr_sl_mult) / entry_price
take_profit = (atr * atr_tp_mult) / entry_price
else:
stop_loss = stop_loss_fixed
take_profit = take_profit_fixed
# Position sizing based on confidence # Position sizing based on confidence
if strategy.get("position_sizing") == "confidence_scaled": if strategy.get("position_sizing") == "confidence_scaled":
if confidence > 0.8: if confidence > 0.8:
@ -522,7 +949,10 @@ def main():
print(f" Target distribution: {df['target'].value_counts().to_dict()}") print(f" Target distribution: {df['target'].value_counts().to_dict()}")
# Run walk-forward training + backtesting # Run walk-forward training + backtesting
print("\nRunning walk-forward validation...") model_type = config.get("model_type", "xgboost")
rolling = config.get("training", {}).get("rolling_window", False)
print(f"\nModel: {model_type}, Rolling: {rolling}")
print("Running validation...")
results = walk_forward_train_test(df, feature_cols, config) results = walk_forward_train_test(df, feature_cols, config)
# Save results # Save results