#!/usr/bin/env python3
"""
LLM Strategy Analyzer — Calls Ollama on Mac Mini to analyze results
and suggest config modifications for the next iteration.
"""

import json
import re
from typing import Optional

import requests

OLLAMA_URL = "http://100.100.242.21:11434"
MODEL = "qwen3.5:27b"

# Top-level config sections that must be present in whatever config is returned.
_REQUIRED_CONFIG_KEYS = (
    "model_type",
    "features",
    "target",
    "hyperparameters",
    "strategy",
    "training",
)

# Reasoning block some models emit before the actual answer.
_THINK_BLOCK_RE = re.compile(r"<think>.*?</think>", re.DOTALL)

# A JSON object wrapped in a markdown code fence (optionally tagged "json").
_FENCED_JSON_RE = re.compile(r"```(?:json)?\s*(\{.*?\})\s*```", re.DOTALL)

SYSTEM_PROMPT = """You are a quantitative trading strategy optimizer. You analyze ML model backtesting results for a BTC/USDT trading strategy and suggest precise modifications to improve performance.

## Your Task
Given the current configuration and results, suggest 1-3 specific, justified changes to the configuration for the next iteration. Be methodical and scientific — change one thing at a time when possible.

## Config Parameters You Can Modify

**model_type**: "xgboost", "lightgbm", "catboost", or "ensemble"
- xgboost: Generally best for structured data, fast GPU training
- lightgbm: Faster training, good with large feature sets
- catboost: Handles feature interactions well, less tuning needed
- ensemble: Combines all three, reduces variance but slower

**hyperparameters**:
- learning_rate (0.001-0.3): Lower = more robust but slower. If overfitting, decrease.
- max_depth (3-10): Controls model complexity. Deeper = more overfitting risk.
- n_estimators (100-2000): More trees = better fit but diminishing returns.
- subsample (0.5-1.0): Row sampling. Lower = more regularization.
- colsample_bytree (0.5-1.0): Feature sampling per tree. Lower = more diversity.
- min_child_weight (1-20): Higher = more conservative splits.
- gamma (0-5): Minimum loss reduction for split. Higher = more pruning.
- reg_alpha (0-10): L1 regularization. Encourages sparsity.
- reg_lambda (0-10): L2 regularization. Prevents large weights.

**target**:
- direction: "long" or "both"
- horizon_candles (1-20): How far ahead to predict. Longer = smoother but lagging.
- threshold_pct (0.3-3.0): Minimum move % to label as positive. Higher = fewer but clearer signals.

**strategy**:
- entry_threshold (0.5-0.8): Min prediction probability to enter trade. Higher = fewer trades, higher quality.
- stop_loss_pct (0.5-5.0): Max loss before exit. Tighter = more stopped out.
- take_profit_pct (1.0-10.0): Target profit. Should be > stop_loss for positive expectancy.
- trailing_stop_pct (0.5-3.0): Trailing stop distance. Tighter = locks profit faster but exits early.
- min_confidence_to_trade (0.5-0.9): Absolute minimum confidence to consider.
- exit_type: "trailing_stop" or "fixed" (just SL/TP)

**features**:
- use_volume_features (true/false): Volume features can be noisy in crypto.
- use_candle_patterns (true/false): Candle patterns may or may not help.
- use_lag_features (true/false): Lagged features capture momentum.
- lag_periods: List of lag periods [1,2,3,5,10]
- lookback_periods: List of lookback windows [3,5,10,20]

**training**:
- walk_forward_windows (3-10): More windows = more robust but less data per window.

## Key Metrics to Optimize (in priority order)
1. **Sharpe Ratio** (target: > 2.0): Risk-adjusted return. Most important metric.
2. **Profit Factor** (target: > 1.5): Gross profit / gross loss.
3. **Max Drawdown** (target: > -15%): Worst peak-to-trough decline.
4. **Win Rate** (target: > 55%): Percentage of winning trades.
5. **Trade Count**: Need enough trades for statistical significance (>50).

## Decision Guidelines
- If Sharpe < 1.0: The strategy is not working well. Consider larger changes.
- If Sharpe 1.0-1.5: Decent. Fine-tune hyperparameters and thresholds.
- If Sharpe 1.5-2.0: Good. Make small, targeted improvements.
- If Sharpe > 2.0: Very good. Be careful not to overfit.
- If win_rate < 0.50 but profit_factor > 1.5: Strategy relies on big wins — ok, tighten SL.
- If win_rate > 0.60 but profit_factor < 1.2: Many small wins but losses are too big — widen TP or tighten SL.
- If trade_count < 30: Not enough trades. Lower entry_threshold or min_confidence.
- If max_drawdown < -20%: Too risky. Increase regularization, tighten stop loss.
- If per_window_sharpe has high variance: Model is not stable. More regularization or simpler model.
- Check feature_importances: If top features make financial sense, good. If random features dominate, possible overfitting.

## Response Format
You MUST respond with ONLY a JSON object (no markdown, no explanation outside the JSON):

```
{
  "reasoning": "Explanation of what you observed and why you're making these changes",
  "changes": ["Change 1 description", "Change 2 description"],
  "config": { }
}
```

The "config" field must contain the COMPLETE config (not just changes) so it can be used directly."""


def _format_history(iteration_history: Optional[list]) -> str:
    """Render the last five iteration summaries as a markdown section ("" if none)."""
    if not iteration_history:
        return ""
    rows = [
        f"- Iteration {h['iteration']}: Sharpe={h['sharpe']}, "
        f"Return={h['return']}%, WinRate={h['win_rate']}, "
        f"Trades={h['trades']}, Model={h['model_type']}\n"
        for h in iteration_history[-5:]
    ]
    return "\n## Previous Iterations (most recent last)\n" + "".join(rows)


def _extract_json_object(text: str) -> dict:
    """Pull the first JSON object out of an LLM reply.

    A fenced code block is preferred; otherwise the object starting at the
    first ``{`` is decoded.

    Raises:
        ValueError: if the text contains no ``{`` or no decodable object.
    """
    fenced = _FENCED_JSON_RE.search(text)
    if fenced:
        try:
            return json.loads(fenced.group(1))
        except json.JSONDecodeError:
            # The fence regex can cut the object short; fall back to the scan below.
            pass

    start = text.find("{")
    if start < 0:
        raise ValueError(f"No JSON found in LLM response: {text[:200]}")
    try:
        # raw_decode stops at the end of the first complete value and, unlike
        # manual brace counting, is not confused by braces inside strings.
        parsed, _end = json.JSONDecoder().raw_decode(text, start)
    except json.JSONDecodeError as err:
        raise ValueError("Could not find complete JSON in LLM response") from err
    return parsed


def analyze_and_suggest(current_config: dict, results: dict,
                        iteration_history: Optional[list] = None) -> tuple[dict, str]:
    """
    Send current results to LLM and get suggested config modifications.

    Args:
        current_config: The configuration that produced ``results``. It is
            embedded in the prompt and used to back-fill any required
            top-level section missing from the LLM's answer. It is not mutated.
        results: Backtest metrics; missing metrics are reported as 0 (or an
            empty list/dict). Only the first 15 entries of
            ``feature_importances`` are sent.
        iteration_history: Optional list of per-iteration summary dicts with
            keys ``iteration``, ``sharpe``, ``return``, ``win_rate``,
            ``trades`` and ``model_type``; only the last five are sent.

    Returns (new_config, reasoning), where reasoning is the LLM's explanation
    followed by a "Changes: ..." line.

    Raises:
        requests.RequestException: on transport errors or a non-2xx response.
        ValueError: if no JSON object can be extracted from the reply.
    """
    # Build the user prompt with context
    history_text = _format_history(iteration_history)
    top_features = dict(list(results.get("feature_importances", {}).items())[:15])

    user_prompt = f"""## Current Configuration
```json
{json.dumps(current_config, indent=2)}
```

## Current Results
- Sharpe Ratio: {results.get('sharpe_ratio', 0)}
- Total Return: {results.get('total_return_pct', 0)}%
- Max Drawdown: {results.get('max_drawdown_pct', 0)}%
- Win Rate: {results.get('win_rate', 0)}
- Trade Count: {results.get('trade_count', 0)}
- Profit Factor: {results.get('profit_factor', 0)}
- Avg Trade Duration: {results.get('avg_trade_duration_candles', 0)} candles
- Per-Window Sharpe: {results.get('per_window_sharpe', [])}

## Top Feature Importances
{json.dumps(top_features, indent=2)}

{history_text}

Analyze these results and suggest 1-3 specific modifications to the config. Return ONLY valid JSON."""

    # Call Ollama
    payload = {
        "model": MODEL,
        "messages": [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": user_prompt},
        ],
        "stream": False,
        "think": False,
        "options": {
            "temperature": 0.7,
            "num_predict": 4096,
        },
    }

    print(f"  Calling LLM ({MODEL} on Mac Mini)...")
    resp = requests.post(f"{OLLAMA_URL}/api/chat", json=payload, timeout=600)
    resp.raise_for_status()

    content = resp.json()["message"]["content"]

    # Strip thinking tags if present, then parse JSON from the response
    # (handles markdown code blocks as well as bare objects).
    content = _THINK_BLOCK_RE.sub("", content).strip()
    parsed = _extract_json_object(content)

    reasoning = parsed.get("reasoning", "No reasoning provided")

    changes = parsed.get("changes", [])
    if isinstance(changes, str):
        # A bare string would otherwise be joined character by character.
        changes = [changes]
    elif not isinstance(changes, (list, tuple)):
        changes = []

    suggested = parsed.get("config")
    # Work on a copy so the caller's current_config is never modified, and
    # fall back to it when the LLM omitted "config" or sent a non-object.
    new_config = dict(suggested) if isinstance(suggested, dict) else dict(current_config)

    # Validate that config has required fields
    for key in _REQUIRED_CONFIG_KEYS:
        if key not in new_config and key in current_config:
            new_config[key] = current_config[key]

    change_summary = f"{reasoning}\nChanges: {', '.join(str(c) for c in changes)}"
    return new_config, change_summary


if __name__ == "__main__":
    # Test with dummy data
    import sys

    config_path = sys.argv[1] if len(sys.argv) > 1 else "config/initial_config.json"
    with open(config_path, encoding="utf-8") as f:
        config = json.load(f)

    dummy_results = {
        "sharpe_ratio": 1.2,
        "total_return_pct": 15.3,
        "max_drawdown_pct": -12.5,
        "win_rate": 0.55,
        "trade_count": 120,
        "profit_factor": 1.4,
        "avg_trade_duration_candles": 7.2,
        "feature_importances": {"RSI_14": 0.15, "MACD_hist": 0.12, "BB_width": 0.10},
        "per_window_sharpe": [1.0, 1.3, 1.5, 0.9, 1.1],
    }

    new_config, reasoning = analyze_and_suggest(config, dummy_results)
    print(f"\nReasoning: {reasoning}")
    print(f"\nNew config:\n{json.dumps(new_config, indent=2)}")