#!/usr/bin/env python3
"""
LLM Accumulation Signal Analyzer -- Calls LLM to analyze results and suggest
config modifications for the next iteration.

Supports multiple providers: Ollama, LM Studio, OpenAI, Anthropic, OpenRouter.
"""

import copy
import json
import os
import re

try:
    import requests
except ImportError:
    # Prompt building and response parsing do not need the HTTP client, so the
    # failure is deferred until a provider call is actually attempted.
    requests = None

BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
LLM_SETTINGS_PATH = os.path.join(BASE_DIR, "config", "llm_settings.json")

# Fallback defaults
DEFAULT_OLLAMA_URL = "http://100.100.242.21:11434"
DEFAULT_LMSTUDIO_URL = "http://100.100.242.21:1234"
DEFAULT_MODEL = "qwen3.5:27b"
DEFAULT_ANTHROPIC_MODEL = "claude-sonnet-4-20250514"

# Shared request / sampling parameters
REQUEST_TIMEOUT_SECONDS = 600
LLM_TEMPERATURE = 0.7
LLM_MAX_TOKENS = 4096

# Prompt sizing
HISTORY_LIMIT = 5
TOP_FEATURE_LIMIT = 15

# Top-level sections every returned config must contain.
REQUIRED_CONFIG_KEYS = (
    "model_type",
    "features",
    "target",
    "hyperparameters",
    "strategy",
    "training",
)

# Fixed chat-completions endpoints for hosted OpenAI-compatible providers.
_HOSTED_CHAT_URLS = {
    "openai": "https://api.openai.com/v1/chat/completions",
    "openrouter": "https://openrouter.ai/api/v1/chat/completions",
}

# Reasoning block some models emit before the actual answer.
_THINK_BLOCK_RE = re.compile(r"<think>.*?</think>", re.DOTALL)
# JSON object wrapped in a markdown code fence (optionally tagged "json").
_FENCED_JSON_RE = re.compile(r"```(?:json)?\s*(\{.*?\})\s*```", re.DOTALL)


def load_llm_settings():
    """Load LLM settings from config file, with fallback to defaults.

    Returns:
        The parsed content of ``LLM_SETTINGS_PATH`` when that file exists,
        otherwise a default settings dict selecting the Ollama provider.

    Raises:
        json.JSONDecodeError: If the settings file exists but is not valid
            JSON.
    """
    try:
        with open(LLM_SETTINGS_PATH, encoding="utf-8") as f:
            return json.load(f)
    except FileNotFoundError:
        return {
            "provider": "ollama",
            "model": DEFAULT_MODEL,
            "providers": {
                "ollama": {"base_url": DEFAULT_OLLAMA_URL},
            },
        }


SYSTEM_PROMPT = """You are a quantitative analyst optimizing a BTC ACCUMULATION SIGNAL model.
The goal is NOT day-trading -- it is finding statistically optimal times to BUY BTC for long-term holding.

## Core Question
"Given current market conditions, is NOW a good time to BUY BTC for long-term holding?"

## What the Model Does
For each candle, the model predicts an Accumulation Score (0-100):
- 90-100: STRONG BUY -- historically rare, excellent entry point
- 70-89: GOOD BUY -- better than average entry
- 50-69: NEUTRAL -- average time to buy
- 30-49: WAIT -- price likely to come down
- 0-29: POOR -- historically bad time to buy (near local tops)

The model is trained on ACTUAL forward returns at 7d, 30d, and 90d horizons, weighted 20/30/50.
Times when buying led to the best long-term returns get the highest scores.

## Primary Metric: cost_basis_improvement_pct
This measures how much better the model's average buy price is vs uniform DCA.
- 10%+ = good
- 15%+ = excellent
- 20%+ = exceptional
Also require strong_buy_signal_count >= 30 for statistical validity.

## Config Parameters You Can Modify

**model_type**: "xgboost", "lightgbm", "catboost", "lstm", or "hybrid"
- hybrid: Average of LSTM + XGBoost regression predictions. Recommended default.
- xgboost: Fast GPU training, good for structured features.
- lstm: Captures temporal patterns in price sequences.

**hyperparameters** (gradient boosting):
- learning_rate (0.001-0.1): Lower = more robust. Start conservative.
- max_depth (3-8): Controls complexity. Deeper risks overfitting.
- n_estimators (200-1500): More trees = better fit but diminishing returns.
- subsample (0.5-1.0): Row sampling for regularization.
- colsample_bytree (0.5-1.0): Feature sampling per tree.
- min_child_weight (5-30): Higher = more conservative (important for noisy targets).
- gamma (0-5): Minimum loss reduction for split.
- reg_alpha (0-10): L1 regularization.
- reg_lambda (1-10): L2 regularization. Higher values prevent overfitting.

**hyperparameters** (LSTM):
- lstm_hidden_size (32-256): Hidden units.
- lstm_num_layers (1-4): Stacked layers. 2 is usually optimal.
- lstm_dropout (0.1-0.5): Regularization.
- lstm_epochs (50-200): Max training epochs (early stopping usually triggers).
- lstm_batch_size (32-128): Smaller = noisier but better generalization.
- lstm_sequence_length (15-60): Past candles the LSTM sees. Longer = more context.
- lstm_patience (5-20): Early stopping patience.

**target**:
- forward_periods_4h: List of 3 forward periods in 4h candles [short, medium, long].
  Defaults: [42, 180, 540] = roughly [7d, 30d, 90d]
- weights: Weights for each period. Default [0.2, 0.3, 0.5] (emphasize long-term).
- score_range: [0, 100] -- do not change.

**strategy**:
- strong_buy_threshold (70-95): Score above which = STRONG BUY signal. Higher = fewer but better signals.
- good_buy_threshold (50-80): Score above which = GOOD BUY. Used for cost basis comparison.
- poor_threshold (10-40): Score below which = POOR time to buy.

**features**:
- use_price_position (true/false): Distance from ATH, 52w high/low, percentile.
- use_momentum (true/false): RSI, MACD, Stochastic, Williams %R, ROC.
- use_volatility (true/false): Bollinger Bands, ATR, consecutive red candles, drawdown.
- use_volume (true/false): Volume ratio, OBV, red/green volume ratio.
- use_cycle (true/false): MA cross regime, candles since major drawdown.
- use_pca (true/false): PCA dimensionality reduction.
- pca_variance (0.80-0.99): Variance to retain.
- use_scaler (true/false): StandardScaler. Critical for LSTM.

**training**:
- rolling_window (true/false): Rolling vs static walk-forward.
- rolling_train_size (1500-5000): Training window candles.
- rolling_test_size (100-500): Test window candles.

## Key Metrics to Analyze
1. **cost_basis_improvement_pct**: PRIMARY metric. How much better is model buy price vs DCA.
2. **strong_buy_signal_count**: Must be >= 30 for validity. Too few = lower threshold. Too many = raise it.
3. **signal_frequency_pct**: Should be 5-15%. If outside, adjust thresholds.
4. **avg_score_at_actual_bottoms**: Should be high (>70). Model should recognize bottoms.
5. **avg_score_at_actual_tops**: Should be low (<30). Model should avoid tops.
6. **model_r2_score**: Regression fit quality. > 0.2 is decent for financial data.
7. **per_window_cost_improvement**: Consistency across windows. Low variance = robust.

## Decision Guidelines
- If cost_improvement < 5%: Strategy is barely working. Try: switch model type, enable all features, increase training window, lower good_buy_threshold.
- If cost_improvement 5-10%: Decent. Fine-tune thresholds and hyperparameters.
- If cost_improvement 10-15%: Good. Make targeted improvements -- focus on signal consistency.
- If cost_improvement > 15%: Very good. Be careful not to overfit. Check per_window variance.
- If signal_count < 30: Not statistically valid. Lower strong_buy_threshold, increase training data.
- If signal_frequency > 20%: Too many signals = not selective enough. Raise threshold.
- If signal_frequency < 3%: Too few signals. Lower threshold.
- If score_at_bottoms < 60: Model is missing bottoms. More features, different model type.
- If score_at_tops > 40: Model is not avoiding tops. More regularization.
- If per_window has high variance: Model is unstable. Increase regularization, try hybrid.
- Check feature_importances: price position features should dominate (distance from ATH, percentile).

## Response Format
You MUST respond with ONLY a JSON object (no markdown, no explanation outside the JSON):
```
{
  "reasoning": "Explanation of observations and why you are making these changes",
  "changes": ["Change 1 description", "Change 2 description"],
  "config": { }
}
```
The "config" field must contain the COMPLETE config so it can be used directly."""


def _post_json(url, payload, headers=None):
    """POST ``payload`` as JSON to ``url`` and return the decoded JSON reply.

    Raises:
        ImportError: If the ``requests`` package is not installed.
        requests.HTTPError: If the server answers with an error status.
    """
    if requests is None:
        raise ImportError("The 'requests' package is required to call an LLM provider")
    resp = requests.post(
        url, json=payload, headers=headers, timeout=REQUEST_TIMEOUT_SECONDS
    )
    resp.raise_for_status()
    return resp.json()


def _call_ollama(settings, messages):
    """Call Ollama API.

    Args:
        settings: Settings dict; reads ``model`` and
            ``providers.ollama.base_url``.
        messages: Chat messages (dicts with ``role`` and ``content``).

    Returns:
        The ``message.content`` string of the Ollama chat response.
    """
    provider_cfg = settings.get("providers", {}).get("ollama", {})
    # Tolerate a trailing slash in the configured URL.
    base_url = provider_cfg.get("base_url", DEFAULT_OLLAMA_URL).rstrip("/")
    model = settings.get("model", DEFAULT_MODEL)

    payload = {
        "model": model,
        "messages": messages,
        "stream": False,
        "think": False,
        "options": {"temperature": LLM_TEMPERATURE, "num_predict": LLM_MAX_TOKENS},
    }

    print(f" Calling LLM ({model} via Ollama at {base_url})...")
    data = _post_json(f"{base_url}/api/chat", payload)
    return data["message"]["content"]


def _call_openai_compatible(settings, messages, provider_name):
    """Call OpenAI-compatible API (LM Studio, OpenAI, OpenRouter).

    Args:
        settings: Settings dict; reads ``model`` and
            ``providers.<provider_name>`` (``base_url`` for LM Studio,
            ``api_key`` for OpenAI / OpenRouter).
        messages: Chat messages (dicts with ``role`` and ``content``).
        provider_name: ``"lmstudio"``, ``"openai"`` or ``"openrouter"``.

    Returns:
        The content string of the first choice in the response.

    Raises:
        ValueError: If ``provider_name`` is not one of the supported names.
    """
    provider_cfg = settings.get("providers", {}).get(provider_name, {})
    model = settings.get("model", "")
    headers = {"Content-Type": "application/json"}

    if provider_name == "lmstudio":
        base_url = provider_cfg.get("base_url", DEFAULT_LMSTUDIO_URL).rstrip("/")
        url = f"{base_url}/v1/chat/completions"
    elif provider_name in _HOSTED_CHAT_URLS:
        url = _HOSTED_CHAT_URLS[provider_name]
        headers["Authorization"] = f"Bearer {provider_cfg.get('api_key', '')}"
    else:
        raise ValueError(f"Unknown OpenAI-compatible provider: {provider_name}")

    payload = {
        "model": model,
        "messages": messages,
        "temperature": LLM_TEMPERATURE,
        "max_tokens": LLM_MAX_TOKENS,
    }

    print(f" Calling LLM ({model} via {provider_name})...")
    data = _post_json(url, payload, headers=headers)
    return data["choices"][0]["message"]["content"]


def _call_anthropic(settings, messages):
    """Call Anthropic Messages API.

    Args:
        settings: Settings dict; reads ``model`` and
            ``providers.anthropic.api_key``.
        messages: Chat messages (dicts with ``role`` and ``content``).

    Returns:
        The concatenated text of all ``text`` content blocks in the response.
    """
    provider_cfg = settings.get("providers", {}).get("anthropic", {})
    model = settings.get("model", DEFAULT_ANTHROPIC_MODEL)
    api_key = provider_cfg.get("api_key", "")

    # Anthropic uses system as a top-level param, not in messages. If several
    # system messages are present, the last one wins.
    system_msg = ""
    api_messages = []
    for m in messages:
        if m["role"] == "system":
            system_msg = m["content"]
        else:
            api_messages.append(m)

    payload = {
        "model": model,
        "max_tokens": LLM_MAX_TOKENS,
        "messages": api_messages,
    }
    if system_msg:
        payload["system"] = system_msg

    headers = {
        "Content-Type": "application/json",
        "x-api-key": api_key,
        "anthropic-version": "2023-06-01",
    }

    print(f" Calling LLM ({model} via Anthropic)...")
    data = _post_json(
        "https://api.anthropic.com/v1/messages", payload, headers=headers
    )

    # Extract text from content blocks
    return "".join(
        block["text"]
        for block in data.get("content", [])
        if block.get("type") == "text"
    )


def call_llm(messages):
    """Route LLM call to the configured provider.

    Args:
        messages: Chat messages (dicts with ``role`` and ``content``).

    Returns:
        The text content of the provider's reply.

    Raises:
        ValueError: If the configured provider name is not recognised.
    """
    settings = load_llm_settings()
    provider = settings.get("provider", "ollama")

    if provider == "ollama":
        return _call_ollama(settings, messages)
    if provider in ("lmstudio", "openai", "openrouter"):
        return _call_openai_compatible(settings, messages, provider)
    if provider == "anthropic":
        return _call_anthropic(settings, messages)
    raise ValueError(f"Unknown LLM provider: {provider}")


def extract_json_object(text):
    """Extract and parse the JSON object contained in an LLM reply.

    ``<think>...</think>`` blocks are removed first. A JSON object inside a
    markdown code fence is preferred; otherwise the object starting at the
    first ``{`` is decoded with a real JSON parser, so braces inside string
    values do not confuse the extraction and trailing text is ignored.

    Args:
        text: Raw text returned by the model.

    Returns:
        The decoded JSON object (a dict).

    Raises:
        ValueError: If no ``{`` is present, or the JSON is incomplete or
            invalid (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """
    cleaned = _THINK_BLOCK_RE.sub("", text).strip()

    fenced = _FENCED_JSON_RE.search(cleaned)
    if fenced:
        return json.loads(fenced.group(1))

    brace_start = cleaned.find("{")
    if brace_start < 0:
        raise ValueError(f"No JSON found in LLM response: {cleaned[:200]}")

    try:
        parsed, _end = json.JSONDecoder().raw_decode(cleaned, brace_start)
    except json.JSONDecodeError as err:
        raise ValueError("Could not find complete JSON in LLM response") from err
    return parsed


def _format_history(iteration_history):
    """Render the last ``HISTORY_LIMIT`` iterations as a markdown section."""
    if not iteration_history:
        return ""

    parts = ["\n## Previous Iterations (most recent last)\n"]
    for h in iteration_history[-HISTORY_LIMIT:]:
        parts.append(
            f"- Iteration {h.get('iteration', '?')}: "
            f"CostImprovement={h.get('cost_improvement', 0):.1f}%, "
            f"Signals={h.get('signal_count', 0)}, "
            f"R2={h.get('r2_score', 0):.4f}, "
            f"Model={h.get('model_type', '?')}\n"
        )
    return "".join(parts)


def _top_features(importances, limit=TOP_FEATURE_LIMIT):
    """Return up to ``limit`` entries of ``importances``, largest value first.

    Falls back to insertion order when the values cannot be compared with
    each other.
    """
    items = list((importances or {}).items())
    try:
        # sorted() is stable, so an already-ordered mapping keeps its order.
        items = sorted(items, key=lambda kv: kv[1], reverse=True)
    except TypeError:
        pass
    return dict(items[:limit])


def _build_user_prompt(current_config, results, iteration_history=None):
    """Build the user message describing the current config and its results."""
    history_text = _format_history(iteration_history)
    config_json = json.dumps(current_config, indent=2)
    features_json = json.dumps(
        _top_features(results.get("feature_importances")), indent=2
    )

    return f"""## Current Configuration
```json
{config_json}
```

## Current Results
- Cost Basis Improvement: {results.get('cost_basis_improvement_pct', 0):.1f}%
- Avg Cost (Model): ${results.get('avg_cost_basis_model', 0):,.2f}
- Avg Cost (DCA): ${results.get('avg_cost_basis_dca', 0):,.2f}
- Strong Buy Signals: {results.get('strong_buy_signal_count', 0)}
- Good Buy Signals: {results.get('good_buy_signal_count', 0)}
- Signal Frequency: {results.get('signal_frequency_pct', 0):.1f}%
- Quality of Strong Buys: {results.get('pct_quality_strong_buy', 0):.1%}
- Model R2: {results.get('model_r2_score', 0):.4f}
- Score at Actual Bottoms: {results.get('avg_score_at_actual_bottoms', 0):.1f}
- Score at Actual Tops: {results.get('avg_score_at_actual_tops', 0):.1f}
- Per-Window Improvement: {results.get('per_window_cost_improvement', [])}
- Score Distribution: {results.get('score_distribution', {})}

## Top Feature Importances
{features_json}
{history_text}
Analyze these results and suggest 1-3 specific modifications to the config.
Return ONLY valid JSON."""


def _complete_config(proposed, current_config):
    """Return ``proposed`` with missing required sections taken from ``current_config``.

    A ``proposed`` value that is not a dict is replaced by a deep copy of
    ``current_config``. Copied sections are deep copies so the result never
    shares nested objects with ``current_config``.
    """
    if isinstance(proposed, dict):
        merged = proposed
    else:
        merged = copy.deepcopy(current_config)

    for key in REQUIRED_CONFIG_KEYS:
        if key not in merged and key in current_config:
            merged[key] = copy.deepcopy(current_config[key])
    return merged


def _normalize_changes(changes):
    """Coerce the model's ``changes`` field into a list of strings."""
    if changes is None:
        return []
    if isinstance(changes, str):
        # A bare string would otherwise be joined character by character.
        return [changes]
    try:
        return [str(change) for change in changes]
    except TypeError:
        return [str(changes)]


def analyze_and_suggest(current_config, results, iteration_history=None):
    """
    Send current results to LLM and get suggested config modifications.

    Args:
        current_config: The config dict that produced ``results``.
        results: Metrics dict for the current iteration.
        iteration_history: Optional list of per-iteration summary dicts; only
            the most recent ``HISTORY_LIMIT`` entries are sent.

    Returns:
        (new_config, reasoning), where ``reasoning`` is the model's
        explanation followed by a "Changes: ..." line.

    Raises:
        ValueError: If no parseable JSON object is found in the reply.
    """
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {
            "role": "user",
            "content": _build_user_prompt(current_config, results, iteration_history),
        },
    ]

    parsed = extract_json_object(call_llm(messages))

    reasoning = parsed.get("reasoning", "No reasoning provided")
    changes = _normalize_changes(parsed.get("changes", []))
    new_config = _complete_config(parsed.get("config"), current_config)

    change_summary = f"{reasoning}\nChanges: {', '.join(changes)}"
    return new_config, change_summary


def _main():
    """Run one analysis against a config file using fixed sample results."""
    import sys

    config_path = sys.argv[1] if len(sys.argv) > 1 else "config/initial_config.json"
    with open(config_path, encoding="utf-8") as f:
        config = json.load(f)

    dummy_results = {
        "cost_basis_improvement_pct": 8.5,
        "avg_cost_basis_model": 65000,
        "avg_cost_basis_dca": 71000,
        "strong_buy_signal_count": 45,
        "good_buy_signal_count": 120,
        "signal_frequency_pct": 7.2,
        "pct_quality_strong_buy": 0.72,
        "model_r2_score": 0.22,
        "avg_score_at_actual_bottoms": 68.5,
        "avg_score_at_actual_tops": 35.2,
        "per_window_cost_improvement": [7.1, 9.3, 8.8, 10.2, 7.0],
        "score_distribution": {
            "0-20": 80,
            "20-40": 150,
            "40-60": 200,
            "60-80": 130,
            "80-100": 40,
        },
        "feature_importances": {
            "dist_from_ath_pct": 0.18,
            "RSI_14": 0.12,
            "price_percentile_365": 0.10,
        },
    }

    new_config, reasoning = analyze_and_suggest(config, dummy_results)
    print(f"\nReasoning: {reasoning}")
    print(f"\nNew config:\n{json.dumps(new_config, indent=2)}")


if __name__ == "__main__":
    _main()