#!/usr/bin/env python3
"""
LLM Accumulation Signal Analyzer -- Calls Ollama on Mac Mini to analyze results
and suggest config modifications for the next iteration.
"""

import json
import re
from typing import Any, Dict, List, Optional, Tuple

import requests

OLLAMA_URL = "http://100.100.242.21:11434"
MODEL = "qwen3.5:27b"

SYSTEM_PROMPT = """You are a quantitative analyst optimizing a BTC ACCUMULATION SIGNAL model.
The goal is NOT day-trading -- it is finding statistically optimal times to BUY BTC for long-term holding.

## Core Question
"Given current market conditions, is NOW a good time to BUY BTC for long-term holding?"

## What the Model Does
For each candle, the model predicts an Accumulation Score (0-100):
- 90-100: STRONG BUY -- historically rare, excellent entry point
- 70-89: GOOD BUY -- better than average entry
- 50-69: NEUTRAL -- average time to buy
- 30-49: WAIT -- price likely to come down
- 0-29: POOR -- historically bad time to buy (near local tops)

The model is trained on ACTUAL forward returns at 7d, 30d, and 90d horizons, weighted 20/30/50.
Times when buying led to the best long-term returns get the highest scores.

## Primary Metric: cost_basis_improvement_pct
This measures how much better the model's average buy price is vs uniform DCA.
- 10%+ = good
- 15%+ = excellent
- 20%+ = exceptional

Also require strong_buy_signal_count >= 30 for statistical validity.

## Config Parameters You Can Modify

**model_type**: "xgboost", "lightgbm", "catboost", "lstm", or "hybrid"
- hybrid: Average of LSTM + XGBoost regression predictions. Recommended default.
- xgboost: Fast GPU training, good for structured features.
- lstm: Captures temporal patterns in price sequences.

**hyperparameters** (gradient boosting):
- learning_rate (0.001-0.1): Lower = more robust. Start conservative.
- max_depth (3-8): Controls complexity. Deeper risks overfitting.
- n_estimators (200-1500): More trees = better fit but diminishing returns.
- subsample (0.5-1.0): Row sampling for regularization.
- colsample_bytree (0.5-1.0): Feature sampling per tree.
- min_child_weight (5-30): Higher = more conservative (important for noisy targets).
- gamma (0-5): Minimum loss reduction for split.
- reg_alpha (0-10): L1 regularization.
- reg_lambda (1-10): L2 regularization. Higher values prevent overfitting.

**hyperparameters** (LSTM):
- lstm_hidden_size (32-256): Hidden units.
- lstm_num_layers (1-4): Stacked layers. 2 is usually optimal.
- lstm_dropout (0.1-0.5): Regularization.
- lstm_epochs (50-200): Max training epochs (early stopping usually triggers).
- lstm_batch_size (32-128): Smaller = noisier but better generalization.
- lstm_sequence_length (15-60): Past candles the LSTM sees. Longer = more context.
- lstm_patience (5-20): Early stopping patience.

**target**:
- forward_periods_4h: List of 3 forward periods in 4h candles [short, medium, long].
  Defaults: [42, 180, 540] = roughly [7d, 30d, 90d]
- weights: Weights for each period. Default [0.2, 0.3, 0.5] (emphasize long-term).
- score_range: [0, 100] -- do not change.

**strategy**:
- strong_buy_threshold (70-95): Score above which = STRONG BUY signal. Higher = fewer but better signals.
- good_buy_threshold (50-80): Score above which = GOOD BUY. Used for cost basis comparison.
- poor_threshold (10-40): Score below which = POOR time to buy.

**features**:
- use_price_position (true/false): Distance from ATH, 52w high/low, percentile.
- use_momentum (true/false): RSI, MACD, Stochastic, Williams %R, ROC.
- use_volatility (true/false): Bollinger Bands, ATR, consecutive red candles, drawdown.
- use_volume (true/false): Volume ratio, OBV, red/green volume ratio.
- use_cycle (true/false): MA cross regime, candles since major drawdown.
- use_pca (true/false): PCA dimensionality reduction.
- pca_variance (0.80-0.99): Variance to retain.
- use_scaler (true/false): StandardScaler. Critical for LSTM.

**training**:
- rolling_window (true/false): Rolling vs static walk-forward.
- rolling_train_size (1500-5000): Training window candles.
- rolling_test_size (100-500): Test window candles.

## Key Metrics to Analyze

1. **cost_basis_improvement_pct**: PRIMARY metric. How much better is model buy price vs DCA.
2. **strong_buy_signal_count**: Must be >= 30 for validity. Too few = lower threshold. Too many = raise it.
3. **signal_frequency_pct**: Should be 5-15%. If outside, adjust thresholds.
4. **avg_score_at_actual_bottoms**: Should be high (>70). Model should recognize bottoms.
5. **avg_score_at_actual_tops**: Should be low (<30). Model should avoid tops.
6. **model_r2_score**: Regression fit quality. > 0.2 is decent for financial data.
7. **per_window_cost_improvement**: Consistency across windows. Low variance = robust.

## Decision Guidelines

- If cost_improvement < 5%: Strategy is barely working. Try: switch model type, enable all features, increase training window, lower good_buy_threshold.
- If cost_improvement 5-10%: Decent. Fine-tune thresholds and hyperparameters.
- If cost_improvement 10-15%: Good. Make targeted improvements -- focus on signal consistency.
- If cost_improvement > 15%: Very good. Be careful not to overfit. Check per_window variance.
- If signal_count < 30: Not statistically valid. Lower strong_buy_threshold, increase training data.
- If signal_frequency > 20%: Too many signals = not selective enough. Raise threshold.
- If signal_frequency < 3%: Too few signals. Lower threshold.
- If score_at_bottoms < 60: Model is missing bottoms. More features, different model type.
- If score_at_tops > 40: Model is not avoiding tops. More regularization.
- If per_window has high variance: Model is unstable. Increase regularization, try hybrid.
- Check feature_importances: price position features should dominate (distance from ATH, percentile).

## Response Format

You MUST respond with ONLY a JSON object (no markdown, no explanation outside the JSON):
```
{
  "reasoning": "Explanation of observations and why you are making these changes",
  "changes": ["Change 1 description", "Change 2 description"],
  "config": { }
}
```

The "config" field must contain the COMPLETE config so it can be used directly."""

# Top-level config sections that are copied over from the current config when
# the LLM's suggested config omits them.
_REQUIRED_CONFIG_KEYS = (
    "model_type",
    "features",
    "target",
    "hyperparameters",
    "strategy",
    "training",
)

# Only the most recent iterations are shown to the LLM to keep the prompt short.
_MAX_HISTORY_ENTRIES = 5

# Seconds to wait for the (non-streaming) chat completion.
_REQUEST_TIMEOUT_SECONDS = 600

# Reasoning block some models emit before the answer; must be removed before
# looking for JSON because it may itself contain braces.
_THINK_BLOCK_RE = re.compile(r"<think>.*?</think>", re.DOTALL)

# A JSON object wrapped in a Markdown code fence (optionally tagged "json").
_FENCED_JSON_RE = re.compile(r"```(?:json)?\s*(\{.*?\})\s*```", re.DOTALL)


def _format_history(iteration_history: Optional[List[Dict[str, Any]]]) -> str:
    """Render the last few iteration summaries as a Markdown section ('' if none)."""
    if not iteration_history:
        return ""
    entries = [
        f"- Iteration {h.get('iteration', '?')}: "
        f"CostImprovement={h.get('cost_improvement', 0):.1f}%, "
        f"Signals={h.get('signal_count', 0)}, "
        f"R2={h.get('r2_score', 0):.4f}, "
        f"Model={h.get('model_type', '?')}\n"
        for h in iteration_history[-_MAX_HISTORY_ENTRIES:]
    ]
    return "\n## Previous Iterations (most recent last)\n" + "".join(entries)


def _build_user_prompt(
    current_config: Dict[str, Any],
    results: Dict[str, Any],
    history_text: str,
) -> str:
    """Build the user message: current config, result metrics, top features, history."""
    # Only the first 15 entries (in the dict's own order) are sent to the LLM.
    top_features = dict(list(results.get('feature_importances', {}).items())[:15])
    return f"""## Current Configuration
```json
{json.dumps(current_config, indent=2)}
```

## Current Results
- Cost Basis Improvement: {results.get('cost_basis_improvement_pct', 0):.1f}%
- Avg Cost (Model): ${results.get('avg_cost_basis_model', 0):,.2f}
- Avg Cost (DCA): ${results.get('avg_cost_basis_dca', 0):,.2f}
- Strong Buy Signals: {results.get('strong_buy_signal_count', 0)}
- Good Buy Signals: {results.get('good_buy_signal_count', 0)}
- Signal Frequency: {results.get('signal_frequency_pct', 0):.1f}%
- Quality of Strong Buys: {results.get('pct_quality_strong_buy', 0):.1%}
- Model R2: {results.get('model_r2_score', 0):.4f}
- Score at Actual Bottoms: {results.get('avg_score_at_actual_bottoms', 0):.1f}
- Score at Actual Tops: {results.get('avg_score_at_actual_tops', 0):.1f}
- Per-Window Improvement: {results.get('per_window_cost_improvement', [])}
- Score Distribution: {results.get('score_distribution', {})}

## Top Feature Importances
{json.dumps(top_features, indent=2)}
{history_text}
Analyze these results and suggest 1-3 specific modifications to the config. Return ONLY valid JSON."""


def _extract_json_object(content: str) -> Dict[str, Any]:
    """Pull the first JSON object out of raw LLM text.

    Reasoning blocks are removed first. A fenced code block is preferred;
    otherwise decoding starts at the first '{' and stops at the end of that
    JSON value, ignoring any trailing text.

    Raises:
        ValueError: if the text contains no '{' at all.
        json.JSONDecodeError: (a ValueError subclass) if the candidate text is
            not valid or not complete JSON.
    """
    content = _THINK_BLOCK_RE.sub("", content).strip()

    fenced = _FENCED_JSON_RE.search(content)
    if fenced:
        return json.loads(fenced.group(1))

    brace_start = content.find("{")
    if brace_start < 0:
        raise ValueError(f"No JSON found in LLM response: {content[:200]}")

    # raw_decode understands JSON string syntax, so braces that appear inside
    # string values (e.g. in the "reasoning" text) cannot unbalance the parse
    # the way a plain character-level brace counter would.
    try:
        parsed, _end = json.JSONDecoder().raw_decode(content, brace_start)
    except json.JSONDecodeError as err:
        raise json.JSONDecodeError(
            f"Could not find complete JSON in LLM response ({err.msg})",
            err.doc,
            err.pos,
        ) from err
    return parsed


def analyze_and_suggest(
    current_config: Dict[str, Any],
    results: Dict[str, Any],
    iteration_history: Optional[List[Dict[str, Any]]] = None,
) -> Tuple[Dict[str, Any], str]:
    """
    Send current results to LLM and get suggested config modifications.

    Args:
        current_config: The configuration that produced ``results``. It is
            serialized into the prompt and used to backfill any required
            top-level section missing from the LLM's suggestion.
        results: Metrics of the current run; missing keys are rendered with
            neutral defaults (0, empty list/dict).
        iteration_history: Optional list of per-iteration summary dicts; only
            the last few are included in the prompt.

    Returns:
        ``(new_config, change_summary)`` where ``change_summary`` is the LLM's
        reasoning followed by a "Changes: ..." line. If the response has no
        usable "config" object, ``current_config`` itself is returned.

    Raises:
        requests.RequestException: on connection failure, timeout, or a
            non-2xx HTTP status.
        ValueError: if no JSON object can be extracted from the response.
    """
    user_prompt = _build_user_prompt(
        current_config, results, _format_history(iteration_history)
    )

    payload = {
        "model": MODEL,
        "messages": [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": user_prompt},
        ],
        "stream": False,
        "think": False,
        "options": {
            "temperature": 0.7,
            "num_predict": 4096,
        },
    }

    print(f" Calling LLM ({MODEL} on Mac Mini)...")
    resp = requests.post(
        f"{OLLAMA_URL}/api/chat", json=payload, timeout=_REQUEST_TIMEOUT_SECONDS
    )
    resp.raise_for_status()
    content = resp.json()["message"]["content"]

    parsed = _extract_json_object(content)

    reasoning = parsed.get("reasoning", "No reasoning provided")

    # The LLM does not always honour the schema: tolerate a missing/null value,
    # a bare string, or non-string list items instead of failing in join().
    changes = parsed.get("changes") or []
    if not isinstance(changes, (list, tuple)):
        changes = [changes]

    new_config = parsed.get("config", current_config)
    if not isinstance(new_config, dict):
        # e.g. "config": null -- keep the current config rather than crash.
        new_config = current_config

    for key in _REQUIRED_CONFIG_KEYS:
        if key not in new_config and key in current_config:
            new_config[key] = current_config[key]

    change_summary = f"{reasoning}\nChanges: {', '.join(str(c) for c in changes)}"
    return new_config, change_summary


if __name__ == "__main__":
    import sys

    config_path = sys.argv[1] if len(sys.argv) > 1 else "config/initial_config.json"
    with open(config_path, encoding="utf-8") as f:
        config = json.load(f)

    # Hand-written sample metrics so the module can be smoke-tested standalone.
    dummy_results = {
        "cost_basis_improvement_pct": 8.5,
        "avg_cost_basis_model": 65000,
        "avg_cost_basis_dca": 71000,
        "strong_buy_signal_count": 45,
        "good_buy_signal_count": 120,
        "signal_frequency_pct": 7.2,
        "pct_quality_strong_buy": 0.72,
        "model_r2_score": 0.22,
        "avg_score_at_actual_bottoms": 68.5,
        "avg_score_at_actual_tops": 35.2,
        "per_window_cost_improvement": [7.1, 9.3, 8.8, 10.2, 7.0],
        "score_distribution": {"0-20": 80, "20-40": 150, "40-60": 200, "60-80": 130, "80-100": 40},
        "feature_importances": {"dist_from_ath_pct": 0.18, "RSI_14": 0.12, "price_percentile_365": 0.10},
    }

    new_config, reasoning = analyze_and_suggest(config, dummy_results)
    print(f"\nReasoning: {reasoning}")
    print(f"\nNew config:\n{json.dumps(new_config, indent=2)}")