feat: ML-optimized accumulation scoring with dashboard toggle

Train a GradientBoostingClassifier on 2,601 days of historical data (2018-2025) to find optimal metric weights for identifying the best long-term buying opportunities. Uses time-series cross-validation to prevent look-ahead bias. Key results: - pct_above_200w_sma: 50.7% weight (was 11.1% equal) - drawdown: 14.6%, lth_rp: 10.9%, rhodl: 8.9% - fear_greed demoted from 11.1% to 5.1% - nupl/mvrv nearly eliminated (0.7-1.8%) ML Strong Accumulation bracket: avg +210% 1yr (vs +176% classic) New files: ml/optimizer.py, config/ml_weights.json Modified: scoring/engine.py (score_all_ml), backtesting/engine.py (ml_mode), dashboard/server.py (Classic/ML toggle) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-21 23:18:29 +00:00 · 2026-03-21 23:18:29 +00:00 · 4647c596b3
commit 4647c596b3
parent f1d38f9abb
6 changed files with 942 additions and 18 deletions
--- a/backtesting/engine.py
+++ b/backtesting/engine.py
@ -121,8 +121,35 @@ def _compute_ath_series(price_lookup, dates):
    return drawdowns
-def score_day(date, index, drawdowns):
+def _load_ml_weights():
-    """Score a single day using all available metrics. Returns (composite_score, individual_scores, n_metrics)."""
+    """Load ML weights for ML-optimized scoring mode."""
    ml_path = _os.path.join(_os.path.dirname(_os.path.dirname(_os.path.abspath(__file__))), "config", "ml_weights.json")
    try:
        with open(ml_path) as f:
            data = _json.load(f)
        return data.get("weights", {})
    except Exception:
        return {}
 # ML weight key mapping (backtest metric keys -> ML weight keys).
 # score_day() resolves keys with `_BT_ML_KEY_MAP.get(metric_key, metric_key)`,
 # so a metric missing from this table is looked up under its own name.
 # The right-hand names are the keys of the "weights" object in
 # config/ml_weights.json.
 _BT_ML_KEY_MAP = {
    # Identity entries: same name on both sides, listed for completeness.
    "fear_greed": "fear_greed",
    "puell_multiple": "puell_multiple",
    "mvrv_zscore": "mvrv_zscore",
    "reserve_risk": "reserve_risk",
    "rhodl_ratio": "rhodl_ratio",
    "nupl": "nupl",
    # The only two real renames: the backtest and the ML weights file use
    # different names for the price-ratio metrics.
    "price_vs_200w_sma": "pct_above_200w_sma",
    "lth_realized_price": "pct_above_lth_rp",
    "drawdown": "drawdown",
 }
 def score_day(date, index, drawdowns, ml_weights=None):
    """Score a single day using all available metrics. Returns (composite_score, individual_scores, n_metrics).
    If ml_weights is provided, uses ML-optimized weighting instead of equal weights.
    """
    scores = []
    details = {}
@ -163,6 +190,20 @@ def score_day(date, index, drawdowns):
    if not scores:
        return None, details, 0
    if ml_weights:
        # ML-weighted composite
        weighted_sum = 0.0
        weight_total = 0.0
        for metric_key, info in details.items():
            ml_key = _BT_ML_KEY_MAP.get(metric_key, metric_key)
            w = ml_weights.get(ml_key, 0.0)
            weighted_sum += info["score"] * w
            weight_total += w
        if weight_total > 0:
            composite = weighted_sum / weight_total * 10
        else:
            composite = sum(scores) / len(scores) * 10
    else:
        composite = sum(scores) / len(scores) * 10
    return round(composite, 1), details, len(scores)
@ -208,9 +249,12 @@ def compute_max_drawdown_forward(price_lookup, date, window=90):
    return round(max_dd, 2) if max_dd > 0 else 0
-def run_backtest():
+def run_backtest(ml_mode=False):
-    """Run the full backtest and return comprehensive results."""
+    """Run the full backtest and return comprehensive results.
-    log.info("Loading historical data...")
+
    If ml_mode=True, uses ML-optimized metric weights instead of equal weights.
    """
    log.info("Loading historical data... (ml_mode=%s)", ml_mode)
    if not os.path.exists(HISTORY_PATH):
        return {"error": "No historical data found. Run history collector first."}
@ -240,11 +284,17 @@ def run_backtest():
    log.info("Computing forward returns...")
    fwd_returns = compute_forward_returns(price_lookup, all_dates)
    # Load ML weights if in ML mode
    ml_weights = _load_ml_weights() if ml_mode else None
    if ml_mode and not ml_weights:
        log.warning("ML mode requested but no weights found — falling back to equal weights")
        ml_weights = None
    # Score each day
    log.info("Scoring %d days...", len(all_dates))
    daily_scores = []
    for d in all_dates:
-        composite, details, n_metrics = score_day(d, index, drawdowns)
+        composite, details, n_metrics = score_day(d, index, drawdowns, ml_weights=ml_weights)
        if composite is not None and n_metrics >= 3:  # Require at least 3 metrics
            price = price_lookup.get(d)
            entry = {
@ -435,6 +485,7 @@ def run_backtest():
        "signal_events": signal_events,
        "current_context": current_context,
        "chart_data": chart_data,
        "ml_mode": ml_mode,
        "computed_at": datetime.utcnow().isoformat() + "Z",
    }
--- a/config/ml_weights.json
+++ b/config/ml_weights.json
@ -0,0 +1,159 @@
 {
  "weights": {
    "pct_above_200w_sma": 0.5075,
    "drawdown": 0.1459,
    "pct_above_lth_rp": 0.1095,
    "rhodl_ratio": 0.089,
    "fear_greed": 0.0515,
    "reserve_risk": 0.046,
    "puell_multiple": 0.0255,
    "mvrv_zscore": 0.0182,
    "nupl": 0.0068
  },
  "feature_importances": {
    "raw_pct_above_200w_sma": 0.436377,
    "days_since_ath": 0.119405,
    "raw_pct_above_lth_rp": 0.109451,
    "raw_rhodl_ratio": 0.088999,
    "score_pct_above_200w_sma": 0.071148,
    "raw_fear_greed": 0.051475,
    "puell_x_reserve": 0.032886,
    "raw_drawdown": 0.026474,
    "raw_reserve_risk": 0.021707,
    "raw_mvrv_zscore": 0.012429,
    "raw_puell_multiple": 0.008599,
    "delta_30d_reserve_risk": 0.007865,
    "delta_30d_mvrv_zscore": 0.004263,
    "raw_nupl": 0.003271,
    "mvrv_x_nupl": 0.002979,
    "delta_30d_nupl": 0.002056,
    "delta_30d_puell_multiple": 0.000473,
    "score_fear_greed": 6.8e-05,
    "score_mvrv_zscore": 5.4e-05,
    "score_puell_multiple": 1e-05,
    "score_pct_above_lth_rp": 6e-06,
    "score_rhodl_ratio": 2e-06,
    "score_reserve_risk": 0.0,
    "score_nupl": 0.0,
    "score_drawdown": 0.0
  },
  "cv_results": {
    "mean_auc": 0.6164,
    "std_auc": 0.3317,
    "mean_f1": 0.6736,
    "mean_precision": 0.8015,
    "mean_recall": 0.7047
  },
  "training_info": {
    "n_samples": 2601,
    "n_positive": 1553,
    "positive_rate": 0.5971,
    "n_features": 25,
    "target_threshold": 30.0,
    "date_range": "2018-02-01 to 2025-03-21",
    "model": "GradientBoostingClassifier"
  },
  "comparison": {
    "equal_weight": [
      {
        "range": "0-20",
        "label": "Extreme Caution",
        "days": 295,
        "avg_365d": -5.94,
        "median_365d": -11.99,
        "win_rate_365d": 35.6
      },
      {
        "range": "21-40",
        "label": "Caution",
        "days": 587,
        "avg_365d": 23.84,
        "median_365d": -7.2,
        "win_rate_365d": 45.3
      },
      {
        "range": "41-55",
        "label": "Neutral",
        "days": 697,
        "avg_365d": 108.96,
        "median_365d": 75.92,
        "win_rate_365d": 70.4
      },
      {
        "range": "56-70",
        "label": "Moderate Opportunity",
        "days": 450,
        "avg_365d": 128.81,
        "median_365d": 109.03,
        "win_rate_365d": 96.4
      },
      {
        "range": "71-85",
        "label": "Strong Accumulation",
        "days": 275,
        "avg_365d": 175.76,
        "median_365d": 117.95,
        "win_rate_365d": 86.9
      },
      {
        "range": "86-100",
        "label": "Extreme Accumulation",
        "days": 247,
        "avg_365d": 115.5,
        "median_365d": 90.08,
        "win_rate_365d": 100.0
      }
    ],
    "ml_weighted": [
      {
        "range": "0-20",
        "label": "Extreme Caution",
        "days": 577,
        "avg_365d": -6.17,
        "median_365d": -26.21,
        "win_rate_365d": 27.0
      },
      {
        "range": "21-40",
        "label": "Caution",
        "days": 855,
        "avg_365d": 77.5,
        "median_365d": 39.28,
        "win_rate_365d": 72.7
      },
      {
        "range": "41-55",
        "label": "Neutral",
        "days": 241,
        "avg_365d": 165.77,
        "median_365d": 124.05,
        "win_rate_365d": 92.5
      },
      {
        "range": "56-70",
        "label": "Moderate Opportunity",
        "days": 328,
        "avg_365d": 144.47,
        "median_365d": 124.27,
        "win_rate_365d": 89.6
      },
      {
        "range": "71-85",
        "label": "Strong Accumulation",
        "days": 201,
        "avg_365d": 210.2,
        "median_365d": 122.22,
        "win_rate_365d": 99.0
      },
      {
        "range": "86-100",
        "label": "Extreme Accumulation",
        "days": 287,
        "avg_365d": 113.92,
        "median_365d": 99.53,
        "win_rate_365d": 100.0
      }
    ]
  },
  "trained_at": "2026-03-21T23:15:38.277703+00:00"
 }
--- a/dashboard/server.py
+++ b/dashboard/server.py
@ -192,10 +192,17 @@ def run_scrape(force_full=False):
            if "_onchain_timestamp" in existing_cache:
                metrics["_onchain_timestamp"] = existing_cache["_onchain_timestamp"]
-        # 4. Score everything
+        # 4. Score everything (classic + ML)
        log.info("Scoring metrics...")
        scored = engine.score_all(metrics)
        metrics["_scored"] = scored
        # ML-optimized scoring (parallel)
        try:
            scored_ml = engine.score_all_ml(metrics)
            metrics["_scored_ml"] = scored_ml
        except Exception as e:
            log.warning("ML scoring failed (non-critical): %s", e)
        metrics["_timestamp"] = datetime.now(timezone.utc).isoformat()
        save_cache(metrics)
@ -335,9 +342,14 @@ def _fetch_models(provider, providers):
 # ── API Routes ────────────────────────────────────────────────────────────
@app.get("/api/data")
-def api_data():
+def api_data(mode: str = "classic"):
-    """Return current cached metrics + scores."""
+    """Return current cached metrics + scores.
    mode=classic (default) or mode=ml for ML-optimized scoring.
    """
    cache = load_cache()
    if mode == "ml":
        scored = cache.get("_scored_ml", cache.get("_scored", {}))
    else:
        scored = cache.get("_scored", {})
    price_data = cache.get("price", {})
    drawdown_data = cache.get("drawdown", {})
@ -352,6 +364,7 @@ def api_data():
        "last_update": cache.get("_timestamp"),
        "scraper_running": _scraper_running,
        "last_error": _last_error,
        "mode": mode,
    }
@ -513,6 +526,13 @@ DASHBOARD_HTML = """<!DOCTYPE html>
 .status-dot.stale{background:var(--yellow)}
 .status-dot.error{background:var(--red)}
@keyframes pulse{0%,100%{opacity:1}50%{opacity:.3}}
 .mode-toggle{display:flex;border-radius:6px;overflow:hidden;border:1px solid var(--border)}
 .mode-btn{padding:6px 14px;border:none;background:transparent;color:var(--text-dim);font-family:inherit;font-weight:600;font-size:.8rem;cursor:pointer;transition:all .15s}
 .mode-btn:hover{color:var(--text)}
 .mode-btn.active[data-mode="classic"]{background:var(--accent);color:#000}
 .mode-btn.active[data-mode="ml"]{background:#8b5cf6;color:#fff}
 .ml-badge{display:inline-block;font-size:.6rem;font-weight:700;padding:2px 6px;border-radius:3px;background:#8b5cf6;color:#fff;vertical-align:super;margin-left:4px}
 .ml-weight{font-size:.65rem;color:#8b5cf6;font-family:var(--mono);margin-top:2px}
 </style>
 </head>
 <body>
@ -530,6 +550,10 @@ DASHBOARD_HTML = """<!DOCTYPE html>
      <span class="status-dot" id="statusDot"></span>
      <span id="statusText">Loading...</span>
    </div>
    <div class="mode-toggle" id="modeToggle" title="Switch between Classic (equal-weight) and ML-optimized scoring">
      <button class="mode-btn active" data-mode="classic" onclick="setMode('classic')">Classic</button>
      <button class="mode-btn" data-mode="ml" onclick="setMode('ml')">ML</button>
    </div>
    <button class="btn btn-accent" onclick="doRefresh(false)" id="btnRefresh">⚡ Quick Refresh</button>
    <button class="btn btn-secondary" onclick="doRefresh(true)" id="btnFullRefresh" title="Re-scrape on-chain metrics from LookIntoBitcoin (~2-3 min)">🔄 Full Refresh</button>
  </div>
@ -689,6 +713,11 @@ function renderMetrics(metrics) {
    html += '</div></div>';
    html += '<div class="metric-value">' + (m.display_value || 'N/A') + '</div>';
    html += '<div class="metric-desc">' + (m.description || '') + '</div>';
    if (currentMode === 'ml' && m.ml_weight != null) {
      const wpct = (m.ml_weight * 100).toFixed(1);
      const contrib = m.ml_contribution != null ? m.ml_contribution.toFixed(1) : '--';
      html += '<div class="ml-weight">ML weight: ' + wpct + '% · contribution: ' + contrib + ' pts</div>';
    }
    if (hasSparkline) {
      html += '<div class="metric-sparkline"><canvas id="spark-' + idx + '"></canvas></div>';
    }
@ -841,7 +870,7 @@ function renderHistoryFromData(history) {
 // Load backtest daily scores for the chart
 async function loadBacktestChart() {
  try {
-    const r = await fetch('/api/backtest');
+    const r = await fetch('/api/backtest?mode=' + currentMode);
    const data = await r.json();
    if (data.chart_data && data.chart_data.length) {
      fullDailyScores = data.chart_data;
@ -894,7 +923,7 @@ function updateStatus(data) {
 async function poll() {
  try {
    const [dataRes, histRes] = await Promise.all([
-      fetch('/api/data'), fetch('/api/history')
+      fetch('/api/data?mode=' + currentMode), fetch('/api/history')
    ]);
    const data = await dataRes.json();
    const history = await histRes.json();
@ -906,7 +935,12 @@ async function poll() {
    // Assessment
    const el = document.getElementById('assessment');
-    el.textContent = scored.assessment || 'Loading...';
+    let assessText = scored.assessment || 'Loading...';
    if (currentMode === 'ml') {
      el.innerHTML = assessText + '<span class="ml-badge">ML</span>';
    } else {
      el.textContent = assessText;
    }
    el.style.color = assessmentColor(composite);
    // Price
@ -925,7 +959,11 @@ async function poll() {
    if (data.mayer_multiple) document.getElementById('mayerDisplay').textContent = data.mayer_multiple.toFixed(2);
    if (data.sma_200d) document.getElementById('sma200dDisplay').textContent = '$' + Math.round(data.sma_200d).toLocaleString();
    if (scored.scored_count != null) {
-      document.getElementById('scoredCount').textContent = scored.scored_count + '/' + scored.total_count + ' metrics active';
+      let countText = scored.scored_count + '/' + scored.total_count + ' metrics active';
      if (currentMode === 'ml' && scored.classic_score != null) {
        countText += ' · Classic: ' + scored.classic_score;
      }
      document.getElementById('scoredCount').textContent = countText;
    }
    // Metrics
@ -954,6 +992,17 @@ async function doRefresh(full) {
  setTimeout(() => { btn.disabled = false; btn.textContent = origText; }, delay);
 }
 // Scoring mode the dashboard is currently showing: 'classic' or 'ml'.
 let currentMode = 'classic';
 // Switch the scoring mode, move the highlight to the matching toggle button,
 // then re-request the live data and the backtest chart for that mode.
 function setMode(mode) {
  currentMode = mode;
  for (const btn of document.querySelectorAll('.mode-btn')) {
    const isSelected = btn.dataset.mode === mode;
    btn.classList.toggle('active', isSelected);
  }
  poll(); // live metrics + scores for the new mode
  loadBacktestChart(); // historical chart for the new mode
 }
 drawScoreRing(0);
 poll();
 setInterval(poll, 30000);
@ -1174,11 +1223,13 @@ _history_collector_progress = {}
@app.get("/api/backtest")
-def api_backtest():
+def api_backtest(mode: str = "classic"):
-    """Run backtest and return full results."""
+    """Run backtest and return full results.
    mode=classic (default) or mode=ml for ML-optimized scoring.
    """
    try:
        from backtesting.engine import run_backtest
-        return run_backtest()
+        return run_backtest(ml_mode=(mode == "ml"))
    except Exception as e:
        log.error("Backtest error: %s", traceback.format_exc())
        return JSONResponse({"error": str(e)}, status_code=500)
--- a/ml/init.py
+++ b/ml/init.py
--- a/ml/optimizer.py
+++ b/ml/optimizer.py
@ -0,0 +1,562 @@
 #!/usr/bin/env python3
 """
 ML Optimizer for Bitcoin Accumulation Zone Scoring.
 Trains a gradient boosted tree model on historical on-chain metrics to find
 optimal metric weights for identifying the best long-term buying opportunities.
 Output: config/ml_weights.json with optimized weights and feature importances.
 """
 import json
 import logging
 import os
 import sys
 from datetime import datetime, timedelta
 import numpy as np
 from sklearn.ensemble import GradientBoostingClassifier
 from sklearn.metrics import (
    classification_report,
    f1_score,
    precision_score,
    recall_score,
    roc_auc_score,
 )
 from sklearn.model_selection import TimeSeriesSplit
 from sklearn.preprocessing import StandardScaler
 logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(name)s] %(levelname)s: %(message)s",
 )
 log = logging.getLogger("ml-optimizer")
 BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 HISTORY_PATH = os.path.join(BASE_DIR, "data", "history.json")
 OUTPUT_PATH = os.path.join(BASE_DIR, "config", "ml_weights.json")
 THRESHOLDS_PATH = os.path.join(BASE_DIR, "config", "thresholds.json")
 # Date range: 2018-02-01 onward (when all 8 metrics + fear_greed available)
 START_DATE = "2018-02-01"
 # Training cutoff: need 1yr forward data for labels
 TRAIN_CUTOFF_DAYS = 365
 # Target: forward 365d return > 30% = "good time to buy"
 GOOD_BUY_THRESHOLD = 30.0
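 # The six metrics read directly from history; the two price-ratio metrics and drawdown (nine scored inputs in total) are derived in build_dataset()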
 METRIC_KEYS = [
    "puell_multiple",
    "mvrv_zscore",
    "reserve_risk",
    "rhodl_ratio",
    "nupl",
    "fear_greed",
 ]
 # Ratio-based metrics (derived from price vs reference)
 RATIO_METRICS = {
    "pct_above_200w_sma": {"price_key": "btc_price", "ref_key": "200w_sma"},
    "pct_above_lth_rp": {"price_key": "btc_price", "ref_key": "lth_realized_price"},
 }
 def load_history():
    """Read HISTORY_PATH and index every series by date.

    Returns a dict mapping series key -> {date: value}. Top-level entries
    that are not dicts, or that have no "dates" entry, are ignored, and
    points whose value is None are dropped.
    """
    with open(HISTORY_PATH) as fh:
        payload = json.load(fh)

    by_series = {}
    for series_key, series in payload.items():
        if isinstance(series, dict) and "dates" in series:
            # dates/values are parallel sequences; pair them up and keep only real values.
            by_series[series_key] = {
                day: value
                for day, value in zip(series["dates"], series["values"])
                if value is not None
            }
    return by_series
 def load_thresholds():
    """Return the parsed JSON content of the thresholds file at THRESHOLDS_PATH."""
    with open(THRESHOLDS_PATH) as config_file:
        thresholds = json.load(config_file)
    return thresholds
 def score_range(value, ranges):
    """Look up the score of the first bucket that contains ``value``.

    ``ranges`` is an iterable of ``(low, high, score)`` triples. A bucket
    matches when ``low <= value < high``; a bound of None is open on that
    side. Returns None when ``value`` is None and 0 when no bucket matches.
    """
    if value is None:
        return None
    matching_scores = (
        bucket_score
        for lower, upper, bucket_score in ranges
        if (lower is None or value >= lower) and (upper is None or value < upper)
    )
    # First match wins; 0 is the fallback when nothing matches.
    return next(matching_scores, 0)
 def build_dataset(index, thresholds):
    """Build the training rows: scores, raw values, engineered features and forward returns.

    Args:
        index: Mapping of series key -> {date string: value}, as produced by
            load_history(). Date strings are compared lexicographically and
            parsed with "%Y-%m-%d".
        thresholds: Mapping of metric key -> config dict. Only each entry's
            "ranges" list is read; it is passed to score_range().

    Returns:
        A list of row dicts in ascending date order, one per date on or after
        START_DATE for which every metric in METRIC_KEYS, the price, the
        200-week SMA and the LTH realized price are all available. Each row
        holds "date", "price", "score_<metric>", "raw_<metric>",
        "delta_30d_<metric>", the two interaction terms, "days_since_ath" and
        whichever "fwd_<N>d" returns could be computed. Returns an empty list
        when there is no history on or after START_DATE.
    """
    max_ath_lookback = 2000  # cap, in days, for the days_since_ath feature

    # Every date present in any series, restricted to START_DATE onward.
    all_dates = set()
    for lookup in index.values():
        all_dates.update(lookup.keys())
    dates = sorted(d for d in all_dates if d >= START_DATE)
    if not dates:
        # Previously this fell through to dates[0] in the log call and raised IndexError.
        log.warning("No history dates on or after %s; dataset is empty", START_DATE)
        return []

    # Build price lookup for forward returns; the first series (in this order)
    # that has a value for a date wins.
    price_lookup = {}
    for pk in ["btc_price", "btc_price_sma", "btc_price_lth"]:
        if pk in index:
            for d, v in index[pk].items():
                price_lookup.setdefault(d, v)

    # Single chronological pass over the full history (including dates before
    # START_DATE) that yields both the drawdown from the running ATH and, for
    # each priced date, the most recent date that was essentially at the ATH.
    ath = 0
    drawdowns = {}
    last_ath_day = {}
    latest_ath = None
    for d in sorted(all_dates):
        p = price_lookup.get(d)
        if p is None:
            continue
        if p > ath:
            ath = p
        if ath > 0:
            dd_pct = ((ath - p) / ath) * 100
            drawdowns[d] = dd_pct
            if dd_pct < 0.1:  # essentially at ATH
                latest_ath = d
        last_ath_day[d] = latest_ath

    # Threshold ranges used to turn raw values into scores.
    threshold_keys = (
        "puell_multiple", "mvrv_zscore", "reserve_risk", "rhodl_ratio",
        "nupl", "fear_greed", "drawdown", "price_vs_200w_sma", "lth_realized_price",
    )
    metric_ranges = {k: thresholds.get(k, {}).get("ranges", []) for k in threshold_keys}

    # (row metric name, thresholds key): the ratio metrics are stored under
    # different names in the thresholds config.
    score_sources = (
        ("puell_multiple", "puell_multiple"),
        ("mvrv_zscore", "mvrv_zscore"),
        ("reserve_risk", "reserve_risk"),
        ("rhodl_ratio", "rhodl_ratio"),
        ("nupl", "nupl"),
        ("fear_greed", "fear_greed"),
        ("drawdown", "drawdown"),
        ("pct_above_200w_sma", "price_vs_200w_sma"),
        ("pct_above_lth_rp", "lth_realized_price"),
    )

    log.info("Building dataset from %d dates (%s to %s)", len(dates), dates[0], dates[-1])
    rows = []
    for d in dates:
        # Raw metric values; the date is dropped unless every one is present.
        vals = {key: index.get(key, {}).get(d) for key in METRIC_KEYS}
        if any(v is None for v in vals.values()):
            continue

        # Ratio metrics: percent distance of price from each reference level.
        price = price_lookup.get(d)
        sma_200w = index.get("200w_sma", {}).get(d)
        lth_rp = index.get("lth_realized_price", {}).get(d)
        if price is None or sma_200w is None or lth_rp is None:
            continue
        if sma_200w == 0 or lth_rp == 0:
            continue  # would divide by zero below
        pct_200w = ((price - sma_200w) / sma_200w) * 100
        pct_lth = ((price - lth_rp) / lth_rp) * 100
        dd = drawdowns.get(d, 0)
        vals["pct_above_200w_sma"] = pct_200w
        vals["pct_above_lth_rp"] = pct_lth
        vals["drawdown"] = dd

        # Score each metric using the existing thresholds.
        scores = {
            name: score_range(vals[name], metric_ranges[range_key])
            for name, range_key in score_sources
        }
        if any(s is None for s in scores.values()):
            continue

        # Forward returns, only for horizons whose exact calendar date has a price.
        dt = datetime.strptime(d, "%Y-%m-%d")
        fwd = {}
        for days in [30, 90, 180, 365]:
            future_d = (dt + timedelta(days=days)).strftime("%Y-%m-%d")
            fp = price_lookup.get(future_d)
            if fp is not None and price > 0:
                fwd[f"fwd_{days}d"] = ((fp - price) / price) * 100

        # Rate-of-change features: absolute change versus the value 30 days earlier.
        # NOTE(review): a previous value of exactly 0 is treated like a missing
        # one (delta forced to 0.0) even though this is a difference, not a
        # ratio -- confirm whether 0 is a placeholder in the history data.
        deltas = {}
        d_30ago = (dt - timedelta(days=30)).strftime("%Y-%m-%d")
        for key in ["mvrv_zscore", "nupl", "puell_multiple", "reserve_risk"]:
            v_now = vals[key]
            v_prev = index.get(key, {}).get(d_30ago)
            if v_prev is not None and v_prev != 0:
                deltas[f"delta_30d_{key}"] = v_now - v_prev
            else:
                deltas[f"delta_30d_{key}"] = 0.0

        # Interaction terms
        interactions = {
            "mvrv_x_nupl": vals["mvrv_zscore"] * vals["nupl"],
            "puell_x_reserve": vals["puell_multiple"] * vals["reserve_risk"],
        }

        # Days since the last near-ATH day, capped at max_ath_lookback. A day
        # that is itself at the ATH counts as 0; the old backward scan started
        # one day back and so could never report 0.
        ath_day = last_ath_day.get(d)
        if ath_day is None:
            days_since_ath = max_ath_lookback
        else:
            gap_days = (dt - datetime.strptime(ath_day, "%Y-%m-%d")).days
            days_since_ath = min(gap_days, max_ath_lookback)

        row = {
            "date": d,
            "price": price,
            **{f"score_{k}": v for k, v in scores.items()},
            **{f"raw_{k}": v for k, v in vals.items()},
            **deltas,
            **interactions,
            "days_since_ath": days_since_ath,
            **fwd,
        }
        rows.append(row)

    log.info("Built %d complete data rows", len(rows))
    return rows
 def train_model(rows):
    """Train a gradient-boosted classifier and derive per-metric weights from it.

    Rows that carry a "fwd_365d" return are labeled 1 when that return exceeds
    GOOD_BUY_THRESHOLD and 0 otherwise. The model is evaluated with a 5-fold
    expanding-window TimeSeriesSplit, refit on all labeled rows, and its
    feature importances are summed per parent metric and normalized into
    weights.

    Args:
        rows: Row dicts as produced by build_dataset(). Labeled rows are
            mutated in place: a "target" key is added to each.

    Returns:
        A dict with "weights", "feature_importances", "cv_results",
        "training_info", "comparison" (from run_comparison) and "trained_at",
        or None when fewer than 100 labeled rows are available.
    """
    from datetime import timezone  # only needed for the trained_at timestamp

    # Filter to rows that have 365d forward return (for labeling)
    labeled = [r for r in rows if "fwd_365d" in r]
    log.info("Rows with 365d forward data: %d", len(labeled))
    if len(labeled) < 100:
        log.error("Not enough labeled data. Need at least 100 rows, got %d", len(labeled))
        return None

    # Create binary target: forward 365d return > threshold
    for r in labeled:
        r["target"] = 1 if r["fwd_365d"] > GOOD_BUY_THRESHOLD else 0
    positive = sum(r["target"] for r in labeled)
    log.info("Target distribution: %d positive (%.1f%%), %d negative",
             positive, positive / len(labeled) * 100, len(labeled) - positive)

    # Feature columns: scores + raw values + deltas + interactions + cycle position
    score_features = [
        "score_puell_multiple", "score_mvrv_zscore", "score_reserve_risk",
        "score_rhodl_ratio", "score_nupl", "score_fear_greed",
        "score_drawdown", "score_pct_above_200w_sma", "score_pct_above_lth_rp",
    ]
    raw_features = [
        "raw_puell_multiple", "raw_mvrv_zscore", "raw_reserve_risk",
        "raw_rhodl_ratio", "raw_nupl", "raw_fear_greed",
        "raw_pct_above_200w_sma", "raw_pct_above_lth_rp", "raw_drawdown",
    ]
    delta_features = [
        "delta_30d_mvrv_zscore", "delta_30d_nupl",
        "delta_30d_puell_multiple", "delta_30d_reserve_risk",
    ]
    interaction_features = ["mvrv_x_nupl", "puell_x_reserve"]
    cycle_features = ["days_since_ath"]
    feature_cols = score_features + raw_features + delta_features + interaction_features + cycle_features

    X = np.array([[r[f] for f in feature_cols] for r in labeled])
    y = np.array([r["target"] for r in labeled])
    log.info("Feature matrix: %d samples x %d features", X.shape[0], X.shape[1])

    # One set of hyperparameters shared by the CV models and the final model.
    gb_params = dict(
        n_estimators=300,
        learning_rate=0.05,
        max_depth=4,
        subsample=0.8,
        min_samples_leaf=20,
        random_state=42,
    )

    # Time-series cross-validation (expanding window, 5 splits).
    # NOTE(review): the split uses no gap, while each label looks 365 days
    # ahead, so the last training rows of a fold have labels that depend on
    # prices inside that fold's validation window. Consider TimeSeriesSplit's
    # `gap` argument if that overlap is not intended.
    tscv = TimeSeriesSplit(n_splits=5)
    cv_scores = []  # AUC of the folds where it is defined
    cv_f1 = []
    cv_precision = []
    cv_recall = []
    skipped_auc_folds = 0
    for fold, (train_idx, val_idx) in enumerate(tscv.split(X)):
        X_train, X_val = X[train_idx], X[val_idx]
        y_train, y_val = y[train_idx], y[val_idx]

        # Fit the scaler on the training fold only, then apply it to validation.
        scaler = StandardScaler()
        X_train_s = scaler.fit_transform(X_train)
        X_val_s = scaler.transform(X_val)

        model = GradientBoostingClassifier(**gb_params)
        model.fit(X_train_s, y_train)
        y_pred = model.predict(X_val_s)
        y_prob = model.predict_proba(X_val_s)[:, 1]

        # ROC AUC is undefined when the validation window holds one class.
        # Such folds are left out of the average; counting them as 0 would
        # drag the mean down and inflate the standard deviation.
        if len(np.unique(y_val)) > 1:
            auc = roc_auc_score(y_val, y_prob)
            cv_scores.append(auc)
            auc_text = "%.3f" % auc
        else:
            skipped_auc_folds += 1
            auc_text = "n/a"
        f1 = f1_score(y_val, y_pred, zero_division=0)
        prec = precision_score(y_val, y_pred, zero_division=0)
        rec = recall_score(y_val, y_pred, zero_division=0)
        cv_f1.append(f1)
        cv_precision.append(prec)
        cv_recall.append(rec)

        train_dates = f"{labeled[train_idx[0]]['date']} to {labeled[train_idx[-1]]['date']}"
        val_dates = f"{labeled[val_idx[0]]['date']} to {labeled[val_idx[-1]]['date']}"
        log.info("Fold %d: Train %s | Val %s | AUC=%s F1=%.3f P=%.3f R=%.3f",
                 fold + 1, train_dates, val_dates, auc_text, f1, prec, rec)

    if skipped_auc_folds:
        log.warning("AUC undefined (single-class validation window) in %d fold(s); "
                    "excluded from the AUC average", skipped_auc_folds)
    if cv_scores:
        mean_auc = float(np.mean(cv_scores))
        std_auc = float(np.std(cv_scores))
    else:
        # No fold had both classes; keep the output numeric so readers of
        # cv_results see the same types as before.
        mean_auc = 0.0
        std_auc = 0.0
    log.info("CV Mean AUC: %.3f (+/- %.3f)", mean_auc, std_auc)
    log.info("CV Mean F1:  %.3f (+/- %.3f)", np.mean(cv_f1), np.std(cv_f1))

    # Train final model on all labeled data
    log.info("Training final model on all %d labeled samples...", len(labeled))
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)
    final_model = GradientBoostingClassifier(**gb_params)
    final_model.fit(X_scaled, y)

    # Feature importances, highest first
    importances = final_model.feature_importances_
    feat_imp = sorted(
        zip(feature_cols, importances),
        key=lambda x: x[1],
        reverse=True,
    )
    log.info("\nFeature Importance Ranking:")
    log.info("-" * 50)
    for name, imp in feat_imp:
        bar = "#" * int(imp * 200)
        log.info("  %-30s %.4f %s", name, imp, bar)

    # Extract optimal weights by aggregating importance per metric:
    # map each feature back to its parent metric.
    metric_names = [
        "puell_multiple", "mvrv_zscore", "reserve_risk", "rhodl_ratio",
        "nupl", "fear_greed", "drawdown", "pct_above_200w_sma", "pct_above_lth_rp",
    ]
    feature_to_metric = {}
    for m in metric_names:
        feature_to_metric[f"score_{m}"] = m
        feature_to_metric[f"raw_{m}"] = m
    # Delta features map to their base metric
    feature_to_metric["delta_30d_mvrv_zscore"] = "mvrv_zscore"
    feature_to_metric["delta_30d_nupl"] = "nupl"
    feature_to_metric["delta_30d_puell_multiple"] = "puell_multiple"
    feature_to_metric["delta_30d_reserve_risk"] = "reserve_risk"

    # Interaction terms are split evenly between their constituent metrics:
    #   mvrv_x_nupl     -> mvrv_zscore + nupl
    #   puell_x_reserve -> puell_multiple + reserve_risk
    metric_importances = {m: 0.0 for m in metric_names}
    for name, imp in feat_imp:
        if name in feature_to_metric:
            metric_importances[feature_to_metric[name]] += imp
        elif name == "mvrv_x_nupl":
            metric_importances["mvrv_zscore"] += imp / 2
            metric_importances["nupl"] += imp / 2
        elif name == "puell_x_reserve":
            metric_importances["puell_multiple"] += imp / 2
            metric_importances["reserve_risk"] += imp / 2
        # days_since_ath maps to drawdown conceptually
        elif name == "days_since_ath":
            metric_importances["drawdown"] += imp

    # Normalize weights to sum to 1 (equal weights if the model assigned no importance at all)
    total_imp = sum(metric_importances.values())
    if total_imp > 0:
        weights = {k: round(v / total_imp, 4) for k, v in metric_importances.items()}
    else:
        weights = {k: round(1 / len(metric_importances), 4) for k in metric_importances}

    # Sort by weight descending
    weights = dict(sorted(weights.items(), key=lambda x: x[1], reverse=True))
    log.info("\nOptimal Metric Weights:")
    log.info("-" * 50)
    equal_weight = round(1 / len(weights), 4)
    for metric, w in weights.items():
        change = "+" if w > equal_weight else ""
        diff = (w - equal_weight) / equal_weight * 100
        log.info("  %-25s %.4f (%s%.0f%% vs equal)", metric, w, change, diff)

    # Run comparison backtest: ML-weighted vs equal-weight
    log.info("\n" + "=" * 60)
    log.info("COMPARISON BACKTEST: ML-Weighted vs Equal-Weight")
    log.info("=" * 60)
    comparison = run_comparison(rows, weights)

    # Build output
    result = {
        "weights": weights,
        "feature_importances": {name: round(float(imp), 6) for name, imp in feat_imp},
        "cv_results": {
            "mean_auc": round(mean_auc, 4),
            "std_auc": round(std_auc, 4),
            "mean_f1": round(float(np.mean(cv_f1)), 4),
            "mean_precision": round(float(np.mean(cv_precision)), 4),
            "mean_recall": round(float(np.mean(cv_recall)), 4),
        },
        "training_info": {
            "n_samples": len(labeled),
            "n_positive": int(positive),
            "positive_rate": round(positive / len(labeled), 4),
            "n_features": len(feature_cols),
            "target_threshold": GOOD_BUY_THRESHOLD,
            "date_range": f"{labeled[0]['date']} to {labeled[-1]['date']}",
            "model": "GradientBoostingClassifier",
        },
        "comparison": comparison,
        "trained_at": datetime.now(tz=timezone.utc).isoformat(),
    }
    return result
 def run_comparison(rows, ml_weights):
    """Compare ML-weighted scoring vs equal-weight scoring across score brackets.

    For every row that carries a ``fwd_365d`` forward return, two composites
    are computed from its ``score_<metric>`` columns: a plain average, and a
    weighted sum using ``ml_weights`` (a metric absent from ``ml_weights``
    falls back to the equal weight). Both are multiplied by 10 and written
    back onto the row as ``composite_equal_weight`` / ``composite_ml_weighted``.

    Args:
        rows: List of dict rows. Rows without ``fwd_365d`` are ignored; the
            remaining rows must contain every ``score_<metric>`` column used
            below (a missing column raises ``KeyError``).
        ml_weights: Mapping of metric name -> weight. Not re-normalised here.

    Returns:
        ``{"equal_weight": [...], "ml_weighted": [...]}`` with one dict per
        bracket, in bracket order. Populated brackets hold ``range``,
        ``label``, ``days``, ``avg_365d``, ``median_365d`` and
        ``win_rate_365d``; empty brackets hold only ``range``, ``label``,
        ``days`` (0) and ``avg_365d`` (None).
    """
    # Metrics used in scoring (maps to score_* columns)
    score_keys = [
        "puell_multiple", "mvrv_zscore", "reserve_risk", "rhodl_ratio",
        "nupl", "fear_greed", "drawdown", "pct_above_200w_sma", "pct_above_lth_rp",
    ]
    n_metrics = len(score_keys)
    equal_weight = 1.0 / n_metrics
    brackets = [
        (0, 20, "Extreme Caution"),
        (21, 40, "Caution"),
        (41, 55, "Neutral"),
        (56, 70, "Moderate Opportunity"),
        (71, 85, "Strong Accumulation"),
        (86, 100, "Extreme Accumulation"),
    ]
    # Only use rows with forward returns
    scored_rows = [row for row in rows if "fwd_365d" in row]
    results = {"equal_weight": [], "ml_weighted": []}

    for mode in ("equal_weight", "ml_weighted"):
        field = f"composite_{mode}"
        for row in scored_rows:
            if mode == "equal_weight":
                composite = sum(row[f"score_{k}"] for k in score_keys) / n_metrics * 10
            else:
                composite = sum(
                    row[f"score_{k}"] * ml_weights.get(k, equal_weight) for k in score_keys
                ) * 10
            row[field] = composite

        for low, high, label in brackets:
            # Bracket labels use inclusive integer bounds, but composites are
            # floats. Membership is therefore tested on [low, high + 1) so a
            # value such as 20.5 (or 100.00000000000001 from float error) is
            # counted in its bracket instead of falling into the gap between
            # two brackets and being dropped. Integer-valued composites land
            # in exactly the same bracket as with inclusive bounds.
            returns_365 = [
                row["fwd_365d"] for row in scored_rows if low <= row[field] < high + 1
            ]
            if not returns_365:
                results[mode].append({
                    "range": f"{low}-{high}", "label": label,
                    "days": 0, "avg_365d": None,
                })
                continue
            n_days = len(returns_365)
            win_rate = sum(1 for ret in returns_365 if ret > 0) / n_days * 100
            results[mode].append({
                "range": f"{low}-{high}",
                "label": label,
                "days": n_days,
                "avg_365d": round(sum(returns_365) / n_days, 2),
                # Middle element of the sorted list (upper middle for even counts).
                "median_365d": round(sorted(returns_365)[n_days // 2], 2),
                "win_rate_365d": round(win_rate, 1),
            })

    def _cell(entry, key, spec):
        # Render one table cell; empty brackets have no value and show "--".
        value = entry.get(key)
        return f"{value:{spec}}%" if value is not None else "--"

    # Print comparison
    log.info("\n%-18s | %-8s %-8s %-8s | %-8s %-8s %-8s",
             "Bracket", "EQ Avg", "EQ Med", "EQ Win%", "ML Avg", "ML Med", "ML Win%")
    log.info("-" * 80)
    for eq, ml in zip(results["equal_weight"], results["ml_weighted"]):
        log.info("%-18s | %-8s %-8s %-8s | %-8s %-8s %-8s",
                 eq["label"],
                 _cell(eq, "avg_365d", ".1f"),
                 _cell(eq, "median_365d", ".1f"),
                 _cell(eq, "win_rate_365d", ".0f"),
                 _cell(ml, "avg_365d", ".1f"),
                 _cell(ml, "median_365d", ".1f"),
                 _cell(ml, "win_rate_365d", ".0f"))
    return results
 def main():
    """Run the optimizer end to end and log a summary.

    Loads history and thresholds, builds the dataset, trains the model and
    writes the result as JSON to OUTPUT_PATH. Exits with status 1 when the
    history file at HISTORY_PATH does not exist or when train_model()
    returns None.
    """
    rule = "=" * 60
    log.info(rule)
    log.info("Bitcoin Accumulation Zone ML Optimizer")
    log.info(rule)

    history_present = os.path.exists(HISTORY_PATH)
    if not history_present:
        log.error("No historical data at %s. Run history collector first.", HISTORY_PATH)
        sys.exit(1)

    # Load inputs
    log.info("Loading historical data...")
    index = load_history()
    thresholds = load_thresholds()

    # Assemble the training rows
    log.info("Building training dataset...")
    rows = build_dataset(index, thresholds)

    # Fit the model; None signals that training could not produce a result
    log.info("Training ML model...")
    result = train_model(rows)
    if result is None:
        log.error("Training failed.")
        sys.exit(1)

    # Persist the full result (weights plus diagnostics)
    with open(OUTPUT_PATH, "w") as out_file:
        json.dump(result, out_file, indent=2)
    log.info("\nSaved ML weights to %s", OUTPUT_PATH)

    # Summary report
    training = result["training_info"]
    cv = result["cv_results"]
    log.info("\n" + rule)
    log.info("SUMMARY")
    log.info(rule)
    log.info("Model: %s", training["model"])
    log.info("Samples: %d (%d positive)", training["n_samples"], training["n_positive"])
    log.info("CV AUC: %.3f (+/- %.3f)", cv["mean_auc"], cv["std_auc"])
    log.info("CV F1:  %.3f", cv["mean_f1"])

    log.info("\nTop 5 Feature Importances:")
    top_features = list(result["feature_importances"].items())[:5]
    for feature_name, importance in top_features:
        log.info("  %-30s %.4f", feature_name, importance)

    log.info("\nMetric Weights (ML-Optimized):")
    for metric_name in result["weights"]:
        log.info("  %-25s %.1f%%", metric_name, result["weights"][metric_name] * 100)
 # Run the optimizer only when this file is executed as a script, not when it is imported.
 if __name__ == "__main__":
    main()
--- a/scoring/engine.py
+++ b/scoring/engine.py
@ -428,3 +428,104 @@ def score_all(metrics):
        "scored_count": len(valid_scores),
        "total_count": len(results),
    }
 # ── ML-Optimized Scoring ──────────────────────────────────────────────
 # Path of the ML weights file: "config/ml_weights.json" under the parent of
 # the directory that contains this module. load_ml_weights() reads it; the
 # fallback message in score_all_ml() points to ml/optimizer.py as the script
 # to run when it is missing.
 ML_WEIGHTS_PATH = os.path.join(
    os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
    "config",
    "ml_weights.json",
 )
 # Maps scoring engine metric keys (the "key" field of each scored metric) to
 # the keys used in the ML weights mapping. Most names are identical; only
 # price_vs_200w_sma and lth_realized_price are renamed. A metric key that is
 # not listed here gets a small fixed default weight in score_all_ml().
 # NOTE(review): backtesting/engine.py defines an equivalent _BT_ML_KEY_MAP;
 # the two tables presumably need to stay in sync — confirm.
 _ML_KEY_MAP = {
    "fear_greed": "fear_greed",
    "puell_multiple": "puell_multiple",
    "mvrv_zscore": "mvrv_zscore",
    "drawdown": "drawdown",
    "price_vs_200w_sma": "pct_above_200w_sma",
    "reserve_risk": "reserve_risk",
    "rhodl_ratio": "rhodl_ratio",
    "nupl": "nupl",
    "lth_realized_price": "pct_above_lth_rp",
 }
 def load_ml_weights(path=None):
    """Load ML-optimized weights from config.

    Loading is best-effort: callers treat an empty result as "no ML weights
    available", so expected failures are reported as ``{}`` rather than raised.

    Args:
        path: Optional path of the weights JSON file. Defaults to
            ML_WEIGHTS_PATH when omitted.

    Returns:
        The ``"weights"`` mapping stored in the file, or ``{}`` when the file
        is missing or unreadable, is not valid JSON, or does not contain a
        JSON object under ``"weights"``.
    """
    if path is None:
        path = ML_WEIGHTS_PATH
    try:
        with open(path, encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError):
        # OSError: file missing/unreadable. ValueError: malformed JSON
        # (json.JSONDecodeError) or undecodable bytes (UnicodeDecodeError).
        # Anything else is a programming error and is allowed to surface.
        return {}
    # Guard the payload shape so callers can rely on getting a dict: a
    # top-level list, or a null/list under "weights", counts as "no weights".
    weights = data.get("weights") if isinstance(data, dict) else None
    return weights if isinstance(weights, dict) else {}
 def score_all_ml(metrics):
    """Score all metrics using ML-optimized weights.

    Same output format as score_all() but uses learned weights
    instead of equal weighting. Each metric still shows its
    individual 0-10 score plus the ML weight applied to it.

    Args:
        metrics: Passed unchanged to score_all(), which produces the
            per-metric scores this function re-weights.

    Returns:
        When ML weights are available: a dict with ``metrics`` (the
        score_all() metric entries; every scored entry gains ``ml_weight``
        and ``ml_contribution``), ``composite_score``, ``assessment``,
        ``scored_count``, ``total_count``, ``ml_mode`` (True) and
        ``classic_score``. Otherwise the score_all() result itself, with
        ``ml_mode`` set to False and an ``ml_error`` message added.
    """
    # Get classic scores first (reuses all individual scoring logic)
    classic = score_all(metrics)
    ml_weights = load_ml_weights()
    if not ml_weights:
        # Fallback to classic if no ML weights available
        classic["ml_mode"] = False
        classic["ml_error"] = "ML weights not found — run ml/optimizer.py"
        return classic
    results = classic["metrics"]

    # Pass 1: resolve the weight of every scored metric and accumulate totals.
    weighted_sum = 0.0
    weight_total = 0.0
    applied = []  # (metric entry, raw weight) for each scored metric
    for m in results:
        if m["score"] is None:
            continue
        ml_key = _ML_KEY_MAP.get(m["key"])
        if ml_key is None:
            # Hash ribbons or unknown metric — use small default weight
            w = 0.01
        else:
            w = ml_weights.get(ml_key, 0.0)
        m["ml_weight"] = round(w, 4)
        applied.append((m, w))
        weighted_sum += m["score"] * w
        weight_total += w

    # Normalize if weights don't sum to 1 (e.g., missing metrics)
    if weight_total > 0:
        composite = weighted_sum / weight_total * 10
    else:
        composite = 0

    # Pass 2: report each contribution with the same normalised share that
    # went into the composite, so the contributions add up to the composite
    # score (up to rounding) even when the applied weights do not sum to 1.
    for m, w in applied:
        share = w / weight_total if weight_total > 0 else 0.0
        m["ml_contribution"] = round(m["score"] * share * 10, 2)

    # Assessment text (same thresholds as classic)
    if composite >= 80:
        assessment = "EXTREME ACCUMULATION ZONE"
    elif composite >= 65:
        assessment = "STRONG ACCUMULATION ZONE"
    elif composite >= 50:
        assessment = "MODERATE OPPORTUNITY"
    elif composite >= 35:
        assessment = "NEUTRAL"
    elif composite >= 20:
        assessment = "CAUTION — OVERHEATED"
    else:
        assessment = "EXTREME CAUTION"
    return {
        "metrics": results,
        "composite_score": round(composite, 1),
        "assessment": assessment,
        "scored_count": classic["scored_count"],
        "total_count": classic["total_count"],
        "ml_mode": True,
        "classic_score": classic["composite_score"],
    }