feat: ML-optimized accumulation scoring with dashboard toggle
Train a GradientBoostingClassifier on 2,601 days of historical data (2018-2025) to find optimal metric weights for identifying the best long-term buying opportunities. Uses time-series cross-validation to prevent look-ahead bias.

Key results:
- pct_above_200w_sma: 50.7% weight (was 11.1% equal)
- drawdown: 14.6%, lth_rp: 10.9%, rhodl: 8.9%
- fear_greed demoted from 11.1% to 5.1%
- nupl/mvrv nearly eliminated (0.7-1.8%)

ML Strong Accumulation bracket: avg +210% 1yr (vs +176% classic)

New files: ml/optimizer.py, config/ml_weights.json
Modified: scoring/engine.py (score_all_ml), backtesting/engine.py (ml_mode), dashboard/server.py (Classic/ML toggle)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
f1d38f9abb
commit
4647c596b3
@ -121,8 +121,35 @@ def _compute_ath_series(price_lookup, dates):
|
|||||||
return drawdowns
|
return drawdowns
|
||||||
|
|
||||||
|
|
||||||
def score_day(date, index, drawdowns):
|
def _load_ml_weights():
|
||||||
"""Score a single day using all available metrics. Returns (composite_score, individual_scores, n_metrics)."""
|
"""Load ML weights for ML-optimized scoring mode."""
|
||||||
|
ml_path = _os.path.join(_os.path.dirname(_os.path.dirname(_os.path.abspath(__file__))), "config", "ml_weights.json")
|
||||||
|
try:
|
||||||
|
with open(ml_path) as f:
|
||||||
|
data = _json.load(f)
|
||||||
|
return data.get("weights", {})
|
||||||
|
except Exception:
|
||||||
|
return {}
|
||||||
|
|
||||||
|
# ML weight key mapping (backtest metric keys -> ML weight keys).
# score_day() looks metrics up here and falls back to the backtest key itself
# when a metric is not listed.
_BT_ML_KEY_MAP = {
    "fear_greed": "fear_greed",
    "puell_multiple": "puell_multiple",
    "mvrv_zscore": "mvrv_zscore",
    "reserve_risk": "reserve_risk",
    "rhodl_ratio": "rhodl_ratio",
    "nupl": "nupl",
    # The next two are the only entries whose weight key differs.
    "price_vs_200w_sma": "pct_above_200w_sma",
    "lth_realized_price": "pct_above_lth_rp",
    "drawdown": "drawdown",
}
|
||||||
|
|
||||||
|
|
||||||
|
def score_day(date, index, drawdowns, ml_weights=None):
|
||||||
|
"""Score a single day using all available metrics. Returns (composite_score, individual_scores, n_metrics).
|
||||||
|
|
||||||
|
If ml_weights is provided, uses ML-optimized weighting instead of equal weights.
|
||||||
|
"""
|
||||||
scores = []
|
scores = []
|
||||||
details = {}
|
details = {}
|
||||||
|
|
||||||
@ -163,6 +190,20 @@ def score_day(date, index, drawdowns):
|
|||||||
if not scores:
|
if not scores:
|
||||||
return None, details, 0
|
return None, details, 0
|
||||||
|
|
||||||
|
if ml_weights:
|
||||||
|
# ML-weighted composite
|
||||||
|
weighted_sum = 0.0
|
||||||
|
weight_total = 0.0
|
||||||
|
for metric_key, info in details.items():
|
||||||
|
ml_key = _BT_ML_KEY_MAP.get(metric_key, metric_key)
|
||||||
|
w = ml_weights.get(ml_key, 0.0)
|
||||||
|
weighted_sum += info["score"] * w
|
||||||
|
weight_total += w
|
||||||
|
if weight_total > 0:
|
||||||
|
composite = weighted_sum / weight_total * 10
|
||||||
|
else:
|
||||||
|
composite = sum(scores) / len(scores) * 10
|
||||||
|
else:
|
||||||
composite = sum(scores) / len(scores) * 10
|
composite = sum(scores) / len(scores) * 10
|
||||||
return round(composite, 1), details, len(scores)
|
return round(composite, 1), details, len(scores)
|
||||||
|
|
||||||
@ -208,9 +249,12 @@ def compute_max_drawdown_forward(price_lookup, date, window=90):
|
|||||||
return round(max_dd, 2) if max_dd > 0 else 0
|
return round(max_dd, 2) if max_dd > 0 else 0
|
||||||
|
|
||||||
|
|
||||||
def run_backtest():
|
def run_backtest(ml_mode=False):
|
||||||
"""Run the full backtest and return comprehensive results."""
|
"""Run the full backtest and return comprehensive results.
|
||||||
log.info("Loading historical data...")
|
|
||||||
|
If ml_mode=True, uses ML-optimized metric weights instead of equal weights.
|
||||||
|
"""
|
||||||
|
log.info("Loading historical data... (ml_mode=%s)", ml_mode)
|
||||||
if not os.path.exists(HISTORY_PATH):
|
if not os.path.exists(HISTORY_PATH):
|
||||||
return {"error": "No historical data found. Run history collector first."}
|
return {"error": "No historical data found. Run history collector first."}
|
||||||
|
|
||||||
@ -240,11 +284,17 @@ def run_backtest():
|
|||||||
log.info("Computing forward returns...")
|
log.info("Computing forward returns...")
|
||||||
fwd_returns = compute_forward_returns(price_lookup, all_dates)
|
fwd_returns = compute_forward_returns(price_lookup, all_dates)
|
||||||
|
|
||||||
|
# Load ML weights if in ML mode
|
||||||
|
ml_weights = _load_ml_weights() if ml_mode else None
|
||||||
|
if ml_mode and not ml_weights:
|
||||||
|
log.warning("ML mode requested but no weights found — falling back to equal weights")
|
||||||
|
ml_weights = None
|
||||||
|
|
||||||
# Score each day
|
# Score each day
|
||||||
log.info("Scoring %d days...", len(all_dates))
|
log.info("Scoring %d days...", len(all_dates))
|
||||||
daily_scores = []
|
daily_scores = []
|
||||||
for d in all_dates:
|
for d in all_dates:
|
||||||
composite, details, n_metrics = score_day(d, index, drawdowns)
|
composite, details, n_metrics = score_day(d, index, drawdowns, ml_weights=ml_weights)
|
||||||
if composite is not None and n_metrics >= 3: # Require at least 3 metrics
|
if composite is not None and n_metrics >= 3: # Require at least 3 metrics
|
||||||
price = price_lookup.get(d)
|
price = price_lookup.get(d)
|
||||||
entry = {
|
entry = {
|
||||||
@ -435,6 +485,7 @@ def run_backtest():
|
|||||||
"signal_events": signal_events,
|
"signal_events": signal_events,
|
||||||
"current_context": current_context,
|
"current_context": current_context,
|
||||||
"chart_data": chart_data,
|
"chart_data": chart_data,
|
||||||
|
"ml_mode": ml_mode,
|
||||||
"computed_at": datetime.utcnow().isoformat() + "Z",
|
"computed_at": datetime.utcnow().isoformat() + "Z",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
159
config/ml_weights.json
Normal file
159
config/ml_weights.json
Normal file
@ -0,0 +1,159 @@
|
|||||||
|
{
|
||||||
|
"weights": {
|
||||||
|
"pct_above_200w_sma": 0.5075,
|
||||||
|
"drawdown": 0.1459,
|
||||||
|
"pct_above_lth_rp": 0.1095,
|
||||||
|
"rhodl_ratio": 0.089,
|
||||||
|
"fear_greed": 0.0515,
|
||||||
|
"reserve_risk": 0.046,
|
||||||
|
"puell_multiple": 0.0255,
|
||||||
|
"mvrv_zscore": 0.0182,
|
||||||
|
"nupl": 0.0068
|
||||||
|
},
|
||||||
|
"feature_importances": {
|
||||||
|
"raw_pct_above_200w_sma": 0.436377,
|
||||||
|
"days_since_ath": 0.119405,
|
||||||
|
"raw_pct_above_lth_rp": 0.109451,
|
||||||
|
"raw_rhodl_ratio": 0.088999,
|
||||||
|
"score_pct_above_200w_sma": 0.071148,
|
||||||
|
"raw_fear_greed": 0.051475,
|
||||||
|
"puell_x_reserve": 0.032886,
|
||||||
|
"raw_drawdown": 0.026474,
|
||||||
|
"raw_reserve_risk": 0.021707,
|
||||||
|
"raw_mvrv_zscore": 0.012429,
|
||||||
|
"raw_puell_multiple": 0.008599,
|
||||||
|
"delta_30d_reserve_risk": 0.007865,
|
||||||
|
"delta_30d_mvrv_zscore": 0.004263,
|
||||||
|
"raw_nupl": 0.003271,
|
||||||
|
"mvrv_x_nupl": 0.002979,
|
||||||
|
"delta_30d_nupl": 0.002056,
|
||||||
|
"delta_30d_puell_multiple": 0.000473,
|
||||||
|
"score_fear_greed": 6.8e-05,
|
||||||
|
"score_mvrv_zscore": 5.4e-05,
|
||||||
|
"score_puell_multiple": 1e-05,
|
||||||
|
"score_pct_above_lth_rp": 6e-06,
|
||||||
|
"score_rhodl_ratio": 2e-06,
|
||||||
|
"score_reserve_risk": 0.0,
|
||||||
|
"score_nupl": 0.0,
|
||||||
|
"score_drawdown": 0.0
|
||||||
|
},
|
||||||
|
"cv_results": {
|
||||||
|
"mean_auc": 0.6164,
|
||||||
|
"std_auc": 0.3317,
|
||||||
|
"mean_f1": 0.6736,
|
||||||
|
"mean_precision": 0.8015,
|
||||||
|
"mean_recall": 0.7047
|
||||||
|
},
|
||||||
|
"training_info": {
|
||||||
|
"n_samples": 2601,
|
||||||
|
"n_positive": 1553,
|
||||||
|
"positive_rate": 0.5971,
|
||||||
|
"n_features": 25,
|
||||||
|
"target_threshold": 30.0,
|
||||||
|
"date_range": "2018-02-01 to 2025-03-21",
|
||||||
|
"model": "GradientBoostingClassifier"
|
||||||
|
},
|
||||||
|
"comparison": {
|
||||||
|
"equal_weight": [
|
||||||
|
{
|
||||||
|
"range": "0-20",
|
||||||
|
"label": "Extreme Caution",
|
||||||
|
"days": 295,
|
||||||
|
"avg_365d": -5.94,
|
||||||
|
"median_365d": -11.99,
|
||||||
|
"win_rate_365d": 35.6
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"range": "21-40",
|
||||||
|
"label": "Caution",
|
||||||
|
"days": 587,
|
||||||
|
"avg_365d": 23.84,
|
||||||
|
"median_365d": -7.2,
|
||||||
|
"win_rate_365d": 45.3
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"range": "41-55",
|
||||||
|
"label": "Neutral",
|
||||||
|
"days": 697,
|
||||||
|
"avg_365d": 108.96,
|
||||||
|
"median_365d": 75.92,
|
||||||
|
"win_rate_365d": 70.4
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"range": "56-70",
|
||||||
|
"label": "Moderate Opportunity",
|
||||||
|
"days": 450,
|
||||||
|
"avg_365d": 128.81,
|
||||||
|
"median_365d": 109.03,
|
||||||
|
"win_rate_365d": 96.4
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"range": "71-85",
|
||||||
|
"label": "Strong Accumulation",
|
||||||
|
"days": 275,
|
||||||
|
"avg_365d": 175.76,
|
||||||
|
"median_365d": 117.95,
|
||||||
|
"win_rate_365d": 86.9
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"range": "86-100",
|
||||||
|
"label": "Extreme Accumulation",
|
||||||
|
"days": 247,
|
||||||
|
"avg_365d": 115.5,
|
||||||
|
"median_365d": 90.08,
|
||||||
|
"win_rate_365d": 100.0
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"ml_weighted": [
|
||||||
|
{
|
||||||
|
"range": "0-20",
|
||||||
|
"label": "Extreme Caution",
|
||||||
|
"days": 577,
|
||||||
|
"avg_365d": -6.17,
|
||||||
|
"median_365d": -26.21,
|
||||||
|
"win_rate_365d": 27.0
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"range": "21-40",
|
||||||
|
"label": "Caution",
|
||||||
|
"days": 855,
|
||||||
|
"avg_365d": 77.5,
|
||||||
|
"median_365d": 39.28,
|
||||||
|
"win_rate_365d": 72.7
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"range": "41-55",
|
||||||
|
"label": "Neutral",
|
||||||
|
"days": 241,
|
||||||
|
"avg_365d": 165.77,
|
||||||
|
"median_365d": 124.05,
|
||||||
|
"win_rate_365d": 92.5
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"range": "56-70",
|
||||||
|
"label": "Moderate Opportunity",
|
||||||
|
"days": 328,
|
||||||
|
"avg_365d": 144.47,
|
||||||
|
"median_365d": 124.27,
|
||||||
|
"win_rate_365d": 89.6
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"range": "71-85",
|
||||||
|
"label": "Strong Accumulation",
|
||||||
|
"days": 201,
|
||||||
|
"avg_365d": 210.2,
|
||||||
|
"median_365d": 122.22,
|
||||||
|
"win_rate_365d": 99.0
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"range": "86-100",
|
||||||
|
"label": "Extreme Accumulation",
|
||||||
|
"days": 287,
|
||||||
|
"avg_365d": 113.92,
|
||||||
|
"median_365d": 99.53,
|
||||||
|
"win_rate_365d": 100.0
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"trained_at": "2026-03-21T23:15:38.277703+00:00"
|
||||||
|
}
|
||||||
@ -192,10 +192,17 @@ def run_scrape(force_full=False):
|
|||||||
if "_onchain_timestamp" in existing_cache:
|
if "_onchain_timestamp" in existing_cache:
|
||||||
metrics["_onchain_timestamp"] = existing_cache["_onchain_timestamp"]
|
metrics["_onchain_timestamp"] = existing_cache["_onchain_timestamp"]
|
||||||
|
|
||||||
# 4. Score everything
|
# 4. Score everything (classic + ML)
|
||||||
log.info("Scoring metrics...")
|
log.info("Scoring metrics...")
|
||||||
scored = engine.score_all(metrics)
|
scored = engine.score_all(metrics)
|
||||||
metrics["_scored"] = scored
|
metrics["_scored"] = scored
|
||||||
|
|
||||||
|
# ML-optimized scoring (parallel)
|
||||||
|
try:
|
||||||
|
scored_ml = engine.score_all_ml(metrics)
|
||||||
|
metrics["_scored_ml"] = scored_ml
|
||||||
|
except Exception as e:
|
||||||
|
log.warning("ML scoring failed (non-critical): %s", e)
|
||||||
metrics["_timestamp"] = datetime.now(timezone.utc).isoformat()
|
metrics["_timestamp"] = datetime.now(timezone.utc).isoformat()
|
||||||
|
|
||||||
save_cache(metrics)
|
save_cache(metrics)
|
||||||
@ -335,9 +342,14 @@ def _fetch_models(provider, providers):
|
|||||||
# ── API Routes ────────────────────────────────────────────────────────────
|
# ── API Routes ────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
@app.get("/api/data")
|
@app.get("/api/data")
|
||||||
def api_data():
|
def api_data(mode: str = "classic"):
|
||||||
"""Return current cached metrics + scores."""
|
"""Return current cached metrics + scores.
|
||||||
|
mode=classic (default) or mode=ml for ML-optimized scoring.
|
||||||
|
"""
|
||||||
cache = load_cache()
|
cache = load_cache()
|
||||||
|
if mode == "ml":
|
||||||
|
scored = cache.get("_scored_ml", cache.get("_scored", {}))
|
||||||
|
else:
|
||||||
scored = cache.get("_scored", {})
|
scored = cache.get("_scored", {})
|
||||||
price_data = cache.get("price", {})
|
price_data = cache.get("price", {})
|
||||||
drawdown_data = cache.get("drawdown", {})
|
drawdown_data = cache.get("drawdown", {})
|
||||||
@ -352,6 +364,7 @@ def api_data():
|
|||||||
"last_update": cache.get("_timestamp"),
|
"last_update": cache.get("_timestamp"),
|
||||||
"scraper_running": _scraper_running,
|
"scraper_running": _scraper_running,
|
||||||
"last_error": _last_error,
|
"last_error": _last_error,
|
||||||
|
"mode": mode,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -513,6 +526,13 @@ DASHBOARD_HTML = """<!DOCTYPE html>
|
|||||||
.status-dot.stale{background:var(--yellow)}
|
.status-dot.stale{background:var(--yellow)}
|
||||||
.status-dot.error{background:var(--red)}
|
.status-dot.error{background:var(--red)}
|
||||||
@keyframes pulse{0%,100%{opacity:1}50%{opacity:.3}}
|
@keyframes pulse{0%,100%{opacity:1}50%{opacity:.3}}
|
||||||
|
.mode-toggle{display:flex;border-radius:6px;overflow:hidden;border:1px solid var(--border)}
|
||||||
|
.mode-btn{padding:6px 14px;border:none;background:transparent;color:var(--text-dim);font-family:inherit;font-weight:600;font-size:.8rem;cursor:pointer;transition:all .15s}
|
||||||
|
.mode-btn:hover{color:var(--text)}
|
||||||
|
.mode-btn.active[data-mode="classic"]{background:var(--accent);color:#000}
|
||||||
|
.mode-btn.active[data-mode="ml"]{background:#8b5cf6;color:#fff}
|
||||||
|
.ml-badge{display:inline-block;font-size:.6rem;font-weight:700;padding:2px 6px;border-radius:3px;background:#8b5cf6;color:#fff;vertical-align:super;margin-left:4px}
|
||||||
|
.ml-weight{font-size:.65rem;color:#8b5cf6;font-family:var(--mono);margin-top:2px}
|
||||||
</style>
|
</style>
|
||||||
</head>
|
</head>
|
||||||
<body>
|
<body>
|
||||||
@ -530,6 +550,10 @@ DASHBOARD_HTML = """<!DOCTYPE html>
|
|||||||
<span class="status-dot" id="statusDot"></span>
|
<span class="status-dot" id="statusDot"></span>
|
||||||
<span id="statusText">Loading...</span>
|
<span id="statusText">Loading...</span>
|
||||||
</div>
|
</div>
|
||||||
|
<div class="mode-toggle" id="modeToggle" title="Switch between Classic (equal-weight) and ML-optimized scoring">
|
||||||
|
<button class="mode-btn active" data-mode="classic" onclick="setMode('classic')">Classic</button>
|
||||||
|
<button class="mode-btn" data-mode="ml" onclick="setMode('ml')">ML</button>
|
||||||
|
</div>
|
||||||
<button class="btn btn-accent" onclick="doRefresh(false)" id="btnRefresh">⚡ Quick Refresh</button>
|
<button class="btn btn-accent" onclick="doRefresh(false)" id="btnRefresh">⚡ Quick Refresh</button>
|
||||||
<button class="btn btn-secondary" onclick="doRefresh(true)" id="btnFullRefresh" title="Re-scrape on-chain metrics from LookIntoBitcoin (~2-3 min)">🔄 Full Refresh</button>
|
<button class="btn btn-secondary" onclick="doRefresh(true)" id="btnFullRefresh" title="Re-scrape on-chain metrics from LookIntoBitcoin (~2-3 min)">🔄 Full Refresh</button>
|
||||||
</div>
|
</div>
|
||||||
@ -689,6 +713,11 @@ function renderMetrics(metrics) {
|
|||||||
html += '</div></div>';
|
html += '</div></div>';
|
||||||
html += '<div class="metric-value">' + (m.display_value || 'N/A') + '</div>';
|
html += '<div class="metric-value">' + (m.display_value || 'N/A') + '</div>';
|
||||||
html += '<div class="metric-desc">' + (m.description || '') + '</div>';
|
html += '<div class="metric-desc">' + (m.description || '') + '</div>';
|
||||||
|
if (currentMode === 'ml' && m.ml_weight != null) {
|
||||||
|
const wpct = (m.ml_weight * 100).toFixed(1);
|
||||||
|
const contrib = m.ml_contribution != null ? m.ml_contribution.toFixed(1) : '--';
|
||||||
|
html += '<div class="ml-weight">ML weight: ' + wpct + '% · contribution: ' + contrib + ' pts</div>';
|
||||||
|
}
|
||||||
if (hasSparkline) {
|
if (hasSparkline) {
|
||||||
html += '<div class="metric-sparkline"><canvas id="spark-' + idx + '"></canvas></div>';
|
html += '<div class="metric-sparkline"><canvas id="spark-' + idx + '"></canvas></div>';
|
||||||
}
|
}
|
||||||
@ -841,7 +870,7 @@ function renderHistoryFromData(history) {
|
|||||||
// Load backtest daily scores for the chart
|
// Load backtest daily scores for the chart
|
||||||
async function loadBacktestChart() {
|
async function loadBacktestChart() {
|
||||||
try {
|
try {
|
||||||
const r = await fetch('/api/backtest');
|
const r = await fetch('/api/backtest?mode=' + currentMode);
|
||||||
const data = await r.json();
|
const data = await r.json();
|
||||||
if (data.chart_data && data.chart_data.length) {
|
if (data.chart_data && data.chart_data.length) {
|
||||||
fullDailyScores = data.chart_data;
|
fullDailyScores = data.chart_data;
|
||||||
@ -894,7 +923,7 @@ function updateStatus(data) {
|
|||||||
async function poll() {
|
async function poll() {
|
||||||
try {
|
try {
|
||||||
const [dataRes, histRes] = await Promise.all([
|
const [dataRes, histRes] = await Promise.all([
|
||||||
fetch('/api/data'), fetch('/api/history')
|
fetch('/api/data?mode=' + currentMode), fetch('/api/history')
|
||||||
]);
|
]);
|
||||||
const data = await dataRes.json();
|
const data = await dataRes.json();
|
||||||
const history = await histRes.json();
|
const history = await histRes.json();
|
||||||
@ -906,7 +935,12 @@ async function poll() {
|
|||||||
|
|
||||||
// Assessment
|
// Assessment
|
||||||
const el = document.getElementById('assessment');
|
const el = document.getElementById('assessment');
|
||||||
el.textContent = scored.assessment || 'Loading...';
|
let assessText = scored.assessment || 'Loading...';
|
||||||
|
if (currentMode === 'ml') {
|
||||||
|
el.innerHTML = assessText + '<span class="ml-badge">ML</span>';
|
||||||
|
} else {
|
||||||
|
el.textContent = assessText;
|
||||||
|
}
|
||||||
el.style.color = assessmentColor(composite);
|
el.style.color = assessmentColor(composite);
|
||||||
|
|
||||||
// Price
|
// Price
|
||||||
@ -925,7 +959,11 @@ async function poll() {
|
|||||||
if (data.mayer_multiple) document.getElementById('mayerDisplay').textContent = data.mayer_multiple.toFixed(2);
|
if (data.mayer_multiple) document.getElementById('mayerDisplay').textContent = data.mayer_multiple.toFixed(2);
|
||||||
if (data.sma_200d) document.getElementById('sma200dDisplay').textContent = '$' + Math.round(data.sma_200d).toLocaleString();
|
if (data.sma_200d) document.getElementById('sma200dDisplay').textContent = '$' + Math.round(data.sma_200d).toLocaleString();
|
||||||
if (scored.scored_count != null) {
|
if (scored.scored_count != null) {
|
||||||
document.getElementById('scoredCount').textContent = scored.scored_count + '/' + scored.total_count + ' metrics active';
|
let countText = scored.scored_count + '/' + scored.total_count + ' metrics active';
|
||||||
|
if (currentMode === 'ml' && scored.classic_score != null) {
|
||||||
|
countText += ' · Classic: ' + scored.classic_score;
|
||||||
|
}
|
||||||
|
document.getElementById('scoredCount').textContent = countText;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Metrics
|
// Metrics
|
||||||
@ -954,6 +992,17 @@ async function doRefresh(full) {
|
|||||||
setTimeout(() => { btn.disabled = false; btn.textContent = origText; }, delay);
|
setTimeout(() => { btn.disabled = false; btn.textContent = origText; }, delay);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let currentMode = 'classic';
|
||||||
|
|
||||||
|
function setMode(mode) {
|
||||||
|
currentMode = mode;
|
||||||
|
document.querySelectorAll('.mode-btn').forEach(b => {
|
||||||
|
b.classList.toggle('active', b.dataset.mode === mode);
|
||||||
|
});
|
||||||
|
poll(); // Refresh with new mode
|
||||||
|
loadBacktestChart(); // Reload chart with new mode
|
||||||
|
}
|
||||||
|
|
||||||
drawScoreRing(0);
|
drawScoreRing(0);
|
||||||
poll();
|
poll();
|
||||||
setInterval(poll, 30000);
|
setInterval(poll, 30000);
|
||||||
@ -1174,11 +1223,13 @@ _history_collector_progress = {}
|
|||||||
|
|
||||||
|
|
||||||
@app.get("/api/backtest")
|
@app.get("/api/backtest")
|
||||||
def api_backtest():
|
def api_backtest(mode: str = "classic"):
|
||||||
"""Run backtest and return full results."""
|
"""Run backtest and return full results.
|
||||||
|
mode=classic (default) or mode=ml for ML-optimized scoring.
|
||||||
|
"""
|
||||||
try:
|
try:
|
||||||
from backtesting.engine import run_backtest
|
from backtesting.engine import run_backtest
|
||||||
return run_backtest()
|
return run_backtest(ml_mode=(mode == "ml"))
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
log.error("Backtest error: %s", traceback.format_exc())
|
log.error("Backtest error: %s", traceback.format_exc())
|
||||||
return JSONResponse({"error": str(e)}, status_code=500)
|
return JSONResponse({"error": str(e)}, status_code=500)
|
||||||
|
|||||||
0
ml/__init__.py
Normal file
0
ml/__init__.py
Normal file
562
ml/optimizer.py
Normal file
562
ml/optimizer.py
Normal file
@ -0,0 +1,562 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
ML Optimizer for Bitcoin Accumulation Zone Scoring.
|
||||||
|
|
||||||
|
Trains a gradient boosted tree model on historical on-chain metrics to find
|
||||||
|
optimal metric weights for identifying the best long-term buying opportunities.
|
||||||
|
|
||||||
|
Output: config/ml_weights.json with optimized weights and feature importances.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
from datetime import datetime, timedelta
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
from sklearn.ensemble import GradientBoostingClassifier
|
||||||
|
from sklearn.metrics import (
|
||||||
|
classification_report,
|
||||||
|
f1_score,
|
||||||
|
precision_score,
|
||||||
|
recall_score,
|
||||||
|
roc_auc_score,
|
||||||
|
)
|
||||||
|
from sklearn.model_selection import TimeSeriesSplit
|
||||||
|
from sklearn.preprocessing import StandardScaler
|
||||||
|
|
||||||
|
logging.basicConfig(
|
||||||
|
level=logging.INFO,
|
||||||
|
format="%(asctime)s [%(name)s] %(levelname)s: %(message)s",
|
||||||
|
)
|
||||||
|
log = logging.getLogger("ml-optimizer")
|
||||||
|
|
||||||
|
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||||
|
HISTORY_PATH = os.path.join(BASE_DIR, "data", "history.json")
|
||||||
|
OUTPUT_PATH = os.path.join(BASE_DIR, "config", "ml_weights.json")
|
||||||
|
THRESHOLDS_PATH = os.path.join(BASE_DIR, "config", "thresholds.json")
|
||||||
|
|
||||||
|
# Date range: 2018-02-01 onward (when all 8 metrics + fear_greed available)
|
||||||
|
START_DATE = "2018-02-01"
|
||||||
|
# Training cutoff: need 1yr forward data for labels
|
||||||
|
TRAIN_CUTOFF_DAYS = 365
|
||||||
|
# Target: forward 365d return > 30% = "good time to buy"
|
||||||
|
GOOD_BUY_THRESHOLD = 30.0
|
||||||
|
|
||||||
|
# Metrics read directly from history (6 keys); the ratio metrics and drawdown are derived in build_dataset
|
||||||
|
METRIC_KEYS = [
|
||||||
|
"puell_multiple",
|
||||||
|
"mvrv_zscore",
|
||||||
|
"reserve_risk",
|
||||||
|
"rhodl_ratio",
|
||||||
|
"nupl",
|
||||||
|
"fear_greed",
|
||||||
|
]
|
||||||
|
# Ratio-based metrics (derived from price vs reference)
|
||||||
|
RATIO_METRICS = {
|
||||||
|
"pct_above_200w_sma": {"price_key": "btc_price", "ref_key": "200w_sma"},
|
||||||
|
"pct_above_lth_rp": {"price_key": "btc_price", "ref_key": "lth_realized_price"},
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def load_history():
    """Load historical data and build a date-aligned lookup.

    Reads the JSON document at ``HISTORY_PATH``. Each top-level entry that is
    a dict with both ``"dates"`` and ``"values"`` lists becomes one series;
    any other entry is ignored.

    Returns:
        dict: ``{series_key: {date: value}}``. Points whose value is ``None``
        are dropped; if a date repeats within a series the last value wins.

    Raises:
        OSError: If the history file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # JSON is UTF-8 by specification; do not depend on the locale default.
    with open(HISTORY_PATH, encoding="utf-8") as f:
        raw = json.load(f)

    index = {}
    for key, data in raw.items():
        if not isinstance(data, dict) or "dates" not in data or "values" not in data:
            continue
        index[key] = {
            d: v
            for d, v in zip(data["dates"], data["values"])
            if v is not None
        }
    return index
|
||||||
|
|
||||||
|
|
||||||
|
def load_thresholds():
    """Load the scoring thresholds stored at ``THRESHOLDS_PATH``.

    Returns:
        The parsed JSON document, unchanged. build_dataset() reads
        ``thresholds[<metric>]["ranges"]`` from it.

    Raises:
        OSError: If the thresholds file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # JSON is UTF-8 by specification; do not depend on the locale default.
    with open(THRESHOLDS_PATH, encoding="utf-8") as f:
        return json.load(f)
|
||||||
|
|
||||||
|
|
||||||
|
def score_range(value, ranges):
    """Map a raw value to the score of the first threshold range containing it.

    Each item of *ranges* is a ``(low, high, score)`` triple describing the
    half-open interval ``low <= value < high``; a bound of ``None`` is
    unbounded on that side. Presumably mirrors scoring/engine.py (not visible
    from here).

    Returns ``None`` when *value* is ``None``, the first matching range's
    score otherwise, and ``0`` when no range matches.
    """
    if value is None:
        return None

    def _inside(lower, upper):
        above_floor = True if lower is None else value >= lower
        below_ceiling = True if upper is None else value < upper
        return above_floor and below_ceiling

    return next((pts for lower, upper, pts in ranges if _inside(lower, upper)), 0)
|
||||||
|
|
||||||
|
|
||||||
|
def _lookback_days_since_ath(dt, drawdowns, max_lookback=2000):
    """Return how many days before *dt* the drawdown was last below 0.1%.

    Walks back one calendar day at a time, starting with the day before *dt*.
    Dates missing from *drawdowns* count as far from the all-time high.
    Returns *max_lookback* when nothing is found within ``max_lookback - 1``
    days.
    """
    for i in range(1, max_lookback):
        check_d = (dt - timedelta(days=i)).strftime("%Y-%m-%d")
        if drawdowns.get(check_d, 100) < 0.1:  # essentially at ATH
            return i
    return max_lookback


def build_dataset(index, thresholds):
    """Build the aligned training dataset: metric scores plus forward returns.

    Args:
        index: ``{series_key: {date: value}}`` as produced by load_history().
            Dates are ``YYYY-MM-DD`` strings.
        thresholds: Mapping whose ``[<metric>]["ranges"]`` entries are passed
            to score_range(); a missing metric scores against an empty list.

    Returns:
        list[dict]: One row per date on or after ``START_DATE`` for which every
        key in ``METRIC_KEYS``, the price, the 200-week SMA and the LTH
        realized price are present (and the two references are non-zero).
        Each row holds ``date``, ``price``, ``score_<metric>``,
        ``raw_<metric>``, ``delta_30d_<metric>``, the two interaction terms,
        ``days_since_ath`` and ``fwd_<N>d`` for each horizon whose future price
        exists. An empty list is returned when no date qualifies.
    """
    all_dates = set()
    for lookup in index.values():
        all_dates.update(lookup)
    dates = sorted(d for d in all_dates if d >= START_DATE)
    if not dates:
        # Without this guard the dates[0] in the log call below raises IndexError.
        log.warning("No history on or after %s; dataset is empty", START_DATE)
        return []

    # Price lookup for forward returns; the first series listing a date wins.
    price_lookup = {}
    for pk in ("btc_price", "btc_price_sma", "btc_price_lth"):
        for d, v in index.get(pk, {}).items():
            price_lookup.setdefault(d, v)

    # Running all-time high over every priced date (including those before
    # START_DATE) gives the percentage drawdown per date.
    ath = 0
    drawdowns = {}
    for d in sorted(all_dates):
        p = price_lookup.get(d)
        if p is None:
            continue
        if p > ath:
            ath = p
        if ath > 0:
            drawdowns[d] = ((ath - p) / ath) * 100

    # (metric name in the row, key of its ranges in the thresholds file)
    score_sources = (
        ("puell_multiple", "puell_multiple"),
        ("mvrv_zscore", "mvrv_zscore"),
        ("reserve_risk", "reserve_risk"),
        ("rhodl_ratio", "rhodl_ratio"),
        ("nupl", "nupl"),
        ("fear_greed", "fear_greed"),
        ("drawdown", "drawdown"),
        ("pct_above_200w_sma", "price_vs_200w_sma"),
        ("pct_above_lth_rp", "lth_realized_price"),
    )
    metric_ranges = {
        tkey: thresholds.get(tkey, {}).get("ranges", [])
        for _, tkey in score_sources
    }

    log.info("Building dataset from %d dates (%s to %s)", len(dates), dates[0], dates[-1])

    rows = []
    for d in dates:
        # Raw metric values; every directly-read metric must be present.
        vals = {key: index.get(key, {}).get(d) for key in METRIC_KEYS}
        if any(v is None for v in vals.values()):
            continue

        # Ratio metrics need the price and both reference series.
        price = price_lookup.get(d)
        sma_200w = index.get("200w_sma", {}).get(d)
        lth_rp = index.get("lth_realized_price", {}).get(d)
        if price is None or sma_200w is None or lth_rp is None:
            continue
        if sma_200w == 0 or lth_rp == 0:
            continue

        vals["pct_above_200w_sma"] = ((price - sma_200w) / sma_200w) * 100
        vals["pct_above_lth_rp"] = ((price - lth_rp) / lth_rp) * 100
        vals["drawdown"] = drawdowns.get(d, 0)

        # Score each metric using the existing thresholds.
        scores = {
            name: score_range(vals[name], metric_ranges[tkey])
            for name, tkey in score_sources
        }
        if any(s is None for s in scores.values()):
            continue

        # Forward returns (percent) for each horizon whose future price exists.
        dt = datetime.strptime(d, "%Y-%m-%d")
        fwd = {}
        for days in (30, 90, 180, 365):
            future_d = (dt + timedelta(days=days)).strftime("%Y-%m-%d")
            fp = price_lookup.get(future_d)
            if fp is not None and price > 0:
                fwd[f"fwd_{days}d"] = ((fp - price) / price) * 100

        # Rate-of-change features: absolute change versus 30 days earlier.
        # NOTE(review): a previous value of exactly 0 is treated like a missing
        # one (delta forced to 0.0) — confirm that is intended for a difference.
        deltas = {}
        d_30ago = (dt - timedelta(days=30)).strftime("%Y-%m-%d")
        for key in ("mvrv_zscore", "nupl", "puell_multiple", "reserve_risk"):
            v_prev = index.get(key, {}).get(d_30ago)
            if v_prev is not None and v_prev != 0:
                deltas[f"delta_30d_{key}"] = vals[key] - v_prev
            else:
                deltas[f"delta_30d_{key}"] = 0.0

        # Interaction terms
        interactions = {
            "mvrv_x_nupl": vals["mvrv_zscore"] * vals["nupl"],
            "puell_x_reserve": vals["puell_multiple"] * vals["reserve_risk"],
        }

        rows.append({
            "date": d,
            "price": price,
            **{f"score_{k}": v for k, v in scores.items()},
            **{f"raw_{k}": v for k, v in vals.items()},
            **deltas,
            **interactions,
            "days_since_ath": _lookback_days_since_ath(dt, drawdowns),
            **fwd,
        })

    log.info("Built %d complete data rows", len(rows))
    return rows
|
||||||
|
|
||||||
|
|
||||||
|
def _mean_and_std(values):
    """Return ``(mean, std)`` of *values* as plain floats, or ``(0.0, 0.0)`` if empty."""
    if len(values) == 0:
        return 0.0, 0.0
    return float(np.mean(values)), float(np.std(values))


def train_model(rows):
    """Train a gradient boosted classifier to identify good buying opportunities.

    Args:
        rows: list of per-day dicts carrying ``date``, the ``score_*`` / ``raw_*``
            columns, the ``delta_30d_*`` columns, the two interaction terms,
            ``days_since_ath`` and, where available, ``fwd_365d``. Rows that
            have ``fwd_365d`` are annotated in place with a binary ``target``.

    Returns:
        A JSON-serialisable dict with keys ``weights``, ``feature_importances``,
        ``cv_results``, ``training_info``, ``comparison`` and ``trained_at``;
        or ``None`` when there are fewer than 100 labeled rows or the target
        contains a single class (a classifier cannot be fitted in that case).
    """
    # Local import: the original reached for timezone via __import__('datetime').
    from datetime import timezone

    # Filter to rows that have 365d forward return (for labeling)
    labeled = [r for r in rows if "fwd_365d" in r]
    log.info("Rows with 365d forward data: %d", len(labeled))

    if len(labeled) < 100:
        log.error("Not enough labeled data. Need at least 100 rows, got %d", len(labeled))
        return None

    # Create binary target: forward 365d return > threshold
    for r in labeled:
        r["target"] = 1 if r["fwd_365d"] > GOOD_BUY_THRESHOLD else 0

    positive = sum(r["target"] for r in labeled)
    log.info("Target distribution: %d positive (%.1f%%), %d negative",
             positive, positive / len(labeled) * 100, len(labeled) - positive)

    # The classifier's fit() rejects a single-class target; fail the same way
    # the "not enough data" path does instead of raising deep inside sklearn.
    if positive == 0 or positive == len(labeled):
        log.error("Target contains a single class (%d positive of %d rows); cannot train.",
                  positive, len(labeled))
        return None

    # Feature columns: scores + raw values + deltas + interactions + cycle position.
    # Column order is kept exactly as-is so the fitted model is reproducible.
    score_features = [
        "score_puell_multiple", "score_mvrv_zscore", "score_reserve_risk",
        "score_rhodl_ratio", "score_nupl", "score_fear_greed",
        "score_drawdown", "score_pct_above_200w_sma", "score_pct_above_lth_rp",
    ]
    raw_features = [
        "raw_puell_multiple", "raw_mvrv_zscore", "raw_reserve_risk",
        "raw_rhodl_ratio", "raw_nupl", "raw_fear_greed",
        "raw_pct_above_200w_sma", "raw_pct_above_lth_rp", "raw_drawdown",
    ]
    delta_features = [
        "delta_30d_mvrv_zscore", "delta_30d_nupl",
        "delta_30d_puell_multiple", "delta_30d_reserve_risk",
    ]
    interaction_features = ["mvrv_x_nupl", "puell_x_reserve"]
    cycle_features = ["days_since_ath"]

    feature_cols = score_features + raw_features + delta_features + interaction_features + cycle_features

    X = np.array([[r[f] for f in feature_cols] for r in labeled])
    y = np.array([r["target"] for r in labeled])

    log.info("Feature matrix: %d samples x %d features", X.shape[0], X.shape[1])

    # One definition shared by the CV models and the final model.
    gb_params = {
        "n_estimators": 300,
        "learning_rate": 0.05,
        "max_depth": 4,
        "subsample": 0.8,
        "min_samples_leaf": 20,
        "random_state": 42,
    }

    # Time-series cross-validation (expanding window, 5 splits)
    # NOTE(review): the target looks 365 days ahead, so labels at the end of each
    # training window presumably overlap the validation window; consider
    # TimeSeriesSplit(gap=...) once the dataset is large enough — TODO confirm.
    tscv = TimeSeriesSplit(n_splits=5)
    cv_scores = []
    cv_f1 = []
    cv_precision = []
    cv_recall = []

    for fold, (train_idx, val_idx) in enumerate(tscv.split(X)):
        X_train, X_val = X[train_idx], X[val_idx]
        y_train, y_val = y[train_idx], y[val_idx]

        train_dates = f"{labeled[train_idx[0]]['date']} to {labeled[train_idx[-1]]['date']}"
        val_dates = f"{labeled[val_idx[0]]['date']} to {labeled[val_idx[-1]]['date']}"

        # A training window with one class cannot be fitted; skip it rather than crash.
        if len(np.unique(y_train)) < 2:
            log.warning("Fold %d: Train %s contains a single class; skipping fold",
                        fold + 1, train_dates)
            continue

        scaler = StandardScaler()
        X_train_s = scaler.fit_transform(X_train)
        X_val_s = scaler.transform(X_val)

        model = GradientBoostingClassifier(**gb_params)
        model.fit(X_train_s, y_train)

        y_pred = model.predict(X_val_s)
        y_prob = model.predict_proba(X_val_s)[:, 1]

        # ROC AUC is undefined when the validation window has one class. Leave
        # such folds out of the average instead of counting them as AUC = 0.
        if len(np.unique(y_val)) > 1:
            auc = roc_auc_score(y_val, y_prob)
            cv_scores.append(auc)
        else:
            auc = float("nan")
        f1 = f1_score(y_val, y_pred, zero_division=0)
        prec = precision_score(y_val, y_pred, zero_division=0)
        rec = recall_score(y_val, y_pred, zero_division=0)

        cv_f1.append(f1)
        cv_precision.append(prec)
        cv_recall.append(rec)

        log.info("Fold %d: Train %s | Val %s | AUC=%.3f F1=%.3f P=%.3f R=%.3f",
                 fold + 1, train_dates, val_dates, auc, f1, prec, rec)

    mean_auc, std_auc = _mean_and_std(cv_scores)
    mean_f1, std_f1 = _mean_and_std(cv_f1)
    mean_precision, _ = _mean_and_std(cv_precision)
    mean_recall, _ = _mean_and_std(cv_recall)

    log.info("CV Mean AUC: %.3f (+/- %.3f)", mean_auc, std_auc)
    log.info("CV Mean F1: %.3f (+/- %.3f)", mean_f1, std_f1)

    # Train final model on all labeled data
    log.info("Training final model on all %d labeled samples...", len(labeled))
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    final_model = GradientBoostingClassifier(**gb_params)
    final_model.fit(X_scaled, y)

    # Feature importances, highest first
    importances = final_model.feature_importances_
    feat_imp = sorted(
        zip(feature_cols, importances),
        key=lambda x: x[1],
        reverse=True,
    )

    log.info("\nFeature Importance Ranking:")
    log.info("-" * 50)
    for name, imp in feat_imp:
        bar = "#" * int(imp * 200)
        log.info("  %-30s %.4f %s", name, imp, bar)

    # Extract optimal weights by aggregating importance per metric.
    # Map each feature back to its parent metric.
    metric_names = [
        "puell_multiple", "mvrv_zscore", "reserve_risk", "rhodl_ratio",
        "nupl", "fear_greed", "drawdown", "pct_above_200w_sma", "pct_above_lth_rp",
    ]
    feature_to_metric = {}
    for m in metric_names:
        feature_to_metric[f"score_{m}"] = m
        feature_to_metric[f"raw_{m}"] = m
    # Delta features map to their base metric
    feature_to_metric["delta_30d_mvrv_zscore"] = "mvrv_zscore"
    feature_to_metric["delta_30d_nupl"] = "nupl"
    feature_to_metric["delta_30d_puell_multiple"] = "puell_multiple"
    feature_to_metric["delta_30d_reserve_risk"] = "reserve_risk"
    # days_since_ath maps to drawdown conceptually
    feature_to_metric["days_since_ath"] = "drawdown"
    # Interaction terms are split evenly between their constituent metrics
    interaction_parents = {
        "mvrv_x_nupl": ("mvrv_zscore", "nupl"),
        "puell_x_reserve": ("puell_multiple", "reserve_risk"),
    }

    metric_importances = {m: 0.0 for m in metric_names}
    for name, imp in feat_imp:
        if name in feature_to_metric:
            metric_importances[feature_to_metric[name]] += imp
        elif name in interaction_parents:
            for parent in interaction_parents[name]:
                metric_importances[parent] += imp / 2

    # Normalize weights to sum to 1 (cast to float so the JSON holds plain numbers)
    total_imp = sum(metric_importances.values())
    if total_imp > 0:
        weights = {k: round(float(v / total_imp), 4) for k, v in metric_importances.items()}
    else:
        weights = {k: round(1 / len(metric_importances), 4) for k in metric_importances}

    # Sort by weight descending
    weights = dict(sorted(weights.items(), key=lambda x: x[1], reverse=True))

    log.info("\nOptimal Metric Weights:")
    log.info("-" * 50)
    equal_weight = round(1 / len(weights), 4)
    for metric, w in weights.items():
        change = "+" if w > equal_weight else ""
        diff = (w - equal_weight) / equal_weight * 100
        log.info("  %-25s %.4f (%s%.0f%% vs equal)", metric, w, change, diff)

    # Run comparison backtest: ML-weighted vs equal-weight
    log.info("\n" + "=" * 60)
    log.info("COMPARISON BACKTEST: ML-Weighted vs Equal-Weight")
    log.info("=" * 60)
    comparison = run_comparison(rows, weights)

    # Build output
    result = {
        "weights": weights,
        "feature_importances": {name: round(float(imp), 6) for name, imp in feat_imp},
        "cv_results": {
            "mean_auc": round(mean_auc, 4),
            "std_auc": round(std_auc, 4),
            "mean_f1": round(mean_f1, 4),
            "mean_precision": round(mean_precision, 4),
            "mean_recall": round(mean_recall, 4),
        },
        "training_info": {
            "n_samples": len(labeled),
            "n_positive": int(positive),
            "positive_rate": round(positive / len(labeled), 4),
            "n_features": len(feature_cols),
            "target_threshold": GOOD_BUY_THRESHOLD,
            "date_range": f"{labeled[0]['date']} to {labeled[-1]['date']}",
            "model": "GradientBoostingClassifier",
        },
        "comparison": comparison,
        "trained_at": datetime.now(tz=timezone.utc).isoformat(),
    }

    return result
|
||||||
|
|
||||||
|
|
||||||
|
def run_comparison(rows, ml_weights):
    """Compare ML-weighted scoring vs equal-weight scoring across score brackets.

    Args:
        rows: list of per-day dicts holding the nine ``score_*`` columns; only
            rows that also carry ``fwd_365d`` are used. Each used row is
            annotated in place with ``composite_equal_weight`` and
            ``composite_ml_weighted``.
        ml_weights: mapping of metric name -> weight. Metrics missing from the
            mapping fall back to the equal weight; the resolved weights are
            then normalised to sum to 1 so the composite stays on a 0-100 scale.

    Returns:
        ``{"equal_weight": [...], "ml_weighted": [...]}`` with one entry per
        bracket, each holding ``range``, ``label``, ``days``, ``avg_365d``,
        ``median_365d`` and ``win_rate_365d`` (the last three are ``None`` for
        an empty bracket).
    """
    from statistics import median

    # Metrics used in scoring (maps to score_* columns)
    score_keys = [
        "puell_multiple", "mvrv_zscore", "reserve_risk", "rhodl_ratio",
        "nupl", "fear_greed", "drawdown", "pct_above_200w_sma", "pct_above_lth_rp",
    ]
    n_metrics = len(score_keys)
    equal_weight = 1.0 / n_metrics

    brackets = [
        (0, 20, "Extreme Caution"),
        (21, 40, "Caution"),
        (41, 55, "Neutral"),
        (56, 70, "Moderate Opportunity"),
        (71, 85, "Strong Accumulation"),
        (86, 100, "Extreme Accumulation"),
    ]

    # Only use rows with forward returns
    scored_rows = [r for r in rows if "fwd_365d" in r]

    # Resolve the ML weights once and normalise them.
    resolved = {k: ml_weights.get(k, equal_weight) for k in score_keys}
    weight_total = sum(resolved.values())
    if weight_total > 0:
        resolved = {k: w / weight_total for k, w in resolved.items()}
    else:
        resolved = dict.fromkeys(score_keys, equal_weight)

    results = {"equal_weight": [], "ml_weighted": []}

    for mode in ("equal_weight", "ml_weighted"):
        composite_key = f"composite_{mode}"
        for r in scored_rows:
            if mode == "equal_weight":
                composite = sum(r[f"score_{k}"] for k in score_keys) / n_metrics * 10
            else:
                composite = sum(r[f"score_{k}"] * resolved[k] for k in score_keys) * 10
            r[composite_key] = composite

        for low, high, label in brackets:
            # Bracket bounds are whole numbers, so bucket on the rounded
            # composite; otherwise a value such as 20.6 falls between brackets.
            returns_365 = [
                r["fwd_365d"] for r in scored_rows
                if low <= round(r[composite_key]) <= high
            ]
            if not returns_365:
                results[mode].append({
                    "range": f"{low}-{high}",
                    "label": label,
                    "days": 0,
                    "avg_365d": None,
                    "median_365d": None,
                    "win_rate_365d": None,
                })
                continue
            win_rate = sum(1 for x in returns_365 if x > 0) / len(returns_365) * 100
            results[mode].append({
                "range": f"{low}-{high}",
                "label": label,
                "days": len(returns_365),
                "avg_365d": round(sum(returns_365) / len(returns_365), 2),
                "median_365d": round(median(returns_365), 2),
                "win_rate_365d": round(win_rate, 1),
            })

    # Print comparison
    log.info("\n%-18s | %-8s %-8s %-8s | %-8s %-8s %-8s",
             "Bracket", "EQ Avg", "EQ Med", "EQ Win%", "ML Avg", "ML Med", "ML Win%")
    log.info("-" * 80)
    for eq, ml in zip(results["equal_weight"], results["ml_weighted"]):
        eq_avg = f"{eq['avg_365d']:.1f}%" if eq["avg_365d"] is not None else "--"
        eq_med = f"{eq['median_365d']:.1f}%" if eq.get("median_365d") is not None else "--"
        eq_win = f"{eq['win_rate_365d']:.0f}%" if eq.get("win_rate_365d") is not None else "--"
        ml_avg = f"{ml['avg_365d']:.1f}%" if ml["avg_365d"] is not None else "--"
        ml_med = f"{ml['median_365d']:.1f}%" if ml.get("median_365d") is not None else "--"
        ml_win = f"{ml['win_rate_365d']:.0f}%" if ml.get("win_rate_365d") is not None else "--"
        log.info("%-18s | %-8s %-8s %-8s | %-8s %-8s %-8s",
                 eq["label"], eq_avg, eq_med, eq_win, ml_avg, ml_med, ml_win)

    return results
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Run the optimizer end to end.

    Checks that the history file exists, loads history and thresholds, builds
    the dataset, trains the model, writes the result as JSON to OUTPUT_PATH
    and logs a summary. Exits with status 1 when the history file is missing
    or training returns no result.
    """
    rule = "=" * 60

    log.info(rule)
    log.info("Bitcoin Accumulation Zone ML Optimizer")
    log.info(rule)

    history_present = os.path.exists(HISTORY_PATH)
    if not history_present:
        log.error("No historical data at %s. Run history collector first.", HISTORY_PATH)
        sys.exit(1)

    # Load data
    log.info("Loading historical data...")
    index = load_history()
    thresholds = load_thresholds()

    # Build dataset
    log.info("Building training dataset...")
    rows = build_dataset(index, thresholds)

    # Train model
    log.info("Training ML model...")
    result = train_model(rows)
    if result is None:
        log.error("Training failed.")
        sys.exit(1)

    # Save weights
    with open(OUTPUT_PATH, "w") as out_file:
        json.dump(result, out_file, indent=2)
    log.info("\nSaved ML weights to %s", OUTPUT_PATH)

    # Print summary
    info = result["training_info"]
    cv = result["cv_results"]

    log.info("\n" + rule)
    log.info("SUMMARY")
    log.info(rule)
    log.info("Model: %s", info["model"])
    log.info("Samples: %d (%d positive)", info["n_samples"], info["n_positive"])
    log.info("CV AUC: %.3f (+/- %.3f)", cv["mean_auc"], cv["std_auc"])
    log.info("CV F1: %.3f", cv["mean_f1"])

    log.info("\nTop 5 Feature Importances:")
    top_features = list(result["feature_importances"].items())[:5]
    for feature_name, importance in top_features:
        log.info("  %-30s %.4f", feature_name, importance)

    log.info("\nMetric Weights (ML-Optimized):")
    for metric_name, metric_weight in result["weights"].items():
        log.info("  %-25s %.1f%%", metric_name, metric_weight * 100)


# Script entry point: run the optimizer only when executed directly.
if __name__ == "__main__":
    main()
|
||||||
@ -428,3 +428,104 @@ def score_all(metrics):
|
|||||||
"scored_count": len(valid_scores),
|
"scored_count": len(valid_scores),
|
||||||
"total_count": len(results),
|
"total_count": len(results),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# ── ML-Optimized Scoring ──────────────────────────────────────────────
|
||||||
|
|
||||||
|
# config/ directory next to this file's parent directory; holds ml_weights.json.
_ML_CONFIG_DIR = os.path.join(
    os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
    "config",
)
ML_WEIGHTS_PATH = os.path.join(_ML_CONFIG_DIR, "ml_weights.json")

# Scoring-engine metric keys whose ML weight key is spelled differently.
_ML_RENAMED_KEYS = {
    "price_vs_200w_sma": "pct_above_200w_sma",
    "lth_realized_price": "pct_above_lth_rp",
}

# Maps scoring engine metric keys to ML weight keys (identity unless renamed above).
_ML_KEY_MAP = {
    engine_key: _ML_RENAMED_KEYS.get(engine_key, engine_key)
    for engine_key in (
        "fear_greed",
        "puell_multiple",
        "mvrv_zscore",
        "drawdown",
        "price_vs_200w_sma",
        "reserve_risk",
        "rhodl_ratio",
        "nupl",
        "lth_realized_price",
    )
}
|
||||||
|
|
||||||
|
|
||||||
|
def load_ml_weights():
    """Load ML-optimized weights from config.

    Reads the JSON file at ``ML_WEIGHTS_PATH`` and returns the mapping stored
    under its ``"weights"`` key.

    Returns:
        dict: the weights mapping, or ``{}`` when the file is missing or
        unreadable, is not valid JSON, is not a JSON object, or has no dict
        under ``"weights"``. Callers treat an empty dict as "no ML weights".
    """
    try:
        with open(ML_WEIGHTS_PATH, encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError):
        # OSError: missing/unreadable file. ValueError: malformed JSON or bad
        # encoding. Anything else is a programming error and should surface.
        return {}

    if not isinstance(data, dict):
        return {}
    weights = data.get("weights", {})
    # Guard against e.g. a list or null under "weights"; callers use .get().
    return weights if isinstance(weights, dict) else {}
|
||||||
|
|
||||||
|
|
||||||
|
def score_all_ml(metrics):
    """Score all metrics using ML-optimized weights.

    Same output format as score_all() but uses learned weights
    instead of equal weighting. Each scored metric keeps its
    individual score and additionally gets:

    * ``ml_weight`` - the weight looked up for it (rounded to 4 places);
    * ``ml_contribution`` - the points it adds to the composite, computed
      with the same normalisation as the composite so the contributions add
      up to ``composite_score`` (up to rounding).

    Args:
        metrics: passed through unchanged to score_all().

    Returns:
        dict with ``metrics``, ``composite_score``, ``assessment``,
        ``scored_count``, ``total_count``, ``ml_mode`` and ``classic_score``.
        When no ML weights are available, the classic result is returned with
        ``ml_mode`` set to False and an ``ml_error`` message.
    """
    # Get classic scores first (reuses all individual scoring logic)
    classic = score_all(metrics)
    ml_weights = load_ml_weights()

    if not ml_weights:
        # Fallback to classic if no ML weights available
        classic["ml_mode"] = False
        classic["ml_error"] = "ML weights not found — run ml/optimizer.py"
        return classic

    results = classic["metrics"]

    # First pass: resolve a weight for every scored metric.
    weighted_sum = 0.0
    weight_total = 0.0
    scored = []
    for m in results:
        if m["score"] is None:
            continue
        ml_key = _ML_KEY_MAP.get(m["key"])
        if ml_key is None:
            # Hash ribbons or unknown metric — use small default weight
            w = 0.01
        else:
            w = ml_weights.get(ml_key, 0.0)

        m["ml_weight"] = round(w, 4)
        weighted_sum += m["score"] * w
        weight_total += w
        scored.append((m, w))

    # Normalize if weights don't sum to 1 (e.g., missing metrics)
    if weight_total > 0:
        composite = weighted_sum / weight_total * 10
    else:
        composite = 0

    # Second pass: contributions use the normalised weight so that they are
    # consistent with the composite above.
    for m, w in scored:
        share = w / weight_total if weight_total > 0 else 0.0
        m["ml_contribution"] = round(m["score"] * share * 10, 2)

    # Assessment text (same thresholds as classic), highest floor first
    assessment_floors = (
        (80, "EXTREME ACCUMULATION ZONE"),
        (65, "STRONG ACCUMULATION ZONE"),
        (50, "MODERATE OPPORTUNITY"),
        (35, "NEUTRAL"),
        (20, "CAUTION — OVERHEATED"),
    )
    assessment = "EXTREME CAUTION"
    for floor, label in assessment_floors:
        if composite >= floor:
            assessment = label
            break

    return {
        "metrics": results,
        "composite_score": round(composite, 1),
        "assessment": assessment,
        "scored_count": classic["scored_count"],
        "total_count": classic["total_count"],
        "ml_mode": True,
        "classic_score": classic["composite_score"],
    }
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user