fix: historical data stored permanently, only append new daily values

- Historical data (5693+ points per metric) saved in history.json permanently
- Quick refresh: only updates price + Fear & Greed from APIs (~2 seconds)
- Full refresh: only needed for FIRST-TIME setup or if data is missing
- Daily append: new values added to history.json from cache, not re-scraped
- Startup: uses cached on-chain data if it exists, no unnecessary Playwright launches
- On-chain metrics only update once per day, so there is no reason to re-scrape them on every refresh
This commit is contained in:
BizzleBot 2026-03-20 23:29:39 +00:00
parent 28b5240a81
commit 22fc7fc6cd
3 changed files with 132 additions and 23 deletions

View File

@ -139,28 +139,16 @@ def run_scrape(force_full=False):
mayer = price.calculate_mayer_multiple(price_current.get("price"), sma_200d) mayer = price.calculate_mayer_multiple(price_current.get("price"), sma_200d)
metrics["price_extras"] = {"sma_200d": sma_200d, "mayer_multiple": mayer} metrics["price_extras"] = {"sma_200d": sma_200d, "mayer_multiple": mayer}
# 3. On-chain metrics via Playwright (slow — only when needed) # 3. On-chain metrics — use cached values (historical data is permanent)
onchain_keys = ["puell_multiple", "mvrv_zscore", "reserve_risk", "rhodl_ratio", onchain_keys = ["puell_multiple", "mvrv_zscore", "reserve_risk", "rhodl_ratio",
"nupl", "200w_sma", "lth_realized_price", "hash_ribbons", "nupl", "200w_sma", "lth_realized_price", "hash_ribbons",
"pi_cycle_bottom", "lth_supply"] "pi_cycle_bottom", "lth_supply"]
# Check if we need a full on-chain refresh
cached_ts = existing_cache.get("_onchain_timestamp")
onchain_stale = True
if cached_ts and not force_full:
try:
from datetime import datetime as dt
age_hours = (datetime.now(timezone.utc) - datetime.fromisoformat(cached_ts)).total_seconds() / 3600
onchain_stale = age_hours > 6
if not onchain_stale:
log.info("On-chain data is %.1fh old — reusing cache (next full refresh in %.1fh)", age_hours, 6 - age_hours)
except Exception:
onchain_stale = True
has_cached_onchain = any(existing_cache.get(k, {}).get("value") is not None for k in onchain_keys) has_cached_onchain = any(existing_cache.get(k, {}).get("value") is not None for k in onchain_keys)
if force_full or onchain_stale or not has_cached_onchain: if force_full or not has_cached_onchain:
log.info("Scraping on-chain metrics from LookIntoBitcoin (full refresh)...") # Only do a full Playwright scrape if explicitly requested or no data exists
log.info("Scraping on-chain metrics from LookIntoBitcoin (full refresh requested)...")
try: try:
from scrapers import lookintobitcoin from scrapers import lookintobitcoin
onchain = lookintobitcoin.scrape_all() onchain = lookintobitcoin.scrape_all()
@ -169,14 +157,12 @@ def run_scrape(force_full=False):
except Exception as e: except Exception as e:
log.error("LookIntoBitcoin scraping failed: %s\n%s", e, traceback.format_exc()) log.error("LookIntoBitcoin scraping failed: %s\n%s", e, traceback.format_exc())
_last_error = f"On-chain scraping failed: {e}" _last_error = f"On-chain scraping failed: {e}"
# Fall back to cached on-chain data
for k in onchain_keys: for k in onchain_keys:
if k in existing_cache: if k in existing_cache:
metrics[k] = existing_cache[k] metrics[k] = existing_cache[k]
if "_onchain_timestamp" in existing_cache:
metrics["_onchain_timestamp"] = existing_cache["_onchain_timestamp"]
else: else:
# Reuse cached on-chain data # Reuse cached on-chain values — they're stored permanently
log.info("Reusing cached on-chain data (use Full Refresh to re-scrape)")
for k in onchain_keys: for k in onchain_keys:
if k in existing_cache: if k in existing_cache:
metrics[k] = existing_cache[k] metrics[k] = existing_cache[k]
@ -192,6 +178,13 @@ def run_scrape(force_full=False):
save_cache(metrics) save_cache(metrics)
append_history(scored) append_history(scored)
# Append today's values to permanent history (incremental, not full re-scrape)
try:
from scrapers.history_updater import update_history
update_history()
except Exception as e:
log.warning("History update failed (non-critical): %s", e)
_last_update = datetime.now(timezone.utc).isoformat() _last_update = datetime.now(timezone.utc).isoformat()
_last_error = None _last_error = None
log.info("Scrape cycle complete. Composite score: %s", scored["composite_score"]) log.info("Scrape cycle complete. Composite score: %s", scored["composite_score"])
@ -205,11 +198,14 @@ def run_scrape(force_full=False):
def scraper_loop(): def scraper_loop():
"""Background loop: quick refresh every 15min, full on-chain refresh every 6h.""" """Background loop: quick refresh every 15min. Full scrape only on first boot with no data."""
run_scrape(force_full=True) # Full scrape on first boot if no cached data cache = load_cache()
has_data = any(cache.get(k, {}).get("value") is not None
for k in ["puell_multiple", "mvrv_zscore", "nupl"])
run_scrape(force_full=not has_data) # Full only if no cached on-chain data
while True: while True:
time.sleep(900) # 15 minutes time.sleep(900) # 15 minutes
run_scrape() # Quick refresh (reuses cached on-chain if <6h old) run_scrape() # Quick refresh only
# Start background scraper on import # Start background scraper on import

View File

@ -4,3 +4,4 @@
{"timestamp": "2026-03-20T22:51:27.724327+00:00", "composite_score": 54.0, "scored_count": 10, "metrics": {"fear_greed": {"score": 7, "value": 11}, "puell_multiple": {"score": 5, "value": 0.6602699608966011}, "mvrv_zscore": {"score": 5, "value": 0.5211180167687892}, "drawdown": {"score": 6, "value": 43.94907994923858}, "price_vs_200w_sma": {"score": 6, "value": 58895.78086828114}, "reserve_risk": {"score": 10, "value": 0.0012985709697654493}, "rhodl_ratio": {"score": 4, "value": 1230.6243545314708}, "nupl": {"score": 7, "value": 0.22243290955405431}, "lth_realized_price": {"score": 1, "value": 43346.58756410873}, "hash_ribbons": {"score": 3, "value": null}}} {"timestamp": "2026-03-20T22:51:27.724327+00:00", "composite_score": 54.0, "scored_count": 10, "metrics": {"fear_greed": {"score": 7, "value": 11}, "puell_multiple": {"score": 5, "value": 0.6602699608966011}, "mvrv_zscore": {"score": 5, "value": 0.5211180167687892}, "drawdown": {"score": 6, "value": 43.94907994923858}, "price_vs_200w_sma": {"score": 6, "value": 58895.78086828114}, "reserve_risk": {"score": 10, "value": 0.0012985709697654493}, "rhodl_ratio": {"score": 4, "value": 1230.6243545314708}, "nupl": {"score": 7, "value": 0.22243290955405431}, "lth_realized_price": {"score": 1, "value": 43346.58756410873}, "hash_ribbons": {"score": 3, "value": null}}}
{"timestamp": "2026-03-20T23:07:48.303808+00:00", "composite_score": 51.0, "scored_count": 10, "metrics": {"fear_greed": {"score": 7, "value": 11}, "puell_multiple": {"score": 5, "value": 0.6602699608966011}, "mvrv_zscore": {"score": 5, "value": 0.5211180167687892}, "drawdown": {"score": 6, "value": 43.942734771573605}, "price_vs_200w_sma": {"score": 3, "value": 58895.78086828114}, "reserve_risk": {"score": 10, "value": 0.0012985709697654493}, "rhodl_ratio": {"score": 4, "value": 1230.6243545314708}, "nupl": {"score": 7, "value": 0.22243290955405431}, "lth_realized_price": {"score": 1, "value": 43346.58756410873}, "hash_ribbons": {"score": 3, "value": null}}} {"timestamp": "2026-03-20T23:07:48.303808+00:00", "composite_score": 51.0, "scored_count": 10, "metrics": {"fear_greed": {"score": 7, "value": 11}, "puell_multiple": {"score": 5, "value": 0.6602699608966011}, "mvrv_zscore": {"score": 5, "value": 0.5211180167687892}, "drawdown": {"score": 6, "value": 43.942734771573605}, "price_vs_200w_sma": {"score": 3, "value": 58895.78086828114}, "reserve_risk": {"score": 10, "value": 0.0012985709697654493}, "rhodl_ratio": {"score": 4, "value": 1230.6243545314708}, "nupl": {"score": 7, "value": 0.22243290955405431}, "lth_realized_price": {"score": 1, "value": 43346.58756410873}, "hash_ribbons": {"score": 3, "value": null}}}
{"timestamp": "2026-03-20T23:21:39.705718+00:00", "composite_score": 54.0, "scored_count": 10, "metrics": {"fear_greed": {"score": 7, "value": 11}, "puell_multiple": {"score": 5, "value": 0.6602699608966011}, "mvrv_zscore": {"score": 5, "value": 0.5211180167687892}, "drawdown": {"score": 6, "value": 44.07439720812183}, "price_vs_200w_sma": {"score": 6, "value": 58895.78086828114}, "reserve_risk": {"score": 10, "value": 0.0012985709697654493}, "rhodl_ratio": {"score": 4, "value": 1230.6243545314708}, "nupl": {"score": 7, "value": 0.22243290955405431}, "lth_realized_price": {"score": 1, "value": 43346.58756410873}, "hash_ribbons": {"score": 3, "value": null}}} {"timestamp": "2026-03-20T23:21:39.705718+00:00", "composite_score": 54.0, "scored_count": 10, "metrics": {"fear_greed": {"score": 7, "value": 11}, "puell_multiple": {"score": 5, "value": 0.6602699608966011}, "mvrv_zscore": {"score": 5, "value": 0.5211180167687892}, "drawdown": {"score": 6, "value": 44.07439720812183}, "price_vs_200w_sma": {"score": 6, "value": 58895.78086828114}, "reserve_risk": {"score": 10, "value": 0.0012985709697654493}, "rhodl_ratio": {"score": 4, "value": 1230.6243545314708}, "nupl": {"score": 7, "value": 0.22243290955405431}, "lth_realized_price": {"score": 1, "value": 43346.58756410873}, "hash_ribbons": {"score": 3, "value": null}}}
{"timestamp": "2026-03-20T23:27:15.835859+00:00", "composite_score": 54.0, "scored_count": 10, "metrics": {"fear_greed": {"score": 7, "value": 11}, "puell_multiple": {"score": 5, "value": 0.6602699608966011}, "mvrv_zscore": {"score": 5, "value": 0.5211180167687892}, "drawdown": {"score": 6, "value": 44.07122461928934}, "price_vs_200w_sma": {"score": 6, "value": 58895.78086828114}, "reserve_risk": {"score": 10, "value": 0.0012985709697654493}, "rhodl_ratio": {"score": 4, "value": 1230.6243545314708}, "nupl": {"score": 7, "value": 0.22243290955405431}, "lth_realized_price": {"score": 1, "value": 43346.58756410873}, "hash_ribbons": {"score": 3, "value": null}}}

112
scrapers/history_updater.py Normal file
View File

@ -0,0 +1,112 @@
"""Incremental history updater — appends new daily data to history.json from cache."""
import json
import logging
import os
from datetime import datetime, timezone
log = logging.getLogger(__name__)

# Resolve <project root>/data: this module's directory, then one level up.
_PACKAGE_DIR = os.path.dirname(os.path.abspath(__file__))
_PROJECT_ROOT = os.path.dirname(_PACKAGE_DIR)
DATA_DIR = os.path.join(_PROJECT_ROOT, "data")

# Both JSON files sit side by side inside DATA_DIR.
HISTORY_PATH, CACHE_PATH = (
    os.path.join(DATA_DIR, filename)
    for filename in ("history.json", "cache.json")
)
def _append_point(series, day, value):
    """Append ``(day, value)`` to one history series if ``day`` is new.

    ``series`` is expected to be a dict with parallel ``"dates"`` and
    ``"values"`` lists. Missing, ``null`` or empty lists are treated as an
    empty series instead of raising. Returns True when a point was appended.
    """
    if not isinstance(series, dict):
        return False
    dates = series.get("dates") or []
    values = series.get("values") or []
    # Dates are "YYYY-MM-DD" strings, so lexicographic order is chronological.
    if dates and dates[-1] >= day:
        return False
    dates.append(day)
    values.append(value)
    series["dates"] = dates
    series["values"] = values
    return True


def update_history():
    """Append today's values from cache.json to history.json.

    For each known series that already exists in history.json, one point
    dated today (UTC) is appended, unless the series' last date is already
    today or later. Series absent from history.json are never created, and
    metrics with no cached value are skipped.

    NOTE(review): values are copied from cache.json as-is; this function
    cannot tell whether the cached on-chain values were refreshed today —
    confirm against the scraper that fills the cache.

    Returns:
        True if at least one point was appended and history.json was
        rewritten; False if nothing changed or either file is missing.

    Raises:
        json.JSONDecodeError / OSError: if either file cannot be read or
        parsed, or history.json cannot be written.
    """
    if not os.path.exists(HISTORY_PATH):
        log.warning("No history.json found — run full collection first")
        return False
    if not os.path.exists(CACHE_PATH):
        log.warning("No cache.json found — run a scrape first")
        return False

    with open(HISTORY_PATH, encoding="utf-8") as f:
        history = json.load(f)
    with open(CACHE_PATH, encoding="utf-8") as f:
        cache = json.load(f)

    today = datetime.now(timezone.utc).strftime("%Y-%m-%d")

    # On-chain metrics: cache key and history key are identical, and the
    # reading is stored under "value".
    onchain_keys = (
        "puell_multiple",
        "mvrv_zscore",
        "reserve_risk",
        "rhodl_ratio",
        "nupl",
        "200w_sma",
        "lth_realized_price",
        "lth_supply",
    )

    # Collect (history_key, value) pairs first, then apply them uniformly.
    pending = []
    for key in onchain_keys:
        # "or {}" tolerates a cache entry that is explicitly null.
        val = (cache.get(key) or {}).get("value")
        if val is not None:
            pending.append((key, val))

    # The same spot price feeds three chart series (price, SMA and LTH).
    btc_price = (cache.get("price") or {}).get("price")
    if btc_price:
        pending.extend(
            (key, btc_price)
            for key in ("btc_price", "btc_price_sma", "btc_price_lth")
        )

    # Fear & Greed is stored as an integer.
    fg_val = (cache.get("fear_greed") or {}).get("value")
    if fg_val is not None:
        try:
            pending.append(("fear_greed", int(fg_val)))
        except (TypeError, ValueError):
            # A malformed reading must not discard the other appends.
            log.warning("Skipping non-numeric fear_greed value: %r", fg_val)

    updated = False
    for hkey, val in pending:
        if hkey in history and _append_point(history[hkey], today, val):
            updated = True
            log.info("Appended %s: %s = %s", hkey, today, val)

    if updated:
        # Write to a sibling temp file and swap it in, so an interrupted
        # write cannot truncate the permanent history file.
        tmp_path = HISTORY_PATH + ".tmp"
        try:
            with open(tmp_path, "w", encoding="utf-8") as f:
                json.dump(history, f)
            os.replace(tmp_path, HISTORY_PATH)
        finally:
            # Only still present if the dump or the replace failed.
            if os.path.exists(tmp_path):
                os.remove(tmp_path)
        log.info("History updated with %s data", today)
    else:
        log.info("History already up to date (last date >= %s)", today)
    return updated