From 22fc7fc6cd26d6eb320213307d1e49a2a610c6cf Mon Sep 17 00:00:00 2001 From: BizzleBot Date: Fri, 20 Mar 2026 23:29:39 +0000 Subject: [PATCH] fix: historical data stored permanently, only append new daily values - Historical data (5693+ points per metric) saved in history.json permanently - Quick refresh: only updates price + Fear & Greed from APIs (~2 seconds) - Full refresh: only needed for FIRST-TIME setup or if data is missing - Daily append: new values added to history.json from cache, not re-scraped - Startup: uses cached on-chain data if it exists, no unnecessary Playwright launches - On-chain metrics only update once per day, no reason to re-scrape them --- dashboard/server.py | 42 ++++++-------- data/score_history.jsonl | 1 + scrapers/history_updater.py | 112 ++++++++++++++++++++++++++++++++++++ 3 files changed, 132 insertions(+), 23 deletions(-) create mode 100644 scrapers/history_updater.py diff --git a/dashboard/server.py b/dashboard/server.py index 16d609d..f37ab89 100644 --- a/dashboard/server.py +++ b/dashboard/server.py @@ -139,28 +139,16 @@ def run_scrape(force_full=False): mayer = price.calculate_mayer_multiple(price_current.get("price"), sma_200d) metrics["price_extras"] = {"sma_200d": sma_200d, "mayer_multiple": mayer} - # 3. On-chain metrics via Playwright (slow — only when needed) + # 3. 
On-chain metrics — use cached values (historical data is permanent) onchain_keys = ["puell_multiple", "mvrv_zscore", "reserve_risk", "rhodl_ratio", "nupl", "200w_sma", "lth_realized_price", "hash_ribbons", "pi_cycle_bottom", "lth_supply"] - # Check if we need a full on-chain refresh - cached_ts = existing_cache.get("_onchain_timestamp") - onchain_stale = True - if cached_ts and not force_full: - try: - from datetime import datetime as dt - age_hours = (datetime.now(timezone.utc) - datetime.fromisoformat(cached_ts)).total_seconds() / 3600 - onchain_stale = age_hours > 6 - if not onchain_stale: - log.info("On-chain data is %.1fh old — reusing cache (next full refresh in %.1fh)", age_hours, 6 - age_hours) - except Exception: - onchain_stale = True - has_cached_onchain = any(existing_cache.get(k, {}).get("value") is not None for k in onchain_keys) - if force_full or onchain_stale or not has_cached_onchain: - log.info("Scraping on-chain metrics from LookIntoBitcoin (full refresh)...") + if force_full or not has_cached_onchain: + # Only do a full Playwright scrape if explicitly requested or no data exists + log.info("Scraping on-chain metrics from LookIntoBitcoin (full refresh requested)...") try: from scrapers import lookintobitcoin onchain = lookintobitcoin.scrape_all() @@ -169,14 +157,12 @@ def run_scrape(force_full=False): except Exception as e: log.error("LookIntoBitcoin scraping failed: %s\n%s", e, traceback.format_exc()) _last_error = f"On-chain scraping failed: {e}" - # Fall back to cached on-chain data for k in onchain_keys: if k in existing_cache: metrics[k] = existing_cache[k] - if "_onchain_timestamp" in existing_cache: - metrics["_onchain_timestamp"] = existing_cache["_onchain_timestamp"] else: - # Reuse cached on-chain data + # Reuse cached on-chain values — they're stored permanently + log.info("Reusing cached on-chain data (use Full Refresh to re-scrape)") for k in onchain_keys: if k in existing_cache: metrics[k] = existing_cache[k] @@ -192,6 +178,13 @@ 
def run_scrape(force_full=False): save_cache(metrics) append_history(scored) + # Append today's values to permanent history (incremental, not full re-scrape) + try: + from scrapers.history_updater import update_history + update_history() + except Exception as e: + log.warning("History update failed (non-critical): %s", e) + _last_update = datetime.now(timezone.utc).isoformat() _last_error = None log.info("Scrape cycle complete. Composite score: %s", scored["composite_score"]) @@ -205,11 +198,14 @@ def run_scrape(force_full=False): def scraper_loop(): - """Background loop: quick refresh every 15min, full on-chain refresh every 6h.""" - run_scrape(force_full=True) # Full scrape on first boot if no cached data + """Background loop: quick refresh every 15min. Full scrape only on first boot with no data.""" + cache = load_cache() + has_data = any(cache.get(k, {}).get("value") is not None + for k in ["puell_multiple", "mvrv_zscore", "nupl"]) + run_scrape(force_full=not has_data) # Full only if no cached on-chain data while True: time.sleep(900) # 15 minutes - run_scrape() # Quick refresh (reuses cached on-chain if <6h old) + run_scrape() # Quick refresh only # Start background scraper on import diff --git a/data/score_history.jsonl b/data/score_history.jsonl index cbd87da..7776a47 100644 --- a/data/score_history.jsonl +++ b/data/score_history.jsonl @@ -4,3 +4,4 @@ {"timestamp": "2026-03-20T22:51:27.724327+00:00", "composite_score": 54.0, "scored_count": 10, "metrics": {"fear_greed": {"score": 7, "value": 11}, "puell_multiple": {"score": 5, "value": 0.6602699608966011}, "mvrv_zscore": {"score": 5, "value": 0.5211180167687892}, "drawdown": {"score": 6, "value": 43.94907994923858}, "price_vs_200w_sma": {"score": 6, "value": 58895.78086828114}, "reserve_risk": {"score": 10, "value": 0.0012985709697654493}, "rhodl_ratio": {"score": 4, "value": 1230.6243545314708}, "nupl": {"score": 7, "value": 0.22243290955405431}, "lth_realized_price": {"score": 1, "value": 
43346.58756410873}, "hash_ribbons": {"score": 3, "value": null}}} {"timestamp": "2026-03-20T23:07:48.303808+00:00", "composite_score": 51.0, "scored_count": 10, "metrics": {"fear_greed": {"score": 7, "value": 11}, "puell_multiple": {"score": 5, "value": 0.6602699608966011}, "mvrv_zscore": {"score": 5, "value": 0.5211180167687892}, "drawdown": {"score": 6, "value": 43.942734771573605}, "price_vs_200w_sma": {"score": 3, "value": 58895.78086828114}, "reserve_risk": {"score": 10, "value": 0.0012985709697654493}, "rhodl_ratio": {"score": 4, "value": 1230.6243545314708}, "nupl": {"score": 7, "value": 0.22243290955405431}, "lth_realized_price": {"score": 1, "value": 43346.58756410873}, "hash_ribbons": {"score": 3, "value": null}}} {"timestamp": "2026-03-20T23:21:39.705718+00:00", "composite_score": 54.0, "scored_count": 10, "metrics": {"fear_greed": {"score": 7, "value": 11}, "puell_multiple": {"score": 5, "value": 0.6602699608966011}, "mvrv_zscore": {"score": 5, "value": 0.5211180167687892}, "drawdown": {"score": 6, "value": 44.07439720812183}, "price_vs_200w_sma": {"score": 6, "value": 58895.78086828114}, "reserve_risk": {"score": 10, "value": 0.0012985709697654493}, "rhodl_ratio": {"score": 4, "value": 1230.6243545314708}, "nupl": {"score": 7, "value": 0.22243290955405431}, "lth_realized_price": {"score": 1, "value": 43346.58756410873}, "hash_ribbons": {"score": 3, "value": null}}} +{"timestamp": "2026-03-20T23:27:15.835859+00:00", "composite_score": 54.0, "scored_count": 10, "metrics": {"fear_greed": {"score": 7, "value": 11}, "puell_multiple": {"score": 5, "value": 0.6602699608966011}, "mvrv_zscore": {"score": 5, "value": 0.5211180167687892}, "drawdown": {"score": 6, "value": 44.07122461928934}, "price_vs_200w_sma": {"score": 6, "value": 58895.78086828114}, "reserve_risk": {"score": 10, "value": 0.0012985709697654493}, "rhodl_ratio": {"score": 4, "value": 1230.6243545314708}, "nupl": {"score": 7, "value": 0.22243290955405431}, "lth_realized_price": {"score": 1, 
"""Incremental history updater — appends new daily data to history.json from cache."""

import json
import logging
import os
from datetime import datetime, timezone

log = logging.getLogger(__name__)

DATA_DIR = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "data")
HISTORY_PATH = os.path.join(DATA_DIR, "history.json")
CACHE_PATH = os.path.join(DATA_DIR, "cache.json")

# On-chain series: the cache entry is {"value": ...} and the history series
# uses the same key, so no per-key mapping is needed.
ONCHAIN_SERIES = (
    "puell_multiple",
    "mvrv_zscore",
    "reserve_risk",
    "rhodl_ratio",
    "nupl",
    "200w_sma",
    "lth_realized_price",
    "lth_supply",
)

# History series that are all fed from the cached spot price
# (main price chart, SMA chart, LTH chart).
PRICE_SERIES = ("btc_price", "btc_price_sma", "btc_price_lth")


def _read_json(path):
    """Load and return the JSON document stored at *path*."""
    with open(path, encoding="utf-8") as f:
        return json.load(f)


def _write_json_atomic(path, payload):
    """Serialize *payload* to *path* without ever leaving a truncated file.

    The data is written to a sibling temp file first and then moved over the
    target with ``os.replace``, so a crash mid-write cannot corrupt the
    permanent history.
    """
    tmp_path = f"{path}.tmp"
    try:
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(payload, f)
        os.replace(tmp_path, path)
    except BaseException:
        # Best-effort cleanup of the partial temp file; the original error wins.
        try:
            os.unlink(tmp_path)
        except OSError:
            pass
        raise


def _cached_field(cache, key, field):
    """Return ``cache[key][field]``, or None if the entry is missing or not a dict."""
    entry = cache.get(key)
    if not isinstance(entry, dict):
        # Covers both an absent key and an explicit JSON null.
        return None
    return entry.get(field)


def _append_point(series, day, value):
    """Append ``(day, value)`` to *series* unless it already reaches *day*.

    Returns True when a point was appended. An empty or partially-formed
    series (missing "dates"/"values") is treated as having no points yet.
    """
    if not isinstance(series, dict):
        return False
    dates = series.setdefault("dates", [])
    values = series.setdefault("values", [])
    # Dates are ISO "YYYY-MM-DD" strings, so string comparison is chronological.
    if dates and dates[-1] >= day:
        return False
    dates.append(day)
    values.append(value)
    return True


def update_history() -> bool:
    """Append today's values from cache.json to history.json.

    Only series that already exist in history.json are extended, and only
    when their last recorded date is earlier than today's UTC date.

    Returns:
        True if at least one series gained a point and history.json was
        rewritten; False if either file is missing or nothing changed.

    Raises:
        OSError, json.JSONDecodeError: if a file cannot be read, parsed,
            or written. The temp-file write keeps the existing history.json
            intact in that case.
    """
    if not os.path.exists(HISTORY_PATH):
        log.warning("No history.json found — run full collection first")
        return False

    if not os.path.exists(CACHE_PATH):
        log.warning("No cache.json found — run a scrape first")
        return False

    history = _read_json(HISTORY_PATH)
    cache = _read_json(CACHE_PATH)

    today = datetime.now(timezone.utc).strftime("%Y-%m-%d")
    updated = False

    # On-chain metrics
    for key in ONCHAIN_SERIES:
        if key not in history:
            continue
        val = _cached_field(cache, key, "value")
        if val is not None and _append_point(history[key], today, val):
            updated = True
            log.info("Appended %s: %s = %s", key, today, val)

    # BTC price feeds several chart series
    btc_price = _cached_field(cache, "price", "price")
    if btc_price:
        for key in PRICE_SERIES:
            if key in history and _append_point(history[key], today, btc_price):
                updated = True

    # Fear & Greed is stored as an integer
    fg_val = _cached_field(cache, "fear_greed", "value")
    if fg_val is not None and "fear_greed" in history:
        try:
            fg_int = int(fg_val)
        except (TypeError, ValueError):
            # A bad index value must not discard the other series' updates.
            log.warning("Ignoring non-numeric Fear & Greed value: %r", fg_val)
        else:
            if _append_point(history["fear_greed"], today, fg_int):
                updated = True

    if updated:
        _write_json_atomic(HISTORY_PATH, history)
        log.info("History updated with %s data", today)
    else:
        log.info("History already up to date (last date >= %s)", today)

    return updated