fix: historical data stored permanently, only append new daily values
- Historical data (5693+ points per metric) is saved permanently in history.json.
- Quick refresh: only updates price + Fear & Greed from APIs (~2 seconds).
- Full refresh: only needed for first-time setup or if data is missing.
- Daily append: new values are added to history.json from the cache, not re-scraped.
- Startup: uses cached on-chain data if it exists; no unnecessary Playwright launches.
- On-chain metrics only update once per day, so there is no reason to re-scrape them.
This commit is contained in:
parent
28b5240a81
commit
22fc7fc6cd
@ -139,28 +139,16 @@ def run_scrape(force_full=False):
|
||||
mayer = price.calculate_mayer_multiple(price_current.get("price"), sma_200d)
|
||||
metrics["price_extras"] = {"sma_200d": sma_200d, "mayer_multiple": mayer}
|
||||
|
||||
# 3. On-chain metrics via Playwright (slow — only when needed)
|
||||
# 3. On-chain metrics — use cached values (historical data is permanent)
|
||||
onchain_keys = ["puell_multiple", "mvrv_zscore", "reserve_risk", "rhodl_ratio",
|
||||
"nupl", "200w_sma", "lth_realized_price", "hash_ribbons",
|
||||
"pi_cycle_bottom", "lth_supply"]
|
||||
|
||||
# Check if we need a full on-chain refresh
|
||||
cached_ts = existing_cache.get("_onchain_timestamp")
|
||||
onchain_stale = True
|
||||
if cached_ts and not force_full:
|
||||
try:
|
||||
from datetime import datetime as dt
|
||||
age_hours = (datetime.now(timezone.utc) - datetime.fromisoformat(cached_ts)).total_seconds() / 3600
|
||||
onchain_stale = age_hours > 6
|
||||
if not onchain_stale:
|
||||
log.info("On-chain data is %.1fh old — reusing cache (next full refresh in %.1fh)", age_hours, 6 - age_hours)
|
||||
except Exception:
|
||||
onchain_stale = True
|
||||
|
||||
has_cached_onchain = any(existing_cache.get(k, {}).get("value") is not None for k in onchain_keys)
|
||||
|
||||
if force_full or onchain_stale or not has_cached_onchain:
|
||||
log.info("Scraping on-chain metrics from LookIntoBitcoin (full refresh)...")
|
||||
if force_full or not has_cached_onchain:
|
||||
# Only do a full Playwright scrape if explicitly requested or no data exists
|
||||
log.info("Scraping on-chain metrics from LookIntoBitcoin (full refresh requested)...")
|
||||
try:
|
||||
from scrapers import lookintobitcoin
|
||||
onchain = lookintobitcoin.scrape_all()
|
||||
@ -169,14 +157,12 @@ def run_scrape(force_full=False):
|
||||
except Exception as e:
|
||||
log.error("LookIntoBitcoin scraping failed: %s\n%s", e, traceback.format_exc())
|
||||
_last_error = f"On-chain scraping failed: {e}"
|
||||
# Fall back to cached on-chain data
|
||||
for k in onchain_keys:
|
||||
if k in existing_cache:
|
||||
metrics[k] = existing_cache[k]
|
||||
if "_onchain_timestamp" in existing_cache:
|
||||
metrics["_onchain_timestamp"] = existing_cache["_onchain_timestamp"]
|
||||
else:
|
||||
# Reuse cached on-chain data
|
||||
# Reuse cached on-chain values — they're stored permanently
|
||||
log.info("Reusing cached on-chain data (use Full Refresh to re-scrape)")
|
||||
for k in onchain_keys:
|
||||
if k in existing_cache:
|
||||
metrics[k] = existing_cache[k]
|
||||
@ -192,6 +178,13 @@ def run_scrape(force_full=False):
|
||||
save_cache(metrics)
|
||||
append_history(scored)
|
||||
|
||||
# Append today's values to permanent history (incremental, not full re-scrape)
|
||||
try:
|
||||
from scrapers.history_updater import update_history
|
||||
update_history()
|
||||
except Exception as e:
|
||||
log.warning("History update failed (non-critical): %s", e)
|
||||
|
||||
_last_update = datetime.now(timezone.utc).isoformat()
|
||||
_last_error = None
|
||||
log.info("Scrape cycle complete. Composite score: %s", scored["composite_score"])
|
||||
@ -205,11 +198,14 @@ def run_scrape(force_full=False):
|
||||
|
||||
|
||||
def scraper_loop():
|
||||
"""Background loop: quick refresh every 15min, full on-chain refresh every 6h."""
|
||||
run_scrape(force_full=True) # Full scrape on first boot if no cached data
|
||||
"""Background loop: quick refresh every 15min. Full scrape only on first boot with no data."""
|
||||
cache = load_cache()
|
||||
has_data = any(cache.get(k, {}).get("value") is not None
|
||||
for k in ["puell_multiple", "mvrv_zscore", "nupl"])
|
||||
run_scrape(force_full=not has_data) # Full only if no cached on-chain data
|
||||
while True:
|
||||
time.sleep(900) # 15 minutes
|
||||
run_scrape() # Quick refresh (reuses cached on-chain if <6h old)
|
||||
run_scrape() # Quick refresh only
|
||||
|
||||
|
||||
# Start background scraper on import
|
||||
|
||||
@ -4,3 +4,4 @@
|
||||
{"timestamp": "2026-03-20T22:51:27.724327+00:00", "composite_score": 54.0, "scored_count": 10, "metrics": {"fear_greed": {"score": 7, "value": 11}, "puell_multiple": {"score": 5, "value": 0.6602699608966011}, "mvrv_zscore": {"score": 5, "value": 0.5211180167687892}, "drawdown": {"score": 6, "value": 43.94907994923858}, "price_vs_200w_sma": {"score": 6, "value": 58895.78086828114}, "reserve_risk": {"score": 10, "value": 0.0012985709697654493}, "rhodl_ratio": {"score": 4, "value": 1230.6243545314708}, "nupl": {"score": 7, "value": 0.22243290955405431}, "lth_realized_price": {"score": 1, "value": 43346.58756410873}, "hash_ribbons": {"score": 3, "value": null}}}
|
||||
{"timestamp": "2026-03-20T23:07:48.303808+00:00", "composite_score": 51.0, "scored_count": 10, "metrics": {"fear_greed": {"score": 7, "value": 11}, "puell_multiple": {"score": 5, "value": 0.6602699608966011}, "mvrv_zscore": {"score": 5, "value": 0.5211180167687892}, "drawdown": {"score": 6, "value": 43.942734771573605}, "price_vs_200w_sma": {"score": 3, "value": 58895.78086828114}, "reserve_risk": {"score": 10, "value": 0.0012985709697654493}, "rhodl_ratio": {"score": 4, "value": 1230.6243545314708}, "nupl": {"score": 7, "value": 0.22243290955405431}, "lth_realized_price": {"score": 1, "value": 43346.58756410873}, "hash_ribbons": {"score": 3, "value": null}}}
|
||||
{"timestamp": "2026-03-20T23:21:39.705718+00:00", "composite_score": 54.0, "scored_count": 10, "metrics": {"fear_greed": {"score": 7, "value": 11}, "puell_multiple": {"score": 5, "value": 0.6602699608966011}, "mvrv_zscore": {"score": 5, "value": 0.5211180167687892}, "drawdown": {"score": 6, "value": 44.07439720812183}, "price_vs_200w_sma": {"score": 6, "value": 58895.78086828114}, "reserve_risk": {"score": 10, "value": 0.0012985709697654493}, "rhodl_ratio": {"score": 4, "value": 1230.6243545314708}, "nupl": {"score": 7, "value": 0.22243290955405431}, "lth_realized_price": {"score": 1, "value": 43346.58756410873}, "hash_ribbons": {"score": 3, "value": null}}}
|
||||
{"timestamp": "2026-03-20T23:27:15.835859+00:00", "composite_score": 54.0, "scored_count": 10, "metrics": {"fear_greed": {"score": 7, "value": 11}, "puell_multiple": {"score": 5, "value": 0.6602699608966011}, "mvrv_zscore": {"score": 5, "value": 0.5211180167687892}, "drawdown": {"score": 6, "value": 44.07122461928934}, "price_vs_200w_sma": {"score": 6, "value": 58895.78086828114}, "reserve_risk": {"score": 10, "value": 0.0012985709697654493}, "rhodl_ratio": {"score": 4, "value": 1230.6243545314708}, "nupl": {"score": 7, "value": 0.22243290955405431}, "lth_realized_price": {"score": 1, "value": 43346.58756410873}, "hash_ribbons": {"score": 3, "value": null}}}
|
||||
|
||||
112
scrapers/history_updater.py
Normal file
112
scrapers/history_updater.py
Normal file
@ -0,0 +1,112 @@
|
||||
"""Incremental history updater — appends new daily data to history.json from cache."""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from datetime import datetime, timezone
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
DATA_DIR = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "data")
|
||||
HISTORY_PATH = os.path.join(DATA_DIR, "history.json")
|
||||
CACHE_PATH = os.path.join(DATA_DIR, "cache.json")
|
||||
|
||||
|
||||
def _append_point(series, day, value):
    """Append ``(day, value)`` to a ``{"dates": [...], "values": [...]}`` series.

    Args:
        series: One entry of the history mapping. Anything that is not a
            dict (including ``None`` for a missing key) is ignored.
        day: Date label to append, e.g. ``"2026-03-20"``.
        value: Value to store for that day.

    Returns:
        True if a point was appended, False if the series is missing/malformed
        or its last recorded date is already ``>= day``.
    """
    if not isinstance(series, dict):
        return False

    # Missing or empty lists are treated as an empty series instead of
    # raising KeyError / IndexError on ``series["dates"][-1]``.
    dates = series.get("dates") or []
    values = series.get("values") or []

    # Plain string comparison; assumes stored dates use the same
    # zero-padded YYYY-MM-DD format as ``day`` — TODO confirm for old data.
    if dates and dates[-1] >= day:
        return False

    dates.append(day)
    values.append(value)
    series["dates"] = dates
    series["values"] = values
    return True


def _write_json_atomic(path, payload):
    """Write ``payload`` as JSON to ``path`` via a temp file + ``os.replace``.

    The target file is only swapped in after the dump succeeded, so a crash
    or serialization error mid-write cannot truncate the existing file.
    """
    tmp_path = f"{path}.tmp"
    try:
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(payload, f)
        os.replace(tmp_path, path)
    finally:
        # Only still present if the dump or the replace failed.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)


def update_history(history_path=None, cache_path=None):
    """Append today's values from the cache file to the history file.

    Only adds a point to a series whose last recorded date is older than
    today's UTC date; series that are absent from the history file are
    never created.

    Args:
        history_path: Path of the history JSON file. Defaults to
            ``HISTORY_PATH``.
        cache_path: Path of the cache JSON file. Defaults to ``CACHE_PATH``.

    Returns:
        True if at least one series received a new point (and the history
        file was rewritten), False otherwise — including when either file
        does not exist.

    Raises:
        OSError, json.JSONDecodeError: If a file exists but cannot be read
            or parsed, or the history file cannot be written.
    """
    history_path = HISTORY_PATH if history_path is None else history_path
    cache_path = CACHE_PATH if cache_path is None else cache_path

    if not os.path.exists(history_path):
        log.warning("No history.json found — run full collection first")
        return False

    if not os.path.exists(cache_path):
        log.warning("No cache.json found — run a scrape first")
        return False

    with open(history_path, encoding="utf-8") as f:
        history = json.load(f)
    with open(cache_path, encoding="utf-8") as f:
        cache = json.load(f)

    today = datetime.now(timezone.utc).strftime("%Y-%m-%d")
    updated = False

    def cached_field(key, field):
        # A cache entry may be absent or JSON null; treat both as "no value".
        entry = cache.get(key)
        return entry.get(field) if isinstance(entry, dict) else None

    # On-chain metrics: the cache key and the history key are identical and
    # the number is stored under "value".
    onchain_keys = (
        "puell_multiple",
        "mvrv_zscore",
        "reserve_risk",
        "rhodl_ratio",
        "nupl",
        "200w_sma",
        "lth_realized_price",
        "lth_supply",
    )
    for key in onchain_keys:
        val = cached_field(key, "value")
        if val is None:
            continue
        if _append_point(history.get(key), today, val):
            updated = True
            log.info("Appended %s: %s = %s", key, today, val)

    # The same spot price feeds the main price series and the copies used
    # by the SMA and LTH charts.
    btc_price = cached_field("price", "price")
    if btc_price:
        for key in ("btc_price", "btc_price_sma", "btc_price_lth"):
            if _append_point(history.get(key), today, btc_price):
                updated = True

    # Fear & Greed is stored as an integer.
    fg_val = cached_field("fear_greed", "value")
    if fg_val is not None:
        try:
            fg_int = int(fg_val)
        except (TypeError, ValueError):
            # One bad value must not abort the whole update.
            log.warning("Ignoring non-integer fear_greed value: %r", fg_val)
        else:
            if _append_point(history.get("fear_greed"), today, fg_int):
                updated = True

    if updated:
        _write_json_atomic(history_path, history)
        log.info("History updated with %s data", today)
    else:
        log.info("History already up to date (last date >= %s)", today)

    return updated
|
||||
Loading…
x
Reference in New Issue
Block a user