#!/usr/bin/env python3
"""Fetch BTC/USD OHLCV data using ccxt (multiple exchanges for best coverage)."""

import os
import sys
import time
from datetime import datetime, timezone

import ccxt
import pandas as pd

# "data" directory located one level above the directory containing this script.
DATA_DIR = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "data")
YEARS = 2  # amount of history requested, counted as 365-day years
LIMIT = 300  # candles requested per fetch_ohlcv call
MAX_FAILS = 5  # total failed requests tolerated per exchange before giving up on it
MIN_CANDLES = 2000  # an exchange must yield more than this many unique candles
RETRY_DELAY_S = 3  # pause after a failed request, in seconds
OHLCV_COLUMNS = ["timestamp", "open", "high", "low", "close", "volume"]


def _download_candles(exchange, symbol, timeframe, since, until_ms, step_ms):
    """Page forward through one exchange's OHLCV history.

    Args:
        exchange: Object exposing ``fetch_ohlcv(symbol, timeframe, since=, limit=)``
            and a ``rateLimit`` attribute (a ccxt exchange instance).
        symbol: Market symbol to request, e.g. "BTC/USD".
        timeframe: Candle width string passed through to the exchange.
        since: Start cursor in epoch milliseconds.
        until_ms: Stop once the cursor reaches this epoch-millisecond value.
        step_ms: Width of one candle in milliseconds.

    Returns:
        List of raw candle rows in the order received; may contain duplicates.
    """
    rows = []
    fails = 0  # cumulative over the whole download, not reset on success
    while since < until_ms and fails < MAX_FAILS:
        try:
            candles = exchange.fetch_ohlcv(symbol, timeframe, since=since, limit=LIMIT)
        except Exception as e:
            fails += 1
            print(f" Error ({fails}/{MAX_FAILS}): {e}")
            time.sleep(RETRY_DELAY_S)
            continue
        if not candles:
            break
        rows.extend(candles)
        print(f"\r {len(rows)} candles...", end="", flush=True)
        # Resume one candle after the last one received (row[0] is the timestamp).
        next_since = candles[-1][0] + step_ms
        if next_since <= since:
            # The page ended at or before the cursor, so requesting again would
            # return the same data forever. Stop and let the caller judge
            # whether what was collected is sufficient.
            break
        since = next_since
        # rateLimit is divided by 1000 to get seconds; 0.1 s of extra headroom.
        time.sleep(exchange.rateLimit / 1000 + 0.1)
    return rows


def _candles_to_frame(rows):
    """Convert raw candle rows into a de-duplicated, time-sorted DataFrame."""
    df = pd.DataFrame(rows, columns=OHLCV_COLUMNS)
    df["timestamp"] = pd.to_datetime(df["timestamp"], unit="ms", utc=True)
    return df.drop_duplicates(subset=["timestamp"]).sort_values("timestamp").reset_index(drop=True)


def fetch_ohlcv(timeframe):
    """Try multiple exchanges until we get sufficient history.

    Exchanges are tried in a fixed order; the first one that yields more than
    MIN_CANDLES unique candles wins. Any exception raised while working with
    an exchange is printed and the next exchange is tried.

    Args:
        timeframe: Candle width; must be "1h" or "4h".

    Returns:
        DataFrame with columns timestamp (UTC datetimes), open, high, low,
        close, volume; unique on timestamp and sorted ascending.

    Raises:
        KeyError: If ``timeframe`` is not one of the supported values.
        RuntimeError: If no exchange returned enough data.
    """
    # NOTE(review): the binanceus entry is quoted in USDT rather than USD, and
    # some ccxt releases may not ship every id listed here (a missing id is
    # reported by the except below) -- confirm against the installed version.
    exchanges = [
        ("coinbasepro", "BTC/USD"),
        ("binanceus", "BTC/USDT"),
        ("kraken", "BTC/USD"),
        ("bitfinex", "BTC/USD"),
    ]
    now_ms = int(time.time() * 1000)
    ms_map = {"1h": 3600_000, "4h": 14400_000}
    ms_per = ms_map[timeframe]
    target_since = now_ms - (YEARS * 365 * 24 * 3600 * 1000)

    for exch_id, symbol in exchanges:
        print(f" Trying {exch_id} for {symbol} {timeframe}...")
        try:
            exchange = getattr(ccxt, exch_id)({"enableRateLimit": True})
            all_c = _download_candles(exchange, symbol, timeframe, target_since, now_ms, ms_per)
            print(f"\r {len(all_c)} candles from {exch_id}")
            # Judge sufficiency on unique candles so overlapping pages cannot
            # make a short history look long enough.
            df = _candles_to_frame(all_c)
            if len(df) > MIN_CANDLES:  # Good enough
                return df
            print(f" Only {len(df)} candles, trying next exchange...")
        except Exception as e:
            print(f" Exchange {exch_id} failed: {e}")
    raise RuntimeError("Could not fetch sufficient data from any exchange")


def main():
    """Download the 4h and 1h histories and write each to DATA_DIR as CSV."""
    os.makedirs(DATA_DIR, exist_ok=True)
    for tf in ["4h", "1h"]:
        print(f"[*] Fetching {tf}...")
        df = fetch_ohlcv(tf)
        out = os.path.join(DATA_DIR, f"btc_{tf}.csv")
        df.to_csv(out, index=False)
        print(f" Saved {out} ({len(df)} rows, {df['timestamp'].min()} to {df['timestamp'].max()})")
    print("[OK] Done!")


if __name__ == "__main__":
    main()