66 lines
2.6 KiB
Python
Executable File

#!/usr/bin/env python3
"""Fetch BTC/USD OHLCV data using ccxt (multiple exchanges for best coverage)."""
import os, sys, time, ccxt, pandas as pd
from datetime import datetime, timezone
# Output directory: a "data" folder that sits next to the directory containing
# this file (i.e. one level above this script's own directory).
_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
DATA_DIR = os.path.join(os.path.dirname(_SCRIPT_DIR), "data")

# Length of the history window requested from an exchange, in 365-day years.
YEARS = 2

# Value passed as ``limit`` (candles per request) to every fetch_ohlcv call.
LIMIT = 300
# Tuning knobs for the download loop (previously inline magic numbers).
_MAX_FETCH_FAILURES = 5    # failed requests tolerated per exchange before paging stops
_RETRY_DELAY_SECONDS = 3   # pause after a failed request before retrying
_MIN_CANDLES = 2000        # an exchange must yield more rows than this to be accepted


def _download_candles(exchange, symbol, timeframe, start_ms, end_ms, step_ms):
    """Page through ``exchange.fetch_ohlcv`` and return the raw candle rows.

    Requests start at *start_ms* and continue until the cursor reaches
    *end_ms*, the exchange returns an empty page, the failure budget
    (``_MAX_FETCH_FAILURES``, counted over the whole download, not per page)
    is used up, or the exchange stops moving forward in time.

    Args:
        exchange: An instantiated ccxt exchange object.
        symbol: Market symbol to request, e.g. ``"BTC/USD"``.
        timeframe: Candle timeframe string passed through to the exchange.
        start_ms: First ``since`` value, in epoch milliseconds.
        end_ms: Paging stops once the cursor is at or past this timestamp.
        step_ms: Duration of one candle in milliseconds; added to the last
            received timestamp to obtain the next ``since``.

    Returns:
        A list of candle rows exactly as returned by the exchange (may
        contain duplicates; the caller de-duplicates).
    """
    collected = []
    cursor = start_ms
    fails = 0
    while cursor < end_ms and fails < _MAX_FETCH_FAILURES:
        try:
            candles = exchange.fetch_ohlcv(symbol, timeframe, since=cursor, limit=LIMIT)
        except Exception as e:
            # Best-effort retry: report, wait, and ask for the same page again.
            fails += 1
            print(f" Error ({fails}/{_MAX_FETCH_FAILURES}): {e}")
            time.sleep(_RETRY_DELAY_SECONDS)
            continue
        if not candles:
            break
        collected.extend(candles)
        print(f"\r {len(collected)} candles...", end="", flush=True)

        next_cursor = candles[-1][0] + step_ms
        if next_cursor <= cursor:
            # The page ended at or before the timestamp we asked for, so the
            # next request would be identical. Successful requests do not
            # count as failures, so without this guard the loop never ends.
            break
        cursor = next_cursor
        time.sleep(exchange.rateLimit / 1000 + 0.1)
    return collected


def fetch_ohlcv(timeframe):
    """Download BTC OHLCV history, trying several exchanges in turn.

    Each exchange in a fixed preference list is queried for roughly the last
    ``YEARS`` years of candles. The first exchange that yields more than
    ``_MIN_CANDLES`` rows wins; an exchange that errors out or returns too
    little data is skipped with a printed message.

    Args:
        timeframe: Candle size; must be ``"1h"`` or ``"4h"``.

    Returns:
        A pandas DataFrame with columns ``timestamp`` (timezone-aware UTC
        datetimes), ``open``, ``high``, ``low``, ``close`` and ``volume``,
        de-duplicated on ``timestamp``, sorted ascending, with a fresh
        0-based index.

    Raises:
        KeyError: If *timeframe* is not one of the supported values.
        RuntimeError: If no exchange returned enough candles.
    """
    exchanges = [
        ("coinbasepro", "BTC/USD"),
        ("binanceus", "BTC/USDT"),
        ("kraken", "BTC/USD"),
        ("bitfinex", "BTC/USD"),
    ]
    ms_map = {"1h": 3600_000, "4h": 14400_000}
    # Looked up before any network traffic so a bad timeframe fails fast.
    ms_per = ms_map[timeframe]

    now_ms = int(time.time() * 1000)
    target_since = now_ms - (YEARS * 365 * 24 * 3600 * 1000)

    for exch_id, symbol in exchanges:
        print(f" Trying {exch_id} for {symbol} {timeframe}...")
        # The whole attempt is wrapped: an unknown exchange id, a constructor
        # error, or a failure while building the frame all fall through to
        # the next exchange.
        try:
            exchange = getattr(ccxt, exch_id)({"enableRateLimit": True})
            all_c = _download_candles(
                exchange, symbol, timeframe, target_since, now_ms, ms_per
            )
            print(f"\r {len(all_c)} candles from {exch_id}")
            if len(all_c) <= _MIN_CANDLES:
                print(f" Only {len(all_c)} candles, trying next exchange...")
                continue
            df = pd.DataFrame(
                all_c,
                columns=["timestamp", "open", "high", "low", "close", "volume"],
            )
            df["timestamp"] = pd.to_datetime(df["timestamp"], unit="ms", utc=True)
            return (
                df.drop_duplicates(subset=["timestamp"])
                .sort_values("timestamp")
                .reset_index(drop=True)
            )
        except Exception as e:
            print(f" Exchange {exch_id} failed: {e}")
            continue
    raise RuntimeError("Could not fetch sufficient data from any exchange")
if __name__ == "__main__":
    # Script entry point: download each timeframe and write one CSV per
    # timeframe into DATA_DIR (created on demand).
    os.makedirs(DATA_DIR, exist_ok=True)
    for timeframe in ("4h", "1h"):
        print(f"[*] Fetching {timeframe}...")
        frame = fetch_ohlcv(timeframe)
        csv_path = os.path.join(DATA_DIR, f"btc_{timeframe}.csv")
        frame.to_csv(csv_path, index=False)
        # Report the row count and the time span actually covered.
        row_count = len(frame)
        first_ts = frame["timestamp"].min()
        last_ts = frame["timestamp"].max()
        print(f" Saved {csv_path} ({row_count} rows, {first_ts} to {last_ts})")
    print("[OK] Done!")