fix: use binanceus for data fetch (binance geo-blocked), add multi-exchange fallback

This commit is contained in:
BizzleBot 2026-03-19 22:15:40 +00:00
parent f13e1679cd
commit c2eab35811
4 changed files with 21954 additions and 63 deletions

Binary file not shown.

17521
data/btc_1h.csv Normal file

File diff suppressed because it is too large Load Diff

4381
data/btc_4h.csv Normal file

File diff suppressed because it is too large Load Diff

View File

@ -1,76 +1,65 @@
#!/usr/bin/env python3
"""Fetch BTC/USDT OHLCV data from Binance using ccxt."""
"""Fetch BTC/USD OHLCV data using ccxt (multiple exchanges for best coverage)."""
import os
import sys
import time
import ccxt
import pandas as pd
import os, sys, time, ccxt, pandas as pd
from datetime import datetime, timezone
# Output directory: the "data" folder located one level above the directory
# that contains this script.
DATA_DIR = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "data")

# --- Settings for the original single-exchange (Binance) fetch ---
# Market symbol passed to the exchange's fetch_ohlcv call.
SYMBOL = "BTC/USDT"
# ccxt exchange identifier.
EXCHANGE_ID = "binance"
# How many years back from "now" the download starts.
YEARS_HISTORY = 2
# Candles requested per fetch_ohlcv call.
LIMIT_PER_REQUEST = 1000 # Binance max

# --- Settings for the multi-exchange fetch ---
# How many years back from "now" the download starts.
YEARS = 2
# Candles requested per fetch_ohlcv call; the same value is used for every
# exchange that is tried.
LIMIT = 300
# Candle width in milliseconds for each supported timeframe.
_MS_PER_CANDLE = {"1h": 3_600_000, "4h": 14_400_000}

# (ccxt exchange id, market symbol) pairs, tried in this order until one of
# them yields enough history.
# NOTE(review): whether every id below exists depends on the installed ccxt
# release — a missing id raises inside the per-exchange ``try`` and is skipped.
_EXCHANGES = (
    ("coinbasepro", "BTC/USD"),
    ("binanceus", "BTC/USDT"),
    ("kraken", "BTC/USD"),
    ("bitfinex", "BTC/USD"),
)

_OHLCV_COLUMNS = ["timestamp", "open", "high", "low", "close", "volume"]


def _download_candles(exchange, symbol, timeframe, since, now_ms, ms_per, max_fails=5):
    """Page through ``exchange.fetch_ohlcv`` from *since* (ms) up to *now_ms*.

    Returns the raw candle rows collected so far. The loop stops when the
    cursor reaches *now_ms*, when the exchange returns an empty page, when the
    cursor fails to advance, or after *max_fails* consecutive request errors.
    """
    rows = []
    fails = 0
    while since < now_ms and fails < max_fails:
        try:
            candles = exchange.fetch_ohlcv(symbol, timeframe, since=since, limit=LIMIT)
        except Exception as e:  # best-effort retry of any request failure
            fails += 1
            print(f" Error ({fails}/{max_fails}): {e}")
            time.sleep(3)
            continue
        if not candles:
            break
        # Count consecutive failures only, so a few transient errors spread
        # over a long download do not abort it.
        fails = 0
        rows.extend(candles)
        print(f"\r {len(rows)} candles...", end="", flush=True)
        next_since = candles[-1][0] + ms_per
        if next_since <= since:
            # The exchange returned data that does not move the cursor
            # forward; requesting again would loop forever.
            break
        since = next_since
        time.sleep(exchange.rateLimit / 1000 + 0.1)
    return rows


def _candles_to_frame(rows):
    """Build a de-duplicated, time-sorted DataFrame from raw candle rows."""
    df = pd.DataFrame(rows, columns=_OHLCV_COLUMNS)
    df["timestamp"] = pd.to_datetime(df["timestamp"], unit="ms", utc=True)
    return df.drop_duplicates(subset=["timestamp"]).sort_values("timestamp").reset_index(drop=True)


def fetch_ohlcv(timeframe: str, min_candles: int = 2000) -> pd.DataFrame:
    """Fetch BTC OHLCV history, trying several exchanges until one suffices.

    Each exchange in ``_EXCHANGES`` is queried from ``YEARS`` years ago up to
    now. The first exchange that returns more than *min_candles* rows wins.

    Args:
        timeframe: Candle width; ``"1h"`` or ``"4h"``.
        min_candles: An exchange's result is accepted only when it contains
            more than this many raw rows. This is an absolute count, not a
            fraction of the requested window.

    Returns:
        DataFrame with columns timestamp (UTC datetime), open, high, low,
        close, volume; unique timestamps, ascending order.

    Raises:
        ValueError: *timeframe* is not supported.
        RuntimeError: no exchange produced more than *min_candles* rows.
    """
    # Validate before touching the network so a typo fails fast and clearly
    # instead of surfacing as a bare KeyError.
    if timeframe not in _MS_PER_CANDLE:
        raise ValueError(f"Unsupported timeframe: {timeframe}")
    ms_per = _MS_PER_CANDLE[timeframe]

    now_ms = int(time.time() * 1000)
    target_since = now_ms - (YEARS * 365 * 24 * 3600 * 1000)

    for exch_id, symbol in _EXCHANGES:
        print(f" Trying {exch_id} for {symbol} {timeframe}...")
        try:
            exchange = getattr(ccxt, exch_id)({"enableRateLimit": True})
            rows = _download_candles(exchange, symbol, timeframe, target_since, now_ms, ms_per)
        except Exception as e:  # fallback boundary: report and move on
            print(f" Exchange {exch_id} failed: {e}")
            continue
        print(f"\r {len(rows)} candles from {exch_id}")
        if len(rows) > min_candles:
            return _candles_to_frame(rows)
        print(f" Only {len(rows)} candles, trying next exchange...")

    raise RuntimeError("Could not fetch sufficient data from any exchange")


def main() -> None:
    """Download the 4h and 1h series and write each to ``DATA_DIR`` as CSV."""
    os.makedirs(DATA_DIR, exist_ok=True)
    for tf in ["4h", "1h"]:
        print(f"[*] Fetching {tf}...")
        df = fetch_ohlcv(tf)
        out = os.path.join(DATA_DIR, f"btc_{tf}.csv")
        df.to_csv(out, index=False)
        print(f" Saved {out} ({len(df)} rows, {df['timestamp'].min()} to {df['timestamp'].max()})")
    print("[OK] Done!")


if __name__ == "__main__":
    main()