fix: use binanceus for data fetch (binance geo-blocked), add multi-exchange fallback

This commit is contained in:
BizzleBot 2026-03-19 22:15:40 +00:00
parent f13e1679cd
commit c2eab35811
4 changed files with 21954 additions and 63 deletions

Binary file not shown.

17521
data/btc_1h.csv Normal file

File diff suppressed because it is too large Load Diff

4381
data/btc_4h.csv Normal file

File diff suppressed because it is too large Load Diff

View File

@ -1,76 +1,65 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
"""Fetch BTC/USDT OHLCV data from Binance using ccxt.""" """Fetch BTC/USD OHLCV data using ccxt (multiple exchanges for best coverage)."""
import os import os, sys, time, ccxt, pandas as pd
import sys
import time
import ccxt
import pandas as pd
from datetime import datetime, timezone from datetime import datetime, timezone
DATA_DIR = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "data")
YEARS = 2  # how much history to request, counted back from "now"
LIMIT = 300  # candles per request; presumably sized for the strictest exchange below -- confirm
MAX_FAILS = 5  # consecutive request errors tolerated before giving up on an exchange
RETRY_DELAY_S = 3  # pause after a failed request
MIN_CANDLES = 2000  # a download must exceed this many raw candles to be accepted
TIMEFRAME_MS = {"1h": 3_600_000, "4h": 14_400_000}  # candle width per supported timeframe
OHLCV_COLUMNS = ["timestamp", "open", "high", "low", "close", "volume"]

# Tried in order; the first exchange that yields enough candles wins.
# NOTE(review): the quote currency differs between entries (USD vs USDT), so
# the saved CSV may hold either, depending on which exchange answered.
# NOTE(review): "coinbasepro" may be absent from recent ccxt releases; in that
# case the attribute lookup fails and the next exchange is tried.
EXCHANGES = [
    ("coinbasepro", "BTC/USD"),
    ("binanceus", "BTC/USDT"),
    ("kraken", "BTC/USD"),
    ("bitfinex", "BTC/USD"),
]


def _download_candles(exchange, symbol, timeframe, since, until, step_ms):
    """Page through ``exchange.fetch_ohlcv`` and return the raw candle rows.

    Args:
        exchange: ccxt exchange instance (needs ``fetch_ohlcv`` and ``rateLimit``).
        symbol: market symbol to request, e.g. ``"BTC/USD"``.
        timeframe: ccxt timeframe string, e.g. ``"1h"``.
        since: first timestamp to request, in epoch milliseconds.
        until: stop once the cursor reaches this epoch-millisecond timestamp.
        step_ms: width of one candle in milliseconds.

    Returns:
        A list of ``[timestamp, open, high, low, close, volume]`` rows. It may
        be shorter than requested if the exchange runs out of data or keeps
        failing.
    """
    rows = []
    fails = 0
    while since < until and fails < MAX_FAILS:
        try:
            page = exchange.fetch_ohlcv(symbol, timeframe, since=since, limit=LIMIT)
        except Exception as e:  # network / exchange errors: retry a bounded number of times
            fails += 1
            print(f" Error ({fails}/{MAX_FAILS}): {e}")
            time.sleep(RETRY_DELAY_S)
            continue
        fails = 0  # only *consecutive* failures count against the budget
        if not page:
            break
        rows.extend(page)
        next_since = page[-1][0] + step_ms
        if next_since <= since:
            # The exchange returned data that does not move the cursor forward
            # (e.g. it ignored ``since``); stop instead of looping forever.
            break
        since = next_since
        print(f"\r {len(rows)} candles...", end="", flush=True)
        time.sleep(exchange.rateLimit / 1000 + 0.1)
    return rows


def _candles_to_frame(candles):
    """Convert raw candle rows into a de-duplicated, time-sorted DataFrame."""
    df = pd.DataFrame(candles, columns=OHLCV_COLUMNS)
    df["timestamp"] = pd.to_datetime(df["timestamp"], unit="ms", utc=True)
    return df.drop_duplicates(subset=["timestamp"]).sort_values("timestamp").reset_index(drop=True)


def fetch_ohlcv(timeframe: str) -> pd.DataFrame:
    """Try multiple exchanges until one returns sufficient history.

    Args:
        timeframe: one of the keys of ``TIMEFRAME_MS`` (``"1h"`` or ``"4h"``).

    Returns:
        DataFrame with columns ``timestamp`` (UTC datetimes), ``open``,
        ``high``, ``low``, ``close`` and ``volume``, sorted by timestamp with
        duplicate timestamps removed.

    Raises:
        ValueError: if ``timeframe`` is not supported.
        RuntimeError: if no exchange returned more than ``MIN_CANDLES`` candles.
    """
    # Validate before any network access so a typo fails fast and clearly.
    if timeframe not in TIMEFRAME_MS:
        raise ValueError(f"Unsupported timeframe: {timeframe}")
    step_ms = TIMEFRAME_MS[timeframe]
    now_ms = int(time.time() * 1000)
    target_since = now_ms - (YEARS * 365 * 24 * 3600 * 1000)

    for exch_id, symbol in EXCHANGES:
        print(f" Trying {exch_id} for {symbol} {timeframe}...")
        try:
            exchange = getattr(ccxt, exch_id)({"enableRateLimit": True})
            candles = _download_candles(exchange, symbol, timeframe, target_since, now_ms, step_ms)
        except Exception as e:  # fallback boundary: any exchange problem means "try the next one"
            print(f" Exchange {exch_id} failed: {e}")
            continue
        print(f"\r {len(candles)} candles from {exch_id}")
        if len(candles) > MIN_CANDLES:  # good enough
            return _candles_to_frame(candles)
        print(f" Only {len(candles)} candles, trying next exchange...")
    raise RuntimeError("Could not fetch sufficient data from any exchange")


def main() -> None:
    """Download the 4h and 1h series and write each to ``DATA_DIR`` as CSV."""
    os.makedirs(DATA_DIR, exist_ok=True)
    for tf in ["4h", "1h"]:
        print(f"[*] Fetching {tf}...")
        df = fetch_ohlcv(tf)
        out = os.path.join(DATA_DIR, f"btc_{tf}.csv")
        df.to_csv(out, index=False)
        print(f" Saved {out} ({len(df)} rows, {df['timestamp'].min()} to {df['timestamp'].max()})")
    print("[OK] Done!")


if __name__ == "__main__":
    main()