fix: use binanceus for data fetch (binance geo-blocked), add multi-exchange fallback
This commit is contained in:
parent
f13e1679cd
commit
c2eab35811
BIN
__pycache__/orchestrator.cpython-313.pyc
Normal file
BIN
__pycache__/orchestrator.cpython-313.pyc
Normal file
Binary file not shown.
17521
data/btc_1h.csv
Normal file
17521
data/btc_1h.csv
Normal file
File diff suppressed because it is too large
Load Diff
4381
data/btc_4h.csv
Normal file
4381
data/btc_4h.csv
Normal file
File diff suppressed because it is too large
Load Diff
@ -1,76 +1,65 @@
|
|||||||
#!/usr/bin/env python3
"""Fetch BTC/USD OHLCV data using ccxt (multiple exchanges for best coverage).

Several exchanges are tried in turn and the first one that returns enough
candles wins. Note that the configured symbols are not all the same market
(``BTC/USD`` on most exchanges, ``BTC/USDT`` on binanceus), so the quote
currency of the saved data depends on which exchange succeeded.
"""

import os
import sys
import time
from datetime import datetime, timezone

import ccxt
import pandas as pd

DATA_DIR = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "data")
YEARS = 2  # amount of history requested, counted back from "now"
LIMIT = 300  # candles requested per fetch_ohlcv call
MAX_FAILS = 5  # consecutive request errors tolerated before giving up on an exchange
MIN_CANDLES = 2000  # a download must exceed this many candles to be accepted

# Candle duration in milliseconds for each supported timeframe.
MS_PER_CANDLE = {"1h": 3600_000, "4h": 14400_000}

# (ccxt exchange id, symbol) pairs, tried in this order.
# An id that the installed ccxt does not provide raises AttributeError in
# fetch_ohlcv, which is reported and treated like any other failed exchange.
EXCHANGES = [
    ("coinbasepro", "BTC/USD"),
    ("binanceus", "BTC/USDT"),
    ("kraken", "BTC/USD"),
    ("bitfinex", "BTC/USD"),
]


def _download_candles(exchange, symbol, timeframe, since, now_ms, ms_per):
    """Page through OHLCV rows from ``since`` (ms) up to ``now_ms`` (ms).

    Returns the raw candle rows collected so far. The loop stops when the
    cursor reaches ``now_ms``, when the exchange returns an empty page, when
    the cursor stops advancing, or after ``MAX_FAILS`` consecutive errors.
    """
    rows = []
    fails = 0
    while since < now_ms and fails < MAX_FAILS:
        try:
            candles = exchange.fetch_ohlcv(symbol, timeframe, since=since, limit=LIMIT)
        except Exception as e:
            fails += 1
            print(f"    Error ({fails}/{MAX_FAILS}): {e}")
            time.sleep(3)
            continue

        # Only *consecutive* failures count: a long download should not be
        # aborted by a handful of unrelated transient errors.
        fails = 0

        if not candles:
            break
        rows.extend(candles)

        next_since = candles[-1][0] + ms_per
        if next_since <= since:
            # The exchange returned nothing newer than what was asked for;
            # continuing would re-request the same page forever.
            break
        since = next_since

        print(f"\r    {len(rows)} candles...", end="", flush=True)
        # Extra pause on top of ccxt's own rate limiter.
        time.sleep(exchange.rateLimit / 1000 + 0.1)
    return rows


def fetch_ohlcv(timeframe: str) -> pd.DataFrame:
    """Try multiple exchanges until we get sufficient history.

    Args:
        timeframe: Candle size; must be a key of ``MS_PER_CANDLE``
            ("1h" or "4h").

    Returns:
        DataFrame with columns timestamp (UTC datetime), open, high, low,
        close, volume; de-duplicated on timestamp and sorted ascending.

    Raises:
        KeyError: If ``timeframe`` is not a supported timeframe.
        RuntimeError: If no exchange yields more than ``MIN_CANDLES`` candles.
    """
    now_ms = int(time.time() * 1000)
    ms_per = MS_PER_CANDLE[timeframe]
    target_since = now_ms - (YEARS * 365 * 24 * 3600 * 1000)

    for exch_id, symbol in EXCHANGES:
        print(f"  Trying {exch_id} for {symbol} {timeframe}...")
        try:
            exchange = getattr(ccxt, exch_id)({"enableRateLimit": True})
            all_c = _download_candles(exchange, symbol, timeframe, target_since, now_ms, ms_per)
        except Exception as e:
            print(f"  Exchange {exch_id} failed: {e}")
            continue

        print(f"\r    {len(all_c)} candles from {exch_id}")
        if len(all_c) <= MIN_CANDLES:
            print(f"    Only {len(all_c)} candles, trying next exchange...")
            continue

        df = pd.DataFrame(all_c, columns=["timestamp", "open", "high", "low", "close", "volume"])
        df["timestamp"] = pd.to_datetime(df["timestamp"], unit="ms", utc=True)
        return df.drop_duplicates(subset=["timestamp"]).sort_values("timestamp").reset_index(drop=True)

    raise RuntimeError("Could not fetch sufficient data from any exchange")


def main() -> None:
    """Download the 4h and 1h series and write each to ``DATA_DIR`` as CSV."""
    os.makedirs(DATA_DIR, exist_ok=True)
    for tf in ["4h", "1h"]:
        print(f"[*] Fetching {tf}...")
        df = fetch_ohlcv(tf)
        out = os.path.join(DATA_DIR, f"btc_{tf}.csv")
        df.to_csv(out, index=False)
        print(f"  Saved {out} ({len(df)} rows, {df['timestamp'].min()} to {df['timestamp'].max()})")
    print("[OK] Done!")


if __name__ == "__main__":
    main()
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user