Path-based access with wildcard pattern support
Select exactly the data you need using path patterns
crypto_trades/date=*/hour=*/exchange=*/instrument_type=*/symbol=BTCUSDT/**
crypto_trades/date=2025-01-*/hour=*/exchange=binance/**
crypto_trades/date=*/hour=*/exchange=binance/**
crypto_trades/date=2025-01-15/hour=1[4-6]/**
crypto_trades/date=*/hour=*/exchange=*/instrument_type=perpetual/**
crypto_trades/date=2025-01-*/hour=*/exchange=binance/instrument_type=spot/symbol=BTC*/**
Query directly without downloading
import duckdb
con = duckdb.connect()

# The glob must reach DuckDB as one unbroken string: a line break inside the
# SQL string literal would become part of the path. Adjacent Python literals
# are concatenated, so the pattern can still be wrapped for readability.
trades_glob = (
    "s3://quantum-edge/crypto_trades/"
    "date=2025-01-*/hour=*/exchange=*/"
    "instrument_type=*/symbol=BTCUSDT/**/*.parquet"
)

# Query BTCUSDT trades from January 2025 priced above 90,000.
df = con.execute(f"""
    SELECT *
    FROM read_parquet('{trades_glob}')
    WHERE price > 90000
""").df()
print(f"Rows: {len(df):,}")
Lazy evaluation for large datasets
import polars as pl
# Lazy scan over every Binance partition for January 2025. The two adjacent
# literals are joined into a single wildcard pattern.
# NOTE(review): `symbol` and `date` are partition keys in the path; confirm
# they are also readable as columns (stored in the files or via hive
# partitioning) before relying on the filter/group-by below.
df = pl.scan_parquet(
    "s3://quantum-edge/crypto_trades/"
    "date=2025-01-*/hour=*/exchange=binance/**"
)

# Describe the query step by step; it is evaluated lazily, so nothing runs
# until collect() is called.
btc_trades = df.filter(pl.col("symbol") == "BTCUSDT")
per_date = btc_trades.group_by("date").agg(
    [
        pl.col("volume").sum(),
        pl.col("price").mean(),
    ]
)
result = per_date.collect()
print(result)
Download files matching pattern
# First, get your API token from the /tokens page
TOKEN="your_api_token_here"

# Download all files for a specific day.
# --no-parent keeps the recursion inside the requested directory.
# --no-host-directories and --cut-dirs=2 drop the host name and the leading
# "download/crypto_trades" path components from the local directory layout.
wget --header="Authorization: Bearer $TOKEN" \
  --recursive --no-parent --no-host-directories \
  --cut-dirs=2 \
  "https://api.quantum-edge.app/download/crypto_trades/date=2025-01-15/"
Programmatic access with filtering
import requests
import pandas as pd
from pathlib import Path
TOKEN = "your_api_token"
API_BASE = "https://api.quantum-edge.app"

# List files matching the partition filters.
resp = requests.get(
    f"{API_BASE}/api/datasets/crypto_trades/files",
    headers={"Authorization": f"Bearer {TOKEN}"},
    params={
        "date": "2025-01-15",
        "exchange": "binance",
        "symbol": "BTCUSDT",
    },
    timeout=30,  # without a timeout, requests can wait indefinitely
)
# Fail here on an HTTP error (e.g. a rejected token) instead of later with a
# KeyError when the error body has no "files" key.
resp.raise_for_status()
files = resp.json()["files"]
print(f"Found {len(files)} files")

# Download the first file, if the filters matched anything.
if files:
    file_url = files[0]["downloadUrl"]
    df = pd.read_parquet(file_url)
    print(df.head())