tradernick-data-provider 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tradernick_data_provider-0.2.0/.gitignore +14 -0
- tradernick_data_provider-0.2.0/PKG-INFO +114 -0
- tradernick_data_provider-0.2.0/README.md +87 -0
- tradernick_data_provider-0.2.0/pyproject.toml +42 -0
- tradernick_data_provider-0.2.0/tradernick_data_provider/__init__.py +10 -0
- tradernick_data_provider-0.2.0/tradernick_data_provider/_client.py +148 -0
- tradernick_data_provider-0.2.0/tradernick_data_provider/_http.py +55 -0
- tradernick_data_provider-0.2.0/tradernick_data_provider/_query.py +378 -0
- tradernick_data_provider-0.2.0/tradernick_data_provider/binance.py +228 -0
- tradernick_data_provider-0.2.0/tradernick_data_provider/btc.py +93 -0
- tradernick_data_provider-0.2.0/tradernick_data_provider/erc20.py +406 -0
- tradernick_data_provider-0.2.0/tradernick_data_provider/evm.py +87 -0
- tradernick_data_provider-0.2.0/tradernick_data_provider/exceptions.py +8 -0
- tradernick_data_provider-0.2.0/tradernick_data_provider/jobs.py +72 -0
- tradernick_data_provider-0.2.0/tradernick_data_provider/protocols.py +951 -0
- tradernick_data_provider-0.2.0/tradernick_data_provider/py.typed +0 -0
- tradernick_data_provider-0.2.0/tradernick_data_provider/snapshots.py +116 -0
- tradernick_data_provider-0.2.0/tradernick_data_provider/tron.py +88 -0
- tradernick_data_provider-0.2.0/tradernick_data_provider/wallets.py +93 -0
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: tradernick-data-provider
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: Python client for the TraderNick data_provider service. Drop-in compatible with horatio-data-provider (same DataProviderClient API; only the import path changes).
|
|
5
|
+
Author: TraderNick
|
|
6
|
+
License: MIT
|
|
7
|
+
Keywords: binance,clickhouse,data-provider,defi,evm,hyperliquid
|
|
8
|
+
Classifier: Development Status :: 4 - Beta
|
|
9
|
+
Classifier: Intended Audience :: Developers
|
|
10
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
16
|
+
Requires-Python: >=3.10
|
|
17
|
+
Requires-Dist: httpx>=0.25
|
|
18
|
+
Requires-Dist: pandas>=2.0
|
|
19
|
+
Requires-Dist: polars>=1.0
|
|
20
|
+
Requires-Dist: pyarrow>=14.0
|
|
21
|
+
Requires-Dist: pytz>=2023.3
|
|
22
|
+
Provides-Extra: dev
|
|
23
|
+
Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
|
|
24
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
25
|
+
Requires-Dist: respx>=0.21; extra == 'dev'
|
|
26
|
+
Description-Content-Type: text/markdown
|
|
27
|
+
|
|
28
|
+
# tradernick-data-provider
|
|
29
|
+
|
|
30
|
+
Python client for the TraderNick `data_provider` service. Drop-in
|
|
31
|
+
compatible with [`horatio-data-provider`](https://pypi.org/project/horatio-data-provider/):
|
|
32
|
+
same `DataProviderClient` class, same namespaces (`evm`, `tron`, `btc`,
|
|
33
|
+
`binance`, `hyperliquid`, `wallets`, `cache`, `jobs`), same chainable
|
|
34
|
+
builder methods, same `as_pandas()` / `as_polars()` / `as_parquet()`
|
|
35
|
+
terminators. The only visible difference is the import path.
|
|
36
|
+
|
|
37
|
+
```python
|
|
38
|
+
# Before
|
|
39
|
+
from horatio_data_provider import DataProviderClient
|
|
40
|
+
|
|
41
|
+
# After
|
|
42
|
+
from tradernick_data_provider import DataProviderClient
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
Apart from the import, the server URL passed to the constructor is the only thing you need to
|
|
46
|
+
change at the call site.
|
|
47
|
+
|
|
48
|
+
## Install
|
|
49
|
+
|
|
50
|
+
```sh
|
|
51
|
+
pip install tradernick-data-provider
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
## Usage
|
|
55
|
+
|
|
56
|
+
```python
|
|
57
|
+
import asyncio
|
|
58
|
+
from tradernick_data_provider import DataProviderClient
|
|
59
|
+
|
|
60
|
+
async def main():
|
|
61
|
+
async with DataProviderClient("http://localhost:10005") as client:
|
|
62
|
+
df = await client.binance.ohlcv("BTC", "1h") \
|
|
63
|
+
.time_range("2026-06-01T00:00:00Z", "2026-06-08T00:00:00Z") \
|
|
64
|
+
.as_polars()
|
|
65
|
+
print(df)
|
|
66
|
+
|
|
67
|
+
asyncio.run(main())
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
All Horatio query builders work unchanged. The server delegates to
|
|
71
|
+
ClickHouse instead of DeFiStream, so reads stay sub-second on tables
|
|
72
|
+
where Horatio has to pay a fresh upstream fetch.
|
|
73
|
+
|
|
74
|
+
## Status
|
|
75
|
+
|
|
76
|
+
**0.2.0 — Phase 1+2 read parity.** Validated against horatio-data-provider
|
|
77
|
+
on 2026-06-07:
|
|
78
|
+
- Column-shape parity: 100% across binance / aave / lido / uniswap /
|
|
79
|
+
hyperliquid / transfers.
|
|
80
|
+
- Row-count + value parity: exact match on stable tables
|
|
81
|
+
(binance.ohlcv 1m, raw_trades, lido.deposit, aave.borrows/repays,
|
|
82
|
+
evm.native with `min_amount`); within ±2 rows on AAVE event tables
|
|
83
|
+
where DeFiStream's sweep loop occasionally re-fetches windows with
|
|
84
|
+
slightly different cuts.
|
|
85
|
+
|
|
86
|
+
What works:
|
|
87
|
+
- `binance.{ohlcv, raw_trades, book_depth, open_interest, funding_rate,
|
|
88
|
+
long_short_ratios}` (with `with_id`, `add_symbol`)
|
|
89
|
+
- `evm.aave.{deposit, withdraw, borrow, repay, flashloan, liquidation}`
|
|
90
|
+
with `involving`, `exclude_involving`, `eth_market_type`
|
|
91
|
+
- `evm.uniswap.{swap, deposit, withdraw, collect}` — V3
|
|
92
|
+
- `evm.lido.{deposit, withdrawal_request, withdrawal_claimed,
|
|
93
|
+
l2_deposit, l2_withdrawal_request}`
|
|
94
|
+
- `evm.erc20.transfers`, `evm.native_transfers` with full filter set
|
|
95
|
+
(sender / receiver / involving / exclude_* / min_amount / max_amount)
|
|
96
|
+
- `tron.{native, trc20}.transfers`, `btc.native.transfers`
|
|
97
|
+
- `hyperliquid.{ohlcv, trades, fills, funding, transfers, vaults,
|
|
98
|
+
trade_history, position_history}`
|
|
99
|
+
- `client.{wallets.list, wallets.get, wallets.upsert, wallets.delete}`
|
|
100
|
+
- `client.{load_parquet, scan_parquet, list_snapshots, delete_snapshot,
|
|
101
|
+
as_parquet}` — server-side parquet snapshots with `local_*` filters
|
|
102
|
+
- `client.jobs.{list, get, cancel}` — proxies to the ingestion job queue
|
|
103
|
+
|
|
104
|
+
Known limitations (callable, but not yet backed by data or fully implemented):
|
|
105
|
+
- `evm.{stader, threshold}` — return empty schemas (TraderNick doesn't ingest
|
|
106
|
+
these networks yet)
|
|
107
|
+
- `hyperliquid.{sends, spot_transfers}` — return empty schemas
|
|
108
|
+
- `client.scan_parquet` only honors address-based `local_*` filters;
|
|
109
|
+
label/category/entity variants are wired but no-op without
|
|
110
|
+
wallet_labels co-mounted on the snapshot.
|
|
111
|
+
|
|
112
|
+
## Compatibility
|
|
113
|
+
|
|
114
|
+
Drop-in compatible with horatio-data-provider 4.x.
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
# tradernick-data-provider
|
|
2
|
+
|
|
3
|
+
Python client for the TraderNick `data_provider` service. Drop-in
|
|
4
|
+
compatible with [`horatio-data-provider`](https://pypi.org/project/horatio-data-provider/):
|
|
5
|
+
same `DataProviderClient` class, same namespaces (`evm`, `tron`, `btc`,
|
|
6
|
+
`binance`, `hyperliquid`, `wallets`, `cache`, `jobs`), same chainable
|
|
7
|
+
builder methods, same `as_pandas()` / `as_polars()` / `as_parquet()`
|
|
8
|
+
terminators. The only visible difference is the import path.
|
|
9
|
+
|
|
10
|
+
```python
|
|
11
|
+
# Before
|
|
12
|
+
from horatio_data_provider import DataProviderClient
|
|
13
|
+
|
|
14
|
+
# After
|
|
15
|
+
from tradernick_data_provider import DataProviderClient
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
Apart from the import, the server URL passed to the constructor is the only thing you need to
|
|
19
|
+
change at the call site.
|
|
20
|
+
|
|
21
|
+
## Install
|
|
22
|
+
|
|
23
|
+
```sh
|
|
24
|
+
pip install tradernick-data-provider
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
## Usage
|
|
28
|
+
|
|
29
|
+
```python
|
|
30
|
+
import asyncio
|
|
31
|
+
from tradernick_data_provider import DataProviderClient
|
|
32
|
+
|
|
33
|
+
async def main():
|
|
34
|
+
async with DataProviderClient("http://localhost:10005") as client:
|
|
35
|
+
df = await client.binance.ohlcv("BTC", "1h") \
|
|
36
|
+
.time_range("2026-06-01T00:00:00Z", "2026-06-08T00:00:00Z") \
|
|
37
|
+
.as_polars()
|
|
38
|
+
print(df)
|
|
39
|
+
|
|
40
|
+
asyncio.run(main())
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
All Horatio query builders work unchanged. The server delegates to
|
|
44
|
+
ClickHouse instead of DeFiStream, so reads stay sub-second on tables
|
|
45
|
+
where Horatio has to pay a fresh upstream fetch.
|
|
46
|
+
|
|
47
|
+
## Status
|
|
48
|
+
|
|
49
|
+
**0.2.0 — Phase 1+2 read parity.** Validated against horatio-data-provider
|
|
50
|
+
on 2026-06-07:
|
|
51
|
+
- Column-shape parity: 100% across binance / aave / lido / uniswap /
|
|
52
|
+
hyperliquid / transfers.
|
|
53
|
+
- Row-count + value parity: exact match on stable tables
|
|
54
|
+
(binance.ohlcv 1m, raw_trades, lido.deposit, aave.borrows/repays,
|
|
55
|
+
evm.native with `min_amount`); within ±2 rows on AAVE event tables
|
|
56
|
+
where DeFiStream's sweep loop occasionally re-fetches windows with
|
|
57
|
+
slightly different cuts.
|
|
58
|
+
|
|
59
|
+
What works:
|
|
60
|
+
- `binance.{ohlcv, raw_trades, book_depth, open_interest, funding_rate,
|
|
61
|
+
long_short_ratios}` (with `with_id`, `add_symbol`)
|
|
62
|
+
- `evm.aave.{deposit, withdraw, borrow, repay, flashloan, liquidation}`
|
|
63
|
+
with `involving`, `exclude_involving`, `eth_market_type`
|
|
64
|
+
- `evm.uniswap.{swap, deposit, withdraw, collect}` — V3
|
|
65
|
+
- `evm.lido.{deposit, withdrawal_request, withdrawal_claimed,
|
|
66
|
+
l2_deposit, l2_withdrawal_request}`
|
|
67
|
+
- `evm.erc20.transfers`, `evm.native_transfers` with full filter set
|
|
68
|
+
(sender / receiver / involving / exclude_* / min_amount / max_amount)
|
|
69
|
+
- `tron.{native, trc20}.transfers`, `btc.native.transfers`
|
|
70
|
+
- `hyperliquid.{ohlcv, trades, fills, funding, transfers, vaults,
|
|
71
|
+
trade_history, position_history}`
|
|
72
|
+
- `client.{wallets.list, wallets.get, wallets.upsert, wallets.delete}`
|
|
73
|
+
- `client.{load_parquet, scan_parquet, list_snapshots, delete_snapshot,
|
|
74
|
+
as_parquet}` — server-side parquet snapshots with `local_*` filters
|
|
75
|
+
- `client.jobs.{list, get, cancel}` — proxies to the ingestion job queue
|
|
76
|
+
|
|
77
|
+
Known limitations (callable, but not yet backed by data or fully implemented):
|
|
78
|
+
- `evm.{stader, threshold}` — return empty schemas (TraderNick doesn't ingest
|
|
79
|
+
these networks yet)
|
|
80
|
+
- `hyperliquid.{sends, spot_transfers}` — return empty schemas
|
|
81
|
+
- `client.scan_parquet` only honors address-based `local_*` filters;
|
|
82
|
+
label/category/entity variants are wired but no-op without
|
|
83
|
+
wallet_labels co-mounted on the snapshot.
|
|
84
|
+
|
|
85
|
+
## Compatibility
|
|
86
|
+
|
|
87
|
+
Drop-in compatible with horatio-data-provider 4.x.
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "tradernick-data-provider"
|
|
3
|
+
version = "0.2.0"
|
|
4
|
+
description = "Python client for the TraderNick data_provider service. Drop-in compatible with horatio-data-provider (same DataProviderClient API; only the import path changes)."
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
requires-python = ">=3.10"
|
|
7
|
+
license = { text = "MIT" }
|
|
8
|
+
authors = [{ name = "TraderNick" }]
|
|
9
|
+
keywords = ["clickhouse", "binance", "hyperliquid", "evm", "defi", "data-provider"]
|
|
10
|
+
classifiers = [
|
|
11
|
+
"Development Status :: 4 - Beta",
|
|
12
|
+
"Intended Audience :: Developers",
|
|
13
|
+
"License :: OSI Approved :: MIT License",
|
|
14
|
+
"Programming Language :: Python :: 3",
|
|
15
|
+
"Programming Language :: Python :: 3.10",
|
|
16
|
+
"Programming Language :: Python :: 3.11",
|
|
17
|
+
"Programming Language :: Python :: 3.12",
|
|
18
|
+
"Topic :: Software Development :: Libraries :: Python Modules",
|
|
19
|
+
]
|
|
20
|
+
dependencies = [
|
|
21
|
+
"httpx>=0.25",
|
|
22
|
+
"pyarrow>=14.0",
|
|
23
|
+
"pandas>=2.0",
|
|
24
|
+
"polars>=1.0",
|
|
25
|
+
"pytz>=2023.3",
|
|
26
|
+
]
|
|
27
|
+
|
|
28
|
+
[project.optional-dependencies]
|
|
29
|
+
dev = ["pytest>=8.0", "pytest-asyncio>=0.23", "respx>=0.21"]
|
|
30
|
+
|
|
31
|
+
[tool.pytest.ini_options]
|
|
32
|
+
asyncio_mode = "auto"
|
|
33
|
+
|
|
34
|
+
[tool.hatch.build.targets.wheel]
|
|
35
|
+
packages = ["tradernick_data_provider"]
|
|
36
|
+
|
|
37
|
+
[tool.hatch.build.targets.sdist]
|
|
38
|
+
include = ["tradernick_data_provider", "README.md", "pyproject.toml"]
|
|
39
|
+
|
|
40
|
+
[build-system]
|
|
41
|
+
requires = ["hatchling"]
|
|
42
|
+
build-backend = "hatchling.build"
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
from ._client import DataProviderClient
|
|
2
|
+
from .exceptions import DataProviderError, DataProviderHTTPError
|
|
3
|
+
from .wallets import WalletsNamespace
|
|
4
|
+
|
|
5
|
+
__all__ = [
|
|
6
|
+
"DataProviderClient",
|
|
7
|
+
"DataProviderError",
|
|
8
|
+
"DataProviderHTTPError",
|
|
9
|
+
"WalletsNamespace",
|
|
10
|
+
]
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
import io
|
|
2
|
+
from datetime import datetime
|
|
3
|
+
from typing import Literal, Optional, Union
|
|
4
|
+
|
|
5
|
+
import httpx
|
|
6
|
+
import polars as pl
|
|
7
|
+
|
|
8
|
+
from ._http import load_parquet_bytes, list_snapshots, delete_snapshot
|
|
9
|
+
from ._query import _to_timestamp
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def _to_datetime(date: datetime | str | int) -> datetime:
    """Return *date* as a timezone-aware ``datetime``.

    The input is first rendered to text by ``_to_timestamp``; the trailing
    ``Z`` is then replaced by an explicit ``+00:00`` offset so that
    ``datetime.fromisoformat`` can parse it.
    """
    # _to_timestamp returns 'YYYY-MM-DDTHH:MM:SSZ'
    iso_text = _to_timestamp(date).replace("Z", "+00:00")
    return datetime.fromisoformat(iso_text)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _cast_time_ms_utc(df: pl.DataFrame) -> pl.DataFrame:
    """Return *df* with its ``time`` column as ``Datetime('ms', 'UTC')``.

    DuckDB-written snapshots come back as μs+UTC while cache reads come
    back as ms+UTC; casting to one representation keeps frames joinable on
    the polars side.

    The frame is returned untouched when it has no ``time`` column, when
    that column is not a polars ``Datetime``, or when it already has the
    target unit and time zone.
    """
    if 'time' not in df.columns:
        return df

    current = df.schema['time']
    if not isinstance(current, pl.Datetime):
        return df

    already_normalized = current.time_unit == 'ms' and current.time_zone == 'UTC'
    if already_normalized:
        return df

    target = pl.Datetime('ms', 'UTC')
    return df.with_columns(pl.col('time').cast(target))
|
|
31
|
+
|
|
32
|
+
from .binance import BinanceNamespace, HyperliquidNamespace
|
|
33
|
+
from .btc import BtcNamespace
|
|
34
|
+
from .evm import EvmNamespace
|
|
35
|
+
from .jobs import JobsNamespace
|
|
36
|
+
from .protocols import CacheNamespace
|
|
37
|
+
from .tron import TronNamespace
|
|
38
|
+
from .wallets import WalletsNamespace
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class DataProviderClient:
    """Async entry point to a data_provider server.

    One ``httpx.AsyncClient`` session is created per client and shared,
    together with the base URL, by every namespace attribute. Release it
    with ``await client.close()`` or use the client as an async context
    manager.
    """

    evm: EvmNamespace
    tron: TronNamespace
    btc: BtcNamespace
    binance: BinanceNamespace
    hyperliquid: HyperliquidNamespace
    wallets: WalletsNamespace
    cache: CacheNamespace
    jobs: JobsNamespace

    def __init__(self, url: str, *, timeout: float | None = 86400):
        """Create the HTTP session and bind every namespace to it.

        Args:
            url: Base URL of the server; trailing slashes are stripped.
            timeout: Forwarded unchanged to ``httpx.AsyncClient(timeout=...)``.
                Defaults to 86400, the value that was previously hard-coded,
                so existing call sites behave exactly as before.
        """
        self._url = url.rstrip("/")
        self._session = httpx.AsyncClient(timeout=timeout)
        self.evm = EvmNamespace(self._session, self._url)
        self.tron = TronNamespace(self._session, self._url)
        self.btc = BtcNamespace(self._session, self._url)
        self.binance = BinanceNamespace(self._session, self._url)
        self.hyperliquid = HyperliquidNamespace(self._session, self._url)
        self.wallets = WalletsNamespace(self._session, self._url)
        self.cache = CacheNamespace(self._session, self._url)
        self.jobs = JobsNamespace(self._session, self._url)

    async def health(self) -> bool:
        """Issue ``GET <url>/health`` and return ``True`` on a success status.

        An error status is surfaced through ``raise_for_status()``; this
        method never returns ``False``.
        """
        response = await self._session.get(self._url + "/health")
        response.raise_for_status()
        return True

    async def load_parquet(
        self,
        key: str,
        since: Optional[Union[datetime, str, int]] = None,
        until: Optional[Union[datetime, str, int]] = None,
    ) -> pl.DataFrame:
        """Load a saved snapshot as a polars DataFrame.

        ``time`` is normalized to ``Datetime('ms', UTC)`` so joins with
        transfer-read DataFrames (which the cache layer also returns at
        ms+UTC) don't trip the polars 'datatypes of join keys don't
        match' check. Snapshots saved via DuckDB COPY are stored at
        μs+UTC internally; we cast on read.

        Args:
            key: Name of the snapshot to download.
            since: Optional inclusive lower bound.
            until: Optional inclusive upper bound.

        The bounds are applied client-side, after the whole snapshot has
        been downloaded, to the ``timestamp`` column when the frame has one
        and to ``time`` otherwise. When neither column exists the bounds
        are ignored.

        For pandas, call ``(await client.load_parquet(key)).to_pandas()``.
        """
        raw = await load_parquet_bytes(self._session, self._url, key)
        df = _cast_time_ms_utc(pl.read_parquet(io.BytesIO(raw)))

        if since is None and until is None:
            return df

        time_col = "timestamp" if "timestamp" in df.columns else "time"
        if time_col not in df.columns:
            # Nothing to filter on; hand back the full snapshot.
            return df

        if since is not None:
            df = df.filter(pl.col(time_col) >= _to_datetime(since))
        if until is not None:
            df = df.filter(pl.col(time_col) <= _to_datetime(until))
        return df

    def scan_parquet(self, key: str, *,
                     since: Optional[Union[datetime, str, int]] = None,
                     until: Optional[Union[datetime, str, int]] = None,
                     engine: Literal['polars', 'duckdb'] = 'duckdb',
                     normalize_addresses: Optional[bool] = None):
        """Lazy-scan a saved snapshot with ``local_*`` filters applied
        server-side. Returns a ``ScanParquetQuery`` builder. Chain
        ``local_*`` filter methods then call a terminal ``as_polars()`` /
        ``as_pandas()`` / ``as_parquet(new_key)``.

        ``engine``:
          - ``'duckdb'`` (default): server mounts the snapshot + wallets
            parquets as DuckDB views and runs the filter as SQL.
            Streams via ``COPY ... TO PARQUET``. Best optimizer for
            large ``IN`` filters; ~3-50× faster than polars on big
            wallet-set queries.
          - ``'polars'``: server uses ``pl.scan_parquet`` and a polars
            lazy filter pipeline. Streams via ``sink_parquet``.

        ``normalize_addresses``: default ``None`` (auto). Set to ``False``
        only when you know the snapshot is canonical and the file lacks
        the metadata flag — auto-detect already handles canonical files.

        Example::

            df = await client.scan_parquet('huge_snapshot') \\
                .local_exclude_sender_categories(['Hot-Wallet','Cold-Wallet']) \\
                .local_involving_entities(['Binance']) \\
                .as_polars()
        """
        # Imported at call time rather than at module import time.
        from .snapshots import ScanParquetQuery
        return ScanParquetQuery(
            self._session, self._url, key,
            since=since, until=until,
            engine=engine, normalize_addresses=normalize_addresses,
        )

    async def list_snapshots(self) -> list[str]:
        """List all saved snapshot keys."""
        # The bare name resolves to the module-level helper imported from
        # ``._http``, not to this method.
        return await list_snapshots(self._session, self._url)

    async def delete_snapshot(self, key: str) -> None:
        """Delete a saved snapshot."""
        # Same as above: this calls the ``._http`` helper of the same name.
        await delete_snapshot(self._session, self._url, key)

    async def close(self) -> None:
        """Close the underlying HTTP session."""
        await self._session.aclose()

    async def __aenter__(self) -> "DataProviderClient":
        """Return the client itself; the session already exists."""
        return self

    async def __aexit__(self, *_) -> None:
        """Close the session on leaving the ``async with`` block."""
        await self.close()
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
import io
|
|
2
|
+
|
|
3
|
+
import httpx
|
|
4
|
+
import pyarrow as pa
|
|
5
|
+
import pyarrow.parquet as pq
|
|
6
|
+
|
|
7
|
+
from .exceptions import DataProviderHTTPError
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
async def fetch_table(session: httpx.AsyncClient, url: str, body: dict) -> pa.Table | None:
    """POST *body* as JSON to *url* and decode the parquet reply.

    Returns:
        The response content read as a ``pyarrow.Table``, or ``None`` when
        the server answers with a successful JSON object whose ``saved``
        field is truthy.

    Raises:
        DataProviderHTTPError: For every other JSON response. The message
            is the payload's ``error`` field when the payload is an object
            that has one, otherwise the stringified payload.
        Whatever ``response.raise_for_status()`` raises for a non-JSON
        response with an error status.
    """
    response = await session.post(url, json=body)
    content_type = response.headers.get("content-type", "")
    if "application/json" in content_type:
        data = response.json()
        # A JSON body need not be an object (it may be a list, string or
        # null). Only call .get() on real dicts so such payloads are
        # reported as DataProviderHTTPError instead of AttributeError.
        fields = data if isinstance(data, dict) else {}
        if response.is_success and fields.get("saved"):
            return None
        raise DataProviderHTTPError(response.status_code, fields.get("error", str(data)))
    response.raise_for_status()
    return pq.read_table(io.BytesIO(response.content))
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
async def save_parquet(session: httpx.AsyncClient, url: str, body: dict, key: str) -> None:
    """Run the query in *body* and have the server store it under *key*.

    The request is *body* plus a ``save_key`` entry; *body* itself is not
    modified. An error status is surfaced by ``raise_for_status()``.
    """
    payload = dict(body)
    payload["save_key"] = key
    response = await session.post(url, json=payload)
    response.raise_for_status()
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
async def load_parquet_bytes(session: httpx.AsyncClient, base_url: str, key: str) -> bytes:
    """Load a previously saved snapshot as raw parquet bytes.

    Sends ``{"key": key}`` to ``<base_url>/snapshots/load``.

    Returns:
        The raw response content.

    Raises:
        DataProviderHTTPError: Whenever the reply has a JSON content type,
            whatever its status. The message is the payload's ``error``
            field when the payload is an object that has one, otherwise
            the stringified payload.
        Whatever ``resp.raise_for_status()`` raises for a non-JSON reply
        with an error status.
    """
    resp = await session.post(f"{base_url}/snapshots/load", json={"key": key})
    content_type = resp.headers.get("content-type", "")
    if "application/json" in content_type:
        data = resp.json()
        # A JSON body need not be an object; only call .get() on real
        # dicts so a list/string/null payload is still reported as
        # DataProviderHTTPError rather than AttributeError.
        fields = data if isinstance(data, dict) else {}
        raise DataProviderHTTPError(resp.status_code, fields.get("error", str(data)))
    resp.raise_for_status()
    return resp.content
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
async def load_parquet(session: httpx.AsyncClient, base_url: str, key: str) -> pa.Table:
    """Fetch the snapshot stored under *key* and parse it into a pyarrow Table.

    The download is delegated to ``load_parquet_bytes``; the bytes are then
    read from an in-memory buffer.
    """
    buffer = io.BytesIO(await load_parquet_bytes(session, base_url, key))
    return pq.read_table(buffer)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
async def delete_snapshot(session: httpx.AsyncClient, base_url: str, key: str) -> None:
    """Ask the server to remove the snapshot stored under *key*.

    Sends ``{"key": key}`` to ``<base_url>/snapshots/delete``; an error
    status is surfaced by ``raise_for_status()``.
    """
    endpoint = f"{base_url}/snapshots/delete"
    response = await session.post(endpoint, json={"key": key})
    response.raise_for_status()
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
async def list_snapshots(session: httpx.AsyncClient, base_url: str) -> list[str]:
    """Return the ``keys`` entry of ``GET <base_url>/snapshots/list``.

    An error status is surfaced by ``raise_for_status()`` before the body
    is decoded.
    """
    endpoint = f"{base_url}/snapshots/list"
    response = await session.get(endpoint)
    response.raise_for_status()
    payload = response.json()
    return payload["keys"]
|