tradernick-data-provider 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,14 @@
1
+ .env
2
+ .venv/
3
+ __pycache__/
4
+ *.pyc
5
+ .pytest_cache/
6
+
7
+ node_modules/
8
+ .svelte-kit/
9
+ build/
10
+ .DS_Store
11
+
12
+ data/
13
+
14
+ .claude/
@@ -0,0 +1,114 @@
1
+ Metadata-Version: 2.4
2
+ Name: tradernick-data-provider
3
+ Version: 0.2.0
4
+ Summary: Python client for the TraderNick data_provider service. Drop-in compatible with horatio-data-provider (same DataProviderClient API; only the import path changes).
5
+ Author: TraderNick
6
+ License: MIT
7
+ Keywords: binance,clickhouse,data-provider,defi,evm,hyperliquid
8
+ Classifier: Development Status :: 4 - Beta
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: License :: OSI Approved :: MIT License
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Programming Language :: Python :: 3.10
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
16
+ Requires-Python: >=3.10
17
+ Requires-Dist: httpx>=0.25
18
+ Requires-Dist: pandas>=2.0
19
+ Requires-Dist: polars>=1.0
20
+ Requires-Dist: pyarrow>=14.0
21
+ Requires-Dist: pytz>=2023.3
22
+ Provides-Extra: dev
23
+ Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
24
+ Requires-Dist: pytest>=8.0; extra == 'dev'
25
+ Requires-Dist: respx>=0.21; extra == 'dev'
26
+ Description-Content-Type: text/markdown
27
+
28
+ # tradernick-data-provider
29
+
30
+ Python client for the TraderNick `data_provider` service. Drop-in
31
+ compatible with [`horatio-data-provider`](https://pypi.org/project/horatio-data-provider/):
32
+ same `DataProviderClient` class, same namespaces (`evm`, `tron`, `btc`,
33
+ `binance`, `hyperliquid`, `wallets`, `cache`, `jobs`), same chainable
34
+ builder methods, same `as_pandas()` / `as_polars()` / `as_parquet()`
35
+ terminators. The only visible difference is the import path.
36
+
37
+ ```python
38
+ # Before
39
+ from horatio_data_provider import DataProviderClient
40
+
41
+ # After
42
+ from tradernick_data_provider import DataProviderClient
43
+ ```
44
+
45
+ Apart from the import, the server URL passed to the constructor is the
46
+ only thing you need to change at the call site.
47
+
48
+ ## Install
49
+
50
+ ```sh
51
+ pip install tradernick-data-provider
52
+ ```
53
+
54
+ ## Usage
55
+
56
+ ```python
57
+ import asyncio
58
+ from tradernick_data_provider import DataProviderClient
59
+
60
+ async def main():
61
+ async with DataProviderClient("http://localhost:10005") as client:
62
+ df = await client.binance.ohlcv("BTC", "1h") \
63
+ .time_range("2026-06-01T00:00:00Z", "2026-06-08T00:00:00Z") \
64
+ .as_polars()
65
+ print(df)
66
+
67
+ asyncio.run(main())
68
+ ```
69
+
70
+ All Horatio query builders work unchanged. The server delegates to
71
+ ClickHouse instead of DeFiStream, so reads stay sub-second on tables
72
+ where Horatio has to pay a fresh upstream fetch.
73
+
74
+ ## Status
75
+
76
+ **0.2.0 — Phase 1+2 read parity.** Validated against horatio-data-provider
77
+ on 2026-06-07:
78
+ - Column-shape parity: 100% across binance / aave / lido / uniswap /
79
+ hyperliquid / transfers.
80
+ - Row-count + value parity: exact match on stable tables
81
+ (binance.ohlcv 1m, raw_trades, lido.deposit, aave.borrows/repays,
82
+ evm.native with `min_amount`); within ±2 rows on AAVE event tables
83
+ where DeFiStream's sweep loop occasionally re-fetches windows with
84
+ slightly different cuts.
85
+
86
+ What works:
87
+ - `binance.{ohlcv, raw_trades, book_depth, open_interest, funding_rate,
88
+ long_short_ratios}` (with `with_id`, `add_symbol`)
89
+ - `evm.aave.{deposit, withdraw, borrow, repay, flashloan, liquidation}`
90
+ with `involving`, `exclude_involving`, `eth_market_type`
91
+ - `evm.uniswap.{swap, deposit, withdraw, collect}` — V3
92
+ - `evm.lido.{deposit, withdrawal_request, withdrawal_claimed,
93
+ l2_deposit, l2_withdrawal_request}`
94
+ - `evm.erc20.transfers`, `evm.native_transfers` with full filter set
95
+ (sender / receiver / involving / exclude_* / min_amount / max_amount)
96
+ - `tron.{native, trc20}.transfers`, `btc.native.transfers`
97
+ - `hyperliquid.{ohlcv, trades, fills, funding, transfers, vaults,
98
+ trade_history, position_history}`
99
+ - `client.{wallets.list, wallets.get, wallets.upsert, wallets.delete}`
100
+ - `client.{load_parquet, scan_parquet, list_snapshots, delete_snapshot,
101
+ as_parquet}` — server-side parquet snapshots with `local_*` filters
102
+ - `client.jobs.{list, get, cancel}` — proxies to the ingestion job queue
103
+
104
+ Known limitations:
105
+ - `evm.{stader, threshold}` — return empty schemas (TN doesn't ingest
106
+ these networks yet)
107
+ - `hyperliquid.{sends, spot_transfers}` — return empty schemas
108
+ - `client.scan_parquet` only honors address-based `local_*` filters;
109
+ label/category/entity variants are wired but no-op without
110
+ wallet_labels co-mounted on the snapshot.
111
+
112
+ ## Compatibility
113
+
114
+ Drop-in compatible with horatio-data-provider 4.x.
@@ -0,0 +1,87 @@
1
+ # tradernick-data-provider
2
+
3
+ Python client for the TraderNick `data_provider` service. Drop-in
4
+ compatible with [`horatio-data-provider`](https://pypi.org/project/horatio-data-provider/):
5
+ same `DataProviderClient` class, same namespaces (`evm`, `tron`, `btc`,
6
+ `binance`, `hyperliquid`, `wallets`, `cache`, `jobs`), same chainable
7
+ builder methods, same `as_pandas()` / `as_polars()` / `as_parquet()`
8
+ terminators. The only visible difference is the import path.
9
+
10
+ ```python
11
+ # Before
12
+ from horatio_data_provider import DataProviderClient
13
+
14
+ # After
15
+ from tradernick_data_provider import DataProviderClient
16
+ ```
17
+
18
+ Apart from the import, the server URL passed to the constructor is the
19
+ only thing you need to change at the call site.
20
+
21
+ ## Install
22
+
23
+ ```sh
24
+ pip install tradernick-data-provider
25
+ ```
26
+
27
+ ## Usage
28
+
29
+ ```python
30
+ import asyncio
31
+ from tradernick_data_provider import DataProviderClient
32
+
33
+ async def main():
34
+ async with DataProviderClient("http://localhost:10005") as client:
35
+ df = await client.binance.ohlcv("BTC", "1h") \
36
+ .time_range("2026-06-01T00:00:00Z", "2026-06-08T00:00:00Z") \
37
+ .as_polars()
38
+ print(df)
39
+
40
+ asyncio.run(main())
41
+ ```
42
+
43
+ All Horatio query builders work unchanged. The server delegates to
44
+ ClickHouse instead of DeFiStream, so reads stay sub-second on tables
45
+ where Horatio has to pay a fresh upstream fetch.
46
+
47
+ ## Status
48
+
49
+ **0.2.0 — Phase 1+2 read parity.** Validated against horatio-data-provider
50
+ on 2026-06-07:
51
+ - Column-shape parity: 100% across binance / aave / lido / uniswap /
52
+ hyperliquid / transfers.
53
+ - Row-count + value parity: exact match on stable tables
54
+ (binance.ohlcv 1m, raw_trades, lido.deposit, aave.borrows/repays,
55
+ evm.native with `min_amount`); within ±2 rows on AAVE event tables
56
+ where DeFiStream's sweep loop occasionally re-fetches windows with
57
+ slightly different cuts.
58
+
59
+ What works:
60
+ - `binance.{ohlcv, raw_trades, book_depth, open_interest, funding_rate,
61
+ long_short_ratios}` (with `with_id`, `add_symbol`)
62
+ - `evm.aave.{deposit, withdraw, borrow, repay, flashloan, liquidation}`
63
+ with `involving`, `exclude_involving`, `eth_market_type`
64
+ - `evm.uniswap.{swap, deposit, withdraw, collect}` — V3
65
+ - `evm.lido.{deposit, withdrawal_request, withdrawal_claimed,
66
+ l2_deposit, l2_withdrawal_request}`
67
+ - `evm.erc20.transfers`, `evm.native_transfers` with full filter set
68
+ (sender / receiver / involving / exclude_* / min_amount / max_amount)
69
+ - `tron.{native, trc20}.transfers`, `btc.native.transfers`
70
+ - `hyperliquid.{ohlcv, trades, fills, funding, transfers, vaults,
71
+ trade_history, position_history}`
72
+ - `client.{wallets.list, wallets.get, wallets.upsert, wallets.delete}`
73
+ - `client.{load_parquet, scan_parquet, list_snapshots, delete_snapshot,
74
+ as_parquet}` — server-side parquet snapshots with `local_*` filters
75
+ - `client.jobs.{list, get, cancel}` — proxies to the ingestion job queue
76
+
77
+ Known limitations:
78
+ - `evm.{stader, threshold}` — return empty schemas (TN doesn't ingest
79
+ these networks yet)
80
+ - `hyperliquid.{sends, spot_transfers}` — return empty schemas
81
+ - `client.scan_parquet` only honors address-based `local_*` filters;
82
+ label/category/entity variants are wired but no-op without
83
+ wallet_labels co-mounted on the snapshot.
84
+
85
+ ## Compatibility
86
+
87
+ Drop-in compatible with horatio-data-provider 4.x.
@@ -0,0 +1,42 @@
1
+ [project]
2
+ name = "tradernick-data-provider"
3
+ version = "0.2.0"
4
+ description = "Python client for the TraderNick data_provider service. Drop-in compatible with horatio-data-provider (same DataProviderClient API; only the import path changes)."
5
+ readme = "README.md"
6
+ requires-python = ">=3.10"
7
+ license = { text = "MIT" }
8
+ authors = [{ name = "TraderNick" }]
9
+ keywords = ["clickhouse", "binance", "hyperliquid", "evm", "defi", "data-provider"]
10
+ classifiers = [
11
+ "Development Status :: 4 - Beta",
12
+ "Intended Audience :: Developers",
13
+ "License :: OSI Approved :: MIT License",
14
+ "Programming Language :: Python :: 3",
15
+ "Programming Language :: Python :: 3.10",
16
+ "Programming Language :: Python :: 3.11",
17
+ "Programming Language :: Python :: 3.12",
18
+ "Topic :: Software Development :: Libraries :: Python Modules",
19
+ ]
20
+ dependencies = [
21
+ "httpx>=0.25",
22
+ "pyarrow>=14.0",
23
+ "pandas>=2.0",
24
+ "polars>=1.0",
25
+ "pytz>=2023.3",
26
+ ]
27
+
28
+ [project.optional-dependencies]
29
+ dev = ["pytest>=8.0", "pytest-asyncio>=0.23", "respx>=0.21"]
30
+
31
+ [tool.pytest.ini_options]
32
+ asyncio_mode = "auto"
33
+
34
+ [tool.hatch.build.targets.wheel]
35
+ packages = ["tradernick_data_provider"]
36
+
37
+ [tool.hatch.build.targets.sdist]
38
+ include = ["tradernick_data_provider", "README.md", "pyproject.toml"]
39
+
40
+ [build-system]
41
+ requires = ["hatchling"]
42
+ build-backend = "hatchling.build"
@@ -0,0 +1,10 @@
1
+ from ._client import DataProviderClient
2
+ from .exceptions import DataProviderError, DataProviderHTTPError
3
+ from .wallets import WalletsNamespace
4
+
5
+ __all__ = [
6
+ "DataProviderClient",
7
+ "DataProviderError",
8
+ "DataProviderHTTPError",
9
+ "WalletsNamespace",
10
+ ]
@@ -0,0 +1,148 @@
1
+ import io
2
+ from datetime import datetime
3
+ from typing import Literal, Optional, Union
4
+
5
+ import httpx
6
+ import polars as pl
7
+
8
+ from ._http import load_parquet_bytes, list_snapshots, delete_snapshot
9
+ from ._query import _to_timestamp
10
+
11
+
12
def _to_datetime(date: datetime | str | int) -> datetime:
    """Return *date* as a timezone-aware ``datetime``.

    The input is first normalized by ``_to_timestamp`` (which yields a
    ``'YYYY-MM-DDTHH:MM:SSZ'`` string); the trailing ``Z`` is then spelled
    as an explicit ``+00:00`` offset so ``datetime.fromisoformat`` can
    parse it.
    """
    iso_utc = _to_timestamp(date)
    with_offset = iso_utc.replace("Z", "+00:00")
    return datetime.fromisoformat(with_offset)
16
+
17
+
18
def _cast_time_ms_utc(df: pl.DataFrame) -> pl.DataFrame:
    """Return *df* with its ``time`` column cast to ``Datetime('ms', 'UTC')``.

    Snapshots written by DuckDB arrive as μs+UTC while cache reads arrive
    as ms+UTC; bringing both to the same dtype keeps them joinable on the
    polars side. Frames without a ``time`` column, with a non-datetime
    ``time`` column, or already at ms+UTC are returned unchanged.
    """
    if 'time' not in df.columns:
        return df

    time_dtype = df.schema['time']
    if not isinstance(time_dtype, pl.Datetime):
        return df

    already_normalized = (
        time_dtype.time_unit == 'ms' and time_dtype.time_zone == 'UTC'
    )
    if already_normalized:
        return df

    target = pl.Datetime('ms', 'UTC')
    return df.with_columns(pl.col('time').cast(target))
31
+
32
+ from .binance import BinanceNamespace, HyperliquidNamespace
33
+ from .btc import BtcNamespace
34
+ from .evm import EvmNamespace
35
+ from .jobs import JobsNamespace
36
+ from .protocols import CacheNamespace
37
+ from .tron import TronNamespace
38
+ from .wallets import WalletsNamespace
39
+
40
+
41
class DataProviderClient:
    """Async client for the data_provider HTTP service.

    One ``httpx.AsyncClient`` session is created per client and shared by
    every namespace (``evm``, ``tron``, ``btc``, ``binance``,
    ``hyperliquid``, ``wallets``, ``cache``, ``jobs``). Use the client as
    an async context manager, or call ``close()`` explicitly, to release
    the session.
    """

    evm: EvmNamespace
    tron: TronNamespace
    btc: BtcNamespace
    binance: BinanceNamespace
    hyperliquid: HyperliquidNamespace
    wallets: WalletsNamespace
    cache: CacheNamespace
    jobs: JobsNamespace

    def __init__(self, url: str, *, timeout: Optional[float] = 86400):
        """Create a client bound to the server at *url*.

        Args:
            url: Base URL of the service; a trailing ``/`` is stripped.
            timeout: Timeout in seconds forwarded to ``httpx.AsyncClient``.
                Defaults to 86400 (24 hours), the value that was
                previously hard-coded. ``None`` disables timeouts.
        """
        self._url = url.rstrip("/")
        self._session = httpx.AsyncClient(timeout=timeout)
        # Every namespace shares the same session and base URL.
        self.evm = EvmNamespace(self._session, self._url)
        self.tron = TronNamespace(self._session, self._url)
        self.btc = BtcNamespace(self._session, self._url)
        self.binance = BinanceNamespace(self._session, self._url)
        self.hyperliquid = HyperliquidNamespace(self._session, self._url)
        self.wallets = WalletsNamespace(self._session, self._url)
        self.cache = CacheNamespace(self._session, self._url)
        self.jobs = JobsNamespace(self._session, self._url)

    async def health(self) -> bool:
        """GET ``/health`` and return ``True`` when the server answers OK.

        An error status is raised by ``raise_for_status()`` rather than
        reported as ``False``.
        """
        response = await self._session.get(self._url + "/health")
        response.raise_for_status()
        return True

    async def load_parquet(
        self,
        key: str,
        since: Optional[Union[datetime, str, int]] = None,
        until: Optional[Union[datetime, str, int]] = None,
    ) -> pl.DataFrame:
        """Load a saved snapshot as a polars DataFrame.

        ``time`` is normalized to ``Datetime('ms', UTC)`` so joins with
        transfer-read DataFrames (which the cache layer also returns at
        ms+UTC) don't trip the polars 'datatypes of join keys don't
        match' check. Snapshots saved via DuckDB COPY are stored at
        μs+UTC internally; we cast on read.

        ``since`` / ``until`` are optional inclusive bounds applied
        client-side to the ``timestamp`` column when present, otherwise
        to ``time``. If the snapshot has neither column the bounds are
        ignored.

        For pandas, call ``(await client.load_parquet(key)).to_pandas()``.
        """
        raw = await load_parquet_bytes(self._session, self._url, key)
        df = _cast_time_ms_utc(pl.read_parquet(io.BytesIO(raw)))

        if since is None and until is None:
            return df

        time_col = "timestamp" if "timestamp" in df.columns else "time"
        if time_col not in df.columns:
            # No recognizable time column: nothing to filter on.
            return df

        if since is not None:
            df = df.filter(pl.col(time_col) >= _to_datetime(since))
        if until is not None:
            df = df.filter(pl.col(time_col) <= _to_datetime(until))
        return df

    def scan_parquet(self, key: str, *,
                     since: Optional[Union[datetime, str, int]] = None,
                     until: Optional[Union[datetime, str, int]] = None,
                     engine: Literal['polars', 'duckdb'] = 'duckdb',
                     normalize_addresses: Optional[bool] = None):
        """Lazy-scan a saved snapshot with ``local_*`` filters applied
        server-side. Returns a ``ScanParquetQuery`` builder. Chain
        ``local_*`` filter methods then call a terminal ``as_polars()`` /
        ``as_pandas()`` / ``as_parquet(new_key)``.

        This method itself performs no I/O; it only constructs the
        builder.

        ``engine``:
          - ``'duckdb'`` (default): server mounts the snapshot + wallets
            parquets as DuckDB views and runs the filter as SQL.
            Streams via ``COPY ... TO PARQUET``. Best optimizer for
            large ``IN`` filters; ~3-50× faster than polars on big
            wallet-set queries.
          - ``'polars'``: server uses ``pl.scan_parquet`` and a polars
            lazy filter pipeline. Streams via ``sink_parquet``.

        ``normalize_addresses``: default ``None`` (auto). Set to ``False``
        only when you know the snapshot is canonical and the file lacks
        the metadata flag — auto-detect already handles canonical files.

        Example::

            df = await client.scan_parquet('huge_snapshot') \\
                .local_exclude_sender_categories(['Hot-Wallet','Cold-Wallet']) \\
                .local_involving_entities(['Binance']) \\
                .as_polars()
        """
        # Imported lazily, as in the original implementation.
        from .snapshots import ScanParquetQuery
        return ScanParquetQuery(
            self._session, self._url, key,
            since=since, until=until,
            engine=engine, normalize_addresses=normalize_addresses,
        )

    async def list_snapshots(self) -> list[str]:
        """List all saved snapshot keys."""
        return await list_snapshots(self._session, self._url)

    async def delete_snapshot(self, key: str) -> None:
        """Delete a saved snapshot."""
        await delete_snapshot(self._session, self._url, key)

    async def close(self) -> None:
        """Close the underlying HTTP session."""
        await self._session.aclose()

    async def __aenter__(self) -> "DataProviderClient":
        """Enter the async context; returns the client itself."""
        return self

    async def __aexit__(self, *_) -> None:
        """Leave the async context, closing the HTTP session."""
        await self.close()
@@ -0,0 +1,55 @@
1
+ import io
2
+
3
+ import httpx
4
+ import pyarrow as pa
5
+ import pyarrow.parquet as pq
6
+
7
+ from .exceptions import DataProviderHTTPError
8
+
9
+
10
async def fetch_table(session: httpx.AsyncClient, url: str, body: dict) -> pa.Table | None:
    """POST *body* to *url* and decode the reply as a pyarrow Table.

    A JSON reply is treated as a status message rather than data:

    * a successful response whose JSON object has a truthy ``"saved"``
      field returns ``None``;
    * any other JSON reply raises ``DataProviderHTTPError`` carrying the
      HTTP status code and the object's ``"error"`` field (or the
      stringified payload when that field is absent).

    Any non-JSON reply is checked with ``raise_for_status()`` and then
    parsed as parquet.
    """
    response = await session.post(url, json=body)
    content_type = response.headers.get("content-type", "")
    if "application/json" in content_type:
        data = response.json()
        if not isinstance(data, dict):
            # A JSON array/string/number has no "saved"/"error" keys;
            # report it as an error instead of failing on ``.get``.
            raise DataProviderHTTPError(response.status_code, str(data))
        if response.is_success and data.get("saved"):
            return None
        raise DataProviderHTTPError(response.status_code, data.get("error", str(data)))
    response.raise_for_status()
    return pq.read_table(io.BytesIO(response.content))
20
+
21
+
22
async def save_parquet(session: httpx.AsyncClient, url: str, body: dict, key: str) -> None:
    """Run the query in *body* and store its result under snapshot *key*.

    The request is the original query with an added ``save_key`` field;
    *body* itself is not modified. An error status is raised via
    ``raise_for_status()``.
    """
    payload = dict(body)
    payload["save_key"] = key
    response = await session.post(url, json=payload)
    response.raise_for_status()
26
+
27
+
28
async def load_parquet_bytes(session: httpx.AsyncClient, base_url: str, key: str) -> bytes:
    """Load a previously saved snapshot as raw parquet bytes.

    POSTs ``{"key": key}`` to ``{base_url}/snapshots/load``. A JSON reply
    is always treated as an error and raised as ``DataProviderHTTPError``
    with the HTTP status code and the reply's ``"error"`` field (or the
    stringified payload when that field is unavailable). Any other reply
    is checked with ``raise_for_status()`` and its body returned as-is.
    """
    resp = await session.post(f"{base_url}/snapshots/load", json={"key": key})
    content_type = resp.headers.get("content-type", "")
    if "application/json" in content_type:
        data = resp.json()
        # Only JSON objects can carry an "error" field; fall back to the
        # stringified payload for arrays/strings instead of failing on
        # ``.get``.
        if isinstance(data, dict):
            message = data.get("error", str(data))
        else:
            message = str(data)
        raise DataProviderHTTPError(resp.status_code, message)
    resp.raise_for_status()
    return resp.content
37
+
38
+
39
async def load_parquet(session: httpx.AsyncClient, base_url: str, key: str) -> pa.Table:
    """Fetch the snapshot stored under *key* and parse it into a pyarrow Table.

    The bytes come from ``load_parquet_bytes``; errors raised there
    propagate unchanged.
    """
    parquet_bytes = await load_parquet_bytes(session, base_url, key)
    buffer = io.BytesIO(parquet_bytes)
    return pq.read_table(buffer)
43
+
44
+
45
async def delete_snapshot(session: httpx.AsyncClient, base_url: str, key: str) -> None:
    """Ask the server to delete the snapshot stored under *key*.

    POSTs ``{"key": key}`` to ``{base_url}/snapshots/delete``; an error
    status is raised via ``raise_for_status()``.
    """
    endpoint = f"{base_url}/snapshots/delete"
    response = await session.post(endpoint, json={"key": key})
    response.raise_for_status()
49
+
50
+
51
async def list_snapshots(session: httpx.AsyncClient, base_url: str) -> list[str]:
    """Return the keys of all saved snapshots.

    GETs ``{base_url}/snapshots/list`` and returns the ``"keys"`` entry of
    the JSON reply; an error status is raised via ``raise_for_status()``.
    """
    endpoint = f"{base_url}/snapshots/list"
    response = await session.get(endpoint)
    response.raise_for_status()
    payload = response.json()
    return payload["keys"]