tradernick-data-provider 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,10 @@
1
+ from ._client import DataProviderClient
2
+ from .exceptions import DataProviderError, DataProviderHTTPError
3
+ from .wallets import WalletsNamespace
4
+
5
+ __all__ = [
6
+ "DataProviderClient",
7
+ "DataProviderError",
8
+ "DataProviderHTTPError",
9
+ "WalletsNamespace",
10
+ ]
@@ -0,0 +1,148 @@
1
+ import io
2
+ from datetime import datetime
3
+ from typing import Literal, Optional, Union
4
+
5
+ import httpx
6
+ import polars as pl
7
+
8
+ from ._http import load_parquet_bytes, list_snapshots, delete_snapshot
9
+ from ._query import _to_timestamp
10
+
11
+
12
def _to_datetime(date: datetime | str | int) -> datetime:
    """Parse ``date`` into a timezone-aware ``datetime``.

    The input is first normalised by ``_to_timestamp`` (which yields a
    ``'YYYY-MM-DDTHH:MM:SSZ'`` string); the trailing ``Z`` is rewritten as
    an explicit ``+00:00`` offset before the string is parsed back.
    """
    iso_text = _to_timestamp(date).replace("Z", "+00:00")
    return datetime.fromisoformat(iso_text)
16
+
17
+
18
def _cast_time_ms_utc(df: pl.DataFrame) -> pl.DataFrame:
    """Return ``df`` with its ``time`` column as ``Datetime('ms', 'UTC')``.

    Snapshots written by DuckDB arrive as μs+UTC while cache reads arrive
    as ms+UTC; bringing both to one dtype keeps the frames joinable in
    polars. Frames without a ``time`` column, with a non-datetime ``time``
    column, or already at ms+UTC are returned unchanged.
    """
    if 'time' not in df.columns:
        return df
    current = df.schema['time']
    if not isinstance(current, pl.Datetime):
        return df
    if current.time_unit == 'ms' and current.time_zone == 'UTC':
        return df
    target = pl.Datetime('ms', 'UTC')
    return df.with_columns(pl.col('time').cast(target))
31
+
32
+ from .binance import BinanceNamespace, HyperliquidNamespace
33
+ from .btc import BtcNamespace
34
+ from .evm import EvmNamespace
35
+ from .jobs import JobsNamespace
36
+ from .protocols import CacheNamespace
37
+ from .tron import TronNamespace
38
+ from .wallets import WalletsNamespace
39
+
40
+
41
class DataProviderClient:
    """Async client for the data-provider service.

    One ``httpx.AsyncClient`` is created per instance and shared by every
    namespace object. Use the client as an async context manager, or call
    ``close()`` explicitly, to release the HTTP session.
    """

    evm: EvmNamespace
    tron: TronNamespace
    btc: BtcNamespace
    binance: BinanceNamespace
    hyperliquid: HyperliquidNamespace
    wallets: WalletsNamespace
    cache: CacheNamespace
    jobs: JobsNamespace

    def __init__(self, url: str):
        """Create the shared session and one namespace object per API area.

        Trailing slashes are stripped from ``url`` so endpoint paths can be
        appended directly.
        """
        base = url.rstrip("/")
        session = httpx.AsyncClient(timeout=86400)
        self._url = base
        self._session = session
        self.evm = EvmNamespace(session, base)
        self.tron = TronNamespace(session, base)
        self.btc = BtcNamespace(session, base)
        self.binance = BinanceNamespace(session, base)
        self.hyperliquid = HyperliquidNamespace(session, base)
        self.wallets = WalletsNamespace(session, base)
        self.cache = CacheNamespace(session, base)
        self.jobs = JobsNamespace(session, base)

    async def health(self) -> bool:
        """GET ``/health``; return ``True`` on success, raise on an error status."""
        reply = await self._session.get(self._url + "/health")
        reply.raise_for_status()
        return True

    async def load_parquet(
        self,
        key: str,
        since: Optional[Union[datetime, str, int]] = None,
        until: Optional[Union[datetime, str, int]] = None,
    ) -> pl.DataFrame:
        """Download the snapshot ``key`` and return it as a polars DataFrame.

        The ``time`` column is cast to ``Datetime('ms', UTC)`` on read so the
        frame joins cleanly with transfer-read DataFrames (also ms+UTC);
        snapshots saved through DuckDB COPY are stored at μs+UTC.

        When ``since`` and/or ``until`` is given, rows are filtered
        (inclusive bounds) on the ``timestamp`` column if present, else on
        ``time``; if neither column exists no filtering is done.

        For pandas, call ``(await client.load_parquet(key)).to_pandas()``.
        """
        payload = await load_parquet_bytes(self._session, self._url, key)
        frame = _cast_time_ms_utc(pl.read_parquet(io.BytesIO(payload)))
        if since is None and until is None:
            return frame
        column = "timestamp" if "timestamp" in frame.columns else "time"
        if column not in frame.columns:
            return frame
        if since is not None:
            frame = frame.filter(pl.col(column) >= _to_datetime(since))
        if until is not None:
            frame = frame.filter(pl.col(column) <= _to_datetime(until))
        return frame

    def scan_parquet(
        self,
        key: str,
        *,
        since: Optional[Union[datetime, str, int]] = None,
        until: Optional[Union[datetime, str, int]] = None,
        engine: Literal['polars', 'duckdb'] = 'duckdb',
        normalize_addresses: Optional[bool] = None,
    ):
        """Build a ``ScanParquetQuery`` for a lazy, server-side filtered scan.

        Chain ``local_*`` filter methods on the returned builder, then
        finish with ``as_polars()`` / ``as_pandas()`` /
        ``as_parquet(new_key)``.

        ``engine``:
          - ``'duckdb'`` (default): the server mounts the snapshot and
            wallets parquets as DuckDB views, runs the filter as SQL and
            streams via ``COPY ... TO PARQUET``. Best optimizer for large
            ``IN`` filters; ~3-50× faster than polars on big wallet-set
            queries.
          - ``'polars'``: the server uses ``pl.scan_parquet`` with a lazy
            polars filter pipeline and streams via ``sink_parquet``.

        ``normalize_addresses``: ``None`` (default) means auto-detect. Pass
        ``False`` only when the snapshot is known to be canonical and the
        file lacks the metadata flag — auto-detect already covers
        canonical files.

        Example::

            df = await client.scan_parquet('huge_snapshot') \\
                .local_exclude_sender_categories(['Hot-Wallet','Cold-Wallet']) \\
                .local_involving_entities(['Binance']) \\
                .as_polars()
        """
        # Imported here rather than at module level, as in the original.
        from .snapshots import ScanParquetQuery

        return ScanParquetQuery(
            self._session,
            self._url,
            key,
            since=since,
            until=until,
            engine=engine,
            normalize_addresses=normalize_addresses,
        )

    async def list_snapshots(self) -> list[str]:
        """Return the keys of all saved snapshots."""
        # Inside the method body the bare name resolves to the module-level
        # helper imported from ``._http``, not to this method.
        keys = await list_snapshots(self._session, self._url)
        return keys

    async def delete_snapshot(self, key: str) -> None:
        """Delete the saved snapshot stored under ``key``."""
        await delete_snapshot(self._session, self._url, key)

    async def close(self) -> None:
        """Close the underlying HTTP session."""
        await self._session.aclose()

    async def __aenter__(self) -> "DataProviderClient":
        """Enter the async context; the client itself is the context value."""
        return self

    async def __aexit__(self, *_) -> None:
        """Leave the async context, closing the session."""
        await self.close()
@@ -0,0 +1,55 @@
1
+ import io
2
+
3
+ import httpx
4
+ import pyarrow as pa
5
+ import pyarrow.parquet as pq
6
+
7
+ from .exceptions import DataProviderHTTPError
8
+
9
+
10
async def fetch_table(session: httpx.AsyncClient, url: str, body: dict) -> pa.Table | None:
    """POST ``body`` as JSON to ``url`` and decode the reply as a pyarrow Table.

    A reply with a JSON content type is a status message, not data:

    - a successful reply whose payload is an object with a truthy
      ``saved`` entry returns ``None``;
    - any other JSON reply raises ``DataProviderHTTPError`` carrying the
      status code and the payload's ``error`` entry (or the stringified
      payload when there is none).

    Any other reply is checked with ``raise_for_status()`` and then parsed
    as parquet bytes.

    Raises:
        DataProviderHTTPError: for a JSON reply that is not a successful
            "saved" acknowledgement.
    """
    response = await session.post(url, json=body)
    content_type = response.headers.get("content-type", "")
    if "application/json" in content_type:
        data = response.json()
        # A JSON payload is not necessarily an object (it can be a list,
        # string or null). Guard the ``.get`` calls so such a reply still
        # surfaces as DataProviderHTTPError instead of an AttributeError
        # that hides the HTTP status.
        is_object = isinstance(data, dict)
        if response.is_success and is_object and data.get("saved"):
            return None
        message = data.get("error", str(data)) if is_object else str(data)
        raise DataProviderHTTPError(response.status_code, message)
    response.raise_for_status()
    return pq.read_table(io.BytesIO(response.content))
20
+
21
+
22
async def save_parquet(session: httpx.AsyncClient, url: str, body: dict, key: str) -> None:
    """POST the query ``body`` with ``save_key`` set to ``key``.

    The caller's ``body`` is not modified; a copy carrying the extra
    ``save_key`` entry is sent. An error status raises via
    ``raise_for_status()``.
    """
    payload = dict(body)
    payload["save_key"] = key
    response = await session.post(url, json=payload)
    response.raise_for_status()
26
+
27
+
28
async def load_parquet_bytes(session: httpx.AsyncClient, base_url: str, key: str) -> bytes:
    """Fetch the saved snapshot ``key`` as raw parquet bytes.

    POSTs ``{"key": key}`` to ``{base_url}/snapshots/load``. A reply with a
    JSON content type is always treated as an error report; any other
    reply is checked with ``raise_for_status()`` and its body returned.

    Raises:
        DataProviderHTTPError: when the server answers with JSON; carries
            the status code and the payload's ``error`` entry (or the
            stringified payload when there is none).
    """
    resp = await session.post(f"{base_url}/snapshots/load", json={"key": key})
    content_type = resp.headers.get("content-type", "")
    if "application/json" in content_type:
        data = resp.json()
        # The payload may be a list, string or null rather than an object;
        # only call ``.get`` on a dict so the HTTP error is never masked by
        # an AttributeError.
        message = data.get("error", str(data)) if isinstance(data, dict) else str(data)
        raise DataProviderHTTPError(resp.status_code, message)
    resp.raise_for_status()
    return resp.content
37
+
38
+
39
async def load_parquet(session: httpx.AsyncClient, base_url: str, key: str) -> pa.Table:
    """Fetch the saved snapshot ``key`` and parse it into a pyarrow Table."""
    buffer = io.BytesIO(await load_parquet_bytes(session, base_url, key))
    return pq.read_table(buffer)
43
+
44
+
45
async def delete_snapshot(session: httpx.AsyncClient, base_url: str, key: str) -> None:
    """Ask the server to delete the snapshot stored under ``key``.

    POSTs ``{"key": key}`` to ``{base_url}/snapshots/delete``; an error
    status raises via ``raise_for_status()``.
    """
    endpoint = f"{base_url}/snapshots/delete"
    response = await session.post(endpoint, json={"key": key})
    response.raise_for_status()
49
+
50
+
51
async def list_snapshots(session: httpx.AsyncClient, base_url: str) -> list[str]:
    """Return the ``keys`` entry of the ``/snapshots/list`` JSON reply.

    An error status raises via ``raise_for_status()``.
    """
    endpoint = f"{base_url}/snapshots/list"
    response = await session.get(endpoint)
    response.raise_for_status()
    listing = response.json()
    return listing["keys"]
@@ -0,0 +1,378 @@
1
+ from __future__ import annotations
2
+
3
+ from datetime import datetime, timezone
4
+ from typing import TYPE_CHECKING
5
+
6
+ import httpx
7
+ import pandas as pd
8
+ import polars as pl
9
+ import pyarrow as pa
10
+
11
+ from ._http import fetch_table
12
+
13
+ if TYPE_CHECKING:
14
+ from typing import Self
15
+
16
+ _TIME_COL = "timestamp"
17
+
18
+
19
+ def _to_timestamp(date: datetime | str | int) -> str:
20
+ """Normalise any DateType value to 'YYYY-MM-DDTHH:MM:SSZ' for the server."""
21
+ if isinstance(date, int):
22
+ dt = datetime.fromtimestamp(date / 1000, tz=timezone.utc)
23
+ elif isinstance(date, str):
24
+ if "T" in date:
25
+ dt = datetime.fromisoformat(date.replace("Z", "+00:00"))
26
+ elif len(date) == 10:
27
+ dt = datetime.strptime(date, "%Y-%m-%d").replace(tzinfo=timezone.utc)
28
+ else:
29
+ dt = datetime.strptime(date, "%Y-%m-%d %H:%M:%S").replace(tzinfo=timezone.utc)
30
+ elif isinstance(date, datetime):
31
+ dt = date
32
+ else:
33
+ raise ValueError(f"Unsupported date type: {type(date)}")
34
+ if dt.tzinfo is None:
35
+ dt = dt.replace(tzinfo=timezone.utc)
36
+ return dt.strftime("%Y-%m-%dT%H:%M:%SZ")
37
+
38
+
39
+ class _LocalFiltersMixin:
40
+ """24 ``local_*`` methods that accumulate filter steps into
41
+ ``self._body['local_filters']``. Used by both transfer queries (where
42
+ filters apply post-fetch on the server) and the new
43
+ ``ScanParquetQuery`` (where they apply via a lazy scan on the server).
44
+
45
+ Filter rules:
46
+ - Each call appends one sequential ``df.filter(...)`` step.
47
+ - Within a single call, ``values`` is union-ed (any-of).
48
+ - ``involving_*`` matches sender OR receiver.
49
+ - ``exclude_*`` negates the predicate.
50
+ - Address lookups are case-insensitive on entity/category/label terms;
51
+ EVM ``0x…`` addresses get lowercased per-row in the lazy plan,
52
+ TRON/BTC pass through unchanged.
53
+ """
54
+
55
+ _body: dict # set by the concrete subclass
56
+
57
+ def _add_local_filter(self, op: str, values: list[str]):
58
+ if not isinstance(values, (list, tuple, set)) or not all(isinstance(v, str) for v in values):
59
+ raise TypeError(f"{op}: values must be a list of strings")
60
+ if not values:
61
+ return self
62
+ steps = self._body.setdefault("local_filters", [])
63
+ steps.append({"op": op, "values": list(values)})
64
+ return self
65
+
66
+ # involving (sender OR receiver)
67
+ def local_involving(self, addresses: list[str]): return self._add_local_filter("involving", addresses)
68
+ def local_involving_labels(self, labels: list[str]): return self._add_local_filter("involving_labels", labels)
69
+ def local_involving_categories(self, categories: list[str]): return self._add_local_filter("involving_categories", categories)
70
+ def local_involving_entities(self, entities: list[str]): return self._add_local_filter("involving_entities", entities)
71
+
72
+ # sender
73
+ def local_sender(self, addresses: list[str]): return self._add_local_filter("sender", addresses)
74
+ def local_sender_labels(self, labels: list[str]): return self._add_local_filter("sender_labels", labels)
75
+ def local_sender_categories(self, categories: list[str]): return self._add_local_filter("sender_categories", categories)
76
+ def local_sender_entities(self, entities: list[str]): return self._add_local_filter("sender_entities", entities)
77
+
78
+ # receiver
79
+ def local_receiver(self, addresses: list[str]): return self._add_local_filter("receiver", addresses)
80
+ def local_receiver_labels(self, labels: list[str]): return self._add_local_filter("receiver_labels", labels)
81
+ def local_receiver_categories(self, categories: list[str]): return self._add_local_filter("receiver_categories", categories)
82
+ def local_receiver_entities(self, entities: list[str]): return self._add_local_filter("receiver_entities", entities)
83
+
84
+ # exclude variants
85
+ def local_exclude_involving(self, addresses: list[str]): return self._add_local_filter("exclude_involving", addresses)
86
+ def local_exclude_involving_labels(self, labels: list[str]): return self._add_local_filter("exclude_involving_labels", labels)
87
+ def local_exclude_involving_categories(self, categories: list[str]): return self._add_local_filter("exclude_involving_categories", categories)
88
+ def local_exclude_involving_entities(self, entities: list[str]): return self._add_local_filter("exclude_involving_entities", entities)
89
+
90
+ def local_exclude_sender(self, addresses: list[str]): return self._add_local_filter("exclude_sender", addresses)
91
+ def local_exclude_sender_labels(self, labels: list[str]): return self._add_local_filter("exclude_sender_labels", labels)
92
+ def local_exclude_sender_categories(self, categories: list[str]): return self._add_local_filter("exclude_sender_categories", categories)
93
+ def local_exclude_sender_entities(self, entities: list[str]): return self._add_local_filter("exclude_sender_entities", entities)
94
+
95
+ def local_exclude_receiver(self, addresses: list[str]): return self._add_local_filter("exclude_receiver", addresses)
96
+ def local_exclude_receiver_labels(self, labels: list[str]): return self._add_local_filter("exclude_receiver_labels", labels)
97
+ def local_exclude_receiver_categories(self, categories: list[str]): return self._add_local_filter("exclude_receiver_categories", categories)
98
+ def local_exclude_receiver_entities(self, entities: list[str]): return self._add_local_filter("exclude_receiver_entities", entities)
99
+
100
+
101
class BaseQuery(_LocalFiltersMixin):
    """Fluent builder that accumulates request fields in ``self._body``.

    Builder methods mutate the body and return ``self`` for chaining. The
    terminal methods (``as_pandas`` / ``as_polars`` / ``as_parquet``) rely
    on a ``_fetch_table`` coroutine that this class does not define; it is
    expected from a subclass. The ``local_*`` filter methods come from
    ``_LocalFiltersMixin``.
    """

    def __init__(self, session: httpx.AsyncClient, base_url: str, body: dict):
        self._session = session
        self._base_url = base_url
        self._body = body

    def network(self, n: str | list[str]) -> Self:
        """Select one network (string) or several (list).

        EVM-class endpoints accept a list to fan out per network. The
        server cache is keyed per network, so each chain reads/writes its
        own partition independently. The combined result is concatenated
        client-side and, for more than one network, automatically tagged
        via ``with_network`` so rows stay distinguishable. TRON/BTC are
        always single-network — a one-element list still works.
        """
        field = "networks" if isinstance(n, list) else "network"
        self._body[field] = n
        return self

    def with_network(self, enabled: bool = True) -> Self:
        """Set the ``with_network`` request flag."""
        self._body["with_network"] = enabled
        return self

    def include_zero_amounts(self, enabled: bool = True) -> Self:
        """Keep rows whose amount is 0 in the result.

        Such rows are filtered out by default — they are typically
        token-approval-style noise that inflates row counts without
        representing real flow. The filter runs right before the response
        is written or the snapshot is saved; the data-provider cache keeps
        every row, so toggling this flag does not invalidate the cache.
        """
        self._body["include_zero_amounts"] = enabled
        return self

    def _auto_with_network(self) -> None:
        """Turn ``with_network`` on for a multi-network fan-out.

        Nothing changes when the flag was already set explicitly (for
        example by ``with_network(False)``).
        """
        selected = self._body.get("networks") or []
        if len(selected) <= 1:
            return
        if "with_network" not in self._body:
            self._body["with_network"] = True

    def time_range(self, since: datetime | str | int, until: datetime | str | int) -> Self:
        """Store ``since`` / ``until`` as normalised timestamp strings."""
        lower = _to_timestamp(since)
        self._body["since"] = lower
        upper = _to_timestamp(until)
        self._body["until"] = upper
        return self

    def involving(self, address: str) -> Self:
        """Set the ``involving`` request field."""
        self._body["involving"] = address
        return self

    def involving_label(self, label: str) -> Self:
        """Set the ``involving_label`` request field."""
        self._body["involving_label"] = label
        return self

    def involving_category(self, category: str) -> Self:
        """Set the ``involving_category`` request field."""
        self._body["involving_category"] = category
        return self

    def exclude_involving(self, address: str) -> Self:
        """Set the ``exclude_involving`` request field."""
        self._body["exclude_involving"] = address
        return self

    def exclude_involving_label(self, label: str) -> Self:
        """Set the ``exclude_involving_label`` request field."""
        self._body["exclude_involving_label"] = label
        return self

    def exclude_involving_category(self, category: str) -> Self:
        """Set the ``exclude_involving_category`` request field."""
        self._body["exclude_involving_category"] = category
        return self

    def wallet_namespace(self, ns: str) -> Self:
        """Set the ``wallet_namespace`` request field."""
        self._body["wallet_namespace"] = ns
        return self

    def with_value(self) -> Self:
        """Set the ``with_value`` request flag to ``True``."""
        self._body["with_value"] = True
        return self

    def verbose(self) -> Self:
        """Set the ``verbose`` request flag to ``True``."""
        self._body["verbose"] = True
        return self

    def aggregate(self, group_by: str = "time", period: str = "1h") -> Self:
        """Request an aggregated result grouped by ``group_by`` per ``period``."""
        self._body.update(
            {"aggregate": True, "group_by": group_by, "period": period}
        )
        return self

    # ---- local_* filters: inherited from _LocalFiltersMixin --------------

    async def as_pandas(self) -> pd.DataFrame:
        """Run the query and return the result as a pandas DataFrame.

        OHLCV / aggregate responses arrive with a ``window`` column instead
        of ``time``; it is renamed because downstream consumers expect
        ``time`` uniformly. Rows are sorted by the timestamp column when
        present, else by ``time``. A datetime ``time`` column is brought to
        millisecond precision where pandas supports it. Frames that came
        from ``window`` are additionally indexed by ``time`` — aggregate
        frames are naturally time-keyed, whereas transfer frames are row
        streams in which several rows may share a time.
        """
        arrow_table = await self._fetch_table()
        frame = arrow_table.to_pandas()
        renamed_window = "window" in frame.columns and "time" not in frame.columns
        if renamed_window:
            frame = frame.rename(columns={"window": "time"})

        sort_key = None
        for candidate in (_TIME_COL, "time"):
            if candidate in frame.columns:
                sort_key = candidate
                break
        if sort_key:
            frame = frame.sort_values(sort_key, ignore_index=True)

        # ms precision matches cache reads and the polars side.
        # ``.dt.as_unit`` exists on pandas 2.x; older versions are left as-is.
        if "time" in frame.columns and pd.api.types.is_datetime64_any_dtype(frame["time"]):
            try:
                frame["time"] = frame["time"].dt.as_unit("ms")
            except (AttributeError, TypeError):
                pass

        # Time-indexed aggregates are directly usable for resampling,
        # plotting and the backtester's set_index check.
        if renamed_window and "time" in frame.columns:
            frame = frame.set_index("time")
        return frame

    async def as_polars(self) -> pl.DataFrame:
        """Run the query and return the result as a polars DataFrame.

        A ``window`` column is renamed to ``time`` when no ``time`` column
        exists, rows are sorted by the timestamp column (else ``time``), and
        a datetime ``time`` column is cast to ``Datetime('ms', 'UTC')`` so
        joins with snapshot / cache reads (also ms+UTC) do not fail polars'
        precision-mismatch check.
        """
        arrow_table = await self._fetch_table()
        frame = pl.from_arrow(arrow_table)
        if "window" in frame.columns and "time" not in frame.columns:
            frame = frame.rename({"window": "time"})

        sort_key = None
        for candidate in (_TIME_COL, "time"):
            if candidate in frame.columns:
                sort_key = candidate
                break
        if sort_key:
            frame = frame.sort(sort_key)

        if "time" in frame.columns:
            dtype = frame.schema["time"]
            if isinstance(dtype, pl.Datetime):
                already_normalised = dtype.time_unit == "ms" and dtype.time_zone == "UTC"
                if not already_normalised:
                    frame = frame.with_columns(
                        pl.col("time").cast(pl.Datetime("ms", "UTC"))
                    )
        return frame

    async def as_parquet(self, key: str) -> None:
        """Save the query result on the server as the snapshot ``key``.

        Three routes, tried in this order:

        1. The query class declares ``_PROTOCOL`` and at least one network
           is selected: POST to ``/snapshots/save_multi``. The
           data-provider fans out per-network reads in subprocesses and
           merges via DuckDB, so bytes never travel back to the client. A
           single ``network`` string is coerced to a one-element list to
           take this route too, avoiding the legacy single-network flow
           that eagerly materializes the full result and can run out of
           memory on huge volumes such as TRON USDT.
        2. No ``_PROTOCOL`` but more than one network: ``save_key`` cannot
           be used because each per-network worker would clobber the same
           file. The result is fetched and concatenated client-side,
           written to a temporary parquet file, and streamed to
           ``POST /snapshots/save``.
        3. Otherwise: the worker-side ``save_key`` mechanism, i.e. a
           server-side save with no extra round-trip.
        """
        targets = self._body.get("networks") or []
        protocol = getattr(self, "_PROTOCOL", None)

        if protocol and not targets:
            lone_network = self._body.get("network")
            if lone_network:
                targets = [lone_network]

        if targets and protocol:
            # _resolve_path() side-effects min_amount (and similar) into
            # self._body — call it first so the POSTed body carries every
            # field the per-network reads need.
            if hasattr(self, "_resolve_path"):
                self._resolve_path()
            payload = {
                **self._body,
                "protocol": protocol,
                "save_key": key,
                "networks": targets,
            }
            # The server reads only `networks`; never send both forms.
            payload.pop("network", None)
            reply = await self._session.post(
                f"{self._base_url}/snapshots/save_multi",
                json=payload,
                timeout=None,
            )
            reply.raise_for_status()
            return

        if len(targets) > 1:
            import os
            import tempfile

            combined = await self.as_polars()
            # Spill to a temp file, drop the DataFrame, then stream the
            # file through an async generator: httpx AsyncClient rejects
            # sync file handles, and chunked iteration keeps peak memory
            # low.
            handle, spill_path = tempfile.mkstemp(suffix='.parquet')
            os.close(handle)
            try:
                combined.write_parquet(spill_path)
                del combined

                async def _chunks(path, chunk=1024 * 1024):
                    with open(path, 'rb') as source:
                        while piece := source.read(chunk):
                            yield piece

                byte_count = os.path.getsize(spill_path)
                reply = await self._session.post(
                    f"{self._base_url}/snapshots/save",
                    content=_chunks(spill_path),
                    headers={
                        "X-Snapshot-Key": key,
                        "Content-Type": "application/octet-stream",
                        "Content-Length": str(byte_count),
                    },
                    timeout=None,
                )
                reply.raise_for_status()
            finally:
                try:
                    os.unlink(spill_path)
                except FileNotFoundError:
                    pass
            return

        # save_key is set only for the duration of this one request.
        self._body["save_key"] = key
        try:
            await self._fetch_table()
        finally:
            del self._body["save_key"]
336
+
337
+
338
class CacheableQuery(BaseQuery):
    """Query builder that adds the cache and parallel request flags."""

    def cache(self, cache_type: str = "append") -> Self:
        """Set ``cache`` to ``True`` and record the requested ``cache_type``."""
        self._body.update({"cache": True, "cache_type": cache_type})
        return self

    def parallel(self) -> Self:
        """Set the ``parallel`` request flag to ``True``."""
        self._body["parallel"] = True
        return self
347
+
348
+
349
class EventQuery(CacheableQuery):
    """Cacheable query bound to one endpoint path, with multi-network fan-out."""

    def __init__(self, session: httpx.AsyncClient, base_url: str, path: str, body: dict):
        super().__init__(session, base_url, body)
        self._path = path

    def _resolve_path(self) -> str:
        """Return the endpoint path for the current body.

        When ``aggregate`` is set, everything from the last ``/read`` in
        the path onwards is replaced by ``/aggregate``; otherwise the path
        is returned unchanged.
        """
        if not self._body.get("aggregate"):
            return self._path
        prefix = self._path.rsplit("/read", 1)[0]
        return prefix + "/aggregate"

    async def _fetch_single(self, network: str) -> pa.Table:
        """Fetch the table for one ``network`` using a copy of the body."""
        # _resolve_path() may mutate self._body, so it runs before the copy.
        endpoint = self._resolve_path()
        request = {
            name: value for name, value in self._body.items() if name != "networks"
        }
        request["network"] = network
        return await fetch_table(self._session, self._base_url + endpoint, request)

    async def _fetch_table(self) -> pa.Table:
        """Fetch the result, fanning out when ``networks`` is set.

        Without ``networks`` a single request is sent with the body as-is.
        With ``networks`` one request per network runs concurrently and the
        non-empty tables are concatenated; when every result is empty or
        ``None`` the first result is returned unchanged.
        """
        import asyncio

        selected = self._body.get("networks")
        if not selected:
            endpoint = self._resolve_path()
            return await fetch_table(
                self._session, self._base_url + endpoint, self._body
            )

        self._auto_with_network()
        pending = [self._fetch_single(name) for name in selected]
        results = await asyncio.gather(*pending)
        populated = [
            table for table in results if table is not None and len(table) > 0
        ]
        if populated:
            return pa.concat_tables(populated)
        if results:
            return results[0]
        return pa.table({})