tessera-api 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tessera/__init__.py +68 -0
- tessera/_base.py +81 -0
- tessera/_generated/__init__.py +1 -0
- tessera/_generated/models.py +85 -0
- tessera/_resolver.py +41 -0
- tessera/_version.py +1 -0
- tessera/async_client.py +191 -0
- tessera/client.py +197 -0
- tessera/config.py +64 -0
- tessera/errors.py +145 -0
- tessera/models.py +116 -0
- tessera/py.typed +0 -0
- tessera/readers/__init__.py +5 -0
- tessera/readers/_common.py +41 -0
- tessera/readers/duckdb.py +67 -0
- tessera/readers/polars.py +70 -0
- tessera_api-0.1.0.dist-info/METADATA +140 -0
- tessera_api-0.1.0.dist-info/RECORD +20 -0
- tessera_api-0.1.0.dist-info/WHEEL +4 -0
- tessera_api-0.1.0.dist-info/licenses/LICENSE +674 -0
tessera/__init__.py
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
"""Tessera API client — Hyperliquid datasets straight into Polars & DuckDB.
|
|
2
|
+
|
|
3
|
+
Quickstart:
|
|
4
|
+
>>> import tessera
|
|
5
|
+
>>> client = tessera.TesseraClient() # reads $TESSERA_API_KEY
|
|
6
|
+
>>> df = client.read("gold_ohlcv_1m", "BTC", "2025-09")
|
|
7
|
+
|
|
8
|
+
See https://tesseralytics.dev for an API key and dataset reference.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
from ._version import __version__
|
|
14
|
+
from .async_client import AsyncTesseraClient
|
|
15
|
+
from .client import TesseraClient
|
|
16
|
+
from .errors import (
|
|
17
|
+
AuthenticationError,
|
|
18
|
+
BadRequestError,
|
|
19
|
+
ConfigurationError,
|
|
20
|
+
ForbiddenError,
|
|
21
|
+
InternalServerError,
|
|
22
|
+
MissingDependencyError,
|
|
23
|
+
NetworkError,
|
|
24
|
+
NotFoundError,
|
|
25
|
+
PresignExpiredError,
|
|
26
|
+
ServiceUnavailableError,
|
|
27
|
+
TesseraAPIError,
|
|
28
|
+
TesseraError,
|
|
29
|
+
)
|
|
30
|
+
from .models import (
|
|
31
|
+
DatasetsResponse,
|
|
32
|
+
DatasetSummary,
|
|
33
|
+
DownloadResponse,
|
|
34
|
+
MonthRange,
|
|
35
|
+
MonthSpan,
|
|
36
|
+
Partition,
|
|
37
|
+
PartitionRef,
|
|
38
|
+
PartitionsResponse,
|
|
39
|
+
)
|
|
40
|
+
|
|
41
|
+
# Public API, kept in plain string sort order. The names cover the two
# clients, the error hierarchy, the response/partition models and the
# package version.
__all__ = [
    "AsyncTesseraClient",
    "AuthenticationError",
    "BadRequestError",
    "ConfigurationError",
    "DatasetSummary",
    "DatasetsResponse",
    "DownloadResponse",
    "ForbiddenError",
    "InternalServerError",
    "MissingDependencyError",
    "MonthRange",
    "MonthSpan",
    "NetworkError",
    "NotFoundError",
    "Partition",
    "PartitionRef",
    "PartitionsResponse",
    "PresignExpiredError",
    "ServiceUnavailableError",
    "TesseraAPIError",
    "TesseraClient",
    "TesseraError",
    "__version__",
]
|
tessera/_base.py
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
"""Transport-agnostic plumbing shared by the sync and async clients.
|
|
2
|
+
|
|
3
|
+
The two clients differ only in how they *await* I/O; everything else — URL
|
|
4
|
+
construction, header building, error mapping, retry decisions — lives here so
|
|
5
|
+
there is a single source of truth.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from dataclasses import dataclass
|
|
11
|
+
from urllib.parse import quote
|
|
12
|
+
|
|
13
|
+
import httpx
|
|
14
|
+
|
|
15
|
+
from .config import RETRYABLE_STATUSES, ClientConfig
|
|
16
|
+
from .errors import error_from_response
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _seg(value: str) -> str:
|
|
20
|
+
"""URL-encode a single path segment (no slashes survive)."""
|
|
21
|
+
return quote(value, safe="")
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@dataclass(frozen=True, slots=True)
|
|
25
|
+
class PreparedRequest:
|
|
26
|
+
"""A resolved HTTP request: path + query params, ready to send."""
|
|
27
|
+
|
|
28
|
+
path: str
|
|
29
|
+
params: dict[str, str]
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def datasets_request() -> PreparedRequest:
    """Build the request for the dataset catalog (``/v1/datasets``, no query)."""
    return PreparedRequest(path="/v1/datasets", params={})
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def partitions_request(asset: str, coin: str | None, month: str | None) -> PreparedRequest:
    """Build the partition-listing request for ``asset``.

    ``coin`` and ``month`` become query parameters only when they are not
    ``None``; ``asset`` is percent-encoded into the path.
    """
    filters = (("coin", coin), ("month", month))
    query: dict[str, str] = {key: value for key, value in filters if value is not None}
    return PreparedRequest(f"/v1/datasets/{_seg(asset)}", query)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def download_request(asset: str, coin: str, month: str) -> PreparedRequest:
    """Build the presigned-URL request for one ``(asset, coin, month)`` partition.

    Each of the three values is percent-encoded as its own path segment.
    """
    encoded = "/".join(_seg(part) for part in (asset, coin, month))
    return PreparedRequest(f"/v1/datasets/{encoded}/download", {})
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def parse_retry_after(response: httpx.Response) -> float | None:
    """Parse the ``Retry-After`` header (seconds form only) if present.

    Args:
        response: The response whose headers are inspected.

    Returns:
        The delay in seconds, or ``None`` when the header is absent, is not a
        number (for example the HTTP-date form), or is not a finite,
        non-negative value.
    """
    raw = response.headers.get("retry-after")
    if raw is None:
        return None
    try:
        seconds = float(raw)
    except ValueError:
        return None
    # ``float`` also accepts "nan", "inf" and negative numbers. None of those
    # is a usable delay, so treat them like any other malformed header rather
    # than handing them to the retry loop. NaN fails the chained comparison
    # as well, so it is rejected here too.
    if not 0.0 <= seconds < float("inf"):
        return None
    return seconds
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def should_retry(status_code: int) -> bool:
    """Report whether ``status_code`` is listed in ``RETRYABLE_STATUSES``."""
    retryable = status_code in RETRYABLE_STATUSES
    return retryable
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def raise_for_status(response: httpx.Response) -> None:
    """Raise the exception built by ``error_from_response`` for an unsuccessful response.

    Does nothing when ``response.is_success`` is true; otherwise the status
    code and raw body are passed to ``error_from_response`` and the result is
    raised.
    """
    if not response.is_success:
        raise error_from_response(response.status_code, response.content)
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def build_httpx_kwargs(config: ClientConfig) -> dict[str, object]:
    """Assemble the constructor kwargs shared by ``httpx.Client`` and ``AsyncClient``."""
    kwargs: dict[str, object] = dict(
        base_url=config.base_url,
        headers=config.auth_headers(),
        timeout=config.timeout,
    )
    # Redirects stay off: the download endpoint answers with a 302 to the
    # presigned URL, but callers want the JSON body (which carries the expiry).
    kwargs["follow_redirects"] = False
    return kwargs
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Machine-generated schema models. Do not edit by hand — see ``make codegen``."""
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
# AUTO-GENERATED from openapi.json by datamodel-code-generator — DO NOT EDIT. Run: make codegen
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Annotated
|
|
6
|
+
|
|
7
|
+
from pydantic import BaseModel, Field
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class DownloadResponse(BaseModel):
    # A presigned URL together with its expiry timestamp.
    # Documented with comments on purpose: pydantic would expose a class
    # docstring as the model's schema description.
    expires_at: Annotated[str, Field(description="RFC3339 timestamp at which the URL expires.")]
    url: Annotated[str, Field(description="Presigned Tigris URL for the parquet object.")]
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class ErrorBody(BaseModel):
    """
    The error response body. Every error variant serialises to this shape —
    a single machine-readable `error` code. Declared as a real struct (rather
    than the inline `json!` below) only so it can be referenced as a response
    `body` in the OpenAPI spec; `into_response` still emits the same JSON.
    """

    # The only field: the error code string.
    error: Annotated[
        str,
        Field(
            description="Machine-readable error code, e.g. `not_found`, `unauthorized`.",
            examples=["not_found"],
        ),
    ]
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class MonthRange(BaseModel):
    # Earliest/latest partition months; both default to ``None``.
    # Documented with comments on purpose: pydantic would expose a class
    # docstring as the model's schema description.
    earliest: Annotated[
        str | None,
        Field(description="Earliest partition month (`YYYY-MM`), if any."),
    ] = None
    latest: Annotated[
        str | None,
        Field(description="Latest partition month (`YYYY-MM`), if any."),
    ] = None
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class Partition(BaseModel):
    # One catalog entry: coin, month, object size and optional last-write time.
    # Documented with comments on purpose: pydantic would expose a class
    # docstring as the model's schema description.
    coin: Annotated[str, Field(description="Coin symbol, e.g. `BTC`.", examples=["BTC"])]
    modified_at: Annotated[
        str | None,
        Field(description="RFC3339 timestamp of the last write, if known."),
    ] = None
    month: Annotated[str, Field(description="Partition month, `YYYY-MM`.", examples=["2025-09"])]
    size_bytes: Annotated[int, Field(description="Parquet object size in bytes.")]
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
class PartitionsResponse(BaseModel):
    # Partition listing for a single dataset.
    # Documented with comments on purpose: pydantic would expose a class
    # docstring as the model's schema description.
    asset: Annotated[str, Field(description="Dataset name the partitions belong to.")]
    generated_at: Annotated[
        str,
        Field(description="RFC3339 timestamp the catalog manifest was generated."),
    ]
    partitions: list[Partition]
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
class DatasetSummary(BaseModel):
    # Per-dataset summary: coins, month range, name and partition count.
    # Documented with comments on purpose: pydantic would expose a class
    # docstring as the model's schema description.
    coins: Annotated[list[str], Field(description="Distinct coins present, sorted.")]
    months: MonthRange
    name: Annotated[
        str,
        Field(
            description="Dataset name, e.g. `gold_ohlcv_1m`.",
            examples=["gold_ohlcv_1m"],
        ),
    ]
    partition_count: Annotated[
        int,
        Field(description="Number of partitions visible to the caller.", ge=0),
    ]
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
class DatasetsResponse(BaseModel):
    # Dataset catalog: the dataset summaries plus the manifest timestamp.
    # Documented with comments on purpose: pydantic would expose a class
    # docstring as the model's schema description.
    datasets: list[DatasetSummary]
    generated_at: Annotated[
        str,
        Field(description="RFC3339 timestamp the catalog manifest was generated."),
    ]
|
tessera/_resolver.py
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
"""Concurrent presigned-URL resolution.
|
|
2
|
+
|
|
3
|
+
Each ``(asset, coin, month)`` partition needs its own short-lived presigned URL,
|
|
4
|
+
minted via the download endpoint. For multi-partition reads we resolve them
|
|
5
|
+
concurrently — a thread pool for the sync client, ``asyncio.gather`` for async.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import asyncio
|
|
11
|
+
from collections.abc import Awaitable, Callable, Sequence
|
|
12
|
+
from concurrent.futures import ThreadPoolExecutor
|
|
13
|
+
|
|
14
|
+
from .models import PartitionRef
|
|
15
|
+
|
|
16
|
+
ResolvedPartition = tuple[PartitionRef, str]
|
|
17
|
+
|
|
18
|
+
_MAX_WORKERS = 8
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def resolve_sync(
|
|
22
|
+
fetch_url: Callable[[PartitionRef], str],
|
|
23
|
+
refs: Sequence[PartitionRef],
|
|
24
|
+
) -> list[ResolvedPartition]:
|
|
25
|
+
"""Resolve presigned URLs for ``refs`` concurrently, preserving order."""
|
|
26
|
+
if len(refs) == 1:
|
|
27
|
+
ref = refs[0]
|
|
28
|
+
return [(ref, fetch_url(ref))]
|
|
29
|
+
workers = min(_MAX_WORKERS, len(refs))
|
|
30
|
+
with ThreadPoolExecutor(max_workers=workers) as pool:
|
|
31
|
+
urls = list(pool.map(fetch_url, refs))
|
|
32
|
+
return list(zip(refs, urls, strict=True))
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
async def resolve_async(
|
|
36
|
+
fetch_url: Callable[[PartitionRef], Awaitable[str]],
|
|
37
|
+
refs: Sequence[PartitionRef],
|
|
38
|
+
) -> list[ResolvedPartition]:
|
|
39
|
+
"""Resolve presigned URLs for ``refs`` concurrently, preserving order."""
|
|
40
|
+
urls = await asyncio.gather(*(fetch_url(ref) for ref in refs))
|
|
41
|
+
return list(zip(refs, urls, strict=True))
|
tessera/_version.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.1.0"
|
tessera/async_client.py
ADDED
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
"""Asynchronous Tessera API client."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
from collections.abc import Sequence
|
|
7
|
+
from types import TracebackType
|
|
8
|
+
from typing import TYPE_CHECKING
|
|
9
|
+
|
|
10
|
+
import httpx
|
|
11
|
+
|
|
12
|
+
from . import _base, _resolver
|
|
13
|
+
from .config import DEFAULT_BASE_URL, ClientConfig, backoff_delay, resolve_api_key
|
|
14
|
+
from .errors import NetworkError
|
|
15
|
+
from .models import (
|
|
16
|
+
Coins,
|
|
17
|
+
DatasetsResponse,
|
|
18
|
+
DownloadResponse,
|
|
19
|
+
Months,
|
|
20
|
+
PartitionRef,
|
|
21
|
+
PartitionsResponse,
|
|
22
|
+
)
|
|
23
|
+
from .readers import duckdb as _duckdb_reader
|
|
24
|
+
from .readers import polars as _polars_reader
|
|
25
|
+
from .readers._common import expand_refs, frame_to_pandas
|
|
26
|
+
|
|
27
|
+
if TYPE_CHECKING:
|
|
28
|
+
import duckdb
|
|
29
|
+
import pandas
|
|
30
|
+
import polars as pl
|
|
31
|
+
|
|
32
|
+
__all__ = ["AsyncTesseraClient"]
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class AsyncTesseraClient:
    """Asyncio counterpart of :class:`~tessera.TesseraClient`.

    Every network-facing method is a coroutine. ``read`` and ``to_duckdb``
    hand the synchronous engine work to a worker thread through
    ``asyncio.to_thread``.

    Example:
        >>> async with AsyncTesseraClient() as client:
        ...     df = await client.read("gold_ohlcv_1m", "BTC", "2025-09")
    """

    def __init__(
        self,
        api_key: str | None = None,
        *,
        base_url: str = DEFAULT_BASE_URL,
        timeout: float = 30.0,
        max_retries: int = 3,
        http_client: httpx.AsyncClient | None = None,
    ) -> None:
        """Store the configuration and pick the HTTP client to use.

        When ``http_client`` is supplied it is used as-is and is never closed
        by this object; otherwise an ``httpx.AsyncClient`` is built from the
        configuration and owned by this instance.
        """
        self.config = ClientConfig(
            api_key=resolve_api_key(api_key),
            base_url=base_url,
            timeout=timeout,
            max_retries=max_retries,
        )
        if http_client is None:
            self._http = httpx.AsyncClient(**_base.build_httpx_kwargs(self.config))  # type: ignore[arg-type]
            self._owns_http = True
        else:
            self._http = http_client
            self._owns_http = False

    # -- lifecycle ---------------------------------------------------------
    async def aclose(self) -> None:
        """Close the HTTP client, but only if this instance created it."""
        if not self._owns_http:
            return
        await self._http.aclose()

    async def __aenter__(self) -> AsyncTesseraClient:
        return self

    async def __aexit__(
        self,
        exc_type: type[BaseException] | None,
        exc: BaseException | None,
        tb: TracebackType | None,
    ) -> None:
        await self.aclose()

    # -- transport ---------------------------------------------------------
    async def _request(self, prepared: _base.PreparedRequest) -> httpx.Response:
        """Send a GET for ``prepared``, retrying transport errors and retryable statuses.

        Up to ``max_retries + 1`` attempts are made. A transport error on the
        final attempt is re-raised as ``NetworkError``; a response on the
        final attempt (or any non-retryable one) goes through
        ``_base.raise_for_status`` and is then returned.
        """
        retry_budget = self.config.max_retries
        for attempt in range(retry_budget + 1):
            out_of_retries = attempt >= retry_budget
            try:
                response = await self._http.get(prepared.path, params=prepared.params or None)
            except httpx.TransportError as exc:
                if out_of_retries:
                    raise NetworkError(f"network error contacting Tessera: {exc}") from exc
                delay = backoff_delay(attempt, None)
            else:
                if out_of_retries or not _base.should_retry(response.status_code):
                    _base.raise_for_status(response)
                    return response
                delay = backoff_delay(attempt, _base.parse_retry_after(response))
            await asyncio.sleep(delay)
        raise AssertionError("unreachable")  # pragma: no cover

    # -- metadata endpoints ------------------------------------------------
    async def datasets(self) -> DatasetsResponse:
        """Fetch the dataset catalog visible to your plan."""
        payload = (await self._request(_base.datasets_request())).content
        return DatasetsResponse.model_validate_json(payload)

    async def partitions(
        self,
        asset: str,
        *,
        coin: str | None = None,
        month: str | None = None,
    ) -> PartitionsResponse:
        """Fetch the partitions of ``asset``; ``coin``/``month`` narrow the listing."""
        prepared = _base.partitions_request(asset, coin, month)
        payload = (await self._request(prepared)).content
        return PartitionsResponse.model_validate_json(payload)

    async def download_url(self, asset: str, coin: str, month: str) -> DownloadResponse:
        """Request a short-lived presigned download URL for a single partition."""
        prepared = _base.download_request(asset, coin, month)
        payload = (await self._request(prepared)).content
        return DownloadResponse.model_validate_json(payload)

    # -- partition helpers -------------------------------------------------
    def partition_refs(self, asset: str, coin: Coins, month: Months) -> list[PartitionRef]:
        """Turn ``(asset, coins, months)`` into concrete partition references."""
        return expand_refs(asset, coin, month)

    async def _resolve(self, refs: Sequence[PartitionRef]) -> list[_resolver.ResolvedPartition]:
        """Mint a presigned URL for each reference via ``download_url``."""

        async def mint(ref: PartitionRef) -> str:
            download = await self.download_url(ref.asset, ref.coin, ref.month)
            return download.url

        return await _resolver.resolve_async(mint, refs)

    # -- data loading ------------------------------------------------------
    async def scan(
        self,
        asset: str,
        coin: Coins,
        month: Months,
        *,
        columns: Sequence[str] | None = None,
    ) -> pl.LazyFrame:
        """Build a Polars ``LazyFrame`` over one or more partitions."""
        _polars_reader.ensure_available()
        refs = expand_refs(asset, coin, month)
        resolved = await self._resolve(refs)
        return _polars_reader.build_lazyframe(resolved, columns=columns)

    async def read(
        self,
        asset: str,
        coin: Coins,
        month: Months,
        *,
        columns: Sequence[str] | None = None,
    ) -> pl.DataFrame:
        """Load one or more partitions into a Polars ``DataFrame``.

        The collect step runs in a worker thread via ``asyncio.to_thread``.
        """
        _polars_reader.ensure_available()
        refs = expand_refs(asset, coin, month)
        resolved = await self._resolve(refs)
        plan = _polars_reader.build_lazyframe(resolved, columns=columns)
        return await asyncio.to_thread(_polars_reader.collect, plan)

    async def to_duckdb(
        self,
        asset: str,
        coin: Coins,
        month: Months,
        *,
        connection: duckdb.DuckDBPyConnection | None = None,
        columns: Sequence[str] | None = None,
    ) -> duckdb.DuckDBPyRelation:
        """Expose one or more partitions as a DuckDB relation for SQL querying.

        The relation is built in a worker thread via ``asyncio.to_thread``.
        """
        _duckdb_reader.ensure_available()
        refs = expand_refs(asset, coin, month)
        resolved = await self._resolve(refs)

        def open_relation() -> duckdb.DuckDBPyRelation:
            return _duckdb_reader.build_relation(resolved, connection=connection, columns=columns)

        return await asyncio.to_thread(open_relation)

    async def to_pandas(
        self,
        asset: str,
        coin: Coins,
        month: Months,
        *,
        columns: Sequence[str] | None = None,
    ) -> pandas.DataFrame:
        """Read through :meth:`read` and convert the result to a pandas ``DataFrame``."""
        polars_frame = await self.read(asset, coin, month, columns=columns)
        return frame_to_pandas(polars_frame)  # type: ignore[return-value]
|
tessera/client.py
ADDED
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
"""Synchronous Tessera API client."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import time
|
|
6
|
+
from collections.abc import Sequence
|
|
7
|
+
from types import TracebackType
|
|
8
|
+
from typing import TYPE_CHECKING
|
|
9
|
+
|
|
10
|
+
import httpx
|
|
11
|
+
|
|
12
|
+
from . import _base, _resolver
|
|
13
|
+
from .config import DEFAULT_BASE_URL, ClientConfig, backoff_delay, resolve_api_key
|
|
14
|
+
from .errors import NetworkError
|
|
15
|
+
from .models import (
|
|
16
|
+
Coins,
|
|
17
|
+
DatasetsResponse,
|
|
18
|
+
DownloadResponse,
|
|
19
|
+
Months,
|
|
20
|
+
PartitionRef,
|
|
21
|
+
PartitionsResponse,
|
|
22
|
+
)
|
|
23
|
+
from .readers import duckdb as _duckdb_reader
|
|
24
|
+
from .readers import polars as _polars_reader
|
|
25
|
+
from .readers._common import expand_refs, frame_to_pandas
|
|
26
|
+
|
|
27
|
+
if TYPE_CHECKING:
|
|
28
|
+
import duckdb
|
|
29
|
+
import pandas
|
|
30
|
+
import polars as pl
|
|
31
|
+
|
|
32
|
+
__all__ = ["TesseraClient"]
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class TesseraClient:
    """Blocking client for the Tessera API.

    Args:
        api_key: Your Tessera API key. Falls back to ``$TESSERA_API_KEY``.
        base_url: API base URL. Defaults to the production endpoint.
        timeout: Per-request timeout in seconds.
        max_retries: Retries for transient failures (429/5xx, network errors).
        http_client: Inject your own ``httpx.Client`` (advanced; overrides the
            timeout/base-url defaults — you are responsible for auth headers).

    Example:
        >>> client = TesseraClient()
        >>> df = client.read("gold_ohlcv_1m", "BTC", "2025-09")
    """

    def __init__(
        self,
        api_key: str | None = None,
        *,
        base_url: str = DEFAULT_BASE_URL,
        timeout: float = 30.0,
        max_retries: int = 3,
        http_client: httpx.Client | None = None,
    ) -> None:
        """Store the configuration and pick the HTTP client to use.

        When ``http_client`` is supplied it is used as-is and is never closed
        by this object; otherwise an ``httpx.Client`` is built from the
        configuration and owned by this instance.
        """
        self.config = ClientConfig(
            api_key=resolve_api_key(api_key),
            base_url=base_url,
            timeout=timeout,
            max_retries=max_retries,
        )
        if http_client is None:
            self._http = httpx.Client(**_base.build_httpx_kwargs(self.config))  # type: ignore[arg-type]
            self._owns_http = True
        else:
            self._http = http_client
            self._owns_http = False

    # -- lifecycle ---------------------------------------------------------
    def close(self) -> None:
        """Close the HTTP client, but only if this instance created it."""
        if not self._owns_http:
            return
        self._http.close()

    def __enter__(self) -> TesseraClient:
        return self

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc: BaseException | None,
        tb: TracebackType | None,
    ) -> None:
        self.close()

    # -- transport ---------------------------------------------------------
    def _request(self, prepared: _base.PreparedRequest) -> httpx.Response:
        """Send a GET for ``prepared``, retrying transport errors and retryable statuses.

        Up to ``max_retries + 1`` attempts are made. A transport error on the
        final attempt is re-raised as ``NetworkError``; a response on the
        final attempt (or any non-retryable one) goes through
        ``_base.raise_for_status`` and is then returned.
        """
        retry_budget = self.config.max_retries
        for attempt in range(retry_budget + 1):
            out_of_retries = attempt >= retry_budget
            try:
                response = self._http.get(prepared.path, params=prepared.params or None)
            except httpx.TransportError as exc:
                if out_of_retries:
                    raise NetworkError(f"network error contacting Tessera: {exc}") from exc
                delay = backoff_delay(attempt, None)
            else:
                if out_of_retries or not _base.should_retry(response.status_code):
                    _base.raise_for_status(response)
                    return response
                delay = backoff_delay(attempt, _base.parse_retry_after(response))
            time.sleep(delay)
        raise AssertionError("unreachable")  # pragma: no cover

    # -- metadata endpoints ------------------------------------------------
    def datasets(self) -> DatasetsResponse:
        """Fetch the dataset catalog visible to your plan."""
        payload = self._request(_base.datasets_request()).content
        return DatasetsResponse.model_validate_json(payload)

    def partitions(
        self,
        asset: str,
        *,
        coin: str | None = None,
        month: str | None = None,
    ) -> PartitionsResponse:
        """Fetch the partitions of ``asset``; ``coin``/``month`` narrow the listing."""
        prepared = _base.partitions_request(asset, coin, month)
        payload = self._request(prepared).content
        return PartitionsResponse.model_validate_json(payload)

    def download_url(self, asset: str, coin: str, month: str) -> DownloadResponse:
        """Request a short-lived presigned download URL for a single partition."""
        prepared = _base.download_request(asset, coin, month)
        payload = self._request(prepared).content
        return DownloadResponse.model_validate_json(payload)

    # -- partition helpers -------------------------------------------------
    def partition_refs(self, asset: str, coin: Coins, month: Months) -> list[PartitionRef]:
        """Turn ``(asset, coins, months)`` into concrete partition references."""
        return expand_refs(asset, coin, month)

    def _resolve(self, refs: Sequence[PartitionRef]) -> list[_resolver.ResolvedPartition]:
        """Mint a presigned URL for each reference via ``download_url``."""

        def mint(ref: PartitionRef) -> str:
            return self.download_url(ref.asset, ref.coin, ref.month).url

        return _resolver.resolve_sync(mint, refs)

    # -- data loading ------------------------------------------------------
    def scan(
        self,
        asset: str,
        coin: Coins,
        month: Months,
        *,
        columns: Sequence[str] | None = None,
    ) -> pl.LazyFrame:
        """Build a Polars ``LazyFrame`` over one or more partitions.

        The presigned URLs are minted during this call, while the data itself
        is only read on ``.collect()``. The URLs expire (~15 min), so collect
        promptly; for a long-lived query graph call ``scan`` again to get
        fresh URLs.
        """
        _polars_reader.ensure_available()
        refs = expand_refs(asset, coin, month)
        resolved = self._resolve(refs)
        return _polars_reader.build_lazyframe(resolved, columns=columns)

    def read(
        self,
        asset: str,
        coin: Coins,
        month: Months,
        *,
        columns: Sequence[str] | None = None,
    ) -> pl.DataFrame:
        """Load one or more partitions into a Polars ``DataFrame``."""
        _polars_reader.ensure_available()
        refs = expand_refs(asset, coin, month)
        resolved = self._resolve(refs)
        plan = _polars_reader.build_lazyframe(resolved, columns=columns)
        return _polars_reader.collect(plan)

    def to_duckdb(
        self,
        asset: str,
        coin: Coins,
        month: Months,
        *,
        connection: duckdb.DuckDBPyConnection | None = None,
        columns: Sequence[str] | None = None,
    ) -> duckdb.DuckDBPyRelation:
        """Expose one or more partitions as a DuckDB relation for SQL querying."""
        _duckdb_reader.ensure_available()
        refs = expand_refs(asset, coin, month)
        resolved = self._resolve(refs)
        return _duckdb_reader.build_relation(resolved, connection=connection, columns=columns)

    def to_pandas(
        self,
        asset: str,
        coin: Coins,
        month: Months,
        *,
        columns: Sequence[str] | None = None,
    ) -> pandas.DataFrame:
        """Read through :meth:`read` and convert the result to a pandas ``DataFrame``."""
        polars_frame = self.read(asset, coin, month, columns=columns)
        return frame_to_pandas(polars_frame)  # type: ignore[return-value]
|