xrtm-data 0.2.4__tar.gz → 0.2.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {xrtm_data-0.2.4 → xrtm_data-0.2.5}/PKG-INFO +1 -1
- {xrtm_data-0.2.4 → xrtm_data-0.2.5}/pyproject.toml +1 -1
- {xrtm_data-0.2.4 → xrtm_data-0.2.5}/src/xrtm/data/__init__.py +4 -1
- {xrtm_data-0.2.4 → xrtm_data-0.2.5}/src/xrtm/data/core/__init__.py +2 -2
- {xrtm_data-0.2.4 → xrtm_data-0.2.5}/src/xrtm/data/core/interfaces.py +26 -5
- {xrtm_data-0.2.4 → xrtm_data-0.2.5}/src/xrtm/data/core/schemas/forecast.py +9 -1
- {xrtm_data-0.2.4 → xrtm_data-0.2.5}/src/xrtm/data/core/schemas/trade.py +36 -2
- {xrtm_data-0.2.4 → xrtm_data-0.2.5}/src/xrtm/data/corpora/real_binary.py +6 -2
- {xrtm_data-0.2.4 → xrtm_data-0.2.5}/src/xrtm/data/providers/local/csv.py +15 -4
- xrtm_data-0.2.5/src/xrtm/data/providers/online/polymarket.py +256 -0
- {xrtm_data-0.2.4 → xrtm_data-0.2.5}/src/xrtm/data/providers/subgraph/polymarket.py +61 -7
- {xrtm_data-0.2.4 → xrtm_data-0.2.5}/src/xrtm/data/version.py +1 -1
- {xrtm_data-0.2.4 → xrtm_data-0.2.5}/src/xrtm_data.egg-info/PKG-INFO +1 -1
- {xrtm_data-0.2.4 → xrtm_data-0.2.5}/tests/test_local_datasource.py +14 -0
- {xrtm_data-0.2.4 → xrtm_data-0.2.5}/tests/test_polymarket_source.py +52 -3
- {xrtm_data-0.2.4 → xrtm_data-0.2.5}/tests/test_polymarket_subgraph.py +78 -0
- {xrtm_data-0.2.4 → xrtm_data-0.2.5}/tests/test_schemas.py +50 -2
- xrtm_data-0.2.4/src/xrtm/data/providers/online/polymarket.py +0 -167
- {xrtm_data-0.2.4 → xrtm_data-0.2.5}/LICENSE +0 -0
- {xrtm_data-0.2.4 → xrtm_data-0.2.5}/README.md +0 -0
- {xrtm_data-0.2.4 → xrtm_data-0.2.5}/setup.cfg +0 -0
- {xrtm_data-0.2.4 → xrtm_data-0.2.5}/src/xrtm/data/cli/__init__.py +0 -0
- {xrtm_data-0.2.4 → xrtm_data-0.2.5}/src/xrtm/data/core/schemas/__init__.py +0 -0
- {xrtm_data-0.2.4 → xrtm_data-0.2.5}/src/xrtm/data/core/schemas/prior.py +0 -0
- {xrtm_data-0.2.4 → xrtm_data-0.2.5}/src/xrtm/data/corpora/__init__.py +0 -0
- {xrtm_data-0.2.4 → xrtm_data-0.2.5}/src/xrtm/data/kit/__init__.py +0 -0
- {xrtm_data-0.2.4 → xrtm_data-0.2.5}/src/xrtm/data/kit/processors/__init__.py +0 -0
- {xrtm_data-0.2.4 → xrtm_data-0.2.5}/src/xrtm/data/providers/__init__.py +0 -0
- {xrtm_data-0.2.4 → xrtm_data-0.2.5}/src/xrtm/data/providers/local/__init__.py +0 -0
- {xrtm_data-0.2.4 → xrtm_data-0.2.5}/src/xrtm/data/providers/online/__init__.py +0 -0
- {xrtm_data-0.2.4 → xrtm_data-0.2.5}/src/xrtm/data/providers/subgraph/__init__.py +0 -0
- {xrtm_data-0.2.4 → xrtm_data-0.2.5}/src/xrtm_data.egg-info/SOURCES.txt +0 -0
- {xrtm_data-0.2.4 → xrtm_data-0.2.5}/src/xrtm_data.egg-info/dependency_links.txt +0 -0
- {xrtm_data-0.2.4 → xrtm_data-0.2.5}/src/xrtm_data.egg-info/entry_points.txt +0 -0
- {xrtm_data-0.2.4 → xrtm_data-0.2.5}/src/xrtm_data.egg-info/requires.txt +0 -0
- {xrtm_data-0.2.4 → xrtm_data-0.2.5}/src/xrtm_data.egg-info/top_level.txt +0 -0
- {xrtm_data-0.2.4 → xrtm_data-0.2.5}/tests/test_beta_fitter.py +0 -0
- {xrtm_data-0.2.4 → xrtm_data-0.2.5}/tests/test_cli_loading.py +0 -0
- {xrtm_data-0.2.4 → xrtm_data-0.2.5}/tests/test_cli_ux.py +0 -0
- {xrtm_data-0.2.4 → xrtm_data-0.2.5}/tests/test_prior_schemas.py +0 -0
- {xrtm_data-0.2.4 → xrtm_data-0.2.5}/tests/test_real_binary_corpus.py +0 -0
|
@@ -31,7 +31,7 @@ Example:
|
|
|
31
31
|
"""
|
|
32
32
|
|
|
33
33
|
# Core interfaces
|
|
34
|
-
from xrtm.data.core import DataSource
|
|
34
|
+
from xrtm.data.core import DataSource, DataSourceError, SourceFetchError, SourceTemporalIntegrityError
|
|
35
35
|
|
|
36
36
|
# Core schemas (public API)
|
|
37
37
|
from xrtm.data.core.schemas import (
|
|
@@ -46,6 +46,9 @@ from xrtm.data.core.schemas import (
|
|
|
46
46
|
__all__ = [
|
|
47
47
|
# Interfaces
|
|
48
48
|
"DataSource",
|
|
49
|
+
"DataSourceError",
|
|
50
|
+
"SourceFetchError",
|
|
51
|
+
"SourceTemporalIntegrityError",
|
|
49
52
|
# Schemas
|
|
50
53
|
"MetadataBase",
|
|
51
54
|
"ForecastQuestion",
|
|
@@ -21,6 +21,6 @@ providers must implement. The core module is domain-agnostic and MUST NOT
|
|
|
21
21
|
import from kit/ or providers/.
|
|
22
22
|
"""
|
|
23
23
|
|
|
24
|
-
from xrtm.data.core.interfaces import DataSource
|
|
24
|
+
from xrtm.data.core.interfaces import DataSource, DataSourceError, SourceFetchError, SourceTemporalIntegrityError
|
|
25
25
|
|
|
26
|
-
__all__ = ["DataSource"]
|
|
26
|
+
__all__ = ["DataSource", "DataSourceError", "SourceFetchError", "SourceTemporalIntegrityError"]
|
|
@@ -23,18 +23,31 @@ data provider, regardless of the source.
|
|
|
23
23
|
Example:
|
|
24
24
|
>>> from xrtm.data.core import DataSource
|
|
25
25
|
>>> class MySource(DataSource):
|
|
26
|
-
... async def fetch_questions(self, query=None, limit=5):
|
|
26
|
+
... async def fetch_questions(self, query=None, limit=5, *, snapshot_time=None):
|
|
27
27
|
... return []
|
|
28
28
|
... async def get_question_by_id(self, question_id):
|
|
29
29
|
... return None
|
|
30
30
|
"""
|
|
31
31
|
|
|
32
32
|
import abc
|
|
33
|
+
from datetime import datetime
|
|
33
34
|
from typing import List, Optional
|
|
34
35
|
|
|
35
36
|
from xrtm.data.core.schemas.forecast import ForecastQuestion
|
|
36
37
|
|
|
37
38
|
|
|
39
|
+
class DataSourceError(RuntimeError):
|
|
40
|
+
r"""Base exception for data source failures."""
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class SourceFetchError(DataSourceError):
|
|
44
|
+
r"""Raised when a provider cannot fetch or decode source data."""
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class SourceTemporalIntegrityError(DataSourceError):
|
|
48
|
+
r"""Raised when a provider cannot satisfy a requested snapshot safely."""
|
|
49
|
+
|
|
50
|
+
|
|
38
51
|
class DataSource(abc.ABC):
|
|
39
52
|
r"""
|
|
40
53
|
Abstract interface for gathering or streaming forecasting workloads.
|
|
@@ -47,18 +60,22 @@ class DataSource(abc.ABC):
|
|
|
47
60
|
|
|
48
61
|
Example:
|
|
49
62
|
>>> class LocalSource(DataSource):
|
|
50
|
-
... async def fetch_questions(self, query=None, limit=5):
|
|
63
|
+
... async def fetch_questions(self, query=None, limit=5, *, snapshot_time=None):
|
|
51
64
|
... return [ForecastQuestion(id="1", title="Test")]
|
|
52
65
|
"""
|
|
53
66
|
|
|
54
67
|
@abc.abstractmethod
|
|
55
|
-
async def fetch_questions(
|
|
68
|
+
async def fetch_questions(
|
|
69
|
+
self, query: Optional[str] = None, limit: int = 5, *, snapshot_time: Optional[datetime] = None
|
|
70
|
+
) -> List[ForecastQuestion]:
|
|
56
71
|
r"""
|
|
57
72
|
Fetch a list of forecast questions from the data source.
|
|
58
73
|
|
|
59
74
|
Args:
|
|
60
75
|
query: Optional search/filter string.
|
|
61
76
|
limit: Maximum number of questions to return.
|
|
77
|
+
snapshot_time: Optional end-of-history timestamp. Providers that cannot
|
|
78
|
+
satisfy historical snapshots must surface a temporal integrity error.
|
|
62
79
|
|
|
63
80
|
Returns:
|
|
64
81
|
List of ForecastQuestion objects matching the criteria.
|
|
@@ -66,12 +83,16 @@ class DataSource(abc.ABC):
|
|
|
66
83
|
pass
|
|
67
84
|
|
|
68
85
|
@abc.abstractmethod
|
|
69
|
-
async def get_question_by_id(
|
|
86
|
+
async def get_question_by_id(
|
|
87
|
+
self, question_id: str, *, snapshot_time: Optional[datetime] = None
|
|
88
|
+
) -> Optional[ForecastQuestion]:
|
|
70
89
|
r"""
|
|
71
90
|
Retrieve a single question by its unique identifier.
|
|
72
91
|
|
|
73
92
|
Args:
|
|
74
93
|
question_id: The unique identifier of the question.
|
|
94
|
+
snapshot_time: Optional end-of-history timestamp. Providers that cannot
|
|
95
|
+
satisfy historical snapshots must surface a temporal integrity error.
|
|
75
96
|
|
|
76
97
|
Returns:
|
|
77
98
|
The ForecastQuestion if found, None otherwise.
|
|
@@ -79,4 +100,4 @@ class DataSource(abc.ABC):
|
|
|
79
100
|
pass
|
|
80
101
|
|
|
81
102
|
|
|
82
|
-
__all__ = ["DataSource"]
|
|
103
|
+
__all__ = ["DataSource", "DataSourceError", "SourceFetchError", "SourceTemporalIntegrityError"]
|
|
@@ -28,7 +28,7 @@ Example:
|
|
|
28
28
|
from datetime import datetime, timezone
|
|
29
29
|
from typing import Any, Dict, List, Optional
|
|
30
30
|
|
|
31
|
-
from pydantic import AliasChoices, BaseModel, ConfigDict, Field, model_validator
|
|
31
|
+
from pydantic import AliasChoices, BaseModel, ConfigDict, Field, field_validator, model_validator
|
|
32
32
|
|
|
33
33
|
|
|
34
34
|
class MetadataBase(BaseModel):
|
|
@@ -66,6 +66,14 @@ class MetadataBase(BaseModel):
|
|
|
66
66
|
source_version: Optional[str] = Field(None, description="Version of the data source")
|
|
67
67
|
raw_data: Optional[Dict[str, Any]] = Field(None, description="Original unprocessed data")
|
|
68
68
|
|
|
69
|
+
@field_validator("created_at", "snapshot_time", mode="after")
|
|
70
|
+
@classmethod
|
|
71
|
+
def _normalize_temporal_fields(cls, value: datetime) -> datetime:
|
|
72
|
+
r"""Store temporal boundary fields as timezone-aware UTC datetimes."""
|
|
73
|
+
if value.tzinfo is None:
|
|
74
|
+
return value.replace(tzinfo=timezone.utc)
|
|
75
|
+
return value.astimezone(timezone.utc)
|
|
76
|
+
|
|
69
77
|
def get(self, key: str, default: Any = None) -> Any:
|
|
70
78
|
r"""Backward compatibility for dict-like access."""
|
|
71
79
|
return getattr(self, key, default)
|
|
@@ -31,10 +31,17 @@ Example:
|
|
|
31
31
|
... )
|
|
32
32
|
"""
|
|
33
33
|
|
|
34
|
-
from datetime import datetime
|
|
34
|
+
from datetime import datetime, timezone
|
|
35
35
|
from typing import Optional
|
|
36
36
|
|
|
37
|
-
from pydantic import BaseModel, Field
|
|
37
|
+
from pydantic import BaseModel, Field, field_validator, model_validator
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _as_utc(value: datetime) -> datetime:
|
|
41
|
+
r"""Normalize datetimes to timezone-aware UTC without rejecting legacy naive inputs."""
|
|
42
|
+
if value.tzinfo is None:
|
|
43
|
+
return value.replace(tzinfo=timezone.utc)
|
|
44
|
+
return value.astimezone(timezone.utc)
|
|
38
45
|
|
|
39
46
|
|
|
40
47
|
class TradeEvent(BaseModel):
|
|
@@ -98,6 +105,12 @@ class TradeEvent(BaseModel):
|
|
|
98
105
|
description="Transaction hash for verification",
|
|
99
106
|
)
|
|
100
107
|
|
|
108
|
+
@field_validator("timestamp", mode="after")
|
|
109
|
+
@classmethod
|
|
110
|
+
def _normalize_timestamp(cls, value: datetime) -> datetime:
|
|
111
|
+
r"""Normalize trade timestamps to UTC to make window comparisons stable."""
|
|
112
|
+
return _as_utc(value)
|
|
113
|
+
|
|
101
114
|
@property
|
|
102
115
|
def yes_weight(self) -> float:
|
|
103
116
|
r"""Volume-weighted contribution to Yes outcome: price × amount."""
|
|
@@ -149,6 +162,27 @@ class TradeWindow(BaseModel):
|
|
|
149
162
|
description="Identifier for the market these trades belong to",
|
|
150
163
|
)
|
|
151
164
|
|
|
165
|
+
@field_validator("start_time", "end_time", mode="after")
|
|
166
|
+
@classmethod
|
|
167
|
+
def _normalize_window_boundary(cls, value: datetime) -> datetime:
|
|
168
|
+
r"""Normalize window boundaries to UTC before enforcing leakage invariants."""
|
|
169
|
+
return _as_utc(value)
|
|
170
|
+
|
|
171
|
+
@model_validator(mode="after")
|
|
172
|
+
def _validate_temporal_bounds(self) -> "TradeWindow":
|
|
173
|
+
r"""Ensure a trade window cannot contain future or pre-window events."""
|
|
174
|
+
if self.end_time < self.start_time:
|
|
175
|
+
raise ValueError("end_time must not precede start_time")
|
|
176
|
+
|
|
177
|
+
leaked = [
|
|
178
|
+
trade.timestamp
|
|
179
|
+
for trade in self.trades
|
|
180
|
+
if trade.timestamp < self.start_time or trade.timestamp > self.end_time
|
|
181
|
+
]
|
|
182
|
+
if leaked:
|
|
183
|
+
raise ValueError("trades must fall within [start_time, end_time]")
|
|
184
|
+
return self
|
|
185
|
+
|
|
152
186
|
@property
|
|
153
187
|
def total_volume(self) -> float:
|
|
154
188
|
r"""Total trading volume in the window."""
|
|
@@ -486,7 +486,9 @@ class RealBinaryCorpusSource(DataSource):
|
|
|
486
486
|
self._questions = [record.to_forecast_question() for record in self._records]
|
|
487
487
|
self._questions_by_id = {question.id: question for question in self._questions}
|
|
488
488
|
|
|
489
|
-
async def fetch_questions(
|
|
489
|
+
async def fetch_questions(
|
|
490
|
+
self, query: Optional[str] = None, limit: int = 5, *, snapshot_time: Optional[datetime] = None
|
|
491
|
+
) -> list[ForecastQuestion]:
|
|
490
492
|
r"""Fetch deterministic corpus questions, optionally filtering title/content text."""
|
|
491
493
|
query_lower = query.lower() if query else None
|
|
492
494
|
matches: list[ForecastQuestion] = []
|
|
@@ -498,7 +500,9 @@ class RealBinaryCorpusSource(DataSource):
|
|
|
498
500
|
break
|
|
499
501
|
return matches
|
|
500
502
|
|
|
501
|
-
async def get_question_by_id(
|
|
503
|
+
async def get_question_by_id(
|
|
504
|
+
self, question_id: str, *, snapshot_time: Optional[datetime] = None
|
|
505
|
+
) -> Optional[ForecastQuestion]:
|
|
502
506
|
r"""Retrieve a deterministic corpus question by id."""
|
|
503
507
|
question = self._questions_by_id.get(question_id)
|
|
504
508
|
if question is None:
|
|
@@ -29,8 +29,11 @@ import asyncio
|
|
|
29
29
|
import json
|
|
30
30
|
import logging
|
|
31
31
|
import threading
|
|
32
|
+
from datetime import datetime
|
|
32
33
|
from typing import Any, List, Optional
|
|
33
34
|
|
|
35
|
+
from pydantic import ValidationError
|
|
36
|
+
|
|
34
37
|
from xrtm.data.core import DataSource
|
|
35
38
|
from xrtm.data.core.schemas import ForecastQuestion
|
|
36
39
|
|
|
@@ -75,7 +78,7 @@ class LocalDataSource(DataSource):
|
|
|
75
78
|
try:
|
|
76
79
|
with open(self.file_path, "r") as f:
|
|
77
80
|
raw_data = json.load(f)
|
|
78
|
-
except Exception as e:
|
|
81
|
+
except (OSError, json.JSONDecodeError) as e:
|
|
79
82
|
logger.error("Failed to read local questions from %s: %s", self.file_path, e)
|
|
80
83
|
self._questions = []
|
|
81
84
|
self._questions_by_id = {}
|
|
@@ -95,7 +98,7 @@ class LocalDataSource(DataSource):
|
|
|
95
98
|
continue
|
|
96
99
|
try:
|
|
97
100
|
question = ForecastQuestion(**item)
|
|
98
|
-
except Exception as e:
|
|
101
|
+
except ValidationError as e:
|
|
99
102
|
logger.warning("Skipping invalid local question %s from %s: %s", idx, self.file_path, e)
|
|
100
103
|
continue
|
|
101
104
|
|
|
@@ -124,13 +127,17 @@ class LocalDataSource(DataSource):
|
|
|
124
127
|
break
|
|
125
128
|
return questions
|
|
126
129
|
|
|
127
|
-
async def fetch_questions(
|
|
130
|
+
async def fetch_questions(
|
|
131
|
+
self, query: Optional[str] = None, limit: int = 5, *, snapshot_time: Optional[datetime] = None
|
|
132
|
+
) -> List[ForecastQuestion]:
|
|
128
133
|
r"""
|
|
129
134
|
Fetch questions from the local JSON file.
|
|
130
135
|
|
|
131
136
|
Args:
|
|
132
137
|
query: Optional search string to filter questions by title.
|
|
133
138
|
limit: Maximum number of questions to return.
|
|
139
|
+
snapshot_time: Accepted for DataSource compatibility. Local snapshots
|
|
140
|
+
are already frozen by the file contents.
|
|
134
141
|
|
|
135
142
|
Returns:
|
|
136
143
|
List of ForecastQuestion objects matching the criteria.
|
|
@@ -152,12 +159,16 @@ class LocalDataSource(DataSource):
|
|
|
152
159
|
return []
|
|
153
160
|
return [question.model_dump(mode="json") for question in self._questions]
|
|
154
161
|
|
|
155
|
-
async def get_question_by_id(
|
|
162
|
+
async def get_question_by_id(
|
|
163
|
+
self, question_id: str, *, snapshot_time: Optional[datetime] = None
|
|
164
|
+
) -> Optional[ForecastQuestion]:
|
|
156
165
|
r"""
|
|
157
166
|
Retrieve a single question by ID from the local file.
|
|
158
167
|
|
|
159
168
|
Args:
|
|
160
169
|
question_id: The unique identifier of the question.
|
|
170
|
+
snapshot_time: Accepted for DataSource compatibility. Local snapshots
|
|
171
|
+
are already frozen by the file contents.
|
|
161
172
|
|
|
162
173
|
Returns:
|
|
163
174
|
The ForecastQuestion if found, None otherwise.
|
|
@@ -0,0 +1,256 @@
|
|
|
1
|
+
# coding=utf-8
|
|
2
|
+
# Copyright 2026 XRTM Team. All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
16
|
+
r"""
|
|
17
|
+
Polymarket Gamma API data source.
|
|
18
|
+
|
|
19
|
+
This module provides a DataSource implementation that fetches forecast
|
|
20
|
+
questions from the Polymarket Gamma API.
|
|
21
|
+
|
|
22
|
+
Example:
|
|
23
|
+
>>> from xrtm.data.providers.online import PolymarketSource
|
|
24
|
+
>>> source = PolymarketSource()
|
|
25
|
+
>>> questions = await source.fetch_questions(query="election", limit=5)
|
|
26
|
+
"""
|
|
27
|
+
|
|
28
|
+
import logging
|
|
29
|
+
from contextlib import asynccontextmanager
|
|
30
|
+
from datetime import datetime, timedelta, timezone
|
|
31
|
+
from types import TracebackType
|
|
32
|
+
from typing import Any, Dict, List, Optional
|
|
33
|
+
from urllib.parse import urlencode
|
|
34
|
+
|
|
35
|
+
import aiohttp
|
|
36
|
+
|
|
37
|
+
from xrtm.data.core import DataSource, DataSourceError, SourceFetchError, SourceTemporalIntegrityError
|
|
38
|
+
from xrtm.data.core.schemas import ForecastQuestion, MetadataBase
|
|
39
|
+
|
|
40
|
+
logger = logging.getLogger(__name__)
|
|
41
|
+
|
|
42
|
+
__all__ = ["PolymarketSource"]
|
|
43
|
+
|
|
44
|
+
_LIVE_SNAPSHOT_TOLERANCE = timedelta(seconds=60)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class PolymarketSource(DataSource):
|
|
48
|
+
r"""
|
|
49
|
+
DataSource implementation that fetches from the Polymarket Gamma API.
|
|
50
|
+
|
|
51
|
+
This provider connects to Polymarket's public Gamma API to retrieve
|
|
52
|
+
event metadata for forecasting. For trade history with OrderFilled
|
|
53
|
+
events, see the subgraph provider (to be added).
|
|
54
|
+
|
|
55
|
+
Attributes:
|
|
56
|
+
API_BASE: Base URL for the Polymarket Gamma API.
|
|
57
|
+
|
|
58
|
+
Example:
|
|
59
|
+
>>> source = PolymarketSource()
|
|
60
|
+
>>> questions = await source.fetch_questions(limit=10)
|
|
61
|
+
>>> print(f"Fetched {len(questions)} questions")
|
|
62
|
+
"""
|
|
63
|
+
|
|
64
|
+
API_BASE = "https://gamma-api.polymarket.com"
|
|
65
|
+
|
|
66
|
+
def __init__(
|
|
67
|
+
self,
|
|
68
|
+
session: Optional[aiohttp.ClientSession] = None,
|
|
69
|
+
*,
|
|
70
|
+
raise_on_error: bool = False,
|
|
71
|
+
) -> None:
|
|
72
|
+
self._session = session
|
|
73
|
+
self._owns_session = False
|
|
74
|
+
self.raise_on_error = raise_on_error
|
|
75
|
+
self.last_error: Optional[DataSourceError] = None
|
|
76
|
+
|
|
77
|
+
async def __aenter__(self) -> "PolymarketSource":
|
|
78
|
+
if self._session is None or self._session.closed:
|
|
79
|
+
self._session = aiohttp.ClientSession()
|
|
80
|
+
self._owns_session = True
|
|
81
|
+
return self
|
|
82
|
+
|
|
83
|
+
async def __aexit__(
|
|
84
|
+
self,
|
|
85
|
+
exc_type: Optional[type[BaseException]],
|
|
86
|
+
exc: Optional[BaseException],
|
|
87
|
+
traceback: Optional[TracebackType],
|
|
88
|
+
) -> None:
|
|
89
|
+
if self._owns_session and self._session is not None:
|
|
90
|
+
await self._session.close()
|
|
91
|
+
self._session = None
|
|
92
|
+
self._owns_session = False
|
|
93
|
+
|
|
94
|
+
@asynccontextmanager
|
|
95
|
+
async def _get_session(self):
|
|
96
|
+
if self._session is not None and not self._session.closed:
|
|
97
|
+
yield self._session
|
|
98
|
+
return
|
|
99
|
+
|
|
100
|
+
session = aiohttp.ClientSession()
|
|
101
|
+
try:
|
|
102
|
+
yield session
|
|
103
|
+
finally:
|
|
104
|
+
await session.close()
|
|
105
|
+
|
|
106
|
+
def _fail(self, error: DataSourceError) -> None:
|
|
107
|
+
self.last_error = error
|
|
108
|
+
logger.error("%s", error)
|
|
109
|
+
if self.raise_on_error:
|
|
110
|
+
raise error
|
|
111
|
+
|
|
112
|
+
@staticmethod
|
|
113
|
+
def _as_utc(value: datetime) -> datetime:
|
|
114
|
+
if value.tzinfo is None:
|
|
115
|
+
return value.replace(tzinfo=timezone.utc)
|
|
116
|
+
return value.astimezone(timezone.utc)
|
|
117
|
+
|
|
118
|
+
def _live_snapshot_time(self, snapshot_time: Optional[datetime]) -> datetime:
|
|
119
|
+
request_time = datetime.now(timezone.utc)
|
|
120
|
+
if snapshot_time is None:
|
|
121
|
+
return request_time
|
|
122
|
+
|
|
123
|
+
snapshot_utc = self._as_utc(snapshot_time)
|
|
124
|
+
if snapshot_utc < request_time - _LIVE_SNAPSHOT_TOLERANCE:
|
|
125
|
+
raise SourceTemporalIntegrityError(
|
|
126
|
+
"Polymarket Gamma is a live-only API and cannot satisfy historical "
|
|
127
|
+
f"snapshot_time={snapshot_utc.isoformat()} without future leakage."
|
|
128
|
+
)
|
|
129
|
+
if snapshot_utc > request_time + _LIVE_SNAPSHOT_TOLERANCE:
|
|
130
|
+
raise SourceTemporalIntegrityError(
|
|
131
|
+
f"snapshot_time={snapshot_utc.isoformat()} is in the future for a live Polymarket request."
|
|
132
|
+
)
|
|
133
|
+
return snapshot_utc
|
|
134
|
+
|
|
135
|
+
async def fetch_questions(
|
|
136
|
+
self,
|
|
137
|
+
query: Optional[str] = None,
|
|
138
|
+
limit: int = 5,
|
|
139
|
+
*,
|
|
140
|
+
snapshot_time: Optional[datetime] = None,
|
|
141
|
+
) -> List[ForecastQuestion]:
|
|
142
|
+
r"""
|
|
143
|
+
Fetch active forecast questions from Polymarket.
|
|
144
|
+
|
|
145
|
+
Args:
|
|
146
|
+
query: Optional search string to filter events.
|
|
147
|
+
limit: Maximum number of questions to return.
|
|
148
|
+
|
|
149
|
+
Returns:
|
|
150
|
+
List of ForecastQuestion objects from active markets.
|
|
151
|
+
"""
|
|
152
|
+
try:
|
|
153
|
+
effective_snapshot_time = self._live_snapshot_time(snapshot_time)
|
|
154
|
+
except SourceTemporalIntegrityError as e:
|
|
155
|
+
self._fail(e)
|
|
156
|
+
return []
|
|
157
|
+
|
|
158
|
+
params = {"active": "true", "closed": "false", "limit": str(limit)}
|
|
159
|
+
if query:
|
|
160
|
+
params["search"] = query
|
|
161
|
+
url = f"{self.API_BASE}/events?{urlencode(params)}"
|
|
162
|
+
|
|
163
|
+
try:
|
|
164
|
+
async with self._get_session() as session:
|
|
165
|
+
async with session.get(url) as resp:
|
|
166
|
+
if resp.status != 200:
|
|
167
|
+
self._fail(SourceFetchError(f"Polymarket API returned status {resp.status} for events request."))
|
|
168
|
+
return []
|
|
169
|
+
|
|
170
|
+
data = await resp.json()
|
|
171
|
+
if not isinstance(data, list):
|
|
172
|
+
self._fail(SourceFetchError("Polymarket API returned a non-list events payload."))
|
|
173
|
+
return []
|
|
174
|
+
|
|
175
|
+
questions: list[ForecastQuestion] = []
|
|
176
|
+
for idx, item in enumerate(data):
|
|
177
|
+
if not isinstance(item, dict):
|
|
178
|
+
logger.warning("Skipping Polymarket event %s: item must be an object.", idx)
|
|
179
|
+
continue
|
|
180
|
+
try:
|
|
181
|
+
questions.append(self._normalize(item, effective_snapshot_time))
|
|
182
|
+
except (TypeError, ValueError) as e:
|
|
183
|
+
logger.warning("Skipping invalid Polymarket event %s: %s", idx, e)
|
|
184
|
+
self.last_error = None
|
|
185
|
+
return questions
|
|
186
|
+
except (aiohttp.ClientError, TimeoutError, TypeError, ValueError) as e:
|
|
187
|
+
self._fail(SourceFetchError(f"Failed to fetch questions from Polymarket: {e}"))
|
|
188
|
+
return []
|
|
189
|
+
|
|
190
|
+
async def get_question_by_id(
|
|
191
|
+
self,
|
|
192
|
+
question_id: str,
|
|
193
|
+
*,
|
|
194
|
+
snapshot_time: Optional[datetime] = None,
|
|
195
|
+
) -> Optional[ForecastQuestion]:
|
|
196
|
+
r"""
|
|
197
|
+
Retrieve a single Polymarket event by ID.
|
|
198
|
+
|
|
199
|
+
Args:
|
|
200
|
+
question_id: The unique event identifier.
|
|
201
|
+
|
|
202
|
+
Returns:
|
|
203
|
+
The ForecastQuestion if found, None otherwise.
|
|
204
|
+
"""
|
|
205
|
+
try:
|
|
206
|
+
effective_snapshot_time = self._live_snapshot_time(snapshot_time)
|
|
207
|
+
except SourceTemporalIntegrityError as e:
|
|
208
|
+
self._fail(e)
|
|
209
|
+
return None
|
|
210
|
+
|
|
211
|
+
url = f"{self.API_BASE}/events/{question_id}"
|
|
212
|
+
try:
|
|
213
|
+
async with self._get_session() as session:
|
|
214
|
+
async with session.get(url) as resp:
|
|
215
|
+
if resp.status == 200:
|
|
216
|
+
data = await resp.json()
|
|
217
|
+
if not isinstance(data, dict):
|
|
218
|
+
self._fail(SourceFetchError(f"Polymarket event {question_id} payload is not an object."))
|
|
219
|
+
return None
|
|
220
|
+
question = self._normalize(data, effective_snapshot_time)
|
|
221
|
+
self.last_error = None
|
|
222
|
+
return question
|
|
223
|
+
if resp.status != 404:
|
|
224
|
+
self._fail(
|
|
225
|
+
SourceFetchError(
|
|
226
|
+
f"Polymarket API returned status {resp.status} for event {question_id} request."
|
|
227
|
+
)
|
|
228
|
+
)
|
|
229
|
+
return None
|
|
230
|
+
except (aiohttp.ClientError, TimeoutError, TypeError, ValueError) as e:
|
|
231
|
+
self._fail(SourceFetchError(f"Failed to retrieve Polymarket event {question_id}: {e}"))
|
|
232
|
+
return None
|
|
233
|
+
|
|
234
|
+
def _normalize(self, item: Dict[str, Any], snapshot_time: datetime) -> ForecastQuestion:
|
|
235
|
+
r"""
|
|
236
|
+
Normalize Polymarket API response to ForecastQuestion schema.
|
|
237
|
+
|
|
238
|
+
Args:
|
|
239
|
+
item: Raw API response dict.
|
|
240
|
+
|
|
241
|
+
Returns:
|
|
242
|
+
Normalized ForecastQuestion instance.
|
|
243
|
+
"""
|
|
244
|
+
return ForecastQuestion(
|
|
245
|
+
id=str(item.get("id", "")),
|
|
246
|
+
title=item.get("title", "Untitled Event"),
|
|
247
|
+
description=item.get("description", ""),
|
|
248
|
+
metadata=MetadataBase(
|
|
249
|
+
tags=item.get("tags", []),
|
|
250
|
+
snapshot_time=snapshot_time,
|
|
251
|
+
subject_type="binary",
|
|
252
|
+
source_version="polymarket-gamma-v1",
|
|
253
|
+
raw_data=item,
|
|
254
|
+
fetched_at=datetime.now(timezone.utc),
|
|
255
|
+
),
|
|
256
|
+
)
|
|
@@ -38,9 +38,17 @@ from typing import Any, Optional
|
|
|
38
38
|
|
|
39
39
|
import aiohttp
|
|
40
40
|
|
|
41
|
+
from xrtm.data.core import SourceFetchError
|
|
41
42
|
from xrtm.data.core.schemas.trade import TradeEvent, TradeWindow
|
|
42
43
|
|
|
43
44
|
|
|
45
|
+
def _as_utc(value: datetime) -> datetime:
|
|
46
|
+
r"""Normalize datetimes to timezone-aware UTC without rejecting legacy naive inputs."""
|
|
47
|
+
if value.tzinfo is None:
|
|
48
|
+
return value.replace(tzinfo=timezone.utc)
|
|
49
|
+
return value.astimezone(timezone.utc)
|
|
50
|
+
|
|
51
|
+
|
|
44
52
|
class PolymarketTradeSource:
|
|
45
53
|
r"""
|
|
46
54
|
Data provider for Polymarket Goldsky Subgraph.
|
|
@@ -81,6 +89,17 @@ class PolymarketTradeSource:
|
|
|
81
89
|
"""
|
|
82
90
|
self.endpoint = endpoint or self.ENDPOINT
|
|
83
91
|
self.timeout = timeout
|
|
92
|
+
self.last_error: Optional[SourceFetchError] = None
|
|
93
|
+
|
|
94
|
+
@staticmethod
|
|
95
|
+
def _validate_window(start_time: datetime, end_time: datetime, limit: int) -> tuple[datetime, datetime]:
|
|
96
|
+
start_utc = _as_utc(start_time)
|
|
97
|
+
end_utc = _as_utc(end_time)
|
|
98
|
+
if end_utc < start_utc:
|
|
99
|
+
raise ValueError("end_time must not precede start_time")
|
|
100
|
+
if limit <= 0:
|
|
101
|
+
raise ValueError("limit must be positive")
|
|
102
|
+
return start_utc, end_utc
|
|
84
103
|
|
|
85
104
|
async def fetch_trades(
|
|
86
105
|
self,
|
|
@@ -101,6 +120,8 @@ class PolymarketTradeSource:
|
|
|
101
120
|
Returns:
|
|
102
121
|
List of TradeEvent objects in chronological order.
|
|
103
122
|
"""
|
|
123
|
+
start_utc, end_utc = self._validate_window(start_time, end_time, limit)
|
|
124
|
+
|
|
104
125
|
# Query filtering by makerAssetId (assuming token is the asset being traded)
|
|
105
126
|
query = """
|
|
106
127
|
query($assetId: String!, $start: Int!, $end: Int!, $first: Int!) {
|
|
@@ -141,8 +162,8 @@ class PolymarketTradeSource:
|
|
|
141
162
|
|
|
142
163
|
variables = {
|
|
143
164
|
"assetId": market_id,
|
|
144
|
-
"start": int(start_time.timestamp()),
|
|
145
|
-
"end": int(end_time.timestamp()),
|
|
165
|
+
"start": int(start_utc.timestamp()),
|
|
166
|
+
"end": int(end_utc.timestamp()),
|
|
146
167
|
"first": limit,
|
|
147
168
|
}
|
|
148
169
|
|
|
@@ -155,11 +176,29 @@ class PolymarketTradeSource:
|
|
|
155
176
|
response.raise_for_status()
|
|
156
177
|
data = await response.json()
|
|
157
178
|
|
|
158
|
-
|
|
179
|
+
if not isinstance(data, dict):
|
|
180
|
+
error = SourceFetchError("Polymarket subgraph returned a non-object GraphQL payload.")
|
|
181
|
+
self.last_error = error
|
|
182
|
+
raise error
|
|
183
|
+
if data.get("errors"):
|
|
184
|
+
error = SourceFetchError(f"Polymarket subgraph returned GraphQL errors: {data['errors']}")
|
|
185
|
+
self.last_error = error
|
|
186
|
+
raise error
|
|
159
187
|
|
|
160
|
-
|
|
188
|
+
self.last_error = None
|
|
189
|
+
return self._parse_trades(data, market_id, start_utc, end_utc)
|
|
190
|
+
|
|
191
|
+
def _parse_trades(
|
|
192
|
+
self,
|
|
193
|
+
data: dict[str, Any],
|
|
194
|
+
market_id: str,
|
|
195
|
+
start_time: Optional[datetime] = None,
|
|
196
|
+
end_time: Optional[datetime] = None,
|
|
197
|
+
) -> list[TradeEvent]:
|
|
161
198
|
r"""Parse GraphQL response into TradeEvent objects."""
|
|
162
199
|
trades: list[TradeEvent] = []
|
|
200
|
+
start_utc = _as_utc(start_time) if start_time is not None else None
|
|
201
|
+
end_utc = _as_utc(end_time) if end_time is not None else None
|
|
163
202
|
|
|
164
203
|
order_filleds = data.get("data", {}).get("orderFilledEvents", [])
|
|
165
204
|
for item in order_filleds:
|
|
@@ -209,6 +248,10 @@ class PolymarketTradeSource:
|
|
|
209
248
|
|
|
210
249
|
# Convert timestamp
|
|
211
250
|
timestamp = datetime.fromtimestamp(int(item.get("timestamp", 0)), tz=timezone.utc)
|
|
251
|
+
if start_utc is not None and timestamp < start_utc:
|
|
252
|
+
continue
|
|
253
|
+
if end_utc is not None and timestamp > end_utc:
|
|
254
|
+
continue
|
|
212
255
|
|
|
213
256
|
trade = TradeEvent(
|
|
214
257
|
price=max(0.0, min(1.0, price)),
|
|
@@ -244,11 +287,12 @@ class PolymarketTradeSource:
|
|
|
244
287
|
Returns:
|
|
245
288
|
TradeWindow containing all trades in the specified window.
|
|
246
289
|
"""
|
|
247
|
-
|
|
290
|
+
start_utc, end_utc = self._validate_window(start_time, end_time, limit)
|
|
291
|
+
trades = await self.fetch_trades(market_id, start_utc, end_utc, limit)
|
|
248
292
|
return TradeWindow(
|
|
249
293
|
trades=trades,
|
|
250
|
-
start_time=start_time,
|
|
251
|
-
end_time=end_time,
|
|
294
|
+
start_time=start_utc,
|
|
295
|
+
end_time=end_utc,
|
|
252
296
|
market_id=market_id,
|
|
253
297
|
)
|
|
254
298
|
|
|
@@ -289,6 +333,16 @@ class PolymarketTradeSource:
|
|
|
289
333
|
response.raise_for_status()
|
|
290
334
|
data = await response.json()
|
|
291
335
|
|
|
336
|
+
if not isinstance(data, dict):
|
|
337
|
+
error = SourceFetchError("Polymarket subgraph returned a non-object GraphQL payload.")
|
|
338
|
+
self.last_error = error
|
|
339
|
+
raise error
|
|
340
|
+
if data.get("errors"):
|
|
341
|
+
error = SourceFetchError(f"Polymarket subgraph returned GraphQL errors: {data['errors']}")
|
|
342
|
+
self.last_error = error
|
|
343
|
+
raise error
|
|
344
|
+
|
|
345
|
+
self.last_error = None
|
|
292
346
|
markets = data.get("data", {}).get("markets", [])
|
|
293
347
|
return [
|
|
294
348
|
{
|
|
@@ -21,7 +21,7 @@ This module provides the single source of truth for the package version.
|
|
|
21
21
|
|
|
22
22
|
__all__ = ["__version__", "__author__", "__contact__", "__license__", "__copyright__"]
|
|
23
23
|
|
|
24
|
-
__version__ = "0.2.4"
|
|
24
|
+
__version__ = "0.2.5"
|
|
25
25
|
__author__ = "XRTM Team"
|
|
26
26
|
__contact__ = "moy@xrtm.org"
|
|
27
27
|
__license__ = "Apache-2.0"
|
|
@@ -15,6 +15,7 @@
|
|
|
15
15
|
|
|
16
16
|
import asyncio
|
|
17
17
|
import json
|
|
18
|
+
from datetime import datetime, timezone
|
|
18
19
|
|
|
19
20
|
import pytest
|
|
20
21
|
|
|
@@ -79,6 +80,19 @@ async def test_fetch_questions_query_case_insensitive(sample_data):
|
|
|
79
80
|
assert questions[0].id == "q2"
|
|
80
81
|
|
|
81
82
|
|
|
83
|
+
@pytest.mark.asyncio
|
|
84
|
+
async def test_accepts_snapshot_time_for_datasource_compatibility(sample_data):
|
|
85
|
+
source = LocalDataSource(sample_data)
|
|
86
|
+
snapshot_time = datetime(2026, 1, 1, tzinfo=timezone.utc)
|
|
87
|
+
|
|
88
|
+
questions = await source.fetch_questions(limit=1, snapshot_time=snapshot_time)
|
|
89
|
+
question = await source.get_question_by_id("q1", snapshot_time=snapshot_time)
|
|
90
|
+
|
|
91
|
+
assert len(questions) == 1
|
|
92
|
+
assert question is not None
|
|
93
|
+
assert question.id == "q1"
|
|
94
|
+
|
|
95
|
+
|
|
82
96
|
@pytest.mark.asyncio
|
|
83
97
|
async def test_get_question_by_id(sample_data):
|
|
84
98
|
source = LocalDataSource(sample_data)
|
|
@@ -13,10 +13,12 @@
|
|
|
13
13
|
# See the License for the specific language governing permissions and
|
|
14
14
|
# limitations under the License.
|
|
15
15
|
|
|
16
|
+
from datetime import datetime, timedelta, timezone
|
|
16
17
|
from unittest.mock import patch
|
|
17
18
|
|
|
18
19
|
import pytest
|
|
19
20
|
|
|
21
|
+
from xrtm.data.core import SourceFetchError, SourceTemporalIntegrityError
|
|
20
22
|
from xrtm.data.providers.online.polymarket import PolymarketSource
|
|
21
23
|
|
|
22
24
|
|
|
@@ -36,16 +38,17 @@ class FakeResponse:
|
|
|
36
38
|
|
|
37
39
|
|
|
38
40
|
class FakeSession:
|
|
39
|
-
def __init__(self, payload=None):
|
|
41
|
+
def __init__(self, payload=None, status: int = 200):
|
|
40
42
|
self.payload = payload or [{"id": "p1", "title": "Will local tests pass?", "description": "Smoke"}]
|
|
43
|
+
self.status = status
|
|
41
44
|
self.closed = False
|
|
42
45
|
self.get_calls = 0
|
|
43
46
|
|
|
44
47
|
def get(self, url):
|
|
45
48
|
self.get_calls += 1
|
|
46
49
|
if url.endswith("/events/p1"):
|
|
47
|
-
return FakeResponse(self.payload[0])
|
|
48
|
-
return FakeResponse(self.payload)
|
|
50
|
+
return FakeResponse(self.payload[0], self.status)
|
|
51
|
+
return FakeResponse(self.payload, self.status)
|
|
49
52
|
|
|
50
53
|
async def __aenter__(self):
|
|
51
54
|
return self
|
|
@@ -99,3 +102,49 @@ async def test_closed_external_session_falls_back_to_one_off_session():
|
|
|
99
102
|
assert closed_session.get_calls == 0
|
|
100
103
|
assert replacement.get_calls == 1
|
|
101
104
|
assert replacement.closed is True
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
@pytest.mark.asyncio
|
|
108
|
+
async def test_successful_fetch_uses_one_snapshot_for_all_questions():
|
|
109
|
+
payload = [
|
|
110
|
+
{"id": "p1", "title": "Question 1", "description": "Smoke"},
|
|
111
|
+
{"id": "p2", "title": "Question 2", "description": "Smoke"},
|
|
112
|
+
]
|
|
113
|
+
source = PolymarketSource(session=FakeSession(payload=payload))
|
|
114
|
+
|
|
115
|
+
questions = await source.fetch_questions(limit=2)
|
|
116
|
+
|
|
117
|
+
assert len(questions) == 2
|
|
118
|
+
assert questions[0].metadata.snapshot_time == questions[1].metadata.snapshot_time
|
|
119
|
+
assert questions[0].metadata.snapshot_time.tzinfo == timezone.utc
|
|
120
|
+
assert source.last_error is None
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
@pytest.mark.asyncio
|
|
124
|
+
async def test_historical_snapshot_is_rejected_without_live_fetch():
|
|
125
|
+
session = FakeSession()
|
|
126
|
+
source = PolymarketSource(session=session)
|
|
127
|
+
|
|
128
|
+
questions = await source.fetch_questions(snapshot_time=datetime.now(timezone.utc) - timedelta(hours=1))
|
|
129
|
+
|
|
130
|
+
assert questions == []
|
|
131
|
+
assert session.get_calls == 0
|
|
132
|
+
assert isinstance(source.last_error, SourceTemporalIntegrityError)
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
@pytest.mark.asyncio
|
|
136
|
+
async def test_source_failure_is_recorded_for_compatibility():
|
|
137
|
+
source = PolymarketSource(session=FakeSession(status=503))
|
|
138
|
+
|
|
139
|
+
questions = await source.fetch_questions(limit=1)
|
|
140
|
+
|
|
141
|
+
assert questions == []
|
|
142
|
+
assert isinstance(source.last_error, SourceFetchError)
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
@pytest.mark.asyncio
|
|
146
|
+
async def test_raise_on_error_surfaces_source_failure():
|
|
147
|
+
source = PolymarketSource(session=FakeSession(status=503), raise_on_error=True)
|
|
148
|
+
|
|
149
|
+
with pytest.raises(SourceFetchError):
|
|
150
|
+
await source.fetch_questions(limit=1)
|
|
@@ -20,6 +20,7 @@ from unittest.mock import AsyncMock, MagicMock, patch
|
|
|
20
20
|
|
|
21
21
|
import pytest
|
|
22
22
|
|
|
23
|
+
from xrtm.data.core import SourceFetchError
|
|
23
24
|
from xrtm.data.providers.subgraph import PolymarketTradeSource
|
|
24
25
|
|
|
25
26
|
|
|
@@ -144,6 +145,83 @@ class TestPolymarketTradeSource:
|
|
|
144
145
|
assert len(window.trades) == 2
|
|
145
146
|
assert window.total_volume == 300.0
|
|
146
147
|
|
|
148
|
+
@pytest.mark.asyncio
|
|
149
|
+
async def test_fetch_trades_filters_response_to_requested_window(self, source: PolymarketTradeSource) -> None:
|
|
150
|
+
r"""Provider response cannot leak trades outside the requested snapshot window."""
|
|
151
|
+
data = {
|
|
152
|
+
"data": {
|
|
153
|
+
"orderFilledEvents": [
|
|
154
|
+
{
|
|
155
|
+
"id": "inside",
|
|
156
|
+
"makerAmountFilled": "100",
|
|
157
|
+
"takerAmountFilled": "75",
|
|
158
|
+
"timestamp": "1704067200",
|
|
159
|
+
"maker": "0xmaker1",
|
|
160
|
+
"taker": "0xtaker1",
|
|
161
|
+
},
|
|
162
|
+
{
|
|
163
|
+
"id": "future",
|
|
164
|
+
"makerAmountFilled": "100",
|
|
165
|
+
"takerAmountFilled": "80",
|
|
166
|
+
"timestamp": "1704153600",
|
|
167
|
+
"maker": "0xmaker2",
|
|
168
|
+
"taker": "0xtaker2",
|
|
169
|
+
},
|
|
170
|
+
]
|
|
171
|
+
}
|
|
172
|
+
}
|
|
173
|
+
mock_response = MagicMock()
|
|
174
|
+
mock_response.json = AsyncMock(return_value=data)
|
|
175
|
+
mock_response.raise_for_status = MagicMock()
|
|
176
|
+
|
|
177
|
+
mock_response_ctx = AsyncMock()
|
|
178
|
+
mock_response_ctx.__aenter__.return_value = mock_response
|
|
179
|
+
mock_response_ctx.__aexit__.return_value = None
|
|
180
|
+
|
|
181
|
+
mock_session = MagicMock()
|
|
182
|
+
mock_session.post.return_value = mock_response_ctx
|
|
183
|
+
|
|
184
|
+
mock_session_ctx = AsyncMock()
|
|
185
|
+
mock_session_ctx.__aenter__.return_value = mock_session
|
|
186
|
+
mock_session_ctx.__aexit__.return_value = None
|
|
187
|
+
|
|
188
|
+
with patch("aiohttp.ClientSession", return_value=mock_session_ctx):
|
|
189
|
+
trades = await source.fetch_trades(
|
|
190
|
+
market_id="0xmarket",
|
|
191
|
+
start_time=datetime(2024, 1, 1, tzinfo=timezone.utc),
|
|
192
|
+
end_time=datetime(2024, 1, 1, 12, tzinfo=timezone.utc),
|
|
193
|
+
)
|
|
194
|
+
|
|
195
|
+
assert [trade.maker for trade in trades] == ["0xmaker1"]
|
|
196
|
+
|
|
197
|
+
@pytest.mark.asyncio
|
|
198
|
+
async def test_graphql_errors_raise_source_fetch_error(self, source: PolymarketTradeSource) -> None:
|
|
199
|
+
r"""GraphQL error payloads must not be silently interpreted as empty data."""
|
|
200
|
+
mock_response = MagicMock()
|
|
201
|
+
mock_response.json = AsyncMock(return_value={"errors": [{"message": "bad query"}]})
|
|
202
|
+
mock_response.raise_for_status = MagicMock()
|
|
203
|
+
|
|
204
|
+
mock_response_ctx = AsyncMock()
|
|
205
|
+
mock_response_ctx.__aenter__.return_value = mock_response
|
|
206
|
+
mock_response_ctx.__aexit__.return_value = None
|
|
207
|
+
|
|
208
|
+
mock_session = MagicMock()
|
|
209
|
+
mock_session.post.return_value = mock_response_ctx
|
|
210
|
+
|
|
211
|
+
mock_session_ctx = AsyncMock()
|
|
212
|
+
mock_session_ctx.__aenter__.return_value = mock_session
|
|
213
|
+
mock_session_ctx.__aexit__.return_value = None
|
|
214
|
+
|
|
215
|
+
with patch("aiohttp.ClientSession", return_value=mock_session_ctx):
|
|
216
|
+
with pytest.raises(SourceFetchError):
|
|
217
|
+
await source.fetch_trades(
|
|
218
|
+
market_id="0xmarket",
|
|
219
|
+
start_time=datetime(2024, 1, 1, tzinfo=timezone.utc),
|
|
220
|
+
end_time=datetime(2024, 1, 3, tzinfo=timezone.utc),
|
|
221
|
+
)
|
|
222
|
+
|
|
223
|
+
assert isinstance(source.last_error, SourceFetchError)
|
|
224
|
+
|
|
147
225
|
def test_parse_trades_empty_response(self, source: PolymarketTradeSource) -> None:
|
|
148
226
|
r"""Test parsing empty response."""
|
|
149
227
|
trades = source._parse_trades({}, "market1")
|
|
@@ -12,11 +12,12 @@
|
|
|
12
12
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
13
|
# See the License for the specific language governing permissions and
|
|
14
14
|
# limitations under the License.
|
|
15
|
-
|
|
15
|
+
from datetime import datetime, timedelta, timezone
|
|
16
16
|
|
|
17
17
|
import pytest
|
|
18
18
|
|
|
19
|
-
from xrtm.data import CausalEdge, CausalNode, ForecastOutput
|
|
19
|
+
from xrtm.data import CausalEdge, CausalNode, ForecastOutput, MetadataBase
|
|
20
|
+
from xrtm.data.core.schemas import TradeEvent, TradeWindow
|
|
20
21
|
|
|
21
22
|
|
|
22
23
|
def test_forecast_output_initialization():
|
|
@@ -100,3 +101,50 @@ def test_to_networkx_conversion():
|
|
|
100
101
|
assert dg.has_edge("n1", "n2")
|
|
101
102
|
except ImportError:
|
|
102
103
|
pytest.skip("NetworkX not installed")
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def test_metadata_temporal_fields_normalize_to_utc():
|
|
107
|
+
"""Metadata timestamps remain timezone-aware for deterministic snapshot comparisons."""
|
|
108
|
+
metadata = MetadataBase(
|
|
109
|
+
created_at=datetime(2024, 1, 1, 12, 0),
|
|
110
|
+
snapshot_time=datetime(2024, 1, 1, 13, 0),
|
|
111
|
+
)
|
|
112
|
+
|
|
113
|
+
assert metadata.created_at.tzinfo == timezone.utc
|
|
114
|
+
assert metadata.snapshot_time.tzinfo == timezone.utc
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def test_trade_window_rejects_future_leakage():
|
|
118
|
+
"""TradeWindow enforces that no trade falls outside its snapshot window."""
|
|
119
|
+
start = datetime(2024, 1, 1, tzinfo=timezone.utc)
|
|
120
|
+
end = datetime(2024, 1, 2, tzinfo=timezone.utc)
|
|
121
|
+
future_trade = TradeEvent(
|
|
122
|
+
price=0.5,
|
|
123
|
+
amount=1.0,
|
|
124
|
+
timestamp=end + timedelta(seconds=1),
|
|
125
|
+
maker="0xmaker",
|
|
126
|
+
taker="0xtaker",
|
|
127
|
+
)
|
|
128
|
+
|
|
129
|
+
with pytest.raises(ValueError, match="trades must fall within"):
|
|
130
|
+
TradeWindow(trades=[future_trade], start_time=start, end_time=end, market_id="m1")
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def test_trade_window_normalizes_naive_boundaries_and_timestamps():
|
|
134
|
+
"""Legacy naive datetimes are interpreted as UTC before invariant checks."""
|
|
135
|
+
trade = TradeEvent(
|
|
136
|
+
price=0.5,
|
|
137
|
+
amount=1.0,
|
|
138
|
+
timestamp=datetime(2024, 1, 1, 12, 0),
|
|
139
|
+
maker="0xmaker",
|
|
140
|
+
taker="0xtaker",
|
|
141
|
+
)
|
|
142
|
+
window = TradeWindow(
|
|
143
|
+
trades=[trade],
|
|
144
|
+
start_time=datetime(2024, 1, 1),
|
|
145
|
+
end_time=datetime(2024, 1, 2),
|
|
146
|
+
market_id="m1",
|
|
147
|
+
)
|
|
148
|
+
|
|
149
|
+
assert window.start_time.tzinfo == timezone.utc
|
|
150
|
+
assert window.trades[0].timestamp.tzinfo == timezone.utc
|
|
@@ -1,167 +0,0 @@
|
|
|
1
|
-
# coding=utf-8
|
|
2
|
-
# Copyright 2026 XRTM Team. All rights reserved.
|
|
3
|
-
#
|
|
4
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
-
# you may not use this file except in compliance with the License.
|
|
6
|
-
# You may obtain a copy of the License at
|
|
7
|
-
#
|
|
8
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
-
#
|
|
10
|
-
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
-
# See the License for the specific language governing permissions and
|
|
14
|
-
# limitations under the License.
|
|
15
|
-
|
|
16
|
-
r"""
|
|
17
|
-
Polymarket Gamma API data source.
|
|
18
|
-
|
|
19
|
-
This module provides a DataSource implementation that fetches forecast
|
|
20
|
-
questions from the Polymarket Gamma API.
|
|
21
|
-
|
|
22
|
-
Example:
|
|
23
|
-
>>> from xrtm.data.providers.online import PolymarketSource
|
|
24
|
-
>>> source = PolymarketSource()
|
|
25
|
-
>>> questions = await source.fetch_questions(query="election", limit=5)
|
|
26
|
-
"""
|
|
27
|
-
|
|
28
|
-
import logging
|
|
29
|
-
from contextlib import asynccontextmanager
|
|
30
|
-
from types import TracebackType
|
|
31
|
-
from typing import Any, Dict, List, Optional
|
|
32
|
-
|
|
33
|
-
import aiohttp
|
|
34
|
-
|
|
35
|
-
from xrtm.data.core import DataSource
|
|
36
|
-
from xrtm.data.core.schemas import ForecastQuestion, MetadataBase
|
|
37
|
-
|
|
38
|
-
logger = logging.getLogger(__name__)
|
|
39
|
-
|
|
40
|
-
__all__ = ["PolymarketSource"]
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
class PolymarketSource(DataSource):
|
|
44
|
-
r"""
|
|
45
|
-
DataSource implementation that fetches from the Polymarket Gamma API.
|
|
46
|
-
|
|
47
|
-
This provider connects to Polymarket's public Gamma API to retrieve
|
|
48
|
-
event metadata for forecasting. For trade history with OrderFilled
|
|
49
|
-
events, see the subgraph provider (to be added).
|
|
50
|
-
|
|
51
|
-
Attributes:
|
|
52
|
-
API_BASE: Base URL for the Polymarket Gamma API.
|
|
53
|
-
|
|
54
|
-
Example:
|
|
55
|
-
>>> source = PolymarketSource()
|
|
56
|
-
>>> questions = await source.fetch_questions(limit=10)
|
|
57
|
-
>>> print(f"Fetched {len(questions)} questions")
|
|
58
|
-
"""
|
|
59
|
-
|
|
60
|
-
API_BASE = "https://gamma-api.polymarket.com"
|
|
61
|
-
|
|
62
|
-
def __init__(self, session: Optional[aiohttp.ClientSession] = None) -> None:
|
|
63
|
-
self._session = session
|
|
64
|
-
self._owns_session = False
|
|
65
|
-
|
|
66
|
-
async def __aenter__(self) -> "PolymarketSource":
|
|
67
|
-
if self._session is None or self._session.closed:
|
|
68
|
-
self._session = aiohttp.ClientSession()
|
|
69
|
-
self._owns_session = True
|
|
70
|
-
return self
|
|
71
|
-
|
|
72
|
-
async def __aexit__(
|
|
73
|
-
self,
|
|
74
|
-
exc_type: Optional[type[BaseException]],
|
|
75
|
-
exc: Optional[BaseException],
|
|
76
|
-
traceback: Optional[TracebackType],
|
|
77
|
-
) -> None:
|
|
78
|
-
if self._owns_session and self._session is not None:
|
|
79
|
-
await self._session.close()
|
|
80
|
-
self._session = None
|
|
81
|
-
self._owns_session = False
|
|
82
|
-
|
|
83
|
-
@asynccontextmanager
|
|
84
|
-
async def _get_session(self):
|
|
85
|
-
if self._session is not None and not self._session.closed:
|
|
86
|
-
yield self._session
|
|
87
|
-
return
|
|
88
|
-
|
|
89
|
-
session = aiohttp.ClientSession()
|
|
90
|
-
try:
|
|
91
|
-
yield session
|
|
92
|
-
finally:
|
|
93
|
-
await session.close()
|
|
94
|
-
|
|
95
|
-
async def fetch_questions(self, query: Optional[str] = None, limit: int = 5) -> List[ForecastQuestion]:
|
|
96
|
-
r"""
|
|
97
|
-
Fetch active forecast questions from Polymarket.
|
|
98
|
-
|
|
99
|
-
Args:
|
|
100
|
-
query: Optional search string to filter events.
|
|
101
|
-
limit: Maximum number of questions to return.
|
|
102
|
-
|
|
103
|
-
Returns:
|
|
104
|
-
List of ForecastQuestion objects from active markets.
|
|
105
|
-
"""
|
|
106
|
-
url = f"{self.API_BASE}/events?active=true&closed=false&limit={limit}"
|
|
107
|
-
if query:
|
|
108
|
-
url += f"&search={query}"
|
|
109
|
-
|
|
110
|
-
try:
|
|
111
|
-
async with self._get_session() as session:
|
|
112
|
-
async with session.get(url) as resp:
|
|
113
|
-
if resp.status != 200:
|
|
114
|
-
logger.error(f"Polymarket API returned status {resp.status}")
|
|
115
|
-
return []
|
|
116
|
-
|
|
117
|
-
data = await resp.json()
|
|
118
|
-
questions = []
|
|
119
|
-
for item in data:
|
|
120
|
-
questions.append(self._normalize(item))
|
|
121
|
-
return questions
|
|
122
|
-
except Exception as e:
|
|
123
|
-
logger.error(f"Failed to fetch questions from Polymarket: {e}")
|
|
124
|
-
return []
|
|
125
|
-
|
|
126
|
-
async def get_question_by_id(self, question_id: str) -> Optional[ForecastQuestion]:
|
|
127
|
-
r"""
|
|
128
|
-
Retrieve a single Polymarket event by ID.
|
|
129
|
-
|
|
130
|
-
Args:
|
|
131
|
-
question_id: The unique event identifier.
|
|
132
|
-
|
|
133
|
-
Returns:
|
|
134
|
-
The ForecastQuestion if found, None otherwise.
|
|
135
|
-
"""
|
|
136
|
-
url = f"{self.API_BASE}/events/{question_id}"
|
|
137
|
-
try:
|
|
138
|
-
async with self._get_session() as session:
|
|
139
|
-
async with session.get(url) as resp:
|
|
140
|
-
if resp.status == 200:
|
|
141
|
-
return self._normalize(await resp.json())
|
|
142
|
-
return None
|
|
143
|
-
except Exception as e:
|
|
144
|
-
logger.error(f"Failed to retrieve Polymarket event {question_id}: {e}")
|
|
145
|
-
return None
|
|
146
|
-
|
|
147
|
-
def _normalize(self, item: Dict[str, Any]) -> ForecastQuestion:
|
|
148
|
-
r"""
|
|
149
|
-
Normalize Polymarket API response to ForecastQuestion schema.
|
|
150
|
-
|
|
151
|
-
Args:
|
|
152
|
-
item: Raw API response dict.
|
|
153
|
-
|
|
154
|
-
Returns:
|
|
155
|
-
Normalized ForecastQuestion instance.
|
|
156
|
-
"""
|
|
157
|
-
return ForecastQuestion(
|
|
158
|
-
id=str(item.get("id", "")),
|
|
159
|
-
title=item.get("title", "Untitled Event"),
|
|
160
|
-
description=item.get("description", ""),
|
|
161
|
-
metadata=MetadataBase(
|
|
162
|
-
tags=item.get("tags", []),
|
|
163
|
-
subject_type="binary",
|
|
164
|
-
source_version="polymarket-gamma-v1",
|
|
165
|
-
raw_data=item,
|
|
166
|
-
),
|
|
167
|
-
)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|