xrtm-data 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
xrtm/data/__init__.py CHANGED
@@ -13,18 +13,44 @@
13
13
  # See the License for the specific language governing permissions and
14
14
  # limitations under the License.
15
15
 
16
- from xrtm.data.schemas.forecast import (
16
+ r"""
17
+ xrtm-data: The Foundation Layer (Layer 1)
18
+
19
+ This package provides the core data schemas and interfaces for the xrtm
20
+ ecosystem. It follows the "Zero Leakage" principle, ensuring all data
21
+ is properly timestamped for temporal isolation.
22
+
23
+ Structure:
24
+ - core/: Domain-agnostic interfaces and schemas
25
+ - kit/: High-level processors and utilities
26
+ - providers/: External data source implementations
27
+
28
+ Example:
29
+ >>> from xrtm.data import ForecastQuestion, DataSource
30
+ >>> from xrtm.data.providers import LocalDataSource
31
+ """
32
+
33
+ # Core interfaces
34
+ from xrtm.data.core import DataSource
35
+
36
+ # Core schemas (public API)
37
+ from xrtm.data.core.schemas import (
17
38
  CausalEdge,
18
39
  CausalNode,
40
+ ConfidenceInterval,
19
41
  ForecastOutput,
20
42
  ForecastQuestion,
21
43
  MetadataBase,
22
44
  )
23
45
 
24
46
  __all__ = [
47
+ # Interfaces
48
+ "DataSource",
49
+ # Schemas
25
50
  "MetadataBase",
26
51
  "ForecastQuestion",
27
52
  "ForecastOutput",
28
53
  "CausalNode",
29
54
  "CausalEdge",
55
+ "ConfidenceInterval",
30
56
  ]
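Note: the package docstring above names the Zero Leakage principle (every record carries an explicit snapshot time). Below is a minimal, illustrative sketch of snapshot-based filtering against the public API exported here; the `filter_to_snapshot` helper and the evidence records are invented for the example and are not part of xrtm-data.

```python
# Illustrative only: filter_to_snapshot and the evidence list are not part of
# xrtm-data; this sketch shows how snapshot_time can gate data access.
from datetime import datetime, timezone
from typing import Dict, List

from xrtm.data import ForecastQuestion, MetadataBase


def filter_to_snapshot(evidence: List[Dict], snapshot_time: datetime) -> List[Dict]:
    """Drop any evidence item observed after the frozen 'Time T'."""
    return [item for item in evidence if item["observed_at"] <= snapshot_time]


question = ForecastQuestion(
    id="q1",
    title="Will it rain tomorrow?",
    metadata=MetadataBase(snapshot_time=datetime(2026, 1, 1, tzinfo=timezone.utc)),
)

evidence = [
    {"observed_at": datetime(2025, 12, 30, tzinfo=timezone.utc), "note": "humidity 85%"},
    {"observed_at": datetime(2026, 1, 2, tzinfo=timezone.utc), "note": "rain reported"},  # future leakage
]

visible = filter_to_snapshot(evidence, question.metadata.snapshot_time)
print(len(visible))  # 1 -> only the pre-snapshot observation survives
```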
xrtm/data/core/__init__.py ADDED
@@ -0,0 +1,26 @@
1
+ # coding=utf-8
2
+ # Copyright 2026 XRTM Team. All rights reserved.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ r"""
17
+ Core interfaces and protocols for xrtm-data.
18
+
19
+ This module defines the abstract base classes (protocols) that all data
20
+ providers must implement. The core module is domain-agnostic and MUST NOT
21
+ import from kit/ or providers/.
22
+ """
23
+
24
+ from xrtm.data.core.interfaces import DataSource
25
+
26
+ __all__ = ["DataSource"]
xrtm/data/core/interfaces.py ADDED
@@ -0,0 +1,82 @@
1
+ # coding=utf-8
2
+ # Copyright 2026 XRTM Team. All rights reserved.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ r"""
17
+ Abstract interfaces for xrtm-data providers.
18
+
19
+ This module defines the protocols that all data sources must implement.
20
+ These are domain-agnostic abstractions that can be used for any forecasting
21
+ data provider, regardless of the source.
22
+
23
+ Example:
24
+ >>> from xrtm.data.core import DataSource
25
+ >>> class MySource(DataSource):
26
+ ... async def fetch_questions(self, query=None, limit=5):
27
+ ... return []
28
+ ... async def get_question_by_id(self, question_id):
29
+ ... return None
30
+ """
31
+
32
+ import abc
33
+ from typing import List, Optional
34
+
35
+ from xrtm.data.core.schemas.forecast import ForecastQuestion
36
+
37
+
38
+ class DataSource(abc.ABC):
39
+ r"""
40
+ Abstract interface for gathering or streaming forecasting workloads.
41
+
42
+ All data providers (local, online, subgraph) must implement this interface
43
+ to ensure consistent access patterns across the ecosystem.
44
+
45
+ Attributes:
46
+ None. This is a pure protocol.
47
+
48
+ Example:
49
+ >>> class LocalSource(DataSource):
50
+ ... async def fetch_questions(self, query=None, limit=5):
51
+ ... return [ForecastQuestion(id="1", title="Test")]
52
+ """
53
+
54
+ @abc.abstractmethod
55
+ async def fetch_questions(self, query: Optional[str] = None, limit: int = 5) -> List[ForecastQuestion]:
56
+ r"""
57
+ Fetch a list of forecast questions from the data source.
58
+
59
+ Args:
60
+ query: Optional search/filter string.
61
+ limit: Maximum number of questions to return.
62
+
63
+ Returns:
64
+ List of ForecastQuestion objects matching the criteria.
65
+ """
66
+ pass
67
+
68
+ @abc.abstractmethod
69
+ async def get_question_by_id(self, question_id: str) -> Optional[ForecastQuestion]:
70
+ r"""
71
+ Retrieve a single question by its unique identifier.
72
+
73
+ Args:
74
+ question_id: The unique identifier of the question.
75
+
76
+ Returns:
77
+ The ForecastQuestion if found, None otherwise.
78
+ """
79
+ pass
80
+
81
+
82
+ __all__ = ["DataSource"]
xrtm/data/core/schemas/__init__.py ADDED
@@ -0,0 +1,39 @@
1
+ # coding=utf-8
2
+ # Copyright 2026 XRTM Team. All rights reserved.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ r"""
17
+ Core data schemas for xrtm-data.
18
+
19
+ This module exports all foundational Pydantic models used across the
20
+ xrtm ecosystem for representing forecasting data structures.
21
+ """
22
+
23
+ from xrtm.data.core.schemas.forecast import (
24
+ CausalEdge,
25
+ CausalNode,
26
+ ConfidenceInterval,
27
+ ForecastOutput,
28
+ ForecastQuestion,
29
+ MetadataBase,
30
+ )
31
+
32
+ __all__ = [
33
+ "MetadataBase",
34
+ "ForecastQuestion",
35
+ "ForecastOutput",
36
+ "CausalNode",
37
+ "CausalEdge",
38
+ "ConfidenceInterval",
39
+ ]
xrtm/data/core/schemas/forecast.py ADDED
@@ -0,0 +1,249 @@
1
+ # coding=utf-8
2
+ # Copyright 2026 XRTM Team. All rights reserved.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ r"""
17
+ Core forecast schemas for xrtm-data.
18
+
19
+ This module defines the foundational Pydantic models used across the xrtm
20
+ ecosystem for representing forecast questions, outputs, and causal reasoning
21
+ structures.
22
+
23
+ Example:
24
+ >>> from xrtm.data.core.schemas import ForecastQuestion
25
+ >>> q = ForecastQuestion(id="q1", title="Will it rain tomorrow?")
26
+ """
27
+
28
+ from datetime import datetime, timezone
29
+ from typing import Any, Dict, List, Optional
30
+
31
+ from pydantic import AliasChoices, BaseModel, ConfigDict, Field
32
+
33
+
34
+ class MetadataBase(BaseModel):
35
+ r"""
36
+ A foundational metadata block used to ensure consistency across schemas.
37
+
38
+ This model captures temporal information critical for the Zero Leakage
39
+ principle, ensuring all data is properly timestamped.
40
+
41
+ Attributes:
42
+ id: Unique identifier for this metadata block.
43
+ created_at: When this metadata was created.
44
+ snapshot_time: The "Time T" at which the world state was frozen.
45
+ tags: List of classification tags.
46
+ subject_type: Type of subject being forecasted.
47
+ source_version: Version of the data source.
48
+ raw_data: Original unprocessed data.
49
+ """
50
+
51
+ model_config = ConfigDict(extra="allow")
52
+ id: str = Field(
53
+ default_factory=lambda: "meta_" + str(datetime.now(timezone.utc).timestamp()),
54
+ description="Unique identifier for this metadata block",
55
+ )
56
+ created_at: datetime = Field(
57
+ default_factory=lambda: datetime.now(timezone.utc),
58
+ description="When this metadata was created",
59
+ )
60
+ snapshot_time: datetime = Field(
61
+ default_factory=lambda: datetime.now(timezone.utc),
62
+ description="Zero Leakage: The specific 'Time T' at which the world state was frozen.",
63
+ )
64
+ tags: List[str] = Field(default_factory=list, description="Classification tags")
65
+ subject_type: Optional[str] = Field(None, description="Type of subject being forecasted")
66
+ source_version: Optional[str] = Field(None, description="Version of the data source")
67
+ raw_data: Optional[Dict[str, Any]] = Field(None, description="Original unprocessed data")
68
+
69
+ def get(self, key: str, default: Any = None) -> Any:
70
+ r"""Backward compatibility for dict-like access."""
71
+ return getattr(self, key, default)
72
+
73
+
74
+ class ForecastQuestion(BaseModel):
75
+ r"""
76
+ The standardized input format for a forecasting task.
77
+
78
+ This is the primary input schema used throughout the xrtm ecosystem
79
+ to represent a question or hypothesis to be forecasted.
80
+
81
+ Attributes:
82
+ id: Unique identifier for the question.
83
+ title: The main question or statement being forecasted.
84
+ description: Detailed context and background.
85
+ resolution_criteria: Explicit rules for ground truth determination.
86
+ metadata: Associated metadata including temporal information.
87
+
88
+ Example:
89
+ >>> q = ForecastQuestion(
90
+ ... id="q1",
91
+ ... title="Will Company X announce earnings above expectations?",
92
+ ... description="Q4 earnings call scheduled for Jan 15",
93
+ ... )
94
+ """
95
+
96
+ id: str = Field(..., description="Unique identifier for the question")
97
+ title: str = Field(..., description="The main question or statement being forecasted")
98
+ description: Optional[str] = Field(
99
+ None,
100
+ alias="content",
101
+ validation_alias=AliasChoices("description", "content"),
102
+ description="Detailed context and background",
103
+ )
104
+ resolution_criteria: Optional[str] = Field(None, description="Explicit rules for ground truth determination")
105
+ metadata: MetadataBase = Field(default_factory=MetadataBase) # type: ignore[arg-type]
106
+
107
+ @property
108
+ def content(self) -> Optional[str]:
109
+ r"""Backward compatibility alias for description."""
110
+ return self.description
111
+
112
+
113
+ class CausalNode(BaseModel):
114
+ r"""
115
+ Represents a single step in a logical reasoning chain.
116
+
117
+ Attributes:
118
+ event: The assumption or event in the chain.
119
+ probability: Optional probability assigned to this node.
120
+ description: Additional context for this reasoning step.
121
+ node_id: Unique identifier for graph operations.
122
+ """
123
+
124
+ event: str = Field(..., description="The assumption or event in the chain")
125
+ probability: Optional[float] = Field(None, ge=0, le=1, description="Probability of this event")
126
+ description: Optional[str] = Field(None, description="Additional context")
127
+ node_id: str = Field(
128
+ default_factory=lambda: "node_" + str(datetime.now().timestamp()),
129
+ description="Unique ID for graph operations",
130
+ )
131
+
132
+
133
+ class CausalEdge(BaseModel):
134
+ r"""
135
+ Represents a directed causal dependency between two reasoning nodes.
136
+
137
+ Attributes:
138
+ source: ID of the source node.
139
+ target: ID of the target node.
140
+ weight: Strength of the causal relationship.
141
+ description: Context for this causal link.
142
+ """
143
+
144
+ source: str = Field(..., description="ID of the source node")
145
+ target: str = Field(..., description="ID of the target node")
146
+ weight: float = Field(default=1.0, ge=0, le=1, description="Strength of causal relationship")
147
+ description: Optional[str] = Field(None, description="Context for this causal link")
148
+
149
+
150
+ class ConfidenceInterval(BaseModel):
151
+ r"""
152
+ Standard range for probabilistic calibration.
153
+
154
+ Attributes:
155
+ low: Lower bound of the interval.
156
+ high: Upper bound of the interval.
157
+ level: Confidence level (default 0.9 for 90%).
158
+ """
159
+
160
+ low: float = Field(..., description="Lower bound")
161
+ high: float = Field(..., description="Upper bound")
162
+ level: float = Field(0.9, ge=0, le=1, description="Confidence level")
163
+
164
+
165
+ class ForecastOutput(BaseModel):
166
+ r"""
167
+ The structured result of an agent's forecasting reasoning.
168
+
169
+ This model captures not just the final probability, but also the
170
+ complete reasoning chain that led to it, enabling audit and calibration.
171
+
172
+ Attributes:
173
+ question_id: Reference to the input question.
174
+ probability: The assigned probability of the primary outcome.
175
+ uncertainty: Optional measure of forecast uncertainty.
176
+ confidence_interval: Range for calibration.
177
+ reasoning: Narrative reasoning for the forecast.
178
+ logical_trace: Bayesian-style sequence of assumptions.
179
+ logical_edges: Causal dependencies between nodes.
180
+ structural_trace: Order of graph nodes executed.
181
+ calibration_metrics: Performance metrics.
182
+ metadata: Associated temporal and source metadata.
183
+ """
184
+
185
+ question_id: str = Field(..., description="Reference to the input question")
186
+ probability: float = Field(
187
+ ...,
188
+ alias="confidence",
189
+ validation_alias=AliasChoices("probability", "confidence"),
190
+ ge=0,
191
+ le=1,
192
+ description="The assigned probability of the primary outcome",
193
+ )
194
+ uncertainty: Optional[float] = Field(None, ge=0, le=1, description="Measure of forecast uncertainty")
195
+ confidence_interval: Optional[ConfidenceInterval] = None
196
+ reasoning: str = Field(..., description="Narrative reasoning for the forecast")
197
+ logical_trace: List[CausalNode] = Field(
198
+ default_factory=list, description="The Bayesian-style sequence of assumptions"
199
+ )
200
+ logical_edges: List[CausalEdge] = Field(default_factory=list, description="Causal dependencies between nodes")
201
+ structural_trace: List[str] = Field(default_factory=list, description="Order of graph nodes executed")
202
+ calibration_metrics: Dict[str, Any] = Field(default_factory=dict, description="Performance metrics")
203
+ metadata: MetadataBase = Field(default_factory=MetadataBase) # type: ignore[arg-type]
204
+
205
+ @property
206
+ def confidence(self) -> float:
207
+ r"""Backward compatibility alias for probability."""
208
+ return self.probability
209
+
210
+ @confidence.setter
211
+ def confidence(self, value: float) -> None:
212
+ r"""Backward compatibility setter for probability."""
213
+ self.probability = value
214
+
215
+ def to_networkx(self) -> Any:
216
+ r"""
217
+ Convert the logical trace to a NetworkX directed graph.
218
+
219
+ Returns:
220
+ A NetworkX DiGraph representing the reasoning chain.
221
+
222
+ Raises:
223
+ ImportError: If networkx is not installed.
224
+ """
225
+ try:
226
+ import networkx as nx
227
+ except ImportError:
228
+ raise ImportError("networkx is required for to_networkx(). Install it with 'uv add networkx'.")
229
+ dg = nx.DiGraph()
230
+ for node in self.logical_trace:
231
+ dg.add_node(
232
+ node.node_id,
233
+ event=node.event,
234
+ probability=node.probability,
235
+ description=node.description,
236
+ )
237
+ for edge in self.logical_edges:
238
+ dg.add_edge(edge.source, edge.target, weight=edge.weight, description=edge.description)
239
+ return dg
240
+
241
+
242
+ __all__ = [
243
+ "MetadataBase",
244
+ "ForecastQuestion",
245
+ "CausalNode",
246
+ "CausalEdge",
247
+ "ConfidenceInterval",
248
+ "ForecastOutput",
249
+ ]
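Note: a short usage sketch for the schemas defined above, converting a small reasoning trace into a graph; the node names and probabilities are invented, and `to_networkx()` assumes networkx is installed.

```python
# Illustrative values; only the schema usage reflects the module above.
from xrtm.data.core.schemas import CausalEdge, CausalNode, ForecastOutput, ForecastQuestion

question = ForecastQuestion(id="q1", title="Will it rain tomorrow?")

humidity = CausalNode(node_id="humidity", event="Humidity stays above 80%", probability=0.7)
rain = CausalNode(node_id="rain", event="Rain falls tomorrow", probability=0.6)

output = ForecastOutput(
    question_id=question.id,
    probability=0.6,  # also accepted under the legacy "confidence" key
    reasoning="High humidity and an incoming front make rain likely.",
    logical_trace=[humidity, rain],
    logical_edges=[CausalEdge(source="humidity", target="rain", weight=0.8)],
)

graph = output.to_networkx()  # raises ImportError if networkx is missing
print(graph.number_of_nodes(), graph.number_of_edges())  # 2 1
```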
xrtm/data/kit/__init__.py ADDED
@@ -0,0 +1,27 @@
1
+ # coding=utf-8
2
+ # Copyright 2026 XRTM Team. All rights reserved.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ r"""
17
+ Researcher Kit for xrtm-data.
18
+
19
+ This module provides high-level, composable utilities built on top of
20
+ the core interfaces. Kit components use abstractions from core/ and
21
+ MUST NOT import from providers/.
22
+
23
+ Currently empty; this module will be populated with data processors (e.g., Beta fitters)
24
+ during the training logic implementation phase.
25
+ """
26
+
27
+ __all__: list[str] = []
xrtm/data/providers/__init__.py ADDED
@@ -0,0 +1,27 @@
1
+ # coding=utf-8
2
+ # Copyright 2026 XRTM Team. All rights reserved.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ r"""
17
+ Data providers for xrtm-data.
18
+
19
+ This module exports concrete DataSource implementations that fetch data
20
+ from various external sources. Providers implement the core interfaces
21
+ and CAN import from core/ but MUST NOT import from kit/.
22
+ """
23
+
24
+ from xrtm.data.providers.local import LocalDataSource
25
+ from xrtm.data.providers.online import PolymarketSource
26
+
27
+ __all__ = ["LocalDataSource", "PolymarketSource"]
xrtm/data/providers/local/__init__.py ADDED
@@ -0,0 +1,20 @@
1
+ # coding=utf-8
2
+ # Copyright 2026 XRTM Team. All rights reserved.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ r"""Local file-based data providers."""
17
+
18
+ from xrtm.data.providers.local.csv import LocalDataSource
19
+
20
+ __all__ = ["LocalDataSource"]
xrtm/data/providers/local/csv.py ADDED
@@ -0,0 +1,120 @@
1
+ # coding=utf-8
2
+ # Copyright 2026 XRTM Team. All rights reserved.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ r"""
17
+ Local file-based data source.
18
+
19
+ This module provides a DataSource implementation that reads forecast
20
+ questions from local JSON files.
21
+
22
+ Example:
23
+ >>> from xrtm.data.providers.local import LocalDataSource
24
+ >>> source = LocalDataSource("./questions.json")
25
+ >>> questions = await source.fetch_questions(limit=10)
26
+ """
27
+
28
+ import asyncio
29
+ import json
30
+ import logging
31
+ from typing import List, Optional
32
+
33
+ from xrtm.data.core import DataSource
34
+ from xrtm.data.core.schemas import ForecastQuestion
35
+
36
+ logger = logging.getLogger(__name__)
37
+
38
+ __all__ = ["LocalDataSource"]
39
+
40
+
41
+ class LocalDataSource(DataSource):
42
+ r"""
43
+ DataSource implementation that reads from a local JSON file.
44
+
45
+ This provider is useful for testing, development, and offline scenarios
46
+ where data has been pre-fetched and stored locally.
47
+
48
+ Args:
49
+ file_path: Path to the JSON file containing forecast questions.
50
+
51
+ Attributes:
52
+ file_path: The path to the local JSON file.
53
+
54
+ Example:
55
+ >>> source = LocalDataSource("./test_questions.json")
56
+ >>> questions = await source.fetch_questions(query="weather", limit=5)
57
+ """
58
+
59
+ def __init__(self, file_path: str) -> None:
60
+ self.file_path = file_path
61
+ self._questions: Optional[List[dict]] = None
62
+
63
+ def _fetch_questions_sync(self, query: Optional[str] = None, limit: int = 5) -> List[ForecastQuestion]:
64
+ r"""Synchronous implementation of question fetching."""
65
+ try:
66
+ if self._questions is None:
67
+ with open(self.file_path, "r") as f:
68
+ self._questions = json.load(f)
69
+
70
+ questions = []
71
+ for item in self._questions:
72
+ if not query or query.lower() in item.get("title", "").lower():
73
+ questions.append(ForecastQuestion(**item))
74
+
75
+ if len(questions) >= limit:
76
+ break
77
+ return questions
78
+ except Exception as e:
79
+ logger.error(f"Failed to read local questions from {self.file_path}: {e}")
80
+ return []
81
+
82
+ async def fetch_questions(self, query: Optional[str] = None, limit: int = 5) -> List[ForecastQuestion]:
83
+ r"""
84
+ Fetch questions from the local JSON file.
85
+
86
+ Args:
87
+ query: Optional search string to filter questions by title.
88
+ limit: Maximum number of questions to return.
89
+
90
+ Returns:
91
+ List of ForecastQuestion objects matching the criteria.
92
+ """
93
+ return await asyncio.to_thread(self._fetch_questions_sync, query, limit)
94
+
95
+ def _get_question_by_id_sync(self, question_id: str) -> Optional[ForecastQuestion]:
96
+ r"""Synchronous implementation of single question retrieval."""
97
+ try:
98
+ if self._questions is None:
99
+ with open(self.file_path, "r") as f:
100
+ self._questions = json.load(f)
101
+
102
+ for item in self._questions:
103
+ if item.get("id") == question_id:
104
+ return ForecastQuestion(**item)
105
+ return None
106
+ except Exception as e:
107
+ logger.error(f"Failed to retrieve question {question_id} from {self.file_path}: {e}")
108
+ return None
109
+
110
+ async def get_question_by_id(self, question_id: str) -> Optional[ForecastQuestion]:
111
+ r"""
112
+ Retrieve a single question by ID from the local file.
113
+
114
+ Args:
115
+ question_id: The unique identifier of the question.
116
+
117
+ Returns:
118
+ The ForecastQuestion if found, None otherwise.
119
+ """
120
+ return await asyncio.to_thread(self._get_question_by_id_sync, question_id)
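Note: a usage sketch for the `LocalDataSource` added above; the `questions.json` fixture written here is only an example of a JSON list matching the `ForecastQuestion` schema.

```python
# Illustrative usage; the fixture file name and contents are made up.
import asyncio
import json

from xrtm.data.providers.local import LocalDataSource


async def main() -> None:
    # Write a tiny fixture so the example is self-contained.
    with open("questions.json", "w") as f:
        json.dump([{"id": "q1", "title": "Will it rain tomorrow?"}], f)

    source = LocalDataSource("questions.json")
    questions = await source.fetch_questions(query="rain", limit=5)
    found = await source.get_question_by_id("q1")
    print(len(questions), found.title if found else None)


if __name__ == "__main__":
    asyncio.run(main())
```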
xrtm/data/providers/online/__init__.py ADDED
@@ -0,0 +1,20 @@
1
+ # coding=utf-8
2
+ # Copyright 2026 XRTM Team. All rights reserved.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ r"""Online API-based data providers."""
17
+
18
+ from xrtm.data.providers.online.polymarket import PolymarketSource
19
+
20
+ __all__ = ["PolymarketSource"]
xrtm/data/providers/data/online/polymarket.py → xrtm/data/providers/online/polymarket.py RENAMED
@@ -1,13 +1,37 @@
1
1
  # coding=utf-8
2
2
  # Copyright 2026 XRTM Team. All rights reserved.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ r"""
17
+ Polymarket Gamma API data source.
18
+
19
+ This module provides a DataSource implementation that fetches forecast
20
+ questions from the Polymarket Gamma API.
21
+
22
+ Example:
23
+ >>> from xrtm.data.providers.online import PolymarketSource
24
+ >>> source = PolymarketSource()
25
+ >>> questions = await source.fetch_questions(query="election", limit=5)
26
+ """
3
27
 
4
28
  import logging
5
29
  from typing import Any, Dict, List, Optional
6
30
 
7
31
  import aiohttp
8
32
 
9
- from xrtm.data.providers.data.base import DataSource
10
- from xrtm.data.schemas.forecast import ForecastQuestion, MetadataBase
33
+ from xrtm.data.core import DataSource
34
+ from xrtm.data.core.schemas import ForecastQuestion, MetadataBase
11
35
 
12
36
  logger = logging.getLogger(__name__)
13
37
 
@@ -17,11 +41,33 @@ __all__ = ["PolymarketSource"]
17
41
  class PolymarketSource(DataSource):
18
42
  r"""
19
43
  DataSource implementation that fetches from the Polymarket Gamma API.
44
+
45
+ This provider connects to Polymarket's public Gamma API to retrieve
46
+ event metadata for forecasting. For trade history with OrderFilled
47
+ events, see the subgraph provider (to be added).
48
+
49
+ Attributes:
50
+ API_BASE: Base URL for the Polymarket Gamma API.
51
+
52
+ Example:
53
+ >>> source = PolymarketSource()
54
+ >>> questions = await source.fetch_questions(limit=10)
55
+ >>> print(f"Fetched {len(questions)} questions")
20
56
  """
21
57
 
22
58
  API_BASE = "https://gamma-api.polymarket.com"
23
59
 
24
60
  async def fetch_questions(self, query: Optional[str] = None, limit: int = 5) -> List[ForecastQuestion]:
61
+ r"""
62
+ Fetch active forecast questions from Polymarket.
63
+
64
+ Args:
65
+ query: Optional search string to filter events.
66
+ limit: Maximum number of questions to return.
67
+
68
+ Returns:
69
+ List of ForecastQuestion objects from active markets.
70
+ """
25
71
  url = f"{self.API_BASE}/events?active=true&closed=false&limit={limit}"
26
72
  if query:
27
73
  url += f"&search={query}"
@@ -43,6 +89,15 @@ class PolymarketSource(DataSource):
43
89
  return []
44
90
 
45
91
  async def get_question_by_id(self, question_id: str) -> Optional[ForecastQuestion]:
92
+ r"""
93
+ Retrieve a single Polymarket event by ID.
94
+
95
+ Args:
96
+ question_id: The unique event identifier.
97
+
98
+ Returns:
99
+ The ForecastQuestion if found, None otherwise.
100
+ """
46
101
  url = f"{self.API_BASE}/events/{question_id}"
47
102
  try:
48
103
  async with aiohttp.ClientSession() as session:
@@ -55,10 +110,19 @@ class PolymarketSource(DataSource):
55
110
  return None
56
111
 
57
112
  def _normalize(self, item: Dict[str, Any]) -> ForecastQuestion:
113
+ r"""
114
+ Normalize Polymarket API response to ForecastQuestion schema.
115
+
116
+ Args:
117
+ item: Raw API response dict.
118
+
119
+ Returns:
120
+ Normalized ForecastQuestion instance.
121
+ """
58
122
  return ForecastQuestion(
59
123
  id=str(item.get("id", "")),
60
124
  title=item.get("title", "Untitled Event"),
61
- content=item.get("description", ""),
125
+ description=item.get("description", ""),
62
126
  metadata=MetadataBase(
63
127
  tags=item.get("tags", []),
64
128
  subject_type="binary",
xrtm/data/version.py ADDED
@@ -0,0 +1,28 @@
1
+ # coding=utf-8
2
+ # Copyright 2026 XRTM Team. All rights reserved.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ r"""
17
+ Version information for xrtm-data.
18
+
19
+ This module provides the single source of truth for the package version.
20
+ """
21
+
22
+ __all__ = ["__version__", "__author__", "__contact__", "__license__", "__copyright__"]
23
+
24
+ __version__ = "0.2.0"
25
+ __author__ = "XRTM Team"
26
+ __contact__ = "moy@xrtm.org"
27
+ __license__ = "Apache-2.0"
28
+ __copyright__ = "Copyright 2026 XRTM Team"
xrtm_data-0.1.0.dist-info/METADATA → xrtm_data-0.2.0.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: xrtm-data
3
- Version: 0.1.0
3
+ Version: 0.2.0
4
4
  Summary: The Snapshot Vault for XRTM.
5
5
  Author-email: XRTM Team <moy@xrtm.org>
6
6
  License: Apache-2.0
@@ -21,15 +21,27 @@ Dynamic: license-file
21
21
 
22
22
  [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
23
23
  [![Python](https://img.shields.io/badge/python-3.11+-blue.svg)](https://www.python.org/downloads/)
24
+ [![PyPI](https://img.shields.io/pypi/v/xrtm-data.svg)](https://pypi.org/project/xrtm-data/)
24
25
 
25
26
  **The Snapshot Vault for XRTM.**
26
27
 
27
28
  `xrtm-data` provides the rigid schemas and temporal sandboxing infrastructure required for zero-leakage forecasting. It defines the "Ground Truth" data structures that the rest of the ecosystem (Forecast, Eval, Train) relies on.
28
29
 
30
+ ## Part of the XRTM Ecosystem
31
+
32
+ ```
33
+ Layer 4: xrtm-train → (imports all)
34
+ Layer 3: xrtm-forecast → (imports eval, data)
35
+ Layer 2: xrtm-eval → (imports data)
36
+ Layer 1: xrtm-data → (zero dependencies) ← YOU ARE HERE
37
+ ```
38
+
39
+ `xrtm-data` is the foundation layer with **zero dependencies** on other xrtm packages.
40
+
29
41
  ## Installation
30
42
 
31
43
  ```bash
32
- uv pip install xrtm-data
44
+ pip install xrtm-data
33
45
  ```
34
46
 
35
47
  ## Core Primitives
@@ -54,6 +66,19 @@ prediction = ForecastOutput(
54
66
  ### 2. Zero Leakage
55
67
  The `MetadataBase` enforces a strict `snapshot_time`. This timestamp represents the "End of History" for the model. Any data point generated after this time is considered "Future Leakage" and is programmatically inaccessible during backtesting.
56
68
 
69
+ ## Project Structure
70
+
71
+ ```
72
+ src/xrtm/data/
73
+ ├── core/ # Interfaces & Schemas (domain-agnostic)
74
+ │ ├── interfaces.py # DataSource protocol
75
+ │ └── schemas/ # ForecastQuestion, ForecastOutput, etc.
76
+ ├── kit/ # Composable utilities (processors)
77
+ └── providers/ # External data source implementations
78
+ ├── local/ # LocalDataSource (JSON files)
79
+ └── online/ # PolymarketSource (Gamma API)
80
+ ```
81
+
57
82
  ## Development
58
83
 
59
84
  Prerequisites:
xrtm_data-0.2.0.dist-info/RECORD ADDED
@@ -0,0 +1,17 @@
1
+ xrtm/data/__init__.py,sha256=UTaJBIZScoMJMlnaopghWecvAOCSSBXM72JOLBfW-Yc,1578
2
+ xrtm/data/version.py,sha256=unDkjMG2k5bVOLaPToqkq8QKf10iSygOTCyxCOrHEig,964
3
+ xrtm/data/core/__init__.py,sha256=eONxw_v-xjXkErcC-BH2K15-KJx57aBZZxxvZM-qEcY,920
4
+ xrtm/data/core/interfaces.py,sha256=i8CZv-p4PXk1sTnoUAEHw5grbL2s3sbVZI01JbwI4LI,2589
5
+ xrtm/data/core/schemas/__init__.py,sha256=CAErO_XSAQMkfAYr2wVgdR_kYaiYG-hC4HXmbKOtdfY,1092
6
+ xrtm/data/core/schemas/forecast.py,sha256=IiFEMjZzhr9aJci4RbcX42BGK5gjNWh45s41BRMDukA,9607
7
+ xrtm/data/kit/__init__.py,sha256=C02rIncfR_bde2KZJ6O47qog3kdTzp__FjTyrrPhXWU,974
8
+ xrtm/data/providers/__init__.py,sha256=mg76kKnfOESoeHPArreDUZXaQtEcU3axJo_JntF-3RY,1013
9
+ xrtm/data/providers/local/__init__.py,sha256=CWucgoHFHMnOsUTvh3B5a7_JukmJFJzkPHTvBKf0YiI,741
10
+ xrtm/data/providers/local/csv.py,sha256=AwyvE5Kq0nyyLdkYzgUbZDShV28-uieBaI4A_V6R7WQ,4202
11
+ xrtm/data/providers/online/__init__.py,sha256=aB55dO83DfAQ2Pj0J8eUxI_LbfyGoQ5blFl9wbgoOvM,751
12
+ xrtm/data/providers/online/polymarket.py,sha256=Kllm1ZvVqM1C1BT3YVaLzLxg2c-lLX0Gz2dCXSZD2S0,4476
13
+ xrtm_data-0.2.0.dist-info/licenses/LICENSE,sha256=BexUTTsX5WlzyJ0Tqajo1h_LFYfCtfFgWdRaGltpm5I,11328
14
+ xrtm_data-0.2.0.dist-info/METADATA,sha256=XR2vdhbtRA8-WFuNqZqs1mWt3jmRn91zgCeUO7AMtBU,3032
15
+ xrtm_data-0.2.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
16
+ xrtm_data-0.2.0.dist-info/top_level.txt,sha256=Jz-i0a9P8GVrIR9KJTT-9wT95E1brww6U5o2QViAt20,5
17
+ xrtm_data-0.2.0.dist-info/RECORD,,
xrtm/data/providers/data/__init__.py DELETED
@@ -1,4 +0,0 @@
1
- from .base import DataSource
2
- from .local import LocalDataSource
3
-
4
- __all__ = ["DataSource", "LocalDataSource"]
xrtm/data/providers/data/base.py DELETED
@@ -1,24 +0,0 @@
1
- # coding=utf-8
2
- # Copyright 2026 XRTM Team. All rights reserved.
3
-
4
- import abc
5
- from typing import List, Optional
6
-
7
- from xrtm.data.schemas.forecast import ForecastQuestion
8
-
9
-
10
- class DataSource(abc.ABC):
11
- r"""
12
- Abstract interface for gathering or streaming forecasting workloads.
13
- """
14
-
15
- @abc.abstractmethod
16
- async def fetch_questions(self, query: Optional[str] = None, limit: int = 5) -> List[ForecastQuestion]:
17
- pass
18
-
19
- @abc.abstractmethod
20
- async def get_question_by_id(self, question_id: str) -> Optional[ForecastQuestion]:
21
- pass
22
-
23
-
24
- __all__ = ["DataSource"]
xrtm/data/providers/data/local.py DELETED
@@ -1,52 +0,0 @@
1
- # coding=utf-8
2
- # Copyright 2026 XRTM Team. All rights reserved.
3
-
4
- import json
5
- import logging
6
- from typing import List, Optional
7
-
8
- from xrtm.data.providers.data.base import DataSource
9
- from xrtm.data.schemas.forecast import ForecastQuestion
10
-
11
- logger = logging.getLogger(__name__)
12
-
13
- __all__ = ["LocalDataSource"]
14
-
15
-
16
- class LocalDataSource(DataSource):
17
- r"""
18
- DataSource implementation that reads from a local JSON file.
19
- """
20
-
21
- def __init__(self, file_path: str):
22
- self.file_path = file_path
23
-
24
- async def fetch_questions(self, query: Optional[str] = None, limit: int = 5) -> List[ForecastQuestion]:
25
- try:
26
- with open(self.file_path, "r") as f:
27
- data = json.load(f)
28
-
29
- questions = []
30
- for item in data:
31
- if not query or query.lower() in item.get("title", "").lower():
32
- questions.append(ForecastQuestion(**item))
33
-
34
- if len(questions) >= limit:
35
- break
36
- return questions
37
- except Exception as e:
38
- logger.error(f"Failed to read local questions from {self.file_path}: {e}")
39
- return []
40
-
41
- async def get_question_by_id(self, question_id: str) -> Optional[ForecastQuestion]:
42
- try:
43
- with open(self.file_path, "r") as f:
44
- data = json.load(f)
45
-
46
- for item in data:
47
- if item.get("id") == question_id:
48
- return ForecastQuestion(**item)
49
- return None
50
- except Exception as e:
51
- logger.error(f"Failed to retrieve question {question_id} from {self.file_path}: {e}")
52
- return None
xrtm/data/providers/data/online/__init__.py DELETED
@@ -1,3 +0,0 @@
1
- from .polymarket import PolymarketSource
2
-
3
- __all__ = ["PolymarketSource"]
xrtm/data/schemas/__init__.py DELETED
@@ -1,3 +0,0 @@
1
- from .forecast import ForecastQuestion, MetadataBase
2
-
3
- __all__ = ["MetadataBase", "ForecastQuestion"]
xrtm/data/schemas/forecast.py DELETED
@@ -1,137 +0,0 @@
1
- # coding=utf-8
2
- # Copyright 2026 XRTM Team. All rights reserved.
3
-
4
- from datetime import datetime, timezone
5
- from typing import Any, Dict, List, Optional
6
-
7
- from pydantic import AliasChoices, BaseModel, ConfigDict, Field
8
-
9
-
10
- class MetadataBase(BaseModel):
11
- r"""
12
- A foundational metadata block used to ensure consistency across schemas.
13
- """
14
-
15
- model_config = ConfigDict(extra="allow")
16
- id: str = Field(default_factory=lambda: "meta_" + str(datetime.now(timezone.utc).timestamp()))
17
- created_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
18
- snapshot_time: datetime = Field(
19
- default_factory=lambda: datetime.now(timezone.utc),
20
- description="Zero Leakage: The specific 'Time T' at which the world state was frozen."
21
- )
22
- tags: List[str] = Field(default_factory=list)
23
- subject_type: Optional[str] = None
24
- source_version: Optional[str] = None
25
- raw_data: Optional[Dict[str, Any]] = None
26
-
27
- def get(self, key: str, default: Any = None) -> Any:
28
- r"""Backward compatibility for dict-like access."""
29
- return getattr(self, key, default)
30
-
31
-
32
- class ForecastQuestion(BaseModel):
33
- r"""
34
- The standardized input format for a forecasting task.
35
- """
36
-
37
- id: str = Field(..., description="Unique identifier for the question")
38
- title: str = Field(..., description="The main question or statement being forecasted")
39
- description: Optional[str] = Field(
40
- None,
41
- alias="content",
42
- validation_alias=AliasChoices("description", "content"),
43
- description="Detailed context and background",
44
- )
45
- resolution_criteria: Optional[str] = Field(None, description="Explicit rules for ground truth determination")
46
- metadata: MetadataBase = Field(default_factory=MetadataBase)
47
-
48
- @property
49
- def content(self) -> Optional[str]:
50
- r"""Backward compatibility alias for description."""
51
- return self.description
52
-
53
-
54
- class CausalNode(BaseModel):
55
- r"""
56
- Represents a single step in a logical reasoning chain.
57
- """
58
- event: str = Field(..., description="The assumption or event in the chain")
59
- probability: Optional[float] = Field(None, ge=0, le=1)
60
- description: Optional[str] = None
61
- node_id: str = Field(
62
- default_factory=lambda: "node_" + str(datetime.now().timestamp()), description="Unique ID for graph operations"
63
- )
64
-
65
-
66
- class CausalEdge(BaseModel):
67
- r"""
68
- Represents a directed causal dependency between two reasoning nodes.
69
- """
70
- source: str
71
- target: str
72
- weight: float = Field(default=1.0, ge=0, le=1)
73
- description: Optional[str] = None
74
-
75
-
76
- class ConfidenceInterval(BaseModel):
77
- r"""Standard range for probabilistic calibration."""
78
- low: float
79
- high: float
80
- level: float = 0.9
81
-
82
-
83
- class ForecastOutput(BaseModel):
84
- r"""
85
- The structured result of an agent's forecasting reasoning, compliant with Governance v1.
86
- """
87
- question_id: str
88
- probability: float = Field(
89
- ...,
90
- alias="confidence",
91
- validation_alias=AliasChoices("probability", "confidence"),
92
- ge=0,
93
- le=1,
94
- description="The assigned probability of the primary outcome",
95
- )
96
- uncertainty: Optional[float] = Field(None, ge=0, le=1)
97
- confidence_interval: Optional[ConfidenceInterval] = None
98
- reasoning: str = Field(..., description="Narrative reasoning for the forecast")
99
- logical_trace: List[CausalNode] = Field(
100
- default_factory=list, description="The Bayesian-style sequence of assumptions (Mental Model)"
101
- )
102
- logical_edges: List[CausalEdge] = Field(
103
- default_factory=list, description="Causal dependencies between nodes in the trace"
104
- )
105
- structural_trace: List[str] = Field(default_factory=list, description="Order of graph nodes executed (Audit Trail)")
106
- calibration_metrics: Dict[str, Any] = Field(default_factory=dict)
107
- metadata: MetadataBase = Field(default_factory=MetadataBase)
108
-
109
- @property
110
- def confidence(self) -> float:
111
- r"""Backward compatibility alias for probability."""
112
- return self.probability
113
-
114
- @confidence.setter
115
- def confidence(self, value: float):
116
- r"""Backward compatibility setter for probability."""
117
- self.probability = value
118
-
119
- def to_networkx(self) -> Any:
120
- try:
121
- import networkx as nx
122
- except ImportError:
123
- raise ImportError("networkx is required for to_networkx(). Install it with 'pip install networkx'.")
124
- dg = nx.DiGraph()
125
- for node in self.logical_trace:
126
- dg.add_node(
127
- node.node_id,
128
- event=node.event,
129
- probability=node.probability,
130
- description=node.description,
131
- )
132
- for edge in self.logical_edges:
133
- dg.add_edge(edge.source, edge.target, weight=edge.weight, description=edge.description)
134
- return dg
135
-
136
-
137
- __all__ = ["MetadataBase", "ForecastQuestion", "CausalNode", "CausalEdge", "ForecastOutput"]
xrtm_data-0.1.0.dist-info/RECORD DELETED
@@ -1,13 +0,0 @@
1
- xrtm/data/__init__.py,sha256=e3DN4n5besbAxcEiFR5nBEXcaFvvtD1NulRXOWwSZSc,863
2
- xrtm/data/providers/data/__init__.py,sha256=53T9VrE33ndux16r0Z2vQQlpIUzV9yaudqXr2YOMgmY,109
3
- xrtm/data/providers/data/base.py,sha256=sXtAqJ9PF2z7TTRhD-RG1ORVkmmPJdi6DPFTlTblrec,585
4
- xrtm/data/providers/data/local.py,sha256=J7nQfaT4yGJF3-dYNZQM5zl6hB8pEVz48YGqhzk2HXY,1645
5
- xrtm/data/providers/data/online/__init__.py,sha256=AD6BTffDyajN_qZfpjzTOKk6KhFTBh1T51bAOzHew3o,73
6
- xrtm/data/providers/data/online/polymarket.py,sha256=VOZtl_lc6wpQ3V2sAKIEJVAMC4P9lnkZ1wL0ERYIBi4,2418
7
- xrtm/data/schemas/__init__.py,sha256=Y6OSOai_6qd9b0-ZhWPSNZfWXI3STM7kta60DjF9hBY,101
8
- xrtm/data/schemas/forecast.py,sha256=Wu5W_ThUTQSjm6Sx3WMcBw-MwlCMqAqSbcoW1d3moHk,4952
9
- xrtm_data-0.1.0.dist-info/licenses/LICENSE,sha256=BexUTTsX5WlzyJ0Tqajo1h_LFYfCtfFgWdRaGltpm5I,11328
10
- xrtm_data-0.1.0.dist-info/METADATA,sha256=9X7NnYWCptYXNC7GIO5I8nZz4n34RIeCEF1A4MrxqSY,2124
11
- xrtm_data-0.1.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
12
- xrtm_data-0.1.0.dist-info/top_level.txt,sha256=Jz-i0a9P8GVrIR9KJTT-9wT95E1brww6U5o2QViAt20,5
13
- xrtm_data-0.1.0.dist-info/RECORD,,