xrtm-data 0.2.0__tar.gz → 0.2.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. {xrtm_data-0.2.0 → xrtm_data-0.2.2}/PKG-INFO +6 -2
  2. {xrtm_data-0.2.0 → xrtm_data-0.2.2}/pyproject.toml +9 -3
  3. xrtm_data-0.2.2/src/xrtm/data/cli/__init__.py +322 -0
  4. {xrtm_data-0.2.0 → xrtm_data-0.2.2}/src/xrtm/data/core/schemas/__init__.py +11 -3
  5. {xrtm_data-0.2.0 → xrtm_data-0.2.2}/src/xrtm/data/core/schemas/forecast.py +45 -8
  6. xrtm_data-0.2.2/src/xrtm/data/core/schemas/prior.py +254 -0
  7. xrtm_data-0.2.2/src/xrtm/data/core/schemas/trade.py +173 -0
  8. xrtm_data-0.2.2/src/xrtm/data/corpora/__init__.py +36 -0
  9. xrtm_data-0.2.2/src/xrtm/data/corpora/real_binary.py +506 -0
  10. {xrtm_data-0.2.0 → xrtm_data-0.2.2}/src/xrtm/data/kit/__init__.py +14 -8
  11. xrtm_data-0.2.2/src/xrtm/data/kit/processors/__init__.py +212 -0
  12. {xrtm_data-0.2.0 → xrtm_data-0.2.2}/src/xrtm/data/providers/__init__.py +2 -1
  13. {xrtm_data-0.2.0 → xrtm_data-0.2.2}/src/xrtm/data/providers/local/csv.py +74 -29
  14. {xrtm_data-0.2.0 → xrtm_data-0.2.2}/src/xrtm/data/providers/online/polymarket.py +37 -2
  15. xrtm_data-0.2.2/src/xrtm/data/providers/subgraph/__init__.py +25 -0
  16. xrtm_data-0.2.2/src/xrtm/data/providers/subgraph/polymarket.py +303 -0
  17. {xrtm_data-0.2.0 → xrtm_data-0.2.2}/src/xrtm/data/version.py +1 -1
  18. {xrtm_data-0.2.0 → xrtm_data-0.2.2}/src/xrtm_data.egg-info/PKG-INFO +6 -2
  19. {xrtm_data-0.2.0 → xrtm_data-0.2.2}/src/xrtm_data.egg-info/SOURCES.txt +15 -0
  20. xrtm_data-0.2.2/src/xrtm_data.egg-info/entry_points.txt +2 -0
  21. {xrtm_data-0.2.0 → xrtm_data-0.2.2}/src/xrtm_data.egg-info/requires.txt +4 -0
  22. xrtm_data-0.2.2/tests/test_beta_fitter.py +147 -0
  23. xrtm_data-0.2.2/tests/test_cli_loading.py +42 -0
  24. {xrtm_data-0.2.0 → xrtm_data-0.2.2}/tests/test_local_datasource.py +74 -0
  25. xrtm_data-0.2.2/tests/test_polymarket_source.py +101 -0
  26. xrtm_data-0.2.2/tests/test_polymarket_subgraph.py +208 -0
  27. xrtm_data-0.2.2/tests/test_prior_schemas.py +196 -0
  28. xrtm_data-0.2.2/tests/test_real_binary_corpus.py +110 -0
  29. {xrtm_data-0.2.0 → xrtm_data-0.2.2}/tests/test_schemas.py +26 -0
  30. {xrtm_data-0.2.0 → xrtm_data-0.2.2}/LICENSE +0 -0
  31. {xrtm_data-0.2.0 → xrtm_data-0.2.2}/README.md +0 -0
  32. {xrtm_data-0.2.0 → xrtm_data-0.2.2}/setup.cfg +0 -0
  33. {xrtm_data-0.2.0 → xrtm_data-0.2.2}/src/xrtm/data/__init__.py +0 -0
  34. {xrtm_data-0.2.0 → xrtm_data-0.2.2}/src/xrtm/data/core/__init__.py +0 -0
  35. {xrtm_data-0.2.0 → xrtm_data-0.2.2}/src/xrtm/data/core/interfaces.py +0 -0
  36. {xrtm_data-0.2.0 → xrtm_data-0.2.2}/src/xrtm/data/providers/local/__init__.py +0 -0
  37. {xrtm_data-0.2.0 → xrtm_data-0.2.2}/src/xrtm/data/providers/online/__init__.py +0 -0
  38. {xrtm_data-0.2.0 → xrtm_data-0.2.2}/src/xrtm_data.egg-info/dependency_links.txt +0 -0
  39. {xrtm_data-0.2.0 → xrtm_data-0.2.2}/src/xrtm_data.egg-info/top_level.txt +0 -0
@@ -1,14 +1,18 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: xrtm-data
3
- Version: 0.2.0
3
+ Version: 0.2.2
4
4
  Summary: The Snapshot Vault for XRTM.
5
5
  Author-email: XRTM Team <moy@xrtm.org>
6
- License: Apache-2.0
6
+ License-Expression: Apache-2.0
7
7
  Requires-Python: >=3.11
8
8
  Description-Content-Type: text/markdown
9
9
  License-File: LICENSE
10
10
  Requires-Dist: pydantic>=2.0.0
11
11
  Requires-Dist: aiohttp>=3.9.0
12
+ Requires-Dist: scipy>=1.11.0
13
+ Requires-Dist: click>=8.0.0
14
+ Requires-Dist: rich>=13.0.0
15
+ Requires-Dist: pyarrow>=14.0.0
12
16
  Provides-Extra: dev
13
17
  Requires-Dist: pytest>=7.0.0; extra == "dev"
14
18
  Requires-Dist: pytest-asyncio>=0.21.0; extra == "dev"
@@ -4,17 +4,21 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "xrtm-data"
7
- version = "0.2.0"
7
+ version = "0.2.2"
8
8
  description = "The Snapshot Vault for XRTM."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.11"
11
- license = {text = "Apache-2.0"}
11
+ license = "Apache-2.0"
12
12
  authors = [
13
13
  {name = "XRTM Team", email = "moy@xrtm.org"}
14
14
  ]
15
15
  dependencies = [
16
16
  "pydantic>=2.0.0",
17
17
  "aiohttp>=3.9.0",
18
+ "scipy>=1.11.0",
19
+ "click>=8.0.0",
20
+ "rich>=13.0.0",
21
+ "pyarrow>=14.0.0",
18
22
  ]
19
23
 
20
24
  [project.optional-dependencies]
@@ -26,12 +30,14 @@ dev = [
26
30
  "mypy>=1.0.0",
27
31
  ]
28
32
 
33
+ [project.scripts]
34
+ xrtm-data = "xrtm.data.cli:main"
35
+
29
36
  [tool.setuptools]
30
37
  package-dir = {"" = "src"}
31
38
  packages = {find = {where = ["src"], include = ["xrtm*"], namespaces = true}}
32
39
 
33
40
  [tool.pytest.ini_options]
34
- pythonpath = ["."]
35
41
  testpaths = ["tests"]
36
42
  asyncio_mode = "strict"
37
43
 
@@ -0,0 +1,322 @@
1
+ # coding=utf-8
2
+ # Copyright 2026 XRTM Team. All rights reserved.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ r"""
17
+ CLI entry point for xrtm-data.
18
+
19
+ Provides commands for:
20
+ - Collecting trade data from Polymarket
21
+ - Fitting Beta priors from trade history
22
+ - Caching data to Parquet files
23
+
24
+ Example:
25
+ $ xrtm-data collect --market-id 0x... --days 30 -o trades.parquet
26
+ $ xrtm-data fit-prior --input trades.parquet -o prior.json
27
+ """
28
+
29
+ import asyncio
30
+ import json
31
+ from datetime import datetime, timedelta, timezone
32
+ from pathlib import Path
33
+ from typing import Optional
34
+
35
+ import click
36
+ from rich.console import Console
37
+ from rich.panel import Panel
38
+ from rich.progress import Progress, SpinnerColumn, TextColumn
39
+ from rich.table import Table
40
+
41
+ from xrtm.data.version import __version__
42
+
43
+ console = Console()
44
+
45
+
46
@click.group()
@click.version_option(version=__version__)
def main() -> None:
    r"""xrtm-data: Data collection and preprocessing for xRTM training."""
    # Root command group. It carries no logic of its own: the sub-commands
    # register themselves on it through ``@main.command(...)``, and click
    # renders the docstring above as the top-level ``--help`` text.
51
+
52
+
53
def _as_utc(moment: datetime) -> datetime:
    r"""Return ``moment`` as a UTC-aware datetime; naive values are taken to already be UTC."""
    if moment.tzinfo is None:
        return moment.replace(tzinfo=timezone.utc)
    return moment.astimezone(timezone.utc)


@main.command()
@click.option("--market-id", "-m", required=True, help="Polymarket market ID (hex address)")
@click.option("--days", "-d", default=30, help="Number of days of history to fetch")
@click.option("--start", type=click.DateTime(), help="Start date (overrides --days)")
@click.option("--end", type=click.DateTime(), help="End date (default: now)")
@click.option("--output", "-o", required=True, type=click.Path(), help="Output file path (.parquet or .json)")
@click.option("--force", "-f", is_flag=True, help="Overwrite existing file")
def collect(
    market_id: str,
    days: int,
    start: Optional[datetime],
    end: Optional[datetime],
    output: str,
    force: bool,
):
    r"""
    Collect trade data from Polymarket.

    Fetches historical trades for a given market and saves to Parquet or JSON.
    Supports caching — will skip if output file exists unless --force is used.

    Example:
        xrtm-data collect -m 0x1234... -d 30 -o data/trades.parquet
    """
    # NOTE: the docstring above is rendered by click as the command's --help
    # text, so implementation notes live in comments instead.
    #
    # market_id: market whose trades are fetched.
    # days:      look-back length, used only when --start is absent.
    # start/end: optional bounds; click.DateTime hands them over as naive
    #            datetimes, which are interpreted as UTC below.
    # output:    destination file; a ".parquet" suffix selects Parquet, any
    #            other suffix is written as JSON.
    # force:     overwrite an existing output instead of skipping.
    output_path = Path(output)

    # Cache check: an existing output file counts as an already completed run.
    if output_path.exists() and not force:
        console.print(f"[yellow]⚠ File exists:[/yellow] {output_path}")
        console.print(" Use --force to overwrite, or specify different output.")
        return

    # Resolve the time range. Both bounds are normalised to UTC-aware values:
    # previously only --start was made aware, so supplying --end produced a
    # naive end next to an aware start (or a fully naive range with --days).
    end_time = _as_utc(end) if end is not None else datetime.now(timezone.utc)
    start_time = _as_utc(start) if start is not None else end_time - timedelta(days=days)

    console.print(Panel(
        f"[bold blue]Collecting Polymarket Trades[/bold blue]\n"
        f"Market: {market_id[:16]}...\n"
        f"Range: {start_time.date()} → {end_time.date()}",
        title="xrtm-data",
    ))

    async def _collect():
        # Imported lazily so the provider is only loaded when this command runs.
        from xrtm.data.providers.subgraph import PolymarketTradeSource

        source = PolymarketTradeSource()

        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            console=console,
        ) as progress:
            # total=None: the number of trades is unknown up front, so the
            # task is shown as an indeterminate spinner.
            task = progress.add_task("Fetching trades...", total=None)

            window = await source.fetch_trade_window(
                market_id=market_id,
                start_time=start_time,
                end_time=end_time,
            )

            progress.update(task, description=f"Fetched {len(window.trades)} trades")

        return window

    window = asyncio.run(_collect())

    # Persist the result; the file suffix picks the format.
    output_path.parent.mkdir(parents=True, exist_ok=True)

    if output_path.suffix == ".parquet":
        _save_parquet(window, output_path)
    else:
        _save_json(window, output_path)

    console.print(f"[green]✓ Saved {len(window.trades)} trades to:[/green] {output_path}")

    # Summary table built from the aggregates exposed by the returned window.
    table = Table(title="Trade Summary")
    table.add_column("Metric", style="cyan")
    table.add_column("Value", style="green")
    table.add_row("Total Trades", str(window.trade_count))
    table.add_row("Total Volume", f"${window.total_volume:,.2f}")
    table.add_row("VWAP", f"{window.volume_weighted_price:.4f}")
    console.print(table)
142
+
143
+
144
@main.command("fit-prior")
@click.option("--input", "-i", "input_path", required=True, type=click.Path(exists=True), help="Input trades file")
@click.option("--output", "-o", required=True, type=click.Path(), help="Output prior file (.json)")
@click.option("--half-life", "-h", default=24.0, help="Half-life in hours for decay weighting")
@click.option("--min-concentration", default=2.0, help="Minimum concentration (α+β)")
def fit_prior(input_path: str, output: str, half_life: float, min_concentration: float):
    r"""
    Fit a Beta prior from trade data.

    Reads trade history and fits a Beta distribution using exponential
    decay weighting (recent trades weighted more heavily).

    Example:
        xrtm-data fit-prior -i trades.parquet -o prior.json -h 24.0
    """
    # The docstring doubles as the click --help text; notes go in comments.
    from xrtm.data.kit.processors import fit_beta_exponential_decay

    banner = (
        f"[bold blue]Fitting Beta Prior[/bold blue]\n"
        f"Input: {input_path}\n"
        f"Half-life: {half_life} hours"
    )
    console.print(Panel(banner, title="xrtm-data"))

    # Step 1: read the trade history back from disk.
    history = _load_trades(Path(input_path))
    console.print(f"Loaded {len(history)} trades")

    # Step 2: fit the prior with the requested decay settings.
    fitted = fit_beta_exponential_decay(
        history,
        half_life_hours=half_life,
        min_concentration=min_concentration,
    )

    # Step 3: serialise the prior plus provenance metadata as JSON.
    destination = Path(output)
    destination.parent.mkdir(parents=True, exist_ok=True)

    payload = fitted.to_distribution_dict()
    payload["metadata"] = dict(
        source=str(input_path),
        trade_count=len(history),
        half_life_hours=half_life,
        fitted_at=datetime.now(timezone.utc).isoformat(),
    )

    with open(destination, "w") as handle:
        json.dump(payload, handle, indent=2)

    console.print(f"[green]✓ Saved prior to:[/green] {destination}")

    # Step 4: show the fitted parameters and a 90% credible interval.
    low, high = fitted.credible_interval(0.9)
    summary = Table(title="Fitted Prior")
    summary.add_column("Parameter", style="cyan")
    summary.add_column("Value", style="green")
    rows = (
        ("α (alpha)", f"{fitted.alpha:.4f}"),
        ("β (beta)", f"{fitted.beta:.4f}"),
        ("Mean", f"{fitted.mean:.4f}"),
        ("90% CI", f"[{low:.4f}, {high:.4f}]"),
    )
    for label, value in rows:
        summary.add_row(label, value)
    console.print(summary)
206
+
207
+
208
@main.command("info")
@click.argument("file_path", type=click.Path(exists=True))
def info(file_path: str):
    r"""
    Show information about a data file.

    Displays summary statistics for trade files or prior files.
    """
    # The docstring doubles as the click --help text; notes go in comments.
    path = Path(file_path)

    if path.suffix == ".json":
        with open(path) as f:
            data = json.load(f)

        if isinstance(data, dict) and "family" in data:
            # A "family" key marks the file as a serialised prior.
            console.print(Panel(
                f"[bold]Prior File[/bold]\n"
                f"Family: {data['family']}\n"
                f"α: {data.get('alpha', 'N/A')}\n"
                f"β: {data.get('beta', 'N/A')}",
                title=path.name,
            ))
        elif isinstance(data, dict):
            console.print(f"JSON file with {len(data)} keys")
        elif isinstance(data, list):
            # A top-level array has items, not keys.
            console.print(f"JSON file with {len(data)} items")
        else:
            # Scalars (number, string, bool, null) have no meaningful length.
            console.print(f"JSON file containing a single {type(data).__name__} value")
    elif path.suffix == ".parquet":
        import pyarrow.parquet as pq

        table = pq.read_table(path)
        console.print(Panel(
            f"[bold]Parquet File[/bold]\n"
            f"Rows: {table.num_rows}\n"
            f"Columns: {table.column_names}",
            title=path.name,
        ))
    else:
        # Previously any other suffix fell through and printed nothing at all.
        console.print(f"[yellow]⚠ Unsupported file type:[/yellow] {path.suffix or '(no extension)'}")
        console.print(" Expected a .json or .parquet file.")
243
+
244
+
245
def _save_parquet(window, path: Path) -> None:
    r"""
    Write the trades of ``window`` to ``path`` as a Parquet table.

    One row is produced per trade, with the columns ``price``, ``amount``,
    ``timestamp`` (stored as ISO-8601 text), ``maker`` and ``taker``.
    """
    import pyarrow as pa
    import pyarrow.parquet as pq

    # Column-oriented buffers, filled in a single pass over the trades.
    columns = {"price": [], "amount": [], "timestamp": [], "maker": [], "taker": []}
    for trade in window.trades:
        columns["price"].append(trade.price)
        columns["amount"].append(trade.amount)
        columns["timestamp"].append(trade.timestamp.isoformat())
        columns["maker"].append(trade.maker)
        columns["taker"].append(trade.taker)

    pq.write_table(pa.table(columns), path)
260
+
261
+
262
+ def _save_json(window, path: Path) -> None:
263
+ r"""Save TradeWindow to JSON format."""
264
+ data = {
265
+ "market_id": window.market_id,
266
+ "start_time": window.start_time.isoformat(),
267
+ "end_time": window.end_time.isoformat(),
268
+ "trades": [
269
+ {
270
+ "price": t.price,
271
+ "amount": t.amount,
272
+ "timestamp": t.timestamp.isoformat(),
273
+ "maker": t.maker,
274
+ "taker": t.taker,
275
+ }
276
+ for t in window.trades
277
+ ],
278
+ }
279
+
280
+ with open(path, "w") as f:
281
+ json.dump(data, f, indent=2)
282
+
283
+
284
def _load_trades(path: Path) -> list:
    r"""
    Load trades from Parquet or JSON.

    A ``.parquet`` suffix is read with pyarrow; any other suffix is parsed as
    JSON. A JSON document may be either an object holding the trade list
    under ``"trades"`` or a bare array of trade objects.

    Args:
        path: File to read.

    Returns:
        One ``TradeEvent`` per stored record, in file order.

    Raises:
        ValueError: If the JSON document is an object without a ``"trades"`` key.
    """
    from xrtm.data.core.schemas import TradeEvent

    if path.suffix == ".parquet":
        import pyarrow.parquet as pq

        records = pq.read_table(path).to_pylist()
    else:
        with open(path) as f:
            data = json.load(f)

        if isinstance(data, dict):
            # Without this check the dict's keys were iterated as if they were
            # trades, which failed later with an opaque TypeError.
            if "trades" not in data:
                raise ValueError(f"{path}: JSON object has no 'trades' key")
            records = data["trades"]
        else:
            # A bare array is the trade list itself; calling ``.get`` on it
            # used to raise AttributeError.
            records = data

    # Both formats store the timestamp as ISO-8601 text.
    return [
        TradeEvent(
            price=record["price"],
            amount=record["amount"],
            timestamp=datetime.fromisoformat(record["timestamp"]),
            maker=record["maker"],
            taker=record["taker"],
        )
        for record in records
    ]
316
+
317
+
318
+ if __name__ == "__main__":
319
+ main()
320
+
321
+
322
+ __all__ = ["main"]
@@ -14,10 +14,9 @@
14
14
  # limitations under the License.
15
15
 
16
16
  r"""
17
- Core data schemas for xrtm-data.
17
+ Core schemas for xrtm-data.
18
18
 
19
- This module exports all foundational Pydantic models used across the
20
- xrtm ecosystem for representing forecasting data structures.
19
+ This module exports all Pydantic schemas used across the xrtm ecosystem.
21
20
  """
22
21
 
23
22
  from xrtm.data.core.schemas.forecast import (
@@ -28,12 +27,21 @@ from xrtm.data.core.schemas.forecast import (
28
27
  ForecastQuestion,
29
28
  MetadataBase,
30
29
  )
30
+ from xrtm.data.core.schemas.prior import BetaPrior, PriorState
31
+ from xrtm.data.core.schemas.trade import TradeEvent, TradeWindow
31
32
 
32
33
  __all__ = [
34
+ # Forecast schemas
33
35
  "MetadataBase",
34
36
  "ForecastQuestion",
35
37
  "ForecastOutput",
36
38
  "CausalNode",
37
39
  "CausalEdge",
38
40
  "ConfidenceInterval",
41
+ # Prior schemas
42
+ "BetaPrior",
43
+ "PriorState",
44
+ # Trade schemas
45
+ "TradeEvent",
46
+ "TradeWindow",
39
47
  ]
@@ -28,7 +28,7 @@ Example:
28
28
  from datetime import datetime, timezone
29
29
  from typing import Any, Dict, List, Optional
30
30
 
31
- from pydantic import AliasChoices, BaseModel, ConfigDict, Field
31
+ from pydantic import AliasChoices, BaseModel, ConfigDict, Field, model_validator
32
32
 
33
33
 
34
34
  class MetadataBase(BaseModel):
@@ -202,6 +202,41 @@ class ForecastOutput(BaseModel):
202
202
  calibration_metrics: Dict[str, Any] = Field(default_factory=dict, description="Performance metrics")
203
203
  metadata: MetadataBase = Field(default_factory=MetadataBase) # type: ignore[arg-type]
204
204
 
205
@model_validator(mode="before")
@classmethod
def _apply_reasoning_trace_alias(cls, data: Any) -> Any:
    r"""
    Map a governance-style ``reasoning_trace`` input onto the runtime trace fields.

    Non-dict input, or a dict without ``reasoning_trace``, is returned untouched.
    Otherwise a shallow copy is returned in which ``reasoning``,
    ``logical_trace`` and ``logical_edges`` are filled from the trace, but
    only where the caller did not already supply them.
    """
    if not (isinstance(data, dict) and "reasoning_trace" in data):
        return data

    merged = dict(data)  # shallow copy: the caller's mapping is never mutated
    trace = merged["reasoning_trace"]

    # A bare list is taken to be the node list itself.
    if isinstance(trace, list):
        merged.setdefault("logical_trace", trace)
        return merged
    if not isinstance(trace, dict):
        return merged

    narrative = trace.get("narrative")
    if isinstance(narrative, str):
        merged.setdefault("reasoning", narrative)

    graph = trace.get("causal_graph")
    if isinstance(graph, dict):
        for field_name, graph_key in (("logical_trace", "nodes"), ("logical_edges", "edges")):
            if graph_key in graph:
                merged.setdefault(field_name, graph[graph_key])

    return merged
228
+
229
@property
def reasoning_trace(self) -> Dict[str, Any]:
    r"""
    Governance-compatible view of the narrative and causal graph.

    Returns a dict with ``narrative`` (the ``reasoning`` attribute) and
    ``causal_graph``, whose ``nodes`` and ``edges`` are the dumped entries of
    ``logical_trace`` and ``logical_edges`` with ``None`` values omitted.
    """
    dumped_nodes = [entry.model_dump(exclude_none=True) for entry in self.logical_trace]
    dumped_edges = [entry.model_dump(exclude_none=True) for entry in self.logical_edges]
    graph = {"nodes": dumped_nodes, "edges": dumped_edges}
    return {"narrative": self.reasoning, "causal_graph": graph}
239
+
205
240
  @property
206
241
  def confidence(self) -> float:
207
242
  r"""Backward compatibility alias for probability."""
@@ -227,15 +262,17 @@ class ForecastOutput(BaseModel):
227
262
  except ImportError:
228
263
  raise ImportError("networkx is required for to_networkx(). Install it with 'uv add networkx'.")
229
264
  dg = nx.DiGraph()
230
- for node in self.logical_trace:
231
- dg.add_node(
265
+ dg.add_nodes_from(
266
+ (
232
267
  node.node_id,
233
- event=node.event,
234
- probability=node.probability,
235
- description=node.description,
268
+ {"event": node.event, "probability": node.probability, "description": node.description},
236
269
  )
237
- for edge in self.logical_edges:
238
- dg.add_edge(edge.source, edge.target, weight=edge.weight, description=edge.description)
270
+ for node in self.logical_trace
271
+ )
272
+ dg.add_edges_from(
273
+ (edge.source, edge.target, {"weight": edge.weight, "description": edge.description})
274
+ for edge in self.logical_edges
275
+ )
239
276
  return dg
240
277
 
241
278