xrtm-data 0.2.0__tar.gz → 0.2.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. {xrtm_data-0.2.0/src/xrtm_data.egg-info → xrtm_data-0.2.1}/PKG-INFO +5 -1
  2. {xrtm_data-0.2.0 → xrtm_data-0.2.1}/pyproject.toml +9 -2
  3. xrtm_data-0.2.1/src/xrtm/data/cli/__init__.py +323 -0
  4. {xrtm_data-0.2.0 → xrtm_data-0.2.1}/src/xrtm/data/core/schemas/__init__.py +11 -3
  5. xrtm_data-0.2.1/src/xrtm/data/core/schemas/prior.py +254 -0
  6. xrtm_data-0.2.1/src/xrtm/data/core/schemas/trade.py +173 -0
  7. {xrtm_data-0.2.0 → xrtm_data-0.2.1}/src/xrtm/data/kit/__init__.py +14 -8
  8. xrtm_data-0.2.1/src/xrtm/data/kit/processors/__init__.py +206 -0
  9. {xrtm_data-0.2.0 → xrtm_data-0.2.1}/src/xrtm/data/providers/__init__.py +2 -1
  10. xrtm_data-0.2.1/src/xrtm/data/providers/subgraph/__init__.py +25 -0
  11. xrtm_data-0.2.1/src/xrtm/data/providers/subgraph/polymarket.py +303 -0
  12. {xrtm_data-0.2.0 → xrtm_data-0.2.1}/src/xrtm/data/version.py +1 -1
  13. {xrtm_data-0.2.0 → xrtm_data-0.2.1/src/xrtm_data.egg-info}/PKG-INFO +5 -1
  14. {xrtm_data-0.2.0 → xrtm_data-0.2.1}/src/xrtm_data.egg-info/SOURCES.txt +10 -0
  15. xrtm_data-0.2.1/src/xrtm_data.egg-info/entry_points.txt +2 -0
  16. {xrtm_data-0.2.0 → xrtm_data-0.2.1}/src/xrtm_data.egg-info/requires.txt +4 -0
  17. xrtm_data-0.2.1/tests/test_beta_fitter.py +141 -0
  18. xrtm_data-0.2.1/tests/test_polymarket_subgraph.py +208 -0
  19. xrtm_data-0.2.1/tests/test_prior_schemas.py +196 -0
  20. {xrtm_data-0.2.0 → xrtm_data-0.2.1}/LICENSE +0 -0
  21. {xrtm_data-0.2.0 → xrtm_data-0.2.1}/README.md +0 -0
  22. {xrtm_data-0.2.0 → xrtm_data-0.2.1}/setup.cfg +0 -0
  23. {xrtm_data-0.2.0 → xrtm_data-0.2.1}/src/xrtm/data/__init__.py +0 -0
  24. {xrtm_data-0.2.0 → xrtm_data-0.2.1}/src/xrtm/data/core/__init__.py +0 -0
  25. {xrtm_data-0.2.0 → xrtm_data-0.2.1}/src/xrtm/data/core/interfaces.py +0 -0
  26. {xrtm_data-0.2.0 → xrtm_data-0.2.1}/src/xrtm/data/core/schemas/forecast.py +0 -0
  27. {xrtm_data-0.2.0 → xrtm_data-0.2.1}/src/xrtm/data/providers/local/__init__.py +0 -0
  28. {xrtm_data-0.2.0 → xrtm_data-0.2.1}/src/xrtm/data/providers/local/csv.py +0 -0
  29. {xrtm_data-0.2.0 → xrtm_data-0.2.1}/src/xrtm/data/providers/online/__init__.py +0 -0
  30. {xrtm_data-0.2.0 → xrtm_data-0.2.1}/src/xrtm/data/providers/online/polymarket.py +0 -0
  31. {xrtm_data-0.2.0 → xrtm_data-0.2.1}/src/xrtm_data.egg-info/dependency_links.txt +0 -0
  32. {xrtm_data-0.2.0 → xrtm_data-0.2.1}/src/xrtm_data.egg-info/top_level.txt +0 -0
  33. {xrtm_data-0.2.0 → xrtm_data-0.2.1}/tests/test_local_datasource.py +0 -0
  34. {xrtm_data-0.2.0 → xrtm_data-0.2.1}/tests/test_schemas.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: xrtm-data
3
- Version: 0.2.0
3
+ Version: 0.2.1
4
4
  Summary: The Snapshot Vault for XRTM.
5
5
  Author-email: XRTM Team <moy@xrtm.org>
6
6
  License: Apache-2.0
@@ -9,6 +9,10 @@ Description-Content-Type: text/markdown
9
9
  License-File: LICENSE
10
10
  Requires-Dist: pydantic>=2.0.0
11
11
  Requires-Dist: aiohttp>=3.9.0
12
+ Requires-Dist: scipy>=1.11.0
13
+ Requires-Dist: click>=8.0.0
14
+ Requires-Dist: rich>=13.0.0
15
+ Requires-Dist: pyarrow>=14.0.0
12
16
  Provides-Extra: dev
13
17
  Requires-Dist: pytest>=7.0.0; extra == "dev"
14
18
  Requires-Dist: pytest-asyncio>=0.21.0; extra == "dev"
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "xrtm-data"
7
- version = "0.2.0"
7
+ version = "0.2.1"
8
8
  description = "The Snapshot Vault for XRTM."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.11"
@@ -15,6 +15,10 @@ authors = [
15
15
  dependencies = [
16
16
  "pydantic>=2.0.0",
17
17
  "aiohttp>=3.9.0",
18
+ "scipy>=1.11.0",
19
+ "click>=8.0.0",
20
+ "rich>=13.0.0",
21
+ "pyarrow>=14.0.0",
18
22
  ]
19
23
 
20
24
  [project.optional-dependencies]
@@ -26,12 +30,15 @@ dev = [
26
30
  "mypy>=1.0.0",
27
31
  ]
28
32
 
33
+ [project.scripts]
34
+ xrtm-data = "xrtm.data.cli:main"
35
+
29
36
  [tool.setuptools]
30
37
  package-dir = {"" = "src"}
31
38
  packages = {find = {where = ["src"], include = ["xrtm*"], namespaces = true}}
32
39
 
33
40
  [tool.pytest.ini_options]
34
- pythonpath = ["."]
41
+ pythonpath = ["src"]
35
42
  testpaths = ["tests"]
36
43
  asyncio_mode = "strict"
37
44
 
@@ -0,0 +1,323 @@
1
+ # coding=utf-8
2
+ # Copyright 2026 XRTM Team. All rights reserved.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ r"""
17
+ CLI entry point for xrtm-data.
18
+
19
+ Provides commands for:
20
+ - Collecting trade data from Polymarket
21
+ - Fitting Beta priors from trade history
22
+ - Caching data to Parquet files
23
+
24
+ Example:
25
+ $ xrtm-data collect --market-id 0x... --days 30 -o trades.parquet
26
+ $ xrtm-data fit-prior --input trades.parquet -o prior.json
27
+ """
28
+
29
+ import asyncio
30
+ import json
31
+ from datetime import datetime, timedelta, timezone
32
+ from pathlib import Path
33
+ from typing import Optional
34
+
35
+ import click
36
+ from rich.console import Console
37
+ from rich.panel import Panel
38
+ from rich.progress import Progress, SpinnerColumn, TextColumn
39
+ from rich.table import Table
40
+
41
+ from xrtm.data.version import __version__
42
+
43
+ console = Console()
44
+
45
+
46
@click.group()
@click.version_option(version=__version__)
def main():
    r"""xrtm-data: Data collection and preprocessing for xRTM training."""
    # The group itself does no work; sub-commands are attached to it through
    # the ``@main.command(...)`` decorators further down in this module.
51
+
52
+
53
@main.command()
@click.option("--market-id", "-m", required=True, help="Polymarket market ID (hex address)")
@click.option("--days", "-d", default=30, help="Number of days of history to fetch")
@click.option("--start", type=click.DateTime(), help="Start date (overrides --days)")
@click.option("--end", type=click.DateTime(), help="End date (default: now)")
@click.option("--output", "-o", required=True, type=click.Path(), help="Output file path (.parquet or .json)")
@click.option("--force", "-f", is_flag=True, help="Overwrite existing file")
def collect(
    market_id: str,
    days: int,
    start: Optional[datetime],
    end: Optional[datetime],
    output: str,
    force: bool,
):
    r"""
    Collect trade data from Polymarket.

    Fetches historical trades for a given market and saves to Parquet or JSON.
    Supports caching — will skip if output file exists unless --force is used.
    Dates given via --start / --end are interpreted as UTC.

    Example:
        xrtm-data collect -m 0x1234... -d 30 -o data/trades.parquet
    """

    def _as_utc(moment: datetime) -> datetime:
        # click.DateTime() produces naive datetimes; treat those as UTC.
        # Already-aware values are converted so both bounds share one zone.
        if moment.tzinfo is None:
            return moment.replace(tzinfo=timezone.utc)
        return moment.astimezone(timezone.utc)

    output_path = Path(output)

    # Check cache: an existing output is kept unless --force was given.
    if output_path.exists() and not force:
        console.print(f"[yellow]⚠ File exists:[/yellow] {output_path}")
        console.print(" Use --force to overwrite, or specify different output.")
        return

    # Calculate time range. Both bounds are normalised to aware UTC datetimes
    # so that an explicit --end never mixes with an aware --start (or with the
    # aware "now" default) as a naive value.
    end_time = _as_utc(end) if end else datetime.now(timezone.utc)
    if start:
        start_time = _as_utc(start)
    else:
        start_time = end_time - timedelta(days=days)

    console.print(Panel(
        f"[bold blue]Collecting Polymarket Trades[/bold blue]\n"
        f"Market: {market_id[:16]}...\n"
        f"Range: {start_time.date()} → {end_time.date()}",
        title="xrtm-data",
    ))

    # Run async collection
    async def _collect():
        # Imported lazily so the provider is only loaded when this command runs.
        from xrtm.data.providers.subgraph import PolymarketTradeSource

        source = PolymarketTradeSource()

        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            console=console,
        ) as progress:
            # total=None: indeterminate spinner, the trade count is unknown upfront.
            task = progress.add_task("Fetching trades...", total=None)

            window = await source.fetch_trade_window(
                market_id=market_id,
                start_time=start_time,
                end_time=end_time,
            )

            progress.update(task, description=f"Fetched {len(window.trades)} trades")

        return window

    window = asyncio.run(_collect())

    # Save output: ".parquet" selects Parquet, any other suffix is written as JSON.
    output_path.parent.mkdir(parents=True, exist_ok=True)

    if output_path.suffix == ".parquet":
        _save_parquet(window, output_path)
    else:
        _save_json(window, output_path)

    console.print(f"[green]✓ Saved {len(window.trades)} trades to:[/green] {output_path}")

    # Summary table
    table = Table(title="Trade Summary")
    table.add_column("Metric", style="cyan")
    table.add_column("Value", style="green")
    table.add_row("Total Trades", str(window.trade_count))
    table.add_row("Total Volume", f"${window.total_volume:,.2f}")
    table.add_row("VWAP", f"{window.volume_weighted_price:.4f}")
    console.print(table)
142
+
143
+
144
@main.command("fit-prior")
@click.option("--input", "-i", "input_path", required=True, type=click.Path(exists=True), help="Input trades file")
@click.option("--output", "-o", required=True, type=click.Path(), help="Output prior file (.json)")
@click.option("--half-life", "-h", default=24.0, help="Half-life in hours for decay weighting")
@click.option("--min-concentration", default=2.0, help="Minimum concentration (α+β)")
def fit_prior(input_path: str, output: str, half_life: float, min_concentration: float):
    r"""
    Fit a Beta prior from trade data.

    Reads trade history and fits a Beta distribution using exponential
    decay weighting (recent trades weighted more heavily).

    Example:
        xrtm-data fit-prior -i trades.parquet -o prior.json -h 24.0
    """
    # Imported lazily so the fitter is only loaded when this command runs.
    from xrtm.data.kit.processors import fit_beta_exponential_decay

    banner = (
        f"[bold blue]Fitting Beta Prior[/bold blue]\n"
        f"Input: {input_path}\n"
        f"Half-life: {half_life} hours"
    )
    console.print(Panel(banner, title="xrtm-data"))

    # Read the trade history back from disk.
    trades = _load_trades(Path(input_path))
    console.print(f"Loaded {len(trades)} trades")

    # Fit the distribution with the requested decay settings.
    prior = fit_beta_exponential_decay(
        trades,
        half_life_hours=half_life,
        min_concentration=min_concentration,
    )

    # Persist the fitted prior plus provenance metadata as JSON.
    destination = Path(output)
    destination.parent.mkdir(parents=True, exist_ok=True)

    payload = prior.to_distribution_dict()
    payload["metadata"] = {
        "source": str(input_path),
        "trade_count": len(trades),
        "half_life_hours": half_life,
        "fitted_at": datetime.now(timezone.utc).isoformat(),
    }

    with open(destination, "w") as handle:
        json.dump(payload, handle, indent=2)

    console.print(f"[green]✓ Saved prior to:[/green] {destination}")

    # Render a short summary of the fitted parameters.
    low, high = prior.credible_interval(0.9)
    summary = Table(title="Fitted Prior")
    summary.add_column("Parameter", style="cyan")
    summary.add_column("Value", style="green")
    rows = (
        ("α (alpha)", f"{prior.alpha:.4f}"),
        ("β (beta)", f"{prior.beta:.4f}"),
        ("Mean", f"{prior.mean:.4f}"),
        ("90% CI", f"[{low:.4f}, {high:.4f}]"),
    )
    for label, rendered in rows:
        summary.add_row(label, rendered)
    console.print(summary)
206
+
207
+
208
@main.command("info")
@click.argument("file_path", type=click.Path(exists=True))
def info(file_path: str):
    r"""
    Show information about a data file.

    Displays summary statistics for trade files or prior files.
    Supported file types are .json and .parquet; anything else is reported
    as unsupported instead of being silently ignored.
    """
    path = Path(file_path)

    if path.suffix == ".json":
        with open(path) as f:
            data = json.load(f)

        # Only a JSON object can be a prior file; guarding on dict avoids
        # substring/membership false positives on strings and lists.
        if isinstance(data, dict) and "family" in data:
            # It's a prior
            console.print(Panel(
                f"[bold]Prior File[/bold]\n"
                f"Family: {data['family']}\n"
                f"α: {data.get('alpha', 'N/A')}\n"
                f"β: {data.get('beta', 'N/A')}",
                title=path.name,
            ))
        elif isinstance(data, dict):
            console.print(f"JSON file with {len(data)} keys")
        elif isinstance(data, list):
            console.print(f"JSON file with {len(data)} items")
        else:
            # Scalars (numbers, strings, booleans, null) have no meaningful size.
            console.print(f"JSON file containing a single {type(data).__name__} value")
    elif path.suffix == ".parquet":
        # Imported lazily so pyarrow is only loaded when a Parquet file is inspected.
        import pyarrow.parquet as pq

        table = pq.read_table(path)
        console.print(Panel(
            f"[bold]Parquet File[/bold]\n"
            f"Rows: {table.num_rows}\n"
            f"Columns: {table.column_names}",
            title=path.name,
        ))
    else:
        console.print(f"[yellow]⚠ Unsupported file type:[/yellow] {path.name}")
        console.print(" Expected a .json or .parquet file.")
243
+
244
+
245
def _save_parquet(window, path: Path) -> None:
    r"""Write the trades of a TradeWindow to ``path`` as a Parquet table."""
    import pyarrow as pa
    import pyarrow.parquet as pq

    # One list per output column, filled in a single pass over the trades.
    # Column order: price, amount, timestamp, maker, taker.
    prices, amounts, stamps, makers, takers = [], [], [], [], []
    for trade in window.trades:
        prices.append(trade.price)
        amounts.append(trade.amount)
        # Timestamps are stored as ISO-8601 strings.
        stamps.append(trade.timestamp.isoformat())
        makers.append(trade.maker)
        takers.append(trade.taker)

    columns = {
        "price": prices,
        "amount": amounts,
        "timestamp": stamps,
        "maker": makers,
        "taker": takers,
    }
    pq.write_table(pa.table(columns), path)
260
+
261
+
262
def _save_json(window, path: Path) -> None:
    r"""Write a TradeWindow (market id, time bounds and trades) to ``path`` as JSON."""

    def _as_record(trade) -> dict:
        # Flatten one trade into JSON-friendly primitives.
        return {
            "price": trade.price,
            "amount": trade.amount,
            "timestamp": trade.timestamp.isoformat(),
            "maker": trade.maker,
            "taker": trade.taker,
        }

    payload = {
        "market_id": window.market_id,
        "start_time": window.start_time.isoformat(),
        "end_time": window.end_time.isoformat(),
        "trades": [_as_record(trade) for trade in window.trades],
    }

    with open(path, "w") as handle:
        json.dump(payload, handle, indent=2)
282
+
283
+
284
def _load_trades(path: Path) -> list:
    r"""
    Load trades from Parquet or JSON.

    Args:
        path: File to read. A ``.parquet`` suffix selects the Parquet reader;
            any other suffix is parsed as JSON. The JSON document may be either
            an object with a ``"trades"`` array or a bare array of trade objects.

    Returns:
        List of TradeEvent objects built from the ``price``, ``amount``,
        ``timestamp`` (ISO-8601 string), ``maker`` and ``taker`` fields.

    Raises:
        ValueError: If the JSON document is an object without a ``"trades"``
            key, or is neither an object nor an array.
    """
    from xrtm.data.core.schemas import TradeEvent

    if path.suffix == ".parquet":
        import pyarrow.parquet as pq

        # to_pylist() yields one plain dict per row and needs only pyarrow;
        # to_pandas() would require pandas, which this package does not depend on.
        records = pq.read_table(path).to_pylist()
    else:
        with open(path) as f:
            data = json.load(f)

        if isinstance(data, list):
            # Bare array of trade objects.
            records = data
        elif isinstance(data, dict) and "trades" in data:
            records = data["trades"]
        else:
            raise ValueError(f"No trades found in {path}: expected a 'trades' array or a list of trades")

    return [
        TradeEvent(
            price=record["price"],
            amount=record["amount"],
            timestamp=datetime.fromisoformat(record["timestamp"]),
            maker=record["maker"],
            taker=record["taker"],
        )
        for record in records
    ]
317
+
318
+
319
+ if __name__ == "__main__":
320
+ main()
321
+
322
+
323
+ __all__ = ["main"]
@@ -14,10 +14,9 @@
14
14
  # limitations under the License.
15
15
 
16
16
  r"""
17
- Core data schemas for xrtm-data.
17
+ Core schemas for xrtm-data.
18
18
 
19
- This module exports all foundational Pydantic models used across the
20
- xrtm ecosystem for representing forecasting data structures.
19
+ This module exports all Pydantic schemas used across the xrtm ecosystem.
21
20
  """
22
21
 
23
22
  from xrtm.data.core.schemas.forecast import (
@@ -28,12 +27,21 @@ from xrtm.data.core.schemas.forecast import (
28
27
  ForecastQuestion,
29
28
  MetadataBase,
30
29
  )
30
+ from xrtm.data.core.schemas.prior import BetaPrior, PriorState
31
+ from xrtm.data.core.schemas.trade import TradeEvent, TradeWindow
31
32
 
32
33
  __all__ = [
34
+ # Forecast schemas
33
35
  "MetadataBase",
34
36
  "ForecastQuestion",
35
37
  "ForecastOutput",
36
38
  "CausalNode",
37
39
  "CausalEdge",
38
40
  "ConfidenceInterval",
41
+ # Prior schemas
42
+ "BetaPrior",
43
+ "PriorState",
44
+ # Trade schemas
45
+ "TradeEvent",
46
+ "TradeWindow",
39
47
  ]
@@ -0,0 +1,254 @@
1
+ # coding=utf-8
2
+ # Copyright 2026 XRTM Team. All rights reserved.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ r"""
17
+ Prior state schemas for LLM training injection.
18
+
19
+ This module defines the Beta distribution parameters and prior state
20
+ representation used for injecting market belief state into LLM training.
21
+ Implements Decision 1 from the training architecture.
22
+
23
+ Example:
24
+ >>> from xrtm.data.core.schemas import BetaPrior, PriorState
25
+ >>> prior = BetaPrior(alpha=7.0, beta=3.0)
26
+ >>> print(f"Mean: {prior.mean:.2f}, Concentration: {prior.concentration}")
27
+ Mean: 0.70, Concentration: 10.0
28
+ """
29
+
30
+ from datetime import datetime, timezone
31
+ from typing import Optional
32
+
33
+ from pydantic import BaseModel, Field, computed_field
34
+
35
+
36
class BetaPrior(BaseModel):
    r"""
    Beta distribution parameters fitted from trade history.

    The Beta distribution is the conjugate prior for Bernoulli outcomes,
    making it ideal for representing belief state about binary events.
    Parameters α and β can be interpreted as "pseudo-counts" of Yes and No
    observations respectively.

    Attributes:
        alpha: Shape parameter α (Yes-weighted). Higher values indicate
            stronger belief in the positive outcome.
        beta: Shape parameter β (No-weighted). Higher values indicate
            stronger belief in the negative outcome.

    Example:
        >>> prior = BetaPrior(alpha=7.0, beta=3.0)
        >>> prior.mean
        0.7
        >>> prior.concentration
        10.0
        >>> prior.variance
        0.019090909090909092
    """

    alpha: float = Field(
        ...,
        gt=0,
        description="Shape parameter α (Yes-weighted pseudo-count)",
    )
    beta: float = Field(
        ...,
        gt=0,
        description="Shape parameter β (No-weighted pseudo-count)",
    )

    @computed_field  # type: ignore[prop-decorator]
    @property
    def mean(self) -> float:
        r"""Expected value of the Beta distribution: α / (α + β)."""
        return self.alpha / (self.alpha + self.beta)

    @computed_field  # type: ignore[prop-decorator]
    @property
    def concentration(self) -> float:
        r"""
        Total concentration α + β.

        Higher concentration indicates more confident prior (less uncertainty).
        A concentration of 2 represents maximum uncertainty (uniform prior).
        """
        return self.alpha + self.beta

    # Plain property (no @computed_field), so unlike ``mean`` and
    # ``concentration`` it is not part of the serialized model output.
    @property
    def variance(self) -> float:
        r"""Variance of the Beta distribution: αβ / ((α+β)²(α+β+1))."""
        total = self.alpha + self.beta
        return (self.alpha * self.beta) / (total * total * (total + 1))

    @classmethod
    def uniform(cls) -> "BetaPrior":
        r"""Create a uniform (uninformative) prior with α=β=1."""
        return cls(alpha=1.0, beta=1.0)

    @classmethod
    def from_mean_concentration(cls, mean: float, concentration: float) -> "BetaPrior":
        r"""
        Create a BetaPrior from mean and concentration.

        Args:
            mean: Target mean value in (0, 1).
            concentration: Target α + β, must be > 0.

        Returns:
            BetaPrior with specified mean and concentration.

        Raises:
            pydantic.ValidationError: If the resulting α or β is not
                strictly positive (enforced by the ``gt=0`` field constraints).

        Example:
            >>> prior = BetaPrior.from_mean_concentration(0.7, 10.0)
            >>> round(prior.alpha, 6), round(prior.beta, 6)
            (7.0, 3.0)
        """
        # Results are subject to float rounding, e.g. (1 - 0.7) * 10.0 is
        # 3.0000000000000004, hence the rounding in the example above.
        alpha = mean * concentration
        beta = (1 - mean) * concentration
        return cls(alpha=alpha, beta=beta)

    def credible_interval(self, level: float = 0.9) -> tuple[float, float]:
        r"""
        Compute the credible interval (Bayesian confidence interval).

        Uses scipy.stats.beta to compute the equal-tailed credible interval.

        Args:
            level: Probability mass inside the interval, in [0, 1]
                (default 0.9 for a 90% interval).

        Returns:
            Tuple of (low, high) bounds.

        Raises:
            ValueError: If ``level`` is outside [0, 1] (including NaN).

        Example:
            >>> prior = BetaPrior(alpha=7.0, beta=3.0)
            >>> low, high = prior.credible_interval(0.9)
            >>> print(f"90% CI: [{low:.3f}, {high:.3f}]")
            90% CI: [0.450, 0.902]
        """
        # Outside [0, 1] the quantile function would return NaN bounds;
        # fail loudly instead of propagating NaN into saved priors.
        if not 0.0 <= level <= 1.0:
            raise ValueError(f"level must be within [0, 1], got {level!r}")

        from scipy.stats import beta as beta_dist

        dist = beta_dist(self.alpha, self.beta)
        # Equal-tailed: the excluded mass is split evenly between both tails.
        tail = (1 - level) / 2
        return (float(dist.ppf(tail)), float(dist.ppf(1 - tail)))

    def sample(self, n: int = 1) -> list[float]:
        r"""
        Draw random samples from the Beta distribution.

        Args:
            n: Number of samples to draw.

        Returns:
            List of n samples from Beta(α, β).

        Example:
            >>> prior = BetaPrior(alpha=7.0, beta=3.0)
            >>> samples = prior.sample(1000)
            >>> abs(sum(samples)/len(samples) - prior.mean) < 0.05
            True
        """
        from scipy.stats import beta as beta_dist

        dist = beta_dist(self.alpha, self.beta)
        # Convert NumPy scalars to plain Python floats.
        return [float(x) for x in dist.rvs(size=n)]

    def to_distribution_dict(self, level: float = 0.9) -> dict:
        r"""
        Convert to governance schema v1.1 distribution format.

        Args:
            level: Credible-interval level to embed (default 0.9, which
                reproduces the previously hard-coded 90% interval).

        Returns:
            Dictionary matching the forecast_object_v1.1 distribution schema.

        Raises:
            ValueError: If ``level`` is outside [0, 1].

        Example:
            >>> prior = BetaPrior(alpha=7.0, beta=3.0)
            >>> d = prior.to_distribution_dict()
            >>> d["family"]
            'beta'
        """
        low, high = self.credible_interval(level)
        return {
            "family": "beta",
            "alpha": self.alpha,
            "beta": self.beta,
            "credible_interval": {
                "low": low,
                "high": high,
                "level": level,
            },
        }
190
+
191
+
192
class PriorState(BaseModel):
    r"""
    Full prior state for training injection (Decision 1).

    This domain-agnostic schema captures the complete belief state at a
    point in time, including temporal context for the model to reason
    about information staleness and deadline proximity.

    Attributes:
        prior: The Beta distribution parameters representing current belief.
        silence_delta: Normalized time since last information update.
            0 = just updated, 1 = long silence (model should consider decay).
        deadline_delta: Normalized time remaining until resolution.
            0 = at resolution, 1 = maximum time remaining.
        snapshot_time: UTC timestamp when this state was captured.
        metadata: Optional additional context.

    Example:
        >>> from datetime import datetime, timezone
        >>> state = PriorState(
        ...     prior=BetaPrior(alpha=7.0, beta=3.0),
        ...     silence_delta=0.1,
        ...     deadline_delta=0.5,
        ...     snapshot_time=datetime.now(timezone.utc),
        ... )
    """

    # Required belief distribution.
    prior: BetaPrior = Field(..., description="Beta distribution parameters representing current belief")

    # Both deltas are constrained to the closed interval [0, 1].
    silence_delta: float = Field(
        default=0.0, ge=0, le=1,
        description="Normalized time since last information update [0=just updated, 1=long silence]",
    )
    deadline_delta: float = Field(
        default=1.0, ge=0, le=1,
        description="Normalized time remaining until resolution [0=at resolution, 1=max time]",
    )

    # Defaults to the (timezone-aware, UTC) moment the instance is created.
    snapshot_time: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc),
        description="UTC timestamp when this state was captured",
    )
    metadata: Optional[dict] = Field(default=None, description="Optional additional context")

    @classmethod
    def uninformative(cls) -> "PriorState":
        r"""Create an uninformative prior state with uniform Beta(1,1)."""
        flat_prior = BetaPrior.uniform()
        return cls(prior=flat_prior, silence_delta=0.0, deadline_delta=1.0)
252
+
253
+
254
+ __all__ = ["BetaPrior", "PriorState"]