PyPI - xrtm-data - Versions diffs - 0.2.0__tar.gz → 0.2.1__tar.gz - Mend

xrtm-data 0.2.0 (tar.gz) → 0.2.1 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (34) hide show

{xrtm_data-0.2.0/src/xrtm_data.egg-info → xrtm_data-0.2.1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: xrtm-data
-Version: 0.2.0
+Version: 0.2.1
 Summary: The Snapshot Vault for XRTM.
 Author-email: XRTM Team <moy@xrtm.org>
 License: Apache-2.0
@@ -9,6 +9,10 @@ Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: pydantic>=2.0.0
 Requires-Dist: aiohttp>=3.9.0
+Requires-Dist: scipy>=1.11.0
+Requires-Dist: click>=8.0.0
+Requires-Dist: rich>=13.0.0
+Requires-Dist: pyarrow>=14.0.0
 Provides-Extra: dev
 Requires-Dist: pytest>=7.0.0; extra == "dev"
 Requires-Dist: pytest-asyncio>=0.21.0; extra == "dev"

{xrtm_data-0.2.0 → xrtm_data-0.2.1}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "xrtm-data"
-version = "0.2.0"
+version = "0.2.1"
 description = "The Snapshot Vault for XRTM."
 readme = "README.md"
 requires-python = ">=3.11"
@@ -15,6 +15,10 @@ authors = [
 dependencies = [
     "pydantic>=2.0.0",
     "aiohttp>=3.9.0",
+    "scipy>=1.11.0",
+    "click>=8.0.0",
+    "rich>=13.0.0",
+    "pyarrow>=14.0.0",
 ]
 [project.optional-dependencies]
@@ -26,12 +30,15 @@ dev = [
     "mypy>=1.0.0",
 ]
+[project.scripts]
+xrtm-data = "xrtm.data.cli:main"
 [tool.setuptools]
 package-dir = {"" = "src"}
 packages = {find = {where = ["src"], include = ["xrtm*"], namespaces = true}}
 [tool.pytest.ini_options]
-pythonpath = ["."]
+pythonpath = ["src"]
 testpaths = ["tests"]
 asyncio_mode = "strict"

xrtm_data-0.2.1/src/xrtm/data/cli/__init__.py ADDED Viewed

@@ -0,0 +1,323 @@
+# coding=utf-8
+# Copyright 2026 XRTM Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+r"""
+CLI entry point for xrtm-data.
+Provides commands for:
+- Collecting trade data from Polymarket
+- Fitting Beta priors from trade history
+- Caching data to Parquet files
+Example:
+    $ xrtm-data collect --market-id 0x... --days 30 -o trades.parquet
+    $ xrtm-data fit-prior --input trades.parquet -o prior.json
+"""
+import asyncio
+import json
+from datetime import datetime, timedelta, timezone
+from pathlib import Path
+from typing import Optional
+import click
+from rich.console import Console
+from rich.panel import Panel
+from rich.progress import Progress, SpinnerColumn, TextColumn
+from rich.table import Table
+from xrtm.data.version import __version__
+console = Console()
+@click.group()
+@click.version_option(version=__version__)
+def main():
+    r"""xrtm-data: Data collection and preprocessing for xRTM training."""
+    pass
+@main.command()
+@click.option("--market-id", "-m", required=True, help="Polymarket market ID (hex address)")
+@click.option("--days", "-d", default=30, help="Number of days of history to fetch")
+@click.option("--start", type=click.DateTime(), help="Start date (overrides --days)")
+@click.option("--end", type=click.DateTime(), help="End date (default: now)")
+@click.option("--output", "-o", required=True, type=click.Path(), help="Output file path (.parquet or .json)")
+@click.option("--force", "-f", is_flag=True, help="Overwrite existing file")
+def collect(
+    market_id: str,
+    days: int,
+    start: Optional[datetime],
+    end: Optional[datetime],
+    output: str,
+    force: bool,
+):
+    r"""
+    Collect trade data from Polymarket.
+    Fetches historical trades for a given market and saves to Parquet or JSON.
+    Supports caching — will skip if output file exists unless --force is used.
+    Example:
+        xrtm-data collect -m 0x1234... -d 30 -o data/trades.parquet
+    """
+    output_path = Path(output)
+    # Check cache
+    if output_path.exists() and not force:
+        console.print(f"[yellow]⚠ File exists:[/yellow] {output_path}")
+        console.print("  Use --force to overwrite, or specify different output.")
+        return
+    # Calculate time range
+    end_time = end or datetime.now(timezone.utc)
+    if start:
+        start_time = start.replace(tzinfo=timezone.utc)
+    else:
+        start_time = end_time - timedelta(days=days)
+    console.print(Panel(
+        f"[bold blue]Collecting Polymarket Trades[/bold blue]\n"
+        f"Market: {market_id[:16]}...\n"
+        f"Range: {start_time.date()} → {end_time.date()}",
+        title="xrtm-data",
+    ))
+    # Run async collection
+    async def _collect():
+        from xrtm.data.providers.subgraph import PolymarketTradeSource
+        source = PolymarketTradeSource()
+        with Progress(
+            SpinnerColumn(),
+            TextColumn("[progress.description]{task.description}"),
+            console=console,
+        ) as progress:
+            task = progress.add_task("Fetching trades...", total=None)
+            window = await source.fetch_trade_window(
+                market_id=market_id,
+                start_time=start_time,
+                end_time=end_time,
+            )
+            progress.update(task, description=f"Fetched {len(window.trades)} trades")
+        return window
+    window = asyncio.run(_collect())
+    # Save output
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+    if output_path.suffix == ".parquet":
+        _save_parquet(window, output_path)
+    else:
+        _save_json(window, output_path)
+    console.print(f"[green]✓ Saved {len(window.trades)} trades to:[/green] {output_path}")
+    # Summary table
+    table = Table(title="Trade Summary")
+    table.add_column("Metric", style="cyan")
+    table.add_column("Value", style="green")
+    table.add_row("Total Trades", str(window.trade_count))
+    table.add_row("Total Volume", f"${window.total_volume:,.2f}")
+    table.add_row("VWAP", f"{window.volume_weighted_price:.4f}")
+    console.print(table)
+@main.command("fit-prior")
+@click.option("--input", "-i", "input_path", required=True, type=click.Path(exists=True), help="Input trades file")
+@click.option("--output", "-o", required=True, type=click.Path(), help="Output prior file (.json)")
+@click.option("--half-life", "-h", default=24.0, help="Half-life in hours for decay weighting")
+@click.option("--min-concentration", default=2.0, help="Minimum concentration (α+β)")
+def fit_prior(input_path: str, output: str, half_life: float, min_concentration: float):
+    r"""
+    Fit a Beta prior from trade data.
+    Reads trade history and fits a Beta distribution using exponential
+    decay weighting (recent trades weighted more heavily).
+    Example:
+        xrtm-data fit-prior -i trades.parquet -o prior.json -h 24.0
+    """
+    from xrtm.data.kit.processors import fit_beta_exponential_decay
+    console.print(Panel(
+        f"[bold blue]Fitting Beta Prior[/bold blue]\n"
+        f"Input: {input_path}\n"
+        f"Half-life: {half_life} hours",
+        title="xrtm-data",
+    ))
+    # Load trades
+    trades = _load_trades(Path(input_path))
+    console.print(f"Loaded {len(trades)} trades")
+    # Fit prior
+    prior = fit_beta_exponential_decay(
+        trades,
+        half_life_hours=half_life,
+        min_concentration=min_concentration,
+    )
+    # Save
+    output_path = Path(output)
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+    prior_dict = prior.to_distribution_dict()
+    prior_dict["metadata"] = {
+        "source": str(input_path),
+        "trade_count": len(trades),
+        "half_life_hours": half_life,
+        "fitted_at": datetime.now(timezone.utc).isoformat(),
+    }
+    with open(output_path, "w") as f:
+        json.dump(prior_dict, f, indent=2)
+    console.print(f"[green]✓ Saved prior to:[/green] {output_path}")
+    # Summary
+    low, high = prior.credible_interval(0.9)
+    table = Table(title="Fitted Prior")
+    table.add_column("Parameter", style="cyan")
+    table.add_column("Value", style="green")
+    table.add_row("α (alpha)", f"{prior.alpha:.4f}")
+    table.add_row("β (beta)", f"{prior.beta:.4f}")
+    table.add_row("Mean", f"{prior.mean:.4f}")
+    table.add_row("90% CI", f"[{low:.4f}, {high:.4f}]")
+    console.print(table)
+@main.command("info")
+@click.argument("file_path", type=click.Path(exists=True))
+def info(file_path: str):
+    r"""
+    Show information about a data file.
+    Displays summary statistics for trade files or prior files.
+    """
+    path = Path(file_path)
+    if path.suffix == ".json":
+        with open(path) as f:
+            data = json.load(f)
+        if "family" in data:
+            # It's a prior
+            console.print(Panel(
+                f"[bold]Prior File[/bold]\n"
+                f"Family: {data['family']}\n"
+                f"α: {data.get('alpha', 'N/A')}\n"
+                f"β: {data.get('beta', 'N/A')}",
+                title=path.name,
+            ))
+        else:
+            console.print(f"JSON file with {len(data)} keys")
+    elif path.suffix == ".parquet":
+        import pyarrow.parquet as pq
+        table = pq.read_table(path)
+        console.print(Panel(
+            f"[bold]Parquet File[/bold]\n"
+            f"Rows: {table.num_rows}\n"
+            f"Columns: {table.column_names}",
+            title=path.name,
+        ))
+def _save_parquet(window, path: Path) -> None:
+    r"""Save TradeWindow to Parquet format."""
+    import pyarrow as pa
+    import pyarrow.parquet as pq
+    data = {
+        "price": [t.price for t in window.trades],
+        "amount": [t.amount for t in window.trades],
+        "timestamp": [t.timestamp.isoformat() for t in window.trades],
+        "maker": [t.maker for t in window.trades],
+        "taker": [t.taker for t in window.trades],
+    }
+    table = pa.table(data)
+    pq.write_table(table, path)
+def _save_json(window, path: Path) -> None:
+    r"""Save TradeWindow to JSON format."""
+    data = {
+        "market_id": window.market_id,
+        "start_time": window.start_time.isoformat(),
+        "end_time": window.end_time.isoformat(),
+        "trades": [
+            {
+                "price": t.price,
+                "amount": t.amount,
+                "timestamp": t.timestamp.isoformat(),
+                "maker": t.maker,
+                "taker": t.taker,
+            }
+            for t in window.trades
+        ],
+    }
+    with open(path, "w") as f:
+        json.dump(data, f, indent=2)
+def _load_trades(path: Path) -> list:
+    r"""Load trades from Parquet or JSON."""
+    from xrtm.data.core.schemas import TradeEvent
+    if path.suffix == ".parquet":
+        import pyarrow.parquet as pq
+        table = pq.read_table(path)
+        df = table.to_pandas()
+        return [
+            TradeEvent(
+                price=row["price"],
+                amount=row["amount"],
+                timestamp=datetime.fromisoformat(row["timestamp"]),
+                maker=row["maker"],
+                taker=row["taker"],
+            )
+            for _, row in df.iterrows()
+        ]
+    else:
+        with open(path) as f:
+            data = json.load(f)
+        return [
+            TradeEvent(
+                price=t["price"],
+                amount=t["amount"],
+                timestamp=datetime.fromisoformat(t["timestamp"]),
+                maker=t["maker"],
+                taker=t["taker"],
+            )
+            for t in data.get("trades", data)
+        ]
+# Run the CLI when this file is executed directly as a script.
+if __name__ == "__main__":
+    main()
+# Public API of this module: only the root command group is exported.
+__all__ = ["main"]

{xrtm_data-0.2.0 → xrtm_data-0.2.1}/src/xrtm/data/core/schemas/__init__.py RENAMED Viewed

@@ -14,10 +14,9 @@
 # limitations under the License.
 r"""
-Core data schemas for xrtm-data.
+Core schemas for xrtm-data.
-This module exports all foundational Pydantic models used across the
-xrtm ecosystem for representing forecasting data structures.
+This module exports all Pydantic schemas used across the xrtm ecosystem.
 """
 from xrtm.data.core.schemas.forecast import (
@@ -28,12 +27,21 @@ from xrtm.data.core.schemas.forecast import (
     ForecastQuestion,
     MetadataBase,
 )
+from xrtm.data.core.schemas.prior import BetaPrior, PriorState
+from xrtm.data.core.schemas.trade import TradeEvent, TradeWindow
 __all__ = [
+    # Forecast schemas
     "MetadataBase",
     "ForecastQuestion",
     "ForecastOutput",
     "CausalNode",
     "CausalEdge",
     "ConfidenceInterval",
+    # Prior schemas
+    "BetaPrior",
+    "PriorState",
+    # Trade schemas
+    "TradeEvent",
+    "TradeWindow",
 ]

xrtm_data-0.2.1/src/xrtm/data/core/schemas/prior.py ADDED Viewed

@@ -0,0 +1,254 @@
+# coding=utf-8
+# Copyright 2026 XRTM Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+r"""
+Prior state schemas for LLM training injection.
+This module defines the Beta distribution parameters and prior state
+representation used for injecting market belief state into LLM training.
+Implements Decision 1 from the training architecture.
+Example:
+    >>> from xrtm.data.core.schemas import BetaPrior, PriorState
+    >>> prior = BetaPrior(alpha=7.0, beta=3.0)
+    >>> print(f"Mean: {prior.mean:.2f}, Concentration: {prior.concentration}")
+    Mean: 0.70, Concentration: 10.0
+"""
+from datetime import datetime, timezone
+from typing import Optional
+from pydantic import BaseModel, Field, computed_field
+class BetaPrior(BaseModel):
+    r"""
+    Beta distribution parameters fitted from trade history.
+    The Beta distribution is the conjugate prior for Bernoulli outcomes,
+    making it ideal for representing belief state about binary events.
+    Parameters α and β can be interpreted as "pseudo-counts" of Yes and No
+    observations respectively.
+    Attributes:
+        alpha: Shape parameter α (Yes-weighted). Higher values indicate
+            stronger belief in the positive outcome.
+        beta: Shape parameter β (No-weighted). Higher values indicate
+            stronger belief in the negative outcome.
+    Example:
+        >>> prior = BetaPrior(alpha=7.0, beta=3.0)
+        >>> prior.mean
+        0.7
+        >>> prior.concentration
+        10.0
+        >>> prior.variance
+        0.019090909090909092
+    """
+    alpha: float = Field(
+        ...,
+        gt=0,
+        description="Shape parameter α (Yes-weighted pseudo-count)",
+    )
+    beta: float = Field(
+        ...,
+        gt=0,
+        description="Shape parameter β (No-weighted pseudo-count)",
+    )
+    @computed_field  # type: ignore[prop-decorator]
+    @property
+    def mean(self) -> float:
+        r"""Expected value of the Beta distribution: α / (α + β)."""
+        return self.alpha / (self.alpha + self.beta)
+    @computed_field  # type: ignore[prop-decorator]
+    @property
+    def concentration(self) -> float:
+        r"""
+        Total concentration α + β.
+        Higher concentration indicates more confident prior (less uncertainty).
+        A concentration of 2 represents maximum uncertainty (uniform prior).
+        """
+        return self.alpha + self.beta
+    @property
+    def variance(self) -> float:
+        r"""Variance of the Beta distribution: αβ / ((α+β)²(α+β+1))."""
+        total = self.alpha + self.beta
+        return (self.alpha * self.beta) / (total * total * (total + 1))
+    @classmethod
+    def uniform(cls) -> "BetaPrior":
+        r"""Create a uniform (uninformative) prior with α=β=1."""
+        return cls(alpha=1.0, beta=1.0)
+    @classmethod
+    def from_mean_concentration(cls, mean: float, concentration: float) -> "BetaPrior":
+        r"""
+        Create a BetaPrior from mean and concentration.
+        Args:
+            mean: Target mean value in (0, 1).
+            concentration: Target α + β, must be > 0.
+        Returns:
+            BetaPrior with specified mean and concentration.
+        Example:
+            >>> prior = BetaPrior.from_mean_concentration(0.7, 10.0)
+            >>> prior.alpha, prior.beta
+            (7.0, 3.0)
+        """
+        alpha = mean * concentration
+        beta = (1 - mean) * concentration
+        return cls(alpha=alpha, beta=beta)
+    def credible_interval(self, level: float = 0.9) -> tuple[float, float]:
+        r"""
+        Compute the credible interval (Bayesian confidence interval).
+        Uses scipy.stats.beta to compute the equal-tailed credible interval.
+        Args:
+            level: Confidence level (default 0.9 for 90% interval).
+        Returns:
+            Tuple of (low, high) bounds.
+        Example:
+            >>> prior = BetaPrior(alpha=7.0, beta=3.0)
+            >>> low, high = prior.credible_interval(0.9)
+            >>> print(f"90% CI: [{low:.3f}, {high:.3f}]")
+            90% CI: [0.435, 0.895]
+        """
+        from scipy.stats import beta as beta_dist
+        dist = beta_dist(self.alpha, self.beta)
+        tail = (1 - level) / 2
+        return (float(dist.ppf(tail)), float(dist.ppf(1 - tail)))
+    def sample(self, n: int = 1) -> list[float]:
+        r"""
+        Draw random samples from the Beta distribution.
+        Args:
+            n: Number of samples to draw.
+        Returns:
+            List of n samples from Beta(α, β).
+        Example:
+            >>> prior = BetaPrior(alpha=7.0, beta=3.0)
+            >>> samples = prior.sample(1000)
+            >>> abs(sum(samples)/len(samples) - prior.mean) < 0.05
+            True
+        """
+        from scipy.stats import beta as beta_dist
+        dist = beta_dist(self.alpha, self.beta)
+        return [float(x) for x in dist.rvs(size=n)]
+    def to_distribution_dict(self) -> dict:
+        r"""
+        Convert to governance schema v1.1 distribution format.
+        Returns:
+            Dictionary matching the forecast_object_v1.1 distribution schema.
+        Example:
+            >>> prior = BetaPrior(alpha=7.0, beta=3.0)
+            >>> d = prior.to_distribution_dict()
+            >>> d["family"]
+            'beta'
+        """
+        low, high = self.credible_interval(0.9)
+        return {
+            "family": "beta",
+            "alpha": self.alpha,
+            "beta": self.beta,
+            "credible_interval": {
+                "low": low,
+                "high": high,
+                "level": 0.9,
+            },
+        }
+class PriorState(BaseModel):
+    r"""
+    Full prior state for training injection (Decision 1).
+    This domain-agnostic schema captures the complete belief state at a
+    point in time, including temporal context for the model to reason
+    about information staleness and deadline proximity.
+    Attributes:
+        prior: The Beta distribution parameters representing current belief.
+        silence_delta: Normalized time since last information update.
+            0 = just updated, 1 = long silence (model should consider decay).
+        deadline_delta: Normalized time remaining until resolution.
+            0 = at resolution, 1 = maximum time remaining.
+        snapshot_time: UTC timestamp when this state was captured.
+        metadata: Optional additional context.
+    Example:
+        >>> from datetime import datetime, timezone
+        >>> state = PriorState(
+        ...     prior=BetaPrior(alpha=7.0, beta=3.0),
+        ...     silence_delta=0.1,
+        ...     deadline_delta=0.5,
+        ...     snapshot_time=datetime.now(timezone.utc),
+        ... )
+    """
+    prior: BetaPrior = Field(
+        ...,
+        description="Beta distribution parameters representing current belief",
+    )
+    silence_delta: float = Field(
+        default=0.0,
+        ge=0,
+        le=1,
+        description="Normalized time since last information update [0=just updated, 1=long silence]",
+    )
+    deadline_delta: float = Field(
+        default=1.0,
+        ge=0,
+        le=1,
+        description="Normalized time remaining until resolution [0=at resolution, 1=max time]",
+    )
+    snapshot_time: datetime = Field(
+        default_factory=lambda: datetime.now(timezone.utc),
+        description="UTC timestamp when this state was captured",
+    )
+    metadata: Optional[dict] = Field(
+        default=None,
+        description="Optional additional context",
+    )
+    @classmethod
+    def uninformative(cls) -> "PriorState":
+        r"""Create an uninformative prior state with uniform Beta(1,1)."""
+        return cls(
+            prior=BetaPrior.uniform(),
+            silence_delta=0.0,
+            deadline_delta=1.0,
+        )
+__all__ = ["BetaPrior", "PriorState"]

xrtm-data 0.2.0__tar.gz → 0.2.1__tar.gz

xrtm-data 0.2.0 (tar.gz) → 0.2.1 (tar.gz)