xrtm-data 0.2.7__tar.gz → 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {xrtm_data-0.2.7 → xrtm_data-0.3.0}/PKG-INFO +1 -5
- {xrtm_data-0.2.7 → xrtm_data-0.3.0}/pyproject.toml +2 -8
- {xrtm_data-0.2.7 → xrtm_data-0.3.0}/src/xrtm/data/__init__.py +0 -1
- {xrtm_data-0.2.7 → xrtm_data-0.3.0}/src/xrtm/data/core/schemas/forecast.py +1 -1
- xrtm_data-0.3.0/src/xrtm/data/corpora/__init__.py +56 -0
- xrtm_data-0.3.0/src/xrtm/data/corpora/_builtin_corpora.py +61 -0
- {xrtm_data-0.2.7/src/xrtm/data/providers/local → xrtm_data-0.3.0/src/xrtm/data/providers}/__init__.py +1 -5
- {xrtm_data-0.2.7 → xrtm_data-0.3.0}/src/xrtm/data/providers/online/__init__.py +3 -2
- xrtm_data-0.3.0/src/xrtm/data/providers/online/metaculus.py +155 -0
- xrtm_data-0.3.0/src/xrtm/data/providers/online/polymarket.py +161 -0
- {xrtm_data-0.2.7 → xrtm_data-0.3.0}/src/xrtm/data/version.py +1 -1
- {xrtm_data-0.2.7 → xrtm_data-0.3.0}/src/xrtm_data.egg-info/PKG-INFO +1 -5
- {xrtm_data-0.2.7 → xrtm_data-0.3.0}/src/xrtm_data.egg-info/SOURCES.txt +1 -20
- {xrtm_data-0.2.7 → xrtm_data-0.3.0}/src/xrtm_data.egg-info/requires.txt +0 -4
- {xrtm_data-0.2.7 → xrtm_data-0.3.0}/tests/test_corpus_registry.py +0 -45
- xrtm_data-0.2.7/src/xrtm/data/cli/__init__.py +0 -330
- xrtm_data-0.2.7/src/xrtm/data/corpora/__init__.py +0 -99
- xrtm_data-0.2.7/src/xrtm/data/corpora/_builtin_corpora.py +0 -185
- xrtm_data-0.2.7/src/xrtm/data/corpora/forecast_importer.py +0 -517
- xrtm_data-0.2.7/src/xrtm/data/corpora/importers.py +0 -296
- xrtm_data-0.2.7/src/xrtm/data/corpora/splits.py +0 -261
- xrtm_data-0.2.7/src/xrtm/data/kit/__init__.py +0 -33
- xrtm_data-0.2.7/src/xrtm/data/kit/processors/__init__.py +0 -212
- xrtm_data-0.2.7/src/xrtm/data/providers/__init__.py +0 -28
- xrtm_data-0.2.7/src/xrtm/data/providers/local/csv.py +0 -176
- xrtm_data-0.2.7/src/xrtm/data/providers/online/polymarket.py +0 -256
- xrtm_data-0.2.7/src/xrtm/data/providers/subgraph/__init__.py +0 -25
- xrtm_data-0.2.7/src/xrtm/data/providers/subgraph/polymarket.py +0 -357
- xrtm_data-0.2.7/src/xrtm_data.egg-info/entry_points.txt +0 -2
- xrtm_data-0.2.7/tests/test_beta_fitter.py +0 -147
- xrtm_data-0.2.7/tests/test_cli_loading.py +0 -42
- xrtm_data-0.2.7/tests/test_cli_ux.py +0 -41
- xrtm_data-0.2.7/tests/test_corpus_importers.py +0 -270
- xrtm_data-0.2.7/tests/test_corpus_splits.py +0 -278
- xrtm_data-0.2.7/tests/test_forecast_importer.py +0 -302
- xrtm_data-0.2.7/tests/test_local_datasource.py +0 -214
- xrtm_data-0.2.7/tests/test_polymarket_source.py +0 -150
- xrtm_data-0.2.7/tests/test_polymarket_subgraph.py +0 -286
- {xrtm_data-0.2.7 → xrtm_data-0.3.0}/LICENSE +0 -0
- {xrtm_data-0.2.7 → xrtm_data-0.3.0}/README.md +0 -0
- {xrtm_data-0.2.7 → xrtm_data-0.3.0}/setup.cfg +0 -0
- {xrtm_data-0.2.7 → xrtm_data-0.3.0}/src/xrtm/data/core/__init__.py +0 -0
- {xrtm_data-0.2.7 → xrtm_data-0.3.0}/src/xrtm/data/core/interfaces.py +0 -0
- {xrtm_data-0.2.7 → xrtm_data-0.3.0}/src/xrtm/data/core/schemas/__init__.py +0 -0
- {xrtm_data-0.2.7 → xrtm_data-0.3.0}/src/xrtm/data/core/schemas/prior.py +0 -0
- {xrtm_data-0.2.7 → xrtm_data-0.3.0}/src/xrtm/data/core/schemas/trade.py +0 -0
- {xrtm_data-0.2.7 → xrtm_data-0.3.0}/src/xrtm/data/corpora/real_binary.py +0 -0
- {xrtm_data-0.2.7 → xrtm_data-0.3.0}/src/xrtm/data/corpora/registry.py +0 -0
- {xrtm_data-0.2.7 → xrtm_data-0.3.0}/src/xrtm_data.egg-info/dependency_links.txt +0 -0
- {xrtm_data-0.2.7 → xrtm_data-0.3.0}/src/xrtm_data.egg-info/top_level.txt +0 -0
- {xrtm_data-0.2.7 → xrtm_data-0.3.0}/tests/test_prior_schemas.py +0 -0
- {xrtm_data-0.2.7 → xrtm_data-0.3.0}/tests/test_real_binary_corpus.py +0 -0
- {xrtm_data-0.2.7 → xrtm_data-0.3.0}/tests/test_schemas.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: xrtm-data
|
|
3
|
-
Version: 0.2.7
|
|
3
|
+
Version: 0.3.0
|
|
4
4
|
Summary: The Snapshot Vault for XRTM.
|
|
5
5
|
Author-email: XRTM Team <moy@xrtm.org>
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -8,11 +8,7 @@ Requires-Python: <3.13,>=3.11
|
|
|
8
8
|
Description-Content-Type: text/markdown
|
|
9
9
|
License-File: LICENSE
|
|
10
10
|
Requires-Dist: pydantic>=2.0.0
|
|
11
|
-
Requires-Dist: aiohttp>=3.9.0
|
|
12
11
|
Requires-Dist: scipy>=1.11.0
|
|
13
|
-
Requires-Dist: click>=8.0.0
|
|
14
|
-
Requires-Dist: rich>=13.0.0
|
|
15
|
-
Requires-Dist: pyarrow>=14.0.0
|
|
16
12
|
Provides-Extra: dev
|
|
17
13
|
Requires-Dist: pytest>=7.0.0; extra == "dev"
|
|
18
14
|
Requires-Dist: pytest-asyncio>=0.21.0; extra == "dev"
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "xrtm-data"
|
|
7
|
-
version = "0.2.7"
|
|
7
|
+
version = "0.3.0"
|
|
8
8
|
description = "The Snapshot Vault for XRTM."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.11,<3.13"
|
|
@@ -14,11 +14,7 @@ authors = [
|
|
|
14
14
|
]
|
|
15
15
|
dependencies = [
|
|
16
16
|
"pydantic>=2.0.0",
|
|
17
|
-
"aiohttp>=3.9.0",
|
|
18
17
|
"scipy>=1.11.0",
|
|
19
|
-
"click>=8.0.0",
|
|
20
|
-
"rich>=13.0.0",
|
|
21
|
-
"pyarrow>=14.0.0",
|
|
22
18
|
]
|
|
23
19
|
|
|
24
20
|
[project.optional-dependencies]
|
|
@@ -30,9 +26,6 @@ dev = [
|
|
|
30
26
|
"mypy>=1.0.0",
|
|
31
27
|
]
|
|
32
28
|
|
|
33
|
-
[project.scripts]
|
|
34
|
-
xrtm-data = "xrtm.data.cli:main"
|
|
35
|
-
|
|
36
29
|
[tool.setuptools]
|
|
37
30
|
package-dir = {"" = "src"}
|
|
38
31
|
packages = {find = {where = ["src"], include = ["xrtm*"], namespaces = true}}
|
|
@@ -55,3 +48,4 @@ python_version = "3.11"
|
|
|
55
48
|
ignore_missing_imports = true
|
|
56
49
|
strict = false
|
|
57
50
|
explicit_package_bases = true
|
|
51
|
+
exclude = ["build/", "dist/"]
|
|
@@ -155,7 +155,7 @@ class CausalEdge(BaseModel):
|
|
|
155
155
|
|
|
156
156
|
source: str = Field(..., description="ID of the source node")
|
|
157
157
|
target: str = Field(..., description="ID of the target node")
|
|
158
|
-
weight: float = Field(default=1.0, ge
|
|
158
|
+
weight: float = Field(default=1.0, ge=-1, le=1, description="Strength of causal relationship (negative = inhibitory)")
|
|
159
159
|
description: Optional[str] = Field(None, description="Context for this causal link")
|
|
160
160
|
|
|
161
161
|
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
# coding=utf-8
|
|
2
|
+
# Copyright 2026 XRTM Team. All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
16
|
+
r"""Public entry points for XRTM corpora."""
|
|
17
|
+
|
|
18
|
+
from xrtm.data.corpora.real_binary import (
|
|
19
|
+
REAL_BINARY_CORPUS_ID,
|
|
20
|
+
RealBinaryCorpusSource,
|
|
21
|
+
RealBinaryQuestionRecord,
|
|
22
|
+
load_real_binary_corpus,
|
|
23
|
+
load_real_binary_questions,
|
|
24
|
+
load_real_binary_resolved_outcomes,
|
|
25
|
+
validate_real_binary_corpus,
|
|
26
|
+
)
|
|
27
|
+
from xrtm.data.corpora.registry import (
|
|
28
|
+
CorpusManifest,
|
|
29
|
+
CorpusMetadata,
|
|
30
|
+
CorpusRegistry,
|
|
31
|
+
CorpusSplit,
|
|
32
|
+
CorpusTier,
|
|
33
|
+
LicenseType,
|
|
34
|
+
get_corpus,
|
|
35
|
+
get_corpus_metadata,
|
|
36
|
+
list_available_corpora,
|
|
37
|
+
)
|
|
38
|
+
|
|
39
|
+
__all__ = [
|
|
40
|
+
"REAL_BINARY_CORPUS_ID",
|
|
41
|
+
"RealBinaryQuestionRecord",
|
|
42
|
+
"RealBinaryCorpusSource",
|
|
43
|
+
"load_real_binary_corpus",
|
|
44
|
+
"load_real_binary_questions",
|
|
45
|
+
"load_real_binary_resolved_outcomes",
|
|
46
|
+
"validate_real_binary_corpus",
|
|
47
|
+
"CorpusRegistry",
|
|
48
|
+
"CorpusMetadata",
|
|
49
|
+
"CorpusManifest",
|
|
50
|
+
"CorpusTier",
|
|
51
|
+
"LicenseType",
|
|
52
|
+
"CorpusSplit",
|
|
53
|
+
"get_corpus",
|
|
54
|
+
"get_corpus_metadata",
|
|
55
|
+
"list_available_corpora",
|
|
56
|
+
]
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
# coding=utf-8
|
|
2
|
+
# Copyright 2026 XRTM Team. All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
16
|
+
r"""Concrete built-in corpus registrations."""
|
|
17
|
+
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
from typing import List
|
|
21
|
+
|
|
22
|
+
from xrtm.data.corpora.real_binary import REAL_BINARY_CORPUS_ID, RealBinaryCorpusSource
|
|
23
|
+
from xrtm.data.corpora.registry import (
|
|
24
|
+
CorpusManifest,
|
|
25
|
+
CorpusMetadata,
|
|
26
|
+
CorpusSplit,
|
|
27
|
+
CorpusTier,
|
|
28
|
+
LicenseType,
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def build_builtin_manifests() -> List[CorpusManifest]:
|
|
33
|
+
"""Build manifests for the corpora shipped with the registry bootstrap."""
|
|
34
|
+
return [
|
|
35
|
+
_build_real_binary_manifest(),
|
|
36
|
+
]
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _build_real_binary_manifest() -> CorpusManifest:
|
|
40
|
+
real_binary_metadata = CorpusMetadata(
|
|
41
|
+
corpus_id=REAL_BINARY_CORPUS_ID,
|
|
42
|
+
name="XRTM Real Binary v1",
|
|
43
|
+
tier=CorpusTier.TIER_1,
|
|
44
|
+
license_type=LicenseType.APACHE_2_0,
|
|
45
|
+
description="Minimal deterministic real-world binary question corpus for CI smoke tests",
|
|
46
|
+
version="1.0",
|
|
47
|
+
release_gate_approved=True,
|
|
48
|
+
bundled=True,
|
|
49
|
+
size_estimate=25,
|
|
50
|
+
tags=["binary", "deterministic", "embedded", "seed-corpus"],
|
|
51
|
+
provenance_url="https://github.com/xrtm/xrtm",
|
|
52
|
+
license_url="https://www.apache.org/licenses/LICENSE-2.0",
|
|
53
|
+
)
|
|
54
|
+
|
|
55
|
+
return CorpusManifest(
|
|
56
|
+
corpus_id=REAL_BINARY_CORPUS_ID,
|
|
57
|
+
metadata=real_binary_metadata,
|
|
58
|
+
loader_fn=lambda: RealBinaryCorpusSource(),
|
|
59
|
+
available_splits=[CorpusSplit.FULL],
|
|
60
|
+
default_split=CorpusSplit.FULL,
|
|
61
|
+
)
|
|
@@ -13,8 +13,4 @@
|
|
|
13
13
|
# See the License for the specific language governing permissions and
|
|
14
14
|
# limitations under the License.
|
|
15
15
|
|
|
16
|
-
r"""
|
|
17
|
-
|
|
18
|
-
from xrtm.data.providers.local.csv import LocalDataSource
|
|
19
|
-
|
|
20
|
-
__all__ = ["LocalDataSource"]
|
|
16
|
+
r"""External data providers implementing the DataSource ABC."""
|
|
@@ -13,8 +13,9 @@
|
|
|
13
13
|
# See the License for the specific language governing permissions and
|
|
14
14
|
# limitations under the License.
|
|
15
15
|
|
|
16
|
-
r"""Online
|
|
16
|
+
r"""Online data providers (Metaculus, Polymarket, etc.)."""
|
|
17
17
|
|
|
18
|
+
from xrtm.data.providers.online.metaculus import MetaculusSource
|
|
18
19
|
from xrtm.data.providers.online.polymarket import PolymarketSource
|
|
19
20
|
|
|
20
|
-
__all__ = ["PolymarketSource"]
|
|
21
|
+
__all__ = ["MetaculusSource", "PolymarketSource"]
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
# coding=utf-8
|
|
2
|
+
# Copyright 2026 XRTM Team. All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
16
|
+
r"""Metaculus data provider.
|
|
17
|
+
|
|
18
|
+
Fetches forecasting questions from the Metaculus API.
|
|
19
|
+
Requires a Metaculus API key (free account). Set ``METACULUS_API_KEY``
|
|
20
|
+
environment variable or pass ``api_key`` to the constructor.
|
|
21
|
+
|
|
22
|
+
Get a key: https://www.metaculus.com/accounts/signup/
|
|
23
|
+
API docs: https://www.metaculus.com/api2/
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
from __future__ import annotations
|
|
27
|
+
|
|
28
|
+
import json
|
|
29
|
+
import logging
|
|
30
|
+
import urllib.request
|
|
31
|
+
from datetime import datetime, timezone
|
|
32
|
+
from typing import Any
|
|
33
|
+
|
|
34
|
+
from xrtm.data.core.interfaces import DataSource, DataSourceError
|
|
35
|
+
from xrtm.data.core.schemas.forecast import ForecastQuestion, MetadataBase
|
|
36
|
+
|
|
37
|
+
logger = logging.getLogger(__name__)
|
|
38
|
+
|
|
39
|
+
METACULUS_API_BASE = "https://www.metaculus.com/api2"
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class MetaculusSource(DataSource):
|
|
43
|
+
r"""Data source for Metaculus forecasting questions.
|
|
44
|
+
|
|
45
|
+
Fetches open binary questions from the Metaculus API.
|
|
46
|
+
Requires a Metaculus API key. Set ``METACULUS_API_KEY`` env var
|
|
47
|
+
or pass ``api_key`` to the constructor.
|
|
48
|
+
(Free account at https://www.metaculus.com/accounts/signup/)
|
|
49
|
+
|
|
50
|
+
Example:
|
|
51
|
+
>>> source = MetaculusSource(api_key="...")
|
|
52
|
+
>>> questions = await source.fetch_questions(limit=5)
|
|
53
|
+
"""
|
|
54
|
+
|
|
55
|
+
def __init__(self, api_base: str = METACULUS_API_BASE, api_key: str | None = None):
|
|
56
|
+
import os
|
|
57
|
+
self.api_base = api_base.rstrip("/")
|
|
58
|
+
self.api_key = api_key or os.environ.get("METACULUS_API_KEY", "")
|
|
59
|
+
if not self.api_key:
|
|
60
|
+
logger.warning(
|
|
61
|
+
"MetaculusSource: No METACULUS_API_KEY set. API calls will fail. "
|
|
62
|
+
"Get a free key at https://www.metaculus.com/accounts/signup/"
|
|
63
|
+
)
|
|
64
|
+
|
|
65
|
+
async def fetch_questions(
|
|
66
|
+
self,
|
|
67
|
+
limit: int = 10,
|
|
68
|
+
offset: int = 0,
|
|
69
|
+
**kwargs: Any,
|
|
70
|
+
) -> list[ForecastQuestion]:
|
|
71
|
+
r"""Fetch open binary questions from Metaculus.
|
|
72
|
+
|
|
73
|
+
Args:
|
|
74
|
+
limit: Maximum number of questions to return.
|
|
75
|
+
offset: Pagination offset.
|
|
76
|
+
**kwargs: Additional filters (ignored).
|
|
77
|
+
|
|
78
|
+
Returns:
|
|
79
|
+
List of ``ForecastQuestion`` objects.
|
|
80
|
+
"""
|
|
81
|
+
url = (
|
|
82
|
+
f"{self.api_base}/questions/?"
|
|
83
|
+
f"limit={min(limit, 100)}&offset={offset}"
|
|
84
|
+
f"&order_by=-activity&status=open&type=binary"
|
|
85
|
+
)
|
|
86
|
+
data = self._get_json(url)
|
|
87
|
+
results = data.get("results", [])
|
|
88
|
+
|
|
89
|
+
questions = []
|
|
90
|
+
for item in results[:limit]:
|
|
91
|
+
try:
|
|
92
|
+
questions.append(self._to_forecast_question(item))
|
|
93
|
+
except Exception as exc:
|
|
94
|
+
logger.warning(f"Skipping Metaculus question {item.get('id')}: {exc}")
|
|
95
|
+
return questions
|
|
96
|
+
|
|
97
|
+
async def get_question_by_id(self, question_id: str) -> ForecastQuestion | None:
|
|
98
|
+
r"""Fetch a single Metaculus question by ID."""
|
|
99
|
+
try:
|
|
100
|
+
url = f"{self.api_base}/questions/{question_id}/"
|
|
101
|
+
data = self._get_json(url)
|
|
102
|
+
return self._to_forecast_question(data)
|
|
103
|
+
except Exception as exc:
|
|
104
|
+
logger.warning(f"Metaculus question {question_id} not found: {exc}")
|
|
105
|
+
return None
|
|
106
|
+
|
|
107
|
+
def _get_json(self, url: str) -> dict[str, Any]:
|
|
108
|
+
r"""Fetch JSON from a URL."""
|
|
109
|
+
headers = {"User-Agent": "xrtm/0.1"}
|
|
110
|
+
if self.api_key:
|
|
111
|
+
headers["Authorization"] = f"Token {self.api_key}"
|
|
112
|
+
req = urllib.request.Request(url, headers=headers)
|
|
113
|
+
try:
|
|
114
|
+
with urllib.request.urlopen(req, timeout=15) as resp:
|
|
115
|
+
return json.loads(resp.read().decode("utf-8"))
|
|
116
|
+
except Exception as exc:
|
|
117
|
+
raise DataSourceError(f"Metaculus API error: {exc}") from exc
|
|
118
|
+
|
|
119
|
+
@staticmethod
|
|
120
|
+
def _to_forecast_question(item: dict[str, Any]) -> ForecastQuestion:
|
|
121
|
+
r"""Convert a Metaculus API question dict to ``ForecastQuestion``."""
|
|
122
|
+
qid = str(item.get("id", ""))
|
|
123
|
+
title = item.get("title", "") or item.get("name", "")
|
|
124
|
+
description = item.get("description", "") or ""
|
|
125
|
+
resolution_criteria = item.get("resolution_criteria", "") or ""
|
|
126
|
+
|
|
127
|
+
publish_time = item.get("publish_time") or item.get("created_at")
|
|
128
|
+
close_time = item.get("close_time") or item.get("resolve_time")
|
|
129
|
+
snapshot_time = datetime.now(timezone.utc)
|
|
130
|
+
|
|
131
|
+
if close_time:
|
|
132
|
+
try:
|
|
133
|
+
snapshot_time = datetime.fromisoformat(close_time.replace("Z", "+00:00"))
|
|
134
|
+
except (ValueError, TypeError):
|
|
135
|
+
pass
|
|
136
|
+
|
|
137
|
+
return ForecastQuestion(
|
|
138
|
+
id=f"metaculus-{qid}",
|
|
139
|
+
title=title[:500],
|
|
140
|
+
description=description[:2000] if description else f"Metaculus binary question {qid}",
|
|
141
|
+
resolution_criteria=resolution_criteria[:1000] if resolution_criteria else "",
|
|
142
|
+
metadata=MetadataBase(
|
|
143
|
+
snapshot_time=snapshot_time,
|
|
144
|
+
source_version="metaculus",
|
|
145
|
+
tags=["metaculus", "binary"],
|
|
146
|
+
raw_data={
|
|
147
|
+
"metaculus_id": qid,
|
|
148
|
+
"publish_time": publish_time,
|
|
149
|
+
"community_prediction": item.get("community_prediction"),
|
|
150
|
+
},
|
|
151
|
+
),
|
|
152
|
+
)
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
__all__ = ["MetaculusSource"]
|
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
# coding=utf-8
|
|
2
|
+
# Copyright 2026 XRTM Team. All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
16
|
+
r"""Polymarket data provider.
|
|
17
|
+
|
|
18
|
+
Fetches prediction market data from the Polymarket Gamma API.
|
|
19
|
+
The API is free and public for read access.
|
|
20
|
+
|
|
21
|
+
API docs: https://docs.polymarket.com/
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
from __future__ import annotations
|
|
25
|
+
|
|
26
|
+
import json
|
|
27
|
+
import logging
|
|
28
|
+
import urllib.request
|
|
29
|
+
from datetime import datetime, timezone
|
|
30
|
+
from typing import Any
|
|
31
|
+
|
|
32
|
+
from xrtm.data.core.interfaces import DataSource, DataSourceError
|
|
33
|
+
from xrtm.data.core.schemas.forecast import ForecastQuestion, MetadataBase
|
|
34
|
+
|
|
35
|
+
logger = logging.getLogger(__name__)
|
|
36
|
+
|
|
37
|
+
POLYMARKET_GAMMA_API = "https://gamma-api.polymarket.com"
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class PolymarketSource(DataSource):
|
|
41
|
+
r"""Data source for Polymarket prediction markets.
|
|
42
|
+
|
|
43
|
+
Fetches open binary markets from the Polymarket Gamma API.
|
|
44
|
+
No authentication required for read access.
|
|
45
|
+
|
|
46
|
+
Args:
|
|
47
|
+
api_base: Override the API base URL.
|
|
48
|
+
|
|
49
|
+
Example:
|
|
50
|
+
>>> source = PolymarketSource()
|
|
51
|
+
>>> questions = await source.fetch_questions(limit=5)
|
|
52
|
+
"""
|
|
53
|
+
|
|
54
|
+
def __init__(self, api_base: str = POLYMARKET_GAMMA_API):
|
|
55
|
+
self.api_base = api_base.rstrip("/")
|
|
56
|
+
|
|
57
|
+
async def fetch_questions(
|
|
58
|
+
self,
|
|
59
|
+
limit: int = 10,
|
|
60
|
+
offset: int = 0,
|
|
61
|
+
**kwargs: Any,
|
|
62
|
+
) -> list[ForecastQuestion]:
|
|
63
|
+
r"""Fetch open binary markets from Polymarket.
|
|
64
|
+
|
|
65
|
+
Args:
|
|
66
|
+
limit: Maximum number of markets to return.
|
|
67
|
+
offset: Pagination offset.
|
|
68
|
+
**kwargs: Additional filters.
|
|
69
|
+
|
|
70
|
+
Returns:
|
|
71
|
+
List of ``ForecastQuestion`` objects.
|
|
72
|
+
"""
|
|
73
|
+
url = (
|
|
74
|
+
f"{self.api_base}/markets?"
|
|
75
|
+
f"limit={min(limit, 100)}&offset={offset}"
|
|
76
|
+
f"&closed=false&order=volume24hr&ascending=false"
|
|
77
|
+
)
|
|
78
|
+
try:
|
|
79
|
+
data = self._get_json(url)
|
|
80
|
+
except DataSourceError:
|
|
81
|
+
# Gamma API may return a list directly
|
|
82
|
+
url = f"{self.api_base}/events?limit={min(limit, 50)}&closed=false"
|
|
83
|
+
data = self._get_json(url)
|
|
84
|
+
|
|
85
|
+
# Gamma API may wrap in "markets" key or return list directly
|
|
86
|
+
items = data if isinstance(data, list) else data.get("markets", data.get("results", []))
|
|
87
|
+
|
|
88
|
+
questions = []
|
|
89
|
+
for item in items[:limit]:
|
|
90
|
+
try:
|
|
91
|
+
questions.append(self._to_forecast_question(item))
|
|
92
|
+
except Exception as exc:
|
|
93
|
+
logger.warning(f"Skipping Polymarket market {item.get('id')}: {exc}")
|
|
94
|
+
return questions
|
|
95
|
+
|
|
96
|
+
async def get_question_by_id(self, question_id: str) -> ForecastQuestion | None:
|
|
97
|
+
r"""Fetch a single Polymarket market by ID."""
|
|
98
|
+
# Strip prefix if present
|
|
99
|
+
market_id = question_id.replace("polymarket-", "")
|
|
100
|
+
try:
|
|
101
|
+
url = f"{self.api_base}/markets/{market_id}"
|
|
102
|
+
data = self._get_json(url)
|
|
103
|
+
return self._to_forecast_question(data)
|
|
104
|
+
except Exception as exc:
|
|
105
|
+
logger.warning(f"Polymarket market {market_id} not found: {exc}")
|
|
106
|
+
return None
|
|
107
|
+
|
|
108
|
+
def _get_json(self, url: str) -> Any:
|
|
109
|
+
r"""Fetch JSON from a URL."""
|
|
110
|
+
req = urllib.request.Request(url, headers={"User-Agent": "xrtm/0.1"})
|
|
111
|
+
try:
|
|
112
|
+
with urllib.request.urlopen(req, timeout=15) as resp:
|
|
113
|
+
return json.loads(resp.read().decode("utf-8"))
|
|
114
|
+
except Exception as exc:
|
|
115
|
+
raise DataSourceError(f"Polymarket API error: {exc}") from exc
|
|
116
|
+
|
|
117
|
+
@staticmethod
|
|
118
|
+
def _to_forecast_question(item: dict[str, Any]) -> ForecastQuestion:
|
|
119
|
+
r"""Convert a Polymarket API market dict to ``ForecastQuestion``."""
|
|
120
|
+
market_id = str(item.get("id", ""))
|
|
121
|
+
question_text = item.get("question", "") or item.get("title", "") or ""
|
|
122
|
+
description = item.get("description", "") or ""
|
|
123
|
+
|
|
124
|
+
# Extract outcome prices if available
|
|
125
|
+
outcomes = item.get("outcomes", []) or []
|
|
126
|
+
outcome_prices = item.get("outcomePrices", []) or []
|
|
127
|
+
price_info = ""
|
|
128
|
+
if outcomes and outcome_prices and len(outcomes) == len(outcome_prices):
|
|
129
|
+
parts = [f"{o}: {float(p)*100:.1f}%" for o, p in zip(outcomes, outcome_prices)]
|
|
130
|
+
price_info = "Current prices: " + ", ".join(parts)
|
|
131
|
+
|
|
132
|
+
close_time = item.get("endDate") or item.get("closeTime")
|
|
133
|
+
snapshot_time = datetime.now(timezone.utc)
|
|
134
|
+
if close_time:
|
|
135
|
+
try:
|
|
136
|
+
snapshot_time = datetime.fromisoformat(close_time.replace("Z", "+00:00"))
|
|
137
|
+
except (ValueError, TypeError):
|
|
138
|
+
pass
|
|
139
|
+
|
|
140
|
+
return ForecastQuestion(
|
|
141
|
+
id=f"polymarket-{market_id}",
|
|
142
|
+
title=question_text[:500],
|
|
143
|
+
description=(
|
|
144
|
+
f"{description[:1000]}\n\n{price_info}" if description or price_info
|
|
145
|
+
else f"Polymarket binary market {market_id}. {price_info}"
|
|
146
|
+
),
|
|
147
|
+
metadata=MetadataBase(
|
|
148
|
+
snapshot_time=snapshot_time,
|
|
149
|
+
source_version="polymarket",
|
|
150
|
+
tags=["polymarket", "binary", "prediction-market"],
|
|
151
|
+
raw_data={
|
|
152
|
+
"polymarket_id": market_id,
|
|
153
|
+
"volume_24hr": item.get("volume24hr"),
|
|
154
|
+
"liquidity": item.get("liquidity"),
|
|
155
|
+
"outcome_prices": outcome_prices,
|
|
156
|
+
},
|
|
157
|
+
),
|
|
158
|
+
)
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
__all__ = ["PolymarketSource"]
|
|
@@ -21,7 +21,7 @@ This module provides the single source of truth for the package version.
|
|
|
21
21
|
|
|
22
22
|
__all__ = ["__version__", "__author__", "__contact__", "__license__", "__copyright__"]
|
|
23
23
|
|
|
24
|
-
__version__ = "0.2.7"
|
|
24
|
+
__version__ = "0.3.0"
|
|
25
25
|
__author__ = "XRTM Team"
|
|
26
26
|
__contact__ = "moy@xrtm.org"
|
|
27
27
|
__license__ = "Apache-2.0"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: xrtm-data
|
|
3
|
-
Version: 0.2.7
|
|
3
|
+
Version: 0.3.0
|
|
4
4
|
Summary: The Snapshot Vault for XRTM.
|
|
5
5
|
Author-email: XRTM Team <moy@xrtm.org>
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -8,11 +8,7 @@ Requires-Python: <3.13,>=3.11
|
|
|
8
8
|
Description-Content-Type: text/markdown
|
|
9
9
|
License-File: LICENSE
|
|
10
10
|
Requires-Dist: pydantic>=2.0.0
|
|
11
|
-
Requires-Dist: aiohttp>=3.9.0
|
|
12
11
|
Requires-Dist: scipy>=1.11.0
|
|
13
|
-
Requires-Dist: click>=8.0.0
|
|
14
|
-
Requires-Dist: rich>=13.0.0
|
|
15
|
-
Requires-Dist: pyarrow>=14.0.0
|
|
16
12
|
Provides-Extra: dev
|
|
17
13
|
Requires-Dist: pytest>=7.0.0; extra == "dev"
|
|
18
14
|
Requires-Dist: pytest-asyncio>=0.21.0; extra == "dev"
|
|
@@ -3,7 +3,6 @@ README.md
|
|
|
3
3
|
pyproject.toml
|
|
4
4
|
src/xrtm/data/__init__.py
|
|
5
5
|
src/xrtm/data/version.py
|
|
6
|
-
src/xrtm/data/cli/__init__.py
|
|
7
6
|
src/xrtm/data/core/__init__.py
|
|
8
7
|
src/xrtm/data/core/interfaces.py
|
|
9
8
|
src/xrtm/data/core/schemas/__init__.py
|
|
@@ -12,36 +11,18 @@ src/xrtm/data/core/schemas/prior.py
|
|
|
12
11
|
src/xrtm/data/core/schemas/trade.py
|
|
13
12
|
src/xrtm/data/corpora/__init__.py
|
|
14
13
|
src/xrtm/data/corpora/_builtin_corpora.py
|
|
15
|
-
src/xrtm/data/corpora/forecast_importer.py
|
|
16
|
-
src/xrtm/data/corpora/importers.py
|
|
17
14
|
src/xrtm/data/corpora/real_binary.py
|
|
18
15
|
src/xrtm/data/corpora/registry.py
|
|
19
|
-
src/xrtm/data/corpora/splits.py
|
|
20
|
-
src/xrtm/data/kit/__init__.py
|
|
21
|
-
src/xrtm/data/kit/processors/__init__.py
|
|
22
16
|
src/xrtm/data/providers/__init__.py
|
|
23
|
-
src/xrtm/data/providers/local/__init__.py
|
|
24
|
-
src/xrtm/data/providers/local/csv.py
|
|
25
17
|
src/xrtm/data/providers/online/__init__.py
|
|
18
|
+
src/xrtm/data/providers/online/metaculus.py
|
|
26
19
|
src/xrtm/data/providers/online/polymarket.py
|
|
27
|
-
src/xrtm/data/providers/subgraph/__init__.py
|
|
28
|
-
src/xrtm/data/providers/subgraph/polymarket.py
|
|
29
20
|
src/xrtm_data.egg-info/PKG-INFO
|
|
30
21
|
src/xrtm_data.egg-info/SOURCES.txt
|
|
31
22
|
src/xrtm_data.egg-info/dependency_links.txt
|
|
32
|
-
src/xrtm_data.egg-info/entry_points.txt
|
|
33
23
|
src/xrtm_data.egg-info/requires.txt
|
|
34
24
|
src/xrtm_data.egg-info/top_level.txt
|
|
35
|
-
tests/test_beta_fitter.py
|
|
36
|
-
tests/test_cli_loading.py
|
|
37
|
-
tests/test_cli_ux.py
|
|
38
|
-
tests/test_corpus_importers.py
|
|
39
25
|
tests/test_corpus_registry.py
|
|
40
|
-
tests/test_corpus_splits.py
|
|
41
|
-
tests/test_forecast_importer.py
|
|
42
|
-
tests/test_local_datasource.py
|
|
43
|
-
tests/test_polymarket_source.py
|
|
44
|
-
tests/test_polymarket_subgraph.py
|
|
45
26
|
tests/test_prior_schemas.py
|
|
46
27
|
tests/test_real_binary_corpus.py
|
|
47
28
|
tests/test_schemas.py
|