xrtm-eval 0.1.2__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,14 +1,42 @@
  # coding=utf-8
  # Copyright 2026 XRTM Team. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.

- from .epistemics import IntegrityGuardian, SourceTrustEntry, SourceTrustRegistry
- from .eval import EvaluationReport, EvaluationResult, Evaluator
+ r"""
+ Core interfaces and domain-agnostic logic for xrtm-eval.
+
+ This module exports evaluator protocols, epistemics utilities, and
+ core schemas. MUST NOT import from kit/ or providers/.
+ """
+
+ from xrtm.eval.core.epistemics import (
+     IntegrityGuardian,
+     SourceTrustEntry,
+     SourceTrustRegistry,
+ )
+ from xrtm.eval.core.eval import EvaluationReport, EvaluationResult, Evaluator
+ from xrtm.eval.core.schemas import ForecastResolution

  __all__ = [
+     # Evaluator protocol
      "Evaluator",
      "EvaluationResult",
      "EvaluationReport",
+     # Epistemics
      "IntegrityGuardian",
      "SourceTrustRegistry",
      "SourceTrustEntry",
+     # Schemas
+     "ForecastResolution",
  ]
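
With 0.2.0 the relative imports are replaced by fully qualified `xrtm.eval.core` paths, and `ForecastResolution` joins the public surface. A minimal sketch of downstream usage against the re-exports in `__all__` above, assuming this hunk is `xrtm/eval/core/__init__.py` (consistent with the RECORD entries below); the registry/guardian wiring mirrors the README snippet later in this diff:

```
# Illustrative sketch only; the names come from the __all__ list above.
from xrtm.eval.core import ForecastResolution, IntegrityGuardian, SourceTrustRegistry

registry = SourceTrustRegistry()
guardian = IntegrityGuardian(registry)

# Ground-truth record for a resolved question, per the new schemas export.
resolution = ForecastResolution(question_id="q1", outcome="yes")
```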
@@ -30,11 +30,9 @@ class BrierDecomposition(BaseModel):


  class Evaluator(Protocol):
-     def score(self, prediction: Any, ground_truth: Any) -> float:
-         ...
+     def score(self, prediction: Any, ground_truth: Any) -> float: ...

-     def evaluate(self, prediction: Any, ground_truth: Any, subject_id: str) -> EvaluationResult:
-         ...
+     def evaluate(self, prediction: Any, ground_truth: Any, subject_id: str) -> EvaluationResult: ...


  class EvaluationReport(BaseModel):
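
`Evaluator` is a `typing.Protocol`, so implementations satisfy it structurally rather than by inheritance. A minimal sketch of a conforming scorer; the Brier-style scoring and the string-outcome normalization are illustrative (they mirror the ECE evaluator later in this diff), and `evaluate` is omitted because `EvaluationResult`'s constructor fields are not shown in this hunk:

```
from typing import Any


class SimpleBrierScorer:
    """Structurally matches Evaluator.score(); illustrative only."""

    def score(self, prediction: Any, ground_truth: Any) -> float:
        p = min(max(float(prediction), 0.0), 1.0)  # clamp probability to [0, 1]
        y = 1.0 if str(ground_truth).lower() in ("yes", "1", "true") else 0.0
        return (p - y) ** 2  # Brier score: 0.0 is perfect, 1.0 is worst


print(SimpleBrierScorer().score(0.8, "yes"))  # ~0.04
```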
@@ -55,6 +53,7 @@ class EvaluationReport(BaseModel):
      def to_pandas(self) -> Any:
          try:
              import pandas as pd
+
              return pd.DataFrame([r.model_dump() for r in self.results])
          except ImportError:
              raise ImportError("Pandas is required for to_pandas(). Install it with `pip install pandas`.")
@@ -0,0 +1,24 @@
+ # coding=utf-8
+ # Copyright 2026 XRTM Team. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ r"""
+ Core schemas for xrtm-eval.
+
+ This module exports evaluation-related Pydantic models.
+ """
+
+ from xrtm.eval.core.schemas.forecast import ForecastResolution
+
+ __all__ = ["ForecastResolution"]
@@ -0,0 +1,59 @@
+ # coding=utf-8
+ # Copyright 2026 XRTM Team. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ r"""
+ Forecast resolution schema for evaluation.
+
+ This module defines the ground-truth outcome schema used to evaluate
+ forecast accuracy.
+
+ Example:
+     >>> from xrtm.eval.core.schemas import ForecastResolution
+     >>> resolution = ForecastResolution(
+     ...     question_id="q1",
+     ...     outcome="yes",
+     ... )
+ """
+
+ from datetime import datetime, timezone
+ from typing import Any, Dict
+
+ from pydantic import BaseModel, Field
+
+
+ class ForecastResolution(BaseModel):
+     r"""
+     The ground-truth outcome used to evaluate forecast accuracy.
+
+     Attributes:
+         question_id: Reference to the forecasted question.
+         outcome: The final winning outcome or value.
+         resolved_at: When the outcome was determined.
+         metadata: Source info, verification method, etc.
+
+     Example:
+         >>> resolution = ForecastResolution(question_id="q1", outcome="yes")
+     """
+
+     question_id: str = Field(..., description="Reference to the forecasted question")
+     outcome: str = Field(..., description="The final winning outcome or value")
+     resolved_at: datetime = Field(
+         default_factory=lambda: datetime.now(timezone.utc),
+         description="When the outcome was determined",
+     )
+     metadata: Dict[str, Any] = Field(default_factory=dict, description="Source info, verification method")
+
+
+ __all__ = ["ForecastResolution"]
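
A short usage sketch of the new schema, built from the docstring example above; `resolved_at` defaults to the current UTC time and `metadata` to an empty dict, and the "source" metadata key is illustrative:

```
from xrtm.eval.core.schemas import ForecastResolution

resolution = ForecastResolution(
    question_id="q1",
    outcome="yes",
    metadata={"source": "official announcement"},  # illustrative metadata key
)

# Pydantic v2 model_dump(), the same API EvaluationReport.to_pandas() relies on.
print(resolution.model_dump())
```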
@@ -8,10 +8,18 @@ from xrtm.eval.core.eval.definitions import EvaluationResult, Evaluator

  class BiasInterceptor(Evaluator):
      COGNITIVE_BIASES = [
-         "Base-Rate Neglect", "Overconfidence", "Availability Heuristic",
-         "Confirmation Bias", "Anchoring Bias", "Sunk Cost Fallacy",
-         "Hindsight Bias", "Optimism Bias", "Pessimism Bias",
-         "Status Quo Bias", "Framing Effect", "Recency Bias",
+         "Base-Rate Neglect",
+         "Overconfidence",
+         "Availability Heuristic",
+         "Confirmation Bias",
+         "Anchoring Bias",
+         "Sunk Cost Fallacy",
+         "Hindsight Bias",
+         "Optimism Bias",
+         "Pessimism Bias",
+         "Status Quo Bias",
+         "Framing Effect",
+         "Recency Bias",
      ]

      def __init__(self, model: Any):
@@ -46,4 +54,5 @@ class BiasInterceptor(Evaluator):
              metadata={"type": "bias_audit"},
          )

+
  __all__ = ["BiasInterceptor"]
@@ -12,6 +12,7 @@ from xrtm.eval.core.epistemics import IntegrityGuardian, SourceTrustRegistry

  logger = logging.getLogger(__name__)

+
  class EpistemicEvaluator:
      def __init__(self, registry: Optional[SourceTrustRegistry] = None):
          self.registry = registry or SourceTrustRegistry()
@@ -28,4 +29,5 @@
              "integrity_level": "HIGH" if avg_trust > 0.8 else "MEDIUM" if avg_trust >= 0.5 else "LOW",
          }

+
  __all__ = ["EpistemicEvaluator"]
@@ -8,6 +8,7 @@ from xrtm.data.schemas.forecast import ForecastOutput

  logger = logging.getLogger(__name__)

+
  class InterventionEngine:
      @staticmethod
      def apply_intervention(output: ForecastOutput, node_id: str, new_probability: float) -> ForecastOutput:
@@ -29,12 +30,18 @@ class InterventionEngine:
              weight = data.get("weight", 1.0)
              target_node = next(n for n in new_output.logical_trace if n.node_id == target_id)
              old_target_prob = target_node.probability or 0.5
-             normalized_delta = (current_node.probability - (dg.nodes[current_id].get("probability") or 0.5)) * weight
+             normalized_delta = (
+                 current_node.probability - (dg.nodes[current_id].get("probability") or 0.5)
+             ) * weight
              target_node.probability = max(0.0, min(1.0, old_target_prob + normalized_delta))
          leaf_nodes = [n for n in dg.nodes() if dg.out_degree(n) == 0]
          if leaf_nodes:
-             avg_leaf_prob = sum(next(n.probability for n in new_output.logical_trace if n.node_id == leaf_id) or 0.0 for leaf_id in leaf_nodes) / len(leaf_nodes)
+             avg_leaf_prob = sum(
+                 next(n.probability for n in new_output.logical_trace if n.node_id == leaf_id) or 0.0
+                 for leaf_id in leaf_nodes
+             ) / len(leaf_nodes)
              new_output.confidence = avg_leaf_prob
          return new_output

+
  __all__ = ["InterventionEngine"]
@@ -73,15 +73,22 @@ class ExpectedCalibrationErrorEvaluator(Evaluator):

      def compute_calibration_data(self, results: List[EvaluationResult]) -> Tuple[float, List[ReliabilityBin]]:
          bin_size = 1.0 / self.num_bins
-         bins: List[List[EvaluationResult]] = [[] for _ in range(self.num_bins)]
+         bins: List[List[Tuple[float, float]]] = [[] for _ in range(self.num_bins)]

          for res in results:
              try:
-                 conf = min(max(float(res.prediction), 0.0), 1.0)
+                 raw_conf = float(res.prediction)
+                 conf = min(max(raw_conf, 0.0), 1.0)
                  idx = int(conf / bin_size)
                  if idx == self.num_bins:
                      idx -= 1
-                 bins[idx].append(res)
+
+                 gt = res.ground_truth
+                 normalized_gt = (
+                     1.0 if (gt.lower() in ["yes", "1", "true", "won", "pass"] if isinstance(gt, str) else gt) else 0.0
+                 )
+
+                 bins[idx].append((raw_conf, normalized_gt))
              except (ValueError, TypeError):
                  continue

@@ -94,17 +101,13 @@ class ExpectedCalibrationErrorEvaluator(Evaluator):
              bin_center = (i + 0.5) * bin_size

              if n_b > 0:
-                 mean_conf = sum(float(x.prediction) for x in bin_items) / n_b
-                 accuracies = []
-                 for x in bin_items:
-                     gt = x.ground_truth
-                     normalized_gt = 1.0 if (gt.lower() in ["yes", "1", "true", "won", "pass"] if isinstance(gt, str) else gt) else 0.0
-                     accuracies.append(normalized_gt)
-
-                 mean_acc = sum(accuracies) / n_b
+                 mean_conf = sum(x[0] for x in bin_items) / n_b
+                 mean_acc = sum(x[1] for x in bin_items) / n_b
                  ece += (n_b / total_count) * abs(mean_acc - mean_conf)
                  reliability_data.append(
-                     ReliabilityBin(bin_center=bin_center, mean_prediction=mean_conf, mean_ground_truth=mean_acc, count=n_b)
+                     ReliabilityBin(
+                         bin_center=bin_center, mean_prediction=mean_conf, mean_ground_truth=mean_acc, count=n_b
+                     )
                  )
              else:
                  reliability_data.append(
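
The refactor above bins (confidence, normalized outcome) pairs once and reuses them for both the per-bin means and the ECE sum. A self-contained sketch of the same expected-calibration-error computation, independent of the package's `EvaluationResult` type; the bin count and the yes/no normalization list are illustrative:

```
from typing import List, Tuple


def expected_calibration_error(preds: List[float], outcomes: List[str], num_bins: int = 10) -> float:
    """ECE = sum over bins of (n_b / N) * |mean accuracy - mean confidence|."""
    bin_size = 1.0 / num_bins
    bins: List[List[Tuple[float, float]]] = [[] for _ in range(num_bins)]
    for p, gt in zip(preds, outcomes):
        conf = min(max(float(p), 0.0), 1.0)
        idx = min(int(conf / bin_size), num_bins - 1)  # fold the top edge into the last bin
        y = 1.0 if gt.lower() in ["yes", "1", "true", "won", "pass"] else 0.0
        bins[idx].append((conf, y))
    total = sum(len(b) for b in bins)
    ece = 0.0
    for items in bins:
        if items:
            mean_conf = sum(c for c, _ in items) / len(items)
            mean_acc = sum(y for _, y in items) / len(items)
            ece += (len(items) / total) * abs(mean_acc - mean_conf)
    return ece


print(expected_calibration_error([0.9, 0.8, 0.2, 0.3], ["yes", "no", "no", "yes"]))  # ~0.35
```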
@@ -11,22 +11,34 @@ class FakeNewsItem(BaseModel):
      trust_score: float = 0.1
      intended_bias: str = Field(..., description="e.g. 'Bearish', 'Bullish'")

+
  class GullibilityReport(BaseModel):
      initial_confidence: float
      post_injection_confidence: float
      delta: float
      resilience_score: float

+
  class AdversarialInjector:
      def __init__(self, intensity: float = 0.5):
          self.intensity = intensity
+
      def generate_attack(self, subject: str, direction: str) -> FakeNewsItem:
          if direction.lower() == "bearish":
-             return FakeNewsItem(headline=f"BREAKING: {subject} CEO Under Investigation", content="...", intended_bias="Bearish")
+             return FakeNewsItem(
+                 headline=f"BREAKING: {subject} CEO Under Investigation", content="...", intended_bias="Bearish"
+             )
          return FakeNewsItem(headline=f"{subject} Secures Massive Contract", content="...", intended_bias="Bullish")
+
      def measure_resilience(self, initial_confidence: float, post_injection_confidence: float) -> GullibilityReport:
          delta = post_injection_confidence - initial_confidence
          score = max(0.0, 1.0 - abs(delta))
-         return GullibilityReport(initial_confidence=initial_confidence, post_injection_confidence=post_injection_confidence, delta=delta, resilience_score=score)
+         return GullibilityReport(
+             initial_confidence=initial_confidence,
+             post_injection_confidence=post_injection_confidence,
+             delta=delta,
+             resilience_score=score,
+         )
+

  __all__ = ["FakeNewsItem", "GullibilityReport", "AdversarialInjector"]
xrtm/eval/kit/eval/viz.py CHANGED
@@ -9,12 +9,14 @@ import numpy as np

  logger = logging.getLogger(__name__)

+
  @dataclass
  class ReliabilityCurveData:
      prob_pred: np.ndarray
      prob_true: np.ndarray
      ece: float

+
  def compute_calibration_curve(y_true: List[int], y_prob: List[float], n_bins: int = 10) -> ReliabilityCurveData:
      y_true_arr = np.array(y_true)
      y_prob_arr = np.array(y_prob)
@@ -39,7 +41,10 @@ def compute_calibration_curve(y_true: List[int], y_prob: List[float], n_bins: in
          ece += (count / total_samples) * np.abs(fraction_true - mean_prob)
      return ReliabilityCurveData(prob_pred=np.array(bin_pred), prob_true=np.array(bin_true), ece=ece)

- def plot_reliability_diagram(data: ReliabilityCurveData, title: str = "Reliability Diagram", save_path: Optional[str] = None) -> Any:
+
+ def plot_reliability_diagram(
+     data: ReliabilityCurveData, title: str = "Reliability Diagram", save_path: Optional[str] = None
+ ) -> Any:
      try:
          import matplotlib.pyplot as plt
          import seaborn as sns
@@ -61,13 +66,17 @@
          plt.savefig(save_path)
      return fig

+
  class ReliabilityDiagram:
      def __init__(self, n_bins: int = 10):
          self.n_bins = n_bins
+
      def compute(self, y_true: List[int], y_prob: List[float]) -> ReliabilityCurveData:
          return compute_calibration_curve(y_true, y_prob, self.n_bins)
+
      def plot(self, y_true: List[int], y_prob: List[float], save_path: Optional[str] = None) -> Any:
          data = self.compute(y_true, y_prob)
          return plot_reliability_diagram(data, save_path=save_path)

+
  __all__ = ["ReliabilityCurveData", "compute_calibration_curve", "plot_reliability_diagram", "ReliabilityDiagram"]
@@ -0,0 +1,24 @@
+ # coding=utf-8
+ # Copyright 2026 XRTM Team. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ r"""
+ External providers for xrtm-eval.
+
+ This module provides adapters for external evaluation services.
+ Currently empty - will be populated with remote judges, LLM-as-judge
+ integrations, etc.
+ """
+
+ __all__: list[str] = []
xrtm/eval/version.py ADDED
@@ -0,0 +1,28 @@
+ # coding=utf-8
+ # Copyright 2026 XRTM Team. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ r"""
+ Version information for xrtm-eval.
+
+ This module provides the single source of truth for the package version.
+ """
+
+ __all__ = ["__version__", "__author__", "__contact__", "__license__", "__copyright__"]
+
+ __version__ = "0.2.0"
+ __author__ = "XRTM Team"
+ __contact__ = "moy@xrtm.org"
+ __license__ = "Apache-2.0"
+ __copyright__ = "Copyright 2026 XRTM Team"
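
Since version.py is the declared single source of truth, checking the installed version at runtime is a one-liner (sketch):

```
from xrtm.eval.version import __version__

assert __version__ == "0.2.0"
```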
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: xrtm-eval
- Version: 0.1.2
+ Version: 0.2.0
  Summary: The Judge/Scoring engine for XRTM.
  Author-email: XRTM Team <moy@xrtm.org>
  License: Apache-2.0
@@ -76,6 +76,19 @@ registry = SourceTrustRegistry()
  guardian = IntegrityGuardian(registry)
  ```

+ ## Project Structure
+
+ ```
+ src/xrtm/eval/
+ ├── core/                 # Interfaces & Schemas
+ │   ├── eval/             # Evaluator protocol, EvaluationResult
+ │   ├── epistemics.py     # Trust primitives (SourceTrustRegistry)
+ │   └── schemas/          # ForecastResolution
+ ├── kit/                  # Composable evaluator implementations
+ │   └── eval/metrics.py   # BrierScoreEvaluator, ECE
+ └── providers/            # External evaluation services (future)
+ ```
+
  ## Development

  Prerequisites:
@@ -0,0 +1,24 @@
+ xrtm/eval/__init__.py,sha256=4DLMyE6iTtJmIhlns76iKvBu3NCarF4wyUF-mBkgCes,973
+ xrtm/eval/version.py,sha256=KR0yQFZMEdxtRXPGI69PJ6k_ApaORhOSYzVCp6z98zE,964
+ xrtm/eval/core/__init__.py,sha256=xvfvjr_abGVs2DOReAnM0FXnXzaBiz6C9p5bS6qSrS0,1283
+ xrtm/eval/core/epistemics.py,sha256=3luGGiyWQBbULPkgHANSYBfQhmaJpfv08tJK1hsv30I,5144
+ xrtm/eval/core/eval/__init__.py,sha256=LEHGg2YJ8V-ZFZG6C3Ld0PEAFiIBcnsKK8Qgys3krrk,216
+ xrtm/eval/core/eval/aggregation.py,sha256=3RHpQ8ruXl6cFjbX_odHAWpLZSbOt7E5tqwvPR9ytRw,1359
+ xrtm/eval/core/eval/bayesian.py,sha256=nSlWxcwEGclm4SjCqpzKuTY56DXHbTqV6Aylc4hZRS8,755
+ xrtm/eval/core/eval/definitions.py,sha256=ixihDAhEQYpXhTMxDTbgI8DyLQ8rzr3vSQ8suCcarKQ,1798
+ xrtm/eval/core/schemas/__init__.py,sha256=uPuMwFGO5JF6YcLr1KBPPZMr_UclwoiNyAM1eZ1kgX4,803
+ xrtm/eval/core/schemas/forecast.py,sha256=bL34Rpg9Sr4P9xto4GhNDQEbqWt1thCto8YPmVxw24c,1958
+ xrtm/eval/kit/eval/__init__.py,sha256=F4tByHG13YysLVYJshG9gL5zoo5G-tdcy2jsNTGxMpk,906
+ xrtm/eval/kit/eval/analytics.py,sha256=ahaGE7l_Lb8LBRWeCyC8oaE-7K8XShT0hwqQwT4CHXY,1818
+ xrtm/eval/kit/eval/bias.py,sha256=rMUMLhZjwBw5UTcbGqdqMzsnu4ificSPOI6hqLSUmyE,1678
+ xrtm/eval/kit/eval/epistemic_evaluator.py,sha256=ZgbH6Y1n7nEFJPia7offnWlz7n7XMJrmqeYuewl5c-s,1156
+ xrtm/eval/kit/eval/intervention.py,sha256=arlfNPjVIjNe1scwPYP-rX6ArerICSCgqf6dPidBtwg,2017
+ xrtm/eval/kit/eval/metrics.py,sha256=nAzTcXg4iZ35MTRSE5GU0kqi2ynw6WPQdQ_EzaBdI2Y,4615
+ xrtm/eval/kit/eval/resilience.py,sha256=qEDN5JljQFVmwTd2Quy8b23rPfOaq2KiykFhnieJTKo,1500
+ xrtm/eval/kit/eval/viz.py,sha256=Us0bz2PclI1veeSBeAWr9FRkFaQZRC7uEbIk7-YKt7g,2786
+ xrtm/eval/providers/__init__.py,sha256=I7jUXXwPSYuGcUq3y8rFXik1pk48uyj2pMOyQ3s6egA,832
+ xrtm_eval-0.2.0.dist-info/licenses/LICENSE,sha256=BexUTTsX5WlzyJ0Tqajo1h_LFYfCtfFgWdRaGltpm5I,11328
+ xrtm_eval-0.2.0.dist-info/METADATA,sha256=YzikKaoy19V_ZBIfNLUcA7-YWtJnZEYg9AveEiaeotA,3162
+ xrtm_eval-0.2.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+ xrtm_eval-0.2.0.dist-info/top_level.txt,sha256=Jz-i0a9P8GVrIR9KJTT-9wT95E1brww6U5o2QViAt20,5
+ xrtm_eval-0.2.0.dist-info/RECORD,,
@@ -1,3 +0,0 @@
- from .forecast import ForecastResolution
-
- __all__ = ["ForecastResolution"]
@@ -1,21 +0,0 @@
- # coding=utf-8
- # Copyright 2026 XRTM Team. All rights reserved.
-
- from datetime import datetime, timezone
- from typing import Any, Dict
-
- from pydantic import BaseModel, Field
-
-
- class ForecastResolution(BaseModel):
-     r"""
-     The ground-truth outcome used to evaluate forecast accuracy.
-     """
-
-     question_id: str
-     outcome: str = Field(..., description="The final winning outcome or value")
-     resolved_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
-     metadata: Dict[str, Any] = Field(default_factory=dict, description="Source info, verification method")
-
-
- __all__ = ["ForecastResolution"]
@@ -1,22 +0,0 @@
- xrtm/eval/__init__.py,sha256=4DLMyE6iTtJmIhlns76iKvBu3NCarF4wyUF-mBkgCes,973
- xrtm/eval/core/__init__.py,sha256=ypetAyWpTtLvQK5sXk5sSdlPKad5zpBwhymUoyoxVl4,366
- xrtm/eval/core/epistemics.py,sha256=3luGGiyWQBbULPkgHANSYBfQhmaJpfv08tJK1hsv30I,5144
- xrtm/eval/core/eval/__init__.py,sha256=LEHGg2YJ8V-ZFZG6C3Ld0PEAFiIBcnsKK8Qgys3krrk,216
- xrtm/eval/core/eval/aggregation.py,sha256=3RHpQ8ruXl6cFjbX_odHAWpLZSbOt7E5tqwvPR9ytRw,1359
- xrtm/eval/core/eval/bayesian.py,sha256=nSlWxcwEGclm4SjCqpzKuTY56DXHbTqV6Aylc4hZRS8,755
- xrtm/eval/core/eval/definitions.py,sha256=p7Sf5JjOlypjglOg_4oOw-1y48s-d-ud8U3Ll6MEE4I,1813
- xrtm/eval/kit/eval/__init__.py,sha256=F4tByHG13YysLVYJshG9gL5zoo5G-tdcy2jsNTGxMpk,906
- xrtm/eval/kit/eval/analytics.py,sha256=ahaGE7l_Lb8LBRWeCyC8oaE-7K8XShT0hwqQwT4CHXY,1818
- xrtm/eval/kit/eval/bias.py,sha256=OQvJzSd_beBzVai-n9fZyvXPwLo78Y794HOkC8eyEiU,1613
- xrtm/eval/kit/eval/epistemic_evaluator.py,sha256=KYqgfxg_MK8qaeZEr6o7sGbP3MBXu5FSbfsLrr-warQ,1154
- xrtm/eval/kit/eval/intervention.py,sha256=ghC9dw8I1DcEf9M0JqO1oxLiTUZd4AOsM8vlX36XlVI,1931
- xrtm/eval/kit/eval/metrics.py,sha256=VKMRwIt9UGJ23uqSUhq_vUSO0LhSe4e2H7S6eAablw4,4596
- xrtm/eval/kit/eval/resilience.py,sha256=q95enu-mglq-S5MSIiO9kDN2Kn9898LvuxNeqo90mKc,1406
- xrtm/eval/kit/eval/viz.py,sha256=LGss9nuGpubJdBbXRlQU2KiGqouTNnWCDyXvKAct3pE,2773
- xrtm/eval/schemas/__init__.py,sha256=6nwuE6LpwosNhFR-xRPPWcGzP1ZxoXhN-vWrzK4XfbQ,75
- xrtm/eval/schemas/forecast.py,sha256=vA5V5RTo7k2ZV3THs2h3fxmdusS06Vw92H5WKJmXVzA,624
- xrtm_eval-0.1.2.dist-info/licenses/LICENSE,sha256=BexUTTsX5WlzyJ0Tqajo1h_LFYfCtfFgWdRaGltpm5I,11328
- xrtm_eval-0.1.2.dist-info/METADATA,sha256=h4-gh_4ofAluGCFqWZ6lHbv-absbh_-fs94_jqgt8W4,2673
- xrtm_eval-0.1.2.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
- xrtm_eval-0.1.2.dist-info/top_level.txt,sha256=Jz-i0a9P8GVrIR9KJTT-9wT95E1brww6U5o2QViAt20,5
- xrtm_eval-0.1.2.dist-info/RECORD,,