xrtm-eval 0.1.2__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- xrtm/eval/core/__init__.py +30 -2
- xrtm/eval/core/eval/definitions.py +3 -4
- xrtm/eval/core/schemas/__init__.py +24 -0
- xrtm/eval/core/schemas/forecast.py +59 -0
- xrtm/eval/kit/eval/bias.py +13 -4
- xrtm/eval/kit/eval/epistemic_evaluator.py +2 -0
- xrtm/eval/kit/eval/intervention.py +9 -2
- xrtm/eval/kit/eval/metrics.py +15 -12
- xrtm/eval/kit/eval/resilience.py +14 -2
- xrtm/eval/kit/eval/viz.py +10 -1
- xrtm/eval/providers/__init__.py +24 -0
- xrtm/eval/version.py +28 -0
- {xrtm_eval-0.1.2.dist-info → xrtm_eval-0.2.0.dist-info}/METADATA +14 -1
- xrtm_eval-0.2.0.dist-info/RECORD +24 -0
- xrtm/eval/schemas/__init__.py +0 -3
- xrtm/eval/schemas/forecast.py +0 -21
- xrtm_eval-0.1.2.dist-info/RECORD +0 -22
- {xrtm_eval-0.1.2.dist-info → xrtm_eval-0.2.0.dist-info}/WHEEL +0 -0
- {xrtm_eval-0.1.2.dist-info → xrtm_eval-0.2.0.dist-info}/licenses/LICENSE +0 -0
- {xrtm_eval-0.1.2.dist-info → xrtm_eval-0.2.0.dist-info}/top_level.txt +0 -0
xrtm/eval/core/__init__.py
CHANGED
|
@@ -1,14 +1,42 @@
|
|
|
1
1
|
# coding=utf-8
|
|
2
2
|
# Copyright 2026 XRTM Team. All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
3
15
|
|
|
4
|
-
|
|
5
|
-
|
|
16
|
+
r"""
|
|
17
|
+
Core interfaces and domain-agnostic logic for xrtm-eval.
|
|
18
|
+
|
|
19
|
+
This module exports evaluator protocols, epistemics utilities, and
|
|
20
|
+
core schemas. MUST NOT import from kit/ or providers/.
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
from xrtm.eval.core.epistemics import (
|
|
24
|
+
IntegrityGuardian,
|
|
25
|
+
SourceTrustEntry,
|
|
26
|
+
SourceTrustRegistry,
|
|
27
|
+
)
|
|
28
|
+
from xrtm.eval.core.eval import EvaluationReport, EvaluationResult, Evaluator
|
|
29
|
+
from xrtm.eval.core.schemas import ForecastResolution
|
|
6
30
|
|
|
7
31
|
__all__ = [
|
|
32
|
+
# Evaluator protocol
|
|
8
33
|
"Evaluator",
|
|
9
34
|
"EvaluationResult",
|
|
10
35
|
"EvaluationReport",
|
|
36
|
+
# Epistemics
|
|
11
37
|
"IntegrityGuardian",
|
|
12
38
|
"SourceTrustRegistry",
|
|
13
39
|
"SourceTrustEntry",
|
|
40
|
+
# Schemas
|
|
41
|
+
"ForecastResolution",
|
|
14
42
|
]
|
|
xrtm/eval/core/eval/definitions.py
CHANGED
|
@@ -30,11 +30,9 @@ class BrierDecomposition(BaseModel):
|
|
|
30
30
|
|
|
31
31
|
|
|
32
32
|
class Evaluator(Protocol):
|
|
33
|
-
def score(self, prediction: Any, ground_truth: Any) -> float:
|
|
34
|
-
...
|
|
33
|
+
def score(self, prediction: Any, ground_truth: Any) -> float: ...
|
|
35
34
|
|
|
36
|
-
def evaluate(self, prediction: Any, ground_truth: Any, subject_id: str) -> EvaluationResult:
|
|
37
|
-
...
|
|
35
|
+
def evaluate(self, prediction: Any, ground_truth: Any, subject_id: str) -> EvaluationResult: ...
|
|
38
36
|
|
|
39
37
|
|
|
40
38
|
class EvaluationReport(BaseModel):
|
|
@@ -55,6 +53,7 @@ class EvaluationReport(BaseModel):
|
|
|
55
53
|
def to_pandas(self) -> Any:
|
|
56
54
|
try:
|
|
57
55
|
import pandas as pd
|
|
56
|
+
|
|
58
57
|
return pd.DataFrame([r.model_dump() for r in self.results])
|
|
59
58
|
except ImportError:
|
|
60
59
|
raise ImportError("Pandas is required for to_pandas(). Install it with `pip install pandas`.")
|
|
xrtm/eval/core/schemas/__init__.py
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# coding=utf-8
|
|
2
|
+
# Copyright 2026 XRTM Team. All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
16
|
+
r"""
|
|
17
|
+
Core schemas for xrtm-eval.
|
|
18
|
+
|
|
19
|
+
This module exports evaluation-related Pydantic models.
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
from xrtm.eval.core.schemas.forecast import ForecastResolution
|
|
23
|
+
|
|
24
|
+
__all__ = ["ForecastResolution"]
|
|
xrtm/eval/core/schemas/forecast.py
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
# coding=utf-8
|
|
2
|
+
# Copyright 2026 XRTM Team. All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
16
|
+
r"""
|
|
17
|
+
Forecast resolution schema for evaluation.
|
|
18
|
+
|
|
19
|
+
This module defines the ground-truth outcome schema used to evaluate
|
|
20
|
+
forecast accuracy.
|
|
21
|
+
|
|
22
|
+
Example:
|
|
23
|
+
>>> from xrtm.eval.core.schemas import ForecastResolution
|
|
24
|
+
>>> resolution = ForecastResolution(
|
|
25
|
+
... question_id="q1",
|
|
26
|
+
... outcome="yes",
|
|
27
|
+
... )
|
|
28
|
+
"""
|
|
29
|
+
|
|
30
|
+
from datetime import datetime, timezone
|
|
31
|
+
from typing import Any, Dict
|
|
32
|
+
|
|
33
|
+
from pydantic import BaseModel, Field
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class ForecastResolution(BaseModel):
|
|
37
|
+
r"""
|
|
38
|
+
The ground-truth outcome used to evaluate forecast accuracy.
|
|
39
|
+
|
|
40
|
+
Attributes:
|
|
41
|
+
question_id: Reference to the forecasted question.
|
|
42
|
+
outcome: The final winning outcome or value.
|
|
43
|
+
resolved_at: When the outcome was determined.
|
|
44
|
+
metadata: Source info, verification method, etc.
|
|
45
|
+
|
|
46
|
+
Example:
|
|
47
|
+
>>> resolution = ForecastResolution(question_id="q1", outcome="yes")
|
|
48
|
+
"""
|
|
49
|
+
|
|
50
|
+
question_id: str = Field(..., description="Reference to the forecasted question")
|
|
51
|
+
outcome: str = Field(..., description="The final winning outcome or value")
|
|
52
|
+
resolved_at: datetime = Field(
|
|
53
|
+
default_factory=lambda: datetime.now(timezone.utc),
|
|
54
|
+
description="When the outcome was determined",
|
|
55
|
+
)
|
|
56
|
+
metadata: Dict[str, Any] = Field(default_factory=dict, description="Source info, verification method")
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
__all__ = ["ForecastResolution"]
|
xrtm/eval/kit/eval/bias.py
CHANGED
|
@@ -8,10 +8,18 @@ from xrtm.eval.core.eval.definitions import EvaluationResult, Evaluator
|
|
|
8
8
|
|
|
9
9
|
class BiasInterceptor(Evaluator):
|
|
10
10
|
COGNITIVE_BIASES = [
|
|
11
|
-
"Base-Rate Neglect",
|
|
12
|
-
"
|
|
13
|
-
"
|
|
14
|
-
"
|
|
11
|
+
"Base-Rate Neglect",
|
|
12
|
+
"Overconfidence",
|
|
13
|
+
"Availability Heuristic",
|
|
14
|
+
"Confirmation Bias",
|
|
15
|
+
"Anchoring Bias",
|
|
16
|
+
"Sunk Cost Fallacy",
|
|
17
|
+
"Hindsight Bias",
|
|
18
|
+
"Optimism Bias",
|
|
19
|
+
"Pessimism Bias",
|
|
20
|
+
"Status Quo Bias",
|
|
21
|
+
"Framing Effect",
|
|
22
|
+
"Recency Bias",
|
|
15
23
|
]
|
|
16
24
|
|
|
17
25
|
def __init__(self, model: Any):
|
|
@@ -46,4 +54,5 @@ class BiasInterceptor(Evaluator):
|
|
|
46
54
|
metadata={"type": "bias_audit"},
|
|
47
55
|
)
|
|
48
56
|
|
|
57
|
+
|
|
49
58
|
__all__ = ["BiasInterceptor"]
|
|
xrtm/eval/kit/eval/epistemic_evaluator.py
CHANGED
|
@@ -12,6 +12,7 @@ from xrtm.eval.core.epistemics import IntegrityGuardian, SourceTrustRegistry
|
|
|
12
12
|
|
|
13
13
|
logger = logging.getLogger(__name__)
|
|
14
14
|
|
|
15
|
+
|
|
15
16
|
class EpistemicEvaluator:
|
|
16
17
|
def __init__(self, registry: Optional[SourceTrustRegistry] = None):
|
|
17
18
|
self.registry = registry or SourceTrustRegistry()
|
|
@@ -28,4 +29,5 @@ class EpistemicEvaluator:
|
|
|
28
29
|
"integrity_level": "HIGH" if avg_trust > 0.8 else "MEDIUM" if avg_trust >= 0.5 else "LOW",
|
|
29
30
|
}
|
|
30
31
|
|
|
32
|
+
|
|
31
33
|
__all__ = ["EpistemicEvaluator"]
|
|
xrtm/eval/kit/eval/intervention.py
CHANGED
|
@@ -8,6 +8,7 @@ from xrtm.data.schemas.forecast import ForecastOutput
|
|
|
8
8
|
|
|
9
9
|
logger = logging.getLogger(__name__)
|
|
10
10
|
|
|
11
|
+
|
|
11
12
|
class InterventionEngine:
|
|
12
13
|
@staticmethod
|
|
13
14
|
def apply_intervention(output: ForecastOutput, node_id: str, new_probability: float) -> ForecastOutput:
|
|
@@ -29,12 +30,18 @@ class InterventionEngine:
|
|
|
29
30
|
weight = data.get("weight", 1.0)
|
|
30
31
|
target_node = next(n for n in new_output.logical_trace if n.node_id == target_id)
|
|
31
32
|
old_target_prob = target_node.probability or 0.5
|
|
32
|
-
normalized_delta = (
|
|
33
|
+
normalized_delta = (
|
|
34
|
+
current_node.probability - (dg.nodes[current_id].get("probability") or 0.5)
|
|
35
|
+
) * weight
|
|
33
36
|
target_node.probability = max(0.0, min(1.0, old_target_prob + normalized_delta))
|
|
34
37
|
leaf_nodes = [n for n in dg.nodes() if dg.out_degree(n) == 0]
|
|
35
38
|
if leaf_nodes:
|
|
36
|
-
avg_leaf_prob = sum(
|
|
39
|
+
avg_leaf_prob = sum(
|
|
40
|
+
next(n.probability for n in new_output.logical_trace if n.node_id == leaf_id) or 0.0
|
|
41
|
+
for leaf_id in leaf_nodes
|
|
42
|
+
) / len(leaf_nodes)
|
|
37
43
|
new_output.confidence = avg_leaf_prob
|
|
38
44
|
return new_output
|
|
39
45
|
|
|
46
|
+
|
|
40
47
|
__all__ = ["InterventionEngine"]
|
xrtm/eval/kit/eval/metrics.py
CHANGED
|
@@ -73,15 +73,22 @@ class ExpectedCalibrationErrorEvaluator(Evaluator):
|
|
|
73
73
|
|
|
74
74
|
def compute_calibration_data(self, results: List[EvaluationResult]) -> Tuple[float, List[ReliabilityBin]]:
|
|
75
75
|
bin_size = 1.0 / self.num_bins
|
|
76
|
-
bins: List[List[
|
|
76
|
+
bins: List[List[Tuple[float, float]]] = [[] for _ in range(self.num_bins)]
|
|
77
77
|
|
|
78
78
|
for res in results:
|
|
79
79
|
try:
|
|
80
|
-
|
|
80
|
+
raw_conf = float(res.prediction)
|
|
81
|
+
conf = min(max(raw_conf, 0.0), 1.0)
|
|
81
82
|
idx = int(conf / bin_size)
|
|
82
83
|
if idx == self.num_bins:
|
|
83
84
|
idx -= 1
|
|
84
|
-
|
|
85
|
+
|
|
86
|
+
gt = res.ground_truth
|
|
87
|
+
normalized_gt = (
|
|
88
|
+
1.0 if (gt.lower() in ["yes", "1", "true", "won", "pass"] if isinstance(gt, str) else gt) else 0.0
|
|
89
|
+
)
|
|
90
|
+
|
|
91
|
+
bins[idx].append((raw_conf, normalized_gt))
|
|
85
92
|
except (ValueError, TypeError):
|
|
86
93
|
continue
|
|
87
94
|
|
|
@@ -94,17 +101,13 @@ class ExpectedCalibrationErrorEvaluator(Evaluator):
|
|
|
94
101
|
bin_center = (i + 0.5) * bin_size
|
|
95
102
|
|
|
96
103
|
if n_b > 0:
|
|
97
|
-
mean_conf = sum(
|
|
98
|
-
|
|
99
|
-
for x in bin_items:
|
|
100
|
-
gt = x.ground_truth
|
|
101
|
-
normalized_gt = 1.0 if (gt.lower() in ["yes", "1", "true", "won", "pass"] if isinstance(gt, str) else gt) else 0.0
|
|
102
|
-
accuracies.append(normalized_gt)
|
|
103
|
-
|
|
104
|
-
mean_acc = sum(accuracies) / n_b
|
|
104
|
+
mean_conf = sum(x[0] for x in bin_items) / n_b
|
|
105
|
+
mean_acc = sum(x[1] for x in bin_items) / n_b
|
|
105
106
|
ece += (n_b / total_count) * abs(mean_acc - mean_conf)
|
|
106
107
|
reliability_data.append(
|
|
107
|
-
ReliabilityBin(
|
|
108
|
+
ReliabilityBin(
|
|
109
|
+
bin_center=bin_center, mean_prediction=mean_conf, mean_ground_truth=mean_acc, count=n_b
|
|
110
|
+
)
|
|
108
111
|
)
|
|
109
112
|
else:
|
|
110
113
|
reliability_data.append(
|
xrtm/eval/kit/eval/resilience.py
CHANGED
|
@@ -11,22 +11,34 @@ class FakeNewsItem(BaseModel):
|
|
|
11
11
|
trust_score: float = 0.1
|
|
12
12
|
intended_bias: str = Field(..., description="e.g. 'Bearish', 'Bullish'")
|
|
13
13
|
|
|
14
|
+
|
|
14
15
|
class GullibilityReport(BaseModel):
|
|
15
16
|
initial_confidence: float
|
|
16
17
|
post_injection_confidence: float
|
|
17
18
|
delta: float
|
|
18
19
|
resilience_score: float
|
|
19
20
|
|
|
21
|
+
|
|
20
22
|
class AdversarialInjector:
|
|
21
23
|
def __init__(self, intensity: float = 0.5):
|
|
22
24
|
self.intensity = intensity
|
|
25
|
+
|
|
23
26
|
def generate_attack(self, subject: str, direction: str) -> FakeNewsItem:
|
|
24
27
|
if direction.lower() == "bearish":
|
|
25
|
-
return FakeNewsItem(
|
|
28
|
+
return FakeNewsItem(
|
|
29
|
+
headline=f"BREAKING: {subject} CEO Under Investigation", content="...", intended_bias="Bearish"
|
|
30
|
+
)
|
|
26
31
|
return FakeNewsItem(headline=f"{subject} Secures Massive Contract", content="...", intended_bias="Bullish")
|
|
32
|
+
|
|
27
33
|
def measure_resilience(self, initial_confidence: float, post_injection_confidence: float) -> GullibilityReport:
|
|
28
34
|
delta = post_injection_confidence - initial_confidence
|
|
29
35
|
score = max(0.0, 1.0 - abs(delta))
|
|
30
|
-
return GullibilityReport(
|
|
36
|
+
return GullibilityReport(
|
|
37
|
+
initial_confidence=initial_confidence,
|
|
38
|
+
post_injection_confidence=post_injection_confidence,
|
|
39
|
+
delta=delta,
|
|
40
|
+
resilience_score=score,
|
|
41
|
+
)
|
|
42
|
+
|
|
31
43
|
|
|
32
44
|
__all__ = ["FakeNewsItem", "GullibilityReport", "AdversarialInjector"]
|
xrtm/eval/kit/eval/viz.py
CHANGED
|
@@ -9,12 +9,14 @@ import numpy as np
|
|
|
9
9
|
|
|
10
10
|
logger = logging.getLogger(__name__)
|
|
11
11
|
|
|
12
|
+
|
|
12
13
|
@dataclass
|
|
13
14
|
class ReliabilityCurveData:
|
|
14
15
|
prob_pred: np.ndarray
|
|
15
16
|
prob_true: np.ndarray
|
|
16
17
|
ece: float
|
|
17
18
|
|
|
19
|
+
|
|
18
20
|
def compute_calibration_curve(y_true: List[int], y_prob: List[float], n_bins: int = 10) -> ReliabilityCurveData:
|
|
19
21
|
y_true_arr = np.array(y_true)
|
|
20
22
|
y_prob_arr = np.array(y_prob)
|
|
@@ -39,7 +41,10 @@ def compute_calibration_curve(y_true: List[int], y_prob: List[float], n_bins: in
|
|
|
39
41
|
ece += (count / total_samples) * np.abs(fraction_true - mean_prob)
|
|
40
42
|
return ReliabilityCurveData(prob_pred=np.array(bin_pred), prob_true=np.array(bin_true), ece=ece)
|
|
41
43
|
|
|
42
|
-
|
|
44
|
+
|
|
45
|
+
def plot_reliability_diagram(
|
|
46
|
+
data: ReliabilityCurveData, title: str = "Reliability Diagram", save_path: Optional[str] = None
|
|
47
|
+
) -> Any:
|
|
43
48
|
try:
|
|
44
49
|
import matplotlib.pyplot as plt
|
|
45
50
|
import seaborn as sns
|
|
@@ -61,13 +66,17 @@ def plot_reliability_diagram(data: ReliabilityCurveData, title: str = "Reliabili
|
|
|
61
66
|
plt.savefig(save_path)
|
|
62
67
|
return fig
|
|
63
68
|
|
|
69
|
+
|
|
64
70
|
class ReliabilityDiagram:
|
|
65
71
|
def __init__(self, n_bins: int = 10):
|
|
66
72
|
self.n_bins = n_bins
|
|
73
|
+
|
|
67
74
|
def compute(self, y_true: List[int], y_prob: List[float]) -> ReliabilityCurveData:
|
|
68
75
|
return compute_calibration_curve(y_true, y_prob, self.n_bins)
|
|
76
|
+
|
|
69
77
|
def plot(self, y_true: List[int], y_prob: List[float], save_path: Optional[str] = None) -> Any:
|
|
70
78
|
data = self.compute(y_true, y_prob)
|
|
71
79
|
return plot_reliability_diagram(data, save_path=save_path)
|
|
72
80
|
|
|
81
|
+
|
|
73
82
|
__all__ = ["ReliabilityCurveData", "compute_calibration_curve", "plot_reliability_diagram", "ReliabilityDiagram"]
|
|
xrtm/eval/providers/__init__.py
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# coding=utf-8
|
|
2
|
+
# Copyright 2026 XRTM Team. All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
16
|
+
r"""
|
|
17
|
+
External providers for xrtm-eval.
|
|
18
|
+
|
|
19
|
+
This module provides adapters for external evaluation services.
|
|
20
|
+
Currently empty - will be populated with remote judges, LLM-as-judge
|
|
21
|
+
integrations, etc.
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
__all__: list[str] = []
|
xrtm/eval/version.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
# coding=utf-8
|
|
2
|
+
# Copyright 2026 XRTM Team. All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
16
|
+
r"""
|
|
17
|
+
Version information for xrtm-eval.
|
|
18
|
+
|
|
19
|
+
This module provides the single source of truth for the package version.
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
__all__ = ["__version__", "__author__", "__contact__", "__license__", "__copyright__"]
|
|
23
|
+
|
|
24
|
+
__version__ = "0.2.0"
|
|
25
|
+
__author__ = "XRTM Team"
|
|
26
|
+
__contact__ = "moy@xrtm.org"
|
|
27
|
+
__license__ = "Apache-2.0"
|
|
28
|
+
__copyright__ = "Copyright 2026 XRTM Team"
|
|
{xrtm_eval-0.1.2.dist-info → xrtm_eval-0.2.0.dist-info}/METADATA
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: xrtm-eval
|
|
3
|
-
Version: 0.1.2
|
|
3
|
+
Version: 0.2.0
|
|
4
4
|
Summary: The Judge/Scoring engine for XRTM.
|
|
5
5
|
Author-email: XRTM Team <moy@xrtm.org>
|
|
6
6
|
License: Apache-2.0
|
|
@@ -76,6 +76,19 @@ registry = SourceTrustRegistry()
|
|
|
76
76
|
guardian = IntegrityGuardian(registry)
|
|
77
77
|
```
|
|
78
78
|
|
|
79
|
+
## Project Structure
|
|
80
|
+
|
|
81
|
+
```
|
|
82
|
+
src/xrtm/eval/
|
|
83
|
+
├── core/ # Interfaces & Schemas
|
|
84
|
+
│ ├── eval/ # Evaluator protocol, EvaluationResult
|
|
85
|
+
│ ├── epistemics.py # Trust primitives (SourceTrustRegistry)
|
|
86
|
+
│ └── schemas/ # ForecastResolution
|
|
87
|
+
├── kit/ # Composable evaluator implementations
|
|
88
|
+
│ └── eval/metrics.py # BrierScoreEvaluator, ECE
|
|
89
|
+
└── providers/ # External evaluation services (future)
|
|
90
|
+
```
|
|
91
|
+
|
|
79
92
|
## Development
|
|
80
93
|
|
|
81
94
|
Prerequisites:
|
|
xrtm_eval-0.2.0.dist-info/RECORD
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
xrtm/eval/__init__.py,sha256=4DLMyE6iTtJmIhlns76iKvBu3NCarF4wyUF-mBkgCes,973
|
|
2
|
+
xrtm/eval/version.py,sha256=KR0yQFZMEdxtRXPGI69PJ6k_ApaORhOSYzVCp6z98zE,964
|
|
3
|
+
xrtm/eval/core/__init__.py,sha256=xvfvjr_abGVs2DOReAnM0FXnXzaBiz6C9p5bS6qSrS0,1283
|
|
4
|
+
xrtm/eval/core/epistemics.py,sha256=3luGGiyWQBbULPkgHANSYBfQhmaJpfv08tJK1hsv30I,5144
|
|
5
|
+
xrtm/eval/core/eval/__init__.py,sha256=LEHGg2YJ8V-ZFZG6C3Ld0PEAFiIBcnsKK8Qgys3krrk,216
|
|
6
|
+
xrtm/eval/core/eval/aggregation.py,sha256=3RHpQ8ruXl6cFjbX_odHAWpLZSbOt7E5tqwvPR9ytRw,1359
|
|
7
|
+
xrtm/eval/core/eval/bayesian.py,sha256=nSlWxcwEGclm4SjCqpzKuTY56DXHbTqV6Aylc4hZRS8,755
|
|
8
|
+
xrtm/eval/core/eval/definitions.py,sha256=ixihDAhEQYpXhTMxDTbgI8DyLQ8rzr3vSQ8suCcarKQ,1798
|
|
9
|
+
xrtm/eval/core/schemas/__init__.py,sha256=uPuMwFGO5JF6YcLr1KBPPZMr_UclwoiNyAM1eZ1kgX4,803
|
|
10
|
+
xrtm/eval/core/schemas/forecast.py,sha256=bL34Rpg9Sr4P9xto4GhNDQEbqWt1thCto8YPmVxw24c,1958
|
|
11
|
+
xrtm/eval/kit/eval/__init__.py,sha256=F4tByHG13YysLVYJshG9gL5zoo5G-tdcy2jsNTGxMpk,906
|
|
12
|
+
xrtm/eval/kit/eval/analytics.py,sha256=ahaGE7l_Lb8LBRWeCyC8oaE-7K8XShT0hwqQwT4CHXY,1818
|
|
13
|
+
xrtm/eval/kit/eval/bias.py,sha256=rMUMLhZjwBw5UTcbGqdqMzsnu4ificSPOI6hqLSUmyE,1678
|
|
14
|
+
xrtm/eval/kit/eval/epistemic_evaluator.py,sha256=ZgbH6Y1n7nEFJPia7offnWlz7n7XMJrmqeYuewl5c-s,1156
|
|
15
|
+
xrtm/eval/kit/eval/intervention.py,sha256=arlfNPjVIjNe1scwPYP-rX6ArerICSCgqf6dPidBtwg,2017
|
|
16
|
+
xrtm/eval/kit/eval/metrics.py,sha256=nAzTcXg4iZ35MTRSE5GU0kqi2ynw6WPQdQ_EzaBdI2Y,4615
|
|
17
|
+
xrtm/eval/kit/eval/resilience.py,sha256=qEDN5JljQFVmwTd2Quy8b23rPfOaq2KiykFhnieJTKo,1500
|
|
18
|
+
xrtm/eval/kit/eval/viz.py,sha256=Us0bz2PclI1veeSBeAWr9FRkFaQZRC7uEbIk7-YKt7g,2786
|
|
19
|
+
xrtm/eval/providers/__init__.py,sha256=I7jUXXwPSYuGcUq3y8rFXik1pk48uyj2pMOyQ3s6egA,832
|
|
20
|
+
xrtm_eval-0.2.0.dist-info/licenses/LICENSE,sha256=BexUTTsX5WlzyJ0Tqajo1h_LFYfCtfFgWdRaGltpm5I,11328
|
|
21
|
+
xrtm_eval-0.2.0.dist-info/METADATA,sha256=YzikKaoy19V_ZBIfNLUcA7-YWtJnZEYg9AveEiaeotA,3162
|
|
22
|
+
xrtm_eval-0.2.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
23
|
+
xrtm_eval-0.2.0.dist-info/top_level.txt,sha256=Jz-i0a9P8GVrIR9KJTT-9wT95E1brww6U5o2QViAt20,5
|
|
24
|
+
xrtm_eval-0.2.0.dist-info/RECORD,,
|
xrtm/eval/schemas/__init__.py
DELETED
xrtm/eval/schemas/forecast.py
DELETED
|
@@ -1,21 +0,0 @@
|
|
|
1
|
-
# coding=utf-8
|
|
2
|
-
# Copyright 2026 XRTM Team. All rights reserved.
|
|
3
|
-
|
|
4
|
-
from datetime import datetime, timezone
|
|
5
|
-
from typing import Any, Dict
|
|
6
|
-
|
|
7
|
-
from pydantic import BaseModel, Field
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
class ForecastResolution(BaseModel):
|
|
11
|
-
r"""
|
|
12
|
-
The ground-truth outcome used to evaluate forecast accuracy.
|
|
13
|
-
"""
|
|
14
|
-
|
|
15
|
-
question_id: str
|
|
16
|
-
outcome: str = Field(..., description="The final winning outcome or value")
|
|
17
|
-
resolved_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
|
|
18
|
-
metadata: Dict[str, Any] = Field(default_factory=dict, description="Source info, verification method")
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
__all__ = ["ForecastResolution"]
|
xrtm_eval-0.1.2.dist-info/RECORD
DELETED
|
@@ -1,22 +0,0 @@
|
|
|
1
|
-
xrtm/eval/__init__.py,sha256=4DLMyE6iTtJmIhlns76iKvBu3NCarF4wyUF-mBkgCes,973
|
|
2
|
-
xrtm/eval/core/__init__.py,sha256=ypetAyWpTtLvQK5sXk5sSdlPKad5zpBwhymUoyoxVl4,366
|
|
3
|
-
xrtm/eval/core/epistemics.py,sha256=3luGGiyWQBbULPkgHANSYBfQhmaJpfv08tJK1hsv30I,5144
|
|
4
|
-
xrtm/eval/core/eval/__init__.py,sha256=LEHGg2YJ8V-ZFZG6C3Ld0PEAFiIBcnsKK8Qgys3krrk,216
|
|
5
|
-
xrtm/eval/core/eval/aggregation.py,sha256=3RHpQ8ruXl6cFjbX_odHAWpLZSbOt7E5tqwvPR9ytRw,1359
|
|
6
|
-
xrtm/eval/core/eval/bayesian.py,sha256=nSlWxcwEGclm4SjCqpzKuTY56DXHbTqV6Aylc4hZRS8,755
|
|
7
|
-
xrtm/eval/core/eval/definitions.py,sha256=p7Sf5JjOlypjglOg_4oOw-1y48s-d-ud8U3Ll6MEE4I,1813
|
|
8
|
-
xrtm/eval/kit/eval/__init__.py,sha256=F4tByHG13YysLVYJshG9gL5zoo5G-tdcy2jsNTGxMpk,906
|
|
9
|
-
xrtm/eval/kit/eval/analytics.py,sha256=ahaGE7l_Lb8LBRWeCyC8oaE-7K8XShT0hwqQwT4CHXY,1818
|
|
10
|
-
xrtm/eval/kit/eval/bias.py,sha256=OQvJzSd_beBzVai-n9fZyvXPwLo78Y794HOkC8eyEiU,1613
|
|
11
|
-
xrtm/eval/kit/eval/epistemic_evaluator.py,sha256=KYqgfxg_MK8qaeZEr6o7sGbP3MBXu5FSbfsLrr-warQ,1154
|
|
12
|
-
xrtm/eval/kit/eval/intervention.py,sha256=ghC9dw8I1DcEf9M0JqO1oxLiTUZd4AOsM8vlX36XlVI,1931
|
|
13
|
-
xrtm/eval/kit/eval/metrics.py,sha256=VKMRwIt9UGJ23uqSUhq_vUSO0LhSe4e2H7S6eAablw4,4596
|
|
14
|
-
xrtm/eval/kit/eval/resilience.py,sha256=q95enu-mglq-S5MSIiO9kDN2Kn9898LvuxNeqo90mKc,1406
|
|
15
|
-
xrtm/eval/kit/eval/viz.py,sha256=LGss9nuGpubJdBbXRlQU2KiGqouTNnWCDyXvKAct3pE,2773
|
|
16
|
-
xrtm/eval/schemas/__init__.py,sha256=6nwuE6LpwosNhFR-xRPPWcGzP1ZxoXhN-vWrzK4XfbQ,75
|
|
17
|
-
xrtm/eval/schemas/forecast.py,sha256=vA5V5RTo7k2ZV3THs2h3fxmdusS06Vw92H5WKJmXVzA,624
|
|
18
|
-
xrtm_eval-0.1.2.dist-info/licenses/LICENSE,sha256=BexUTTsX5WlzyJ0Tqajo1h_LFYfCtfFgWdRaGltpm5I,11328
|
|
19
|
-
xrtm_eval-0.1.2.dist-info/METADATA,sha256=h4-gh_4ofAluGCFqWZ6lHbv-absbh_-fs94_jqgt8W4,2673
|
|
20
|
-
xrtm_eval-0.1.2.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
21
|
-
xrtm_eval-0.1.2.dist-info/top_level.txt,sha256=Jz-i0a9P8GVrIR9KJTT-9wT95E1brww6U5o2QViAt20,5
|
|
22
|
-
xrtm_eval-0.1.2.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|