structuremappingmemory 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sma/__init__.py +5 -0
- sma/__main__.py +5 -0
- sma/agent/__init__.py +5 -0
- sma/agent/adapter_draft.py +217 -0
- sma/agent/api.py +67 -0
- sma/agent/comparison.py +591 -0
- sma/agent/llm.py +280 -0
- sma/agent/policies.py +21 -0
- sma/agent/service.py +95 -0
- sma/cli.py +65 -0
- sma/encoders/__init__.py +38 -0
- sma/encoders/agentobs.py +27 -0
- sma/encoders/base.py +23 -0
- sma/encoders/code_treesitter.py +64 -0
- sma/encoders/coverage.py +80 -0
- sma/encoders/draft_adapter.py +183 -0
- sma/encoders/healthcare.py +207 -0
- sma/encoders/logs_drain.py +142 -0
- sma/encoders/prose_tier1.py +57 -0
- sma/encoders/structured.py +57 -0
- sma/encoders/traces.py +45 -0
- sma/eval/__init__.py +2 -0
- sma/eval/agentic/__init__.py +35 -0
- sma/eval/agentic/arms/__init__.py +0 -0
- sma/eval/agentic/arms/cyber.py +48 -0
- sma/eval/agentic/arms/discovery.py +35 -0
- sma/eval/agentic/arms/finance.py +38 -0
- sma/eval/agentic/arms/legal.py +74 -0
- sma/eval/agentic/arms/medicine.py +45 -0
- sma/eval/agentic/harness.py +275 -0
- sma/eval/agentic/memories.py +308 -0
- sma/eval/agentic/metrics.py +82 -0
- sma/eval/agentic_qa/__init__.py +27 -0
- sma/eval/agentic_qa/agent.py +383 -0
- sma/eval/agentic_qa/metrics.py +239 -0
- sma/eval/agentic_qa/pools.py +197 -0
- sma/eval/arn.py +65 -0
- sma/eval/baselines/__init__.py +6 -0
- sma/eval/baselines/bge_dense.py +54 -0
- sma/eval/baselines/bm25.py +18 -0
- sma/eval/baselines/dense.py +42 -0
- sma/eval/baselines/hipporag.py +235 -0
- sma/eval/baselines/hybrid_rrf.py +30 -0
- sma/eval/baselines/longcontext_llm.py +124 -0
- sma/eval/baselines/rerank.py +41 -0
- sma/eval/baselines/splade.py +77 -0
- sma/eval/baselines/wl_kernel.py +163 -0
- sma/eval/bugsinpy.py +358 -0
- sma/eval/bugsinpy_families.py +164 -0
- sma/eval/crossdomain.py +89 -0
- sma/eval/diabetes.py +61 -0
- sma/eval/drift_env.py +26 -0
- sma/eval/drift_metrics.py +24 -0
- sma/eval/family_labels.py +167 -0
- sma/eval/fraud_elliptic/__init__.py +29 -0
- sma/eval/fraud_elliptic/encoder.py +279 -0
- sma/eval/fraud_elliptic/eval.py +269 -0
- sma/eval/fraud_elliptic/test_encoder.py +123 -0
- sma/eval/ieee_cis.py +66 -0
- sma/eval/loghub.py +16 -0
- sma/eval/loghub_eval.py +480 -0
- sma/eval/longmemeval.py +51 -0
- sma/eval/memory_backends/__init__.py +2 -0
- sma/eval/memory_backends/base.py +22 -0
- sma/eval/memory_backends/context_only.py +14 -0
- sma/eval/memory_backends/rag_notes.py +17 -0
- sma/eval/memory_backends/shared_llm.py +30 -0
- sma/eval/memory_backends/sma_memory.py +54 -0
- sma/eval/memory_backends/zep_graphiti.py +33 -0
- sma/eval/metrics.py +32 -0
- sma/eval/ontology_bench.py +219 -0
- sma/eval/report.py +573 -0
- sma/eval/ssb_eval.py +216 -0
- sma/eval/ssb_generator.py +116 -0
- sma/eval/stats.py +108 -0
- sma/eval/transfer_eval.py +844 -0
- sma/index/__init__.py +15 -0
- sma/index/ann.py +21 -0
- sma/index/content_vectors.py +60 -0
- sma/index/inverted.py +63 -0
- sma/index/macfac.py +174 -0
- sma/ir/__init__.py +22 -0
- sma/ir/canon.py +106 -0
- sma/ir/schema.py +165 -0
- sma/ir/sexpr.py +86 -0
- sma/ir/signatures.py +76 -0
- sma/match/__init__.py +20 -0
- sma/match/conflicts.py +46 -0
- sma/match/engine.py +60 -0
- sma/match/explain.py +59 -0
- sma/match/infer.py +54 -0
- sma/match/kernels.py +54 -0
- sma/match/mdl.py +30 -0
- sma/match/merge_cpsat.py +77 -0
- sma/match/merge_greedy.py +15 -0
- sma/match/mh.py +177 -0
- sma/match/ses.py +84 -0
- sma/match/types.py +115 -0
- sma/match/verifier.py +27 -0
- sma/ontology/__init__.py +45 -0
- sma/ontology/attack.py +134 -0
- sma/ontology/cpc.py +69 -0
- sma/ontology/graph.py +58 -0
- sma/ontology/loader.py +262 -0
- sma/ontology/mitre_xml.py +67 -0
- sma/ontology/mount.py +101 -0
- sma/ontology/rdf_loader.py +75 -0
- sma/ontology/registry.py +115 -0
- sma/ontology/router.py +69 -0
- sma/ontology/usgaap.py +73 -0
- sma/sage/__init__.py +6 -0
- sma/sage/assimilate.py +12 -0
- sma/sage/pools.py +105 -0
- sma/sage/probabilities.py +10 -0
- sma/store/__init__.py +6 -0
- sma/store/lmdb_store.py +78 -0
- sma/store/registry.py +26 -0
- sma/store/wal.py +26 -0
- sma/ui/app.py +642 -0
- structuremappingmemory-1.0.0.dist-info/METADATA +190 -0
- structuremappingmemory-1.0.0.dist-info/RECORD +125 -0
- structuremappingmemory-1.0.0.dist-info/WHEEL +5 -0
- structuremappingmemory-1.0.0.dist-info/entry_points.txt +2 -0
- structuremappingmemory-1.0.0.dist-info/licenses/LICENSE +204 -0
- structuremappingmemory-1.0.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
"""SOTA baseline: Graphiti temporal knowledge graph (the engine behind Zep).
|
|
2
|
+
Isolated behind a lazy import so the core never depends on it; the graph DB
|
|
3
|
+
runs in docker/zep. Graphiti's extraction is pointed at the SAME DeepSeek
|
|
4
|
+
backbone (via env) so the comparison is equal-footing."""
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
from .base import MemoryBackend, QueryResult
|
|
7
|
+
|
|
8
|
+
# Optional-dependency probe: record whether graphiti_core can be imported so the
# backend can refuse construction with a clear message instead of breaking at
# module import time.
try:
    import graphiti_core  # noqa: F401
except Exception:
    # Any failure while importing is treated as "backend unavailable".
    ZEP_AVAILABLE = False
else:
    ZEP_AVAILABLE = True
|
|
13
|
+
|
|
14
|
+
class ZepGraphiti(MemoryBackend):
    """Memory backend that stores session turns in Graphiti and answers from its search hits.

    NOTE(review): graphiti_core appears to expose ``add_episode`` / ``search`` as
    coroutines and may not provide ``Graphiti.clear()`` — confirm that the
    synchronous calls below work against the pinned graphiti_core version.
    """

    name = "zep-graphiti"

    def __init__(self, llm, uri: str = "bolt://localhost:7687"):
        """Open a Graphiti handle on ``uri`` and keep ``llm`` for answer generation.

        Raises:
            RuntimeError: if graphiti_core could not be imported.
        """
        if not ZEP_AVAILABLE:
            raise RuntimeError("graphiti_core not installed; see docker/zep/README")
        # Imported lazily so the module itself never requires graphiti_core.
        from graphiti_core import Graphiti

        self.g = Graphiti(uri)  # configured to use DeepSeek via env in the container
        self.llm = llm

    def reset(self):
        """Clear the underlying graph."""
        self.g.clear()

    def ingest(self, session):
        """Add every turn of ``session`` as one episode named after the session id."""
        for turn in session.turns:
            self.g.add_episode(
                name=session.session_id,
                episode_body=turn["content"],
                reference_time=session.date,
            )

    def query(self, question):
        """Search the graph for ``question`` and answer from the retrieved facts."""
        from .shared_llm import answer_from

        facts = [hit.fact for hit in self.g.search(question)]
        reply = answer_from(self.llm, question, facts)
        return QueryResult(answer=reply, retrieved=facts)
|
sma/eval/metrics.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
"""Evaluation metrics."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def recall_at_k(ranked: list[str], relevant: set[str], k: int) -> float:
    """Return the fraction of ``relevant`` items present in the first ``k`` of ``ranked``.

    Returns 0.0 when ``relevant`` is empty or when ``k`` is not positive. The
    explicit ``k <= 0`` guard matters: a negative ``k`` would otherwise slice
    ``ranked[:-k]`` and silently score almost the whole ranking.
    """
    if not relevant or k <= 0:
        return 0.0
    top_k = set(ranked[:k])
    return len(top_k.intersection(relevant)) / len(relevant)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def mrr(ranked: list[str], relevant: set[str]) -> float:
    """Return the reciprocal rank (1-based) of the first relevant item, or 0.0 if none."""
    first_hit = next(
        (position for position, candidate in enumerate(ranked, start=1) if candidate in relevant),
        None,
    )
    if first_hit is None:
        return 0.0
    return 1.0 / first_hit
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def macro_f1(y_true: list[str], y_pred: list[str]) -> float:
|
|
20
|
+
labels = sorted(set(y_true) | set(y_pred))
|
|
21
|
+
if not labels:
|
|
22
|
+
return 0.0
|
|
23
|
+
scores = []
|
|
24
|
+
for label in labels:
|
|
25
|
+
tp = sum(1 for t, p in zip(y_true, y_pred, strict=True) if t == label and p == label)
|
|
26
|
+
fp = sum(1 for t, p in zip(y_true, y_pred, strict=True) if t != label and p == label)
|
|
27
|
+
fn = sum(1 for t, p in zip(y_true, y_pred, strict=True) if t == label and p != label)
|
|
28
|
+
precision = tp / (tp + fp) if tp + fp else 0.0
|
|
29
|
+
recall = tp / (tp + fn) if tp + fn else 0.0
|
|
30
|
+
scores.append(2 * precision * recall / (precision + recall) if precision + recall else 0.0)
|
|
31
|
+
return sum(scores) / len(scores)
|
|
32
|
+
|
|
@@ -0,0 +1,219 @@
|
|
|
1
|
+
"""Shared harness for the multi-domain ontology benchmark suite (gigatest).
|
|
2
|
+
|
|
3
|
+
One protocol, every golden-ontology domain (configs/preregistration_ontology.md):
|
|
4
|
+
mount the ontology, index entities by their annotation term-sets, query with hard
|
|
5
|
+
partial/imprecise observations, and rank the true entity. SMA (the universal
|
|
6
|
+
adapter) is scored against FOUR baselines:
|
|
7
|
+
- Phenomizer / Resnik IC best-match (ontology-AWARE SOTA-equivalent)
|
|
8
|
+
- Jaccard term overlap (lexical floor)
|
|
9
|
+
- TF-IDF dense cosine (real dense-RAG over the same annotations)
|
|
10
|
+
- HippoRAG phrase-graph + PPR (real KG retriever over the same annotations)
|
|
11
|
+
Reported on ALL queries and on the registered RARE slice (entities whose rarest
|
|
12
|
+
term's IC exceeds the corpus median). Reproducibility: every set->list is sorted
|
|
13
|
+
and every RNG is explicitly seeded (hash-independent). No per-domain code here.
|
|
14
|
+
"""
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import math
|
|
18
|
+
import random
|
|
19
|
+
import statistics
|
|
20
|
+
import time
|
|
21
|
+
from typing import Iterable
|
|
22
|
+
|
|
23
|
+
from sma.eval.baselines.dense import rank_tfidf_dense_batch
|
|
24
|
+
from sma.eval.baselines.hipporag import HippoRAGRetriever
|
|
25
|
+
from sma.eval.stats import cliffs_delta, paired_bootstrap
|
|
26
|
+
from sma.ontology import MountedOntology
|
|
27
|
+
|
|
28
|
+
# Method keys in reporting order: "sma" is the system under test, the rest are baselines.
METHODS = ("sma", "phen", "jac", "dense", "hippo")

# Display label printed for each method key.
LABELS = {
    "sma": "SMA",
    "phen": "Phenomizer",
    "jac": "Jaccard",
    "dense": "Dense-RAG",
    "hippo": "HippoRAG",
}
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
# --- ontology IC machinery (closure-propagated term frequency) -------------
|
|
34
|
+
def _ancestors(term, parents, cache):
    """Return the set of all transitive ancestors of ``term`` (excluding ``term`` itself).

    ``parents`` maps a term id to an iterable of its direct parent ids; terms
    missing from it have no ancestors. ``cache`` memoizes results per term and
    is updated in place. The returned set is the cached object, so callers
    must not mutate it.

    The traversal is iterative, so very deep hierarchies cannot hit the
    recursion limit and a malformed (cyclic) parent map terminates. In a cycle
    a term is reported as its own ancestor.
    """
    if term in cache:
        return cache[term]

    found: set[str] = set()
    pending = list(parents.get(term, ()))
    while pending:
        node = pending.pop()
        if node in found:
            continue
        found.add(node)
        if node in cache:
            # A cached closure already contains everything above this node.
            found |= cache[node]
        else:
            pending.extend(parents.get(node, ()))

    cache[term] = found
    return found
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _build_ic(entity_terms, parents, anc_cache):
    """Compute information content per term from closure-propagated entity frequency.

    Each entity counts once toward every term it is annotated with and once
    toward every ancestor of those terms. Returns ``{term: -log(count / n)}``
    where ``n`` is the number of entities; terms never reached are absent.
    """
    total = len(entity_terms)
    counts: dict[str, int] = {}
    for annotated in entity_terms:
        closure = set(annotated)
        closure.update(*(_ancestors(t, parents, anc_cache) for t in annotated))
        for term in closure:
            counts[term] = counts.get(term, 0) + 1
    return {term: -math.log(count / total) for term, count in counts.items()}
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def _resnik(a, b, parents, anc_cache, ic):
    """Return the highest IC among terms that are ``a``/``b`` or an ancestor of both.

    Terms without an IC entry count as 0.0; the result is 0.0 when the two
    closures share nothing.
    """
    closure_a = _ancestors(a, parents, anc_cache) | {a}
    closure_b = _ancestors(b, parents, anc_cache) | {b}
    shared = closure_a & closure_b
    return max((ic.get(term, 0.0) for term in shared), default=0.0)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def _phenomizer(query, terms, parents, anc_cache, ic):
    """Symmetric best-match-average Resnik similarity between ``query`` and ``terms``.

    For each direction, every source term is matched to its most similar target
    term and the best scores are averaged; the two directions are then averaged.
    """
    def directed(source, target):
        best_scores = [
            max((_resnik(s, t, parents, anc_cache, ic) for t in target), default=0.0)
            for s in source
        ]
        return sum(best_scores) / max(len(source), 1)

    forward = directed(query, terms)
    backward = directed(terms, query)
    return 0.5 * (forward + backward)
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def _jaccard(query, terms):
    """Jaccard overlap between the query terms and the set ``terms`` (0.0 if both are empty)."""
    observed = set(query)
    shared = len(observed & terms)
    combined = len(observed | terms)
    return shared / max(combined, 1)
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def _rank_of(ranked_ids, target):
    """Return the 1-based position of ``target`` in ``ranked_ids``, or the sentinel 999 if absent."""
    for position, candidate in enumerate(ranked_ids, 1):
        if candidate == target:
            return position
    return 999
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
# --- one arm ---------------------------------------------------------------
|
|
80
|
+
def run_arm(
    name: str,
    mounted: MountedOntology,
    records: dict[str, set[str]],
    *,
    seeds: Iterable[int] = (7, 17, 23),
    n_index: int = 2500,
    n_query: int = 150,
    min_terms: int = 7,
    max_terms: int = 30,
    use_hippo: bool = True,
    verbose: bool = True,
) -> dict:
    """Run one benchmark arm and return pooled per-query ranks for every method.

    Args:
        name: arm label used in the result and in progress output.
        mounted: mounted ontology; supplies the term graph, the SMA index
            builder and the query-case builder.
        records: entity_id -> set of ontology term ids.
        seeds: one full index/query round is run per seed.
        n_index: maximum number of entities indexed per seed.
        n_query: maximum number of queries generated per seed.
        min_terms / max_terms: an entity is eligible only if its number of
            terms known to the graph lies in this inclusive range.
        use_hippo: when False the HippoRAG baseline is skipped and its rank is
            recorded as 999 for every query.
        verbose: print progress every 50 queries and the final summary.

    Returns:
        A dict with ``arm``, ``n_all``, ``n_rare``, ``per_seed`` (top-5 accuracy
        per method for each seed) and ``slices`` (``"all"`` and ``"rare"``
        summaries; a slice is ``None`` when it has no queries).
    """
    graph = mounted.graph
    parents = {tid: tuple(t.parents) for tid, t in graph.terms.items()}

    def term_text(t):
        # Fall back to the raw id when the term is unknown or unnamed.
        nm = graph.terms[t].name if t in graph.terms else ""
        return nm or t

    eligible = sorted(
        eid for eid, terms in records.items()
        if min_terms <= len({t for t in terms if t in graph.terms}) <= max_terms
    )

    # per-query rows pooled across seeds: {method: rank, "rare": bool}
    rows: list[dict] = []
    per_seed = []

    for seed in seeds:
        rng = random.Random(seed)
        ids = list(eligible)
        rng.shuffle(ids)
        idx_ids = sorted(ids[:n_index])
        dz = {e: sorted(t for t in records[e] if t in graph.terms) for e in idx_ids}
        # Built once per seed; the scoring loop below needs each entity's term
        # set for every query, so rebuilding it there is pure repeated work.
        dz_sets = {e: set(terms) for e, terms in dz.items()}
        anc_cache: dict[str, set] = {}
        ic = _build_ic(list(dz_sets.values()), parents, anc_cache)
        noise_pool = sorted(ic)
        median_ic = statistics.median(ic.values()) if ic else 0.0

        index = mounted.build_index((e, dz[e], {"id": e}) for e in idx_ids)
        key_of = index.key_of
        index_docs = [(e, " ".join(term_text(t) for t in dz[e])) for e in idx_ids]

        # generate the hard queries first (so dense can batch)
        # Only entities with at least 8 known terms are used as query targets.
        query_ids = [e for e in idx_ids if len(dz[e]) >= 8][:n_query]
        qspecs = []
        for e in query_ids:
            terms = dz[e]
            # Partial observation: keep at most 5 of the true terms ...
            keep = rng.sample(terms, min(5, len(terms)))
            q = []
            for t in keep:
                cur = t
                # ... blur each by walking 0-2 steps up to a random parent ...
                for _ in range(rng.choice([0, 0, 1, 1, 2])):
                    ps = parents.get(cur)
                    if ps:
                        cur = rng.choice(sorted(ps))
                q.append(cur)
            # ... and add up to 3 random terms from the IC vocabulary as noise.
            q += rng.sample(noise_pool, min(3, len(noise_pool)))
            qspecs.append((e, q))

        qtexts = [" ".join(term_text(t) for t in q) for _, q in qspecs]
        dense_rk = rank_tfidf_dense_batch(qtexts, index_docs, k=20)
        hippo = None
        if use_hippo:
            hippo = HippoRAGRetriever()
            hippo.build(index_docs)

        t0 = time.perf_counter()
        seed_ranks = {m: [] for m in METHODS}
        for n, (e, q) in enumerate(qspecs, 1):
            # "rare": the target's highest-IC term exceeds the median IC of this index.
            row = {"rare": max((ic.get(t, 0.0) for t in dz[e]), default=0.0) > median_ic}
            # SMA
            res = mounted.build_case(q)
            sres = index.retrieve(res, k=10, shortlist=80, fac_budget=40)
            row["sma"] = _rank_of([key_of.get(r.case_id) for r in sres], e)
            # Phenomizer + Jaccard (rank true entity among all index entities)
            phen = sorted(((_phenomizer(q, dz_sets[o], parents, anc_cache, ic), o) for o in idx_ids),
                          key=lambda x: (-x[0], x[1]))
            row["phen"] = _rank_of([o for _, o in phen], e)
            jac = sorted(((_jaccard(q, dz_sets[o]), o) for o in idx_ids), key=lambda x: (-x[0], x[1]))
            row["jac"] = _rank_of([o for _, o in jac], e)
            # Dense-RAG (precomputed batch)
            row["dense"] = _rank_of([cid for cid, _ in dense_rk[n - 1]], e)
            # HippoRAG (KG/PPR); explicit None check so the retriever's own
            # truthiness can never disable the baseline by accident.
            if hippo is not None:
                hippo_hits = hippo.retrieve(qtexts[n - 1], k=20)
                row["hippo"] = _rank_of([cid for cid, _ in hippo_hits], e)
            else:
                row["hippo"] = 999
            rows.append(row)
            for m in METHODS:
                seed_ranks[m].append(row[m])
            if verbose and n % 50 == 0:
                print(f" [{name} seed {seed}] {n}/{len(qspecs)} ({time.perf_counter()-t0:.0f}s)", flush=True)

        per_seed.append({"seed": seed, "n": len(qspecs),
                         **{f"{m}_t5": _acc(seed_ranks[m], 5) for m in METHODS}})

    result = {"arm": name, "n_all": len(rows), "n_rare": sum(1 for r in rows if r["rare"]),
              "per_seed": per_seed, "slices": {}}
    for slice_name, sub in (("all", rows), ("rare", [r for r in rows if r["rare"]])):
        result["slices"][slice_name] = _summarize(sub)
    if verbose:
        _print_arm(result)
    return result
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
def _acc(ranks, k):
    """Return the fraction of ``ranks`` that are at most ``k`` (0.0 for an empty list)."""
    hits = len([rank for rank in ranks if rank <= k])
    return hits / max(len(ranks), 1)
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
def _summarize(rows):
    """Aggregate per-query rank rows into metrics plus the primary SMA-vs-best comparison.

    Returns ``None`` for an empty slice. Otherwise returns top-1/5/10 accuracy
    and MRR per method, the non-SMA method with the highest top-5 accuracy
    (ties go to the earliest in ``METHODS``), and paired-bootstrap / Cliff's
    delta statistics on the top-5 hit indicators of SMA versus that baseline.

    NOTE(review): MRR skips ranks >= 999 (the not-found sentinel); a genuine
    rank of exactly 999 would be dropped too — confirm this is acceptable.
    """
    if not rows:
        return None

    total = len(rows)
    metr = {}
    for m in METHODS:
        ranks = [r[m] for r in rows]
        stats = {f"t{k}": _acc(ranks, k) for k in (1, 5, 10)}
        stats["mrr"] = sum(1 / rank for rank in ranks if rank < 999) / total
        metr[m] = stats

    # primary: SMA vs BEST non-SMA baseline on top-5
    baselines = [m for m in METHODS if m != "sma"]
    best = max(baselines, key=lambda m: metr[m]["t5"])
    sma_c = [1.0 if r["sma"] <= 5 else 0.0 for r in rows]
    best_c = [1.0 if r[best] <= 5 else 0.0 for r in rows]

    bs = paired_bootstrap(sma_c, best_c)
    effect = cliffs_delta(sma_c, best_c)
    return {
        "n": total,
        "metrics": metr,
        "best_baseline": best,
        "delta_t5": bs["delta"],
        "ci_low": bs["ci_low"],
        "ci_high": bs["ci_high"],
        "p_value": bs["p_value"],
        "cliffs": effect,
    }
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
def _print_arm(r):
    """Print the arm header, then a metrics table and the primary comparison for each non-empty slice."""
    print(f"\n=== arm {r['arm']}: {r['n_all']} queries ({r['n_rare']} rare) ===")
    header = f" {'method':<12}{'top-1':<8}{'top-5':<8}{'top-10':<8}{'MRR':<8}"
    for slice_name in ("all", "rare"):
        s = r["slices"][slice_name]
        if s:
            print(f"\n [{slice_name}] n={s['n']}")
            print(header)
            for m in METHODS:
                mm = s["metrics"][m]
                print(f" {LABELS[m]:<12}{mm['t1']:<8.3f}{mm['t5']:<8.3f}{mm['t10']:<8.3f}{mm['mrr']:<8.3f}")
            print(f" primary top-5 SMA vs {LABELS[s['best_baseline']]}: "
                  f"delta={s['delta_t5']:+.4f} CI[{s['ci_low']:+.4f},{s['ci_high']:+.4f}] "
                  f"p={s['p_value']:.4f} cliffs={s['cliffs']:+.3f}")
|