structuremappingmemory 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (125) hide show
  1. sma/__init__.py +5 -0
  2. sma/__main__.py +5 -0
  3. sma/agent/__init__.py +5 -0
  4. sma/agent/adapter_draft.py +217 -0
  5. sma/agent/api.py +67 -0
  6. sma/agent/comparison.py +591 -0
  7. sma/agent/llm.py +280 -0
  8. sma/agent/policies.py +21 -0
  9. sma/agent/service.py +95 -0
  10. sma/cli.py +65 -0
  11. sma/encoders/__init__.py +38 -0
  12. sma/encoders/agentobs.py +27 -0
  13. sma/encoders/base.py +23 -0
  14. sma/encoders/code_treesitter.py +64 -0
  15. sma/encoders/coverage.py +80 -0
  16. sma/encoders/draft_adapter.py +183 -0
  17. sma/encoders/healthcare.py +207 -0
  18. sma/encoders/logs_drain.py +142 -0
  19. sma/encoders/prose_tier1.py +57 -0
  20. sma/encoders/structured.py +57 -0
  21. sma/encoders/traces.py +45 -0
  22. sma/eval/__init__.py +2 -0
  23. sma/eval/agentic/__init__.py +35 -0
  24. sma/eval/agentic/arms/__init__.py +0 -0
  25. sma/eval/agentic/arms/cyber.py +48 -0
  26. sma/eval/agentic/arms/discovery.py +35 -0
  27. sma/eval/agentic/arms/finance.py +38 -0
  28. sma/eval/agentic/arms/legal.py +74 -0
  29. sma/eval/agentic/arms/medicine.py +45 -0
  30. sma/eval/agentic/harness.py +275 -0
  31. sma/eval/agentic/memories.py +308 -0
  32. sma/eval/agentic/metrics.py +82 -0
  33. sma/eval/agentic_qa/__init__.py +27 -0
  34. sma/eval/agentic_qa/agent.py +383 -0
  35. sma/eval/agentic_qa/metrics.py +239 -0
  36. sma/eval/agentic_qa/pools.py +197 -0
  37. sma/eval/arn.py +65 -0
  38. sma/eval/baselines/__init__.py +6 -0
  39. sma/eval/baselines/bge_dense.py +54 -0
  40. sma/eval/baselines/bm25.py +18 -0
  41. sma/eval/baselines/dense.py +42 -0
  42. sma/eval/baselines/hipporag.py +235 -0
  43. sma/eval/baselines/hybrid_rrf.py +30 -0
  44. sma/eval/baselines/longcontext_llm.py +124 -0
  45. sma/eval/baselines/rerank.py +41 -0
  46. sma/eval/baselines/splade.py +77 -0
  47. sma/eval/baselines/wl_kernel.py +163 -0
  48. sma/eval/bugsinpy.py +358 -0
  49. sma/eval/bugsinpy_families.py +164 -0
  50. sma/eval/crossdomain.py +89 -0
  51. sma/eval/diabetes.py +61 -0
  52. sma/eval/drift_env.py +26 -0
  53. sma/eval/drift_metrics.py +24 -0
  54. sma/eval/family_labels.py +167 -0
  55. sma/eval/fraud_elliptic/__init__.py +29 -0
  56. sma/eval/fraud_elliptic/encoder.py +279 -0
  57. sma/eval/fraud_elliptic/eval.py +269 -0
  58. sma/eval/fraud_elliptic/test_encoder.py +123 -0
  59. sma/eval/ieee_cis.py +66 -0
  60. sma/eval/loghub.py +16 -0
  61. sma/eval/loghub_eval.py +480 -0
  62. sma/eval/longmemeval.py +51 -0
  63. sma/eval/memory_backends/__init__.py +2 -0
  64. sma/eval/memory_backends/base.py +22 -0
  65. sma/eval/memory_backends/context_only.py +14 -0
  66. sma/eval/memory_backends/rag_notes.py +17 -0
  67. sma/eval/memory_backends/shared_llm.py +30 -0
  68. sma/eval/memory_backends/sma_memory.py +54 -0
  69. sma/eval/memory_backends/zep_graphiti.py +33 -0
  70. sma/eval/metrics.py +32 -0
  71. sma/eval/ontology_bench.py +219 -0
  72. sma/eval/report.py +573 -0
  73. sma/eval/ssb_eval.py +216 -0
  74. sma/eval/ssb_generator.py +116 -0
  75. sma/eval/stats.py +108 -0
  76. sma/eval/transfer_eval.py +844 -0
  77. sma/index/__init__.py +15 -0
  78. sma/index/ann.py +21 -0
  79. sma/index/content_vectors.py +60 -0
  80. sma/index/inverted.py +63 -0
  81. sma/index/macfac.py +174 -0
  82. sma/ir/__init__.py +22 -0
  83. sma/ir/canon.py +106 -0
  84. sma/ir/schema.py +165 -0
  85. sma/ir/sexpr.py +86 -0
  86. sma/ir/signatures.py +76 -0
  87. sma/match/__init__.py +20 -0
  88. sma/match/conflicts.py +46 -0
  89. sma/match/engine.py +60 -0
  90. sma/match/explain.py +59 -0
  91. sma/match/infer.py +54 -0
  92. sma/match/kernels.py +54 -0
  93. sma/match/mdl.py +30 -0
  94. sma/match/merge_cpsat.py +77 -0
  95. sma/match/merge_greedy.py +15 -0
  96. sma/match/mh.py +177 -0
  97. sma/match/ses.py +84 -0
  98. sma/match/types.py +115 -0
  99. sma/match/verifier.py +27 -0
  100. sma/ontology/__init__.py +45 -0
  101. sma/ontology/attack.py +134 -0
  102. sma/ontology/cpc.py +69 -0
  103. sma/ontology/graph.py +58 -0
  104. sma/ontology/loader.py +262 -0
  105. sma/ontology/mitre_xml.py +67 -0
  106. sma/ontology/mount.py +101 -0
  107. sma/ontology/rdf_loader.py +75 -0
  108. sma/ontology/registry.py +115 -0
  109. sma/ontology/router.py +69 -0
  110. sma/ontology/usgaap.py +73 -0
  111. sma/sage/__init__.py +6 -0
  112. sma/sage/assimilate.py +12 -0
  113. sma/sage/pools.py +105 -0
  114. sma/sage/probabilities.py +10 -0
  115. sma/store/__init__.py +6 -0
  116. sma/store/lmdb_store.py +78 -0
  117. sma/store/registry.py +26 -0
  118. sma/store/wal.py +26 -0
  119. sma/ui/app.py +642 -0
  120. structuremappingmemory-1.0.0.dist-info/METADATA +190 -0
  121. structuremappingmemory-1.0.0.dist-info/RECORD +125 -0
  122. structuremappingmemory-1.0.0.dist-info/WHEEL +5 -0
  123. structuremappingmemory-1.0.0.dist-info/entry_points.txt +2 -0
  124. structuremappingmemory-1.0.0.dist-info/licenses/LICENSE +204 -0
  125. structuremappingmemory-1.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,33 @@
1
+ """SOTA baseline: Graphiti temporal knowledge graph (the engine behind Zep).
2
+ Isolated behind a lazy import so the core never depends on it; the graph DB
3
+ runs in docker/zep. Graphiti's extraction is pointed at the SAME DeepSeek
4
+ backbone (via env) so the comparison is equal-footing."""
5
+ from __future__ import annotations
6
+ from .base import MemoryBackend, QueryResult
7
+
8
+ try:
9
+ import graphiti_core # noqa: F401
10
+ ZEP_AVAILABLE = True
11
+ except Exception:
12
+ ZEP_AVAILABLE = False
13
+
14
class ZepGraphiti(MemoryBackend):
    """Memory backend that writes session turns into a Graphiti graph and answers from its search hits.

    NOTE(review): upstream graphiti_core appears to expose ``add_episode`` and
    ``search`` as coroutines; the calls below are plain synchronous calls --
    confirm against the version pinned in docker/zep before relying on results.
    """

    name = "zep-graphiti"

    def __init__(self, llm, uri: str = "bolt://localhost:7687"):
        """Open a Graphiti handle on *uri* and keep *llm* for answer generation.

        Raises:
            RuntimeError: if ``graphiti_core`` could not be imported.
        """
        if not ZEP_AVAILABLE:
            raise RuntimeError("graphiti_core not installed; see docker/zep/README")
        # Imported here so that merely importing this module never needs graphiti_core.
        from graphiti_core import Graphiti

        self.g = Graphiti(uri)  # configured to use DeepSeek via env in the container
        self.llm = llm

    def reset(self):
        """Clear the graph handle between runs."""
        self.g.clear()

    def ingest(self, session):
        """Add one episode per turn of *session*, named after the session id."""
        for turn in session.turns:
            self.g.add_episode(
                name=session.session_id,
                episode_body=turn["content"],
                reference_time=session.date,
            )

    def query(self, question):
        """Search the graph for *question* and answer from the ``fact`` of each hit."""
        from .shared_llm import answer_from

        retrieved = []
        for hit in self.g.search(question):
            retrieved.append(hit.fact)
        answer = answer_from(self.llm, question, retrieved)
        return QueryResult(answer=answer, retrieved=retrieved)
sma/eval/metrics.py ADDED
@@ -0,0 +1,32 @@
1
+ """Evaluation metrics."""
2
+
3
+ from __future__ import annotations
4
+
5
+
6
def recall_at_k(ranked: list[str], relevant: set[str], k: int) -> float:
    """Return the fraction of *relevant* items present in the first *k* entries of *ranked*.

    Args:
        ranked: candidate ids, best first. Duplicates count once.
        relevant: ids considered correct.
        k: cutoff. Zero or negative values mean "nothing retrieved".

    Returns:
        A value in [0, 1]; 0.0 when *relevant* is empty or *k* is not positive.
    """
    # A negative k would otherwise slice from the END of the list (ranked[:-1])
    # and silently report recall over almost the whole ranking.
    if not relevant or k <= 0:
        return 0.0
    top_k = set(ranked[:k])
    return len(top_k.intersection(relevant)) / len(relevant)
10
+
11
+
12
def mrr(ranked: list[str], relevant: set[str]) -> float:
    """Return the reciprocal of the 1-based position of the first relevant item, or 0.0 if none."""
    first_hit = next(
        (pos for pos, item in enumerate(ranked, start=1) if item in relevant),
        None,
    )
    if first_hit is None:
        return 0.0
    return 1.0 / first_hit
17
+
18
+
19
+ def macro_f1(y_true: list[str], y_pred: list[str]) -> float:
20
+ labels = sorted(set(y_true) | set(y_pred))
21
+ if not labels:
22
+ return 0.0
23
+ scores = []
24
+ for label in labels:
25
+ tp = sum(1 for t, p in zip(y_true, y_pred, strict=True) if t == label and p == label)
26
+ fp = sum(1 for t, p in zip(y_true, y_pred, strict=True) if t != label and p == label)
27
+ fn = sum(1 for t, p in zip(y_true, y_pred, strict=True) if t == label and p != label)
28
+ precision = tp / (tp + fp) if tp + fp else 0.0
29
+ recall = tp / (tp + fn) if tp + fn else 0.0
30
+ scores.append(2 * precision * recall / (precision + recall) if precision + recall else 0.0)
31
+ return sum(scores) / len(scores)
32
+
@@ -0,0 +1,219 @@
1
+ """Shared harness for the multi-domain ontology benchmark suite (gigatest).
2
+
3
+ One protocol, every golden-ontology domain (configs/preregistration_ontology.md):
4
+ mount the ontology, index entities by their annotation term-sets, query with hard
5
+ partial/imprecise observations, and rank the true entity. SMA (the universal
6
+ adapter) is scored against FOUR baselines:
7
+ - Phenomizer / Resnik IC best-match (ontology-AWARE SOTA-equivalent)
8
+ - Jaccard term overlap (lexical floor)
9
+ - TF-IDF dense cosine (real dense-RAG over the same annotations)
10
+ - HippoRAG phrase-graph + PPR (real KG retriever over the same annotations)
11
+ Reported on ALL queries and on the registered RARE slice (entities whose rarest
12
+ term's IC exceeds the corpus median). Reproducibility: every set->list is sorted
13
+ and every RNG is explicitly seeded (hash-independent). No per-domain code here.
14
+ """
15
+ from __future__ import annotations
16
+
17
+ import math
18
+ import random
19
+ import statistics
20
+ import time
21
+ from typing import Iterable
22
+
23
+ from sma.eval.baselines.dense import rank_tfidf_dense_batch
24
+ from sma.eval.baselines.hipporag import HippoRAGRetriever
25
+ from sma.eval.stats import cliffs_delta, paired_bootstrap
26
+ from sma.ontology import MountedOntology
27
+
28
# Display label for each scored method; insertion order is the reporting order.
LABELS = {
    "sma": "SMA",
    "phen": "Phenomizer",
    "jac": "Jaccard",
    "dense": "Dense-RAG",
    "hippo": "HippoRAG",
}
# Method keys in reporting order: SMA first, then the four baselines.
METHODS = tuple(LABELS)
31
+
32
+
33
+ # --- ontology IC machinery (closure-propagated term frequency) -------------
34
+ def _ancestors(term, parents, cache):
35
+ if term in cache:
36
+ return cache[term]
37
+ acc: set[str] = set()
38
+ for p in parents.get(term, ()):
39
+ acc.add(p)
40
+ acc |= _ancestors(p, parents, cache)
41
+ cache[term] = acc
42
+ return acc
43
+
44
+
45
+ def _build_ic(entity_terms, parents, anc_cache):
46
+ n = len(entity_terms)
47
+ freq: dict[str, int] = {}
48
+ for terms in entity_terms:
49
+ clo = set(terms)
50
+ for t in terms:
51
+ clo |= _ancestors(t, parents, anc_cache)
52
+ for t in clo:
53
+ freq[t] = freq.get(t, 0) + 1
54
+ return {t: -math.log(c / n) for t, c in freq.items()}
55
+
56
+
57
def _resnik(a, b, parents, anc_cache, ic):
    """Return the highest IC among terms shared by the closures of *a* and *b* (0.0 if none).

    A closure is the term itself plus all of its ancestors; terms missing from
    *ic* count as 0.0.
    """
    closure_a = {a}
    closure_a.update(_ancestors(a, parents, anc_cache))
    closure_b = {b}
    closure_b.update(_ancestors(b, parents, anc_cache))
    shared = closure_a & closure_b
    if not shared:
        return 0.0
    return max(ic.get(term, 0.0) for term in shared)
61
+
62
+
63
+ def _phenomizer(query, terms, parents, anc_cache, ic):
64
+ def bma(src, tgt):
65
+ return sum(max((_resnik(q, t, parents, anc_cache, ic) for t in tgt), default=0.0)
66
+ for q in src) / max(len(src), 1)
67
+ return 0.5 * (bma(query, terms) + bma(terms, query))
68
+
69
+
70
+ def _jaccard(query, terms):
71
+ q = set(query)
72
+ return len(q & terms) / max(len(q | terms), 1)
73
+
74
+
75
+ def _rank_of(ranked_ids, target):
76
+ return next((i for i, cid in enumerate(ranked_ids, 1) if cid == target), 999)
77
+
78
+
79
+ # --- one arm ---------------------------------------------------------------
80
def run_arm(
    name: str,
    mounted: MountedOntology,
    records: dict[str, set[str]],
    *,
    seeds: Iterable[int] = (7, 17, 23),
    n_index: int = 2500,
    n_query: int = 150,
    min_terms: int = 7,
    max_terms: int = 30,
    use_hippo: bool = True,
    verbose: bool = True,
) -> dict:
    """Run one benchmark arm and return pooled per-query ranks for every method.

    For each seed: sample up to *n_index* eligible entities, build the IC table
    and the SMA index over them, derive one perturbed query per query entity
    (sampled terms, optionally walked up to parents, plus noise terms), and
    record the rank of the true entity under every method in ``METHODS``.

    Args:
        name: arm label used in the result and in progress output.
        mounted: mounted ontology providing ``graph``, ``build_index`` and
            ``build_case``.
        records: entity_id -> set of ontology term ids.
        seeds: RNG seeds; each one yields an independent index/query sample.
        n_index: maximum number of entities indexed per seed.
        n_query: maximum number of query entities per seed.
        min_terms: minimum number of in-ontology terms for an entity to be eligible.
        max_terms: maximum number of in-ontology terms for an entity to be eligible.
        use_hippo: when False the HippoRAG baseline is skipped and scored 999.
        verbose: print progress every 50 queries and the final summary.

    Returns:
        Dict with keys ``arm``, ``n_all``, ``n_rare``, ``per_seed`` and
        ``slices`` (``"all"`` and ``"rare"`` summaries from ``_summarize``).
    """
    graph = mounted.graph
    parents = {tid: tuple(t.parents) for tid, t in graph.terms.items()}

    def term_text(t):
        # Fall back to the raw id when the term is unknown or has no name.
        nm = graph.terms[t].name if t in graph.terms else ""
        return nm or t

    # Sorted so the shuffle below depends only on the seed, not on dict order.
    eligible = sorted(
        eid for eid, terms in records.items()
        if min_terms <= len({t for t in terms if t in graph.terms}) <= max_terms
    )

    # per-query rows pooled across seeds: {method: rank, "rare": bool}
    rows: list[dict] = []
    per_seed = []

    for seed in seeds:
        rng = random.Random(seed)
        ids = list(eligible)
        rng.shuffle(ids)
        idx_ids = sorted(ids[:n_index])
        dz = {e: sorted(t for t in records[e] if t in graph.terms) for e in idx_ids}
        # Built once per seed and reused for IC and for every query's
        # Phenomizer/Jaccard scoring, instead of rebuilding per (query, entity).
        term_sets = {e: set(dz[e]) for e in idx_ids}
        anc_cache: dict[str, set] = {}
        ic = _build_ic([term_sets[e] for e in idx_ids], parents, anc_cache)
        noise_pool = sorted(ic)
        median_ic = statistics.median(ic.values()) if ic else 0.0

        index = mounted.build_index((e, dz[e], {"id": e}) for e in idx_ids)
        key_of = index.key_of
        index_docs = [(e, " ".join(term_text(t) for t in dz[e])) for e in idx_ids]

        # generate the hard queries first (so dense can batch)
        query_ids = [e for e in idx_ids if len(dz[e]) >= 8][:n_query]
        qspecs = []
        for e in query_ids:
            terms = dz[e]
            keep = rng.sample(terms, min(5, len(terms)))
            q = []
            for t in keep:
                cur = t
                # Walk 0-2 steps up the hierarchy to make the observation imprecise.
                for _ in range(rng.choice([0, 0, 1, 1, 2])):
                    ps = parents.get(cur)
                    if ps:
                        cur = rng.choice(sorted(ps))
                q.append(cur)
            # Up to three noise terms drawn from every term that has an IC value.
            q += rng.sample(noise_pool, min(3, len(noise_pool)))
            qspecs.append((e, q))

        qtexts = [" ".join(term_text(t) for t in q) for _, q in qspecs]
        dense_rk = rank_tfidf_dense_batch(qtexts, index_docs, k=20)
        hippo = None
        if use_hippo:
            hippo = HippoRAGRetriever()
            hippo.build(index_docs)

        t0 = time.perf_counter()
        seed_ranks = {m: [] for m in METHODS}
        for n, (e, q) in enumerate(qspecs, 1):
            # "rare" = the entity's highest-IC term exceeds this seed's median IC.
            row = {"rare": max((ic.get(t, 0.0) for t in dz[e]), default=0.0) > median_ic}
            # SMA
            res = mounted.build_case(q)
            sres = index.retrieve(res, k=10, shortlist=80, fac_budget=40)
            row["sma"] = _rank_of([key_of.get(r.case_id) for r in sres], e)
            # Phenomizer + Jaccard (rank true entity among all index entities)
            phen = sorted(((_phenomizer(q, term_sets[o], parents, anc_cache, ic), o) for o in idx_ids),
                          key=lambda x: (-x[0], x[1]))
            row["phen"] = _rank_of([o for _, o in phen], e)
            jac = sorted(((_jaccard(q, term_sets[o]), o) for o in idx_ids), key=lambda x: (-x[0], x[1]))
            row["jac"] = _rank_of([o for _, o in jac], e)
            # Dense-RAG (precomputed batch)
            row["dense"] = _rank_of([cid for cid, _ in dense_rk[n - 1]], e)
            # HippoRAG (KG/PPR); 999 is the same not-found sentinel _rank_of returns
            row["hippo"] = _rank_of([cid for cid, _ in hippo.retrieve(qtexts[n - 1], k=20)], e) if hippo else 999
            rows.append(row)
            for m in METHODS:
                seed_ranks[m].append(row[m])
            if verbose and n % 50 == 0:
                print(f" [{name} seed {seed}] {n}/{len(qspecs)} ({time.perf_counter()-t0:.0f}s)", flush=True)

        per_seed.append({"seed": seed, "n": len(qspecs),
                         **{f"{m}_t5": _acc(seed_ranks[m], 5) for m in METHODS}})

    result = {"arm": name, "n_all": len(rows), "n_rare": sum(1 for r in rows if r["rare"]),
              "per_seed": per_seed, "slices": {}}
    for slice_name, sub in (("all", rows), ("rare", [r for r in rows if r["rare"]])):
        result["slices"][slice_name] = _summarize(sub)
    if verbose:
        _print_arm(result)
    return result
183
+
184
+
185
+ def _acc(ranks, k):
186
+ return sum(1 for r in ranks if r <= k) / max(len(ranks), 1)
187
+
188
+
189
+ def _summarize(rows):
190
+ if not rows:
191
+ return None
192
+ metr = {m: {f"t{k}": _acc([r[m] for r in rows], k) for k in (1, 5, 10)} for m in METHODS}
193
+ for m in METHODS:
194
+ metr[m]["mrr"] = sum(1 / r[m] for r in rows if r[m] < 999) / len(rows)
195
+ # primary: SMA vs BEST non-SMA baseline on top-5
196
+ sma_c = [1.0 if r["sma"] <= 5 else 0.0 for r in rows]
197
+ others = [m for m in METHODS if m != "sma"]
198
+ best = max(others, key=lambda m: metr[m]["t5"])
199
+ best_c = [1.0 if r[best] <= 5 else 0.0 for r in rows]
200
+ bs = paired_bootstrap(sma_c, best_c)
201
+ return {"n": len(rows), "metrics": metr, "best_baseline": best,
202
+ "delta_t5": bs["delta"], "ci_low": bs["ci_low"], "ci_high": bs["ci_high"],
203
+ "p_value": bs["p_value"], "cliffs": cliffs_delta(sma_c, best_c)}
204
+
205
+
206
+ def _print_arm(r):
207
+ print(f"\n=== arm {r['arm']}: {r['n_all']} queries ({r['n_rare']} rare) ===")
208
+ for slice_name in ("all", "rare"):
209
+ s = r["slices"][slice_name]
210
+ if not s:
211
+ continue
212
+ print(f"\n [{slice_name}] n={s['n']}")
213
+ print(f" {'method':<12}{'top-1':<8}{'top-5':<8}{'top-10':<8}{'MRR':<8}")
214
+ for m in METHODS:
215
+ mm = s["metrics"][m]
216
+ print(f" {LABELS[m]:<12}{mm['t1']:<8.3f}{mm['t5']:<8.3f}{mm['t10']:<8.3f}{mm['mrr']:<8.3f}")
217
+ print(f" primary top-5 SMA vs {LABELS[s['best_baseline']]}: "
218
+ f"delta={s['delta_t5']:+.4f} CI[{s['ci_low']:+.4f},{s['ci_high']:+.4f}] "
219
+ f"p={s['p_value']:.4f} cliffs={s['cliffs']:+.3f}")