structuremappingmemory 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (125) hide show
  1. sma/__init__.py +5 -0
  2. sma/__main__.py +5 -0
  3. sma/agent/__init__.py +5 -0
  4. sma/agent/adapter_draft.py +217 -0
  5. sma/agent/api.py +67 -0
  6. sma/agent/comparison.py +591 -0
  7. sma/agent/llm.py +280 -0
  8. sma/agent/policies.py +21 -0
  9. sma/agent/service.py +95 -0
  10. sma/cli.py +65 -0
  11. sma/encoders/__init__.py +38 -0
  12. sma/encoders/agentobs.py +27 -0
  13. sma/encoders/base.py +23 -0
  14. sma/encoders/code_treesitter.py +64 -0
  15. sma/encoders/coverage.py +80 -0
  16. sma/encoders/draft_adapter.py +183 -0
  17. sma/encoders/healthcare.py +207 -0
  18. sma/encoders/logs_drain.py +142 -0
  19. sma/encoders/prose_tier1.py +57 -0
  20. sma/encoders/structured.py +57 -0
  21. sma/encoders/traces.py +45 -0
  22. sma/eval/__init__.py +2 -0
  23. sma/eval/agentic/__init__.py +35 -0
  24. sma/eval/agentic/arms/__init__.py +0 -0
  25. sma/eval/agentic/arms/cyber.py +48 -0
  26. sma/eval/agentic/arms/discovery.py +35 -0
  27. sma/eval/agentic/arms/finance.py +38 -0
  28. sma/eval/agentic/arms/legal.py +74 -0
  29. sma/eval/agentic/arms/medicine.py +45 -0
  30. sma/eval/agentic/harness.py +275 -0
  31. sma/eval/agentic/memories.py +308 -0
  32. sma/eval/agentic/metrics.py +82 -0
  33. sma/eval/agentic_qa/__init__.py +27 -0
  34. sma/eval/agentic_qa/agent.py +383 -0
  35. sma/eval/agentic_qa/metrics.py +239 -0
  36. sma/eval/agentic_qa/pools.py +197 -0
  37. sma/eval/arn.py +65 -0
  38. sma/eval/baselines/__init__.py +6 -0
  39. sma/eval/baselines/bge_dense.py +54 -0
  40. sma/eval/baselines/bm25.py +18 -0
  41. sma/eval/baselines/dense.py +42 -0
  42. sma/eval/baselines/hipporag.py +235 -0
  43. sma/eval/baselines/hybrid_rrf.py +30 -0
  44. sma/eval/baselines/longcontext_llm.py +124 -0
  45. sma/eval/baselines/rerank.py +41 -0
  46. sma/eval/baselines/splade.py +77 -0
  47. sma/eval/baselines/wl_kernel.py +163 -0
  48. sma/eval/bugsinpy.py +358 -0
  49. sma/eval/bugsinpy_families.py +164 -0
  50. sma/eval/crossdomain.py +89 -0
  51. sma/eval/diabetes.py +61 -0
  52. sma/eval/drift_env.py +26 -0
  53. sma/eval/drift_metrics.py +24 -0
  54. sma/eval/family_labels.py +167 -0
  55. sma/eval/fraud_elliptic/__init__.py +29 -0
  56. sma/eval/fraud_elliptic/encoder.py +279 -0
  57. sma/eval/fraud_elliptic/eval.py +269 -0
  58. sma/eval/fraud_elliptic/test_encoder.py +123 -0
  59. sma/eval/ieee_cis.py +66 -0
  60. sma/eval/loghub.py +16 -0
  61. sma/eval/loghub_eval.py +480 -0
  62. sma/eval/longmemeval.py +51 -0
  63. sma/eval/memory_backends/__init__.py +2 -0
  64. sma/eval/memory_backends/base.py +22 -0
  65. sma/eval/memory_backends/context_only.py +14 -0
  66. sma/eval/memory_backends/rag_notes.py +17 -0
  67. sma/eval/memory_backends/shared_llm.py +30 -0
  68. sma/eval/memory_backends/sma_memory.py +54 -0
  69. sma/eval/memory_backends/zep_graphiti.py +33 -0
  70. sma/eval/metrics.py +32 -0
  71. sma/eval/ontology_bench.py +219 -0
  72. sma/eval/report.py +573 -0
  73. sma/eval/ssb_eval.py +216 -0
  74. sma/eval/ssb_generator.py +116 -0
  75. sma/eval/stats.py +108 -0
  76. sma/eval/transfer_eval.py +844 -0
  77. sma/index/__init__.py +15 -0
  78. sma/index/ann.py +21 -0
  79. sma/index/content_vectors.py +60 -0
  80. sma/index/inverted.py +63 -0
  81. sma/index/macfac.py +174 -0
  82. sma/ir/__init__.py +22 -0
  83. sma/ir/canon.py +106 -0
  84. sma/ir/schema.py +165 -0
  85. sma/ir/sexpr.py +86 -0
  86. sma/ir/signatures.py +76 -0
  87. sma/match/__init__.py +20 -0
  88. sma/match/conflicts.py +46 -0
  89. sma/match/engine.py +60 -0
  90. sma/match/explain.py +59 -0
  91. sma/match/infer.py +54 -0
  92. sma/match/kernels.py +54 -0
  93. sma/match/mdl.py +30 -0
  94. sma/match/merge_cpsat.py +77 -0
  95. sma/match/merge_greedy.py +15 -0
  96. sma/match/mh.py +177 -0
  97. sma/match/ses.py +84 -0
  98. sma/match/types.py +115 -0
  99. sma/match/verifier.py +27 -0
  100. sma/ontology/__init__.py +45 -0
  101. sma/ontology/attack.py +134 -0
  102. sma/ontology/cpc.py +69 -0
  103. sma/ontology/graph.py +58 -0
  104. sma/ontology/loader.py +262 -0
  105. sma/ontology/mitre_xml.py +67 -0
  106. sma/ontology/mount.py +101 -0
  107. sma/ontology/rdf_loader.py +75 -0
  108. sma/ontology/registry.py +115 -0
  109. sma/ontology/router.py +69 -0
  110. sma/ontology/usgaap.py +73 -0
  111. sma/sage/__init__.py +6 -0
  112. sma/sage/assimilate.py +12 -0
  113. sma/sage/pools.py +105 -0
  114. sma/sage/probabilities.py +10 -0
  115. sma/store/__init__.py +6 -0
  116. sma/store/lmdb_store.py +78 -0
  117. sma/store/registry.py +26 -0
  118. sma/store/wal.py +26 -0
  119. sma/ui/app.py +642 -0
  120. structuremappingmemory-1.0.0.dist-info/METADATA +190 -0
  121. structuremappingmemory-1.0.0.dist-info/RECORD +125 -0
  122. structuremappingmemory-1.0.0.dist-info/WHEEL +5 -0
  123. structuremappingmemory-1.0.0.dist-info/entry_points.txt +2 -0
  124. structuremappingmemory-1.0.0.dist-info/licenses/LICENSE +204 -0
  125. structuremappingmemory-1.0.0.dist-info/top_level.txt +1 -0
sma/ui/app.py ADDED
@@ -0,0 +1,642 @@
1
+ """Gradio comparison workbench for SMA-1.
2
+
3
+ Chat with an LLM (local Qwen GGUF or DeepSeek API) whose memory mode is
4
+ toggleable per turn, or compare all memory modes side by side. Extraction and
5
+ retrieval are deterministic; the LLM only verbalizes retrieved evidence.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import argparse
11
+ import html
12
+ import json
13
+ import pathlib
14
+
15
+ from sma.agent.adapter_draft import draft_rules
16
+ from sma.agent.comparison import MODES, ComparisonFramework, challenge_corpus, demo_corpus
17
+ from sma.agent.llm import DEFAULT_MODEL_FILE, DEFAULT_MODEL_REPO, DEEPSEEK_MODEL
18
+ from sma.encoders.draft_adapter import (
19
+ DraftAdapter,
20
+ check_determinism,
21
+ rules_from_json,
22
+ rules_hash,
23
+ rules_to_json,
24
+ )
25
+ from sma.eval.arn import DEFAULT_ARN_PATH, arn_choice_corpus
26
+
27
# Prepared LogHub samples offered in the Corpus tab, keyed by dropdown label.
# Paths are relative to the current working directory; load_loghub (inside
# build_demo) returns an error status when the selected file does not exist.
UI_CORPORA = {
    "HDFS sample (5,000 labeled sessions)": pathlib.Path("data/processed/ui_corpus_hdfs.jsonl"),
    "BGL sample (2,500 labeled sessions)": pathlib.Path("data/processed/ui_corpus_bgl.jsonl"),
    "Liberty HAYSTACK (5,000 sessions, ~5% anomalies)": pathlib.Path("data/processed/ui_corpus_liberty.jsonl"),
}

# Radio-button label -> key used to index framework.orchestrators.
LLM_CHOICES = {
    "Local (Qwen2.5-0.5B, CPU)": "local",
    f"DeepSeek API ({DEEPSEEK_MODEL})": "deepseek",
}

# Header background colour per memory mode. The renderers fall back to
# "#334155" for any mode that is not listed here.
MODE_ACCENTS = {
    "sma": "#2563eb",
    "bm25": "#b45309",
    "dense rag": "#7c3aed",
    "knowledge graph": "#047857",
    "hybrid (fused)": "#0e7490",
    "context only": "#64748b",
}

# Multi-line example incidents shown via gr.Examples in the Chat and Compare
# tabs; HARD_QUESTIONS[2] is also the default Compare question.
HARD_QUESTIONS = [
    "ERROR StreamIngest connector timeout polling source kafka-9\n"
    "WARN StreamIngest connector retrying poll\n"
    "WARN StreamIngest connector retrying poll\n"
    "ERROR StreamIngest sink write failed after repeated retry\n"
    "ERROR StreamIngest backpressure queue overflow failure",
    "INFO PaymentGateway deployment completed successfully\n"
    "INFO PaymentGateway timeout setting increased to 45s by operator\n"
    "INFO PaymentGateway connection pool resized",
    "ERROR BackupAgent snapshot timeout on host 10.0.0.99\n"
    "WARN BackupAgent retrying snapshot upload\n"
    "ERROR BackupAgent snapshot failed permanently",
    "WARN CdnOrigin fetch timeout for asset bundle\n"
    "WARN CdnOrigin retrying fetch\n"
    "WARN CdnOrigin retrying fetch\n"
    "WARN CdnOrigin retrying fetch\n"
    "ERROR CdnOrigin served stale asset after retry failure",
    "ERROR DeviceHub firmware download timeout for sensor fleet\n"
    "WARN DeviceHub retrying firmware push\n"
    "ERROR DeviceHub provisioning failed for batch\n"
    "ERROR DeviceHub heartbeat lost after failure",
]
69
+
70
# Stylesheet for the custom HTML fragments produced by the render_* helpers
# (classes prefixed "sma-"); main() passes it to demo.launch(css=CSS).
CSS = """
.sma-shell {max-width: 1480px; margin: 0 auto;}
/* All custom surfaces hard-code light backgrounds, so every text color is
pinned too — dark-mode themes must never bleed white text into them. */
.sma-cards {display: grid; grid-template-columns: repeat(auto-fit, minmax(310px, 1fr)); gap: 16px;}
.sma-card {border: 1px solid #cbd5e1; border-radius: 12px; background: #ffffff; color: #111827;
overflow: hidden; box-shadow: 0 2px 6px rgba(15, 23, 42, .10);
display: flex; flex-direction: column; transition: box-shadow .15s ease;}
.sma-card:hover {box-shadow: 0 4px 14px rgba(15, 23, 42, .16);}
.sma-card * {color: inherit;}
.sma-card-head {padding: 9px 14px; color: #ffffff; font-weight: 700; font-size: 13px;
text-transform: uppercase; letter-spacing: .06em; display: flex;
justify-content: space-between; align-items: center;}
.sma-card-head .sma-llm-badge {color: rgba(255,255,255,.92); font-weight: 600; font-size: 10px;
background: rgba(255,255,255,.18); border-radius: 99px; padding: 2px 8px;
text-transform: none; letter-spacing: 0;}
.sma-card-body {padding: 13px 15px; font-size: 14px; line-height: 1.6; white-space: pre-wrap;
color: #111827; flex: 1;}
.sma-evidence {border-top: 1px solid #e2e8f0; padding: 9px 15px; font-size: 12px; color: #111827;}
.sma-evidence summary {cursor: pointer; color: #334155; font-weight: 700;}
.sma-ev-item {margin: 8px 0; padding: 9px; background: #f1f5f9; border-radius: 8px; color: #1f2933;}
.sma-ev-meta {color: #475569; font-family: ui-monospace, SFMono-Regular, Menlo, monospace;
font-size: 11px; word-break: break-all;}
.sma-ev-text {margin-top: 5px; white-space: pre-wrap; font-family: ui-monospace, Menlo, monospace;
font-size: 11px; max-height: 130px; overflow-y: auto; color: #1f2933;}
.sma-inference {color: #1d4ed8; font-family: ui-monospace, Menlo, monospace; font-size: 11px;
margin-top: 4px;}
.sma-ev-warning {margin: 8px 0; padding: 9px; background: #fff7ed; border: 1px solid #fed7aa;
border-radius: 8px; color: #9a3412; font-size: 12px; font-weight: 600;}
.sma-detail {color: #64748b; font-size: 11px; padding: 4px 15px 11px;}
.sma-empty {color: inherit; font-size: 14px;}
.sma-chips {display: flex; gap: 8px; flex-wrap: wrap; margin: 2px 0 6px;}
.sma-chip {background: #eef2ff; color: #3730a3; border: 1px solid #c7d2fe; border-radius: 99px;
padding: 3px 12px; font-size: 12px; font-weight: 600;}
.sma-chip.ok {background: #ecfdf5; color: #065f46; border-color: #a7f3d0;}
.sma-chip.warn {background: #fff7ed; color: #9a3412; border-color: #fed7aa;}
.sma-table {width: 100%; border-collapse: collapse; background: #ffffff; color: #111827;
border: 1px solid #cbd5e1; border-radius: 10px; overflow: hidden; font-size: 13px;}
.sma-table th {background: #f1f5f9; color: #334155; text-align: left; padding: 8px 10px;
font-size: 12px; text-transform: uppercase; letter-spacing: .04em;
border-bottom: 1px solid #cbd5e1;}
.sma-table td {padding: 7px 10px; border-bottom: 1px solid #e2e8f0; color: #1f2933;
vertical-align: top;}
.sma-table td.mono {font-family: ui-monospace, Menlo, monospace; font-size: 12px; color: #475569;}
.sma-table tr:last-child td {border-bottom: none;}
.sma-panel {border: 1px solid #cbd5e1; border-radius: 12px; background: #ffffff; color: #111827;
overflow: hidden;}
.sma-panel * {color: inherit;}
.sma-panel-head {padding: 8px 14px; color: #ffffff; font-weight: 700; font-size: 12px;
text-transform: uppercase; letter-spacing: .06em;}
.sma-panel-body {padding: 6px 12px 10px;}
.sma-label {border-radius: 99px; padding: 1px 8px; font-size: 10px; font-weight: 700;
margin-left: 6px;}
.sma-label.bad {background: #fee2e2; color: #991b1b;}
.sma-label.good {background: #dcfce7; color: #166534;}
.sma-vote {font-size: 12px; color: #334155; margin: 2px 0 8px;}
"""
127
+
128
+
129
def coverage_chip(evidence: list[dict]) -> str:
    """Render the structural-coverage chip (blueprint 12-R3).

    The first truthy ``coverage`` mapping found in *evidence* is used. The chip
    is amber (``warn``) when that mapping's ``low`` flag is truthy and green
    (``ok``) otherwise.

    Args:
        evidence: Evidence rows; each is a dict that may carry a ``coverage``
            mapping with ``percent`` and ``low`` entries.

    Returns:
        The chip as an HTML ``<span>``, or ``""`` when no row has coverage data.
    """
    coverage = next((row.get("coverage") for row in evidence if row.get("coverage")), None)
    if not coverage:
        return ""
    cls = "warn" if coverage.get("low") else "ok"
    # Escape the percentage like every other interpolated evidence field so a
    # non-numeric value cannot inject markup; numeric values render unchanged.
    percent = html.escape(str(coverage.get("percent", 0)))
    return f'<span class="sma-chip {cls}">structural coverage: {percent}%</span>'
139
+
140
+
141
def _escape_optional(value) -> str:
    """HTML-escape *value* as text, rendering ``None`` as an empty string."""
    return "" if value is None else html.escape(str(value))


def evidence_items_html(evidence: list[dict]) -> str:
    """Render evidence rows as concatenated HTML blocks.

    Rows with a truthy ``warning`` become a warning box (warning text plus
    provenance). Every other row becomes an evidence item showing source id,
    optional label chip, score, provenance, optional alignment, text and any
    inference lines.

    Fields that are present but ``None`` (or not strings) are rendered safely
    instead of raising inside ``html.escape``.

    Args:
        evidence: Evidence rows as dicts.

    Returns:
        The concatenated HTML, or ``""`` for an empty list.
    """
    items = []
    for row in evidence:
        if row.get("warning"):
            items.append(
                '<div class="sma-ev-warning">&#9888; '
                f'{_escape_optional(row["warning"])}<br>'
                f'<span class="sma-ev-meta">{_escape_optional(row.get("provenance"))}</span></div>'
            )
            continue
        # `or []` also covers an explicit None stored under "inferences".
        inferences = "".join(
            f'<div class="sma-inference">&#8627; {_escape_optional(s)}</div>'
            for s in row.get("inferences") or []
        )
        label = row.get("label") or ""
        # Only the exact label "Anomaly" is styled as bad; any other label is good.
        label_chip = (
            f'<span class="sma-label {"bad" if label == "Anomaly" else "good"}">{_escape_optional(label)}</span>'
            if label else ""
        )
        alignment = (
            f'<br><b>{_escape_optional(row["alignment"])}</b>' if row.get("alignment") else ""
        )
        items.append(
            '<div class="sma-ev-item">'
            f'<div class="sma-ev-meta">{_escape_optional(row.get("source_id"))}{label_chip} · '
            f'score={html.escape(str(row.get("score", "")))}<br>'
            f'{_escape_optional(row.get("provenance"))}{alignment}</div>'
            f'<div class="sma-ev-text">{_escape_optional(row.get("text"))}</div>'
            f"{inferences}</div>"
        )
    return "".join(items)
172
+
173
+
174
def render_cards(results: dict, llm_label: str) -> str:
    """Render one answer card per memory mode inside a ``sma-cards`` grid.

    Args:
        results: Mapping of mode name to a result object exposing ``answer``
            and ``evidence`` (a list of evidence-row dicts).
        llm_label: Display label of the answering model, shown as a badge.

    Returns:
        The HTML for the whole grid; an empty grid when *results* is empty.
    """

    def build_card(mode_name, outcome) -> str:
        rows = outcome.evidence
        header_colour = MODE_ACCENTS.get(mode_name, "#334155")
        # The first non-warning row carrying a mode_detail supplies the detail line.
        summary = "no evidence retrieved"
        for entry in rows:
            if entry.get("mode_detail") and not entry.get("warning"):
                summary = entry["mode_detail"]
                break
        coverage = coverage_chip(rows)
        coverage_block = ""
        if coverage:
            coverage_block = f'<div class="sma-chips">{coverage}</div>'
        pieces = [
            '<div class="sma-card">',
            f'<div class="sma-card-head" style="background:{header_colour}">',
            f"<span>{html.escape(mode_name)}</span>",
            f'<span class="sma-llm-badge">{html.escape(llm_label)}</span></div>',
            f'<div class="sma-card-body">{html.escape(outcome.answer)}</div>',
            f'<div class="sma-detail">{coverage_block}{html.escape(summary)}{label_vote_line(rows)}</div>',
            '<details class="sma-evidence"><summary>',
            f"Evidence ({len(rows)})</summary>{evidence_items_html(rows)}</details>",
            "</div>",
        ]
        return "".join(pieces)

    rendered = "".join(build_card(mode_name, outcome) for mode_name, outcome in results.items())
    return f'<div class="sma-cards">{rendered}</div>'
197
+
198
+
199
def label_vote_line(evidence: list[dict]) -> str:
    """Summarise the labels of retrieved evidence as a majority-vote line.

    Rows without a truthy ``label`` are ignored. Every label other than the
    exact string ``"Anomaly"`` counts towards the Normal tally, and a tie
    resolves to ``Normal``.

    Returns:
        A ``sma-vote`` ``<div>``, or ``""`` when no row carries a label.
    """
    anomaly_votes = 0
    other_votes = 0
    for entry in evidence:
        tag = entry.get("label")
        if not tag:
            continue
        if tag == "Anomaly":
            anomaly_votes += 1
        else:
            other_votes += 1
    if anomaly_votes + other_votes == 0:
        return ""
    if anomaly_votes > other_votes:
        winner = "Anomaly"
    else:
        winner = "Normal"
    tally = f'<div class="sma-vote">retrieved labels: {anomaly_votes} Anomaly / {other_votes} Normal '
    outcome = f"&rarr; vote: <b>{winner}</b></div>"
    return tally + outcome
210
+
211
+
212
def render_evidence_panel(mode: str, evidence: list[dict]) -> str:
    """Render the evidence panel shown beside the chat for one turn.

    The header shows the mode name and the number of non-warning rows; the body
    holds the coverage chip (if any), the label vote line and the evidence
    items, or a "none retrieved" placeholder when there are no items at all.
    """
    header_colour = MODE_ACCENTS.get(mode, "#334155")
    coverage = coverage_chip(evidence)
    if coverage:
        coverage_block = f'<div class="sma-chips">{coverage}</div>'
    else:
        coverage_block = ""
    # Warning rows are rendered in the body but not counted as items.
    item_count = len([entry for entry in evidence if not entry.get("warning")])
    fragments = [
        '<div class="sma-panel">',
        f'<div class="sma-panel-head" style="background:{header_colour}">',
        f"evidence · {html.escape(mode)} · {item_count} item(s)</div>",
        f'<div class="sma-panel-body">{coverage_block}{label_vote_line(evidence)}',
        f'{evidence_items_html(evidence) or "<p class=sma-empty>none retrieved</p>"}</div>',
        "</div>",
    ]
    return "".join(fragments)
225
+
226
+
227
def render_corpus_table(framework: ComparisonFramework) -> str:
    """Render the loaded corpus as an HTML table, capped at 200 rows.

    Each row shows the item id, adapter id, statement count, the first 12
    characters of the case id and the first 150 characters of the item text.
    A note is appended when the corpus holds more items than are shown, and a
    placeholder paragraph is returned when the corpus is empty.
    """
    corpus_items = framework.items
    if not corpus_items:
        return '<p class="sma-empty">Corpus is empty — paste raw text above and click Load.</p>'

    def as_row(entry) -> str:
        cells = [
            f'<td class="mono">{html.escape(entry.item_id)}</td>',
            f"<td>{html.escape(entry.adapter_id)}</td>",
            f"<td>{len(entry.case.statements)}</td>",
            f'<td class="mono">{html.escape(entry.case.case_id[:12])}</td>',
            f"<td>{html.escape(entry.text[:150])}</td>",
        ]
        return "<tr>" + "".join(cells) + "</tr>"

    visible = framework.items[:200]
    body = "".join([as_row(entry) for entry in visible])
    if len(framework.items) > len(visible):
        footer = f'<p class="sma-empty">Showing first {len(visible)} of {len(framework.items)} items.</p>'
    else:
        footer = ""
    head = (
        '<table class="sma-table"><thead><tr>'
        "<th>item</th><th>adapter</th><th>statements</th><th>case id</th><th>text</th>"
        "</tr></thead><tbody>"
    )
    return head + body + "</tbody></table>" + footer
251
+
252
+
253
def render_chips(framework: ComparisonFramework) -> str:
    """Render the status chip row: corpus size, LLM backends and draft note.

    The local chip is green when the local orchestrator status reports
    ``loaded`` or a ``llama_cpp`` backend; the DeepSeek chip is green when its
    status reports ``key_present``. A warning chip with the framework's
    ``draft_note`` is appended when that note is non-empty.
    """
    local_status = framework.orchestrators["local"].status
    deepseek_status = framework.orchestrators["deepseek"].status

    if local_status.get("loaded") or local_status.get("backend") == "llama_cpp":
        local_kind, local_caption = "ok", "local LLM ready"
    else:
        local_kind, local_caption = "warn", "local LLM missing"

    if deepseek_status.get("key_present"):
        remote_kind, remote_caption = "ok", "DeepSeek key present"
    else:
        remote_kind, remote_caption = "warn", "DeepSeek key missing"

    draft_fragment = ""
    if framework.draft_note:
        draft_fragment = f'<span class="sma-chip warn">{html.escape(framework.draft_note)}</span>'

    fragments = [
        '<div class="sma-chips">',
        f'<span class="sma-chip">{len(framework.items)} corpus items</span>',
        f'<span class="sma-chip {local_kind}">{local_caption}</span>',
        f'<span class="sma-chip {remote_kind}">{remote_caption}</span>',
        f"{draft_fragment}",
        "</div>",
    ]
    return "".join(fragments)
276
+
277
+
278
def build_demo(framework: ComparisonFramework | None = None):
    """Build the Gradio Blocks app for the comparison workbench.

    Args:
        framework: Framework that backs every callback. A fresh
            ``ComparisonFramework()`` is created when this is omitted or
            falsy. If the framework has no items, the challenge corpus is
            loaded with the ``"logs"`` adapter before the UI is built.

    Returns:
        The constructed ``gr.Blocks`` object; it is not launched here.

    Raises:
        RuntimeError: If ``gradio`` cannot be imported.
    """
    framework = framework or ComparisonFramework()
    if not framework.items:
        # Pre-load the challenge corpus so chat and compare work immediately.
        framework.load_lines(challenge_corpus(), adapter_id="logs")
    try:
        import gradio as gr
    except ImportError as exc:  # pragma: no cover
        raise RuntimeError("Install gradio to run the UI: pip install gradio") from exc

    # Adapter ids offered in every adapter dropdown.
    adapters = ["logs", "code", "traces", "structured", "agentobs", "prose_tier1"]

    def chat_send(message, history, mode, llm_label, adapter_id, k, scorer):
        """Handle one chat turn.

        Returns a 3-tuple for (chatbot, message box, evidence panel): the
        updated history, an empty string that clears the message box, and the
        evidence panel HTML (or ``gr.skip()`` when nothing was retrieved).
        """
        message = (message or "").strip()
        history = history or []
        if not message:
            # Blank message: nothing to add to the conversation.
            return history, "", gr.skip()
        if not framework.items:
            history = history + [
                {"role": "user", "content": message},
                {"role": "assistant", "content": "The corpus is empty — load one on the Corpus tab first."},
            ]
            return history, "", gr.skip()
        # Unknown labels fall back to the local backend.
        llm = LLM_CHOICES.get(llm_label, "local")
        framework.set_scorer(scorer)
        resolved_mode, evidence = framework.evidence_for(message, mode, adapter_id=adapter_id, k=int(k))
        orchestrator = framework.orchestrators[llm]
        answer = orchestrator.answer(message, resolved_mode, evidence, history=history)
        # Build a new list rather than mutating the history passed in.
        history = history + [
            {"role": "user", "content": message},
            {"role": "assistant", "content": answer},
        ]
        return history, "", render_evidence_panel(resolved_mode, evidence)

    def chat_clear():
        """Reset the chatbot, message box and evidence panel to their initial values."""
        return [], "", '<p class="sma-empty">Evidence for the latest turn appears here.</p>'

    def run_comparison(question, adapter_id, k, selected_modes, llm_label, scorer):
        """Answer *question* under every selected memory mode and return the cards HTML."""
        if not framework.items:
            return '<p class="sma-empty">Load a corpus first (Corpus tab).</p>'
        if not selected_modes:
            return '<p class="sma-empty">Select at least one memory mode.</p>'
        llm = LLM_CHOICES.get(llm_label, "local")
        framework.set_scorer(scorer)
        results = framework.ask_all(
            question, adapter_id=adapter_id, k=int(k), modes=selected_modes, llm=llm
        )
        return render_cards(results, llm_label)

    def load_corpus(corpus_text, adapter_id, max_items, clear_existing, single_case):
        """Load pasted text into the framework; returns (table HTML, status JSON, chips HTML)."""
        if clear_existing:
            framework.clear()
        added = framework.load_lines(
            corpus_text, adapter_id=adapter_id, max_items=int(max_items), single_case=bool(single_case)
        )
        status = {"added": len(added), "total": len(framework.items), "adapter": adapter_id}
        return render_corpus_table(framework), json.dumps(status, indent=2), render_chips(framework)

    def fill_challenge():
        """Return the challenge corpus text and the ``"logs"`` adapter id for the form."""
        return challenge_corpus(), "logs"

    def fill_demo():
        """Return the demo corpus text and the ``"logs"`` adapter id for the form."""
        return demo_corpus(), "logs"

    def load_loghub(corpus_name, n_items, clear_existing, progress=gr.Progress()):
        """Index up to *n_items* rows of a prepared LogHub JSONL sample.

        Returns (table HTML, status JSON, chips HTML). When the sample file is
        unknown or missing, the status carries an error and the corpus is left
        untouched.
        """
        path = UI_CORPORA.get(corpus_name)
        if path is None or not path.exists():
            status = {
                "error": f"{corpus_name} not prepared",
                "fix": "python3 -u scripts/prepare_ui_corpus.py",
            }
            return render_corpus_table(framework), json.dumps(status, indent=2), render_chips(framework)
        if clear_existing:
            framework.clear()
        n_items = int(n_items)
        added = 0
        # One JSON object per line; the whole file is read before sampling.
        with path.open(encoding="utf-8") as fh:
            rows = [json.loads(line) for line in fh]
        # The JSONL stores all anomalies first; shuffle deterministically so a
        # partial load keeps the stratified label balance.
        import random

        # Fixed seed (7): the same subset is selected on every load.
        random.Random(7).shuffle(rows)
        rows = rows[:n_items]
        for row in progress.tqdm(rows, desc=f"Encoding {corpus_name}"):
            framework.add_document(row["text"], adapter_id="logs", label=row.get("label", ""))
            added += 1
        # Label counts cover the whole framework corpus, not just this load.
        labels = [item.label for item in framework.items if item.label]
        status = {
            "added": added,
            "total": len(framework.items),
            "anomaly": sum(1 for label in labels if label == "Anomaly"),
            "normal": sum(1 for label in labels if label == "Normal"),
            "note": "First dense-RAG query embeds the whole corpus once (~1-2 min at 5k); later queries are fast.",
        }
        return render_corpus_table(framework), json.dumps(status, indent=2), render_chips(framework)

    def load_arn_sample(max_items, clear_existing):
        """Load the downloaded ARN sample with the ``prose_tier1`` adapter.

        Returns a 5-tuple for (corpus box, adapter dropdown, table HTML,
        status JSON, chips HTML). When the ARN CSV is missing, the first two
        outputs are ``gr.skip()`` and the status describes how to fetch it.
        """
        if not DEFAULT_ARN_PATH.exists():
            status = {
                "error": "ARN CSV is not downloaded",
                "expected_path": str(DEFAULT_ARN_PATH),
                "fetch": "python3 scripts/fetch_datasets.py --manifest data/manifests/datasets.json --only arn",
            }
            return (
                gr.skip(),
                gr.skip(),
                render_corpus_table(framework),
                json.dumps(status, indent=2),
                render_chips(framework),
            )
        corpus_text, suggested_query = arn_choice_corpus(limit=int(max_items))
        if clear_existing:
            framework.clear()
        # NOTE(review): max_items is doubled here; presumably each ARN entry
        # yields two corpus items — confirm against arn_choice_corpus.
        added = framework.load_lines(corpus_text, adapter_id="prose_tier1", max_items=int(max_items) * 2)
        status = {
            "added": len(added),
            "total": len(framework.items),
            "adapter": "prose_tier1",
            "suggested_query": suggested_query,
            "tier_note": "ARN uses flagged Tier-1 prose extraction; not part of headline Tier-0 claims.",
        }
        return (
            corpus_text,
            "prose_tier1",
            render_corpus_table(framework),
            json.dumps(status, indent=2),
            render_chips(framework),
        )

    def draft_adapter_from_corpus(llm_label):
        """Ask the chosen LLM to draft adapter rules from the corpus texts.

        Returns (rules JSON, rules hash, status JSON); the first two are empty
        strings when the corpus is empty or no rule classes were produced.
        """
        if not framework.items:
            return "", "", json.dumps({"error": "corpus is empty - load one first"}, indent=2)
        # Unlike the chat callbacks, unknown labels fall back to "deepseek" here.
        llm = LLM_CHOICES.get(llm_label, "deepseek")
        rules, note = draft_rules([item.text for item in framework.items], llm=llm)
        if not rules.classes:
            return "", "", json.dumps({"error": note, "backend": llm}, indent=2)
        status = {
            "note": note,
            "backend": llm,
            "discipline": "LLM proposed RULES (data); encoding stays deterministic. Review before trusting.",
        }
        return rules_to_json(rules), rules_hash(rules), json.dumps(status, indent=2)

    def apply_draft_adapter(rules_json):
        """Parse the edited rules JSON, check determinism and apply the draft adapter.

        Returns (table HTML, draft hash, status JSON, chips HTML). On any
        failure the hash output is ``gr.skip()`` and the status holds the
        exception type and message.
        """
        try:
            rules = rules_from_json(rules_json or "")
            adapter = DraftAdapter(rules)
            # Probe with real corpus text when available, else a fixed sample line.
            probe = framework.items[0].text if framework.items else "probe timeout error line"
            check_determinism(adapter, probe)
            count = framework.apply_draft_adapter(adapter)
        except Exception as exc:
            # UI boundary: report the failure in the status box instead of raising.
            status = {"error": f"{type(exc).__name__}: {exc}"}
            return (
                render_corpus_table(framework),
                gr.skip(),
                json.dumps(status, indent=2),
                render_chips(framework),
            )
        status = {
            "applied": framework.draft_note,
            "reencoded_items": count,
            "draft_hash": adapter.draft_hash,
            "case_metadata": {"adapter": "draft", "draft_hash": adapter.draft_hash},
        }
        return (
            render_corpus_table(framework),
            adapter.draft_hash,
            json.dumps(status, indent=2),
            render_chips(framework),
        )

    def revert_draft_adapter():
        """Revert to the base adapters; returns (table HTML, status JSON, chips HTML)."""
        count = framework.revert_draft_adapter()
        status = {"reverted_to": "base adapters", "reencoded_items": count}
        return render_corpus_table(framework), json.dumps(status, indent=2), render_chips(framework)

    def backend_status():
        """Return the ``status`` of every orchestrator as pretty-printed JSON."""
        return json.dumps(
            {name: orch.status for name, orch in framework.orchestrators.items()}, indent=2
        )

    with gr.Blocks(title="SMA-1 Agentic Memory Workbench") as demo:
        with gr.Column(elem_classes=["sma-shell"]):
            gr.Markdown(
                "# SMA-1 Agentic Memory Workbench\n"
                "Chat with toggleable memory, or compare all memory modes side by side. "
                "Extraction is deterministic (Tier-0 adapters); the LLM — local or DeepSeek — "
                "only verbalizes retrieved evidence."
            )
            # Status chips sit above the tabs; the Corpus-tab callbacks refresh them.
            chips = gr.HTML(render_chips(framework))
            with gr.Tab("Chat"):
                with gr.Row():
                    with gr.Column(scale=3):
                        chatbot = gr.Chatbot(height=430, label="Conversation")
                        msg = gr.Textbox(
                            lines=3,
                            label="Message (paste log lines as a new incident, or ask a follow-up)",
                            placeholder="Describe or paste an incident… then ask follow-ups.",
                        )
                        with gr.Row():
                            send = gr.Button("Send", variant="primary")
                            clear_chat = gr.Button("Clear chat")
                        gr.Examples(
                            examples=[[q] for q in HARD_QUESTIONS],
                            inputs=[msg],
                            label="Challenge incidents (paired with the pre-loaded challenge corpus)",
                        )
                    with gr.Column(scale=2):
                        chat_mode = gr.Radio(list(MODES), value="sma", label="Memory mode")
                        chat_llm = gr.Radio(
                            list(LLM_CHOICES), value=list(LLM_CHOICES)[0], label="Answer model"
                        )
                        chat_scorer = gr.Radio(
                            ["surprisal", "ses", "mdl"], value="surprisal",
                            label="SMA scorer (surprisal: frozen default, finds rare failure families; "
                            "ses: pure systematicity; mdl: parameter-free compression)",
                        )
                        with gr.Row():
                            chat_adapter = gr.Dropdown(adapters, value="logs", label="Query adapter")
                            chat_k = gr.Number(value=5, precision=0, label="Evidence k")
                        evidence_panel = gr.HTML(
                            '<p class="sma-empty">Evidence for the latest turn appears here.</p>'
                        )
                # Input order must match chat_send's positional parameters.
                chat_inputs = [msg, chatbot, chat_mode, chat_llm, chat_adapter, chat_k, chat_scorer]
                send.click(chat_send, chat_inputs, [chatbot, msg, evidence_panel])
                msg.submit(chat_send, chat_inputs, [chatbot, msg, evidence_panel])
                clear_chat.click(chat_clear, None, [chatbot, msg, evidence_panel])
            with gr.Tab("Compare"):
                with gr.Row():
                    question = gr.Textbox(
                        value=HARD_QUESTIONS[2],
                        lines=4,
                        scale=4,
                        label="Question / new incident",
                    )
                    query_adapter = gr.Dropdown(adapters, value="logs", label="Query adapter", scale=1)
                    top_k = gr.Number(value=5, precision=0, label="Evidence k", scale=1)
                with gr.Row():
                    modes = gr.CheckboxGroup(
                        choices=list(MODES),
                        value=list(MODES),
                        label="Memory modes (toggle to compare)",
                        scale=3,
                    )
                    llm_choice = gr.Radio(
                        choices=list(LLM_CHOICES),
                        value=list(LLM_CHOICES)[0],
                        label="Answer model",
                        scale=2,
                    )
                    compare_scorer = gr.Radio(
                        ["surprisal", "ses", "mdl"], value="surprisal", label="SMA scorer", scale=1
                    )
                run = gr.Button("Run comparison", variant="primary")
                cards = gr.HTML()
                run.click(run_comparison, [question, query_adapter, top_k, modes, llm_choice, compare_scorer], cards)
                gr.Examples(
                    examples=[[q] for q in HARD_QUESTIONS],
                    inputs=[question],
                    label="Challenge incidents",
                )
            with gr.Tab("Corpus"):
                with gr.Row():
                    adapter = gr.Dropdown(adapters, value="logs", label="Deterministic adapter")
                    max_items = gr.Number(value=50, precision=0, label="Max items")
                    clear = gr.Checkbox(value=True, label="Clear existing corpus")
                    single_case = gr.Checkbox(
                        value=False,
                        label="Load as ONE incident (don't split lines/blocks)",
                    )
                with gr.Row():
                    loghub_choice = gr.Dropdown(
                        list(UI_CORPORA), value=list(UI_CORPORA)[0], label="Real LogHub sample", scale=2
                    )
                    loghub_n = gr.Number(value=5000, precision=0, label="Sessions to index", scale=1)
                    load_loghub_btn = gr.Button("Load LogHub sample", variant="primary", scale=1)
                corpus = gr.Textbox(value=challenge_corpus(), lines=10, label="Raw corpus (manual)")
                with gr.Row():
                    load = gr.Button("Load pasted corpus")
                    use_challenge = gr.Button("Fill challenge corpus")
                    use_demo = gr.Button("Fill demo corpus")
                    load_arn = gr.Button("Load downloaded ARN sample")
                corpus_table = gr.HTML(render_corpus_table(framework))
                load_status = gr.Code(language="json", label="Load status")
                load.click(
                    load_corpus,
                    [corpus, adapter, max_items, clear, single_case],
                    [corpus_table, load_status, chips],
                )
                load_loghub_btn.click(
                    load_loghub, [loghub_choice, loghub_n, clear], [corpus_table, load_status, chips]
                )
                # The "Fill" buttons only populate the form; nothing is indexed
                # until "Load pasted corpus" is clicked.
                use_challenge.click(fill_challenge, None, [corpus, adapter])
                use_demo.click(fill_demo, None, [corpus, adapter])
                load_arn.click(
                    load_arn_sample,
                    [max_items, clear],
                    [corpus, adapter, corpus_table, load_status, chips],
                )
                gr.Markdown(
                    "### Draft adapter (LLM proposes rules; encoding stays deterministic)\n"
                    "The LLM drafts extra keyword class rules as data for the frozen logs "
                    "encoder - it never writes facts. Drafts are content-addressed (blake3) "
                    "and remain flagged *LLM-proposed, unreviewed* until reverted or promoted."
                )
                with gr.Row():
                    # The drafting model defaults to the second LLM choice (DeepSeek).
                    draft_llm = gr.Radio(
                        list(LLM_CHOICES), value=list(LLM_CHOICES)[1], label="Drafting model", scale=2
                    )
                    draft_btn = gr.Button("Draft adapter from corpus (LLM)", scale=1)
                    apply_draft_btn = gr.Button("Apply draft adapter", variant="primary", scale=1)
                    revert_draft_btn = gr.Button("Revert to base adapter", scale=1)
                draft_json = gr.Textbox(
                    lines=12,
                    label="Proposed rules (editable JSON: classes + maskings)",
                    placeholder='{"classes": [{"name": "...Event", "keywords": ["..."]}], "maskings": []}',
                )
                draft_hash_box = gr.Textbox(label="Draft blake3 hash", interactive=False)
                draft_status = gr.Code(language="json", label="Draft status")
                draft_btn.click(
                    draft_adapter_from_corpus, [draft_llm], [draft_json, draft_hash_box, draft_status]
                )
                apply_draft_btn.click(
                    apply_draft_adapter,
                    [draft_json],
                    [corpus_table, draft_hash_box, draft_status, chips],
                )
                revert_draft_btn.click(
                    revert_draft_adapter, None, [corpus_table, draft_status, chips]
                )
            with gr.Tab("System"):
                gr.Markdown(
                    f"**Local model:** `{DEFAULT_MODEL_REPO}/{DEFAULT_MODEL_FILE}` "
                    "(fetch with `python3 scripts/fetch_model.py`).\n\n"
                    f"**API model:** `{DEEPSEEK_MODEL}` via DeepSeek — set `SMA_DEEPSEEK_API_KEY` "
                    "in the environment or the repo `.env` file.\n\n"
                    "Extraction and retrieval never use either model. Low-level "
                    "`encode/retrieve/map/project/verify` tools: `make api`."
                )
                status = gr.Code(value=backend_status(), language="json", label="LLM backend status")
                refresh = gr.Button("Refresh status")
                refresh.click(backend_status, None, status)
    return demo
622
+
623
+
624
def main(argv: list[str] | None = None) -> int:
    """Parse ``--host``/``--port``, build the workbench and launch it.

    Args:
        argv: Command-line arguments; ``None`` lets argparse read ``sys.argv``.

    Returns:
        ``0`` once ``launch`` returns.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("--host", default="127.0.0.1")
    cli.add_argument("--port", type=int, default=7860)
    options = cli.parse_args(argv)

    app = build_demo()
    # Imported after build_demo(), which raises a RuntimeError with install
    # instructions when gradio is missing.
    import gradio as gr

    launch_options = {
        "server_name": options.host,
        "server_port": options.port,
        "theme": gr.themes.Soft(),
        "css": CSS,
    }
    app.launch(**launch_options)
    return 0
639
+
640
+
641
# Script entry point: run the CLI and use main()'s return value as the exit status.
if __name__ == "__main__":
    raise SystemExit(main())