structuremappingmemory 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (125) hide show
  1. sma/__init__.py +5 -0
  2. sma/__main__.py +5 -0
  3. sma/agent/__init__.py +5 -0
  4. sma/agent/adapter_draft.py +217 -0
  5. sma/agent/api.py +67 -0
  6. sma/agent/comparison.py +591 -0
  7. sma/agent/llm.py +280 -0
  8. sma/agent/policies.py +21 -0
  9. sma/agent/service.py +95 -0
  10. sma/cli.py +65 -0
  11. sma/encoders/__init__.py +38 -0
  12. sma/encoders/agentobs.py +27 -0
  13. sma/encoders/base.py +23 -0
  14. sma/encoders/code_treesitter.py +64 -0
  15. sma/encoders/coverage.py +80 -0
  16. sma/encoders/draft_adapter.py +183 -0
  17. sma/encoders/healthcare.py +207 -0
  18. sma/encoders/logs_drain.py +142 -0
  19. sma/encoders/prose_tier1.py +57 -0
  20. sma/encoders/structured.py +57 -0
  21. sma/encoders/traces.py +45 -0
  22. sma/eval/__init__.py +2 -0
  23. sma/eval/agentic/__init__.py +35 -0
  24. sma/eval/agentic/arms/__init__.py +0 -0
  25. sma/eval/agentic/arms/cyber.py +48 -0
  26. sma/eval/agentic/arms/discovery.py +35 -0
  27. sma/eval/agentic/arms/finance.py +38 -0
  28. sma/eval/agentic/arms/legal.py +74 -0
  29. sma/eval/agentic/arms/medicine.py +45 -0
  30. sma/eval/agentic/harness.py +275 -0
  31. sma/eval/agentic/memories.py +308 -0
  32. sma/eval/agentic/metrics.py +82 -0
  33. sma/eval/agentic_qa/__init__.py +27 -0
  34. sma/eval/agentic_qa/agent.py +383 -0
  35. sma/eval/agentic_qa/metrics.py +239 -0
  36. sma/eval/agentic_qa/pools.py +197 -0
  37. sma/eval/arn.py +65 -0
  38. sma/eval/baselines/__init__.py +6 -0
  39. sma/eval/baselines/bge_dense.py +54 -0
  40. sma/eval/baselines/bm25.py +18 -0
  41. sma/eval/baselines/dense.py +42 -0
  42. sma/eval/baselines/hipporag.py +235 -0
  43. sma/eval/baselines/hybrid_rrf.py +30 -0
  44. sma/eval/baselines/longcontext_llm.py +124 -0
  45. sma/eval/baselines/rerank.py +41 -0
  46. sma/eval/baselines/splade.py +77 -0
  47. sma/eval/baselines/wl_kernel.py +163 -0
  48. sma/eval/bugsinpy.py +358 -0
  49. sma/eval/bugsinpy_families.py +164 -0
  50. sma/eval/crossdomain.py +89 -0
  51. sma/eval/diabetes.py +61 -0
  52. sma/eval/drift_env.py +26 -0
  53. sma/eval/drift_metrics.py +24 -0
  54. sma/eval/family_labels.py +167 -0
  55. sma/eval/fraud_elliptic/__init__.py +29 -0
  56. sma/eval/fraud_elliptic/encoder.py +279 -0
  57. sma/eval/fraud_elliptic/eval.py +269 -0
  58. sma/eval/fraud_elliptic/test_encoder.py +123 -0
  59. sma/eval/ieee_cis.py +66 -0
  60. sma/eval/loghub.py +16 -0
  61. sma/eval/loghub_eval.py +480 -0
  62. sma/eval/longmemeval.py +51 -0
  63. sma/eval/memory_backends/__init__.py +2 -0
  64. sma/eval/memory_backends/base.py +22 -0
  65. sma/eval/memory_backends/context_only.py +14 -0
  66. sma/eval/memory_backends/rag_notes.py +17 -0
  67. sma/eval/memory_backends/shared_llm.py +30 -0
  68. sma/eval/memory_backends/sma_memory.py +54 -0
  69. sma/eval/memory_backends/zep_graphiti.py +33 -0
  70. sma/eval/metrics.py +32 -0
  71. sma/eval/ontology_bench.py +219 -0
  72. sma/eval/report.py +573 -0
  73. sma/eval/ssb_eval.py +216 -0
  74. sma/eval/ssb_generator.py +116 -0
  75. sma/eval/stats.py +108 -0
  76. sma/eval/transfer_eval.py +844 -0
  77. sma/index/__init__.py +15 -0
  78. sma/index/ann.py +21 -0
  79. sma/index/content_vectors.py +60 -0
  80. sma/index/inverted.py +63 -0
  81. sma/index/macfac.py +174 -0
  82. sma/ir/__init__.py +22 -0
  83. sma/ir/canon.py +106 -0
  84. sma/ir/schema.py +165 -0
  85. sma/ir/sexpr.py +86 -0
  86. sma/ir/signatures.py +76 -0
  87. sma/match/__init__.py +20 -0
  88. sma/match/conflicts.py +46 -0
  89. sma/match/engine.py +60 -0
  90. sma/match/explain.py +59 -0
  91. sma/match/infer.py +54 -0
  92. sma/match/kernels.py +54 -0
  93. sma/match/mdl.py +30 -0
  94. sma/match/merge_cpsat.py +77 -0
  95. sma/match/merge_greedy.py +15 -0
  96. sma/match/mh.py +177 -0
  97. sma/match/ses.py +84 -0
  98. sma/match/types.py +115 -0
  99. sma/match/verifier.py +27 -0
  100. sma/ontology/__init__.py +45 -0
  101. sma/ontology/attack.py +134 -0
  102. sma/ontology/cpc.py +69 -0
  103. sma/ontology/graph.py +58 -0
  104. sma/ontology/loader.py +262 -0
  105. sma/ontology/mitre_xml.py +67 -0
  106. sma/ontology/mount.py +101 -0
  107. sma/ontology/rdf_loader.py +75 -0
  108. sma/ontology/registry.py +115 -0
  109. sma/ontology/router.py +69 -0
  110. sma/ontology/usgaap.py +73 -0
  111. sma/sage/__init__.py +6 -0
  112. sma/sage/assimilate.py +12 -0
  113. sma/sage/pools.py +105 -0
  114. sma/sage/probabilities.py +10 -0
  115. sma/store/__init__.py +6 -0
  116. sma/store/lmdb_store.py +78 -0
  117. sma/store/registry.py +26 -0
  118. sma/store/wal.py +26 -0
  119. sma/ui/app.py +642 -0
  120. structuremappingmemory-1.0.0.dist-info/METADATA +190 -0
  121. structuremappingmemory-1.0.0.dist-info/RECORD +125 -0
  122. structuremappingmemory-1.0.0.dist-info/WHEEL +5 -0
  123. structuremappingmemory-1.0.0.dist-info/entry_points.txt +2 -0
  124. structuremappingmemory-1.0.0.dist-info/licenses/LICENSE +204 -0
  125. structuremappingmemory-1.0.0.dist-info/top_level.txt +1 -0
sma/__init__.py ADDED
@@ -0,0 +1,5 @@
1
"""SMA-1: Structure-Mapping Agentic Memory MVP."""

__all__ = ["__version__"]

# Must match the version in the distribution metadata
# (structuremappingmemory-1.0.0.dist-info); the previous value "0.1.0" was
# stale and made `sma.__version__` disagree with the installed wheel.
__version__ = "1.0.0"
5
+
sma/__main__.py ADDED
@@ -0,0 +1,5 @@
1
+ from .cli import main
2
+
3
+ if __name__ == "__main__":
4
+ main()
5
+
sma/agent/__init__.py ADDED
@@ -0,0 +1,5 @@
1
+ from .policies import reject_free_text_facts, require_provenance
2
+ from .service import MemoryService
3
+
4
+ __all__ = ["MemoryService", "reject_free_text_facts", "require_provenance"]
5
+
sma/agent/adapter_draft.py ADDED
@@ -0,0 +1,217 @@
1
+ """LLM-drafted adapter rules: the model proposes RULES, never facts.
2
+
3
+ The LLM sees sample log lines and emits candidate keyword class rules as
4
+ strict JSON. The output is data for a deterministic encoder
5
+ (sma.encoders.draft_adapter.DraftAdapter); no model output ever enters a case
6
+ directly. Drafts are content-addressed (blake3) and stored with a
7
+ generated-by note so every case encoded under them is auditable; they remain
8
+ "LLM-proposed, unreviewed" until a human promotes them.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import json
14
+ import re
15
+ from datetime import datetime, timezone
16
+ from pathlib import Path
17
+
18
+ from sma.encoders.draft_adapter import (
19
+ MAX_CLASSES,
20
+ MAX_KEYWORDS,
21
+ DraftRules,
22
+ rules_hash,
23
+ rules_to_json,
24
+ validate_rules,
25
+ )
26
+ from sma.encoders.logs_drain import EVENT_CLASS_RULES, event_classes
27
+
28
# Every keyword the frozen ontology already owns, deduplicated and sorted.
# A drafted keyword that equals one of these, or merely contains one as a
# substring, can only match lines the frozen rules already cover. Such
# redundant rules double-count matches and dilute surprisal statistics
# (measured: the EOF rare-family regression).
FROZEN_KEYWORDS = tuple(
    sorted(
        {
            keyword
            for _class_name, class_keywords in EVENT_CLASS_RULES
            for keyword in class_keywords
        }
    )
)
33
+
34
+
35
def _redundant_keyword(keyword: str) -> bool:
    """Return True when *keyword* equals or contains a frozen-ontology keyword."""
    for frozen in FROZEN_KEYWORDS:
        # Substring containment also covers exact equality for strings.
        if frozen in keyword:
            return True
    return False
37
+
38
+ from .llm import DeepSeekOrchestrator, LocalOrchestrator, default_deepseek, default_orchestrator
39
+
40
# Upper bound on the number of sample log lines placed in one drafting prompt.
MAX_SAMPLE_LINES = 30

# Directory (relative to the working directory) where draft artifacts are written.
DRAFT_DIR = Path("data/draft_adapters")

# System message sent with every drafting request. It fixes the reply format
# (strict JSON), states the size limits taken from the draft-adapter encoder,
# and lists the frozen class names the model must not reuse.
DRAFT_SYSTEM_PROMPT = (
    # Role and reply format.
    "You draft deterministic log-classification rules for SMA-1, a structure-mapping "
    "memory system. You propose RULES (keyword -> event class), never facts. Reply with "
    "STRICT JSON only - no prose, no markdown fences. Schema: "
    # JSON schema example.
    '{"classes": [{"name": "somethingEvent", "keywords": ["kw1", "kw2"]}], '
    '"maskings": ["regex", ...]}. Constraints: at most '
    # Size and naming constraints.
    f"{MAX_CLASSES} classes; at most {MAX_KEYWORDS} keywords per class; class names are "
    "alphanumeric ending in 'Event'; keywords are lowercase substrings that appear in the "
    "sample lines; maskings are regexes for variable tokens (ids, timestamps, counters). "
    # Reserved names.
    "Do NOT reuse these frozen class names: timeoutEvent, retryEvent, ioEvent, memoryEvent, "
    "kernelEvent, networkEvent, authEvent, storageEvent, lifecycleEvent, failureEvent."
)
56
+
57
+
58
def _resolve_orchestrator(llm) -> LocalOrchestrator | DeepSeekOrchestrator:
    """Turn a backend selector into an orchestrator object.

    Args:
        llm: Either a backend name ("local" or "deepseek") or an
            already-constructed orchestrator, which is returned untouched.

    Returns:
        The module-level default orchestrator for a named backend, or
        ``llm`` itself when it is not a string.

    Raises:
        ValueError: If ``llm`` is a string other than the two known names.
    """
    if not isinstance(llm, str):
        return llm
    if llm == "local":
        return default_orchestrator
    if llm == "deepseek":
        return default_deepseek
    raise ValueError(f"unknown llm backend: {llm!r}; expected 'local' or 'deepseek'")
66
+
67
+
68
def build_draft_prompt(sample_texts: list[str], residual_only: bool = True) -> list[dict]:
    """Assemble the system + user chat messages for a drafting request.

    Each sample text is split into lines; blank lines are skipped and the
    rest are stripped of surrounding whitespace. With ``residual_only`` (the
    default) a line is also skipped when ``event_classes`` reports a match
    for its lowercased form, so the model only sees lines the frozen
    ontology does not classify. Collection stops once ``MAX_SAMPLE_LINES``
    lines have been kept.

    Args:
        sample_texts: Raw texts, each possibly containing several log lines.
        residual_only: Exclude lines that already fire a frozen class.

    Returns:
        Two messages: the fixed system prompt, then a user message whose
        middle section is the kept lines joined by newlines. That section is
        empty when no line survives filtering (the caller should not draft).
    """

    def _eligible_lines():
        # Lazily yield lines in input order so filtering stops with the cap.
        for text in sample_texts:
            for raw_line in text.splitlines():
                candidate = raw_line.strip()
                if not candidate:
                    continue
                if residual_only and event_classes(candidate.lower()):
                    continue
                yield candidate

    kept: list[str] = []
    for candidate in _eligible_lines():
        kept.append(candidate)
        if len(kept) >= MAX_SAMPLE_LINES:
            break

    header = (
        "Sample log lines that fired NO rule of the frozen ontology (the covered lines "
        "are deliberately excluded - do not re-cover known vocabulary):\n\n"
    )
    instructions = (
        "Propose extra deterministic class rules (JSON only, schema above) so these lines "
        "fire shared cross-system event classes. Prefer FEW, COARSE, reusable categories."
    )
    user_content = header + "\n".join(kept) + "\n\n" + instructions

    system_message = {"role": "system", "content": DRAFT_SYSTEM_PROMPT}
    user_message = {"role": "user", "content": user_content}
    return [system_message, user_message]
101
+
102
+
103
def parse_draft_response(raw: str) -> tuple[DraftRules, str]:
    """Defensively parse an LLM reply into DraftRules.

    The reply is stripped of markdown fences, the span from the first ``{``
    to the last ``}`` is decoded as JSON, and each proposed class is checked
    on its own so one bad class does not sink the whole draft.

    A class is dropped (and recorded in the note) when it is not a JSON
    object, its ``keywords`` field is not a list, it has no usable keywords,
    all of its keywords are redundant with the frozen ontology,
    ``validate_rules`` reports errors for it, or its name repeats an earlier
    class. Redundant keywords are removed from classes that are otherwise
    kept. If the assembled rules fail validation they are retried once
    without maskings; maskings discarded that way are recorded in the note.

    Args:
        raw: The raw text returned by the LLM.

    Returns:
        ``(rules, note)``. On a parse failure ``rules`` is an empty
        ``DraftRules()`` and ``note`` starts with ``"parse failure"``.
        Otherwise ``note`` summarises how many classes and maskings were
        parsed and lists anything that was dropped.
    """
    text = raw.strip()
    # Strip markdown fences and any prose around the first JSON object.
    text = re.sub(r"^```(?:json)?\s*|\s*```$", "", text, flags=re.MULTILINE).strip()
    start, end = text.find("{"), text.rfind("}")
    if start < 0 or end <= start:
        return DraftRules(), f"parse failure: no JSON object in reply: {raw[:200]!r}"
    try:
        payload = json.loads(text[start : end + 1])
    except json.JSONDecodeError as exc:
        return DraftRules(), f"parse failure: {exc}"
    if not isinstance(payload, dict):
        return DraftRules(), f"parse failure: expected object, got {type(payload).__name__}"

    classes: list[tuple[str, tuple[str, ...]]] = []
    dropped: list[str] = []

    # The model may send a string, number or object where a list is expected.
    # Slicing or iterating those would raise, or split a string into
    # single-character "keywords", so anything that is not a list is rejected.
    raw_classes = payload.get("classes") or []
    if not isinstance(raw_classes, list):
        dropped.append(f"classes (expected list, got {type(raw_classes).__name__})")
        raw_classes = []

    for row in raw_classes[:MAX_CLASSES]:
        if not isinstance(row, dict):
            dropped.append(repr(row))
            continue
        name = str(row.get("name"))
        raw_keywords = row.get("keywords") or []
        if not isinstance(raw_keywords, list):
            dropped.append(
                f"{name} (keywords: expected list, got {type(raw_keywords).__name__})"
            )
            continue
        keywords = tuple(
            k.strip().lower()
            for k in raw_keywords[:MAX_KEYWORDS]
            if isinstance(k, str) and k.strip()
        )
        if not keywords:
            dropped.append(f"{name} (no usable keywords)")
            continue
        stripped_redundant = tuple(k for k in keywords if _redundant_keyword(k))
        keywords = tuple(k for k in keywords if not _redundant_keyword(k))
        if stripped_redundant:
            dropped.append(f"{name}: redundant keywords {', '.join(stripped_redundant)}")
        if not keywords:
            dropped.append(f"{name} (fully covered by frozen ontology)")
            continue
        # Validate the class in isolation so one bad class is dropped alone.
        if validate_rules(DraftRules(classes=((name, keywords),))):
            dropped.append(name)
            continue
        # Compare against the stored (stringified) names.
        if any(existing == name for existing, _ in classes):
            dropped.append(f"{name} (duplicate)")
            continue
        classes.append((name, keywords))

    raw_maskings = payload.get("maskings") or []
    if not isinstance(raw_maskings, list):
        dropped.append(f"maskings (expected list, got {type(raw_maskings).__name__})")
        raw_maskings = []
    maskings = tuple(m for m in raw_maskings if isinstance(m, str) and m.strip())

    rules = DraftRules(classes=tuple(classes), maskings=maskings)
    errors = validate_rules(rules)
    if errors:
        # Bad masking regexes etc.: drop maskings and retry once without them.
        masking_errors = errors
        rules = DraftRules(classes=tuple(classes))
        errors = validate_rules(rules)
        if errors:
            return DraftRules(), "parse failure: " + "; ".join(errors)
        if maskings:
            # Record the drop so the note explains the "0 masking(s)" count.
            dropped.append("maskings (" + "; ".join(str(e) for e in masking_errors) + ")")

    note = f"parsed {len(rules.classes)} class(es), {len(rules.maskings)} masking(s)"
    if dropped:
        note += f"; dropped invalid: {', '.join(dropped)}"
    return rules, note
165
+
166
+
167
def store_draft(rules: DraftRules, generated_by: str, note: str) -> tuple[str, Path]:
    """Write the draft artifact to disk, named after its content hash.

    The file is ``DRAFT_DIR/<first 16 hash characters>.json`` and holds the
    full hash, provenance, an unreviewed status marker, a UTC creation
    timestamp, the parse note and the rules themselves. ``DRAFT_DIR`` is
    created if missing; an existing file with the same name is overwritten.

    Args:
        rules: The draft rules to persist.
        generated_by: Provenance string identifying the producing backend.
        note: Human-readable parse note stored with the artifact.

    Returns:
        ``(digest, path)``: the full content hash and the file written.
    """
    content_hash = rules_hash(rules)
    DRAFT_DIR.mkdir(parents=True, exist_ok=True)
    target = DRAFT_DIR / f"{content_hash[:16]}.json"

    record = {"blake3": content_hash, "generated_by": generated_by}
    record["status"] = "LLM-proposed, unreviewed"
    record["created"] = datetime.now(timezone.utc).isoformat()
    record["note"] = note
    # Round-trip through the canonical JSON form so the stored rules are plain data.
    record["rules"] = json.loads(rules_to_json(rules))

    serialized = json.dumps(record, indent=2, sort_keys=True)
    target.write_text(serialized, encoding="utf-8")
    return content_hash, target
182
+
183
+
184
def draft_rules(sample_texts: list[str], llm="deepseek") -> tuple[DraftRules, str]:
    """Ask an LLM backend to draft extra class rules from sample lines.

    Args:
        sample_texts: Raw texts whose lines are offered to the model as samples.
        llm: Backend name ("local" or "deepseek") or an orchestrator object.

    Returns:
        ``(rules, note)``. On failure (nothing left to draft, the backend
        call raising, an unparseable reply, or the artifact not being
        writable) the rules are empty and the note carries the reason. On
        success the artifact is stored content-addressed under
        data/draft_adapters/ and the note records the hash, the path and a
        generated-by string.

    Raises:
        ValueError: If ``llm`` is an unrecognised backend name.
    """
    orchestrator = _resolve_orchestrator(llm)
    messages = build_draft_prompt(sample_texts)
    # The user message is "<header>\n\n<sample lines>\n\n<instructions>", so
    # field 1 of the split is the sample; it is empty when no line survived.
    if not messages[-1]["content"].split("\n\n")[1].strip():
        return DraftRules(), (
            "all sample lines already fire frozen ontology rules; nothing to draft "
            "(coverage is not the problem for this corpus)"
        )
    try:
        raw = orchestrator.complete(messages, max_tokens=600, temperature=0.0)
    except Exception as exc:
        # Deliberately broad: any backend error becomes a note, not a crash.
        return DraftRules(), f"draft failed ({orchestrator.name}): {type(exc).__name__}: {exc}"
    rules, note = parse_draft_response(raw)
    if not rules.classes:
        return rules, note
    generated_by = f"{orchestrator.name}:{orchestrator.status.get('model', '?')} via sma.agent.adapter_draft"
    try:
        digest, path = store_draft(rules, generated_by, note)
    except OSError as exc:
        # Rules that could not be persisted are not auditable, so honour the
        # documented contract: empty rules, with the error in the note.
        return DraftRules(), f"{note}; store failed: {type(exc).__name__}: {exc}"
    return rules, f"{note}; blake3={digest}; stored={path}; generated-by {generated_by}"
208
+
209
+
210
+ __all__ = [
211
+ "DRAFT_SYSTEM_PROMPT",
212
+ "MAX_SAMPLE_LINES",
213
+ "build_draft_prompt",
214
+ "draft_rules",
215
+ "parse_draft_response",
216
+ "store_draft",
217
+ ]
sma/agent/api.py ADDED
@@ -0,0 +1,67 @@
1
+ """FastAPI tool surface for SMA."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any
6
+
7
+ from .service import default_service
8
+
9
+ try:
10
+ from fastapi import FastAPI
11
+ from pydantic import BaseModel
12
+ except ImportError: # pragma: no cover - optional dependency fallback
13
+ FastAPI = None # type: ignore
14
+ BaseModel = object # type: ignore
15
+
16
+
17
if FastAPI is None:
    # FastAPI/pydantic are optional; without them no HTTP surface is exposed.
    app = None
else:
    app = FastAPI(title="SMA-1 Agentic Memory API")

    # NOTE: models and handlers below are documented with comments rather
    # than docstrings, because FastAPI publishes docstrings as descriptions
    # in the generated OpenAPI schema.

    # Body of POST /encode: the artifact text, the adapter to encode it with,
    # and extra keyword arguments forwarded to the service.
    class EncodeRequest(BaseModel):
        artifact: str
        adapter_id: str
        kwargs: dict[str, Any] = {}

    # Body of POST /retrieve: a stored case id and/or an inline case, plus
    # the number of results requested.
    class RetrieveRequest(BaseModel):
        case_id: str | None = None
        inline_case: str | None = None
        k: int = 10

    # Body of POST /map: the two case ids to map and the scorer name.
    class MapRequest(BaseModel):
        base_id: str
        target_id: str
        scorer: str = "ses"

    # Body of POST /project.
    class ProjectRequest(BaseModel):
        gmap_id: str

    # Body of POST /verify.
    class VerifyRequest(BaseModel):
        inference: str

    # Each handler is a thin pass-through to the shared default service.
    @app.post("/encode")
    def encode(req: EncodeRequest):
        return default_service.encode(req.artifact, req.adapter_id, **req.kwargs)

    @app.post("/retrieve")
    def retrieve(req: RetrieveRequest):
        return default_service.retrieve(req.case_id, req.inline_case, req.k)

    @app.post("/map")
    def map_cases(req: MapRequest):
        return default_service.map(req.base_id, req.target_id, req.scorer)

    @app.post("/project")
    def project(req: ProjectRequest):
        return default_service.project(req.gmap_id)

    @app.post("/verify")
    def verify(req: VerifyRequest):
        return default_service.verify(req.inference)

    @app.get("/pool/{pool_id}")
    def pool_stats(pool_id: str):
        return default_service.pool_stats(pool_id)
67
+