structuremappingmemory 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sma/__init__.py +5 -0
- sma/__main__.py +5 -0
- sma/agent/__init__.py +5 -0
- sma/agent/adapter_draft.py +217 -0
- sma/agent/api.py +67 -0
- sma/agent/comparison.py +591 -0
- sma/agent/llm.py +280 -0
- sma/agent/policies.py +21 -0
- sma/agent/service.py +95 -0
- sma/cli.py +65 -0
- sma/encoders/__init__.py +38 -0
- sma/encoders/agentobs.py +27 -0
- sma/encoders/base.py +23 -0
- sma/encoders/code_treesitter.py +64 -0
- sma/encoders/coverage.py +80 -0
- sma/encoders/draft_adapter.py +183 -0
- sma/encoders/healthcare.py +207 -0
- sma/encoders/logs_drain.py +142 -0
- sma/encoders/prose_tier1.py +57 -0
- sma/encoders/structured.py +57 -0
- sma/encoders/traces.py +45 -0
- sma/eval/__init__.py +2 -0
- sma/eval/agentic/__init__.py +35 -0
- sma/eval/agentic/arms/__init__.py +0 -0
- sma/eval/agentic/arms/cyber.py +48 -0
- sma/eval/agentic/arms/discovery.py +35 -0
- sma/eval/agentic/arms/finance.py +38 -0
- sma/eval/agentic/arms/legal.py +74 -0
- sma/eval/agentic/arms/medicine.py +45 -0
- sma/eval/agentic/harness.py +275 -0
- sma/eval/agentic/memories.py +308 -0
- sma/eval/agentic/metrics.py +82 -0
- sma/eval/agentic_qa/__init__.py +27 -0
- sma/eval/agentic_qa/agent.py +383 -0
- sma/eval/agentic_qa/metrics.py +239 -0
- sma/eval/agentic_qa/pools.py +197 -0
- sma/eval/arn.py +65 -0
- sma/eval/baselines/__init__.py +6 -0
- sma/eval/baselines/bge_dense.py +54 -0
- sma/eval/baselines/bm25.py +18 -0
- sma/eval/baselines/dense.py +42 -0
- sma/eval/baselines/hipporag.py +235 -0
- sma/eval/baselines/hybrid_rrf.py +30 -0
- sma/eval/baselines/longcontext_llm.py +124 -0
- sma/eval/baselines/rerank.py +41 -0
- sma/eval/baselines/splade.py +77 -0
- sma/eval/baselines/wl_kernel.py +163 -0
- sma/eval/bugsinpy.py +358 -0
- sma/eval/bugsinpy_families.py +164 -0
- sma/eval/crossdomain.py +89 -0
- sma/eval/diabetes.py +61 -0
- sma/eval/drift_env.py +26 -0
- sma/eval/drift_metrics.py +24 -0
- sma/eval/family_labels.py +167 -0
- sma/eval/fraud_elliptic/__init__.py +29 -0
- sma/eval/fraud_elliptic/encoder.py +279 -0
- sma/eval/fraud_elliptic/eval.py +269 -0
- sma/eval/fraud_elliptic/test_encoder.py +123 -0
- sma/eval/ieee_cis.py +66 -0
- sma/eval/loghub.py +16 -0
- sma/eval/loghub_eval.py +480 -0
- sma/eval/longmemeval.py +51 -0
- sma/eval/memory_backends/__init__.py +2 -0
- sma/eval/memory_backends/base.py +22 -0
- sma/eval/memory_backends/context_only.py +14 -0
- sma/eval/memory_backends/rag_notes.py +17 -0
- sma/eval/memory_backends/shared_llm.py +30 -0
- sma/eval/memory_backends/sma_memory.py +54 -0
- sma/eval/memory_backends/zep_graphiti.py +33 -0
- sma/eval/metrics.py +32 -0
- sma/eval/ontology_bench.py +219 -0
- sma/eval/report.py +573 -0
- sma/eval/ssb_eval.py +216 -0
- sma/eval/ssb_generator.py +116 -0
- sma/eval/stats.py +108 -0
- sma/eval/transfer_eval.py +844 -0
- sma/index/__init__.py +15 -0
- sma/index/ann.py +21 -0
- sma/index/content_vectors.py +60 -0
- sma/index/inverted.py +63 -0
- sma/index/macfac.py +174 -0
- sma/ir/__init__.py +22 -0
- sma/ir/canon.py +106 -0
- sma/ir/schema.py +165 -0
- sma/ir/sexpr.py +86 -0
- sma/ir/signatures.py +76 -0
- sma/match/__init__.py +20 -0
- sma/match/conflicts.py +46 -0
- sma/match/engine.py +60 -0
- sma/match/explain.py +59 -0
- sma/match/infer.py +54 -0
- sma/match/kernels.py +54 -0
- sma/match/mdl.py +30 -0
- sma/match/merge_cpsat.py +77 -0
- sma/match/merge_greedy.py +15 -0
- sma/match/mh.py +177 -0
- sma/match/ses.py +84 -0
- sma/match/types.py +115 -0
- sma/match/verifier.py +27 -0
- sma/ontology/__init__.py +45 -0
- sma/ontology/attack.py +134 -0
- sma/ontology/cpc.py +69 -0
- sma/ontology/graph.py +58 -0
- sma/ontology/loader.py +262 -0
- sma/ontology/mitre_xml.py +67 -0
- sma/ontology/mount.py +101 -0
- sma/ontology/rdf_loader.py +75 -0
- sma/ontology/registry.py +115 -0
- sma/ontology/router.py +69 -0
- sma/ontology/usgaap.py +73 -0
- sma/sage/__init__.py +6 -0
- sma/sage/assimilate.py +12 -0
- sma/sage/pools.py +105 -0
- sma/sage/probabilities.py +10 -0
- sma/store/__init__.py +6 -0
- sma/store/lmdb_store.py +78 -0
- sma/store/registry.py +26 -0
- sma/store/wal.py +26 -0
- sma/ui/app.py +642 -0
- structuremappingmemory-1.0.0.dist-info/METADATA +190 -0
- structuremappingmemory-1.0.0.dist-info/RECORD +125 -0
- structuremappingmemory-1.0.0.dist-info/WHEEL +5 -0
- structuremappingmemory-1.0.0.dist-info/entry_points.txt +2 -0
- structuremappingmemory-1.0.0.dist-info/licenses/LICENSE +204 -0
- structuremappingmemory-1.0.0.dist-info/top_level.txt +1 -0
sma/__init__.py
ADDED
sma/__main__.py
ADDED
sma/agent/__init__.py
ADDED
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
"""LLM-drafted adapter rules: the model proposes RULES, never facts.
|
|
2
|
+
|
|
3
|
+
The LLM sees sample log lines and emits candidate keyword class rules as
|
|
4
|
+
strict JSON. The output is data for a deterministic encoder
|
|
5
|
+
(sma.encoders.draft_adapter.DraftAdapter); no model output ever enters a case
|
|
6
|
+
directly. Drafts are content-addressed (blake3) and stored with a
|
|
7
|
+
generated-by note so every case encoded under them is auditable; they remain
|
|
8
|
+
"LLM-proposed, unreviewed" until a human promotes them.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import json
|
|
14
|
+
import re
|
|
15
|
+
from datetime import datetime, timezone
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
|
|
18
|
+
from sma.encoders.draft_adapter import (
|
|
19
|
+
MAX_CLASSES,
|
|
20
|
+
MAX_KEYWORDS,
|
|
21
|
+
DraftRules,
|
|
22
|
+
rules_hash,
|
|
23
|
+
rules_to_json,
|
|
24
|
+
validate_rules,
|
|
25
|
+
)
|
|
26
|
+
from sma.encoders.logs_drain import EVENT_CLASS_RULES, event_classes
|
|
27
|
+
|
|
28
|
+
# Keywords already claimed by the frozen ontology, deduplicated (set
# comprehension) and sorted so the resulting tuple is deterministic.
# A drafted keyword that equals one of these, or that contains one as a
# substring, can only fire on lines the frozen rules already cover - redundant
# rules double-count matches and dilute surprisal statistics (measured: the
# EOF rare-family regression).
# EVENT_CLASS_RULES is unpacked as 2-tuples whose second element is an
# iterable of keywords; the first element is ignored here (presumably the
# class name - verify against sma.encoders.logs_drain).
FROZEN_KEYWORDS = tuple(sorted({kw for _, kws in EVENT_CLASS_RULES for kw in kws}))
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _redundant_keyword(keyword: str) -> bool:
    """Tell whether *keyword* duplicates vocabulary in FROZEN_KEYWORDS.

    A keyword is redundant when it is identical to a frozen keyword or when a
    frozen keyword occurs inside it as a substring.
    """
    for frozen in FROZEN_KEYWORDS:
        if frozen == keyword or frozen in keyword:
            return True
    return False
|
|
37
|
+
|
|
38
|
+
from .llm import DeepSeekOrchestrator, LocalOrchestrator, default_deepseek, default_orchestrator
|
|
39
|
+
|
|
40
|
+
# Upper bound on how many sample lines build_draft_prompt puts in one prompt.
MAX_SAMPLE_LINES = 30

# Directory that store_draft writes artifacts into. The path is relative, so
# it resolves against the current working directory of the process.
DRAFT_DIR = Path("data/draft_adapters")

# System message sent with every drafting request. It states the JSON schema
# the reply must follow and interpolates the MAX_CLASSES / MAX_KEYWORDS limits
# imported from sma.encoders.draft_adapter.
# NOTE(review): the list of frozen class names at the end is hard-coded text;
# it is not derived from EVENT_CLASS_RULES, so it presumably has to be kept in
# sync by hand - confirm against sma.encoders.logs_drain.
DRAFT_SYSTEM_PROMPT = (
    "You draft deterministic log-classification rules for SMA-1, a structure-mapping "
    "memory system. You propose RULES (keyword -> event class), never facts. Reply with "
    "STRICT JSON only - no prose, no markdown fences. Schema: "
    '{"classes": [{"name": "somethingEvent", "keywords": ["kw1", "kw2"]}], '
    '"maskings": ["regex", ...]}. Constraints: at most '
    f"{MAX_CLASSES} classes; at most {MAX_KEYWORDS} keywords per class; class names are "
    "alphanumeric ending in 'Event'; keywords are lowercase substrings that appear in the "
    "sample lines; maskings are regexes for variable tokens (ids, timestamps, counters). "
    "Do NOT reuse these frozen class names: timeoutEvent, retryEvent, ioEvent, memoryEvent, "
    "kernelEvent, networkEvent, authEvent, storageEvent, lifecycleEvent, failureEvent."
)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def _resolve_orchestrator(llm) -> LocalOrchestrator | DeepSeekOrchestrator:
    """Turn a backend selector into an orchestrator object.

    Anything that is not a string is assumed to already be an orchestrator
    and is handed back untouched. The strings "local" and "deepseek" select
    the module-level default orchestrators imported from ``.llm``.

    Raises:
        ValueError: if *llm* is a string other than "local" or "deepseek".
    """
    if not isinstance(llm, str):
        return llm
    if llm == "local":
        return default_orchestrator
    if llm == "deepseek":
        return default_deepseek
    raise ValueError(f"unknown llm backend: {llm!r}; expected 'local' or 'deepseek'")
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def build_draft_prompt(sample_texts: list[str], residual_only: bool = True) -> list[dict]:
    """Assemble the chat messages used to request a rule draft.

    Every text in *sample_texts* is split into lines; each line is stripped
    and blank lines are skipped. With ``residual_only`` (the default), a line
    is also skipped when ``event_classes`` returns something truthy for its
    lowercased form, so only lines the frozen ontology does not cover reach
    the model. Collection stops once MAX_SAMPLE_LINES lines have been kept.

    Returns a two-element list: the system message (DRAFT_SYSTEM_PROMPT) and
    the user message. When no line survives the filter, the sample section of
    the user message is empty and the caller should not draft.
    """

    def _surviving_lines():
        # Lazily yield the lines worth showing, so no work is done on input
        # beyond the point where the cap is reached.
        for blob in sample_texts:
            for raw_line in blob.splitlines():
                candidate = raw_line.strip()
                if not candidate:
                    continue
                if residual_only and event_classes(candidate.lower()):
                    continue
                yield candidate

    kept: list[str] = []
    for candidate in _surviving_lines():
        kept.append(candidate)
        if len(kept) >= MAX_SAMPLE_LINES:
            break

    header = (
        "Sample log lines that fired NO rule of the frozen ontology (the covered lines "
        "are deliberately excluded - do not re-cover known vocabulary):\n\n"
    )
    footer = (
        "Propose extra deterministic class rules (JSON only, schema above) so these lines "
        "fire shared cross-system event classes. Prefer FEW, COARSE, reusable categories."
    )
    # The sample sits between two blank-line separators; kept lines are never
    # blank, so the sample itself never contains an empty line.
    user_message = header + "\n".join(kept) + "\n\n" + footer

    system_entry = {"role": "system", "content": DRAFT_SYSTEM_PROMPT}
    user_entry = {"role": "user", "content": user_message}
    return [system_entry, user_entry]
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def parse_draft_response(raw: str) -> tuple[DraftRules, str]:
    """Defensively parse the LLM reply into DraftRules.

    Markdown fences are stripped, then the text from the first ``{`` to the
    last ``}`` is decoded as JSON. Classes are checked one by one: keywords
    are stripped and lowercased, keywords that are redundant with the frozen
    ontology are removed, and each remaining class is passed through
    ``validate_rules`` on its own.

    Args:
        raw: The raw text returned by the model.

    Returns:
        ``(rules, note)``. If no JSON object can be decoded, or the assembled
        rules still fail validation after maskings are removed, *rules* is an
        empty ``DraftRules()`` and *note* starts with ``"parse failure:"``.
        Otherwise *note* summarises what was parsed and lists everything that
        was dropped: invalid rows, redundant keywords, duplicates, wrongly
        typed ``classes`` / ``keywords`` / ``maskings`` values, and maskings
        removed because they failed validation.
    """
    text = raw.strip()
    # Strip markdown fences and any prose around the first JSON object.
    text = re.sub(r"^```(?:json)?\s*|\s*```$", "", text, flags=re.MULTILINE).strip()
    start, end = text.find("{"), text.rfind("}")
    if start < 0 or end <= start:
        return DraftRules(), f"parse failure: no JSON object in reply: {raw[:200]!r}"
    try:
        payload = json.loads(text[start : end + 1])
    except json.JSONDecodeError as exc:
        return DraftRules(), f"parse failure: {exc}"
    if not isinstance(payload, dict):
        return DraftRules(), f"parse failure: expected object, got {type(payload).__name__}"

    classes: list[tuple[str, tuple[str, ...]]] = []
    dropped: list[str] = []

    # The model may return any JSON type here. Slicing a dict or a number
    # raises, and a string would be iterated character by character, so
    # anything that is not a list is recorded and ignored.
    raw_classes = payload.get("classes") or []
    if not isinstance(raw_classes, list):
        dropped.append(f"classes (expected list, got {type(raw_classes).__name__})")
        raw_classes = []

    for row in raw_classes[:MAX_CLASSES]:
        if not isinstance(row, dict):
            dropped.append(repr(row))
            continue
        # Normalise once so the duplicate check below compares str with str.
        name = str(row.get("name"))

        raw_keywords = row.get("keywords") or []
        if not isinstance(raw_keywords, list):
            dropped.append(
                f"{name} (keywords: expected list, got {type(raw_keywords).__name__})"
            )
            continue
        cleaned = [
            k.strip().lower()
            for k in raw_keywords[:MAX_KEYWORDS]
            if isinstance(k, str) and k.strip()
        ]
        if not cleaned:
            # Nothing usable was supplied; this is not a coverage issue.
            dropped.append(f"{name} (no usable keywords)")
            continue

        # Split in one pass so each keyword is tested against the frozen
        # vocabulary only once.
        fresh: list[str] = []
        redundant: list[str] = []
        for k in cleaned:
            (redundant if _redundant_keyword(k) else fresh).append(k)
        if redundant:
            dropped.append(f"{name}: redundant keywords {', '.join(redundant)}")
        if not fresh:
            dropped.append(f"{name} (fully covered by frozen ontology)")
            continue
        keywords = tuple(fresh)

        # Validate the class alone so one bad class does not sink the draft.
        if validate_rules(DraftRules(classes=((name, keywords),))):
            dropped.append(name)
            continue
        if any(existing == name for existing, _ in classes):
            dropped.append(f"{name} (duplicate)")
            continue
        classes.append((name, keywords))

    raw_maskings = payload.get("maskings") or []
    if not isinstance(raw_maskings, list):
        dropped.append(f"maskings (expected list, got {type(raw_maskings).__name__})")
        raw_maskings = []
    maskings = tuple(m for m in raw_maskings if isinstance(m, str) and m.strip())

    rules = DraftRules(classes=tuple(classes), maskings=maskings)
    errors = validate_rules(rules)
    if errors:
        # Bad masking regexes etc.: drop maskings and retry once without them.
        first_errors = errors
        rules = DraftRules(classes=tuple(classes))
        errors = validate_rules(rules)
        if errors:
            return DraftRules(), "parse failure: " + "; ".join(errors)
        if maskings:
            # Keep the audit trail complete: say that maskings were removed
            # and why.
            dropped.append("maskings (" + "; ".join(first_errors) + ")")

    note = f"parsed {len(rules.classes)} class(es), {len(rules.maskings)} masking(s)"
    if dropped:
        note += f"; dropped invalid: {', '.join(dropped)}"
    return rules, note
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
def store_draft(rules: DraftRules, generated_by: str, note: str) -> tuple[str, Path]:
    """Write the draft artifact to DRAFT_DIR, named after its content hash.

    The file name is the first 16 characters of ``rules_hash(rules)`` plus
    ``.json``. The full digest is kept inside the artifact, together with the
    generated-by string, a fixed "LLM-proposed, unreviewed" status, a UTC
    creation timestamp, the note, and the rules themselves. DRAFT_DIR is
    created if it does not exist.

    Returns:
        ``(digest, path)``: the full digest and the path that was written.
    """
    digest = rules_hash(rules)
    DRAFT_DIR.mkdir(parents=True, exist_ok=True)
    target = DRAFT_DIR / f"{digest[:16]}.json"

    artifact = {}
    artifact["blake3"] = digest
    artifact["generated_by"] = generated_by
    artifact["status"] = "LLM-proposed, unreviewed"
    artifact["created"] = datetime.now(timezone.utc).isoformat()
    artifact["note"] = note
    # Round-trip through the canonical JSON form so the stored rules are
    # plain JSON data.
    artifact["rules"] = json.loads(rules_to_json(rules))

    serialized = json.dumps(artifact, indent=2, sort_keys=True)
    target.write_text(serialized, encoding="utf-8")
    return digest, target
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
def draft_rules(sample_texts: list[str], llm="deepseek") -> tuple[DraftRules, str]:
    """Request extra class rules from an LLM backend and persist the result.

    Args:
        sample_texts: Raw log texts; the prompt is built from their lines.
        llm: "local", "deepseek", or an orchestrator object.

    Returns:
        ``(rules, note)``. The rules are empty, with the reason in the note,
        when there is nothing to draft, when the backend call raises, or when
        the reply yields no classes. When classes were parsed, the artifact is
        stored through ``store_draft`` and the note gains the digest, the
        stored path and the generated-by string.

    Only exceptions raised by the backend's ``complete`` call are caught
    here. An unknown backend name raises ValueError, and errors raised while
    storing the artifact propagate to the caller.
    """
    backend = _resolve_orchestrator(llm)
    prompt = build_draft_prompt(sample_texts)

    # The user message is laid out as header, blank line, sample, blank line,
    # instructions, so the second blank-line-separated section is the sample.
    sample_section = prompt[-1]["content"].split("\n\n")[1]
    if not sample_section.strip():
        nothing_to_do = (
            "all sample lines already fire frozen ontology rules; nothing to draft "
            "(coverage is not the problem for this corpus)"
        )
        return DraftRules(), nothing_to_do

    try:
        reply = backend.complete(prompt, max_tokens=600, temperature=0.0)
    except Exception as exc:
        return DraftRules(), f"draft failed ({backend.name}): {type(exc).__name__}: {exc}"

    rules, note = parse_draft_response(reply)
    if rules.classes:
        generated_by = f"{backend.name}:{backend.status.get('model', '?')} via sma.agent.adapter_draft"
        digest, path = store_draft(rules, generated_by, note)
        note = f"{note}; blake3={digest}; stored={path}; generated-by {generated_by}"
    return rules, note
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
# Names exported by a star-import of this module. The underscore-prefixed
# helpers, DRAFT_DIR and FROZEN_KEYWORDS are not listed here.
__all__ = [
    "DRAFT_SYSTEM_PROMPT",
    "MAX_SAMPLE_LINES",
    "build_draft_prompt",
    "draft_rules",
    "parse_draft_response",
    "store_draft",
]
|
sma/agent/api.py
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
"""FastAPI tool surface for SMA."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
from .service import default_service
|
|
8
|
+
|
|
9
|
+
try:
|
|
10
|
+
from fastapi import FastAPI
|
|
11
|
+
from pydantic import BaseModel
|
|
12
|
+
except ImportError: # pragma: no cover - optional dependency fallback
|
|
13
|
+
FastAPI = None # type: ignore
|
|
14
|
+
BaseModel = object # type: ignore
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
# NOTE: documentation in this block is written as comments, not docstrings.
# FastAPI and pydantic read docstrings at runtime and publish them in the
# generated OpenAPI schema, so adding docstrings would change the API output.
if FastAPI is None:
    # Optional dependency missing: keep the module importable and expose a
    # placeholder instead of an application object.
    app = None

else:
    app = FastAPI(title="SMA-1 Agentic Memory API")

    # Request body for POST /encode; kwargs are forwarded as keyword
    # arguments to the service call.
    class EncodeRequest(BaseModel):
        artifact: str
        adapter_id: str
        kwargs: dict[str, Any] = {}

    # Request body for POST /retrieve; both case fields are optional.
    class RetrieveRequest(BaseModel):
        case_id: str | None = None
        inline_case: str | None = None
        k: int = 10

    # Request body for POST /map; scorer defaults to "ses".
    class MapRequest(BaseModel):
        base_id: str
        target_id: str
        scorer: str = "ses"

    # Request body for POST /project.
    class ProjectRequest(BaseModel):
        gmap_id: str

    # Request body for POST /verify.
    class VerifyRequest(BaseModel):
        inference: str

    # Each route is a thin pass-through to the shared default_service object.

    @app.post("/encode")
    def encode(req: EncodeRequest):
        return default_service.encode(req.artifact, req.adapter_id, **req.kwargs)

    @app.post("/retrieve")
    def retrieve(req: RetrieveRequest):
        return default_service.retrieve(req.case_id, req.inline_case, req.k)

    @app.post("/map")
    def map_cases(req: MapRequest):
        return default_service.map(req.base_id, req.target_id, req.scorer)

    @app.post("/project")
    def project(req: ProjectRequest):
        return default_service.project(req.gmap_id)

    @app.post("/verify")
    def verify(req: VerifyRequest):
        return default_service.verify(req.inference)

    @app.get("/pool/{pool_id}")
    def pool_stats(pool_id: str):
        return default_service.pool_stats(pool_id)
|
|
67
|
+
|