tweek-0.1.0-py3-none-any.whl → tweek-0.2.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85)
  1. tweek/__init__.py +2 -2
  2. tweek/_keygen.py +53 -0
  3. tweek/audit.py +288 -0
  4. tweek/cli.py +5303 -2396
  5. tweek/cli_model.py +380 -0
  6. tweek/config/families.yaml +609 -0
  7. tweek/config/manager.py +42 -5
  8. tweek/config/patterns.yaml +1510 -8
  9. tweek/config/tiers.yaml +161 -11
  10. tweek/diagnostics.py +71 -2
  11. tweek/hooks/break_glass.py +163 -0
  12. tweek/hooks/feedback.py +223 -0
  13. tweek/hooks/overrides.py +531 -0
  14. tweek/hooks/post_tool_use.py +472 -0
  15. tweek/hooks/pre_tool_use.py +1024 -62
  16. tweek/integrations/openclaw.py +443 -0
  17. tweek/integrations/openclaw_server.py +385 -0
  18. tweek/licensing.py +14 -54
  19. tweek/logging/bundle.py +2 -2
  20. tweek/logging/security_log.py +56 -13
  21. tweek/mcp/approval.py +57 -16
  22. tweek/mcp/proxy.py +18 -0
  23. tweek/mcp/screening.py +5 -5
  24. tweek/mcp/server.py +4 -1
  25. tweek/memory/__init__.py +24 -0
  26. tweek/memory/queries.py +223 -0
  27. tweek/memory/safety.py +140 -0
  28. tweek/memory/schemas.py +80 -0
  29. tweek/memory/store.py +989 -0
  30. tweek/platform/__init__.py +4 -4
  31. tweek/plugins/__init__.py +40 -24
  32. tweek/plugins/base.py +1 -1
  33. tweek/plugins/detectors/__init__.py +3 -3
  34. tweek/plugins/detectors/{moltbot.py → openclaw.py} +30 -27
  35. tweek/plugins/git_discovery.py +16 -4
  36. tweek/plugins/git_registry.py +8 -2
  37. tweek/plugins/git_security.py +21 -9
  38. tweek/plugins/screening/__init__.py +10 -1
  39. tweek/plugins/screening/heuristic_scorer.py +477 -0
  40. tweek/plugins/screening/llm_reviewer.py +14 -6
  41. tweek/plugins/screening/local_model_reviewer.py +161 -0
  42. tweek/proxy/__init__.py +38 -37
  43. tweek/proxy/addon.py +22 -3
  44. tweek/proxy/interceptor.py +1 -0
  45. tweek/proxy/server.py +4 -2
  46. tweek/sandbox/__init__.py +11 -0
  47. tweek/sandbox/docker_bridge.py +143 -0
  48. tweek/sandbox/executor.py +9 -6
  49. tweek/sandbox/layers.py +97 -0
  50. tweek/sandbox/linux.py +1 -0
  51. tweek/sandbox/project.py +548 -0
  52. tweek/sandbox/registry.py +149 -0
  53. tweek/security/__init__.py +9 -0
  54. tweek/security/language.py +250 -0
  55. tweek/security/llm_reviewer.py +1146 -60
  56. tweek/security/local_model.py +331 -0
  57. tweek/security/local_reviewer.py +146 -0
  58. tweek/security/model_registry.py +371 -0
  59. tweek/security/rate_limiter.py +11 -6
  60. tweek/security/secret_scanner.py +70 -4
  61. tweek/security/session_analyzer.py +26 -2
  62. tweek/skill_template/SKILL.md +200 -0
  63. tweek/skill_template/__init__.py +0 -0
  64. tweek/skill_template/cli-reference.md +331 -0
  65. tweek/skill_template/overrides-reference.md +184 -0
  66. tweek/skill_template/scripts/__init__.py +0 -0
  67. tweek/skill_template/scripts/check_installed.py +170 -0
  68. tweek/skills/__init__.py +38 -0
  69. tweek/skills/config.py +150 -0
  70. tweek/skills/fingerprints.py +198 -0
  71. tweek/skills/guard.py +293 -0
  72. tweek/skills/isolation.py +469 -0
  73. tweek/skills/scanner.py +715 -0
  74. tweek/vault/__init__.py +0 -1
  75. tweek/vault/cross_platform.py +12 -1
  76. tweek/vault/keychain.py +87 -29
  77. tweek-0.2.0.dist-info/METADATA +281 -0
  78. tweek-0.2.0.dist-info/RECORD +121 -0
  79. {tweek-0.1.0.dist-info → tweek-0.2.0.dist-info}/entry_points.txt +8 -1
  80. {tweek-0.1.0.dist-info → tweek-0.2.0.dist-info}/licenses/LICENSE +80 -0
  81. tweek/integrations/moltbot.py +0 -243
  82. tweek-0.1.0.dist-info/METADATA +0 -335
  83. tweek-0.1.0.dist-info/RECORD +0 -85
  84. {tweek-0.1.0.dist-info → tweek-0.2.0.dist-info}/WHEEL +0 -0
  85. {tweek-0.1.0.dist-info → tweek-0.2.0.dist-info}/top_level.txt +0 -0
tweek/memory/store.py ADDED
@@ -0,0 +1,989 @@
+"""
+Tweek Memory Store
+
+Core SQLite-backed storage for Tweek's agentic memory system.
+Handles schema creation, CRUD operations, time decay, and audit logging.
+
+Storage locations:
+- Global: ~/.tweek/memory.db
+- Per-project: .tweek/memory.db (inside project directory)
+"""
+
+import hashlib
+import math
+import sqlite3
+from datetime import datetime, timedelta
+from pathlib import Path
+from typing import Dict, List, Optional, Any
+
+from tweek.memory.schemas import (
+    ConfidenceAdjustment,
+    LearnedWhitelistSuggestion,
+    PatternDecisionEntry,
+    SourceTrustEntry,
+    WorkflowBaseline,
+)
+from tweek.memory.safety import (
+    MIN_APPROVAL_RATIO,
+    MIN_CONFIDENCE_SCORE,
+    MIN_DECISION_THRESHOLD,
+    compute_suggested_decision,
+    is_immune_pattern,
+)
+
+
+# Half-life in days for time decay
+DECAY_HALF_LIFE_DAYS = 30
+
+# Default global memory DB path
+GLOBAL_MEMORY_PATH = Path.home() / ".tweek" / "memory.db"
+
+
+class MemoryStore:
+    """SQLite-backed persistent memory for security decisions.
+
+    Manages 5 tables + 1 view:
+    - pattern_decisions: Per-pattern approval/denial history
+    - source_trust: URL/file injection history
+    - workflow_baselines: Normal tool usage patterns
+    - learned_whitelists: Auto-generated whitelist suggestions
+    - memory_audit: Accountability log
+    - pattern_confidence_view: Computed confidence adjustments
+    """
+
+    SCHEMA_VERSION = 1
+
+    def __init__(self, db_path: Optional[Path] = None):
+        self.db_path = db_path or GLOBAL_MEMORY_PATH
+        self.db_path.parent.mkdir(parents=True, exist_ok=True)
+        self._conn: Optional[sqlite3.Connection] = None
+        self._ensure_schema()
+
+    def _get_connection(self) -> sqlite3.Connection:
+        """Get or create a SQLite connection with WAL mode."""
+        if self._conn is None:
+            self._conn = sqlite3.connect(
+                str(self.db_path),
+                timeout=5.0,
+                isolation_level=None,  # autocommit
+            )
+            self._conn.row_factory = sqlite3.Row
+            self._conn.execute("PRAGMA journal_mode=WAL")
+            self._conn.execute("PRAGMA foreign_keys=ON")
+        return self._conn
+
+    def close(self):
+        """Close the database connection."""
+        if self._conn is not None:
+            self._conn.close()
+            self._conn = None
+
+    def _ensure_schema(self):
+        """Create tables, indexes, and views if they don't exist."""
+        conn = self._get_connection()
+        conn.executescript("""
+            CREATE TABLE IF NOT EXISTS schema_version (
+                version INTEGER PRIMARY KEY
+            );
+
+            CREATE TABLE IF NOT EXISTS pattern_decisions (
+                id INTEGER PRIMARY KEY AUTOINCREMENT,
+                pattern_name TEXT NOT NULL,
+                pattern_id INTEGER,
+                original_severity TEXT NOT NULL,
+                original_confidence TEXT NOT NULL,
+                decision TEXT NOT NULL,
+                user_response TEXT,
+                tool_name TEXT NOT NULL,
+                content_hash TEXT,
+                path_prefix TEXT,
+                project_hash TEXT,
+                timestamp TEXT NOT NULL DEFAULT (datetime('now')),
+                decay_weight REAL NOT NULL DEFAULT 1.0,
+                CHECK (NOT (
+                    original_severity = 'critical'
+                    AND original_confidence = 'deterministic'
+                    AND decision = 'allow'
+                ))
+            );
+
+            CREATE INDEX IF NOT EXISTS idx_pd_pattern_name
+                ON pattern_decisions(pattern_name);
+            CREATE INDEX IF NOT EXISTS idx_pd_pattern_path
+                ON pattern_decisions(pattern_name, path_prefix);
+            CREATE INDEX IF NOT EXISTS idx_pd_project
+                ON pattern_decisions(project_hash);
+            CREATE INDEX IF NOT EXISTS idx_pd_timestamp
+                ON pattern_decisions(timestamp);
+
+            CREATE TABLE IF NOT EXISTS source_trust (
+                id INTEGER PRIMARY KEY AUTOINCREMENT,
+                source_type TEXT NOT NULL,
+                source_key TEXT NOT NULL,
+                total_scans INTEGER DEFAULT 0,
+                injection_detections INTEGER DEFAULT 0,
+                trust_score REAL DEFAULT 0.5,
+                last_clean_scan TEXT,
+                last_injection TEXT,
+                timestamp TEXT DEFAULT (datetime('now')),
+                decay_weight REAL DEFAULT 1.0,
+                UNIQUE(source_type, source_key)
+            );
+
+            CREATE INDEX IF NOT EXISTS idx_st_type_key
+                ON source_trust(source_type, source_key);
+
+            CREATE TABLE IF NOT EXISTS workflow_baselines (
+                id INTEGER PRIMARY KEY AUTOINCREMENT,
+                project_hash TEXT NOT NULL,
+                tool_name TEXT NOT NULL,
+                hour_of_day INTEGER,
+                invocation_count INTEGER DEFAULT 0,
+                denied_count INTEGER DEFAULT 0,
+                last_updated TEXT DEFAULT (datetime('now')),
+                UNIQUE(project_hash, tool_name, hour_of_day)
+            );
+
+            CREATE INDEX IF NOT EXISTS idx_wb_project
+                ON workflow_baselines(project_hash);
+
+            CREATE TABLE IF NOT EXISTS learned_whitelists (
+                id INTEGER PRIMARY KEY AUTOINCREMENT,
+                pattern_name TEXT NOT NULL,
+                tool_name TEXT,
+                path_prefix TEXT,
+                approval_count INTEGER DEFAULT 0,
+                denial_count INTEGER DEFAULT 0,
+                confidence REAL DEFAULT 0.0,
+                suggested_at TEXT,
+                human_reviewed INTEGER DEFAULT 0,
+                timestamp TEXT DEFAULT (datetime('now')),
+                UNIQUE(pattern_name, tool_name, path_prefix)
+            );
+
+            CREATE INDEX IF NOT EXISTS idx_lw_pattern
+                ON learned_whitelists(pattern_name);
+
+            CREATE TABLE IF NOT EXISTS memory_audit (
+                id INTEGER PRIMARY KEY AUTOINCREMENT,
+                operation TEXT NOT NULL,
+                table_name TEXT NOT NULL,
+                key_info TEXT,
+                result TEXT,
+                timestamp TEXT DEFAULT (datetime('now'))
+            );
+
+            CREATE INDEX IF NOT EXISTS idx_ma_timestamp
+                ON memory_audit(timestamp);
+        """)
+
+        # Create or replace the confidence view
+        conn.execute("DROP VIEW IF EXISTS pattern_confidence_view")
+        conn.execute("""
+            CREATE VIEW pattern_confidence_view AS
+            SELECT
+                pattern_name,
+                path_prefix,
+                COUNT(*) as total_decisions,
+                SUM(CASE WHEN user_response = 'approved' THEN decay_weight ELSE 0 END)
+                    as weighted_approvals,
+                SUM(CASE WHEN user_response = 'denied' THEN decay_weight ELSE 0 END)
+                    as weighted_denials,
+                CASE WHEN SUM(decay_weight) > 0 THEN
+                    SUM(CASE WHEN user_response = 'approved' THEN decay_weight ELSE 0 END)
+                        / SUM(decay_weight)
+                ELSE 0.5 END as approval_ratio,
+                MAX(timestamp) as last_decision
+            FROM pattern_decisions
+            WHERE decay_weight > 0.01
+            GROUP BY pattern_name, path_prefix
+        """)
+
+        # Set schema version
+        conn.execute(
+            "INSERT OR REPLACE INTO schema_version (version) VALUES (?)",
+            (self.SCHEMA_VERSION,),
+        )
+
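The view above is where time decay meets scoring: each recorded decision votes with its current decay_weight rather than a full unit, so stale history fades instead of accumulating forever. A rough illustration of the arithmetic the view performs, in plain Python (not part of the package):

    # Decay-weighted approval ratio, mirroring pattern_confidence_view.
    # The weights are illustrative; real rows carry decay_weight values
    # maintained by MemoryStore.apply_decay().
    decisions = [
        ("approved", 1.0),   # fresh decision
        ("approved", 0.5),   # ~30 days old
        ("denied", 0.25),    # ~60 days old
    ]
    approvals = sum(w for resp, w in decisions if resp == "approved")    # 1.5
    total_weight = sum(w for _, w in decisions)                          # 1.75
    approval_ratio = approvals / total_weight if total_weight else 0.5  # ~0.86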
+    # =====================================================================
+    # Pattern Decisions
+    # =====================================================================
+
+    def record_decision(self, entry: PatternDecisionEntry) -> int:
+        """Record a pattern decision.
+
+        Returns the row ID of the inserted record.
+        """
+        conn = self._get_connection()
+
+        # Safety: never record 'allow' for CRITICAL+deterministic
+        if (
+            entry.original_severity == "critical"
+            and entry.original_confidence == "deterministic"
+            and entry.decision == "allow"
+        ):
+            self._audit("write", "pattern_decisions",
+                        f"{entry.pattern_name}:{entry.path_prefix}",
+                        "BLOCKED: attempted allow on critical+deterministic")
+            return -1
+
+        cursor = conn.execute(
+            """
+            INSERT INTO pattern_decisions (
+                pattern_name, pattern_id, original_severity, original_confidence,
+                decision, user_response, tool_name, content_hash,
+                path_prefix, project_hash, decay_weight
+            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+            """,
+            (
+                entry.pattern_name,
+                entry.pattern_id,
+                entry.original_severity,
+                entry.original_confidence,
+                entry.decision,
+                entry.user_response,
+                entry.tool_name,
+                entry.content_hash,
+                entry.path_prefix,
+                entry.project_hash,
+                entry.decay_weight,
+            ),
+        )
+
+        row_id = cursor.lastrowid
+        self._audit(
+            "write", "pattern_decisions",
+            f"{entry.pattern_name}:{entry.path_prefix}",
+            f"id={row_id}, decision={entry.decision}, response={entry.user_response}",
+        )
+
+        # Update learned whitelists
+        self._update_learned_whitelist(entry)
+
+        return row_id
+
+    def get_confidence_adjustment(
+        self,
+        pattern_name: str,
+        path_prefix: Optional[str] = None,
+        current_decision: str = "ask",
+        original_severity: str = "medium",
+        original_confidence: str = "heuristic",
+    ) -> Optional[ConfidenceAdjustment]:
+        """Query memory for a confidence adjustment on a pattern.
+
+        Returns a ConfidenceAdjustment if memory has enough data,
+        or None if insufficient data / pattern is immune.
+        """
+        conn = self._get_connection()
+
+        # Check immunity first
+        if is_immune_pattern(original_severity, original_confidence):
+            self._audit(
+                "read", "pattern_decisions",
+                f"{pattern_name}:{path_prefix}",
+                "immune_pattern_skipped",
+            )
+            return None
+
+        # Query the confidence view
+        if path_prefix:
+            row = conn.execute(
+                """
+                SELECT * FROM pattern_confidence_view
+                WHERE pattern_name = ? AND path_prefix = ?
+                """,
+                (pattern_name, path_prefix),
+            ).fetchone()
+        else:
+            row = conn.execute(
+                """
+                SELECT * FROM pattern_confidence_view
+                WHERE pattern_name = ? AND path_prefix IS NULL
+                """,
+                (pattern_name,),
+            ).fetchone()
+
+        # Also try without path prefix as fallback
+        if not row and path_prefix:
+            row = conn.execute(
+                """
+                SELECT
+                    pattern_name,
+                    NULL as path_prefix,
+                    SUM(total_decisions) as total_decisions,
+                    SUM(weighted_approvals) as weighted_approvals,
+                    SUM(weighted_denials) as weighted_denials,
+                    CASE WHEN SUM(weighted_approvals) + SUM(weighted_denials) > 0 THEN
+                        SUM(weighted_approvals) / (SUM(weighted_approvals) + SUM(weighted_denials))
+                    ELSE 0.5 END as approval_ratio,
+                    MAX(last_decision) as last_decision
+                FROM pattern_confidence_view
+                WHERE pattern_name = ?
+                GROUP BY pattern_name
+                """,
+                (pattern_name,),
+            ).fetchone()
+
+        if not row:
+            self._audit(
+                "read", "pattern_decisions",
+                f"{pattern_name}:{path_prefix}",
+                "no_data",
+            )
+            return None
+
+        total = row["total_decisions"]
+        weighted_approvals = row["weighted_approvals"] or 0.0
+        weighted_denials = row["weighted_denials"] or 0.0
+        approval_ratio = row["approval_ratio"] or 0.5
+        total_weighted = weighted_approvals + weighted_denials
+
+        # Compute suggested decision
+        suggested = compute_suggested_decision(
+            current_decision=current_decision,
+            approval_ratio=approval_ratio,
+            total_weighted_decisions=total_weighted,
+            original_severity=original_severity,
+            original_confidence=original_confidence,
+        )
+
+        # Confidence score: based on data quantity and consistency
+        confidence_score = 0.0
+        if total_weighted >= MIN_DECISION_THRESHOLD:
+            # Scale 0-1 based on how far above threshold and ratio strength
+            data_factor = min(total_weighted / (MIN_DECISION_THRESHOLD * 3), 1.0)
+            ratio_factor = approval_ratio if suggested == "log" else (1 - approval_ratio)
+            confidence_score = data_factor * ratio_factor
+
+        adjustment = ConfidenceAdjustment(
+            pattern_name=pattern_name,
+            path_prefix=path_prefix,
+            total_decisions=total,
+            weighted_approvals=weighted_approvals,
+            weighted_denials=weighted_denials,
+            approval_ratio=approval_ratio,
+            last_decision=row["last_decision"],
+            adjusted_decision=suggested,
+            confidence_score=confidence_score,
+        )
+
+        self._audit(
+            "read", "pattern_decisions",
+            f"{pattern_name}:{path_prefix}",
+            f"total={total}, ratio={approval_ratio:.2f}, suggested={suggested}, "
+            f"confidence={confidence_score:.2f}",
+        )
+
+        return adjustment
+
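For orientation, a consumer of this API (a pre-tool-use hook, say) would consult memory roughly as below. This is a hypothetical sketch: the pattern name, path prefix, and the 0.5 cutoff are made up for illustration, not taken from tweek's hook code.

    from tweek.memory.store import get_memory_store

    store = get_memory_store()
    adj = store.get_confidence_adjustment(
        pattern_name="curl_pipe_shell",   # hypothetical pattern
        path_prefix="src/scripts",        # hypothetical prefix
        current_decision="ask",
        original_severity="medium",
        original_confidence="heuristic",
    )
    if adj is not None and adj.confidence_score >= 0.5:  # illustrative cutoff
        decision = adj.adjusted_decision  # e.g. "ask" relaxed to "log"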
+    # =====================================================================
+    # Source Trust
+    # =====================================================================
+
+    def record_source_scan(
+        self,
+        source_type: str,
+        source_key: str,
+        had_injection: bool,
+    ) -> None:
+        """Record a source scan result (clean or injection detected)."""
+        conn = self._get_connection()
+        now = datetime.utcnow().isoformat()
+
+        if had_injection:
+            conn.execute(
+                """
+                INSERT INTO source_trust (source_type, source_key, total_scans,
+                    injection_detections, trust_score, last_injection, timestamp)
+                VALUES (?, ?, 1, 1, 0.0, ?, ?)
+                ON CONFLICT(source_type, source_key) DO UPDATE SET
+                    total_scans = total_scans + 1,
+                    injection_detections = injection_detections + 1,
+                    last_injection = excluded.last_injection,
+                    trust_score = CASE
+                        WHEN total_scans + 1 > 0 THEN
+                            1.0 - (CAST(injection_detections + 1 AS REAL) / (total_scans + 1))
+                        ELSE 0.5
+                    END
+                """,
+                (source_type, source_key, now, now),
+            )
+        else:
+            conn.execute(
+                """
+                INSERT INTO source_trust (source_type, source_key, total_scans,
+                    injection_detections, trust_score, last_clean_scan, timestamp)
+                VALUES (?, ?, 1, 0, 1.0, ?, ?)
+                ON CONFLICT(source_type, source_key) DO UPDATE SET
+                    total_scans = total_scans + 1,
+                    last_clean_scan = excluded.last_clean_scan,
+                    trust_score = CASE
+                        WHEN total_scans + 1 > 0 THEN
+                            1.0 - (CAST(injection_detections AS REAL) / (total_scans + 1))
+                        ELSE 0.5
+                    END
+                """,
+                (source_type, source_key, now, now),
+            )
+
+        self._audit(
+            "write", "source_trust",
+            f"{source_type}:{source_key}",
+            f"injection={had_injection}",
+        )
+
+    def get_source_trust(
+        self, source_type: str, source_key: str
+    ) -> Optional[SourceTrustEntry]:
+        """Get trust information for a source."""
+        conn = self._get_connection()
+        row = conn.execute(
+            """
+            SELECT * FROM source_trust
+            WHERE source_type = ? AND source_key = ?
+            """,
+            (source_type, source_key),
+        ).fetchone()
+
+        if not row:
+            # Also check domain-level trust for URLs
+            if source_type == "url":
+                domain = _extract_domain(source_key)
+                if domain:
+                    row = conn.execute(
+                        """
+                        SELECT * FROM source_trust
+                        WHERE source_type = 'domain' AND source_key = ?
+                        """,
+                        (domain,),
+                    ).fetchone()
+
+        if not row:
+            self._audit("read", "source_trust", f"{source_type}:{source_key}", "no_data")
+            return None
+
+        entry = SourceTrustEntry(
+            source_type=row["source_type"],
+            source_key=row["source_key"],
+            total_scans=row["total_scans"],
+            injection_detections=row["injection_detections"],
+            trust_score=row["trust_score"],
+            last_clean_scan=row["last_clean_scan"],
+            last_injection=row["last_injection"],
+        )
+
+        self._audit(
+            "read", "source_trust",
+            f"{source_type}:{source_key}",
+            f"trust={entry.trust_score:.2f}, scans={entry.total_scans}",
+        )
+
+        return entry
+
+    def get_all_sources(self, suspicious_only: bool = False) -> List[SourceTrustEntry]:
+        """Get all source trust entries, optionally filtering to suspicious ones."""
+        conn = self._get_connection()
+        if suspicious_only:
+            rows = conn.execute(
+                "SELECT * FROM source_trust WHERE trust_score < 0.5 ORDER BY trust_score ASC"
+            ).fetchall()
+        else:
+            rows = conn.execute(
+                "SELECT * FROM source_trust ORDER BY trust_score ASC"
+            ).fetchall()
+
+        return [
+            SourceTrustEntry(
+                source_type=r["source_type"],
+                source_key=r["source_key"],
+                total_scans=r["total_scans"],
+                injection_detections=r["injection_detections"],
+                trust_score=r["trust_score"],
+                last_clean_scan=r["last_clean_scan"],
+                last_injection=r["last_injection"],
+            )
+            for r in rows
+        ]
+
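The trust math is deliberately simple: trust_score is one minus the observed injection rate, recomputed on every upsert, and get_all_sources(suspicious_only=True) treats anything below 0.5 as suspicious. Worked numbers mirroring the SQL above:

    # trust_score = 1 - injections / scans, as record_source_scan() maintains it.
    total_scans = 4            # three clean scans...
    injections = 1             # ...then one injection detection
    trust_score = 1.0 - injections / total_scans   # 0.75
    suspicious = trust_score < 0.5                 # False -> not flagged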
+    # =====================================================================
+    # Workflow Baselines
+    # =====================================================================
+
+    def update_workflow(
+        self,
+        project_hash: str,
+        tool_name: str,
+        hour_of_day: Optional[int] = None,
+        was_denied: bool = False,
+    ) -> None:
+        """Update workflow baseline for a project+tool+hour."""
+        conn = self._get_connection()
+        now = datetime.utcnow().isoformat()
+
+        denied_inc = 1 if was_denied else 0
+        conn.execute(
+            """
+            INSERT INTO workflow_baselines (
+                project_hash, tool_name, hour_of_day, invocation_count,
+                denied_count, last_updated
+            ) VALUES (?, ?, ?, 1, ?, ?)
+            ON CONFLICT(project_hash, tool_name, hour_of_day) DO UPDATE SET
+                invocation_count = invocation_count + 1,
+                denied_count = denied_count + ?,
+                last_updated = ?
+            """,
+            (project_hash, tool_name, hour_of_day, denied_inc, now, denied_inc, now),
+        )
+
+    def get_workflow_baseline(
+        self, project_hash: str
+    ) -> List[WorkflowBaseline]:
+        """Get all workflow baselines for a project."""
+        conn = self._get_connection()
+        rows = conn.execute(
+            """
+            SELECT * FROM workflow_baselines
+            WHERE project_hash = ?
+            ORDER BY tool_name, hour_of_day
+            """,
+            (project_hash,),
+        ).fetchall()
+
+        self._audit("read", "workflow_baselines", project_hash, f"count={len(rows)}")
+
+        return [
+            WorkflowBaseline(
+                project_hash=r["project_hash"],
+                tool_name=r["tool_name"],
+                hour_of_day=r["hour_of_day"],
+                invocation_count=r["invocation_count"],
+                denied_count=r["denied_count"],
+            )
+            for r in rows
+        ]
+
+    def get_workflow_tool_baseline(
+        self, project_hash: str, tool_name: str
+    ) -> Optional[WorkflowBaseline]:
+        """Get aggregated baseline for a specific tool in a project."""
+        conn = self._get_connection()
+        row = conn.execute(
+            """
+            SELECT project_hash, tool_name, NULL as hour_of_day,
+                SUM(invocation_count) as invocation_count,
+                SUM(denied_count) as denied_count
+            FROM workflow_baselines
+            WHERE project_hash = ? AND tool_name = ?
+            GROUP BY project_hash, tool_name
+            """,
+            (project_hash, tool_name),
+        ).fetchone()
+
+        if not row:
+            return None
+
+        return WorkflowBaseline(
+            project_hash=row["project_hash"],
+            tool_name=row["tool_name"],
+            hour_of_day=None,
+            invocation_count=row["invocation_count"],
+            denied_count=row["denied_count"],
+        )
+
+    # =====================================================================
+    # Learned Whitelists
+    # =====================================================================
+
+    def _update_learned_whitelist(self, entry: PatternDecisionEntry) -> None:
+        """Update learned whitelist suggestion based on a new decision."""
+        if not entry.user_response:
+            return
+
+        conn = self._get_connection()
+        now = datetime.utcnow().isoformat()
+
+        approval_inc = 1 if entry.user_response == "approved" else 0
+        denial_inc = 1 if entry.user_response == "denied" else 0
+
+        conn.execute(
+            """
+            INSERT INTO learned_whitelists (
+                pattern_name, tool_name, path_prefix,
+                approval_count, denial_count, timestamp
+            ) VALUES (?, ?, ?, ?, ?, ?)
+            ON CONFLICT(pattern_name, tool_name, path_prefix) DO UPDATE SET
+                approval_count = approval_count + ?,
+                denial_count = denial_count + ?,
+                timestamp = ?
+            """,
+            (
+                entry.pattern_name, entry.tool_name, entry.path_prefix,
+                approval_inc, denial_inc, now,
+                approval_inc, denial_inc, now,
+            ),
+        )
+
+        # Recompute confidence and check if suggestion threshold met
+        row = conn.execute(
+            """
+            SELECT approval_count, denial_count FROM learned_whitelists
+            WHERE pattern_name = ? AND tool_name = ? AND path_prefix IS ?
+            """,
+            (entry.pattern_name, entry.tool_name, entry.path_prefix),
+        ).fetchone()
+
+        if row:
+            total = row["approval_count"] + row["denial_count"]
+            if total > 0:
+                confidence = row["approval_count"] / total
+                suggested_at = now if (
+                    confidence >= MIN_APPROVAL_RATIO
+                    and total >= MIN_DECISION_THRESHOLD
+                ) else None
+
+                conn.execute(
+                    """
+                    UPDATE learned_whitelists
+                    SET confidence = ?, suggested_at = ?
+                    WHERE pattern_name = ? AND tool_name = ? AND path_prefix IS ?
+                    """,
+                    (confidence, suggested_at,
+                     entry.pattern_name, entry.tool_name, entry.path_prefix),
+                )
+
+    def get_whitelist_suggestions(
+        self, pending_only: bool = True
+    ) -> List[LearnedWhitelistSuggestion]:
+        """Get learned whitelist suggestions.
+
+        Args:
+            pending_only: If True, only return unreviewed suggestions
+        """
+        conn = self._get_connection()
+
+        if pending_only:
+            rows = conn.execute(
+                """
+                SELECT * FROM learned_whitelists
+                WHERE suggested_at IS NOT NULL AND human_reviewed = 0
+                ORDER BY confidence DESC
+                """
+            ).fetchall()
+        else:
+            rows = conn.execute(
+                """
+                SELECT * FROM learned_whitelists
+                WHERE suggested_at IS NOT NULL
+                ORDER BY confidence DESC
+                """
+            ).fetchall()
+
+        self._audit("read", "learned_whitelists", "suggestions", f"count={len(rows)}")
+
+        return [
+            LearnedWhitelistSuggestion(
+                id=r["id"],
+                pattern_name=r["pattern_name"],
+                tool_name=r["tool_name"],
+                path_prefix=r["path_prefix"],
+                approval_count=r["approval_count"],
+                denial_count=r["denial_count"],
+                confidence=r["confidence"],
+                suggested_at=r["suggested_at"],
+                human_reviewed=r["human_reviewed"],
+            )
+            for r in rows
+        ]
+
+    def review_whitelist_suggestion(self, suggestion_id: int, accepted: bool) -> bool:
+        """Mark a whitelist suggestion as accepted or rejected.
+
+        Returns True if the suggestion was found and updated.
+        """
+        conn = self._get_connection()
+        status = 1 if accepted else -1
+        cursor = conn.execute(
+            "UPDATE learned_whitelists SET human_reviewed = ? WHERE id = ?",
+            (status, suggestion_id),
+        )
+
+        action = "accepted" if accepted else "rejected"
+        self._audit("write", "learned_whitelists", f"id={suggestion_id}", action)
+
+        return cursor.rowcount > 0
+
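A whitelist suggestion is only stamped (suggested_at) once both the approval ratio and the sample size clear thresholds imported from tweek/memory/safety.py, which is not part of this hunk. A sketch of the gate, with placeholder threshold values that are assumptions, not the shipped constants:

    # Assumed values for illustration; the real MIN_APPROVAL_RATIO and
    # MIN_DECISION_THRESHOLD live in tweek/memory/safety.py (not shown).
    MIN_APPROVAL_RATIO = 0.9
    MIN_DECISION_THRESHOLD = 5

    approvals, denials = 9, 1
    total = approvals + denials
    confidence = approvals / total                   # 0.9
    suggest = (confidence >= MIN_APPROVAL_RATIO
               and total >= MIN_DECISION_THRESHOLD)  # True -> suggested_at set
    # The row then surfaces in get_whitelist_suggestions() until a human
    # accepts it (human_reviewed=1) or rejects it (human_reviewed=-1).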
+    # =====================================================================
+    # Decay Engine
+    # =====================================================================
+
+    def apply_decay(self) -> Dict[str, int]:
+        """Apply time-based decay to all weighted entries.
+
+        Uses a 30-day half-life: weight = 2^(-days_elapsed/30)
+
+        Returns count of updated rows per table.
+        """
+        conn = self._get_connection()
+        now = datetime.utcnow()
+        results = {}
+
+        # Decay pattern decisions
+        rows = conn.execute(
+            "SELECT id, timestamp, decay_weight FROM pattern_decisions WHERE decay_weight > 0.01"
+        ).fetchall()
+
+        updated = 0
+        for row in rows:
+            try:
+                ts = datetime.fromisoformat(row["timestamp"])
+                days_elapsed = (now - ts).total_seconds() / 86400
+                new_weight = math.pow(2, -days_elapsed / DECAY_HALF_LIFE_DAYS)
+                new_weight = max(new_weight, 0.0)  # Floor at 0
+
+                if abs(new_weight - row["decay_weight"]) > 0.001:
+                    conn.execute(
+                        "UPDATE pattern_decisions SET decay_weight = ? WHERE id = ?",
+                        (new_weight, row["id"]),
+                    )
+                    updated += 1
+            except (ValueError, TypeError):
+                continue
+
+        results["pattern_decisions"] = updated
+
+        # Decay source trust
+        rows = conn.execute(
+            "SELECT id, timestamp, decay_weight FROM source_trust WHERE decay_weight > 0.01"
+        ).fetchall()
+
+        updated = 0
+        for row in rows:
+            try:
+                ts = datetime.fromisoformat(row["timestamp"])
+                days_elapsed = (now - ts).total_seconds() / 86400
+                new_weight = math.pow(2, -days_elapsed / DECAY_HALF_LIFE_DAYS)
+                new_weight = max(new_weight, 0.0)
+
+                if abs(new_weight - row["decay_weight"]) > 0.001:
+                    conn.execute(
+                        "UPDATE source_trust SET decay_weight = ? WHERE id = ?",
+                        (new_weight, row["id"]),
+                    )
+                    updated += 1
+            except (ValueError, TypeError):
+                continue
+
+        results["source_trust"] = updated
+
+        self._audit("decay", "all", None, str(results))
+        return results
+
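The half-life curve is worth pinning down with numbers: a decision counts for half a vote after 30 days, a quarter after 60, and drops below the 0.01 cutoff shared by apply_decay and pattern_confidence_view after roughly 200 days, since 2^(-d/30) < 0.01 once d > 30 * log2(100) ≈ 199.3:

    import math

    def decay_weight(days: float, half_life: float = 30.0) -> float:
        # Same curve as MemoryStore.apply_decay()
        return math.pow(2, -days / half_life)

    for days in (0, 30, 60, 90, 199, 200):
        print(days, round(decay_weight(days), 4))
    # 0 1.0
    # 30 0.5
    # 60 0.25
    # 90 0.125
    # 199 0.0101
    # 200 0.0098  <- below 0.01: the row stops counting entirely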
+    # =====================================================================
+    # Stats & Export
+    # =====================================================================
+
+    def get_stats(self) -> Dict[str, Any]:
+        """Get overall memory statistics."""
+        conn = self._get_connection()
+        stats = {}
+
+        for table in ("pattern_decisions", "source_trust", "workflow_baselines",
+                      "learned_whitelists", "memory_audit"):
+            row = conn.execute(f"SELECT COUNT(*) as cnt FROM {table}").fetchone()
+            stats[table] = row["cnt"]
+
+        # Last decay
+        row = conn.execute(
+            """
+            SELECT timestamp FROM memory_audit
+            WHERE operation = 'decay'
+            ORDER BY timestamp DESC LIMIT 1
+            """
+        ).fetchone()
+        stats["last_decay"] = row["timestamp"] if row else None
+
+        # DB file size
+        try:
+            stats["db_size_bytes"] = self.db_path.stat().st_size
+        except OSError:
+            stats["db_size_bytes"] = 0
+
+        return stats
+
+    def get_pattern_stats(
+        self, min_decisions: int = 0, sort_by: str = "count"
+    ) -> List[Dict[str, Any]]:
+        """Get per-pattern confidence statistics."""
+        conn = self._get_connection()
+        rows = conn.execute(
+            """
+            SELECT * FROM pattern_confidence_view
+            WHERE total_decisions >= ?
+            """,
+            (min_decisions,),
+        ).fetchall()
+
+        results = [dict(r) for r in rows]
+
+        if sort_by == "approval":
+            results.sort(key=lambda r: r.get("approval_ratio", 0), reverse=True)
+        elif sort_by == "name":
+            results.sort(key=lambda r: r.get("pattern_name", ""))
+        else:  # count
+            results.sort(key=lambda r: r.get("total_decisions", 0), reverse=True)
+
+        return results
+
+    def get_audit_log(self, limit: int = 50) -> List[Dict[str, Any]]:
+        """Get recent audit log entries."""
+        conn = self._get_connection()
+        rows = conn.execute(
+            "SELECT * FROM memory_audit ORDER BY timestamp DESC LIMIT ?",
+            (limit,),
+        ).fetchall()
+        return [dict(r) for r in rows]
+
+    def export_all(self) -> Dict[str, Any]:
+        """Export all memory data as a JSON-serializable dict."""
+        conn = self._get_connection()
+        data = {}
+
+        for table in ("pattern_decisions", "source_trust", "workflow_baselines",
+                      "learned_whitelists"):
+            rows = conn.execute(f"SELECT * FROM {table}").fetchall()
+            data[table] = [dict(r) for r in rows]
+
+        data["stats"] = self.get_stats()
+        return data
+
+    def clear_table(self, table_name: str) -> int:
+        """Clear all data from a specific table.
+
+        Returns the number of deleted rows.
+        """
+        valid_tables = {
+            "pattern_decisions", "source_trust", "workflow_baselines",
+            "learned_whitelists", "memory_audit",
+        }
+        if table_name not in valid_tables:
+            raise ValueError(f"Invalid table: {table_name}. Must be one of {valid_tables}")
+
+        conn = self._get_connection()
+        cursor = conn.execute(f"DELETE FROM {table_name}")
+        count = cursor.rowcount
+
+        self._audit("clear", table_name, None, f"deleted={count}")
+        return count
+
+    def clear_all(self) -> Dict[str, int]:
+        """Clear all memory data. Returns counts per table."""
+        results = {}
+        for table in ("pattern_decisions", "source_trust", "workflow_baselines",
+                      "learned_whitelists"):
+            results[table] = self.clear_table(table)
+
+        # Clear audit last (so the clear operations are logged first)
+        results["memory_audit"] = self.clear_table("memory_audit")
+        return results
+
+    # =====================================================================
+    # Audit
+    # =====================================================================
+
+    def _audit(
+        self,
+        operation: str,
+        table_name: str,
+        key_info: Optional[str],
+        result: Optional[str],
+    ) -> None:
+        """Log an operation to the memory audit table."""
+        try:
+            conn = self._get_connection()
+            conn.execute(
+                """
+                INSERT INTO memory_audit (operation, table_name, key_info, result)
+                VALUES (?, ?, ?, ?)
+                """,
+                (operation, table_name, key_info, result),
+            )
+        except Exception:
+            pass  # Audit logging should never block operations
+
+
+# =========================================================================
+# Helpers
+# =========================================================================
+
+
+def _extract_domain(url: str) -> Optional[str]:
+    """Extract domain from a URL."""
+    try:
+        from urllib.parse import urlparse
+        parsed = urlparse(url)
+        return parsed.hostname
+    except Exception:
+        return None
+
+
+def content_hash(content: str) -> str:
+    """Compute SHA-256 hash of content for deduplication."""
+    return hashlib.sha256(content.encode("utf-8", errors="replace")).hexdigest()
+
+
+def normalize_path_prefix(path: str, depth: int = 3) -> Optional[str]:
+    """Normalize a path to a prefix for memory lookups.
+
+    Strips to first `depth` components from the project root.
+    Example: /home/user/project/src/lib/utils.py -> src/lib/utils.py
+    """
+    if not path:
+        return None
+    try:
+        p = Path(path).resolve()
+        parts = p.parts
+        if len(parts) <= depth:
+            return str(p)
+        # Return last `depth` components
+        return str(Path(*parts[-depth:]))
+    except (ValueError, TypeError):
+        return None
+
+
+def hash_project(working_dir: str) -> Optional[str]:
+    """Hash a working directory to a project identifier."""
+    if not working_dir:
+        return None
+    return hashlib.sha256(working_dir.encode()).hexdigest()[:16]
+
+
+# =========================================================================
+# Module-level singleton
+# =========================================================================
+
+_global_store: Optional[MemoryStore] = None
+
+
+def get_memory_store(db_path: Optional[Path] = None) -> MemoryStore:
+    """Get the global MemoryStore singleton.
+
+    Args:
+        db_path: Override path for the database. If None, uses ~/.tweek/memory.db.
+    """
+    global _global_store
+    if db_path:
+        # Custom path - return new instance (don't cache)
+        return MemoryStore(db_path=db_path)
+
+    if _global_store is None:
+        _global_store = MemoryStore()
+    return _global_store
+
+
+def reset_memory_store() -> None:
+    """Reset the global singleton (for testing)."""
+    global _global_store
+    if _global_store is not None:
+        _global_store.close()
+    _global_store = None
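Taken together, the module exposes a small record-then-learn loop. A hypothetical end-to-end use follows; note that PatternDecisionEntry is defined in tweek/memory/schemas.py (not shown in this hunk), so its field names below are inferred from record_decision() and may not match the actual constructor:

    from tweek.memory.schemas import PatternDecisionEntry
    from tweek.memory.store import (
        get_memory_store, hash_project, normalize_path_prefix,
    )

    store = get_memory_store()  # global store at ~/.tweek/memory.db
    entry = PatternDecisionEntry(
        pattern_name="curl_pipe_shell",        # hypothetical pattern
        pattern_id=None,
        original_severity="medium",
        original_confidence="heuristic",
        decision="ask",
        user_response="approved",
        tool_name="Bash",
        content_hash=None,
        path_prefix=normalize_path_prefix("/home/user/project/scripts/run.sh"),
        project_hash=hash_project("/home/user/project"),
        decay_weight=1.0,
    )
    row_id = store.record_decision(entry)  # -1 if the safety CHECK blocks it
    store.apply_decay()                    # typically run periodically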