topos-node 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (249) hide show
  1. shared/__init__.py +59 -0
  2. shared/filtering.py +640 -0
  3. shared/schema_registry.py +229 -0
  4. topos/__init__.py +5 -0
  5. topos/__version__.py +6 -0
  6. topos/analytics/__init__.py +15 -0
  7. topos/analytics/duckdb_adapter.py +48 -0
  8. topos/analytics/messenger_communities.py +349 -0
  9. topos/analytics/messenger_graph.py +522 -0
  10. topos/analytics/messenger_labels.py +321 -0
  11. topos/analytics/profiles.py +22 -0
  12. topos/analytics/query_engine.py +64 -0
  13. topos/analytics/raw_queries.py +174 -0
  14. topos/api/__init__.py +1 -0
  15. topos/api/analytics.py +52 -0
  16. topos/api/app_registry.py +31 -0
  17. topos/api/backup.py +15 -0
  18. topos/api/compute_remote.py +175 -0
  19. topos/api/data_commit.py +158 -0
  20. topos/api/data_explorer_table_prefs.py +81 -0
  21. topos/api/db.py +10 -0
  22. topos/api/device.py +25 -0
  23. topos/api/enrichment.py +959 -0
  24. topos/api/filter_lab.py +195 -0
  25. topos/api/health.py +61 -0
  26. topos/api/ingestion_api.py +37 -0
  27. topos/api/ingestion_compat.py +21 -0
  28. topos/api/ingestion_sources.py +600 -0
  29. topos/api/llm.py +76 -0
  30. topos/api/local_mcp.py +46 -0
  31. topos/api/messenger_analytics.py +385 -0
  32. topos/api/query_api.py +13 -0
  33. topos/api/sanitization_ollama_config.py +64 -0
  34. topos/api/source_install.py +324 -0
  35. topos/api/sources.py +13 -0
  36. topos/api/sync.py +10 -0
  37. topos/api/ui_config.py +83 -0
  38. topos/api/uma_data.py +311 -0
  39. topos/api/usage.py +49 -0
  40. topos/api/user_identity.py +46 -0
  41. topos/app.py +239 -0
  42. topos/auth.py +17 -0
  43. topos/canonicalization/__init__.py +1 -0
  44. topos/canonicalization/mappers/__init__.py +22 -0
  45. topos/canonicalization/mappers/base.py +26 -0
  46. topos/canonicalization/mappers/chatgpt_mapper.py +40 -0
  47. topos/canonicalization/mappers/grok_mapper.py +17 -0
  48. topos/canonicalization/mappers/messenger_mapper.py +58 -0
  49. topos/canonicalization/models.py +31 -0
  50. topos/canonicalization/resolver.py +23 -0
  51. topos/cli/__init__.py +1 -0
  52. topos/cli/__main__.py +6 -0
  53. topos/cli/commands.py +132 -0
  54. topos/config/__init__.py +1 -0
  55. topos/config/sanitization_ollama.py +189 -0
  56. topos/config/settings.py +310 -0
  57. topos/contacts/__init__.py +5 -0
  58. topos/contacts/identity.py +24 -0
  59. topos/control_plane_client.py +300 -0
  60. topos/core/__init__.py +1 -0
  61. topos/core/api_models.py +128 -0
  62. topos/core/connection_resilience.py +99 -0
  63. topos/core/device_helpers.py +8 -0
  64. topos/core/errors.py +13 -0
  65. topos/core/events.py +12 -0
  66. topos/core/handlers.py +5625 -0
  67. topos/core/logging.py +175 -0
  68. topos/core/metrics.py +21 -0
  69. topos/core/startup_banner.py +62 -0
  70. topos/core/state.py +682 -0
  71. topos/core/table_layers.py +45 -0
  72. topos/core/types.py +13 -0
  73. topos/data_explorer_table_prefs.py +150 -0
  74. topos/engine/__init__.py +29 -0
  75. topos/engine/backends/__init__.py +50 -0
  76. topos/engine/backends/base.py +21 -0
  77. topos/engine/backends/huggingface.py +151 -0
  78. topos/engine/backends/ollama.py +181 -0
  79. topos/engine/backends/stub.py +22 -0
  80. topos/engine/engine.py +165 -0
  81. topos/engine/intake.py +32 -0
  82. topos/engine/queue_manager.py +112 -0
  83. topos/engine/registration.py +126 -0
  84. topos/engine/result_formatter.py +38 -0
  85. topos/engine/router.py +19 -0
  86. topos/engine/scoped_token.py +82 -0
  87. topos/engine/tasks.py +154 -0
  88. topos/engine/transport.py +44 -0
  89. topos/engine/usage_guard.py +100 -0
  90. topos/engine/usage_observation.py +129 -0
  91. topos/engine/validator.py +23 -0
  92. topos/enrichment/__init__.py +1 -0
  93. topos/enrichment/derived_tables.py +214 -0
  94. topos/enrichment/jobs/__init__.py +30 -0
  95. topos/enrichment/jobs/base.py +54 -0
  96. topos/enrichment/jobs/canonical/__init__.py +1 -0
  97. topos/enrichment/jobs/canonical/embeddings_job.py +27 -0
  98. topos/enrichment/jobs/canonical/emo_27_job.py +97 -0
  99. topos/enrichment/jobs/canonical/entities_job.py +27 -0
  100. topos/enrichment/jobs/canonical/sentiment_job.py +27 -0
  101. topos/enrichment/jobs/canonical/topics_job.py +27 -0
  102. topos/enrichment/jobs/raw/__init__.py +1 -0
  103. topos/enrichment/jobs/raw/attachments_job.py +12 -0
  104. topos/enrichment/jobs/raw/language_job.py +12 -0
  105. topos/enrichment/jobs/raw/time_normalization_job.py +12 -0
  106. topos/enrichment/jobs/raw/tool_calls_job.py +12 -0
  107. topos/enrichment/models/__init__.py +1 -0
  108. topos/enrichment/models/manager.py +8 -0
  109. topos/enrichment/models/registry.py +71 -0
  110. topos/enrichment/models/versioning.py +8 -0
  111. topos/enrichment/orchestrator.py +177 -0
  112. topos/enrichment/processor.py +17 -0
  113. topos/enrichment/progress_bar.py +122 -0
  114. topos/enrichment/website_classifier.py +31 -0
  115. topos/filter_lab/__init__.py +1 -0
  116. topos/filter_lab/bundles.py +300 -0
  117. topos/filter_lab/schema.py +86 -0
  118. topos/filter_lab/service.py +167 -0
  119. topos/filter_lab/store.py +374 -0
  120. topos/filter_lab/worker.py +250 -0
  121. topos/hosted_pool_lease.py +153 -0
  122. topos/ingestion/__init__.py +1 -0
  123. topos/ingestion/checkpoints/__init__.py +6 -0
  124. topos/ingestion/checkpoints/checkpoint_store.py +24 -0
  125. topos/ingestion/checkpoints/sqlite_checkpoint_store.py +82 -0
  126. topos/ingestion/ingest_helpers.py +504 -0
  127. topos/ingestion/jobs.py +91 -0
  128. topos/ingestion/local_sync.py +823 -0
  129. topos/ingestion/log_preview.py +21 -0
  130. topos/ingestion/manager.py +1100 -0
  131. topos/ingestion/parser.py +174 -0
  132. topos/ingestion/parsers/__init__.py +32 -0
  133. topos/ingestion/parsers/base.py +24 -0
  134. topos/ingestion/parsers/browser_parser.py +171 -0
  135. topos/ingestion/parsers/calendar_parser.py +21 -0
  136. topos/ingestion/parsers/chatgpt_conversation_flattener.py +266 -0
  137. topos/ingestion/parsers/chatgpt_parser.py +67 -0
  138. topos/ingestion/parsers/grok_parser.py +21 -0
  139. topos/ingestion/parsers/messenger_parser.py +97 -0
  140. topos/ingestion/progress.py +54 -0
  141. topos/ingestion/sources/__init__.py +20 -0
  142. topos/ingestion/sources/base.py +39 -0
  143. topos/ingestion/sources/calendar.py +29 -0
  144. topos/ingestion/sources/chatgpt.py +29 -0
  145. topos/ingestion/sources/contact_importers.py +274 -0
  146. topos/ingestion/sources/grok.py +29 -0
  147. topos/ingestion/sources/imessage_reader.py +479 -0
  148. topos/ingestion/sources/signal_export_parser.py +132 -0
  149. topos/ingestion/sources/signal_reader.py +491 -0
  150. topos/ingestion/state_machine.py +70 -0
  151. topos/ingestion/triggers/__init__.py +1 -0
  152. topos/ingestion/triggers/file_trigger.py +36 -0
  153. topos/ingestion/triggers/sqlite_trigger.py +18 -0
  154. topos/ingestion/validation/__init__.py +1 -0
  155. topos/ingestion/validation/base.py +27 -0
  156. topos/ingestion/validation/schema_registry.py +111 -0
  157. topos/ingestion/validation/schema_validator.py +13 -0
  158. topos/lineage/__init__.py +1 -0
  159. topos/lineage/provenance.py +9 -0
  160. topos/lineage/tracker.py +9 -0
  161. topos/mcp_stdio_proxy.py +83 -0
  162. topos/observability/__init__.py +1 -0
  163. topos/observability/alerts.py +7 -0
  164. topos/observability/metrics.py +25 -0
  165. topos/observability/tracing.py +18 -0
  166. topos/openai_client.py +69 -0
  167. topos/projections/__init__.py +1 -0
  168. topos/projections/vector_index/__init__.py +1 -0
  169. topos/projections/vector_index/base.py +21 -0
  170. topos/projections/vector_index/builders.py +11 -0
  171. topos/projections/vector_index/health_checks.py +5 -0
  172. topos/rate_limit.py +43 -0
  173. topos/sanitization/__init__.py +16 -0
  174. topos/sanitization/ollama_transforms.py +276 -0
  175. topos/scope_resolution.py +89 -0
  176. topos/services/__init__.py +1 -0
  177. topos/services/container.py +46 -0
  178. topos/services/embeddings/__init__.py +1 -0
  179. topos/services/embeddings/base.py +7 -0
  180. topos/services/embeddings/local.py +9 -0
  181. topos/services/embeddings/remote.py +9 -0
  182. topos/services/interfaces.py +40 -0
  183. topos/services/llm/__init__.py +1 -0
  184. topos/services/llm/base.py +7 -0
  185. topos/services/llm/openai.py +126 -0
  186. topos/services/local.py +123 -0
  187. topos/services/postgres.py +385 -0
  188. topos/sources/__init__.py +6 -0
  189. topos/sources/definitions.py +114 -0
  190. topos/sources/install_service.py +836 -0
  191. topos/sources/registry.py +263 -0
  192. topos/sources/runtime_install.py +427 -0
  193. topos/storage/__init__.py +1 -0
  194. topos/storage/canonical/__init__.py +18 -0
  195. topos/storage/canonical/ai_chat/__init__.py +22 -0
  196. topos/storage/canonical/ai_chat/canonicalizer.py +147 -0
  197. topos/storage/canonical/ai_chat/mapper.py +168 -0
  198. topos/storage/canonical/ai_chat/model.py +87 -0
  199. topos/storage/canonical/ai_chat/tables.py +179 -0
  200. topos/storage/canonical/canonical_store.py +24 -0
  201. topos/storage/canonical/conversations_tables.py +1020 -0
  202. topos/storage/canonical/mapping_store.py +30 -0
  203. topos/storage/canonical/postgres.py +10 -0
  204. topos/storage/db/__init__.py +1 -0
  205. topos/storage/db/client.py +8 -0
  206. topos/storage/db/migrations/__init__.py +1 -0
  207. topos/storage/db/migrations/stage9_column_renames.py +78 -0
  208. topos/storage/db/paths.py +122 -0
  209. topos/storage/db/postgres.py +240 -0
  210. topos/storage/db/schema.py +6 -0
  211. topos/storage/enrichment/__init__.py +1 -0
  212. topos/storage/enrichment/canonical_enrichment_store.py +7 -0
  213. topos/storage/enrichment/raw_enrichment_store.py +18 -0
  214. topos/storage/normalized/__init__.py +1 -0
  215. topos/storage/normalized/normalized_store.py +24 -0
  216. topos/storage/oplog/__init__.py +1 -0
  217. topos/storage/oplog/decision.py +6 -0
  218. topos/storage/oplog/oplog_store.py +17 -0
  219. topos/storage/oplog/postgres.py +10 -0
  220. topos/storage/projections/__init__.py +1 -0
  221. topos/storage/projections/index_ops_store.py +6 -0
  222. topos/storage/projections/vector_index_store.py +6 -0
  223. topos/storage/raw/__init__.py +1 -0
  224. topos/storage/raw/browser_flat_tables.py +303 -0
  225. topos/storage/raw/file_store.py +100 -0
  226. topos/storage/raw/raw_store.py +29 -0
  227. topos/storage/raw/raw_tables_manager.py +295 -0
  228. topos/storage/raw/sqlite_raw_store.py +17 -0
  229. topos/storage/security/encryption.py +21 -0
  230. topos/storage/signal_identity.py +71 -0
  231. topos/storage/source_settings.py +116 -0
  232. topos/storage/user_identity.py +69 -0
  233. topos/sync/__init__.py +5 -0
  234. topos/sync/client.py +272 -0
  235. topos/sync_handlers.py +70 -0
  236. topos/testing/__init__.py +1 -0
  237. topos/testing/lifespan.py +7 -0
  238. topos/uma_contact_enrichment.py +1032 -0
  239. topos/uma_filters.py +669 -0
  240. topos/uma_resource_id.py +24 -0
  241. topos/uma_rpt.py +69 -0
  242. topos/utils/base_object.py +61 -0
  243. topos/websocket_client.py +21 -0
  244. topos_node-0.1.0.dist-info/METADATA +199 -0
  245. topos_node-0.1.0.dist-info/RECORD +249 -0
  246. topos_node-0.1.0.dist-info/WHEEL +5 -0
  247. topos_node-0.1.0.dist-info/entry_points.txt +2 -0
  248. topos_node-0.1.0.dist-info/licenses/LICENSE +201 -0
  249. topos_node-0.1.0.dist-info/top_level.txt +2 -0
@@ -0,0 +1,167 @@
1
+ """Orchestration for Filter Lab (create job, apply winner, helpers)."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import json
7
+ import logging
8
+ from typing import Any, Dict, List, Optional, Set
9
+
10
+ from topos.config.sanitization_ollama import (
11
+ ENGINE_CONFIG_KEY_SANITIZATION_OLLAMA_DEVICE,
12
+ SANITIZATION_OLLAMA_TRANSFORM_IDS,
13
+ normalize_put_device_overrides,
14
+ resolve_sanitization_ollama_effective,
15
+ )
16
+ from topos.config.settings import settings
17
+ from topos.core.state import get_db_connection, get_engine_config_value, set_engine_config_value
18
+ from topos.engine.backends.ollama import OllamaAdapter
19
+
20
+ from . import bundles as bundles_mod
21
+ from . import store
22
+ from . import worker
23
+
24
+ logger = logging.getLogger("topos.filter_lab.service")
25
+
26
+ _background_tasks: Set[asyncio.Task[Any]] = set()
27
+
28
+
29
def schedule_process_job_group(group_id: str) -> None:
    """Start processing of a job group, off the event loop when one is running.

    With a running asyncio loop, the synchronous worker is handed to
    ``asyncio.to_thread`` inside a task and this function returns at once.
    Without a running loop, the worker is called inline and this function
    returns only after it has finished.
    """
    try:
        running_loop = asyncio.get_running_loop()
    except RuntimeError:
        # No event loop in this thread: fall back to a blocking call.
        worker.process_job_group_sync(group_id)
        return

    async def _process_in_thread() -> None:
        await asyncio.to_thread(worker.process_job_group_sync, group_id)

    bg_task = running_loop.create_task(_process_in_thread())
    # Hold a reference in the module-level set until the task completes;
    # the done-callback removes it again.
    _background_tasks.add(bg_task)
    bg_task.add_done_callback(_background_tasks.discard)
43
+
44
+
45
def create_job_group(
    *,
    filter_id: str,
    bundle_id: str,
    models: List[str],
    options: Optional[Dict[str, Any]] = None,
) -> str:
    """Validate a Filter Lab request, persist the job group, and start it.

    Args:
        filter_id: Transform to evaluate; must be one of
            ``SANITIZATION_OLLAMA_TRANSFORM_IDS``.
        bundle_id: Identifier of the record bundle to run against.
        models: Model tags to compare. Entries are stringified and stripped;
            blanks are dropped and repeated tags are collapsed (first
            occurrence wins) so each model is run once per record.
        options: Optional free-form options stored with the group.

    Returns:
        The id of the newly created job group.

    Raises:
        RuntimeError: If no database connection is available.
        ValueError: If the filter is not runnable, the bundle is unknown,
            incompatible or empty, or no usable model tag was supplied.
    """
    conn = get_db_connection()
    if not conn:
        raise RuntimeError("Database not available")

    if filter_id not in SANITIZATION_OLLAMA_TRANSFORM_IDS:
        raise ValueError(f"filter_id {filter_id!r} is not runnable in Filter Lab (Ollama sanitization)")

    bundle = bundles_mod.get_bundle(bundle_id)
    if not bundle:
        raise ValueError(f"Unknown bundle_id: {bundle_id!r}")

    if not bundles_mod.is_bundle_compatible_with_filter(bundle, filter_id):
        raise ValueError("Bundle is not compatible with this filter")

    # dict.fromkeys de-duplicates while preserving the caller's ordering;
    # a repeated tag would otherwise create duplicate run rows.
    stripped_tags = (str(m).strip() for m in models)
    clean_models = list(dict.fromkeys(tag for tag in stripped_tags if tag))
    if not clean_models:
        raise ValueError("models must contain at least one model tag")

    # Finish all local validation before talking to Ollama so an empty
    # bundle is rejected without a needless network round trip.
    record_ids = bundles_mod.bundle_record_ids(bundle)
    if not record_ids:
        raise ValueError("Bundle has no records")

    eff = resolve_sanitization_ollama_effective(settings, conn)
    adapter = OllamaAdapter(base_url=eff.host)
    # Models reported by the adapter before the job runs; stored on the group.
    baseline = adapter.list_models()

    gid = store.insert_group(
        conn,
        filter_id=filter_id,
        bundle_id=bundle_id,
        bundle_version=str(bundle["bundle_version"]),
        baseline_models=baseline,
        models=clean_models,
        record_ids=record_ids,
        options=options or {},
    )
    schedule_process_job_group(gid)
    return gid
90
+
91
+
92
def apply_preferred_model(group_id: str) -> Dict[str, Any]:
    """Write a job group's preferred model into the device sanitization overrides.

    The stored override JSON is read (falling back to an empty mapping when
    it is missing, malformed, or not an object), a fresh override mapping is
    built from a fixed set of carried-over keys, and ``models[filter_id]`` is
    set to the group's ``preferred_model_tag``.

    Returns:
        ``{"status": "ok"}`` merged with the effective configuration.

    Raises:
        RuntimeError: If no database connection is available.
        ValueError: If the group does not exist, has no preferred tag, or the
            tag does not belong to any run of the group.
    """
    from topos.config.sanitization_ollama import effective_config_for_api

    conn = get_db_connection()
    if not conn:
        raise RuntimeError("Database not available")

    group_row = store.get_group(conn, group_id)
    if not group_row:
        raise ValueError("Job group not found")
    group = dict(group_row)

    winner = (group.get("preferred_model_tag") or "").strip()
    if not winner:
        raise ValueError("preferred_model_tag is not set on this job group")

    filter_id = group["filter_id"]
    tags_in_runs = {dict(run)["model_tag"] for run in store.list_runs(conn, group_id)}
    if winner not in tags_in_runs:
        raise ValueError("preferred_model_tag was not part of this job group")

    stored_json = get_engine_config_value(conn, ENGINE_CONFIG_KEY_SANITIZATION_OLLAMA_DEVICE) or "{}"
    try:
        current = json.loads(stored_json)
    except json.JSONDecodeError:
        current = {}
    if not isinstance(current, dict):
        current = {}

    overrides: Dict[str, Any] = {"version": int(current.get("version") or 1)}
    carried_keys = ("enabled", "host", "default_model", "timeout_sec", "max_input_chars")
    overrides.update(
        {key: current[key] for key in carried_keys if current.get(key) is not None}
    )

    previous_models = current.get("models")
    per_filter_models = dict(previous_models) if isinstance(previous_models, dict) else {}
    per_filter_models[filter_id] = winner
    overrides["models"] = per_filter_models

    payload = normalize_put_device_overrides({"device_overrides": overrides})
    set_engine_config_value(conn, ENGINE_CONFIG_KEY_SANITIZATION_OLLAMA_DEVICE, payload)
    api_config = effective_config_for_api(settings, conn)
    return {"status": "ok", **api_config}
131
+
132
+
133
def enrich_job_groups_list_with_run_summaries(conn: Any, groups: List[Dict[str, Any]]) -> None:
    """Add a ``history_summary`` entry to every group dict, in place.

    Summaries for all groups with a truthy ``id`` are fetched in a single
    store call; groups without a summary receive the store's empty summary.
    """
    if not groups:
        return

    group_ids: List[str] = []
    for group in groups:
        if group.get("id"):
            group_ids.append(str(group["id"]))

    by_group = store.history_summaries_for_group_ids(conn, group_ids)

    for group in groups:
        key = str(group.get("id") or "")
        fallback = store.empty_history_summary()
        group["history_summary"] = by_group.get(key, fallback)
142
+
143
+
144
def serialize_job_group(conn: Any, group_id: str) -> Dict[str, Any]:
    """Build the ``{"group": ..., "runs": [...]}`` payload for one job group.

    The ``*_json`` columns of the group are decoded into ``baseline_models``,
    ``pulled_models`` and ``options``; each run's ``user_liked`` value is
    mapped to ``True`` / ``False`` / ``None``.

    Raises:
        KeyError: If the group does not exist.
    """
    group_row = store.get_group(conn, group_id)
    if not group_row:
        raise KeyError(group_id)
    group = dict(group_row)

    json_list_columns = (
        ("baseline_models_json", "baseline_models"),
        ("pulled_models_json", "pulled_models"),
    )
    for column, key in json_list_columns:
        group[key] = json.loads(group.pop(column) or "[]")

    # Options are decoded leniently: non-string or malformed values become {}.
    raw_options = group.pop("options_json", "{}")
    parsed_options: Any = {}
    if isinstance(raw_options, str):
        try:
            parsed_options = json.loads(raw_options)
        except json.JSONDecodeError:
            parsed_options = {}
    group["options"] = parsed_options

    runs: List[Dict[str, Any]] = []
    for run_row in store.list_runs(conn, group_id):
        run = dict(run_row)
        liked = run.get("user_liked")
        run["user_liked"] = True if liked == 1 else (False if liked == 0 else None)
        runs.append(run)

    return {"group": group, "runs": runs}
@@ -0,0 +1,374 @@
1
+ """Persistence helpers for Filter Lab."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import sqlite3
7
+ import uuid
8
+ from datetime import datetime, timezone
9
+ from collections import defaultdict
10
+ from typing import Any, Dict, List, Optional
11
+
12
+ from .schema import ensure_filter_lab_schema
13
+
14
+
15
+ def _now() -> str:
16
+ return datetime.now(timezone.utc).isoformat()
17
+
18
+
19
def utc_now_iso() -> str:
    """Public alias of ``_now()``: the current UTC time as an ISO string."""
    stamp = _now()
    return stamp
22
+
23
+
24
def ensure_schema(conn: sqlite3.Connection) -> None:
    """Delegate to ``ensure_filter_lab_schema`` for the given connection."""
    ensure_filter_lab_schema(conn)
26
+
27
+
28
def insert_group(
    conn: sqlite3.Connection,
    *,
    filter_id: str,
    bundle_id: str,
    bundle_version: str,
    baseline_models: List[str],
    models: List[str],
    record_ids: List[str],
    options: Optional[Dict[str, Any]] = None,
) -> str:
    """Insert a job group plus one queued run per (model, record) pair.

    The group row is created with status ``"pending"`` and an empty
    ``pulled_models_json`` list; every run row starts as ``"queued"``.
    All rows are written in one transaction: either the group and all of
    its runs are committed, or nothing is.

    Args:
        conn: Open SQLite connection.
        filter_id: Transform the group evaluates.
        bundle_id: Bundle the records come from.
        bundle_version: Version string of that bundle.
        baseline_models: Model list stored as JSON on the group.
        models: Model tags to create runs for.
        record_ids: Record ids to create runs for.
        options: Optional mapping stored as JSON (``{}`` when omitted).

    Returns:
        The generated group id (UUID4 string).
    """
    ensure_schema(conn)
    gid = str(uuid.uuid4())
    group_row = (
        gid,
        _now(),
        filter_id,
        bundle_id,
        bundle_version,
        "pending",
        json.dumps(baseline_models),
        json.dumps([]),
        json.dumps(options or {}),
    )
    run_rows = [
        (str(uuid.uuid4()), gid, model_tag, rid, "queued")
        for model_tag in models
        for rid in record_ids
    ]

    # The connection context manager commits on success and rolls back if
    # any statement raises, so a failure part-way through cannot leave a
    # half-written group in an open transaction for a later commit to persist.
    with conn:
        conn.execute(
            """
            INSERT INTO filter_lab_job_group (
                id, created_at, filter_id, bundle_id, bundle_version, status,
                baseline_models_json, pulled_models_json, options_json
            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
            """,
            group_row,
        )
        conn.executemany(
            """
            INSERT INTO filter_lab_run (
                id, group_id, model_tag, record_id, status
            ) VALUES (?, ?, ?, ?, ?)
            """,
            run_rows,
        )
    return gid
74
+
75
+
76
def get_group(conn: sqlite3.Connection, group_id: str) -> Optional[sqlite3.Row]:
    """Fetch the job-group row with the given id, or ``None`` if absent."""
    ensure_schema(conn)
    query = "SELECT * FROM filter_lab_job_group WHERE id = ?"
    return conn.execute(query, (group_id,)).fetchone()
80
+
81
+
82
def list_runs(conn: sqlite3.Connection, group_id: str) -> List[sqlite3.Row]:
    """Return every run of a group, ordered by model tag then record id."""
    ensure_schema(conn)
    query = "SELECT * FROM filter_lab_run WHERE group_id = ? ORDER BY model_tag, record_id"
    rows = conn.execute(query, (group_id,)).fetchall()
    return list(rows)
89
+
90
+
91
def update_group_status(conn: sqlite3.Connection, group_id: str, status: str) -> None:
    """Set the ``status`` column of one job group and commit."""
    ensure_schema(conn)
    statement = "UPDATE filter_lab_job_group SET status = ? WHERE id = ?"
    params = (status, group_id)
    conn.execute(statement, params)
    conn.commit()
95
+
96
+
97
def set_group_pulled_models(conn: sqlite3.Connection, group_id: str, pulled: List[str]) -> None:
    """Store ``pulled`` as JSON in the group's ``pulled_models_json`` and commit."""
    ensure_schema(conn)
    encoded = json.dumps(pulled)
    statement = "UPDATE filter_lab_job_group SET pulled_models_json = ? WHERE id = ?"
    conn.execute(statement, (encoded, group_id))
    conn.commit()
104
+
105
+
106
def update_run(
    conn: sqlite3.Connection,
    run_id: str,
    *,
    status: Optional[str] = None,
    started_at: Optional[str] = None,
    finished_at: Optional[str] = None,
    latency_ms: Optional[int] = None,
    error_code: Optional[str] = None,
    input_hash: Optional[str] = None,
    input_text: Optional[str] = None,
    output_text: Optional[str] = None,
    metrics_json: Optional[str] = None,
) -> None:
    """Update the supplied columns of one run row and commit.

    Only keyword arguments that are not ``None`` are written; ``None`` means
    "leave the column as it is", so this function cannot set a column to
    NULL. When nothing is supplied, no statement is executed.
    """
    ensure_schema(conn)

    # Column order here fixes the order of the generated SET clause.
    candidates = (
        ("status", status),
        ("started_at", started_at),
        ("finished_at", finished_at),
        ("latency_ms", latency_ms),
        ("error_code", error_code),
        ("input_hash", input_hash),
        ("input_text", input_text),
        ("output_text", output_text),
        ("metrics_json", metrics_json),
    )
    changes = [(column, value) for column, value in candidates if value is not None]
    if not changes:
        return

    set_clause = ", ".join(f"{column} = ?" for column, _ in changes)
    params: List[Any] = [value for _, value in changes]
    params.append(run_id)
    conn.execute(f"UPDATE filter_lab_run SET {set_clause} WHERE id = ?", params)
    conn.commit()
155
+
156
+
157
def patch_group(
    conn: sqlite3.Connection,
    group_id: str,
    *,
    preferred_model_tag: Any = None,
    group_notes: Any = None,
    notes: Any = None,
) -> None:
    """Update user-editable columns of a job group and commit.

    Arguments left as ``None`` are not written. The call is a silent no-op
    when the group does not exist or when no argument is supplied.
    """
    ensure_schema(conn)
    if not get_group(conn, group_id):
        return

    candidates = (
        ("preferred_model_tag", preferred_model_tag),
        ("group_notes", group_notes),
        ("notes", notes),
    )
    changes = [(column, value) for column, value in candidates if value is not None]
    if not changes:
        return

    set_clause = ", ".join(f"{column} = ?" for column, _ in changes)
    params: List[Any] = [value for _, value in changes]
    params.append(group_id)
    conn.execute(f"UPDATE filter_lab_job_group SET {set_clause} WHERE id = ?", params)
    conn.commit()
185
+
186
+
187
def patch_run(
    conn: sqlite3.Connection,
    run_id: str,
    *,
    user_quality_score_0_10: Any = None,
    user_liked: Any = None,
    user_note: Any = None,
    rated_at: Any = None,
) -> None:
    """Update the user-feedback columns of one run and commit.

    Arguments left as ``None`` are not written. ``user_liked`` is stored as
    ``1`` for ``True`` and ``0`` for ``False``; any other non-``None`` value
    writes NULL. When no argument is supplied, no statement is executed.
    """
    ensure_schema(conn)

    if user_liked is True:
        liked_flag: Any = 1
    elif user_liked is False:
        liked_flag = 0
    else:
        liked_flag = None

    # (column, value as passed by the caller, value to store)
    candidates = (
        ("user_quality_score_0_10", user_quality_score_0_10, user_quality_score_0_10),
        ("user_liked", user_liked, liked_flag),
        ("user_note", user_note, user_note),
        ("rated_at", rated_at, rated_at),
    )
    changes = [(column, stored) for column, given, stored in candidates if given is not None]
    if not changes:
        return

    set_clause = ", ".join(f"{column} = ?" for column, _ in changes)
    params: List[Any] = [stored for _, stored in changes]
    params.append(run_id)
    conn.execute(f"UPDATE filter_lab_run SET {set_clause} WHERE id = ?", params)
    conn.commit()
216
+
217
+
218
def list_groups_for_filter(
    conn: sqlite3.Connection, filter_id: str, *, limit: int = 20, offset: int = 0
) -> List[sqlite3.Row]:
    """Return one page of job groups for ``filter_id``, newest first."""
    ensure_schema(conn)
    query = """
        SELECT * FROM filter_lab_job_group
        WHERE filter_id = ?
        ORDER BY created_at DESC
        LIMIT ? OFFSET ?
    """
    page = conn.execute(query, (filter_id, limit, offset)).fetchall()
    return list(page)
232
+
233
+
234
def list_all_job_groups(
    conn: sqlite3.Connection, *, limit: int = 50, offset: int = 0
) -> List[sqlite3.Row]:
    """Return one page of job groups across all filters, newest first."""
    ensure_schema(conn)
    query = """
        SELECT * FROM filter_lab_job_group
        ORDER BY created_at DESC
        LIMIT ? OFFSET ?
    """
    page = conn.execute(query, (limit, offset)).fetchall()
    return list(page)
248
+
249
+
250
+ def _run_row_liked(val: Any) -> bool:
251
+ return val is True or val == 1
252
+
253
+
254
def empty_history_summary() -> Dict[str, Any]:
    """Build a fresh summary dict for a group without any runs."""
    return dict(
        models="",
        avg_latency_ms=None,
        any_liked=False,
        rating_text=None,
    )
262
+
263
+
264
def _history_summary_from_runs(runs: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Aggregate run dicts of one group into a summary for list/history views.

    Args:
        runs: Run rows as dicts. The keys read are ``model_tag``, ``status``,
            ``latency_ms``, ``user_liked`` and ``user_quality_score_0_10``.

    Returns:
        A dict with:
        ``models`` - comma-separated, sorted, distinct non-blank model tags;
        ``avg_latency_ms`` - rounded mean latency over runs whose status is
        ``"succeeded"`` (``None`` when there is none);
        ``any_liked`` - whether any run is marked liked;
        ``rating_text`` - best numeric score per model as ``"<model>: <n>/10"``
        joined by ``" · "``, just ``"<n>/10"`` when the group has a single
        model, or ``None`` when nothing was rated.
    """
    if not runs:
        return empty_history_summary()

    # Group by model tag once. Blank / whitespace-only tags are ignored for
    # both the model list and the ratings, so the two always agree.
    by_model: Dict[str, List[Dict[str, Any]]] = defaultdict(list)
    for run in runs:
        tag = str(run.get("model_tag") or "")
        if tag.strip():
            by_model[tag].append(run)
    models = sorted(by_model)

    latencies: List[int] = []
    for run in runs:
        if str(run.get("status") or "") != "succeeded":
            continue
        latency = run.get("latency_ms")
        if latency is None:
            continue
        try:
            latencies.append(int(latency))
        except (TypeError, ValueError, OverflowError):
            # Non-numeric, NaN or infinite latency: leave it out of the mean.
            continue
    avg_lat = round(sum(latencies) / len(latencies)) if latencies else None

    any_liked = any(_run_row_liked(run.get("user_liked")) for run in runs)

    # Best score per model, kept as (model, score) pairs so the display text
    # never has to be parsed back out of a formatted string.
    best_scores = []
    for model in models:
        numeric: List[int] = []
        for run in by_model[model]:
            score = run.get("user_quality_score_0_10")
            if not isinstance(score, (int, float)):
                continue
            try:
                numeric.append(int(score))
            except (ValueError, OverflowError):
                # NaN / infinity: skip this score only, keep the model's others.
                continue
        if numeric:
            best_scores.append((model, max(numeric)))

    if not best_scores:
        rating_text = None
    elif len(models) == 1 and len(best_scores) == 1:
        rating_text = f"{best_scores[0][1]}/10"
    else:
        rating_text = " · ".join(f"{model}: {best}/10" for model, best in best_scores)

    return {
        "models": ", ".join(models),
        "avg_latency_ms": avg_lat,
        "any_liked": any_liked,
        "rating_text": rating_text,
    }
313
+
314
+
315
def history_summaries_for_group_ids(
    conn: sqlite3.Connection, group_ids: List[str]
) -> Dict[str, Dict[str, Any]]:
    """Map each requested group id to its run summary using a single query.

    Every id in ``group_ids`` gets an entry; ids without runs are summarized
    from an empty run list. An empty ``group_ids`` yields ``{}``.
    """
    ensure_schema(conn)
    if not group_ids:
        return {}

    marks = ",".join(["?"] * len(group_ids))
    rows = conn.execute(
        f"""
        SELECT * FROM filter_lab_run
        WHERE group_id IN ({marks})
        ORDER BY group_id, model_tag, record_id
        """,
        group_ids,
    ).fetchall()

    runs_by_group: Dict[str, List[Dict[str, Any]]] = defaultdict(list)
    for row in rows:
        record = dict(row)
        key = str(record.get("group_id") or "")
        if not key:
            continue
        runs_by_group[key].append(record)

    return {
        str(gid): _history_summary_from_runs(runs_by_group.get(str(gid), []))
        for gid in group_ids
    }
341
+
342
+
343
def insert_model_event(conn: sqlite3.Connection, group_id: str, event_type: str, model_tag: str) -> None:
    """Append a row, stamped with the current UTC time, to ``filter_lab_model_event``."""
    ensure_schema(conn)
    statement = """
        INSERT INTO filter_lab_model_event (group_id, event_type, model_tag, created_at)
        VALUES (?, ?, ?, ?)
    """
    event_row = (group_id, event_type, model_tag, _now())
    conn.execute(statement, event_row)
    conn.commit()
353
+
354
+
355
def delete_group(conn: sqlite3.Connection, group_id: str) -> None:
    """Delete one job group together with its run rows, then commit.

    Deleting an id that does not exist is a no-op.
    """
    ensure_schema(conn)
    # SQLite applies ON DELETE CASCADE only when foreign keys are enabled on
    # the connection, and neither the schema nor that setting is visible
    # here, so the runs are removed explicitly instead of relying on it.
    # NOTE(review): filter_lab_model_event rows are left untouched, as
    # before - confirm that is intended.
    conn.execute("DELETE FROM filter_lab_run WHERE group_id = ?", (group_id,))
    conn.execute("DELETE FROM filter_lab_job_group WHERE id = ?", (group_id,))
    conn.commit()
359
+
360
+
361
def prune_old_groups(conn: sqlite3.Connection, *, max_age_days: int = 30) -> int:
    """Delete job groups older than ``max_age_days`` together with their runs.

    Args:
        conn: Open SQLite connection.
        max_age_days: Age threshold in days, measured from the current UTC time.

    Returns:
        The number of job groups that were selected for deletion.
    """
    # Local import: the module-level datetime import does not include timedelta.
    from datetime import timedelta

    ensure_schema(conn)
    # created_at is written by _now() as an ISO 8601 UTC string, so a plain
    # string comparison against an ISO cutoff orders rows chronologically.
    cutoff_iso = (datetime.now(timezone.utc) - timedelta(days=max_age_days)).isoformat()
    stale_rows = conn.execute(
        "SELECT id FROM filter_lab_job_group WHERE created_at < ?", (cutoff_iso,)
    ).fetchall()
    id_params = [(row[0],) for row in stale_rows]

    # One transaction for the whole prune: commit on success, roll back on
    # error. Runs are deleted explicitly because SQLite only cascades deletes
    # when foreign keys are enabled on the connection, which is not visible here.
    with conn:
        conn.executemany("DELETE FROM filter_lab_run WHERE group_id = ?", id_params)
        conn.executemany("DELETE FROM filter_lab_job_group WHERE id = ?", id_params)
    return len(id_params)