topos-node 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- shared/__init__.py +59 -0
- shared/filtering.py +640 -0
- shared/schema_registry.py +229 -0
- topos/__init__.py +5 -0
- topos/__version__.py +6 -0
- topos/analytics/__init__.py +15 -0
- topos/analytics/duckdb_adapter.py +48 -0
- topos/analytics/messenger_communities.py +349 -0
- topos/analytics/messenger_graph.py +522 -0
- topos/analytics/messenger_labels.py +321 -0
- topos/analytics/profiles.py +22 -0
- topos/analytics/query_engine.py +64 -0
- topos/analytics/raw_queries.py +174 -0
- topos/api/__init__.py +1 -0
- topos/api/analytics.py +52 -0
- topos/api/app_registry.py +31 -0
- topos/api/backup.py +15 -0
- topos/api/compute_remote.py +175 -0
- topos/api/data_commit.py +158 -0
- topos/api/data_explorer_table_prefs.py +81 -0
- topos/api/db.py +10 -0
- topos/api/device.py +25 -0
- topos/api/enrichment.py +959 -0
- topos/api/filter_lab.py +195 -0
- topos/api/health.py +61 -0
- topos/api/ingestion_api.py +37 -0
- topos/api/ingestion_compat.py +21 -0
- topos/api/ingestion_sources.py +600 -0
- topos/api/llm.py +76 -0
- topos/api/local_mcp.py +46 -0
- topos/api/messenger_analytics.py +385 -0
- topos/api/query_api.py +13 -0
- topos/api/sanitization_ollama_config.py +64 -0
- topos/api/source_install.py +324 -0
- topos/api/sources.py +13 -0
- topos/api/sync.py +10 -0
- topos/api/ui_config.py +83 -0
- topos/api/uma_data.py +311 -0
- topos/api/usage.py +49 -0
- topos/api/user_identity.py +46 -0
- topos/app.py +239 -0
- topos/auth.py +17 -0
- topos/canonicalization/__init__.py +1 -0
- topos/canonicalization/mappers/__init__.py +22 -0
- topos/canonicalization/mappers/base.py +26 -0
- topos/canonicalization/mappers/chatgpt_mapper.py +40 -0
- topos/canonicalization/mappers/grok_mapper.py +17 -0
- topos/canonicalization/mappers/messenger_mapper.py +58 -0
- topos/canonicalization/models.py +31 -0
- topos/canonicalization/resolver.py +23 -0
- topos/cli/__init__.py +1 -0
- topos/cli/__main__.py +6 -0
- topos/cli/commands.py +132 -0
- topos/config/__init__.py +1 -0
- topos/config/sanitization_ollama.py +189 -0
- topos/config/settings.py +310 -0
- topos/contacts/__init__.py +5 -0
- topos/contacts/identity.py +24 -0
- topos/control_plane_client.py +300 -0
- topos/core/__init__.py +1 -0
- topos/core/api_models.py +128 -0
- topos/core/connection_resilience.py +99 -0
- topos/core/device_helpers.py +8 -0
- topos/core/errors.py +13 -0
- topos/core/events.py +12 -0
- topos/core/handlers.py +5625 -0
- topos/core/logging.py +175 -0
- topos/core/metrics.py +21 -0
- topos/core/startup_banner.py +62 -0
- topos/core/state.py +682 -0
- topos/core/table_layers.py +45 -0
- topos/core/types.py +13 -0
- topos/data_explorer_table_prefs.py +150 -0
- topos/engine/__init__.py +29 -0
- topos/engine/backends/__init__.py +50 -0
- topos/engine/backends/base.py +21 -0
- topos/engine/backends/huggingface.py +151 -0
- topos/engine/backends/ollama.py +181 -0
- topos/engine/backends/stub.py +22 -0
- topos/engine/engine.py +165 -0
- topos/engine/intake.py +32 -0
- topos/engine/queue_manager.py +112 -0
- topos/engine/registration.py +126 -0
- topos/engine/result_formatter.py +38 -0
- topos/engine/router.py +19 -0
- topos/engine/scoped_token.py +82 -0
- topos/engine/tasks.py +154 -0
- topos/engine/transport.py +44 -0
- topos/engine/usage_guard.py +100 -0
- topos/engine/usage_observation.py +129 -0
- topos/engine/validator.py +23 -0
- topos/enrichment/__init__.py +1 -0
- topos/enrichment/derived_tables.py +214 -0
- topos/enrichment/jobs/__init__.py +30 -0
- topos/enrichment/jobs/base.py +54 -0
- topos/enrichment/jobs/canonical/__init__.py +1 -0
- topos/enrichment/jobs/canonical/embeddings_job.py +27 -0
- topos/enrichment/jobs/canonical/emo_27_job.py +97 -0
- topos/enrichment/jobs/canonical/entities_job.py +27 -0
- topos/enrichment/jobs/canonical/sentiment_job.py +27 -0
- topos/enrichment/jobs/canonical/topics_job.py +27 -0
- topos/enrichment/jobs/raw/__init__.py +1 -0
- topos/enrichment/jobs/raw/attachments_job.py +12 -0
- topos/enrichment/jobs/raw/language_job.py +12 -0
- topos/enrichment/jobs/raw/time_normalization_job.py +12 -0
- topos/enrichment/jobs/raw/tool_calls_job.py +12 -0
- topos/enrichment/models/__init__.py +1 -0
- topos/enrichment/models/manager.py +8 -0
- topos/enrichment/models/registry.py +71 -0
- topos/enrichment/models/versioning.py +8 -0
- topos/enrichment/orchestrator.py +177 -0
- topos/enrichment/processor.py +17 -0
- topos/enrichment/progress_bar.py +122 -0
- topos/enrichment/website_classifier.py +31 -0
- topos/filter_lab/__init__.py +1 -0
- topos/filter_lab/bundles.py +300 -0
- topos/filter_lab/schema.py +86 -0
- topos/filter_lab/service.py +167 -0
- topos/filter_lab/store.py +374 -0
- topos/filter_lab/worker.py +250 -0
- topos/hosted_pool_lease.py +153 -0
- topos/ingestion/__init__.py +1 -0
- topos/ingestion/checkpoints/__init__.py +6 -0
- topos/ingestion/checkpoints/checkpoint_store.py +24 -0
- topos/ingestion/checkpoints/sqlite_checkpoint_store.py +82 -0
- topos/ingestion/ingest_helpers.py +504 -0
- topos/ingestion/jobs.py +91 -0
- topos/ingestion/local_sync.py +823 -0
- topos/ingestion/log_preview.py +21 -0
- topos/ingestion/manager.py +1100 -0
- topos/ingestion/parser.py +174 -0
- topos/ingestion/parsers/__init__.py +32 -0
- topos/ingestion/parsers/base.py +24 -0
- topos/ingestion/parsers/browser_parser.py +171 -0
- topos/ingestion/parsers/calendar_parser.py +21 -0
- topos/ingestion/parsers/chatgpt_conversation_flattener.py +266 -0
- topos/ingestion/parsers/chatgpt_parser.py +67 -0
- topos/ingestion/parsers/grok_parser.py +21 -0
- topos/ingestion/parsers/messenger_parser.py +97 -0
- topos/ingestion/progress.py +54 -0
- topos/ingestion/sources/__init__.py +20 -0
- topos/ingestion/sources/base.py +39 -0
- topos/ingestion/sources/calendar.py +29 -0
- topos/ingestion/sources/chatgpt.py +29 -0
- topos/ingestion/sources/contact_importers.py +274 -0
- topos/ingestion/sources/grok.py +29 -0
- topos/ingestion/sources/imessage_reader.py +479 -0
- topos/ingestion/sources/signal_export_parser.py +132 -0
- topos/ingestion/sources/signal_reader.py +491 -0
- topos/ingestion/state_machine.py +70 -0
- topos/ingestion/triggers/__init__.py +1 -0
- topos/ingestion/triggers/file_trigger.py +36 -0
- topos/ingestion/triggers/sqlite_trigger.py +18 -0
- topos/ingestion/validation/__init__.py +1 -0
- topos/ingestion/validation/base.py +27 -0
- topos/ingestion/validation/schema_registry.py +111 -0
- topos/ingestion/validation/schema_validator.py +13 -0
- topos/lineage/__init__.py +1 -0
- topos/lineage/provenance.py +9 -0
- topos/lineage/tracker.py +9 -0
- topos/mcp_stdio_proxy.py +83 -0
- topos/observability/__init__.py +1 -0
- topos/observability/alerts.py +7 -0
- topos/observability/metrics.py +25 -0
- topos/observability/tracing.py +18 -0
- topos/openai_client.py +69 -0
- topos/projections/__init__.py +1 -0
- topos/projections/vector_index/__init__.py +1 -0
- topos/projections/vector_index/base.py +21 -0
- topos/projections/vector_index/builders.py +11 -0
- topos/projections/vector_index/health_checks.py +5 -0
- topos/rate_limit.py +43 -0
- topos/sanitization/__init__.py +16 -0
- topos/sanitization/ollama_transforms.py +276 -0
- topos/scope_resolution.py +89 -0
- topos/services/__init__.py +1 -0
- topos/services/container.py +46 -0
- topos/services/embeddings/__init__.py +1 -0
- topos/services/embeddings/base.py +7 -0
- topos/services/embeddings/local.py +9 -0
- topos/services/embeddings/remote.py +9 -0
- topos/services/interfaces.py +40 -0
- topos/services/llm/__init__.py +1 -0
- topos/services/llm/base.py +7 -0
- topos/services/llm/openai.py +126 -0
- topos/services/local.py +123 -0
- topos/services/postgres.py +385 -0
- topos/sources/__init__.py +6 -0
- topos/sources/definitions.py +114 -0
- topos/sources/install_service.py +836 -0
- topos/sources/registry.py +263 -0
- topos/sources/runtime_install.py +427 -0
- topos/storage/__init__.py +1 -0
- topos/storage/canonical/__init__.py +18 -0
- topos/storage/canonical/ai_chat/__init__.py +22 -0
- topos/storage/canonical/ai_chat/canonicalizer.py +147 -0
- topos/storage/canonical/ai_chat/mapper.py +168 -0
- topos/storage/canonical/ai_chat/model.py +87 -0
- topos/storage/canonical/ai_chat/tables.py +179 -0
- topos/storage/canonical/canonical_store.py +24 -0
- topos/storage/canonical/conversations_tables.py +1020 -0
- topos/storage/canonical/mapping_store.py +30 -0
- topos/storage/canonical/postgres.py +10 -0
- topos/storage/db/__init__.py +1 -0
- topos/storage/db/client.py +8 -0
- topos/storage/db/migrations/__init__.py +1 -0
- topos/storage/db/migrations/stage9_column_renames.py +78 -0
- topos/storage/db/paths.py +122 -0
- topos/storage/db/postgres.py +240 -0
- topos/storage/db/schema.py +6 -0
- topos/storage/enrichment/__init__.py +1 -0
- topos/storage/enrichment/canonical_enrichment_store.py +7 -0
- topos/storage/enrichment/raw_enrichment_store.py +18 -0
- topos/storage/normalized/__init__.py +1 -0
- topos/storage/normalized/normalized_store.py +24 -0
- topos/storage/oplog/__init__.py +1 -0
- topos/storage/oplog/decision.py +6 -0
- topos/storage/oplog/oplog_store.py +17 -0
- topos/storage/oplog/postgres.py +10 -0
- topos/storage/projections/__init__.py +1 -0
- topos/storage/projections/index_ops_store.py +6 -0
- topos/storage/projections/vector_index_store.py +6 -0
- topos/storage/raw/__init__.py +1 -0
- topos/storage/raw/browser_flat_tables.py +303 -0
- topos/storage/raw/file_store.py +100 -0
- topos/storage/raw/raw_store.py +29 -0
- topos/storage/raw/raw_tables_manager.py +295 -0
- topos/storage/raw/sqlite_raw_store.py +17 -0
- topos/storage/security/encryption.py +21 -0
- topos/storage/signal_identity.py +71 -0
- topos/storage/source_settings.py +116 -0
- topos/storage/user_identity.py +69 -0
- topos/sync/__init__.py +5 -0
- topos/sync/client.py +272 -0
- topos/sync_handlers.py +70 -0
- topos/testing/__init__.py +1 -0
- topos/testing/lifespan.py +7 -0
- topos/uma_contact_enrichment.py +1032 -0
- topos/uma_filters.py +669 -0
- topos/uma_resource_id.py +24 -0
- topos/uma_rpt.py +69 -0
- topos/utils/base_object.py +61 -0
- topos/websocket_client.py +21 -0
- topos_node-0.1.0.dist-info/METADATA +199 -0
- topos_node-0.1.0.dist-info/RECORD +249 -0
- topos_node-0.1.0.dist-info/WHEEL +5 -0
- topos_node-0.1.0.dist-info/entry_points.txt +2 -0
- topos_node-0.1.0.dist-info/licenses/LICENSE +201 -0
- topos_node-0.1.0.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
"""Orchestration for Filter Lab (create job, apply winner, helpers)."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
import json
|
|
7
|
+
import logging
|
|
8
|
+
from typing import Any, Dict, List, Optional, Set
|
|
9
|
+
|
|
10
|
+
from topos.config.sanitization_ollama import (
|
|
11
|
+
ENGINE_CONFIG_KEY_SANITIZATION_OLLAMA_DEVICE,
|
|
12
|
+
SANITIZATION_OLLAMA_TRANSFORM_IDS,
|
|
13
|
+
normalize_put_device_overrides,
|
|
14
|
+
resolve_sanitization_ollama_effective,
|
|
15
|
+
)
|
|
16
|
+
from topos.config.settings import settings
|
|
17
|
+
from topos.core.state import get_db_connection, get_engine_config_value, set_engine_config_value
|
|
18
|
+
from topos.engine.backends.ollama import OllamaAdapter
|
|
19
|
+
|
|
20
|
+
from . import bundles as bundles_mod
|
|
21
|
+
from . import store
|
|
22
|
+
from . import worker
|
|
23
|
+
|
|
24
|
+
# Module-level logger for the Filter Lab service layer.
logger = logging.getLogger("topos.filter_lab.service")
|
|
25
|
+
|
|
26
|
+
# Tasks created by schedule_process_job_group are added here and removed
# again by a done-callback once they finish.
_background_tasks: Set[asyncio.Task[Any]] = set()
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def schedule_process_job_group(group_id: str) -> None:
    """Start processing a Filter Lab job group without blocking the caller.

    When an asyncio event loop is running in the current thread, the blocking
    ``worker.process_job_group_sync`` call is off-loaded with
    ``asyncio.to_thread`` inside a task. When no loop is running, the worker
    is executed synchronously before this function returns.

    Args:
        group_id: Identifier of the job group to process.
    """

    async def _run() -> None:
        await asyncio.to_thread(worker.process_job_group_sync, group_id)

    def _on_done(task: "asyncio.Task[Any]") -> None:
        # Always release our reference first so the set cannot leak entries.
        _background_tasks.discard(task)
        if task.cancelled():
            return
        # Retrieving the exception marks it as observed; otherwise asyncio
        # would only report it ("Task exception was never retrieved") when
        # the task object is garbage-collected.
        exc = task.exception()
        if exc is not None:
            logger.error(
                "Filter Lab job group %s failed in background task",
                group_id,
                exc_info=exc,
            )

    try:
        loop = asyncio.get_running_loop()
    except RuntimeError:
        # No running event loop in this thread: run inline instead.
        worker.process_job_group_sync(group_id)
        return
    task = loop.create_task(_run())
    # Hold a reference until the task completes (see asyncio.create_task docs).
    _background_tasks.add(task)
    task.add_done_callback(_on_done)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def create_job_group(
    *,
    filter_id: str,
    bundle_id: str,
    models: List[str],
    options: Optional[Dict[str, Any]] = None,
) -> str:
    """Validate a Filter Lab request, persist the job group, and schedule it.

    Args:
        filter_id: Transform id; must be in ``SANITIZATION_OLLAMA_TRANSFORM_IDS``.
        bundle_id: Id of the record bundle to run against.
        models: Model tags to compare. Entries are stringified and stripped;
            blanks are dropped and duplicates are removed (first occurrence
            wins) so each model gets exactly one run per record.
        options: Optional free-form options stored with the group.

    Returns:
        The id of the newly inserted job group.

    Raises:
        RuntimeError: If no database connection is available.
        ValueError: If the filter, bundle, or model list is invalid, the bundle
            is incompatible with the filter, or the bundle has no records.
    """
    conn = get_db_connection()
    if not conn:
        raise RuntimeError("Database not available")

    if filter_id not in SANITIZATION_OLLAMA_TRANSFORM_IDS:
        raise ValueError(f"filter_id {filter_id!r} is not runnable in Filter Lab (Ollama sanitization)")

    bundle = bundles_mod.get_bundle(bundle_id)
    if not bundle:
        raise ValueError(f"Unknown bundle_id: {bundle_id!r}")

    if not bundles_mod.is_bundle_compatible_with_filter(bundle, filter_id):
        raise ValueError("Bundle is not compatible with this filter")

    stripped = (str(m).strip() for m in models)
    # dict.fromkeys de-duplicates while preserving the caller's order.
    clean_models = list(dict.fromkeys(tag for tag in stripped if tag))
    if not clean_models:
        raise ValueError("models must contain at least one model tag")

    # Finish all local validation before touching the Ollama adapter, so an
    # empty bundle fails fast instead of after the adapter call.
    record_ids = bundles_mod.bundle_record_ids(bundle)
    if not record_ids:
        raise ValueError("Bundle has no records")

    eff = resolve_sanitization_ollama_effective(settings, conn)
    adapter = OllamaAdapter(base_url=eff.host)
    # Model list as reported by the adapter at creation time; stored on the
    # group as its baseline.
    baseline = adapter.list_models()

    gid = store.insert_group(
        conn,
        filter_id=filter_id,
        bundle_id=bundle_id,
        bundle_version=str(bundle["bundle_version"]),
        baseline_models=baseline,
        models=clean_models,
        record_ids=record_ids,
        options=options or {},
    )
    schedule_process_job_group(gid)
    return gid
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def apply_preferred_model(group_id: str) -> Dict[str, Any]:
    """Merge the group's preferred_model_tag into device sanitization models for filter_id.

    The stored device-override JSON is read, its ``models`` mapping is updated
    so that the group's ``filter_id`` points at the preferred model tag, and
    the normalized result is written back to the engine config.

    Args:
        group_id: Id of the job group whose preferred model should be applied.

    Returns:
        ``{"status": "ok"}`` merged with the output of
        ``effective_config_for_api(settings, conn)``.

    Raises:
        RuntimeError: If no database connection is available.
        ValueError: If the group does not exist, has no preferred model tag,
            or the preferred tag was not one of the group's runs.
    """
    from topos.config.sanitization_ollama import effective_config_for_api

    conn = get_db_connection()
    if not conn:
        raise RuntimeError("Database not available")

    row = store.get_group(conn, group_id)
    if not row:
        raise ValueError("Job group not found")
    group = dict(row)
    preferred = (group.get("preferred_model_tag") or "").strip()
    if not preferred:
        raise ValueError("preferred_model_tag is not set on this job group")

    filter_id = group["filter_id"]
    model_tags_in_group = {dict(r)["model_tag"] for r in store.list_runs(conn, group_id)}
    if preferred not in model_tags_in_group:
        raise ValueError("preferred_model_tag was not part of this job group")

    raw = get_engine_config_value(conn, ENGINE_CONFIG_KEY_SANITIZATION_OLLAMA_DEVICE) or "{}"
    try:
        existing = json.loads(raw)
    except json.JSONDecodeError:
        existing = {}
    if not isinstance(existing, dict):
        existing = {}

    # Corrupt stored JSON is already tolerated above; treat an unparseable
    # version the same way instead of failing the whole request.
    try:
        version = int(existing.get("version") or 1)
    except (TypeError, ValueError):
        version = 1
    merged: Dict[str, Any] = {"version": version}

    # Carry over only the known scalar overrides that are actually set.
    for k in ("enabled", "host", "default_model", "timeout_sec", "max_input_chars"):
        if existing.get(k) is not None:
            merged[k] = existing[k]

    stored_models = existing.get("models")
    models = dict(stored_models) if isinstance(stored_models, dict) else {}
    models[filter_id] = preferred
    merged["models"] = models

    json_str = normalize_put_device_overrides({"device_overrides": merged})
    set_engine_config_value(conn, ENGINE_CONFIG_KEY_SANITIZATION_OLLAMA_DEVICE, json_str)
    return {"status": "ok", **effective_config_for_api(settings, conn)}
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def enrich_job_groups_list_with_run_summaries(conn: Any, groups: List[Dict[str, Any]]) -> None:
    """Add a ``history_summary`` entry to every group dict, in place.

    Summaries come from ``store.history_summaries_for_group_ids``; groups
    without a matching summary receive ``store.empty_history_summary()``.
    """
    if not groups:
        return
    wanted_ids: List[str] = []
    for entry in groups:
        if entry.get("id"):
            wanted_ids.append(str(entry["id"]))
    by_group = store.history_summaries_for_group_ids(conn, wanted_ids)
    for entry in groups:
        key = str(entry.get("id") or "")
        entry["history_summary"] = by_group.get(key, store.empty_history_summary())
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def serialize_job_group(conn: Any, group_id: str) -> Dict[str, Any]:
    """Build the API representation of a job group and its runs.

    The ``*_json`` columns of the group row are decoded into
    ``baseline_models``, ``pulled_models`` and ``options``; each run's
    ``user_liked`` value is mapped from 1/0/other to True/False/None.

    Args:
        conn: Database connection passed through to the store helpers.
        group_id: Id of the job group to serialize.

    Returns:
        ``{"group": <group dict>, "runs": [<run dict>, ...]}``.

    Raises:
        KeyError: If no job group with ``group_id`` exists.
    """
    row = store.get_group(conn, group_id)
    if not row:
        raise KeyError(group_id)
    g = dict(row)

    def _list_column(column: str) -> Any:
        # pop() with a default: a missing column must not raise a KeyError
        # that callers would mistake for "group not found". Undecodable
        # content degrades to an empty list, matching how options_json is
        # handled below.
        raw = g.pop(column, None)
        try:
            return json.loads(raw or "[]")
        except (TypeError, ValueError):
            return []

    g["baseline_models"] = _list_column("baseline_models_json")
    g["pulled_models"] = _list_column("pulled_models_json")

    opt_raw = g.pop("options_json", "{}")
    try:
        g["options"] = json.loads(opt_raw) if isinstance(opt_raw, str) else {}
    except json.JSONDecodeError:
        g["options"] = {}

    runs_out = []
    for r in store.list_runs(conn, group_id):
        rd = dict(r)
        ul = rd.get("user_liked")
        # Stored as 1/0/NULL; expose as a tri-state boolean.
        if ul == 1:
            rd["user_liked"] = True
        elif ul == 0:
            rd["user_liked"] = False
        else:
            rd["user_liked"] = None
        runs_out.append(rd)
    return {"group": g, "runs": runs_out}
|
|
@@ -0,0 +1,374 @@
|
|
|
1
|
+
"""Persistence helpers for Filter Lab."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import sqlite3
|
|
7
|
+
import uuid
|
|
8
|
+
from datetime import datetime, timezone
|
|
9
|
+
from collections import defaultdict
|
|
10
|
+
from typing import Any, Dict, List, Optional
|
|
11
|
+
|
|
12
|
+
from .schema import ensure_filter_lab_schema
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _now() -> str:
|
|
16
|
+
return datetime.now(timezone.utc).isoformat()
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def utc_now_iso() -> str:
    """Public ISO timestamp helper for run boundaries (used by the worker)."""
    # Same expression as the private _now() helper, written out inline.
    return datetime.now(timezone.utc).isoformat()
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def ensure_schema(conn: sqlite3.Connection) -> None:
    """Delegate to ``ensure_filter_lab_schema`` for the given connection."""
    ensure_filter_lab_schema(conn)
    return None
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def insert_group(
    conn: sqlite3.Connection,
    *,
    filter_id: str,
    bundle_id: str,
    bundle_version: str,
    baseline_models: List[str],
    models: List[str],
    record_ids: List[str],
    options: Optional[Dict[str, Any]] = None,
) -> str:
    """Insert a job group plus one queued run per (model, record) pair.

    The group row is created with status ``"pending"`` and an empty
    ``pulled_models_json`` list; every run row starts with status ``"queued"``.
    All rows are written in a single transaction: either the group and all
    of its runs are committed, or nothing is.

    Args:
        conn: SQLite connection to write to.
        filter_id: Transform id stored on the group.
        bundle_id: Bundle id stored on the group.
        bundle_version: Bundle version stored on the group.
        baseline_models: Model list stored as JSON in ``baseline_models_json``.
        models: Model tags to create runs for.
        record_ids: Record ids to create runs for.
        options: Optional options dict stored as JSON (``{}`` when omitted).

    Returns:
        The generated group id (a UUID4 string).
    """
    ensure_schema(conn)
    gid = str(uuid.uuid4())
    created = _now()
    run_rows = [
        (str(uuid.uuid4()), gid, model_tag, rid, "queued")
        for model_tag in models
        for rid in record_ids
    ]
    # The connection context manager commits on success and rolls back on
    # error, so a failed run INSERT cannot leave a half-written group pending
    # in the transaction for some later commit to persist.
    with conn:
        conn.execute(
            """
            INSERT INTO filter_lab_job_group (
                id, created_at, filter_id, bundle_id, bundle_version, status,
                baseline_models_json, pulled_models_json, options_json
            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
            """,
            (
                gid,
                created,
                filter_id,
                bundle_id,
                bundle_version,
                "pending",
                json.dumps(baseline_models),
                json.dumps([]),
                json.dumps(options or {}),
            ),
        )
        conn.executemany(
            """
            INSERT INTO filter_lab_run (
                id, group_id, model_tag, record_id, status
            ) VALUES (?, ?, ?, ?, ?)
            """,
            run_rows,
        )
    return gid
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def get_group(conn: sqlite3.Connection, group_id: str) -> Optional[sqlite3.Row]:
    """Fetch the job-group row with the given id, or ``None`` if absent."""
    ensure_schema(conn)
    query = "SELECT * FROM filter_lab_job_group WHERE id = ?"
    return conn.execute(query, (group_id,)).fetchone()
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def list_runs(conn: sqlite3.Connection, group_id: str) -> List[sqlite3.Row]:
    """Return all run rows of a group, ordered by model tag then record id."""
    ensure_schema(conn)
    query = "SELECT * FROM filter_lab_run WHERE group_id = ? ORDER BY model_tag, record_id"
    rows = conn.execute(query, (group_id,)).fetchall()
    return list(rows)
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def update_group_status(conn: sqlite3.Connection, group_id: str, status: str) -> None:
    """Set the ``status`` column of one job group and commit."""
    ensure_schema(conn)
    statement = "UPDATE filter_lab_job_group SET status = ? WHERE id = ?"
    params = (status, group_id)
    conn.execute(statement, params)
    conn.commit()
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def set_group_pulled_models(conn: sqlite3.Connection, group_id: str, pulled: List[str]) -> None:
    """Store ``pulled`` as JSON in the group's ``pulled_models_json`` and commit."""
    ensure_schema(conn)
    encoded = json.dumps(pulled)
    statement = "UPDATE filter_lab_job_group SET pulled_models_json = ? WHERE id = ?"
    conn.execute(statement, (encoded, group_id))
    conn.commit()
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def update_run(
    conn: sqlite3.Connection,
    run_id: str,
    *,
    status: Optional[str] = None,
    started_at: Optional[str] = None,
    finished_at: Optional[str] = None,
    latency_ms: Optional[int] = None,
    error_code: Optional[str] = None,
    input_hash: Optional[str] = None,
    input_text: Optional[str] = None,
    output_text: Optional[str] = None,
    metrics_json: Optional[str] = None,
) -> None:
    """Update the supplied columns of one run row and commit.

    Arguments left as ``None`` are not touched; when every argument is
    ``None`` no statement is executed.
    """
    ensure_schema(conn)
    # Column names are fixed literals, so interpolating them into the SQL
    # text below is safe; values always go through bound parameters.
    candidates = (
        ("status", status),
        ("started_at", started_at),
        ("finished_at", finished_at),
        ("latency_ms", latency_ms),
        ("error_code", error_code),
        ("input_hash", input_hash),
        ("input_text", input_text),
        ("output_text", output_text),
        ("metrics_json", metrics_json),
    )
    provided = [(column, value) for column, value in candidates if value is not None]
    if not provided:
        return
    assignments = ", ".join(f"{column} = ?" for column, _ in provided)
    params: List[Any] = [value for _, value in provided]
    params.append(run_id)
    conn.execute(f"UPDATE filter_lab_run SET {assignments} WHERE id = ?", params)
    conn.commit()
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def patch_group(
    conn: sqlite3.Connection,
    group_id: str,
    *,
    preferred_model_tag: Any = None,
    group_notes: Any = None,
    notes: Any = None,
) -> None:
    """Update user-editable columns of a job group and commit.

    Does nothing when the group does not exist or when every argument is
    ``None`` (``None`` means "leave unchanged").
    """
    ensure_schema(conn)
    if not get_group(conn, group_id):
        return
    candidates = (
        ("preferred_model_tag", preferred_model_tag),
        ("group_notes", group_notes),
        ("notes", notes),
    )
    provided = [(column, value) for column, value in candidates if value is not None]
    if not provided:
        return
    assignments = ", ".join(f"{column} = ?" for column, _ in provided)
    params: List[Any] = [value for _, value in provided]
    params.append(group_id)
    conn.execute(f"UPDATE filter_lab_job_group SET {assignments} WHERE id = ?", params)
    conn.commit()
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
def patch_run(
    conn: sqlite3.Connection,
    run_id: str,
    *,
    user_quality_score_0_10: Any = None,
    user_liked: Any = None,
    user_note: Any = None,
    rated_at: Any = None,
) -> None:
    """Update the user-feedback columns of one run row and commit.

    Arguments left as ``None`` are not touched. ``user_liked`` is stored as
    1 for ``True``, 0 for ``False`` and NULL for any other non-None value.
    """
    ensure_schema(conn)
    assignments: List[str] = []
    params: List[Any] = []

    def _set(column: str, value: Any) -> None:
        assignments.append(f"{column} = ?")
        params.append(value)

    if user_quality_score_0_10 is not None:
        _set("user_quality_score_0_10", user_quality_score_0_10)
    if user_liked is not None:
        # Identity checks on purpose: only real booleans map to 1/0.
        if user_liked is True:
            liked_db = 1
        elif user_liked is False:
            liked_db = 0
        else:
            liked_db = None
        _set("user_liked", liked_db)
    if user_note is not None:
        _set("user_note", user_note)
    if rated_at is not None:
        _set("rated_at", rated_at)

    if not assignments:
        return
    params.append(run_id)
    conn.execute(f"UPDATE filter_lab_run SET {', '.join(assignments)} WHERE id = ?", params)
    conn.commit()
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
def list_groups_for_filter(
    conn: sqlite3.Connection, filter_id: str, *, limit: int = 20, offset: int = 0
) -> List[sqlite3.Row]:
    """Return one page of job groups for ``filter_id``, newest first."""
    ensure_schema(conn)
    query = """
        SELECT * FROM filter_lab_job_group
        WHERE filter_id = ?
        ORDER BY created_at DESC
        LIMIT ? OFFSET ?
        """
    page = conn.execute(query, (filter_id, limit, offset)).fetchall()
    return list(page)
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
def list_all_job_groups(
    conn: sqlite3.Connection, *, limit: int = 50, offset: int = 0
) -> List[sqlite3.Row]:
    """Return one page of job groups across all transforms, newest first."""
    ensure_schema(conn)
    query = """
        SELECT * FROM filter_lab_job_group
        ORDER BY created_at DESC
        LIMIT ? OFFSET ?
        """
    page = conn.execute(query, (limit, offset)).fetchall()
    return list(page)
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
def _run_row_liked(val: Any) -> bool:
|
|
251
|
+
return val is True or val == 1
|
|
252
|
+
|
|
253
|
+
|
|
254
|
+
def empty_history_summary() -> Dict[str, Any]:
    """Return a fresh summary dict for a group without any runs."""
    summary: Dict[str, Any] = dict(
        models="",
        avg_latency_ms=None,
        any_liked=False,
        rating_text=None,
    )
    return summary
|
|
262
|
+
|
|
263
|
+
|
|
264
|
+
def _history_summary_from_runs(runs: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Condense run dicts into the summary shown in list/history views.

    The result carries the sorted, comma-joined model tags, the rounded mean
    latency of succeeded runs, whether any run was liked, and a text built
    from the best user score per model.
    """
    if not runs:
        return empty_history_summary()

    all_tags = {str(run.get("model_tag") or "") for run in runs}
    models = sorted(tag for tag in all_tags if tag.strip())

    # Only succeeded runs with a usable latency value count toward the mean.
    latencies: List[int] = []
    for run in runs:
        raw_latency = run.get("latency_ms")
        if str(run.get("status") or "") == "succeeded" and raw_latency is not None:
            try:
                latencies.append(int(raw_latency))
            except (TypeError, ValueError):
                pass
    avg_lat = None
    if latencies:
        avg_lat = round(sum(latencies) / len(latencies))

    any_liked = False
    for run in runs:
        if _run_row_liked(run.get("user_liked")):
            any_liked = True
            break

    by_model: Dict[str, List[Dict[str, Any]]] = {}
    for run in runs:
        tag = str(run.get("model_tag") or "")
        if tag:
            by_model.setdefault(tag, []).append(run)

    rating_parts: List[str] = []
    for tag in sorted(by_model):
        scores = []
        for run in by_model[tag]:
            score = run.get("user_quality_score_0_10")
            if score is not None:
                scores.append(score)
        if not scores:
            continue
        try:
            # max() raises ValueError when no score is numeric.
            best = max(int(s) for s in scores if isinstance(s, (int, float)))
        except ValueError:
            continue
        rating_parts.append(f"{tag}: {best}/10")

    if rating_parts:
        if len(models) == 1 and len(rating_parts) == 1:
            # Single model: drop the "<model>: " prefix.
            rating_text = rating_parts[0].split(": ", 1)[-1]
        else:
            rating_text = " · ".join(rating_parts)
    else:
        rating_text = None

    return {
        "models": ", ".join(models),
        "avg_latency_ms": avg_lat,
        "any_liked": any_liked,
        "rating_text": rating_text,
    }
|
|
313
|
+
|
|
314
|
+
|
|
315
|
+
def history_summaries_for_group_ids(
    conn: sqlite3.Connection, group_ids: List[str]
) -> Dict[str, Dict[str, Any]]:
    """Load all runs of the given groups in one query and summarize per group.

    Returns a mapping of ``str(group_id)`` to its summary dict; an empty
    ``group_ids`` list yields an empty mapping.
    """
    ensure_schema(conn)
    if not group_ids:
        return {}
    # One "?" per id; the ids themselves are passed as bound parameters.
    marks = ",".join("?" for _ in group_ids)
    query = f"""
        SELECT * FROM filter_lab_run
        WHERE group_id IN ({marks})
        ORDER BY group_id, model_tag, record_id
        """
    grouped: Dict[str, List[Dict[str, Any]]] = {}
    for raw_row in conn.execute(query, group_ids).fetchall():
        record = dict(raw_row)
        key = str(record.get("group_id") or "")
        if key:
            grouped.setdefault(key, []).append(record)
    return {
        str(gid): _history_summary_from_runs(grouped.get(str(gid), []))
        for gid in group_ids
    }
|
|
341
|
+
|
|
342
|
+
|
|
343
|
+
def insert_model_event(conn: sqlite3.Connection, group_id: str, event_type: str, model_tag: str) -> None:
    """Append a row to ``filter_lab_model_event`` stamped with the current time and commit."""
    ensure_schema(conn)
    statement = """
        INSERT INTO filter_lab_model_event (group_id, event_type, model_tag, created_at)
        VALUES (?, ?, ?, ?)
        """
    event_row = (group_id, event_type, model_tag, _now())
    conn.execute(statement, event_row)
    conn.commit()
|
|
353
|
+
|
|
354
|
+
|
|
355
|
+
def delete_group(conn: sqlite3.Connection, group_id: str) -> None:
    """Delete the job-group row with the given id and commit."""
    ensure_schema(conn)
    statement = "DELETE FROM filter_lab_job_group WHERE id = ?"
    conn.execute(statement, (group_id,))
    conn.commit()
|
|
359
|
+
|
|
360
|
+
|
|
361
|
+
def prune_old_groups(conn: sqlite3.Connection, *, max_age_days: int = 30) -> int:
    """Delete job groups whose ``created_at`` is older than ``max_age_days``.

    Args:
        conn: SQLite connection to operate on.
        max_age_days: Age threshold in days (default 30).

    Returns:
        The number of job-group rows deleted.
    """
    # NOTE(review): only filter_lab_job_group rows are deleted here; run rows
    # disappear too only if the schema cascades deletes — confirm in schema.py.
    ensure_schema(conn)
    cutoff_ts = datetime.now(timezone.utc).timestamp() - max_age_days * 86400
    # created_at is written by insert_group as an ISO-8601 UTC string, so a
    # plain string comparison against another such string orders by time.
    cutoff_iso = datetime.fromtimestamp(cutoff_ts, tz=timezone.utc).isoformat()
    # One set-based DELETE replaces the previous select-then-delete-per-id loop.
    cur = conn.execute("DELETE FROM filter_lab_job_group WHERE created_at < ?", (cutoff_iso,))
    conn.commit()
    return cur.rowcount
|