topos-node 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (249) hide show
  1. shared/__init__.py +59 -0
  2. shared/filtering.py +640 -0
  3. shared/schema_registry.py +229 -0
  4. topos/__init__.py +5 -0
  5. topos/__version__.py +6 -0
  6. topos/analytics/__init__.py +15 -0
  7. topos/analytics/duckdb_adapter.py +48 -0
  8. topos/analytics/messenger_communities.py +349 -0
  9. topos/analytics/messenger_graph.py +522 -0
  10. topos/analytics/messenger_labels.py +321 -0
  11. topos/analytics/profiles.py +22 -0
  12. topos/analytics/query_engine.py +64 -0
  13. topos/analytics/raw_queries.py +174 -0
  14. topos/api/__init__.py +1 -0
  15. topos/api/analytics.py +52 -0
  16. topos/api/app_registry.py +31 -0
  17. topos/api/backup.py +15 -0
  18. topos/api/compute_remote.py +175 -0
  19. topos/api/data_commit.py +158 -0
  20. topos/api/data_explorer_table_prefs.py +81 -0
  21. topos/api/db.py +10 -0
  22. topos/api/device.py +25 -0
  23. topos/api/enrichment.py +959 -0
  24. topos/api/filter_lab.py +195 -0
  25. topos/api/health.py +61 -0
  26. topos/api/ingestion_api.py +37 -0
  27. topos/api/ingestion_compat.py +21 -0
  28. topos/api/ingestion_sources.py +600 -0
  29. topos/api/llm.py +76 -0
  30. topos/api/local_mcp.py +46 -0
  31. topos/api/messenger_analytics.py +385 -0
  32. topos/api/query_api.py +13 -0
  33. topos/api/sanitization_ollama_config.py +64 -0
  34. topos/api/source_install.py +324 -0
  35. topos/api/sources.py +13 -0
  36. topos/api/sync.py +10 -0
  37. topos/api/ui_config.py +83 -0
  38. topos/api/uma_data.py +311 -0
  39. topos/api/usage.py +49 -0
  40. topos/api/user_identity.py +46 -0
  41. topos/app.py +239 -0
  42. topos/auth.py +17 -0
  43. topos/canonicalization/__init__.py +1 -0
  44. topos/canonicalization/mappers/__init__.py +22 -0
  45. topos/canonicalization/mappers/base.py +26 -0
  46. topos/canonicalization/mappers/chatgpt_mapper.py +40 -0
  47. topos/canonicalization/mappers/grok_mapper.py +17 -0
  48. topos/canonicalization/mappers/messenger_mapper.py +58 -0
  49. topos/canonicalization/models.py +31 -0
  50. topos/canonicalization/resolver.py +23 -0
  51. topos/cli/__init__.py +1 -0
  52. topos/cli/__main__.py +6 -0
  53. topos/cli/commands.py +132 -0
  54. topos/config/__init__.py +1 -0
  55. topos/config/sanitization_ollama.py +189 -0
  56. topos/config/settings.py +310 -0
  57. topos/contacts/__init__.py +5 -0
  58. topos/contacts/identity.py +24 -0
  59. topos/control_plane_client.py +300 -0
  60. topos/core/__init__.py +1 -0
  61. topos/core/api_models.py +128 -0
  62. topos/core/connection_resilience.py +99 -0
  63. topos/core/device_helpers.py +8 -0
  64. topos/core/errors.py +13 -0
  65. topos/core/events.py +12 -0
  66. topos/core/handlers.py +5625 -0
  67. topos/core/logging.py +175 -0
  68. topos/core/metrics.py +21 -0
  69. topos/core/startup_banner.py +62 -0
  70. topos/core/state.py +682 -0
  71. topos/core/table_layers.py +45 -0
  72. topos/core/types.py +13 -0
  73. topos/data_explorer_table_prefs.py +150 -0
  74. topos/engine/__init__.py +29 -0
  75. topos/engine/backends/__init__.py +50 -0
  76. topos/engine/backends/base.py +21 -0
  77. topos/engine/backends/huggingface.py +151 -0
  78. topos/engine/backends/ollama.py +181 -0
  79. topos/engine/backends/stub.py +22 -0
  80. topos/engine/engine.py +165 -0
  81. topos/engine/intake.py +32 -0
  82. topos/engine/queue_manager.py +112 -0
  83. topos/engine/registration.py +126 -0
  84. topos/engine/result_formatter.py +38 -0
  85. topos/engine/router.py +19 -0
  86. topos/engine/scoped_token.py +82 -0
  87. topos/engine/tasks.py +154 -0
  88. topos/engine/transport.py +44 -0
  89. topos/engine/usage_guard.py +100 -0
  90. topos/engine/usage_observation.py +129 -0
  91. topos/engine/validator.py +23 -0
  92. topos/enrichment/__init__.py +1 -0
  93. topos/enrichment/derived_tables.py +214 -0
  94. topos/enrichment/jobs/__init__.py +30 -0
  95. topos/enrichment/jobs/base.py +54 -0
  96. topos/enrichment/jobs/canonical/__init__.py +1 -0
  97. topos/enrichment/jobs/canonical/embeddings_job.py +27 -0
  98. topos/enrichment/jobs/canonical/emo_27_job.py +97 -0
  99. topos/enrichment/jobs/canonical/entities_job.py +27 -0
  100. topos/enrichment/jobs/canonical/sentiment_job.py +27 -0
  101. topos/enrichment/jobs/canonical/topics_job.py +27 -0
  102. topos/enrichment/jobs/raw/__init__.py +1 -0
  103. topos/enrichment/jobs/raw/attachments_job.py +12 -0
  104. topos/enrichment/jobs/raw/language_job.py +12 -0
  105. topos/enrichment/jobs/raw/time_normalization_job.py +12 -0
  106. topos/enrichment/jobs/raw/tool_calls_job.py +12 -0
  107. topos/enrichment/models/__init__.py +1 -0
  108. topos/enrichment/models/manager.py +8 -0
  109. topos/enrichment/models/registry.py +71 -0
  110. topos/enrichment/models/versioning.py +8 -0
  111. topos/enrichment/orchestrator.py +177 -0
  112. topos/enrichment/processor.py +17 -0
  113. topos/enrichment/progress_bar.py +122 -0
  114. topos/enrichment/website_classifier.py +31 -0
  115. topos/filter_lab/__init__.py +1 -0
  116. topos/filter_lab/bundles.py +300 -0
  117. topos/filter_lab/schema.py +86 -0
  118. topos/filter_lab/service.py +167 -0
  119. topos/filter_lab/store.py +374 -0
  120. topos/filter_lab/worker.py +250 -0
  121. topos/hosted_pool_lease.py +153 -0
  122. topos/ingestion/__init__.py +1 -0
  123. topos/ingestion/checkpoints/__init__.py +6 -0
  124. topos/ingestion/checkpoints/checkpoint_store.py +24 -0
  125. topos/ingestion/checkpoints/sqlite_checkpoint_store.py +82 -0
  126. topos/ingestion/ingest_helpers.py +504 -0
  127. topos/ingestion/jobs.py +91 -0
  128. topos/ingestion/local_sync.py +823 -0
  129. topos/ingestion/log_preview.py +21 -0
  130. topos/ingestion/manager.py +1100 -0
  131. topos/ingestion/parser.py +174 -0
  132. topos/ingestion/parsers/__init__.py +32 -0
  133. topos/ingestion/parsers/base.py +24 -0
  134. topos/ingestion/parsers/browser_parser.py +171 -0
  135. topos/ingestion/parsers/calendar_parser.py +21 -0
  136. topos/ingestion/parsers/chatgpt_conversation_flattener.py +266 -0
  137. topos/ingestion/parsers/chatgpt_parser.py +67 -0
  138. topos/ingestion/parsers/grok_parser.py +21 -0
  139. topos/ingestion/parsers/messenger_parser.py +97 -0
  140. topos/ingestion/progress.py +54 -0
  141. topos/ingestion/sources/__init__.py +20 -0
  142. topos/ingestion/sources/base.py +39 -0
  143. topos/ingestion/sources/calendar.py +29 -0
  144. topos/ingestion/sources/chatgpt.py +29 -0
  145. topos/ingestion/sources/contact_importers.py +274 -0
  146. topos/ingestion/sources/grok.py +29 -0
  147. topos/ingestion/sources/imessage_reader.py +479 -0
  148. topos/ingestion/sources/signal_export_parser.py +132 -0
  149. topos/ingestion/sources/signal_reader.py +491 -0
  150. topos/ingestion/state_machine.py +70 -0
  151. topos/ingestion/triggers/__init__.py +1 -0
  152. topos/ingestion/triggers/file_trigger.py +36 -0
  153. topos/ingestion/triggers/sqlite_trigger.py +18 -0
  154. topos/ingestion/validation/__init__.py +1 -0
  155. topos/ingestion/validation/base.py +27 -0
  156. topos/ingestion/validation/schema_registry.py +111 -0
  157. topos/ingestion/validation/schema_validator.py +13 -0
  158. topos/lineage/__init__.py +1 -0
  159. topos/lineage/provenance.py +9 -0
  160. topos/lineage/tracker.py +9 -0
  161. topos/mcp_stdio_proxy.py +83 -0
  162. topos/observability/__init__.py +1 -0
  163. topos/observability/alerts.py +7 -0
  164. topos/observability/metrics.py +25 -0
  165. topos/observability/tracing.py +18 -0
  166. topos/openai_client.py +69 -0
  167. topos/projections/__init__.py +1 -0
  168. topos/projections/vector_index/__init__.py +1 -0
  169. topos/projections/vector_index/base.py +21 -0
  170. topos/projections/vector_index/builders.py +11 -0
  171. topos/projections/vector_index/health_checks.py +5 -0
  172. topos/rate_limit.py +43 -0
  173. topos/sanitization/__init__.py +16 -0
  174. topos/sanitization/ollama_transforms.py +276 -0
  175. topos/scope_resolution.py +89 -0
  176. topos/services/__init__.py +1 -0
  177. topos/services/container.py +46 -0
  178. topos/services/embeddings/__init__.py +1 -0
  179. topos/services/embeddings/base.py +7 -0
  180. topos/services/embeddings/local.py +9 -0
  181. topos/services/embeddings/remote.py +9 -0
  182. topos/services/interfaces.py +40 -0
  183. topos/services/llm/__init__.py +1 -0
  184. topos/services/llm/base.py +7 -0
  185. topos/services/llm/openai.py +126 -0
  186. topos/services/local.py +123 -0
  187. topos/services/postgres.py +385 -0
  188. topos/sources/__init__.py +6 -0
  189. topos/sources/definitions.py +114 -0
  190. topos/sources/install_service.py +836 -0
  191. topos/sources/registry.py +263 -0
  192. topos/sources/runtime_install.py +427 -0
  193. topos/storage/__init__.py +1 -0
  194. topos/storage/canonical/__init__.py +18 -0
  195. topos/storage/canonical/ai_chat/__init__.py +22 -0
  196. topos/storage/canonical/ai_chat/canonicalizer.py +147 -0
  197. topos/storage/canonical/ai_chat/mapper.py +168 -0
  198. topos/storage/canonical/ai_chat/model.py +87 -0
  199. topos/storage/canonical/ai_chat/tables.py +179 -0
  200. topos/storage/canonical/canonical_store.py +24 -0
  201. topos/storage/canonical/conversations_tables.py +1020 -0
  202. topos/storage/canonical/mapping_store.py +30 -0
  203. topos/storage/canonical/postgres.py +10 -0
  204. topos/storage/db/__init__.py +1 -0
  205. topos/storage/db/client.py +8 -0
  206. topos/storage/db/migrations/__init__.py +1 -0
  207. topos/storage/db/migrations/stage9_column_renames.py +78 -0
  208. topos/storage/db/paths.py +122 -0
  209. topos/storage/db/postgres.py +240 -0
  210. topos/storage/db/schema.py +6 -0
  211. topos/storage/enrichment/__init__.py +1 -0
  212. topos/storage/enrichment/canonical_enrichment_store.py +7 -0
  213. topos/storage/enrichment/raw_enrichment_store.py +18 -0
  214. topos/storage/normalized/__init__.py +1 -0
  215. topos/storage/normalized/normalized_store.py +24 -0
  216. topos/storage/oplog/__init__.py +1 -0
  217. topos/storage/oplog/decision.py +6 -0
  218. topos/storage/oplog/oplog_store.py +17 -0
  219. topos/storage/oplog/postgres.py +10 -0
  220. topos/storage/projections/__init__.py +1 -0
  221. topos/storage/projections/index_ops_store.py +6 -0
  222. topos/storage/projections/vector_index_store.py +6 -0
  223. topos/storage/raw/__init__.py +1 -0
  224. topos/storage/raw/browser_flat_tables.py +303 -0
  225. topos/storage/raw/file_store.py +100 -0
  226. topos/storage/raw/raw_store.py +29 -0
  227. topos/storage/raw/raw_tables_manager.py +295 -0
  228. topos/storage/raw/sqlite_raw_store.py +17 -0
  229. topos/storage/security/encryption.py +21 -0
  230. topos/storage/signal_identity.py +71 -0
  231. topos/storage/source_settings.py +116 -0
  232. topos/storage/user_identity.py +69 -0
  233. topos/sync/__init__.py +5 -0
  234. topos/sync/client.py +272 -0
  235. topos/sync_handlers.py +70 -0
  236. topos/testing/__init__.py +1 -0
  237. topos/testing/lifespan.py +7 -0
  238. topos/uma_contact_enrichment.py +1032 -0
  239. topos/uma_filters.py +669 -0
  240. topos/uma_resource_id.py +24 -0
  241. topos/uma_rpt.py +69 -0
  242. topos/utils/base_object.py +61 -0
  243. topos/websocket_client.py +21 -0
  244. topos_node-0.1.0.dist-info/METADATA +199 -0
  245. topos_node-0.1.0.dist-info/RECORD +249 -0
  246. topos_node-0.1.0.dist-info/WHEEL +5 -0
  247. topos_node-0.1.0.dist-info/entry_points.txt +2 -0
  248. topos_node-0.1.0.dist-info/licenses/LICENSE +201 -0
  249. topos_node-0.1.0.dist-info/top_level.txt +2 -0
topos/engine/engine.py ADDED
@@ -0,0 +1,165 @@
1
+ """Topos Engine: single entry point for processing tasks (PRD §5)."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ import time
7
+ from typing import Any, Dict, Optional
8
+
9
+ from .intake import normalize_task
10
+
11
+ logger = logging.getLogger("topos.engine")
12
+ from .queue_manager import QueueManager, TaskHandle
13
+ from .result_formatter import format_result
14
+ from .router import get_adapter_for_task
15
+ from .tasks import ExecutionMeta, ProcessingResult, ProcessingTask
16
+ from .validator import validate_task
17
+
18
+
19
class Engine:
    """Core processing engine.

    ``run(task)`` executes a task synchronously. ``submit(task)`` enqueues a
    task; queued tasks are processed one at a time by ``run_worker_once()``.
    """

    def __init__(
        self,
        registry: Optional[Any] = None,
        queue_max_size: int = 0,
        use_queue_analyzer: bool = False,
    ) -> None:
        """Create an engine.

        Args:
            registry: Optional object used to resolve a model when the task
                does not name one (see ``_build_inference_config``).
            queue_max_size: Maximum number of queued tasks; 0 means unbounded.
            use_queue_analyzer: When True, the worker dequeues in
                model-batched order (tasks for the same model together).
        """
        self._registry = registry
        self._queue = QueueManager(max_size=queue_max_size)
        self._use_queue_analyzer = use_queue_analyzer

    def submit(self, task: ProcessingTask) -> Optional[TaskHandle]:
        """Enqueue a task for later processing by ``run_worker_once()``.

        Returns:
            A TaskHandle for the task, or None if the queue is full. An
            invalid task is not queued; an already-failed handle carrying the
            validation error is returned instead.
        """
        valid, err_msg = validate_task(task)
        if not valid:
            handle = TaskHandle(task.id)
            handle._set_failed(
                format_result(task_id=task.id, status="failed", raw_output={}, error=err_msg)
            )
            return handle
        task_id = self._queue.enqueue(task)
        if task_id is None:
            return None
        return self._queue.get_handle(task_id)

    def run_worker_once(self) -> bool:
        """Dequeue one task, run it, and store the result in its handle.

        Tasks come out FIFO, or model-sorted when ``use_queue_analyzer`` is
        set. Returns True if a task was run, False if the queue was empty.
        """
        task = self._queue.get_next_for_worker(self._use_queue_analyzer, block=False)
        if task is None:
            return False
        handle = self._queue.get_handle(task.id)
        if handle is not None:
            handle._set_running()
        # run() converts every failure into a "failed" result, so the handle
        # always leaves the "running" state below.
        result = self.run(task)
        if handle is not None:
            if result.status == "completed":
                handle._set_completed(result)
            else:
                handle._set_failed(result)
        return True

    def run(self, task: ProcessingTask) -> ProcessingResult:
        """Execute a task synchronously: validate → intake → route → inference → format.

        Returns a ProcessingResult. Errors raised while processing the task
        (normalization, routing, the adapter call) are caught and reported
        through ``result.status == "failed"`` and ``result.error``.
        """
        try:
            return self._run_pipeline(task)
        except Exception as exc:  # noqa: BLE001 - run() must not raise
            logger.exception("Engine task %s failed with an unhandled error", task.id)
            self._record_metrics(("engine.task_failed", 1.0))
            return format_result(
                task_id=task.id,
                status="failed",
                raw_output={},
                error=f"{type(exc).__name__}: {exc}",
            )

    def _run_pipeline(self, task: ProcessingTask) -> ProcessingResult:
        """Run the processing steps for ``task``; may raise (``run`` guards it)."""
        # Validate first (before intake fills defaults, so empty provider is rejected)
        valid, err_msg = validate_task(task)
        if not valid:
            return format_result(
                task_id=task.id,
                status="failed",
                raw_output={},
                error=err_msg,
            )
        # Intake: normalize defaults
        normalized = normalize_task(task)
        # Build adapter config: subtype and model from task or registry
        config = self._build_inference_config(normalized)
        # Route to backend
        adapter = get_adapter_for_task(normalized)
        # Run inference and time it
        start = time.perf_counter()
        raw_output = adapter.run_inference(normalized.input, config=config)
        duration_ms = int((time.perf_counter() - start) * 1000)
        record_ids = normalized.record_ids if normalized.record_ids else None

        # Adapter may report an error inside its output instead of raising
        if raw_output.get("error"):
            self._record_metrics(("engine.task_failed", 1.0))
            return format_result(
                task_id=normalized.id,
                status="failed",
                raw_output=raw_output,
                provenance_source_id=normalized.source_id,
                provenance_record_ids=record_ids,
                execution_meta=ExecutionMeta(
                    provider=normalized.model_request.provider,
                    model=config.get("model"),
                    duration_ms=duration_ms,
                    cache_hit=False,
                ),
                error=str(raw_output.get("error")),
            )

        execution_meta = ExecutionMeta(
            provider=normalized.model_request.provider,
            # Prefer the model the adapter says it used, then the requested one.
            model=raw_output.get("model") or normalized.model_request.model or config.get("model"),
            duration_ms=duration_ms,
            cache_hit=False,
        )
        self._record_metrics(
            ("engine.task_completed", 1.0),
            ("engine.inference_duration_ms", float(duration_ms)),
        )
        return format_result(
            task_id=normalized.id,
            status="completed",
            raw_output=raw_output,
            provenance_source_id=normalized.source_id,
            provenance_record_ids=record_ids,
            execution_meta=execution_meta,
        )

    @staticmethod
    def _record_metrics(*samples: Any) -> None:
        """Record ``(name, value)`` metric samples on a best-effort basis.

        A failure to import or call the metrics module must not fail the
        task, so it is logged at debug level and otherwise ignored.
        """
        try:
            from ..observability.metrics import record_metric

            for name, value in samples:
                record_metric(name, value)
        except Exception:  # noqa: BLE001 - metrics are best-effort
            logger.debug("Could not record engine metrics", exc_info=True)

    def ensure_model(self, provider: str, model_name: str, **kwargs: Any) -> bool:
        """Ensure the model is available, asking the provider adapter to fetch it if needed.

        Args:
            provider: "ollama" or "huggingface" (case-insensitive, surrounding
                whitespace ignored). Any other value returns False.
            model_name: Model identifier passed to the adapter.
            **kwargs: ``subtype`` is forwarded to the HuggingFace adapter.

        Returns:
            True if the adapter's ``ensure_model`` returned a truthy value
            (a download/pull happened; the caller may remove it later for
            cleanup), False otherwise.
        """
        provider = (provider or "").strip().lower()
        if provider == "ollama":
            from .backends import get_ollama_adapter

            pulled = get_ollama_adapter().ensure_model(model_name)
        elif provider == "huggingface":
            from .backends import get_huggingface_adapter

            pulled = get_huggingface_adapter().ensure_model(
                model_name, subtype=kwargs.get("subtype")
            )
        else:
            return False
        if pulled:
            logger.info("Model %s (provider=%s) download complete.", model_name, provider)
            return True
        return False

    def _build_inference_config(self, task: ProcessingTask) -> Dict[str, Any]:
        """Build the config dict for ``adapter.run_inference``: subtype and model.

        The task's own model wins. When it is empty and a registry was given,
        the registry spec for ``(task.type, task.subtype)`` supplies a
        provider-specific model name, falling back to the spec's ``model``.
        """
        model = task.model_request.model
        if not model and self._registry is not None:
            spec = self._registry.get_model_for_task(task.type, task.subtype)
            if spec:
                provider = (task.model_request.provider or "").strip().lower()
                preferred_key = "ollama_model" if provider == "ollama" else "huggingface_path"
                model = spec.get(preferred_key) or spec.get("model")
        return {
            "subtype": task.subtype or "",
            "model": model,
        }
topos/engine/intake.py ADDED
@@ -0,0 +1,32 @@
1
+ """Task intake: accept and normalize tasks."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any, Dict
6
+
7
+ from .tasks import ExecutionSpec, ModelRequest, ProcessingTask
8
+
9
+
10
def normalize_task(task: ProcessingTask) -> ProcessingTask:
    """Return a re-validated copy of ``task`` with optional fields defaulted.

    Defaults applied: a missing execution spec becomes ``ExecutionSpec()``;
    a missing execution ``mode`` becomes "sync" and ``priority`` becomes 100;
    a missing or empty model-request ``provider`` becomes "huggingface".
    """
    payload = task.model_dump(mode="json")

    execution = payload.get("execution")
    if execution is None:
        payload["execution"] = ExecutionSpec().model_dump(mode="json")
    else:
        for field_name, fallback in (("mode", "sync"), ("priority", 100)):
            if execution.get(field_name) is None:
                execution[field_name] = fallback

    model_request = payload.get("model_request")
    if model_request and model_request.get("provider") in (None, ""):
        model_request["provider"] = "huggingface"

    return ProcessingTask.model_validate(payload)
28
+
29
+
30
def task_from_dict(data: Dict[str, Any]) -> ProcessingTask:
    """Validate a plain dict (e.g. a decoded JSON payload) into a ProcessingTask."""
    task = ProcessingTask.model_validate(data)
    return task
topos/engine/queue_manager.py ADDED
@@ -0,0 +1,112 @@
1
+ """In-memory task queue for async execution (Sprint 05)."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import queue
6
+ import threading
7
+ from typing import Any, Dict, Optional
8
+
9
+ from .tasks import ProcessingTask, ProcessingResult
10
+
11
+
12
class TaskHandle:
    """Handle for a submitted task: poll its status and fetch its result."""

    def __init__(self, task_id: str) -> None:
        self.task_id = task_id
        self._status = "pending"  # pending | running | completed | failed
        self._result: Optional["ProcessingResult"] = None
        self._lock = threading.Lock()
        # Set once the task reaches a terminal state (completed or failed).
        self._done = threading.Event()

    def get_status(self) -> str:
        """Return the current status: pending, running, completed or failed."""
        with self._lock:
            return self._status

    def get_result(self, timeout: Optional[float] = None) -> Optional["ProcessingResult"]:
        """Return the stored result, optionally waiting for the task to finish.

        Args:
            timeout: When None (the default) or not positive, return at once
                with whatever is stored. When positive, wait up to that many
                seconds for the task to complete or fail first.

        Returns:
            The stored result, or None if none has been stored (still
            pending/running, or failed without a result).
        """
        if timeout is not None and timeout > 0:
            self._done.wait(timeout)
        with self._lock:
            return self._result

    def _set_running(self) -> None:
        """Mark the task as picked up by a worker."""
        with self._lock:
            self._status = "running"

    def _set_completed(self, result: "ProcessingResult") -> None:
        """Store ``result``, mark the task completed and wake any waiter."""
        with self._lock:
            self._status = "completed"
            self._result = result
        self._done.set()

    def _set_failed(self, result: Optional["ProcessingResult"] = None) -> None:
        """Store ``result`` (may be None), mark the task failed and wake any waiter."""
        with self._lock:
            self._status = "failed"
            self._result = result
        self._done.set()
42
+
43
+
44
class QueueManager:
    """In-memory task queue with an optional maximum size and per-task handles."""

    def __init__(self, max_size: int = 0) -> None:
        self._max_size = max_size  # 0 = unbounded
        self._queue: queue.Queue = queue.Queue(maxsize=max_size if max_size > 0 else 0)
        self._handles: Dict[str, TaskHandle] = {}
        self._handles_lock = threading.Lock()
        # Keeps producers out while get_next_for_worker drains, sorts and
        # refills the queue, so the refill cannot find the queue full.
        self._reorder_lock = threading.Lock()

    def enqueue(self, task: ProcessingTask) -> Optional[str]:
        """Enqueue ``task``; return its id, or None if the queue is full.

        The handle is registered before the task becomes visible to workers,
        so a worker that dequeues it immediately can always find the handle.
        """
        task_id = task.id
        handle = TaskHandle(task_id)
        with self._reorder_lock:
            with self._handles_lock:
                previous = self._handles.get(task_id)
                self._handles[task_id] = handle
            try:
                self._queue.put_nowait(task)
            except queue.Full:
                # Roll back the registration so a rejected task leaves no trace.
                with self._handles_lock:
                    if self._handles.get(task_id) is handle:
                        if previous is None:
                            del self._handles[task_id]
                        else:
                            self._handles[task_id] = previous
                return None
        return task_id

    def dequeue(self, block: bool = True, timeout: Optional[float] = None) -> Optional[ProcessingTask]:
        """Remove and return the oldest task, or None if none arrives in time."""
        try:
            return self._queue.get(block=block, timeout=timeout)
        except queue.Empty:
            return None

    @staticmethod
    def _model_key(task: ProcessingTask) -> tuple:
        """Sort key grouping tasks by normalized (provider, model)."""
        provider = (task.model_request.provider or "").strip().lower()
        model = (task.model_request.model or "").strip()
        return (provider, model)

    def get_next_for_worker(
        self,
        sort_by_model: bool,
        block: bool = False,
        timeout: Optional[float] = None,
    ) -> Optional[ProcessingTask]:
        """Get the next task for the worker.

        With ``sort_by_model`` False this is ``dequeue(block, timeout)``.
        With it True, the queue is drained without blocking (``block`` and
        ``timeout`` are not used on this path), the tasks are sorted by
        (provider, model) so same-model runs are batched, the first is
        returned and the rest are put back in sorted order.
        """
        if not sort_by_model:
            return self.dequeue(block=block, timeout=timeout)

        stranded: list = []
        with self._reorder_lock:
            drained: list = []
            while True:
                try:
                    drained.append(self._queue.get_nowait())
                except queue.Empty:
                    break
            if not drained:
                return None
            # list.sort is stable: tasks for the same model keep arrival order.
            drained.sort(key=self._model_key)
            for pending in drained[1:]:
                try:
                    self._queue.put_nowait(pending)
                except queue.Full:
                    # Not expected while producers are locked out; never drop silently.
                    stranded.append(pending)

        for lost in stranded:
            lost_handle = self.get_handle(lost.id)
            if lost_handle is not None:
                lost_handle._set_failed()
        return drained[0]

    def get_handle(self, task_id: str) -> Optional[TaskHandle]:
        """Return the handle registered for ``task_id``, or None."""
        with self._handles_lock:
            return self._handles.get(task_id)

    def qsize(self) -> int:
        """Return the approximate number of queued tasks."""
        return self._queue.qsize()
topos/engine/registration.py ADDED
@@ -0,0 +1,126 @@
1
+ from __future__ import annotations
2
+
3
+ from datetime import datetime, timezone
4
+ from typing import Any, Dict
5
+ from uuid import uuid4
6
+
7
+ from ..config.settings import settings
8
+
9
+
10
# Version tag reported under "schema_version" in the capabilities payload.
CAPABILITIES_SCHEMA_VERSION = "v1"

# Each profile's operation list extends the previous one, in this order.
_BASIC_HOSTED_OPS = ("healthcheck", "sanitization.run", "filter_lab.list_job_groups")
_UPGRADED_HOSTED_OPS = _BASIC_HOSTED_OPS + ("filter_lab.run", "filter_lab.create_job_group")
_LOCAL_ENGINE_OPS = _UPGRADED_HOSTED_OPS + ("llm_generation", "ollama_list_models")

# Operations allowed for each runtime profile id.
RUNTIME_PROFILE_OPERATIONS: dict[str, list[str]] = {
    "basic_hosted": list(_BASIC_HOSTED_OPS),
    "upgraded_hosted": list(_UPGRADED_HOSTED_OPS),
    "local_engine": list(_LOCAL_ENGINE_OPS),
}
31
+
32
+
33
def resolve_runtime_profile() -> str:
    """Return the runtime profile id selected by ``settings.topos_compute_profile``.

    The setting is trimmed and lower-cased, then short aliases are mapped to
    full ids. A missing, empty or unrecognised value gives "basic_hosted".
    """
    default_profile = "basic_hosted"
    configured = getattr(settings, "topos_compute_profile", default_profile) or default_profile
    key = str(configured).strip().lower()
    alias_map = {
        "basic": "basic_hosted",
        "hosted_basic": "basic_hosted",
        "pro": "upgraded_hosted",
        "hosted_pro": "upgraded_hosted",
        "upgraded": "upgraded_hosted",
        "local": "local_engine",
    }
    profile = alias_map.get(key, key)
    return profile if profile in RUNTIME_PROFILE_OPERATIONS else default_profile
47
+
48
+
49
def build_engine_capabilities() -> Dict[str, Any]:
    """Build the capabilities payload describing this engine.

    Providers and models come from settings: "openai" (plus
    ``settings.openai_model`` when set) if ``settings.enable_llm`` is truthy,
    and "ollama" if ``settings.engine_ollama_base_url`` is truthy. Operations
    are those of the resolved runtime profile.
    """
    profile_id = resolve_runtime_profile()

    provider_names: set = set()
    model_names: set = set()
    if settings.enable_llm:
        provider_names.add("openai")
        if settings.openai_model:
            model_names.add(settings.openai_model)
    if settings.engine_ollama_base_url:
        provider_names.add("ollama")

    is_local = profile_id == "local_engine"
    pricing_tier = {"upgraded_hosted": "pro", "local_engine": "local"}.get(profile_id, "basic")
    profile_ops = RUNTIME_PROFILE_OPERATIONS.get(profile_id, [])

    capabilities: Dict[str, Any] = {
        "schema_version": CAPABILITIES_SCHEMA_VERSION,
        "providers": sorted(provider_names),
        "models": sorted(model_names),
        "supports_filtering": True,
        "supports_sanitization": True,
        "supports_enrichment": True,
        # Separate copies so callers cannot mutate the shared profile table.
        "operations": list(profile_ops),
        "runtime_profile": {
            "id": profile_id,
            "allowed_operations": list(profile_ops),
            "deployment_mode": "local" if is_local else "hosted",
            "pricing_tier": pricing_tier,
        },
        "limits": {
            "sanitization_ollama_max_input_chars": settings.sanitization_ollama_max_input_chars,
            "request_timeout_seconds": settings.request_timeout_seconds,
        },
        "transport": {
            "mode": resolve_transport_mode(),
            "control_plane_url_configured": bool(settings.topos_control_plane_url),
        },
    }
    return capabilities
85
+
86
+
87
def resolve_transport_mode() -> str:
    """Return the transport mode from ``settings.engine_transport_mode``.

    The value is trimmed and lower-cased; anything other than "ws" or
    "endpoint" (including a missing or empty setting) yields "ws".
    """
    configured = getattr(settings, "engine_transport_mode", "ws") or "ws"
    candidate = str(configured).strip().lower()
    return candidate if candidate in {"ws", "endpoint"} else "ws"
92
+
93
+
94
def build_engine_register_message() -> Dict[str, Any]:
    """Build an ``engine_register`` message with a fresh id and UTC timestamp.

    The payload carries the transport mode, the full capabilities payload and
    the ``engine_mode`` / ``enable_llm`` settings as metadata.
    """
    occurred_at = datetime.now(timezone.utc).isoformat()
    message_id = str(uuid4())
    payload: Dict[str, Any] = {
        "occurred_at": occurred_at,
        "status": "connected",
        "transport_mode": resolve_transport_mode(),
        "capabilities": build_engine_capabilities(),
        "metadata": {
            "engine_mode": settings.engine_mode,
            "enable_llm": settings.enable_llm,
        },
    }
    return {"id": message_id, "type": "engine_register", "payload": payload}
110
+
111
+
112
def build_engine_heartbeat_message() -> Dict[str, Any]:
    """Build an ``engine_heartbeat`` message with a fresh id and UTC timestamp.

    Same envelope as the register message, but the payload carries no
    capabilities: only status, transport mode and settings metadata.
    """
    occurred_at = datetime.now(timezone.utc).isoformat()
    message_id = str(uuid4())
    payload: Dict[str, Any] = {
        "occurred_at": occurred_at,
        "status": "connected",
        "transport_mode": resolve_transport_mode(),
        "metadata": {
            "engine_mode": settings.engine_mode,
            "enable_llm": settings.enable_llm,
        },
    }
    return {"id": message_id, "type": "engine_heartbeat", "payload": payload}
topos/engine/result_formatter.py ADDED
@@ -0,0 +1,38 @@
1
+ """Result formatter: raw adapter output + meta → ProcessingResult."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any, Dict, Optional
6
+
7
+ from .tasks import ExecutionMeta, ProcessingResult, Provenance
8
+
9
+
10
def format_result(
    task_id: str,
    status: str,
    raw_output: Dict[str, Any],
    *,
    provenance_source_id: Optional[str] = None,
    provenance_record_ids: Optional[list] = None,
    execution_meta: Optional[ExecutionMeta] = None,
    error: Optional[str] = None,
    confidence: Optional[float] = None,
    output_type: str = "json",
) -> ProcessingResult:
    """Assemble a ProcessingResult from adapter output and metadata.

    A Provenance is attached only when a source id is given or at least one
    record id is present; otherwise ``provenance`` is None.
    """
    has_record_ids = bool(provenance_record_ids)
    if provenance_source_id is None and not has_record_ids:
        provenance = None
    else:
        provenance = Provenance(
            source_id=provenance_source_id,
            record_ids=provenance_record_ids or [],
        )

    fields: Dict[str, Any] = {
        "task_id": task_id,
        "status": status,
        "output": raw_output,
        "output_type": output_type,
        "confidence": confidence,
        "provenance": provenance,
        "execution_meta": execution_meta,
        "error": error,
    }
    return ProcessingResult(**fields)
topos/engine/router.py ADDED
@@ -0,0 +1,19 @@
1
+ """Task router: select backend adapter by provider."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from .backends import BackendAdapter, get_huggingface_adapter, get_ollama_adapter, get_stub_adapter
6
+ from .tasks import ProcessingTask
7
+
8
+
9
def get_adapter_for_task(task: ProcessingTask) -> BackendAdapter:
    """Pick the backend adapter for ``task.model_request.provider``.

    The provider is trimmed and lower-cased: "huggingface" and "ollama" map
    to their adapters; any other value (including empty) gets the stub adapter.
    """
    provider_key = (task.model_request.provider or "").strip().lower()
    factories = {
        "huggingface": get_huggingface_adapter,
        "ollama": get_ollama_adapter,
    }
    make_adapter = factories.get(provider_key, get_stub_adapter)
    return make_adapter()
topos/engine/scoped_token.py ADDED
@@ -0,0 +1,82 @@
1
+ from __future__ import annotations
2
+
3
+ import base64
4
+ import hashlib
5
+ import hmac
6
+ import json
7
+ import time
8
+ from typing import Any, Dict, Optional
9
+
10
+
11
+ def _b64url_encode(raw: bytes) -> str:
12
+ return base64.urlsafe_b64encode(raw).rstrip(b"=").decode("ascii")
13
+
14
+
15
+ def _b64url_decode(raw: str) -> bytes:
16
+ padding = "=" * ((4 - len(raw) % 4) % 4)
17
+ return base64.urlsafe_b64decode((raw + padding).encode("ascii"))
18
+
19
+
20
def _sign(data: str, secret: str) -> str:
    """Return the unpadded base64url HMAC-SHA256 of ``data`` keyed by ``secret``.

    Both arguments are encoded as UTF-8 before signing.
    """
    key = secret.encode("utf-8")
    message = data.encode("utf-8")
    mac = hmac.new(key, message, hashlib.sha256)
    return _b64url_encode(mac.digest())
23
+
24
+
25
def create_scoped_invocation_token(claims: Dict[str, Any], *, secret: str) -> str:
    """Serialize ``claims`` and sign them into a ``<payload>.<signature>`` token.

    The claims are dumped as compact JSON with sorted keys and base64url
    encoded; the signature is computed over that encoded payload text.
    """
    canonical_json = json.dumps(claims, sort_keys=True, separators=(",", ":"))
    body = _b64url_encode(canonical_json.encode("utf-8"))
    return ".".join((body, _sign(body, secret)))
30
+
31
+
32
class ScopedTokenValidationError(ValueError):
    """Raised when a scoped invocation token fails validation.

    ``code`` is a short machine-readable reason (for example "TOKEN_INVALID")
    and ``message`` is the human-readable text, also used as ``str(error)``.
    """

    def __init__(self, code: str, message: str) -> None:
        self.code = code
        self.message = message
        super().__init__(message)
37
+
38
+
39
def validate_scoped_invocation_token(
    *,
    token: str,
    secret: str,
    resource_id: str,
    operation: str,
    request_id: Optional[str] = None,
    now_epoch_s: Optional[int] = None,
) -> Dict[str, Any]:
    """Verify a ``<payload>.<signature>`` token and check its scope.

    Args:
        token: Token text as produced by ``create_scoped_invocation_token``.
        secret: Shared secret used to recompute the signature.
        resource_id: Resource the caller is acting on; must equal the token's
            ``resource_id`` claim.
        operation: Operation being invoked; must appear in the token's
            ``allowed_operations`` claim unless that list contains "*".
        request_id: Checked against the token's ``request_id`` claim only
            when both are non-empty.
        now_epoch_s: Current time in epoch seconds; defaults to ``time.time()``.

    Returns:
        The decoded claims dict.

    Raises:
        ScopedTokenValidationError: with ``code`` one of TOKEN_INVALID,
            TOKEN_EXPIRED, SCOPE_MISMATCH or OPERATION_NOT_ALLOWED.
    """
    if not token or "." not in token:
        raise ScopedTokenValidationError("TOKEN_INVALID", "missing or malformed invocation token")
    encoded_payload, signature = token.split(".", 1)

    # Compare as bytes: hmac.compare_digest rejects str arguments containing
    # non-ASCII characters with TypeError, and the signature is caller-supplied.
    try:
        expected = _sign(encoded_payload, secret)
        signature_ok = hmac.compare_digest(signature.encode("utf-8"), expected.encode("utf-8"))
    except UnicodeError as exc:
        raise ScopedTokenValidationError("TOKEN_INVALID", "missing or malformed invocation token") from exc
    if not signature_ok:
        raise ScopedTokenValidationError("TOKEN_INVALID", "invalid token signature")

    try:
        claims = json.loads(_b64url_decode(encoded_payload).decode("utf-8"))
    except Exception as exc:  # noqa: BLE001
        raise ScopedTokenValidationError("TOKEN_INVALID", "token payload is not valid JSON") from exc
    if not isinstance(claims, dict):
        raise ScopedTokenValidationError("TOKEN_INVALID", "token payload must be an object")

    # Expiry: exp must be an integer; the token is expired from exp onwards.
    now_s = int(now_epoch_s if now_epoch_s is not None else time.time())
    exp = claims.get("exp")
    if not isinstance(exp, int):
        raise ScopedTokenValidationError("TOKEN_INVALID", "token exp claim is required")
    if now_s >= exp:
        raise ScopedTokenValidationError("TOKEN_EXPIRED", "invocation token has expired")

    token_resource_id = str(claims.get("resource_id") or "")
    if token_resource_id != resource_id:
        raise ScopedTokenValidationError("SCOPE_MISMATCH", "resource_id does not match token scope")

    allowed_ops = claims.get("allowed_operations")
    if not isinstance(allowed_ops, list) or not allowed_ops:
        raise ScopedTokenValidationError("TOKEN_INVALID", "allowed_operations claim is required")
    normalized_allowed = {str(v) for v in allowed_ops if str(v)}
    if "*" not in normalized_allowed and operation not in normalized_allowed:
        raise ScopedTokenValidationError("OPERATION_NOT_ALLOWED", "operation not allowed by token scope")

    # A token or call without a request_id is not bound to a specific request.
    token_request_id = str(claims.get("request_id") or "")
    if request_id and token_request_id and token_request_id != request_id:
        raise ScopedTokenValidationError("SCOPE_MISMATCH", "request_id does not match token scope")
    return claims