topos-node 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- shared/__init__.py +59 -0
- shared/filtering.py +640 -0
- shared/schema_registry.py +229 -0
- topos/__init__.py +5 -0
- topos/__version__.py +6 -0
- topos/analytics/__init__.py +15 -0
- topos/analytics/duckdb_adapter.py +48 -0
- topos/analytics/messenger_communities.py +349 -0
- topos/analytics/messenger_graph.py +522 -0
- topos/analytics/messenger_labels.py +321 -0
- topos/analytics/profiles.py +22 -0
- topos/analytics/query_engine.py +64 -0
- topos/analytics/raw_queries.py +174 -0
- topos/api/__init__.py +1 -0
- topos/api/analytics.py +52 -0
- topos/api/app_registry.py +31 -0
- topos/api/backup.py +15 -0
- topos/api/compute_remote.py +175 -0
- topos/api/data_commit.py +158 -0
- topos/api/data_explorer_table_prefs.py +81 -0
- topos/api/db.py +10 -0
- topos/api/device.py +25 -0
- topos/api/enrichment.py +959 -0
- topos/api/filter_lab.py +195 -0
- topos/api/health.py +61 -0
- topos/api/ingestion_api.py +37 -0
- topos/api/ingestion_compat.py +21 -0
- topos/api/ingestion_sources.py +600 -0
- topos/api/llm.py +76 -0
- topos/api/local_mcp.py +46 -0
- topos/api/messenger_analytics.py +385 -0
- topos/api/query_api.py +13 -0
- topos/api/sanitization_ollama_config.py +64 -0
- topos/api/source_install.py +324 -0
- topos/api/sources.py +13 -0
- topos/api/sync.py +10 -0
- topos/api/ui_config.py +83 -0
- topos/api/uma_data.py +311 -0
- topos/api/usage.py +49 -0
- topos/api/user_identity.py +46 -0
- topos/app.py +239 -0
- topos/auth.py +17 -0
- topos/canonicalization/__init__.py +1 -0
- topos/canonicalization/mappers/__init__.py +22 -0
- topos/canonicalization/mappers/base.py +26 -0
- topos/canonicalization/mappers/chatgpt_mapper.py +40 -0
- topos/canonicalization/mappers/grok_mapper.py +17 -0
- topos/canonicalization/mappers/messenger_mapper.py +58 -0
- topos/canonicalization/models.py +31 -0
- topos/canonicalization/resolver.py +23 -0
- topos/cli/__init__.py +1 -0
- topos/cli/__main__.py +6 -0
- topos/cli/commands.py +132 -0
- topos/config/__init__.py +1 -0
- topos/config/sanitization_ollama.py +189 -0
- topos/config/settings.py +310 -0
- topos/contacts/__init__.py +5 -0
- topos/contacts/identity.py +24 -0
- topos/control_plane_client.py +300 -0
- topos/core/__init__.py +1 -0
- topos/core/api_models.py +128 -0
- topos/core/connection_resilience.py +99 -0
- topos/core/device_helpers.py +8 -0
- topos/core/errors.py +13 -0
- topos/core/events.py +12 -0
- topos/core/handlers.py +5625 -0
- topos/core/logging.py +175 -0
- topos/core/metrics.py +21 -0
- topos/core/startup_banner.py +62 -0
- topos/core/state.py +682 -0
- topos/core/table_layers.py +45 -0
- topos/core/types.py +13 -0
- topos/data_explorer_table_prefs.py +150 -0
- topos/engine/__init__.py +29 -0
- topos/engine/backends/__init__.py +50 -0
- topos/engine/backends/base.py +21 -0
- topos/engine/backends/huggingface.py +151 -0
- topos/engine/backends/ollama.py +181 -0
- topos/engine/backends/stub.py +22 -0
- topos/engine/engine.py +165 -0
- topos/engine/intake.py +32 -0
- topos/engine/queue_manager.py +112 -0
- topos/engine/registration.py +126 -0
- topos/engine/result_formatter.py +38 -0
- topos/engine/router.py +19 -0
- topos/engine/scoped_token.py +82 -0
- topos/engine/tasks.py +154 -0
- topos/engine/transport.py +44 -0
- topos/engine/usage_guard.py +100 -0
- topos/engine/usage_observation.py +129 -0
- topos/engine/validator.py +23 -0
- topos/enrichment/__init__.py +1 -0
- topos/enrichment/derived_tables.py +214 -0
- topos/enrichment/jobs/__init__.py +30 -0
- topos/enrichment/jobs/base.py +54 -0
- topos/enrichment/jobs/canonical/__init__.py +1 -0
- topos/enrichment/jobs/canonical/embeddings_job.py +27 -0
- topos/enrichment/jobs/canonical/emo_27_job.py +97 -0
- topos/enrichment/jobs/canonical/entities_job.py +27 -0
- topos/enrichment/jobs/canonical/sentiment_job.py +27 -0
- topos/enrichment/jobs/canonical/topics_job.py +27 -0
- topos/enrichment/jobs/raw/__init__.py +1 -0
- topos/enrichment/jobs/raw/attachments_job.py +12 -0
- topos/enrichment/jobs/raw/language_job.py +12 -0
- topos/enrichment/jobs/raw/time_normalization_job.py +12 -0
- topos/enrichment/jobs/raw/tool_calls_job.py +12 -0
- topos/enrichment/models/__init__.py +1 -0
- topos/enrichment/models/manager.py +8 -0
- topos/enrichment/models/registry.py +71 -0
- topos/enrichment/models/versioning.py +8 -0
- topos/enrichment/orchestrator.py +177 -0
- topos/enrichment/processor.py +17 -0
- topos/enrichment/progress_bar.py +122 -0
- topos/enrichment/website_classifier.py +31 -0
- topos/filter_lab/__init__.py +1 -0
- topos/filter_lab/bundles.py +300 -0
- topos/filter_lab/schema.py +86 -0
- topos/filter_lab/service.py +167 -0
- topos/filter_lab/store.py +374 -0
- topos/filter_lab/worker.py +250 -0
- topos/hosted_pool_lease.py +153 -0
- topos/ingestion/__init__.py +1 -0
- topos/ingestion/checkpoints/__init__.py +6 -0
- topos/ingestion/checkpoints/checkpoint_store.py +24 -0
- topos/ingestion/checkpoints/sqlite_checkpoint_store.py +82 -0
- topos/ingestion/ingest_helpers.py +504 -0
- topos/ingestion/jobs.py +91 -0
- topos/ingestion/local_sync.py +823 -0
- topos/ingestion/log_preview.py +21 -0
- topos/ingestion/manager.py +1100 -0
- topos/ingestion/parser.py +174 -0
- topos/ingestion/parsers/__init__.py +32 -0
- topos/ingestion/parsers/base.py +24 -0
- topos/ingestion/parsers/browser_parser.py +171 -0
- topos/ingestion/parsers/calendar_parser.py +21 -0
- topos/ingestion/parsers/chatgpt_conversation_flattener.py +266 -0
- topos/ingestion/parsers/chatgpt_parser.py +67 -0
- topos/ingestion/parsers/grok_parser.py +21 -0
- topos/ingestion/parsers/messenger_parser.py +97 -0
- topos/ingestion/progress.py +54 -0
- topos/ingestion/sources/__init__.py +20 -0
- topos/ingestion/sources/base.py +39 -0
- topos/ingestion/sources/calendar.py +29 -0
- topos/ingestion/sources/chatgpt.py +29 -0
- topos/ingestion/sources/contact_importers.py +274 -0
- topos/ingestion/sources/grok.py +29 -0
- topos/ingestion/sources/imessage_reader.py +479 -0
- topos/ingestion/sources/signal_export_parser.py +132 -0
- topos/ingestion/sources/signal_reader.py +491 -0
- topos/ingestion/state_machine.py +70 -0
- topos/ingestion/triggers/__init__.py +1 -0
- topos/ingestion/triggers/file_trigger.py +36 -0
- topos/ingestion/triggers/sqlite_trigger.py +18 -0
- topos/ingestion/validation/__init__.py +1 -0
- topos/ingestion/validation/base.py +27 -0
- topos/ingestion/validation/schema_registry.py +111 -0
- topos/ingestion/validation/schema_validator.py +13 -0
- topos/lineage/__init__.py +1 -0
- topos/lineage/provenance.py +9 -0
- topos/lineage/tracker.py +9 -0
- topos/mcp_stdio_proxy.py +83 -0
- topos/observability/__init__.py +1 -0
- topos/observability/alerts.py +7 -0
- topos/observability/metrics.py +25 -0
- topos/observability/tracing.py +18 -0
- topos/openai_client.py +69 -0
- topos/projections/__init__.py +1 -0
- topos/projections/vector_index/__init__.py +1 -0
- topos/projections/vector_index/base.py +21 -0
- topos/projections/vector_index/builders.py +11 -0
- topos/projections/vector_index/health_checks.py +5 -0
- topos/rate_limit.py +43 -0
- topos/sanitization/__init__.py +16 -0
- topos/sanitization/ollama_transforms.py +276 -0
- topos/scope_resolution.py +89 -0
- topos/services/__init__.py +1 -0
- topos/services/container.py +46 -0
- topos/services/embeddings/__init__.py +1 -0
- topos/services/embeddings/base.py +7 -0
- topos/services/embeddings/local.py +9 -0
- topos/services/embeddings/remote.py +9 -0
- topos/services/interfaces.py +40 -0
- topos/services/llm/__init__.py +1 -0
- topos/services/llm/base.py +7 -0
- topos/services/llm/openai.py +126 -0
- topos/services/local.py +123 -0
- topos/services/postgres.py +385 -0
- topos/sources/__init__.py +6 -0
- topos/sources/definitions.py +114 -0
- topos/sources/install_service.py +836 -0
- topos/sources/registry.py +263 -0
- topos/sources/runtime_install.py +427 -0
- topos/storage/__init__.py +1 -0
- topos/storage/canonical/__init__.py +18 -0
- topos/storage/canonical/ai_chat/__init__.py +22 -0
- topos/storage/canonical/ai_chat/canonicalizer.py +147 -0
- topos/storage/canonical/ai_chat/mapper.py +168 -0
- topos/storage/canonical/ai_chat/model.py +87 -0
- topos/storage/canonical/ai_chat/tables.py +179 -0
- topos/storage/canonical/canonical_store.py +24 -0
- topos/storage/canonical/conversations_tables.py +1020 -0
- topos/storage/canonical/mapping_store.py +30 -0
- topos/storage/canonical/postgres.py +10 -0
- topos/storage/db/__init__.py +1 -0
- topos/storage/db/client.py +8 -0
- topos/storage/db/migrations/__init__.py +1 -0
- topos/storage/db/migrations/stage9_column_renames.py +78 -0
- topos/storage/db/paths.py +122 -0
- topos/storage/db/postgres.py +240 -0
- topos/storage/db/schema.py +6 -0
- topos/storage/enrichment/__init__.py +1 -0
- topos/storage/enrichment/canonical_enrichment_store.py +7 -0
- topos/storage/enrichment/raw_enrichment_store.py +18 -0
- topos/storage/normalized/__init__.py +1 -0
- topos/storage/normalized/normalized_store.py +24 -0
- topos/storage/oplog/__init__.py +1 -0
- topos/storage/oplog/decision.py +6 -0
- topos/storage/oplog/oplog_store.py +17 -0
- topos/storage/oplog/postgres.py +10 -0
- topos/storage/projections/__init__.py +1 -0
- topos/storage/projections/index_ops_store.py +6 -0
- topos/storage/projections/vector_index_store.py +6 -0
- topos/storage/raw/__init__.py +1 -0
- topos/storage/raw/browser_flat_tables.py +303 -0
- topos/storage/raw/file_store.py +100 -0
- topos/storage/raw/raw_store.py +29 -0
- topos/storage/raw/raw_tables_manager.py +295 -0
- topos/storage/raw/sqlite_raw_store.py +17 -0
- topos/storage/security/encryption.py +21 -0
- topos/storage/signal_identity.py +71 -0
- topos/storage/source_settings.py +116 -0
- topos/storage/user_identity.py +69 -0
- topos/sync/__init__.py +5 -0
- topos/sync/client.py +272 -0
- topos/sync_handlers.py +70 -0
- topos/testing/__init__.py +1 -0
- topos/testing/lifespan.py +7 -0
- topos/uma_contact_enrichment.py +1032 -0
- topos/uma_filters.py +669 -0
- topos/uma_resource_id.py +24 -0
- topos/uma_rpt.py +69 -0
- topos/utils/base_object.py +61 -0
- topos/websocket_client.py +21 -0
- topos_node-0.1.0.dist-info/METADATA +199 -0
- topos_node-0.1.0.dist-info/RECORD +249 -0
- topos_node-0.1.0.dist-info/WHEEL +5 -0
- topos_node-0.1.0.dist-info/entry_points.txt +2 -0
- topos_node-0.1.0.dist-info/licenses/LICENSE +201 -0
- topos_node-0.1.0.dist-info/top_level.txt +2 -0
topos/engine/engine.py
ADDED
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
"""Topos Engine: single entry point for processing tasks (PRD §5)."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
import time
|
|
7
|
+
from typing import Any, Dict, Optional
|
|
8
|
+
|
|
9
|
+
from .intake import normalize_task
|
|
10
|
+
|
|
11
|
+
logger = logging.getLogger("topos.engine")
|
|
12
|
+
from .queue_manager import QueueManager, TaskHandle
|
|
13
|
+
from .result_formatter import format_result
|
|
14
|
+
from .router import get_adapter_for_task
|
|
15
|
+
from .tasks import ExecutionMeta, ProcessingResult, ProcessingTask
|
|
16
|
+
from .validator import validate_task
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class Engine:
    """Core processing engine.

    ``run(task)`` executes a task synchronously; ``submit(task)`` enqueues it so
    that a worker calling ``run_worker_once()`` can process it later.
    """

    def __init__(
        self,
        registry: Optional[Any] = None,
        queue_max_size: int = 0,
        use_queue_analyzer: bool = False,
    ) -> None:
        """Create an engine.

        Args:
            registry: Optional object exposing ``get_model_for_task(type, subtype)``;
                consulted only when a task does not name a model itself.
            queue_max_size: Capacity of the submit queue; 0 means unbounded.
            use_queue_analyzer: When True, the worker dequeues in model-batched
                order (tasks for the same model together).
        """
        self._registry = registry
        self._queue = QueueManager(max_size=queue_max_size)
        self._use_queue_analyzer = use_queue_analyzer

    def submit(self, task: ProcessingTask) -> Optional[TaskHandle]:
        """Enqueue ``task`` for a worker.

        Returns:
            A ``TaskHandle`` for the task, or ``None`` if the queue is full. An
            invalid task is not enqueued; a detached handle already marked
            failed (carrying the validation error) is returned instead.
        """
        valid, err_msg = validate_task(task)
        if not valid:
            handle = TaskHandle(task.id)
            handle._set_failed(format_result(task_id=task.id, status="failed", raw_output={}, error=err_msg))
            return handle
        task_id = self._queue.enqueue(task)
        if task_id is None:
            return None
        return self._queue.get_handle(task_id)

    def run_worker_once(self) -> bool:
        """Dequeue one task, run it and store the result in its handle.

        The task is taken FIFO, or model-sorted when ``use_queue_analyzer`` is set.

        Returns:
            True if a task was run, False if the queue was empty.
        """
        task = self._queue.get_next_for_worker(self._use_queue_analyzer, block=False)
        if task is None:
            return False
        handle = self._queue.get_handle(task.id)
        if handle:
            handle._set_running()
        # run() converts every failure into a result, so the handle below is
        # always moved out of the "running" state.
        result = self.run(task)
        if handle:
            if result.status == "completed":
                handle._set_completed(result)
            else:
                handle._set_failed(result)
        return True

    def run(self, task: ProcessingTask) -> ProcessingResult:
        """Execute a task synchronously: validate → intake → route → inference → format.

        Returns:
            A ``ProcessingResult``. Failures raised by intake, routing, the
            backend adapter or result building are reported through
            ``result.status == "failed"`` and ``result.error`` rather than
            propagated to the caller.
        """
        # Validate first (before intake fills defaults, so empty provider is rejected)
        valid, err_msg = validate_task(task)
        if not valid:
            return format_result(
                task_id=task.id,
                status="failed",
                raw_output={},
                error=err_msg,
            )
        try:
            return self._execute(task)
        except Exception as exc:  # noqa: BLE001 - honour the "never raises" contract
            logger.exception("Task %s raised during execution", task.id)
            self._record_metrics({"engine.task_failed": 1.0})
            return format_result(
                task_id=task.id,
                status="failed",
                raw_output={},
                error=f"{type(exc).__name__}: {exc}",
            )

    def _execute(self, task: ProcessingTask) -> ProcessingResult:
        """Run the intake → route → inference → format pipeline for a validated task."""
        # Intake: normalize defaults
        normalized = normalize_task(task)
        # Build adapter config: subtype and model from task or registry
        config = self._build_inference_config(normalized)
        # Route to backend
        adapter = get_adapter_for_task(normalized)
        # Run inference
        start = time.perf_counter()
        raw_output = adapter.run_inference(normalized.input, config=config)
        duration_ms = int((time.perf_counter() - start) * 1000)
        if not isinstance(raw_output, dict):
            # Everything below reads raw_output as a mapping.
            raise TypeError(f"adapter returned {type(raw_output).__name__}, expected dict")

        record_ids = normalized.record_ids if normalized.record_ids else None
        # Adapter may return error in output
        if raw_output.get("error"):
            self._record_metrics({"engine.task_failed": 1.0})
            return format_result(
                task_id=normalized.id,
                status="failed",
                raw_output=raw_output,
                provenance_source_id=normalized.source_id,
                provenance_record_ids=record_ids,
                execution_meta=ExecutionMeta(
                    provider=normalized.model_request.provider,
                    model=config.get("model"),
                    duration_ms=duration_ms,
                    cache_hit=False,
                ),
                error=str(raw_output.get("error")),
            )
        execution_meta = ExecutionMeta(
            provider=normalized.model_request.provider,
            # Prefer the model the adapter reports, then the requested one.
            model=raw_output.get("model") or normalized.model_request.model or config.get("model"),
            duration_ms=duration_ms,
            cache_hit=False,
        )
        self._record_metrics(
            {
                "engine.task_completed": 1.0,
                "engine.inference_duration_ms": float(duration_ms),
            }
        )
        return format_result(
            task_id=normalized.id,
            status="completed",
            raw_output=raw_output,
            provenance_source_id=normalized.source_id,
            provenance_record_ids=record_ids,
            execution_meta=execution_meta,
        )

    @staticmethod
    def _record_metrics(metrics: Dict[str, float]) -> None:
        """Best-effort metric emission; a metrics failure must not fail the task."""
        try:
            from ..observability.metrics import record_metric

            for name, value in metrics.items():
                record_metric(name, value)
        except Exception:  # noqa: BLE001 - metrics are optional
            logger.debug("Recording engine metrics failed", exc_info=True)

    def ensure_model(self, provider: str, model_name: str, **kwargs: Any) -> bool:
        """Ensure the model is available: download/pull if not present.

        Args:
            provider: Backend name; matched case-insensitively after stripping.
            model_name: Model identifier passed to the backend adapter.
            **kwargs: Only ``subtype`` is read, and only for ``huggingface``.

        Returns:
            True if the adapter reports it downloaded/pulled the model (caller
            may remove it later for cleanup); False if the adapter reports
            nothing was downloaded or the provider is not recognised.
        """
        provider = (provider or "").strip().lower()
        if provider == "ollama":
            from .backends import get_ollama_adapter

            adapter = get_ollama_adapter()
            if adapter.ensure_model(model_name):
                logger.info("Model %s (provider=ollama) download complete.", model_name)
                return True
            return False
        if provider == "huggingface":
            from .backends import get_huggingface_adapter

            adapter = get_huggingface_adapter()
            if adapter.ensure_model(model_name, subtype=kwargs.get("subtype")):
                logger.info("Model %s (provider=huggingface) download complete.", model_name)
                return True
            return False
        return False

    def _build_inference_config(self, task: ProcessingTask) -> Dict[str, Any]:
        """Build the config dict for ``adapter.run_inference``: subtype and model.

        The model named by the task wins. Otherwise, when a registry is set,
        its spec for ``(task.type, task.subtype)`` supplies a provider-specific
        model key, falling back to the spec's generic ``"model"`` entry.
        """
        model = task.model_request.model
        if not model and self._registry is not None:
            spec = self._registry.get_model_for_task(task.type, task.subtype)
            if spec:
                provider = (task.model_request.provider or "").strip().lower()
                if provider == "ollama":
                    model = spec.get("ollama_model") or spec.get("model")
                else:
                    model = spec.get("huggingface_path") or spec.get("model")
        return {
            "subtype": task.subtype or "",
            "model": model,
        }
|
topos/engine/intake.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
"""Task intake: accept and normalize tasks."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any, Dict
|
|
6
|
+
|
|
7
|
+
from .tasks import ExecutionSpec, ModelRequest, ProcessingTask
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def normalize_task(task: ProcessingTask) -> ProcessingTask:
    """Return a re-validated copy of ``task`` with missing optional fields defaulted.

    Defaults applied: a fresh ``ExecutionSpec`` when ``execution`` is absent,
    ``mode="sync"`` / ``priority=100`` when those are unset, and
    ``provider="huggingface"`` when the model request has no provider.
    """
    payload = task.model_dump(mode="json")

    # Ensure execution has defaults
    execution = payload.get("execution")
    if execution is None:
        payload["execution"] = ExecutionSpec().model_dump(mode="json")
    else:
        for field_name, fallback in (("mode", "sync"), ("priority", 100)):
            if execution.get(field_name) is None:
                execution[field_name] = fallback

    # Ensure model_request has provider default
    model_request = payload.get("model_request")
    if model_request and model_request.get("provider") in (None, ""):
        model_request["provider"] = "huggingface"

    return ProcessingTask.model_validate(payload)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def task_from_dict(data: Dict[str, Any]) -> ProcessingTask:
    """Validate a plain dict (e.g. a decoded JSON payload) into a ProcessingTask."""
    parsed = ProcessingTask.model_validate(data)
    return parsed
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
"""In-memory task queue for async execution (Sprint 05)."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import queue
|
|
6
|
+
import threading
|
|
7
|
+
from typing import Any, Dict, Optional
|
|
8
|
+
|
|
9
|
+
from .tasks import ProcessingTask, ProcessingResult
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class TaskHandle:
    """Handle for a submitted task: poll status and get result.

    Status moves through ``pending`` → ``running`` → ``completed`` | ``failed``.
    The ``_set_*`` methods are called by the engine/worker; readers use
    ``get_status`` and ``get_result``.
    """

    def __init__(self, task_id: str) -> None:
        self.task_id = task_id
        self._status = "pending"  # pending | running | completed | failed
        self._result: Optional[ProcessingResult] = None
        self._lock = threading.Lock()
        # Set once the task reaches a terminal state so get_result can wait on it.
        self._done = threading.Event()

    def get_status(self) -> str:
        """Return the current status string."""
        with self._lock:
            return self._status

    def get_result(self, timeout: Optional[float] = None) -> Optional[ProcessingResult]:
        """Return the stored result, optionally waiting for the task to finish.

        Args:
            timeout: ``None`` (default) returns immediately with whatever is
                stored, which is the historical behaviour. A number waits up to
                that many seconds for the task to complete or fail.

        Returns:
            The result, or ``None`` if the task has not finished (in time) or
            failed without a result.
        """
        if timeout is not None:
            self._done.wait(timeout)
        with self._lock:
            return self._result

    def _set_running(self) -> None:
        """Mark the task as picked up by a worker."""
        with self._lock:
            self._status = "running"

    def _set_completed(self, result: ProcessingResult) -> None:
        """Store ``result`` and mark the task completed."""
        with self._lock:
            self._status = "completed"
            self._result = result
        self._done.set()

    def _set_failed(self, result: Optional[ProcessingResult] = None) -> None:
        """Mark the task failed, optionally storing a result describing the error."""
        with self._lock:
            self._status = "failed"
            self._result = result
        self._done.set()
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class QueueManager:
    """In-memory task queue with optional max size and per-task handles."""

    def __init__(self, max_size: int = 0) -> None:
        """Create the queue; ``max_size`` of 0 (or less) means unbounded."""
        self._max_size = max_size  # 0 = unbounded
        self._queue: queue.Queue = queue.Queue(maxsize=max_size if max_size > 0 else 0)
        self._handles: Dict[str, TaskHandle] = {}
        self._handles_lock = threading.Lock()
        # Serialises producers against the drain/sort/refill in
        # get_next_for_worker so refilled tasks always fit back into the queue.
        self._queue_lock = threading.Lock()

    def enqueue(self, task: ProcessingTask) -> Optional[str]:
        """Enqueue ``task``; return its id, or ``None`` if the queue is full."""
        task_id = task.id
        # The handles lock is held across the (non-blocking) put so a worker
        # that dequeues the task immediately still finds its handle.
        with self._queue_lock, self._handles_lock:
            try:
                self._queue.put_nowait(task)
            except queue.Full:
                return None
            self._handles[task_id] = TaskHandle(task_id)
        return task_id

    def dequeue(self, block: bool = True, timeout: Optional[float] = None) -> Optional[ProcessingTask]:
        """Pop the oldest task, or return ``None`` when none is available (in time)."""
        try:
            return self._queue.get(block=block, timeout=timeout)
        except queue.Empty:
            return None

    @staticmethod
    def _model_key(task: ProcessingTask) -> tuple:
        """Sort key grouping tasks by (provider, model)."""
        provider = (task.model_request.provider or "").strip().lower()
        model = (task.model_request.model or "").strip()
        return (provider, model)

    def get_next_for_worker(self, sort_by_model: bool, block: bool = False, timeout: Optional[float] = None) -> Optional[ProcessingTask]:
        """Get the next task for the worker.

        Args:
            sort_by_model: When False, plain FIFO dequeue. When True, drain the
                queue, sort by (provider, model) to batch same-model runs,
                return the first task and put the rest back in sorted order
                (the sort is stable, so FIFO order is kept within a model).
            block: Whether to wait when no task is queued.
            timeout: Maximum wait in seconds when ``block`` is True.

        Returns:
            The next task, or ``None`` if none became available.
        """
        if not sort_by_model:
            return self.dequeue(block=block, timeout=timeout)
        with self._queue_lock:
            drained = list(iter(lambda: self.dequeue(block=False), None))
            if drained:
                drained.sort(key=self._model_key)
                # Producers are locked out, so the queue cannot have refilled:
                # every drained task is guaranteed to fit back in.
                for leftover in drained[1:]:
                    self._queue.put_nowait(leftover)
                return drained[0]
        # Nothing queued: honour block/timeout outside the lock so producers
        # can enqueue while we wait.
        return self.dequeue(block=block, timeout=timeout)

    def get_handle(self, task_id: str) -> Optional[TaskHandle]:
        """Return the handle registered for ``task_id``, if any."""
        with self._handles_lock:
            return self._handles.get(task_id)

    def qsize(self) -> int:
        """Return the approximate number of queued tasks."""
        return self._queue.qsize()
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from datetime import datetime, timezone
|
|
4
|
+
from typing import Any, Dict
|
|
5
|
+
from uuid import uuid4
|
|
6
|
+
|
|
7
|
+
from ..config.settings import settings
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
CAPABILITIES_SCHEMA_VERSION = "v1"
|
|
11
|
+
|
|
12
|
+
# Each tier extends the previous one: basic hosted < upgraded hosted < local engine.
_BASIC_HOSTED_OPERATIONS: list[str] = [
    "healthcheck",
    "sanitization.run",
    "filter_lab.list_job_groups",
]
_UPGRADED_HOSTED_OPERATIONS: list[str] = [
    *_BASIC_HOSTED_OPERATIONS,
    "filter_lab.run",
    "filter_lab.create_job_group",
]
_LOCAL_ENGINE_OPERATIONS: list[str] = [
    *_UPGRADED_HOSTED_OPERATIONS,
    "llm_generation",
    "ollama_list_models",
]

# Operations each runtime profile is allowed to perform, keyed by profile id.
RUNTIME_PROFILE_OPERATIONS: dict[str, list[str]] = {
    "basic_hosted": list(_BASIC_HOSTED_OPERATIONS),
    "upgraded_hosted": list(_UPGRADED_HOSTED_OPERATIONS),
    "local_engine": list(_LOCAL_ENGINE_OPERATIONS),
}
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def resolve_runtime_profile() -> str:
    """Return the canonical runtime profile id from ``settings.topos_compute_profile``.

    The configured value is trimmed and lower-cased, short aliases are expanded,
    and anything that is not a key of ``RUNTIME_PROFILE_OPERATIONS`` (including
    a missing or empty setting) resolves to ``"basic_hosted"``.
    """
    configured = getattr(settings, "topos_compute_profile", "basic_hosted") or "basic_hosted"
    key = str(configured).strip().lower()
    alias_to_profile = {
        "basic": "basic_hosted",
        "hosted_basic": "basic_hosted",
        "pro": "upgraded_hosted",
        "hosted_pro": "upgraded_hosted",
        "upgraded": "upgraded_hosted",
        "local": "local_engine",
    }
    profile = alias_to_profile.get(key, key)
    return profile if profile in RUNTIME_PROFILE_OPERATIONS else "basic_hosted"
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def build_engine_capabilities() -> Dict[str, Any]:
    """Assemble the capabilities document advertised by this engine.

    Combines the resolved runtime profile and its allowed operations with the
    providers/models enabled in ``settings``, the configured limits and the
    transport mode.
    """
    profile_id = resolve_runtime_profile()
    allowed_operations = RUNTIME_PROFILE_OPERATIONS.get(profile_id, [])

    provider_names: set[str] = set()
    model_names: set[str] = set()
    if settings.enable_llm:
        provider_names.add("openai")
        # NOTE(review): nesting of this check under enable_llm is inferred —
        # the indentation was lost in the rendering this was recovered from.
        if settings.openai_model:
            model_names.add(settings.openai_model)
    if settings.engine_ollama_base_url:
        provider_names.add("ollama")

    is_local = profile_id == "local_engine"
    pricing_tier = {"upgraded_hosted": "pro", "local_engine": "local"}.get(profile_id, "basic")

    runtime_profile_section = {
        "id": profile_id,
        "allowed_operations": list(allowed_operations),
        "deployment_mode": "local" if is_local else "hosted",
        "pricing_tier": pricing_tier,
    }
    limits_section = {
        "sanitization_ollama_max_input_chars": settings.sanitization_ollama_max_input_chars,
        "request_timeout_seconds": settings.request_timeout_seconds,
    }
    transport_section = {
        "mode": resolve_transport_mode(),
        "control_plane_url_configured": bool(settings.topos_control_plane_url),
    }
    return {
        "schema_version": CAPABILITIES_SCHEMA_VERSION,
        "providers": sorted(provider_names),
        "models": sorted(model_names),
        "supports_filtering": True,
        "supports_sanitization": True,
        "supports_enrichment": True,
        "operations": list(allowed_operations),
        "runtime_profile": runtime_profile_section,
        "limits": limits_section,
        "transport": transport_section,
    }
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def resolve_transport_mode() -> str:
    """Return ``settings.engine_transport_mode`` normalised to ``"ws"`` or ``"endpoint"``.

    A missing, empty or unrecognised value resolves to ``"ws"``.
    """
    configured = getattr(settings, "engine_transport_mode", "ws") or "ws"
    candidate = str(configured).strip().lower()
    return candidate if candidate in {"ws", "endpoint"} else "ws"
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def build_engine_register_message() -> Dict[str, Any]:
    """Build an ``engine_register`` message with a fresh id and a UTC timestamp.

    The payload carries the transport mode, the full capabilities document and
    the engine mode / LLM flag from ``settings``.
    """
    occurred_at = datetime.now(timezone.utc).isoformat()
    metadata = {
        "engine_mode": settings.engine_mode,
        "enable_llm": settings.enable_llm,
    }
    payload = {
        "occurred_at": occurred_at,
        "status": "connected",
        "transport_mode": resolve_transport_mode(),
        "capabilities": build_engine_capabilities(),
        "metadata": metadata,
    }
    return {"id": str(uuid4()), "type": "engine_register", "payload": payload}
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def build_engine_heartbeat_message() -> Dict[str, Any]:
    """Build an ``engine_heartbeat`` message with a fresh id and a UTC timestamp.

    Same envelope as the register message, but without the capabilities document.
    """
    occurred_at = datetime.now(timezone.utc).isoformat()
    metadata = {
        "engine_mode": settings.engine_mode,
        "enable_llm": settings.enable_llm,
    }
    payload = {
        "occurred_at": occurred_at,
        "status": "connected",
        "transport_mode": resolve_transport_mode(),
        "metadata": metadata,
    }
    return {"id": str(uuid4()), "type": "engine_heartbeat", "payload": payload}
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
"""Result formatter: raw adapter output + meta → ProcessingResult."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any, Dict, Optional
|
|
6
|
+
|
|
7
|
+
from .tasks import ExecutionMeta, ProcessingResult, Provenance
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def format_result(
    task_id: str,
    status: str,
    raw_output: Dict[str, Any],
    *,
    provenance_source_id: Optional[str] = None,
    provenance_record_ids: Optional[list] = None,
    execution_meta: Optional[ExecutionMeta] = None,
    error: Optional[str] = None,
    confidence: Optional[float] = None,
    output_type: str = "json",
) -> ProcessingResult:
    """Wrap adapter output and metadata in a ProcessingResult.

    A ``Provenance`` is attached only when a source id is given or at least one
    record id is present; otherwise ``provenance`` stays ``None``.
    """
    has_source = provenance_source_id is not None
    has_records = provenance_record_ids is not None and len(provenance_record_ids) > 0

    if has_source or has_records:
        provenance = Provenance(
            source_id=provenance_source_id,
            record_ids=provenance_record_ids or [],
        )
    else:
        provenance = None

    return ProcessingResult(
        task_id=task_id,
        status=status,
        output=raw_output,
        output_type=output_type,
        confidence=confidence,
        provenance=provenance,
        execution_meta=execution_meta,
        error=error,
    )
|
topos/engine/router.py
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
"""Task router: select backend adapter by provider."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from .backends import BackendAdapter, get_huggingface_adapter, get_ollama_adapter, get_stub_adapter
|
|
6
|
+
from .tasks import ProcessingTask
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def get_adapter_for_task(task: ProcessingTask) -> BackendAdapter:
    """Pick the backend adapter matching ``task.model_request.provider``.

    The provider is compared trimmed and lower-cased: ``huggingface`` and
    ``ollama`` map to their adapters; anything else (including a missing
    provider) gets the stub adapter.
    """
    provider_key = (task.model_request.provider or "").strip().lower()
    adapter_factories = {
        "huggingface": get_huggingface_adapter,
        "ollama": get_ollama_adapter,
    }
    make_adapter = adapter_factories.get(provider_key, get_stub_adapter)
    return make_adapter()
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import base64
|
|
4
|
+
import hashlib
|
|
5
|
+
import hmac
|
|
6
|
+
import json
|
|
7
|
+
import time
|
|
8
|
+
from typing import Any, Dict, Optional
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def _b64url_encode(raw: bytes) -> str:
|
|
12
|
+
return base64.urlsafe_b64encode(raw).rstrip(b"=").decode("ascii")
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _b64url_decode(raw: str) -> bytes:
|
|
16
|
+
padding = "=" * ((4 - len(raw) % 4) % 4)
|
|
17
|
+
return base64.urlsafe_b64decode((raw + padding).encode("ascii"))
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _sign(data: str, secret: str) -> str:
    """Return the unpadded base64url HMAC-SHA256 of ``data`` keyed with ``secret``."""
    key = secret.encode("utf-8")
    message = data.encode("utf-8")
    mac = hmac.new(key, message, digestmod=hashlib.sha256)
    return _b64url_encode(mac.digest())
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def create_scoped_invocation_token(claims: Dict[str, Any], *, secret: str) -> str:
    """Serialise ``claims`` into a signed ``<payload>.<signature>`` token.

    The payload is the base64url form of the claims as compact JSON with sorted
    keys; the signature is the HMAC of that encoded payload under ``secret``.
    """
    canonical_json = json.dumps(claims, separators=(",", ":"), sort_keys=True)
    body = _b64url_encode(canonical_json.encode("utf-8"))
    return ".".join((body, _sign(body, secret)))
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class ScopedTokenValidationError(ValueError):
    """Raised when a scoped invocation token is rejected.

    ``code`` is a machine-readable reason and ``message`` the human-readable
    text (also used as the exception message).
    """

    def __init__(self, code: str, message: str):
        self.code = code
        self.message = message
        super().__init__(message)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def validate_scoped_invocation_token(
    *,
    token: str,
    secret: str,
    resource_id: str,
    operation: str,
    request_id: Optional[str] = None,
    now_epoch_s: Optional[int] = None,
) -> Dict[str, Any]:
    """Verify a scoped invocation token and return its claims.

    Checks, in order: token shape, HMAC signature, JSON payload, expiry,
    resource scope, allowed operations and (when both sides provide one)
    request id.

    Args:
        token: ``<base64url payload>.<base64url signature>`` string.
        secret: Shared secret the token must have been signed with.
        resource_id: Resource the caller is acting on; must equal the token's
            ``resource_id`` claim.
        operation: Operation being invoked; must be listed in the token's
            ``allowed_operations`` claim unless that list contains ``"*"``.
        request_id: Optional request id; compared only when the token also
            carries a ``request_id`` claim.
        now_epoch_s: Override for the current time in epoch seconds.

    Returns:
        The decoded claims dict.

    Raises:
        ScopedTokenValidationError: With code ``TOKEN_INVALID``,
            ``TOKEN_EXPIRED``, ``SCOPE_MISMATCH`` or ``OPERATION_NOT_ALLOWED``.
    """
    # The token is untrusted input: a non-string value must be rejected as an
    # invalid token rather than surface as a TypeError.
    if not isinstance(token, str) or "." not in token:
        raise ScopedTokenValidationError("TOKEN_INVALID", "missing or malformed invocation token")
    encoded_payload, signature = token.split(".", 1)
    try:
        # Compare as bytes: hmac.compare_digest raises TypeError for str
        # arguments containing non-ASCII characters.
        expected = _sign(encoded_payload, secret).encode("ascii")
        presented = signature.encode("utf-8")
    except UnicodeError as exc:
        raise ScopedTokenValidationError("TOKEN_INVALID", "invalid token signature") from exc
    if not hmac.compare_digest(presented, expected):
        raise ScopedTokenValidationError("TOKEN_INVALID", "invalid token signature")
    try:
        claims = json.loads(_b64url_decode(encoded_payload).decode("utf-8"))
    except Exception as exc:  # noqa: BLE001
        raise ScopedTokenValidationError("TOKEN_INVALID", "token payload is not valid JSON") from exc
    if not isinstance(claims, dict):
        raise ScopedTokenValidationError("TOKEN_INVALID", "token payload must be an object")

    now_s = int(now_epoch_s if now_epoch_s is not None else time.time())
    exp = claims.get("exp")
    # bool is a subclass of int; ``"exp": true`` is not a timestamp.
    if isinstance(exp, bool) or not isinstance(exp, int):
        raise ScopedTokenValidationError("TOKEN_INVALID", "token exp claim is required")
    if now_s >= exp:
        raise ScopedTokenValidationError("TOKEN_EXPIRED", "invocation token has expired")

    token_resource_id = str(claims.get("resource_id") or "")
    if token_resource_id != resource_id:
        raise ScopedTokenValidationError("SCOPE_MISMATCH", "resource_id does not match token scope")

    allowed_ops = claims.get("allowed_operations")
    if not isinstance(allowed_ops, list) or not allowed_ops:
        raise ScopedTokenValidationError("TOKEN_INVALID", "allowed_operations claim is required")
    normalized_allowed = {str(v) for v in allowed_ops if str(v)}
    if "*" not in normalized_allowed and operation not in normalized_allowed:
        raise ScopedTokenValidationError("OPERATION_NOT_ALLOWED", "operation not allowed by token scope")

    # The request id is only enforced when both the caller and the token supply one.
    token_request_id = str(claims.get("request_id") or "")
    if request_id and token_request_id and token_request_id != request_id:
        raise ScopedTokenValidationError("SCOPE_MISMATCH", "request_id does not match token scope")
    return claims
|