topos-node 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (249)
  1. shared/__init__.py +59 -0
  2. shared/filtering.py +640 -0
  3. shared/schema_registry.py +229 -0
  4. topos/__init__.py +5 -0
  5. topos/__version__.py +6 -0
  6. topos/analytics/__init__.py +15 -0
  7. topos/analytics/duckdb_adapter.py +48 -0
  8. topos/analytics/messenger_communities.py +349 -0
  9. topos/analytics/messenger_graph.py +522 -0
  10. topos/analytics/messenger_labels.py +321 -0
  11. topos/analytics/profiles.py +22 -0
  12. topos/analytics/query_engine.py +64 -0
  13. topos/analytics/raw_queries.py +174 -0
  14. topos/api/__init__.py +1 -0
  15. topos/api/analytics.py +52 -0
  16. topos/api/app_registry.py +31 -0
  17. topos/api/backup.py +15 -0
  18. topos/api/compute_remote.py +175 -0
  19. topos/api/data_commit.py +158 -0
  20. topos/api/data_explorer_table_prefs.py +81 -0
  21. topos/api/db.py +10 -0
  22. topos/api/device.py +25 -0
  23. topos/api/enrichment.py +959 -0
  24. topos/api/filter_lab.py +195 -0
  25. topos/api/health.py +61 -0
  26. topos/api/ingestion_api.py +37 -0
  27. topos/api/ingestion_compat.py +21 -0
  28. topos/api/ingestion_sources.py +600 -0
  29. topos/api/llm.py +76 -0
  30. topos/api/local_mcp.py +46 -0
  31. topos/api/messenger_analytics.py +385 -0
  32. topos/api/query_api.py +13 -0
  33. topos/api/sanitization_ollama_config.py +64 -0
  34. topos/api/source_install.py +324 -0
  35. topos/api/sources.py +13 -0
  36. topos/api/sync.py +10 -0
  37. topos/api/ui_config.py +83 -0
  38. topos/api/uma_data.py +311 -0
  39. topos/api/usage.py +49 -0
  40. topos/api/user_identity.py +46 -0
  41. topos/app.py +239 -0
  42. topos/auth.py +17 -0
  43. topos/canonicalization/__init__.py +1 -0
  44. topos/canonicalization/mappers/__init__.py +22 -0
  45. topos/canonicalization/mappers/base.py +26 -0
  46. topos/canonicalization/mappers/chatgpt_mapper.py +40 -0
  47. topos/canonicalization/mappers/grok_mapper.py +17 -0
  48. topos/canonicalization/mappers/messenger_mapper.py +58 -0
  49. topos/canonicalization/models.py +31 -0
  50. topos/canonicalization/resolver.py +23 -0
  51. topos/cli/__init__.py +1 -0
  52. topos/cli/__main__.py +6 -0
  53. topos/cli/commands.py +132 -0
  54. topos/config/__init__.py +1 -0
  55. topos/config/sanitization_ollama.py +189 -0
  56. topos/config/settings.py +310 -0
  57. topos/contacts/__init__.py +5 -0
  58. topos/contacts/identity.py +24 -0
  59. topos/control_plane_client.py +300 -0
  60. topos/core/__init__.py +1 -0
  61. topos/core/api_models.py +128 -0
  62. topos/core/connection_resilience.py +99 -0
  63. topos/core/device_helpers.py +8 -0
  64. topos/core/errors.py +13 -0
  65. topos/core/events.py +12 -0
  66. topos/core/handlers.py +5625 -0
  67. topos/core/logging.py +175 -0
  68. topos/core/metrics.py +21 -0
  69. topos/core/startup_banner.py +62 -0
  70. topos/core/state.py +682 -0
  71. topos/core/table_layers.py +45 -0
  72. topos/core/types.py +13 -0
  73. topos/data_explorer_table_prefs.py +150 -0
  74. topos/engine/__init__.py +29 -0
  75. topos/engine/backends/__init__.py +50 -0
  76. topos/engine/backends/base.py +21 -0
  77. topos/engine/backends/huggingface.py +151 -0
  78. topos/engine/backends/ollama.py +181 -0
  79. topos/engine/backends/stub.py +22 -0
  80. topos/engine/engine.py +165 -0
  81. topos/engine/intake.py +32 -0
  82. topos/engine/queue_manager.py +112 -0
  83. topos/engine/registration.py +126 -0
  84. topos/engine/result_formatter.py +38 -0
  85. topos/engine/router.py +19 -0
  86. topos/engine/scoped_token.py +82 -0
  87. topos/engine/tasks.py +154 -0
  88. topos/engine/transport.py +44 -0
  89. topos/engine/usage_guard.py +100 -0
  90. topos/engine/usage_observation.py +129 -0
  91. topos/engine/validator.py +23 -0
  92. topos/enrichment/__init__.py +1 -0
  93. topos/enrichment/derived_tables.py +214 -0
  94. topos/enrichment/jobs/__init__.py +30 -0
  95. topos/enrichment/jobs/base.py +54 -0
  96. topos/enrichment/jobs/canonical/__init__.py +1 -0
  97. topos/enrichment/jobs/canonical/embeddings_job.py +27 -0
  98. topos/enrichment/jobs/canonical/emo_27_job.py +97 -0
  99. topos/enrichment/jobs/canonical/entities_job.py +27 -0
  100. topos/enrichment/jobs/canonical/sentiment_job.py +27 -0
  101. topos/enrichment/jobs/canonical/topics_job.py +27 -0
  102. topos/enrichment/jobs/raw/__init__.py +1 -0
  103. topos/enrichment/jobs/raw/attachments_job.py +12 -0
  104. topos/enrichment/jobs/raw/language_job.py +12 -0
  105. topos/enrichment/jobs/raw/time_normalization_job.py +12 -0
  106. topos/enrichment/jobs/raw/tool_calls_job.py +12 -0
  107. topos/enrichment/models/__init__.py +1 -0
  108. topos/enrichment/models/manager.py +8 -0
  109. topos/enrichment/models/registry.py +71 -0
  110. topos/enrichment/models/versioning.py +8 -0
  111. topos/enrichment/orchestrator.py +177 -0
  112. topos/enrichment/processor.py +17 -0
  113. topos/enrichment/progress_bar.py +122 -0
  114. topos/enrichment/website_classifier.py +31 -0
  115. topos/filter_lab/__init__.py +1 -0
  116. topos/filter_lab/bundles.py +300 -0
  117. topos/filter_lab/schema.py +86 -0
  118. topos/filter_lab/service.py +167 -0
  119. topos/filter_lab/store.py +374 -0
  120. topos/filter_lab/worker.py +250 -0
  121. topos/hosted_pool_lease.py +153 -0
  122. topos/ingestion/__init__.py +1 -0
  123. topos/ingestion/checkpoints/__init__.py +6 -0
  124. topos/ingestion/checkpoints/checkpoint_store.py +24 -0
  125. topos/ingestion/checkpoints/sqlite_checkpoint_store.py +82 -0
  126. topos/ingestion/ingest_helpers.py +504 -0
  127. topos/ingestion/jobs.py +91 -0
  128. topos/ingestion/local_sync.py +823 -0
  129. topos/ingestion/log_preview.py +21 -0
  130. topos/ingestion/manager.py +1100 -0
  131. topos/ingestion/parser.py +174 -0
  132. topos/ingestion/parsers/__init__.py +32 -0
  133. topos/ingestion/parsers/base.py +24 -0
  134. topos/ingestion/parsers/browser_parser.py +171 -0
  135. topos/ingestion/parsers/calendar_parser.py +21 -0
  136. topos/ingestion/parsers/chatgpt_conversation_flattener.py +266 -0
  137. topos/ingestion/parsers/chatgpt_parser.py +67 -0
  138. topos/ingestion/parsers/grok_parser.py +21 -0
  139. topos/ingestion/parsers/messenger_parser.py +97 -0
  140. topos/ingestion/progress.py +54 -0
  141. topos/ingestion/sources/__init__.py +20 -0
  142. topos/ingestion/sources/base.py +39 -0
  143. topos/ingestion/sources/calendar.py +29 -0
  144. topos/ingestion/sources/chatgpt.py +29 -0
  145. topos/ingestion/sources/contact_importers.py +274 -0
  146. topos/ingestion/sources/grok.py +29 -0
  147. topos/ingestion/sources/imessage_reader.py +479 -0
  148. topos/ingestion/sources/signal_export_parser.py +132 -0
  149. topos/ingestion/sources/signal_reader.py +491 -0
  150. topos/ingestion/state_machine.py +70 -0
  151. topos/ingestion/triggers/__init__.py +1 -0
  152. topos/ingestion/triggers/file_trigger.py +36 -0
  153. topos/ingestion/triggers/sqlite_trigger.py +18 -0
  154. topos/ingestion/validation/__init__.py +1 -0
  155. topos/ingestion/validation/base.py +27 -0
  156. topos/ingestion/validation/schema_registry.py +111 -0
  157. topos/ingestion/validation/schema_validator.py +13 -0
  158. topos/lineage/__init__.py +1 -0
  159. topos/lineage/provenance.py +9 -0
  160. topos/lineage/tracker.py +9 -0
  161. topos/mcp_stdio_proxy.py +83 -0
  162. topos/observability/__init__.py +1 -0
  163. topos/observability/alerts.py +7 -0
  164. topos/observability/metrics.py +25 -0
  165. topos/observability/tracing.py +18 -0
  166. topos/openai_client.py +69 -0
  167. topos/projections/__init__.py +1 -0
  168. topos/projections/vector_index/__init__.py +1 -0
  169. topos/projections/vector_index/base.py +21 -0
  170. topos/projections/vector_index/builders.py +11 -0
  171. topos/projections/vector_index/health_checks.py +5 -0
  172. topos/rate_limit.py +43 -0
  173. topos/sanitization/__init__.py +16 -0
  174. topos/sanitization/ollama_transforms.py +276 -0
  175. topos/scope_resolution.py +89 -0
  176. topos/services/__init__.py +1 -0
  177. topos/services/container.py +46 -0
  178. topos/services/embeddings/__init__.py +1 -0
  179. topos/services/embeddings/base.py +7 -0
  180. topos/services/embeddings/local.py +9 -0
  181. topos/services/embeddings/remote.py +9 -0
  182. topos/services/interfaces.py +40 -0
  183. topos/services/llm/__init__.py +1 -0
  184. topos/services/llm/base.py +7 -0
  185. topos/services/llm/openai.py +126 -0
  186. topos/services/local.py +123 -0
  187. topos/services/postgres.py +385 -0
  188. topos/sources/__init__.py +6 -0
  189. topos/sources/definitions.py +114 -0
  190. topos/sources/install_service.py +836 -0
  191. topos/sources/registry.py +263 -0
  192. topos/sources/runtime_install.py +427 -0
  193. topos/storage/__init__.py +1 -0
  194. topos/storage/canonical/__init__.py +18 -0
  195. topos/storage/canonical/ai_chat/__init__.py +22 -0
  196. topos/storage/canonical/ai_chat/canonicalizer.py +147 -0
  197. topos/storage/canonical/ai_chat/mapper.py +168 -0
  198. topos/storage/canonical/ai_chat/model.py +87 -0
  199. topos/storage/canonical/ai_chat/tables.py +179 -0
  200. topos/storage/canonical/canonical_store.py +24 -0
  201. topos/storage/canonical/conversations_tables.py +1020 -0
  202. topos/storage/canonical/mapping_store.py +30 -0
  203. topos/storage/canonical/postgres.py +10 -0
  204. topos/storage/db/__init__.py +1 -0
  205. topos/storage/db/client.py +8 -0
  206. topos/storage/db/migrations/__init__.py +1 -0
  207. topos/storage/db/migrations/stage9_column_renames.py +78 -0
  208. topos/storage/db/paths.py +122 -0
  209. topos/storage/db/postgres.py +240 -0
  210. topos/storage/db/schema.py +6 -0
  211. topos/storage/enrichment/__init__.py +1 -0
  212. topos/storage/enrichment/canonical_enrichment_store.py +7 -0
  213. topos/storage/enrichment/raw_enrichment_store.py +18 -0
  214. topos/storage/normalized/__init__.py +1 -0
  215. topos/storage/normalized/normalized_store.py +24 -0
  216. topos/storage/oplog/__init__.py +1 -0
  217. topos/storage/oplog/decision.py +6 -0
  218. topos/storage/oplog/oplog_store.py +17 -0
  219. topos/storage/oplog/postgres.py +10 -0
  220. topos/storage/projections/__init__.py +1 -0
  221. topos/storage/projections/index_ops_store.py +6 -0
  222. topos/storage/projections/vector_index_store.py +6 -0
  223. topos/storage/raw/__init__.py +1 -0
  224. topos/storage/raw/browser_flat_tables.py +303 -0
  225. topos/storage/raw/file_store.py +100 -0
  226. topos/storage/raw/raw_store.py +29 -0
  227. topos/storage/raw/raw_tables_manager.py +295 -0
  228. topos/storage/raw/sqlite_raw_store.py +17 -0
  229. topos/storage/security/encryption.py +21 -0
  230. topos/storage/signal_identity.py +71 -0
  231. topos/storage/source_settings.py +116 -0
  232. topos/storage/user_identity.py +69 -0
  233. topos/sync/__init__.py +5 -0
  234. topos/sync/client.py +272 -0
  235. topos/sync_handlers.py +70 -0
  236. topos/testing/__init__.py +1 -0
  237. topos/testing/lifespan.py +7 -0
  238. topos/uma_contact_enrichment.py +1032 -0
  239. topos/uma_filters.py +669 -0
  240. topos/uma_resource_id.py +24 -0
  241. topos/uma_rpt.py +69 -0
  242. topos/utils/base_object.py +61 -0
  243. topos/websocket_client.py +21 -0
  244. topos_node-0.1.0.dist-info/METADATA +199 -0
  245. topos_node-0.1.0.dist-info/RECORD +249 -0
  246. topos_node-0.1.0.dist-info/WHEEL +5 -0
  247. topos_node-0.1.0.dist-info/entry_points.txt +2 -0
  248. topos_node-0.1.0.dist-info/licenses/LICENSE +201 -0
  249. topos_node-0.1.0.dist-info/top_level.txt +2 -0
topos/engine/tasks.py ADDED
@@ -0,0 +1,154 @@
1
+ """Task contract for the Topos Engine (PRD §6)."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from datetime import datetime, timezone
6
+ from typing import Any, Dict, List, Optional
7
+
8
+ from pydantic import BaseModel, Field
9
+
10
+
11
+ # --- Nested structures (PRD §6.1) ---
12
+
13
+
14
class ModelRequest(BaseModel):
    """Names the provider, and optionally the model, that should run a task."""

    # Required: there is no default provider.
    provider: str = Field(default=..., description="e.g. ollama, huggingface")
    # Optional: may be left unset.
    model: Optional[str] = Field(
        default=None,
        description="Model name or id; default from registry if omitted",
    )
19
+
20
+
21
class ExecutionSpec(BaseModel):
    """Scheduling hints for a task: execution mode, priority and batch key."""

    mode: str = Field("sync", description="sync, async, batch, etc.")
    priority: int = Field(100, description="Lower = higher priority")
    batch_key: Optional[str] = Field(
        default=None,
        description="Group tasks for model-aware batching",
    )
27
+
28
+
29
class TaskOptions(BaseModel):
    """Switches that apply to a single task."""

    store_result: bool = Field(True, description="Whether to persist result")
    apply_fisher_filter: bool = Field(False, description="Apply Fisher filter at output")
34
+
35
+
36
class RequestedBy(BaseModel):
    """Identifies the requester of a task and where the request came from."""

    user_id: Optional[str] = Field(default=None)
    origin: Optional[str] = Field(default=None, description="e.g. write_event, batch, manual")
41
+
42
+
43
+ # --- ProcessingTask (PRD §6.1) ---
44
+
45
+
46
class ProcessingTask(BaseModel):
    """Input contract for the Engine. All work is represented as a task.

    ``id``, ``type`` and ``model_request`` are required; every other field has
    a default. ``created_at`` defaults to the current UTC time as an ISO-8601
    string, computed when the instance is created.
    """

    # "extra": "forbid" rejects unknown keys instead of dropping them.
    # protected_namespaces is cleared because the `model_request` field starts
    # with pydantic v2's reserved `model_` prefix; on releases where that
    # prefix is protected by default, declaring the field emits a UserWarning.
    model_config = {"extra": "forbid", "protected_namespaces": ()}

    id: str = Field(..., description="Unique task id")
    type: str = Field(..., description="enrichment, transformation, derivation, query, etc.")
    subtype: Optional[str] = Field(None, description="e.g. emotion_classification, url_classification")
    source_id: Optional[str] = None
    record_ids: List[str] = Field(default_factory=list)
    input: Dict[str, Any] = Field(default_factory=dict)
    model_request: ModelRequest = Field(...)
    execution: ExecutionSpec = Field(default_factory=ExecutionSpec)
    options: TaskOptions = Field(default_factory=TaskOptions)
    requested_by: Optional[RequestedBy] = None
    created_at: Optional[str] = Field(
        default_factory=lambda: datetime.now(tz=timezone.utc).isoformat()
    )

    def model_dump_json_roundtrip(self) -> Dict[str, Any]:
        """Serialize to JSON-compatible dict (for transport and tests).

        Returns:
            The result of ``model_dump(mode="json")``: a dict, not a JSON string.
        """
        return self.model_dump(mode="json")
68
+
69
+
70
+ # --- ProcessingResult (PRD §6.2) ---
71
+
72
+
73
class Provenance(BaseModel):
    """Source and record identifiers attached to a result."""

    source_id: Optional[str] = Field(default=None)
    record_ids: List[str] = Field(default_factory=lambda: [])
78
+
79
+
80
class ExecutionMeta(BaseModel):
    """Metadata about one execution: provider, model, duration and cache flag."""

    provider: Optional[str] = Field(default=None)
    model: Optional[str] = Field(default=None)
    duration_ms: Optional[int] = Field(default=None)
    cache_hit: bool = Field(default=False)
87
+
88
+
89
class ProcessingResult(BaseModel):
    """Result envelope the Engine returns for one task."""

    # Unknown keys are rejected rather than ignored.
    model_config = {"extra": "forbid"}

    task_id: str = Field(default=...)
    status: str = Field(default=..., description="completed, failed, error, etc.")
    output: Dict[str, Any] = Field(default_factory=dict)
    output_type: str = Field("json")
    confidence: Optional[float] = Field(default=None)
    provenance: Optional[Provenance] = Field(default=None)
    execution_meta: Optional[ExecutionMeta] = Field(default=None)
    error: Optional[str] = Field(default=None)

    def model_dump_json_roundtrip(self) -> Dict[str, Any]:
        """Return this result as a dict of JSON-compatible values."""
        as_json = self.model_dump(mode="json")
        return as_json
106
+
107
+
108
+ # --- Helper ---
109
+
110
+
111
def build_task(
    task_id: str,
    task_type: str,
    model_request: ModelRequest,
    *,
    subtype: Optional[str] = None,
    source_id: Optional[str] = None,
    record_ids: Optional[List[str]] = None,
    input_data: Optional[Dict[str, Any]] = None,
    execution: Optional[ExecutionSpec] = None,
    requested_by: Optional[RequestedBy] = None,
) -> ProcessingTask:
    """Assemble a ProcessingTask from the required fields plus optional overrides.

    Falsy ``record_ids``, ``input_data`` and ``execution`` are replaced by an
    empty list, an empty dict and a default ``ExecutionSpec`` respectively.
    """
    fields: Dict[str, Any] = {
        "id": task_id,
        "type": task_type,
        "subtype": subtype,
        "source_id": source_id,
        "record_ids": record_ids if record_ids else [],
        "input": input_data if input_data else {},
        "model_request": model_request,
        "execution": execution if execution else ExecutionSpec(),
        "requested_by": requested_by,
    }
    return ProcessingTask(**fields)
135
+
136
+
137
def build_url_classification_task(
    task_id: str,
    url: str,
    title: Optional[str] = None,
    *,
    source_id: Optional[str] = None,
    record_ids: Optional[List[str]] = None,
) -> ProcessingTask:
    """Create an ``enrichment`` / ``url_classification`` task for one URL.

    The task targets the ``huggingface`` provider and carries the URL and its
    title (an empty string when no title is given) as input.
    """
    task_input = {"url": url, "title": title if title else ""}
    provider_request = ModelRequest(provider="huggingface")
    return build_task(
        task_id,
        "enrichment",
        provider_request,
        subtype="url_classification",
        source_id=source_id,
        record_ids=record_ids or [],
        input_data=task_input,
    )
topos/engine/transport.py ADDED
@@ -0,0 +1,44 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from dataclasses import dataclass
5
+ from typing import Any, Awaitable, Callable, Dict
6
+
7
+ MessageHandler = Callable[[Dict[str, Any]], Awaitable[Dict[str, Any] | None]]
8
+
9
+
10
@dataclass(frozen=True)
class TransportConfig:
    """Immutable transport settings for the compute channel."""

    # Transport selector; defaults to the websocket transport.
    mode: str = "ws"
13
+
14
+
15
def normalize_transport_mode(value: str | None) -> str:
    """Return ``"ws"`` or ``"endpoint"`` for a user-supplied transport mode.

    The value is trimmed and lower-cased first. A missing, empty or
    unrecognised value falls back to ``"ws"``.
    """
    if not value:
        return "ws"
    candidate = value.strip().lower()
    return candidate if candidate in ("ws", "endpoint") else "ws"
20
+
21
+
22
async def dispatch_compute_message(
    *,
    mode: str,
    payload: Dict[str, Any],
    handler: MessageHandler,
) -> Dict[str, Any] | None:
    """Route a compute message to ``handler`` for the given transport mode.

    Both supported transports currently share the same business handler, so
    the handler's result is returned unchanged in either case.
    """
    routes = {"ws": handler, "endpoint": handler}
    target = routes[normalize_transport_mode(mode)]
    return await target(payload)
33
+
34
+
35
async def handle_ws_raw_message(raw: str, handler: MessageHandler) -> str | None:
    """Decode a raw JSON websocket frame, dispatch it, and encode the reply.

    Returns the JSON-encoded handler response, or ``None`` when the handler
    produced no response. A frame that is not valid JSON makes ``json.loads``
    raise; that error is not caught here.
    """
    reply = await dispatch_compute_message(
        mode="ws",
        payload=json.loads(raw),
        handler=handler,
    )
    return None if reply is None else json.dumps(reply)
41
+
42
+
43
async def handle_endpoint_request(payload: Dict[str, Any], handler: MessageHandler) -> Dict[str, Any] | None:
    """Dispatch an already-decoded payload using the ``endpoint`` transport."""
    reply = await dispatch_compute_message(
        handler=handler,
        payload=payload,
        mode="endpoint",
    )
    return reply
topos/engine/usage_guard.py ADDED
@@ -0,0 +1,100 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ import os
5
+ from typing import Any, Dict, Optional
6
+
7
+ import httpx
8
+
9
+ from ..config.settings import settings
10
+
11
+ logger = logging.getLogger("topos.engine.usage_guard")
12
+
13
# Reason codes accepted as a structured guard denial.
_KNOWN_GUARD_REASON_CODES = {
    "LIMIT_EXCEEDED",
    "MISSING_POLICY_RULE",
    "UNTRUSTED_USAGE_SOURCE",
    "METRIC_NOT_REGISTERED",
    "PERIOD_RESOLUTION_FALLBACK",
}


def parse_guard_denial_payload(detail: Any) -> Optional[Dict[str, Any]]:
    """Normalise a guard denial payload, or return ``None`` if it is not one.

    ``detail`` counts as a denial only when it is a dict whose trimmed
    ``reason_code`` is one of the known codes. In the result, the string
    fields are trimmed (empty becomes ``None``), the counters are passed
    through untouched, and ``message`` falls back to a generic text.
    """
    if not isinstance(detail, dict):
        return None

    def _text(key: str) -> str:
        return str(detail.get(key) or "").strip()

    reason_code = _text("reason_code")
    if reason_code not in _KNOWN_GUARD_REASON_CODES:
        return None

    parsed: Dict[str, Any] = {"reason_code": reason_code}
    for key in ("metric_key", "period_start", "period_end"):
        parsed[key] = _text(key) or None
    for key in ("used", "limit", "unlimited", "policy_version"):
        parsed[key] = detail.get(key)
    parsed["message"] = str(detail.get("message") or "Usage request denied by guard.")
    return parsed
42
+
43
+
44
def classify_guard_submission_error(status_code: int, detail: Any) -> str:
    """Label a failed guard submission.

    Returns ``"guard_denial"`` only for HTTP 402 with a parseable denial
    payload; every other combination is ``"transport_or_system_error"``.
    """
    parsed = parse_guard_denial_payload(detail)
    is_denial = parsed is not None and status_code == 402
    return "guard_denial" if is_denial else "transport_or_system_error"
49
+
50
+
51
async def submit_usage_guard_check(
    *,
    usage_kind: str,
    units: int,
    timeout_seconds: float = 10.0,
) -> Dict[str, Any]:
    """Submit a usage charge to the control-plane guard and report the verdict.

    Returns a dict with an ``allowed`` flag and a ``source`` naming the path
    that produced it:

    * ``guard_not_enabled`` - the TOPOS_USAGE_GUARD_ENFORCEMENT_ENABLED
      environment variable is not set to 1/true/yes/on.
    * ``guard_not_configured`` - no control-plane URL or API key is set.
    * ``control_plane`` - HTTP 200 (allowed, with ``payload``) or a structured
      denial (not allowed, with ``denial``).
    * ``control_plane_unreachable`` - a non-denial response for which
      ``raise_for_status()`` did not raise.

    Raises:
        Whatever ``response.raise_for_status()`` raises for a failure response
        that is not a structured denial.
    """
    flag = os.getenv("TOPOS_USAGE_GUARD_ENFORCEMENT_ENABLED", "")
    if flag.strip().lower() not in ("1", "true", "yes", "on"):
        return {"allowed": True, "source": "guard_not_enabled"}

    base_url = str(settings.topos_control_plane_url or "").strip()
    token = str(settings.topos_key or "").strip()
    # The configured URL may be a websocket address; derive its HTTP form.
    for ws_scheme, http_scheme in (("wss://", "https://"), ("ws://", "http://")):
        if base_url.startswith(ws_scheme):
            base_url = base_url.replace(ws_scheme, http_scheme)
            break
    base_url = base_url.split("/ws/")[0].rstrip("/")

    # Allow dev/test operation when control-plane connectivity is absent.
    if not (base_url and token):
        return {"allowed": True, "source": "guard_not_configured"}

    async with httpx.AsyncClient(timeout=timeout_seconds) as client:
        response = await client.post(
            f"{base_url}/v1/billing/usage/charge",
            headers={"Authorization": f"Bearer {token}"},
            json={"usage_kind": usage_kind, "units": int(max(0, units))},
        )

    status = response.status_code
    if status == 200:
        return {
            "allowed": True,
            "source": "control_plane",
            "payload": response.json() if response.content else {},
        }

    # Prefer the "detail" member of a JSON error body; fall back to raw text.
    try:
        parsed = response.json() if response.content else {}
        detail = parsed.get("detail", parsed)
    except Exception:
        detail = response.text

    if classify_guard_submission_error(status, detail) != "guard_denial":
        logger.warning("usage guard transport/system failure status=%s", status)
        response.raise_for_status()
        return {"allowed": True, "source": "control_plane_unreachable"}

    denial = parse_guard_denial_payload(detail) or {}
    logger.info(
        "usage guard denied metric=%s reason=%s period_start=%s period_end=%s",
        denial.get("metric_key"),
        denial.get("reason_code"),
        denial.get("period_start"),
        denial.get("period_end"),
    )
    return {"allowed": False, "source": "control_plane", "denial": denial}
topos/engine/usage_observation.py ADDED
@@ -0,0 +1,129 @@
1
+ from __future__ import annotations
2
+
3
+ import hashlib
4
+ import json
5
+ import uuid
6
+ from datetime import datetime, timezone
7
+ from typing import Any, Dict, Optional
8
+
9
+ from ..config.settings import settings
10
+
11
+
12
# Product action name -> metric key it is reported under.
ACTION_TO_METRIC_KEY: Dict[str, str] = {
    "llm.generate": "llm_tokens",
    "ingestion.file_processed": "file_transfer_mb",
    "uma.permission_ticket.validated": "permission_tickets",
    "source.install.completed": "source_installs",
    "contacts.google.connect.started": "third_party_connections",
}


def map_action_to_metric_key(action: str) -> Optional[str]:
    """Look up the metric key for ``action``; ``None`` when it is not mapped.

    The action is converted to a string and trimmed before the lookup; a
    falsy action is treated as the empty string.
    """
    normalized = str(action or "").strip()
    if normalized in ACTION_TO_METRIC_KEY:
        return ACTION_TO_METRIC_KEY[normalized]
    return None
23
+
24
+
25
def _now_iso() -> str:
    """Return the current UTC time as an ISO-8601 string."""
    current = datetime.now(tz=timezone.utc)
    return current.isoformat()
27
+
28
+
29
def _stable_identity_hash(identity: Dict[str, Any]) -> str:
    """Return the SHA-256 hex digest of ``identity`` in canonical JSON form.

    Keys are sorted and separators are compact, so key order does not change
    the digest. Values JSON cannot encode are stringified.
    """
    serialized = json.dumps(
        identity,
        default=str,
        separators=(",", ":"),
        sort_keys=True,
    )
    digest = hashlib.sha256()
    digest.update(serialized.encode("utf-8"))
    return digest.hexdigest()
32
+
33
+
34
def derive_usage_idempotency_key(
    *,
    producer: str,
    metric_key: str,
    action: str,
    canonical_action_identity: Dict[str, Any],
) -> str:
    """Build the colon-separated idempotency key for a usage observation.

    The key is ``producer:metric_key:action:identity_hash``. A falsy producer
    becomes ``engine``; a falsy metric key or action becomes ``unknown``.
    """
    parts = (
        str(producer or "engine"),
        str(metric_key or "unknown"),
        str(action or "unknown"),
        _stable_identity_hash(canonical_action_identity),
    )
    return ":".join(parts)
49
+
50
+
51
def build_usage_observation_envelope(
    *,
    action: str,
    quantity: int,
    producer: str,
    canonical_action_identity: Dict[str, Any],
    topos_id: Optional[str] = None,
    source: Optional[str] = None,
    observed_by: str = "engine",
    trust_class: str = "observe_only",
    metadata: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """Assemble the usage-observation envelope for one product action.

    Negative quantities are clamped to 0. ``source`` defaults to
    ``engine.<producer>`` and ``metadata`` to an empty dict.

    Raises:
        ValueError: if ``action`` has no registered metric key.
    """
    metric_key = map_action_to_metric_key(action)
    if not metric_key:
        raise ValueError(f"Unknown usage action: {action}")

    idempotency_key = derive_usage_idempotency_key(
        producer=str(producer or "engine"),
        metric_key=metric_key,
        action=action,
        canonical_action_identity=canonical_action_identity,
    )

    explicit_id = str(topos_id or "").strip()
    if explicit_id:
        resolved_topos_id = explicit_id
    else:
        # NOTE(review): the fallback id embeds the first 12 characters of
        # settings.topos_key - confirm that exposing a key prefix is intended.
        resolved_topos_id = f"engine:{str(settings.topos_key or '')[:12]}"

    envelope: Dict[str, Any] = {
        "event_id": f"eng_usage_{uuid.uuid4().hex}",
        "topos_id": resolved_topos_id,
        "metric_key": metric_key,
        "quantity": int(max(0, quantity)),
        "event_at": _now_iso(),
        "source": str(source or f"engine.{producer}"),
        "observed_by": observed_by,
        "idempotency_key": idempotency_key,
        "producer": producer,
        "action": action,
        "trust_class": trust_class,
        "metadata": metadata or {},
    }
    return envelope
87
+
88
+
89
async def emit_usage_observation(
    *,
    action: str,
    quantity: int,
    producer: str,
    canonical_action_identity: Dict[str, Any],
    topos_id: Optional[str] = None,
    source: Optional[str] = None,
    observed_by: str = "engine",
    trust_class: str = "observe_only",
    metadata: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """Build a usage observation and send it to the control plane, best-effort.

    The envelope is built first, so an unknown ``action`` still raises
    ``ValueError`` from ``build_usage_observation_envelope``. Sending is
    attempted only when ``core.state`` exposes a non-None
    ``control_plane_client``. Any failure while sending is logged at debug
    level and otherwise ignored.

    Returns:
        The envelope that was built, whether or not it was sent.
    """
    import logging  # function-scope: not among this module's top-level imports

    envelope = build_usage_observation_envelope(
        action=action,
        quantity=quantity,
        producer=producer,
        canonical_action_identity=canonical_action_identity,
        topos_id=topos_id,
        source=source,
        observed_by=observed_by,
        trust_class=trust_class,
        metadata=metadata,
    )

    # Use existing control-plane transport abstraction (WS unsolicited message).
    try:
        from ..core import state as engine_state

        cp_client = getattr(engine_state, "control_plane_client", None)
        if cp_client is not None:
            await cp_client.send_message(
                {
                    "id": f"usage_obs_{uuid.uuid4().hex}",
                    "type": "usage_observation",
                    "payload": envelope,
                }
            )
    except Exception:
        # Observation emission must be non-blocking for product flows, so the
        # error is never propagated; it is recorded so a failure is traceable.
        logging.getLogger("topos.engine.usage_observation").debug(
            "usage observation emission failed action=%s",
            action,
            exc_info=True,
        )
    return envelope
topos/engine/validator.py ADDED
@@ -0,0 +1,23 @@
1
+ """Task validator: reject invalid tasks."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Optional, Tuple
6
+
7
+ from .tasks import ProcessingTask
8
+
9
+
10
def validate_task(task: ProcessingTask) -> Tuple[bool, Optional[str]]:
    """Check that a task carries its mandatory, non-blank fields.

    Returns:
        ``(True, None)`` when the task is valid, otherwise ``(False, message)``
        naming the first failed check. Checks run in the order id, type,
        model_request, model_request.provider.
    """

    def _blank(value: object) -> bool:
        # Falsy values and whitespace-only strings both count as missing.
        return not value or not str(value).strip()

    if _blank(task.id):
        return False, "task id is required and must be non-empty"
    if _blank(task.type):
        return False, "task type is required and must be non-empty"

    request = task.model_request
    if not request:
        return False, "model_request is required"
    if _blank(request.provider):
        return False, "model_request.provider is required"
    return True, None
topos/enrichment/__init__.py ADDED
@@ -0,0 +1 @@
1
+ """Enrichment layer for Topos."""
topos/enrichment/derived_tables.py ADDED
@@ -0,0 +1,214 @@
1
+ """Derived tables manager for enrichment data storage."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ import sqlite3
7
+ from datetime import datetime, timezone
8
+ from typing import Any, Dict, List, Optional
9
+
10
+ from ..utils.base_object import BaseObject
11
+
12
+ logger = logging.getLogger("topos.enrichment.derived_tables")
13
+
14
+
15
class DerivedTablesManager(BaseObject):
    """Manages derived tables for enrichment data.

    Only ``message_emotions`` is persisted. The topics, sentiment and
    embeddings writers are stubs that log and return 0.
    """

    def __init__(self, conn: Optional[sqlite3.Connection] = None, *, name: Optional[str] = None):
        """Initialize with optional database connection.

        Args:
            conn: SQLite connection. If None, will try to get from state or create new.
            name: Optional custom name. Defaults to `ClassName#N`
        """
        super().__init__(name=name)
        self.conn = conn
        if self.conn is None:
            self.conn = self._connection_from_state()
        if self.conn is None:
            self.conn = self._open_default_connection()

        # Ensure tables exist
        if self.conn is not None:
            self._ensure_tables()

    def _connection_from_state(self) -> Optional[sqlite3.Connection]:
        """Return the shared ``db_conn`` from core state, or None if unavailable."""
        try:
            from ..core.state import db_conn
        except Exception as e:
            # Best-effort: the caller falls back to opening its own connection.
            logger.debug("%s: No shared database connection in state: %s", self, e)
            return None
        return db_conn

    def _open_default_connection(self) -> Optional[sqlite3.Connection]:
        """Open a connection to the configured database path, or return None."""
        try:
            from ..storage.db.paths import get_database_path
            from ..config.settings import settings

            db_path = get_database_path(settings.topos_database_path)
            # Connect only when the file, or at least its directory, exists.
            if db_path.exists() or db_path.parent.exists():
                conn = sqlite3.connect(str(db_path))
                logger.debug("%s: Created database connection: %s", self, db_path)
                return conn
        except Exception as e:
            logger.warning("%s: Could not create database connection: %s", self, e)
        return None

    def _ensure_tables(self) -> None:
        """Ensure enrichment tables exist.

        Creates ``message_emotions`` and its indexes and adds ``source_id`` to
        tables that lack it. Failures are logged and rolled back, not raised.
        """
        if self.conn is None:
            return

        try:
            # Create message_emotions table (Stage 9: model_name, all_emotions_json)
            self.conn.execute("""
                CREATE TABLE IF NOT EXISTS message_emotions (
                    message_id TEXT NOT NULL,
                    source_id TEXT,
                    emotion_label TEXT,
                    confidence REAL,
                    model_name TEXT,
                    all_emotions_json TEXT,
                    created_at TEXT NOT NULL,
                    PRIMARY KEY (message_id, model_name)
                )
            """)

            # Add source_id column if it doesn't exist (migration for existing tables)
            try:
                self.conn.execute("ALTER TABLE message_emotions ADD COLUMN source_id TEXT")
            except sqlite3.OperationalError as exc:
                # Only "column already exists" is expected here; any other
                # operational error must reach the handler below and be logged.
                if "duplicate column name" not in str(exc).lower():
                    raise

            self.conn.execute("""
                CREATE INDEX IF NOT EXISTS idx_message_emotions_message
                ON message_emotions(message_id)
            """)

            self.conn.execute("""
                CREATE INDEX IF NOT EXISTS idx_message_emotions_label
                ON message_emotions(emotion_label)
            """)

            self.conn.execute("""
                CREATE INDEX IF NOT EXISTS idx_message_emotions_source
                ON message_emotions(source_id)
            """)

            self.conn.commit()
            logger.debug("%s: Ensured message_emotions table exists", self)
        except Exception as e:
            logger.error("%s: Failed to ensure enrichment tables: %s", self, e)
            if self.conn is not None:
                self.conn.rollback()

    def write_enrichment_batch(
        self,
        enrichment_records: List[Dict[str, Any]],
        table_name: str,
        batch_size: int = 1000,
    ) -> int:
        """Write enrichment records to derived table in batches.

        Args:
            enrichment_records: List of enrichment record dicts
            table_name: Derived table name (e.g., 'message_emotions', 'message_sentiment')
            batch_size: Number of records per batch

        Returns:
            Number of records written. 0 when there are no records, no
            connection, or the table name is unknown.

        Raises:
            ValueError: if ``batch_size`` is below 1 when writing emotions.
        """
        if not enrichment_records:
            return 0

        if self.conn is None:
            logger.warning("%s: No database connection available, skipping storage of %d records", self, len(enrichment_records))
            return 0

        # Determine the writer based on table_name
        writers = {
            "message_emotions": self._write_emotions_batch,
            "message_topics": self._write_topics_batch,
            "message_sentiment": self._write_sentiment_batch,
            "message_embeddings": self._write_embeddings_batch,
        }
        writer = writers.get(table_name)
        if writer is None:
            logger.warning("%s: Unknown derived table: %s", self, table_name)
            return 0
        return writer(enrichment_records, batch_size)

    def _write_emotions_batch(
        self,
        records: List[Dict[str, Any]],
        batch_size: int,
    ) -> int:
        """Write emotion records to message_emotions table.

        Each batch is committed on its own, so batches written before a
        failure stay committed. On failure the open transaction is rolled
        back and the exception is re-raised.

        Raises:
            ValueError: if ``batch_size`` is below 1. A zero step already made
                ``range`` raise, and a negative step silently wrote nothing.
        """
        import json  # function-scope: this module has no top-level json import

        if batch_size < 1:
            raise ValueError("batch_size must be a positive integer")

        def _all_emotions_text(record: Dict[str, Any]) -> str:
            # Lists are JSON-encoded, strings are stored as given, and any
            # other type is stored as an empty JSON list.
            value = record.get("all_emotions_json") or record.get("all_emotions") or []
            if isinstance(value, list):
                return json.dumps(value)
            if isinstance(value, str):
                return value
            return "[]"

        written = 0
        try:
            self._ensure_tables()
            extracted_at = datetime.now(timezone.utc).isoformat()

            for start in range(0, len(records), batch_size):
                batch = records[start:start + batch_size]
                values = [
                    (
                        record.get("message_id"),
                        record.get("source_id"),
                        record.get("emotion_label"),
                        record.get("confidence"),
                        record.get("model_name") or record.get("model"),
                        _all_emotions_text(record),
                        extracted_at,
                    )
                    for record in batch
                ]

                self.conn.executemany("""
                    INSERT OR REPLACE INTO message_emotions (
                        message_id, source_id, emotion_label, confidence, model_name, all_emotions_json, created_at
                    ) VALUES (?, ?, ?, ?, ?, ?, ?)
                """, values)

                self.conn.commit()
                written += len(batch)
                logger.debug(
                    "[PIPELINE:ENRICHMENT] %s: Wrote batch of %d emotion records (total: %d)",
                    self,
                    len(batch),
                    written,
                )
        except Exception as e:
            if self.conn is not None:
                self.conn.rollback()
            logger.error("[PIPELINE:ENRICHMENT] %s: Failed to write emotions batch: %s", self, e)
            raise

        return written

    def _write_topics_batch(
        self,
        records: List[Dict[str, Any]],
        batch_size: int,
    ) -> int:
        """Write topics records (stub)."""
        logger.debug("%s: Topics batch write not yet implemented", self)
        return 0

    def _write_sentiment_batch(
        self,
        records: List[Dict[str, Any]],
        batch_size: int,
    ) -> int:
        """Write sentiment records (stub)."""
        logger.debug("%s: Sentiment batch write not yet implemented", self)
        return 0

    def _write_embeddings_batch(
        self,
        records: List[Dict[str, Any]],
        batch_size: int,
    ) -> int:
        """Write embeddings records (stub)."""
        logger.debug("%s: Embeddings batch write not yet implemented", self)
        return 0