topos-node 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (249) hide show
  1. shared/__init__.py +59 -0
  2. shared/filtering.py +640 -0
  3. shared/schema_registry.py +229 -0
  4. topos/__init__.py +5 -0
  5. topos/__version__.py +6 -0
  6. topos/analytics/__init__.py +15 -0
  7. topos/analytics/duckdb_adapter.py +48 -0
  8. topos/analytics/messenger_communities.py +349 -0
  9. topos/analytics/messenger_graph.py +522 -0
  10. topos/analytics/messenger_labels.py +321 -0
  11. topos/analytics/profiles.py +22 -0
  12. topos/analytics/query_engine.py +64 -0
  13. topos/analytics/raw_queries.py +174 -0
  14. topos/api/__init__.py +1 -0
  15. topos/api/analytics.py +52 -0
  16. topos/api/app_registry.py +31 -0
  17. topos/api/backup.py +15 -0
  18. topos/api/compute_remote.py +175 -0
  19. topos/api/data_commit.py +158 -0
  20. topos/api/data_explorer_table_prefs.py +81 -0
  21. topos/api/db.py +10 -0
  22. topos/api/device.py +25 -0
  23. topos/api/enrichment.py +959 -0
  24. topos/api/filter_lab.py +195 -0
  25. topos/api/health.py +61 -0
  26. topos/api/ingestion_api.py +37 -0
  27. topos/api/ingestion_compat.py +21 -0
  28. topos/api/ingestion_sources.py +600 -0
  29. topos/api/llm.py +76 -0
  30. topos/api/local_mcp.py +46 -0
  31. topos/api/messenger_analytics.py +385 -0
  32. topos/api/query_api.py +13 -0
  33. topos/api/sanitization_ollama_config.py +64 -0
  34. topos/api/source_install.py +324 -0
  35. topos/api/sources.py +13 -0
  36. topos/api/sync.py +10 -0
  37. topos/api/ui_config.py +83 -0
  38. topos/api/uma_data.py +311 -0
  39. topos/api/usage.py +49 -0
  40. topos/api/user_identity.py +46 -0
  41. topos/app.py +239 -0
  42. topos/auth.py +17 -0
  43. topos/canonicalization/__init__.py +1 -0
  44. topos/canonicalization/mappers/__init__.py +22 -0
  45. topos/canonicalization/mappers/base.py +26 -0
  46. topos/canonicalization/mappers/chatgpt_mapper.py +40 -0
  47. topos/canonicalization/mappers/grok_mapper.py +17 -0
  48. topos/canonicalization/mappers/messenger_mapper.py +58 -0
  49. topos/canonicalization/models.py +31 -0
  50. topos/canonicalization/resolver.py +23 -0
  51. topos/cli/__init__.py +1 -0
  52. topos/cli/__main__.py +6 -0
  53. topos/cli/commands.py +132 -0
  54. topos/config/__init__.py +1 -0
  55. topos/config/sanitization_ollama.py +189 -0
  56. topos/config/settings.py +310 -0
  57. topos/contacts/__init__.py +5 -0
  58. topos/contacts/identity.py +24 -0
  59. topos/control_plane_client.py +300 -0
  60. topos/core/__init__.py +1 -0
  61. topos/core/api_models.py +128 -0
  62. topos/core/connection_resilience.py +99 -0
  63. topos/core/device_helpers.py +8 -0
  64. topos/core/errors.py +13 -0
  65. topos/core/events.py +12 -0
  66. topos/core/handlers.py +5625 -0
  67. topos/core/logging.py +175 -0
  68. topos/core/metrics.py +21 -0
  69. topos/core/startup_banner.py +62 -0
  70. topos/core/state.py +682 -0
  71. topos/core/table_layers.py +45 -0
  72. topos/core/types.py +13 -0
  73. topos/data_explorer_table_prefs.py +150 -0
  74. topos/engine/__init__.py +29 -0
  75. topos/engine/backends/__init__.py +50 -0
  76. topos/engine/backends/base.py +21 -0
  77. topos/engine/backends/huggingface.py +151 -0
  78. topos/engine/backends/ollama.py +181 -0
  79. topos/engine/backends/stub.py +22 -0
  80. topos/engine/engine.py +165 -0
  81. topos/engine/intake.py +32 -0
  82. topos/engine/queue_manager.py +112 -0
  83. topos/engine/registration.py +126 -0
  84. topos/engine/result_formatter.py +38 -0
  85. topos/engine/router.py +19 -0
  86. topos/engine/scoped_token.py +82 -0
  87. topos/engine/tasks.py +154 -0
  88. topos/engine/transport.py +44 -0
  89. topos/engine/usage_guard.py +100 -0
  90. topos/engine/usage_observation.py +129 -0
  91. topos/engine/validator.py +23 -0
  92. topos/enrichment/__init__.py +1 -0
  93. topos/enrichment/derived_tables.py +214 -0
  94. topos/enrichment/jobs/__init__.py +30 -0
  95. topos/enrichment/jobs/base.py +54 -0
  96. topos/enrichment/jobs/canonical/__init__.py +1 -0
  97. topos/enrichment/jobs/canonical/embeddings_job.py +27 -0
  98. topos/enrichment/jobs/canonical/emo_27_job.py +97 -0
  99. topos/enrichment/jobs/canonical/entities_job.py +27 -0
  100. topos/enrichment/jobs/canonical/sentiment_job.py +27 -0
  101. topos/enrichment/jobs/canonical/topics_job.py +27 -0
  102. topos/enrichment/jobs/raw/__init__.py +1 -0
  103. topos/enrichment/jobs/raw/attachments_job.py +12 -0
  104. topos/enrichment/jobs/raw/language_job.py +12 -0
  105. topos/enrichment/jobs/raw/time_normalization_job.py +12 -0
  106. topos/enrichment/jobs/raw/tool_calls_job.py +12 -0
  107. topos/enrichment/models/__init__.py +1 -0
  108. topos/enrichment/models/manager.py +8 -0
  109. topos/enrichment/models/registry.py +71 -0
  110. topos/enrichment/models/versioning.py +8 -0
  111. topos/enrichment/orchestrator.py +177 -0
  112. topos/enrichment/processor.py +17 -0
  113. topos/enrichment/progress_bar.py +122 -0
  114. topos/enrichment/website_classifier.py +31 -0
  115. topos/filter_lab/__init__.py +1 -0
  116. topos/filter_lab/bundles.py +300 -0
  117. topos/filter_lab/schema.py +86 -0
  118. topos/filter_lab/service.py +167 -0
  119. topos/filter_lab/store.py +374 -0
  120. topos/filter_lab/worker.py +250 -0
  121. topos/hosted_pool_lease.py +153 -0
  122. topos/ingestion/__init__.py +1 -0
  123. topos/ingestion/checkpoints/__init__.py +6 -0
  124. topos/ingestion/checkpoints/checkpoint_store.py +24 -0
  125. topos/ingestion/checkpoints/sqlite_checkpoint_store.py +82 -0
  126. topos/ingestion/ingest_helpers.py +504 -0
  127. topos/ingestion/jobs.py +91 -0
  128. topos/ingestion/local_sync.py +823 -0
  129. topos/ingestion/log_preview.py +21 -0
  130. topos/ingestion/manager.py +1100 -0
  131. topos/ingestion/parser.py +174 -0
  132. topos/ingestion/parsers/__init__.py +32 -0
  133. topos/ingestion/parsers/base.py +24 -0
  134. topos/ingestion/parsers/browser_parser.py +171 -0
  135. topos/ingestion/parsers/calendar_parser.py +21 -0
  136. topos/ingestion/parsers/chatgpt_conversation_flattener.py +266 -0
  137. topos/ingestion/parsers/chatgpt_parser.py +67 -0
  138. topos/ingestion/parsers/grok_parser.py +21 -0
  139. topos/ingestion/parsers/messenger_parser.py +97 -0
  140. topos/ingestion/progress.py +54 -0
  141. topos/ingestion/sources/__init__.py +20 -0
  142. topos/ingestion/sources/base.py +39 -0
  143. topos/ingestion/sources/calendar.py +29 -0
  144. topos/ingestion/sources/chatgpt.py +29 -0
  145. topos/ingestion/sources/contact_importers.py +274 -0
  146. topos/ingestion/sources/grok.py +29 -0
  147. topos/ingestion/sources/imessage_reader.py +479 -0
  148. topos/ingestion/sources/signal_export_parser.py +132 -0
  149. topos/ingestion/sources/signal_reader.py +491 -0
  150. topos/ingestion/state_machine.py +70 -0
  151. topos/ingestion/triggers/__init__.py +1 -0
  152. topos/ingestion/triggers/file_trigger.py +36 -0
  153. topos/ingestion/triggers/sqlite_trigger.py +18 -0
  154. topos/ingestion/validation/__init__.py +1 -0
  155. topos/ingestion/validation/base.py +27 -0
  156. topos/ingestion/validation/schema_registry.py +111 -0
  157. topos/ingestion/validation/schema_validator.py +13 -0
  158. topos/lineage/__init__.py +1 -0
  159. topos/lineage/provenance.py +9 -0
  160. topos/lineage/tracker.py +9 -0
  161. topos/mcp_stdio_proxy.py +83 -0
  162. topos/observability/__init__.py +1 -0
  163. topos/observability/alerts.py +7 -0
  164. topos/observability/metrics.py +25 -0
  165. topos/observability/tracing.py +18 -0
  166. topos/openai_client.py +69 -0
  167. topos/projections/__init__.py +1 -0
  168. topos/projections/vector_index/__init__.py +1 -0
  169. topos/projections/vector_index/base.py +21 -0
  170. topos/projections/vector_index/builders.py +11 -0
  171. topos/projections/vector_index/health_checks.py +5 -0
  172. topos/rate_limit.py +43 -0
  173. topos/sanitization/__init__.py +16 -0
  174. topos/sanitization/ollama_transforms.py +276 -0
  175. topos/scope_resolution.py +89 -0
  176. topos/services/__init__.py +1 -0
  177. topos/services/container.py +46 -0
  178. topos/services/embeddings/__init__.py +1 -0
  179. topos/services/embeddings/base.py +7 -0
  180. topos/services/embeddings/local.py +9 -0
  181. topos/services/embeddings/remote.py +9 -0
  182. topos/services/interfaces.py +40 -0
  183. topos/services/llm/__init__.py +1 -0
  184. topos/services/llm/base.py +7 -0
  185. topos/services/llm/openai.py +126 -0
  186. topos/services/local.py +123 -0
  187. topos/services/postgres.py +385 -0
  188. topos/sources/__init__.py +6 -0
  189. topos/sources/definitions.py +114 -0
  190. topos/sources/install_service.py +836 -0
  191. topos/sources/registry.py +263 -0
  192. topos/sources/runtime_install.py +427 -0
  193. topos/storage/__init__.py +1 -0
  194. topos/storage/canonical/__init__.py +18 -0
  195. topos/storage/canonical/ai_chat/__init__.py +22 -0
  196. topos/storage/canonical/ai_chat/canonicalizer.py +147 -0
  197. topos/storage/canonical/ai_chat/mapper.py +168 -0
  198. topos/storage/canonical/ai_chat/model.py +87 -0
  199. topos/storage/canonical/ai_chat/tables.py +179 -0
  200. topos/storage/canonical/canonical_store.py +24 -0
  201. topos/storage/canonical/conversations_tables.py +1020 -0
  202. topos/storage/canonical/mapping_store.py +30 -0
  203. topos/storage/canonical/postgres.py +10 -0
  204. topos/storage/db/__init__.py +1 -0
  205. topos/storage/db/client.py +8 -0
  206. topos/storage/db/migrations/__init__.py +1 -0
  207. topos/storage/db/migrations/stage9_column_renames.py +78 -0
  208. topos/storage/db/paths.py +122 -0
  209. topos/storage/db/postgres.py +240 -0
  210. topos/storage/db/schema.py +6 -0
  211. topos/storage/enrichment/__init__.py +1 -0
  212. topos/storage/enrichment/canonical_enrichment_store.py +7 -0
  213. topos/storage/enrichment/raw_enrichment_store.py +18 -0
  214. topos/storage/normalized/__init__.py +1 -0
  215. topos/storage/normalized/normalized_store.py +24 -0
  216. topos/storage/oplog/__init__.py +1 -0
  217. topos/storage/oplog/decision.py +6 -0
  218. topos/storage/oplog/oplog_store.py +17 -0
  219. topos/storage/oplog/postgres.py +10 -0
  220. topos/storage/projections/__init__.py +1 -0
  221. topos/storage/projections/index_ops_store.py +6 -0
  222. topos/storage/projections/vector_index_store.py +6 -0
  223. topos/storage/raw/__init__.py +1 -0
  224. topos/storage/raw/browser_flat_tables.py +303 -0
  225. topos/storage/raw/file_store.py +100 -0
  226. topos/storage/raw/raw_store.py +29 -0
  227. topos/storage/raw/raw_tables_manager.py +295 -0
  228. topos/storage/raw/sqlite_raw_store.py +17 -0
  229. topos/storage/security/encryption.py +21 -0
  230. topos/storage/signal_identity.py +71 -0
  231. topos/storage/source_settings.py +116 -0
  232. topos/storage/user_identity.py +69 -0
  233. topos/sync/__init__.py +5 -0
  234. topos/sync/client.py +272 -0
  235. topos/sync_handlers.py +70 -0
  236. topos/testing/__init__.py +1 -0
  237. topos/testing/lifespan.py +7 -0
  238. topos/uma_contact_enrichment.py +1032 -0
  239. topos/uma_filters.py +669 -0
  240. topos/uma_resource_id.py +24 -0
  241. topos/uma_rpt.py +69 -0
  242. topos/utils/base_object.py +61 -0
  243. topos/websocket_client.py +21 -0
  244. topos_node-0.1.0.dist-info/METADATA +199 -0
  245. topos_node-0.1.0.dist-info/RECORD +249 -0
  246. topos_node-0.1.0.dist-info/WHEEL +5 -0
  247. topos_node-0.1.0.dist-info/entry_points.txt +2 -0
  248. topos_node-0.1.0.dist-info/licenses/LICENSE +201 -0
  249. topos_node-0.1.0.dist-info/top_level.txt +2 -0
@@ -0,0 +1,600 @@
1
+ from __future__ import annotations
2
+
3
+ import asyncio
4
+ import os
5
+ import uuid
6
+ from datetime import datetime, timezone
7
+ from typing import Optional
8
+
9
+ from fastapi import APIRouter, Body, Depends, Query, Request # noqa: F401 Body used in put_signal_settings
10
+
11
+ from ..auth import require_api_key
12
+ from ..core.state import get_db_connection
13
+ from ..ingestion.ingest_helpers import ingest_file_payload, ingest_ui_payload
14
+ from ..ingestion.local_sync import run_imessage_sync, run_signal_sync, run_signal_upload
15
+ from ..sources.registry import REGISTRY
16
+ from ..storage.signal_identity import get_signal_identity, put_signal_identity
17
+ from ..storage.source_settings import get_source_settings, put_source_settings, update_sync_result
18
+ from ..analytics.messenger_communities import compute_and_persist_messenger_analytics
19
+ from ..engine.usage_observation import emit_usage_observation
20
+ from ..engine.usage_guard import submit_usage_guard_check
21
+
22
+ router = APIRouter()
23
+ _GOOGLE_CONTACT_IMPORT_SESSIONS: dict[str, dict] = {}
24
+
25
+
26
def _resolve_contact_import_targets(payload: Optional[dict]) -> tuple[list[str], Optional[str]]:
    """Resolve which local_sync sources a contact import should apply to.

    Reads ``apply_to_sources`` from ``payload``. When the key is missing or
    ``None`` the default ``["imessage", "signal"]`` is used.

    Args:
        payload: Request body (may be ``None``).

    Returns:
        ``(targets, None)`` on success, where ``targets`` is the sorted,
        de-duplicated list of stripped source ids; ``([], message)`` when the
        value is not a non-empty list, contains no usable ids, or names an id
        that is not a ``local_sync`` source in ``REGISTRY``.
    """
    requested = (payload or {}).get("apply_to_sources")
    if requested is None:
        requested = ["imessage", "signal"]
    if not isinstance(requested, list) or not requested:
        return [], "apply_to_sources must be a non-empty list"

    # Normalise first: blank / None entries are dropped before validation.
    stripped = (str(entry or "").strip() for entry in requested)
    targets = [source_id for source_id in stripped if source_id]
    if not targets:
        return [], "apply_to_sources has no valid source ids"

    valid_local_sync = {
        sid for sid, definition in REGISTRY.items()
        if getattr(definition, "source_type", None) == "local_sync"
    }
    invalid = [sid for sid in targets if sid not in valid_local_sync]
    if invalid:
        return [], f"invalid local_sync source ids: {', '.join(invalid)}"
    return sorted(set(targets)), None
43
+
44
+
45
async def _ingest_file_transfer_denial(units: int) -> Optional[dict]:
    """Run the ``file_transfer_bytes`` usage guard for ``units`` bytes.

    Returns the error response to send back when the guard denies the
    transfer, or ``None`` when the transfer is allowed.
    """
    guard = await submit_usage_guard_check(
        usage_kind="file_transfer_bytes",
        units=units,
    )
    if bool(guard.get("allowed")):
        return None
    return {
        "status": "error",
        "error": "usage_guard_denied",
        "denial": guard.get("denial") or {},
    }


@router.post("/sources/{source_id}/ingest", dependencies=[Depends(require_api_key)])
async def ingest_source(
    source_id: str,
    request: Request,
    dataset_id: Optional[str] = None,
    user_id: Optional[str] = None,  # noqa: ARG001
    file_path: Optional[str] = None,
    payload: Optional[dict] = Body(default=None),
):
    """Ingest data for a registered source, dispatching on its ``source_type``.

    * ``file``: takes either a ``file_path`` or a multipart ``file`` upload
      (``dataset_id`` / ``file_path`` may also arrive as form fields). The
      byte count is submitted to the usage guard before ingestion;
      ``file_path`` wins when both are supplied.
    * ``ui_stream``: takes a JSON payload (declared body, or parsed from the
      raw request as a fallback).
    * ``local_sync``: rejected; those sources use the sync endpoint.

    Returns the ingest helper's result, or ``{"status": "error", ...}`` for
    validation failures and usage-guard denials.
    """
    source = REGISTRY.get(source_id)
    if not source:
        return {"status": "error", "error": "unknown source_id"}

    if source.source_type == "file":
        file = None
        if request.headers.get("content-type", "").startswith("multipart/"):
            form = await request.form()
            file = form.get("file")
            dataset_id = dataset_id or form.get("dataset_id")
            file_path = file_path or form.get("file_path")
        if not file and not file_path:
            return {"status": "error", "error": "file or file_path required"}
        if not dataset_id:
            return {"status": "error", "error": "dataset_id required"}

        if file_path:
            # Size is only an estimate for the guard; an unreadable or
            # malformed path counts as 0 bytes here and is left for the
            # ingest helper to deal with.
            try:
                estimated_bytes = int(os.path.getsize(file_path))
            except (OSError, TypeError, ValueError):
                estimated_bytes = 0
            denial = await _ingest_file_transfer_denial(estimated_bytes)
            if denial is not None:
                return denial
            return await ingest_file_payload(
                dataset_id=dataset_id,
                schema_id=source.schema_id,
                file_path=file_path,
            )

        payload_bytes = await file.read()
        denial = await _ingest_file_transfer_denial(len(payload_bytes))
        if denial is not None:
            return denial
        return await ingest_file_payload(
            dataset_id=dataset_id,
            schema_id=source.schema_id,
            file_bytes=payload_bytes,
        )

    if source.source_type == "ui_stream":
        if payload is None:
            # Fall back to parsing the raw request when no body was bound.
            try:
                payload = await request.json()
            except Exception:
                payload = None
        if not dataset_id or not payload:
            return {"status": "error", "error": "dataset_id and payload required"}
        return await ingest_ui_payload(
            dataset_id=dataset_id,
            schema_id=source.schema_id,
            payload=payload,
            source_id=source_id,  # Pass source_id to enable direct processing
        )

    if source.source_type == "local_sync":
        # iMessage, Signal: ingestion is via sync endpoint (POST /sources/{source_id}/sync), not file/body upload
        return {
            "status": "error",
            "error": "local_sync sources use sync endpoint; use POST /sources/{source_id}/sync to sync",
        }

    return {"status": "error", "error": "unsupported source type"}
130
+
131
+
132
@router.get("/sources/{source_id}/settings", dependencies=[Depends(require_api_key)])
async def get_source_settings_endpoint(
    source_id: str,
    dataset_id: Optional[str] = Query(default=None),
):
    """Return the stored settings for a source within a dataset.

    Responds with an error object when the source id is unknown, when
    ``dataset_id`` is missing, or when no database connection is available;
    otherwise the stored settings are merged into an ``ok`` response.
    """
    if not REGISTRY.get(source_id):
        return {"status": "error", "error": "unknown source_id"}
    if not dataset_id:
        return {"status": "error", "error": "dataset_id required"}

    db = get_db_connection()
    if not db:
        return {"status": "error", "error": "Database not available"}

    response = {"status": "ok", "dataset_id": dataset_id, "source_id": source_id}
    # Stored settings are merged last, so they win on any key collision.
    response.update(get_source_settings(db, dataset_id, source_id))
    return response
148
+
149
+
150
@router.put("/sources/{source_id}/settings", dependencies=[Depends(require_api_key)])
async def put_source_settings_endpoint(
    source_id: str,
    dataset_id: Optional[str] = Query(default=None),
    body: Optional[dict] = Body(default=None),
):
    """Store the ``enabled`` setting for a local_sync source.

    ``dataset_id`` may be given as a query parameter or inside the body;
    ``enabled`` must be present in the body.
    """
    payload = body or {}

    source = REGISTRY.get(source_id)
    if not source:
        return {"status": "error", "error": "unknown source_id"}
    if source.source_type != "local_sync":
        return {"status": "error", "error": "settings only apply to local_sync sources"}

    # The query parameter takes precedence over the body field.
    dataset_id = dataset_id or payload.get("dataset_id")
    if not dataset_id:
        return {"status": "error", "error": "dataset_id required"}

    enabled = payload.get("enabled")
    if enabled is None:
        return {"status": "error", "error": "enabled required in body"}

    db = get_db_connection()
    if not db:
        return {"status": "error", "error": "Database not available"}

    put_source_settings(db, dataset_id, source_id, enabled=enabled)
    return {
        "status": "ok",
        "dataset_id": dataset_id,
        "source_id": source_id,
        "enabled": bool(enabled),
    }
173
+
174
+
175
@router.post("/sources/{source_id}/sync", dependencies=[Depends(require_api_key)])
async def sync_source(
    source_id: str,
    dataset_id: Optional[str] = Query(default=None, description="Dataset/owner scope for checkpoint and messages"),
    body: Optional[dict] = Body(default=None),
):
    """Run sync for local_sync sources (e.g. iMessage). Reads since last checkpoint, writes to conversation_messages.

    ``sync_options`` is taken from the body; for ``imessage`` it defaults to
    ``{"mode": "3m"}`` when absent. The sync runs in a worker thread. When a
    database connection is available the outcome is recorded via
    ``update_sync_result``, and after a successful sync the messenger
    analytics are recomputed for this source.

    Returns the sync function's result dict, or an error object for an
    unknown or unsupported source or a missing ``dataset_id``.
    """
    import logging

    source = REGISTRY.get(source_id)
    if not source:
        return {"status": "error", "error": "unknown source_id"}
    if source.source_type != "local_sync":
        return {"status": "error", "error": "sync endpoint only applies to local_sync sources"}

    if not dataset_id:
        return {"status": "error", "error": "dataset_id required (query param)"}

    # A missing connection does not block the sync; it only skips the
    # bookkeeping below.
    conn = get_db_connection()
    sync_options = (body or {}).get("sync_options")
    if source_id == "imessage" and not sync_options:
        sync_options = {"mode": "3m"}
    if source_id == "imessage":
        result = await asyncio.to_thread(run_imessage_sync, dataset_id, sync_options=sync_options)
    elif source_id == "signal":
        result = await asyncio.to_thread(run_signal_sync, dataset_id, sync_options=sync_options)
    else:
        return {"status": "error", "error": f"sync not implemented for source_id={source_id}"}

    if conn and result.get("status") == "ok":
        update_sync_result(
            conn, dataset_id, source_id,
            success=True,
            last_sync_at=datetime.now(timezone.utc).isoformat(),
        )
        # Sprint 03 trigger: refresh messenger analytics after successful sync.
        try:
            await asyncio.to_thread(
                compute_and_persist_messenger_analytics,
                dataset_id=dataset_id,
                conn=conn,
                source_ids=[source_id],
                period_granularity="month",
            )
        except Exception:
            # Non-fatal; sync result should still return success. Log the
            # failure so a broken analytics refresh is not invisible.
            logging.getLogger(__name__).exception(
                "messenger analytics refresh failed after sync "
                "(dataset_id=%s, source_id=%s)",
                dataset_id,
                source_id,
            )
    elif conn and result.get("status") == "error":
        update_sync_result(
            conn, dataset_id, source_id,
            success=False,
            last_error=result.get("error", "Sync failed"),
        )
    return result
227
+
228
+
229
@router.post("/sources/signal/upload", dependencies=[Depends(require_api_key)])
async def upload_signal_export(
    request: Request,
    dataset_id: Optional[str] = Query(default=None),
    my_phone_number: Optional[str] = Query(default=None),
    owner_user_id: Optional[str] = Query(default=None),
):
    """Accept a Signal export upload and pass it to ``run_signal_upload``.

    When ``dataset_id`` is not in the query string, the form fields are
    consulted for ``dataset_id`` and ``my_phone_number``. The file itself is
    read from the multipart ``file`` field.
    """
    if not dataset_id:
        # Best effort: a request without a parsable form leaves the values
        # as they were and falls through to the checks below.
        try:
            fields = await request.form()
            dataset_id = fields.get("dataset_id")
            if isinstance(dataset_id, bytes):
                dataset_id = dataset_id.decode("utf-8")
            my_phone_number = my_phone_number or fields.get("my_phone_number")
            if isinstance(my_phone_number, bytes):
                my_phone_number = my_phone_number.decode("utf-8")
        except Exception:
            pass
    if not dataset_id:
        return {"status": "error", "error": "dataset_id required"}

    file_bytes = None
    content_type = request.headers.get("content-type", "")
    if content_type.startswith("multipart/"):
        upload = (await request.form()).get("file")
        if upload:
            file_bytes = await upload.read()
    if not file_bytes:
        return {"status": "error", "error": "file required (multipart/form-data)"}

    # The upload handler is run in a worker thread.
    return await asyncio.to_thread(
        run_signal_upload,
        dataset_id,
        file_bytes,
        my_phone_number=my_phone_number,
        owner_user_id=owner_user_id,
    )
266
+
267
+
268
@router.get("/sources/signal/settings", dependencies=[Depends(require_api_key)])
async def get_signal_settings(
    dataset_id: Optional[str] = Query(default=None, description="Dataset scope for Signal identity"),
):
    """Return the stored Signal identity for a dataset.

    When no identity is stored, ``my_phone_number`` and ``my_signal_id`` are
    reported as ``None``.
    """
    if not dataset_id:
        return {"status": "error", "error": "dataset_id required"}

    db = get_db_connection()
    if not db:
        return {"status": "error", "error": "Database not available"}

    identity = get_signal_identity(db, dataset_id)
    response = {"status": "ok", "dataset_id": dataset_id}
    if identity is None:
        response.update({"my_phone_number": None, "my_signal_id": None})
    else:
        response.update(identity)
    return response
282
+
283
+
284
@router.put("/sources/signal/settings", dependencies=[Depends(require_api_key)])
async def put_signal_settings(
    dataset_id: Optional[str] = Query(default=None),
    my_phone_number: Optional[str] = Query(default=None),
    my_signal_id: Optional[str] = Query(default=None),
    body: Optional[dict] = Body(default=None),
):
    """Store the Signal identity for a dataset.

    Each value is taken from its query parameter when given, otherwise from
    the same-named key in the body. A phone number (e.g. E.164) is the
    preferred identity; the Signal username is considered unreliable.
    """
    payload = body or {}

    dataset_id = dataset_id or payload.get("dataset_id")
    if not dataset_id:
        return {"status": "error", "error": "dataset_id required"}

    phone = my_phone_number or payload.get("my_phone_number")
    signal_id = my_signal_id or payload.get("my_signal_id")

    db = get_db_connection()
    if not db:
        return {"status": "error", "error": "Database not available"}

    put_signal_identity(db, dataset_id, my_phone_number=phone, my_signal_id=signal_id)
    return {"status": "ok", "dataset_id": dataset_id}
303
+
304
+
305
@router.get("/sources/{source_id}/contacts", dependencies=[Depends(require_api_key)])
async def get_source_contacts(
    source_id: str,
    dataset_id: Optional[str] = Query(default=None),
):
    """List contacts (up to 200) for a local_sync source.

    Contacts with an ``identifier`` get up to five ``sample_messages`` and
    their ``conversation_thread_previews`` attached; contacts without one get
    an empty preview list only.
    """
    source = REGISTRY.get(source_id)
    if not source:
        return {"status": "error", "error": "unknown source_id"}
    if source.source_type != "local_sync":
        return {"status": "error", "error": "contacts only apply to local_sync sources"}
    if not dataset_id:
        return {"status": "error", "error": "dataset_id required"}
    db = get_db_connection()
    if not db:
        return {"status": "error", "error": "Database not available"}

    from ..analytics.messenger_labels import (
        enrich_contact_rows_with_resolved_display_names,
        enrich_conversation_thread_previews,
    )
    from ..storage.canonical import ConversationsTablesManager

    tables = ConversationsTablesManager(db)
    contacts = tables.list_contacts(dataset_id=dataset_id, source_id=source_id, limit=200)
    enrich_contact_rows_with_resolved_display_names(db, dataset_id=dataset_id, contacts=contacts)

    for contact in contacts:
        identifier = contact.get("identifier")
        if not identifier:
            contact["conversation_thread_previews"] = []
            continue

        profile_id = str(identifier)
        contact["sample_messages"] = tables.get_contact_message_samples(
            dataset_id=dataset_id,
            source_id=source_id,
            identifier=identifier,
            limit=5,
        )
        thread_previews = tables.get_contact_conversation_thread_previews(
            dataset_id=dataset_id,
            source_id=source_id,
            profile_identifier=profile_id,
        )
        enrich_conversation_thread_previews(
            db,
            dataset_id=dataset_id,
            profile_identifier=profile_id,
            previews=thread_previews,
        )
        contact["conversation_thread_previews"] = thread_previews

    return {
        "status": "ok",
        "dataset_id": dataset_id,
        "source_id": source_id,
        "contacts": contacts,
    }
354
+
355
+
356
@router.put("/sources/{source_id}/contacts/{contact_id}", dependencies=[Depends(require_api_key)])
async def put_source_contact(
    source_id: str,
    contact_id: str,
    dataset_id: Optional[str] = Query(default=None),
    body: Optional[dict] = Body(default=None),
):
    """Update a contact's display name for a local_sync source.

    The new name is read from ``display_name`` in the body and is passed
    through as ``None`` when absent.
    """
    source = REGISTRY.get(source_id)
    if not source:
        return {"status": "error", "error": "unknown source_id"}
    if source.source_type != "local_sync":
        return {"status": "error", "error": "contacts only apply to local_sync sources"}
    if not dataset_id:
        return {"status": "error", "error": "dataset_id required"}
    db = get_db_connection()
    if not db:
        return {"status": "error", "error": "Database not available"}

    from ..storage.canonical import ConversationsTablesManager

    new_name = (body or {}).get("display_name")
    ConversationsTablesManager(db).update_contact_display_name(
        dataset_id=dataset_id,
        source_id=source_id,
        contact_id=contact_id,
        display_name=new_name,
    )
    return {
        "status": "ok",
        "dataset_id": dataset_id,
        "source_id": source_id,
        "contact_id": contact_id,
    }
383
+
384
+
385
@router.post("/sources/{source_id}/contacts/auto-resolve", dependencies=[Depends(require_api_key)])
async def auto_resolve_source_contacts(
    source_id: str,
    dataset_id: Optional[str] = Query(default=None),
):
    """Trigger automatic contact-name resolution for a local_sync source.

    The value returned by ``auto_resolve_contact_names`` is reported as
    ``updated_contacts``.
    """
    source = REGISTRY.get(source_id)
    if not source:
        return {"status": "error", "error": "unknown source_id"}
    if source.source_type != "local_sync":
        return {"status": "error", "error": "contacts only apply to local_sync sources"}
    if not dataset_id:
        return {"status": "error", "error": "dataset_id required"}
    db = get_db_connection()
    if not db:
        return {"status": "error", "error": "Database not available"}

    from ..storage.canonical import ConversationsTablesManager

    updated = ConversationsTablesManager(db).auto_resolve_contact_names(
        dataset_id=dataset_id,
        source_id=source_id,
    )
    return {
        "status": "ok",
        "dataset_id": dataset_id,
        "source_id": source_id,
        "updated_contacts": updated,
    }
405
+
406
+
407
@router.post("/contacts/import/apple", dependencies=[Depends(require_api_key)])
async def import_apple_contacts(
    dataset_id: Optional[str] = Query(default=None),
    body: Optional[dict] = Body(default=None),
):
    """Import Apple Contacts on the engine host and apply them to the target sources.

    Target sources come from ``apply_to_sources`` in the body. Any exception
    raised during the import is returned as an error object carrying the
    exception text.
    """
    if not dataset_id:
        return {"status": "error", "error": "dataset_id required"}

    target_sources, problem = _resolve_contact_import_targets(body or {})
    if problem:
        return {"status": "error", "error": problem}

    db = get_db_connection()
    if not db:
        return {"status": "error", "error": "Database not available"}

    try:
        from ..ingestion.sources.contact_importers import import_apple_contacts_local
        from ..storage.canonical import ConversationsTablesManager

        # The contact read is run in a worker thread.
        contacts = await asyncio.to_thread(import_apple_contacts_local)
        tables = ConversationsTablesManager(db)
        run_id = str(uuid.uuid4())
        aggregate = tables.import_contacts_batch(
            dataset_id=dataset_id,
            contacts=contacts,
            target_sources=target_sources,
            import_source="apple_contacts",
            import_run_id=run_id,
        )
        response = {
            "status": "ok",
            "dataset_id": dataset_id,
            "applied_sources": target_sources,
            "import_source": "apple_contacts",
            "contacts_discovered": len(contacts),
        }
        # Batch counters are merged last, so they win on any key collision.
        response.update(aggregate)
        return response
    except Exception as exc:
        return {"status": "error", "error": str(exc)}
445
+
446
+
447
@router.post("/contacts/import/google/start", dependencies=[Depends(require_api_key)])
async def start_google_contacts_import(
    dataset_id: Optional[str] = Query(default=None),
    body: Optional[dict] = Body(default=None),
):
    """Begin the Google Contacts device authorization flow.

    The client id comes from ``google_client_id`` in the body, falling back
    to the ``GOOGLE_OAUTH_CLIENT_ID`` environment variable. On success a new
    session is recorded in ``_GOOGLE_CONTACT_IMPORT_SESSIONS``, a usage
    observation is emitted, and the user code and verification URL are
    returned.
    """
    payload = body or {}

    if not dataset_id:
        return {"status": "error", "error": "dataset_id required"}

    target_sources, problem = _resolve_contact_import_targets(payload)
    if problem:
        return {"status": "error", "error": problem}

    raw_client_id = payload.get("google_client_id") or os.getenv("GOOGLE_OAUTH_CLIENT_ID") or ""
    google_client_id = raw_client_id.strip()
    if not google_client_id:
        return {"status": "error", "error": "google_client_id required (or set GOOGLE_OAUTH_CLIENT_ID)"}

    try:
        from ..ingestion.sources.contact_importers import start_google_device_auth

        auth = await asyncio.to_thread(start_google_device_auth, google_client_id)
        failure = auth.get("error")
        if failure:
            return {"status": "error", "error": auth.get("error_description") or failure}

        session_id = str(uuid.uuid4())
        # Stored under the session id that is returned to the caller.
        session = {
            "dataset_id": dataset_id,
            "apply_to_sources": target_sources,
            "google_client_id": google_client_id,
            "device_code": auth.get("device_code"),
            "interval": int(auth.get("interval") or 5),
        }
        _GOOGLE_CONTACT_IMPORT_SESSIONS[session_id] = session

        identity = {
            "dataset_id": dataset_id,
            "session_id": session_id,
            "provider": "google_contacts",
        }
        await emit_usage_observation(
            action="contacts.google.connect.started",
            quantity=1,
            producer="api.ingestion_sources",
            canonical_action_identity=identity,
            topos_id=dataset_id,
            trust_class="observe_only",
            metadata={"endpoint": "/contacts/import/google/start"},
        )

        # The auth response may use either key for the verification URL.
        verification_url = auth.get("verification_url") or auth.get("verification_uri")
        return {
            "status": "ok",
            "session_id": session_id,
            "applied_sources": target_sources,
            "user_code": auth.get("user_code"),
            "verification_url": verification_url,
            "expires_in": auth.get("expires_in"),
            "message": auth.get("message"),
        }
    except Exception as exc:
        return {"status": "error", "error": str(exc)}
499
+
500
+
501
@router.post("/contacts/import/google/token", dependencies=[Depends(require_api_key)])
async def import_google_contacts_token(
    dataset_id: Optional[str] = Query(default=None),
    body: Optional[dict] = Body(default=None),
):
    """Import Google contacts using an OAuth access token supplied by the caller.

    The token is read from ``body["access_token"]``; the body is also passed
    to ``_resolve_contact_import_targets`` to choose the target sources.
    Contacts are fetched in a worker thread and written through
    ``ConversationsTablesManager.import_contacts_batch``.

    Returns:
        A dict with ``status="ok"``, the dataset id, the applied sources, the
        import source label, the number of contacts fetched, and whatever keys
        the batch import returns. On failure, a dict of the form
        ``{"status": "error", "error": <message>}``.
    """
    if not dataset_id:
        return {"status": "error", "error": "dataset_id required"}

    target_sources, err = _resolve_contact_import_targets(body or {})
    if err:
        return {"status": "error", "error": err}

    raw_token = (body or {}).get("access_token") or ""
    access_token = str(raw_token).strip()
    if not access_token:
        return {"status": "error", "error": "access_token required"}

    conn = get_db_connection()
    if not conn:
        return {"status": "error", "error": "Database not available"}

    try:
        from ..ingestion.sources.contact_importers import import_google_contacts
        from ..storage.canonical import ConversationsTablesManager

        # Fetching contacts is a blocking call; keep it off the event loop.
        fetched = await asyncio.to_thread(import_google_contacts, access_token)
        tables = ConversationsTablesManager(conn)
        run_id = str(uuid.uuid4())
        summary = tables.import_contacts_batch(
            dataset_id=dataset_id,
            contacts=fetched,
            target_sources=target_sources,
            import_source="google_contacts_oauth",
            import_run_id=run_id,
        )
        response = {
            "status": "ok",
            "dataset_id": dataset_id,
            "applied_sources": target_sources,
            "import_source": "google_contacts_oauth",
            "contacts_discovered": len(fetched),
        }
        # Batch-import keys are merged last, so they win on any collision.
        response.update(summary)
        return response
    except Exception as exc:
        return {"status": "error", "error": str(exc)}
542
+
543
+
544
@router.post("/contacts/import/google/finish", dependencies=[Depends(require_api_key)])
async def finish_google_contacts_import(
    dataset_id: Optional[str] = Query(default=None),
    body: Optional[dict] = Body(default=None),
):
    """Finish Google Contacts import once user authorizes device code.

    Args:
        dataset_id: Must equal the dataset id recorded when the session was
            started.
        body: JSON object carrying ``session_id`` (required) and an optional
            ``timeout_seconds`` (defaults to 120) forwarded to the token
            polling helper.

    Returns:
        A dict with ``status="ok"``, the dataset id, the applied sources, the
        import source label, the number of contacts fetched, and whatever keys
        the batch import returns. On failure, a dict of the form
        ``{"status": "error", "error": <message>}``. The stored session is
        removed only after a successful import.
    """
    if not dataset_id:
        return {"status": "error", "error": "dataset_id required"}
    payload = body or {}
    # Coerce to str before stripping (as the /token endpoint does for its
    # access token): a non-string JSON value would otherwise raise
    # AttributeError here, outside the try block.
    session_id = str(payload.get("session_id") or "").strip()
    if not session_id:
        return {"status": "error", "error": "session_id required"}
    session = _GOOGLE_CONTACT_IMPORT_SESSIONS.get(session_id)
    if not session:
        return {"status": "error", "error": "google import session not found or expired"}
    if session.get("dataset_id") != dataset_id:
        return {"status": "error", "error": "session does not match dataset_id"}
    conn = get_db_connection()
    if not conn:
        return {"status": "error", "error": "Database not available"}
    try:
        from ..ingestion.sources.contact_importers import finish_google_device_auth, import_google_contacts
        from ..storage.canonical import ConversationsTablesManager

        # Token exchange runs in a worker thread so it does not block the
        # event loop.
        token = await asyncio.to_thread(
            finish_google_device_auth,
            client_id=session.get("google_client_id"),
            device_code=session.get("device_code"),
            interval_seconds=int(session.get("interval") or 5),
            timeout_seconds=int(payload.get("timeout_seconds") or 120),
        )
        if token.get("error"):
            return {"status": "error", "error": token.get("error_description") or token.get("error")}
        access_token = token.get("access_token")
        if not access_token:
            return {"status": "error", "error": "Google token exchange did not return access_token"}
        contacts = await asyncio.to_thread(import_google_contacts, access_token)
        manager = ConversationsTablesManager(conn)
        target_sources = session.get("apply_to_sources") or []
        import_run_id = str(uuid.uuid4())
        aggregate = manager.import_contacts_batch(
            dataset_id=dataset_id,
            contacts=contacts,
            target_sources=target_sources,
            import_source="google_contacts",
            import_run_id=import_run_id,
        )
        # Drop the session only once the import has succeeded, so a failed
        # attempt can be retried with the same session id.
        _GOOGLE_CONTACT_IMPORT_SESSIONS.pop(session_id, None)
        return {
            "status": "ok",
            "dataset_id": dataset_id,
            "applied_sources": target_sources,
            "import_source": "google_contacts",
            "contacts_discovered": len(contacts),
            **aggregate,
        }
    except Exception as exc:
        # Endpoint contract: report failures in the payload rather than raising.
        return {"status": "error", "error": str(exc)}
topos/api/llm.py ADDED
@@ -0,0 +1,76 @@
1
+ from __future__ import annotations
2
+
3
+ import hashlib
4
+
5
+ from fastapi import APIRouter, Depends
6
+
7
+ from ..auth import require_api_key
8
+ from ..core.api_models import GenerationRequest, GenerationResponse, OllamaModelsResponse
9
+ from ..engine.usage_observation import emit_usage_observation
10
+ from ..rate_limit import rate_limit
11
+ from ..services.container import Services, get_services
12
+
13
# Router on which the LLM endpoints declared below in this module are registered.
+ router = APIRouter()
14
+
15
+
16
async def _generate_and_observe(body: GenerationRequest, *, producer: str) -> GenerationResponse:
    """Run one LLM generation and emit the matching usage observation.

    Shared by both generation routes, which differ only in the ``producer``
    label attached to the observation.

    Args:
        body: Validated generation request.
        producer: Producer label recorded on the usage observation.

    Returns:
        A ``GenerationResponse`` built from the ``output``, ``model`` and
        ``usage`` entries of the generation result.
    """
    services: Services = get_services()
    result = await services.llm.generate(body.model_dump())
    usage = result.get("usage") if isinstance(result, dict) else {}
    usage = usage or {}
    # Prefer the total token count; fall back to completion tokens, then 0.
    total_tokens = int(usage.get("total_tokens") or usage.get("completion_tokens") or 0)
    await emit_usage_observation(
        action="llm.generate",
        quantity=total_tokens,
        producer=producer,
        canonical_action_identity={
            "provider": body.provider or "default",
            "model": body.model or "",
            # Only a SHA-256 digest of the prompt is recorded, not its text.
            "prompt_sha": hashlib.sha256(body.prompt.encode("utf-8")).hexdigest(),
            "max_tokens": body.max_tokens,
            "temperature": body.temperature,
        },
        trust_class="cp_observed_self_hosted",
    )
    return GenerationResponse(output=result["output"], model=result["model"], usage=result["usage"])


@router.post(
    "/llm_generation",
    response_model=GenerationResponse,
    dependencies=[Depends(require_api_key), Depends(rate_limit)],
)
async def llm_generation(body: GenerationRequest):
    """Generate text with the configured LLM service and record usage."""
    return await _generate_and_observe(body, producer="api.llm")


@router.post(
    "/llm/generate",
    response_model=GenerationResponse,
    dependencies=[Depends(require_api_key), Depends(rate_limit)],
)
async def llm_generation_alias(body: GenerationRequest):
    """Alias of ``/llm_generation``; usage is recorded under ``api.llm.alias``."""
    return await _generate_and_observe(body, producer="api.llm.alias")
66
+
67
+
68
@router.get(
    "/ollama_models",
    response_model=OllamaModelsResponse,
    dependencies=[Depends(require_api_key), Depends(rate_limit)],
)
async def ollama_models_list():
    """Return the models reported by the LLM service's ``list_ollama_models`` call."""
    listing = await get_services().llm.list_ollama_models()
    return OllamaModelsResponse(models=listing["models"])