topos-node 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (249) hide show
  1. shared/__init__.py +59 -0
  2. shared/filtering.py +640 -0
  3. shared/schema_registry.py +229 -0
  4. topos/__init__.py +5 -0
  5. topos/__version__.py +6 -0
  6. topos/analytics/__init__.py +15 -0
  7. topos/analytics/duckdb_adapter.py +48 -0
  8. topos/analytics/messenger_communities.py +349 -0
  9. topos/analytics/messenger_graph.py +522 -0
  10. topos/analytics/messenger_labels.py +321 -0
  11. topos/analytics/profiles.py +22 -0
  12. topos/analytics/query_engine.py +64 -0
  13. topos/analytics/raw_queries.py +174 -0
  14. topos/api/__init__.py +1 -0
  15. topos/api/analytics.py +52 -0
  16. topos/api/app_registry.py +31 -0
  17. topos/api/backup.py +15 -0
  18. topos/api/compute_remote.py +175 -0
  19. topos/api/data_commit.py +158 -0
  20. topos/api/data_explorer_table_prefs.py +81 -0
  21. topos/api/db.py +10 -0
  22. topos/api/device.py +25 -0
  23. topos/api/enrichment.py +959 -0
  24. topos/api/filter_lab.py +195 -0
  25. topos/api/health.py +61 -0
  26. topos/api/ingestion_api.py +37 -0
  27. topos/api/ingestion_compat.py +21 -0
  28. topos/api/ingestion_sources.py +600 -0
  29. topos/api/llm.py +76 -0
  30. topos/api/local_mcp.py +46 -0
  31. topos/api/messenger_analytics.py +385 -0
  32. topos/api/query_api.py +13 -0
  33. topos/api/sanitization_ollama_config.py +64 -0
  34. topos/api/source_install.py +324 -0
  35. topos/api/sources.py +13 -0
  36. topos/api/sync.py +10 -0
  37. topos/api/ui_config.py +83 -0
  38. topos/api/uma_data.py +311 -0
  39. topos/api/usage.py +49 -0
  40. topos/api/user_identity.py +46 -0
  41. topos/app.py +239 -0
  42. topos/auth.py +17 -0
  43. topos/canonicalization/__init__.py +1 -0
  44. topos/canonicalization/mappers/__init__.py +22 -0
  45. topos/canonicalization/mappers/base.py +26 -0
  46. topos/canonicalization/mappers/chatgpt_mapper.py +40 -0
  47. topos/canonicalization/mappers/grok_mapper.py +17 -0
  48. topos/canonicalization/mappers/messenger_mapper.py +58 -0
  49. topos/canonicalization/models.py +31 -0
  50. topos/canonicalization/resolver.py +23 -0
  51. topos/cli/__init__.py +1 -0
  52. topos/cli/__main__.py +6 -0
  53. topos/cli/commands.py +132 -0
  54. topos/config/__init__.py +1 -0
  55. topos/config/sanitization_ollama.py +189 -0
  56. topos/config/settings.py +310 -0
  57. topos/contacts/__init__.py +5 -0
  58. topos/contacts/identity.py +24 -0
  59. topos/control_plane_client.py +300 -0
  60. topos/core/__init__.py +1 -0
  61. topos/core/api_models.py +128 -0
  62. topos/core/connection_resilience.py +99 -0
  63. topos/core/device_helpers.py +8 -0
  64. topos/core/errors.py +13 -0
  65. topos/core/events.py +12 -0
  66. topos/core/handlers.py +5625 -0
  67. topos/core/logging.py +175 -0
  68. topos/core/metrics.py +21 -0
  69. topos/core/startup_banner.py +62 -0
  70. topos/core/state.py +682 -0
  71. topos/core/table_layers.py +45 -0
  72. topos/core/types.py +13 -0
  73. topos/data_explorer_table_prefs.py +150 -0
  74. topos/engine/__init__.py +29 -0
  75. topos/engine/backends/__init__.py +50 -0
  76. topos/engine/backends/base.py +21 -0
  77. topos/engine/backends/huggingface.py +151 -0
  78. topos/engine/backends/ollama.py +181 -0
  79. topos/engine/backends/stub.py +22 -0
  80. topos/engine/engine.py +165 -0
  81. topos/engine/intake.py +32 -0
  82. topos/engine/queue_manager.py +112 -0
  83. topos/engine/registration.py +126 -0
  84. topos/engine/result_formatter.py +38 -0
  85. topos/engine/router.py +19 -0
  86. topos/engine/scoped_token.py +82 -0
  87. topos/engine/tasks.py +154 -0
  88. topos/engine/transport.py +44 -0
  89. topos/engine/usage_guard.py +100 -0
  90. topos/engine/usage_observation.py +129 -0
  91. topos/engine/validator.py +23 -0
  92. topos/enrichment/__init__.py +1 -0
  93. topos/enrichment/derived_tables.py +214 -0
  94. topos/enrichment/jobs/__init__.py +30 -0
  95. topos/enrichment/jobs/base.py +54 -0
  96. topos/enrichment/jobs/canonical/__init__.py +1 -0
  97. topos/enrichment/jobs/canonical/embeddings_job.py +27 -0
  98. topos/enrichment/jobs/canonical/emo_27_job.py +97 -0
  99. topos/enrichment/jobs/canonical/entities_job.py +27 -0
  100. topos/enrichment/jobs/canonical/sentiment_job.py +27 -0
  101. topos/enrichment/jobs/canonical/topics_job.py +27 -0
  102. topos/enrichment/jobs/raw/__init__.py +1 -0
  103. topos/enrichment/jobs/raw/attachments_job.py +12 -0
  104. topos/enrichment/jobs/raw/language_job.py +12 -0
  105. topos/enrichment/jobs/raw/time_normalization_job.py +12 -0
  106. topos/enrichment/jobs/raw/tool_calls_job.py +12 -0
  107. topos/enrichment/models/__init__.py +1 -0
  108. topos/enrichment/models/manager.py +8 -0
  109. topos/enrichment/models/registry.py +71 -0
  110. topos/enrichment/models/versioning.py +8 -0
  111. topos/enrichment/orchestrator.py +177 -0
  112. topos/enrichment/processor.py +17 -0
  113. topos/enrichment/progress_bar.py +122 -0
  114. topos/enrichment/website_classifier.py +31 -0
  115. topos/filter_lab/__init__.py +1 -0
  116. topos/filter_lab/bundles.py +300 -0
  117. topos/filter_lab/schema.py +86 -0
  118. topos/filter_lab/service.py +167 -0
  119. topos/filter_lab/store.py +374 -0
  120. topos/filter_lab/worker.py +250 -0
  121. topos/hosted_pool_lease.py +153 -0
  122. topos/ingestion/__init__.py +1 -0
  123. topos/ingestion/checkpoints/__init__.py +6 -0
  124. topos/ingestion/checkpoints/checkpoint_store.py +24 -0
  125. topos/ingestion/checkpoints/sqlite_checkpoint_store.py +82 -0
  126. topos/ingestion/ingest_helpers.py +504 -0
  127. topos/ingestion/jobs.py +91 -0
  128. topos/ingestion/local_sync.py +823 -0
  129. topos/ingestion/log_preview.py +21 -0
  130. topos/ingestion/manager.py +1100 -0
  131. topos/ingestion/parser.py +174 -0
  132. topos/ingestion/parsers/__init__.py +32 -0
  133. topos/ingestion/parsers/base.py +24 -0
  134. topos/ingestion/parsers/browser_parser.py +171 -0
  135. topos/ingestion/parsers/calendar_parser.py +21 -0
  136. topos/ingestion/parsers/chatgpt_conversation_flattener.py +266 -0
  137. topos/ingestion/parsers/chatgpt_parser.py +67 -0
  138. topos/ingestion/parsers/grok_parser.py +21 -0
  139. topos/ingestion/parsers/messenger_parser.py +97 -0
  140. topos/ingestion/progress.py +54 -0
  141. topos/ingestion/sources/__init__.py +20 -0
  142. topos/ingestion/sources/base.py +39 -0
  143. topos/ingestion/sources/calendar.py +29 -0
  144. topos/ingestion/sources/chatgpt.py +29 -0
  145. topos/ingestion/sources/contact_importers.py +274 -0
  146. topos/ingestion/sources/grok.py +29 -0
  147. topos/ingestion/sources/imessage_reader.py +479 -0
  148. topos/ingestion/sources/signal_export_parser.py +132 -0
  149. topos/ingestion/sources/signal_reader.py +491 -0
  150. topos/ingestion/state_machine.py +70 -0
  151. topos/ingestion/triggers/__init__.py +1 -0
  152. topos/ingestion/triggers/file_trigger.py +36 -0
  153. topos/ingestion/triggers/sqlite_trigger.py +18 -0
  154. topos/ingestion/validation/__init__.py +1 -0
  155. topos/ingestion/validation/base.py +27 -0
  156. topos/ingestion/validation/schema_registry.py +111 -0
  157. topos/ingestion/validation/schema_validator.py +13 -0
  158. topos/lineage/__init__.py +1 -0
  159. topos/lineage/provenance.py +9 -0
  160. topos/lineage/tracker.py +9 -0
  161. topos/mcp_stdio_proxy.py +83 -0
  162. topos/observability/__init__.py +1 -0
  163. topos/observability/alerts.py +7 -0
  164. topos/observability/metrics.py +25 -0
  165. topos/observability/tracing.py +18 -0
  166. topos/openai_client.py +69 -0
  167. topos/projections/__init__.py +1 -0
  168. topos/projections/vector_index/__init__.py +1 -0
  169. topos/projections/vector_index/base.py +21 -0
  170. topos/projections/vector_index/builders.py +11 -0
  171. topos/projections/vector_index/health_checks.py +5 -0
  172. topos/rate_limit.py +43 -0
  173. topos/sanitization/__init__.py +16 -0
  174. topos/sanitization/ollama_transforms.py +276 -0
  175. topos/scope_resolution.py +89 -0
  176. topos/services/__init__.py +1 -0
  177. topos/services/container.py +46 -0
  178. topos/services/embeddings/__init__.py +1 -0
  179. topos/services/embeddings/base.py +7 -0
  180. topos/services/embeddings/local.py +9 -0
  181. topos/services/embeddings/remote.py +9 -0
  182. topos/services/interfaces.py +40 -0
  183. topos/services/llm/__init__.py +1 -0
  184. topos/services/llm/base.py +7 -0
  185. topos/services/llm/openai.py +126 -0
  186. topos/services/local.py +123 -0
  187. topos/services/postgres.py +385 -0
  188. topos/sources/__init__.py +6 -0
  189. topos/sources/definitions.py +114 -0
  190. topos/sources/install_service.py +836 -0
  191. topos/sources/registry.py +263 -0
  192. topos/sources/runtime_install.py +427 -0
  193. topos/storage/__init__.py +1 -0
  194. topos/storage/canonical/__init__.py +18 -0
  195. topos/storage/canonical/ai_chat/__init__.py +22 -0
  196. topos/storage/canonical/ai_chat/canonicalizer.py +147 -0
  197. topos/storage/canonical/ai_chat/mapper.py +168 -0
  198. topos/storage/canonical/ai_chat/model.py +87 -0
  199. topos/storage/canonical/ai_chat/tables.py +179 -0
  200. topos/storage/canonical/canonical_store.py +24 -0
  201. topos/storage/canonical/conversations_tables.py +1020 -0
  202. topos/storage/canonical/mapping_store.py +30 -0
  203. topos/storage/canonical/postgres.py +10 -0
  204. topos/storage/db/__init__.py +1 -0
  205. topos/storage/db/client.py +8 -0
  206. topos/storage/db/migrations/__init__.py +1 -0
  207. topos/storage/db/migrations/stage9_column_renames.py +78 -0
  208. topos/storage/db/paths.py +122 -0
  209. topos/storage/db/postgres.py +240 -0
  210. topos/storage/db/schema.py +6 -0
  211. topos/storage/enrichment/__init__.py +1 -0
  212. topos/storage/enrichment/canonical_enrichment_store.py +7 -0
  213. topos/storage/enrichment/raw_enrichment_store.py +18 -0
  214. topos/storage/normalized/__init__.py +1 -0
  215. topos/storage/normalized/normalized_store.py +24 -0
  216. topos/storage/oplog/__init__.py +1 -0
  217. topos/storage/oplog/decision.py +6 -0
  218. topos/storage/oplog/oplog_store.py +17 -0
  219. topos/storage/oplog/postgres.py +10 -0
  220. topos/storage/projections/__init__.py +1 -0
  221. topos/storage/projections/index_ops_store.py +6 -0
  222. topos/storage/projections/vector_index_store.py +6 -0
  223. topos/storage/raw/__init__.py +1 -0
  224. topos/storage/raw/browser_flat_tables.py +303 -0
  225. topos/storage/raw/file_store.py +100 -0
  226. topos/storage/raw/raw_store.py +29 -0
  227. topos/storage/raw/raw_tables_manager.py +295 -0
  228. topos/storage/raw/sqlite_raw_store.py +17 -0
  229. topos/storage/security/encryption.py +21 -0
  230. topos/storage/signal_identity.py +71 -0
  231. topos/storage/source_settings.py +116 -0
  232. topos/storage/user_identity.py +69 -0
  233. topos/sync/__init__.py +5 -0
  234. topos/sync/client.py +272 -0
  235. topos/sync_handlers.py +70 -0
  236. topos/testing/__init__.py +1 -0
  237. topos/testing/lifespan.py +7 -0
  238. topos/uma_contact_enrichment.py +1032 -0
  239. topos/uma_filters.py +669 -0
  240. topos/uma_resource_id.py +24 -0
  241. topos/uma_rpt.py +69 -0
  242. topos/utils/base_object.py +61 -0
  243. topos/websocket_client.py +21 -0
  244. topos_node-0.1.0.dist-info/METADATA +199 -0
  245. topos_node-0.1.0.dist-info/RECORD +249 -0
  246. topos_node-0.1.0.dist-info/WHEEL +5 -0
  247. topos_node-0.1.0.dist-info/entry_points.txt +2 -0
  248. topos_node-0.1.0.dist-info/licenses/LICENSE +201 -0
  249. topos_node-0.1.0.dist-info/top_level.txt +2 -0
@@ -0,0 +1,310 @@
1
from __future__ import annotations

import json
import os
from pathlib import Path
from typing import List, Optional

from pydantic import AliasChoices, Field, model_validator
from pydantic_settings import BaseSettings, SettingsConfigDict
10
+
11
+
12
+ DEFAULT_TOPOS_CONTROL_PLANE_URL = "wss://cp.logu3s.com/ws/engine"
13
+
14
+
15
class Settings(BaseSettings):
    """Topos settings sourced from the environment and an optional ``.env`` file.

    This is a pydantic v2 / pydantic-settings model.  There, environment
    variable names are derived from the field names (matched
    case-insensitively because ``case_sensitive=False``); the v1-style
    ``Field(..., env="NAME")`` keyword is not consulted.  Every field below is
    therefore named after its environment variable.  The one field whose
    documented variable differs from its name (``allowed_origins_raw`` <-
    ``ALLOWED_ORIGINS``) declares an explicit ``validation_alias``.

    The ``topos_*`` fields are additionally exposed through short read/write
    property aliases (``database_path``, ``postgres_dsn``, ``sync_url``, ...).
    """

    model_config = SettingsConfigDict(
        env_file=Path(__file__).resolve().parent.parent / ".env",
        env_file_encoding="utf-8",
        case_sensitive=False,
        extra="ignore",
    )

    # --- Credentials -----------------------------------------------------
    topos_key: Optional[str] = None
    openai_api_key: Optional[str] = None

    # --- Runtime / logging -----------------------------------------------
    environment: str = "development"
    log_format: Optional[str] = None
    log_level: str = "DEBUG"

    # --- OpenAI ------------------------------------------------------------
    openai_base_url: str = "https://api.openai.com/v1"
    openai_model: str = "gpt-4o-mini"

    # --- Griptape ----------------------------------------------------------
    gt_cloud_api_key: Optional[str] = None
    griptape_nodes_api_base_url: str = "https://api.nodes.griptape.ai"

    # --- HTTP / CORS -------------------------------------------------------
    # Populated from ALLOWED_ORIGINS (preferred) or ALLOWED_ORIGINS_RAW; the
    # field name is kept as an alias choice so it still works as an init kwarg.
    allowed_origins_raw: str = Field(
        "http://localhost:3000",
        validation_alias=AliasChoices("ALLOWED_ORIGINS", "allowed_origins_raw"),
    )
    allowed_origin_regex: Optional[str] = None
    enable_health_auth: bool = False

    # --- Timeouts, retries and control-plane queues -------------------------
    request_timeout_seconds: float = 20.0
    openai_timeout_seconds: float = 15.0
    connection_retry_initial_seconds: float = 1.0
    connection_retry_max_seconds: float = 30.0
    connection_retry_jitter_ratio: float = 0.2
    connection_readiness_timeout_seconds: float = 15.0
    wait_for_control_plane_on_startup: bool = False
    wait_for_sync_on_startup: bool = False
    control_plane_inbound_concurrency_limit: int = 16
    control_plane_inbound_max_pending: int = 128
    control_plane_presence_outbox_size: int = 64
    sync_cursor_retry_attempts: int = 3
    sync_cursor_retry_delay_seconds: float = 0.5

    # --- Control plane and hosted-pool leases --------------------------------
    rate_limit_per_minute: int = 60
    topos_control_plane_url: Optional[str] = DEFAULT_TOPOS_CONTROL_PLANE_URL
    control_plane_verify_ssl: bool = True
    hosted_pool_lease_enabled: bool = False
    hosted_pool_allow_static_key_in_cloud: bool = False
    hosted_pool_enforce_lease_in_cloud: bool = True
    hosted_pool_lease_audience: Optional[str] = None
    hosted_pool_lease_issue_path: str = "/v1/system/pool-connectors/lease/issue"
    hosted_pool_lease_renew_path: str = "/v1/system/pool-connectors/lease/renew"
    hosted_pool_lease_revoke_path: str = "/v1/system/pool-connectors/lease/revoke"
    hosted_pool_lease_pool_group: str = "default"
    hosted_pool_lease_renew_skew_seconds: int = 60

    # --- Engine --------------------------------------------------------------
    engine_mode: str = "full"
    enable_llm: bool = True
    engine_transport_mode: str = "ws"
    engine_name: Optional[str] = None
    topos_compute_profile: str = "basic_hosted"

    engine_ollama_base_url: str = "http://localhost:11434"
    engine_default_provider: str = "huggingface"

    # Sanitization field-transforms via Ollama (see topos.config.sanitization_ollama + engine_config overrides)
    sanitization_ollama_enabled: bool = False
    sanitization_ollama_host: Optional[str] = None
    sanitization_ollama_default_model: str = "llama3.2"
    sanitization_ollama_timeout_sec: float = 120.0
    sanitization_ollama_auto_pull: bool = True
    sanitization_ollama_max_input_chars: int = 8000
    sanitization_ollama_model_pii_redaction: Optional[str] = None
    sanitization_ollama_model_nsfw_sanitization: Optional[str] = None
    sanitization_ollama_model_raw_to_summary: Optional[str] = None
    sanitization_ollama_model_raw_to_sentiment: Optional[str] = None
    sanitization_ollama_model_third_party_anonymization: Optional[str] = None
    sanitization_ollama_model_name_removal: Optional[str] = None
    sanitization_ollama_model_contact_removal: Optional[str] = None

    # --- Database ------------------------------------------------------------
    topos_database_path: Optional[str] = None
    topos_database_mode: str = "local"
    topos_database_service_url: Optional[str] = None
    topos_postgres_dsn: Optional[str] = None
    topos_postgres_host: Optional[str] = None
    topos_postgres_port: Optional[int] = None
    topos_postgres_db: Optional[str] = None
    topos_postgres_user: Optional[str] = None
    topos_postgres_password: Optional[str] = None
    topos_postgres_reset_incompatible_schema: bool = False
    topos_default_dataset_id: str = "default"
    topos_user_id: Optional[str] = None
    # Pooled read enforcement mode for hosted shared-tenancy engines.
    # "off" preserves legacy behavior; set to "pooled" to require tenant-scoped reads.
    topos_pool_mode: str = "off"

    # --- Sync ----------------------------------------------------------------
    topos_sync_url: str = "wss://cp.logu3s.com/ws/sync"
    enable_sync: bool = True

    @property
    def allowed_origins(self) -> List[str]:
        """Return ``allowed_origins_raw`` parsed into a list of origins.

        Accepts either a JSON array (``'["a", "b"]'``) or a comma-separated
        string (``"a, b"``).  Entries are stripped and empty entries dropped.
        A value that starts with ``[`` but is not a valid JSON list falls back
        to comma splitting.
        """
        raw = self.allowed_origins_raw
        if not raw:
            return []
        if isinstance(raw, list):
            # Defensive: the field is typed ``str``, but tolerate a list value.
            return [str(o).strip() for o in raw if str(o).strip()]
        raw_str = str(raw).strip()
        if not raw_str:
            return []
        if raw_str.startswith("["):
            try:
                parsed = json.loads(raw_str)
            except json.JSONDecodeError:
                parsed = None
            if isinstance(parsed, list):
                return [str(o).strip() for o in parsed if str(o).strip()]
        return [o.strip() for o in raw_str.split(",") if o.strip()]

    def get_sync_url(self) -> str:
        """Get sync URL (defaults to wss://cp.logu3s.com/ws/sync)."""
        return self.topos_sync_url

    # ------------------------------------------------------------------
    # Short read/write aliases for the ``topos_*`` fields.  Each getter
    # returns the backing field and each setter assigns to it unchanged.
    # ------------------------------------------------------------------

    @property
    def control_plane_url(self) -> Optional[str]:
        return self.topos_control_plane_url

    @control_plane_url.setter
    def control_plane_url(self, value: Optional[str]) -> None:
        self.topos_control_plane_url = value

    @property
    def database_path(self) -> Optional[str]:
        return self.topos_database_path

    @database_path.setter
    def database_path(self, value: Optional[str]) -> None:
        self.topos_database_path = value

    @property
    def database_mode(self) -> str:
        return self.topos_database_mode

    @database_mode.setter
    def database_mode(self, value: str) -> None:
        self.topos_database_mode = value

    @property
    def database_service_url(self) -> Optional[str]:
        return self.topos_database_service_url

    @database_service_url.setter
    def database_service_url(self, value: Optional[str]) -> None:
        self.topos_database_service_url = value

    @property
    def postgres_dsn(self) -> Optional[str]:
        return self.topos_postgres_dsn

    @postgres_dsn.setter
    def postgres_dsn(self, value: Optional[str]) -> None:
        self.topos_postgres_dsn = value

    @property
    def postgres_host(self) -> Optional[str]:
        return self.topos_postgres_host

    @postgres_host.setter
    def postgres_host(self, value: Optional[str]) -> None:
        self.topos_postgres_host = value

    @property
    def postgres_port(self) -> Optional[int]:
        return self.topos_postgres_port

    @postgres_port.setter
    def postgres_port(self, value: Optional[int]) -> None:
        self.topos_postgres_port = value

    @property
    def postgres_db(self) -> Optional[str]:
        return self.topos_postgres_db

    @postgres_db.setter
    def postgres_db(self, value: Optional[str]) -> None:
        self.topos_postgres_db = value

    @property
    def postgres_user(self) -> Optional[str]:
        return self.topos_postgres_user

    @postgres_user.setter
    def postgres_user(self, value: Optional[str]) -> None:
        self.topos_postgres_user = value

    @property
    def postgres_password(self) -> Optional[str]:
        return self.topos_postgres_password

    @postgres_password.setter
    def postgres_password(self, value: Optional[str]) -> None:
        self.topos_postgres_password = value

    @property
    def postgres_reset_incompatible_schema(self) -> bool:
        return self.topos_postgres_reset_incompatible_schema

    @postgres_reset_incompatible_schema.setter
    def postgres_reset_incompatible_schema(self, value: bool) -> None:
        self.topos_postgres_reset_incompatible_schema = value

    @property
    def default_dataset_id(self) -> str:
        return self.topos_default_dataset_id

    @default_dataset_id.setter
    def default_dataset_id(self, value: str) -> None:
        self.topos_default_dataset_id = value

    @property
    def user_id(self) -> Optional[str]:
        return self.topos_user_id

    @user_id.setter
    def user_id(self, value: Optional[str]) -> None:
        self.topos_user_id = value

    @property
    def engine_pool_mode(self) -> str:
        return self.topos_pool_mode

    @engine_pool_mode.setter
    def engine_pool_mode(self, value: str) -> None:
        self.topos_pool_mode = value

    @property
    def sync_url(self) -> str:
        return self.topos_sync_url

    @sync_url.setter
    def sync_url(self, value: str) -> None:
        self.topos_sync_url = value

    @model_validator(mode="after")
    def _validate_topos_key_or_lease(self) -> "Settings":
        """Require either a static ``TOPOS_KEY`` or hosted-pool lease mode.

        The process counts as a cloud runtime when any of ``K_SERVICE``,
        ``K_REVISION`` or ``CLOUD_RUN_JOB`` is set to a non-empty value.  In
        that case, with a control-plane URL configured:

        * lease mode is switched on unless ``HOSTED_POOL_LEASE_ENABLED`` is
          present in the process environment;
        * a static key with lease mode off is rejected unless lease
          enforcement is disabled or the break-glass static-key flag is set.

        Raises:
            ValueError: if the cloud static-key combination is disallowed, or
                if neither a key nor a usable lease configuration is present.
        """
        is_cloud_runtime = bool(
            os.getenv("K_SERVICE")
            or os.getenv("K_REVISION")
            or os.getenv("CLOUD_RUN_JOB")
        )
        # NOTE(review): only the process environment is checked here, so a
        # value supplied solely through the .env file is not treated as
        # explicit — confirm that is intended.
        lease_env_explicit = os.getenv("HOSTED_POOL_LEASE_ENABLED") is not None

        if is_cloud_runtime and self.topos_control_plane_url and not lease_env_explicit:
            # Cloud-hosted runtimes should default to lease-based connector identities
            # unless an operator explicitly sets HOSTED_POOL_LEASE_ENABLED.
            self.hosted_pool_lease_enabled = True

        if (
            is_cloud_runtime
            and self.topos_control_plane_url
            and self.topos_key
            and not self.hosted_pool_lease_enabled
            and self.hosted_pool_enforce_lease_in_cloud
            and not self.hosted_pool_allow_static_key_in_cloud
        ):
            raise ValueError(
                "Cloud runtime requires hosted pool lease mode by default. "
                "Set HOSTED_POOL_LEASE_ENABLED=true, or set "
                "HOSTED_POOL_ALLOW_STATIC_KEY_IN_CLOUD=true for break-glass static key mode."
            )

        if self.topos_key:
            return self
        if self.hosted_pool_lease_enabled and self.topos_control_plane_url:
            return self
        raise ValueError(
            "TOPOS_KEY is required unless HOSTED_POOL_LEASE_ENABLED=true and TOPOS_CONTROL_PLANE_URL is configured."
        )
308
+
309
+
310
+ settings = Settings()
@@ -0,0 +1,5 @@
1
+ """Contact identity helpers for messenger / UMA (Stage 11)."""
2
+
3
+ from topos.contacts.identity import normalize_contact_key
4
+
5
+ __all__ = ["normalize_contact_key"]
@@ -0,0 +1,24 @@
1
+ """Normalize sender / identifier strings for matching contact_identifiers rows."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any
6
+
7
+
8
def normalize_contact_key(value: Any) -> str:
    """Return the canonical key used to match a sender id to a contact identifier.

    Rules, applied in order to the stripped string form of ``value``
    (falsy values are treated as the empty string):

    * empty input            -> ``""``
    * ``"self"`` (any case)  -> ``"self"``
    * contains ``"@"``       -> the lower-cased string
    * contains any digit     -> the digits only, keeping a leading ``"+"``
      when the input started with one
    * anything else          -> the lower-cased string

    Aligned with historical logic in topos.core.handlers._normalize_contact_key.
    """
    text = str(value or "").strip()
    lowered = text.lower()
    # Empty, "self" and e-mail-like identifiers all normalize to the
    # lower-cased text ("" and "self" respectively for the first two).
    if not text or lowered == "self" or "@" in lowered:
        return lowered
    only_digits = "".join(filter(str.isdigit, text))
    if not only_digits:
        return lowered
    prefix = "+" if text.startswith("+") else ""
    return prefix + only_digits
@@ -0,0 +1,300 @@
1
+ from __future__ import annotations
2
+
3
+ import asyncio
4
+ import contextlib
5
+ from collections import deque
6
+ import json
7
+ import logging
8
+ import ssl
9
+ from typing import Any, Awaitable, Callable, Dict
10
+
11
+ import certifi
12
+ from websockets.asyncio.client import connect
13
+ from websockets.exceptions import ConnectionClosed
14
+
15
+ from .config.settings import settings
16
+ from .core.connection_resilience import (
17
+ ConnectionSnapshot,
18
+ ConnectionState,
19
+ ExponentialBackoff,
20
+ FailureCategory,
21
+ ResilienceConfig,
22
+ classify_connection_error,
23
+ is_fatal_connection_category,
24
+ utc_now_iso,
25
+ )
26
+
27
+ logger = logging.getLogger("topos.control_plane_client")
28
+
29
+
30
+ class ControlPlaneClient:
31
+ """Maintains a WS connection to the control plane and dispatches incoming requests."""
32
+
33
def __init__(
    self,
    control_plane_url: str,
    api_key: str,
    handler: Callable[[dict[str, Any]], Awaitable[dict[str, Any] | None]],
    verify_ssl: bool = True,
):
    """Store connection parameters and initialise connection bookkeeping.

    Args:
        control_plane_url: WebSocket endpoint to connect to.
        api_key: Credential stored for the connection.
        handler: Async callable stored for handling decoded inbound messages.
        verify_ssl: Stored flag controlling TLS certificate verification.

    Queue sizes and retry timings are read from the module-level
    ``settings`` object and clamped to sane minimums.
    """
    # Limits derived from settings (each clamped to a minimum).
    concurrency = max(1, int(settings.control_plane_inbound_concurrency_limit))
    pending_cap = max(concurrency, int(settings.control_plane_inbound_max_pending))
    outbox_cap = max(1, int(settings.control_plane_presence_outbox_size))
    retry_policy = ResilienceConfig(
        initial_backoff_s=max(0.1, float(settings.connection_retry_initial_seconds)),
        max_backoff_s=max(1.0, float(settings.connection_retry_max_seconds)),
        jitter_ratio=max(0.0, float(settings.connection_retry_jitter_ratio)),
    )

    # Caller-supplied parameters.
    self.control_plane_url = control_plane_url
    self.api_key = api_key
    self.handler = handler
    self.verify_ssl = verify_ssl

    # Background task, live socket and lifecycle signals.
    self._task: asyncio.Task | None = None
    self._stop = asyncio.Event()
    self._ws = None
    self._ready = asyncio.Event()

    # Connection state reported by get_connection_status().
    self._state: ConnectionState = "idle"
    self._state_changed_at: str | None = utc_now_iso()
    self._last_connected_at: str | None = None
    self._last_disconnected_at: str | None = None
    self._last_failure_category: FailureCategory = "none"
    self._last_failure_reason: str = ""
    self._attempt = 0
    self._consecutive_failures = 0

    # Inbound message handling.
    self._inbound_concurrency_limit = concurrency
    self._inbound_max_pending = pending_cap
    self._inbound_semaphore = asyncio.Semaphore(concurrency)
    self._inbound_tasks: set[asyncio.Task] = set()
    self._inbound_lock = asyncio.Lock()

    # Bounded presence outbox (oldest entries drop once maxlen is reached).
    self._presence_outbox_size = outbox_cap
    self._presence_outbox: deque[dict[str, Any]] = deque(maxlen=outbox_cap)
    self._outbox_lock = asyncio.Lock()

    self._backoff = ExponentialBackoff(retry_policy)
77
+
78
+ def _set_state(self, state: ConnectionState) -> None:
79
+ if self._state == state:
80
+ return
81
+ self._state = state
82
+ self._state_changed_at = utc_now_iso()
83
+
84
def get_connection_status(self) -> dict[str, Any]:
    """Return the current connection bookkeeping as a plain dict.

    The values are packed into a ``ConnectionSnapshot`` and serialised with
    its ``to_dict()``; ``connected`` reflects whether a socket is currently
    held.
    """
    fields = {
        "state": self._state,
        "connected": bool(self._ws),
        "attempt": self._attempt,
        "consecutive_failures": self._consecutive_failures,
        "last_failure_category": self._last_failure_category,
        "last_failure_reason": self._last_failure_reason,
        "last_state_change_at": self._state_changed_at,
        "last_connected_at": self._last_connected_at,
        "last_disconnected_at": self._last_disconnected_at,
        "outbox_depth": len(self._presence_outbox),
    }
    return ConnectionSnapshot(**fields).to_dict()
98
+
99
def start(self) -> None:
    """Launch the background connection loop unless one is already running.

    Must be called with a running event loop, since it creates an asyncio task.
    """
    already_running = bool(self._task) and not self._task.done()
    if not already_running:
        self._stop.clear()
        self._task = asyncio.create_task(self._run())
        logger.info("Control plane client starting: %s", self.control_plane_url)
105
+
106
async def wait_until_connected(self, timeout_s: float | None = None) -> bool:
    """Wait until the client reports a live connection.

    Args:
        timeout_s: Maximum seconds to wait.  When ``None`` the value of
            ``settings.connection_readiness_timeout_seconds`` is used.  The
            effective timeout is never shorter than 0.1 s.

    Returns:
        ``True`` if the ready event was set within the timeout, else ``False``.
    """
    if timeout_s is not None:
        timeout = float(timeout_s)
    else:
        timeout = float(settings.connection_readiness_timeout_seconds)
    try:
        await asyncio.wait_for(self._ready.wait(), timeout=max(0.1, timeout))
    except asyncio.TimeoutError:
        # asyncio.TimeoutError is only an alias of the builtin TimeoutError
        # from Python 3.11 on; catching the asyncio name works on every
        # version, so a timeout reliably yields False instead of propagating.
        return False
    return True
113
+
114
async def stop(self) -> None:
    """Shut the client down and return it to the ``idle`` state.

    Signals the run loop to stop, closes the socket with code 1000
    (errors from the close are ignored), cancels and awaits the background
    task, cancels in-flight inbound tasks, then clears the socket and the
    ready event.
    """
    self._set_state("stopping")
    self._stop.set()

    socket = self._ws
    if socket:
        # Best-effort close: a failing close must not block shutdown.
        with contextlib.suppress(Exception):
            await socket.close(code=1000)

    runner = self._task
    if runner:
        runner.cancel()
        with contextlib.suppress(asyncio.CancelledError):
            await runner

    await self._cancel_inbound_tasks()
    self._ws = None
    self._ready.clear()
    self._set_state("idle")
132
+
133
+ async def _cancel_inbound_tasks(self) -> None:
134
+ async with self._inbound_lock:
135
+ pending = list(self._inbound_tasks)
136
+ for task in pending:
137
+ task.cancel()
138
+ for task in pending:
139
+ with contextlib.suppress(asyncio.CancelledError):
140
+ await task
141
+
142
async def _run(self) -> None:
    """Connect, dispatch inbound messages, and reconnect with back-off until stopped.

    Each iteration opens a WebSocket to ``control_plane_url`` with a bearer
    ``Authorization`` header, flushes the presence outbox, then hands every
    JSON-decodable frame to ``_schedule_inbound_message``.  Any exception
    ends the connection and is recorded via ``_record_failure``; unless a
    stop was requested the loop then sleeps for the next back-off delay.
    """
    auth_headers = {"Authorization": f"Bearer {self.api_key}"}

    # TLS context only for wss:// endpoints; verification can be disabled.
    tls = None
    if self.control_plane_url.startswith("wss://"):
        tls = ssl.create_default_context(cafile=certifi.where())
        if not self.verify_ssl:
            tls.check_hostname = False
            tls.verify_mode = ssl.CERT_NONE

    while not self._stop.is_set():
        self._set_state("connecting")
        self._attempt += 1
        try:
            async with connect(
                self.control_plane_url,
                additional_headers=auth_headers,
                ssl=tls,
                # Transform-heavy requests can block the event loop in the handler path long enough
                # to miss pong deadlines; disable pong timeout-driven disconnects.
                ping_timeout=None,
            ) as ws:
                self._ws = ws
                self._set_state("connected")
                self._ready.set()
                # A successful connect clears the failure history.
                self._backoff.reset()
                self._last_failure_category = "none"
                self._last_failure_reason = ""
                self._consecutive_failures = 0
                self._last_connected_at = utc_now_iso()
                logger.info("Control plane client connected")
                await self._flush_presence_outbox()
                async for frame in ws:
                    if self._stop.is_set():
                        break
                    try:
                        message = json.loads(frame)
                    except Exception:
                        logger.warning("Relay message is not valid JSON: %s", frame)
                    else:
                        await self._schedule_inbound_message(ws, message)
        except Exception as exc:  # noqa: BLE001
            # Covers ConnectionClosed as well; every failure is recorded alike.
            self._record_failure(exc)
        finally:
            self._ws = None
            self._ready.clear()
            if not (self._stop.is_set() or self._state == "stopping"):
                self._last_disconnected_at = utc_now_iso()

        if self._stop.is_set():
            break

        pause_s = self._backoff.next_delay()
        if is_fatal_connection_category(self._last_failure_category):
            self._set_state("degraded")
        else:
            self._set_state("backing_off")
        logger.warning(
            "Control plane reconnect scheduled endpoint=%s state=%s attempt=%d failures=%d category=%s delay_s=%.2f reason=%s",
            self.control_plane_url,
            self._state,
            self._attempt,
            self._consecutive_failures,
            self._last_failure_category,
            pause_s,
            self._last_failure_reason,
        )
        await self._wait_for_stop_or_timeout(pause_s)

    self._set_state("idle")
206
+
207
def _record_failure(self, exc: BaseException) -> None:
    """Classify a connection error, store the result and log it.

    Updates the last failure category and reason from
    ``classify_connection_error(exc)`` and increments the consecutive-failure
    counter before emitting a warning.
    """
    classified = classify_connection_error(exc)
    self._last_failure_category, self._last_failure_reason = classified
    self._consecutive_failures += 1
    logger.warning(
        "Control plane connectivity event endpoint=%s event=connection_failed category=%s failures=%d reason=%s",
        self.control_plane_url,
        self._last_failure_category,
        self._consecutive_failures,
        self._last_failure_reason,
    )
219
+
220
+ async def _wait_for_stop_or_timeout(self, timeout_s: float) -> None:
221
+ try:
222
+ await asyncio.wait_for(self._stop.wait(), timeout=timeout_s)
223
+ except TimeoutError:
224
+ return
225
+
226
+ async def _schedule_inbound_message(self, ws, data: Dict[str, Any]) -> None:
227
+ async with self._inbound_lock:
228
+ pending_count = len(self._inbound_tasks)
229
+ if pending_count >= self._inbound_max_pending:
230
+ logger.warning("Dropping inbound request due to saturation pending=%d", pending_count)
231
+ request_id = data.get("id")
232
+ if request_id:
233
+ await self._send_ws_json(
234
+ ws,
235
+ {"id": request_id, "status": "error", "error": "Engine is busy. Retry shortly."},
236
+ )
237
+ return
238
+ task = asyncio.create_task(self._handle_message_guarded(ws, data))
239
+ self._inbound_tasks.add(task)
240
+ task.add_done_callback(self._on_inbound_task_done)
241
+
242
+ def _on_inbound_task_done(self, task: asyncio.Task) -> None:
243
+ self._inbound_tasks.discard(task)
244
+
245
+ async def _handle_message_guarded(self, ws, data: Dict[str, Any]) -> None:
246
+ async with self._inbound_semaphore:
247
+ await self._handle_message(ws, data)
248
+
249
+ async def _flush_presence_outbox(self) -> None:
250
+ async with self._outbox_lock:
251
+ pending = list(self._presence_outbox)
252
+ self._presence_outbox.clear()
253
+ if not pending:
254
+ return
255
+ for message in pending:
256
+ success = await self._send_ws_json(self._ws, message)
257
+ if not success:
258
+ await self._enqueue_presence_message(message)
259
+ break
260
+
261
+ async def _enqueue_presence_message(self, message: Dict[str, Any]) -> None:
262
+ async with self._outbox_lock:
263
+ at_capacity = len(self._presence_outbox) >= self._presence_outbox_size
264
+ if at_capacity:
265
+ dropped = self._presence_outbox.popleft()
266
+ logger.warning(
267
+ "Presence outbox full; dropping oldest message type=%s",
268
+ dropped.get("type"),
269
+ )
270
+ self._presence_outbox.append(dict(message))
271
+
272
+ async def _send_ws_json(self, ws, payload: Dict[str, Any]) -> bool:
273
+ if not ws:
274
+ return False
275
+ try:
276
+ await ws.send(json.dumps(payload))
277
+ return True
278
+ except Exception as exc: # noqa: BLE001
279
+ logger.error("Failed to send message to control plane: %s", exc)
280
+ return False
281
+
282
+ async def _handle_message(self, ws, data: Dict[str, Any]) -> None:
283
+ try:
284
+ resp = await self.handler(data)
285
+ except Exception as exc: # noqa: BLE001
286
+ logger.error("Handler raised exception: %s", exc, exc_info=True)
287
+ resp = {"id": data.get("id"), "status": "error", "error": str(exc)}
288
+ if resp is None:
289
+ return # e.g. connection_info or message without id; CP has no pending request to match
290
+ await self._send_ws_json(ws, resp)
291
+
292
async def send_message(self, message: Dict[str, Any]) -> None:
    """Push an unsolicited message (such as a progress update) to the control plane.

    When there is no active connection, or the send fails, the message is
    placed in the presence outbox instead of being discarded.
    """
    if self._ws:
        delivered = await self._send_ws_json(self._ws, message)
        if not delivered:
            await self._enqueue_presence_message(message)
        return

    # No live connection: queue the message for later.
    await self._enqueue_presence_message(message)
    logger.warning("Queued presence message; control plane currently disconnected")
topos/core/__init__.py ADDED
@@ -0,0 +1 @@
1
+ """Core utilities for Topos."""