topos-node 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (249)
  1. shared/__init__.py +59 -0
  2. shared/filtering.py +640 -0
  3. shared/schema_registry.py +229 -0
  4. topos/__init__.py +5 -0
  5. topos/__version__.py +6 -0
  6. topos/analytics/__init__.py +15 -0
  7. topos/analytics/duckdb_adapter.py +48 -0
  8. topos/analytics/messenger_communities.py +349 -0
  9. topos/analytics/messenger_graph.py +522 -0
  10. topos/analytics/messenger_labels.py +321 -0
  11. topos/analytics/profiles.py +22 -0
  12. topos/analytics/query_engine.py +64 -0
  13. topos/analytics/raw_queries.py +174 -0
  14. topos/api/__init__.py +1 -0
  15. topos/api/analytics.py +52 -0
  16. topos/api/app_registry.py +31 -0
  17. topos/api/backup.py +15 -0
  18. topos/api/compute_remote.py +175 -0
  19. topos/api/data_commit.py +158 -0
  20. topos/api/data_explorer_table_prefs.py +81 -0
  21. topos/api/db.py +10 -0
  22. topos/api/device.py +25 -0
  23. topos/api/enrichment.py +959 -0
  24. topos/api/filter_lab.py +195 -0
  25. topos/api/health.py +61 -0
  26. topos/api/ingestion_api.py +37 -0
  27. topos/api/ingestion_compat.py +21 -0
  28. topos/api/ingestion_sources.py +600 -0
  29. topos/api/llm.py +76 -0
  30. topos/api/local_mcp.py +46 -0
  31. topos/api/messenger_analytics.py +385 -0
  32. topos/api/query_api.py +13 -0
  33. topos/api/sanitization_ollama_config.py +64 -0
  34. topos/api/source_install.py +324 -0
  35. topos/api/sources.py +13 -0
  36. topos/api/sync.py +10 -0
  37. topos/api/ui_config.py +83 -0
  38. topos/api/uma_data.py +311 -0
  39. topos/api/usage.py +49 -0
  40. topos/api/user_identity.py +46 -0
  41. topos/app.py +239 -0
  42. topos/auth.py +17 -0
  43. topos/canonicalization/__init__.py +1 -0
  44. topos/canonicalization/mappers/__init__.py +22 -0
  45. topos/canonicalization/mappers/base.py +26 -0
  46. topos/canonicalization/mappers/chatgpt_mapper.py +40 -0
  47. topos/canonicalization/mappers/grok_mapper.py +17 -0
  48. topos/canonicalization/mappers/messenger_mapper.py +58 -0
  49. topos/canonicalization/models.py +31 -0
  50. topos/canonicalization/resolver.py +23 -0
  51. topos/cli/__init__.py +1 -0
  52. topos/cli/__main__.py +6 -0
  53. topos/cli/commands.py +132 -0
  54. topos/config/__init__.py +1 -0
  55. topos/config/sanitization_ollama.py +189 -0
  56. topos/config/settings.py +310 -0
  57. topos/contacts/__init__.py +5 -0
  58. topos/contacts/identity.py +24 -0
  59. topos/control_plane_client.py +300 -0
  60. topos/core/__init__.py +1 -0
  61. topos/core/api_models.py +128 -0
  62. topos/core/connection_resilience.py +99 -0
  63. topos/core/device_helpers.py +8 -0
  64. topos/core/errors.py +13 -0
  65. topos/core/events.py +12 -0
  66. topos/core/handlers.py +5625 -0
  67. topos/core/logging.py +175 -0
  68. topos/core/metrics.py +21 -0
  69. topos/core/startup_banner.py +62 -0
  70. topos/core/state.py +682 -0
  71. topos/core/table_layers.py +45 -0
  72. topos/core/types.py +13 -0
  73. topos/data_explorer_table_prefs.py +150 -0
  74. topos/engine/__init__.py +29 -0
  75. topos/engine/backends/__init__.py +50 -0
  76. topos/engine/backends/base.py +21 -0
  77. topos/engine/backends/huggingface.py +151 -0
  78. topos/engine/backends/ollama.py +181 -0
  79. topos/engine/backends/stub.py +22 -0
  80. topos/engine/engine.py +165 -0
  81. topos/engine/intake.py +32 -0
  82. topos/engine/queue_manager.py +112 -0
  83. topos/engine/registration.py +126 -0
  84. topos/engine/result_formatter.py +38 -0
  85. topos/engine/router.py +19 -0
  86. topos/engine/scoped_token.py +82 -0
  87. topos/engine/tasks.py +154 -0
  88. topos/engine/transport.py +44 -0
  89. topos/engine/usage_guard.py +100 -0
  90. topos/engine/usage_observation.py +129 -0
  91. topos/engine/validator.py +23 -0
  92. topos/enrichment/__init__.py +1 -0
  93. topos/enrichment/derived_tables.py +214 -0
  94. topos/enrichment/jobs/__init__.py +30 -0
  95. topos/enrichment/jobs/base.py +54 -0
  96. topos/enrichment/jobs/canonical/__init__.py +1 -0
  97. topos/enrichment/jobs/canonical/embeddings_job.py +27 -0
  98. topos/enrichment/jobs/canonical/emo_27_job.py +97 -0
  99. topos/enrichment/jobs/canonical/entities_job.py +27 -0
  100. topos/enrichment/jobs/canonical/sentiment_job.py +27 -0
  101. topos/enrichment/jobs/canonical/topics_job.py +27 -0
  102. topos/enrichment/jobs/raw/__init__.py +1 -0
  103. topos/enrichment/jobs/raw/attachments_job.py +12 -0
  104. topos/enrichment/jobs/raw/language_job.py +12 -0
  105. topos/enrichment/jobs/raw/time_normalization_job.py +12 -0
  106. topos/enrichment/jobs/raw/tool_calls_job.py +12 -0
  107. topos/enrichment/models/__init__.py +1 -0
  108. topos/enrichment/models/manager.py +8 -0
  109. topos/enrichment/models/registry.py +71 -0
  110. topos/enrichment/models/versioning.py +8 -0
  111. topos/enrichment/orchestrator.py +177 -0
  112. topos/enrichment/processor.py +17 -0
  113. topos/enrichment/progress_bar.py +122 -0
  114. topos/enrichment/website_classifier.py +31 -0
  115. topos/filter_lab/__init__.py +1 -0
  116. topos/filter_lab/bundles.py +300 -0
  117. topos/filter_lab/schema.py +86 -0
  118. topos/filter_lab/service.py +167 -0
  119. topos/filter_lab/store.py +374 -0
  120. topos/filter_lab/worker.py +250 -0
  121. topos/hosted_pool_lease.py +153 -0
  122. topos/ingestion/__init__.py +1 -0
  123. topos/ingestion/checkpoints/__init__.py +6 -0
  124. topos/ingestion/checkpoints/checkpoint_store.py +24 -0
  125. topos/ingestion/checkpoints/sqlite_checkpoint_store.py +82 -0
  126. topos/ingestion/ingest_helpers.py +504 -0
  127. topos/ingestion/jobs.py +91 -0
  128. topos/ingestion/local_sync.py +823 -0
  129. topos/ingestion/log_preview.py +21 -0
  130. topos/ingestion/manager.py +1100 -0
  131. topos/ingestion/parser.py +174 -0
  132. topos/ingestion/parsers/__init__.py +32 -0
  133. topos/ingestion/parsers/base.py +24 -0
  134. topos/ingestion/parsers/browser_parser.py +171 -0
  135. topos/ingestion/parsers/calendar_parser.py +21 -0
  136. topos/ingestion/parsers/chatgpt_conversation_flattener.py +266 -0
  137. topos/ingestion/parsers/chatgpt_parser.py +67 -0
  138. topos/ingestion/parsers/grok_parser.py +21 -0
  139. topos/ingestion/parsers/messenger_parser.py +97 -0
  140. topos/ingestion/progress.py +54 -0
  141. topos/ingestion/sources/__init__.py +20 -0
  142. topos/ingestion/sources/base.py +39 -0
  143. topos/ingestion/sources/calendar.py +29 -0
  144. topos/ingestion/sources/chatgpt.py +29 -0
  145. topos/ingestion/sources/contact_importers.py +274 -0
  146. topos/ingestion/sources/grok.py +29 -0
  147. topos/ingestion/sources/imessage_reader.py +479 -0
  148. topos/ingestion/sources/signal_export_parser.py +132 -0
  149. topos/ingestion/sources/signal_reader.py +491 -0
  150. topos/ingestion/state_machine.py +70 -0
  151. topos/ingestion/triggers/__init__.py +1 -0
  152. topos/ingestion/triggers/file_trigger.py +36 -0
  153. topos/ingestion/triggers/sqlite_trigger.py +18 -0
  154. topos/ingestion/validation/__init__.py +1 -0
  155. topos/ingestion/validation/base.py +27 -0
  156. topos/ingestion/validation/schema_registry.py +111 -0
  157. topos/ingestion/validation/schema_validator.py +13 -0
  158. topos/lineage/__init__.py +1 -0
  159. topos/lineage/provenance.py +9 -0
  160. topos/lineage/tracker.py +9 -0
  161. topos/mcp_stdio_proxy.py +83 -0
  162. topos/observability/__init__.py +1 -0
  163. topos/observability/alerts.py +7 -0
  164. topos/observability/metrics.py +25 -0
  165. topos/observability/tracing.py +18 -0
  166. topos/openai_client.py +69 -0
  167. topos/projections/__init__.py +1 -0
  168. topos/projections/vector_index/__init__.py +1 -0
  169. topos/projections/vector_index/base.py +21 -0
  170. topos/projections/vector_index/builders.py +11 -0
  171. topos/projections/vector_index/health_checks.py +5 -0
  172. topos/rate_limit.py +43 -0
  173. topos/sanitization/__init__.py +16 -0
  174. topos/sanitization/ollama_transforms.py +276 -0
  175. topos/scope_resolution.py +89 -0
  176. topos/services/__init__.py +1 -0
  177. topos/services/container.py +46 -0
  178. topos/services/embeddings/__init__.py +1 -0
  179. topos/services/embeddings/base.py +7 -0
  180. topos/services/embeddings/local.py +9 -0
  181. topos/services/embeddings/remote.py +9 -0
  182. topos/services/interfaces.py +40 -0
  183. topos/services/llm/__init__.py +1 -0
  184. topos/services/llm/base.py +7 -0
  185. topos/services/llm/openai.py +126 -0
  186. topos/services/local.py +123 -0
  187. topos/services/postgres.py +385 -0
  188. topos/sources/__init__.py +6 -0
  189. topos/sources/definitions.py +114 -0
  190. topos/sources/install_service.py +836 -0
  191. topos/sources/registry.py +263 -0
  192. topos/sources/runtime_install.py +427 -0
  193. topos/storage/__init__.py +1 -0
  194. topos/storage/canonical/__init__.py +18 -0
  195. topos/storage/canonical/ai_chat/__init__.py +22 -0
  196. topos/storage/canonical/ai_chat/canonicalizer.py +147 -0
  197. topos/storage/canonical/ai_chat/mapper.py +168 -0
  198. topos/storage/canonical/ai_chat/model.py +87 -0
  199. topos/storage/canonical/ai_chat/tables.py +179 -0
  200. topos/storage/canonical/canonical_store.py +24 -0
  201. topos/storage/canonical/conversations_tables.py +1020 -0
  202. topos/storage/canonical/mapping_store.py +30 -0
  203. topos/storage/canonical/postgres.py +10 -0
  204. topos/storage/db/__init__.py +1 -0
  205. topos/storage/db/client.py +8 -0
  206. topos/storage/db/migrations/__init__.py +1 -0
  207. topos/storage/db/migrations/stage9_column_renames.py +78 -0
  208. topos/storage/db/paths.py +122 -0
  209. topos/storage/db/postgres.py +240 -0
  210. topos/storage/db/schema.py +6 -0
  211. topos/storage/enrichment/__init__.py +1 -0
  212. topos/storage/enrichment/canonical_enrichment_store.py +7 -0
  213. topos/storage/enrichment/raw_enrichment_store.py +18 -0
  214. topos/storage/normalized/__init__.py +1 -0
  215. topos/storage/normalized/normalized_store.py +24 -0
  216. topos/storage/oplog/__init__.py +1 -0
  217. topos/storage/oplog/decision.py +6 -0
  218. topos/storage/oplog/oplog_store.py +17 -0
  219. topos/storage/oplog/postgres.py +10 -0
  220. topos/storage/projections/__init__.py +1 -0
  221. topos/storage/projections/index_ops_store.py +6 -0
  222. topos/storage/projections/vector_index_store.py +6 -0
  223. topos/storage/raw/__init__.py +1 -0
  224. topos/storage/raw/browser_flat_tables.py +303 -0
  225. topos/storage/raw/file_store.py +100 -0
  226. topos/storage/raw/raw_store.py +29 -0
  227. topos/storage/raw/raw_tables_manager.py +295 -0
  228. topos/storage/raw/sqlite_raw_store.py +17 -0
  229. topos/storage/security/encryption.py +21 -0
  230. topos/storage/signal_identity.py +71 -0
  231. topos/storage/source_settings.py +116 -0
  232. topos/storage/user_identity.py +69 -0
  233. topos/sync/__init__.py +5 -0
  234. topos/sync/client.py +272 -0
  235. topos/sync_handlers.py +70 -0
  236. topos/testing/__init__.py +1 -0
  237. topos/testing/lifespan.py +7 -0
  238. topos/uma_contact_enrichment.py +1032 -0
  239. topos/uma_filters.py +669 -0
  240. topos/uma_resource_id.py +24 -0
  241. topos/uma_rpt.py +69 -0
  242. topos/utils/base_object.py +61 -0
  243. topos/websocket_client.py +21 -0
  244. topos_node-0.1.0.dist-info/METADATA +199 -0
  245. topos_node-0.1.0.dist-info/RECORD +249 -0
  246. topos_node-0.1.0.dist-info/WHEEL +5 -0
  247. topos_node-0.1.0.dist-info/entry_points.txt +2 -0
  248. topos_node-0.1.0.dist-info/licenses/LICENSE +201 -0
  249. topos_node-0.1.0.dist-info/top_level.txt +2 -0
@@ -0,0 +1,168 @@
1
+ """Canonical mapper - maps staging data to canonical models.
2
+
3
+ Migrated from engine/canonical/mapper.py (commit 7b709af).
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import logging
9
+ from abc import ABC, abstractmethod
10
+ from typing import Any, Dict, List
11
+
12
+ from .model import CanonicalAIChatMessage, CanonicalAIChatConversation
13
+
14
+ logger = logging.getLogger("topos.storage.canonical.ai_chat.mapper")
15
+
16
+
17
class CanonicalMapper(ABC):
    """Abstract interface for mapping staging data onto canonical models.

    Concrete mappers implement both methods below; the class itself cannot
    be instantiated.
    """

    @abstractmethod
    def map_to_canonical(
        self,
        staging_record: Dict[str, Any],
        source: str,
    ) -> List[CanonicalAIChatMessage]:
        """Convert one staging record into canonical messages.

        Args:
            staging_record: A single record read from a staging table.
            source: Identifier of the originating source (e.g., "chatgpt").

        Returns:
            The canonical messages derived from the record. Usually a
            one-element list, but a mapper may return several.
        """

    @abstractmethod
    def extract_conversation_id(self, staging_record: Dict[str, Any]) -> str:
        """Derive the conversation ID a staging record belongs to.

        Args:
            staging_record: A single record read from a staging table.

        Returns:
            The conversation ID, unified across sources.
        """
48
+
49
+
50
class ChatGPTToAIChatMapper(CanonicalMapper):
    """Maps ChatGPT staging records to the canonical AI chat model."""

    def map_to_canonical(
        self,
        staging_record: Dict[str, Any],
        source: str = "chatgpt",
    ) -> List[CanonicalAIChatMessage]:
        """Map a ChatGPT staging record to a canonical AI chat message.

        ChatGPT staging format:
            {
                "message_id": "...",
                "dataset_id": "...",
                "thread_id": "...",
                "ts": "...",
                "sender_type": "human" | "assistant",
                "content": "..."
            }

        Optional keys read from the record are "source_id" (takes precedence
        over ``source`` for the message's ``source_id``) and "_metadata" (a
        dict merged into the message metadata).

        Args:
            staging_record: Record from the staging table.
            source: Mapper-level source identifier; recorded in the metadata
                as "original_source" and used as ``source_id`` when the
                record has no "source_id" of its own.

        Returns:
            List with a single canonical message.
        """
        conversation_id = self.extract_conversation_id(staging_record)

        # Use source_id from staging_record if available (actual source_id like
        # "chatgpt_file_ingestion"), otherwise fall back to the source
        # parameter (mapper ID like "chatgpt").
        actual_source_id = staging_record.get("source_id") or source

        metadata: Dict[str, Any] = {
            "thread_id": staging_record.get("thread_id"),
            "original_source": source,
        }

        # Preserve _metadata if present (for conversation tree reconstruction).
        # The key may be present with a None (or otherwise non-dict) value;
        # feeding that to dict.update() would raise, so only dicts are merged.
        extra_metadata = staging_record.get("_metadata")
        if isinstance(extra_metadata, dict):
            metadata.update(extra_metadata)
        elif extra_metadata is not None:
            logger.warning(
                "Ignoring non-dict _metadata (%s) on staging record %s",
                type(extra_metadata).__name__,
                staging_record.get("message_id"),
            )

        message = CanonicalAIChatMessage(
            message_id=staging_record.get("message_id", ""),
            conversation_id=conversation_id,
            sender_type=staging_record.get("sender_type", ""),
            sender_id=None,
            ts=staging_record.get("ts", ""),
            content=staging_record.get("content") or "",
            content_rendered=None,
            metadata_json=metadata,
            seq=0,
            source_id=actual_source_id,
        )
        return [message]

    def extract_conversation_id(self, staging_record: Dict[str, Any]) -> str:
        """Extract the conversation ID from a ChatGPT staging record.

        Returns:
            "chatgpt:<thread_id>" when the record has a truthy "thread_id",
            otherwise "chatgpt:<dataset_id>" (empty suffix when the
            "dataset_id" key is absent).
        """
        thread_id = staging_record.get("thread_id")
        if thread_id:
            return f"chatgpt:{thread_id}"
        dataset_id = staging_record.get("dataset_id", "")
        return f"chatgpt:{dataset_id}"
108
+
109
+
110
class StoreMessageToAIChatMapper(CanonicalMapper):
    """Mapper for rows of the messages table (store_message) to the canonical AI chat model."""

    def map_to_canonical(
        self,
        staging_record: Dict[str, Any],
        source: str = "store_message",
    ) -> List[CanonicalAIChatMessage]:
        """Build the single canonical message for a store_message record.

        Args:
            staging_record: Record taken from the messages table.
            source: Source identifier; stored both as the message's
                ``source_id`` and as "original_source" in its metadata.

        Returns:
            A one-element list holding the canonical message.
        """
        conversation_id = self.extract_conversation_id(staging_record)
        field = staging_record.get
        metadata = {
            "thread_id": field("thread_id"),
            "original_source": source,
        }
        return [
            CanonicalAIChatMessage(
                message_id=field("message_id", ""),
                conversation_id=conversation_id,
                sender_type=field("sender_type", ""),
                sender_id=None,
                ts=field("ts", ""),
                content=field("content") or "",
                content_rendered=None,
                metadata_json=metadata,
                seq=0,
                source_id=source,
            )
        ]

    def extract_conversation_id(self, staging_record: Dict[str, Any]) -> str:
        """Return "store_message:<thread_id>", falling back to the dataset_id.

        The fallback applies whenever "thread_id" is missing or falsy; an
        absent "dataset_id" contributes an empty suffix.
        """
        key = staging_record.get("thread_id") or staging_record.get("dataset_id", "")
        return f"store_message:{key}"
144
+
145
+
146
# Source identifier -> mapper class. Looked up by get_mapper() and extended
# at runtime through register_mapper().
_MAPPER_REGISTRY: Dict[str, type[CanonicalMapper]] = dict(
    chatgpt=ChatGPTToAIChatMapper,
    store_message=StoreMessageToAIChatMapper,
)
150
+
151
+
152
def register_mapper(source: str, mapper_class: type[CanonicalMapper]) -> None:
    """Add (or replace) the mapper class used for ``source``.

    Args:
        source: Source identifier to register the mapper under.
        mapper_class: A subclass of CanonicalMapper.

    Raises:
        TypeError: If ``mapper_class`` is not a CanonicalMapper subclass.
    """
    if issubclass(mapper_class, CanonicalMapper):
        _MAPPER_REGISTRY[source] = mapper_class
        logger.info("Registered canonical mapper for source: %s -> %s", source, mapper_class.__name__)
        return
    raise TypeError(f"Mapper class must be a subclass of CanonicalMapper, got {mapper_class}")
158
+
159
+
160
def get_mapper(source: str) -> CanonicalMapper:
    """Instantiate the mapper registered for ``source``.

    Args:
        source: Source identifier to look up in the mapper registry.

    Returns:
        A new instance of the registered mapper class.

    Raises:
        ValueError: If no mapper is registered for ``source``; the message
            lists the registered sources.
    """
    mapper_class = _MAPPER_REGISTRY.get(source)
    if mapper_class:
        return mapper_class()

    available = ", ".join(_MAPPER_REGISTRY.keys())
    raise ValueError(
        f"Unknown source for canonical mapping: {source}. Available sources: {available}"
    )
@@ -0,0 +1,87 @@
1
+ """Canonical data models - unified schemas for compatible sources.
2
+
3
+ Migrated from engine/canonical/model.py (commit 7b709af).
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ from dataclasses import dataclass
9
+ from typing import Any, Dict, Optional
10
+
11
+
12
@dataclass
class CanonicalAIChatMessage:
    """Unified representation of a single AI chat message.

    Every AI chat source (ChatGPT, Claude, Gemini, etc.) is mapped onto this
    one shape.
    """

    message_id: str
    conversation_id: str  # Unified conversation identifier
    sender_type: str  # "human" | "assistant" | "system"
    ts: str  # ISO timestamp
    content: str  # Message content
    source_id: str = ""  # Original source identifier (e.g., "chatgpt")
    sender_id: Optional[str] = None  # Optional sender identifier
    content_rendered: Optional[str] = None  # Optional rendered content
    metadata_json: Optional[Dict[str, Any]] = None  # Source-specific metadata
    seq: int = 0  # Sequence number within conversation

    def to_dict(self) -> Dict[str, Any]:
        """Return the message as a flat dict for database storage.

        ``metadata_json`` is serialised to a JSON string; a missing or empty
        metadata dict is stored as None.
        """
        import json

        columns = (
            "message_id",
            "conversation_id",
            "sender_type",
            "sender_id",
            "ts",
            "content",
            "content_rendered",
            "metadata_json",
            "seq",
            "source_id",
        )
        row: Dict[str, Any] = {name: getattr(self, name) for name in columns}
        # Re-assigning an existing key keeps its position in the dict.
        if self.metadata_json:
            row["metadata_json"] = json.dumps(self.metadata_json)
        else:
            row["metadata_json"] = None
        return row
45
+
46
+
47
@dataclass
class CanonicalAIChatConversation:
    """Unified representation of a conversation across all AI chat sources."""

    conversation_id: str
    owner_user_id: str
    title: Optional[str] = None
    source: str = ""  # Original source (e.g., "chatgpt")
    created_at: str = ""  # ISO timestamp
    updated_at: str = ""  # ISO timestamp

    def to_dict(self) -> Dict[str, Any]:
        """Return the conversation as a flat dict for database storage."""
        columns = (
            "conversation_id",
            "owner_user_id",
            "title",
            "source",
            "created_at",
            "updated_at",
        )
        return {name: getattr(self, name) for name in columns}
70
+
71
+
72
class CanonicalAIChatModel:
    """Names of the tables backing the canonical AI chat model.

    The canonical model unifies all AI chat sources (ChatGPT, Claude,
    Gemini, etc.) into a single format; this class only exposes the table
    names that format is stored in.
    """

    @staticmethod
    def get_conversation_table() -> str:
        """Name of the canonical conversations table."""
        return "ai_chat_conversations"

    @staticmethod
    def get_messages_table() -> str:
        """Name of the canonical messages table."""
        return "ai_chat_messages"
@@ -0,0 +1,179 @@
1
+ """Canonical tables manager - manages canonical database tables.
2
+
3
+ Migrated from engine/canonical/tables.py (commit 7b709af).
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import logging
9
+ import sqlite3
10
+ from typing import Any, Dict, List
11
+
12
+ from .model import CanonicalAIChatMessage, CanonicalAIChatConversation
13
+
14
+ logger = logging.getLogger("topos.storage.canonical.ai_chat.tables")
15
+
16
+
17
class CanonicalTablesManager:
    """Creates and writes the canonical AI chat tables on a SQLite connection.

    The manager owns two tables, ``ai_chat_conversations`` and
    ``ai_chat_messages``, plus their indexes. Constructing it runs the
    CREATE ... IF NOT EXISTS statements on the supplied connection.
    """

    # DDL executed in order by _ensure_tables(). Every statement is
    # IF NOT EXISTS, so running them against an existing schema is a no-op.
    _SCHEMA_STATEMENTS = (
        """
        CREATE TABLE IF NOT EXISTS ai_chat_conversations (
            conversation_id TEXT PRIMARY KEY,
            owner_user_id TEXT NOT NULL,
            title TEXT,
            source_id TEXT NOT NULL,
            created_at TEXT NOT NULL,
            updated_at TEXT NOT NULL
        )
        """,
        """
        CREATE INDEX IF NOT EXISTS idx_ai_chat_conversations_owner
        ON ai_chat_conversations(owner_user_id)
        """,
        """
        CREATE INDEX IF NOT EXISTS idx_ai_chat_conversations_source_id
        ON ai_chat_conversations(source_id)
        """,
        """
        CREATE TABLE IF NOT EXISTS ai_chat_messages (
            message_id TEXT PRIMARY KEY,
            conversation_id TEXT NOT NULL,
            sender_type TEXT NOT NULL,
            sender_id TEXT,
            event_at TEXT NOT NULL,
            content TEXT NOT NULL,
            content_rendered TEXT,
            metadata_json TEXT,
            sequence INTEGER NOT NULL DEFAULT 0,
            source_id TEXT NOT NULL
        )
        """,
        """
        CREATE INDEX IF NOT EXISTS idx_ai_chat_messages_conversation
        ON ai_chat_messages(conversation_id, sequence)
        """,
        """
        CREATE INDEX IF NOT EXISTS idx_ai_chat_messages_event_at
        ON ai_chat_messages(event_at)
        """,
        """
        CREATE INDEX IF NOT EXISTS idx_ai_chat_messages_source
        ON ai_chat_messages(source_id)
        """,
    )

    def __init__(self, conn: sqlite3.Connection):
        """Store the connection and make sure the canonical tables exist.

        Args:
            conn: Open SQLite connection used for all reads and writes.
        """
        self.conn = conn
        self._ensure_tables()

    def _ensure_tables(self) -> None:
        """Create the canonical tables and indexes if they are missing.

        A failure is rolled back and logged but NOT re-raised, so
        construction still succeeds; later writes will then surface the
        underlying problem.
        """
        try:
            for statement in self._SCHEMA_STATEMENTS:
                self.conn.execute(statement)
            self.conn.commit()
            logger.debug("Ensured canonical tables exist")
        except Exception as e:
            self.conn.rollback()
            logger.error("Failed to ensure canonical tables: %s", e)

    def _write_in_batches(
        self,
        sql: str,
        items: List[Any],
        to_row: Any,
        batch_size: int,
        noun: str,
    ) -> int:
        """Run ``sql`` via executemany over ``items`` in committed batches.

        Args:
            sql: Parameterised INSERT statement.
            items: Objects to write.
            to_row: Callable turning one item into the parameter tuple.
            batch_size: Maximum rows per executemany/commit; must be >= 1.
            noun: Plural label used in log messages.

        Returns:
            Number of items written.

        Raises:
            ValueError: If ``batch_size`` is smaller than 1.
            Exception: Any error raised while building or writing a batch is
                re-raised after a rollback. Batches committed before the
                failure stay committed.
        """
        # A negative step would make range() yield nothing, silently
        # dropping every row while reporting success.
        if batch_size < 1:
            raise ValueError(f"batch_size must be >= 1, got {batch_size}")

        written = 0
        try:
            for start in range(0, len(items), batch_size):
                batch = items[start:start + batch_size]
                self.conn.executemany(sql, [to_row(item) for item in batch])
                self.conn.commit()
                written += len(batch)
                logger.debug("Wrote batch of %d %s (total: %d)", len(batch), noun, written)
        except Exception as e:
            self.conn.rollback()
            logger.error("Failed to write %s batch: %s", noun, e)
            raise
        return written

    def write_conversations_batch(
        self,
        conversations: List[CanonicalAIChatConversation],
        batch_size: int = 1000,
    ) -> int:
        """Write multiple conversations to the canonical table in batches.

        Rows are written with INSERT OR REPLACE keyed on ``conversation_id``;
        each conversation's ``source`` is stored in the ``source_id`` column.

        Args:
            conversations: Conversations to write; an empty list is a no-op.
            batch_size: Maximum rows per commit; must be >= 1.

        Returns:
            Number of conversations written.

        Raises:
            ValueError: If ``batch_size`` is smaller than 1.
            Exception: Write errors are logged, rolled back and re-raised.
        """
        if not conversations:
            return 0
        return self._write_in_batches(
            """
            INSERT OR REPLACE INTO ai_chat_conversations (
                conversation_id, owner_user_id, title, source_id, created_at, updated_at
            ) VALUES (?, ?, ?, ?, ?, ?)
            """,
            conversations,
            lambda conv: (
                conv.conversation_id,
                conv.owner_user_id,
                conv.title,
                conv.source,
                conv.created_at,
                conv.updated_at,
            ),
            batch_size,
            "conversations",
        )

    def write_messages_batch(
        self,
        messages: List[CanonicalAIChatMessage],
        batch_size: int = 1000,
    ) -> int:
        """Write multiple messages to the canonical table in batches.

        Rows are written with INSERT OR REPLACE keyed on ``message_id``. The
        message's ``ts`` goes to ``event_at``, ``seq`` to ``sequence``, and
        ``metadata_json`` is stored as a JSON string (None when missing or
        empty).

        Args:
            messages: Messages to write; an empty list is a no-op.
            batch_size: Maximum rows per commit; must be >= 1.

        Returns:
            Number of messages written.

        Raises:
            ValueError: If ``batch_size`` is smaller than 1.
            Exception: Write errors are logged, rolled back and re-raised.
        """
        if not messages:
            return 0
        import json

        return self._write_in_batches(
            """
            INSERT OR REPLACE INTO ai_chat_messages (
                message_id, conversation_id, sender_type, sender_id, event_at,
                content, content_rendered, metadata_json, sequence, source_id
            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            """,
            messages,
            lambda msg: (
                msg.message_id,
                msg.conversation_id,
                msg.sender_type,
                msg.sender_id,
                msg.ts,
                msg.content,
                msg.content_rendered,
                json.dumps(msg.metadata_json) if msg.metadata_json else None,
                msg.seq,
                msg.source_id,
            ),
            batch_size,
            "messages",
        )

    def update_message_sequences(self, conversation_id: str) -> None:
        """Renumber ``sequence`` (0-based) for one conversation's messages.

        Messages are ordered by ``event_at``; ``rowid`` breaks ties so that
        messages sharing a timestamp get a stable order instead of whatever
        order SQLite happens to return.

        Args:
            conversation_id: Conversation whose messages are renumbered.

        Raises:
            Exception: Errors are logged, rolled back and re-raised.
        """
        try:
            cursor = self.conn.execute("""
                SELECT message_id
                FROM ai_chat_messages
                WHERE conversation_id = ?
                ORDER BY event_at ASC, rowid ASC
            """, (conversation_id,))
            ordered_ids = [row[0] for row in cursor.fetchall()]
            # enumerate() yields (sequence, message_id), matching the
            # placeholder order of the UPDATE below.
            self.conn.executemany("""
                UPDATE ai_chat_messages
                SET sequence = ?
                WHERE message_id = ?
            """, list(enumerate(ordered_ids)))
            self.conn.commit()
            logger.debug(
                "Updated sequences for conversation %s (%d messages)",
                conversation_id,
                len(ordered_ids),
            )
        except Exception as e:
            self.conn.rollback()
            logger.error("Failed to update message sequences: %s", e)
            raise
@@ -0,0 +1,24 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ from typing import Dict
5
+
6
+
7
@dataclass(frozen=True)
class CanonicalRef:
    """Immutable handle holding the ID of a record in a canonical store."""

    record_id: str
10
+
11
+
12
class CanonicalStore:
    """Base interface for canonical record stores; subclasses override upsert()."""

    def upsert(self, record: Dict[str, str]) -> CanonicalRef:
        """Insert or replace ``record`` and return a reference to it.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError
15
+
16
+
17
class InMemoryCanonicalStore(CanonicalStore):
    """CanonicalStore that keeps records in a dict on the instance."""

    def __init__(self):
        # record_id -> record as passed to upsert()
        self._records: Dict[str, Dict[str, str]] = {}

    def upsert(self, record: Dict[str, str]) -> CanonicalRef:
        """Store ``record`` under its ID, replacing any previous entry.

        The ID is the record's "record_id" if truthy, else its "message_id"
        if truthy, else the empty string.

        Returns:
            A CanonicalRef carrying the ID the record was stored under.
        """
        key = record.get("record_id")
        if not key:
            key = record.get("message_id")
        if not key:
            key = ""
        self._records[key] = record
        return CanonicalRef(record_id=key)