topos-node 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (249)
  1. shared/__init__.py +59 -0
  2. shared/filtering.py +640 -0
  3. shared/schema_registry.py +229 -0
  4. topos/__init__.py +5 -0
  5. topos/__version__.py +6 -0
  6. topos/analytics/__init__.py +15 -0
  7. topos/analytics/duckdb_adapter.py +48 -0
  8. topos/analytics/messenger_communities.py +349 -0
  9. topos/analytics/messenger_graph.py +522 -0
  10. topos/analytics/messenger_labels.py +321 -0
  11. topos/analytics/profiles.py +22 -0
  12. topos/analytics/query_engine.py +64 -0
  13. topos/analytics/raw_queries.py +174 -0
  14. topos/api/__init__.py +1 -0
  15. topos/api/analytics.py +52 -0
  16. topos/api/app_registry.py +31 -0
  17. topos/api/backup.py +15 -0
  18. topos/api/compute_remote.py +175 -0
  19. topos/api/data_commit.py +158 -0
  20. topos/api/data_explorer_table_prefs.py +81 -0
  21. topos/api/db.py +10 -0
  22. topos/api/device.py +25 -0
  23. topos/api/enrichment.py +959 -0
  24. topos/api/filter_lab.py +195 -0
  25. topos/api/health.py +61 -0
  26. topos/api/ingestion_api.py +37 -0
  27. topos/api/ingestion_compat.py +21 -0
  28. topos/api/ingestion_sources.py +600 -0
  29. topos/api/llm.py +76 -0
  30. topos/api/local_mcp.py +46 -0
  31. topos/api/messenger_analytics.py +385 -0
  32. topos/api/query_api.py +13 -0
  33. topos/api/sanitization_ollama_config.py +64 -0
  34. topos/api/source_install.py +324 -0
  35. topos/api/sources.py +13 -0
  36. topos/api/sync.py +10 -0
  37. topos/api/ui_config.py +83 -0
  38. topos/api/uma_data.py +311 -0
  39. topos/api/usage.py +49 -0
  40. topos/api/user_identity.py +46 -0
  41. topos/app.py +239 -0
  42. topos/auth.py +17 -0
  43. topos/canonicalization/__init__.py +1 -0
  44. topos/canonicalization/mappers/__init__.py +22 -0
  45. topos/canonicalization/mappers/base.py +26 -0
  46. topos/canonicalization/mappers/chatgpt_mapper.py +40 -0
  47. topos/canonicalization/mappers/grok_mapper.py +17 -0
  48. topos/canonicalization/mappers/messenger_mapper.py +58 -0
  49. topos/canonicalization/models.py +31 -0
  50. topos/canonicalization/resolver.py +23 -0
  51. topos/cli/__init__.py +1 -0
  52. topos/cli/__main__.py +6 -0
  53. topos/cli/commands.py +132 -0
  54. topos/config/__init__.py +1 -0
  55. topos/config/sanitization_ollama.py +189 -0
  56. topos/config/settings.py +310 -0
  57. topos/contacts/__init__.py +5 -0
  58. topos/contacts/identity.py +24 -0
  59. topos/control_plane_client.py +300 -0
  60. topos/core/__init__.py +1 -0
  61. topos/core/api_models.py +128 -0
  62. topos/core/connection_resilience.py +99 -0
  63. topos/core/device_helpers.py +8 -0
  64. topos/core/errors.py +13 -0
  65. topos/core/events.py +12 -0
  66. topos/core/handlers.py +5625 -0
  67. topos/core/logging.py +175 -0
  68. topos/core/metrics.py +21 -0
  69. topos/core/startup_banner.py +62 -0
  70. topos/core/state.py +682 -0
  71. topos/core/table_layers.py +45 -0
  72. topos/core/types.py +13 -0
  73. topos/data_explorer_table_prefs.py +150 -0
  74. topos/engine/__init__.py +29 -0
  75. topos/engine/backends/__init__.py +50 -0
  76. topos/engine/backends/base.py +21 -0
  77. topos/engine/backends/huggingface.py +151 -0
  78. topos/engine/backends/ollama.py +181 -0
  79. topos/engine/backends/stub.py +22 -0
  80. topos/engine/engine.py +165 -0
  81. topos/engine/intake.py +32 -0
  82. topos/engine/queue_manager.py +112 -0
  83. topos/engine/registration.py +126 -0
  84. topos/engine/result_formatter.py +38 -0
  85. topos/engine/router.py +19 -0
  86. topos/engine/scoped_token.py +82 -0
  87. topos/engine/tasks.py +154 -0
  88. topos/engine/transport.py +44 -0
  89. topos/engine/usage_guard.py +100 -0
  90. topos/engine/usage_observation.py +129 -0
  91. topos/engine/validator.py +23 -0
  92. topos/enrichment/__init__.py +1 -0
  93. topos/enrichment/derived_tables.py +214 -0
  94. topos/enrichment/jobs/__init__.py +30 -0
  95. topos/enrichment/jobs/base.py +54 -0
  96. topos/enrichment/jobs/canonical/__init__.py +1 -0
  97. topos/enrichment/jobs/canonical/embeddings_job.py +27 -0
  98. topos/enrichment/jobs/canonical/emo_27_job.py +97 -0
  99. topos/enrichment/jobs/canonical/entities_job.py +27 -0
  100. topos/enrichment/jobs/canonical/sentiment_job.py +27 -0
  101. topos/enrichment/jobs/canonical/topics_job.py +27 -0
  102. topos/enrichment/jobs/raw/__init__.py +1 -0
  103. topos/enrichment/jobs/raw/attachments_job.py +12 -0
  104. topos/enrichment/jobs/raw/language_job.py +12 -0
  105. topos/enrichment/jobs/raw/time_normalization_job.py +12 -0
  106. topos/enrichment/jobs/raw/tool_calls_job.py +12 -0
  107. topos/enrichment/models/__init__.py +1 -0
  108. topos/enrichment/models/manager.py +8 -0
  109. topos/enrichment/models/registry.py +71 -0
  110. topos/enrichment/models/versioning.py +8 -0
  111. topos/enrichment/orchestrator.py +177 -0
  112. topos/enrichment/processor.py +17 -0
  113. topos/enrichment/progress_bar.py +122 -0
  114. topos/enrichment/website_classifier.py +31 -0
  115. topos/filter_lab/__init__.py +1 -0
  116. topos/filter_lab/bundles.py +300 -0
  117. topos/filter_lab/schema.py +86 -0
  118. topos/filter_lab/service.py +167 -0
  119. topos/filter_lab/store.py +374 -0
  120. topos/filter_lab/worker.py +250 -0
  121. topos/hosted_pool_lease.py +153 -0
  122. topos/ingestion/__init__.py +1 -0
  123. topos/ingestion/checkpoints/__init__.py +6 -0
  124. topos/ingestion/checkpoints/checkpoint_store.py +24 -0
  125. topos/ingestion/checkpoints/sqlite_checkpoint_store.py +82 -0
  126. topos/ingestion/ingest_helpers.py +504 -0
  127. topos/ingestion/jobs.py +91 -0
  128. topos/ingestion/local_sync.py +823 -0
  129. topos/ingestion/log_preview.py +21 -0
  130. topos/ingestion/manager.py +1100 -0
  131. topos/ingestion/parser.py +174 -0
  132. topos/ingestion/parsers/__init__.py +32 -0
  133. topos/ingestion/parsers/base.py +24 -0
  134. topos/ingestion/parsers/browser_parser.py +171 -0
  135. topos/ingestion/parsers/calendar_parser.py +21 -0
  136. topos/ingestion/parsers/chatgpt_conversation_flattener.py +266 -0
  137. topos/ingestion/parsers/chatgpt_parser.py +67 -0
  138. topos/ingestion/parsers/grok_parser.py +21 -0
  139. topos/ingestion/parsers/messenger_parser.py +97 -0
  140. topos/ingestion/progress.py +54 -0
  141. topos/ingestion/sources/__init__.py +20 -0
  142. topos/ingestion/sources/base.py +39 -0
  143. topos/ingestion/sources/calendar.py +29 -0
  144. topos/ingestion/sources/chatgpt.py +29 -0
  145. topos/ingestion/sources/contact_importers.py +274 -0
  146. topos/ingestion/sources/grok.py +29 -0
  147. topos/ingestion/sources/imessage_reader.py +479 -0
  148. topos/ingestion/sources/signal_export_parser.py +132 -0
  149. topos/ingestion/sources/signal_reader.py +491 -0
  150. topos/ingestion/state_machine.py +70 -0
  151. topos/ingestion/triggers/__init__.py +1 -0
  152. topos/ingestion/triggers/file_trigger.py +36 -0
  153. topos/ingestion/triggers/sqlite_trigger.py +18 -0
  154. topos/ingestion/validation/__init__.py +1 -0
  155. topos/ingestion/validation/base.py +27 -0
  156. topos/ingestion/validation/schema_registry.py +111 -0
  157. topos/ingestion/validation/schema_validator.py +13 -0
  158. topos/lineage/__init__.py +1 -0
  159. topos/lineage/provenance.py +9 -0
  160. topos/lineage/tracker.py +9 -0
  161. topos/mcp_stdio_proxy.py +83 -0
  162. topos/observability/__init__.py +1 -0
  163. topos/observability/alerts.py +7 -0
  164. topos/observability/metrics.py +25 -0
  165. topos/observability/tracing.py +18 -0
  166. topos/openai_client.py +69 -0
  167. topos/projections/__init__.py +1 -0
  168. topos/projections/vector_index/__init__.py +1 -0
  169. topos/projections/vector_index/base.py +21 -0
  170. topos/projections/vector_index/builders.py +11 -0
  171. topos/projections/vector_index/health_checks.py +5 -0
  172. topos/rate_limit.py +43 -0
  173. topos/sanitization/__init__.py +16 -0
  174. topos/sanitization/ollama_transforms.py +276 -0
  175. topos/scope_resolution.py +89 -0
  176. topos/services/__init__.py +1 -0
  177. topos/services/container.py +46 -0
  178. topos/services/embeddings/__init__.py +1 -0
  179. topos/services/embeddings/base.py +7 -0
  180. topos/services/embeddings/local.py +9 -0
  181. topos/services/embeddings/remote.py +9 -0
  182. topos/services/interfaces.py +40 -0
  183. topos/services/llm/__init__.py +1 -0
  184. topos/services/llm/base.py +7 -0
  185. topos/services/llm/openai.py +126 -0
  186. topos/services/local.py +123 -0
  187. topos/services/postgres.py +385 -0
  188. topos/sources/__init__.py +6 -0
  189. topos/sources/definitions.py +114 -0
  190. topos/sources/install_service.py +836 -0
  191. topos/sources/registry.py +263 -0
  192. topos/sources/runtime_install.py +427 -0
  193. topos/storage/__init__.py +1 -0
  194. topos/storage/canonical/__init__.py +18 -0
  195. topos/storage/canonical/ai_chat/__init__.py +22 -0
  196. topos/storage/canonical/ai_chat/canonicalizer.py +147 -0
  197. topos/storage/canonical/ai_chat/mapper.py +168 -0
  198. topos/storage/canonical/ai_chat/model.py +87 -0
  199. topos/storage/canonical/ai_chat/tables.py +179 -0
  200. topos/storage/canonical/canonical_store.py +24 -0
  201. topos/storage/canonical/conversations_tables.py +1020 -0
  202. topos/storage/canonical/mapping_store.py +30 -0
  203. topos/storage/canonical/postgres.py +10 -0
  204. topos/storage/db/__init__.py +1 -0
  205. topos/storage/db/client.py +8 -0
  206. topos/storage/db/migrations/__init__.py +1 -0
  207. topos/storage/db/migrations/stage9_column_renames.py +78 -0
  208. topos/storage/db/paths.py +122 -0
  209. topos/storage/db/postgres.py +240 -0
  210. topos/storage/db/schema.py +6 -0
  211. topos/storage/enrichment/__init__.py +1 -0
  212. topos/storage/enrichment/canonical_enrichment_store.py +7 -0
  213. topos/storage/enrichment/raw_enrichment_store.py +18 -0
  214. topos/storage/normalized/__init__.py +1 -0
  215. topos/storage/normalized/normalized_store.py +24 -0
  216. topos/storage/oplog/__init__.py +1 -0
  217. topos/storage/oplog/decision.py +6 -0
  218. topos/storage/oplog/oplog_store.py +17 -0
  219. topos/storage/oplog/postgres.py +10 -0
  220. topos/storage/projections/__init__.py +1 -0
  221. topos/storage/projections/index_ops_store.py +6 -0
  222. topos/storage/projections/vector_index_store.py +6 -0
  223. topos/storage/raw/__init__.py +1 -0
  224. topos/storage/raw/browser_flat_tables.py +303 -0
  225. topos/storage/raw/file_store.py +100 -0
  226. topos/storage/raw/raw_store.py +29 -0
  227. topos/storage/raw/raw_tables_manager.py +295 -0
  228. topos/storage/raw/sqlite_raw_store.py +17 -0
  229. topos/storage/security/encryption.py +21 -0
  230. topos/storage/signal_identity.py +71 -0
  231. topos/storage/source_settings.py +116 -0
  232. topos/storage/user_identity.py +69 -0
  233. topos/sync/__init__.py +5 -0
  234. topos/sync/client.py +272 -0
  235. topos/sync_handlers.py +70 -0
  236. topos/testing/__init__.py +1 -0
  237. topos/testing/lifespan.py +7 -0
  238. topos/uma_contact_enrichment.py +1032 -0
  239. topos/uma_filters.py +669 -0
  240. topos/uma_resource_id.py +24 -0
  241. topos/uma_rpt.py +69 -0
  242. topos/utils/base_object.py +61 -0
  243. topos/websocket_client.py +21 -0
  244. topos_node-0.1.0.dist-info/METADATA +199 -0
  245. topos_node-0.1.0.dist-info/RECORD +249 -0
  246. topos_node-0.1.0.dist-info/WHEEL +5 -0
  247. topos_node-0.1.0.dist-info/entry_points.txt +2 -0
  248. topos_node-0.1.0.dist-info/licenses/LICENSE +201 -0
  249. topos_node-0.1.0.dist-info/top_level.txt +2 -0
@@ -0,0 +1,111 @@
1
+ """Minimal schema registry for ingestion validation."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ from typing import Any, Dict, Optional
7
+
8
+ logger = logging.getLogger("topos.ingestion.schema_registry")
9
+
10
def _flat_message_schema(
    name: str,
    version: str,
    description: str,
    file_format: str,
    **extra: Any,
) -> Dict[str, Any]:
    """Build one schema definition for the flat id/thread_id/role/content/created_at record shape.

    Every call creates fresh ``required_fields`` and ``field_types`` containers,
    so registered schemas never share mutable state. Keyword arguments in
    ``extra`` are appended after the common keys, in the order given.
    """
    definition: Dict[str, Any] = {
        "name": name,
        "version": version,
        "required_fields": ["id", "thread_id", "role", "content", "created_at"],
        "field_types": {
            "id": str,
            "thread_id": str,
            "role": str,
            "content": str,
            "created_at": (int, float, str),
        },
        "description": description,
        "file_format": file_format,
    }
    definition.update(extra)
    return definition


# Registry of known ingestion schemas, keyed by schema id.
SCHEMAS: Dict[str, Dict[str, Any]] = {
    "chatgpt.conversation.v1": _flat_message_schema(
        "ChatGPT Conversation Export",
        "1",
        "ChatGPT conversation export format (JSONL - flat records)",
        "jsonl",
    ),
    "chatgpt.conversation.v2": _flat_message_schema(
        "ChatGPT Real Export Format",
        "2",
        "ChatGPT real export format (JSON array of conversation objects, flattened to v1 format)",
        "json",
        source_structure="conversation_array",
        note="Flattened records match v1 format, but source is nested conversation objects",
    ),
    # Sprint 02: Messenger ingestion (same logical shape as chat for conversation_messages)
    "imessage.messages.v1": _flat_message_schema(
        "iMessage Messages",
        "1",
        "iMessage message format (normalized from chat.db or sync); id may be imessage:ROWID",
        "jsonl",
    ),
    "signal.messages.v1": _flat_message_schema(
        "Signal Messages",
        "1",
        "Signal Desktop message format (normalized from SQLCipher or export); id may be signal:uuid",
        "jsonl",
    ),
}
71
+
72
+
73
def get_schema_definition(schema_id: str) -> Optional[Dict[str, Any]]:
    """Look up a registered schema by its identifier.

    Returns:
        The definition stored in ``SCHEMAS`` under ``schema_id``, or ``None``
        when no schema with that id is registered.
    """
    try:
        return SCHEMAS[schema_id]
    except KeyError:
        return None
75
+
76
+
77
def validate_schema(record: Dict[str, Any], schema_id: str) -> tuple[bool, Optional[str]]:
    """Check ``record`` against the schema registered under ``schema_id``.

    Validation stops at the first problem found: an unknown schema id, then a
    missing required field, then a field whose value has an unexpected type.
    Fields listed in ``field_types`` but absent from the record are skipped,
    as are fields whose expected type is ``None``.

    Returns:
        ``(True, None)`` when the record passes, otherwise ``(False, message)``
        where ``message`` describes the first failure.
    """
    schema = get_schema_definition(schema_id)
    if not schema:
        return False, f"Unknown schema: {schema_id}"

    for name in schema.get("required_fields", []):
        if name not in record:
            return False, f"Missing required field: {name}"

    for name, expected in schema.get("field_types", {}).items():
        if name not in record or expected is None:
            continue
        actual = record[name]
        if isinstance(expected, tuple):
            # A tuple lists alternative types; matching any one is enough.
            if any(isinstance(actual, candidate) for candidate in expected):
                continue
            wanted = f"Expected one of {expected}"
        else:
            if isinstance(actual, expected):
                continue
            wanted = f"Expected {expected.__name__}"
        return False, f"Field '{name}' has invalid type. {wanted}, got {type(actual).__name__}"

    return True, None
107
+
108
+
109
def register_schema(schema_id: str, schema_def: Dict[str, Any]) -> None:
    """Store ``schema_def`` in ``SCHEMAS`` under ``schema_id`` and log the registration.

    An existing entry with the same id is replaced.
    """
    SCHEMAS.update({schema_id: schema_def})
    logger.info("Registered schema: %s", schema_id)
@@ -0,0 +1,13 @@
1
+ """Default schema validator (no-op placeholder)."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any, Dict, Optional
6
+
7
+ from .base import SchemaDefinition, SchemaValidator, ValidationResult
8
+
9
+
10
class NoOpSchemaValidator(SchemaValidator):
    """Placeholder validator that reports every record as valid."""

    def validate(self, record: Dict[str, Any], schema: Optional[SchemaDefinition] = None) -> ValidationResult:
        """Return a successful result without inspecting ``record`` or ``schema``."""
        del record, schema  # intentionally unused
        return ValidationResult(is_valid=True, errors=[], metadata={})
@@ -0,0 +1 @@
1
+ """Lineage and provenance tracking for Topos."""
@@ -0,0 +1,9 @@
1
+ """Provenance stubs for Topos."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Dict
6
+
7
+
8
def record_provenance(payload: Dict[str, str]) -> None:
    """Stub: accept a provenance payload and discard it."""
    del payload  # intentionally unused
    return None
@@ -0,0 +1,9 @@
1
+ """Lineage tracking stubs."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Dict
6
+
7
+
8
def record_lineage(payload: Dict[str, str]) -> None:
    """Stub: accept a lineage payload and discard it."""
    del payload  # intentionally unused
    return None
@@ -0,0 +1,83 @@
1
+ """
2
+ Stdio MCP proxy: Claude Desktop → local Topos engine (no Control Plane).
3
+
4
+ Run this so Claude talks MCP over stdio to the proxy; the proxy forwards tool calls
5
+ to the engine's /api/local/* HTTP endpoints. Use when the engine and Claude run on
6
+ the same machine.
7
+
8
+ Usage:
9
+ ENGINE_URL=http://localhost:9000 BEARER_TOKEN=your_key python -m topos.mcp_stdio_proxy
10
+ # or with args:
11
+ python -m topos.mcp_stdio_proxy --url http://localhost:9000
12
+
13
+ Claude Desktop config (direct to local engine): use scripts/run_local_mcp_proxy.sh
14
+ with full path, args ["--url", "http://localhost:9000"], and env BEARER_TOKEN.
15
+
16
+ Only list_database_tables and get_table_schema are exposed (engine's /api/local/*).
17
+ For full tools (get_analytics, get_messages, get_oplog) use the Control Plane.
18
+ """
19
+
20
+ from __future__ import annotations
21
+
22
+ import argparse
23
+ import os
24
+ import sys
25
+
26
+ import httpx
27
+ from mcp.server.fastmcp import FastMCP
28
+
29
+ # Engine URL and token; set in main() before FastMCP runs.
30
+ _engine_url: str = ""
31
+ _bearer_token: str = ""
32
+
33
+
34
def _headers() -> dict[str, str]:
    """Build the HTTP headers for an engine request from the module-level bearer token."""
    headers = {"Authorization": f"Bearer {_bearer_token}"}
    headers["Content-Type"] = "application/json"
    return headers
36
+
37
+
38
async def _call_engine(path: str, json_body: dict | None = None) -> dict:
    """POST ``json_body`` to the engine at ``path`` and return the decoded JSON reply.

    A missing or empty ``json_body`` is sent as ``{}``. The request uses a
    30-second timeout and the headers from ``_headers()``.

    Raises:
        ValueError: when the reply is a dict whose ``status`` is ``"error"``;
            the message is the reply's ``error`` value, or ``"engine error"``.
        Any exception raised by ``raise_for_status()`` or by the HTTP client
        propagates unchanged.
    """
    base = _engine_url.rstrip("/")
    body = json_body if json_body else {}
    async with httpx.AsyncClient(timeout=30.0) as client:
        response = await client.post(f"{base}{path}", headers=_headers(), json=body)
        response.raise_for_status()
        payload = response.json()
    engine_failed = isinstance(payload, dict) and payload.get("status") == "error"
    if engine_failed:
        raise ValueError(payload.get("error", "engine error"))
    return payload
47
+
48
+
49
def main() -> int:
    """Configure the proxy from CLI/environment, register the MCP tools and serve over stdio.

    The engine URL comes from ``--url`` (default: ``ENGINE_URL`` or
    http://localhost:9000); the bearer token comes from the ``BEARER_TOKEN``
    environment variable. Both are stored in module-level globals so the tool
    handlers can reach them.

    Returns:
        1 when no bearer token is configured, otherwise 0 once ``run`` returns.
    """
    global _engine_url, _bearer_token

    cli = argparse.ArgumentParser(
        description="MCP stdio proxy to local Topos engine (/api/local/*). No Control Plane."
    )
    default_url = os.environ.get("ENGINE_URL", "http://localhost:9000")
    cli.add_argument(
        "--url",
        default=default_url,
        help="Engine base URL (default: ENGINE_URL or http://localhost:9000)",
    )
    options = cli.parse_args()
    _engine_url = options.url.rstrip("/")

    raw_token = os.environ.get("BEARER_TOKEN") or ""
    _bearer_token = raw_token.strip()
    if not _bearer_token:
        print("Error: BEARER_TOKEN env var required.", file=sys.stderr)
        return 1

    server = FastMCP("Topos (local engine)", port=0)

    @server.tool()
    async def list_database_tables() -> dict:
        """List all database tables grouped by layer with row counts."""
        return await _call_engine("/api/local/list_database_tables")

    @server.tool()
    async def get_table_schema(table_name: str) -> dict:
        """Get column info (schema) for a table. Use list_database_tables to see available tables."""
        return await _call_engine("/api/local/get_table_schema", {"table_name": table_name})

    server.run(transport="stdio")
    return 0
80
+
81
+
82
+ if __name__ == "__main__":
83
+ sys.exit(main())
@@ -0,0 +1 @@
1
+ """Observability utilities for Topos."""
@@ -0,0 +1,7 @@
1
+ """Alerting stubs for Topos."""
2
+
3
+ from __future__ import annotations
4
+
5
+
6
def send_alert(message: str) -> None:
    """Stub: accept an alert message and discard it."""
    del message  # intentionally unused
    return None
@@ -0,0 +1,25 @@
1
+ """Observability metrics (Sprint 07). In-memory counters; extend for Prometheus later."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import threading
6
+ from typing import Dict
7
+
8
+
9
# Running totals keyed by metric name; every access goes through _lock.
_counts: Dict[str, float] = {}
_lock = threading.Lock()


def record_metric(name: str, value: float) -> None:
    """Add ``value`` to the running total stored under ``name``."""
    with _lock:
        previous = _counts.get(name, 0)
        _counts[name] = previous + value


def get_metric(name: str) -> float:
    """Return the running total for ``name``, or ``0.0`` if nothing was recorded."""
    with _lock:
        total = _counts.get(name, 0.0)
    return total


def reset_metrics() -> None:
    """Drop every recorded total."""
    with _lock:
        _counts.clear()
@@ -0,0 +1,18 @@
1
+ """Tracing stubs for Topos."""
2
+
3
+ from __future__ import annotations
4
+
5
+
6
class Span:
    """Placeholder tracing span: a context manager that only remembers its name."""

    def __init__(self, name: str):
        self.name = name

    def __enter__(self):
        """Enter the span; the span itself is the ``as`` target."""
        return self

    def __exit__(self, exc_type, exc, tb):
        """Leave the span; returns None, so an exception from the body propagates."""
        del exc_type, exc, tb  # intentionally unused
        return None
15
+
16
+
17
def start_span(name: str) -> Span:
    """Create a new ``Span`` carrying ``name``."""
    span = Span(name)
    return span
topos/openai_client.py ADDED
@@ -0,0 +1,69 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ from typing import Any, Dict, Optional
5
+
6
+ import httpx
7
+
8
+ from .config.settings import settings
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+
13
class OpenAIError(Exception):
    """Raised by the OpenAI client for request failures, upstream error statuses and malformed replies."""

    pass
15
+
16
+
17
class OpenAIClient:
    """Minimal OpenAI chat completions client."""

    def __init__(self, api_key: str | None = None, base_url: str | None = None) -> None:
        """Store credentials and endpoint, falling back to project settings for omitted values."""
        self.api_key = api_key or settings.openai_api_key
        self.base_url = base_url or settings.openai_base_url
        self.timeout = settings.openai_timeout_seconds

    async def generate(
        self,
        prompt: str,
        max_tokens: Optional[int],
        temperature: Optional[float],
    ) -> Dict[str, Any]:
        """Send ``prompt`` as a single user message and return the first completion.

        ``max_tokens`` and ``temperature`` are only included in the request
        when they are not ``None``.

        Returns:
            ``{"output": <message content>, "usage": <usage value from the reply>}``.

        Raises:
            OpenAIError: ``"request failed: ..."`` when the HTTP call itself
                raises, ``"rate_limited"`` on status 429,
                ``"upstream_error: ..."`` on any other status >= 400, and
                ``"invalid_response"`` when a successful reply is not valid
                JSON or lacks the expected structure.
        """
        url = f"{self.base_url.rstrip('/')}/chat/completions"
        headers = {"Authorization": f"Bearer {self.api_key}"}
        payload: Dict[str, Any] = {
            "model": settings.openai_model,
            "messages": [
                {"role": "system", "content": "You are a concise assistant."},
                {"role": "user", "content": prompt},
            ],
        }
        if max_tokens is not None:
            payload["max_tokens"] = max_tokens
        if temperature is not None:
            payload["temperature"] = temperature

        try:
            async with httpx.AsyncClient(timeout=self.timeout) as client:
                resp = await client.post(url, headers=headers, json=payload)
        except Exception as exc:  # noqa: BLE001
            logger.error("OpenAI request failed: %s", exc)
            raise OpenAIError(f"request failed: {exc}") from exc

        if resp.status_code == 429:
            raise OpenAIError("rate_limited")

        if resp.status_code >= 400:
            # Best effort: prefer the structured error body, fall back to raw text.
            try:
                detail = resp.json()
            except Exception:
                detail = resp.text
            raise OpenAIError(f"upstream_error: {resp.status_code}: {detail}")

        try:
            # Decoding happens inside the guarded block so that a non-JSON
            # success body surfaces as OpenAIError rather than a raw decode error.
            data = resp.json()
            message = data["choices"][0]["message"]["content"]
            usage = data.get("usage", {})
        except Exception as exc:  # noqa: BLE001
            raise OpenAIError("invalid_response") from exc

        return {"output": message, "usage": usage}
@@ -0,0 +1 @@
1
+ """Projection builders for Topos."""
@@ -0,0 +1 @@
1
+ """Vector index projection abstractions."""
@@ -0,0 +1,21 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ from typing import Iterable, List
5
+
6
+
7
@dataclass(frozen=True)
class EmbeddingRow:
    """Immutable pairing of a record identifier with its embedding vector."""

    # Identifier of the record the vector belongs to.
    record_id: str
    # Embedding components.
    vector: List[float]
11
+
12
+
13
@dataclass(frozen=True)
class ProjectionStatus:
    """Immutable result of a projection build."""

    # Status label reported by the builder.
    status: str
    # Count reported by the builder.
    count: int
17
+
18
+
19
class ProjectionBuilder:
    """Base class for projection builders; subclasses override ``build``."""

    def build(self, embeddings: Iterable[EmbeddingRow]) -> ProjectionStatus:
        """Build a projection from ``embeddings``; not implemented in the base class."""
        raise NotImplementedError()
@@ -0,0 +1,11 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Iterable
4
+
5
+ from .base import EmbeddingRow, ProjectionBuilder, ProjectionStatus
6
+
7
+
8
class VectorIndexBuilder(ProjectionBuilder):
    """Stub builder that only counts the embeddings it is given."""

    def build(self, embeddings: Iterable[EmbeddingRow]) -> ProjectionStatus:
        """Consume ``embeddings`` and report how many rows were seen, with status ``"stub"``."""
        seen = 0
        for _ in embeddings:
            seen += 1
        return ProjectionStatus(status="stub", count=seen)
@@ -0,0 +1,5 @@
1
+ from __future__ import annotations
2
+
3
+
4
def check_index_health() -> dict:
    """Return a fixed placeholder health report for the vector index."""
    report = dict(status="stub")
    return report
topos/rate_limit.py ADDED
@@ -0,0 +1,43 @@
1
+ from __future__ import annotations
2
+
3
+ import time
4
+ from typing import Dict
5
+
6
+ from fastapi import HTTPException, Request, status
7
+
8
+ from .config.settings import settings
9
+
10
+
11
class TokenBucket:
    """Token bucket that starts full and refills continuously at a per-minute rate."""

    def __init__(self, rate_per_minute: int) -> None:
        """Create a full bucket holding ``rate_per_minute`` tokens."""
        self.capacity = rate_per_minute
        self.tokens = rate_per_minute
        # Monotonic clock: elapsed-time arithmetic must not be affected by
        # wall-clock adjustments, which could stall refill or grant a burst.
        self.refill_time = time.monotonic()
        self.rate_per_second = rate_per_minute / 60.0

    def consume(self, tokens: int = 1) -> bool:
        """Refill for the time elapsed since the last refill, then try to take ``tokens``.

        Returns:
            ``True`` and deducts the tokens when enough are available;
            ``False`` (leaving the balance untouched) otherwise.
        """
        now = time.monotonic()
        elapsed = now - self.refill_time
        refill = elapsed * self.rate_per_second
        if refill > 0:
            # Never accumulate beyond the bucket's capacity.
            self.tokens = min(self.capacity, self.tokens + refill)
            self.refill_time = now
        if self.tokens >= tokens:
            self.tokens -= tokens
            return True
        return False
29
+
30
+
31
+ _buckets: Dict[str, TokenBucket] = {}
32
+
33
+
34
def rate_limit(request: Request) -> None:
    """Apply the in-memory per-client-IP rate limit to ``request``.

    Each client host gets its own ``TokenBucket`` sized from
    ``settings.rate_limit_per_minute``; requests without client information
    share the ``"unknown"`` bucket.

    Raises:
        HTTPException: status 429 when the client's bucket has no token left.
    """
    ip = request.client.host if request.client else "unknown"
    try:
        bucket = _buckets[ip]
    except KeyError:
        # First request from this address: start it with a fresh bucket.
        bucket = TokenBucket(settings.rate_limit_per_minute)
        _buckets[ip] = bucket

    if bucket.consume():
        return
    raise HTTPException(status_code=status.HTTP_429_TOO_MANY_REQUESTS, detail="Too many requests")
@@ -0,0 +1,16 @@
1
+ """Runtime text sanitization helpers (optional Ollama-backed field transforms)."""
2
+
3
+ from topos.config.sanitization_ollama import SANITIZATION_OLLAMA_TRANSFORM_IDS
4
+
5
+ from .ollama_transforms import (
6
+ OLLAMA_TRANSFORM_IDS,
7
+ apply_text_transform_with_ollama,
8
+ ollama_sanitization_enabled,
9
+ )
10
+
11
+ __all__ = [
12
+ "SANITIZATION_OLLAMA_TRANSFORM_IDS",
13
+ "OLLAMA_TRANSFORM_IDS",
14
+ "apply_text_transform_with_ollama",
15
+ "ollama_sanitization_enabled",
16
+ ]