topos-node 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- shared/__init__.py +59 -0
- shared/filtering.py +640 -0
- shared/schema_registry.py +229 -0
- topos/__init__.py +5 -0
- topos/__version__.py +6 -0
- topos/analytics/__init__.py +15 -0
- topos/analytics/duckdb_adapter.py +48 -0
- topos/analytics/messenger_communities.py +349 -0
- topos/analytics/messenger_graph.py +522 -0
- topos/analytics/messenger_labels.py +321 -0
- topos/analytics/profiles.py +22 -0
- topos/analytics/query_engine.py +64 -0
- topos/analytics/raw_queries.py +174 -0
- topos/api/__init__.py +1 -0
- topos/api/analytics.py +52 -0
- topos/api/app_registry.py +31 -0
- topos/api/backup.py +15 -0
- topos/api/compute_remote.py +175 -0
- topos/api/data_commit.py +158 -0
- topos/api/data_explorer_table_prefs.py +81 -0
- topos/api/db.py +10 -0
- topos/api/device.py +25 -0
- topos/api/enrichment.py +959 -0
- topos/api/filter_lab.py +195 -0
- topos/api/health.py +61 -0
- topos/api/ingestion_api.py +37 -0
- topos/api/ingestion_compat.py +21 -0
- topos/api/ingestion_sources.py +600 -0
- topos/api/llm.py +76 -0
- topos/api/local_mcp.py +46 -0
- topos/api/messenger_analytics.py +385 -0
- topos/api/query_api.py +13 -0
- topos/api/sanitization_ollama_config.py +64 -0
- topos/api/source_install.py +324 -0
- topos/api/sources.py +13 -0
- topos/api/sync.py +10 -0
- topos/api/ui_config.py +83 -0
- topos/api/uma_data.py +311 -0
- topos/api/usage.py +49 -0
- topos/api/user_identity.py +46 -0
- topos/app.py +239 -0
- topos/auth.py +17 -0
- topos/canonicalization/__init__.py +1 -0
- topos/canonicalization/mappers/__init__.py +22 -0
- topos/canonicalization/mappers/base.py +26 -0
- topos/canonicalization/mappers/chatgpt_mapper.py +40 -0
- topos/canonicalization/mappers/grok_mapper.py +17 -0
- topos/canonicalization/mappers/messenger_mapper.py +58 -0
- topos/canonicalization/models.py +31 -0
- topos/canonicalization/resolver.py +23 -0
- topos/cli/__init__.py +1 -0
- topos/cli/__main__.py +6 -0
- topos/cli/commands.py +132 -0
- topos/config/__init__.py +1 -0
- topos/config/sanitization_ollama.py +189 -0
- topos/config/settings.py +310 -0
- topos/contacts/__init__.py +5 -0
- topos/contacts/identity.py +24 -0
- topos/control_plane_client.py +300 -0
- topos/core/__init__.py +1 -0
- topos/core/api_models.py +128 -0
- topos/core/connection_resilience.py +99 -0
- topos/core/device_helpers.py +8 -0
- topos/core/errors.py +13 -0
- topos/core/events.py +12 -0
- topos/core/handlers.py +5625 -0
- topos/core/logging.py +175 -0
- topos/core/metrics.py +21 -0
- topos/core/startup_banner.py +62 -0
- topos/core/state.py +682 -0
- topos/core/table_layers.py +45 -0
- topos/core/types.py +13 -0
- topos/data_explorer_table_prefs.py +150 -0
- topos/engine/__init__.py +29 -0
- topos/engine/backends/__init__.py +50 -0
- topos/engine/backends/base.py +21 -0
- topos/engine/backends/huggingface.py +151 -0
- topos/engine/backends/ollama.py +181 -0
- topos/engine/backends/stub.py +22 -0
- topos/engine/engine.py +165 -0
- topos/engine/intake.py +32 -0
- topos/engine/queue_manager.py +112 -0
- topos/engine/registration.py +126 -0
- topos/engine/result_formatter.py +38 -0
- topos/engine/router.py +19 -0
- topos/engine/scoped_token.py +82 -0
- topos/engine/tasks.py +154 -0
- topos/engine/transport.py +44 -0
- topos/engine/usage_guard.py +100 -0
- topos/engine/usage_observation.py +129 -0
- topos/engine/validator.py +23 -0
- topos/enrichment/__init__.py +1 -0
- topos/enrichment/derived_tables.py +214 -0
- topos/enrichment/jobs/__init__.py +30 -0
- topos/enrichment/jobs/base.py +54 -0
- topos/enrichment/jobs/canonical/__init__.py +1 -0
- topos/enrichment/jobs/canonical/embeddings_job.py +27 -0
- topos/enrichment/jobs/canonical/emo_27_job.py +97 -0
- topos/enrichment/jobs/canonical/entities_job.py +27 -0
- topos/enrichment/jobs/canonical/sentiment_job.py +27 -0
- topos/enrichment/jobs/canonical/topics_job.py +27 -0
- topos/enrichment/jobs/raw/__init__.py +1 -0
- topos/enrichment/jobs/raw/attachments_job.py +12 -0
- topos/enrichment/jobs/raw/language_job.py +12 -0
- topos/enrichment/jobs/raw/time_normalization_job.py +12 -0
- topos/enrichment/jobs/raw/tool_calls_job.py +12 -0
- topos/enrichment/models/__init__.py +1 -0
- topos/enrichment/models/manager.py +8 -0
- topos/enrichment/models/registry.py +71 -0
- topos/enrichment/models/versioning.py +8 -0
- topos/enrichment/orchestrator.py +177 -0
- topos/enrichment/processor.py +17 -0
- topos/enrichment/progress_bar.py +122 -0
- topos/enrichment/website_classifier.py +31 -0
- topos/filter_lab/__init__.py +1 -0
- topos/filter_lab/bundles.py +300 -0
- topos/filter_lab/schema.py +86 -0
- topos/filter_lab/service.py +167 -0
- topos/filter_lab/store.py +374 -0
- topos/filter_lab/worker.py +250 -0
- topos/hosted_pool_lease.py +153 -0
- topos/ingestion/__init__.py +1 -0
- topos/ingestion/checkpoints/__init__.py +6 -0
- topos/ingestion/checkpoints/checkpoint_store.py +24 -0
- topos/ingestion/checkpoints/sqlite_checkpoint_store.py +82 -0
- topos/ingestion/ingest_helpers.py +504 -0
- topos/ingestion/jobs.py +91 -0
- topos/ingestion/local_sync.py +823 -0
- topos/ingestion/log_preview.py +21 -0
- topos/ingestion/manager.py +1100 -0
- topos/ingestion/parser.py +174 -0
- topos/ingestion/parsers/__init__.py +32 -0
- topos/ingestion/parsers/base.py +24 -0
- topos/ingestion/parsers/browser_parser.py +171 -0
- topos/ingestion/parsers/calendar_parser.py +21 -0
- topos/ingestion/parsers/chatgpt_conversation_flattener.py +266 -0
- topos/ingestion/parsers/chatgpt_parser.py +67 -0
- topos/ingestion/parsers/grok_parser.py +21 -0
- topos/ingestion/parsers/messenger_parser.py +97 -0
- topos/ingestion/progress.py +54 -0
- topos/ingestion/sources/__init__.py +20 -0
- topos/ingestion/sources/base.py +39 -0
- topos/ingestion/sources/calendar.py +29 -0
- topos/ingestion/sources/chatgpt.py +29 -0
- topos/ingestion/sources/contact_importers.py +274 -0
- topos/ingestion/sources/grok.py +29 -0
- topos/ingestion/sources/imessage_reader.py +479 -0
- topos/ingestion/sources/signal_export_parser.py +132 -0
- topos/ingestion/sources/signal_reader.py +491 -0
- topos/ingestion/state_machine.py +70 -0
- topos/ingestion/triggers/__init__.py +1 -0
- topos/ingestion/triggers/file_trigger.py +36 -0
- topos/ingestion/triggers/sqlite_trigger.py +18 -0
- topos/ingestion/validation/__init__.py +1 -0
- topos/ingestion/validation/base.py +27 -0
- topos/ingestion/validation/schema_registry.py +111 -0
- topos/ingestion/validation/schema_validator.py +13 -0
- topos/lineage/__init__.py +1 -0
- topos/lineage/provenance.py +9 -0
- topos/lineage/tracker.py +9 -0
- topos/mcp_stdio_proxy.py +83 -0
- topos/observability/__init__.py +1 -0
- topos/observability/alerts.py +7 -0
- topos/observability/metrics.py +25 -0
- topos/observability/tracing.py +18 -0
- topos/openai_client.py +69 -0
- topos/projections/__init__.py +1 -0
- topos/projections/vector_index/__init__.py +1 -0
- topos/projections/vector_index/base.py +21 -0
- topos/projections/vector_index/builders.py +11 -0
- topos/projections/vector_index/health_checks.py +5 -0
- topos/rate_limit.py +43 -0
- topos/sanitization/__init__.py +16 -0
- topos/sanitization/ollama_transforms.py +276 -0
- topos/scope_resolution.py +89 -0
- topos/services/__init__.py +1 -0
- topos/services/container.py +46 -0
- topos/services/embeddings/__init__.py +1 -0
- topos/services/embeddings/base.py +7 -0
- topos/services/embeddings/local.py +9 -0
- topos/services/embeddings/remote.py +9 -0
- topos/services/interfaces.py +40 -0
- topos/services/llm/__init__.py +1 -0
- topos/services/llm/base.py +7 -0
- topos/services/llm/openai.py +126 -0
- topos/services/local.py +123 -0
- topos/services/postgres.py +385 -0
- topos/sources/__init__.py +6 -0
- topos/sources/definitions.py +114 -0
- topos/sources/install_service.py +836 -0
- topos/sources/registry.py +263 -0
- topos/sources/runtime_install.py +427 -0
- topos/storage/__init__.py +1 -0
- topos/storage/canonical/__init__.py +18 -0
- topos/storage/canonical/ai_chat/__init__.py +22 -0
- topos/storage/canonical/ai_chat/canonicalizer.py +147 -0
- topos/storage/canonical/ai_chat/mapper.py +168 -0
- topos/storage/canonical/ai_chat/model.py +87 -0
- topos/storage/canonical/ai_chat/tables.py +179 -0
- topos/storage/canonical/canonical_store.py +24 -0
- topos/storage/canonical/conversations_tables.py +1020 -0
- topos/storage/canonical/mapping_store.py +30 -0
- topos/storage/canonical/postgres.py +10 -0
- topos/storage/db/__init__.py +1 -0
- topos/storage/db/client.py +8 -0
- topos/storage/db/migrations/__init__.py +1 -0
- topos/storage/db/migrations/stage9_column_renames.py +78 -0
- topos/storage/db/paths.py +122 -0
- topos/storage/db/postgres.py +240 -0
- topos/storage/db/schema.py +6 -0
- topos/storage/enrichment/__init__.py +1 -0
- topos/storage/enrichment/canonical_enrichment_store.py +7 -0
- topos/storage/enrichment/raw_enrichment_store.py +18 -0
- topos/storage/normalized/__init__.py +1 -0
- topos/storage/normalized/normalized_store.py +24 -0
- topos/storage/oplog/__init__.py +1 -0
- topos/storage/oplog/decision.py +6 -0
- topos/storage/oplog/oplog_store.py +17 -0
- topos/storage/oplog/postgres.py +10 -0
- topos/storage/projections/__init__.py +1 -0
- topos/storage/projections/index_ops_store.py +6 -0
- topos/storage/projections/vector_index_store.py +6 -0
- topos/storage/raw/__init__.py +1 -0
- topos/storage/raw/browser_flat_tables.py +303 -0
- topos/storage/raw/file_store.py +100 -0
- topos/storage/raw/raw_store.py +29 -0
- topos/storage/raw/raw_tables_manager.py +295 -0
- topos/storage/raw/sqlite_raw_store.py +17 -0
- topos/storage/security/encryption.py +21 -0
- topos/storage/signal_identity.py +71 -0
- topos/storage/source_settings.py +116 -0
- topos/storage/user_identity.py +69 -0
- topos/sync/__init__.py +5 -0
- topos/sync/client.py +272 -0
- topos/sync_handlers.py +70 -0
- topos/testing/__init__.py +1 -0
- topos/testing/lifespan.py +7 -0
- topos/uma_contact_enrichment.py +1032 -0
- topos/uma_filters.py +669 -0
- topos/uma_resource_id.py +24 -0
- topos/uma_rpt.py +69 -0
- topos/utils/base_object.py +61 -0
- topos/websocket_client.py +21 -0
- topos_node-0.1.0.dist-info/METADATA +199 -0
- topos_node-0.1.0.dist-info/RECORD +249 -0
- topos_node-0.1.0.dist-info/WHEEL +5 -0
- topos_node-0.1.0.dist-info/entry_points.txt +2 -0
- topos_node-0.1.0.dist-info/licenses/LICENSE +201 -0
- topos_node-0.1.0.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
"""Canonical mapper registry."""
|
|
2
|
+
|
|
3
|
+
from .base import CanonicalMapper
|
|
4
|
+
from .chatgpt_mapper import ChatGPTCanonicalMapper
|
|
5
|
+
from .grok_mapper import GrokCanonicalMapper
|
|
6
|
+
from .messenger_mapper import ImessageCanonicalMapper, SignalCanonicalMapper
|
|
7
|
+
|
|
8
|
+
MAPPER_REGISTRY = {
|
|
9
|
+
"chatgpt": ChatGPTCanonicalMapper,
|
|
10
|
+
"grok": GrokCanonicalMapper,
|
|
11
|
+
"imessage": ImessageCanonicalMapper, # Sprint 02: conversations canonical group
|
|
12
|
+
"signal": SignalCanonicalMapper,
|
|
13
|
+
}
|
|
14
|
+
|
|
15
|
+
__all__ = [
|
|
16
|
+
"CanonicalMapper",
|
|
17
|
+
"ChatGPTCanonicalMapper",
|
|
18
|
+
"GrokCanonicalMapper",
|
|
19
|
+
"ImessageCanonicalMapper",
|
|
20
|
+
"SignalCanonicalMapper",
|
|
21
|
+
"MAPPER_REGISTRY",
|
|
22
|
+
]
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from typing import Dict
|
|
5
|
+
|
|
6
|
+
from ...ingestion.parsers.base import NormalizedRecord
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass(frozen=True)
class CanonicalRecord:
    """Immutable output of a canonical mapper: an identifier plus its payload."""

    # Identifier of the canonical record.
    record_id: str
    # Canonical fields keyed by field name.
    # NOTE(review): mappers in this package also store None and nested dicts
    # here, so the value type is wider than the annotation suggests.
    payload: Dict[str, str]
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dataclass(frozen=True)
class MappingMetadata:
    """Immutable description of which mapper produced a canonical record."""

    # Source identifier reported by the mapper (e.g. "chatgpt", "grok").
    source_id: str
    # Version label of the mapping logic that was applied.
    mapping_version: str
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class CanonicalMapper:
    """Base interface for mappers that turn normalized records into canonical ones.

    Both methods raise ``NotImplementedError`` here; concrete mappers
    override them.
    """

    def map(self, normalized: NormalizedRecord) -> CanonicalRecord:
        """Return the canonical record for ``normalized``."""
        raise NotImplementedError()

    def mapping_metadata(self, normalized: NormalizedRecord) -> MappingMetadata:
        """Return the source id and mapping version used for ``normalized``."""
        raise NotImplementedError()
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import hashlib
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
|
|
6
|
+
from ...ingestion.parsers.base import NormalizedRecord
|
|
7
|
+
from ..models import CanonicalMessage
|
|
8
|
+
from .base import CanonicalMapper, CanonicalRecord, MappingMetadata
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass
class ChatGPTCanonicalMapper(CanonicalMapper):
    """Maps a normalized ChatGPT record to the canonical message shape."""

    # Version label recorded in the message metadata and in MappingMetadata.
    version: str = "v1"

    def map(self, normalized: NormalizedRecord) -> CanonicalRecord:
        """Build a ``CanonicalMessage`` from ``normalized`` and wrap it in a record.

        The record id is the payload's ``message_id``, falling back to
        ``normalized.record_id``. ``content_hash`` is the SHA-256 hex digest
        of the UTF-8 encoded content.
        """
        payload = normalized.payload
        # ``.get("content", "")`` only covers a missing key; an explicit None
        # would reach ``.encode`` and raise AttributeError. Treat it as empty.
        content = payload.get("content") or ""
        content_hash = hashlib.sha256(content.encode("utf-8")).hexdigest()
        message_id = payload.get("message_id", normalized.record_id)

        # Preserve _metadata for conversation tree reconstruction
        # (parent_id, node_id, etc.). A present-but-empty/None ``_metadata``
        # is skipped instead of crashing ``dict.update``.
        metadata = {"mapper_version": self.version}
        extra_metadata = payload.get("_metadata")
        if extra_metadata:
            metadata.update(extra_metadata)

        canonical = CanonicalMessage(
            message_id=message_id,
            conversation_id=payload.get("thread_id", ""),
            sender_type=payload.get("sender_type", ""),
            content=content,
            ts=payload.get("ts"),
            source_id="chatgpt",
            content_hash=content_hash,
            metadata=metadata,
        )
        return CanonicalRecord(record_id=canonical.message_id, payload=canonical.__dict__)

    def mapping_metadata(self, normalized: NormalizedRecord) -> MappingMetadata:
        """Return the ``chatgpt`` source id together with this mapper's version."""
        return MappingMetadata(source_id="chatgpt", mapping_version=self.version)
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
|
|
5
|
+
from ...ingestion.parsers.base import NormalizedRecord
|
|
6
|
+
from .base import CanonicalMapper, CanonicalRecord, MappingMetadata
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass
class GrokCanonicalMapper(CanonicalMapper):
    """Pass-through mapper for Grok: the normalized payload is used unchanged."""

    # Version label reported through mapping_metadata().
    version: str = "v1"

    def map(self, normalized: NormalizedRecord) -> CanonicalRecord:
        """Wrap the normalized record id and payload in a ``CanonicalRecord``."""
        record_id = normalized.record_id
        payload = normalized.payload
        return CanonicalRecord(record_id=record_id, payload=payload)

    def mapping_metadata(self, normalized: NormalizedRecord) -> MappingMetadata:
        """Return the ``grok`` source id together with this mapper's version."""
        return MappingMetadata(source_id="grok", mapping_version=self.version)
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
"""Canonical mappers for messenger sources (imessage, signal).
|
|
2
|
+
|
|
3
|
+
Produce the shape expected by conversation_messages. The ingestion pipeline
|
|
4
|
+
routes canonical_group_id=conversations to ConversationsTablesManager and
|
|
5
|
+
builds staging records from normalized payloads directly; these mappers
|
|
6
|
+
are for registry completeness and any code that looks up imessage/signal by mapper_id.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
from dataclasses import dataclass
|
|
12
|
+
|
|
13
|
+
from ...ingestion.parsers.base import NormalizedRecord
|
|
14
|
+
from .base import CanonicalMapper, CanonicalRecord, MappingMetadata
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def _normalized_to_canonical_payload(normalized: NormalizedRecord, source_id: str) -> dict:
    """Build the canonical message dict for conversation_messages.

    ``message_id`` falls back to ``normalized.record_id``; ``conversation_id``
    is the first truthy value of ``thread_id`` / ``conversation_id`` or ``""``.
    ``_metadata`` is copied through only when the key is present.
    """
    source = normalized.payload
    conversation_id = source.get("thread_id") or source.get("conversation_id") or ""

    canonical = {
        "message_id": source.get("message_id", normalized.record_id),
        "conversation_id": conversation_id,
        "sender_type": source.get("sender_type", "human"),
    }
    # Optional fields are copied as-is and are None when absent.
    for optional_key in ("sender_id", "reply_to_message_id", "message_type", "event_type"):
        canonical[optional_key] = source.get(optional_key)
    canonical["content"] = source.get("content", "")
    canonical["ts"] = source.get("ts", "")
    canonical["source_id"] = source_id

    if "_metadata" in source:
        canonical["_metadata"] = source["_metadata"]
    return canonical
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@dataclass
class ImessageCanonicalMapper(CanonicalMapper):
    """Maps normalized iMessage record to canonical shape for conversation_messages."""

    def map(self, normalized: NormalizedRecord) -> CanonicalRecord:
        """Return a record keyed by the canonical ``message_id`` with source ``imessage``."""
        canonical = _normalized_to_canonical_payload(normalized, "imessage")
        record_id = canonical["message_id"]
        return CanonicalRecord(record_id=record_id, payload=canonical)

    def mapping_metadata(self, normalized: NormalizedRecord) -> MappingMetadata:
        """Return the fixed ``imessage`` / ``v1`` mapping metadata."""
        return MappingMetadata(source_id="imessage", mapping_version="v1")
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
@dataclass
class SignalCanonicalMapper(CanonicalMapper):
    """Maps normalized Signal record to canonical shape for conversation_messages."""

    def map(self, normalized: NormalizedRecord) -> CanonicalRecord:
        """Return a record keyed by the canonical ``message_id`` with source ``signal``."""
        canonical = _normalized_to_canonical_payload(normalized, "signal")
        record_id = canonical["message_id"]
        return CanonicalRecord(record_id=record_id, payload=canonical)

    def mapping_metadata(self, normalized: NormalizedRecord) -> MappingMetadata:
        """Return the fixed ``signal`` / ``v1`` mapping metadata."""
        return MappingMetadata(source_id="signal", mapping_version="v1")
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from typing import Dict, Optional
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
@dataclass(frozen=True)
class CanonicalConversation:
    """Immutable canonical representation of a conversation."""

    # Identifier of the conversation.
    conversation_id: str
    # Identifier of the source the conversation came from.
    source_id: str
    # Additional key/value attributes attached to the conversation.
    metadata: Dict[str, str]
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclass(frozen=True)
class CanonicalParticipant:
    """Immutable canonical representation of a conversation participant."""

    # Identifier of the participant.
    participant_id: str
    # Identifier of the conversation the participant belongs to.
    conversation_id: str
    # Role label of the participant within the conversation.
    role: str
    # Additional key/value attributes attached to the participant.
    metadata: Dict[str, str]
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@dataclass(frozen=True)
class CanonicalMessage:
    """Immutable canonical representation of a single message."""

    # Identifier of the message.
    message_id: str
    # Identifier of the conversation the message belongs to.
    conversation_id: str
    # Sender category label.
    sender_type: str
    # Message text.
    content: str
    # Timestamp string; None when not available.
    ts: Optional[str]
    # Identifier of the source the message came from.
    source_id: str
    # Hash of the message content.
    content_hash: str
    # Additional key/value attributes attached to the message.
    metadata: Dict[str, str]
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import hashlib
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from typing import Dict
|
|
6
|
+
|
|
7
|
+
from .mappers.base import CanonicalRecord
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def deterministic_id(namespace: str, value: str) -> str:
    """Return the SHA-256 hex digest of ``"<namespace>:<value>"`` (UTF-8)."""
    hasher = hashlib.sha256()
    hasher.update("{}:{}".format(namespace, value).encode("utf-8"))
    return hasher.hexdigest()
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dataclass
class CanonicalResolver:
    """Resolves canonical records and handles collisions (stub)."""

    def resolve_message(self, payload: Dict[str, str]) -> CanonicalRecord:
        """Wrap ``payload`` in a record whose id is derived from source and content.

        The id is ``deterministic_id(source_id, content)``, with ``source_id``
        defaulting to ``"unknown"`` and ``content`` to ``""`` when the keys are
        missing. The payload itself is passed through unchanged.
        """
        namespace = payload.get("source_id", "unknown")
        text = payload.get("content", "")
        return CanonicalRecord(record_id=deterministic_id(namespace, text), payload=payload)
|
topos/cli/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""CLI package for Topos."""
|
topos/cli/__main__.py
ADDED
topos/cli/commands.py
ADDED
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
"""CLI entry points for Topos."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
import sys
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
import click
|
|
10
|
+
import uvicorn
|
|
11
|
+
|
|
12
|
+
# Add parent directory to path for imports
|
|
13
|
+
sys.path.insert(0, str(Path(__file__).parent.parent.parent))
|
|
14
|
+
|
|
15
|
+
from topos.storage.db.paths import discover_databases
|
|
16
|
+
|
|
17
|
+
USER_ENV_PATH = Path.home() / ".topos" / ".env"
|
|
18
|
+
LEGACY_ENV_PATH = Path(__file__).resolve().parent.parent / ".env"
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _load_env_file(env_path: Path) -> None:
    """Load ``KEY=VALUE`` lines from ``env_path`` into ``os.environ``.

    Blank lines, ``#`` comment lines and lines without ``=`` are skipped.
    Variables already present in the environment are never overwritten, so
    the first definition wins across files and lines. A missing path, or a
    path that is not a regular file, is silently ignored.

    Only one *matching* pair of surrounding quotes is removed from a value.
    The previous chained ``strip('"').strip("'")`` also removed unmatched or
    stacked quote characters and therefore corrupted values that legitimately
    start or end with a quote (e.g. ``KEY=pa55'``).
    """
    # is_file() rather than exists(): a directory at this path would make
    # read_text() raise instead of being treated as "no env file".
    if not env_path.is_file():
        return
    for raw_line in env_path.read_text(encoding="utf-8").splitlines():
        entry = raw_line.strip()
        if not entry or entry.startswith("#"):
            continue
        key, separator, value = entry.partition("=")
        if not separator:
            continue
        key = key.strip()
        value = value.strip()
        if len(value) >= 2 and value[0] == value[-1] and value[0] in ('"', "'"):
            value = value[1:-1]
        if key and key not in os.environ:
            os.environ[key] = value
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _save_topos_key(topos_key: str, env_path: Path = USER_ENV_PATH) -> Path:
    """Persist ``TOPOS_KEY=<topos_key>`` in ``env_path`` and return that path.

    Existing lines are preserved. Every line that assigns ``TOPOS_KEY`` is
    replaced; if there is none, the assignment is appended (after a blank
    separator line when the file does not already end with one). The parent
    directory is created when missing.

    Assignments are matched on the stripped key, the same way the env-file
    loader parses them. Matching only the literal prefix ``TOPOS_KEY=`` missed
    lines such as ``TOPOS_KEY = old``: the new key was appended after the old
    one and then ignored, because the loader keeps the first definition.
    """
    env_path.parent.mkdir(parents=True, exist_ok=True)
    existing_lines: list[str] = []
    if env_path.exists():
        existing_lines = env_path.read_text(encoding="utf-8").splitlines()

    assignment = f"TOPOS_KEY={topos_key}"
    replaced = False
    new_lines: list[str] = []
    for line in existing_lines:
        name, separator, _ = line.strip().partition("=")
        if separator and name.strip() == "TOPOS_KEY":
            new_lines.append(assignment)
            replaced = True
        else:
            new_lines.append(line)

    if not replaced:
        if new_lines and new_lines[-1].strip():
            new_lines.append("")
        new_lines.append(assignment)

    # Restrict permissions BEFORE the secret is written, so the key is never
    # on disk in a file with looser permissions than we can achieve.
    env_path.touch(mode=0o600, exist_ok=True)
    try:
        env_path.chmod(0o600)
    except OSError:
        # Best-effort permissions (e.g. may fail on some filesystems).
        pass
    env_path.write_text("\n".join(new_lines) + "\n", encoding="utf-8")
    return env_path
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
@click.command()
@click.option(
    "--db-path",
    help="Database file path (SQLite). If not specified, uses auto-discovery.",
)
@click.option(
    "--topos-key",
    help="Topos key for this run (overrides saved key).",
)
@click.option(
    "--set-topos-key",
    metavar="KEY",
    help="Save TOPOS_KEY to ~/.topos/.env and exit.",
)
@click.option(
    "--discover",
    is_flag=True,
    help="Show discovered databases and exit",
)
@click.option(
    "--port",
    default=9000,
    help="Server port (default: 9000)",
)
@click.option(
    "--host",
    default="0.0.0.0",
    help="Host to bind to (default: 0.0.0.0)",
)
def main(db_path, topos_key, set_topos_key, discover, port, host) -> None:
    """Topos Control Plane API entry point."""
    # NOTE: the docstring above is the text click prints for --help; keep
    # implementation notes in comments.

    # --set-topos-key: persist the key and exit without starting the server.
    if set_topos_key:
        env_path = _save_topos_key(set_topos_key)
        click.echo(f"Saved TOPOS_KEY to {env_path}")
        click.echo("You can now run: topos-node")
        return

    # --discover: list databases and exit without starting the server.
    if discover:
        databases = discover_databases()
        if databases:
            click.echo("Discovered databases:")
            for db in databases:
                click.echo(f"  - {db}")
        else:
            click.echo("No existing databases found")
        return

    # The user env file is loaded first; the loader never overwrites an
    # existing variable, so it takes precedence over the legacy file.
    _load_env_file(USER_ENV_PATH)
    _load_env_file(LEGACY_ENV_PATH)

    if topos_key:
        os.environ["TOPOS_KEY"] = topos_key
    elif not os.getenv("TOPOS_KEY"):
        # NOTE(review): this fallback combined with the 0.0.0.0 default bind
        # serves the API on every interface with a well-known key.
        os.environ["TOPOS_KEY"] = "dev-key"
        click.echo("TOPOS_KEY not set; using local dev key")

    # Export the database path BEFORE importing the app, exactly like
    # TOPOS_KEY above, so anything topos.app reads from the environment at
    # import time also sees --db-path. (Previously this ran after the import.)
    if db_path:
        os.environ["TOPOS_DATABASE_PATH"] = db_path
        click.echo(f"Database path: {db_path}")

    # Imported late on purpose: the environment must be fully prepared first.
    from topos.app import app

    click.echo(f"Starting topos API on {host}:{port}")
    uvicorn.run(app, host=host, port=port)
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
if __name__ == "__main__":
|
|
132
|
+
main()
|
topos/config/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Configuration package for Topos."""
|
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Sanitization LLM (Ollama) configuration: file/env defaults + device DB overrides.
|
|
3
|
+
|
|
4
|
+
Device overrides are stored in SQLite `engine_config` under
|
|
5
|
+
`ENGINE_CONFIG_KEY_SANITIZATION_OLLAMA_DEVICE` as JSON (see DeviceSanitizationOllamaOverrides).
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import json
|
|
11
|
+
import logging
|
|
12
|
+
import sqlite3
|
|
13
|
+
from typing import TYPE_CHECKING, Any, Dict, Optional
|
|
14
|
+
|
|
15
|
+
from pydantic import BaseModel, ConfigDict, Field
|
|
16
|
+
|
|
17
|
+
if TYPE_CHECKING:
|
|
18
|
+
from topos.config.settings import Settings
|
|
19
|
+
|
|
20
|
+
logger = logging.getLogger("topos.config.sanitization_ollama")
|
|
21
|
+
|
|
22
|
+
# Transform IDs that use Ollama in topos.sanitization.ollama_transforms
|
|
23
|
+
SANITIZATION_OLLAMA_TRANSFORM_IDS: tuple[str, ...] = (
|
|
24
|
+
"pii_redaction",
|
|
25
|
+
"nsfw_sanitization",
|
|
26
|
+
"raw_to_summary",
|
|
27
|
+
"raw_to_sentiment",
|
|
28
|
+
"third_party_anonymization",
|
|
29
|
+
"name_removal",
|
|
30
|
+
"contact_removal",
|
|
31
|
+
)
|
|
32
|
+
|
|
33
|
+
ENGINE_CONFIG_KEY_SANITIZATION_OLLAMA_DEVICE = "sanitization_ollama_device"
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class DeviceSanitizationOllamaOverrides(BaseModel):
    """Partial overrides stored on device (engine_config JSON). Omitted keys keep file/env defaults."""

    # Unknown keys in the stored JSON are dropped rather than rejected.
    model_config = ConfigDict(extra="ignore")

    # Version of the stored override document; must be >= 1.
    version: int = Field(default=1, ge=1)
    # Every field below is optional: None means "no device override".
    enabled: Optional[bool] = None
    host: Optional[str] = None
    default_model: Optional[str] = None
    timeout_sec: Optional[float] = None
    max_input_chars: Optional[int] = None
    # Per-transform model names keyed by transform id.
    models: Optional[Dict[str, str]] = None
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
class SanitizationOllamaEffective(BaseModel):
    """Fully resolved config used at runtime (after merge)."""

    # All fields are required: the resolver fills each one in.
    enabled: bool
    host: str
    default_model: str
    timeout_sec: float
    auto_pull: bool
    max_input_chars: int
    # Model name per transform id.
    models: Dict[str, str]
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def _settings_transform_model_map(settings: Any) -> Dict[str, Optional[str]]:
    """Map transform_id -> optional per-transform model from Settings.

    For each transform id the attribute
    ``sanitization_ollama_model_<transform_id>`` is read from ``settings``;
    a missing attribute yields ``None``.
    """
    transform_ids = (
        "pii_redaction",
        "nsfw_sanitization",
        "raw_to_summary",
        "raw_to_sentiment",
        "third_party_anonymization",
        "name_removal",
        "contact_removal",
    )
    return {
        transform_id: getattr(settings, "sanitization_ollama_model_" + transform_id, None)
        for transform_id in transform_ids
    }
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def _read_engine_config_value(conn: sqlite3.Connection, key: str) -> Optional[str]:
    """Read engine_config without importing topos.core.state (avoids circular imports).

    Returns the stored value as a string, or ``None`` when the key is absent,
    the stored value is SQL NULL, or the query fails for any reason (failures
    are logged at debug level).

    A NULL value used to be returned as the literal string ``"None"``, which
    callers then tried to parse as JSON.
    """
    try:
        row = conn.execute("SELECT value FROM engine_config WHERE key = ?", (key,)).fetchone()
        if not row:
            return None
        # Rows may be plain tuples or sqlite3.Row depending on row_factory.
        value = row["value"] if isinstance(row, sqlite3.Row) else row[0]
        return None if value is None else str(value)
    except Exception as exc:  # noqa: BLE001
        logger.debug("engine_config read failed for %s: %s", key, exc)
        return None
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def parse_device_overrides_json(raw: Optional[str]) -> DeviceSanitizationOllamaOverrides:
    """Parse stored device-override JSON, never raising.

    Empty or blank input, JSON that is not an object, and anything that fails
    to decode or validate all yield a default ``DeviceSanitizationOllamaOverrides``.
    Decode/validation failures are logged as warnings.
    """
    if not raw or not str(raw).strip():
        return DeviceSanitizationOllamaOverrides()
    try:
        decoded = json.loads(raw)
        if isinstance(decoded, dict):
            return DeviceSanitizationOllamaOverrides.model_validate(decoded)
        return DeviceSanitizationOllamaOverrides()
    except Exception as exc:  # noqa: BLE001
        logger.warning("Invalid sanitization_ollama device JSON, ignoring: %s", exc)
        return DeviceSanitizationOllamaOverrides()
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def _first_non_blank(*candidates: Any) -> str:
    """Return the first candidate that is non-empty after stripping, else ``""``."""
    for candidate in candidates:
        if candidate is None:
            continue
        text = str(candidate).strip()
        if text:
            return text
    return ""


def resolve_sanitization_ollama_effective(
    settings: Settings,
    conn: Optional[sqlite3.Connection],
) -> SanitizationOllamaEffective:
    """Merge Settings defaults with device overrides into the runtime config.

    Device overrides are read from ``engine_config`` when ``conn`` is given;
    with ``conn=None`` only ``settings`` are used. Per field, a device value
    wins over the settings value. ``auto_pull`` comes from settings only.

    Blank (empty or whitespace-only) strings are treated as "not set" at every
    level. Previously a whitespace-only device ``host`` or ``default_model``
    won the ``or`` chain and was then stripped to ``""``, which skipped the
    settings value and could leave ``default_model`` empty.
    """
    device = DeviceSanitizationOllamaOverrides()
    if conn is not None:
        raw = _read_engine_config_value(conn, ENGINE_CONFIG_KEY_SANITIZATION_OLLAMA_DEVICE)
        device = parse_device_overrides_json(raw)

    enabled = settings.sanitization_ollama_enabled
    if device.enabled is not None:
        enabled = device.enabled

    # Host precedence: device -> sanitization host -> engine base URL -> local default.
    host = _first_non_blank(
        device.host,
        settings.sanitization_ollama_host,
        settings.engine_ollama_base_url,
    ) or "http://127.0.0.1:11434"

    default_model = _first_non_blank(
        device.default_model,
        settings.sanitization_ollama_default_model,
    ) or "llama3.2"

    timeout_sec = float(
        device.timeout_sec if device.timeout_sec is not None else settings.sanitization_ollama_timeout_sec
    )
    auto_pull = bool(getattr(settings, "sanitization_ollama_auto_pull", True))
    max_input_chars = int(
        device.max_input_chars if device.max_input_chars is not None else settings.sanitization_ollama_max_input_chars
    )

    # Per-transform model precedence: device override -> settings -> default_model.
    settings_models = _settings_transform_model_map(settings)
    device_models = device.models or {}
    models: Dict[str, str] = {}
    for tid in SANITIZATION_OLLAMA_TRANSFORM_IDS:
        models[tid] = _first_non_blank(device_models.get(tid), settings_models.get(tid)) or default_model

    return SanitizationOllamaEffective(
        enabled=enabled,
        host=host,
        default_model=default_model,
        timeout_sec=timeout_sec,
        auto_pull=auto_pull,
        max_input_chars=max_input_chars,
        models=models,
    )
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def effective_config_for_api(settings: Settings, conn: Optional[sqlite3.Connection]) -> dict[str, Any]:
    """Payload for GET /v1/sanitization-ollama-config (no secrets).

    Returns a dict with four keys: ``transform_ids``, ``defaults_from_settings``
    (values taken from ``settings`` only), ``device_overrides`` (stored
    overrides without ``None`` fields; ``{}`` when ``conn`` is ``None``) and
    ``effective`` (the merged runtime config).
    """
    effective = resolve_sanitization_ollama_effective(settings, conn)

    overrides: Optional[DeviceSanitizationOllamaOverrides] = None
    if conn is not None:
        stored_json = _read_engine_config_value(conn, ENGINE_CONFIG_KEY_SANITIZATION_OLLAMA_DEVICE)
        overrides = parse_device_overrides_json(stored_json)

    settings_defaults = {
        "enabled": settings.sanitization_ollama_enabled,
        "host": settings.sanitization_ollama_host or settings.engine_ollama_base_url,
        "default_model": settings.sanitization_ollama_default_model,
        "timeout_sec": settings.sanitization_ollama_timeout_sec,
        "auto_pull": settings.sanitization_ollama_auto_pull,
        "max_input_chars": settings.sanitization_ollama_max_input_chars,
        # Only transforms that have a model configured in settings are listed.
        "models": {
            transform_id: model
            for transform_id, model in _settings_transform_model_map(settings).items()
            if model
        },
    }

    if overrides:
        device_payload = overrides.model_dump(exclude_none=True)
    else:
        device_payload = {}

    return {
        "transform_ids": list(SANITIZATION_OLLAMA_TRANSFORM_IDS),
        "defaults_from_settings": settings_defaults,
        "device_overrides": device_payload,
        "effective": effective.model_dump(),
    }
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
def normalize_put_device_overrides(body: dict[str, Any]) -> str:
    """Validate and return JSON string for engine_config.

    Raises ``ValueError`` when ``device_overrides`` is missing/None, is not a
    dict, names a transform id outside ``SANITIZATION_OLLAMA_TRANSFORM_IDS``,
    or maps a transform to a blank model name. Errors raised by model
    validation propagate unchanged. ``None`` fields are omitted from the
    returned JSON.
    """
    candidate = body.get("device_overrides")
    if candidate is None:
        raise ValueError("device_overrides is required")
    if not isinstance(candidate, dict):
        raise ValueError("device_overrides must be an object")

    overrides = DeviceSanitizationOllamaOverrides.model_validate(candidate)
    model_overrides = overrides.models or {}
    for transform_id, model_name in model_overrides.items():
        if transform_id not in SANITIZATION_OLLAMA_TRANSFORM_IDS:
            raise ValueError(f"Unknown transform_id in models: {transform_id!r}")
        if not str(model_name).strip():
            raise ValueError(f"Empty model for transform_id {transform_id!r}")

    return json.dumps(overrides.model_dump(exclude_none=True))
|