topos-node 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- shared/__init__.py +59 -0
- shared/filtering.py +640 -0
- shared/schema_registry.py +229 -0
- topos/__init__.py +5 -0
- topos/__version__.py +6 -0
- topos/analytics/__init__.py +15 -0
- topos/analytics/duckdb_adapter.py +48 -0
- topos/analytics/messenger_communities.py +349 -0
- topos/analytics/messenger_graph.py +522 -0
- topos/analytics/messenger_labels.py +321 -0
- topos/analytics/profiles.py +22 -0
- topos/analytics/query_engine.py +64 -0
- topos/analytics/raw_queries.py +174 -0
- topos/api/__init__.py +1 -0
- topos/api/analytics.py +52 -0
- topos/api/app_registry.py +31 -0
- topos/api/backup.py +15 -0
- topos/api/compute_remote.py +175 -0
- topos/api/data_commit.py +158 -0
- topos/api/data_explorer_table_prefs.py +81 -0
- topos/api/db.py +10 -0
- topos/api/device.py +25 -0
- topos/api/enrichment.py +959 -0
- topos/api/filter_lab.py +195 -0
- topos/api/health.py +61 -0
- topos/api/ingestion_api.py +37 -0
- topos/api/ingestion_compat.py +21 -0
- topos/api/ingestion_sources.py +600 -0
- topos/api/llm.py +76 -0
- topos/api/local_mcp.py +46 -0
- topos/api/messenger_analytics.py +385 -0
- topos/api/query_api.py +13 -0
- topos/api/sanitization_ollama_config.py +64 -0
- topos/api/source_install.py +324 -0
- topos/api/sources.py +13 -0
- topos/api/sync.py +10 -0
- topos/api/ui_config.py +83 -0
- topos/api/uma_data.py +311 -0
- topos/api/usage.py +49 -0
- topos/api/user_identity.py +46 -0
- topos/app.py +239 -0
- topos/auth.py +17 -0
- topos/canonicalization/__init__.py +1 -0
- topos/canonicalization/mappers/__init__.py +22 -0
- topos/canonicalization/mappers/base.py +26 -0
- topos/canonicalization/mappers/chatgpt_mapper.py +40 -0
- topos/canonicalization/mappers/grok_mapper.py +17 -0
- topos/canonicalization/mappers/messenger_mapper.py +58 -0
- topos/canonicalization/models.py +31 -0
- topos/canonicalization/resolver.py +23 -0
- topos/cli/__init__.py +1 -0
- topos/cli/__main__.py +6 -0
- topos/cli/commands.py +132 -0
- topos/config/__init__.py +1 -0
- topos/config/sanitization_ollama.py +189 -0
- topos/config/settings.py +310 -0
- topos/contacts/__init__.py +5 -0
- topos/contacts/identity.py +24 -0
- topos/control_plane_client.py +300 -0
- topos/core/__init__.py +1 -0
- topos/core/api_models.py +128 -0
- topos/core/connection_resilience.py +99 -0
- topos/core/device_helpers.py +8 -0
- topos/core/errors.py +13 -0
- topos/core/events.py +12 -0
- topos/core/handlers.py +5625 -0
- topos/core/logging.py +175 -0
- topos/core/metrics.py +21 -0
- topos/core/startup_banner.py +62 -0
- topos/core/state.py +682 -0
- topos/core/table_layers.py +45 -0
- topos/core/types.py +13 -0
- topos/data_explorer_table_prefs.py +150 -0
- topos/engine/__init__.py +29 -0
- topos/engine/backends/__init__.py +50 -0
- topos/engine/backends/base.py +21 -0
- topos/engine/backends/huggingface.py +151 -0
- topos/engine/backends/ollama.py +181 -0
- topos/engine/backends/stub.py +22 -0
- topos/engine/engine.py +165 -0
- topos/engine/intake.py +32 -0
- topos/engine/queue_manager.py +112 -0
- topos/engine/registration.py +126 -0
- topos/engine/result_formatter.py +38 -0
- topos/engine/router.py +19 -0
- topos/engine/scoped_token.py +82 -0
- topos/engine/tasks.py +154 -0
- topos/engine/transport.py +44 -0
- topos/engine/usage_guard.py +100 -0
- topos/engine/usage_observation.py +129 -0
- topos/engine/validator.py +23 -0
- topos/enrichment/__init__.py +1 -0
- topos/enrichment/derived_tables.py +214 -0
- topos/enrichment/jobs/__init__.py +30 -0
- topos/enrichment/jobs/base.py +54 -0
- topos/enrichment/jobs/canonical/__init__.py +1 -0
- topos/enrichment/jobs/canonical/embeddings_job.py +27 -0
- topos/enrichment/jobs/canonical/emo_27_job.py +97 -0
- topos/enrichment/jobs/canonical/entities_job.py +27 -0
- topos/enrichment/jobs/canonical/sentiment_job.py +27 -0
- topos/enrichment/jobs/canonical/topics_job.py +27 -0
- topos/enrichment/jobs/raw/__init__.py +1 -0
- topos/enrichment/jobs/raw/attachments_job.py +12 -0
- topos/enrichment/jobs/raw/language_job.py +12 -0
- topos/enrichment/jobs/raw/time_normalization_job.py +12 -0
- topos/enrichment/jobs/raw/tool_calls_job.py +12 -0
- topos/enrichment/models/__init__.py +1 -0
- topos/enrichment/models/manager.py +8 -0
- topos/enrichment/models/registry.py +71 -0
- topos/enrichment/models/versioning.py +8 -0
- topos/enrichment/orchestrator.py +177 -0
- topos/enrichment/processor.py +17 -0
- topos/enrichment/progress_bar.py +122 -0
- topos/enrichment/website_classifier.py +31 -0
- topos/filter_lab/__init__.py +1 -0
- topos/filter_lab/bundles.py +300 -0
- topos/filter_lab/schema.py +86 -0
- topos/filter_lab/service.py +167 -0
- topos/filter_lab/store.py +374 -0
- topos/filter_lab/worker.py +250 -0
- topos/hosted_pool_lease.py +153 -0
- topos/ingestion/__init__.py +1 -0
- topos/ingestion/checkpoints/__init__.py +6 -0
- topos/ingestion/checkpoints/checkpoint_store.py +24 -0
- topos/ingestion/checkpoints/sqlite_checkpoint_store.py +82 -0
- topos/ingestion/ingest_helpers.py +504 -0
- topos/ingestion/jobs.py +91 -0
- topos/ingestion/local_sync.py +823 -0
- topos/ingestion/log_preview.py +21 -0
- topos/ingestion/manager.py +1100 -0
- topos/ingestion/parser.py +174 -0
- topos/ingestion/parsers/__init__.py +32 -0
- topos/ingestion/parsers/base.py +24 -0
- topos/ingestion/parsers/browser_parser.py +171 -0
- topos/ingestion/parsers/calendar_parser.py +21 -0
- topos/ingestion/parsers/chatgpt_conversation_flattener.py +266 -0
- topos/ingestion/parsers/chatgpt_parser.py +67 -0
- topos/ingestion/parsers/grok_parser.py +21 -0
- topos/ingestion/parsers/messenger_parser.py +97 -0
- topos/ingestion/progress.py +54 -0
- topos/ingestion/sources/__init__.py +20 -0
- topos/ingestion/sources/base.py +39 -0
- topos/ingestion/sources/calendar.py +29 -0
- topos/ingestion/sources/chatgpt.py +29 -0
- topos/ingestion/sources/contact_importers.py +274 -0
- topos/ingestion/sources/grok.py +29 -0
- topos/ingestion/sources/imessage_reader.py +479 -0
- topos/ingestion/sources/signal_export_parser.py +132 -0
- topos/ingestion/sources/signal_reader.py +491 -0
- topos/ingestion/state_machine.py +70 -0
- topos/ingestion/triggers/__init__.py +1 -0
- topos/ingestion/triggers/file_trigger.py +36 -0
- topos/ingestion/triggers/sqlite_trigger.py +18 -0
- topos/ingestion/validation/__init__.py +1 -0
- topos/ingestion/validation/base.py +27 -0
- topos/ingestion/validation/schema_registry.py +111 -0
- topos/ingestion/validation/schema_validator.py +13 -0
- topos/lineage/__init__.py +1 -0
- topos/lineage/provenance.py +9 -0
- topos/lineage/tracker.py +9 -0
- topos/mcp_stdio_proxy.py +83 -0
- topos/observability/__init__.py +1 -0
- topos/observability/alerts.py +7 -0
- topos/observability/metrics.py +25 -0
- topos/observability/tracing.py +18 -0
- topos/openai_client.py +69 -0
- topos/projections/__init__.py +1 -0
- topos/projections/vector_index/__init__.py +1 -0
- topos/projections/vector_index/base.py +21 -0
- topos/projections/vector_index/builders.py +11 -0
- topos/projections/vector_index/health_checks.py +5 -0
- topos/rate_limit.py +43 -0
- topos/sanitization/__init__.py +16 -0
- topos/sanitization/ollama_transforms.py +276 -0
- topos/scope_resolution.py +89 -0
- topos/services/__init__.py +1 -0
- topos/services/container.py +46 -0
- topos/services/embeddings/__init__.py +1 -0
- topos/services/embeddings/base.py +7 -0
- topos/services/embeddings/local.py +9 -0
- topos/services/embeddings/remote.py +9 -0
- topos/services/interfaces.py +40 -0
- topos/services/llm/__init__.py +1 -0
- topos/services/llm/base.py +7 -0
- topos/services/llm/openai.py +126 -0
- topos/services/local.py +123 -0
- topos/services/postgres.py +385 -0
- topos/sources/__init__.py +6 -0
- topos/sources/definitions.py +114 -0
- topos/sources/install_service.py +836 -0
- topos/sources/registry.py +263 -0
- topos/sources/runtime_install.py +427 -0
- topos/storage/__init__.py +1 -0
- topos/storage/canonical/__init__.py +18 -0
- topos/storage/canonical/ai_chat/__init__.py +22 -0
- topos/storage/canonical/ai_chat/canonicalizer.py +147 -0
- topos/storage/canonical/ai_chat/mapper.py +168 -0
- topos/storage/canonical/ai_chat/model.py +87 -0
- topos/storage/canonical/ai_chat/tables.py +179 -0
- topos/storage/canonical/canonical_store.py +24 -0
- topos/storage/canonical/conversations_tables.py +1020 -0
- topos/storage/canonical/mapping_store.py +30 -0
- topos/storage/canonical/postgres.py +10 -0
- topos/storage/db/__init__.py +1 -0
- topos/storage/db/client.py +8 -0
- topos/storage/db/migrations/__init__.py +1 -0
- topos/storage/db/migrations/stage9_column_renames.py +78 -0
- topos/storage/db/paths.py +122 -0
- topos/storage/db/postgres.py +240 -0
- topos/storage/db/schema.py +6 -0
- topos/storage/enrichment/__init__.py +1 -0
- topos/storage/enrichment/canonical_enrichment_store.py +7 -0
- topos/storage/enrichment/raw_enrichment_store.py +18 -0
- topos/storage/normalized/__init__.py +1 -0
- topos/storage/normalized/normalized_store.py +24 -0
- topos/storage/oplog/__init__.py +1 -0
- topos/storage/oplog/decision.py +6 -0
- topos/storage/oplog/oplog_store.py +17 -0
- topos/storage/oplog/postgres.py +10 -0
- topos/storage/projections/__init__.py +1 -0
- topos/storage/projections/index_ops_store.py +6 -0
- topos/storage/projections/vector_index_store.py +6 -0
- topos/storage/raw/__init__.py +1 -0
- topos/storage/raw/browser_flat_tables.py +303 -0
- topos/storage/raw/file_store.py +100 -0
- topos/storage/raw/raw_store.py +29 -0
- topos/storage/raw/raw_tables_manager.py +295 -0
- topos/storage/raw/sqlite_raw_store.py +17 -0
- topos/storage/security/encryption.py +21 -0
- topos/storage/signal_identity.py +71 -0
- topos/storage/source_settings.py +116 -0
- topos/storage/user_identity.py +69 -0
- topos/sync/__init__.py +5 -0
- topos/sync/client.py +272 -0
- topos/sync_handlers.py +70 -0
- topos/testing/__init__.py +1 -0
- topos/testing/lifespan.py +7 -0
- topos/uma_contact_enrichment.py +1032 -0
- topos/uma_filters.py +669 -0
- topos/uma_resource_id.py +24 -0
- topos/uma_rpt.py +69 -0
- topos/utils/base_object.py +61 -0
- topos/websocket_client.py +21 -0
- topos_node-0.1.0.dist-info/METADATA +199 -0
- topos_node-0.1.0.dist-info/RECORD +249 -0
- topos_node-0.1.0.dist-info/WHEEL +5 -0
- topos_node-0.1.0.dist-info/entry_points.txt +2 -0
- topos_node-0.1.0.dist-info/licenses/LICENSE +201 -0
- topos_node-0.1.0.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,263 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import List
|
|
4
|
+
|
|
5
|
+
from .definitions import DataSourceDefinition
|
|
6
|
+
from shared.filtering import FilterInstance, FilterManifest
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def _manifest(*filters: FilterInstance) -> dict:
    """Bundle the given filter instances into a manifest and return its storage dict.

    The positional ``filters`` are collected into a list, wrapped in a
    ``FilterManifest`` and serialised via ``to_storage_dict()``; the result of
    that call is returned unchanged.
    """
    manifest = FilterManifest(filters=[*filters])
    return manifest.to_storage_dict()
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
CHATGPT_FILE = DataSourceDefinition(
    source_id="chatgpt_file_ingestion",
    display_name="ChatGPT File Ingestion",
    source_type="file",
    # Schema and parser both use the v2 ids (v2 was introduced for real ChatGPT data).
    schema_id="chatgpt.conversation.v2",
    parser_id="chatgpt.conversation.v2",
    canonical_mapper_id="chatgpt",
    canonical_group_id="ai_messages",
    raw_enrichment_jobs=[
        "attachments",
        "tool_calls",
        "language",
        "time_normalization",
    ],
    canonical_enrichment_jobs=[
        "entities",
        "topics",
        "sentiment",
        "embeddings",
        "emo_27",
    ],
    analytics_profile_id="chatgpt_dev",
    # Enrichment is skipped during ingestion; trigger it via POST /v1/enrichment/process.
    enrichment_trigger="manual",
    # After an upload, ingestion processing waits for a manual trigger.
    ingestion_trigger="manual",
    default_scope_id="aiMessages",
    allowed_scope_ids=[
        "aiMessages:read",
        "aiChat:read",
    ],
    default_filter_hints=[
        "rolling_window_days",
        "max_rows",
    ],
    filter_tier_kind="sensitivity",
    # From "low" to "high" the rolling window shrinks and a tighter row cap is added.
    default_filter_tiers={
        "low": _manifest(
            FilterInstance(filter_id="rolling_window_days", params={"days": 90}),
        ),
        "medium": _manifest(
            FilterInstance(filter_id="rolling_window_days", params={"days": 30}),
            FilterInstance(filter_id="max_rows", params={"count": 500}),
        ),
        "high": _manifest(
            FilterInstance(filter_id="rolling_window_days", params={"days": 7}),
            FilterInstance(filter_id="max_rows", params={"count": 100}),
        ),
    },
    # Per-field transform ids applied by default to the ai_chat_messages table.
    field_transform_defaults=[
        {
            "table_id": "ai_chat_messages",
            "field": "content",
            "transform_ids": ["pii_redaction", "nsfw_sanitization"],
        },
        {
            "table_id": "ai_chat_messages",
            "field": "event_at",
            "transform_ids": ["timestamp_to_date"],
        },
    ],
)
|
|
46
|
+
|
|
47
|
+
CHATGPT_UI = DataSourceDefinition(
    source_id="chatgpt_ui_conversation",
    display_name="ChatGPT UI Conversation",
    source_type="ui_stream",
    schema_id="chatgpt.conversation.v1",
    parser_id="chatgpt.conversation.v1",
    canonical_mapper_id="chatgpt",
    canonical_group_id="ai_messages",
    raw_enrichment_jobs=[
        "attachments",
        "tool_calls",
        "language",
        "time_normalization",
    ],
    canonical_enrichment_jobs=[
        "entities",
        "topics",
        "sentiment",
        "embeddings",
        "emo_27",
    ],
    analytics_profile_id="chatgpt_dev",
    # Enrichment runs automatically during ingestion.
    enrichment_trigger="automatic",
    # NOTE(review): no ingestion_trigger is passed here, so the default declared
    # by DataSourceDefinition applies - confirm that is the intended value.
    default_scope_id="aiMessages",
    allowed_scope_ids=[
        "aiMessages:read",
        "aiChat:read",
    ],
    default_filter_hints=[
        "rolling_window_days",
        "max_rows",
    ],
    filter_tier_kind="sensitivity",
    # From "low" to "high" the rolling window shrinks and a tighter row cap is added.
    default_filter_tiers={
        "low": _manifest(
            FilterInstance(filter_id="rolling_window_days", params={"days": 90}),
        ),
        "medium": _manifest(
            FilterInstance(filter_id="rolling_window_days", params={"days": 30}),
            FilterInstance(filter_id="max_rows", params={"count": 500}),
        ),
        "high": _manifest(
            FilterInstance(filter_id="rolling_window_days", params={"days": 7}),
            FilterInstance(filter_id="max_rows", params={"count": 100}),
        ),
    },
    # Per-field transform ids applied by default to the ai_chat_messages table.
    field_transform_defaults=[
        {
            "table_id": "ai_chat_messages",
            "field": "content",
            "transform_ids": ["pii_redaction", "nsfw_sanitization"],
        },
        {
            "table_id": "ai_chat_messages",
            "field": "event_at",
            "transform_ids": ["timestamp_to_date"],
        },
    ],
)
|
|
79
|
+
|
|
80
|
+
# Sprint 3: Browser plugin source
|
|
81
|
+
BROWSER_VISITS = DataSourceDefinition(
    source_id="browser_visits",
    display_name="Browser Visits",
    source_type="ui_stream",
    schema_id="browser.visits.v1",
    parser_id="browser.visits.v1",
    # No canonical mapping for the MVP, hence no mapper and no group.
    canonical_mapper_id=None,
    canonical_group_id=None,
    # URL category classification happens during browser ingestion.
    raw_enrichment_jobs=["url_classification"],
    canonical_enrichment_jobs=[],
    analytics_profile_id=None,
    # No automatic enrichment for this source.
    enrichment_trigger="manual",
    ingestion_trigger="automatic",
    default_scope_id="activity",
    allowed_scope_ids=[
        "activity:read",
        "activity:write",
    ],
    default_filter_hints=[
        "rolling_window_days",
        "timestamp_to_date",
        "column_blocklist",
    ],
    filter_tier_kind="inferability",
    # Higher tiers shorten the window, then add timestamp_to_date, then block the url column.
    default_filter_tiers={
        "low": _manifest(
            FilterInstance(filter_id="rolling_window_days", params={"days": 30}),
        ),
        "medium": _manifest(
            FilterInstance(filter_id="rolling_window_days", params={"days": 14}),
            FilterInstance(filter_id="timestamp_to_date", params={}),
        ),
        "high": _manifest(
            FilterInstance(filter_id="rolling_window_days", params={"days": 7}),
            FilterInstance(filter_id="timestamp_to_date", params={}),
            FilterInstance(filter_id="column_blocklist", params={"fields": ["url"]}),
        ),
    },
    # Per-field transform ids applied by default to the browser_visits table.
    field_transform_defaults=[
        {
            "table_id": "browser_visits",
            "field": "url",
            "transform_ids": ["pii_redaction"],
        },
        {
            "table_id": "browser_visits",
            "field": "title",
            "transform_ids": ["pii_redaction"],
        },
        {
            "table_id": "browser_visits",
            "field": "visited_at",
            "transform_ids": ["timestamp_to_date"],
        },
    ],
)
|
|
116
|
+
|
|
117
|
+
# Browser plugin events: clicks, highlights, star_page, VIDEO_PLAY
|
|
118
|
+
BROWSER_EVENTS = DataSourceDefinition(
    source_id="browser_events",
    display_name="Browser Events",
    source_type="ui_stream",
    schema_id="browser.events.v1",
    parser_id="browser.events.v1",
    # This source declares no canonical mapping, enrichment jobs or analytics profile.
    canonical_mapper_id=None,
    canonical_group_id=None,
    raw_enrichment_jobs=[],
    canonical_enrichment_jobs=[],
    analytics_profile_id=None,
    enrichment_trigger="manual",
    ingestion_trigger="automatic",
    default_scope_id="activity",
    allowed_scope_ids=[
        "activity:read",
        "activity:write",
    ],
    default_filter_hints=[
        "rolling_window_days",
        "timestamp_to_date",
    ],
    filter_tier_kind="inferability",
    # Higher tiers shorten the window, then add timestamp_to_date, then cap the row count.
    default_filter_tiers={
        "low": _manifest(
            FilterInstance(filter_id="rolling_window_days", params={"days": 30}),
        ),
        "medium": _manifest(
            FilterInstance(filter_id="rolling_window_days", params={"days": 14}),
            FilterInstance(filter_id="timestamp_to_date", params={}),
        ),
        "high": _manifest(
            FilterInstance(filter_id="rolling_window_days", params={"days": 7}),
            FilterInstance(filter_id="timestamp_to_date", params={}),
            FilterInstance(filter_id="max_rows", params={"count": 250}),
        ),
    },
    # Per-field transform ids applied by default to the browser_events table.
    field_transform_defaults=[
        {
            "table_id": "browser_events",
            "field": "url",
            "transform_ids": ["pii_redaction"],
        },
        {
            "table_id": "browser_events",
            "field": "title",
            "transform_ids": ["pii_redaction"],
        },
        {
            "table_id": "browser_events",
            "field": "content",
            "transform_ids": ["pii_redaction", "nsfw_sanitization"],
        },
        {
            "table_id": "browser_events",
            "field": "visited_at",
            "transform_ids": ["timestamp_to_date"],
        },
    ],
)
|
|
154
|
+
|
|
155
|
+
# Sprint 02: Messenger ingestion (local_sync -> conversation_messages)
|
|
156
|
+
IMESSAGE = DataSourceDefinition(
    source_id="imessage",
    display_name="iMessage",
    source_type="local_sync",
    schema_id="imessage.messages.v1",
    parser_id="imessage.messages.v1",
    canonical_mapper_id="imessage",
    canonical_group_id="conversations",
    raw_enrichment_jobs=[],
    canonical_enrichment_jobs=["emo_27"],
    analytics_profile_id=None,
    enrichment_trigger="automatic",
    # Sync runs on schedule or via "Sync now".
    ingestion_trigger="automatic",
    default_scope_id="messages",
    allowed_scope_ids=[
        "messages:read",
        "messages:write",
    ],
    default_filter_hints=[
        "rolling_window_days",
        "max_rows",
        "timestamp_to_date",
    ],
    filter_tier_kind="sensitivity",
    # "medium" and "high" add a row cap and timestamp_to_date on top of a shorter window.
    default_filter_tiers={
        "low": _manifest(
            FilterInstance(filter_id="rolling_window_days", params={"days": 90}),
        ),
        "medium": _manifest(
            FilterInstance(filter_id="rolling_window_days", params={"days": 30}),
            FilterInstance(filter_id="max_rows", params={"count": 1000}),
            FilterInstance(filter_id="timestamp_to_date", params={}),
        ),
        "high": _manifest(
            FilterInstance(filter_id="rolling_window_days", params={"days": 14}),
            FilterInstance(filter_id="max_rows", params={"count": 250}),
            FilterInstance(filter_id="timestamp_to_date", params={}),
        ),
    },
    # Per-field transform ids applied by default to the conversation_messages table.
    field_transform_defaults=[
        {
            "table_id": "conversation_messages",
            "field": "content",
            "transform_ids": ["pii_redaction", "nsfw_sanitization"],
        },
        {
            "table_id": "conversation_messages",
            "field": "event_at",
            "transform_ids": ["timestamp_to_date"],
        },
    ],
)
|
|
191
|
+
|
|
192
|
+
SIGNAL = DataSourceDefinition(
    source_id="signal",
    display_name="Signal Desktop",
    source_type="local_sync",
    schema_id="signal.messages.v1",
    parser_id="signal.messages.v1",
    canonical_mapper_id="signal",
    canonical_group_id="conversations",
    raw_enrichment_jobs=[],
    canonical_enrichment_jobs=["emo_27"],
    analytics_profile_id=None,
    enrichment_trigger="automatic",
    ingestion_trigger="automatic",
    default_scope_id="messages",
    allowed_scope_ids=[
        "messages:read",
        "messages:write",
    ],
    default_filter_hints=[
        "rolling_window_days",
        "max_rows",
        "timestamp_to_date",
    ],
    filter_tier_kind="sensitivity",
    # "medium" and "high" add a row cap and timestamp_to_date on top of a shorter window.
    default_filter_tiers={
        "low": _manifest(
            FilterInstance(filter_id="rolling_window_days", params={"days": 90}),
        ),
        "medium": _manifest(
            FilterInstance(filter_id="rolling_window_days", params={"days": 30}),
            FilterInstance(filter_id="max_rows", params={"count": 1000}),
            FilterInstance(filter_id="timestamp_to_date", params={}),
        ),
        "high": _manifest(
            FilterInstance(filter_id="rolling_window_days", params={"days": 14}),
            FilterInstance(filter_id="max_rows", params={"count": 250}),
            FilterInstance(filter_id="timestamp_to_date", params={}),
        ),
    },
    # Per-field transform ids applied by default to the conversation_messages table.
    field_transform_defaults=[
        {
            "table_id": "conversation_messages",
            "field": "content",
            "transform_ids": ["pii_redaction", "nsfw_sanitization"],
        },
        {
            "table_id": "conversation_messages",
            "field": "event_at",
            "transform_ids": ["timestamp_to_date"],
        },
    ],
)
|
|
227
|
+
|
|
228
|
+
# Lookup table of every built-in source definition, keyed by its source_id.
# The tuple order fixes the dict's insertion (and therefore iteration) order.
REGISTRY = {
    definition.source_id: definition
    for definition in (
        CHATGPT_FILE,
        CHATGPT_UI,
        BROWSER_VISITS,
        BROWSER_EVENTS,
        IMESSAGE,
        SIGNAL,
    )
}
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
def list_sources() -> list[DataSourceDefinition]:
    """Return every definition held in ``REGISTRY`` as a new list, in registry order."""
    return [*REGISTRY.values()]
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
def get_sources_by_scope(scope_id: str) -> List[str]:
    """
    Return the source_id of every registered source that is reachable via ``scope_id``.

    ``scope_id`` may be a bare base name without ``:read``/``:write``
    (e.g. ``'messages'``) or a full MVP scope id (e.g. ``'messages:read'``).
    Surrounding whitespace is ignored. A source matches when:

    * its ``default_scope_id`` equals ``scope_id`` or the base of ``scope_id``; or
    * one of its ``allowed_scope_ids`` equals ``scope_id`` or shares its base.

    Unset or blank scopes on a definition never match, and base matching is only
    attempted when ``scope_id`` has a non-empty base. Without that guard, a
    malformed id such as ``':read'`` has an empty base, which would compare equal
    to an unset ``default_scope_id`` and select unrelated sources.

    Used by Topos/Control Plane for scope → source resolution.

    Returns:
        Matching source ids in ``REGISTRY`` iteration order; an empty list when
        ``scope_id`` is empty, blank or ``None``.
    """
    wanted = (scope_id or "").strip()
    if not wanted:
        return []
    # Text before the first ':'; empty for malformed ids such as ':read'.
    wanted_base = wanted.split(":", 1)[0]

    matched: List[str] = []
    for defn in REGISTRY.values():
        default_scope = (defn.default_scope_id or "").strip()
        # The default scope is compared as a whole string against both the full
        # id and the base; an unset default never matches.
        if default_scope and default_scope in (wanted, wanted_base):
            matched.append(defn.source_id)
            continue

        for allowed in defn.allowed_scope_ids or []:
            allowed_scope = (allowed or "").strip()
            if not allowed_scope:
                # Blank or None entries carry no scope information.
                continue
            same_full_id = allowed_scope == wanted
            same_base = bool(wanted_base) and allowed_scope.split(":", 1)[0] == wanted_base
            if same_full_id or same_base:
                matched.append(defn.source_id)
                break
    return matched
|