topos-node 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- shared/__init__.py +59 -0
- shared/filtering.py +640 -0
- shared/schema_registry.py +229 -0
- topos/__init__.py +5 -0
- topos/__version__.py +6 -0
- topos/analytics/__init__.py +15 -0
- topos/analytics/duckdb_adapter.py +48 -0
- topos/analytics/messenger_communities.py +349 -0
- topos/analytics/messenger_graph.py +522 -0
- topos/analytics/messenger_labels.py +321 -0
- topos/analytics/profiles.py +22 -0
- topos/analytics/query_engine.py +64 -0
- topos/analytics/raw_queries.py +174 -0
- topos/api/__init__.py +1 -0
- topos/api/analytics.py +52 -0
- topos/api/app_registry.py +31 -0
- topos/api/backup.py +15 -0
- topos/api/compute_remote.py +175 -0
- topos/api/data_commit.py +158 -0
- topos/api/data_explorer_table_prefs.py +81 -0
- topos/api/db.py +10 -0
- topos/api/device.py +25 -0
- topos/api/enrichment.py +959 -0
- topos/api/filter_lab.py +195 -0
- topos/api/health.py +61 -0
- topos/api/ingestion_api.py +37 -0
- topos/api/ingestion_compat.py +21 -0
- topos/api/ingestion_sources.py +600 -0
- topos/api/llm.py +76 -0
- topos/api/local_mcp.py +46 -0
- topos/api/messenger_analytics.py +385 -0
- topos/api/query_api.py +13 -0
- topos/api/sanitization_ollama_config.py +64 -0
- topos/api/source_install.py +324 -0
- topos/api/sources.py +13 -0
- topos/api/sync.py +10 -0
- topos/api/ui_config.py +83 -0
- topos/api/uma_data.py +311 -0
- topos/api/usage.py +49 -0
- topos/api/user_identity.py +46 -0
- topos/app.py +239 -0
- topos/auth.py +17 -0
- topos/canonicalization/__init__.py +1 -0
- topos/canonicalization/mappers/__init__.py +22 -0
- topos/canonicalization/mappers/base.py +26 -0
- topos/canonicalization/mappers/chatgpt_mapper.py +40 -0
- topos/canonicalization/mappers/grok_mapper.py +17 -0
- topos/canonicalization/mappers/messenger_mapper.py +58 -0
- topos/canonicalization/models.py +31 -0
- topos/canonicalization/resolver.py +23 -0
- topos/cli/__init__.py +1 -0
- topos/cli/__main__.py +6 -0
- topos/cli/commands.py +132 -0
- topos/config/__init__.py +1 -0
- topos/config/sanitization_ollama.py +189 -0
- topos/config/settings.py +310 -0
- topos/contacts/__init__.py +5 -0
- topos/contacts/identity.py +24 -0
- topos/control_plane_client.py +300 -0
- topos/core/__init__.py +1 -0
- topos/core/api_models.py +128 -0
- topos/core/connection_resilience.py +99 -0
- topos/core/device_helpers.py +8 -0
- topos/core/errors.py +13 -0
- topos/core/events.py +12 -0
- topos/core/handlers.py +5625 -0
- topos/core/logging.py +175 -0
- topos/core/metrics.py +21 -0
- topos/core/startup_banner.py +62 -0
- topos/core/state.py +682 -0
- topos/core/table_layers.py +45 -0
- topos/core/types.py +13 -0
- topos/data_explorer_table_prefs.py +150 -0
- topos/engine/__init__.py +29 -0
- topos/engine/backends/__init__.py +50 -0
- topos/engine/backends/base.py +21 -0
- topos/engine/backends/huggingface.py +151 -0
- topos/engine/backends/ollama.py +181 -0
- topos/engine/backends/stub.py +22 -0
- topos/engine/engine.py +165 -0
- topos/engine/intake.py +32 -0
- topos/engine/queue_manager.py +112 -0
- topos/engine/registration.py +126 -0
- topos/engine/result_formatter.py +38 -0
- topos/engine/router.py +19 -0
- topos/engine/scoped_token.py +82 -0
- topos/engine/tasks.py +154 -0
- topos/engine/transport.py +44 -0
- topos/engine/usage_guard.py +100 -0
- topos/engine/usage_observation.py +129 -0
- topos/engine/validator.py +23 -0
- topos/enrichment/__init__.py +1 -0
- topos/enrichment/derived_tables.py +214 -0
- topos/enrichment/jobs/__init__.py +30 -0
- topos/enrichment/jobs/base.py +54 -0
- topos/enrichment/jobs/canonical/__init__.py +1 -0
- topos/enrichment/jobs/canonical/embeddings_job.py +27 -0
- topos/enrichment/jobs/canonical/emo_27_job.py +97 -0
- topos/enrichment/jobs/canonical/entities_job.py +27 -0
- topos/enrichment/jobs/canonical/sentiment_job.py +27 -0
- topos/enrichment/jobs/canonical/topics_job.py +27 -0
- topos/enrichment/jobs/raw/__init__.py +1 -0
- topos/enrichment/jobs/raw/attachments_job.py +12 -0
- topos/enrichment/jobs/raw/language_job.py +12 -0
- topos/enrichment/jobs/raw/time_normalization_job.py +12 -0
- topos/enrichment/jobs/raw/tool_calls_job.py +12 -0
- topos/enrichment/models/__init__.py +1 -0
- topos/enrichment/models/manager.py +8 -0
- topos/enrichment/models/registry.py +71 -0
- topos/enrichment/models/versioning.py +8 -0
- topos/enrichment/orchestrator.py +177 -0
- topos/enrichment/processor.py +17 -0
- topos/enrichment/progress_bar.py +122 -0
- topos/enrichment/website_classifier.py +31 -0
- topos/filter_lab/__init__.py +1 -0
- topos/filter_lab/bundles.py +300 -0
- topos/filter_lab/schema.py +86 -0
- topos/filter_lab/service.py +167 -0
- topos/filter_lab/store.py +374 -0
- topos/filter_lab/worker.py +250 -0
- topos/hosted_pool_lease.py +153 -0
- topos/ingestion/__init__.py +1 -0
- topos/ingestion/checkpoints/__init__.py +6 -0
- topos/ingestion/checkpoints/checkpoint_store.py +24 -0
- topos/ingestion/checkpoints/sqlite_checkpoint_store.py +82 -0
- topos/ingestion/ingest_helpers.py +504 -0
- topos/ingestion/jobs.py +91 -0
- topos/ingestion/local_sync.py +823 -0
- topos/ingestion/log_preview.py +21 -0
- topos/ingestion/manager.py +1100 -0
- topos/ingestion/parser.py +174 -0
- topos/ingestion/parsers/__init__.py +32 -0
- topos/ingestion/parsers/base.py +24 -0
- topos/ingestion/parsers/browser_parser.py +171 -0
- topos/ingestion/parsers/calendar_parser.py +21 -0
- topos/ingestion/parsers/chatgpt_conversation_flattener.py +266 -0
- topos/ingestion/parsers/chatgpt_parser.py +67 -0
- topos/ingestion/parsers/grok_parser.py +21 -0
- topos/ingestion/parsers/messenger_parser.py +97 -0
- topos/ingestion/progress.py +54 -0
- topos/ingestion/sources/__init__.py +20 -0
- topos/ingestion/sources/base.py +39 -0
- topos/ingestion/sources/calendar.py +29 -0
- topos/ingestion/sources/chatgpt.py +29 -0
- topos/ingestion/sources/contact_importers.py +274 -0
- topos/ingestion/sources/grok.py +29 -0
- topos/ingestion/sources/imessage_reader.py +479 -0
- topos/ingestion/sources/signal_export_parser.py +132 -0
- topos/ingestion/sources/signal_reader.py +491 -0
- topos/ingestion/state_machine.py +70 -0
- topos/ingestion/triggers/__init__.py +1 -0
- topos/ingestion/triggers/file_trigger.py +36 -0
- topos/ingestion/triggers/sqlite_trigger.py +18 -0
- topos/ingestion/validation/__init__.py +1 -0
- topos/ingestion/validation/base.py +27 -0
- topos/ingestion/validation/schema_registry.py +111 -0
- topos/ingestion/validation/schema_validator.py +13 -0
- topos/lineage/__init__.py +1 -0
- topos/lineage/provenance.py +9 -0
- topos/lineage/tracker.py +9 -0
- topos/mcp_stdio_proxy.py +83 -0
- topos/observability/__init__.py +1 -0
- topos/observability/alerts.py +7 -0
- topos/observability/metrics.py +25 -0
- topos/observability/tracing.py +18 -0
- topos/openai_client.py +69 -0
- topos/projections/__init__.py +1 -0
- topos/projections/vector_index/__init__.py +1 -0
- topos/projections/vector_index/base.py +21 -0
- topos/projections/vector_index/builders.py +11 -0
- topos/projections/vector_index/health_checks.py +5 -0
- topos/rate_limit.py +43 -0
- topos/sanitization/__init__.py +16 -0
- topos/sanitization/ollama_transforms.py +276 -0
- topos/scope_resolution.py +89 -0
- topos/services/__init__.py +1 -0
- topos/services/container.py +46 -0
- topos/services/embeddings/__init__.py +1 -0
- topos/services/embeddings/base.py +7 -0
- topos/services/embeddings/local.py +9 -0
- topos/services/embeddings/remote.py +9 -0
- topos/services/interfaces.py +40 -0
- topos/services/llm/__init__.py +1 -0
- topos/services/llm/base.py +7 -0
- topos/services/llm/openai.py +126 -0
- topos/services/local.py +123 -0
- topos/services/postgres.py +385 -0
- topos/sources/__init__.py +6 -0
- topos/sources/definitions.py +114 -0
- topos/sources/install_service.py +836 -0
- topos/sources/registry.py +263 -0
- topos/sources/runtime_install.py +427 -0
- topos/storage/__init__.py +1 -0
- topos/storage/canonical/__init__.py +18 -0
- topos/storage/canonical/ai_chat/__init__.py +22 -0
- topos/storage/canonical/ai_chat/canonicalizer.py +147 -0
- topos/storage/canonical/ai_chat/mapper.py +168 -0
- topos/storage/canonical/ai_chat/model.py +87 -0
- topos/storage/canonical/ai_chat/tables.py +179 -0
- topos/storage/canonical/canonical_store.py +24 -0
- topos/storage/canonical/conversations_tables.py +1020 -0
- topos/storage/canonical/mapping_store.py +30 -0
- topos/storage/canonical/postgres.py +10 -0
- topos/storage/db/__init__.py +1 -0
- topos/storage/db/client.py +8 -0
- topos/storage/db/migrations/__init__.py +1 -0
- topos/storage/db/migrations/stage9_column_renames.py +78 -0
- topos/storage/db/paths.py +122 -0
- topos/storage/db/postgres.py +240 -0
- topos/storage/db/schema.py +6 -0
- topos/storage/enrichment/__init__.py +1 -0
- topos/storage/enrichment/canonical_enrichment_store.py +7 -0
- topos/storage/enrichment/raw_enrichment_store.py +18 -0
- topos/storage/normalized/__init__.py +1 -0
- topos/storage/normalized/normalized_store.py +24 -0
- topos/storage/oplog/__init__.py +1 -0
- topos/storage/oplog/decision.py +6 -0
- topos/storage/oplog/oplog_store.py +17 -0
- topos/storage/oplog/postgres.py +10 -0
- topos/storage/projections/__init__.py +1 -0
- topos/storage/projections/index_ops_store.py +6 -0
- topos/storage/projections/vector_index_store.py +6 -0
- topos/storage/raw/__init__.py +1 -0
- topos/storage/raw/browser_flat_tables.py +303 -0
- topos/storage/raw/file_store.py +100 -0
- topos/storage/raw/raw_store.py +29 -0
- topos/storage/raw/raw_tables_manager.py +295 -0
- topos/storage/raw/sqlite_raw_store.py +17 -0
- topos/storage/security/encryption.py +21 -0
- topos/storage/signal_identity.py +71 -0
- topos/storage/source_settings.py +116 -0
- topos/storage/user_identity.py +69 -0
- topos/sync/__init__.py +5 -0
- topos/sync/client.py +272 -0
- topos/sync_handlers.py +70 -0
- topos/testing/__init__.py +1 -0
- topos/testing/lifespan.py +7 -0
- topos/uma_contact_enrichment.py +1032 -0
- topos/uma_filters.py +669 -0
- topos/uma_resource_id.py +24 -0
- topos/uma_rpt.py +69 -0
- topos/utils/base_object.py +61 -0
- topos/websocket_client.py +21 -0
- topos_node-0.1.0.dist-info/METADATA +199 -0
- topos_node-0.1.0.dist-info/RECORD +249 -0
- topos_node-0.1.0.dist-info/WHEEL +5 -0
- topos_node-0.1.0.dist-info/entry_points.txt +2 -0
- topos_node-0.1.0.dist-info/licenses/LICENSE +201 -0
- topos_node-0.1.0.dist-info/top_level.txt +2 -0
shared/filtering.py
ADDED
|
@@ -0,0 +1,640 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass, field
|
|
4
|
+
from datetime import datetime
|
|
5
|
+
from enum import Enum
|
|
6
|
+
from typing import Any, Dict, Iterable, List, Literal, Optional
|
|
7
|
+
|
|
8
|
+
from pydantic import BaseModel, Field, field_validator, model_validator
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class FilterCategory(str, Enum):
    """Category a catalog filter belongs to.

    Members also subclass ``str``, so each member compares equal to its raw
    string value.
    """

    RETRIEVAL = "retrieval"
    AGGREGATION = "aggregation"
    FIELD_LEVEL = "field_level"
    SANITIZATION = "sanitization"
    INFERABILITY_REDUCTION = "inferability_reduction"
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class FilterRuntimeStatus(str, Enum):
    """Runtime availability of a catalog filter.

    ``validate_filter_params`` rejects ``FUTURE_ONLY`` filters unless the
    caller passes ``allow_future=True``; the other two statuses are accepted.
    """

    SUPPORTED_NOW = "supported_now"
    # NOTE(review): presumably "planned for stage 8"; this module only stores the value.
    STAGE_8_TARGET = "stage_8_target"
    FUTURE_ONLY = "future_only"
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
# Coarse compute / latency band used to group filters in the UI. The tier is
# curated by hand for each filter; it is not a measured SLA.
FilterComputeTier = Literal["low", "medium", "high"]
# The same three tier names as a set, for membership checks at runtime.
VALID_FILTER_COMPUTE_TIERS = frozenset(("low", "medium", "high"))
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@dataclass(frozen=True)
class FilterParamSpec:
    """Immutable description of a single parameter accepted by a filter."""

    # Key under which the parameter appears in a filter's params mapping.
    name: str
    # Type tag dispatched on by _validate_param:
    # "bool", "int", "str", "iso_datetime", "enum" or "list".
    value_type: str
    # When True, validate_filter_params rejects params that omit this key.
    required: bool = False
    # Inclusive lower bound; only enforced for "int" parameters.
    min_value: Optional[float] = None
    # Accepted values; only consulted for "enum" parameters.
    allowed_values: Optional[List[str]] = None
    # Element type of a "list" parameter; only "str" triggers per-item checks.
    item_type: Optional[str] = None
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
@dataclass(frozen=True)
class FilterDefinition:
    """Immutable catalog record describing one filter."""

    # Identifier of the filter; FILTER_CATALOG is keyed by this value.
    filter_id: str
    # Definition version (_catalog_entry always sets 1).
    version: int
    category: FilterCategory
    # Human-readable label.
    display_name: str
    description: str
    # Parameters the filter accepts; checked by validate_filter_params.
    parameter_schema: List[FilterParamSpec] = field(default_factory=list)
    compatibility_rules: Dict[str, Any] = field(default_factory=dict)
    # _catalog_entry stores the "group" and "compute_tier" keys here.
    ui_metadata: Dict[str, Any] = field(default_factory=dict)
    third_party_default_allowed: bool = True
    # Defaults to FUTURE_ONLY, i.e. rejected by validation unless explicitly allowed.
    runtime_status: FilterRuntimeStatus = FilterRuntimeStatus.FUTURE_ONLY
    # NOTE(review): presumably names the runtime handler; not consumed in this module.
    handler_id: Optional[str] = None
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _catalog_entry(
    filter_id: str,
    category: FilterCategory,
    description: str,
    *,
    parameter_schema: Optional[List[FilterParamSpec]] = None,
    runtime_status: FilterRuntimeStatus = FilterRuntimeStatus.FUTURE_ONLY,
    handler_id: Optional[str] = None,
    ui_group: str = "Advanced",
    compute_tier: FilterComputeTier = "low",
) -> FilterDefinition:
    """Build a version-1 ``FilterDefinition`` with the catalog's shared defaults.

    The display name is derived from ``filter_id`` (underscores become spaces,
    then title-cased). ``ui_group`` and ``compute_tier`` are stored in
    ``ui_metadata`` under the keys ``"group"`` and ``"compute_tier"``.
    Compatibility rules start empty and third-party use is allowed by default.
    """
    title = " ".join(filter_id.split("_")).title()
    # Fall back to a fresh list so entries never share a schema list.
    schema = parameter_schema if parameter_schema else []
    ui_info = {"group": ui_group, "compute_tier": compute_tier}
    return FilterDefinition(
        filter_id=filter_id,
        version=1,
        category=category,
        display_name=title,
        description=description,
        parameter_schema=schema,
        compatibility_rules={},
        ui_metadata=ui_info,
        third_party_default_allowed=True,
        runtime_status=runtime_status,
        handler_id=handler_id,
    )
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
# Catalog of every known filter, keyed by filter_id. Each entry is keyed by its
# own filter_id so the key and the definition cannot drift apart; insertion
# order follows the tuple below.
FILTER_CATALOG: Dict[str, FilterDefinition] = {
    entry.filter_id: entry
    for entry in (
        # --- Retrieval: time windows, row caps, source selection ---
        _catalog_entry(
            "rolling_window_days",
            FilterCategory.RETRIEVAL,
            "Restrict records to the last N days.",
            parameter_schema=[
                FilterParamSpec("days", "int", required=True, min_value=0),
                FilterParamSpec("table_id", "str"),
            ],
            runtime_status=FilterRuntimeStatus.SUPPORTED_NOW,
            handler_id="retrieval.rolling_window_days",
            ui_group="Time",
        ),
        _catalog_entry(
            "date_range",
            FilterCategory.RETRIEVAL,
            "Restrict records to an explicit start/end range.",
            parameter_schema=[
                FilterParamSpec("start", "iso_datetime", required=True),
                FilterParamSpec("end", "iso_datetime", required=True),
            ],
            runtime_status=FilterRuntimeStatus.SUPPORTED_NOW,
            handler_id="retrieval.date_range",
            ui_group="Time",
        ),
        _catalog_entry(
            "max_rows",
            FilterCategory.RETRIEVAL,
            "Cap the number of returned rows.",
            parameter_schema=[
                FilterParamSpec("count", "int", required=True, min_value=0),
                FilterParamSpec("table_id", "str"),
            ],
            runtime_status=FilterRuntimeStatus.SUPPORTED_NOW,
            handler_id="retrieval.max_rows",
            ui_group="Detail",
        ),
        _catalog_entry(
            "most_recent_n",
            FilterCategory.RETRIEVAL,
            "Return only the N most recent records.",
            parameter_schema=[
                FilterParamSpec("count", "int", required=True, min_value=0),
                FilterParamSpec("table_id", "str"),
            ],
            runtime_status=FilterRuntimeStatus.SUPPORTED_NOW,
            handler_id="retrieval.most_recent_n",
            ui_group="Detail",
        ),
        _catalog_entry(
            "source_filter",
            FilterCategory.RETRIEVAL,
            "Limit results to a set of source_ids.",
            parameter_schema=[
                FilterParamSpec("source_ids", "list", required=True, item_type="str"),
            ],
            runtime_status=FilterRuntimeStatus.SUPPORTED_NOW,
            handler_id="retrieval.source_filter",
            ui_group="Sources",
        ),
        # --- Field level: column selection ---
        _catalog_entry(
            "column_allowlist",
            FilterCategory.FIELD_LEVEL,
            "Only include the specified fields.",
            parameter_schema=[
                FilterParamSpec("fields", "list", required=True, item_type="str"),
            ],
            runtime_status=FilterRuntimeStatus.SUPPORTED_NOW,
            handler_id="field_level.column_allowlist",
            ui_group="Detail",
        ),
        _catalog_entry(
            "column_blocklist",
            FilterCategory.FIELD_LEVEL,
            "Exclude the specified fields.",
            parameter_schema=[
                FilterParamSpec("fields", "list", required=True, item_type="str"),
            ],
            runtime_status=FilterRuntimeStatus.SUPPORTED_NOW,
            handler_id="field_level.column_blocklist",
            ui_group="Detail",
        ),
        # --- Aggregation ---
        _catalog_entry(
            "daily_rollup",
            FilterCategory.AGGREGATION,
            "Aggregate records by day.",
            runtime_status=FilterRuntimeStatus.STAGE_8_TARGET,
            handler_id="aggregation.daily_rollup",
            ui_group="Summaries",
        ),
        _catalog_entry(
            "weekly_rollup",
            FilterCategory.AGGREGATION,
            "Aggregate records by week.",
            runtime_status=FilterRuntimeStatus.STAGE_8_TARGET,
            handler_id="aggregation.weekly_rollup",
            ui_group="Summaries",
        ),
        _catalog_entry(
            "count_only",
            FilterCategory.AGGREGATION,
            "Return only record counts.",
            runtime_status=FilterRuntimeStatus.STAGE_8_TARGET,
            handler_id="aggregation.count_only",
            ui_group="Summaries",
        ),
        # --- Sanitization ---
        _catalog_entry(
            "timestamp_to_date",
            FilterCategory.SANITIZATION,
            "Reduce timestamps to date precision.",
            runtime_status=FilterRuntimeStatus.SUPPORTED_NOW,
            handler_id="sanitization.timestamp_to_date",
            ui_group="Redaction",
            compute_tier="low",
        ),
        _catalog_entry(
            "raw_to_summary",
            FilterCategory.SANITIZATION,
            "Transform raw text into a summary.",
            parameter_schema=[
                FilterParamSpec("style", "str"),
                FilterParamSpec("max_length", "int", min_value=1),
            ],
            runtime_status=FilterRuntimeStatus.FUTURE_ONLY,
            handler_id="sanitization.raw_to_summary",
            ui_group="Redaction",
            compute_tier="high",
        ),
        _catalog_entry(
            "raw_to_sentiment",
            FilterCategory.SANITIZATION,
            "Transform raw text into sentiment output.",
            parameter_schema=[
                FilterParamSpec("scale", "str"),
                FilterParamSpec("labels", "list", item_type="str"),
            ],
            runtime_status=FilterRuntimeStatus.FUTURE_ONLY,
            handler_id="sanitization.raw_to_sentiment",
            ui_group="Redaction",
            compute_tier="medium",
        ),
        _catalog_entry(
            "third_party_anonymization",
            FilterCategory.SANITIZATION,
            "Redact third-party identities from content.",
            parameter_schema=[FilterParamSpec("mode", "str")],
            runtime_status=FilterRuntimeStatus.FUTURE_ONLY,
            handler_id="sanitization.third_party_anonymization",
            ui_group="Redaction",
            compute_tier="medium",
        ),
        _catalog_entry(
            "name_removal",
            FilterCategory.SANITIZATION,
            "Remove names from content.",
            runtime_status=FilterRuntimeStatus.FUTURE_ONLY,
            handler_id="sanitization.name_removal",
            ui_group="Redaction",
            compute_tier="medium",
        ),
        _catalog_entry(
            "contact_removal",
            FilterCategory.SANITIZATION,
            "Remove contact details from content.",
            runtime_status=FilterRuntimeStatus.FUTURE_ONLY,
            handler_id="sanitization.contact_removal",
            ui_group="Redaction",
            compute_tier="medium",
        ),
        _catalog_entry(
            "pii_redaction",
            FilterCategory.SANITIZATION,
            "Redact PII (names, contact details, etc.) from content using NER and replacement.",
            runtime_status=FilterRuntimeStatus.FUTURE_ONLY,
            handler_id="sanitization.pii_redaction",
            ui_group="Redaction",
            compute_tier="medium",
        ),
        _catalog_entry(
            "nsfw_sanitization",
            FilterCategory.SANITIZATION,
            "Mask or redact NSFW content in text.",
            runtime_status=FilterRuntimeStatus.FUTURE_ONLY,
            handler_id="sanitization.nsfw_sanitization",
            ui_group="Redaction",
            compute_tier="high",
        ),
        # --- Inferability reduction ---
        _catalog_entry(
            "coords_to_city",
            FilterCategory.INFERABILITY_REDUCTION,
            "Reduce coordinate precision to city-level.",
            runtime_status=FilterRuntimeStatus.FUTURE_ONLY,
            handler_id="inferability.coords_to_city",
            ui_group="Redaction",
            compute_tier="medium",
        ),
        _catalog_entry(
            "amount_to_range",
            FilterCategory.INFERABILITY_REDUCTION,
            "Reduce numeric amounts to bands.",
            parameter_schema=[FilterParamSpec("bands", "list", item_type="str")],
            runtime_status=FilterRuntimeStatus.FUTURE_ONLY,
            handler_id="inferability.amount_to_range",
            ui_group="Redaction",
            compute_tier="low",
        ),
        _catalog_entry(
            "behavior_anonymization",
            FilterCategory.INFERABILITY_REDUCTION,
            "Break direct identity-behavior linkage.",
            parameter_schema=[FilterParamSpec("mode", "str")],
            runtime_status=FilterRuntimeStatus.FUTURE_ONLY,
            handler_id="inferability.behavior_anonymization",
            ui_group="Redaction",
            compute_tier="medium",
        ),
        _catalog_entry(
            "precision_level",
            FilterCategory.INFERABILITY_REDUCTION,
            "Coarsen exposed precision level.",
            parameter_schema=[
                FilterParamSpec(
                    "level",
                    "enum",
                    required=True,
                    allowed_values=["exact", "city", "region", "band"],
                ),
            ],
            runtime_status=FilterRuntimeStatus.FUTURE_ONLY,
            handler_id="inferability.precision_level",
            ui_group="Redaction",
            compute_tier="high",
        ),
        # --- Stage 11: contact resolution & participation (see sprints_roles_scopes_stage_11) ---
        _catalog_entry(
            "contact_display_names",
            FilterCategory.FIELD_LEVEL,
            "Resolve sender display names from the contact book when contacts:resolve is granted.",
            parameter_schema=[FilterParamSpec("enabled", "bool", required=True)],
            runtime_status=FilterRuntimeStatus.SUPPORTED_NOW,
            handler_id="field_level.contact_display_names",
            ui_group="Contacts",
        ),
        _catalog_entry(
            "message_contact_participation",
            FilterCategory.RETRIEVAL,
            "Include or exclude messages by resolved contact_id for the sender.",
            parameter_schema=[
                FilterParamSpec(
                    "mode",
                    "enum",
                    required=True,
                    allowed_values=["all", "allowlist", "blocklist"],
                ),
                FilterParamSpec("contact_ids", "list", required=True, item_type="str"),
                FilterParamSpec(
                    "match",
                    "enum",
                    required=True,
                    allowed_values=["sender_only", "thread_participants"],
                ),
            ],
            runtime_status=FilterRuntimeStatus.SUPPORTED_NOW,
            handler_id="retrieval.message_contact_participation",
            ui_group="Contacts",
        ),
        _catalog_entry(
            "event_contact_participation",
            FilterCategory.RETRIEVAL,
            "Phase 2: include or exclude events by linked contacts (organizer/attendee).",
            parameter_schema=[
                FilterParamSpec(
                    "mode",
                    "enum",
                    required=True,
                    allowed_values=["all", "allowlist", "blocklist"],
                ),
                FilterParamSpec("contact_ids", "list", required=True, item_type="str"),
                FilterParamSpec(
                    "match",
                    "enum",
                    required=True,
                    allowed_values=["organizer", "attendee", "any_linked"],
                ),
            ],
            runtime_status=FilterRuntimeStatus.FUTURE_ONLY,
            handler_id="retrieval.event_contact_participation",
            ui_group="Contacts",
        ),
    )
}
|
|
338
|
+
|
|
339
|
+
|
|
340
|
+
def list_filter_definitions() -> List[FilterDefinition]:
    """Return every catalog definition as a new list, in catalog order."""
    return [*FILTER_CATALOG.values()]
|
|
342
|
+
|
|
343
|
+
|
|
344
|
+
def get_filter_definition(filter_id: str) -> Optional[FilterDefinition]:
    """Look up a filter by id, ignoring surrounding whitespace.

    Returns ``None`` when the id is empty/falsy or not present in
    ``FILTER_CATALOG``.
    """
    lookup_key = filter_id.strip() if filter_id else ""
    return FILTER_CATALOG.get(lookup_key)
|
|
346
|
+
|
|
347
|
+
|
|
348
|
+
def _validate_iso_datetime(value: str) -> None:
|
|
349
|
+
datetime.fromisoformat(value.replace("Z", "+00:00"))
|
|
350
|
+
|
|
351
|
+
|
|
352
|
+
def _validate_param(spec: FilterParamSpec, value: Any) -> None:
|
|
353
|
+
if spec.value_type == "bool":
|
|
354
|
+
if not isinstance(value, bool):
|
|
355
|
+
raise ValueError(f"Parameter {spec.name!r} must be a bool")
|
|
356
|
+
return
|
|
357
|
+
if spec.value_type == "int":
|
|
358
|
+
if not isinstance(value, int) or isinstance(value, bool):
|
|
359
|
+
raise ValueError(f"Parameter {spec.name!r} must be an int")
|
|
360
|
+
if spec.min_value is not None and value < spec.min_value:
|
|
361
|
+
raise ValueError(f"Parameter {spec.name!r} must be >= {spec.min_value}")
|
|
362
|
+
return
|
|
363
|
+
if spec.value_type == "str":
|
|
364
|
+
if not isinstance(value, str) or not value.strip():
|
|
365
|
+
raise ValueError(f"Parameter {spec.name!r} must be a non-empty string")
|
|
366
|
+
return
|
|
367
|
+
if spec.value_type == "iso_datetime":
|
|
368
|
+
if not isinstance(value, str):
|
|
369
|
+
raise ValueError(f"Parameter {spec.name!r} must be an ISO datetime string")
|
|
370
|
+
_validate_iso_datetime(value)
|
|
371
|
+
return
|
|
372
|
+
if spec.value_type == "enum":
|
|
373
|
+
if value not in (spec.allowed_values or []):
|
|
374
|
+
raise ValueError(f"Parameter {spec.name!r} must be one of {spec.allowed_values}")
|
|
375
|
+
return
|
|
376
|
+
if spec.value_type == "list":
|
|
377
|
+
if not isinstance(value, list):
|
|
378
|
+
raise ValueError(f"Parameter {spec.name!r} must be a list")
|
|
379
|
+
if spec.item_type == "str":
|
|
380
|
+
if any(not isinstance(item, str) or not item.strip() for item in value):
|
|
381
|
+
raise ValueError(f"Parameter {spec.name!r} must contain only non-empty strings")
|
|
382
|
+
return
|
|
383
|
+
raise ValueError(f"Unsupported parameter type {spec.value_type!r} for {spec.name!r}")
|
|
384
|
+
|
|
385
|
+
|
|
386
|
+
def validate_filter_params(filter_id: str, params: Dict[str, Any], *, allow_future: bool = False) -> None:
    """Validate ``params`` against the catalog schema of ``filter_id``.

    Args:
        filter_id: Id of the filter to look up in the catalog.
        params: Parameter name -> value mapping to check.
        allow_future: When True, filters whose runtime status is
            ``FUTURE_ONLY`` are accepted instead of rejected.

    Raises:
        ValueError: If the filter is unknown, is future-only while
            ``allow_future`` is False, a required parameter is missing, an
            unexpected parameter is present, or a value fails its spec.
    """
    definition = get_filter_definition(filter_id)
    if definition is None:
        raise ValueError(f"Unknown filter_id: {filter_id}")

    is_future_only = definition.runtime_status == FilterRuntimeStatus.FUTURE_ONLY
    if is_future_only and not allow_future:
        raise ValueError(f"Filter {filter_id!r} is not supported for runtime manifests yet")

    schema = definition.parameter_schema
    specs_by_name = {}
    for spec in schema:
        specs_by_name[spec.name] = spec

    # Report the first missing required parameter, in schema order.
    missing = next((spec for spec in schema if spec.required and spec.name not in params), None)
    if missing is not None:
        raise ValueError(f"Missing required parameter {missing.name!r} for filter {filter_id!r}")

    # Every supplied key must be declared and carry a valid value.
    for key in params:
        if key not in specs_by_name:
            raise ValueError(f"Unexpected parameter {key!r} for filter {filter_id!r}")
        _validate_param(specs_by_name[key], params[key])
|
|
400
|
+
|
|
401
|
+
|
|
402
|
+
class FilterInstance(BaseModel):
    """One configured use of a catalog filter: its id, category and params.

    Validation guarantees that ``filter_id`` exists in the catalog, that
    ``category`` (when given) matches the catalog entry, and that ``params``
    satisfy the filter's parameter schema. Future-only filters are rejected.
    """

    filter_id: str = Field(..., min_length=1)
    category: Optional[FilterCategory] = Field(None)
    params: Dict[str, Any] = Field(default_factory=dict)

    @field_validator("filter_id")
    @classmethod
    def known_filter_id(cls, value: str) -> str:
        """Strip whitespace and require the id to exist in the catalog."""
        normalized = value.strip() if value else ""
        if get_filter_definition(normalized):
            return normalized
        raise ValueError(f"Unknown filter_id: {normalized}")

    @model_validator(mode="after")
    def validate_against_catalog(self) -> "FilterInstance":
        """Fill in or verify the category, then validate params against the catalog."""
        definition = get_filter_definition(self.filter_id)
        if definition is None:
            raise ValueError(f"Unknown filter_id: {self.filter_id}")

        supplied = self.category
        if supplied is None:
            # An omitted category is taken from the catalog entry.
            self.category = definition.category
        elif supplied != definition.category:
            raise ValueError(
                f"Filter {self.filter_id!r} must use category {definition.category.value!r}, "
                f"got {self.category.value!r}"
            )

        # allow_future keeps its default (False), so FUTURE_ONLY filters fail here.
        validate_filter_params(self.filter_id, self.params)
        return self

    def to_storage_dict(self) -> Dict[str, Any]:
        """Dump the model in JSON mode, omitting fields whose value is None."""
        return self.model_dump(mode="json", exclude_none=True)
|
|
432
|
+
|
|
433
|
+
|
|
434
|
+
class FieldTransform(BaseModel):
    """
    Field-level transform: a single transform (identified by transform_id) applied to
    one table/field.

    Appears in the permission payload as the field_transforms list; registerer
    field_transform_defaults use the same transform_ids from FILTER_CATALOG.
    """

    table_id: Optional[str] = Field(None, description="Canonical table; optional if scope implies one table")
    field: str = Field(..., min_length=1, description="Column/field name to transform")
    transform_id: str = Field(..., min_length=1, description="Filter/transform ID from FILTER_CATALOG")
    params: Dict[str, Any] = Field(default_factory=dict, description="Transform parameters")

    @field_validator("transform_id")
    @classmethod
    def known_transform_id(cls, value: str) -> str:
        """Strip whitespace and require the id to exist in the catalog."""
        normalized = value.strip() if value else ""
        if get_filter_definition(normalized):
            return normalized
        raise ValueError(f"Unknown transform_id: {normalized}")

    @model_validator(mode="after")
    def validate_params_against_catalog(self) -> "FieldTransform":
        """Validate params against the catalog schema; future-only transforms are allowed."""
        if get_filter_definition(self.transform_id) is None:
            return self
        validate_filter_params(self.transform_id, self.params, allow_future=True)
        return self

    def to_storage_dict(self) -> Dict[str, Any]:
        """Dump the model in JSON mode, omitting fields whose value is None."""
        return self.model_dump(mode="json", exclude_none=True)
|
|
463
|
+
|
|
464
|
+
|
|
465
|
+
def validate_field_transforms(field_transforms: List[Any]) -> None:
    """
    Check every entry of a field_transforms list.

    FieldTransform instances are accepted as-is; dict entries are run through
    FieldTransform.model_validate, which enforces the model's field and transform_id rules.
    Raises ValueError when the argument is not a list or an entry is neither a
    FieldTransform nor a dict; validation stops at the first failing entry.
    """
    if not isinstance(field_transforms, list):
        raise ValueError("field_transforms must be a list")
    for position, entry in enumerate(field_transforms):
        if isinstance(entry, FieldTransform):
            continue
        if not isinstance(entry, dict):
            raise ValueError(f"field_transforms[{position}] must be a FieldTransform or dict, got {type(entry).__name__}")
        FieldTransform.model_validate(entry)
|
|
479
|
+
|
|
480
|
+
|
|
481
|
+
def field_transforms_from_storage(value: Any) -> Optional[List[FieldTransform]]:
    """
    Rebuild FieldTransform objects from a stored value.

    None is passed through as None, a list is validated entry by entry (an empty
    list yields an empty list), and any other type raises ValueError.
    """
    if value is None:
        return None
    if not isinstance(value, list):
        raise ValueError("field_transforms must be a list or None")
    return [FieldTransform.model_validate(raw) for raw in value]
|
|
490
|
+
|
|
491
|
+
|
|
492
|
+
class FilterManifestProvenance(BaseModel):
    # Bookkeeping attached to a FilterManifest via its optional `provenance` field.
    # NOTE(review): presumably records which default layers / owner overrides
    # contributed to the manifest — confirm against the code that populates it.
    resource_defaults_applied: bool = Field(default=False)
    role_defaults_applied: List[str] = Field(default_factory=list)
    source_defaults_applied: List[str] = Field(default_factory=list)
    owner_overrides: List[str] = Field(default_factory=list)
|
|
497
|
+
|
|
498
|
+
|
|
499
|
+
class FilterManifest(BaseModel):
    # Versioned container for a list of FilterInstance entries plus optional provenance.
    manifest_version: int = Field(1, ge=1)
    filters: List[FilterInstance] = Field(default_factory=list)
    provenance: Optional[FilterManifestProvenance] = None

    def to_storage_dict(self) -> Dict[str, Any]:
        """Dump this manifest to a dict in JSON mode, leaving out fields whose value is None."""
        dump_options = {"exclude_none": True, "mode": "json"}
        return self.model_dump(**dump_options)

    def get_filter(self, filter_id: str) -> Optional[FilterInstance]:
        """Return the first filter whose filter_id equals the argument, or None if there is none."""
        return next((entry for entry in self.filters if entry.filter_id == filter_id), None)

    def iter_filters(self, categories: Optional[Iterable[FilterCategory]] = None) -> List[FilterInstance]:
        """Return a new list of filters, restricted to the given categories when provided."""
        if categories is not None:
            wanted = set(categories)
            return [entry for entry in self.filters if entry.category in wanted]
        return list(self.filters)
|
|
518
|
+
|
|
519
|
+
|
|
520
|
+
def filter_manifest_from_storage(value: Any) -> Optional[FilterManifest]:
    """
    Coerce a stored value into a FilterManifest.

    None and existing FilterManifest instances are returned unchanged, a dict is
    validated into a FilterManifest, and any other type raises ValueError.
    """
    if value is None or isinstance(value, FilterManifest):
        return value
    if isinstance(value, dict):
        return FilterManifest.model_validate(value)
    raise ValueError("filter_manifest must be a dict")
|
|
528
|
+
|
|
529
|
+
|
|
530
|
+
def build_filter_manifest(*filters: FilterInstance, provenance: Optional[FilterManifestProvenance] = None) -> FilterManifest:
    """Build a FilterManifest from the positional filters, in the order given, with optional provenance."""
    collected = [*filters]
    return FilterManifest(filters=collected, provenance=provenance)
|
|
532
|
+
|
|
533
|
+
|
|
534
|
+
def _merge_param_values(filter_id: str, existing: Dict[str, Any], incoming: Dict[str, Any]) -> Dict[str, Any]:
|
|
535
|
+
merged = dict(existing)
|
|
536
|
+
if filter_id == "rolling_window_days":
|
|
537
|
+
current = merged.get("days")
|
|
538
|
+
incoming_days = incoming.get("days")
|
|
539
|
+
if current is None:
|
|
540
|
+
merged["days"] = incoming_days
|
|
541
|
+
elif incoming_days is not None:
|
|
542
|
+
merged["days"] = min(int(current), int(incoming_days))
|
|
543
|
+
return merged
|
|
544
|
+
if filter_id in {"max_rows", "most_recent_n"}:
|
|
545
|
+
current = merged.get("count")
|
|
546
|
+
incoming_count = incoming.get("count")
|
|
547
|
+
if current is None:
|
|
548
|
+
merged["count"] = incoming_count
|
|
549
|
+
elif incoming_count is not None:
|
|
550
|
+
merged["count"] = min(int(current), int(incoming_count))
|
|
551
|
+
return merged
|
|
552
|
+
if filter_id == "date_range":
|
|
553
|
+
start = merged.get("start")
|
|
554
|
+
end = merged.get("end")
|
|
555
|
+
incoming_start = incoming.get("start")
|
|
556
|
+
incoming_end = incoming.get("end")
|
|
557
|
+
if incoming_start is not None:
|
|
558
|
+
if start is None or str(incoming_start) > str(start):
|
|
559
|
+
merged["start"] = incoming_start
|
|
560
|
+
if incoming_end is not None:
|
|
561
|
+
if end is None or str(incoming_end) < str(end):
|
|
562
|
+
merged["end"] = incoming_end
|
|
563
|
+
return merged
|
|
564
|
+
if filter_id == "source_filter":
|
|
565
|
+
current = set(str(item) for item in merged.get("source_ids", []))
|
|
566
|
+
incoming_values = set(str(item) for item in incoming.get("source_ids", []))
|
|
567
|
+
if current and incoming_values:
|
|
568
|
+
merged["source_ids"] = sorted(current & incoming_values)
|
|
569
|
+
elif incoming_values:
|
|
570
|
+
merged["source_ids"] = sorted(incoming_values)
|
|
571
|
+
return merged
|
|
572
|
+
if filter_id == "column_allowlist":
|
|
573
|
+
current = set(str(item) for item in merged.get("fields", []))
|
|
574
|
+
incoming_values = set(str(item) for item in incoming.get("fields", []))
|
|
575
|
+
if current and incoming_values:
|
|
576
|
+
merged["fields"] = sorted(current & incoming_values)
|
|
577
|
+
elif incoming_values:
|
|
578
|
+
merged["fields"] = sorted(incoming_values)
|
|
579
|
+
return merged
|
|
580
|
+
if filter_id == "column_blocklist":
|
|
581
|
+
current = set(str(item) for item in merged.get("fields", []))
|
|
582
|
+
incoming_values = set(str(item) for item in incoming.get("fields", []))
|
|
583
|
+
merged["fields"] = sorted(current | incoming_values)
|
|
584
|
+
return merged
|
|
585
|
+
if filter_id == "contact_display_names":
|
|
586
|
+
# Stricter: both must allow names.
|
|
587
|
+
merged["enabled"] = bool(merged.get("enabled", True)) and bool(incoming.get("enabled", True))
|
|
588
|
+
return merged
|
|
589
|
+
if filter_id == "message_contact_participation":
|
|
590
|
+
em = str(merged.get("mode") or "all")
|
|
591
|
+
im = str(incoming.get("mode") or "all")
|
|
592
|
+
ec = {str(x) for x in (merged.get("contact_ids") or [])}
|
|
593
|
+
ic = {str(x) for x in (incoming.get("contact_ids") or [])}
|
|
594
|
+
match = str(incoming.get("match") or merged.get("match") or "sender_only")
|
|
595
|
+
if "blocklist" in (em, im):
|
|
596
|
+
return {"mode": "blocklist", "contact_ids": sorted(ec | ic), "match": match}
|
|
597
|
+
if em == "allowlist" and im == "allowlist":
|
|
598
|
+
inter = ec & ic if ec and ic else ec | ic
|
|
599
|
+
return {"mode": "allowlist", "contact_ids": sorted(inter), "match": match}
|
|
600
|
+
if im != "all":
|
|
601
|
+
return {"mode": im, "contact_ids": sorted(ic), "match": match}
|
|
602
|
+
if em != "all":
|
|
603
|
+
return {"mode": em, "contact_ids": sorted(ec), "match": match}
|
|
604
|
+
return {"mode": "all", "contact_ids": [], "match": match}
|
|
605
|
+
return incoming if incoming else merged
|
|
606
|
+
|
|
607
|
+
|
|
608
|
+
def _manifest_merge_key(item: FilterInstance) -> str:
    """
    Return the key under which a filter instance is merged.

    Retrieval caps (rolling_window_days, max_rows, most_recent_n) are keyed by
    filter_id plus the stripped params["table_id"], separated by a NUL character,
    so they may repeat once per table. Every other filter is keyed by filter_id alone.
    """
    per_table_caps = ("rolling_window_days", "max_rows", "most_recent_n")
    if item.filter_id not in per_table_caps:
        return item.filter_id
    table = str(item.params.get("table_id") or "").strip()
    return f"{item.filter_id}\x00{table}"
|
|
615
|
+
|
|
616
|
+
|
|
617
|
+
def merge_filter_manifests(
    manifests: Iterable[Optional[FilterManifest]],
    *,
    provenance: Optional[FilterManifestProvenance] = None,
) -> FilterManifest:
    """
    Fold several manifests into one.

    None entries are skipped. Filters sharing a merge key (see _manifest_merge_key)
    are combined: the later filter supplies filter_id and category, and the params
    are merged by _merge_param_values. The result lists filters sorted by merge key
    and carries the given provenance.
    """
    by_key: Dict[str, FilterInstance] = {}
    present = (manifest for manifest in manifests if manifest is not None)
    for manifest in present:
        for candidate in manifest.filters:
            merge_key = _manifest_merge_key(candidate)
            previous = by_key.get(merge_key)
            if previous is not None:
                combined_params = _merge_param_values(candidate.filter_id, previous.params, candidate.params)
                candidate = FilterInstance(
                    filter_id=candidate.filter_id,
                    category=candidate.category,
                    params=combined_params,
                )
            by_key[merge_key] = candidate

    def _ordering(entry: FilterInstance):
        return (_manifest_merge_key(entry), entry.filter_id)

    ordered = sorted(by_key.values(), key=_ordering)
    return FilterManifest(filters=ordered, provenance=provenance)
|