topos-node 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (249) hide show
  1. shared/__init__.py +59 -0
  2. shared/filtering.py +640 -0
  3. shared/schema_registry.py +229 -0
  4. topos/__init__.py +5 -0
  5. topos/__version__.py +6 -0
  6. topos/analytics/__init__.py +15 -0
  7. topos/analytics/duckdb_adapter.py +48 -0
  8. topos/analytics/messenger_communities.py +349 -0
  9. topos/analytics/messenger_graph.py +522 -0
  10. topos/analytics/messenger_labels.py +321 -0
  11. topos/analytics/profiles.py +22 -0
  12. topos/analytics/query_engine.py +64 -0
  13. topos/analytics/raw_queries.py +174 -0
  14. topos/api/__init__.py +1 -0
  15. topos/api/analytics.py +52 -0
  16. topos/api/app_registry.py +31 -0
  17. topos/api/backup.py +15 -0
  18. topos/api/compute_remote.py +175 -0
  19. topos/api/data_commit.py +158 -0
  20. topos/api/data_explorer_table_prefs.py +81 -0
  21. topos/api/db.py +10 -0
  22. topos/api/device.py +25 -0
  23. topos/api/enrichment.py +959 -0
  24. topos/api/filter_lab.py +195 -0
  25. topos/api/health.py +61 -0
  26. topos/api/ingestion_api.py +37 -0
  27. topos/api/ingestion_compat.py +21 -0
  28. topos/api/ingestion_sources.py +600 -0
  29. topos/api/llm.py +76 -0
  30. topos/api/local_mcp.py +46 -0
  31. topos/api/messenger_analytics.py +385 -0
  32. topos/api/query_api.py +13 -0
  33. topos/api/sanitization_ollama_config.py +64 -0
  34. topos/api/source_install.py +324 -0
  35. topos/api/sources.py +13 -0
  36. topos/api/sync.py +10 -0
  37. topos/api/ui_config.py +83 -0
  38. topos/api/uma_data.py +311 -0
  39. topos/api/usage.py +49 -0
  40. topos/api/user_identity.py +46 -0
  41. topos/app.py +239 -0
  42. topos/auth.py +17 -0
  43. topos/canonicalization/__init__.py +1 -0
  44. topos/canonicalization/mappers/__init__.py +22 -0
  45. topos/canonicalization/mappers/base.py +26 -0
  46. topos/canonicalization/mappers/chatgpt_mapper.py +40 -0
  47. topos/canonicalization/mappers/grok_mapper.py +17 -0
  48. topos/canonicalization/mappers/messenger_mapper.py +58 -0
  49. topos/canonicalization/models.py +31 -0
  50. topos/canonicalization/resolver.py +23 -0
  51. topos/cli/__init__.py +1 -0
  52. topos/cli/__main__.py +6 -0
  53. topos/cli/commands.py +132 -0
  54. topos/config/__init__.py +1 -0
  55. topos/config/sanitization_ollama.py +189 -0
  56. topos/config/settings.py +310 -0
  57. topos/contacts/__init__.py +5 -0
  58. topos/contacts/identity.py +24 -0
  59. topos/control_plane_client.py +300 -0
  60. topos/core/__init__.py +1 -0
  61. topos/core/api_models.py +128 -0
  62. topos/core/connection_resilience.py +99 -0
  63. topos/core/device_helpers.py +8 -0
  64. topos/core/errors.py +13 -0
  65. topos/core/events.py +12 -0
  66. topos/core/handlers.py +5625 -0
  67. topos/core/logging.py +175 -0
  68. topos/core/metrics.py +21 -0
  69. topos/core/startup_banner.py +62 -0
  70. topos/core/state.py +682 -0
  71. topos/core/table_layers.py +45 -0
  72. topos/core/types.py +13 -0
  73. topos/data_explorer_table_prefs.py +150 -0
  74. topos/engine/__init__.py +29 -0
  75. topos/engine/backends/__init__.py +50 -0
  76. topos/engine/backends/base.py +21 -0
  77. topos/engine/backends/huggingface.py +151 -0
  78. topos/engine/backends/ollama.py +181 -0
  79. topos/engine/backends/stub.py +22 -0
  80. topos/engine/engine.py +165 -0
  81. topos/engine/intake.py +32 -0
  82. topos/engine/queue_manager.py +112 -0
  83. topos/engine/registration.py +126 -0
  84. topos/engine/result_formatter.py +38 -0
  85. topos/engine/router.py +19 -0
  86. topos/engine/scoped_token.py +82 -0
  87. topos/engine/tasks.py +154 -0
  88. topos/engine/transport.py +44 -0
  89. topos/engine/usage_guard.py +100 -0
  90. topos/engine/usage_observation.py +129 -0
  91. topos/engine/validator.py +23 -0
  92. topos/enrichment/__init__.py +1 -0
  93. topos/enrichment/derived_tables.py +214 -0
  94. topos/enrichment/jobs/__init__.py +30 -0
  95. topos/enrichment/jobs/base.py +54 -0
  96. topos/enrichment/jobs/canonical/__init__.py +1 -0
  97. topos/enrichment/jobs/canonical/embeddings_job.py +27 -0
  98. topos/enrichment/jobs/canonical/emo_27_job.py +97 -0
  99. topos/enrichment/jobs/canonical/entities_job.py +27 -0
  100. topos/enrichment/jobs/canonical/sentiment_job.py +27 -0
  101. topos/enrichment/jobs/canonical/topics_job.py +27 -0
  102. topos/enrichment/jobs/raw/__init__.py +1 -0
  103. topos/enrichment/jobs/raw/attachments_job.py +12 -0
  104. topos/enrichment/jobs/raw/language_job.py +12 -0
  105. topos/enrichment/jobs/raw/time_normalization_job.py +12 -0
  106. topos/enrichment/jobs/raw/tool_calls_job.py +12 -0
  107. topos/enrichment/models/__init__.py +1 -0
  108. topos/enrichment/models/manager.py +8 -0
  109. topos/enrichment/models/registry.py +71 -0
  110. topos/enrichment/models/versioning.py +8 -0
  111. topos/enrichment/orchestrator.py +177 -0
  112. topos/enrichment/processor.py +17 -0
  113. topos/enrichment/progress_bar.py +122 -0
  114. topos/enrichment/website_classifier.py +31 -0
  115. topos/filter_lab/__init__.py +1 -0
  116. topos/filter_lab/bundles.py +300 -0
  117. topos/filter_lab/schema.py +86 -0
  118. topos/filter_lab/service.py +167 -0
  119. topos/filter_lab/store.py +374 -0
  120. topos/filter_lab/worker.py +250 -0
  121. topos/hosted_pool_lease.py +153 -0
  122. topos/ingestion/__init__.py +1 -0
  123. topos/ingestion/checkpoints/__init__.py +6 -0
  124. topos/ingestion/checkpoints/checkpoint_store.py +24 -0
  125. topos/ingestion/checkpoints/sqlite_checkpoint_store.py +82 -0
  126. topos/ingestion/ingest_helpers.py +504 -0
  127. topos/ingestion/jobs.py +91 -0
  128. topos/ingestion/local_sync.py +823 -0
  129. topos/ingestion/log_preview.py +21 -0
  130. topos/ingestion/manager.py +1100 -0
  131. topos/ingestion/parser.py +174 -0
  132. topos/ingestion/parsers/__init__.py +32 -0
  133. topos/ingestion/parsers/base.py +24 -0
  134. topos/ingestion/parsers/browser_parser.py +171 -0
  135. topos/ingestion/parsers/calendar_parser.py +21 -0
  136. topos/ingestion/parsers/chatgpt_conversation_flattener.py +266 -0
  137. topos/ingestion/parsers/chatgpt_parser.py +67 -0
  138. topos/ingestion/parsers/grok_parser.py +21 -0
  139. topos/ingestion/parsers/messenger_parser.py +97 -0
  140. topos/ingestion/progress.py +54 -0
  141. topos/ingestion/sources/__init__.py +20 -0
  142. topos/ingestion/sources/base.py +39 -0
  143. topos/ingestion/sources/calendar.py +29 -0
  144. topos/ingestion/sources/chatgpt.py +29 -0
  145. topos/ingestion/sources/contact_importers.py +274 -0
  146. topos/ingestion/sources/grok.py +29 -0
  147. topos/ingestion/sources/imessage_reader.py +479 -0
  148. topos/ingestion/sources/signal_export_parser.py +132 -0
  149. topos/ingestion/sources/signal_reader.py +491 -0
  150. topos/ingestion/state_machine.py +70 -0
  151. topos/ingestion/triggers/__init__.py +1 -0
  152. topos/ingestion/triggers/file_trigger.py +36 -0
  153. topos/ingestion/triggers/sqlite_trigger.py +18 -0
  154. topos/ingestion/validation/__init__.py +1 -0
  155. topos/ingestion/validation/base.py +27 -0
  156. topos/ingestion/validation/schema_registry.py +111 -0
  157. topos/ingestion/validation/schema_validator.py +13 -0
  158. topos/lineage/__init__.py +1 -0
  159. topos/lineage/provenance.py +9 -0
  160. topos/lineage/tracker.py +9 -0
  161. topos/mcp_stdio_proxy.py +83 -0
  162. topos/observability/__init__.py +1 -0
  163. topos/observability/alerts.py +7 -0
  164. topos/observability/metrics.py +25 -0
  165. topos/observability/tracing.py +18 -0
  166. topos/openai_client.py +69 -0
  167. topos/projections/__init__.py +1 -0
  168. topos/projections/vector_index/__init__.py +1 -0
  169. topos/projections/vector_index/base.py +21 -0
  170. topos/projections/vector_index/builders.py +11 -0
  171. topos/projections/vector_index/health_checks.py +5 -0
  172. topos/rate_limit.py +43 -0
  173. topos/sanitization/__init__.py +16 -0
  174. topos/sanitization/ollama_transforms.py +276 -0
  175. topos/scope_resolution.py +89 -0
  176. topos/services/__init__.py +1 -0
  177. topos/services/container.py +46 -0
  178. topos/services/embeddings/__init__.py +1 -0
  179. topos/services/embeddings/base.py +7 -0
  180. topos/services/embeddings/local.py +9 -0
  181. topos/services/embeddings/remote.py +9 -0
  182. topos/services/interfaces.py +40 -0
  183. topos/services/llm/__init__.py +1 -0
  184. topos/services/llm/base.py +7 -0
  185. topos/services/llm/openai.py +126 -0
  186. topos/services/local.py +123 -0
  187. topos/services/postgres.py +385 -0
  188. topos/sources/__init__.py +6 -0
  189. topos/sources/definitions.py +114 -0
  190. topos/sources/install_service.py +836 -0
  191. topos/sources/registry.py +263 -0
  192. topos/sources/runtime_install.py +427 -0
  193. topos/storage/__init__.py +1 -0
  194. topos/storage/canonical/__init__.py +18 -0
  195. topos/storage/canonical/ai_chat/__init__.py +22 -0
  196. topos/storage/canonical/ai_chat/canonicalizer.py +147 -0
  197. topos/storage/canonical/ai_chat/mapper.py +168 -0
  198. topos/storage/canonical/ai_chat/model.py +87 -0
  199. topos/storage/canonical/ai_chat/tables.py +179 -0
  200. topos/storage/canonical/canonical_store.py +24 -0
  201. topos/storage/canonical/conversations_tables.py +1020 -0
  202. topos/storage/canonical/mapping_store.py +30 -0
  203. topos/storage/canonical/postgres.py +10 -0
  204. topos/storage/db/__init__.py +1 -0
  205. topos/storage/db/client.py +8 -0
  206. topos/storage/db/migrations/__init__.py +1 -0
  207. topos/storage/db/migrations/stage9_column_renames.py +78 -0
  208. topos/storage/db/paths.py +122 -0
  209. topos/storage/db/postgres.py +240 -0
  210. topos/storage/db/schema.py +6 -0
  211. topos/storage/enrichment/__init__.py +1 -0
  212. topos/storage/enrichment/canonical_enrichment_store.py +7 -0
  213. topos/storage/enrichment/raw_enrichment_store.py +18 -0
  214. topos/storage/normalized/__init__.py +1 -0
  215. topos/storage/normalized/normalized_store.py +24 -0
  216. topos/storage/oplog/__init__.py +1 -0
  217. topos/storage/oplog/decision.py +6 -0
  218. topos/storage/oplog/oplog_store.py +17 -0
  219. topos/storage/oplog/postgres.py +10 -0
  220. topos/storage/projections/__init__.py +1 -0
  221. topos/storage/projections/index_ops_store.py +6 -0
  222. topos/storage/projections/vector_index_store.py +6 -0
  223. topos/storage/raw/__init__.py +1 -0
  224. topos/storage/raw/browser_flat_tables.py +303 -0
  225. topos/storage/raw/file_store.py +100 -0
  226. topos/storage/raw/raw_store.py +29 -0
  227. topos/storage/raw/raw_tables_manager.py +295 -0
  228. topos/storage/raw/sqlite_raw_store.py +17 -0
  229. topos/storage/security/encryption.py +21 -0
  230. topos/storage/signal_identity.py +71 -0
  231. topos/storage/source_settings.py +116 -0
  232. topos/storage/user_identity.py +69 -0
  233. topos/sync/__init__.py +5 -0
  234. topos/sync/client.py +272 -0
  235. topos/sync_handlers.py +70 -0
  236. topos/testing/__init__.py +1 -0
  237. topos/testing/lifespan.py +7 -0
  238. topos/uma_contact_enrichment.py +1032 -0
  239. topos/uma_filters.py +669 -0
  240. topos/uma_resource_id.py +24 -0
  241. topos/uma_rpt.py +69 -0
  242. topos/utils/base_object.py +61 -0
  243. topos/websocket_client.py +21 -0
  244. topos_node-0.1.0.dist-info/METADATA +199 -0
  245. topos_node-0.1.0.dist-info/RECORD +249 -0
  246. topos_node-0.1.0.dist-info/WHEEL +5 -0
  247. topos_node-0.1.0.dist-info/entry_points.txt +2 -0
  248. topos_node-0.1.0.dist-info/licenses/LICENSE +201 -0
  249. topos_node-0.1.0.dist-info/top_level.txt +2 -0
@@ -0,0 +1,1032 @@
1
+ """
2
+ Stage 11: UMA message contact participation + display name resolution.
3
+
4
+ Requires dataset_id and DB connection. Skips all logic when dataset_id is missing.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import json
10
+ import logging
11
+ from collections import defaultdict
12
+ from typing import Any, Dict, List, Optional, Set, Tuple
13
+
14
+ from shared.filtering import FilterManifest
15
+
16
+ from topos.analytics.messenger_labels import _identifier_candidates, resolve_participant_labels
17
+ from topos.contacts.identity import normalize_contact_key
18
+ from topos.storage.canonical.conversations_tables import CONTACT_IDENTIFIERS_TABLE, CONTACTS_TABLE
19
+ from topos.storage.user_identity import get_user_identity
20
+
21
+ logger = logging.getLogger("topos.uma_contact_enrichment")
22
+
23
+ DEFAULT_SHARING_POLICY = {"name_visibility": "normal", "row_visibility": "exclude_from_grants"}
24
+
25
+
26
+ def _table_exists(conn, table_name: str) -> bool:
27
+ try:
28
+ cur = conn.execute(
29
+ "SELECT name FROM sqlite_master WHERE type='table' AND name=?",
30
+ (table_name,),
31
+ )
32
+ return cur.fetchone() is not None
33
+ except Exception:
34
+ return False
35
+
36
+
37
def strip_contact_runtime_filters(manifest: Optional[FilterManifest]) -> Optional[FilterManifest]:
    """Drop the filters this module applies itself from ``manifest``.

    Filters whose ``filter_id`` is ``message_contact_participation`` or
    ``contact_display_names`` are removed so apply_filter_manifest does not
    double-apply them or error. ``None`` passes through as ``None``; when
    nothing is removed the very same manifest object is returned, otherwise a
    copy carrying the reduced filter list.
    """
    if manifest is None:
        return None
    handled_here = frozenset({"message_contact_participation", "contact_display_names"})
    remaining = []
    for flt in manifest.filters:
        if flt.filter_id in handled_here:
            continue
        remaining.append(flt)
    nothing_removed = len(remaining) == len(manifest.filters)
    if nothing_removed:
        return manifest
    return manifest.model_copy(update={"filters": remaining})
46
+
47
+
48
def _parse_sharing_policy(raw: Any) -> Dict[str, str]:
    """Turn a stored sharing policy into a ``name_visibility`` / ``row_visibility`` dict.

    ``raw`` may be ``None``/empty, an already-decoded dict, or something whose
    ``str()`` is a JSON object. The result always starts from a copy of
    ``DEFAULT_SHARING_POLICY``; only the two known keys are taken from ``raw``
    (values coerced with ``str``). Undecodable or non-object input yields the
    defaults.
    """
    policy = dict(DEFAULT_SHARING_POLICY)
    if raw is None or raw == "":
        return policy

    if isinstance(raw, dict):
        decoded = raw
    else:
        try:
            decoded = json.loads(str(raw))
        except Exception:
            return policy
        if not isinstance(decoded, dict):
            return policy

    # Unknown keys in the stored policy are ignored on purpose.
    for field in ("name_visibility", "row_visibility"):
        if field in decoded:
            policy[field] = str(decoded[field])
    return policy
64
+
65
+
66
def _contact_ids_for_literal_self_sender(id_mm: Dict[str, Set[str]]) -> Set[str]:
    """Contact IDs tied to the iMessage sender handle ``self`` (when not using ``is_self`` row).

    Looks up every key produced by ``_identifier_candidates("self")`` in
    ``id_mm`` and returns the union of the matching contact-id sets.
    """
    matched: Set[str] = set()
    for candidate_key in _identifier_candidates("self"):
        matched.update(id_mm.get(candidate_key) or ())
    return matched
74
+
75
+
76
def build_identifier_contact_multimap(
    conn, dataset_id: str, _source_ids: Optional[Set[str]] = None
) -> Dict[str, Set[str]]:
    """
    Map each lookup key -> set(contact_id) for resolving message sender_id.

    Uses the same phone/email candidate expansion as the social graph (``_identifier_candidates``)
    so E.164 (+1…), 10-digit NANP, and stored identifier strings align.

    **Multimap (not single-valued):** duplicate imports often create two ``contact_id`` rows for
    the same NANP phone (e.g. ``+1512…`` vs ``512…``). Those rows share expanded keys; a
    first-wins ``Dict[str, str]`` drops one side so ``pick_representative_contact_id`` never sees
    the named card. Every ``(key, contact_id)`` pair from identifiers is recorded here.

    Loads **all** ``contact_identifiers`` rows for ``dataset_id``. ``_source_ids`` is ignored.

    Rows whose identifier or contact_id is empty, or blank once stripped, are skipped.
    Returns ``{}`` when ``conn`` or ``dataset_id`` is falsy or the identifiers table is missing.
    Query errors are not caught here and propagate to the caller.
    """
    if not conn or not dataset_id:
        return {}
    if not _table_exists(conn, CONTACT_IDENTIFIERS_TABLE):
        return {}
    mm: Dict[str, Set[str]] = defaultdict(set)
    rows = conn.execute(
        f"""
        SELECT identifier, contact_id
        FROM {CONTACT_IDENTIFIERS_TABLE}
        WHERE dataset_id = ?
        """,
        (dataset_id,),
    ).fetchall()
    for ident, cid in rows:
        if not ident or not cid:
            continue
        i = str(ident).strip()
        c = str(cid).strip()
        # Whitespace-only values pass the truthiness check above but strip to "";
        # recording them would create an empty lookup key or an empty contact id.
        if not i or not c:
            continue
        for key in _identifier_candidates(i):
            if key:
                mm[key].add(c)
        nk = normalize_contact_key(i)
        if nk:
            mm[nk].add(c)
        # Always keep the stored identifier itself as a key, whatever the expansions produced.
        mm[i].add(c)
    return dict(mm)
118
+
119
+
120
+ def _nanp_digit_lookup_keys(digits: str) -> Set[str]:
121
+ """Link US NANP handles that differ only by formatting or leading country code 1."""
122
+ d = digits
123
+ if len(d) < 10:
124
+ return set()
125
+ keys: Set[str] = {d[-10:]}
126
+ if len(d) == 11 and d[0] == "1":
127
+ keys.add(d[1:])
128
+ keys.add(d)
129
+ return keys
130
+
131
+
132
def _nanp_lookup_keys_for_value(value: Any) -> Set[str]:
    """Strip everything but digit characters from ``value`` and return its NANP lookup keys.

    A falsy ``value`` is treated as the empty string.
    """
    text = str(value or "")
    digits_only = "".join(filter(str.isdigit, text))
    return _nanp_digit_lookup_keys(digits_only)
135
+
136
+
137
def build_nanp_digit_contact_index(conn, dataset_id: str) -> Dict[str, Set[str]]:
    """
    Map digit-derived keys -> contact_ids so differently formatted phone rows still merge
    (e.g. ``+1512…`` vs ``(512) …`` vs ``512…``) when string keys in the multimap diverge.

    Returns ``{}`` when ``conn`` or ``dataset_id`` is falsy, or when the query fails
    (the failure is logged as a warning). Rows whose identifier or contact_id is empty,
    or whose contact_id is blank once stripped, are skipped.
    """
    if not conn or not dataset_id:
        return {}
    idx: Dict[str, Set[str]] = defaultdict(set)
    try:
        rows = conn.execute(
            f"""
            SELECT identifier, contact_id
            FROM {CONTACT_IDENTIFIERS_TABLE}
            WHERE dataset_id = ?
            """,
            (dataset_id,),
        ).fetchall()
    except Exception as exc:  # noqa: BLE001
        logger.warning("build_nanp_digit_contact_index failed: %s", exc)
        return {}
    for ident, cid in rows:
        if not ident or not cid:
            continue
        i = str(ident).strip()
        c = str(cid).strip()
        # A whitespace-only contact_id is truthy but strips to ""; do not index it.
        if not c:
            continue
        for k in _nanp_lookup_keys_for_value(i):
            idx[k].add(c)
    return dict(idx)
165
+
166
+
167
+ def _uma_graph_display_name_for_row(
168
+ graph_labels: Dict[str, Dict[str, str]],
169
+ row: Dict[str, Any],
170
+ ) -> str:
171
+ """Best-effort display_name from :func:`resolve_participant_labels` for this message's handles."""
172
+ sid = str(row.get("sender_id") or "").strip()
173
+ if not sid or sid.lower() == "self":
174
+ return ""
175
+ keys: List[str] = [sid]
176
+ alt = _metadata_chat_identifier(row)
177
+ if alt:
178
+ a = str(alt).strip()
179
+ if a and a not in keys:
180
+ keys.append(a)
181
+ for key in keys:
182
+ g = graph_labels.get(key, {})
183
+ dn = (g.get("display_name") or "").strip()
184
+ if dn:
185
+ return dn
186
+ return ""
187
+
188
+
189
+ def _graph_display_name_respects_name_policy(
190
+ graph_dn: str,
191
+ cids: Set[str],
192
+ display_names: Dict[str, Optional[str]],
193
+ name_block: Set[str],
194
+ ) -> str:
195
+ """
196
+ Only use graph-resolved names that correspond to a contact on this row whose name may be shown.
197
+
198
+ ``resolve_participant_labels`` ignores sharing_policy; we still must not surface a name marked hidden.
199
+ """
200
+ g = (graph_dn or "").strip()
201
+ if not g or not cids:
202
+ return ""
203
+ holders = {c for c in cids if (display_names.get(c) or "").strip() == g}
204
+ if not holders:
205
+ return ""
206
+ if holders & name_block:
207
+ return ""
208
+ return g
209
+
210
+
211
+ def _owner_graph_display_name(
212
+ graph_labels: Dict[str, Dict[str, str]],
213
+ self_contact_id: str,
214
+ ) -> str:
215
+ """
216
+ ``resolve_participant_labels`` result keyed by the owner's ``contact_id``.
217
+
218
+ Include ``self_contact_id`` in the graph batch so duplicate cards / identifier promotion
219
+ can supply a name when ``contacts.display_name`` on the ``is_self`` row is empty.
220
+ """
221
+ g = graph_labels.get(self_contact_id, {})
222
+ dn = (g.get("display_name") or "").strip()
223
+ if dn and not _is_imessage_self_sentinel_label(dn):
224
+ return dn
225
+ lab = (g.get("label") or "").strip()
226
+ if not lab or _is_imessage_self_sentinel_label(lab) or lab == self_contact_id:
227
+ return ""
228
+ return lab
229
+
230
+
231
+ def _metadata_chat_identifier(row: Dict[str, Any]) -> Optional[str]:
232
+ """iMessage-style metadata often duplicates the peer handle in ``chat_identifier``."""
233
+ mj = row.get("metadata_json")
234
+ if mj is None:
235
+ return None
236
+ if isinstance(mj, str):
237
+ try:
238
+ mj = json.loads(mj)
239
+ except Exception:
240
+ return None
241
+ if not isinstance(mj, dict):
242
+ return None
243
+ for key in ("chat_identifier", "handle"):
244
+ v = mj.get(key)
245
+ if v is not None and str(v).strip():
246
+ return str(v).strip()
247
+ return None
248
+
249
+
250
def message_row_contact_id(
    row: Dict[str, Any],
    id_mm: Dict[str, Set[str]],
    *,
    self_contact_id: Optional[str] = None,
    nanp_idx: Optional[Dict[str, Set[str]]] = None,
) -> Optional[str]:
    """Resolve contact_id from sender_id, then from metadata_json when needed.

    Returns ``None`` when nothing matches and the sole match when there is exactly one.
    With several matches the choice is made without contact meta: the smallest contact_id
    under the first lookup key (sender handle first, then the metadata handle) that hits
    ``id_mm``, else the smallest of all matches. Callers that need a meta-aware single id
    should use ``pick_representative_contact_id`` after ``load_contact_meta``.
    """
    matches = collect_message_contact_ids(
        row, id_mm, self_contact_id=self_contact_id, nanp_idx=nanp_idx
    )
    if len(matches) <= 1:
        return next(iter(matches), None)

    def _handles_in_priority_order():
        # Lazy so the metadata handle is only read once the sender keys are exhausted.
        yield row.get("sender_id")
        peer = _metadata_chat_identifier(row)
        if peer:
            yield peer

    for handle in _handles_in_priority_order():
        for lookup_key in _ordered_identifier_lookup_keys(handle):
            hit = id_mm.get(lookup_key)
            if hit:
                return min(hit)
    return min(matches)
276
+
277
+
278
+ def _ordered_identifier_lookup_keys(sender_id: Any) -> List[str]:
279
+ """Deterministic key order for first-hit fallback (before meta-aware pick)."""
280
+ if sender_id is None:
281
+ return []
282
+ s = str(sender_id).strip()
283
+ if not s:
284
+ return []
285
+ keys: List[str] = []
286
+ seen: Set[str] = set()
287
+
288
+ def add(k: str) -> None:
289
+ if k and k not in seen:
290
+ seen.add(k)
291
+ keys.append(k)
292
+
293
+ add(s)
294
+ add(s.lower())
295
+ nk = normalize_contact_key(s)
296
+ add(nk)
297
+ for k in sorted(_identifier_candidates(s)):
298
+ add(k)
299
+ return keys
300
+
301
+
302
def collect_contact_ids_for_sender(
    sender_id: Any,
    id_mm: Dict[str, Set[str]],
    *,
    self_contact_id: Optional[str] = None,
    nanp_idx: Optional[Dict[str, Set[str]]] = None,
) -> Set[str]:
    """All contact_ids that match any stored identifier key for this sender.

    A ``None`` or blank ``sender_id`` yields an empty set. For the literal handle ``self``
    (case-insensitive) the result is ``self_contact_id`` (when given) plus whatever the
    candidate keys hit in ``id_mm``; the normalized-key and NANP digit lookups are not
    applied to it. Any other handle is looked up through the candidate keys, the
    normalized contact key, and, when ``nanp_idx`` is supplied, the digit-derived keys.
    """
    handle = "" if sender_id is None else str(sender_id).strip()
    if not handle:
        return set()

    is_owner_handle = handle.lower() == "self"
    matched: Set[str] = set()
    if is_owner_handle and self_contact_id:
        matched.add(self_contact_id)

    for candidate_key in _identifier_candidates(handle):
        matched.update(id_mm.get(candidate_key) or ())
    if is_owner_handle:
        return matched

    normalized = normalize_contact_key(handle)
    if normalized:
        matched.update(id_mm.get(normalized) or ())

    if nanp_idx:
        for digit_key in _nanp_lookup_keys_for_value(handle):
            matched.update(nanp_idx.get(digit_key) or ())
    return matched
340
+
341
+
342
def collect_message_contact_ids(
    row: Dict[str, Any],
    id_mm: Dict[str, Set[str]],
    *,
    self_contact_id: Optional[str] = None,
    nanp_idx: Optional[Dict[str, Set[str]]] = None,
) -> Set[str]:
    """Union of contact_ids from sender_id and metadata_json handles.

    The row's ``sender_id`` is always resolved; the handle found in its metadata is
    resolved as well when one is present.
    """
    lookup_options = {"self_contact_id": self_contact_id, "nanp_idx": nanp_idx}

    matched: Set[str] = set()
    matched.update(collect_contact_ids_for_sender(row.get("sender_id"), id_mm, **lookup_options))

    peer_handle = _metadata_chat_identifier(row)
    if peer_handle:
        matched.update(collect_contact_ids_for_sender(peer_handle, id_mm, **lookup_options))
    return matched
365
+
366
+
367
def sender_row_contact_id(
    sender_id: Any,
    id_mm: Dict[str, Set[str]],
    *,
    self_contact_id: Optional[str] = None,
    nanp_idx: Optional[Dict[str, Set[str]]] = None,
) -> Optional[str]:
    """Resolve a single contact_id for ``sender_id`` without consulting contact meta.

    Returns ``None`` for a ``None``/blank sender or when nothing matches, and the sole
    match when there is exactly one. With several matches it returns the smallest
    contact_id under the first ordered lookup key that hits ``id_mm``, else the smallest
    of all matches.
    """
    if sender_id is None or not str(sender_id).strip():
        return None

    matches = collect_contact_ids_for_sender(
        sender_id,
        id_mm,
        self_contact_id=self_contact_id,
        nanp_idx=nanp_idx,
    )
    if len(matches) <= 1:
        return next(iter(matches), None)

    for lookup_key in _ordered_identifier_lookup_keys(sender_id):
        hit = id_mm.get(lookup_key)
        if hit:
            return min(hit)
    return min(matches)
394
+
395
+
396
+ def _has_letter(s: str) -> bool:
397
+ return any(ch.isalpha() for ch in s)
398
+
399
+
400
+ def _is_imessage_self_sentinel_label(value: str) -> bool:
401
+ """True for the iMessage owner placeholder string; not a human-readable display name."""
402
+ return str(value or "").strip().lower() == "self"
403
+
404
+
405
def _contact_label_score(
    cid: str,
    *,
    display_names: Dict[str, Optional[str]],
    known_usernames_by_cid: Dict[str, List[str]],
    fallback_labels: Dict[str, str],
) -> Tuple[int, int, str]:
    """
    Return (tier, length, tie_breaker) with higher tier/length better.
    tier: 3 = human-looking display_name, 2 = human username, 1 = any display_name, 0 = phone-like fallback.

    ``length`` is the length of the label chosen so far; ``tie_breaker`` is that label, or
    ``cid`` when no label was found. The ``self`` placeholder never counts as a label.
    """
    name = (display_names.get(cid) or "").strip()
    if _is_imessage_self_sentinel_label(name):
        name = ""

    label = name
    if not name:
        tier = 0
    elif _has_letter(name):
        tier = 3
    else:
        tier = 1

    if tier < 2:
        for raw_username in known_usernames_by_cid.get(cid) or []:
            username = str(raw_username).strip()
            if (
                not username
                or _is_imessage_self_sentinel_label(username)
                or not _has_letter(username)
            ):
                continue
            tier = max(tier, 2)
            # The label is only replaced by a strictly longer username.
            if len(username) > len(label):
                label = username

    if tier == 0:
        fallback = (fallback_labels.get(cid) or "").strip()
        if fallback and not _is_imessage_self_sentinel_label(fallback):
            label = fallback

    return (tier, len(label), label or cid)
440
+
441
+
442
+ def _row_hidden_by_default_policy(
443
+ cid: str,
444
+ policies: Dict[str, Dict[str, str]],
445
+ inherit_defaults: bool,
446
+ ) -> bool:
447
+ if not inherit_defaults:
448
+ return False
449
+ pol = policies.get(cid)
450
+ if pol is None:
451
+ return False
452
+ return pol.get("row_visibility") == "exclude_from_grants"
453
+
454
+
455
def pick_representative_contact_id(
    cids: Set[str],
    *,
    display_names: Dict[str, Optional[str]],
    known_usernames_by_cid: Dict[str, List[str]],
    fallback_labels: Dict[str, str],
    policies: Dict[str, Dict[str, str]],
    inherit_defaults: bool,
) -> Optional[str]:
    """
    When multiple contact rows share the same phone (e.g. +E.164 vs 10-digit imports),
    prefer the card with a real display name / username over an unnamed duplicate.

    Returns ``None`` for an empty set and the sole member of a one-element set. Otherwise
    candidates are ranked by: not hidden by policy, then label tier, then label length,
    then the contact_id itself; the highest-ranked contact_id is returned.
    """
    if not cids:
        return None
    if len(cids) == 1:
        return next(iter(cids))

    def _rank(candidate: str) -> Tuple[int, int, int, str]:
        tier, label_length, _ = _contact_label_score(
            candidate,
            display_names=display_names,
            known_usernames_by_cid=known_usernames_by_cid,
            fallback_labels=fallback_labels,
        )
        is_hidden = _row_hidden_by_default_policy(candidate, policies, inherit_defaults)
        return (0 if is_hidden else 1, tier, label_length, candidate)

    # Rank tuples end with the contact_id, so the maximum is unique.
    return max(cids, key=_rank)
485
+
486
+
487
def visible_label_for_contact(
    cid: str,
    *,
    display_names: Dict[str, Optional[str]],
    known_usernames_by_cid: Dict[str, List[str]],
    fallback_labels: Dict[str, str],
) -> str:
    """Single visible string for a contact_id (display_name → username → identifier).

    Returns the first candidate that is non-blank and is not the ``self`` placeholder,
    or ``""`` when there is none.
    """

    def _candidates_in_priority_order():
        yield (display_names.get(cid) or "").strip()
        for raw_username in known_usernames_by_cid.get(cid) or []:
            yield str(raw_username).strip()
        yield (fallback_labels.get(cid) or "").strip()

    for text in _candidates_in_priority_order():
        if text and not _is_imessage_self_sentinel_label(text):
            return text
    return ""
506
+
507
+
508
def load_self_contact_info(conn: Any, dataset_id: str) -> Tuple[Optional[str], Optional[str]]:
    """Return (contact_id, display label) for the dataset owner (is_self), if present.

    The label is the owner's ``display_name`` when usable, else the first usable entry of
    ``known_usernames_json``; the ``self`` placeholder is never accepted as a label.
    Returns ``(None, None)`` when inputs are missing, the query fails (logged as a
    warning) or no ``is_self`` row exists, and ``(contact_id, None)`` when the row exists
    but has no usable label.
    """
    if not (conn and dataset_id):
        return None, None

    try:
        owner = conn.execute(
            f"""
            SELECT contact_id, display_name, known_usernames_json
            FROM {CONTACTS_TABLE}
            WHERE dataset_id = ? AND is_self = 1
            LIMIT 1
            """,
            (dataset_id,),
        ).fetchone()
    except Exception as exc:  # noqa: BLE001
        logger.warning("load_self_contact_info failed: %s", exc)
        return None, None
    if not owner:
        return None, None

    contact_id = str(owner[0] or "").strip() or None
    name = str(owner[1] or "").strip()
    if name and not _is_imessage_self_sentinel_label(name):
        return contact_id, name

    stored_usernames = owner[2]
    try:
        decoded = json.loads(stored_usernames or "[]")
        usernames = decoded if isinstance(decoded, list) else []
        for raw_username in usernames:
            username = str(raw_username).strip()
            if username and not _is_imessage_self_sentinel_label(username):
                return contact_id, username
    except Exception:
        # Malformed username JSON is treated the same as having no usernames.
        pass
    return contact_id, None
542
+
543
+
544
def load_user_identity_display_name(conn: Any, dataset_id: str) -> Optional[str]:
    """Return the canonical owner-authored display name for the dataset, if set.

    Returns ``None`` when inputs are missing, ``get_user_identity`` raises (logged as a
    warning), no identity is stored, or its ``display_name`` is blank.
    """
    if not (conn and dataset_id):
        return None
    try:
        identity = get_user_identity(conn, dataset_id)
    except Exception as exc:  # noqa: BLE001
        logger.warning("load_user_identity_display_name failed: %s", exc)
        return None

    name = str(identity.get("display_name") or "").strip() if identity else ""
    return name or None
557
+
558
+
559
def prefetch_contact_ids_for_conversations(
    conn: Any,
    dataset_id: str,
    conversation_ids: Set[str],
    id_mm: Dict[str, Set[str]],
    *,
    self_contact_id: Optional[str],
    nanp_idx: Optional[Dict[str, Set[str]]] = None,
) -> Set[str]:
    """Union of all contact_ids reachable from senders in the given conversations.

    Distinct ``sender_id`` values are read from ``conversation_messages`` for the given
    ``dataset_id`` and conversations, then each is resolved with
    ``collect_contact_ids_for_sender``.

    The conversation ids are queried in fixed-size chunks: a single ``IN (...)`` with one
    bound parameter per id can exceed SQLite's bound-parameter limit, and that failure
    would otherwise be swallowed below and reported as "no contacts".

    Returns an empty set when inputs are missing or when any query fails (the failure is
    logged as a warning).
    """
    out: Set[str] = set()
    if not conn or not dataset_id or not conversation_ids:
        return out

    ordered_ids = sorted(conversation_ids)
    # Kept well under the historical 999-parameter SQLite default (plus one for dataset_id).
    chunk_size = 500
    sender_ids: List[Any] = []
    try:
        for start in range(0, len(ordered_ids), chunk_size):
            chunk = ordered_ids[start : start + chunk_size]
            placeholders = ",".join("?" for _ in chunk)
            rows = conn.execute(
                f"""
                SELECT DISTINCT sender_id
                FROM conversation_messages
                WHERE dataset_id = ? AND conversation_id IN ({placeholders})
                """,
                [dataset_id, *chunk],
            ).fetchall()
            sender_ids.extend(sender_id for (sender_id,) in rows)
    except Exception as exc:  # noqa: BLE001
        logger.warning("prefetch_contact_ids_for_conversations failed: %s", exc)
        return out

    # A sender seen in several chunks is resolved again; the union makes that harmless.
    for sender_id in sender_ids:
        out.update(
            collect_contact_ids_for_sender(
                sender_id,
                id_mm,
                self_contact_id=self_contact_id,
                nanp_idx=nanp_idx,
            )
        )
    return out
596
+
597
+
598
def load_conversation_participant_contact_ids(
    conn: Any,
    dataset_id: str,
    conversation_ids: Set[str],
    id_mm: Dict[str, Set[str]],
    *,
    self_contact_id: Optional[str],
    nanp_idx: Optional[Dict[str, Set[str]]] = None,
    display_names: Dict[str, Optional[str]],
    known_usernames_by_cid: Dict[str, List[str]],
    fallback_labels: Dict[str, str],
    policies: Dict[str, Dict[str, str]],
    inherit_defaults: bool,
) -> Dict[str, Set[str]]:
    """
    Return mapping conversation_id -> set(contact_id) using sender_id values from
    conversation_messages within the same dataset.

    Each sender is resolved with ``collect_contact_ids_for_sender`` and reduced to one
    contact via ``pick_representative_contact_id``; senders that resolve to nothing and
    rows with a blank conversation id are skipped.

    Only distinct (conversation_id, sender_id) pairs are fetched, and the conversation ids
    are queried in fixed-size chunks: a single ``IN (...)`` with one bound parameter per id
    can exceed SQLite's bound-parameter limit, and that failure would otherwise be
    swallowed below and reported as "no participants".

    Returns ``{}`` when inputs are missing or when any query fails (the failure is logged
    as a warning).
    """
    out: Dict[str, Set[str]] = {}
    if not conn or not dataset_id or not conversation_ids:
        return out

    ordered_ids = sorted(conversation_ids)
    # Kept well under the historical 999-parameter SQLite default (plus one for dataset_id).
    chunk_size = 500
    pairs: List[Tuple[Any, Any]] = []
    try:
        for start in range(0, len(ordered_ids), chunk_size):
            chunk = ordered_ids[start : start + chunk_size]
            placeholders = ",".join("?" for _ in chunk)
            rows = conn.execute(
                f"""
                SELECT DISTINCT conversation_id, sender_id
                FROM conversation_messages
                WHERE dataset_id = ? AND conversation_id IN ({placeholders})
                """,
                [dataset_id, *chunk],
            ).fetchall()
            pairs.extend((conv_id, sender_id) for conv_id, sender_id in rows)
    except Exception as exc:  # noqa: BLE001
        logger.warning("load_conversation_participant_contact_ids failed: %s", exc)
        return out

    for conv_id, sender_id in pairs:
        conv = str(conv_id or "").strip()
        if not conv:
            continue
        cids = collect_contact_ids_for_sender(
            sender_id,
            id_mm,
            self_contact_id=self_contact_id,
            nanp_idx=nanp_idx,
        )
        if not cids:
            continue
        cid = pick_representative_contact_id(
            cids,
            display_names=display_names,
            known_usernames_by_cid=known_usernames_by_cid,
            fallback_labels=fallback_labels,
            policies=policies,
            inherit_defaults=inherit_defaults,
        )
        if not cid:
            continue
        out.setdefault(conv, set()).add(cid)
    return out
662
+
663
+
664
def load_contact_meta(
    conn: Any,
    dataset_id: str,
    contact_ids: Set[str],
) -> Tuple[Dict[str, Dict[str, str]], Dict[str, Optional[str]], Dict[str, List[str]]]:
    """
    Return (sharing_policy_by_cid, display_name_by_cid, known_usernames_by_cid).

    Reads one row per requested contact from ``CONTACTS_TABLE`` for ``dataset_id``.

    Args:
        conn: Database connection exposing ``execute(sql, params).fetchall()``.
        dataset_id: Dataset the contacts belong to.
        contact_ids: Contact ids to load.

    Returns:
        Three dicts keyed by stripped contact id:
        the sharing policy as parsed by ``_parse_sharing_policy``; the stripped display name
        (``None`` when the column is empty); and the list of non-empty known usernames.
        All three are empty when an argument is falsy or the query fails (failure is logged).
    """
    policies: Dict[str, Dict[str, str]] = {}
    names: Dict[str, Optional[str]] = {}
    usernames: Dict[str, List[str]] = {}
    if not conn or not dataset_id or not contact_ids:
        return policies, names, usernames
    placeholders = ",".join("?" for _ in contact_ids)
    params: List[Any] = [dataset_id, *sorted(contact_ids)]
    try:
        rows = conn.execute(
            f"""
            SELECT contact_id, display_name, sharing_policy_json, known_usernames_json
            FROM {CONTACTS_TABLE}
            WHERE dataset_id = ? AND contact_id IN ({placeholders})
            """,
            params,
        ).fetchall()
    except Exception as exc:  # noqa: BLE001
        logger.warning("load_contact_meta failed: %s", exc)
        return policies, names, usernames
    for cid, dname, pol, raw_users in rows:
        c = str(cid or "").strip()
        if not c:
            continue
        names[c] = str(dname).strip() if dname else None
        policies[c] = _parse_sharing_policy(pol)
        parsed: List[str] = []
        try:
            arr = json.loads(raw_users or "[]")
        except Exception:  # noqa: BLE001
            # Deliberate best effort: malformed or non-text JSON means "no known usernames".
            arr = None
        if isinstance(arr, list):
            for value in arr:
                # JSON null decodes to None; str(None) would invent the username "None".
                if value is None:
                    continue
                text = str(value).strip()
                if text:
                    parsed.append(text)
        usernames[c] = parsed
    return policies, names, usernames
704
+
705
+
706
def load_identifier_fallback_labels(
    conn: Any,
    dataset_id: str,
    contact_ids: Set[str],
) -> Dict[str, str]:
    """
    Pick one identifier per contact to show when ``contacts.display_name`` is empty.

    This keeps ``sender_display_name`` populated for contacts whose address book row exists but
    whose card name was never synced into ``display_name``.

    Rows come back ordered so that identifiers with a concrete ``source_id`` precede the
    wildcard ``'*'`` ones, newest ``updated_at`` first; the first non-empty identifier seen for
    a contact wins. Returns an empty dict when an argument is falsy or the query fails.
    """
    labels: Dict[str, str] = {}
    if not (conn and dataset_id and contact_ids):
        return labels

    ordered_ids = sorted(contact_ids)
    placeholders = ",".join(["?"] * len(contact_ids))
    params: List[Any] = [dataset_id]
    params.extend(ordered_ids)
    try:
        cursor = conn.execute(
            f"""
            SELECT contact_id, identifier, source_id
            FROM {CONTACT_IDENTIFIERS_TABLE}
            WHERE dataset_id = ? AND contact_id IN ({placeholders})
            ORDER BY CASE WHEN source_id = '*' THEN 1 ELSE 0 END ASC, updated_at DESC
            """,
            params,
        )
        rows = cursor.fetchall()
    except Exception as exc:  # noqa: BLE001
        logger.warning("load_identifier_fallback_labels failed: %s", exc)
        return labels

    for record in rows:
        raw_contact_id, raw_identifier, _source = record
        contact_id = str(raw_contact_id or "").strip()
        identifier = str(raw_identifier or "").strip()
        # Keep only the first usable identifier per contact (query order encodes priority).
        if contact_id and identifier and contact_id not in labels:
            labels[contact_id] = identifier
    return labels
742
+
743
+
744
def apply_message_contact_pipeline(
    items: List[Dict[str, Any]],
    *,
    conn: Any,
    dataset_id: Optional[str],
    allowed_scopes: List[str],
    manifest: Optional[FilterManifest],
    filters: Optional[Dict[str, Any]],
) -> Tuple[List[Dict[str, Any]], Dict[str, Any]]:
    """
    Apply message_contact_participation, owner sharing_policy row exclusion, grant block/allow lists,
    and optional sender_display_name enrichment when contacts:resolve + contact_display_names.

    Returns ``(rows, sidecar)``. ``sidecar["message_owner"]`` describes the dataset owner so clients
    can label owner-authored rows (see per-row ``sender_is_owner`` and ``is_from_self``).

    Args:
        items: Message rows (dicts). Keys read here: ``source_id``, ``sender_id``,
            ``conversation_id`` / ``thread_id`` and ``is_from_self``.
        conn: Database connection handed to the loader helpers.
        dataset_id: Dataset the rows belong to.
        allowed_scopes: Granted scopes; ``"contacts:resolve"`` gates name enrichment.
        manifest: Optional filter manifest; the ``message_contact_participation`` and
            ``contact_display_names`` filters are consulted.
        filters: Optional dict; only its ``contact_grant_policy`` entry is read.

    Returns:
        A new list of shallow-copied rows that survived filtering, each with ``is_from_self`` and
        ``sender_is_owner`` set (and ``sender_display_name`` when enrichment produced a label),
        plus the sidecar dict. When ``items``, ``conn`` or ``dataset_id`` is falsy the input list
        itself is returned untouched together with an empty sidecar.
    """
    # Nothing to filter or enrich without rows, a connection and a dataset.
    if not items or not conn or not dataset_id:
        return items, {}

    scope_set = {str(s).strip() for s in (allowed_scopes or []) if s}
    can_resolve = "contacts:resolve" in scope_set

    participation = manifest.get_filter("message_contact_participation") if manifest else None
    name_filter = manifest.get_filter("contact_display_names") if manifest else None
    # If contacts:resolve is granted, enrich names by default unless the manifest explicitly disables it.
    names_enabled = can_resolve
    if name_filter is not None:
        names_enabled = bool(name_filter.params.get("enabled"))

    # Grant-level policy: anything that is not a dict is treated as "no policy".
    cgp = (filters or {}).get("contact_grant_policy") if isinstance(filters, dict) else None
    cgp = cgp if isinstance(cgp, dict) else {}
    # Per-contact sharing policies apply unless the grant opts out explicitly.
    inherit_defaults = bool(cgp.get("inherit_contact_defaults", True))
    grant_block: Set[str] = {str(x).strip() for x in (cgp.get("blocklist_contact_ids") or []) if str(x).strip()}
    grant_allow: Set[str] = {str(x).strip() for x in (cgp.get("allowlist_contact_ids") or []) if str(x).strip()}

    # Lookup structures used to resolve sender identifiers to contact ids.
    source_ids = {str(row.get("source_id") or "").strip() for row in items if row.get("source_id")}
    id_mm = build_identifier_contact_multimap(conn, dataset_id, source_ids)
    nanp_idx = build_nanp_digit_contact_index(conn, dataset_id)
    if not id_mm:
        logger.info(
            "UMA contact enrichment: no rows in %s for dataset_id=%s; cannot resolve senders to contacts",
            CONTACT_IDENTIFIERS_TABLE,
            dataset_id[:48] if dataset_id else "",
        )

    canonical_owner_display_name = load_user_identity_display_name(conn, dataset_id)
    self_contact_id, _self_label = load_self_contact_info(conn, dataset_id)
    literal_self_cids = _contact_ids_for_literal_self_sender(id_mm)

    # First pass: gather every conversation and contact id touched by this page so contact
    # metadata can be loaded with one call per loader instead of per row.
    conversation_ids_in_page: Set[str] = set()
    contact_ids_in_page: Set[str] = set()
    for row in items:
        conv = str(row.get("conversation_id") or row.get("thread_id") or "").strip()
        if conv:
            conversation_ids_in_page.add(conv)
        contact_ids_in_page.update(
            collect_message_contact_ids(row, id_mm, self_contact_id=self_contact_id, nanp_idx=nanp_idx)
        )
    # Make sure the owner's own contact(s) are part of the metadata load.
    if self_contact_id:
        contact_ids_in_page.add(self_contact_id)
    else:
        contact_ids_in_page.update(literal_self_cids)
    # Also include contacts of other senders in the same conversations.
    contact_ids_in_page.update(
        prefetch_contact_ids_for_conversations(
            conn,
            dataset_id,
            conversation_ids_in_page,
            id_mm,
            self_contact_id=self_contact_id,
            nanp_idx=nanp_idx,
        )
    )

    policies, display_names, known_usernames_by_cid = load_contact_meta(conn, dataset_id, contact_ids_in_page)
    fallback_labels = load_identifier_fallback_labels(conn, dataset_id, contact_ids_in_page)

    # Owner contact: the configured self contact if there is one, otherwise a representative
    # of the contacts mapped to the literal self sender.
    effective_owner_contact_id: Optional[str] = self_contact_id
    if not effective_owner_contact_id and literal_self_cids:
        effective_owner_contact_id = pick_representative_contact_id(
            literal_self_cids,
            display_names=display_names,
            known_usernames_by_cid=known_usernames_by_cid,
            fallback_labels=fallback_labels,
            policies=policies,
            inherit_defaults=inherit_defaults,
        )

    # conversation_id -> representative contact ids of its senders.
    participant_map = load_conversation_participant_contact_ids(
        conn,
        dataset_id,
        conversation_ids_in_page,
        id_mm,
        self_contact_id=self_contact_id,
        nanp_idx=nanp_idx,
        display_names=display_names,
        known_usernames_by_cid=known_usernames_by_cid,
        fallback_labels=fallback_labels,
        policies=policies,
        inherit_defaults=inherit_defaults,
    )

    # row_block: contacts whose rows are dropped. name_block: contacts whose names are hidden.
    # Per-contact policies only contribute when the grant inherits contact defaults.
    row_block: Set[str] = set(grant_block)
    name_block: Set[str] = set()
    for cid, pol in policies.items():
        if not inherit_defaults:
            continue
        if pol.get("row_visibility") == "exclude_from_grants":
            row_block.add(cid)
        if pol.get("name_visibility") == "hidden":
            name_block.add(cid)

    # Manifest participation filter: "all" (default), "blocklist" or "allowlist".
    mode = "all"
    manifest_block_ids: Set[str] = set()
    manifest_allow_ids: Set[str] = set()
    if participation:
        mode = str(participation.params.get("mode") or "all")
        raw_ids = participation.params.get("contact_ids") or []
        if isinstance(raw_ids, list):
            if mode == "blocklist":
                manifest_block_ids = {str(x).strip() for x in raw_ids if str(x).strip()}
            elif mode == "allowlist":
                manifest_allow_ids = {str(x).strip() for x in raw_ids if str(x).strip()}
    # "sender_only" matches on the row's sender; any other value also includes the
    # conversation's participants (see the row loop below).
    match_mode = "thread_participants"
    if participation:
        match_mode = str(participation.params.get("match") or "thread_participants")

    row_block |= manifest_block_ids

    # Optional graph-based labels, resolved once for all participant ids on the page.
    graph_labels: Dict[str, Dict[str, str]] = {}
    graph_pid: Set[str] = set()
    if can_resolve and names_enabled:
        for row in items:
            sid_g = str(row.get("sender_id") or "").strip()
            if sid_g and sid_g.lower() != "self":
                graph_pid.add(sid_g)
            alt_g = _metadata_chat_identifier(row)
            if alt_g:
                aa = str(alt_g).strip()
                if aa:
                    graph_pid.add(aa)
        if self_contact_id:
            graph_pid.add(self_contact_id)
        elif literal_self_cids:
            graph_pid.update(literal_self_cids)
        if graph_pid:
            try:
                graph_labels = resolve_participant_labels(
                    conn, dataset_id=dataset_id, participant_ids=sorted(graph_pid)
                )
            except Exception as exc:  # noqa: BLE001
                # Label resolution is best effort; fall back to pipeline labels only.
                logger.warning("resolve_participant_labels for UMA enrichment failed: %s", exc)
                graph_labels = {}

    # Second pass: filter each row, then enrich the survivors.
    out_rows: List[Dict[str, Any]] = []
    for row in items:
        sid = str(row.get("sender_id") or "").strip()
        cids = collect_message_contact_ids(row, id_mm, self_contact_id=self_contact_id, nanp_idx=nanp_idx)
        # Contacts resolved from sender_id alone (used for the literal "self" case below).
        self_sender_cids = collect_contact_ids_for_sender(
            row.get("sender_id"),
            id_mm,
            self_contact_id=self_contact_id,
            nanp_idx=nanp_idx,
        )
        cid = (
            pick_representative_contact_id(
                cids,
                display_names=display_names,
                known_usernames_by_cid=known_usernames_by_cid,
                fallback_labels=fallback_labels,
                policies=policies,
                inherit_defaults=inherit_defaults,
            )
            if cids
            else None
        )
        # iMessage rows often duplicate the peer handle in metadata; that merges peer + owner into
        # ``cids``. Never treat the peer as the sender when ``sender_id`` is literally ``self``.
        if sid.lower() == "self":
            if self_contact_id and self_contact_id in cids:
                cid = self_contact_id
            elif self_sender_cids:
                cid = pick_representative_contact_id(
                    self_sender_cids,
                    display_names=display_names,
                    known_usernames_by_cid=known_usernames_by_cid,
                    fallback_labels=fallback_labels,
                    policies=policies,
                    inherit_defaults=inherit_defaults,
                )
            else:
                cid = None

        # Contacts this row is matched against for block/allow decisions.
        conv = str(row.get("conversation_id") or row.get("thread_id") or "").strip()
        if match_mode == "sender_only":
            row_contact_ids: Set[str] = {cid} if cid else set()
        else:
            row_contact_ids = set(participant_map.get(conv, set()))
            if cid:
                row_contact_ids.add(cid)

        # Drop the row if any matched contact is blocked.
        if row_contact_ids and row_contact_ids.intersection(row_block):
            continue
        # Manifest allowlist: an empty allowlist lets nothing through.
        if mode == "allowlist":
            if not manifest_allow_ids:
                continue
            if not row_contact_ids.intersection(manifest_allow_ids):
                continue
        # Grant allowlist only applies when it is non-empty.
        if grant_allow:
            if not row_contact_ids.intersection(grant_allow):
                continue

        # Work on a shallow copy so the caller's row dicts are not mutated.
        new_row = dict(row)
        raw_graph = ""
        graph_dn = ""
        pipeline_dn = ""
        if can_resolve and names_enabled:
            if sid.lower() == "self" and canonical_owner_display_name:
                # Owner-authored row with a canonical identity name: use it directly.
                new_row["sender_display_name"] = canonical_owner_display_name
            elif cid:
                raw_graph = _uma_graph_display_name_for_row(graph_labels, row) if graph_labels else ""
                graph_dn = _graph_display_name_respects_name_policy(
                    raw_graph, cids, display_names, name_block
                )
                pipeline_dn = visible_label_for_contact(
                    cid,
                    display_names=display_names,
                    known_usernames_by_cid=known_usernames_by_cid,
                    fallback_labels=fallback_labels,
                )
                # No name is attached for contacts that are name-hidden or grant-blocked.
                if cid not in name_block and cid not in grant_block:
                    dn = ""
                    owner_graph_key: Optional[str] = None
                    if sid.lower() == "self" and cid:
                        if self_contact_id is not None and cid == self_contact_id:
                            owner_graph_key = self_contact_id
                        elif self_contact_id is None and cid in self_sender_cids:
                            owner_graph_key = cid
                    if owner_graph_key is not None:
                        # Prefer graph resolution (identifier promotion across duplicate cards) over
                        # pipeline fallback labels, which are often raw phone strings.
                        # Order tried: self-contact label, then graph label, then pipeline label.
                        dn = (_self_label or "").strip()
                        if not dn and graph_labels:
                            dn = _owner_graph_display_name(graph_labels, owner_graph_key)
                        if not dn:
                            dn = pipeline_dn
                    elif graph_dn:
                        dn = graph_dn
                    else:
                        dn = pipeline_dn
                    if dn:
                        new_row["sender_display_name"] = dn
        # Normalise to a real bool and flag owner-authored rows.
        new_row["is_from_self"] = bool(new_row.get("is_from_self"))
        sid_lower = sid.strip().lower()
        new_row["sender_is_owner"] = bool(
            sid_lower == "self"
            or (effective_owner_contact_id is not None and cid == effective_owner_contact_id)
            or new_row.get("is_from_self")
        )

        out_rows.append(new_row)

    # Sidecar: summary of the dataset owner for the rows that were actually returned.
    owner_messages_in_response = sum(1 for r in out_rows if r.get("sender_is_owner"))
    owner_display_for_response: Optional[str] = None
    if can_resolve and names_enabled:
        # Same preference order as per-row owner labels, starting from the canonical name.
        _ocand = (canonical_owner_display_name or "").strip()
        if not _ocand and effective_owner_contact_id:
            _ocand = (_self_label or "").strip()
            if not _ocand and graph_labels:
                _ocand = _owner_graph_display_name(graph_labels, effective_owner_contact_id)
            if not _ocand:
                _ocand = visible_label_for_contact(
                    effective_owner_contact_id,
                    display_names=display_names,
                    known_usernames_by_cid=known_usernames_by_cid,
                    fallback_labels=fallback_labels,
                )
        # Labels recognised by _is_imessage_self_sentinel_label are not exposed as a name.
        if _ocand and not _is_imessage_self_sentinel_label(_ocand):
            owner_display_for_response = _ocand
    # Owner user id is taken as the part of dataset_id before the first ":".
    owner_uid = (dataset_id or "").split(":", 1)[0] or None
    sidecar: Dict[str, Any] = {
        "message_owner": {
            "owner_user_id": owner_uid,
            "owner_contact_id": effective_owner_contact_id,
            "owner_display_name": owner_display_for_response,
            "owner_messages_in_this_response": owner_messages_in_response,
        }
    }

    return out_rows, sidecar