wechat-screenshot-vision-algorithm 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. wechat_screenshot_vision_algorithm/__init__.py +40 -0
  2. wechat_screenshot_vision_algorithm/_config.py +61 -0
  3. wechat_screenshot_vision_algorithm/algorithms/__init__.py +0 -0
  4. wechat_screenshot_vision_algorithm/algorithms/avatar_column.py +211 -0
  5. wechat_screenshot_vision_algorithm/algorithms/badge_detection.py +275 -0
  6. wechat_screenshot_vision_algorithm/algorithms/card_bbox.py +814 -0
  7. wechat_screenshot_vision_algorithm/algorithms/phash_utils.py +267 -0
  8. wechat_screenshot_vision_algorithm/algorithms/speaker_band.py +292 -0
  9. wechat_screenshot_vision_algorithm/algorithms/template_matching.py +2152 -0
  10. wechat_screenshot_vision_algorithm/algorithms/title_ocr.py +145 -0
  11. wechat_screenshot_vision_algorithm/merge/__init__.py +0 -0
  12. wechat_screenshot_vision_algorithm/merge/multipage.py +157 -0
  13. wechat_screenshot_vision_algorithm/ocr/__init__.py +0 -0
  14. wechat_screenshot_vision_algorithm/ocr/avatar_guard.py +436 -0
  15. wechat_screenshot_vision_algorithm/ocr/badge_ocr.py +234 -0
  16. wechat_screenshot_vision_algorithm/ocr/nickname_binding.py +1888 -0
  17. wechat_screenshot_vision_algorithm/ocr/text_ocr_adapter.py +627 -0
  18. wechat_screenshot_vision_algorithm/png_utils.py +87 -0
  19. wechat_screenshot_vision_algorithm/profiles/__init__.py +0 -0
  20. wechat_screenshot_vision_algorithm/profiles/android_wechat.py +53 -0
  21. wechat_screenshot_vision_algorithm/profiles/harmony_wechat.py +10 -0
  22. wechat_screenshot_vision_algorithm/profiles/ios_wechat.py +53 -0
  23. wechat_screenshot_vision_algorithm/templates/wechat/android/8.0.69/chat_back_chevron.png +0 -0
  24. wechat_screenshot_vision_algorithm/templates/wechat/android/8.0.69/chat_input_emoji_smile.png +0 -0
  25. wechat_screenshot_vision_algorithm/templates/wechat/android/8.0.69/chat_input_plus.png +0 -0
  26. wechat_screenshot_vision_algorithm/templates/wechat/android/8.0.69/chat_input_voice.png +0 -0
  27. wechat_screenshot_vision_algorithm/templates/wechat/android/8.0.69/chat_title_more_dots.png +0 -0
  28. wechat_screenshot_vision_algorithm/templates/wechat/android/8.0.69/favorite_label.png +0 -0
  29. wechat_screenshot_vision_algorithm/templates/wechat/android/8.0.69/new_messages_hint_suffix.png +0 -0
  30. wechat_screenshot_vision_algorithm/templates/wechat/android/8.0.69/unread_divider_hint.png +0 -0
  31. wechat_screenshot_vision_algorithm/templates/wechat/android/8.0.69/unread_divider_hint_v2_textonly.png +0 -0
  32. wechat_screenshot_vision_algorithm/templates/wechat/android/8.0.69/wechat_note_header.png +0 -0
  33. wechat_screenshot_vision_algorithm-0.1.0.dist-info/METADATA +423 -0
  34. wechat_screenshot_vision_algorithm-0.1.0.dist-info/RECORD +36 -0
  35. wechat_screenshot_vision_algorithm-0.1.0.dist-info/WHEEL +5 -0
  36. wechat_screenshot_vision_algorithm-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,814 @@
1
+ """Resume-thumbnail card geometry from ``favorite_label`` template hits.
2
+
3
+ Given the (variable) set of ``favorite_label.png`` hits found across one chat
4
+ screenshot, derive:
5
+ - the bounding box (top/bottom/left/right) of each resume-thumbnail card
6
+ - the recommended click center for ``tap_thumbnail``
7
+ - the ``click_side`` half (``left`` / ``right``) relative to the
8
+ screen midline — stored in ``click_context.click_side`` for gate-1e/4
9
+ group-A discipline
10
+
11
+ Two-stage strategy (DD section 2.4.x A + section 3.3 1):
12
+
13
+ Stage 1: ``favorite_label.png`` template match (done upstream by
14
+ ``collector.template_matcher``) returns a list of (x, y, score)
15
+ hits, one per card (the label is the strongest on-card anchor
16
+ and appears exactly once per card).
17
+
18
+ Stage 2: For each hit:
19
+ - card.top = prev_hit.y2 + FAVORITE_TAIL_OFFSET + 1
20
+ (for i=0, fall back to ``y1 - FAVORITE_TO_CARD_TOP_OFFSET``)
21
+ - card.bottom = hit.y2 + FAVORITE_TAIL_OFFSET (anchor sits near bottom)
22
+ - card.left = 0, card.right = screen_w
23
+ (the label only gives us y; the card always spans full text column
24
+ width, and tapping anywhere horizontally inside it opens the note)
25
+
26
+ The click target is the **center of the ``favorite_label`` template
27
+ hit** (the ``收藏`` crop — ``chat_profiles/.../favorite_label.png``,
28
+ **70×60 @ 1080 baseline**, scaled by ``scale_w`` at match time). This
29
+ keeps taps on the label foot instead of the card mid-column (avoids
30
+ right-side image thumbnails and lands on the note entry chrome).
31
+
32
+ Constants are anchored to the 1080x2248 baseline; runtime ``scale_w`` is
33
+ applied when loading templates.
34
+
35
+ Aligned with:
36
+ DD section 2.4.x A (two-stage thumbnail detection)
37
+ DD section 3.3 step 1 (tap_thumbnail)
38
+ chat_profiles/README.md v0.2.2 UI constants
39
+ """
40
+
41
+ from __future__ import annotations
42
+
43
+ from dataclasses import dataclass
44
+ from typing import Iterable, Optional, Sequence
45
+
46
+ import cv2
47
+ import numpy as np
48
+
49
#: Distance from the top of the ``favorite_label.png`` hit to the top of the
#: enclosing note card, measured at 1080x2248 baseline. Used as an
#: **upper-bound heuristic** for the first hit only (subsequent hits derive
#: their top from the previous hit's bottom + ``FAVORITE_TAIL_OFFSET``).
FAVORITE_TO_CARD_TOP_OFFSET_BASELINE = 421

#: Gap from the bottom of the ``favorite_label.png`` hit to the card's
#: actual bottom edge, at 1080x2248 baseline. Used to extend the card bbox
#: past the label.
FAVORITE_TAIL_OFFSET_BASELINE = 60

#: ``favorite_label.png`` raster size @ 1080-wide baseline (`chat_profiles/README.md`).
FAVORITE_LABEL_TEMPLATE_W_BASELINE = 70
FAVORITE_LABEL_TEMPLATE_H_BASELINE = 60

#: **edb1a89f 1080×2248** empirical row step (px @ baseline width):
#:
#: - ``session_20260506193232`` ``scr_004`` ``chat_resume_rescan``: real card tops
#:   424→966 → **Δtop = 542** (minimal full-notebook row step that frame).
#: - Same session ``scr_009``: first row ``top`` clamps to **0**, next real row top
#:   **396** → **Δtop = 396** (pseudo strip produced by the "421px upward
#:   heuristic" swallowing grey background / chrome).
#:
#: When ``card.top == 0`` and another card sits below: if
#: ``Δtop < round(REFERENCE_RESUME_CARD_TOP_GAP_BASELINE * scale_w)
#:          - round(FAVORITE_LABEL_TEMPLATE_H_BASELINE * scale_w)``
#: (same as ``Δtop - H_fixed <`` the height of the small favorite-label
#: rectangle, in px), drop the clamped strip as a false clickable card.
#: Single top-clamped hits (no sibling) stay — no pitch to compare (caller
#: may rely on OCR / downstream).
REFERENCE_RESUME_CARD_TOP_GAP_BASELINE: int = 542

#: Session de-dup: for the same card seen on ``chat_resume_rescan`` /
#: consecutive frames, only the **vertical axis** is compared: the
#: intersection length of the two cards' row intervals ``[top,bottom]``
#: (endpoint pixels inclusive) divided by ``min(h_a,h_b)`` ≥ this threshold →
#: the card is treated as already processed on the in-session path.
#: 2D area intersection is deliberately **not** used (notebook-derived cards
#: span the whole column width anyway, so the horizontal axis contributes
#: little to de-duplication; it also saves a few multiply-adds). This is still
#: an O(K) item-by-item comparison with K ≤ the number of favorite-label hits
#: on screen; **relative to screencap + OpenCV** the cost is negligible.
PROCESSED_CARD_MATCH_MIN_OVERLAP_RATIO: float = 0.70
85
+
86
+
87
@dataclass(frozen=True)
class FavoriteLabelHit:
    """A single ``favorite_label.png`` template match on a chat screenshot.

    ``(x, y)`` is the match's top-left corner (OpenCV convention, the same
    thing ``collector.template_matcher.Hit`` carries). ``(w, h)`` are the
    template dimensions, possibly already adjusted by ``scale_w``, kept here
    so the bottom edge can be computed without re-loading the template.
    ``score`` is the match score reported by the matcher.

    Instances are immutable and hashable.
    """

    # Top-left corner of the match, in screenshot pixels.
    x: int
    y: int
    # Template width / height used for this match.
    w: int
    h: int
    # Matcher score for this hit.
    score: float

    @property
    def y1(self) -> int:
        """Top edge of the hit (alias of ``y``)."""
        top_edge = self.y
        return top_edge

    @property
    def y2(self) -> int:
        """Bottom edge of the hit, i.e. ``y + h``."""
        bottom_edge = self.y + self.h
        return bottom_edge
110
+
111
+
112
@dataclass(frozen=True)
class ThumbnailCard:
    """Rectangle and tap target derived for one resume-note thumbnail.

    All coordinates live in raw-image pixel space, i.e. the coordinate system
    of the source screenshot. ``(click_x, click_y)`` is the point the driver
    hands to ``adb shell input tap``. ``click_side`` is the DD §2.4.x A
    ``click_context.click_side`` signal: which half of the screen the tap
    falls on (historically useful because WeChat sometimes shows the reply
    preview on the opposite side).
    """

    # 0-based position; follows the favorite_label hit ordering (top→bottom).
    index: int
    # Vertical extent of the card.
    top: int
    bottom: int
    # Horizontal extent of the card.
    left: int
    right: int
    # Tap target.
    click_x: int
    click_y: int
    # Either "left" or "right".
    click_side: str
    # The template hit this card was derived from.
    favorite_hit: FavoriteLabelHit
132
+
133
+
134
@dataclass(frozen=True)
class BubbleBbox:
    """Rectangle derived for one plain text-chat bubble.

    Coordinates are raw-image pixels. Per the original design notes, these
    boxes come from the mid-variance zones that remain once card zones have
    been excluded (Step 4).
    """

    # Vertical extent.
    top: int
    bottom: int
    # Horizontal extent.
    left: int
    right: int
146
+
147
+
148
def _compute_exact_card_bboxes(
    ordered_hits: list[FavoriteLabelHit],
    bgr_img: np.ndarray,
    screen_w: int,
    screen_h: int,
) -> list[ThumbnailCard]:
    """PRD S6: vline-segment + hline-boundary card bbox detection.

    Steps, as implemented below:
        1. Convert to grayscale, compute a local standard deviation with a
           7x7 box filter, and derive the "mid-variance" mask
           (gray 190-248, std 6-25).
        2. Collect columns whose longest continuous mask run inside
           ``[TOP, BOT)`` exceeds 15% of that zone's height, cluster them
           (gap > 30 px starts a new cluster), drop clusters too close to
           ``AR``, and score each remaining vline.
        3. Split each candidate vline into continuous runs ("zones"),
           clamped to ``[TOP, BOT]``; runs of 10 px or less are dropped.
        4. Find horizontal bands where the mask covers more than 25% of
           ``card_w``; keep, per vline, those whose longest run is wider
           than 50% of ``card_w`` and that touch the mask near the vline.
        5. For every hit, walk the vlines best-score first, locate the zone
           containing (or directly above) the hit's vertical center, snap
           the zone to the nearest hlines above/below, and accept the first
           bbox that contains the hit center and is at least 80 px tall.

    Args:
        ordered_hits: favorite-label hits; the anti-overlap step relies on
            them being ordered top to bottom (``derive_cards`` sorts by
            ``y`` before calling).
        bgr_img: image passed to ``cv2.cvtColor(..., COLOR_BGR2GRAY)``.
        screen_w: used both for scaling the 1080-baseline constants and as
            the column range scanned in the mask.
            NOTE(review): assumes ``screen_w`` does not exceed the image
            width; otherwise the column indexing raises ``IndexError``.
        screen_h: only used to clamp ``click_y``.

    Returns:
        Cards for the hits that could be matched, in hit order. Hits with
        no acceptable bbox are skipped, so the result may be shorter than
        ``ordered_hits``; each card's ``index`` is the hit's position in
        ``ordered_hits``. Returns ``[]`` when no usable vline/zone exists.
    """
    gray = cv2.cvtColor(bgr_img, cv2.COLOR_BGR2GRAY).astype(np.float32)

    H, _W = gray.shape
    # Vertical working band: 10% .. 93% of the image height.
    TOP = int(H * 0.10)
    BOT = int(H * 0.93)
    # 108 px at the 1080-wide baseline, scaled to this screen. Used as the
    # card's left edge — presumably the right edge of the avatar column
    # (TODO confirm against the profile constants).
    AR = int(108 * screen_w / 1080)
    card_w = screen_w - AR

    # Step 1 — fast local variance via box filter (O(N) via integral images)
    # var = E[x^2] - E[x]^2 over a win x win neighbourhood, floored at 0 to
    # absorb floating-point negatives before the square root.
    win = 7
    mean = cv2.boxFilter(gray, -1, (win, win), normalize=True)
    sq_mean = cv2.boxFilter(gray * gray, -1, (win, win), normalize=True)
    var_map = np.maximum(sq_mean - mean * mean, 0.0)
    std_map = np.sqrt(var_map)

    # Mid-variance mask: gray 190-248, std 6-25
    mid = (
        (gray >= 190.0) & (gray <= 248.0) &
        (std_map >= 6.0) & (std_map <= 25.0)
    )

    # Step 2 — right-side vline candidates
    # PRD: per-column max *continuous-segment* length > 15% of zone height
    # (not total pixel sum across the column).
    vh = BOT - TOP
    min_seg = vh * 0.15

    cand_cols = []
    for x in range(screen_w):
        col = mid[TOP:BOT, int(x)]
        # Run-length scan: track the longest run of True values.
        max_seg = 0
        cur = 0
        for v in col:
            if v:
                cur += 1
            else:
                if cur > max_seg:
                    max_seg = cur
                cur = 0
        # A run that reaches the end of the column is closed here.
        if cur > max_seg:
            max_seg = cur
        if max_seg > min_seg:
            cand_cols.append(x)

    # PRD: group qualifying columns by >30px gap, take cluster mean
    vd_lines = []
    if cand_cols:
        cluster = [cand_cols[0]]
        for i in range(1, len(cand_cols)):
            if cand_cols[i] - cand_cols[i - 1] > 30:
                vd_lines.append(int(round(sum(cluster) / len(cluster))))
                cluster = [cand_cols[i]]
            else:
                cluster.append(cand_cols[i])
        # Flush the last open cluster.
        vd_lines.append(int(round(sum(cluster) / len(cluster))))

    # PRD §B.2 Step 2: the vline's X distance from the speaker avatar should
    # be >= 260 px (1080 baseline)
    min_card_x_gap = int(260 * screen_w / 1080)
    vd_lines = [vx for vx in vd_lines if vx - AR >= min_card_x_gap]

    if not vd_lines:
        return []

    # Pick best vline: max total mid pixels + longest continuous segment
    # (scored over the full image height, not just [TOP, BOT]).
    vline_scores: dict[int, float] = {}
    best_vx, best_score = None, 0.0
    for vx in vd_lines:
        col = mid[:, vx]
        segs = []
        k = 0
        while k < H:
            if col[k]:
                s = k
                while k < H and col[k]:
                    k += 1
                segs.append(k - s)
            else:
                k += 1
        total = float(sum(segs))
        longest = float(max(segs) if segs else 0)
        score = total + longest
        vline_scores[vx] = score
        # Strict '>' against 0.0: a vline with no mask pixels never wins.
        if score > best_score:
            best_score = score
            best_vx = vx

    if best_vx is None:
        return []

    # Step 3 — vline segments, computed per candidate vline. A fav-anchored
    # card may sit on a different right-edge vline than the frame-global best
    # one when plain text bubbles and note cards mix in one frame (the bubble
    # edge can out-score the card edge — 20260611 session_185429 C1 case).
    def _zones_for(vx: int) -> list[tuple[int, int]]:
        # Inclusive (start_row, end_row) runs of mask pixels in column vx.
        colv = mid[:, int(vx)]
        segs: list[tuple[int, int]] = []
        yy = 0
        while yy < H:
            if colv[yy]:
                s = yy
                while yy < H and colv[yy]:
                    yy += 1
                segs.append((s, yy - 1))
            else:
                yy += 1
        # Clamp to [TOP, BOT], drop tiny
        return [
            (max(TOP, s), min(BOT, e))
            for s, e in segs
            if min(BOT, e) - max(TOP, s) > 10
        ]

    # Best vline first, then the remaining ones by descending score.
    vline_order: list[int] = [int(best_vx)] + sorted(
        (int(vx) for vx in vd_lines if vx != best_vx),
        key=lambda vx: vline_scores[vx],
        reverse=True,
    )
    zones_by_vx = {vx: _zones_for(vx) for vx in vline_order}
    if not any(zones_by_vx.values()):
        return []

    # Step 4 — card boundary hlines (span > 50% card_w, X-near vline,
    # max gray gradient >= 30 = white→gray boundary)
    # NOTE(review): white_pct_diff is computed and stored per hline below,
    # but nothing in this function filters on it — the ">= 30" criterion is
    # not enforced here. Confirm whether that is intentional.
    hproj = np.sum(mid[:, AR:], axis=1).astype(float)
    above = hproj > card_w * 0.25
    XLIMIT = int(100 * screen_w / 1080)
    all_hd: list[tuple[int, int, int, int, int]] = []  # (hy,span,left_x,right_x,white_pct_diff)
    i = 0
    while i < H:
        if above[i]:
            # Rows [i, j) form one band; its middle row represents the hline.
            j = i + 1
            while j < H and above[j]:
                j += 1
            hy = (i + j - 1) // 2
            hrow = int(hy)
            row = mid[hrow, :]
            seg_info: list[tuple[int, int, int]] = []  # (len, start_x, end_x)
            k = 0
            while k < screen_w:
                if row[k]:
                    s = k
                    while k < screen_w and row[k]:
                        k += 1
                    seg_info.append((k - s, s, k - 1))
                else:
                    k += 1
            # Keep only the longest horizontal run on the representative row.
            if seg_info:
                span, left_x, right_x = max(seg_info, key=lambda t: t[0])
            else:
                span, left_x, right_x = 0, 0, 0
            # Vertical white-pct change across this hline: real white→gray
            # boundaries show a large drop in white-pixel ratio above vs
            # below; uniform UI separators (e.g. y=206) show near-zero.
            col = gray[:, AR:int(best_vx)]
            H_img = col.shape[0]
            # Two rows above the hline vs. the hline row and the one below.
            r1 = max(0, hrow - 2)
            r2 = min(H_img, hrow + 2)
            above_zone = col[r1:hrow, :]
            below_zone = col[hrow:r2, :]
            above_white = int((above_zone > 244).sum())
            below_white = int((below_zone > 244).sum())
            above_pct = above_white / above_zone.size if above_zone.size else 0.0
            below_pct = below_white / below_zone.size if below_zone.size else 0.0
            white_pct_diff = int(abs(above_pct - below_pct) * 100)
            all_hd.append((hy, span, left_x, right_x, white_pct_diff))
            i = j
        else:
            i += 1

    def _hline_near_vline(y: int, vx: int, mid_mask: np.ndarray, x_limit: int) -> bool:
        # True when row y has any mask pixel within x_limit columns of vx.
        x1 = max(0, int(vx) - x_limit)
        x2 = min(mid_mask.shape[1] - 1, int(vx) + x_limit)
        return bool(mid_mask[y, x1:x2 + 1].any())

    # Per-vline hline list, sorted ascending (tuples start with hy).
    hlines_by_vx: dict[int, list[tuple[int, int, int, int, int]]] = {
        vx: sorted(
            (hy, span, left_x, right_x, white_pct_diff)
            for hy, span, left_x, right_x, white_pct_diff in all_hd
            if TOP <= hy <= BOT and span > card_w * 0.5
            and _hline_near_vline(hy, vx, mid, XLIMIT)
        )
        for vx in vline_order
    }

    # Step 5 — match vline zones to fav hits, snap to hlines. Per hit, walk
    # candidate vlines (best-scored first) and accept the first bbox that
    # actually contains the fav anchor: the favorite label lives INSIDE its
    # card, so a bbox excluding it is a mis-matched zone, not this card.
    # NOTE(review): an earlier description mentions matching segment ends to
    # hlines within +/-80px; the code below has no such window — 80 appears
    # only as the minimum accepted bbox height.
    cards: list[ThumbnailCard] = []
    prev_bottom: int | None = None  # anti-overlap: enforce card boundaries non-overlapping
    for idx, hit in enumerate(ordered_hits):
        fav_cy = hit.y + hit.h // 2

        chosen: tuple[int, int, int] | None = None  # (vx, exact_y1, exact_y2)
        for vx in vline_order:
            zones = zones_by_vx[vx]
            if not zones:
                continue

            # Try exact zone containment first; if in a gap, pair with the
            # zone immediately *above* within the SAME vline (the favorite
            # label row may interrupt the vline run).
            fav_zone_idx = None
            for zi, (z1, z2) in enumerate(zones):
                if z1 <= fav_cy <= z2:
                    fav_zone_idx = zi
                    break

            if fav_zone_idx is None and len(zones) >= 2:
                # Hit in gap — find the zone just above it
                for zi in range(len(zones) - 1):
                    if zones[zi][1] < fav_cy < zones[zi + 1][0]:
                        fav_zone_idx = zi
                        break

            if fav_zone_idx is None:
                continue

            y1 = zones[fav_zone_idx][0]
            y2 = zones[fav_zone_idx][1]
            card_hlines = hlines_by_vx[vx]

            # Snap to nearest card hline per PRD Step 3.
            # Upper: "the nearest mid-variance hline above, on a white/gray
            # background boundary (its other end must lie within 100 px of
            # the speaker avatar), or TOP_BAR_BOT". Here that is: the
            # hline's left_x must be < AR + XLIMIT; fallback is TOP.
            upper = [h for h in card_hlines
                     if TOP <= h[0] < y1 and h[2] < AR + XLIMIT]
            exact_y1 = max(upper, key=lambda h: h[0])[0] if upper else TOP

            # Lower: "the nearest mid-variance hline below, on a white/gray
            # background boundary, or BOT_BAR_TOP"; fallback is BOT.
            lower = [h for h in card_hlines if y2 < h[0] <= BOT]
            exact_y2 = min(lower, key=lambda h: h[0])[0] if lower else BOT

            # Reject degenerate / too-short boxes and boxes that do not
            # contain the hit's vertical center; try the next vline instead.
            if exact_y2 <= exact_y1 or exact_y2 - exact_y1 < 80:
                continue
            if not (exact_y1 <= fav_cy <= exact_y2):
                continue
            chosen = (vx, exact_y1, exact_y2)
            break

        if chosen is None:
            continue
        card_vx, exact_y1, exact_y2 = chosen

        # Anti-overlap: ensure sequence of cards is strictly non-overlapping
        # (essential when gap-matching + hline snapping pushes boundaries inward)
        if prev_bottom is not None and exact_y1 <= prev_bottom:
            exact_y1 = prev_bottom + 1
            if exact_y2 <= exact_y1 or exact_y2 - exact_y1 < 80:
                continue

        # Tap target: center of the favorite-label hit, clamped on-screen.
        click_x = min(screen_w - 1, max(0, hit.x + hit.w // 2))
        click_y = min(screen_h - 1, max(0, hit.y + hit.h // 2))
        midline = screen_w / 2.0
        click_side = "left" if click_x < midline else "right"

        cards.append(ThumbnailCard(
            index=idx,
            top=exact_y1,
            bottom=exact_y2,
            left=AR,
            right=card_vx,
            click_x=click_x,
            click_y=click_y,
            click_side=click_side,
            favorite_hit=hit,
        ))
        prev_bottom = exact_y2

    return cards
437
+
438
+
439
def derive_cards(
    hits: Sequence[FavoriteLabelHit],
    *,
    screen_w: int,
    screen_h: int,
    scale_w: float = 1.0,
    chat_img: Optional[np.ndarray] = None,
) -> list[ThumbnailCard]:
    """Translate favorite_label hits into click-ready :class:`ThumbnailCard`.

    Args:
        hits: favorite_label.png hits on the chat screenshot, in any order
            (they are sorted by ``y`` here).
        screen_w / screen_h: the source screenshot's raw-image dimensions
            (usually equal to ``device_info.screen_resolution``).
        scale_w: ``screen_w / baseline_w`` (baseline = 1080). Used to scale
            ``FAVORITE_TO_CARD_TOP_OFFSET`` + ``FAVORITE_TAIL_OFFSET``
            so the logic survives across the 720 / 1080 / 1220 device
            whitelist.
        chat_img: optional BGR image (as returned by ``cv2.imdecode``).
            When provided, the vline-segment + hline-boundary
            variance-based algorithm (PRD S6) is tried first. The legacy
            fixed-offset logic is used when ``chat_img`` is ``None``, when
            the exact algorithm yields no card, or when it raises.

    Returns:
        Cards sorted top -> bottom and re-filtered through
        :func:`drop_top_clamped_false_positive_cards`. There is at most one
        card per hit; hits can be dropped (no exact bbox found, an empty
        legacy rectangle, or a top-clamped false positive). Empty list if
        ``hits`` is empty (caller should then decide whether to scroll
        further or end the session).
    """
    if not hits:
        return []

    ordered = sorted(hits, key=lambda h: h.y)

    # New algorithm: vline-segment + hline-boundary (PRD S6)
    if chat_img is not None:
        try:
            exact_cards = _compute_exact_card_bboxes(
                ordered, chat_img, screen_w, screen_h,
            )
            if exact_cards:
                return drop_top_clamped_false_positive_cards(
                    exact_cards, scale_w=scale_w,
                )
        except Exception:
            # Deliberately best-effort: any failure of the image-based path
            # falls back to the legacy geometry below. The failure is logged
            # rather than swallowed so regressions stay visible.
            import logging

            logging.getLogger(__name__).warning(
                "exact card bbox detection failed; "
                "falling back to fixed-offset geometry",
                exc_info=True,
            )

    # Legacy: fixed-offset algorithm
    top_offset = int(round(FAVORITE_TO_CARD_TOP_OFFSET_BASELINE * scale_w))
    tail_offset = int(round(FAVORITE_TAIL_OFFSET_BASELINE * scale_w))

    cards: list[ThumbnailCard] = []
    for idx, hit in enumerate(ordered):
        if idx == 0:
            # First card has no predecessor: estimate its top from the label.
            card_top = max(0, hit.y1 - top_offset)
        else:
            # Later cards start one row below the previous hit's card bottom
            # (computed from the previous *hit*, even if that hit's card was
            # skipped).
            prev_bottom = ordered[idx - 1].y2 + tail_offset
            card_top = max(prev_bottom + 1, 0)

        card_bottom = min(screen_h - 1, hit.y2 + tail_offset)

        # Empty or inverted rectangle: nothing to tap for this hit.
        if card_bottom <= card_top:
            continue

        # Tap target: center of the favorite-label hit, clamped on-screen.
        click_x = min(screen_w - 1, max(0, hit.x + hit.w // 2))
        click_y = min(screen_h - 1, max(0, hit.y + hit.h // 2))
        midline = screen_w / 2
        click_side = "left" if click_x < midline else "right"

        cards.append(ThumbnailCard(
            index=idx,
            top=card_top,
            bottom=card_bottom,
            left=0,
            right=screen_w - 1,
            click_x=click_x,
            click_y=click_y,
            click_side=click_side,
            favorite_hit=hit,
        ))

    return drop_top_clamped_false_positive_cards(cards, scale_w=scale_w)
522
+
523
+
524
def drop_top_clamped_false_positive_cards(
    cards: list[ThumbnailCard],
    *,
    scale_w: float,
) -> list[ThumbnailCard]:
    """Remove top-clamped pseudo cards and renumber the survivors.

    A card is discarded when its ``top`` is exactly 0, another card follows
    it in ``cards``, and the distance between the two tops is smaller than
    ``round(REFERENCE_RESUME_CARD_TOP_GAP_BASELINE * scale_w)
    - round(FAVORITE_LABEL_TEMPLATE_H_BASELINE * scale_w)`` (each term
    floored at 1). The neighbour used for the comparison is always the next
    entry of the input list, whether or not that entry is itself kept.

    Args:
        cards: derived cards, expected in top-to-bottom order.
        scale_w: screen width divided by the 1080 baseline width.

    Returns:
        ``cards`` itself (untouched, indices not rewritten) when it holds
        zero or one element; otherwise a new list of copies whose ``index``
        runs 0..n-1 over the kept cards.
    """
    if len(cards) <= 1:
        return cards

    scaled_row_gap = max(
        1, int(round(REFERENCE_RESUME_CARD_TOP_GAP_BASELINE * scale_w))
    )
    scaled_label_h = max(
        1, int(round(FAVORITE_LABEL_TEMPLATE_H_BASELINE * scale_w))
    )
    smallest_real_step = scaled_row_gap - scaled_label_h

    survivors: list[ThumbnailCard] = []
    total = len(cards)
    for position, current in enumerate(cards):
        has_card_below = position + 1 < total
        is_pseudo_strip = (
            current.top == 0
            and has_card_below
            and cards[position + 1].top - current.top < smallest_real_step
        )
        if is_pseudo_strip:
            continue
        survivors.append(current)

    renumbered: list[ThumbnailCard] = []
    for new_index, kept in enumerate(survivors):
        renumbered.append(
            ThumbnailCard(
                index=new_index,
                top=kept.top,
                bottom=kept.bottom,
                left=kept.left,
                right=kept.right,
                click_x=kept.click_x,
                click_y=kept.click_y,
                click_side=kept.click_side,
                favorite_hit=kept.favorite_hit,
            )
        )
    return renumbered
577
+
578
+
579
def bbox_to_metadata_list(card: ThumbnailCard) -> list[int]:
    """Return the card's bbox as ``[x1, y1, x2, y2]``.

    This is the element shape of the schema's ``resume_thumb_bboxes``
    (``Optional[list[list[int]]]`` per screenshot, one four-item list per
    thumbnail the collector identified on that screen).
    """
    x1, y1 = card.left, card.top
    x2, y2 = card.right, card.bottom
    return [x1, y1, x2, y2]
588
+
589
+
590
def click_context_for_tap_thumbnail(card: ThumbnailCard) -> dict:
    """Assemble the ``click_context`` payload of a group-A ``tap_thumbnail`` frame.

    Per the schema rule quoted for gate 1e/4, a group-A action must carry
    ``click_coords`` and ``click_side`` while ``click_position`` and
    ``divider_verified`` must be null; accordingly only the first two keys
    are emitted here.
    """
    context: dict = {}
    context["click_coords"] = [card.click_x, card.click_y]
    context["click_side"] = card.click_side
    return context
601
+
602
+
603
def card_bounding_tuple(card: ThumbnailCard) -> tuple[int, int, int, int]:
    """Return the card's inclusive pixel bbox as ``(left, top, right, bottom)``.

    Same field order as ``bbox_to_metadata_list`` / the gate-1d thumbnail
    rectangles, but as a tuple.
    """
    left, top = card.left, card.top
    right, bottom = card.right, card.bottom
    return left, top, right, bottom
607
+
608
+
609
def y_interval_overlap_ratio(
    top_a: int,
    bottom_a: int,
    top_b: int,
    bottom_b: int,
) -> float:
    """Overlap of two inclusive row intervals relative to the shorter one.

    Each interval ``[top, bottom]`` has height ``bottom - top + 1``. The
    result is ``shared_rows / min(height_a, height_b)``; it is ``0.0`` when
    the intervals do not intersect or the shorter height is not positive.
    """
    first_shared_row = max(top_a, top_b)
    last_shared_row = min(bottom_a, bottom_b)
    if last_shared_row < first_shared_row:
        return 0.0

    shorter_height = min(bottom_a - top_a + 1, bottom_b - top_b + 1)
    if shorter_height <= 0:
        return 0.0

    shared_rows = last_shared_row - first_shared_row + 1
    return shared_rows / shorter_height
628
+
629
+
630
def card_overlaps_processed(
    card: ThumbnailCard,
    processed_bboxes: Iterable[tuple[int, int, int, int]],
    *,
    min_overlap_ratio: float = PROCESSED_CARD_MATCH_MIN_OVERLAP_RATIO,
) -> bool:
    """Tell whether ``card`` vertically coincides with an already-processed card.

    Every entry of ``processed_bboxes`` is a ``(left, top, right, bottom)``
    tuple as produced by :func:`card_bounding_tuple`; only ``top`` and
    ``bottom`` are consulted. A pair counts as the same card when
    :func:`y_interval_overlap_ratio` of the two inclusive row spans is at
    least ``min_overlap_ratio``. Evaluation stops at the first such pair.
    """
    return any(
        y_interval_overlap_ratio(card.top, card.bottom, seen_top, seen_bottom)
        >= min_overlap_ratio
        for _seen_left, seen_top, _seen_right, seen_bottom in processed_bboxes
    )
649
+
650
+
651
def pick_first_unprocessed_card(
    cards: Iterable[ThumbnailCard],
    processed_card_bboxes: Sequence[tuple[int, int, int, int]] | None = None,
) -> ThumbnailCard | None:
    """Pick the first card not yet handled in this session (**Y-span only**).

    A card is considered handled when :func:`card_overlaps_processed` says
    so: for its inclusive row interval ``[top,bottom]`` against each stored
    bbox, ``intersection_px / min(h_new,h_old)`` reaches
    :data:`PROCESSED_CARD_MATCH_MIN_OVERLAP_RATIO` (default **0.70**).
    Stored tuples are ``(left,top,right,bottom)`` from
    :func:`card_bounding_tuple`; only ``top``/``bottom`` take part.

    Intended for use between ``chat_content_scroll_down`` /
    ``chat_resume_rescan`` frames, so that a notebook card reappearing with
    a **slightly shifted** ``favorite_label`` match (integer ``(x,y)`` off by
    a few pixels) is recognised as the same card and not tapped twice.

    ``processed_card_bboxes`` is the list accumulated from earlier
    :func:`card_bounding_tuple` values of cards that entered
    ``tap_thumbnail`` (see ``run_collector_minimal``). ``None`` or an empty
    sequence means nothing has been processed yet.

    NOTE: new messages inserted above the card are not modelled; P0 accepts
    that rare edge.

    Returns:
        The first card in iteration order that is not a duplicate, or
        ``None`` when every card is a duplicate (or ``cards`` is empty).
    """
    if processed_card_bboxes:
        already_handled: list[tuple[int, int, int, int]] = list(processed_card_bboxes)
    else:
        already_handled = []

    fresh_cards = (
        candidate
        for candidate in cards
        if not card_overlaps_processed(candidate, already_handled)
    )
    return next(fresh_cards, None)
684
+
685
+
686
@dataclass
class TrackedCard:
    """Entry of the height-sequence de-dup state (the "array A").

    Mutable on purpose: ``clicked`` is flipped after construction.
    """

    # Inclusive bbox height of the card: bottom - top + 1.
    height: int
    # True once this card has been entered.
    clicked: bool
692
+
693
+
694
+ def _card_height(card: ThumbnailCard) -> int:
695
+ """Pixel-height of a card bbox (inclusive row count)."""
696
+ return card.bottom - card.top + 1
697
+
698
+
699
+ def _longest_subsequence_match(
700
+ a_heights: list[int],
701
+ b_heights: list[int],
702
+ ) -> tuple[int, int, int] | None:
703
+ """Find the longest contiguous substring of a_heights that appears in b_heights.
704
+
705
+ Returns ``(a_start, b_start, length)`` or ``None``.
706
+ If multiple matches share the same maximum length, picks the one with
707
+ the smallest ``b_start`` (earliest in B).
708
+ """
709
+ if not a_heights or not b_heights:
710
+ return None
711
+
712
+ best: tuple[int, int, int] | None = None # (a_start, b_start, length)
713
+
714
+ for a_start in range(len(a_heights)):
715
+ max_len = len(a_heights) - a_start
716
+ for length in range(max_len, 0, -1):
717
+ if best is not None and length < best[2]:
718
+ break
719
+ sub = a_heights[a_start:a_start + length]
720
+ for b_start in range(len(b_heights) - length + 1):
721
+ if b_heights[b_start:b_start + length] == sub:
722
+ if (
723
+ best is None
724
+ or length > best[2]
725
+ or (length == best[2] and b_start < best[1])
726
+ ):
727
+ best = (a_start, b_start, length)
728
+
729
+ return best
730
+
731
+
732
def pick_next_unclicked_card(
    cards: list[ThumbnailCard],
    tracked_cards: list[TrackedCard],
) -> ThumbnailCard | None:
    """Return the next card to enter based on height-sequence dedup.

    Algorithm (PRD S6), with A = ``tracked_cards`` and B = current ``cards``:
        1. If B is empty, clear A and return ``None``.
        2. If A is empty, or no contiguous run of A-heights occurs in
           B-heights, rebuild A from B (all unclicked) and return
           ``cards[0]``.
        3. Otherwise take the longest common contiguous run (smallest B
           start index on ties) and rebuild A so it lines up with B:
           B cards before the run (unclicked), the matched A entries
           (keeping their ``clicked`` flag), B cards after the run
           (unclicked). Unmatched A entries are dropped.
        4. Return the first card whose tracked entry is not clicked, or
           ``None`` when all are clicked.

    ``tracked_cards`` is mutated in-place on every call; after a call it has
    one entry per element of ``cards``, in the same order. This function
    never sets ``clicked`` to ``True`` itself — the caller marks the entry
    once the card has been entered. The caller must also clear the list on
    ``chat_content_scroll_up`` / ``chat_content_scroll_down`` to reset the
    tracking window.

    Args:
        cards: cards derived from the current frame, top to bottom.
        tracked_cards: persistent tracking state, updated in place.

    Returns:
        The card to tap next, or ``None`` if there is nothing left to tap on
        this frame.
    """
    if not cards:
        # Empty frame: nothing to tap and nothing to track. Without this
        # guard the "first frame" path below would index cards[0] and raise.
        tracked_cards[:] = []
        return None

    cur_heights = [_card_height(c) for c in cards]

    match = (
        _longest_subsequence_match(
            [tc.height for tc in tracked_cards], cur_heights,
        )
        if tracked_cards
        else None
    )

    if match is None:
        # First frame, or nothing in common with the previous frame:
        # restart tracking; every card is unclicked, so the topmost wins.
        tracked_cards[:] = [
            TrackedCard(height=h, clicked=False) for h in cur_heights
        ]
        return cards[0]

    a_start, b_start, length = match
    b_end = b_start + length

    # Copies are made so the rebuilt state never aliases the old entries.
    carried_over = [
        TrackedCard(height=tc.height, clicked=tc.clicked)
        for tc in tracked_cards[a_start:a_start + length]
    ]
    new_above = [
        TrackedCard(height=h, clicked=False) for h in cur_heights[:b_start]
    ]
    new_below = [
        TrackedCard(height=h, clicked=False) for h in cur_heights[b_end:]
    ]
    tracked_cards[:] = new_above + carried_over + new_below

    # tracked_cards is now index-aligned with cards.
    for card, tracked in zip(cards, tracked_cards):
        if not tracked.clicked:
            return card
    return None
791
+
792
+
793
# Names exported by ``from <module> import *``: constants first, then data
# classes, then functions.
# NOTE: the underscore-prefixed ``_compute_exact_card_bboxes`` is listed
# explicitly, so it is exported by a star-import as well despite its
# private-style name.
__all__ = [
    "FAVORITE_LABEL_TEMPLATE_W_BASELINE",
    "FAVORITE_LABEL_TEMPLATE_H_BASELINE",
    "FAVORITE_TO_CARD_TOP_OFFSET_BASELINE",
    "FAVORITE_TAIL_OFFSET_BASELINE",
    "PROCESSED_CARD_MATCH_MIN_OVERLAP_RATIO",
    "REFERENCE_RESUME_CARD_TOP_GAP_BASELINE",
    "FavoriteLabelHit",
    "ThumbnailCard",
    "TrackedCard",
    "BubbleBbox",
    "derive_cards",
    "drop_top_clamped_false_positive_cards",
    "bbox_to_metadata_list",
    "card_bounding_tuple",
    "card_overlaps_processed",
    "click_context_for_tap_thumbnail",
    "pick_first_unprocessed_card",
    "pick_next_unclicked_card",
    "y_interval_overlap_ratio",
    "_compute_exact_card_bboxes",
]