workstream-cli 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. workstream/ARCHITECTURE.md +89 -0
  2. workstream/__init__.py +8 -0
  3. workstream/cli.py +136 -0
  4. workstream/commands/__init__.py +0 -0
  5. workstream/commands/backfill.py +139 -0
  6. workstream/commands/block.py +93 -0
  7. workstream/commands/checkin.py +51 -0
  8. workstream/commands/cron.py +119 -0
  9. workstream/commands/focus_cmd.py +273 -0
  10. workstream/commands/idea.py +172 -0
  11. workstream/commands/index.py +89 -0
  12. workstream/commands/init.py +567 -0
  13. workstream/commands/inspect_cmd.py +354 -0
  14. workstream/commands/list_cmd.py +99 -0
  15. workstream/commands/nest.py +108 -0
  16. workstream/commands/new.py +95 -0
  17. workstream/commands/next_cmd.py +333 -0
  18. workstream/commands/report.py +190 -0
  19. workstream/commands/resume.py +145 -0
  20. workstream/commands/review.py +227 -0
  21. workstream/commands/serve.py +23 -0
  22. workstream/commands/setup.py +178 -0
  23. workstream/commands/show.py +123 -0
  24. workstream/commands/snooze.py +117 -0
  25. workstream/commands/stale.py +116 -0
  26. workstream/commands/sweep.py +1753 -0
  27. workstream/commands/tree.py +105 -0
  28. workstream/commands/update_status.py +117 -0
  29. workstream/config.py +322 -0
  30. workstream/extensions/__init__.py +0 -0
  31. workstream/extensions/workstream.ts +633 -0
  32. workstream/focus_artifact.py +157 -0
  33. workstream/git.py +194 -0
  34. workstream/harness.py +49 -0
  35. workstream/llm.py +78 -0
  36. workstream/markdown.py +501 -0
  37. workstream/models.py +274 -0
  38. workstream/plan_index.py +88 -0
  39. workstream/provisioning.py +196 -0
  40. workstream/repo_discovery.py +158 -0
  41. workstream/review_artifact.py +96 -0
  42. workstream/scripts/migrate_statuses.py +120 -0
  43. workstream/skills/__init__.py +0 -0
  44. workstream/skills/workstream_context/SKILL.md +75 -0
  45. workstream/skills/workstream_context/__init__.py +0 -0
  46. workstream/skills/workstream_focus/SKILL.md +141 -0
  47. workstream/skills/workstream_init/SKILL.md +86 -0
  48. workstream/skills/workstream_review/SKILL.md +224 -0
  49. workstream/skills/workstream_sweep/SKILL.md +178 -0
  50. workstream/sweep_state.py +93 -0
  51. workstream/templates/dashboard.html +382 -0
  52. workstream/templates/detail.html +360 -0
  53. workstream/templates/plan.html +210 -0
  54. workstream/test/__init__.py +0 -0
  55. workstream/test/conftest.py +221 -0
  56. workstream/test/fixtures/sample_sprint_note.md +10 -0
  57. workstream/test/fixtures/sample_workstream.md +41 -0
  58. workstream/test/test_backfill.py +180 -0
  59. workstream/test/test_batch_writeback.py +81 -0
  60. workstream/test/test_commands.py +938 -0
  61. workstream/test/test_config.py +54 -0
  62. workstream/test/test_focus_artifact.py +211 -0
  63. workstream/test/test_git.py +88 -0
  64. workstream/test/test_heuristics.py +136 -0
  65. workstream/test/test_hierarchy.py +231 -0
  66. workstream/test/test_init.py +452 -0
  67. workstream/test/test_inspect.py +143 -0
  68. workstream/test/test_llm.py +78 -0
  69. workstream/test/test_markdown.py +626 -0
  70. workstream/test/test_models.py +506 -0
  71. workstream/test/test_next.py +206 -0
  72. workstream/test/test_plan_index.py +83 -0
  73. workstream/test/test_provisioning.py +270 -0
  74. workstream/test/test_repo_discovery.py +181 -0
  75. workstream/test/test_resume.py +71 -0
  76. workstream/test/test_sweep.py +1196 -0
  77. workstream/test/test_sweep_state.py +86 -0
  78. workstream/test/test_thoughts.py +516 -0
  79. workstream/test/test_web.py +606 -0
  80. workstream/thoughts.py +505 -0
  81. workstream/web.py +444 -0
  82. workstream_cli-0.0.1.dist-info/LICENSE +21 -0
  83. workstream_cli-0.0.1.dist-info/METADATA +93 -0
  84. workstream_cli-0.0.1.dist-info/RECORD +86 -0
  85. workstream_cli-0.0.1.dist-info/WHEEL +4 -0
  86. workstream_cli-0.0.1.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,1753 @@
1
+ """ws sweep — scan repos for plans/branches and update workstream files."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+ from dataclasses import dataclass, field
7
+ from datetime import date, datetime
8
+ from pathlib import Path
9
+
10
+ from face import Command
11
+
12
+ from workstream.config import Config, GLOBAL_CONFIG_PATH, RepoConfig
13
+ from workstream.git import branch_ahead_count, branch_is_merged, list_branches
14
+ from workstream.cli import load_all_workstreams
15
+ from workstream.markdown import (
16
+ parse_frontmatter,
17
+ save_workstream,
18
+ )
19
+ from workstream.models import BranchRef, LogEntry, PlanRef, slugify
20
+
21
+
22
+ # ── Sweep findings accumulator ──────────────────────────────────────
23
+
24
+
25
@dataclass
class SweepFindings:
    """Delta from a single sweep run for one workstream.

    Accumulated during the scan loop and passed to the review manifest
    builder. This is the single source of truth for what changed —
    the manifest builder must not re-derive it.
    """
    # Plans newly matched to this workstream during this sweep run.
    newly_matched: list[PlanRef] = field(default_factory=list)
    # Plans that picked up a new heuristic signal this run (see
    # _auto_classify_plans, which reads plan.signal).
    new_signals: list[PlanRef] = field(default_factory=list)
    # Plans whose status was auto-set (e.g. to 'implemented') this run;
    # populated from the return value of _auto_classify_plans.
    auto_classified: list[PlanRef] = field(default_factory=list)
    # Per-repo commit info keyed by repo name; presumably each dict is one
    # commit summary — TODO confirm against the scan loop that fills this.
    repos_with_commits: dict[str, list[dict]] = field(default_factory=dict)
37
+
38
+
39
+ _RELATIVE_RE = re.compile(r'^(\d+)([hdwm])$')
40
+ _RELATIVE_UNITS = {'h': 'hours', 'd': 'days', 'w': 'weeks', 'm': 'months'}
41
+
42
+
43
+ def _resolve_cutoff(since: str | None, last_sweep: str) -> str:
44
+ """Resolve --since into a git-compatible cutoff string.
45
+
46
+ Accepts: ISO date ('2026-03-30'), ISO datetime ('2026-03-30T14:00'),
47
+ relative shorthand ('2h', '1d', '3w'), or None (default: last_sweep).
48
+ Returns empty string when no cutoff is available (first sweep).
49
+ """
50
+ if since:
51
+ m = _RELATIVE_RE.match(since)
52
+ if m:
53
+ return f'{m.group(1)} {_RELATIVE_UNITS[m.group(2)]} ago'
54
+ return since # ISO date/datetime or other git-compatible format
55
+ if last_sweep:
56
+ # last_sweep is date-only (YYYY-MM-DD); git interprets as midnight start-of-day
57
+ return last_sweep
58
+ return '' # first sweep — no cutoff
59
+
60
+ _PLAN_STATUS_RE = re.compile(r'<!--\s*STATUS:\s*(\w+)(?:\s+(\S+))?\s*-->')
61
+
62
+ # Map legacy HTML comment statuses to canonical plan statuses.
63
+ # FINALIZED means approved/ready, not implemented.
64
+ _HTML_COMMENT_STATUS_MAP = {'finalized': 'active'}
65
+
66
+ _FILENAME_DATE_RE = re.compile(r'^(\d{4}-\d{2}-\d{2})')
67
+
68
+ def _date_from_filename(filename: str) -> str:
69
+ """Extract leading YYYY-MM-DD from a plan filename, or return ''."""
70
+ m = _FILENAME_DATE_RE.match(filename)
71
+ return m.group(1) if m else ''
72
+
73
def scan_repo_plans(plans_dir: Path) -> list[dict]:
    """Scan a plans directory, return list of {path, meta} dicts.

    Parses YAML frontmatter first. Falls back to HTML comment status markers
    (``<!-- STATUS: FINALIZED timestamp -->``) for legacy plan files.
    """
    if not plans_dir.is_dir():
        return []
    scanned: list[dict] = []
    for plan_file in sorted(plans_dir.glob('*.md')):
        contents = plan_file.read_text(encoding='utf-8')
        meta, _ = parse_frontmatter(contents)
        if not meta:
            # Legacy plans keep their status in an HTML comment rather
            # than YAML frontmatter — recover it from the marker.
            marker = _PLAN_STATUS_RE.match(contents)
            if marker:
                raw_status = marker.group(1).lower()
                meta = {'status': _HTML_COMMENT_STATUS_MAP.get(raw_status, raw_status)}
                timestamp = marker.group(2)
                if timestamp:
                    meta['finalized_at'] = timestamp
        scanned.append({'path': plan_file.name, 'meta': meta})
    return scanned
95
+
96
+
97
def _scan_repo_branches(repo_path: Path) -> list[dict]:
    """List non-default branches with ahead counts. Returns [] on git failure."""
    try:
        all_branches = list_branches(repo_path)
    except Exception:
        # Best-effort: a repo that git can't read contributes nothing.
        return []
    return [
        {'branch': name, 'ahead': branch_ahead_count(repo_path, name)}
        for name in all_branches
        if name not in ('main', 'master')
    ]
110
+
111
+
112
def _make_plan_ref(repo_name: str, plan: dict) -> PlanRef:
    """Build a PlanRef from a scanned plan dict."""
    meta = plan['meta']
    filename = plan['path']
    # Prefer the frontmatter 'created' date; fall back to a leading
    # YYYY-MM-DD in the filename when the frontmatter has none.
    plan_date = str(meta.get('created', '') or '')
    if not plan_date:
        plan_date = _date_from_filename(filename)
    return PlanRef(
        repo=repo_name,
        path=filename,
        status=meta.get('status', 'unknown'),
        title=meta.get('title', ''),
        date=plan_date,
        plan_type=meta.get('plan_type', ''),
        guidance=meta.get('workstream_guidance', ''),
    )
124
+
125
+
126
def _match_plans_to_workstream(
    ws_title: str, ws_slug: str, ws_id: str,
    ws_repos: list[str], repo_name: str, plans: list[dict],
) -> list[PlanRef]:
    """Return PlanRef entries from plans that belong to this workstream.

    Plans with an explicit ``workstream`` field are matched against the
    workstream's title (case-insensitive), slug, or ID. Only plans that
    match are included — plans assigned to a *different* workstream are
    skipped. Plans with no ``workstream`` field fall back to repo-level
    matching (included in every workstream that claims the repo).
    """
    identities = {ws_title.lower(), ws_slug, ws_id}
    repo_claimed = repo_name in ws_repos
    refs: list[PlanRef] = []
    for plan in plans:
        assigned = (plan['meta'].get('workstream', '') or '').strip()
        if assigned:
            # Explicit assignment: include only when it names *this*
            # workstream; otherwise the plan belongs elsewhere — skip it.
            if assigned.lower() in identities or slugify(assigned) == ws_slug:
                refs.append(_make_plan_ref(repo_name, plan))
            continue
        # No explicit assignment — repo-level fallback.
        if repo_claimed:
            refs.append(_make_plan_ref(repo_name, plan))
    return refs
152
+
153
+
154
def _match_branches_to_workstream(ws_slug: str, repo_name: str,
                                  branches: list[dict]) -> list[BranchRef]:
    """Return BranchRef entries for branches whose name contains the ws slug."""
    # Heuristic: a branch belongs to the workstream when its name embeds
    # the slugified workstream title.
    return [
        BranchRef(repo=repo_name, branch=info['branch'], ahead=info['ahead'])
        for info in branches
        if ws_slug in info['branch']
    ]
164
+
165
+
166
def _auto_classify_plans(
    plans: list['PlanRef'], repo_data: dict[str, dict],
) -> list['PlanRef']:
    """Auto-classify plans with high-confidence heuristic signals.

    Plans with 'likely-implemented' signal get status set to 'implemented'
    in their plan file's YAML frontmatter. Returns list of plans that were
    auto-classified.

    Side effect: rewrites each matched plan file on disk (frontmatter only)
    and mutates ``plan.status`` in place.
    """
    from workstream.markdown import parse_frontmatter, write_frontmatter
    from datetime import date as date_type

    today = date_type.today().isoformat()
    classified: list[PlanRef] = []

    for plan in plans:
        # Only the single high-confidence signal triggers auto-classification;
        # everything else is left for human review.
        if plan.signal != 'likely-implemented':
            continue

        # Find the plan file and update its frontmatter.
        # repo_data[repo]['plans_dir'] is expected to be a Path — any
        # missing link in the chain means we silently skip this plan.
        data = repo_data.get(plan.repo)
        if not data:
            continue
        plans_dir = data.get('plans_dir')
        if not plans_dir:
            continue
        plan_file = plans_dir / plan.path
        if not plan_file.is_file():
            continue

        try:
            text = plan_file.read_text(encoding='utf-8')
            meta, body = parse_frontmatter(text)
            if not meta or meta.get('status') == 'implemented':
                continue  # already classified or no frontmatter
            meta['status'] = 'implemented'
            meta['reviewed'] = today
            meta['review_note'] = 'auto-classified: associated branch merged'
            plan_file.write_text(
                write_frontmatter(meta, body), encoding='utf-8'
            )
            # Keep the in-memory ref consistent with what was written.
            plan.status = 'implemented'
            classified.append(plan)
        except Exception:
            pass  # don't fail sweep for one bad file

    return classified
213
+
214
+
215
_MIN_SLUG_MATCH_LEN = 4  # avoid false positives for short names like 'bq', 'top'


def _find_matching_branches(plan_title: str, branches: list[BranchRef]) -> list[BranchRef]:
    """Find branches whose name contains the slugified plan title.

    Used for triage: when a new project plan is discovered, show whether
    a corresponding branch already exists.
    """
    title_slug = slugify(plan_title)
    if len(title_slug) < _MIN_SLUG_MATCH_LEN:
        # Slug too short to match reliably — report no matches.
        return []
    matches: list[BranchRef] = []
    for ref in branches:
        if title_slug in ref.branch:
            matches.append(ref)
    return matches
227
+
228
+
229
def _repo_matches_slug(repo_name: str, ws_slug: str) -> bool:
    """True if repo_name plausibly belongs to a workstream with the given slug.

    Matches on: exact equality, or word-prefix in either direction
    (e.g., repo 'workstream' matches slug 'workstream-tool').
    Short names (< 4 chars) require exact match to avoid false positives.
    """
    if repo_name == ws_slug:
        return True
    # Word-prefix match in either direction, guarding against short stems.
    repo_long_enough = len(repo_name) >= _MIN_SLUG_MATCH_LEN
    slug_long_enough = len(ws_slug) >= _MIN_SLUG_MATCH_LEN
    return (
        (repo_long_enough and ws_slug.startswith(repo_name + '-'))
        or (slug_long_enough and repo_name.startswith(ws_slug + '-'))
    )
244
+
245
+
246
def _auto_associate_repos(workstreams: list, repo_names: list[str]) -> int:
    """Set ws.repos for workstreams with no repos, using slug matching.

    Matches exact slug equality, or word-prefix in either direction
    (e.g., repo 'workstream' matches slug 'workstream-tool').
    Short slugs (< 4 chars) require exact match to avoid false positives.
    Returns count of workstreams updated on disk.
    """
    count = 0
    for ws in workstreams:
        # Respect already-populated repo lists and explicit configuration.
        if ws.repos or ws._repos_explicit:
            continue
        ws_slug = slugify(ws.title)
        matched = [r for r in repo_names if _repo_matches_slug(r, ws_slug)]
        if matched:
            ws.repos = matched
            if ws.source_path:
                save_workstream(ws, ws.source_path)
            # NOTE(review): counted even when ws has no source_path (i.e.
            # updated in memory only) — docstring says "updated on disk";
            # confirm whether the increment belongs under the save.
            count += 1
    return count
266
+
267
+
268
+ # Month extraction from sprint filenames (sprint_YYYY-MM-DD.md)
269
+ _SPRINT_DATE_RE = re.compile(r'sprint_(\d{4}-\d{2})')
270
+
271
+
272
+ def _file_month(path: Path) -> str:
273
+ """Extract 'YYYY-MM' from a sprint filename, or 'unknown'."""
274
+ m = _SPRINT_DATE_RE.search(path.name)
275
+ return m.group(1) if m else 'unknown'
276
+
277
+
278
+ def _file_size_kb(path: Path) -> str:
279
+ """Human-readable file size in KB."""
280
+ return f'{path.stat().st_size / 1024:.0f}KB'
281
+
282
+
283
def _build_per_workstream_repo_context(
    workstreams: list, found_repos: list, cutoff: str,
) -> str:
    """Build a structured repo activity summary grouped by workstream.

    Returns a text block with per-workstream branch/commit summaries,
    plus an 'Unmatched' section for repos/branches not tied to any workstream.
    The LLM can run git commands for deeper investigation since repos are local.

    Returns '' when there are no repos or no usable repo summaries.
    """
    # Fix: the original also imported recent_commits and _default_branch
    # and built a ws_slugs lookup dict — none of which were used.
    from workstream.repo_discovery import _repo_summary

    if not found_repos:
        return ''

    repo_summaries = []
    for repo in found_repos:
        info = _repo_summary(repo, cutoff)
        if info is not None:
            repo_summaries.append(info)

    if not repo_summaries:
        return ''

    # Track which branches/repos have been claimed by some workstream so
    # the leftovers can be reported in the 'Unmatched' section.
    claimed_branches: set[tuple[str, str]] = set()  # (repo_name, branch_name)
    claimed_repos: set[str] = set()

    lines: list[str] = []

    for ws in workstreams:
        ws_slug = slugify(ws.title)
        ws_lines: list[str] = []

        for info in repo_summaries:
            repo_name = info['name']

            # Match repo to workstream by ws.repos list or slug-in-repo-name
            repo_matches = (repo_name in ws.repos) or (ws_slug in repo_name.lower())

            # Find matching branches (all branches of a matched repo count)
            matched_branches = []
            for b in info.get('active_branches', []):
                branch_name = b['name']
                if ws_slug in branch_name or repo_matches:
                    matched_branches.append(b)
                    claimed_branches.add((repo_name, branch_name))

            if repo_matches:
                claimed_repos.add(repo_name)
                last = info.get('last_default_date', '')
                ws_lines.append(f' {repo_name}: last default-branch commit {last}')
                for b in matched_branches:
                    ws_lines.append(
                        f' - {b["name"]} (+{b["ahead"]} ahead) — {b["date"]}: "{b["subject"]}"'
                    )
            elif matched_branches:
                # Repo itself not claimed, but a slug-matching branch is —
                # report it qualified by repo name.
                for b in matched_branches:
                    ws_lines.append(
                        f' {repo_name}/{b["name"]} (+{b["ahead"]} ahead) — {b["date"]}: "{b["subject"]}"'
                    )

        if ws_lines:
            lines.append(f'{ws.title} ({ws.id}):')
            lines.extend(ws_lines)
            lines.append('')

    # Unmatched repos/branches
    unmatched_lines: list[str] = []
    for info in repo_summaries:
        repo_name = info['name']
        if repo_name not in claimed_repos:
            last = info.get('last_default_date', '')
            has_activity = info.get('is_active', False)
            if has_activity:
                unmatched_lines.append(f' {repo_name}: last default-branch commit {last}')
        # Branches not claimed by any workstream
        for b in info.get('active_branches', []):
            if (info['name'], b['name']) not in claimed_branches:
                unmatched_lines.append(
                    f' {info["name"]}/{b["name"]} (+{b["ahead"]} ahead) — {b["date"]}: "{b["subject"]}"'
                )

    if unmatched_lines:
        lines.append('Unmatched repos/branches:')
        lines.extend(unmatched_lines)
        lines.append('')

    return '\n'.join(lines)
375
+
376
+
377
def _recompute_activity_dates(ws_dir: Path, repo_paths: dict[str, Path] | None = None) -> None:
    """Recompute first_activity/last_activity/code_last_activity from dated artifacts.

    Idempotent: reads each workstream, derives dates from its entries, writes
    only if the computed values differ from what's stored.

    When *repo_paths* is given, the most recent commit date of each
    associated repo also feeds last_activity and code_last_activity.
    """
    from workstream.markdown import load_workstream, parse_frontmatter, write_frontmatter

    updated = 0
    for ws_file in sorted(ws_dir.glob('*.md')):
        if ws_file.name == 'inbox.md':
            continue
        try:
            ws = load_workstream(ws_file)
        except Exception:
            # Unparseable workstream file — skip rather than abort the sweep.
            continue

        # Collect all dates from thread entries and thought entries
        dates: list[str] = []
        for entry in ws.thread:
            if entry.date:
                dates.append(entry.date)
        for thought in ws.thoughts:
            if thought.date:
                dates.append(thought.date)

        # Plan dates
        for plan in ws.plans:
            if plan.date:
                dates.append(plan.date)

        # Git commit dates (most recent commit per associated repo)
        git_dates: list[str] = []
        if repo_paths:
            from workstream.git import recent_commits
            ws_slug = slugify(ws.title)
            # Associated repos = explicitly listed ones plus slug matches.
            associated_repos: set[str] = set(ws.repos)
            for rname in repo_paths:
                if _repo_matches_slug(rname, ws_slug):
                    associated_repos.add(rname)
            for repo_name in associated_repos:
                rpath = repo_paths.get(repo_name)
                if rpath and rpath.is_dir():
                    try:
                        # Wide cutoff: effectively "all commits since 2020".
                        # Assumes recent_commits returns newest-first — the
                        # first entry's date is taken as the latest.
                        commits = recent_commits(rpath, '2020-01-01')
                        if commits:
                            git_dates.append(commits[0]['date'])
                    except Exception:
                        continue
            dates.extend(git_dates)

        if not dates:
            continue

        # Preserve any existing datetime-precision value for last_activity.
        # All thread/thought/plan entry dates are YYYY-MM-DD, so _recompute_
        # would otherwise downgrade a stored 'YYYY-MM-DDTHH:MM:SS' back to a
        # plain date. Including it in the pool keeps the datetime if it is
        # already the maximum.
        if ws.last_activity:
            dates.append(ws.last_activity)

        # ISO-8601 strings order correctly under plain string comparison.
        computed_first = min(dates)
        computed_last = max(dates)

        code_last = max(git_dates) if git_dates else ''

        if (computed_first == ws.first_activity
                and computed_last == ws.last_activity
                and code_last == ws.code_last_activity):
            continue  # nothing changed — don't touch the file

        # Update frontmatter in place.
        # NOTE(review): assumes parse_frontmatter returns a dict here —
        # plausible since load_workstream succeeded above, but confirm.
        text = ws_file.read_text(encoding='utf-8')
        meta, body = parse_frontmatter(text)
        meta['first_activity'] = computed_first
        meta['last_activity'] = computed_last
        if code_last:
            meta['code_last_activity'] = code_last
        elif 'code_last_activity' in meta:
            del meta['code_last_activity']
        ws_file.write_text(write_frontmatter(meta, body), encoding='utf-8')
        updated += 1

    if updated:
        print(f' Updated activity dates for {updated} workstream(s).')
463
+
464
+
465
def _discover_handler(config: Config, workstreams: list, ws_dir: Path,
                      interactive: bool = False,
                      lookback_days: int = 60) -> None:
    """Run LLM-driven discovery: extract and associate thoughts per file.

    Sends each unprocessed sprint note to the LLM in full, which extracts
    thoughts and associates them with workstreams in a single pass.
    Saves state after every file, so Ctrl+C loses at most one LLM call.
    With ``interactive=True``, pauses at month boundaries when inbox items
    or errors need attention. Clean months proceed without prompting.

    Only processes files whose effective date is within *lookback_days*.
    Effective date: git commit date (primary), file mtime (fallback).
    """
    import sys
    from collections import defaultdict
    from datetime import date, timedelta

    from workstream.llm import LLMAgent
    from workstream.sweep_state import load_sweep_state, save_sweep_state
    from workstream.thoughts import (
        Thought,
        build_discovery_prompt,
        dispatch_associations,
        parse_association_response,
    )

    today = date.today()
    cutoff = today - timedelta(days=lookback_days)

    # 1. Load sweep state
    state = load_sweep_state(ws_dir)

    # 1b. Build per-workstream repo context (optional — only when repo_dirs
    #     are configured and at least one repo is discovered)
    found_repos: list = []
    repo_context = ''
    if config.repo_dirs:
        from workstream.repo_discovery import discover_repos
        found_repos = discover_repos(config.repo_dirs)
        if found_repos:
            repo_context = _build_per_workstream_repo_context(
                workstreams, found_repos, cutoff.isoformat(),
            )

    # 2. Collect note files from notes_dirs (newest first by filename)
    note_files: list[Path] = []
    for notes_dir_str in config.notes_dirs:
        notes_dir = Path(notes_dir_str).expanduser()
        if not notes_dir.is_dir():
            continue
        for md_file in sorted(notes_dir.glob('*.md'), reverse=True):
            note_files.append(md_file)

    # 2b. Filter by lookback: git commit date (primary), mtime (fallback)
    from workstream.git import file_last_commit_dates

    # Batch-fetch git commit dates for all files
    git_dates: dict[str, str] = {}
    if note_files:
        # Group by parent dir (notes may span multiple dirs)
        by_parent: dict[Path, list[Path]] = {}
        for f in note_files:
            by_parent.setdefault(f.parent, []).append(f)
        for parent, files in by_parent.items():
            git_dates.update(file_last_commit_dates(parent, files))

    filtered: list[Path] = []
    for f in note_files:
        git_date_str = git_dates.get(str(f))
        if git_date_str:
            try:
                if date.fromisoformat(git_date_str) >= cutoff:
                    filtered.append(f)
                # A parseable git date is authoritative either way —
                # do not fall through to the mtime check.
                continue
            except ValueError:
                pass
        # No git date — fall back to mtime
        mtime_date = date.fromtimestamp(f.stat().st_mtime)
        if mtime_date >= cutoff:
            filtered.append(f)

    skipped = len(note_files) - len(filtered)
    if skipped:
        print(f'Filtered {skipped} note file(s) outside {lookback_days}-day lookback.')

    # 3. Filter to unprocessed
    unprocessed = state.unprocessed_files(filtered)
    if not unprocessed:
        print('No new or changed note files to process.')
        return

    print(f'Found {len(unprocessed)} unprocessed note file(s).')

    # 4. Get LLM agent (required for discovery — no regex fallback)
    llm_name = config.get_llm_agent()
    if llm_name:
        try:
            agent = LLMAgent(llm_name) if llm_name != 'auto' else LLMAgent.detect()
        # NOTE(review): (ValueError, Exception) is redundant — Exception
        # already subsumes ValueError; effectively a catch-all.
        except (ValueError, Exception):
            agent = None
    else:
        agent = None

    if agent is None:
        print('No LLM agent available. Cannot run discovery without an LLM.')
        print(f'Configure an LLM agent in {GLOBAL_CONFIG_PATH}')
        return

    discover_model = config.llm_discover_model
    model_label = f'{agent.agent} (model={discover_model})' if discover_model else agent.agent
    print(f'Using {model_label} for discovery.')
    print()

    # 5. Group files by month (from filename), newest first
    by_month: dict[str, list[Path]] = defaultdict(list)
    for f in unprocessed:
        by_month[_file_month(f)].append(f)
    months_sorted = sorted(by_month.keys(), reverse=True)

    # 6. Process file by file, grouped by month
    total_files = len(unprocessed)
    file_num = 0
    total_matched = 0
    total_new = 0
    total_inbox = 0
    total_errors: list[str] = []

    for month in months_sorted:
        month_files = by_month[month]
        month_matched = 0
        month_new = 0
        month_inbox = 0
        month_thoughts = 0
        # Snapshot so the month's error count can be derived afterwards.
        errors_before = len(total_errors)

        print(f'\u2500\u2500 {month} ({len(month_files)} file{"s" if len(month_files) != 1 else ""}) \u2500\u2500')

        for note_file in month_files:
            file_num += 1
            size = _file_size_kb(note_file)
            sys.stdout.write(f' [{file_num}/{total_files}] {note_file.name} ({size})...')
            sys.stdout.flush()

            file_content = note_file.read_text(encoding='utf-8')
            prompt = build_discovery_prompt(
                file_content, workstreams, note_file.name,
                repo_context=repo_context,
            )

            try:
                response = agent.prompt(
                    prompt, model=discover_model, thinking='minimal',
                )
            except Exception as e:
                # LLM call failed — record and move to the next file.
                print(f' error: {e}')
                total_errors.append(f'{note_file.name}: {e}')
                continue

            associations = parse_association_response(response)

            if not associations:
                # No thoughts found (or parse failure) — mark processed, move on
                print(' \u2192 0 thoughts')
                state.mark_processed(note_file)
                state.last_sweep = date.today().isoformat()
                save_sweep_state(state, ws_dir)
                continue

            # Build Thought objects from LLM response for dispatch
            thoughts_for_dispatch: list[Thought] = []
            for assoc in associations:
                thoughts_for_dispatch.append(Thought(
                    date=assoc.get('date') or date.today().isoformat(),
                    text=assoc['thought'],
                    source_file=str(note_file),
                ))

            result = dispatch_associations(
                associations, thoughts_for_dispatch, workstreams, ws_dir
            )

            # Per-file inline result
            n = len(associations)
            parts = [f'{n} thought{"s" if n != 1 else ""}']
            if result.matched:
                parts.append(f'{result.matched} matched')
            if result.new_created:
                parts.append(f'{result.new_created} new')
            if result.inbox_count:
                parts.append(f'{result.inbox_count} inbox')
            print(f' \u2192 {", ".join(parts)}')

            month_matched += result.matched
            month_new += result.new_created
            month_inbox += result.inbox_count
            month_thoughts += n
            if result.errors:
                total_errors.extend(result.errors)

            # Mark file processed and save state (Ctrl+C safe)
            state.mark_processed(note_file)
            state.last_sweep = date.today().isoformat()
            save_sweep_state(state, ws_dir)

        total_matched += month_matched
        total_new += month_new
        total_inbox += month_inbox

        # Monthly summary
        if month_thoughts:
            print(f' {month} totals: {month_thoughts} thoughts, {month_matched} matched, {month_new} new, {month_inbox} inbox')
            print(f' Running: {total_matched + total_new + total_inbox} thoughts, {total_matched} matched, {total_new} new, {total_inbox} inbox')
            print()

        # Interactive pause at month boundaries — only when something needs attention
        month_errors = len(total_errors) - errors_before
        needs_attention = month_inbox > 0 or month_errors > 0
        if interactive and needs_attention and month != months_sorted[-1]:
            remaining_months = months_sorted[months_sorted.index(month) + 1:]
            remaining_files = sum(len(by_month[m]) for m in remaining_months)
            try:
                reply = input(
                    f' {len(remaining_months)} month(s) remaining ({remaining_files} files). '
                    f'Continue? [Y/n/tree] '
                ).strip().lower()
            except (EOFError, KeyboardInterrupt):
                print()
                reply = 'n'
            if reply == 'tree':
                # Show the current workstream tree, then re-prompt.
                from workstream.commands.tree import build_tree_lines
                fresh = load_all_workstreams(ws_dir)
                for line in build_tree_lines(fresh):
                    print(f' {line}')
                print()
                try:
                    reply = input(' Continue? [Y/n] ').strip().lower()
                except (EOFError, KeyboardInterrupt):
                    print()
                    reply = 'n'
            if reply in ('n', 'no'):
                # Safe to stop: per-file state saves mean no work is lost.
                print(' Stopping. Progress saved \u2014 resume with ws sweep --discover.')
                break

    # Final summary
    total_thoughts = total_matched + total_new + total_inbox
    print(f'Discovery complete: {total_thoughts} thoughts, {total_matched} matched, {total_new} new, {total_inbox} inbox.')
    if repo_context:
        print(f' Repo context included in prompts ({len(found_repos)} repos scanned).')
    if total_errors:
        print(f' {len(total_errors)} error(s):')
        for err in total_errors[:5]:
            print(f' {err}')
    if total_inbox:
        print(f' Review inbox: {ws_dir / "inbox.md"}')

    # Post-sweep: recompute first_activity/last_activity for all workstreams
    # Belt-and-suspenders: covers existing workstreams that predate activity tracking
    _recompute_activity_dates(ws_dir, {p.name: p for p in found_repos})
723
+
724
+
725
+ # ── Plan Review ─────────────────────────────────────────────────────
726
+
727
+
728
+ def _idea_is_duplicate(new_text: str, existing_ideas: list) -> bool:
729
+ """Check if new idea text substantially overlaps an existing idea.
730
+
731
+ Catches exact matches and substring containment for ideas longer than
732
+ 20 characters. *existing_ideas* is a list of IdeaEntry objects.
733
+ """
734
+ new_lower = new_text.lower()
735
+ for idea in existing_ideas:
736
+ existing_lower = idea.text.lower()
737
+ if new_lower == existing_lower:
738
+ return True
739
+ if len(new_lower) > 20 and (new_lower in existing_lower or existing_lower in new_lower):
740
+ return True
741
+ return False
742
+
743
+
744
def _build_plan_review_prompt(ws_title: str, ws_context: str,
                              git_context: str, plan_entries: list[dict]) -> str:
    """Build an LLM prompt for reviewing plan files against git evidence.

    Each plan_entry has: filename, marker (status string), content (summary or truncated body).

    Args:
        ws_title: workstream title, interpolated into the prompt header.
        ws_context: abbreviated workstream state (see _build_ws_context).
        git_context: git activity summary (see _build_git_context).
        plan_entries: dicts with 'filename', optional 'marker', and 'content'.

    Returns:
        The full prompt string instructing the model to return a JSON array.
    """
    # Render each plan as its own markdown section so the model can refer
    # to files by name in its JSON response.
    plan_sections = []
    for entry in plan_entries:
        section = f'### {entry["filename"]}\n'
        # Marker is optional — omitted when the plan has no status string.
        if entry.get('marker'):
            section += f'Marker: {entry["marker"]}\n'
        section += entry['content']
        plan_sections.append(section)

    plans_block = '\n---\n'.join(plan_sections)

    return f"""You are reviewing plan files for the workstream "{ws_title}".

## Workstream Context
{ws_context}

## Git Activity
{git_context}

## Plan Files
{plans_block}

## Instructions

For each plan file, determine:
1. Was this plan IMPLEMENTED? (Look for matching commits, branches, or code changes.)
2. Is it OBSOLETE? (Superseded by another plan, or describes work no longer relevant.)
3. Does it contain IDEAS for genuinely new work not already tracked?
   An idea must be: actionable (suggests concrete work), novel (not already
   captured in the workstream's ideas list), and non-trivial. Observations
   and restatements of existing plans are NOT ideas. Target: 0-2 per plan.

A plan can be implemented AND still have residual ideas (e.g., a "Future Work" section
that was never pursued). Small tactical plans (single-commit scope) rarely have ideas.

"Finalized" in the status marker means "approved/ready" — it does NOT mean implemented.
Cross-reference with git commits to determine actual implementation status.

Return a JSON array:
[{{
  "plan": "filename.md",
  "status": "implemented|obsolete|active",
  "reason": "Brief explanation with commit evidence if applicable",
  "ideas": ["Unrealized idea 1", "Unrealized idea 2"]
}}]

"ideas" is an empty list when there are no unrealized ideas.
"active" means the plan has unfinished work that is still relevant.
Do not explain. Return ONLY the JSON array."""
798
+
799
+
800
+ def _parse_plan_review_response(response: str) -> list[dict]:
801
+ """Parse the LLM's JSON response for plan review into classification dicts.
802
+
803
+ Returns [] on parse failure. Each dict has: plan, status, reason, ideas.
804
+ """
805
+ import json
806
+
807
+ text = response.strip()
808
+ # Strip markdown code fences if present
809
+ if text.startswith('```'):
810
+ lines = text.splitlines()
811
+ lines = [ln for ln in lines if not ln.strip().startswith('```')]
812
+ text = '\n'.join(lines)
813
+
814
+ try:
815
+ result = json.loads(text)
816
+ except json.JSONDecodeError:
817
+ match = re.search(r'\[.*\]', text, re.DOTALL)
818
+ if match:
819
+ try:
820
+ result = json.loads(match.group())
821
+ except json.JSONDecodeError:
822
+ return []
823
+ else:
824
+ return []
825
+
826
+ if not isinstance(result, list):
827
+ return []
828
+
829
+ valid = []
830
+ for entry in result:
831
+ if not isinstance(entry, dict):
832
+ continue
833
+ if 'plan' not in entry or 'status' not in entry:
834
+ continue
835
+ if entry['status'] not in ('implemented', 'obsolete', 'active'):
836
+ continue
837
+ ideas = entry.get('ideas', [])
838
+ if not isinstance(ideas, list):
839
+ ideas = []
840
+ ideas = [str(i) for i in ideas if isinstance(i, str)]
841
+ valid.append({
842
+ 'plan': str(entry['plan']),
843
+ 'status': str(entry['status']),
844
+ 'reason': str(entry.get('reason', '')),
845
+ 'ideas': ideas,
846
+ })
847
+ return valid
848
+
849
+
850
+ def _build_ws_context(ws) -> str:
851
+ """Build abbreviated workstream context for plan review prompt."""
852
+ lines = [f'Status: {ws.status}']
853
+ # Last 3 thread entries
854
+ for entry in ws.thread[:3]:
855
+ snippet = entry.body.splitlines()[0][:100]
856
+ lines.append(f' [{entry.date}] {snippet}')
857
+ # Next actions
858
+ if ws.next_actions:
859
+ lines.append('Next actions:')
860
+ for action in ws.next_actions[:5]:
861
+ lines.append(f' - {action}')
862
+ # Existing ideas (abbreviated)
863
+ if ws.ideas:
864
+ lines.append(f'Existing ideas: {len(ws.ideas)}')
865
+ return '\n'.join(lines)
866
+
867
+
868
def _build_git_context(repo_path: Path, repo_name: str, since: str = '') -> str:
    """Build git activity context for a repo.

    When ``since`` is provided (ISO date), only shows commits after that date.
    Otherwise falls back to last 5 commits. The interactive agent can always
    run ``git log`` for deeper investigation.
    """
    from workstream.git import list_branches, recent_commits
    from workstream.repo_discovery import _default_branch

    try:
        branches = list_branches(repo_path)
    except Exception:
        return f'{repo_name}: unable to read git data'

    default = _default_branch(branches)
    if not default:
        return f'{repo_name}: no default branch found'

    out = [f'Repo: {repo_name}, default branch: {default}']

    try:
        # A sweep scoped to a delta gets a generous commit window; an
        # unscoped one just samples the latest few.
        window_start = since or '2020-01-01'
        cap = 20 if since else 5
        commits = recent_commits(repo_path, window_start, default)[:cap]
        if commits:
            heading = f'Commits since {since}' if since else 'Recent commits'
            out.append(f'{heading}:')
            out.extend(f' {c["date"]} {c["subject"]}' for c in commits)
        elif since:
            out.append(f'No commits since {since}')
    except Exception:
        pass

    # Non-default branches, annotated with ahead-counts when available.
    feature_branches = [b for b in branches if b not in ('main', 'master')]
    if feature_branches:
        from workstream.git import branch_ahead_count
        out.append('Active branches:')
        for name in feature_branches[:10]:
            try:
                ahead = branch_ahead_count(repo_path, name)
                out.append(f' {name} (+{ahead} ahead)')
            except Exception:
                out.append(f' {name}')

    return '\n'.join(out)
917
+
918
+
919
def _build_sweep_review_manifest(
    workstreams: list, repo_data: dict[str, dict], ws_dir: Path,
    findings: dict[str, SweepFindings] | None = None,
    cutoff: str = '',
    changed_notes: list[dict] | None = None,
    notes_root: Path | None = None,
) -> str:
    """Build a findings-scoped manifest for interactive sweep review.

    Only workstreams with activity (present in *findings*) get expanded
    sections. The manifest is the single document the reviewing agent
    sees as system-prompt context.

    Args:
        workstreams: all loaded workstream objects, used to resolve ids to titles.
        repo_data: repo name -> dict with at least 'path' and 'plans_dir'.
        ws_dir: workstreams directory.
            NOTE(review): appears unused in this body — confirm before removing.
        findings: ws.id -> SweepFindings delta produced by this sweep.
        cutoff: ISO date of the previous sweep ('' means first sweep).
        changed_notes: dicts with 'name', 'path', 'status' describing notes
            changed since the cutoff.
        notes_root: root of the notes git checkout, used for note diffs.

    Returns:
        The manifest as a single markdown string.
    """
    from datetime import date as date_type

    findings = findings or {}
    today = date_type.today().isoformat()
    parts: list[str] = ['## Sweep Activity Report', '']
    if cutoff:
        parts.append(f'Today: {today} | Cutoff: {cutoff}')
    else:
        parts.append(f'Today: {today} | First sweep (no prior baseline)')
    parts.append(f'Workstreams with activity: {len(findings)}')
    parts.append('')

    # Changed notes section (global, before per-workstream sections)
    if changed_notes:
        parts.append('### Changed Notes')
        if notes_root:
            parts.append(f'Notes root: {notes_root}')
        parts.append('')
        # Split by git status: anything not 'committed' counts as unstaged.
        committed = [n for n in changed_notes if n['status'] == 'committed']
        unstaged = [n for n in changed_notes if n['status'] != 'committed']
        if committed:
            parts.append(f'**Committed since {cutoff or "last sweep"}:**')
            for n in committed:
                parts.append(f'- {n["name"]} ({n["path"]})')
            parts.append('')
        if unstaged:
            parts.append('**Unstaged changes:**')
            for n in unstaged:
                parts.append(f'- {n["name"]} ({n["status"]})')
            parts.append('')

        # Include actual note content so the reviewing agent can see
        # completion signals ("Done:"), progress updates, and decisions
        # without making separate tool calls.
        if notes_root and cutoff:
            from workstream.git import notes_diff_since
            # Parent directory of each changed note file.
            notes_dirs = [n['path'].rsplit('/', 1)[0] for n in changed_notes]
            # Deduplicate while preserving order
            # (set.add returns None, so `not seen.add(d)` is always True and
            # only serves to record d as seen).
            seen: set[str] = set()
            unique_dirs = [d for d in notes_dirs if d not in seen and not seen.add(d)]  # type: ignore[func-returns-value]
            diff = notes_diff_since(notes_root, cutoff, unique_dirs, max_lines=100)
            if diff:
                parts.append('**Note changes:**')
                parts.append('')
                parts.append('```diff')
                parts.append(diff)
                parts.append('```')
                if diff.endswith('... (truncated)'):
                    # Give the agent the exact command to recover the full diff.
                    pathspec = ' '.join(unique_dirs)
                    parts.append(f'(truncated — for full diff: `git -C {notes_root} log --since="{cutoff}" -p --no-merges -- {pathspec}`)')
                parts.append('')
        parts.append('Look for completion signals ("Done:", "shipped", "launched") and use')
        parts.append('`ws update-status <id> completed [reason]` for workstreams whose work is finished.')
        parts.append('Capture follow-up ideas with `ws idea` before completing.')
        parts.append('')

    # Nothing to report at all: short-circuit with a stub manifest.
    if not findings and not changed_notes:
        parts.append('No activity detected this sweep.')
        return '\n'.join(parts)
    ws_by_id = {ws.id: ws for ws in workstreams}

    # One expanded section per workstream with findings, sorted by title.
    for ws_id, f in sorted(findings.items(), key=lambda kv: ws_by_id[kv[0]].title):
        ws = ws_by_id[ws_id]
        repos_str = ', '.join(ws.repos) if ws.repos else '(none)'

        # Workstream header with branch summary
        branch_info = ''
        if ws.branches:
            br_strs = [f'{b.branch} (+{b.ahead} ahead)' for b in ws.branches[:5]]
            branch_info = f' | Branches: {", ".join(br_strs)}'
        parts.append(f'### {ws.title} ({ws.id})')
        parts.append(f'Repos: {repos_str}{branch_info}')
        # Filesystem paths so the agent can resolve plan files
        path_parts: list[str] = []
        for rname in ws.repos:
            data = repo_data.get(rname)
            if data and data.get('path'):
                path_parts.append(f'{rname}={data["path"]}')
                pd = data.get('plans_dir')
                if pd:
                    path_parts.append(f'plans={pd}')
        if path_parts:
            parts.append(f'Paths: {", ".join(path_parts)}')
        parts.append('')

        # Code velocity warning for workstreams with repos
        if ws.repos and cutoff:
            code_date = getattr(ws, 'code_last_activity', '') or ''
            if not code_date or code_date < cutoff:
                if code_date:
                    parts.append(f'\u26a0 No code commits since {code_date} (design-only activity)')
                else:
                    parts.append(f'\u26a0 No code commits detected in associated repos')
                parts.append('')

        # Discovered plans
        if f.newly_matched:
            parts.append('**Discovered plans:**')
            for p in f.newly_matched:
                ptype = f', {p.plan_type}' if p.plan_type else ''
                line = f'- `{p.path}` ({p.status}{ptype}) "{p.title}"'
                if p.date:
                    line += f' {p.date}'
                # Triage hints
                if p.plan_type == 'project' and p.title:
                    matched_br = _find_matching_branches(p.title, ws.branches)
                    if matched_br:
                        br = matched_br[0]
                        line += f'\n Branch: `{br.branch}` (+{br.ahead} ahead) — track implementation'
                    else:
                        line += '\n Project plan, no matching branch yet'
                elif p.plan_type == 'tactical':
                    line += ' [tactical — commit-scope]'
                if p.guidance in ('new-peer', 'new-top-level'):
                    line += f'\n Suggests: create a new workstream ({p.guidance})'
                parts.append(line)
            parts.append('')

        # Signals
        if f.new_signals:
            parts.append('**Signals:**')
            for p in f.new_signals:
                if p.signal == 'likely-implemented':
                    parts.append(f'- `{p.path}` [{p.signal}] — branch merged. Auto-classified.')
                elif p.signal == 'stale':
                    parts.append(f'- `{p.path}` [{p.signal}] — no recent changes. Still relevant?')
                else:
                    parts.append(f'- `{p.path}` [{p.signal}]')
            parts.append('')

        # Auto-classified
        if f.auto_classified:
            parts.append('**Auto-classified (implemented):**')
            for p in f.auto_classified:
                parts.append(f'- `{p.path}` — set to implemented (branch merged, all commits landed)')
            parts.append('')

        # Commits
        if f.repos_with_commits:
            for rname, commits in sorted(f.repos_with_commits.items()):
                parts.append(f'**Commits ({rname}):**')
                for c in commits[:20]:
                    parts.append(f' {c["date"]} {c["subject"]}')
                parts.append('')

    # Instructions for the reviewing agent
    parts.append('### Instructions')
    parts.append('')
    parts.append('You are reviewing sweep findings — only workstreams with activity are shown above.')
    parts.append('For each workstream, integrate the findings into the workstream system:')
    parts.append('- **Discovered plans** — read the plan file, verify classification, recommend branch/ws creation')
    parts.append('- **Signals** — confirm or override auto-classifications')
    parts.append('- **Commits** — relate to plans, record checkins for untracked work')
    parts.append('- **Branches** — map to plans or workstreams')
    parts.append('')
    parts.append('Repos are local — use `git log`, `git show`, `git diff` for deeper investigation.')
    parts.append('Read plan files directly with the read tool.')
    parts.append('Use the workstream-sweep skill for the full review protocol.')
    parts.append('')

    return '\n'.join(parts)
1093
+
1094
+
1095
def _batch_review_plans_handler(config: 'Config', workstreams: list,
                                repo_data: dict, ws_dir: Path) -> None:
    """LLM-review plan files for each workstream with repos.

    For every workstream that has associated repos and matched plans in
    *repo_data*, batches the plans (~10 per LLM call), asks the model to
    classify each as implemented/obsolete/active, logs every classification
    to the workstream file, appends de-duplicated ideas, and writes the
    classification back into each plan file's frontmatter.

    Args:
        config: application config (LLM agent selection, models).
        workstreams: loaded workstream objects.
        repo_data: repo name -> {'path', 'plans', 'plans_dir', ...}.
        ws_dir: workstreams directory (kept for handler-signature parity).
    """
    import sys
    from datetime import date as date_type

    from workstream.llm import LLMAgent
    from workstream.markdown import (
        append_idea,
        append_log_entry,
        parse_frontmatter,
        write_frontmatter,
    )
    from workstream.models import IdeaEntry, LogEntry

    # Get LLM agent
    llm_name = config.get_llm_agent()
    if llm_name:
        try:
            agent = LLMAgent(llm_name) if llm_name != 'auto' else LLMAgent.detect()
        except Exception:
            # Covers ValueError (unknown agent name) and detection failures;
            # the previous `(ValueError, Exception)` tuple was redundant.
            agent = None
    else:
        agent = None

    if agent is None:
        print('No LLM agent available. Cannot review plans without an LLM.')
        return

    discover_model = config.llm_discover_model
    now_dt = datetime.now().isoformat(timespec='seconds')
    today_date = date_type.today().isoformat()

    print()
    print('── Plan Review ──')

    # Filter to workstreams with repos AND matched plans in repo_data
    reviewed_plans = 0
    total_ideas = 0
    for ws in workstreams:
        if not ws.repos:
            continue
        # Collect plan entries from matched repos
        plan_entries: list[dict] = []
        git_contexts: list[str] = []

        for repo_name in ws.repos:
            if repo_name not in repo_data:
                continue
            data = repo_data[repo_name]
            repo_path = data['path']

            git_contexts.append(_build_git_context(repo_path, repo_name))

            for plan in data['plans']:
                meta = plan['meta']
                # Build plan entry for prompt
                filename = plan['path']
                marker = meta.get('status', '')
                if meta.get('finalized_at'):
                    marker += f' {meta["finalized_at"]}'

                # Prefer summary from frontmatter; fall back to truncated content
                summary = meta.get('summary', '')
                if summary:
                    content = summary
                else:
                    # Read and truncate the plan file
                    plan_path = data['plans_dir'] / filename
                    try:
                        full_text = plan_path.read_text(encoding='utf-8')
                        # Skip frontmatter for content truncation
                        _, plan_body = parse_frontmatter(full_text)
                        content_lines = (plan_body or full_text).splitlines()[:80]
                        content = '\n'.join(content_lines)
                    except Exception:
                        content = '(unable to read plan file)'

                plan_entries.append({
                    'filename': filename,
                    'marker': marker,
                    'content': content,
                })

        if not plan_entries:
            continue

        ws_context = _build_ws_context(ws)
        git_context = '\n\n'.join(git_contexts) if git_contexts else '(no git data)'

        # Batch plans ~10 per LLM call
        batch_size = 10
        total_batches = (len(plan_entries) + batch_size - 1) // batch_size
        for batch_start in range(0, len(plan_entries), batch_size):
            batch_num = batch_start // batch_size + 1
            batch = plan_entries[batch_start:batch_start + batch_size]
            plan_names = ', '.join(e['filename'][:30] for e in batch[:3])
            if len(batch) > 3:
                plan_names += f', ... +{len(batch) - 3}'
            # Inline progress line — completed with results (or an error) below.
            sys.stdout.write(
                f' {ws.title} [{batch_num}/{total_batches}]: reviewing {len(batch)} plans ({plan_names})...'
            )
            sys.stdout.flush()

            prompt = _build_plan_review_prompt(ws.title, ws_context, git_context, batch)

            try:
                response = agent.prompt(prompt, model=discover_model, thinking='minimal')
            except Exception as e:
                print(f' error: {e}')
                continue

            classifications = _parse_plan_review_response(response)
            if not classifications:
                print(' no valid classifications')
                continue

            # Dispatch results
            if ws.source_path and ws.source_path.exists():
                text = ws.source_path.read_text(encoding='utf-8')
                meta, body = parse_frontmatter(text)

                for cl in classifications:
                    reviewed_plans += 1
                    # Log every classification
                    detail = f'{cl["plan"]}: {cl["status"]} — {cl["reason"]}'
                    body = append_log_entry(body, LogEntry(
                        date=today_date, event='plan-reviewed', detail=detail,
                    ))

                    # Dispatch ideas (with dedup)
                    for idea_text in cl['ideas']:
                        if _idea_is_duplicate(idea_text, ws.ideas):
                            continue
                        body = append_idea(body, IdeaEntry(date=today_date, text=idea_text))
                        body = append_log_entry(body, LogEntry(
                            date=today_date, event='plan-idea',
                            detail=f'from {cl["plan"]}: {idea_text}',
                        ))
                        total_ideas += 1

                    # Update the plan file's frontmatter status
                    for repo_name in ws.repos:
                        if repo_name not in repo_data:
                            continue
                        plan_file_path = repo_data[repo_name]['plans_dir'] / cl['plan']
                        if plan_file_path.is_file():
                            try:
                                plan_text = plan_file_path.read_text(encoding='utf-8')
                                plan_meta, plan_body = parse_frontmatter(plan_text)
                                if plan_meta and plan_meta.get('status') != cl['status']:
                                    plan_meta['status'] = cl['status']
                                    plan_meta['reviewed'] = today_date
                                    plan_file_path.write_text(
                                        write_frontmatter(plan_meta, plan_body),
                                        encoding='utf-8',
                                    )
                            except Exception:
                                pass  # Don't fail the whole review if one file can't be updated
                            break  # Found the file, no need to check other repos

                meta['updated'] = now_dt
                meta['last_activity'] = now_dt
                ws.source_path.write_text(
                    write_frontmatter(meta, body), encoding='utf-8'
                )

            # Complete the inline progress line with results
            statuses = {}
            for cl in classifications:
                statuses[cl['status']] = statuses.get(cl['status'], 0) + 1
            batch_ideas = sum(len(cl['ideas']) for cl in classifications)
            parts = [f'{v} {k}' for k, v in sorted(statuses.items())]
            if batch_ideas:
                parts.append(f'{batch_ideas} ideas')
            print(f' → {", ".join(parts)}')

    print(f'Plan review complete: {reviewed_plans} plans reviewed, {total_ideas} ideas extracted.')
1273
+
1274
+
1275
def _review_plans_handler(config: 'Config', workstreams: list,
                          repo_data: dict, ws_dir: Path,
                          batch: bool = False,
                          findings: dict[str, SweepFindings] | None = None,
                          cutoff: str = '',
                          changed_notes: list[dict] | None = None) -> None:
    """Review plan files — interactive (default) or headless batch mode."""
    if batch:
        # Headless path: delegate straight to the batch reviewer.
        _batch_review_plans_handler(config, workstreams, repo_data, ws_dir)
        return

    # Interactive mode: launch omp session with sweep-review skill
    import os
    import tempfile

    from workstream.commands.init import _read_skill_content
    from workstream.harness import exec_or_fallback, find_harness

    notes_root = ws_dir.parent

    # Build manifest from authoritative findings
    manifest = _build_sweep_review_manifest(
        workstreams, repo_data, ws_dir,
        findings=findings or {}, cutoff=cutoff,
        changed_notes=changed_notes,
        notes_root=notes_root,
    )

    # Write skill + manifest to a temp file, referenced via @-syntax below.
    handle, manifest_file = tempfile.mkstemp(suffix='.md', prefix='ws-plan-review-')
    os.close(handle)
    manifest_path = Path(manifest_file)
    skill = _read_skill_content('workstream_sweep')
    manifest_path.write_text(skill + '\n\n---\n\n' + manifest, encoding='utf-8')

    # Find omp (preferred) or claude harness
    harness = find_harness()
    if not harness:
        print('No interactive harness (omp or claude) found on PATH.')
        print('Falling back to batch mode.')
        _batch_review_plans_handler(config, workstreams, repo_data, ws_dir)
        return

    cmd = [
        harness,
        '--append-system-prompt', f'@{manifest_path}',
        'Begin the sweep review. Read the manifest in your system prompt and follow the skill protocol.',
    ]

    exec_or_fallback(harness, cmd, manifest_file, cwd=notes_root)
1325
+
1326
+
1327
def _backfill_summaries(config: 'Config', ws_dir: Path) -> None:
    """Generate BLUF summaries for workstreams missing them.

    Uses the LLM to synthesize a 1-2 sentence summary from thread entries,
    plans, and next actions. Only runs when an LLM agent is available.
    Skips workstreams that already have summaries or are in terminal status
    ('completed'/'dropped').

    Args:
        config: application config (LLM agent selection, models).
        ws_dir: workstreams directory to load candidates from.
    """
    from workstream.llm import LLMAgent
    from workstream.markdown import save_workstream

    llm_name = config.get_llm_agent()
    if llm_name:
        try:
            agent = LLMAgent(llm_name) if llm_name != 'auto' else LLMAgent.detect()
        except Exception:
            # Covers ValueError (unknown agent name) and detection failures;
            # the previous `(ValueError, Exception)` tuple was redundant.
            agent = None
    else:
        agent = None

    if agent is None:
        print('No LLM agent available. Skipping summary backfill.')
        return

    discover_model = config.llm_discover_model
    workstreams = load_all_workstreams(ws_dir)
    candidates = [
        ws for ws in workstreams
        if not ws.summary
        and ws.status not in ('completed', 'dropped')
        and ws.source_path
    ]

    if not candidates:
        print('All workstreams already have summaries.')
        return

    print(f'Backfilling summaries for {len(candidates)} workstream(s)...')
    backfilled = 0
    for ws in candidates:
        # Build context for the LLM
        parts = [f'Title: {ws.title}', f'Status: {ws.status}']
        if ws.tags:
            parts.append(f'Tags: {", ".join(ws.tags)}')

        if ws.thread:
            parts.append('Recent thread entries:')
            for entry in ws.thread[:5]:
                # First line only, capped at 120 chars; empty bodies allowed.
                snippet = entry.body.splitlines()[0][:120] if entry.body else ''
                parts.append(f' [{entry.date}] {snippet}')

        if ws.next_actions:
            parts.append('Next actions:')
            for action in ws.next_actions:
                parts.append(f' - {action}')

        if ws.plans:
            plan_names = ', '.join(p.title or p.path for p in ws.plans[:5])
            parts.append(f'Plans ({len(ws.plans)}): {plan_names}')

        context = '\n'.join(parts)
        prompt = (
            'Summarize this workstream in 1-2 sentences. '
            'What is it and where is it heading?\n\n'
            f'{context}\n\n'
            'Return ONLY the summary text, no formatting or labels.'
        )

        try:
            response = agent.prompt(prompt, model=discover_model, thinking='minimal')
        except Exception as e:
            # Best-effort: a failed LLM call skips this workstream only.
            print(f' skip: {ws.title[:40]} (error: {e})')
            continue

        # Strip whitespace and any quoting the model wraps the summary in.
        summary = response.strip().strip('"').strip()
        if not summary:
            continue

        ws.summary = summary
        save_workstream(ws, ws.source_path)
        backfilled += 1
        trunc = summary[:60] + ('...' if len(summary) > 60 else '')
        print(f' summary: {ws.title[:40]} -> "{trunc}"')

    print(f'Backfilled {backfilled} summary/summaries.')
1411
+
1412
+
1413
+
1414
+ def _sweep_handler(config: Config, discover: bool = False, interactive: bool = False,
1415
+ lookback: int = 60, review_plans: bool = False,
1416
+ no_review: bool = False,
1417
+ batch: bool = False, backfill_summaries: bool = False,
1418
+ since: str | None = None) -> None:
1419
+ ws_dir = config.workstreams_path
1420
+
1421
+ # Load all workstreams
1422
+ workstreams = load_all_workstreams(ws_dir)
1423
+
1424
+ if not workstreams:
1425
+ print('No workstreams found.')
1426
+ return
1427
+
1428
+ # Determine repos to scan: explicit config repos + auto-discovered repos.
1429
+ # Explicit repos take priority (they may have plans_dir overrides), then
1430
+ # auto-discovered repos fill in the rest.
1431
+ from workstream.repo_discovery import discover_repos
1432
+
1433
+ explicit_names: set[str] = set()
1434
+ repos_to_scan: list[tuple[str, Path]] = []
1435
+ if config.repos:
1436
+ repos_to_scan = [(rc.name, Path(rc.path).expanduser()) for rc in config.repos]
1437
+ explicit_names = {rc.name for rc in config.repos}
1438
+
1439
+ if config.repo_dirs:
1440
+ discovered = discover_repos(config.repo_dirs)
1441
+ new_repos = [(p.name, p) for p in discovered if p.name not in explicit_names]
1442
+ if new_repos:
1443
+ repos_to_scan.extend(new_repos)
1444
+ print(f' auto-discovered {len(new_repos)} repos from repo_dirs')
1445
+
1446
+ # Deduplicate repos sharing the same remote URL — keep the one with the most
1447
+ # .plans/ files (or first found if tied). Multiple clones of the same repo
1448
+ # produce duplicate plan reviews and inflated branch counts.
1449
+ from workstream.git import remote_url
1450
+ seen_remotes: dict[str, tuple[str, Path]] = {} # url -> (name, path)
1451
+ deduped: list[tuple[str, Path]] = []
1452
+ for repo_name, repo_path in repos_to_scan:
1453
+ url = remote_url(repo_path)
1454
+ if url and url in seen_remotes:
1455
+ prev_name, prev_path = seen_remotes[url]
1456
+ # Keep whichever has more plans
1457
+ prev_plans_dir = config.get_plans_dir(prev_name, prev_path)
1458
+ cur_plans_dir = config.get_plans_dir(repo_name, repo_path)
1459
+ prev_plans = prev_plans_dir.glob('*.md') if prev_plans_dir.is_dir() else []
1460
+ cur_plans = cur_plans_dir.glob('*.md') if cur_plans_dir.is_dir() else []
1461
+ if sum(1 for _ in cur_plans) > sum(1 for _ in prev_plans):
1462
+ # Replace previous with current
1463
+ deduped = [(n, p) if n != prev_name else (repo_name, repo_path)
1464
+ for n, p in deduped]
1465
+ seen_remotes[url] = (repo_name, repo_path)
1466
+ print(f' dedup: {repo_name} supersedes {prev_name} (same remote)')
1467
+ else:
1468
+ print(f' dedup: skipping {repo_name} (same remote as {prev_name})')
1469
+ continue
1470
+ if url:
1471
+ seen_remotes[url] = (repo_name, repo_path)
1472
+ deduped.append((repo_name, repo_path))
1473
+ if len(deduped) < len(repos_to_scan):
1474
+ dropped = len(repos_to_scan) - len(deduped)
1475
+ print(f' deduplicated {dropped} repo(s) sharing the same remote')
1476
+ repos_to_scan = deduped
1477
+
1478
+ # Scan each repo
1479
+ repo_data: dict[str, dict] = {} # repo_name -> {plans, branches, path, plans_dir}
1480
+ for repo_name, repo_path in repos_to_scan:
1481
+ if not repo_path.is_dir():
1482
+ print(f' skip {repo_name}: directory not found')
1483
+ continue
1484
+
1485
+ plans_dir = config.get_plans_dir(repo_name, repo_path)
1486
+ plans = scan_repo_plans(plans_dir)
1487
+ branches = _scan_repo_branches(repo_path)
1488
+ repo_data[repo_name] = {
1489
+ 'plans': plans,
1490
+ 'branches': branches,
1491
+ 'path': repo_path,
1492
+ 'plans_dir': plans_dir,
1493
+ }
1494
+ if plans or branches:
1495
+ print(f' {repo_name}: {len(plans)} plans, {len(branches)} branches')
1496
+ # Auto-associate repos to workstreams with no repos set
1497
+ if repo_data:
1498
+ # Track which workstreams have no repos before auto-association
1499
+ unassociated = {id(ws) for ws in workstreams if not ws.repos}
1500
+ associated = _auto_associate_repos(workstreams, list(repo_data.keys()))
1501
+ if associated:
1502
+ print(f' auto-associated repos for {associated} workstream(s)')
1503
+ # Auto-setup newly associated repos (best-effort)
1504
+ from workstream.provisioning import setup_repo
1505
+ for ws in workstreams:
1506
+ if id(ws) not in unassociated:
1507
+ continue # was already associated before
1508
+ for rname in ws.repos:
1509
+ rdata = repo_data.get(rname)
1510
+ if rdata and rdata.get('path'):
1511
+ try:
1512
+ setup_repo(rdata['path'])
1513
+ except Exception:
1514
+ pass # best-effort; user can run ws setup explicitly
1515
+
1516
+ # Match and update workstreams
1517
+ updated_count = 0
1518
+ sweep_findings: dict[str, SweepFindings] = {} # ws.id -> delta from this sweep
1519
+ for ws in workstreams:
1520
+ ws_slug = slugify(ws.title)
1521
+ new_plans: list[PlanRef] = []
1522
+ new_branches: list[BranchRef] = []
1523
+
1524
+ for rname, data in repo_data.items():
1525
+ new_plans.extend(
1526
+ _match_plans_to_workstream(
1527
+ ws.title, ws_slug, ws.id,
1528
+ ws.repos, rname, data['plans'],
1529
+ )
1530
+ )
1531
+ new_branches.extend(
1532
+ _match_branches_to_workstream(ws_slug, rname, data['branches'])
1533
+ )
1534
+
1535
+ # -- Heuristic signal detection for active plans --
1536
+ now_dt = datetime.now().isoformat(timespec='seconds')
1537
+ today_date = date.today().isoformat()
1538
+ signal_plans: list[PlanRef] = []
1539
+ for plan in new_plans:
1540
+ if plan.status != 'active':
1541
+ continue
1542
+ # Check if associated branch was merged into main/master
1543
+ repo_path = repo_data.get(plan.repo, {}).get('path')
1544
+ if repo_path:
1545
+ # Look for a branch matching the workstream slug
1546
+ branches_in_repo = repo_data[plan.repo].get('branches', [])
1547
+ for br in branches_in_repo:
1548
+ if ws_slug in br['branch']:
1549
+ try:
1550
+ if branch_is_merged(repo_path, br['branch']):
1551
+ plan.signal = 'likely-implemented'
1552
+ signal_plans.append(plan)
1553
+ break
1554
+ except Exception:
1555
+ pass
1556
+ # Check for staleness via plan file mtime
1557
+ if not plan.signal and repo_path:
1558
+ plans_dir = repo_data.get(plan.repo, {}).get('plans_dir')
1559
+ plan_file = plans_dir / plan.path if plans_dir else repo_path / '.plans' / plan.path
1560
+ if plan_file.is_file():
1561
+ try:
1562
+ mtime = datetime.fromtimestamp(plan_file.stat().st_mtime)
1563
+ age_days = (datetime.now() - mtime).days
1564
+ if age_days > 90:
1565
+ plan.signal = 'stale'
1566
+ signal_plans.append(plan)
1567
+ except Exception:
1568
+ pass
1569
+
1570
+ # Only log signals that are genuinely new (not already on the stored plan)
1571
+ prev_signals = {(p.repo, p.path): p.signal for p in ws.plans}
1572
+ new_signal_plans = [
1573
+ p for p in signal_plans
1574
+ if prev_signals.get((p.repo, p.path)) != p.signal
1575
+ ]
1576
+ if new_signal_plans:
1577
+ for plan in new_signal_plans:
1578
+ ws.log.append(LogEntry(
1579
+ date=today_date, event='sweep-signal',
1580
+ detail=f'{plan.path}: {plan.signal}',
1581
+ ))
1582
+ print(f' signal: {ws.title} — {plan.path}: {plan.signal}')
1583
+
1584
+ # Auto-classify high-confidence signals
1585
+ auto_classified = _auto_classify_plans(signal_plans, repo_data)
1586
+ for plan in auto_classified:
1587
+ ws.log.append(LogEntry(
1588
+ date=today_date, event='plan-auto-classified',
1589
+ detail=f'{plan.path}: implemented (auto)',
1590
+ ))
1591
+ print(f' auto-classified: {ws.title} — {plan.path} → implemented')
1592
+
1593
+ # Detect newly-matched plans (not in previous plan list)
1594
+ prev_plan_paths = {(p.repo, p.path) for p in ws.plans}
1595
+ newly_matched = [
1596
+ p for p in new_plans
1597
+ if (p.repo, p.path) not in prev_plan_paths
1598
+ ]
1599
+ if newly_matched:
1600
+ for plan in newly_matched:
1601
+ ws.log.append(LogEntry(
1602
+ date=today_date, event='plan-discovered',
1603
+ detail=f'{plan.repo}/.plans/{plan.path} ({plan.status})',
1604
+ ))
1605
+ print(f' new plan: {ws.title} — {plan.repo}/.plans/{plan.path}')
1606
+
1607
+ # Compare by content, not order — render_body sorts by date-desc,
1608
+ # but freshly-matched plans arrive in scan order (alphabetical).
1609
+ _pk = lambda p: (p.repo, p.path, p.status, p.title, p.date, p.signal)
1610
+ _bk = lambda b: (b.repo, b.branch, b.ahead)
1611
+ plans_changed = sorted(new_plans, key=_pk) != sorted(ws.plans, key=_pk)
1612
+ branches_changed = sorted(new_branches, key=_bk) != sorted(ws.branches, key=_bk)
1613
+ changed = plans_changed or branches_changed or bool(new_signal_plans) or bool(auto_classified)
1614
+ if changed:
1615
+ ws.plans = new_plans
1616
+ ws.branches = new_branches
1617
+ ws.updated = now_dt
1618
+ ws.last_activity = now_dt
1619
+ if ws.source_path:
1620
+ save_workstream(ws, ws.source_path)
1621
+ updated_count += 1
1622
+ print(f' updated: {ws.title} ({len(new_plans)} plans, {len(new_branches)} branches)')
1623
+
1624
+ # Accumulate findings for workstreams with activity
1625
+ if newly_matched or new_signal_plans or auto_classified:
1626
+ sweep_findings[ws.id] = SweepFindings(
1627
+ newly_matched=newly_matched,
1628
+ new_signals=new_signal_plans,
1629
+ auto_classified=auto_classified,
1630
+ )
1631
+
1632
+ print(f'Sweep complete: {updated_count} workstream(s) updated.')
1633
+
1634
+ # Update durable plan index
1635
+ from workstream.plan_index import load_plan_index, save_plan_index, update_plan_index
1636
+ plan_index = load_plan_index(ws_dir)
1637
+ index_changed = False
1638
+ for ws in workstreams:
1639
+ ws_slug = slugify(ws.title)
1640
+ if update_plan_index(plan_index, ws_slug, ws.plans):
1641
+ index_changed = True
1642
+ if index_changed:
1643
+ save_plan_index(plan_index, ws_dir)
1644
+ print(f' plan index updated ({sum(len(v) for v in plan_index.values())} entries)')
1645
+
1646
+ # Post-sweep: recompute activity dates with repo data
1647
+ _recompute_activity_dates(ws_dir, {name: path for name, path in repos_to_scan})
1648
+
1649
+ # Record last_sweep timestamp — capture the previous value for cutoff resolution
1650
+ from workstream.sweep_state import load_sweep_state, save_sweep_state
1651
+ state = load_sweep_state(ws_dir)
1652
+ prev_last_sweep = state.last_sweep # when the prior sweep ran
1653
+ cutoff = _resolve_cutoff(since, prev_last_sweep)
1654
+ state.last_sweep = date.today().isoformat()
1655
+ save_sweep_state(state, ws_dir)
1656
+
1657
+ # Fill repos_with_commits for workstreams with findings
1658
+ if sweep_findings and cutoff:
1659
+ from workstream.git import recent_commits
1660
+ ws_by_id = {ws.id: ws for ws in workstreams}
1661
+ for ws_id, findings in sweep_findings.items():
1662
+ ws_obj = ws_by_id[ws_id]
1663
+ for rname in ws_obj.repos:
1664
+ data = repo_data.get(rname)
1665
+ if data:
1666
+ commits = recent_commits(data['path'], cutoff)
1667
+ if commits:
1668
+ findings.repos_with_commits[rname] = commits
1669
+
1670
+ # Collect changed notes for review manifest
1671
+ notes_root = ws_dir.parent
1672
+ changed_notes: list[dict] = []
1673
+ if config.notes_dirs and cutoff:
1674
+ from workstream.git import file_last_commit_dates, modified_files
1675
+
1676
+ # Committed notes since cutoff
1677
+ for notes_dir_str in config.notes_dirs:
1678
+ notes_dir = Path(notes_dir_str).expanduser()
1679
+ if not notes_dir.is_dir():
1680
+ continue
1681
+ md_files = sorted(notes_dir.glob('*.md'))
1682
+ if not md_files:
1683
+ continue
1684
+ git_dates = file_last_commit_dates(notes_dir, md_files)
1685
+ for f in md_files:
1686
+ gd = git_dates.get(str(f))
1687
+ if gd and gd >= cutoff:
1688
+ changed_notes.append({
1689
+ 'path': str(f), 'name': f.name, 'status': 'committed',
1690
+ })
1691
+
1692
+ # Unstaged changes in notes root
1693
+ try:
1694
+ dirty = modified_files(notes_root)
1695
+ for rel_path in dirty:
1696
+ abs_path = notes_root / rel_path
1697
+ if abs_path.suffix == '.md' and any(
1698
+ str(abs_path).startswith(d) for d in config.notes_dirs
1699
+ ):
1700
+ # Don't duplicate if already in committed list
1701
+ if not any(n['path'] == str(abs_path) for n in changed_notes):
1702
+ changed_notes.append({
1703
+ 'path': str(abs_path), 'name': abs_path.name,
1704
+ 'status': 'modified',
1705
+ })
1706
+ except Exception:
1707
+ pass # git not available in notes root
1708
+
1709
+ # Auto-commit modified notes
1710
+ if config.auto_commit_notes and changed_notes:
1711
+ from workstream.git import git_add, git_commit
1712
+ modified_note_paths = [
1713
+ Path(n['path']) for n in changed_notes if n['status'] == 'modified'
1714
+ ]
1715
+ if modified_note_paths:
1716
+ today_str = date.today().isoformat()
1717
+ if git_add(notes_root, modified_note_paths):
1718
+ if git_commit(notes_root, f'notes: {today_str}'):
1719
+ print(f' auto-committed {len(modified_note_paths)} modified note(s)')
1720
+ for n in changed_notes:
1721
+ if n['status'] == 'modified':
1722
+ n['status'] = 'committed'
1723
+
1724
+ if backfill_summaries:
1725
+ _backfill_summaries(config, ws_dir)
1726
+
1727
+ # -- Plan review phase --
1728
+ # Default: interactive review runs unless --no-review is set.
1729
+ # --review-plans is deprecated (it's now the default behavior).
1730
+ if review_plans:
1731
+ import sys
1732
+ print('Note: --review-plans is now the default. Use --no-review to skip.', file=sys.stderr)
1733
+
1734
+ if not no_review and (sweep_findings or changed_notes or review_plans):
1735
+ _review_plans_handler(config, workstreams, repo_data, ws_dir,
1736
+ batch=batch, findings=sweep_findings, cutoff=cutoff,
1737
+ changed_notes=changed_notes or None)
1738
+
1739
+ if discover:
1740
+ _discover_handler(config, workstreams, ws_dir, interactive=interactive,
1741
+ lookback_days=lookback)
1742
+
1743
def get_command() -> Command:
    """Build the `sweep` CLI command and register all of its flags.

    Returns a Command bound to _sweep_handler covering discovery,
    interactivity, plan-review mode, summary backfill, and the activity
    cutoff. Flags with parse_as=True are boolean switches; --lookback is
    an int with a default, and --since is parsed downstream as a string.
    """
    command = Command(
        _sweep_handler, name='sweep',
        doc='Scan repos for plans and branches, update workstreams.',
    )
    # (flag, extra keyword args for Command.add, help text)
    flag_specs = (
        ('--discover', {'parse_as': True},
         'Run iterative discovery: scan notes for thoughts and associate via LLM'),
        ('--interactive', {'parse_as': True},
         'Pause at month boundaries when inbox items or errors need attention'),
        ('--lookback', {'parse_as': int, 'missing': 60},
         'Lookback period in days for discovery (default: 60)'),
        ('--review-plans', {'parse_as': True},
         '(deprecated, now default) LLM-review plan files'),
        ('--no-review', {'parse_as': True},
         'Skip interactive plan review (mechanical scan only)'),
        ('--batch', {'parse_as': True},
         'Use headless batch mode for plan review (default: interactive)'),
        ('--backfill-summaries', {'parse_as': True},
         'Generate BLUF summaries for workstreams missing them (uses LLM)'),
        # --since stays a plain string; cutoff resolution happens in the handler.
        ('--since', {},
         'Cutoff for activity detection (ISO date, relative like "2h"/"1d", default: last sweep)'),
    )
    for flag, extra, help_text in flag_specs:
        command.add(flag, doc=help_text, **extra)
    return command