swegen-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,706 @@
+ from __future__ import annotations
+
+ import json
+ import logging
+ import time
+ import traceback
+ from datetime import UTC, datetime
+ from pathlib import Path
+ from typing import Any
+
+ from harbor.models.environment_type import EnvironmentType
+ from rich.console import Console, Group
+ from rich.panel import Panel
+ from rich.rule import Rule
+ from rich.table import Table
+ from rich.text import Text
+ from rich.traceback import install as rich_traceback_install
+
+ from swegen.config import CreateConfig
+ from swegen.tools.harbor_runner import parse_harbor_outcome, run_harbor_agent
+ from swegen.tools.validate_utils import ValidationError, run_nop_oracle
+
+ from . import MissingIssueError, PRToHarborPipeline, TrivialPRError
+ from .claude_code_runner import ClaudeCodeResult, run_claude_code_session
+ from .repo_cache import RepoCache
+
+ # -----------------------------------------------------------------------------
+ # Helper functions for run_reversal phases
+ # -----------------------------------------------------------------------------
+
+
+ def _display_header(console: Console, pipeline: PRToHarborPipeline, pr: int) -> None:
+     """Display the initial header panel with repo and PR context."""
+     console.print(Rule(Text("Task Generation", style="bold cyan")))
+     info = Table(show_header=False, box=None)
+     info.add_row("Repo", Text(pipeline.repo, style="bold"))
+     info.add_row("PR", Text(str(pr), style="bold"))
+     console.print(Panel(info, title="Context", expand=False))
+
+
+ def _check_linked_issues(
+     console: Console,
+     pipeline: PRToHarborPipeline,
+     pr: int,
+     require_issue: bool,
+ ) -> list:
+     """Check for linked issues and validate requirements.
+
+     Returns list of linked issues.
+     Raises MissingIssueError if required and none found.
+     """
+     linked_issues = []
+     try:
+         linked_issues = pipeline.pr_fetcher.fetch_linked_issues()
+     except Exception as e:
+         logging.getLogger("swegen").debug("Could not fetch linked issues: %s", str(e))
+
+     if require_issue:
+         if not linked_issues:
+             console.print(
+                 Panel(
+                     Text(
+                         f"PR #{pr} has no linked issue. Use --no-require-issue to generate task from PR body/title instead.",
+                         style="yellow",
+                     ),
+                     title="[yellow]Skipped (No Linked Issue)[/yellow]",
+                     border_style="yellow",
+                 )
+             )
+             raise MissingIssueError(
+                 f"PR #{pr}: No linked issue found (use --no-require-issue to skip this check)"
+             )
+         else:
+             console.print(f"[green]✓ Found {len(linked_issues)} linked issue(s)[/green]")
+     else:
+         if linked_issues:
+             console.print(f"[dim]Found {len(linked_issues)} linked issue(s)[/dim]")
+         else:
+             console.print(
+                 "[yellow]No linked issue found - using PR body/title for instructions[/yellow]"
+             )
+
+     return linked_issues
+
+
+ def _check_dedupe(
+     console: Console,
+     repo_key: str,
+     state_file: Path,
+     force: bool,
+ ) -> bool:
+     """Check if task already exists in state file.
+
+     Returns True if duplicate found and should skip, False otherwise.
+     """
+     if force or not state_file.exists():
+         return False
+
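+     # Scan the whole JSONL state file; if the key appears more than once, the most recent record wins.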
+     last_rec = None
+     logger = logging.getLogger("swegen")
+     with open(state_file) as f:
+         for line in f:
+             try:
+                 rec = json.loads(line)
+                 if rec.get("key") == repo_key:
+                     last_rec = rec
+             except (json.JSONDecodeError, ValueError, TypeError) as e:
+                 logger.debug(f"Failed to parse state record line: {e}")
+                 continue
+
+     if last_rec is not None:
+         existing_harbor = last_rec.get("harbor")
+         body = Table(show_header=False, box=None)
+         body.add_row("harbor", Text(str(existing_harbor)))
+         console.print(
+             Panel(
+                 body,
+                 title=f"Duplicate key: [bold]{repo_key}[/bold]",
+                 subtitle="Use --force to regenerate",
+                 border_style="yellow",
+             )
+         )
+         return True
+     return False
+
+
+ def _display_validation_results(
+     console: Console,
+     results_rows: list[list[str]],
+ ) -> tuple[bool, bool]:
+     """Display validation results line by line and return failure flags.
+
+     Args:
+         console: Rich console for output
+         results_rows: List of [phase, expected, actual, match] for each validation
+
+     Returns:
+         Tuple of (harbor_validation_failed, cc_validation_failed)
+     """
+     harbor_validation_failed = False
+     cc_validation_failed = False
+
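+     # Phase labels ("Harbor nop", "Harbor oracle", "CC NOP", "CC Oracle") let substring checks classify failures.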
+     for phase, expected, actual, match in results_rows:
+         ok = match == "Yes"
+         style = "green" if ok else "red"
+         icon = "✓" if ok else "✗"
+         console.print(Text(f"{icon} {phase}: expected {expected}, actual {actual}", style=style))
+         if not ok:
+             if "Harbor" in phase:
+                 harbor_validation_failed = True
+             if "CC" in phase:
+                 cc_validation_failed = True
+
+     return harbor_validation_failed, cc_validation_failed
+
+
+ def _build_validation_table(results_rows: list[list[str]]) -> Table | None:
+     """Build the validation results table for the summary panel.
+
+     Args:
+         results_rows: List of [phase, expected, actual, match] for each validation
+
+     Returns:
+         Rich Table with validation results, or None if no results
+     """
+     if not results_rows:
+         return None
+
+     vt = Table(
+         title="Validation Results", title_style="bold cyan", header_style="bold", show_lines=False
+     )
+     vt.add_column("Phase")
+     vt.add_column("Expected")
+     vt.add_column("Actual")
+     vt.add_column("Match?")
+     for phase, expected, actual, match in results_rows:
+         vt.add_row(
+             phase, expected, actual, Text(match, style=("green" if match == "Yes" else "red"))
+         )
+     return vt
+
+
+ def _handle_validation_failure(
+     console: Console,
+     harbor_validation_failed: bool,
+     cc_validation_failed: bool,
+     harbor_actually_ran: bool,
+ ) -> None:
+     """Handle validation failures, printing warnings and raising ValidationError if needed.
+
+     Args:
+         console: Rich console for output
+         harbor_validation_failed: True if any Harbor validation failed
+         cc_validation_failed: True if any CC validation failed
+         harbor_actually_ran: True if Harbor validations were run (not skipped)
+
+     Raises:
+         ValidationError: If validation failed in a way that should stop processing
+     """
+     # CC failed but Harbor passed - acceptable with warning
+     if cc_validation_failed and not harbor_validation_failed and harbor_actually_ran:
+         console.print()
+         console.print(
+             Panel(
+                 Text(
+                     "⚠ CC validation failed, but Harbor validation passed.\nThis is acceptable - Harbor is the authoritative test environment.",
+                     style="yellow bold",
+                 ),
+                 title="[yellow]CC Validation Warning[/yellow]",
+                 border_style="yellow",
+             )
+         )
+
+     # Determine overall validation failure:
+     # - Harbor failed (authoritative) → fail
+     # - CC failed AND Harbor was skipped → fail (no authoritative validation to fall back on)
+     # - CC failed BUT Harbor passed → success (Harbor is authoritative)
+     validation_failed = harbor_validation_failed or (
+         cc_validation_failed and not harbor_actually_ran
+     )
+
+     if validation_failed:
+         console.print()
+         if cc_validation_failed and not harbor_actually_ran:
+             # CC failed and Harbor was skipped - can't verify the task
+             console.print(
+                 Panel(
+                     Text(
+                         "CC validation failed and Harbor validation was skipped.\nThe task cannot be verified. Run Harbor validation manually or re-run with --validate.",
+                         style="red bold",
+                     ),
+                     title="[red]Validation Failed[/red]",
+                     border_style="red",
+                 )
+             )
+             raise ValidationError("CC validation failed and Harbor validation was skipped")
+         else:
+             # Harbor validation failed
+             console.print(
+                 Panel(
+                     Text("Validation failed. Review the task files and logs.", style="red bold"),
+                     title="[red]Validation Failed[/red]",
+                     border_style="red",
+                 )
+             )
+             raise ValidationError("Harbor validation failed (NOP or Oracle did not pass)")
+
+
+ def _save_state_record(
+     state_dir: Path,
+     state_file: Path,
+     repo_key: str,
+     repo: str,
+     pr: int,
+     task_id: str,
+     task_dir: Path,
+ ) -> None:
+     """Save a record of the generated task to the state file.
+
+     This is non-fatal - errors are logged but do not stop execution.
+     """
+     logger = logging.getLogger("swegen")
+     try:
+         state_dir.mkdir(parents=True, exist_ok=True)
+         rec = {
+             "key": repo_key,
+             "repo": repo,
+             "pr": pr,
+             "task_id": task_id,
+             "harbor": str(task_dir.resolve()),
+             "ts": datetime.now(UTC).isoformat(),
+         }
+         with open(state_file, "a") as f:
+             f.write(json.dumps(rec) + "\n")
+     except (OSError, IOError, PermissionError, ValueError) as e:
+         # Non-fatal; log but continue
+         logger.warning(f"Failed to save state record for {repo_key}: {e}")
+     except Exception as e:
+         # Catch-all for unexpected errors, but still log them
+         logger.warning(f"Unexpected error saving state record for {repo_key}: {e}", exc_info=True)
+
+
+ def _display_summary_panel(
+     console: Console,
+     repo: str,
+     pr: int,
+     task_id: str,
+     task_dir: Path,
+     gen_log_path: Path,
+     validation_table: Table | None,
+ ) -> None:
+     """Display the summary panel with task and PR context."""
+     # Count test files
+     test_files_count = 0
+     try:
+         test_files = list((task_dir / "tests").glob("*.py"))
+         if not test_files:
+             test_files = list((task_dir / "tests").glob("*.js")) + list(
+                 (task_dir / "tests").glob("*.ts")
+             )
+         test_files_count = len(test_files)
+     except Exception:
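+         # The test-file count is cosmetic; ignore errors such as a missing tests/ directory.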
+         pass
+
+     def _short(sha: Any) -> str:
+         s = str(sha or "-")
+         return s[:7] if len(s) > 7 else s
+
+     summary = Table(show_header=False, box=None)
+     summary.add_row("Repo", Text(repo))
+     summary.add_row("PR", Text(str(pr)))
+     summary.add_row("Base", Text("-"))  # Not tracked in current implementation
+     summary.add_row("Head", Text("-"))  # Not tracked in current implementation
+     summary.add_row("Changed files", Text("-"))  # Not tracked in current implementation
+     summary.add_row("Test files", Text(str(test_files_count)))
+     summary.add_row("Task ID", Text(task_id, style="bold"))
+     summary.add_row("Harbor task", Text(str(task_dir)))
+     summary.add_row("Debug log", Text(str(gen_log_path)))
+
+     content = Group(summary, validation_table) if validation_table is not None else summary
+     console.print(Panel(content, title="Summary", border_style="green"))
+
+
+ def _display_logs_panel(
+     console: Console,
+     gen_log_path: Path,
+     harbor_nop_job_dir: str | None,
+     harbor_oracle_job_dir: str | None,
+ ) -> None:
+     """Display the logs panel with job directory paths."""
+     logs = Table(show_header=False, box=None, expand=True)
+     logs.add_column("Item", no_wrap=True)
+     logs.add_column("Path", overflow="fold", no_wrap=False)
+     logs.add_row("Harbor nop job", Text(harbor_nop_job_dir or "-", overflow="fold"))
+     logs.add_row("Harbor oracle job", Text(harbor_oracle_job_dir or "-", overflow="fold"))
+     logs.add_row("Generate log", Text(str(gen_log_path)))
+     console.print(Panel(logs, title="Logs", border_style="magenta"))
+
+
+ def _display_next_steps_panel(
+     console: Console,
+     harbor_root: Path,
+     task_id: str,
+ ) -> None:
+     """Display the next steps panel with recommended actions."""
+     steps = Table(show_header=False, box=None)
+     steps.add_row("1.", "Confirm validation results match expectations; review Logs for mismatches")
+     steps.add_row("2.", "Review generated files (especially Dockerfile)")
+     steps.add_row("3.", "Review instruction.md and task.toml")
+     steps.add_row("4.", f"Harbor nop: harbor run --agent nop -p {harbor_root} -t {task_id}")
+     steps.add_row("5.", f"Harbor oracle: harbor run --agent oracle -p {harbor_root} -t {task_id}")
+     steps.add_row(
+         "6.", f"Create a pull request including the new task under {harbor_root / task_id}"
+     )
+     console.print(Panel(steps, title="Next Steps", border_style="cyan"))
+
+
+ def _run_harbor_validations(
+     task_id: str,
+     harbor_root: Path,
+     harbor_jobs: Path,
+     console: Console,
+     environment: EnvironmentType = EnvironmentType.DOCKER,
+ ) -> tuple[list[list[str]], dict[str, str | None]]:
+     """Run Harbor validations (nop + oracle) sequentially.
+
+     Returns:
+         Tuple of (results_rows, job_dirs) where:
+         - results_rows: List of [phase, expected, actual, match] for each validation
+         - job_dirs: Dict mapping agent names to job directory paths (as strings)
+     """
+     with console.status("Running harbor nop + oracle...", spinner="dots"):
+         reward_nop, reward_oracle, job_paths = run_nop_oracle(
+             task_id=task_id,
+             dataset_path=harbor_root,
+             jobs_dir=harbor_jobs,
+             environment=environment,
+         )
+
+     # Convert paths to strings for job_dirs
+     job_dirs = {
+         "nop": str(job_paths["nop"]) if job_paths["nop"] else None,
+         "oracle": str(job_paths["oracle"]) if job_paths["oracle"] else None,
+     }
+
+     # Build results rows
+     results_rows = [
+         [
+             "Harbor nop",
+             "reward=0",
+             f"reward={reward_nop}" if reward_nop is not None else "reward=unknown",
+             "Yes" if reward_nop == 0 else "No",
+         ],
+         [
+             "Harbor oracle",
+             "reward=1",
+             f"reward={reward_oracle}" if reward_oracle is not None else "reward=unknown",
+             "Yes" if reward_oracle == 1 else "No",
+         ],
+     ]
+
+     return results_rows, job_dirs
+
+
+ def run_reversal(config: CreateConfig) -> None:
+     """Convert a merged PR into a Harbor task.
+
+     Args:
+         config: Typed configuration with repo, PR number, and options.
+     """
+     rich_traceback_install(show_locals=False)
+     console = Console()
+     pipeline = PRToHarborPipeline(repo=config.repo, pr_number=config.pr)
+     # Configure file logging for detailed generation logs (fall back to .state if no state dir is set)
+     logs_root = Path(config.state_dir or ".state") / "logs"
+     logs_root.mkdir(parents=True, exist_ok=True)
+     gen_log_path = logs_root / f"generate-{pipeline.task_id}.log"
+     _configure_file_logger(gen_log_path)
+     try:
+         # Header
+         _display_header(console, pipeline, config.pr)
+
+         # Check for linked issues if required
+         linked_issues = _check_linked_issues(console, pipeline, config.pr, config.require_issue)
+
+         # Simple local dedupe: check-before
+         # Lowercase repo for consistency (GitHub is case-insensitive, Docker requires lowercase)
+         repo_key = f"{pipeline.repo.lower()}#{config.pr}"
+         state_dir: Path = config.state_dir or Path(".state")
+         state_file = state_dir / "create.jsonl"
+         if _check_dedupe(console, repo_key, state_file, config.force):
+             return
+
+         harbor_root = config.output
+         harbor_root.mkdir(parents=True, exist_ok=True)
+         t0 = time.perf_counter()
+
+         # CC detects language automatically and fills in the skeleton
+         cc_result: ClaudeCodeResult | None = None
+
+         try:
+             # try: skeleton generation + CC
+             verbose = config.verbose
+
+             # Step 1a: Fetch PR metadata
+             console.print("[dim] → Fetching PR metadata...[/dim]")
+             metadata = pipeline.pr_fetcher.fetch_pr_metadata(allow_unmerged=config.allow_unmerged)
+
+             # Step 1b: Clone/update repo cache
+             console.print(
+                 "[dim] → Cloning/updating repo cache (may take a minute for first clone)...[/dim]"
+             )
+             repo_cache_dir = config.state_dir / "repos" if config.state_dir else None
+             repo_cache = RepoCache(repo_cache_dir)
+             repo_path = repo_cache.get_or_clone(
+                 repo=pipeline.repo,
+                 head_sha=metadata["head_sha"],
+                 repo_url=metadata["repo_url"],
+             )
+             console.print(f"[dim] Repo at: {repo_path}[/dim]")
+
+             # Step 1c: Generate skeleton files (includes LLM call for PR evaluation)
+             console.print("[dim] → Generating skeleton and evaluating...[/dim]")
+             with console.status("Evaluating PR & writing skeleton...", spinner="dots"):
+                 (
+                     task_dir,
+                     _,
+                     extracted_test_files,
+                     task_reference,
+                 ) = pipeline.generate_task(
+                     tasks_root=harbor_root,
+                     overwrite=bool(config.force),
+                     cache_dir=repo_cache_dir,
+                     repo_path=repo_path,
+                     metadata=metadata,
+                     linked_issues=linked_issues,
+                     run_cc=False,  # Run CC separately after skeleton
+                     cc_timeout=config.cc_timeout,
+                     verbose=verbose,
+                     use_cache=config.use_cache,
+                     state_dir=config.state_dir,
+                     require_minimum_difficulty=config.require_minimum_difficulty,
+                     min_source_files=config.min_source_files,
+                     max_source_files=config.max_source_files,
+                     environment=config.environment.value,
+                 )
+
+             skeleton_secs = time.perf_counter() - t0
+             console.print(
+                 f"[green]✓ Skeleton generated in {skeleton_secs:.1f}s → {task_dir}[/green]"
+             )
+             console.print(f" [dim]Test files: {len(extracted_test_files)}[/dim]")
+
+             # Step 2: Run CC "make it work" session
+             console.print()
+             if task_reference:
+                 console.print(
+                     Rule(
+                         Text(
+                             f"Claude Code: Adapt from PR #{task_reference.pr_number}",
+                             style="bold magenta",
+                         )
+                     )
+                 )
+                 console.print(
+                     f"[dim]Reference: {task_reference.task_id} | Timeout: {config.cc_timeout}s | Verbose: {str(verbose).lower()}[/dim]"
+                 )
+             else:
+                 console.print(Rule(Text("Claude Code", style="bold magenta")))
+                 console.print(
+                     f"[dim]Timeout: {config.cc_timeout}s | Verbose: {str(verbose).lower()}[/dim]"
+                 )
+             console.print()
+
+             cc_result = run_claude_code_session(
+                 repo=pipeline.repo,
+                 pr_number=pipeline.pr_number,
+                 repo_path=repo_path,
+                 task_dir=task_dir,
+                 task_id=pipeline.task_id,
+                 dataset_path=harbor_root,
+                 test_files=extracted_test_files,
+                 timeout=config.cc_timeout,
+                 verbose=verbose,
+                 reference_task_id=task_reference.task_id if task_reference else None,
+                 reference_pr=task_reference.pr_number if task_reference else None,
+                 head_sha=metadata.get("head_sha"),
+                 environment=config.environment.value,
+             )
+
+             gen_secs = time.perf_counter() - t0
+
+             if cc_result and cc_result.success:
+                 console.print()
+                 console.print(f"[green]✓ Task generated and validated in {gen_secs:.1f}s[/green]")
+             elif cc_result:
+                 console.print()
+                 console.print(
+                     f"[yellow]⚠ CC session completed in {gen_secs:.1f}s (validation incomplete)[/yellow]"
+                 )
+                 if cc_result.error_message:
+                     console.print(f" [red]Error: {cc_result.error_message}[/red]")
+             else:
+                 console.print(
+                     f"[green]✓ Skeleton generated in {gen_secs:.1f}s → {task_dir}[/green]"
+                 )
+         except TrivialPRError as e:
+             # Skip trivial PRs gracefully
+             console.print(
+                 Panel(
+                     Text(str(e), style="yellow"),
+                     title="[yellow]Skipped (Trivial PR)[/yellow]",
+                     border_style="yellow",
+                 )
+             )
+             # Re-raise so calling code can handle appropriately
+             raise
+         except FileExistsError as e:
+             # Task already exists
+             console.print(
+                 Panel(
+                     Text(str(e), style="yellow"),
+                     title="[yellow]Task Already Exists[/yellow]",
+                     border_style="yellow",
+                 )
+             )
+             # Re-raise so calling code can handle appropriately
+             raise
+
+         # Task ID from generated dir
+         task_id = task_dir.name
+         harbor_do = not config.no_validate
+
+         # If CC already validated successfully, skip harbor validation
+         if cc_result and cc_result.success:
+             harbor_do = False
+             console.print("[green]✓ Skipping harbor validation (CC already validated)[/green]")
+
+         # Auto-validation unless skipped
+         results_rows = []
+         # Hold log paths for summary
+         harbor_nop_job_dir = harbor_oracle_job_dir = None
+
+         # If CC ran, add its results to the summary
+         if cc_result:
+             results_rows.append(
+                 [
+                     "CC NOP",
+                     "reward=0",
+                     "reward=0" if cc_result.nop_passed else "failed",
+                     "Yes" if cc_result.nop_passed else "No",
+                 ]
+             )
+             results_rows.append(
+                 [
+                     "CC Oracle",
+                     "reward=1",
+                     "reward=1" if cc_result.oracle_passed else "failed",
+                     "Yes" if cc_result.oracle_passed else "No",
+                 ]
+             )
+
+         if harbor_do:
+             # Prepare harbor jobs directory
+             harbor_jobs = (
+                 config.state_dir / "harbor-jobs"
+                 if isinstance(config.state_dir, Path)
+                 else Path(".state") / "harbor-jobs"
+             )
+             harbor_jobs = harbor_jobs.resolve()
+             harbor_jobs.mkdir(parents=True, exist_ok=True)
+
+             # Run validations serially to avoid Docker conflicts
+             console.print(Rule(Text("Validations", style="bold blue")))
+
+             validation_results, job_dirs = _run_harbor_validations(
+                 task_id, harbor_root, harbor_jobs, console, config.environment
+             )
+             results_rows.extend(validation_results)
+             harbor_nop_job_dir = job_dirs.get("nop")
+             harbor_oracle_job_dir = job_dirs.get("oracle")
+
+         # Display validation results and check for failures
+         harbor_validation_failed, cc_validation_failed = _display_validation_results(
+             console, results_rows
+         )
+         validation_table = _build_validation_table(results_rows)
+
+         # Handle validation failures (may raise ValidationError)
+         harbor_actually_ran = any("Harbor" in row[0] for row in results_rows)
+         _handle_validation_failure(
+             console, harbor_validation_failed, cc_validation_failed, harbor_actually_ran
+         )
+
+         # Save state record (non-fatal if fails)
+         _save_state_record(
+             state_dir, state_file, repo_key, pipeline.repo, config.pr, task_id, task_dir
+         )
+
+         # Display final panels
+         _display_summary_panel(
+             console, pipeline.repo, config.pr, task_id, task_dir, gen_log_path, validation_table
+         )
+         _display_logs_panel(
+             console,
+             gen_log_path,
+             harbor_nop_job_dir,
+             harbor_oracle_job_dir,
+         )
+         _display_next_steps_panel(console, harbor_root, task_id)
+     except (TrivialPRError, MissingIssueError, ValidationError, FileExistsError):
+         # Re-raise these exceptions so caller can handle them
+         raise
+     except Exception as e:
+         # Unexpected errors - print and re-raise for caller to handle
+         console.print(Panel(Text(str(e)), title="Error", border_style="red"))
+         traceback.print_exc()
+         raise
+
+
+ def _run_harbor_with_status(
+     task_id: str,
+     harbor_root: Path,
+     harbor_jobs_parent: Path,
+     console: Console,
+     phase: str,
+     delete_after: bool = True,
+     environment: EnvironmentType = EnvironmentType.DOCKER,
+ ) -> Path | None:
+     """Run harbor with a rich console status spinner.
+
+     Thin wrapper around run_harbor_agent that adds console status feedback.
+
+     Args:
+         task_id: Task identifier
+         harbor_root: Harbor dataset root path
+         harbor_jobs_parent: Jobs directory path
+         console: Rich console for output
+         phase: Agent name ("nop" or "oracle")
+         delete_after: If True, delete Docker image after run (default: True)
+         environment: Environment type (docker, daytona, e2b, modal, runloop, gke)
+
+     Returns:
+         Path to the harbor job directory, or None if the run produced no job directory.
+     """
+     with console.status(f"Running harbor {phase}...", spinner="dots"):
+         _, job_result = run_harbor_agent(
+             task_id=task_id,
+             dataset_path=harbor_root,
+             jobs_dir=harbor_jobs_parent,
+             agent=phase,
+             capture_output=True,
+             delete_after=delete_after,
+             environment=environment,
+         )
+     return job_result
+
+
+ def _configure_file_logger(path: Path) -> None:
+     """Configure the "swegen" logger to write DEBUG output to the given file, replacing any existing handlers."""
+     logger = logging.getLogger("swegen")
+     logger.setLevel(logging.DEBUG)
+     logger.propagate = False
+     # Clear existing handlers
+     logger.handlers = []
+     fh = logging.FileHandler(path)
+     fh.setLevel(logging.DEBUG)
+     fmt = logging.Formatter("%(asctime)s [%(levelname)s] %(message)s")
+     fh.setFormatter(fmt)
+     logger.addHandler(fh)