swegen 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- swegen/__init__.py +14 -0
- swegen/analyze/__init__.py +24 -0
- swegen/analyze/classifier.py +637 -0
- swegen/analyze/classify_prompt.txt +241 -0
- swegen/analyze/models.py +253 -0
- swegen/analyze/run.py +656 -0
- swegen/analyze/verdict_prompt.txt +126 -0
- swegen/cli.py +411 -0
- swegen/config.py +142 -0
- swegen/create/__init__.py +22 -0
- swegen/create/claude_code_runner.py +988 -0
- swegen/create/claude_code_utils.py +95 -0
- swegen/create/create.py +706 -0
- swegen/create/diff_utils.py +142 -0
- swegen/create/orchestrator.py +368 -0
- swegen/create/pr_fetcher.py +187 -0
- swegen/create/repo_cache.py +175 -0
- swegen/create/task_instruction.py +363 -0
- swegen/create/task_reference.py +130 -0
- swegen/create/task_skeleton.py +266 -0
- swegen/create/utils.py +350 -0
- swegen/farm/__init__.py +13 -0
- swegen/farm/farm_hand.py +342 -0
- swegen/farm/fetcher.py +341 -0
- swegen/farm/state.py +231 -0
- swegen/farm/stream_farm.py +430 -0
- swegen/tools/__init__.py +16 -0
- swegen/tools/harbor_runner.py +191 -0
- swegen/tools/validate.py +523 -0
- swegen/tools/validate_utils.py +142 -0
- swegen-0.1.0.dist-info/METADATA +292 -0
- swegen-0.1.0.dist-info/RECORD +35 -0
- swegen-0.1.0.dist-info/WHEEL +4 -0
- swegen-0.1.0.dist-info/entry_points.txt +3 -0
- swegen-0.1.0.dist-info/licenses/LICENSE +201 -0
swegen/create/create.py
ADDED
|
@@ -0,0 +1,706 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import logging
|
|
5
|
+
import time
|
|
6
|
+
import traceback
|
|
7
|
+
from datetime import UTC, datetime
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Any
|
|
10
|
+
|
|
11
|
+
from harbor.models.environment_type import EnvironmentType
|
|
12
|
+
from rich.console import Console, Group
|
|
13
|
+
from rich.panel import Panel
|
|
14
|
+
from rich.rule import Rule
|
|
15
|
+
from rich.table import Table
|
|
16
|
+
from rich.text import Text
|
|
17
|
+
from rich.traceback import install as rich_traceback_install
|
|
18
|
+
|
|
19
|
+
from swegen.config import CreateConfig
|
|
20
|
+
from swegen.tools.harbor_runner import parse_harbor_outcome, run_harbor_agent
|
|
21
|
+
from swegen.tools.validate_utils import ValidationError, run_nop_oracle
|
|
22
|
+
|
|
23
|
+
from . import MissingIssueError, PRToHarborPipeline, TrivialPRError
|
|
24
|
+
from .claude_code_runner import ClaudeCodeResult, run_claude_code_session
|
|
25
|
+
from .repo_cache import RepoCache
|
|
26
|
+
|
|
27
|
+
# -----------------------------------------------------------------------------
|
|
28
|
+
# Helper functions for run_reversal phases
|
|
29
|
+
# -----------------------------------------------------------------------------
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _display_header(console: Console, pipeline: PRToHarborPipeline, pr: int) -> None:
    """Print the opening rule and a context panel showing the repo and PR number."""
    heading = Text("Task Generation", style="bold cyan")
    console.print(Rule(heading))
    context = Table(show_header=False, box=None)
    for label, value in (("Repo", pipeline.repo), ("PR", str(pr))):
        context.add_row(label, Text(value, style="bold"))
    console.print(Panel(context, title="Context", expand=False))
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _check_linked_issues(
|
|
42
|
+
console: Console,
|
|
43
|
+
pipeline: PRToHarborPipeline,
|
|
44
|
+
pr: int,
|
|
45
|
+
require_issue: bool,
|
|
46
|
+
) -> list:
|
|
47
|
+
"""Check for linked issues and validate requirements.
|
|
48
|
+
|
|
49
|
+
Returns list of linked issues.
|
|
50
|
+
Raises MissingIssueError if required and none found.
|
|
51
|
+
"""
|
|
52
|
+
linked_issues = []
|
|
53
|
+
try:
|
|
54
|
+
linked_issues = pipeline.pr_fetcher.fetch_linked_issues()
|
|
55
|
+
except Exception as e:
|
|
56
|
+
logging.getLogger("swegen").debug("Could not fetch linked issues: %s", str(e))
|
|
57
|
+
|
|
58
|
+
if require_issue:
|
|
59
|
+
if not linked_issues:
|
|
60
|
+
console.print(
|
|
61
|
+
Panel(
|
|
62
|
+
Text(
|
|
63
|
+
f"PR #{pr} has no linked issue. Use --no-require-issue to generate task from PR body/title instead.",
|
|
64
|
+
style="yellow",
|
|
65
|
+
),
|
|
66
|
+
title="[yellow]Skipped (No Linked Issue)[/yellow]",
|
|
67
|
+
border_style="yellow",
|
|
68
|
+
)
|
|
69
|
+
)
|
|
70
|
+
raise MissingIssueError(
|
|
71
|
+
f"PR #{pr}: No linked issue found (use --no-require-issue to skip this check)"
|
|
72
|
+
)
|
|
73
|
+
else:
|
|
74
|
+
console.print(f"[green]✓ Found {len(linked_issues)} linked issue(s)[/green]")
|
|
75
|
+
else:
|
|
76
|
+
if linked_issues:
|
|
77
|
+
console.print(f"[dim]Found {len(linked_issues)} linked issue(s)[/dim]")
|
|
78
|
+
else:
|
|
79
|
+
console.print(
|
|
80
|
+
"[yellow]No linked issue found - using PR body/title for instructions[/yellow]"
|
|
81
|
+
)
|
|
82
|
+
|
|
83
|
+
return linked_issues
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def _check_dedupe(
|
|
87
|
+
console: Console,
|
|
88
|
+
repo_key: str,
|
|
89
|
+
state_file: Path,
|
|
90
|
+
force: bool,
|
|
91
|
+
) -> bool:
|
|
92
|
+
"""Check if task already exists in state file.
|
|
93
|
+
|
|
94
|
+
Returns True if duplicate found and should skip, False otherwise.
|
|
95
|
+
"""
|
|
96
|
+
if force or not state_file.exists():
|
|
97
|
+
return False
|
|
98
|
+
|
|
99
|
+
last_rec = None
|
|
100
|
+
logger = logging.getLogger("swegen")
|
|
101
|
+
with open(state_file) as f:
|
|
102
|
+
for line in f:
|
|
103
|
+
try:
|
|
104
|
+
rec = json.loads(line)
|
|
105
|
+
if rec.get("key") == repo_key:
|
|
106
|
+
last_rec = rec
|
|
107
|
+
except (json.JSONDecodeError, ValueError, TypeError) as e:
|
|
108
|
+
logger.debug(f"Failed to parse state record line: {e}")
|
|
109
|
+
continue
|
|
110
|
+
|
|
111
|
+
if last_rec is not None:
|
|
112
|
+
existing_harbor = last_rec.get("harbor")
|
|
113
|
+
body = Table(show_header=False, box=None)
|
|
114
|
+
body.add_row("harbor", Text(str(existing_harbor)))
|
|
115
|
+
console.print(
|
|
116
|
+
Panel(
|
|
117
|
+
body,
|
|
118
|
+
title=f"Duplicate key: [bold]{repo_key}[/bold]",
|
|
119
|
+
subtitle="Use --force to regenerate",
|
|
120
|
+
border_style="yellow",
|
|
121
|
+
)
|
|
122
|
+
)
|
|
123
|
+
return True
|
|
124
|
+
return False
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def _display_validation_results(
|
|
128
|
+
console: Console,
|
|
129
|
+
results_rows: list[list[str]],
|
|
130
|
+
) -> tuple[bool, bool]:
|
|
131
|
+
"""Display validation results line by line and return failure flags.
|
|
132
|
+
|
|
133
|
+
Args:
|
|
134
|
+
console: Rich console for output
|
|
135
|
+
results_rows: List of [phase, expected, actual, match] for each validation
|
|
136
|
+
|
|
137
|
+
Returns:
|
|
138
|
+
Tuple of (harbor_validation_failed, cc_validation_failed)
|
|
139
|
+
"""
|
|
140
|
+
harbor_validation_failed = False
|
|
141
|
+
cc_validation_failed = False
|
|
142
|
+
|
|
143
|
+
for phase, expected, actual, match in results_rows:
|
|
144
|
+
ok = match == "Yes"
|
|
145
|
+
style = "green" if ok else "red"
|
|
146
|
+
icon = "✓" if ok else "✗"
|
|
147
|
+
console.print(Text(f"{icon} {phase}: expected {expected}, actual {actual}", style=style))
|
|
148
|
+
if not ok:
|
|
149
|
+
if "Harbor" in phase:
|
|
150
|
+
harbor_validation_failed = True
|
|
151
|
+
if "CC" in phase:
|
|
152
|
+
cc_validation_failed = True
|
|
153
|
+
|
|
154
|
+
return harbor_validation_failed, cc_validation_failed
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def _build_validation_table(results_rows: list[list[str]]) -> Table | None:
|
|
158
|
+
"""Build the validation results table for the summary panel.
|
|
159
|
+
|
|
160
|
+
Args:
|
|
161
|
+
results_rows: List of [phase, expected, actual, match] for each validation
|
|
162
|
+
|
|
163
|
+
Returns:
|
|
164
|
+
Rich Table with validation results, or None if no results
|
|
165
|
+
"""
|
|
166
|
+
if not results_rows:
|
|
167
|
+
return None
|
|
168
|
+
|
|
169
|
+
vt = Table(
|
|
170
|
+
title="Validation Results", title_style="bold cyan", header_style="bold", show_lines=False
|
|
171
|
+
)
|
|
172
|
+
vt.add_column("Phase")
|
|
173
|
+
vt.add_column("Expected")
|
|
174
|
+
vt.add_column("Actual")
|
|
175
|
+
vt.add_column("Match?")
|
|
176
|
+
for phase, expected, actual, match in results_rows:
|
|
177
|
+
vt.add_row(
|
|
178
|
+
phase, expected, actual, Text(match, style=("green" if match == "Yes" else "red"))
|
|
179
|
+
)
|
|
180
|
+
return vt
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
def _handle_validation_failure(
|
|
184
|
+
console: Console,
|
|
185
|
+
harbor_validation_failed: bool,
|
|
186
|
+
cc_validation_failed: bool,
|
|
187
|
+
harbor_actually_ran: bool,
|
|
188
|
+
) -> None:
|
|
189
|
+
"""Handle validation failures, printing warnings and raising ValidationError if needed.
|
|
190
|
+
|
|
191
|
+
Args:
|
|
192
|
+
console: Rich console for output
|
|
193
|
+
harbor_validation_failed: True if any Harbor validation failed
|
|
194
|
+
cc_validation_failed: True if any CC validation failed
|
|
195
|
+
harbor_actually_ran: True if Harbor validations were run (not skipped)
|
|
196
|
+
|
|
197
|
+
Raises:
|
|
198
|
+
ValidationError: If validation failed in a way that should stop processing
|
|
199
|
+
"""
|
|
200
|
+
# CC failed but Harbor passed - acceptable with warning
|
|
201
|
+
if cc_validation_failed and not harbor_validation_failed and harbor_actually_ran:
|
|
202
|
+
console.print()
|
|
203
|
+
console.print(
|
|
204
|
+
Panel(
|
|
205
|
+
Text(
|
|
206
|
+
"⚠ CC validation failed, but Harbor validation passed.\nThis is acceptable - Harbor is the authoritative test environment.",
|
|
207
|
+
style="yellow bold",
|
|
208
|
+
),
|
|
209
|
+
title="[yellow]CC Validation Warning[/yellow]",
|
|
210
|
+
border_style="yellow",
|
|
211
|
+
)
|
|
212
|
+
)
|
|
213
|
+
|
|
214
|
+
# Determine overall validation failure:
|
|
215
|
+
# - Harbor failed (authoritative) → fail
|
|
216
|
+
# - CC failed AND Harbor was skipped → fail (no authoritative validation to fall back on)
|
|
217
|
+
# - CC failed BUT Harbor passed → success (Harbor is authoritative)
|
|
218
|
+
validation_failed = harbor_validation_failed or (
|
|
219
|
+
cc_validation_failed and not harbor_actually_ran
|
|
220
|
+
)
|
|
221
|
+
|
|
222
|
+
if validation_failed:
|
|
223
|
+
console.print()
|
|
224
|
+
if cc_validation_failed and not harbor_actually_ran:
|
|
225
|
+
# CC failed and Harbor was skipped - can't verify the task
|
|
226
|
+
console.print(
|
|
227
|
+
Panel(
|
|
228
|
+
Text(
|
|
229
|
+
"CC validation failed and Harbor validation was skipped.\nThe task cannot be verified. Run Harbor validation manually or re-run with --validate.",
|
|
230
|
+
style="red bold",
|
|
231
|
+
),
|
|
232
|
+
title="[red]Validation Failed[/red]",
|
|
233
|
+
border_style="red",
|
|
234
|
+
)
|
|
235
|
+
)
|
|
236
|
+
raise ValidationError("CC validation failed and Harbor validation was skipped")
|
|
237
|
+
else:
|
|
238
|
+
# Harbor validation failed
|
|
239
|
+
console.print(
|
|
240
|
+
Panel(
|
|
241
|
+
Text("Validation failed. Review the task files and logs.", style="red bold"),
|
|
242
|
+
title="[red]Validation Failed[/red]",
|
|
243
|
+
border_style="red",
|
|
244
|
+
)
|
|
245
|
+
)
|
|
246
|
+
raise ValidationError("Harbor validation failed (NOP or Oracle did not pass)")
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
def _save_state_record(
|
|
250
|
+
state_dir: Path,
|
|
251
|
+
state_file: Path,
|
|
252
|
+
repo_key: str,
|
|
253
|
+
repo: str,
|
|
254
|
+
pr: int,
|
|
255
|
+
task_id: str,
|
|
256
|
+
task_dir: Path,
|
|
257
|
+
) -> None:
|
|
258
|
+
"""Save a record of the generated task to the state file.
|
|
259
|
+
|
|
260
|
+
This is non-fatal - errors are logged but do not stop execution.
|
|
261
|
+
"""
|
|
262
|
+
logger = logging.getLogger("swegen")
|
|
263
|
+
try:
|
|
264
|
+
state_dir.mkdir(parents=True, exist_ok=True)
|
|
265
|
+
rec = {
|
|
266
|
+
"key": repo_key,
|
|
267
|
+
"repo": repo,
|
|
268
|
+
"pr": pr,
|
|
269
|
+
"task_id": task_id,
|
|
270
|
+
"harbor": str(task_dir.resolve()),
|
|
271
|
+
"ts": datetime.now(UTC).isoformat(),
|
|
272
|
+
}
|
|
273
|
+
with open(state_file, "a") as f:
|
|
274
|
+
f.write(json.dumps(rec) + "\n")
|
|
275
|
+
except (OSError, IOError, PermissionError, ValueError) as e:
|
|
276
|
+
# Non-fatal; log but continue
|
|
277
|
+
logger.warning(f"Failed to save state record for {repo_key}: {e}")
|
|
278
|
+
except Exception as e:
|
|
279
|
+
# Catch-all for unexpected errors, but still log them
|
|
280
|
+
logger.warning(f"Unexpected error saving state record for {repo_key}: {e}", exc_info=True)
|
|
281
|
+
|
|
282
|
+
|
|
283
|
+
def _display_summary_panel(
    console: Console,
    repo: str,
    pr: int,
    task_id: str,
    task_dir: Path,
    gen_log_path: Path,
    validation_table: Table | None,
) -> None:
    """Display the summary panel with task and PR context.

    Args:
        console: Rich console for output.
        repo: Repository in "owner/name" form.
        pr: Pull-request number.
        task_id: Identifier of the generated task.
        task_dir: Directory of the generated Harbor task.
        gen_log_path: Path to the generation debug log.
        validation_table: Optional validation-results table appended below
            the summary rows.
    """
    # Count test files (best-effort: Python first, then JS/TS fallback).
    test_files_count = 0
    try:
        test_files = list((task_dir / "tests").glob("*.py"))
        if not test_files:
            test_files = list((task_dir / "tests").glob("*.js")) + list(
                (task_dir / "tests").glob("*.ts")
            )
        test_files_count = len(test_files)
    except Exception:
        # Display-only; fall back to 0 rather than failing the summary.
        pass

    # NOTE: the original defined a `_short(sha)` helper here that was never
    # called (Base/Head are shown as "-"); it has been removed as dead code.
    summary = Table(show_header=False, box=None)
    summary.add_row("Repo", Text(repo))
    summary.add_row("PR", Text(str(pr)))
    summary.add_row("Base", Text("-"))  # Not tracked in current implementation
    summary.add_row("Head", Text("-"))  # Not tracked in current implementation
    summary.add_row("Changed files", Text("-"))  # Not tracked in current implementation
    summary.add_row("Test files", Text(str(test_files_count)))
    summary.add_row("Task ID", Text(task_id, style="bold"))
    summary.add_row("Harbor task", Text(str(task_dir)))
    summary.add_row("Debug log", Text(str(gen_log_path)))

    content = Group(summary, validation_table) if validation_table is not None else summary
    console.print(Panel(content, title="Summary", border_style="green"))
|
|
322
|
+
|
|
323
|
+
|
|
324
|
+
def _display_logs_panel(
    console: Console,
    gen_log_path: Path,
    harbor_nop_job_dir: str | None,
    harbor_oracle_job_dir: str | None,
) -> None:
    """Print a panel listing the harbor job directories and the generate log."""
    body = Table(show_header=False, box=None, expand=True)
    body.add_column("Item", no_wrap=True)
    body.add_column("Path", overflow="fold", no_wrap=False)
    job_rows = (
        ("Harbor nop job", harbor_nop_job_dir or "-"),
        ("Harbor oracle job", harbor_oracle_job_dir or "-"),
    )
    for label, value in job_rows:
        body.add_row(label, Text(value, overflow="fold"))
    body.add_row("Generate log", Text(str(gen_log_path)))
    console.print(Panel(body, title="Logs", border_style="magenta"))
|
|
338
|
+
|
|
339
|
+
|
|
340
|
+
def _display_next_steps_panel(
    console: Console,
    harbor_root: Path,
    task_id: str,
) -> None:
    """Print the recommended follow-up actions after a task is generated."""
    actions = [
        "Confirm validation results match expectations; review Logs for mismatches",
        "Review generated files (especially Dockerfile)",
        "Review instruction.md and task.toml",
        f"Harbor nop: harbor run --agent nop -p {harbor_root} -t {task_id}",
        f"Harbor oracle: harbor run --agent oracle -p {harbor_root} -t {task_id}",
        f"Create a pull request including the new task under {harbor_root / task_id}",
    ]
    steps = Table(show_header=False, box=None)
    for number, action in enumerate(actions, start=1):
        steps.add_row(f"{number}.", action)
    console.print(Panel(steps, title="Next Steps", border_style="cyan"))
|
|
356
|
+
|
|
357
|
+
|
|
358
|
+
def _run_harbor_validations(
    task_id: str,
    harbor_root: Path,
    harbor_jobs: Path,
    console: Console,
    environment: EnvironmentType = EnvironmentType.DOCKER,
) -> tuple[list[list[str]], dict[str, str | None]]:
    """Run Harbor validations (nop + oracle) sequentially.

    Returns:
        Tuple of (results_rows, job_dirs) where:
        - results_rows: List of [phase, expected, actual, match] for each validation
        - job_dirs: Dict mapping agent names to job directory paths (as strings)
    """
    with console.status("Running harbor nop + oracle...", spinner="dots"):
        reward_nop, reward_oracle, job_paths = run_nop_oracle(
            task_id=task_id,
            dataset_path=harbor_root,
            jobs_dir=harbor_jobs,
            environment=environment,
        )

    # Stringify job paths (None stays None when an agent produced no job dir).
    job_dirs = {
        agent: str(job_paths[agent]) if job_paths[agent] else None
        for agent in ("nop", "oracle")
    }

    def _row(phase: str, want: int, got) -> list[str]:
        # One summary row: expected reward, observed reward, and match flag.
        observed = f"reward={got}" if got is not None else "reward=unknown"
        return [phase, f"reward={want}", observed, "Yes" if got == want else "No"]

    # nop must score 0 (task is not already solved); oracle must score 1.
    results_rows = [
        _row("Harbor nop", 0, reward_nop),
        _row("Harbor oracle", 1, reward_oracle),
    ]
    return results_rows, job_dirs
|
|
403
|
+
|
|
404
|
+
|
|
405
|
+
def run_reversal(config: CreateConfig) -> None:
    """Convert a merged PR into a Harbor task.

    End-to-end flow: header display, linked-issue policy check, local
    dedupe against the JSONL state file, skeleton generation, Claude Code
    "make it work" session, optional Harbor nop/oracle validation,
    state-record persistence, and final summary panels.

    Args:
        config: Typed configuration with repo, PR number, and options.

    Raises:
        MissingIssueError: No linked issue and config.require_issue is set.
        TrivialPRError: The PR was judged too trivial to convert.
        FileExistsError: The task already exists and force was not given.
        ValidationError: Harbor/CC validation did not pass.
    """
    rich_traceback_install(show_locals=False)
    console = Console()
    pipeline = PRToHarborPipeline(repo=config.repo, pr_number=config.pr)
    # Configure file logging for detailed generation logs
    logs_root = Path(config.state_dir) / "logs"
    logs_root.mkdir(parents=True, exist_ok=True)
    gen_log_path = logs_root / f"generate-{pipeline.task_id}.log"
    _configure_file_logger(gen_log_path)
    try:
        # Header
        _display_header(console, pipeline, config.pr)

        # Check for linked issues if required (may raise MissingIssueError)
        linked_issues = _check_linked_issues(console, pipeline, config.pr, config.require_issue)

        # Simple local dedupe: check-before
        # Lowercase repo for consistency (GitHub is case-insensitive, Docker requires lowercase)
        repo_key = f"{pipeline.repo.lower()}#{config.pr}"
        state_dir: Path = config.state_dir or Path(".state")
        state_file = state_dir / "create.jsonl"
        if _check_dedupe(console, repo_key, state_file, config.force):
            # Task was generated previously and --force was not given.
            return

        harbor_root = config.output
        harbor_root.mkdir(parents=True, exist_ok=True)
        t0 = time.perf_counter()

        # CC detects language automatically and fills in the skeleton
        cc_result: ClaudeCodeResult | None = None

        try:
            # try: skeleton generation + CC
            verbose = config.verbose

            # Step 1a: Fetch PR metadata
            console.print("[dim] → Fetching PR metadata...[/dim]")
            metadata = pipeline.pr_fetcher.fetch_pr_metadata(allow_unmerged=config.allow_unmerged)

            # Step 1b: Clone/update repo cache
            console.print(
                "[dim] → Cloning/updating repo cache (may take a minute for first clone)...[/dim]"
            )
            repo_cache_dir = config.state_dir / "repos" if config.state_dir else None
            repo_cache = RepoCache(repo_cache_dir)
            repo_path = repo_cache.get_or_clone(
                repo=pipeline.repo,
                head_sha=metadata["head_sha"],
                repo_url=metadata["repo_url"],
            )
            console.print(f"[dim] Repo at: {repo_path}[/dim]")

            # Step 1c: Generate skeleton files (includes LLM call for PR evaluation)
            console.print("[dim] → Generating skeleton and evaluating...[/dim]")
            with console.status("Evaluating PR & writing skeleton...", spinner="dots"):
                (
                    task_dir,
                    _,
                    extracted_test_files,
                    task_reference,
                ) = pipeline.generate_task(
                    tasks_root=harbor_root,
                    overwrite=bool(config.force),
                    cache_dir=repo_cache_dir,
                    repo_path=repo_path,
                    metadata=metadata,
                    linked_issues=linked_issues,
                    run_cc=False,  # Run CC separately after skeleton
                    cc_timeout=config.cc_timeout,
                    verbose=verbose,
                    use_cache=config.use_cache,
                    state_dir=config.state_dir,
                    require_minimum_difficulty=config.require_minimum_difficulty,
                    min_source_files=config.min_source_files,
                    max_source_files=config.max_source_files,
                    environment=config.environment.value,
                )

            skeleton_secs = time.perf_counter() - t0
            console.print(
                f"[green]✓ Skeleton generated in {skeleton_secs:.1f}s → {task_dir}[/green]"
            )
            console.print(f" [dim]Test files: {len(extracted_test_files)}[/dim]")

            # Step 2: Run CC "make it work" session
            console.print()
            if task_reference:
                # A prior task for this repo exists; CC adapts from it.
                console.print(
                    Rule(
                        Text(
                            f"Claude Code: Adapt from PR #{task_reference.pr_number}",
                            style="bold magenta",
                        )
                    )
                )
                console.print(
                    f"[dim]Reference: {task_reference.task_id} | Timeout: {config.cc_timeout}s | Verbose: {str(verbose).lower()}[/dim]"
                )
            else:
                console.print(Rule(Text("Claude Code", style="bold magenta")))
                console.print(
                    f"[dim]Timeout: {config.cc_timeout}s | Verbose: {str(verbose).lower()}[/dim]"
                )
            console.print()

            cc_result = run_claude_code_session(
                repo=pipeline.repo,
                pr_number=pipeline.pr_number,
                repo_path=repo_path,
                task_dir=task_dir,
                task_id=pipeline.task_id,
                dataset_path=harbor_root,
                test_files=extracted_test_files,
                timeout=config.cc_timeout,
                verbose=verbose,
                reference_task_id=task_reference.task_id if task_reference else None,
                reference_pr=task_reference.pr_number if task_reference else None,
                head_sha=metadata.get("head_sha"),
                environment=config.environment.value,
            )

            gen_secs = time.perf_counter() - t0

            if cc_result and cc_result.success:
                console.print()
                console.print(f"[green]✓ Task generated and validated in {gen_secs:.1f}s[/green]")
            elif cc_result:
                console.print()
                console.print(
                    f"[yellow]⚠ CC session completed in {gen_secs:.1f}s (validation incomplete)[/yellow]"
                )
                if cc_result.error_message:
                    console.print(f" [red]Error: {cc_result.error_message}[/red]")
            else:
                console.print(
                    f"[green]✓ Skeleton generated in {gen_secs:.1f}s → {task_dir}[/green]"
                )
        except TrivialPRError as e:
            # Skip trivial PRs gracefully
            console.print(
                Panel(
                    Text(str(e), style="yellow"),
                    title="[yellow]Skipped (Trivial PR)[/yellow]",
                    border_style="yellow",
                )
            )
            # Re-raise so calling code can handle appropriately
            raise
        except FileExistsError as e:
            # Task already exists
            console.print(
                Panel(
                    Text(str(e), style="yellow"),
                    title="[yellow]Task Already Exists[/yellow]",
                    border_style="yellow",
                )
            )
            # Re-raise so calling code can handle appropriately
            raise

        # Task ID from generated dir
        task_id = task_dir.name
        harbor_do = not config.no_validate

        # If CC already validated successfully, skip harbor validation
        if cc_result and cc_result.success:
            harbor_do = False
            console.print("[green]✓ Skipping harbor validation (CC already validated)[/green]")

        # Auto-validation unless skipped
        results_rows: list[list[str]] = []
        # Hold log paths for summary
        harbor_nop_job_dir = harbor_oracle_job_dir = None

        # If CC ran, add its results to the summary
        if cc_result:
            results_rows.append(
                [
                    "CC NOP",
                    "reward=0",
                    "reward=0" if cc_result.nop_passed else "failed",
                    "Yes" if cc_result.nop_passed else "No",
                ]
            )
            results_rows.append(
                [
                    "CC Oracle",
                    "reward=1",
                    "reward=1" if cc_result.oracle_passed else "failed",
                    "Yes" if cc_result.oracle_passed else "No",
                ]
            )

        if harbor_do:
            # Prepare harbor jobs directory
            harbor_jobs = (
                config.state_dir / "harbor-jobs"
                if isinstance(config.state_dir, Path)
                else Path(".state") / "harbor-jobs"
            )
            harbor_jobs = harbor_jobs.resolve()
            harbor_jobs.mkdir(parents=True, exist_ok=True)

            # Run validations serially to avoid Docker conflicts
            console.print(Rule(Text("Validations", style="bold blue")))

            validation_results, job_dirs = _run_harbor_validations(
                task_id, harbor_root, harbor_jobs, console, config.environment
            )
            results_rows.extend(validation_results)
            harbor_nop_job_dir = job_dirs.get("nop")
            harbor_oracle_job_dir = job_dirs.get("oracle")

        # Display validation results and check for failures
        harbor_validation_failed, cc_validation_failed = _display_validation_results(
            console, results_rows
        )
        validation_table = _build_validation_table(results_rows)

        # Handle validation failures (may raise ValidationError)
        harbor_actually_ran = any("Harbor" in row[0] for row in results_rows)
        _handle_validation_failure(
            console, harbor_validation_failed, cc_validation_failed, harbor_actually_ran
        )

        # Save state record (non-fatal if fails)
        _save_state_record(
            state_dir, state_file, repo_key, pipeline.repo, config.pr, task_id, task_dir
        )

        # Display final panels
        _display_summary_panel(
            console, pipeline.repo, config.pr, task_id, task_dir, gen_log_path, validation_table
        )
        _display_logs_panel(
            console,
            gen_log_path,
            harbor_nop_job_dir,
            harbor_oracle_job_dir,
        )
        _display_next_steps_panel(console, harbor_root, task_id)
    except (TrivialPRError, MissingIssueError, ValidationError, FileExistsError):
        # Re-raise these exceptions so caller can handle them
        raise
    except Exception as e:
        # Unexpected errors - print and re-raise for caller to handle
        console.print(Panel(Text(str(e)), title="Error", border_style="red"))
        traceback.print_exc()
        raise
|
|
659
|
+
|
|
660
|
+
|
|
661
|
+
def _run_harbor_with_status(
    task_id: str,
    harbor_root: Path,
    harbor_jobs_parent: Path,
    console: Console,
    phase: str,
    delete_after: bool = True,
    environment: EnvironmentType = EnvironmentType.DOCKER,
) -> Path | None:
    """Run harbor with a rich console status spinner.

    Thin wrapper around run_harbor_agent that adds console status feedback.

    Args:
        task_id: Task identifier.
        harbor_root: Harbor dataset root path.
        harbor_jobs_parent: Jobs directory path.
        console: Rich console for output.
        phase: Agent name ("nop" or "oracle").
        delete_after: If True, delete Docker image after run (default: True).
        environment: Environment type (docker, daytona, e2b, modal, runloop, gke).

    Returns:
        The job result path reported by run_harbor_agent (may be None).
    """
    status_message = f"Running harbor {phase}..."
    with console.status(status_message, spinner="dots"):
        outcome = run_harbor_agent(
            task_id=task_id,
            dataset_path=harbor_root,
            jobs_dir=harbor_jobs_parent,
            agent=phase,
            capture_output=True,
            delete_after=delete_after,
            environment=environment,
        )
    # run_harbor_agent returns a pair; only the job result path is needed here.
    return outcome[1]
|
|
694
|
+
|
|
695
|
+
|
|
696
|
+
def _configure_file_logger(path: Path) -> None:
|
|
697
|
+
logger = logging.getLogger("swegen")
|
|
698
|
+
logger.setLevel(logging.DEBUG)
|
|
699
|
+
logger.propagate = False
|
|
700
|
+
# Clear existing handlers
|
|
701
|
+
logger.handlers = []
|
|
702
|
+
fh = logging.FileHandler(path)
|
|
703
|
+
fh.setLevel(logging.DEBUG)
|
|
704
|
+
fmt = logging.Formatter("%(asctime)s [%(levelname)s] %(message)s")
|
|
705
|
+
fh.setFormatter(fmt)
|
|
706
|
+
logger.addHandler(fh)
|