swegen-0.1.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- swegen/__init__.py +14 -0
- swegen/analyze/__init__.py +24 -0
- swegen/analyze/classifier.py +637 -0
- swegen/analyze/classify_prompt.txt +241 -0
- swegen/analyze/models.py +253 -0
- swegen/analyze/run.py +656 -0
- swegen/analyze/verdict_prompt.txt +126 -0
- swegen/cli.py +411 -0
- swegen/config.py +142 -0
- swegen/create/__init__.py +22 -0
- swegen/create/claude_code_runner.py +988 -0
- swegen/create/claude_code_utils.py +95 -0
- swegen/create/create.py +706 -0
- swegen/create/diff_utils.py +142 -0
- swegen/create/orchestrator.py +368 -0
- swegen/create/pr_fetcher.py +187 -0
- swegen/create/repo_cache.py +175 -0
- swegen/create/task_instruction.py +363 -0
- swegen/create/task_reference.py +130 -0
- swegen/create/task_skeleton.py +266 -0
- swegen/create/utils.py +350 -0
- swegen/farm/__init__.py +13 -0
- swegen/farm/farm_hand.py +342 -0
- swegen/farm/fetcher.py +341 -0
- swegen/farm/state.py +231 -0
- swegen/farm/stream_farm.py +430 -0
- swegen/tools/__init__.py +16 -0
- swegen/tools/harbor_runner.py +191 -0
- swegen/tools/validate.py +523 -0
- swegen/tools/validate_utils.py +142 -0
- swegen-0.1.0.dist-info/METADATA +292 -0
- swegen-0.1.0.dist-info/RECORD +35 -0
- swegen-0.1.0.dist-info/WHEEL +4 -0
- swegen-0.1.0.dist-info/entry_points.txt +3 -0
- swegen-0.1.0.dist-info/licenses/LICENSE +201 -0
swegen/farm/state.py
ADDED
@@ -0,0 +1,231 @@
from __future__ import annotations

import json
from dataclasses import dataclass
from datetime import UTC, datetime
from pathlib import Path


@dataclass
class StreamState:
    """State for resumable streaming PR processing.

    Tracks which PRs have been processed, success/failure counts,
    and the last processed PR for resume capability.

    Attributes:
        repo: Repository name in "owner/repo" format
        processed_prs: Set of PR numbers that have been processed
        total_fetched: Total PRs fetched from API
        total_processed: Total PRs processed (attempted)
        successful: Count of successfully generated tasks
        failed: Count of failed task generations
        last_pr_number: Last processed PR number
        last_created_at: ISO timestamp of last processed PR's creation time
        last_updated: ISO timestamp of last state update
        skip_list_prs: Set of PR numbers to skip (from external skip list)

    # Detailed categorization
    successful_prs: dict[int, str] = None  # PR# -> task_id
    trivial_prs: set[int] = None  # Trivial PRs (too small/simple)
    no_issue_prs: set[int] = None  # PRs without linked issues
    no_tests_prs: set[int] = None  # PRs that don't modify tests
    validation_failed_prs: set[int] = None  # Failed Harbor validation
    already_exists_prs: set[int] = None  # Task already exists
    rate_limit_prs: set[int] = None  # GitHub API rate limit
    quota_exceeded_prs: set[int] = None  # OpenAI quota exceeded
    timeout_prs: set[int] = None  # Command timeouts
    git_error_prs: set[int] = None  # Git checkout/commit errors
    other_failed_prs: dict[int, str] = None  # PR# -> error message
    """

    repo: str
    processed_prs: set[int] = None
    total_fetched: int = 0
    total_processed: int = 0
    successful: int = 0
    failed: int = 0
    last_pr_number: int | None = None
    last_created_at: str | None = None
    last_updated: str | None = None
    skip_list_prs: set[int] = None

    # Detailed categorization
    successful_prs: dict[int, str] = None  # PR# -> task_id
    trivial_prs: set[int] = None
    no_issue_prs: set[int] = None
    no_tests_prs: set[int] = None
    validation_failed_prs: set[int] = None
    already_exists_prs: set[int] = None
    rate_limit_prs: set[int] = None
    quota_exceeded_prs: set[int] = None
    timeout_prs: set[int] = None
    git_error_prs: set[int] = None
    other_failed_prs: dict[int, str] = None

    def __post_init__(self):
        if self.processed_prs is None:
            self.processed_prs = set()
        if self.skip_list_prs is None:
            self.skip_list_prs = set()
        if self.successful_prs is None:
            self.successful_prs = {}
        if self.trivial_prs is None:
            self.trivial_prs = set()
        if self.no_issue_prs is None:
            self.no_issue_prs = set()
        if self.no_tests_prs is None:
            self.no_tests_prs = set()
        if self.validation_failed_prs is None:
            self.validation_failed_prs = set()
        if self.already_exists_prs is None:
            self.already_exists_prs = set()
        if self.rate_limit_prs is None:
            self.rate_limit_prs = set()
        if self.quota_exceeded_prs is None:
            self.quota_exceeded_prs = set()
        if self.timeout_prs is None:
            self.timeout_prs = set()
        if self.git_error_prs is None:
            self.git_error_prs = set()
        if self.other_failed_prs is None:
            self.other_failed_prs = {}

    def mark_processed(
        self, pr_number: int, created_at: str, success: bool, task_id: str = None,
        category: str = None, message: str = None
    ) -> None:
        """Mark a PR as processed and update counters.

        Args:
            pr_number: The PR number that was processed
            created_at: ISO timestamp of when the PR was created
            success: Whether the task generation succeeded
            task_id: Task ID if successful (for tracking)
            category: Category of result (for detailed stats)
            message: Error/skip message (for other_failed category)
        """
        self.processed_prs.add(pr_number)
        self.total_processed += 1

        if success:
            self.successful += 1
            if task_id:
                self.successful_prs[pr_number] = task_id
        else:
            self.failed += 1
            # Categorize the failure/skip
            if category == "trivial":
                self.trivial_prs.add(pr_number)
            elif category == "no_issue":
                self.no_issue_prs.add(pr_number)
            elif category == "no_tests":
                self.no_tests_prs.add(pr_number)
            elif category == "validation_failed":
                self.validation_failed_prs.add(pr_number)
            elif category == "already_exists":
                self.already_exists_prs.add(pr_number)
            elif category == "rate_limit":
                self.rate_limit_prs.add(pr_number)
            elif category == "quota_exceeded":
                self.quota_exceeded_prs.add(pr_number)
            elif category == "timeout":
                self.timeout_prs.add(pr_number)
            elif category == "git_error":
                self.git_error_prs.add(pr_number)
            else:
                # Other/unknown error
                self.other_failed_prs[pr_number] = message or "Unknown error"

        self.last_pr_number = pr_number
        self.last_created_at = created_at
        self.last_updated = datetime.now(UTC).isoformat()

    def to_dict(self) -> dict:
        """Convert to dict for JSON serialization."""
        return {
            "repo": self.repo,
            "processed_prs": list(self.processed_prs),
            "total_fetched": self.total_fetched,
            "total_processed": self.total_processed,
            "successful": self.successful,
            "failed": self.failed,
            "last_pr_number": self.last_pr_number,
            "last_created_at": self.last_created_at,
            "last_updated": self.last_updated,
            # Detailed breakdown
            "successful_prs": {str(k): v for k, v in self.successful_prs.items()},
            "trivial_prs": list(self.trivial_prs),
            "no_issue_prs": list(self.no_issue_prs),
            "no_tests_prs": list(self.no_tests_prs),
            "validation_failed_prs": list(self.validation_failed_prs),
            "already_exists_prs": list(self.already_exists_prs),
            "rate_limit_prs": list(self.rate_limit_prs),
            "quota_exceeded_prs": list(self.quota_exceeded_prs),
            "timeout_prs": list(self.timeout_prs),
            "git_error_prs": list(self.git_error_prs),
            "other_failed_prs": {str(k): v for k, v in self.other_failed_prs.items()},
        }

    @classmethod
    def from_dict(cls, data: dict) -> StreamState:
        """Load state from a dict.

        Args:
            data: Dict previously created by to_dict()

        Returns:
            StreamState instance
        """
        return cls(
            repo=data["repo"],
            processed_prs=set(data.get("processed_prs", [])),
            total_fetched=data.get("total_fetched", 0),
            total_processed=data.get("total_processed", 0),
            successful=data.get("successful", 0),
            failed=data.get("failed", 0),
            last_pr_number=data.get("last_pr_number"),
            last_created_at=data.get("last_created_at"),
            last_updated=data.get("last_updated"),
            # Detailed breakdown
            successful_prs={int(k): v for k, v in data.get("successful_prs", {}).items()},
            trivial_prs=set(data.get("trivial_prs", [])),
            no_issue_prs=set(data.get("no_issue_prs", [])),
            no_tests_prs=set(data.get("no_tests_prs", [])),
            validation_failed_prs=set(data.get("validation_failed_prs", [])),
            already_exists_prs=set(data.get("already_exists_prs", [])),
            rate_limit_prs=set(data.get("rate_limit_prs", [])),
            quota_exceeded_prs=set(data.get("quota_exceeded_prs", [])),
            timeout_prs=set(data.get("timeout_prs", [])),
            git_error_prs=set(data.get("git_error_prs", [])),
            other_failed_prs={int(k): v for k, v in data.get("other_failed_prs", {}).items()},
        )

    def save(self, state_file: Path) -> None:
        """Save state to a JSON file.

        Args:
            state_file: Path to save state to
        """
        state_file.parent.mkdir(parents=True, exist_ok=True)
        state_file.write_text(json.dumps(self.to_dict(), indent=2))

    @classmethod
    def load(cls, state_file: Path, repo: str) -> StreamState:
        """Load state from file, or create new if not exists.

        Args:
            state_file: Path to state file
            repo: Repository name (used to verify state matches)

        Returns:
            StreamState instance (loaded or new)
        """
        if state_file.exists():
            try:
                data = json.loads(state_file.read_text())
                if data.get("repo") == repo:
                    return cls.from_dict(data)
            except Exception:
                pass
        return cls(repo=repo)
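The whole persistence contract of StreamState is the to_dict()/from_dict() pair behind save() and load(). A minimal round-trip sketch against the API shown above (the state-file path and PR number are hypothetical, for illustration only):

from pathlib import Path

from swegen.farm.state import StreamState

state_file = Path("state/stream_farm/owner-repo.json")  # hypothetical path

# First run: no file on disk yet, so load() returns a fresh state.
state = StreamState.load(state_file, repo="owner/repo")
state.mark_processed(
    1234, "2024-01-15T10:30:00Z", success=False,
    category="no_tests",  # counted in state.no_tests_prs
)
state.save(state_file)  # serialized via to_dict(); parent dirs created

# Later run: the repo matches, so the saved state is restored.
resumed = StreamState.load(state_file, repo="owner/repo")
assert 1234 in resumed.processed_prs and resumed.failed == 1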
swegen/farm/stream_farm.py
ADDED
@@ -0,0 +1,430 @@
from __future__ import annotations

import json
import shutil
import signal
import subprocess
import time
from dataclasses import asdict
from datetime import UTC, datetime
from pathlib import Path

from rich.console import Console
from rich.panel import Panel
from rich.rule import Rule
from rich.table import Table
from rich.text import Text

from swegen.config import FarmConfig

from .farm_hand import (
    PRCandidate,
    TaskResult,
    _now_utc,
    _run_reversal_for_pr,
    _slug,
)
from .fetcher import StreamingPRFetcher, load_skip_list
from .state import StreamState

DOCKER_CLEANUP_CMD = "docker system prune -af"


class StreamFarmer:
    """Manages continuous PR farming with streaming.

    Orchestrates the process of:
    1. Streaming PRs from GitHub (via StreamingPRFetcher)
    2. Processing each PR into a Harbor task (via farm_hand)
    3. Tracking state for resumability (via StreamState)
    4. Periodic cleanup and progress reporting

    Attributes:
        repo: Repository in "owner/repo" format
        config: FarmConfig with all settings
        console: Rich console for output
        tasks_root: Directory for generated tasks
        state: StreamState for tracking progress
        state_file: Path to state persistence file
        resume_from_time: ISO timestamp to resume from (if any)
        fetcher: StreamingPRFetcher instance
        results: List of TaskResult from this session
        shutdown_requested: Flag for graceful shutdown
    """

    def __init__(
        self,
        repo: str,
        config: FarmConfig,
        console: Console,
    ):
        self.repo = repo
        self.config = config
        self.console = console
        self.tasks_root = config.output
        self.tasks_root.mkdir(exist_ok=True)

        # State file path
        self.state_file = config.state_dir / "stream_farm" / f"{_slug(repo)}.json"

        # Load or create state
        if config.reset:
            self.state = StreamState(repo=repo)
            self.console.print("[yellow]State reset - starting fresh[/yellow]")
        else:
            self.state = StreamState.load(self.state_file, repo)

        # Load skip list if provided
        if config.skip_list:
            skip_list_path = Path(config.skip_list)
            skip_prs = load_skip_list(skip_list_path, repo)
            self.state.skip_list_prs = skip_prs
            if skip_prs:
                self.console.print(
                    f"[yellow]Loaded skip list: {len(skip_prs)} PRs to skip from {skip_list_path}[/yellow]"
                )

        # Determine resume time
        self.resume_from_time = self._determine_resume_time()

        # Create streaming fetcher (always require tests)
        self.fetcher = StreamingPRFetcher(
            repo=repo,
            console=console,
            state=self.state,
            min_files=config.min_source_files,  # Early approximate filter
            require_tests=True,  # Always require tests
            api_delay=config.api_delay,
        )

        # Results tracking
        self.results: list[TaskResult] = []

        # Graceful shutdown handling
        self.shutdown_requested = False
        signal.signal(signal.SIGINT, self._handle_shutdown)
        signal.signal(signal.SIGTERM, self._handle_shutdown)

    def _determine_resume_time(self) -> str | None:
        """Determine the resume time based on config and state.

        Returns:
            ISO timestamp string to resume from, or None to start fresh
        """
        if self.config.resume_from:
            # User specified a resume time - parse date or full timestamp
            resume_input = self.config.resume_from.strip()
            try:
                # Try to parse as date only (YYYY-MM-DD)
                if len(resume_input) == 10 and resume_input.count("-") == 2:
                    # Date only - convert to end of day (23:59:59) since we're working backwards
                    resume_date = datetime.strptime(resume_input, "%Y-%m-%d")
                    # Set to end of day in UTC
                    resume_dt = resume_date.replace(
                        hour=23, minute=59, second=59, microsecond=999999, tzinfo=UTC
                    )
                    self.console.print(
                        f"[yellow]Resuming from end of {resume_input} "
                        f"(processing PRs merged before this date)[/yellow]"
                    )
                    return resume_dt.isoformat()
                else:
                    # Full timestamp - validate it parses
                    datetime.fromisoformat(resume_input.replace("Z", "+00:00"))
                    return resume_input
            except ValueError as e:
                self.console.print(
                    f"[red]Error: Invalid --resume-from format: {resume_input}[/red]"
                )
                self.console.print("[yellow]Expected date like: 2024-01-15[/yellow]")
                self.console.print("[yellow]Or full timestamp like: 2024-01-15T10:30:00Z[/yellow]")
                raise ValueError(f"Invalid timestamp format: {e}") from e
        elif not self.config.reset and self.state.last_created_at:
            # Resume from last processed PR's creation time
            self.console.print(
                f"[yellow]Resuming from last processed PR (created at {self.state.last_created_at})[/yellow]"
            )
            return self.state.last_created_at

        return None

    def _handle_shutdown(self, signum, frame):
        """Handle graceful shutdown on interrupt."""
        self.console.print("\n[yellow]Shutdown requested... finishing current PR...[/yellow]")
        self.shutdown_requested = True

    def run(self) -> int:
        """Run the continuous farming process.

        Returns:
            Exit code: 0 if any tasks succeeded, 1 otherwise
        """
        self._print_header()

        # Start streaming and processing
        try:
            self._run_stream()
        except KeyboardInterrupt:
            self.console.print("\n[yellow]Interrupted by user[/yellow]")
        finally:
            self._finalize()

        return 0 if self.state.successful > 0 else 1

    def _print_header(self) -> None:
        """Print the farming header with settings."""
        self.console.print(Rule(Text(f"Stream Farming - {self.repo}", style="bold cyan")))

        # pipeline info
        self.console.print("[green]Only PRs that modify tests will be considered.[/green]")

        if self.config.issue_only:
            self.console.print(
                "[magenta]ISSUE-ONLY MODE - only PRs with linked issues will be processed[/magenta]"
            )

        if self.config.dry_run:
            self.console.print("[cyan]DRY RUN MODE - no tasks will be generated[/cyan]")

        self.console.print(
            f"[dim]Timeout: {self.config.timeout}s | " f"State: {self.state_file}[/dim]\n"
        )

    def _run_stream(self) -> None:
        """Process PRs synchronously: fetch one, process it, repeat."""
        self.console.print("[cyan]Streaming and processing PRs...[/cyan]\n")

        for pr in self.fetcher.stream_prs(resume_from_time=self.resume_from_time):
            if self.shutdown_requested:
                self.console.print("[yellow]Shutdown requested, stopping...[/yellow]")
                break

            self._process_pr(pr)

    def _process_pr(self, pr: PRCandidate) -> None:
        """Process a single PR candidate.

        Args:
            pr: The PR candidate to process
        """
        # Print PR header
        merged_dt = datetime.fromisoformat(pr.merged_at.replace("Z", "+00:00"))
        self.console.print(
            f"\n[bold cyan]═══ PR #{pr.number} ({self.state.total_processed + 1}) ═══[/bold cyan]"
        )
        self.console.print(f"[bold]{pr.title}[/bold]")
        self.console.print(
            f"[dim]Merged: {merged_dt.strftime('%Y-%m-%d %H:%M:%S UTC')} | "
            f"Files: {pr.files_changed} | "
            f"+{pr.additions}/-{pr.deletions}[/dim]"
        )

        # Process this PR completely before moving to next
        result = _run_reversal_for_pr(pr, self.config, self.tasks_root, self.console)
        self.results.append(result)

        # Mark as processed with detailed tracking
        self.state.mark_processed(
            pr.number,
            pr.created_at,
            result.status == "success",
            task_id=result.task_id if result.status == "success" else None,
            category=result.category,
            message=result.message if result.category == "other" else None,
        )
        self._save_state()

        # Show result
        self._print_result(result)

        # Rate limit protection: sleep between PRs
        self.console.print(f"[dim]Waiting {self.config.task_delay} seconds before next PR...[/dim]")
        time.sleep(self.config.task_delay)

        # Periodic summary
        if self.state.total_processed % 10 == 0:
            self._print_progress()

        # Docker cleanup after batch
        if self.config.docker_prune_batch > 0:
            if self.state.total_processed % self.config.docker_prune_batch == 0:
                self._prune_docker()

    def _print_result(self, result: TaskResult) -> None:
        """Print the result of processing a PR.

        Args:
            result: The TaskResult to display
        """
        if result.status == "success":
            self.console.print(f"[green]✓ Success: {result.message}[/green]")
        elif result.status == "dry-run":
            self.console.print(f"[cyan]○ Dry-run: {result.message}[/cyan]")
        else:
            self.console.print(f"[red]✗ Failed: {result.message}[/red]")

    def _print_progress(self) -> None:
        """Print progress summary."""
        last_info = f"#{self.state.last_pr_number or 'N/A'}"
        if self.state.last_created_at:
            created_dt = datetime.fromisoformat(self.state.last_created_at.replace("Z", "+00:00"))
            last_info = f"#{self.state.last_pr_number} (created {created_dt.strftime('%Y-%m-%d')})"

        # Calculate top failure reasons
        failure_summary = []
        if len(self.state.trivial_prs) > 0:
            failure_summary.append(f"Trivial: {len(self.state.trivial_prs)}")
        if len(self.state.no_issue_prs) > 0:
            failure_summary.append(f"No Issue: {len(self.state.no_issue_prs)}")
        if len(self.state.validation_failed_prs) > 0:
            failure_summary.append(f"Validation: {len(self.state.validation_failed_prs)}")

        failure_text = ", ".join(failure_summary[:3]) if failure_summary else "None"
        success_rate = (self.state.successful / self.state.total_processed * 100) if self.state.total_processed > 0 else 0

        self.console.print(
            Panel(
                f"Processed: {self.state.total_processed}\n"
                f"✓ Success: {self.state.successful} ({success_rate:.1f}%)\n"
                f"✗ Failed: {self.state.failed}\n"
                f"Top failures: {failure_text}\n"
                f"Last PR: {last_info}",
                title="Progress",
                border_style="cyan",
            )
        )

    def _prune_docker(self) -> None:
        """Run docker cleanup to free disk space."""
        if shutil.which("docker") is None:
            self.console.print(
                "[yellow]Skipping docker prune (docker binary not found in PATH).[/yellow]"
            )
            return

        self.console.print(
            Panel(
                f"Running docker cleanup: {DOCKER_CLEANUP_CMD}",
                title="Disk cleanup",
                border_style="yellow",
            )
        )

        try:
            result = subprocess.run(
                DOCKER_CLEANUP_CMD,
                shell=True,
                capture_output=True,
                text=True,
                timeout=600,
            )
            if result.returncode == 0:
                stdout = result.stdout.strip()
                if stdout:
                    # Show summary if available
                    lines = stdout.split("\n")
                    summary_lines = [
                        line
                        for line in lines
                        if "reclaimed" in line.lower()
                        or "deleted" in line.lower()
                        or "total" in line.lower()
                    ]
                    if summary_lines:
                        self.console.print(f"[dim]{summary_lines[0]}[/dim]")
                self.console.print("[green]Docker cleanup completed[/green]")
            else:
                self.console.print(f"[red]Docker cleanup failed (exit {result.returncode})[/red]")
                if result.stderr:
                    self.console.print(f"[red]{result.stderr.strip()}[/red]")
        except subprocess.TimeoutExpired:
            self.console.print("[red]Docker prune timed out after 600s[/red]")

    def _save_state(self) -> None:
        """Save state to file."""
        self.state.save(self.state_file)

    def _finalize(self) -> None:
        """Finalize the run and print summary."""
        self._save_state()
        self._save_log()

        self.console.print("\n")
        self.console.print(Rule(Text("Final Summary", style="bold magenta")))

        # Summary table
        table = Table(show_header=True, header_style="bold")
        table.add_column("Metric", style="cyan")
        table.add_column("Count", justify="right")

        table.add_row("PRs Processed", str(self.state.total_processed))
        table.add_row("Successful", f"[green]{self.state.successful}[/green]")
        table.add_row("Failed", f"[red]{self.state.failed}[/red]")

        # Add detailed breakdown
        if self.state.failed > 0:
            table.add_row("", "")  # Spacer
            table.add_row("[bold]Failure Breakdown:[/bold]", "")
            if self.state.trivial_prs:
                table.add_row(" Trivial PRs", str(len(self.state.trivial_prs)))
            if self.state.no_issue_prs:
                table.add_row(" No Linked Issue", str(len(self.state.no_issue_prs)))
            if self.state.no_tests_prs:
                table.add_row(" No Tests", str(len(self.state.no_tests_prs)))
            if self.state.validation_failed_prs:
                table.add_row(" Validation Failed", str(len(self.state.validation_failed_prs)))
            if self.state.already_exists_prs:
                table.add_row(" Already Exists", str(len(self.state.already_exists_prs)))
            if self.state.rate_limit_prs:
                table.add_row(" Rate Limited", str(len(self.state.rate_limit_prs)))
            if self.state.quota_exceeded_prs:
                table.add_row(" Quota Exceeded", str(len(self.state.quota_exceeded_prs)))
            if self.state.timeout_prs:
                table.add_row(" Timeouts", str(len(self.state.timeout_prs)))
            if self.state.git_error_prs:
                table.add_row(" Git Errors", str(len(self.state.git_error_prs)))
            if self.state.other_failed_prs:
                table.add_row(" Other Errors", str(len(self.state.other_failed_prs)))

        self.console.print(table)

        if self.state.successful > 0:
            success_rate = (self.state.successful / self.state.total_processed) * 100
            self.console.print(
                f"\n[green]✓ Generated {self.state.successful} tasks successfully! "
                f"({success_rate:.1f}% success rate)[/green]"
            )
            self.console.print("[dim]Tasks located in: tasks/[/dim]")

        log_path = self._get_log_path()
        self.console.print(f"\n[dim]Detailed log: {log_path}[/dim]")
        self.console.print(f"[dim]State saved: {self.state_file}[/dim]")

    def _save_log(self) -> None:
        """Save results log to file."""
        log_path = self._get_log_path()
        log_path.parent.mkdir(parents=True, exist_ok=True)

        payload = {
            "repo": self.repo,
            "stats": self.state.to_dict(),
            "args": {
                "require_tests": True,
                "timeout": self.config.timeout,
            },
            "results": [asdict(r) for r in self.results],
        }

        log_path.write_text(json.dumps(payload, indent=2))

    def _get_log_path(self) -> Path:
        """Get the log file path.

        Returns:
            Path to the log file for this session
        """
        slug = _slug(self.repo).replace("-", "_")
        timestamp = datetime.fromisoformat(
            self.state.last_updated or _now_utc().isoformat()
        ).strftime("%Y%m%d_%H%M%S")
        return self.config.state_dir / "logs" / f"stream_farm_{slug}_{timestamp}.json"
swegen/tools/__init__.py
ADDED
@@ -0,0 +1,16 @@
from .validate import ValidateArgs, run_validate
from .validate_utils import (
    ValidationError,
    check_validation_passed,
    run_nop_oracle,
    validate_task_structure,
)

__all__ = [
    "run_validate",
    "ValidateArgs",
    "ValidationError",
    "validate_task_structure",
    "run_nop_oracle",
    "check_validation_passed",
]
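These re-exports make the validation helpers importable directly from swegen.tools. A hedged usage sketch follows; the call signature of validate_task_structure is assumed (a task directory argument), since it is not shown in this diff:

from pathlib import Path

from swegen.tools import ValidationError, validate_task_structure

task_dir = Path("tasks/example-task")  # hypothetical task directory
try:
    validate_task_structure(task_dir)  # assumed signature; not confirmed by this diff
except ValidationError as exc:
    print(f"Task layout problem: {exc}")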