swegen-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
swegen/farm/farm_hand.py ADDED
@@ -0,0 +1,342 @@
+from __future__ import annotations
+
+import shutil
+import time
+import traceback
+from dataclasses import dataclass
+from datetime import UTC, datetime
+from pathlib import Path
+
+from rich.console import Console
+from rich.panel import Panel
+
+from swegen.config import CreateConfig, FarmConfig
+from swegen.create import MissingIssueError, TrivialPRError, ValidationError
+from swegen.create.create import run_reversal
+from swegen.create.task_reference import TaskReferenceStore
+
+
+def _now_utc() -> datetime:
+    return datetime.now(UTC)
+
+
+def _slug(repo: str) -> str:
+    """Convert repo to slug using SWEBench convention: owner/repo -> owner__repo"""
+    return repo.replace("/", "__")
+
+
+def _task_id(repo: str, pr_number: int) -> str:
+    """Generate task ID using SWEBench convention: owner__repo-number"""
+    return f"{_slug(repo)}-{pr_number}"
+
+
+@dataclass
+class PRCandidate:
+    """A candidate PR for task generation."""
+
+    number: int
+    title: str
+    created_at: str
+    merged_at: str
+    author: str
+    files_changed: int
+    additions: int
+    deletions: int
+    url: str
+
+
+@dataclass
+class TaskResult:
+    """Result of processing a single PR into a task."""
+
+    repo: str
+    pr_number: int
+    task_id: str
+    status: str  # "success", "failed", or "dry-run"
+    message: str
+    duration_seconds: float
+    timestamp: str
+    category: str | None = None  # Category for detailed tracking
+
+
+def _cleanup_task(task_id: str, tasks_root: Path, console: Console) -> None:
+    removed_any = False
+    paths = [
+        tasks_root / task_id,
+        Path("trash") / task_id,
+    ]
+    for path in paths:
+        if path.exists():
+            shutil.rmtree(path, ignore_errors=True)
+            removed_any = True
+    if removed_any:
+        console.print(f"[dim]Cleaned up incomplete task directory: {task_id}[/dim]")
+
+
+def _classify_failure(stderr: str) -> tuple[str, str]:
+    """Classify failure reason and return (category, message).
+
+    Categories:
+    - trivial: Trivial PR (too small/simple)
+    - no_issue: No linked issue
+    - no_tests: No tests detected
+    - validation_failed: Harbor validation failed
+    - already_exists: Task already exists
+    - rate_limit: GitHub API rate limit
+    - quota_exceeded: OpenAI quota exceeded
+    - timeout: Command timeout
+    - git_error: Git checkout/commit errors
+    - other: Unknown/other errors
+    """
+    lowered = stderr.lower()
+    if "trivial" in lowered:
+        return "trivial", "Trivial PR (skipped)"
+    if "no linked issue" in lowered or "missingissueerror" in lowered:
+        return "no_issue", "No linked issue (skipped)"
+    if "validation failed" in lowered or "harbor validation" in lowered:
+        return "validation_failed", "Validation failed (NOP or Oracle)"
+    if "task already exists" in lowered or "file exists" in lowered:
+        return "already_exists", "Task already exists (skipped)"
+    if "no test" in lowered:
+        return "no_tests", "No tests detected"
+    if "rate limit exceeded" in lowered and "github" in lowered:
+        return "rate_limit", "GitHub API rate limit exceeded (set GITHUB_TOKEN)"
+    if "insufficient_quota" in lowered or "exceeded your current quota" in lowered:
+        return "quota_exceeded", "OpenAI API quota exceeded (check billing)"
+    if "timed out" in lowered or "timeout" in lowered:
+        return "timeout", "Command timed out"
+    if "cannot checkout commit" in lowered or "force-pushed or deleted" in lowered:
+        return "git_error", "Git commit not found (may be force-pushed or deleted)"
+    if "git checkout" in lowered:
+        return "git_error", "Git checkout failed (repo cache may be corrupted)"
+
+    message = (stderr or "Unknown error").replace("\n", " ")
+    return "other", message
+
+
+def _print_success(
+    console: Console,
+    pr: PRCandidate,
+    task_id: str,
+    harbor_dir: Path,
+) -> None:
+    console.print(
+        Panel.fit(
+            f"🎉 Successfully generated task\n[bold]{task_id}[/bold]\nHarbor: {harbor_dir}",
+            title=f"PR #{pr.number}",
+            border_style="green",
+        )
+    )
+
+
+def _gate_task(
+    task_id: str,
+    tasks_root: Path,
+) -> tuple[bool, str]:
+    """
+    Validate that the task directory exists.
+
+    Returns:
+        Tuple of (success, message)
+    """
+    task_dir = tasks_root / task_id
+    if not task_dir.exists():
+        return False, f"Task directory missing: {task_dir}"
+
+    return True, f"Task generated successfully at {task_dir}"
+
+
+def _run_reversal_for_pr(
+    pr: PRCandidate,
+    config: FarmConfig,
+    tasks_root: Path,
+    console: Console,
+) -> TaskResult:
+    start = time.time()
+    task_id = _task_id(config.repo, pr.number)
+    harbor_dir = tasks_root / task_id
+
+    # Wrap everything in try-except to catch unexpected errors
+    try:
+        return _run_reversal_for_pr_impl(
+            pr, config, tasks_root, console, task_id, harbor_dir, start
+        )
+    except Exception as e:
+        # Catch any unexpected exception and return proper error
+        error_msg = f"Unexpected error: {type(e).__name__}: {str(e)}"
+        console.print(f"[red]✗ PR #{pr.number}: {error_msg}[/red]")
+        console.print(f"[dim]{traceback.format_exc()}[/dim]")
+        _cleanup_task(task_id, tasks_root, console)
+        return TaskResult(
+            repo=config.repo,
+            pr_number=pr.number,
+            task_id=task_id,
+            status="failed",
+            message=error_msg,
+            duration_seconds=round(time.time() - start, 2),
+            timestamp=_now_utc().isoformat(),
+            category="other",
+        )
+
+
+def _run_reversal_for_pr_impl(
+    pr: PRCandidate,
+    config: FarmConfig,
+    tasks_root: Path,
+    console: Console,
+    task_id: str,
+    harbor_dir: Path,
+    start: float,
+) -> TaskResult:
+    if config.dry_run:
+        console.print(f"[cyan]DRY RUN[/cyan] would generate task for PR #{pr.number} -> {task_id}")
+        return TaskResult(
+            repo=config.repo,
+            pr_number=pr.number,
+            task_id=task_id,
+            status="dry-run",
+            message="Dry run (skipped actual execution)",
+            duration_seconds=0.0,
+            timestamp=_now_utc().isoformat(),
+            category=None,
+        )
+
+    # Build CreateConfig for run_reversal
+    create_config = CreateConfig(
+        repo=config.repo,
+        pr=pr.number,
+        output=config.output,
+        cc_timeout=config.cc_timeout,
+        validate=config.validate,  # Run Harbor validation if --validate flag is set
+        force=config.force,
+        state_dir=config.state_dir,
+        verbose=config.verbose,
+        quiet=False,
+        use_cache=not config.no_cache,
+        require_minimum_difficulty=config.require_minimum_difficulty,
+        min_source_files=config.min_source_files,
+        max_source_files=config.max_source_files,
+        require_issue=config.issue_only,
+        environment=config.environment,
+    )
+
+    # Capture any errors from the pipeline
+    success = False
+    error_msg = ""
+    error_category = None
+
+    try:
+        # Call the pipeline directly instead of using subprocess
+        run_reversal(create_config)
+        success = True
+    except TrivialPRError as e:
+        # Trivial PR - not an error, just skip it
+        error_msg = str(e)
+        error_category = "trivial"
+        success = False
+    except MissingIssueError as e:
+        # No linked issue - not an error, just skip it
+        error_msg = str(e)
+        error_category = "no_issue"
+        success = False
+    except ValidationError as e:
+        # Validation failed - not an error, just skip it
+        error_msg = str(e)
+        error_category = "validation_failed"
+        success = False
+    except FileExistsError as e:
+        # Task already exists - skip it
+        error_msg = f"Task already exists: {str(e)}"
+        error_category = "already_exists"
+        success = False
+    except Exception as e:
+        # Other errors
+        error_msg = f"{type(e).__name__}: {str(e)}"
+        if config.verbose:
+            console.print(f"[red]{traceback.format_exc()}[/red]")
+        # Classify the error
+        error_category, _ = _classify_failure(error_msg)
+        success = False
+
+    if success:
+        if not harbor_dir.exists():
+            # Check for trivial PR (should have been caught by TrivialPRError)
+            if "trivial" in error_msg.lower():
+                failure_reason = "Trivial PR (skipped)"
+                failure_category = "trivial"
+            else:
+                failure_reason = (
+                    "Pipeline reported success but Harbor task directory was not created."
+                )
+                failure_category = "other"
+            _cleanup_task(task_id, tasks_root, console)
+            console.print(f"[red]✗ PR #{pr.number}: {failure_reason}[/red]")
+            return TaskResult(
+                repo=config.repo,
+                pr_number=pr.number,
+                task_id=task_id,
+                status="failed",
+                message=failure_reason,
+                duration_seconds=round(time.time() - start, 2),
+                timestamp=_now_utc().isoformat(),
+                category=failure_category,
+            )
+
+        # Task is already in Harbor format (create now generates directly to Harbor)
+        duration = time.time() - start
+        gate_ok, gate_msg = _gate_task(task_id, tasks_root)
+        if gate_ok:
+            _print_success(console, pr, task_id, harbor_dir)
+
+            # Save task reference for future PRs
+            try:
+                reference_store = TaskReferenceStore()
+                reference_store.save(
+                    repo=config.repo,
+                    task_id=task_id,
+                    pr_number=pr.number,
+                )
+            except Exception as e:
+                console.print(f"[yellow]Warning: Could not save task reference: {e}[/yellow]")
+
+            return TaskResult(
+                repo=config.repo,
+                pr_number=pr.number,
+                task_id=task_id,
+                status="success",
+                message=gate_msg,
+                duration_seconds=round(duration, 2),
+                timestamp=_now_utc().isoformat(),
+                category=None,
+            )
+
+        # Gate failed
+        failure_reason = gate_msg
+        failure_category = "other"
+        _cleanup_task(task_id, tasks_root, console)
+        console.print(f"[red]✗ PR #{pr.number}: {failure_reason}[/red]")
+        return TaskResult(
+            repo=config.repo,
+            pr_number=pr.number,
+            task_id=task_id,
+            status="failed",
+            message=failure_reason,
+            duration_seconds=round(duration, 2),
+            timestamp=_now_utc().isoformat(),
+            category=failure_category,
+        )
+
+    # Pipeline failed
+    failure_category, failure_reason = _classify_failure(error_msg)
+    _cleanup_task(task_id, tasks_root, console)
+    console.print(f"[red]✗ PR #{pr.number}: {failure_reason}[/red]")
+    return TaskResult(
+        repo=config.repo,
+        pr_number=pr.number,
+        task_id=task_id,
+        status="failed",
+        message=failure_reason,
+        duration_seconds=round(time.time() - start, 2),
+        timestamp=_now_utc().isoformat(),
+        category=failure_category,
+    )
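
The two entry points above are meant to be driven by an outer farming loop: each PR produces exactly one `TaskResult`, so a caller can tally outcomes by `status` and `category`. The sketch below is illustrative only and not part of the packaged code; it assumes the module path implied by the fetcher's import (`swegen.farm.farm_hand`), a hypothetical `farm_candidates` helper, and an already-built `FarmConfig` and candidate list supplied by the caller.

```python
# Illustrative sketch only - not part of the swegen 0.1.0 wheel.
from collections import Counter
from pathlib import Path

from rich.console import Console

from swegen.config import FarmConfig  # import path taken from the module above
from swegen.farm.farm_hand import PRCandidate, TaskResult, _run_reversal_for_pr  # assumed path


def farm_candidates(
    candidates: list[PRCandidate],  # hypothetical: however the caller sourced its PRs
    config: FarmConfig,
    tasks_root: Path,
) -> list[TaskResult]:
    """Run the reversal pipeline over each candidate and tally the outcomes."""
    console = Console()
    results: list[TaskResult] = []
    for pr in candidates:
        # One TaskResult per PR, whether it succeeded, failed, or was a dry run.
        results.append(_run_reversal_for_pr(pr, config, tasks_root, console))

    by_status = Counter(r.status for r in results)
    by_category = Counter(r.category for r in results if r.category)
    console.print(f"status counts: {dict(by_status)}")
    console.print(f"failure categories: {dict(by_category)}")
    return results
```
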
swegen/farm/fetcher.py ADDED
@@ -0,0 +1,341 @@
+from __future__ import annotations
+
+import os
+import time
+from collections.abc import Iterator
+from datetime import datetime
+from pathlib import Path
+from typing import Any
+
+import requests
+from rich.console import Console
+
+from swegen.create import is_test_file
+
+from .farm_hand import PRCandidate, _slug
+from .state import StreamState
+
+
+def load_skip_list(skip_list_file: Path, repo: str) -> set[int]:
+    """Load PR numbers from a skip list file for the given repository.
+
+    The file should contain task IDs like (SWEBench format):
+        owner__repo-123
+        owner__repo-456
+
+    This function extracts PR numbers matching the current repo.
+
+    Args:
+        skip_list_file: Path to the skip list file
+        repo: Repository in owner/repo format (e.g., "python/pillow")
+
+    Returns:
+        Set of PR numbers to skip
+    """
+    if not skip_list_file.exists():
+        return set()
+
+    # Create expected prefix from repo (e.g., "python/pillow" -> "python__pillow-")
+    repo_slug = _slug(repo)
+    prefix = f"{repo_slug}-"
+
+    skip_prs: set[int] = set()
+    try:
+        content = skip_list_file.read_text()
+        for line in content.strip().split("\n"):
+            line = line.strip()
+            if not line or line.startswith("#"):
+                continue
+
+            # Check if this task ID matches our repo
+            if line.startswith(prefix):
+                # Extract PR number from task ID (e.g., "python__pillow-9272" -> 9272)
+                pr_part = line[len(prefix) :]
+                try:
+                    pr_number = int(pr_part)
+                    skip_prs.add(pr_number)
+                except ValueError:
+                    # Ignore malformed entries
+                    pass
+    except Exception:
+        # If file read fails, return empty set
+        pass
+
+    return skip_prs
+
+
+class StreamingPRFetcher:
+    """Fetches PRs from GitHub in a streaming fashion.
+
+    Yields PRs one at a time after filtering. Handles pagination,
+    rate limiting, and various filters (merged, has tests).
+
+    Attributes:
+        repo: Repository in "owner/repo" format
+        console: Rich console for output
+        state: StreamState for tracking processed PRs
+        min_files: Minimum total files changed (early approximate filter)
+        require_tests: Whether PRs must have test file changes
+        api_delay: Delay between API calls in seconds
+    """
+
+    def __init__(
+        self,
+        repo: str,
+        console: Console,
+        state: StreamState,
+        min_files: int = 3,
+        require_tests: bool = True,
+        api_delay: float = 0.5,
+    ):
+        self.repo = repo
+        self.console = console
+        self.state = state
+        self.min_files = min_files
+        self.require_tests = require_tests
+        self.api_delay = api_delay
+
+        # GitHub API setup
+        self.api_base = "https://api.github.com"
+        self.github_token = (
+            os.getenv("GITHUB_TOKEN") or os.getenv("GH_TOKEN") or os.getenv("REPO_CREATION_TOKEN")
+        )
+        self.headers = {
+            "Accept": "application/vnd.github+json",
+            "User-Agent": "swegen-stream-farm",
+        }
+        if self.github_token:
+            self.headers["Authorization"] = f"token {self.github_token}"
+
+    def stream_prs(
+        self,
+        resume_from_time: str | None = None,
+    ) -> Iterator[PRCandidate]:
+        """Stream PRs from GitHub API, skipping already processed ones.
+
+        Yields PRs one at a time after validation. Fetches in pages
+        but yields immediately, allowing processing to happen concurrently.
+
+        Works backwards in time from present day (or resume point) by PR creation time.
+
+        Args:
+            resume_from_time: If specified, only process PRs created before this timestamp.
+                Format: ISO 8601 string (e.g., "2024-01-15T23:59:59.999999+00:00")
+                This allows resuming from a specific time and continuing backwards.
+
+        Yields:
+            PRCandidate instances for each PR that passes filters
+        """
+        yielded = 0
+        page = 1
+
+        # Fetch closed PRs sorted by created time descending
+        # This gives us all merged PRs in reverse chronological order (by creation)
+        params_base = {
+            "state": "closed",
+            "sort": "created",
+            "direction": "desc",
+            "per_page": 100,
+        }
+
+        self.console.print(f"[dim]Streaming PRs from {self.repo}...[/dim]")
+        if resume_from_time is not None:
+            resume_dt = datetime.fromisoformat(resume_from_time.replace("Z", "+00:00"))
+            self.console.print(
+                f"[yellow]Resuming from {resume_dt.strftime('%Y-%m-%d %H:%M:%S UTC')} "
+                f"(only processing PRs created before this time)[/yellow]"
+            )
+        elif self.state.total_processed > 0:
+            self.console.print(
+                f"[yellow]Resuming: {self.state.total_processed} PRs already processed "
+                f"({self.state.successful} successful, {self.state.failed} failed)[/yellow]"
+            )
+            if self.state.last_created_at:
+                last_dt = datetime.fromisoformat(self.state.last_created_at.replace("Z", "+00:00"))
+                self.console.print(
+                    f"[yellow]Last processed PR created at: {last_dt.strftime('%Y-%m-%d %H:%M:%S UTC')}[/yellow]"
+                )
+
+        skipped_stats = {
+            "already_processed": 0,
+            "in_skip_list": 0,
+            "not_merged": 0,
+            "too_few_changes": 0,
+            "no_tests": 0,
+            "api_error": 0,
+            "after_resume_time": 0,
+        }
+
+        while True:
+            # Fetch next page
+            url = f"{self.api_base}/repos/{self.repo}/pulls"
+            params: dict[str, Any] = {**params_base, "page": page}
+
+            try:
+                resp = requests.get(url, headers=self.headers, params=params, timeout=30)
+                resp.raise_for_status()
+            except requests.exceptions.RequestException as exc:
+                self.console.print(f"[red]API error on page {page}: {exc}[/red]")
+                skipped_stats["api_error"] += 1
+                break
+
+            prs = resp.json()
+            if not prs:
+                self.console.print("[dim]No more PRs available[/dim]")
+                break
+
+            # Check rate limiting
+            remaining = int(resp.headers.get("X-RateLimit-Remaining", 999))
+            if remaining < 10:
+                reset_time = int(resp.headers.get("X-RateLimit-Reset", 0))
+                wait_seconds = max(0, reset_time - time.time())
+                self.console.print(
+                    f"[yellow]Rate limit low ({remaining}), waiting {wait_seconds:.0f}s...[/yellow]"
+                )
+                time.sleep(wait_seconds + 1)
+
+            # Process PRs from this page
+            for pr_data in prs:
+                pr_number = pr_data["number"]
+
+                # Filter: must be merged
+                merged_at = pr_data.get("merged_at")
+                if not merged_at:
+                    skipped_stats["not_merged"] += 1
+                    continue
+
+                # Get creation time
+                created_at = pr_data.get("created_at")
+
+                # Skip if this PR was created after our resume time
+                # (we're working backwards, so we only want PRs created before the resume point)
+                if resume_from_time is not None and created_at:
+                    pr_created_dt = datetime.fromisoformat(created_at.replace("Z", "+00:00"))
+                    resume_dt = datetime.fromisoformat(resume_from_time.replace("Z", "+00:00"))
+                    if pr_created_dt >= resume_dt:
+                        skipped_stats["after_resume_time"] += 1
+                        continue
+
+                # Skip if already processed
+                if pr_number in self.state.processed_prs:
+                    skipped_stats["already_processed"] += 1
+                    continue
+
+                # Skip if in external skip list
+                if pr_number in self.state.skip_list_prs:
+                    skipped_stats["in_skip_list"] += 1
+                    continue
+
+                # Fetch full PR details
+                try:
+                    pr_url = f"{self.api_base}/repos/{self.repo}/pulls/{pr_number}"
+                    pr_resp = requests.get(pr_url, headers=self.headers, timeout=30)
+                    pr_resp.raise_for_status()
+                    pr_full = pr_resp.json()
+                    time.sleep(self.api_delay)
+                except requests.exceptions.RequestException:
+                    skipped_stats["api_error"] += 1
+                    continue
+
+                # Get file change count for metadata
+                files_changed = pr_full.get("changed_files", 0)
+
+                # Filter: minimum files changed (early approximate filter to save API calls)
+                # Note: This is total files (including tests/docs/CI)
+                # The accurate source-only check happens later in the pipeline
+                if files_changed < self.min_files:
+                    skipped_stats["too_few_changes"] += 1
+                    continue
+
+                # Filter: test file changes (if required)
+                if self.require_tests:
+                    try:
+                        has_tests = self._pr_has_test_changes(pr_number)
+                        time.sleep(self.api_delay)
+                        if not has_tests:
+                            skipped_stats["no_tests"] += 1
+                            continue
+                    except requests.exceptions.RequestException:
+                        skipped_stats["api_error"] += 1
+                        continue
+
+                # Passed all filters - yield this PR
+                candidate = PRCandidate(
+                    number=pr_number,
+                    title=pr_full.get("title", ""),
+                    created_at=pr_full.get("created_at", ""),
+                    merged_at=pr_full.get("merged_at", ""),
+                    author=pr_full.get("user", {}).get("login", "unknown"),
+                    files_changed=files_changed,
+                    additions=pr_full.get("additions", 0),
+                    deletions=pr_full.get("deletions", 0),
+                    url=pr_full.get("html_url", ""),
+                )
+
+                self.state.total_fetched += 1
+                yielded += 1
+
+                yield candidate
+
+            # Move to next page
+            page += 1
+
+            # Break if we got fewer results than expected (last page)
+            if len(prs) < 100:
+                self.console.print("[dim]Reached last page of PRs[/dim]")
+                break
+
+        # Final stats
+        self._print_stats(skipped_stats)
+        self.console.print(
+            f"[green]Stream complete: {yielded} PRs yielded, "
+            f"{self.state.total_processed} total processed[/green]"
+        )
+
+    def _pr_has_test_changes(self, pr_number: int) -> bool:
+        """Check if PR modifies test files.
+
+        Args:
+            pr_number: PR number to check
+
+        Returns:
+            True if PR has test file changes
+        """
+        files_url = f"{self.api_base}/repos/{self.repo}/pulls/{pr_number}/files"
+        page = 1
+
+        while True:
+            params = {"page": page, "per_page": 100}
+            resp = requests.get(files_url, headers=self.headers, params=params, timeout=30)
+            resp.raise_for_status()
+
+            files = resp.json()
+            if not files:
+                break
+
+            for file in files:
+                filename = file.get("filename", "")
+                # Use centralized test file detection (supports all languages)
+                if is_test_file(filename):
+                    return True
+
+            if len(files) < 100:
+                break
+            page += 1
+
+        return False
+
+    def _print_stats(self, skipped: dict) -> None:
+        """Print skipping statistics.
+
+        Args:
+            skipped: Dict of skip reasons to counts
+        """
+        total_skipped = sum(skipped.values())
+        if total_skipped == 0:
+            return
+
+        self.console.print("\n[dim]Skipped PRs:[/dim]")
+        for reason, count in skipped.items():
+            if count > 0:
+                self.console.print(f" [dim]• {reason}: {count}[/dim]")
+ self.console.print(f" [dim]• {reason}: {count}[/dim]")