PyPI - swegen - Versions diffs - 0.1.0__py3-none-any.whl - Mend

swegen 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (35) hide show

swegen/__init__.py +14 -0
swegen/analyze/__init__.py +24 -0
swegen/analyze/classifier.py +637 -0
swegen/analyze/classify_prompt.txt +241 -0
swegen/analyze/models.py +253 -0
swegen/analyze/run.py +656 -0
swegen/analyze/verdict_prompt.txt +126 -0
swegen/cli.py +411 -0
swegen/config.py +142 -0
swegen/create/__init__.py +22 -0
swegen/create/claude_code_runner.py +988 -0
swegen/create/claude_code_utils.py +95 -0
swegen/create/create.py +706 -0
swegen/create/diff_utils.py +142 -0
swegen/create/orchestrator.py +368 -0
swegen/create/pr_fetcher.py +187 -0
swegen/create/repo_cache.py +175 -0
swegen/create/task_instruction.py +363 -0
swegen/create/task_reference.py +130 -0
swegen/create/task_skeleton.py +266 -0
swegen/create/utils.py +350 -0
swegen/farm/__init__.py +13 -0
swegen/farm/farm_hand.py +342 -0
swegen/farm/fetcher.py +341 -0
swegen/farm/state.py +231 -0
swegen/farm/stream_farm.py +430 -0
swegen/tools/__init__.py +16 -0
swegen/tools/harbor_runner.py +191 -0
swegen/tools/validate.py +523 -0
swegen/tools/validate_utils.py +142 -0
swegen-0.1.0.dist-info/METADATA +292 -0
swegen-0.1.0.dist-info/RECORD +35 -0
swegen-0.1.0.dist-info/WHEEL +4 -0
swegen-0.1.0.dist-info/entry_points.txt +3 -0
swegen-0.1.0.dist-info/licenses/LICENSE +201 -0

swegen/create/task_skeleton.py ADDED Viewed

@@ -0,0 +1,266 @@
+from __future__ import annotations
+from dataclasses import dataclass
+from pathlib import Path
+from harbor.models.task.config import (
+    AgentConfig,
+    EnvironmentConfig,
+    TaskConfig,
+    VerifierConfig,
+)
+from .utils import strip_tests_prefix
+@dataclass
+class SkeletonParams:
+    """Parameters for skeleton generation (all deterministic from git).
+    generate_dockerfile interpolates repo_url, head_sha and pr_number directly
+    into shell commands of the generated Dockerfile, without any quoting.
+    """
+    # URL passed to `git clone` in the generated Dockerfile.
+    repo_url: str
+    # Commit that the generated Dockerfile fetches and checks out (the state with the fix applied).
+    head_sha: str
+    # Not referenced by the generators in this module; presumably the PR's base commit - verify against callers.
+    base_sha: str
+    # Pull request number; used to build the `refs/pull/<n>/head` fallback fetch.
+    pr_number: int
+def generate_dockerfile(params: SkeletonParams) -> str:
+    """
+    Generate a minimal, language-agnostic Dockerfile skeleton.
+    The skeleton contains:
+    - Deterministic parts filled in (git clone, SHAs, bug.patch application)
+    - TODO comments for Claude Code to fill in (runtime, deps, build)
+    Claude Code will analyze the repo and fill in:
+    - Language runtime installation
+    - Package manager setup
+    - Dependency installation
+    - Build steps (if needed)
+    - Post-patch rebuild (if needed)
+    Git clone strategy:
+    - Simple + robust: clone, then fetch the exact commit SHA.
+    - NOTE: `head_sha` currently comes from the PR's HEAD branch tip (GitHub API).
+    - If the PR was squash-merged/rebased, that commit may not be on any normal branch.
+    - In that case, fetching `refs/pull/<n>/head` is a robust fallback without fetching ALL PR refs.
+    """
+    return f"""FROM ubuntu:24.04
+# Base system packages (common to all languages)
+RUN apt-get update && apt-get install -y \\
+    git \\
+    curl \\
+    ca-certificates \\
+    patch \\
+    build-essential \\
+    && rm -rf /var/lib/apt/lists/*
+# TODO: Install language runtime
+# Analyze the repo to determine what's needed. Examples:
+#   Python: apt-get install python3 python3-pip python3-venv python3-dev
+#   Node.js: curl -fsSL https://deb.nodesource.com/setup_20.x | bash - && apt-get install -y nodejs
+#   Go: Download from golang.org/dl or use apt
+#   Rust: curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
+#   Ruby: apt-get install ruby ruby-dev
+#   Java: apt-get install openjdk-17-jdk
+# Check .nvmrc, .python-version, .ruby-version, go.mod, rust-toolchain.toml, etc.
+# TODO: Install additional system packages if needed
+# Check CI config (.github/workflows/*.yml) for hints about required packages
+# Examples: python3-dev, libssl-dev, pkg-config, cmake, etc.
+# TODO: Set up package manager if needed
+# For Python: PREFER uv (much faster than pip)
+#   curl -LsSf https://astral.sh/uv/install.sh | sh && mv /root/.local/bin/uv /usr/local/bin/uv
+# For Node.js: corepack enable (for yarn/pnpm) or npm is built-in
+# For Ruby: gem install bundler
+WORKDIR /app
+# Clone repo at HEAD commit (with fix applied)
+RUN git clone {params.repo_url} src && \\
+    cd src && \\
+    (git fetch --depth 1 origin {params.head_sha} || git fetch --depth 1 origin "+refs/pull/{params.pr_number}/head:refs/remotes/origin/pr/{params.pr_number}") && \\
+    git checkout --detach FETCH_HEAD && \\
+    git submodule update --init --recursive
+WORKDIR /app/src
+# TODO: Set environment variables if needed
+# Check CI config and README for required env vars
+# Examples: CI=true, NODE_ENV=test, CARGO_TERM_COLOR=never
+# TODO: Install dependencies
+# For Python: PREFER uv (much faster). Create venv and install:
+#   uv venv /opt/venv
+#   uv pip install --python /opt/venv/bin/python -e ".[dev,test]"
+#   # Or: uv pip install --python /opt/venv/bin/python -r requirements.txt
+#   # Then add to PATH: ENV PATH="/opt/venv/bin:${{PATH}}"
+# For Node.js: npm ci, yarn install --frozen-lockfile, pnpm install --frozen-lockfile
+# For Go: go mod download
+# For Rust: cargo fetch
+# For Ruby: bundle install
+# For Java: mvn dependency:resolve or gradle dependencies
+# TODO: Build if needed (check if it's a compiled language or has build step)
+# Examples:
+#   TypeScript: npm run build, tsc
+#   Rust: cargo build
+#   Go: go build ./...
+#   Java: mvn compile, gradle build
+# If install/build steps touched tracked files, reset them so bug.patch applies cleanly,
+RUN git reset --hard
+# Apply bug.patch to revert to buggy state (BASE)
+COPY bug.patch /tmp/bug.patch
+RUN patch -p1 < /tmp/bug.patch && rm /tmp/bug.patch
+# TODO: Rebuild after applying bug.patch if needed
+# For compiled languages (TypeScript, Rust, Go, Java), you MUST rebuild after patching
+RUN rm -rf /app/src/.git
+WORKDIR /app/src
+"""
+def generate_test_sh(
+    test_files: list[str],
+) -> str:
+    """
+    Generate a minimal, language-agnostic test.sh skeleton.
+    The skeleton contains:
+    - Test file copy commands (deterministic)
+    - TODO for Claude Code to fill in the actual test command
+    Claude Code will analyze the repo and fill in:
+    - Test framework detection
+    - Correct test command with specific file paths
+    """
+    # Build copy commands for test files
+    if test_files:
+        copy_lines = []
+        for tf in test_files:
+            # Handle common test directory prefixes
+            source_path = strip_tests_prefix(tf)
+            target_dir = str(Path(tf).parent)
+            copy_lines.append(f'mkdir -p "{target_dir}"')
+            copy_lines.append(f'cp "/tests/{source_path}" "{tf}"')
+        copy_commands = "\n".join(copy_lines)
+        # Build example test file list for comments
+        test_files_example = " ".join([f'"{tf}"' for tf in test_files[:5]])
+        if len(test_files) > 5:
+            test_files_example += f" # ... and {len(test_files) - 5} more"
+    else:
+        copy_commands = "# No test files to copy"
+        test_files_example = ""
+    return f"""#!/bin/bash
+cd /app/src
+# TODO: Set environment variables if needed for tests
+# Examples: CI=true, NODE_ENV=test, RUST_BACKTRACE=1
+# Copy HEAD test files from /tests (overwrites BASE state)
+{copy_commands}
+# CRITICAL: Run ONLY the specific test files from the PR, NOT the entire test suite!
+# The test files to run are: {test_files_example if test_files_example else "(see list above)"}
+#
+# TODO: Fill in the actual test command to run ONLY these specific files
+#
+# DO NOT run the entire test suite - it's too slow and may have unrelated failures!
+#
+# Examples for different languages/frameworks:
+#
+# Python (pytest with uv):
+#   # If using uv venv at /opt/venv:
+#   source /opt/venv/bin/activate
+#   uv pip install -e . --no-deps 2>/dev/null || true  # Reinstall to pick up changes
+#   pytest -xvs path/to/test_file.py
+#   # Or without venv activation:
+#   /opt/venv/bin/pytest -xvs path/to/test_file.py
+#
+# JavaScript/TypeScript (IMPORTANT: disable coverage thresholds when running subset!):
+#   npx jest path/to/test.js path/to/test2.js --coverage=false
+#   npx vitest run path/to/test.ts --coverage.enabled=false
+#   npx mocha path/to/test.js path/to/test2.js
+#   npx borp path/to/test.js --no-check-coverage   # Used by fastify, pino, etc.
+#   npx tap path/to/test.js --no-check-coverage    # Node TAP framework
+#   npx ava path/to/test.js                        # AVA framework
+#
+#   CRITICAL for JS/TS: DO NOT use "npm test" or "npm run test" without args!
+#   These run the ENTIRE suite. Pass specific files via the test runner directly.
+#   If you must use npm: npm run test -- path/to/test.js (note the -- separator)
+#
+# Go:
+#   go test -v ./path/to/package/...
+#   go test -v -run TestSpecificName ./...
+#
+# Rust:
+#   cargo test --test test_name -- --nocapture
+#   cargo test specific_test_name -- --nocapture
+#
+# Ruby (RSpec/Minitest):
+#   bundle exec rspec path/to/spec.rb
+#   bundle exec ruby -Itest path/to/test.rb
+#
+# Java (JUnit/Maven/Gradle):
+#   mvn test -Dtest=TestClassName
+#   gradle test --tests TestClassName
+# TODO: Replace this placeholder with actual test command running ONLY the specific test files above
+echo "ERROR: Test command not filled in! Must run specific test files, not entire suite." >&2
+false
+test_status=$?
+if [ $test_status -eq 0 ]; then
+  echo 1 > /logs/verifier/reward.txt
+else
+  echo 0 > /logs/verifier/reward.txt
+fi
+exit "$test_status"
+"""
+def generate_solve_sh() -> str:
+    """Generate solution/solve.sh script (same for all tasks)."""
+    return """#!/bin/bash
+set -euo pipefail
+cd /app/src
+patch -p1 < /solution/fix.patch
+"""
+def generate_instruction_md(instruction_data: dict) -> str:
+    """Generate instruction.md file for Harbor format."""
+    return instruction_data["instruction"]
+def generate_task_toml(instruction_data: dict) -> str:
+    """Generate task.toml config file for Harbor format.
+    Uses Harbor's TaskConfig for proper serialization and validation.
+    """
+    config = TaskConfig(
+        metadata={
+            "difficulty": instruction_data.get("difficulty", "medium"),
+            "category": instruction_data.get("category", "bugfix"),
+            "tags": instruction_data.get("tags", []),
+        },
+        verifier=VerifierConfig(timeout_sec=600.0),
+        agent=AgentConfig(timeout_sec=600.0),
+        environment=EnvironmentConfig(
+            build_timeout_sec=600.0,
+            cpus=1,
+            memory_mb=2048,
+            storage_mb=10240,
+        ),
+    )
+    return config.model_dump_toml()

swegen/create/utils.py ADDED Viewed

@@ -0,0 +1,350 @@
+from __future__ import annotations
+from pathlib import Path
+from pydantic import BaseModel, Field
+# NOTE(review): pydantic can surface the class docstring and the Field descriptions
+# in the model's generated schema, so they may be runtime text rather than plain
+# documentation - confirm how callers use this model before rewording them.
+class CombinedPRTaskEvaluation(BaseModel):
+    """Combined evaluation and task generation for a PR.
+    First evaluates if PR is substantial, then generates task details if it is.
+    """
+    # Required verdict (no default).
+    is_substantial: bool = Field(
+        ..., description="Whether the PR is substantial enough to generate a task"
+    )
+    # Required justification for the verdict (no default).
+    reason: str = Field(..., description="Brief explanation of why the PR is or isn't substantial")
+    # Optional task text; defaults to None.
+    instruction: str | None = Field(
+        None,
+        description="Concise bug report describing problem, reproduction, expected behavior. No bullet lists or verbose sections.",
+    )
+    # The three fields below carry defaults ("medium", "bugfix", empty list).
+    difficulty: str = Field("medium", description="Task difficulty: easy, medium, or hard")
+    category: str = Field("bugfix", description="Task category, typically 'bugfix' or 'feature'")
+    # The "exactly 3 tags" rule appears only in the description text; no validator
+    # enforcing the length is visible in this class.
+    tags: list[str] = Field(
+        default_factory=list,
+        description="Exactly 3 tags: [language, tier, framework/category]. Example: ['python', 'backend', 'fastapi']",
+    )
+def strip_tests_prefix(path: str) -> str:
+    """Strip leading test directory prefix if present.
+    Handles common patterns across languages:
+    - tests/, test/, __tests__/ (Python, JS/TS)
+    - spec/ (Ruby)
+    - src/test/ (Java/Kotlin)
+    Args:
+        path: File path that may start with a test directory prefix
+    Returns:
+        Path with test directory prefix removed if present
+    """
+    p = Path(path)
+    parts = p.parts
+    if not parts:
+        return path
+    first = parts[0].lower()
+    # Python, JS/TS, Ruby
+    if first in ("tests", "test", "__tests__", "spec"):
+        return str(Path(*parts[1:]))
+    # Java/Kotlin: src/test/java/... or src/test/kotlin/...
+    if len(parts) >= 2 and parts[0].lower() == "src" and parts[1].lower() == "test":
+        return str(Path(*parts[2:]))
+    return path
+def is_test_file(filename: str) -> bool:
+    """Check if a filename represents a test file or test-related resource.
+    Supports all languages: Python, JS/TS, Go, Rust, Ruby, Java, C/C++, PHP, C#.
+    Args:
+        filename: File path (repo-relative)
+    Returns:
+        True if the file is a test file or test resource (fixtures, data, etc.)
+    """
+    if not filename:
+        return False
+    name_lower = filename.lower()
+    base_name = filename.split("/")[-1].lower()
+    # Check if file is under a test directory (common across languages)
+    in_test_dir = (
+        # Python/generic
+        name_lower.startswith("tests/")
+        or "/tests/" in name_lower
+        or name_lower.startswith("test/")
+        or "/test/" in name_lower
+        # JS/TS
+        or name_lower.startswith("__tests__/")
+        or "/__tests__/" in name_lower
+        # Ruby
+        or name_lower.startswith("spec/")
+        or "/spec/" in name_lower
+        # Java/Kotlin (Maven/Gradle convention)
+        or "/src/test/" in name_lower
+        or name_lower.startswith("src/test/")
+    )
+    # Python patterns
+    is_python_test = (
+        base_name.startswith("test_") and name_lower.endswith(".py")
+    ) or base_name.endswith("_test.py")
+    # JavaScript/TypeScript patterns
+    is_js_ts_test = (
+        base_name.endswith(".test.js")
+        or base_name.endswith(".test.ts")
+        or base_name.endswith(".test.jsx")
+        or base_name.endswith(".test.tsx")
+        or base_name.endswith(".test.mjs")
+        or base_name.endswith(".test.cjs")
+        or base_name.endswith(".spec.js")
+        or base_name.endswith(".spec.ts")
+        or base_name.endswith(".spec.jsx")
+        or base_name.endswith(".spec.tsx")
+        or base_name.endswith(".spec.mjs")
+        or base_name.endswith(".spec.cjs")
+    )
+    # Go patterns
+    is_go_test = base_name.endswith("_test.go")
+    # Rust patterns
+    is_rust_test = base_name.endswith("_test.rs") or base_name == "tests.rs"
+    # Ruby patterns
+    is_ruby_test = (
+        base_name.endswith("_spec.rb")
+        or base_name.endswith("_test.rb")
+        or base_name.startswith("test_")
+        and name_lower.endswith(".rb")
+    )
+    # Java/Kotlin patterns
+    is_java_test = (
+        base_name.endswith("test.java")
+        or base_name.endswith("tests.java")
+        or base_name.endswith("test.kt")
+        or base_name.endswith("tests.kt")
+        or base_name.startswith("test")
+        and (name_lower.endswith(".java") or name_lower.endswith(".kt"))
+    )
+    # C/C++ patterns
+    is_cpp_test = (
+        base_name.endswith("_test.cpp")
+        or base_name.endswith("_test.cc")
+        or base_name.endswith("_test.c")
+        or base_name.startswith("test_")
+        and name_lower.endswith((".cpp", ".cc", ".c"))
+    )
+    # PHP patterns
+    is_php_test = (
+        base_name.endswith("test.php")
+        or base_name.startswith("test")
+        and name_lower.endswith(".php")
+    )
+    # C# patterns
+    is_csharp_test = base_name.endswith("tests.cs") or base_name.endswith("test.cs")
+    return (
+        in_test_dir
+        or is_python_test
+        or is_js_ts_test
+        or is_go_test
+        or is_rust_test
+        or is_ruby_test
+        or is_java_test
+        or is_cpp_test
+        or is_php_test
+        or is_csharp_test
+    )
+def identify_test_files(files: list[dict]) -> list[str]:
+    """Identify test files from a list of changed files.
+    Supports all languages: Python, JS/TS, Go, Rust, Ruby, Java, C/C++, PHP, C#.
+    Args:
+        files: List of file dicts with 'filename' key (from GitHub API)
+    Returns:
+        List of test file paths (repo-relative)
+    """
+    test_files = []
+    for f in files:
+        filename = f.get("filename", "")
+        if is_test_file(filename):
+            test_files.append(filename)
+    return test_files
+def _is_relevant_source(path: str) -> bool:
+    """Check if a file path is relevant for the fix (not tests, CI, or build artifacts).
+    NOTE: We include docs, examples, and other non-test files to keep fix.patch
+    consistent with bug.patch. This prevents issues where bug.patch reverts docs
+    but fix.patch doesn't re-apply them, causing inconsistencies.
+    Supports all languages: Python, JS/TS, Go, Rust, Ruby, Java, C/C++, PHP, C#.
+    Args:
+        path: File path to check
+    Returns:
+        True if the file should be included in fix.patch
+    """
+    pl = path.lower()
+    base = path.split("/")[-1].lower()
+    # === Common exclusions (all languages) ===
+    # Exclude test directories
+    if pl.startswith("tests/") or "/tests/" in pl:
+        return False
+    if pl.startswith("test/") or "/test/" in pl:
+        return False
+    if pl.startswith("__tests__/") or "/__tests__/" in pl:
+        return False
+    if pl.startswith("spec/") or "/spec/" in pl:  # Ruby
+        return False
+    if "/src/test/" in pl or pl.startswith("src/test/"):  # Java/Kotlin
+        return False
+    # Exclude CI and meta (these shouldn't be in fix.patch)
+    if pl.startswith(".github/") or "/.github/" in pl:
+        return False
+    if pl.startswith(".gitlab/") or "/.gitlab/" in pl:
+        return False
+    if pl.startswith(".circleci/") or "/.circleci/" in pl:
+        return False
+    # Exclude build outputs and dependency directories (should never be in a PR)
+    build_dirs = [
+        "node_modules/",
+        "dist/",
+        "build/",
+        ".next/",
+        "__pycache__/",
+        ".tox/",
+        ".pytest_cache/",
+        "*.egg-info/",
+        "target/",
+        "vendor/",
+        "bin/",
+        "obj/",
+        "out/",
+    ]
+    for bd in build_dirs:
+        if bd in pl or pl.startswith(bd.rstrip("/")):
+            return False
+    # Exclude test files by naming convention (comprehensive, language-agnostic)
+    # Python
+    if base.startswith("test_") and base.endswith(".py"):
+        return False
+    if base.endswith("_test.py"):
+        return False
+    # JavaScript/TypeScript
+    if base.endswith((".test.js", ".test.ts", ".test.jsx", ".test.tsx", ".test.mjs", ".test.cjs")):
+        return False
+    if base.endswith((".spec.js", ".spec.ts", ".spec.jsx", ".spec.tsx", ".spec.mjs", ".spec.cjs")):
+        return False
+    # Go
+    if base.endswith("_test.go"):
+        return False
+    # Rust
+    if base.endswith("_test.rs") or base == "tests.rs":
+        return False
+    # Ruby
+    if base.endswith("_spec.rb") or base.endswith("_test.rb"):
+        return False
+    if base.startswith("test_") and base.endswith(".rb"):
+        return False
+    # Java/Kotlin
+    if base.endswith(("test.java", "tests.java", "test.kt", "tests.kt")):
+        return False
+    # C/C++
+    if base.endswith(("_test.cpp", "_test.cc", "_test.c")):
+        return False
+    if base.startswith("test_") and base.endswith((".cpp", ".cc", ".c")):
+        return False
+    # PHP
+    if base.endswith("test.php"):
+        return False
+    # C#
+    if base.endswith(("tests.cs", "test.cs")):
+        return False
+    # Include everything else (source code, docs, examples, type definitions, etc.)
+    # This ensures fix.patch is comprehensive and consistent with bug.patch
+    return True
+def check_multi_file_requirement(
+    files: list[dict], min_files: int = 3, max_files: int = 10
+) -> tuple[bool, str, int]:
+    """Check if PR modifies sufficient source files for a good task.
+    Harbor tasks should require changes to 3+ source files (tests don't count).
+    Single-file and two-file changes are too easy - agents can pattern-match.
+    Large refactors (10+ files) are too complex and often not single bug fixes.
+    Args:
+        files: List of file dicts with 'filename' key (from GitHub API)
+        min_files: Minimum number of source files required (default: 3)
+        max_files: Maximum number of source files allowed (default: 10)
+    Returns:
+        Tuple of (passes, reason, source_count) where:
+        - passes: True if source files are within [min_files, max_files] range
+        - reason: Explanation if failed
+        - source_count: Number of source files found
+    """
+    source_files = []
+    for f in files:
+        filename = f.get("filename", "")
+        if _is_relevant_source(filename):
+            source_files.append(filename)
+    count = len(source_files)
+    if count < min_files:
+        return (
+            False,
+            f"Only {count} source file{'s' if count != 1 else ''} modified (need {min_files}+, tests excluded)",
+            count,
+        )
+    if count > max_files:
+        return (
+            False,
+            f"Too many source files modified ({count}, max {max_files}) - likely a large refactor (tests excluded)",
+            count,
+        )
+    return True, "", count

swegen/farm/__init__.py ADDED Viewed

@@ -0,0 +1,13 @@
+from .farm_hand import PRCandidate, TaskResult
+from .fetcher import StreamingPRFetcher, load_skip_list
+from .state import StreamState
+from .stream_farm import StreamFarmer
+# Public names of this package; each one is re-exported from a submodule imported above.
+__all__ = [
+    "StreamFarmer",
+    "StreamState",
+    "StreamingPRFetcher",
+    "PRCandidate",
+    "TaskResult",
+    "load_skip_list",
+]