swegen 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,191 @@
1
+ from __future__ import annotations
2
+
3
+ import shutil
4
+ import subprocess
5
+ import time
6
+ from dataclasses import dataclass
7
+ from pathlib import Path
8
+
9
+ from harbor.models.environment_type import EnvironmentType
10
+ from harbor.models.job.result import JobResult
11
+ from harbor.models.trial.paths import TrialPaths
12
+ from harbor.models.trial.result import TrialResult
13
+
14
+
15
def harbor_cmd_base() -> list[str]:
    """Get the base command used to invoke Harbor.

    Resolution order:
        1. A ``harbor`` executable found on PATH.
        2. ``uv run harbor`` when ``uv`` is found on PATH.
        3. ``<current interpreter> -m harbor`` as a last resort.

    Returns:
        The argv prefix to which Harbor sub-commands and flags are appended.
    """
    import sys

    if shutil.which("harbor"):
        return ["harbor"]
    if shutil.which("uv"):
        return ["uv", "run", "harbor"]
    # Use the interpreter running this code rather than whatever "python"
    # happens to resolve to on PATH (it may be missing or a different
    # environment). Fall back to "python" only if the interpreter path is
    # unavailable (sys.executable may be empty or None).
    return [sys.executable or "python", "-m", "harbor"]
25
+
26
+
27
def run_harbor_agent(
    task_id: str,
    dataset_path: Path,
    jobs_dir: Path,
    agent: str,
    timeout_multiplier: float | None = None,
    capture_output: bool = False,
    delete_after: bool = True,
    environment: EnvironmentType = EnvironmentType.DOCKER,
) -> tuple[int, Path | None]:
    """Run a Harbor agent and return (exit_code, job_result_path).

    Args:
        task_id: The task identifier (passed to Harbor via ``-t``).
        dataset_path: Path to the Harbor dataset root (passed via ``-p``).
        jobs_dir: Parent directory under which a fresh per-run directory is
            created and handed to Harbor as ``--jobs-dir``.
        agent: Agent type ("nop" or "oracle").
        timeout_multiplier: Optional timeout multiplier for long tasks; the
            flag is omitted when None.
        capture_output: If True, the subprocess's stdout/stderr are captured
            (and discarded) instead of being inherited (for rich console usage).
        delete_after: If False, ``--no-delete`` is passed so images are kept
            for faster subsequent runs (default: True).
        environment: Environment type; its ``.value`` is passed via ``--env``.

    Returns:
        Tuple of (exit_code, path_to_result_json or None). The path points at
        ``result.json`` inside the most recently modified job directory that
        Harbor created; the file itself is not checked for existence. None is
        returned when no job directory was produced.
    """
    # Create a directory that is guaranteed to be ours. A one-second timestamp
    # alone is not unique: two runs for the same task/agent started within the
    # same second would otherwise share a directory and could pick up each
    # other's job output. mkdir(exist_ok=False) is atomic, so on collision we
    # append a numeric suffix and retry.
    stamp = int(time.time())
    unique_parent = jobs_dir / f"{task_id}.{agent}.{stamp}"
    collisions = 0
    while True:
        try:
            unique_parent.mkdir(parents=True, exist_ok=False)
            break
        except FileExistsError:
            if not unique_parent.exists():
                # The clash is with a non-directory parent component;
                # retrying with another leaf name cannot succeed.
                raise
            collisions += 1
            unique_parent = jobs_dir / f"{task_id}.{agent}.{stamp}.{collisions}"

    cmd = harbor_cmd_base() + [
        "run",
        "--agent",
        agent,
        "-p",
        str(dataset_path),
        "-t",
        task_id,
        "--jobs-dir",
        str(unique_parent),
        "--env",
        environment.value,
    ]
    if timeout_multiplier is not None:
        cmd += ["--timeout-multiplier", str(timeout_multiplier)]

    # Control image deletion: --no-delete keeps images for faster subsequent runs
    if not delete_after:
        cmd.append("--no-delete")

    # Only the return code is consumed, so a single call covers both modes.
    completed = subprocess.run(
        cmd, check=False, capture_output=capture_output, text=True
    )

    # The subprocess may have removed the directory.
    if not unique_parent.exists():
        return completed.returncode, None

    # The directory was empty when we created it, so every sub-directory
    # present now was produced by this run.
    job_dirs = [entry for entry in unique_parent.iterdir() if entry.is_dir()]
    if not job_dirs:
        return completed.returncode, None

    newest = max(job_dirs, key=lambda entry: entry.stat().st_mtime)
    return completed.returncode, (newest / "result.json").resolve()
103
+
104
+
105
@dataclass(frozen=True)
class HarborOutcome:
    """Immutable summary of a Harbor job: the reward and any error message."""

    # Reward extracted from the job result, or None when unavailable.
    reward: int | None
    # Best-effort exception message, or None when no error was recorded.
    error: str | None
109
+
110
+
111
def _exception_message(trial_result: TrialResult) -> str | None:
    """Return the exception message (or, failing that, type) of a trial, if any."""
    exc = getattr(trial_result, "exception_info", None)
    if not exc:
        return None
    msg = getattr(exc, "exception_message", None) or getattr(exc, "exception_type", None)
    return str(msg) if msg else None


def _trial_reward(trial_result: TrialResult) -> int | None:
    """Return the "reward" entry of a trial's verifier rewards as an int, if present."""
    verifier_result = trial_result.verifier_result
    if not (verifier_result and verifier_result.rewards):
        return None
    reward_value = verifier_result.rewards.get("reward")
    if reward_value is None:
        return None
    return int(float(reward_value))


def parse_harbor_outcome(job_result_path: Path | None) -> HarborOutcome:
    """Parse Harbor job result and return both reward and error (best-effort).

    Uses Harbor's JobResult and TrialResult Pydantic models to parse the
    result files. Three sources are consulted in order, and the first one that
    yields a reward wins:

    1. ``reward_stats["reward"]`` of the first eval in the job stats.
    2. The verifier rewards of each trial result embedded in the job result.
    3. The per-trial result files found in sub-directories next to the job
       result file.

    Args:
        job_result_path: Path to the job-level result.json

    Returns:
        HarborOutcome with:
        - reward: the parsed reward (or None if unavailable)
        - error: best-effort exception message (or None)

        Parsing failures never raise; they yield ``reward=None`` together with
        whatever error message had already been extracted.
    """
    if not job_result_path or not job_result_path.exists():
        return HarborOutcome(reward=None, error=None)

    # Initialised outside the try so a failure in a later step does not throw
    # away an error message that was already found.
    error: str | None = None

    try:
        # JSON is read as UTF-8 rather than relying on the locale encoding.
        job_result = JobResult.model_validate_json(
            job_result_path.read_text(encoding="utf-8")
        )

        # Prefer structured exception info from typed trial results: take the
        # first trial that yields a non-empty message.
        for trial_result in job_result.trial_results:
            error = _exception_message(trial_result)
            if error:
                break

        # Method 1: Check reward_stats in job stats (fastest)
        if job_result.stats.evals:
            # Get first eval (typically only one for single-task runs)
            first_eval = next(iter(job_result.stats.evals.values()))

            if first_eval.reward_stats and "reward" in first_eval.reward_stats:
                reward_map = first_eval.reward_stats["reward"]

                # reward=1 (oracle success) takes precedence over reward=0
                # (nop success). 1 == 1.0 and 0 == 0.0 compare and hash equal,
                # so a single membership test per value covers int and float.
                for candidate in (1, 0):
                    if candidate in reward_map:
                        return HarborOutcome(reward=candidate, error=error)

        # Method 2: Check trial results directly
        for trial_result in job_result.trial_results:
            reward = _trial_reward(trial_result)
            if reward is not None:
                return HarborOutcome(reward=reward, error=error)

        # Method 3: Fallback - scan trial directories using TrialPaths.
        # Sorted so the outcome does not depend on directory listing order.
        job_root = job_result_path.parent
        for trial_dir in sorted(p for p in job_root.iterdir() if p.is_dir()):
            try:
                trial_paths = TrialPaths(trial_dir)
                if not trial_paths.result_path.exists():
                    continue
                trial_result = TrialResult.model_validate_json(
                    trial_paths.result_path.read_text(encoding="utf-8")
                )

                if error is None:
                    error = _exception_message(trial_result)

                reward = _trial_reward(trial_result)
                if reward is not None:
                    return HarborOutcome(reward=reward, error=error)
            except Exception:
                # Not a valid trial directory, continue searching
                continue

    except Exception:
        # Best-effort: report no reward, but keep any error already extracted.
        return HarborOutcome(reward=None, error=error)

    return HarborOutcome(reward=None, error=error)