trapstreet-cli 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- trap/__init__.py +6 -0
- trap/_version.py +24 -0
- trap/cli.py +326 -0
- trap/loader.py +75 -0
- trap/models/__init__.py +18 -0
- trap/models/config.py +27 -0
- trap/models/report.py +39 -0
- trap/models/results.py +15 -0
- trap/models/task.py +30 -0
- trap/report/__init__.py +23 -0
- trap/report/base.py +8 -0
- trap/report/json.py +11 -0
- trap/report/rich.py +93 -0
- trap/report/saver.py +28 -0
- trap/runner/__init__.py +16 -0
- trap/runner/case.py +84 -0
- trap/runner/grader.py +57 -0
- trap/runner/judge.py +74 -0
- trap/runner/task.py +59 -0
- trapstreet_cli-0.1.0.dist-info/METADATA +394 -0
- trapstreet_cli-0.1.0.dist-info/RECORD +23 -0
- trapstreet_cli-0.1.0.dist-info/WHEEL +4 -0
- trapstreet_cli-0.1.0.dist-info/entry_points.txt +2 -0
trap/__init__.py
ADDED
trap/_version.py
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# file generated by vcs-versioning
# don't change, don't track in version control
from __future__ import annotations

# Public names of this module; every value is exposed under a plain and a dunder alias.
__all__ = [
    "__version__",
    "__version_tuple__",
    "version",
    "version_tuple",
    "__commit_id__",
    "commit_id",
]

# Type declarations for the names assigned below.
version: str
__version__: str
__version_tuple__: tuple[int | str, ...]
version_tuple: tuple[int | str, ...]
commit_id: str | None
__commit_id__: str | None

__version__ = version = '0.1.0'
__version_tuple__ = version_tuple = (0, 1, 0)

# No commit id is recorded in this file.
__commit_id__ = commit_id = None
|
trap/cli.py
ADDED
|
@@ -0,0 +1,326 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import http.server
|
|
4
|
+
import json as _json
|
|
5
|
+
import os
|
|
6
|
+
import socket
|
|
7
|
+
import socketserver
|
|
8
|
+
import threading
|
|
9
|
+
import time
|
|
10
|
+
import urllib.error
|
|
11
|
+
import urllib.parse
|
|
12
|
+
import urllib.request
|
|
13
|
+
import webbrowser
|
|
14
|
+
from datetime import datetime
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
from typing import Any
|
|
17
|
+
|
|
18
|
+
import typer
|
|
19
|
+
from rich.console import Console
|
|
20
|
+
|
|
21
|
+
from trap.loader import TrapLoader, TrapTaskLoader
|
|
22
|
+
from trap.report import OutputFormat, ReportSaver, renderer_factory
|
|
23
|
+
from trap.runner import TaskRunner
|
|
24
|
+
|
|
25
|
+
# Typer application; the functions decorated with @app.command() in this module register on it.
app = typer.Typer(help="AI prompt / agent / workflow / testing framework.")
# Rich console used for the user-facing messages printed by this module.
console = Console()

# Credentials file written by `login`, read by `submit`, removed by `logout`.
AUTH_FILE = Path.home() / ".config" / "trapstreet" / "auth.json"
# Default value of the --server option (overridable through TRAPSTREET_URL).
DEFAULT_SERVER = "https://trapstreet.run"
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
# -----------------------------------------------------------------------------
|
|
33
|
+
# auth helpers
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _load_auth_file() -> dict[str, str]:
    """Read ~/.config/trapstreet/auth.json or return empty dict.

    Returns:
        The parsed JSON object, or ``{}`` when the file is missing,
        unreadable, not valid UTF-8/JSON, or does not hold a JSON object.
    """
    try:
        # EAFP: read directly instead of exists()-then-read, so a file that
        # disappears in between is handled like any other missing file.
        data = _json.loads(AUTH_FILE.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # OSError covers a missing/unreadable file; ValueError covers both
        # json.JSONDecodeError and UnicodeDecodeError.
        return {}
    # Callers use .get(); valid JSON that is not an object (e.g. a list)
    # would otherwise crash them with AttributeError.
    return data if isinstance(data, dict) else {}
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _save_auth_file(server: str, api_key: str, runner: str | None) -> Path:
    """Write the credentials to AUTH_FILE with owner-only permissions.

    Args:
        server: Server URL stored under "server".
        api_key: Token stored under "api_key".
        runner: Optional runner name; stored under "runner" only when truthy.

    Returns:
        The path of the file that was written (AUTH_FILE).
    """
    AUTH_FILE.parent.mkdir(parents=True, exist_ok=True)
    payload = {"server": server, "api_key": api_key}
    if runner:
        payload["runner"] = runner
    # Create the file with mode 0o600 from the start. Writing first and
    # chmod-ing afterwards leaves a window in which the api_key sits in a
    # file with umask-default (often world-readable) permissions.
    fd = os.open(AUTH_FILE, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    with os.fdopen(fd, "w", encoding="utf-8") as fh:
        fh.write(_json.dumps(payload, indent=2) + "\n")
    # The mode given to os.open() only applies when the file is created;
    # tighten a pre-existing file as well.
    AUTH_FILE.chmod(0o600)
    return AUTH_FILE
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def _resolve_api_key(flag_or_env: str | None) -> str:
|
|
57
|
+
"""Auth precedence: --api-key / TRAPSTREET_API_KEY > auth.json > error."""
|
|
58
|
+
if flag_or_env:
|
|
59
|
+
return flag_or_env
|
|
60
|
+
api_key = _load_auth_file().get("api_key")
|
|
61
|
+
if api_key:
|
|
62
|
+
return api_key
|
|
63
|
+
console.print(
|
|
64
|
+
"[red]not logged in[/red]. Run [bold]tp login[/bold] "
|
|
65
|
+
"or set [bold]TRAPSTREET_API_KEY[/bold] / pass [bold]--api-key[/bold]."
|
|
66
|
+
)
|
|
67
|
+
raise SystemExit(2)
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
# -----------------------------------------------------------------------------
|
|
71
|
+
# commands
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
@app.command()
def run(
    task: str | None = typer.Argument(None),
    trap_yaml_path: Path = typer.Option(Path("trap.yaml"), "--config", "-c"),
    tags: list[str] = typer.Option([], "--tag", "-t"),
    output: OutputFormat = typer.Option(OutputFormat.rich, "--output", "-o"),
    fail_fast: bool = typer.Option(False, "--fail-fast"),
    workspace: Path = typer.Option(Path(".trap"), "--workspace", "-w"),
) -> None:
    """Run a task against a solution."""
    # Resolve the task entry from trap.yaml, then the traptask definition it points at.
    config = TrapLoader(trap_yaml_path)
    selected = config.resolve_task(task)
    task_cfg = TrapTaskLoader.from_task(selected, config.trap_dir)
    cases_to_run = task_cfg.cases_with_tags(tags)

    # Every invocation writes into <workspace>/<task name>/<ISO timestamp>.
    stamp = datetime.now().isoformat(timespec="seconds")
    outputs_dir = workspace.resolve() / selected.name / stamp

    task_runner = TaskRunner(
        task_obj=selected,
        trap_dir=config.trap_dir,
        traptask_obj=task_cfg.traptask,
        traptask_dir=task_cfg.task_dir,
        task_outputs_dir=outputs_dir,
    )
    case_results, grader_metrics = task_runner.run(cases_to_run, fail_fast=fail_fast)

    # Persist the report first, then render the saved data in the requested format.
    saved = ReportSaver.save(
        outputs_dir, case_results, selected, grader_metrics=grader_metrics
    )
    renderer_factory(output).render(saved)

    # Exit status is truthy (1) when any case exited non-zero or the grader
    # explicitly reported passed=False; otherwise falsy (0).
    any_case_failed = any(result.exit_code != 0 for result in case_results)
    grader_rejected = (
        grader_metrics is not None and grader_metrics.get("passed") is False
    )
    raise SystemExit(any_case_failed or grader_rejected)
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
@app.command()
def report(
    task: str | None = typer.Argument(None),
    run: str = typer.Argument("latest"),
    trap_yaml_path: Path = typer.Option(Path("trap.yaml"), "--config", "-c"),
    output: OutputFormat = typer.Option(OutputFormat.rich, "--output", "-o"),
    workspace: Path = typer.Option(Path(".trap"), "--workspace", "-w"),
) -> None:
    """Display a report for a task (defaults to latest run)."""
    # The run directory is <workspace>/<task name>/<run>.
    resolved_task = TrapLoader(trap_yaml_path).resolve_task(task)
    stored_report = ReportSaver.load(workspace.resolve() / resolved_task.name / run)
    renderer = renderer_factory(output)
    renderer.render(stored_report)
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
@app.command()
def login(
    server: str = typer.Option(
        DEFAULT_SERVER,
        "--server",
        envvar="TRAPSTREET_URL",
        help="Trapstreet server URL.",
    ),
    timeout: int = typer.Option(
        300, "--timeout", help="Seconds to wait for browser approval."
    ),
) -> None:
    """Open the browser to authorize this machine; save api_key locally.

    Starts a temporary HTTP server on localhost, opens
    <server>/cli/authorize?return=http://localhost:<port>/callback in your
    browser, and waits for the redirect back with the api_key.

    The token is saved to ~/.config/trapstreet/auth.json (mode 600).
    Subsequent `tp submit` calls read from there automatically — no env
    var needed (but TRAPSTREET_API_KEY still works as an override).
    """
    captured: dict[str, str] = {}
    # Set by the handler once an api_key arrived, so the main thread can
    # block on it instead of polling `captured` in a sleep loop.
    received = threading.Event()

    # NOTE(review): the callback accepts any GET carrying api_key; nothing
    # (state/nonce) ties it to this login attempt — confirm whether the
    # server can echo a nonce back so stray local requests can be rejected.
    class _CallbackHandler(http.server.BaseHTTPRequestHandler):
        # Silence default access log
        def log_message(self, *_args: Any) -> None:  # noqa: ARG002
            return

        def do_GET(self) -> None:  # noqa: N802
            qs = urllib.parse.urlparse(self.path).query
            params = urllib.parse.parse_qs(qs)
            api_key = (params.get("api_key") or [None])[0]
            runner_name = (params.get("runner") or [None])[0]

            if api_key:
                captured["api_key"] = api_key
                if runner_name:
                    captured["runner"] = runner_name
                self.send_response(200)
                self.send_header("content-type", "text/html; charset=utf-8")
                self.end_headers()
                self.wfile.write(
                    b"<!doctype html><meta charset=utf-8>"
                    b"<title>logged in</title>"
                    b"<style>body{font-family:ui-monospace,monospace;"
                    b"background:#0a0a0a;color:#ededed;padding:3em;}"
                    b"h1{color:#ff5f1f}</style>"
                    b"<h1>logged in</h1>"
                    b"<p>You can close this tab.</p>"
                )
                # Signal only after the page was written; shutdown() waits
                # for the in-flight request anyway.
                received.set()
            else:
                self.send_response(400)
                self.send_header("content-type", "text/plain")
                self.end_headers()
                self.wfile.write(b"missing api_key in query string")

    # Port 0 lets the OS pick a free loopback port and keeps it bound.
    # Probing with a throwaway socket and re-binding afterwards leaves a
    # window in which another process can take the port.
    server_obj = socketserver.TCPServer(("127.0.0.1", 0), _CallbackHandler)
    port = server_obj.server_address[1]
    thread = threading.Thread(target=server_obj.serve_forever, daemon=True)
    thread.start()

    try:
        return_url = f"http://localhost:{port}/callback"
        auth_url = (
            f"{server.rstrip('/')}/cli/authorize"
            f"?return={urllib.parse.quote(return_url, safe='')}"
        )

        console.print(f"opening [link={auth_url}]{auth_url}[/link]")
        try:
            # webbrowser.open() reports failure by returning False, so the
            # return value has to be checked as well as exceptions.
            opened = webbrowser.open(auth_url)
        except Exception:  # noqa: BLE001
            opened = False
        if not opened:
            # Couldn't open; user can copy/paste
            console.print(
                "[yellow]could not open browser automatically — "
                "copy the URL above into a browser[/yellow]"
            )

        # Returns as soon as the callback arrived, or after `timeout` seconds.
        received.wait(timeout)
    finally:
        # Always release the port, including on Ctrl-C or an unexpected error.
        server_obj.shutdown()
        server_obj.server_close()

    if "api_key" not in captured:
        console.print(
            f"[red]timed out after {timeout}s[/red] waiting for browser approval"
        )
        raise SystemExit(2)

    path = _save_auth_file(server, captured["api_key"], captured.get("runner"))
    runner_hint = (
        f" · runner [bold]{captured.get('runner')}[/bold]"
        if captured.get("runner")
        else ""
    )
    console.print(f"[green]✓ logged in[/green]{runner_hint} · token saved to {path}")
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
@app.command()
def logout() -> None:
    """Delete the locally-stored api_key."""
    try:
        # EAFP: unlink directly. Checking exists() first can still raise
        # FileNotFoundError if the file vanishes between check and unlink.
        AUTH_FILE.unlink()
    except FileNotFoundError:
        console.print(f"already logged out — no file at {AUTH_FILE}")
    else:
        console.print(f"[green]✓[/green] removed {AUTH_FILE}")
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
@app.command()
def submit(
    task: str | None = typer.Argument(
        None,
        help="Task name (defaults to first task in trap.yaml). "
        "Used as both the local run dir and the trapstreet task_id.",
    ),
    trap_yaml_path: Path = typer.Option(Path("trap.yaml"), "--config", "-c"),
    workspace: Path = typer.Option(Path(".trap"), "--workspace", "-w"),
    run: str = typer.Option(
        "latest", "--run", "-r", help="Which run to upload (default: latest)."
    ),
    server: str = typer.Option(
        DEFAULT_SERVER,
        "--server",
        envvar="TRAPSTREET_URL",
        help="Trapstreet server URL.",
    ),
    api_key: str | None = typer.Option(
        None,
        "--api-key",
        envvar="TRAPSTREET_API_KEY",
        help="Runner api_key. Falls back to TRAPSTREET_API_KEY env, "
        "then ~/.config/trapstreet/auth.json (see `tp login`).",
    ),
) -> None:
    """Upload the latest report.json to trapstreet."""
    # Resolve api_key from flag > env > auth.json > error.
    api_key = _resolve_api_key(api_key)

    # If --server wasn't overridden but auth.json has one, prefer file's server.
    if server == DEFAULT_SERVER and not os.environ.get("TRAPSTREET_URL"):
        file_server = _load_auth_file().get("server")
        if file_server:
            server = file_server

    trap_yaml_loader = TrapLoader(trap_yaml_path)
    task_name = trap_yaml_loader.resolve_task(task).name

    report_path = workspace.resolve() / task_name / run / "report.json"
    if not report_path.exists():
        console.print(
            f"[red]error[/red]: no report at {report_path}. "
            "Run [bold]tp run[/bold] first."
        )
        raise SystemExit(2)

    payload = report_path.read_bytes()
    # Percent-encode the task name so spaces, '/', '?' or '#' in it cannot
    # change the request path or query.
    url = f"{server.rstrip('/')}/api/submit/{urllib.parse.quote(task_name, safe='')}"
    req = urllib.request.Request(
        url,
        data=payload,
        headers={
            "authorization": f"Bearer {api_key}",
            "content-type": "application/json",
        },
        method="POST",
    )

    try:
        with urllib.request.urlopen(req, timeout=30) as resp:
            raw = resp.read()
    except urllib.error.HTTPError as e:
        msg = e.read().decode("utf-8", errors="replace")
        console.print(f"[red]http {e.code}[/red]: {msg}")
        raise SystemExit(2) from None
    except urllib.error.URLError as e:
        console.print(f"[red]connection error[/red]: {e.reason}")
        raise SystemExit(2) from None
    except OSError as e:
        # A timeout or reset while reading the body is raised as a plain
        # OSError/TimeoutError, not wrapped in URLError.
        console.print(f"[red]connection error[/red]: {e}")
        raise SystemExit(2) from None

    # A proxy or misconfigured server can answer 2xx with HTML or a bare
    # value; report that instead of crashing with a traceback.
    try:
        body: Any = _json.loads(raw)
    except ValueError:
        body = None
    if not isinstance(body, dict):
        console.print("[red]error[/red]: server returned an unexpected (non-JSON) response")
        raise SystemExit(2)

    run_obj = body.get("run") or {}
    if not isinstance(run_obj, dict):
        run_obj = {}
    run_id = run_obj.get("id", "?")
    score = run_obj.get("total_score")
    passed = run_obj.get("passed")
    view_url = body.get("view_url", "")
    status = "[green]✓ passed[/green]" if passed else "[red]✗ failed[/red]"
    console.print(f"{status} [bold]{run_id}[/bold] · score [cyan]{score}[/cyan]")
    if view_url:
        console.print(f"  → [link={view_url}]{view_url}[/link]")
|
|
321
|
+
|
|
322
|
+
|
|
323
|
+
@app.command()
def init() -> None:
    """Generate annotated trap.yaml + traptask.yaml scaffold."""
    # Placeholder: no scaffold is generated yet; the command only prints a notice.
    console.print("not yet")
|
trap/loader.py
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
# Loads trap.yaml and traptask.yaml into their respective loader classes.
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from collections.abc import Iterable
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
import yaml
|
|
8
|
+
from pydantic import TypeAdapter
|
|
9
|
+
|
|
10
|
+
from trap.models import Task, TrapTask, TrapTaskCase
|
|
11
|
+
|
|
12
|
+
# Validator for the `tasks` mapping of trap.yaml: task name -> Task.
_tasks_adapter: TypeAdapter[dict[str, Task]] = TypeAdapter(dict[str, Task])
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class TrapLoader:
    """Loads trap.yaml (solution author's config).

    Attributes:
        trap_dir: Resolved directory that contains trap.yaml.
        tasks: Tasks keyed by name; each Task has ``name`` set to its key.
    """

    def __init__(self, trap_yaml_path: Path) -> None:
        """Parse and validate the file at ``trap_yaml_path``.

        Raises:
            OSError: If the file cannot be read.
            ValueError: If the file has no top-level ``tasks`` mapping
                (for example because it is empty).
        """
        self.trap_dir: Path = trap_yaml_path.resolve().parent
        # Read as UTF-8 explicitly instead of the platform's locale encoding.
        data = yaml.safe_load(trap_yaml_path.read_text(encoding="utf-8"))
        # An empty file loads as None and a scalar as str; fail with a clear
        # message instead of an opaque TypeError/KeyError on data["tasks"].
        if not isinstance(data, dict) or "tasks" not in data:
            raise ValueError(f"{trap_yaml_path} must contain a top-level 'tasks' mapping")
        raw = _tasks_adapter.validate_python(data["tasks"])
        self.tasks: dict[str, Task] = {
            name: task.model_copy(update={"name": name}) for name, task in raw.items()
        }

    def select_task(self, name: str) -> Task:
        """Return task by name.

        Raises:
            KeyError: If no task called ``name`` is defined.
        """
        try:
            return self.tasks[name]
        except KeyError:
            raise KeyError(f"task {name!r} not found in trap.yaml") from None

    def resolve_task(self, name: str | None) -> Task:
        """Return named task, or the first task if name is None or empty.

        Raises:
            KeyError: If the named task does not exist, or if no name was
                given and trap.yaml defines no tasks at all.
        """
        if name:
            return self.select_task(name)
        if not self.tasks:
            # next(iter({})) would leak a bare StopIteration to the caller.
            raise KeyError("no tasks defined in trap.yaml")
        return self.select_task(next(iter(self.tasks)))
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class TrapTaskLoader:
    """Loads traptask.yaml (task author's config) and resolves runtime paths.

    Attributes:
        task_dir: Resolved directory that contains (or would contain) traptask.yaml.
        traptask: The validated TrapTask, or one discovered from inputs/.
        inputs_dir: Resolved ``dirs.inputs`` relative to ``task_dir``.
        expected_dir: Resolved ``dirs.expected`` relative to ``task_dir``.
    """

    def __init__(self, traptask_yaml_path: Path) -> None:
        """Load ``traptask_yaml_path`` if it exists, else discover cases from inputs/.

        Raises:
            FileNotFoundError: If the file is absent and discovery finds no
                inputs/ directory or no case subdirectories.
        """
        self.task_dir: Path = traptask_yaml_path.resolve().parent
        if traptask_yaml_path.exists():
            # Read as UTF-8 explicitly instead of the platform's locale encoding.
            loaded = yaml.safe_load(traptask_yaml_path.read_text(encoding="utf-8"))
            self.traptask = TrapTask.model_validate(loaded)
        else:
            self.traptask = self._discover(self.task_dir)
        self.inputs_dir: Path = (self.task_dir / self.traptask.dirs.inputs).resolve()
        self.expected_dir: Path = (self.task_dir / self.traptask.dirs.expected).resolve()

    @staticmethod
    def _discover(task_dir: Path) -> TrapTask:
        """Auto-build TrapTask by scanning inputs/ when traptask.yaml is absent.

        Each subdirectory of ``task_dir / "inputs"`` becomes one case, ordered
        by name.

        Raises:
            FileNotFoundError: If inputs/ is missing or has no subdirectories.
        """
        inputs_dir = task_dir / "inputs"
        if not inputs_dir.is_dir():
            raise FileNotFoundError(f"no traptask.yaml and no inputs/ directory found in {task_dir}")
        case_ids = sorted(p.name for p in inputs_dir.iterdir() if p.is_dir())
        if not case_ids:
            raise FileNotFoundError(f"inputs/ in {task_dir} has no case subdirectories")
        return TrapTask(cases=tuple(TrapTaskCase(id=case_id) for case_id in case_ids))

    @classmethod
    def from_task(cls, task: Task, trap_dir: Path) -> TrapTaskLoader:
        """Resolve traptask.yaml from a Task's directory path and the trap.yaml directory."""
        traptask_dir = (trap_dir / task.traptask).resolve()
        return cls(traptask_dir / "traptask.yaml")

    @property
    def cases(self) -> tuple[TrapTaskCase, ...]:
        """Return all non-skipped cases."""
        return tuple(c for c in self.traptask.cases if not c.skip)

    def cases_with_tags(self, tags: Iterable[str] | None = None) -> tuple[TrapTaskCase, ...]:
        """Return non-skipped cases matching any of the specified tags, or all cases if tags is empty/None."""
        if not (tag_set := set(tags or ())):
            return self.cases
        # A case matches when it shares at least one tag with the request.
        return tuple(c for c in self.cases if not tag_set.isdisjoint(c.tags))
|
trap/models/__init__.py
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from .config import InputsBinding, Task
|
|
4
|
+
from .report import Counts, ReportData
|
|
5
|
+
from .results import CaseResult
|
|
6
|
+
from .task import DirsConfig, SubprocessCmd, TrapTask, TrapTaskCase
|
|
7
|
+
|
|
8
|
+
__all__ = [
|
|
9
|
+
"CaseResult",
|
|
10
|
+
"Counts",
|
|
11
|
+
"DirsConfig",
|
|
12
|
+
"InputsBinding",
|
|
13
|
+
"ReportData",
|
|
14
|
+
"SubprocessCmd",
|
|
15
|
+
"Task",
|
|
16
|
+
"TrapTask",
|
|
17
|
+
"TrapTaskCase",
|
|
18
|
+
]
|
trap/models/config.py
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# Models for trap.yaml (solution author's config).
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from pydantic import BaseModel
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class InputsBinding(BaseModel):
    """Declares how a case's input files are bound to the solution (the `inputs` field of Task)."""

    stdin: str | None = None  # filename in inputs/{case_id}/ piped as subprocess stdin
    # declared input keys; runner validates these stems exist in inputs/{case_id}/ before running
    files: tuple[str, ...] = ()
    # all files in inputs/{case_id}/ are also exposed at runtime via the env var
    # named by Task.inputs_envvar ("INPUTS" by default)
    # TODO: args: list[str] = [] — pass inputs as CLI positional/named arguments
    # TODO: env: dict[str, str] = {} — inject inputs as environment variables
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class Task(BaseModel):
    """One entry of the `tasks` mapping in trap.yaml."""

    # empty by default; TrapLoader overwrites it with the task's key in the `tasks` mapping
    name: str = ""
    description: str = ""
    cmd: str  # required; the solution's command (how it is executed is up to the runner)
    traptask: str  # path to traptask directory (relative to trap.yaml); trap looks for traptask.yaml inside
    inputs: InputsBinding | None = None
    # output filenames; solution writes each to the path given by outputs_envvar[name] at runtime
    file_outputs: tuple[str, ...] = ()
    timeout: int = 30  # presumably seconds — confirm against the runner
    # env var names injected by the runner; override if the solution already uses these names
    inputs_envvar: str = "INPUTS"
    outputs_envvar: str = "OUTPUTS"
|
trap/models/report.py
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
from pydantic import BaseModel
|
|
6
|
+
|
|
7
|
+
from trap.models.config import Task
|
|
8
|
+
from trap.models.results import CaseResult
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class Counts(BaseModel):
    """Tally of case outcomes for one run (filled in by ReportData.from_run)."""

    passed: int  # not skipped and exit_code == 0
    failed: int  # not skipped and exit_code != 0
    skipped: int  # results flagged as skipped
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class ReportData(BaseModel):
    """Report for one run: the task, its case results, their tally and optional grader metrics."""

    task: Task
    cases: tuple[CaseResult, ...]
    run_counts: Counts
    grader_metrics: Any = None

    @classmethod
    def from_run(
        cls,
        cases: tuple[CaseResult, ...],
        task: Task,
        grader_metrics: Any = None,
    ) -> ReportData:
        """Build a report from case results, tallying passed/failed/skipped.

        A skipped result counts only as skipped; otherwise exit code 0 counts
        as passed and any other exit code as failed.
        """
        n_passed = n_failed = n_skipped = 0
        for result in cases:
            if result.skipped:
                n_skipped += 1
            elif result.exit_code == 0:
                n_passed += 1
            else:
                n_failed += 1

        tally = Counts(passed=n_passed, failed=n_failed, skipped=n_skipped)
        return cls(
            task=task,
            cases=cases,
            grader_metrics=grader_metrics,
            run_counts=tally,
        )
|
trap/models/results.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# Runtime result models produced by judge and grader subprocesses.
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
from pydantic import BaseModel
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class CaseResult(BaseModel):
    """Outcome of a single case."""

    case_id: str
    exit_code: int = 0  # non-zero marks the case as failed in the report tally and CLI exit status
    duration: float = 0.0  # seconds
    # any JSON-serializable value; trap does not interpret this, grader does
    # (no default is declared for this field)
    metrics: Any
    skipped: bool = False  # skipped results are tallied separately from passed/failed
|
trap/models/task.py
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# Models for traptask.yaml (task author's config).
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from pydantic import BaseModel
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class SubprocessCmd(BaseModel):
    """Command configuration used for the optional `judge` and `grader` entries of TrapTask."""

    # cmd is relative to traptask.yaml's directory and run via shlex.split
    cmd: str
    # name of an environment variable; presumably how the payload is handed to
    # the subprocess — confirm against the runner
    payload_envvar: str = "TRAPTASK_PAYLOAD"
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class DirsConfig(BaseModel):
    """Locations of a task's input and expected-output directories."""

    # paths relative to traptask.yaml; outputs dir is a runtime tmpdir, not declared here
    inputs: str = "inputs/"
    expected: str = "expected/"
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class TrapTaskCase(BaseModel):
    """One case declared in traptask.yaml (or discovered from an inputs/ subdirectory)."""

    id: str
    description: str = ""
    tags: tuple[str, ...] = ()  # matched against the tags passed to TrapTaskLoader.cases_with_tags
    skip: bool = False  # True excludes the case from TrapTaskLoader.cases
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class TrapTask(BaseModel):
    """Top-level model of traptask.yaml."""

    dirs: DirsConfig = DirsConfig()  # defaults to inputs/ and expected/
    cases: tuple[TrapTaskCase, ...]  # required
    judge: SubprocessCmd | None = None  # None → skip per-case scoring
    grader: SubprocessCmd | None = None  # None → skip overall aggregation
|
trap/report/__init__.py
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import enum
|
|
4
|
+
|
|
5
|
+
from trap.report.base import BaseRenderer
|
|
6
|
+
from trap.report.json import JsonRenderer
|
|
7
|
+
from trap.report.rich import RichRenderer
|
|
8
|
+
from trap.report.saver import ReportSaver
|
|
9
|
+
|
|
10
|
+
__all__ = ["BaseRenderer", "JsonRenderer", "OutputFormat", "ReportSaver", "RichRenderer", "renderer_factory"]
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class OutputFormat(enum.StrEnum):
    """Report output formats understood by renderer_factory."""

    rich = "rich"
    json = "json"
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def renderer_factory(fmt: OutputFormat) -> BaseRenderer:
|
|
19
|
+
match fmt:
|
|
20
|
+
case OutputFormat.rich:
|
|
21
|
+
return RichRenderer()
|
|
22
|
+
case OutputFormat.json:
|
|
23
|
+
return JsonRenderer()
|
trap/report/base.py
ADDED
trap/report/json.py
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import sys
|
|
4
|
+
|
|
5
|
+
from trap.models import ReportData
|
|
6
|
+
from trap.report.base import BaseRenderer
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class JsonRenderer(BaseRenderer):
    """Renderer that writes the report to stdout as indented JSON."""

    def render(self, data: ReportData) -> None:
        """Serialize ``data`` with a 2-space indent and write it, newline-terminated, to stdout."""
        serialized = data.model_dump_json(indent=2)
        sys.stdout.write(f"{serialized}\n")
|