synth-ai 0.2.8.dev11__py3-none-any.whl → 0.2.8.dev13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (37) hide show
  1. synth_ai/api/train/__init__.py +5 -0
  2. synth_ai/api/train/builders.py +165 -0
  3. synth_ai/api/train/cli.py +429 -0
  4. synth_ai/api/train/config_finder.py +120 -0
  5. synth_ai/api/train/env_resolver.py +302 -0
  6. synth_ai/api/train/pollers.py +66 -0
  7. synth_ai/api/train/task_app.py +128 -0
  8. synth_ai/api/train/utils.py +232 -0
  9. synth_ai/cli/__init__.py +23 -0
  10. synth_ai/cli/rl_demo.py +2 -2
  11. synth_ai/cli/root.py +2 -1
  12. synth_ai/cli/task_apps.py +520 -0
  13. synth_ai/demos/demo_task_apps/math/modal_task_app.py +31 -25
  14. synth_ai/task/__init__.py +94 -1
  15. synth_ai/task/apps/__init__.py +88 -0
  16. synth_ai/task/apps/grpo_crafter.py +438 -0
  17. synth_ai/task/apps/math_single_step.py +852 -0
  18. synth_ai/task/auth.py +132 -0
  19. synth_ai/task/client.py +148 -0
  20. synth_ai/task/contracts.py +29 -14
  21. synth_ai/task/datasets.py +105 -0
  22. synth_ai/task/errors.py +49 -0
  23. synth_ai/task/json.py +77 -0
  24. synth_ai/task/proxy.py +258 -0
  25. synth_ai/task/rubrics.py +212 -0
  26. synth_ai/task/server.py +398 -0
  27. synth_ai/task/tracing_utils.py +79 -0
  28. synth_ai/task/vendors.py +61 -0
  29. synth_ai/tracing_v3/session_tracer.py +13 -5
  30. synth_ai/tracing_v3/storage/base.py +10 -12
  31. synth_ai/tracing_v3/turso/manager.py +20 -6
  32. {synth_ai-0.2.8.dev11.dist-info → synth_ai-0.2.8.dev13.dist-info}/METADATA +3 -2
  33. {synth_ai-0.2.8.dev11.dist-info → synth_ai-0.2.8.dev13.dist-info}/RECORD +37 -15
  34. {synth_ai-0.2.8.dev11.dist-info → synth_ai-0.2.8.dev13.dist-info}/WHEEL +0 -0
  35. {synth_ai-0.2.8.dev11.dist-info → synth_ai-0.2.8.dev13.dist-info}/entry_points.txt +0 -0
  36. {synth_ai-0.2.8.dev11.dist-info → synth_ai-0.2.8.dev13.dist-info}/licenses/LICENSE +0 -0
  37. {synth_ai-0.2.8.dev11.dist-info → synth_ai-0.2.8.dev13.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,520 @@
1
+ from __future__ import annotations
2
+
3
+ import shutil
4
+ import subprocess
5
+ import tempfile
6
+ import os
7
+ import signal
8
+ from pathlib import Path
9
+ from typing import Sequence
10
+
11
+ REPO_ROOT = Path(__file__).resolve().parents[2]
12
+
13
+ import click
14
+
15
+ from synth_ai.task.apps import ModalDeploymentConfig, TaskAppEntry, registry
16
+ from synth_ai.task.server import run_task_app
17
+
18
+
# Root of the `task-app` command family. Subcommands attach themselves with
# `@task_app_group.command(...)`; the group callback itself does nothing.
@click.group(name='task-app', help='Utilities for serving and deploying Synth task apps.')
def task_app_group() -> None:
    return None
25
+
26
+
@task_app_group.command('list')
def list_apps() -> None:
    """List registered task apps."""

    registered = registry.list()
    if registered:
        for app in registered:
            # Aliases are only mentioned when the entry actually has some.
            alias_note = ""
            if app.aliases:
                alias_note = f" (aliases: {', '.join(app.aliases)})"
            click.echo(f"- {app.app_id}{alias_note}: {app.description}")
    else:
        click.echo("No task apps registered.")
def _load_env_files_into_process(paths: Sequence[str]) -> None:
    """Copy ``KEY=VALUE`` pairs from env files into ``os.environ``.

    Files are processed in order. Files that cannot be read are skipped
    (best effort). Variables already present in the process environment are
    never overwritten, so the real environment and earlier files win.

    Parsing rules:
      * surrounding whitespace on a line is ignored;
      * blank lines, comment lines (``#``) and lines without ``=`` are skipped;
      * a leading ``export `` (shell style) is dropped;
      * surrounding double and single quotes are stripped from the value.

    Args:
        paths: Env file paths; ``~`` is expanded.
    """
    for p in paths:
        try:
            txt = Path(p).expanduser().read_text()
        except (OSError, ValueError, RuntimeError):
            # Unreadable / undecodable / unresolvable path: skip this file.
            continue
        for raw_line in txt.splitlines():
            # Strip first so indented comments are recognised as comments
            # instead of being exported under a key such as "# NAME".
            line = raw_line.strip()
            if not line or line.startswith('#') or '=' not in line:
                continue
            if line.startswith('export '):
                line = line[len('export '):].lstrip()
            k, v = line.split('=', 1)
            key = k.strip()
            val = v.strip().strip('"').strip("'")
            if key and key not in os.environ:
                os.environ[key] = val
52
+
53
+
54
+
def _serve_cli_options(func):
    """Attach the argument and options shared by both ``serve`` commands.

    The top-level ``serve`` command and ``task-app serve`` take exactly the
    same parameters; declaring them once keeps the two from drifting apart.
    """
    shared = (
        click.argument('app_id', type=str),
        click.option('--host', default='0.0.0.0', show_default=True),
        click.option('--port', default=8001, show_default=True, type=int),
        click.option('--env-file', multiple=True, type=click.Path(), help='Extra .env files to load'),
        click.option('--reload/--no-reload', 'reload_flag', default=False, help='Enable uvicorn auto-reload'),
        click.option('--force/--no-force', 'force', default=False, help='Kill any process already bound to the selected port before starting'),
        click.option('--trace', 'trace_dir', type=click.Path(), default=None, help='Enable tracing and write SFT JSONL files to this directory'),
        click.option('--trace-db', 'trace_db', type=click.Path(), default=None, help='Override local trace DB path (maps to SQLD_DB_PATH)'),
    )
    # Stacked decorators apply bottom-up; applying in reverse reproduces the
    # same declaration order (and therefore the same --help ordering).
    for decorate in reversed(shared):
        func = decorate(func)
    return func


@click.command('serve')
@_serve_cli_options
def serve_command(
    app_id: str,
    host: str,
    port: int,
    env_file: Sequence[str],
    reload_flag: bool,
    force: bool,
    trace_dir: str | None,
    trace_db: str | None,
) -> None:
    """Serve a registered task app."""
    _serve(app_id, host, port, env_file, reload_flag, force, trace_dir=trace_dir, trace_db=trace_db)


@task_app_group.command('serve')
@_serve_cli_options
def serve_task_group(
    app_id: str,
    host: str,
    port: int,
    env_file: Sequence[str],
    reload_flag: bool,
    force: bool,
    trace_dir: str | None,
    trace_db: str | None,
) -> None:
    """Serve a registered task app."""
    _serve(app_id, host, port, env_file, reload_flag, force, trace_dir=trace_dir, trace_db=trace_db)
97
+
def _determine_env_files(entry: TaskAppEntry, user_env_files: Sequence[str]) -> list[Path]:
    """Choose which env file(s) to use for a Modal deploy/serve.

    Precedence:
      1. Files passed explicitly by the user; every one must be an existing
         regular file.
      2. The entry's own ``env_files`` that exist on disk.
      3. An interactive choice among ``*.env`` files found under ``REPO_ROOT``.

    Args:
        entry: Registry entry; only ``entry.env_files`` is read (may be empty
            or ``None``).
        user_env_files: Paths given on the command line; ``~`` is expanded.

    Returns:
        A non-empty list of env file paths.

    Raises:
        click.ClickException: If a user-supplied path is not a file, or no
            candidate env file can be found at all.
    """
    resolved: list[Path] = []
    for candidate in user_env_files:
        p = Path(candidate).expanduser()
        # is_file() rather than exists(): a directory is not a usable env file.
        if not p.is_file():
            raise click.ClickException(f"Env file not found: {p}")
        resolved.append(p)
    if resolved:
        return resolved

    expanded_defaults = (Path(path).expanduser() for path in (entry.env_files or []))
    defaults = [path for path in expanded_defaults if path.is_file()]
    if defaults:
        return defaults

    env_candidates = sorted(path for path in REPO_ROOT.glob('**/*.env') if path.is_file())
    if not env_candidates:
        raise click.ClickException('No env file found. Pass --env-file explicitly.')

    click.echo('Select env file to load:')
    for idx, path in enumerate(env_candidates, start=1):
        click.echo(f"  {idx}) {path}")
    choice = click.prompt('Enter choice', type=click.IntRange(1, len(env_candidates)))
    return [env_candidates[choice - 1]]
121
+
122
+
def _port_accepts_connections(host: str, port: int) -> bool:
    """Return True when a TCP connection to ``host:port`` succeeds."""
    import socket

    # A wildcard bind address is not a valid connect target on every
    # platform; a server bound to it is reachable through loopback.
    probe_host = '127.0.0.1' if host in ('', '0.0.0.0') else host
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        s.settimeout(1.0)  # never hang the CLI on an unresponsive address
        return s.connect_ex((probe_host, port)) == 0


def _signal_pids(pids: Sequence[str], sig: int, failure_label: str) -> None:
    """Send ``sig`` to every PID, ignoring processes that already exited."""
    for pid in pids:
        try:
            os.kill(int(pid), sig)
        except ProcessLookupError:
            # The process went away on its own, which is the desired outcome.
            continue
        except Exception as exc:
            raise click.ClickException(f'{failure_label} PID {pid}: {exc}') from exc


def _ensure_port_free(port: int, host: str, *, force: bool) -> None:
    """Make sure nothing is serving on ``port`` before the app starts.

    Returns immediately when the port does not accept connections. Otherwise
    the listening PIDs are looked up with ``lsof`` (if available) and:

      * without ``force`` a ``ClickException`` naming the PIDs is raised;
      * with ``force`` the PIDs get SIGTERM, then SIGKILL if the port is still
        busy half a second later.

    Args:
        port: TCP port to check.
        host: Address the server is about to bind to.
        force: Terminate the processes holding the port instead of failing.

    Raises:
        click.ClickException: If the port is busy and ``force`` is false, a
            process cannot be signalled, or the port is still busy afterwards.
    """
    import time

    if not _port_accepts_connections(host, port):
        return

    try:
        # -sTCP:LISTEN restricts the result to the listening server(s);
        # without it lsof also reports clients connected to the port, which
        # --force would then kill as well.
        out = subprocess.run(
            ["lsof", "-t", f"-iTCP:{port}", "-sTCP:LISTEN"],
            capture_output=True,
            text=True,
            check=False,
        )
        pids = [pid for pid in out.stdout.split() if pid]
    except FileNotFoundError:
        # lsof is not installed: the port is busy but the owner is unknown.
        pids = []

    if not force:
        message = f"Port {port} appears to be in use"
        if pids:
            message += f" (PIDs: {', '.join(pids)})"
        raise click.ClickException(message)

    _signal_pids(pids, signal.SIGTERM, 'Failed to terminate')
    time.sleep(0.5)

    if _port_accepts_connections(host, port):
        # SIGKILL does not exist on every platform; fall back to SIGTERM.
        _signal_pids(pids, getattr(signal, 'SIGKILL', signal.SIGTERM), 'Failed to force terminate')
        time.sleep(0.5)
        if _port_accepts_connections(host, port):
            raise click.ClickException(f'Port {port} is still in use after attempting to terminate processes.')
169
+
def _preflight_env_api_key() -> None:
    """Best-effort upload of ENVIRONMENT_API_KEY to the backend.

    Reads the backend URL (``BACKEND_BASE_URL`` / ``SYNTH_BASE_URL``, default
    ``http://localhost:8000/api``), ``SYNTH_API_KEY`` and the environment API
    key from ``os.environ``. When both keys are set it fetches the backend's
    public key, encrypts the environment key with a sealed box, posts it to
    ``/v1/env-keys`` and then queries ``/v1/env-keys/verify``.

    Every failure is reported as a ``[WARN]`` line and otherwise ignored, so
    the calling command always proceeds.
    """
    try:
        raw_backend = os.environ.get("BACKEND_BASE_URL") or os.environ.get("SYNTH_BASE_URL") or "http://localhost:8000/api"
        backend_base = raw_backend.rstrip("/")
        if not backend_base.endswith("/api"):
            backend_base = backend_base + "/api"
        synth_key = os.environ.get("SYNTH_API_KEY") or ""
        env_api_key = (
            os.environ.get("ENVIRONMENT_API_KEY")
            or os.environ.get("dev_environment_api_key")
            or os.environ.get("DEV_ENVIRONMENT_API_KEY")
            or ""
        )
        if not (synth_key and env_api_key):
            # Nothing to upload, or nothing to authenticate with.
            return

        import base64
        import httpx

        click.echo(f"[preflight] backend={backend_base}")
        auth_headers = {"Authorization": f"Bearer {synth_key}"}
        with httpx.Client(timeout=15.0, headers=auth_headers) as c:
            click.echo("[preflight] fetching public key…")
            rpk = c.get(f"{backend_base}/v1/crypto/public-key")
        pk = (rpk.json() or {}).get("public_key") if rpk.status_code == 200 else None
        if not pk:
            click.echo("[WARN] Backend public key unavailable; skipping ENVIRONMENT_API_KEY upload")
            return

        try:
            from nacl.public import PublicKey, SealedBox

            pub = PublicKey(base64.b64decode(pk, validate=True))
            ct_b64 = base64.b64encode(SealedBox(pub).encrypt(env_api_key.encode("utf-8"))).decode()
            payload = {"name": "ENVIRONMENT_API_KEY", "ciphertext_b64": ct_b64}
            with httpx.Client(timeout=15.0, headers={**auth_headers, "Content-Type": "application/json"}) as c:
                click.echo("[preflight] upserting env key…")
                up = c.post(f"{backend_base}/v1/env-keys", json=payload)
                click.echo(f"[preflight] upsert status={up.status_code}")
                click.echo("[preflight] verifying env key presence…")
                ver = c.get(f"{backend_base}/v1/env-keys/verify")
            if ver.status_code == 200 and (ver.json() or {}).get("present"):
                click.echo("✅ ENVIRONMENT_API_KEY upserted and verified in backend")
            else:
                click.echo("[WARN] ENVIRONMENT_API_KEY verification failed; proceeding anyway")
        except Exception:
            # Deliberately broad: a missing crypto dependency, a bad key or a
            # network error must not stop the command.
            click.echo("[WARN] Failed to encrypt/upload ENVIRONMENT_API_KEY; proceeding anyway")
    except Exception:
        click.echo("[WARN] Backend preflight for ENVIRONMENT_API_KEY failed; proceeding anyway")


def _serve(
    app_id: str,
    host: str,
    port: int,
    env_file: Sequence[str],
    reload_flag: bool,
    force: bool,
    *,
    trace_dir: str | None = None,
    trace_db: str | None = None,
) -> None:
    """Resolve a task app from the registry and run it.

    Steps: look up ``app_id``; combine the entry's env files with the ones
    given on the command line; export tracing settings to the environment;
    make sure the port is free; run the backend preflight; hand over to
    ``run_task_app``.

    Args:
        app_id: Registry id of the task app.
        host: Address to bind.
        port: Port to bind.
        env_file: Extra env files, appended after the entry's own.
        reload_flag: Passed to ``run_task_app`` as ``reload``.
        force: Terminate processes already holding ``port``.
        trace_dir: If given, tracing is enabled and this directory is created
            and exported as ``TASKAPP_SFT_OUTPUT_DIR``.
        trace_db: If given, tracing is enabled and this path is exported as
            ``SQLD_DB_PATH``.

    Raises:
        click.ClickException: Unknown ``app_id``, trace directory cannot be
            created, or the port cannot be freed.
    """
    try:
        entry = registry.get(app_id)
    except KeyError as exc:  # pragma: no cover - CLI input validation
        raise click.ClickException(str(exc)) from exc

    # env_files may be unset on an entry; treat that as "no defaults".
    env_files = list(entry.env_files or [])
    env_files.extend(env_file)

    # NOTE(review): the nesting of this section was reconstructed from a
    # flattened listing; confirm the TRACE_CONFIG refresh belongs at the
    # "tracing enabled" level rather than under "trace_db is not None".
    trace_enabled = trace_dir is not None or trace_db is not None
    if trace_enabled:
        os.environ['TASKAPP_TRACING_ENABLED'] = '1'
        if trace_dir is not None:
            dir_path = Path(trace_dir).expanduser()
            try:
                dir_path.mkdir(parents=True, exist_ok=True)
            except Exception as exc:
                raise click.ClickException(f"Failed to create trace directory {dir_path}: {exc}") from exc
            os.environ['TASKAPP_SFT_OUTPUT_DIR'] = str(dir_path)
            click.echo(f"Tracing enabled. SFT JSONL will be written to {dir_path}")
        if trace_db is not None:
            db_path = Path(trace_db).expanduser()
            os.environ['SQLD_DB_PATH'] = str(db_path)
            # Drop any previously exported URL so it cannot shadow the new path.
            os.environ.pop('TURSO_LOCAL_DB_URL', None)
            click.echo(f"Tracing DB path set to {db_path}")
        from synth_ai.tracing_v3.config import CONFIG as TRACE_CONFIG

        # Keep the tracing config object and the environment in agreement.
        new_db_url = os.getenv('TURSO_LOCAL_DB_URL') or TRACE_CONFIG.db_url
        TRACE_CONFIG.db_url = new_db_url
        if new_db_url:
            os.environ['TURSO_LOCAL_DB_URL'] = new_db_url
            click.echo(f"Tracing DB URL resolved to {new_db_url}")
    elif os.getenv('TASKAPP_TRACING_ENABLED'):
        click.echo("Tracing enabled via environment variables")

    _ensure_port_free(port, host, force=force)

    # Preflight: upsert and verify ENVIRONMENT_API_KEY with backend before serving
    _preflight_env_api_key()

    run_task_app(
        entry.config_factory,
        host=host,
        port=port,
        reload=reload_flag,
        env_files=env_files,
    )


@task_app_group.command('deploy')
@click.argument("app_id", type=str)
@click.option("--name", "modal_name", default=None, help="Override Modal app name")
@click.option("--dry-run", is_flag=True, help="Print modal deploy command without executing")
@click.option("--modal-cli", default="modal", help="Path to modal CLI executable")
@click.option('--env-file', multiple=True, type=click.Path(), help='Env file to load into the container (can be repeated)')
def deploy_app(app_id: str, modal_name: str | None, dry_run: bool, modal_cli: str, env_file: Sequence[str]) -> None:
    """Deploy a task app to Modal."""

    try:
        entry = registry.get(app_id)
    except KeyError as exc:  # pragma: no cover - CLI input validation
        raise click.ClickException(str(exc)) from exc

    modal_cfg = entry.modal
    if modal_cfg is None:
        raise click.ClickException(f"Task app '{entry.app_id}' does not define Modal deployment settings")

    env_paths = _determine_env_files(entry, env_file)
    click.echo('Using env file(s): ' + ', '.join(str(p) for p in env_paths))

    modal_path = shutil.which(modal_cli)
    if modal_path is None:
        raise click.ClickException(f"Modal CLI not found (looked for '{modal_cli}')")

    # Preflight: upsert and verify ENVIRONMENT_API_KEY with backend before deploy
    # NOTE(review): unlike `modal-serve`, the env files are not loaded into
    # this process first, so keys that live only in those files are not seen
    # by the preflight; the preflight also runs for --dry-run. Confirm both
    # are intended.
    _preflight_env_api_key()

    script_path = _write_modal_entrypoint(
        entry,
        modal_cfg,
        modal_name,
        dotenv_paths=[str(path) for path in env_paths],
    )
    cmd = [modal_path, "deploy", str(script_path)]
    try:
        if dry_run:
            click.echo("Dry run: " + " ".join(cmd))
            return
        subprocess.run(cmd, check=True)
    except subprocess.CalledProcessError as exc:
        # Report a clean CLI error instead of a traceback.
        raise click.ClickException(f"modal deploy failed with exit code {exc.returncode}") from exc
    finally:
        # The generated entrypoint is a throwaway file in every outcome.
        script_path.unlink(missing_ok=True)


@task_app_group.command('modal-serve')
@click.argument('app_id', type=str, required=False)
@click.option('--modal-cli', default='modal', help='Path to modal CLI executable')
@click.option('--name', 'modal_name', default=None, help='Override Modal app name (optional)')
@click.option('--env-file', multiple=True, type=click.Path(), help='Env file to load into the container (can be repeated)')
def modal_serve_app(app_id: str | None, modal_cli: str, modal_name: str | None, env_file: Sequence[str]) -> None:
    """Run a task app with `modal serve`.

    APP_ID may be omitted when exactly one task app is registered.
    """
    if app_id is None:
        entries = registry.list()
        if len(entries) != 1:
            available = ', '.join(e.app_id for e in entries) or 'none'
            raise click.ClickException(f"APP_ID required (available: {available})")
        entry = entries[0]
    else:
        try:
            entry = registry.get(app_id)
        except KeyError as exc:
            raise click.ClickException(str(exc)) from exc

    modal_cfg = entry.modal
    if modal_cfg is None:
        raise click.ClickException(f"Task app '{entry.app_id}' does not define Modal deployment settings")

    env_paths = _determine_env_files(entry, env_file)
    click.echo('Using env file(s): ' + ', '.join(str(p) for p in env_paths))
    # Make values available for preflight
    _load_env_files_into_process([str(p) for p in env_paths])

    modal_path = shutil.which(modal_cli)
    if modal_path is None:
        raise click.ClickException(f"Modal CLI not found (looked for '{modal_cli}')")

    # Preflight: upsert and verify ENVIRONMENT_API_KEY with backend before serve
    _preflight_env_api_key()

    script_path = _write_modal_entrypoint(
        entry,
        modal_cfg,
        modal_name,
        dotenv_paths=[str(path) for path in env_paths],
    )
    cmd = [modal_path, 'serve', str(script_path)]
    try:
        subprocess.run(cmd, check=True)
    except subprocess.CalledProcessError as exc:
        raise click.ClickException(f"modal serve failed with exit code {exc.returncode}") from exc
    finally:
        script_path.unlink(missing_ok=True)
429
+
430
+
def _write_modal_entrypoint(
    entry: TaskAppEntry,
    modal_cfg: ModalDeploymentConfig,
    override_name: str | None,
    *,
    dotenv_paths: Sequence[str] | None = None,
) -> Path:
    """Generate a temporary Python script that defines the Modal app.

    The script rebuilds the image, secrets and volumes from ``modal_cfg``,
    imports the module that defines ``entry.config_factory``, looks the entry
    up in the registry again and exposes it as a Modal ASGI function.

    Args:
        entry: Task app entry; ``app_id`` and ``config_factory`` are read.
        modal_cfg: Modal settings baked into the script as literals.
        override_name: Modal app name to use instead of ``modal_cfg.app_name``.
        dotenv_paths: Env files the script passes to ``Secret.from_dotenv``.

    Returns:
        Path of the generated script. The caller owns the file and is
        responsible for deleting it.
    """
    modal_name = override_name or modal_cfg.app_name

    module_name = entry.config_factory.__module__
    dotenv_paths = [str(Path(path)) for path in (dotenv_paths or [])]

    pip_packages = list(modal_cfg.pip_packages)

    local_dirs = [(str(Path(src)), dst) for src, dst in modal_cfg.extra_local_dirs]
    secret_names = list(modal_cfg.secret_names)
    volume_mounts = [(name, mount) for name, mount in modal_cfg.volume_mounts]

    # Values are embedded with !r so they become valid Python literals in the
    # generated source; doubled braces are literal braces in the output.
    script = f"""from __future__ import annotations

import importlib
import sys
sys.path.insert(0, '/opt/synth_ai_repo')

from modal import App, Image, Secret, Volume, asgi_app

from synth_ai.task.apps import registry
from synth_ai.task.server import create_task_app

ENTRY_ID = {entry.app_id!r}
MODAL_APP_NAME = {modal_name!r}
MODULE_NAME = {module_name!r}
DOTENV_PATHS = {dotenv_paths!r}

image = Image.debian_slim(python_version={modal_cfg.python_version!r})

pip_packages = {pip_packages!r}
if pip_packages:
    image = image.pip_install(*pip_packages)

local_dirs = {local_dirs!r}
for local_src, remote_dst in local_dirs:
    image = image.add_local_dir(local_src, remote_dst)

secrets = {secret_names!r}
secret_objs = [Secret.from_name(name) for name in secrets]

if DOTENV_PATHS:
    secret_objs.extend(Secret.from_dotenv(path) for path in DOTENV_PATHS)

volume_mounts = {volume_mounts!r}
volume_map = {{}}
for vol_name, mount_path in volume_mounts:
    volume_map[mount_path] = Volume.from_name(vol_name, create_if_missing=True)

importlib.import_module(MODULE_NAME)

entry = registry.get(ENTRY_ID)
modal_cfg = entry.modal
if modal_cfg is None:
    raise RuntimeError(f"Modal configuration missing for task app {{ENTRY_ID}}")

app = App(MODAL_APP_NAME)

@app.function(
    image=image,
    timeout={modal_cfg.timeout},
    memory={modal_cfg.memory},
    cpu={modal_cfg.cpu},
    min_containers={modal_cfg.min_containers},
    max_containers={modal_cfg.max_containers},
    secrets=secret_objs,
    volumes=volume_map,
)
@asgi_app()
def fastapi_app():
    config = entry.config_factory()
    return create_task_app(config)
"""

    # Python parses source files as UTF-8 by default, so write UTF-8 no matter
    # what the locale encoding is (paths may contain non-ASCII characters).
    tmp = tempfile.NamedTemporaryFile(
        "w",
        suffix=f"_{entry.app_id}_modal.py",
        delete=False,
        encoding="utf-8",
    )
    try:
        with tmp:
            tmp.write(script)
    except Exception:
        # Do not leave a half-written script behind.
        Path(tmp.name).unlink(missing_ok=True)
        raise
    return Path(tmp.name)
516
+
517
+
def register(cli: click.Group) -> None:
    # Attach the top-level `serve` shortcut first, then the `task-app` group.
    for command in (serve_command, task_app_group):
        cli.add_command(command)
@@ -205,12 +205,14 @@ def fastapi_app():
205
205
  """Return Hendrycks MATH problem/answer and tool schema for a seed."""
206
206
  q, a = _load_hendrycks_problem(int(seed), subject=subject)
207
207
  tools = [{
208
- "name": "interact",
209
- "description": "Submit one or more actions to the math environment.",
208
+ "name": "submit_answer",
209
+ "description": "Provide the final numerical or algebraic answer for the current math problem.",
210
210
  "parameters": {
211
211
  "type": "object",
212
- "properties": {"actions": {"type": "array", "items": {"type": "string"}}},
213
- "required": ["actions"],
212
+ "properties": {
213
+ "answer": {"type": "string", "description": "The proposed final answer"},
214
+ },
215
+ "required": ["answer"],
214
216
  },
215
217
  }]
216
218
  return {
@@ -245,7 +247,7 @@ def fastapi_app():
245
247
 
246
248
  OPENAI_REMOVE_FIELDS = ("stop_after_tool_calls", "thinking_mode", "thinking_budget", "reasoning")
247
249
  OPENAI_REMOVE_SAMPLING_FIELDS = ("temperature", "top_p")
248
- TOOL_CHOICE_FORCE = {"type": "function", "function": {"name": "interact_many"}}
250
+ TOOL_CHOICE_FORCE = {"type": "function", "function": {"name": "submit_answer"}}
249
251
 
250
252
  def _prepare_openai_payload(model: str | None, payload: dict[str, object]) -> dict[str, object]:
251
253
  sanitized = dict(payload)
@@ -258,9 +260,9 @@ def fastapi_app():
258
260
  sanitized.pop("max_tokens", None)
259
261
  for field in OPENAI_REMOVE_SAMPLING_FIELDS:
260
262
  sanitized.pop(field, None)
261
- sanitized["tool_choice"] = TOOL_CHOICE_FORCE
262
- sanitized["parallel_tool_calls"] = False
263
- return sanitized
263
+ sanitized["tool_choice"] = TOOL_CHOICE_FORCE
264
+ sanitized["parallel_tool_calls"] = False
265
+ return sanitized
264
266
 
265
267
  @api.post("/proxy/v1/chat/completions")
266
268
  def proxy_chat_completions(request: dict[str, object] = Body(...)):
@@ -330,11 +332,11 @@ def fastapi_app():
330
332
  sanitized.pop("max_tokens", None)
331
333
  for field in ("temperature", "top_p"):
332
334
  sanitized.pop(field, None)
333
- sanitized["tool_choice"] = {"type": "function", "function": {"name": "interact"}}
335
+ sanitized["tool_choice"] = {"type": "function", "function": {"name": "submit_answer"}}
334
336
  sanitized["parallel_tool_calls"] = False
335
337
  return sanitized
336
338
 
337
- def _parse_tool_actions(resp: dict[str, Any]) -> list[str]:
339
+ def _parse_tool_answer(resp: dict[str, Any]) -> str:
338
340
  try:
339
341
  choices = resp.get("choices")
340
342
  if isinstance(choices, list) and choices:
@@ -343,7 +345,7 @@ def fastapi_app():
343
345
  if isinstance(tcs, list) and tcs:
344
346
  fn = tcs[0].get("function", {}) if isinstance(tcs[0], dict) else {}
345
347
  args = fn.get("arguments")
346
- obj = {}
348
+ obj: dict[str, Any] = {}
347
349
  if isinstance(args, str):
348
350
  try:
349
351
  obj = _json.loads(args)
@@ -351,12 +353,12 @@ def fastapi_app():
351
353
  obj = {}
352
354
  elif isinstance(args, dict):
353
355
  obj = args
354
- acts = obj.get("actions")
355
- if isinstance(acts, list):
356
- return [str(a) for a in acts][:5]
356
+ ans = obj.get("answer")
357
+ if isinstance(ans, str):
358
+ return ans.strip()
357
359
  except Exception:
358
360
  pass
359
- return []
361
+ return ""
360
362
 
361
363
  # Single-step rollout: one agent call followed by evaluation of the returned tool answer
362
364
  history: list[dict[str, Any]] = []
@@ -373,7 +375,16 @@ def fastapi_app():
373
375
  "messages": [{"role": "user", "content": user_prompt}],
374
376
  "tools": [{
375
377
  "type": "function",
376
- "function": {"name": "interact", "parameters": {"type": "object", "properties": {"actions": {"type": "array", "items": {"type": "string"}}}, "required": ["actions"]}},
378
+ "function": {
379
+ "name": "submit_answer",
380
+ "parameters": {
381
+ "type": "object",
382
+ "properties": {
383
+ "answer": {"type": "string"},
384
+ },
385
+ "required": ["answer"],
386
+ },
387
+ },
377
388
  }],
378
389
  "max_tokens": 256,
379
390
  "temperature": 0.2,
@@ -431,11 +442,11 @@ def fastapi_app():
431
442
  except Exception:
432
443
  pass
433
444
 
434
- tool_actions = _parse_tool_actions(data)
435
- history.append({"actions": tool_actions})
445
+ tool_answer = _parse_tool_answer(data)
446
+ history.append({"answer": tool_answer})
436
447
  steps.append({
437
448
  "obs": {},
438
- "tool_calls": [{"tool_name": "interact", "arguments": _json.dumps({"actions": tool_actions})}],
449
+ "tool_calls": [{"tool_name": "submit_answer", "arguments": _json.dumps({"answer": tool_answer})}],
439
450
  "reward": None,
440
451
  "done": False,
441
452
  "truncated": False,
@@ -444,13 +455,8 @@ def fastapi_app():
444
455
 
445
456
  # Evaluate answer correctness using tool output (or fall back to assistant text)
446
457
  reward_val = 0.0
447
- candidate = ""
458
+ candidate = tool_answer or ""
448
459
  try:
449
- if isinstance(tool_actions, list):
450
- for s in reversed(tool_actions):
451
- if isinstance(s, str) and s.strip():
452
- candidate = s.strip()
453
- break
454
460
  if not candidate and llm_text is not None:
455
461
  candidate = _extract_boxed(llm_text) or llm_text
456
462
  if expected_answer is not None: