synth-ai 0.2.9.dev4__py3-none-any.whl → 0.2.9.dev7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (157) hide show
  1. examples/common_old/backend.py +0 -1
  2. examples/crafter_debug_render.py +15 -6
  3. examples/evals_old/compare_models.py +1 -0
  4. examples/finetuning_old/_backup_synth_qwen/filter_traces_achievements.py +6 -2
  5. examples/finetuning_old/_backup_synth_qwen/react_agent_lm.py +4 -4
  6. examples/finetuning_old/_backup_synth_qwen/sft_kickoff.py +4 -3
  7. examples/finetuning_old/synth_qwen_v1/filter_traces_achievements.py +6 -2
  8. examples/finetuning_old/synth_qwen_v1/finetune.py +1 -1
  9. examples/finetuning_old/synth_qwen_v1/hello_ft_model.py +4 -4
  10. examples/finetuning_old/synth_qwen_v1/infer.py +1 -2
  11. examples/finetuning_old/synth_qwen_v1/poll.py +4 -2
  12. examples/finetuning_old/synth_qwen_v1/prepare_data.py +8 -8
  13. examples/finetuning_old/synth_qwen_v1/react_agent_lm.py +5 -4
  14. examples/finetuning_old/synth_qwen_v1/run_crafter_sft_job.py +11 -8
  15. examples/finetuning_old/synth_qwen_v1/run_ft_job.py +17 -12
  16. examples/finetuning_old/synth_qwen_v1/upload_data.py +1 -1
  17. examples/finetuning_old/synth_qwen_v1/util.py +7 -2
  18. examples/rl/configs/eval_base_qwen.toml +1 -1
  19. examples/rl/configs/rl_from_base_qwen17.toml +1 -1
  20. examples/rl/download_dataset.py +26 -10
  21. examples/rl/run_eval.py +17 -15
  22. examples/rl/run_rl_and_save.py +24 -7
  23. examples/rl/task_app/math_single_step.py +128 -11
  24. examples/rl/task_app/math_task_app.py +11 -3
  25. examples/rl_old/task_app.py +222 -53
  26. examples/warming_up_to_rl/analyze_trace_db.py +7 -5
  27. examples/warming_up_to_rl/export_trace_sft.py +141 -16
  28. examples/warming_up_to_rl/groq_test.py +11 -4
  29. examples/warming_up_to_rl/manage_secrets.py +15 -6
  30. examples/warming_up_to_rl/readme.md +9 -2
  31. examples/warming_up_to_rl/run_eval.py +108 -30
  32. examples/warming_up_to_rl/run_fft_and_save.py +128 -52
  33. examples/warming_up_to_rl/run_local_rollout.py +87 -36
  34. examples/warming_up_to_rl/run_local_rollout_modal.py +113 -25
  35. examples/warming_up_to_rl/run_local_rollout_parallel.py +80 -16
  36. examples/warming_up_to_rl/run_local_rollout_traced.py +125 -20
  37. examples/warming_up_to_rl/run_rl_and_save.py +31 -7
  38. examples/warming_up_to_rl/run_rollout_remote.py +37 -10
  39. examples/warming_up_to_rl/task_app/grpo_crafter.py +90 -27
  40. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +9 -27
  41. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +46 -108
  42. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -1
  43. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +1 -1
  44. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -1
  45. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +50 -17
  46. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +35 -21
  47. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +8 -4
  48. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +29 -26
  49. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +1 -1
  50. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +17 -13
  51. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +1 -1
  52. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +106 -63
  53. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +82 -84
  54. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +76 -59
  55. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +1 -1
  56. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +43 -49
  57. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +5 -15
  58. synth_ai/__init__.py +1 -0
  59. synth_ai/api/train/builders.py +34 -10
  60. synth_ai/api/train/cli.py +172 -32
  61. synth_ai/api/train/config_finder.py +59 -4
  62. synth_ai/api/train/env_resolver.py +32 -14
  63. synth_ai/api/train/pollers.py +11 -3
  64. synth_ai/api/train/task_app.py +4 -1
  65. synth_ai/api/train/utils.py +20 -4
  66. synth_ai/cli/__init__.py +11 -4
  67. synth_ai/cli/balance.py +1 -1
  68. synth_ai/cli/demo.py +19 -5
  69. synth_ai/cli/rl_demo.py +75 -16
  70. synth_ai/cli/root.py +116 -37
  71. synth_ai/cli/task_apps.py +1286 -170
  72. synth_ai/cli/traces.py +1 -0
  73. synth_ai/cli/turso.py +73 -0
  74. synth_ai/core/experiment.py +0 -2
  75. synth_ai/demo_registry.py +67 -30
  76. synth_ai/demos/core/cli.py +493 -164
  77. synth_ai/demos/demo_task_apps/core.py +50 -6
  78. synth_ai/demos/demo_task_apps/crafter/configs/crafter_fft_4b.toml +2 -3
  79. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +36 -28
  80. synth_ai/demos/demo_task_apps/math/_common.py +1 -2
  81. synth_ai/demos/demo_task_apps/math/deploy_modal.py +0 -2
  82. synth_ai/demos/demo_task_apps/math/modal_task_app.py +168 -65
  83. synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -1
  84. synth_ai/environments/examples/bandit/engine.py +12 -4
  85. synth_ai/environments/examples/bandit/taskset.py +4 -4
  86. synth_ai/environments/reproducibility/tree.py +3 -1
  87. synth_ai/environments/service/core_routes.py +6 -2
  88. synth_ai/evals/base.py +0 -2
  89. synth_ai/experimental/synth_oss.py +11 -12
  90. synth_ai/handshake.py +3 -1
  91. synth_ai/http_client.py +31 -7
  92. synth_ai/inference/__init__.py +0 -2
  93. synth_ai/inference/client.py +8 -4
  94. synth_ai/jobs/client.py +40 -10
  95. synth_ai/learning/client.py +33 -8
  96. synth_ai/learning/config.py +0 -2
  97. synth_ai/learning/constants.py +0 -2
  98. synth_ai/learning/ft_client.py +6 -3
  99. synth_ai/learning/health.py +9 -2
  100. synth_ai/learning/jobs.py +17 -5
  101. synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +1 -3
  102. synth_ai/learning/prompts/random_search.py +4 -1
  103. synth_ai/learning/prompts/run_random_search_banking77.py +6 -1
  104. synth_ai/learning/rl_client.py +42 -14
  105. synth_ai/learning/sse.py +0 -2
  106. synth_ai/learning/validators.py +6 -2
  107. synth_ai/lm/caching/ephemeral.py +1 -3
  108. synth_ai/lm/core/exceptions.py +0 -2
  109. synth_ai/lm/core/main.py +13 -1
  110. synth_ai/lm/core/synth_models.py +0 -1
  111. synth_ai/lm/core/vendor_clients.py +4 -2
  112. synth_ai/lm/overrides.py +2 -2
  113. synth_ai/lm/vendors/core/anthropic_api.py +7 -7
  114. synth_ai/lm/vendors/core/openai_api.py +2 -0
  115. synth_ai/lm/vendors/openai_standard.py +3 -1
  116. synth_ai/lm/vendors/openai_standard_responses.py +6 -3
  117. synth_ai/lm/vendors/supported/custom_endpoint.py +1 -3
  118. synth_ai/lm/vendors/synth_client.py +37 -10
  119. synth_ai/rl/__init__.py +0 -1
  120. synth_ai/rl/contracts.py +0 -2
  121. synth_ai/rl/env_keys.py +6 -1
  122. synth_ai/task/__init__.py +1 -0
  123. synth_ai/task/apps/__init__.py +11 -11
  124. synth_ai/task/auth.py +29 -17
  125. synth_ai/task/client.py +3 -1
  126. synth_ai/task/contracts.py +1 -0
  127. synth_ai/task/datasets.py +3 -1
  128. synth_ai/task/errors.py +3 -2
  129. synth_ai/task/health.py +0 -2
  130. synth_ai/task/json.py +0 -1
  131. synth_ai/task/proxy.py +2 -5
  132. synth_ai/task/rubrics.py +9 -3
  133. synth_ai/task/server.py +31 -5
  134. synth_ai/task/tracing_utils.py +8 -3
  135. synth_ai/task/validators.py +0 -1
  136. synth_ai/task/vendors.py +0 -1
  137. synth_ai/tracing_v3/db_config.py +26 -1
  138. synth_ai/tracing_v3/decorators.py +1 -0
  139. synth_ai/tracing_v3/examples/basic_usage.py +3 -2
  140. synth_ai/tracing_v3/hooks.py +2 -0
  141. synth_ai/tracing_v3/replica_sync.py +1 -0
  142. synth_ai/tracing_v3/session_tracer.py +24 -3
  143. synth_ai/tracing_v3/storage/base.py +4 -1
  144. synth_ai/tracing_v3/storage/factory.py +0 -1
  145. synth_ai/tracing_v3/turso/manager.py +102 -38
  146. synth_ai/tracing_v3/turso/models.py +4 -1
  147. synth_ai/tracing_v3/utils.py +1 -0
  148. synth_ai/v0/tracing/upload.py +32 -135
  149. {synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev7.dist-info}/METADATA +1 -1
  150. {synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev7.dist-info}/RECORD +154 -156
  151. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_stepwise_rewards.py +0 -58
  152. synth_ai/environments/examples/sokoban/units/astar_common.py +0 -95
  153. synth_ai/install_sqld.sh +0 -40
  154. {synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev7.dist-info}/WHEEL +0 -0
  155. {synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev7.dist-info}/entry_points.txt +0 -0
  156. {synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev7.dist-info}/licenses/LICENSE +0 -0
  157. {synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev7.dist-info}/top_level.txt +0 -0
synth_ai/cli/task_apps.py CHANGED
@@ -8,6 +8,7 @@ import importlib
8
8
  import importlib.util
9
9
  import inspect
10
10
  import os
11
+ import json
11
12
  import signal
12
13
  import shutil
13
14
  import subprocess
@@ -15,11 +16,19 @@ import sys
15
16
  import tempfile
16
17
  from dataclasses import dataclass
17
18
  from pathlib import Path
18
- from typing import Callable, Iterable, Sequence
19
+ import types
20
+ from typing import Any, Callable, Iterable, Sequence, Iterator, cast
21
+
22
+ try: # Python 3.11+
23
+ import tomllib as _toml
24
+ except Exception: # pragma: no cover - fallback
25
+ _toml = None # type: ignore
26
+ import uuid
19
27
 
20
28
  import click
21
29
  from synth_ai.task.apps import ModalDeploymentConfig, TaskAppConfig, TaskAppEntry, registry
22
- from synth_ai.task.server import run_task_app
30
+ from synth_ai.task.server import run_task_app, create_task_app
31
+ from synth_ai.config.base_url import PROD_BASE_URL_DEFAULT
23
32
 
24
33
  REPO_ROOT = Path(__file__).resolve().parents[2]
25
34
 
@@ -37,6 +46,8 @@ DEFAULT_IGNORE_DIRS = {
37
46
 
38
47
  DEFAULT_SEARCH_RELATIVE = (
39
48
  Path("."),
49
+ Path("examples"),
50
+ Path("synth_ai"),
40
51
  )
41
52
 
42
53
 
@@ -63,6 +74,73 @@ class AppChoice:
63
74
  return entry
64
75
 
65
76
 
77
@contextlib.contextmanager
def _temporary_sys_path(paths: Sequence[Path]) -> Iterator[None]:
    """Temporarily prepend directories to ``sys.path``.

    Each entry of *paths* is resolved and inserted at index 0 unless the
    resolved string is already on ``sys.path``. Entries are inserted one at a
    time, so the last entry of *paths* ends up first on ``sys.path``.

    On exit (normal or via exception) only the entries added by this call are
    removed; entries that were already present are left untouched.

    Args:
        paths: Directories to expose for imports. ``Path`` objects are
            expected; plain strings are accepted as well.

    Yields:
        ``None`` while the extra entries are active.
    """
    added: list[str] = []
    for candidate in paths:
        try:
            resolved = str(Path(candidate).resolve())
        except Exception:
            # Best-effort: an entry that cannot be resolved is skipped
            # rather than aborting the whole import attempt.
            continue
        if resolved in sys.path:
            continue
        sys.path.insert(0, resolved)
        added.append(resolved)
    try:
        yield None
    finally:
        for entry in added:
            # The code run inside the block may already have removed it.
            with contextlib.suppress(ValueError):
                sys.path.remove(entry)
100
+
101
+
102
def _possible_module_names(
    path: Path, module_search_roots: Sequence[Path]
) -> list[tuple[str, Path]]:
    """Return potential dotted module names for *path*, one per usable root.

    For every root that exists and contains *path*, the path relative to that
    root (with its final suffix stripped) is joined with dots to form a module
    name. Results keep the order of *module_search_roots*.

    Roots are resolved before the comparison, so *path* should already be
    resolved; an unresolved path will generally not match any root.

    Args:
        path: File whose module name is wanted.
        module_search_roots: Candidate import roots. Roots that cannot be
            resolved, do not exist, do not contain *path*, or repeat an
            earlier root are skipped.

    Returns:
        ``(module_name, resolved_root)`` pairs; empty when no root matches.
    """
    candidates: list[tuple[str, Path]] = []
    seen_roots: set[Path] = set()
    for root in module_search_roots:
        try:
            resolved_root = root.resolve()
        except Exception:
            continue
        if resolved_root in seen_roots or not resolved_root.exists():
            continue
        seen_roots.add(resolved_root)
        try:
            relative = path.relative_to(resolved_root)
        except ValueError:
            # *path* does not live under this root.
            continue
        if not relative.parts:
            # *path* is the root itself; ``with_suffix`` would raise on the
            # empty name, and there is no module name to derive anyway.
            continue
        module_name = ".".join(relative.with_suffix("").parts)
        candidates.append((module_name, resolved_root))
    return candidates
124
+
125
+
126
def _ensure_parent_namespace(module_name: str, search_root: Path) -> None:
    """Register placeholder parent packages for a dotted module name.

    For every proper prefix of *module_name* (``a``, ``a.b`` for ``a.b.c``)
    that is not yet in ``sys.modules``, a bare module object is created whose
    ``__path__`` points at the matching directory below *search_root*, and it
    is stored in ``sys.modules``. Prefixes that are already registered are
    left as they are, and the leaf module itself is never created here.
    """
    segments = module_name.split(".")
    for count in range(1, len(segments)):
        prefix = segments[:count]
        dotted = ".".join(prefix)
        if dotted not in sys.modules:
            placeholder = types.ModuleType(dotted)
            directory = search_root.joinpath(*prefix)
            try:
                location = directory.resolve()
            except Exception:
                # Fall back to the root when the sub-directory cannot be resolved.
                location = search_root.resolve()
            placeholder.__path__ = [str(location)]  # type: ignore[attr-defined]
            sys.modules[dotted] = placeholder
142
+
143
+
66
144
  def _should_ignore_path(path: Path) -> bool:
67
145
  return any(part in DEFAULT_IGNORE_DIRS for part in path.parts)
68
146
 
@@ -70,7 +148,19 @@ def _should_ignore_path(path: Path) -> bool:
70
148
  def _candidate_search_roots() -> list[Path]:
71
149
  """Only search for task apps in the current working directory and subdirectories."""
72
150
  roots: list[Path] = []
73
-
151
+
152
+ # Prioritize demo directory if it exists
153
+ try:
154
+ from synth_ai.demos.demo_task_apps.core import load_demo_dir
155
+
156
+ demo_dir = load_demo_dir()
157
+ if demo_dir:
158
+ demo_path = Path(demo_dir)
159
+ if demo_path.exists() and demo_path.is_dir():
160
+ roots.append(demo_path.resolve())
161
+ except Exception:
162
+ pass
163
+
74
164
  # Allow explicit search paths via environment variable
75
165
  env_paths = os.environ.get("SYNTH_TASK_APP_SEARCH_PATH")
76
166
  if env_paths:
@@ -82,6 +172,13 @@ def _candidate_search_roots() -> list[Path]:
82
172
  cwd = Path.cwd().resolve()
83
173
  roots.append(cwd)
84
174
 
175
+ for rel in DEFAULT_SEARCH_RELATIVE:
176
+ try:
177
+ candidate = (cwd / rel).resolve()
178
+ except Exception:
179
+ continue
180
+ roots.append(candidate)
181
+
85
182
  # Remove duplicates while preserving order
86
183
  seen: set[Path] = set()
87
184
  ordered: list[Path] = []
@@ -97,6 +194,49 @@ def _candidate_search_roots() -> list[Path]:
97
194
  return ordered
98
195
 
99
196
 
197
def _eval_config_sort_key(path: Path) -> tuple[int, int, int, str]:
    """Build the sort key used to order candidate eval config files.

    Lower tuples sort first. In order of precedence a file is preferred when
    one of its ancestor directories is named ``configs``, when one is named
    ``examples``, and when its own file name starts with ``eval`` (all
    comparisons are case-insensitive). The full path string breaks ties.
    """
    ancestor_names = {ancestor.name.lower() for ancestor in path.parents}
    outside_configs = int("configs" not in ancestor_names)
    outside_examples = int("examples" not in ancestor_names)
    lacks_eval_prefix = int(not path.name.lower().startswith("eval"))
    return (outside_configs, outside_examples, lacks_eval_prefix, str(path))
204
+
205
+
206
def _discover_eval_config_paths() -> list[Path]:
    """Find candidate eval TOML files under the task-app search roots.

    Every directory returned by ``_candidate_search_roots()`` is walked
    recursively for ``*.toml`` files. A file is kept when it is a regular
    file, is not rejected by ``_should_ignore_path``, and its lower-cased
    name contains ``eval``. Kept paths are resolved and de-duplicated, then
    ordered with ``_eval_config_sort_key``.

    Returns:
        Resolved paths of the matching files, best candidates first.
    """
    candidates: list[Path] = []
    seen: set[Path] = set()
    for root in _candidate_search_roots():
        if not root.exists() or not root.is_dir():
            continue
        for path in root.rglob("*.toml"):
            if not path.is_file() or _should_ignore_path(path):
                continue
            # "evaluation" contains "eval", so a single substring test
            # covers both spellings.
            if "eval" not in path.name.lower():
                continue
            try:
                resolved = path.resolve()
            except Exception:
                # Best-effort discovery: skip files that cannot be resolved.
                continue
            if resolved in seen:
                continue
            seen.add(resolved)
            candidates.append(resolved)

    candidates.sort(key=_eval_config_sort_key)
    return candidates
238
+
239
+
100
240
  class _TaskAppConfigVisitor(ast.NodeVisitor):
101
241
  def __init__(self) -> None:
102
242
  self.matches: list[tuple[str, int]] = []
@@ -124,7 +264,11 @@ def _is_task_app_config_call(node: ast.Call) -> bool:
124
264
 
125
265
  def _extract_app_id(node: ast.Call) -> str | None:
126
266
  for kw in node.keywords:
127
- if kw.arg == "app_id" and isinstance(kw.value, ast.Constant) and isinstance(kw.value.value, str):
267
+ if (
268
+ kw.arg == "app_id"
269
+ and isinstance(kw.value, ast.Constant)
270
+ and isinstance(kw.value.value, str)
271
+ ):
128
272
  return kw.value.value
129
273
  if node.args:
130
274
  first = node.args[0]
@@ -149,7 +293,11 @@ def _extract_register_app_id(node: ast.Call) -> str | None:
149
293
  entry_call = kw.value
150
294
  if isinstance(entry_call.func, ast.Name) and entry_call.func.id == "TaskAppEntry":
151
295
  for entry_kw in entry_call.keywords:
152
- if entry_kw.arg == "app_id" and isinstance(entry_kw.value, ast.Constant) and isinstance(entry_kw.value.value, str):
296
+ if (
297
+ entry_kw.arg == "app_id"
298
+ and isinstance(entry_kw.value, ast.Constant)
299
+ and isinstance(entry_kw.value.value, str)
300
+ ):
153
301
  return entry_kw.value.value
154
302
  return None
155
303
 
@@ -180,7 +328,11 @@ class _ModalAppVisitor(ast.NodeVisitor):
180
328
  if name:
181
329
  self.matches.append((name, getattr(node, "lineno", 0)))
182
330
  elif isinstance(func, ast.Attribute):
183
- if isinstance(func.value, ast.Name) and func.value.id in self.modal_aliases and func.attr == "App":
331
+ if (
332
+ isinstance(func.value, ast.Name)
333
+ and func.value.id in self.modal_aliases
334
+ and func.attr == "App"
335
+ ):
184
336
  name = _extract_modal_app_name(node)
185
337
  if name:
186
338
  self.matches.append((name, getattr(node, "lineno", 0)))
@@ -189,7 +341,11 @@ class _ModalAppVisitor(ast.NodeVisitor):
189
341
 
190
342
  def _extract_modal_app_name(node: ast.Call) -> str | None:
191
343
  for kw in node.keywords:
192
- if kw.arg in {"name", "app_name"} and isinstance(kw.value, ast.Constant) and isinstance(kw.value.value, str):
344
+ if (
345
+ kw.arg in {"name", "app_name"}
346
+ and isinstance(kw.value, ast.Constant)
347
+ and isinstance(kw.value.value, str)
348
+ ):
193
349
  return kw.value.value
194
350
  if node.args:
195
351
  first = node.args[0]
@@ -201,7 +357,7 @@ def _extract_modal_app_name(node: ast.Call) -> str | None:
201
357
  def _collect_task_app_choices() -> list[AppChoice]:
202
358
  # Clear registry to avoid duplicate registration errors
203
359
  registry.clear()
204
-
360
+
205
361
  choices: list[AppChoice] = []
206
362
  with contextlib.suppress(Exception):
207
363
  import synth_ai.demos.demo_task_apps # noqa: F401
@@ -224,6 +380,7 @@ def _collect_task_app_choices() -> list[AppChoice]:
224
380
  continue
225
381
  unique[key] = choice
226
382
  ordered.append(choice)
383
+ ordered.sort(key=_app_choice_sort_key)
227
384
  return ordered
228
385
 
229
386
 
@@ -254,6 +411,10 @@ def _collect_scanned_task_configs() -> list[AppChoice]:
254
411
  results: list[AppChoice] = []
255
412
  seen: set[tuple[str, Path]] = set()
256
413
  for root in _candidate_search_roots():
414
+ try:
415
+ root_resolved = root.resolve()
416
+ except Exception:
417
+ continue
257
418
  if not root.exists() or not root.is_dir():
258
419
  continue
259
420
  for path in root.rglob("*.py"):
@@ -283,7 +444,11 @@ def _collect_scanned_task_configs() -> list[AppChoice]:
283
444
  path=path.resolve(),
284
445
  source="discovered",
285
446
  description=f"TaskAppConfig in {path.name} (line {lineno})",
286
- entry_loader=lambda p=path.resolve(), a=app_id: _load_entry_from_path(p, a),
447
+ entry_loader=lambda p=path.resolve(),
448
+ a=app_id,
449
+ roots=(root_resolved,): _load_entry_from_path(
450
+ p, a, module_search_roots=roots
451
+ ),
287
452
  lineno=lineno,
288
453
  )
289
454
  )
@@ -330,6 +495,62 @@ def _collect_modal_scripts() -> list[AppChoice]:
330
495
  return results
331
496
 
332
497
 
498
def _app_choice_sort_key(choice: AppChoice) -> tuple[int, int, int, int, int, str, str]:
    """Ranking heuristic so wrapper-style task apps surface first.

    Lower tuples sort first. The components, in order of precedence:

    1. ``demo_rank`` - 0 when the file lives under the directory reported by
       ``load_demo_dir()``, else 1 (also 1 when that lookup fails).
    2. ``cwd_rank`` - 0 when the file is at most two path components below
       the current working directory, else 1.
    3. ``modal_rank`` - 1 when ``choice.modal_script`` is truthy, else 0.
    4. ``file_rank`` - 0 for names ending in ``task_app.py``; 1 for names
       ending in ``_app.py`` or containing ``task_app``; 2 for any other
       ``.py`` file; 3 otherwise.
    5. ``directory_rank`` - 0 when the parent directory is named
       ``task_app`` or ``task_apps``, else 1.
    6. ``choice.app_id`` and the path string, as deterministic tie-breakers.
    """
    # Ranking is best-effort: any failure while probing the filesystem simply
    # leaves the corresponding rank at its non-preferred value.
    cwd_rank = 1
    try:
        rel_path = choice.path.relative_to(Path.cwd().resolve())
    except Exception:
        pass
    else:
        # Directly in the working directory or one level below it.
        if len(rel_path.parts) <= 2:
            cwd_rank = 0

    demo_rank = 1
    try:
        from synth_ai.demos.demo_task_apps.core import load_demo_dir

        demo_dir = load_demo_dir()
        if demo_dir and choice.path.is_relative_to(Path(demo_dir).resolve()):
            demo_rank = 0
    except Exception:
        pass

    modal_rank = 1 if choice.modal_script else 0

    name = choice.path.name.lower()
    # "_task_app.py" is a special case of "task_app.py"; one suffix test suffices.
    if name.endswith("task_app.py"):
        file_rank = 0
    elif name.endswith("_app.py") or "task_app" in name:
        file_rank = 1
    elif name.endswith(".py"):
        file_rank = 2
    else:
        file_rank = 3

    directory_rank = 0 if choice.path.parent.name.lower() in {"task_app", "task_apps"} else 1

    return (
        demo_rank,
        cwd_rank,
        modal_rank,
        file_rank,
        directory_rank,
        choice.app_id,
        str(choice.path),
    )
552
+
553
+
333
554
  def _choice_matches_identifier(choice: AppChoice, identifier: str) -> bool:
334
555
  ident = identifier.strip()
335
556
  if not ident:
@@ -357,7 +578,7 @@ def _has_modal_support_in_file(path: Path) -> bool:
357
578
  try:
358
579
  source = path.read_text(encoding="utf-8")
359
580
  tree = ast.parse(source, filename=str(path))
360
-
581
+
361
582
  # Look for ModalDeploymentConfig in register_task_app calls
362
583
  for node in ast.walk(tree):
363
584
  if isinstance(node, ast.Call):
@@ -366,11 +587,19 @@ def _has_modal_support_in_file(path: Path) -> bool:
366
587
  for kw in node.keywords:
367
588
  if kw.arg == "entry" and isinstance(kw.value, ast.Call):
368
589
  entry_call = kw.value
369
- if isinstance(entry_call.func, ast.Name) and entry_call.func.id == "TaskAppEntry":
590
+ if (
591
+ isinstance(entry_call.func, ast.Name)
592
+ and entry_call.func.id == "TaskAppEntry"
593
+ ):
370
594
  for entry_kw in entry_call.keywords:
371
- if entry_kw.arg == "modal" and isinstance(entry_kw.value, ast.Call):
595
+ if entry_kw.arg == "modal" and isinstance(
596
+ entry_kw.value, ast.Call
597
+ ):
372
598
  modal_call = entry_kw.value
373
- if isinstance(modal_call.func, ast.Name) and modal_call.func.id == "ModalDeploymentConfig":
599
+ if (
600
+ isinstance(modal_call.func, ast.Name)
601
+ and modal_call.func.id == "ModalDeploymentConfig"
602
+ ):
374
603
  return True
375
604
  except Exception:
376
605
  pass
@@ -382,7 +611,7 @@ def _extract_modal_config_from_file(path: Path) -> ModalDeploymentConfig | None:
382
611
  try:
383
612
  source = path.read_text(encoding="utf-8")
384
613
  tree = ast.parse(source, filename=str(path))
385
-
614
+
386
615
  # Look for ModalDeploymentConfig in register_task_app calls
387
616
  for node in ast.walk(tree):
388
617
  if isinstance(node, ast.Call):
@@ -391,11 +620,19 @@ def _extract_modal_config_from_file(path: Path) -> ModalDeploymentConfig | None:
391
620
  for kw in node.keywords:
392
621
  if kw.arg == "entry" and isinstance(kw.value, ast.Call):
393
622
  entry_call = kw.value
394
- if isinstance(entry_call.func, ast.Name) and entry_call.func.id == "TaskAppEntry":
623
+ if (
624
+ isinstance(entry_call.func, ast.Name)
625
+ and entry_call.func.id == "TaskAppEntry"
626
+ ):
395
627
  for entry_kw in entry_call.keywords:
396
- if entry_kw.arg == "modal" and isinstance(entry_kw.value, ast.Call):
628
+ if entry_kw.arg == "modal" and isinstance(
629
+ entry_kw.value, ast.Call
630
+ ):
397
631
  modal_call = entry_kw.value
398
- if isinstance(modal_call.func, ast.Name) and modal_call.func.id == "ModalDeploymentConfig":
632
+ if (
633
+ isinstance(modal_call.func, ast.Name)
634
+ and modal_call.func.id == "ModalDeploymentConfig"
635
+ ):
399
636
  # Extract the arguments to ModalDeploymentConfig
400
637
  return _build_modal_config_from_ast(modal_call)
401
638
  except Exception:
@@ -445,9 +682,10 @@ def _build_modal_config_from_ast(modal_call: ast.Call) -> ModalDeploymentConfig
445
682
  if name and mount:
446
683
  mounts.append((name, mount))
447
684
  kwargs[kw.arg] = tuple(mounts)
448
-
685
+
449
686
  # Create ModalDeploymentConfig with extracted arguments
450
687
  from synth_ai.task.apps import ModalDeploymentConfig
688
+
451
689
  return ModalDeploymentConfig(**kwargs)
452
690
  except Exception:
453
691
  return None
@@ -465,20 +703,29 @@ def _choice_has_local_support(choice: AppChoice) -> bool:
465
703
 
466
704
  def _format_choice(choice: AppChoice, index: int | None = None) -> str:
467
705
  prefix = f"[{index}] " if index is not None else ""
468
- rel_path: str
706
+ # Get file modification timestamp
469
707
  try:
470
- rel_path = str(choice.path.relative_to(REPO_ROOT))
708
+ from datetime import datetime
709
+
710
+ mtime = choice.path.stat().st_mtime
711
+ modified_str = datetime.fromtimestamp(mtime).strftime("%Y-%m-%d %H:%M:%S")
712
+ details = f"Modified: {modified_str}"
471
713
  except Exception:
472
- rel_path = str(choice.path)
473
- details = choice.description or f"Located at {rel_path}"
474
- return f"{prefix}{choice.app_id} ({choice.source}) {details}"
714
+ # Fallback if timestamp unavailable
715
+ details = choice.description or "No timestamp available"
716
+ # Format: single line with timestamp
717
+ main_line = f"{prefix}{choice.app_id} ({choice.source}) – {details}"
718
+ return main_line
475
719
 
476
720
 
477
721
  def _prompt_user_for_choice(choices: list[AppChoice]) -> AppChoice:
478
722
  click.echo("Select a task app:")
479
723
  for idx, choice in enumerate(choices, start=1):
480
724
  click.echo(_format_choice(choice, idx))
481
- response = click.prompt("Enter choice", default="1", type=str).strip() or "1"
725
+ try:
726
+ response = click.prompt("Enter choice", default="1", type=str).strip() or "1"
727
+ except (click.exceptions.Abort, EOFError, KeyboardInterrupt):
728
+ raise click.ClickException("Task app selection cancelled by user")
482
729
  if not response.isdigit():
483
730
  raise click.ClickException("Selection must be a number")
484
731
  index = int(response)
@@ -489,7 +736,7 @@ def _prompt_user_for_choice(choices: list[AppChoice]) -> AppChoice:
489
736
 
490
737
  def _select_app_choice(app_id: str | None, purpose: str) -> AppChoice:
491
738
  choices = _collect_task_app_choices()
492
- if purpose == "serve":
739
+ if purpose in {"serve", "eval"}:
493
740
  filtered = [c for c in choices if not c.modal_script]
494
741
  elif purpose in {"deploy", "modal-serve"}:
495
742
  filtered = []
@@ -499,6 +746,8 @@ def _select_app_choice(app_id: str | None, purpose: str) -> AppChoice:
499
746
  else:
500
747
  filtered = choices
501
748
 
749
+ filtered.sort(key=_app_choice_sort_key)
750
+
502
751
  if not filtered:
503
752
  raise click.ClickException("No task apps discovered for this command.")
504
753
 
@@ -526,22 +775,90 @@ def _select_app_choice(app_id: str | None, purpose: str) -> AppChoice:
526
775
  return _prompt_user_for_choice(filtered)
527
776
 
528
777
 
529
- def _load_entry_from_path(path: Path, app_id: str) -> TaskAppEntry:
530
- resolved = path.resolve()
531
- module_name = f"_synth_task_app_{hashlib.md5(str(resolved).encode(), usedforsecurity=False).hexdigest()}"
778
+ def _import_task_app_module(
779
+ resolved: Path,
780
+ module_name: str,
781
+ *,
782
+ namespace_root: Path | None,
783
+ sys_path_roots: Sequence[Path],
784
+ ensure_namespace: bool = True,
785
+ ) -> types.ModuleType:
532
786
  spec = importlib.util.spec_from_file_location(module_name, str(resolved))
533
787
  if spec is None or spec.loader is None:
534
788
  raise click.ClickException(f"Unable to load Python module from {resolved}")
789
+
535
790
  module = importlib.util.module_from_spec(spec)
536
791
  sys.modules[module_name] = module
537
-
538
- # Clear registry before importing to avoid duplicate registration errors
539
- registry.clear()
540
-
541
- try:
542
- spec.loader.exec_module(module)
543
- except Exception as exc:
544
- raise click.ClickException(f"Failed to import {resolved}: {exc}") from exc
792
+
793
+ with _temporary_sys_path(sys_path_roots):
794
+ if ensure_namespace and namespace_root is not None and "." in module_name:
795
+ _ensure_parent_namespace(module_name, namespace_root)
796
+
797
+ # Clear registry before importing to avoid duplicate registration errors
798
+ registry.clear()
799
+
800
+ try:
801
+ spec.loader.exec_module(module)
802
+ except Exception:
803
+ # Remove partially-imported module to avoid reuse
804
+ sys.modules.pop(module_name, None)
805
+ raise
806
+
807
+ return module
808
+
809
+
810
+ def _load_entry_from_path(
811
+ path: Path, app_id: str, module_search_roots: Sequence[Path] | None = None
812
+ ) -> TaskAppEntry:
813
+ resolved = path.resolve()
814
+ search_roots: list[Path] = []
815
+ seen_roots: set[Path] = set()
816
+
817
+ def _append_root(candidate: Path) -> None:
818
+ try:
819
+ resolved_root = candidate.resolve()
820
+ except Exception:
821
+ return
822
+ if resolved_root in seen_roots:
823
+ return
824
+ seen_roots.add(resolved_root)
825
+ search_roots.append(resolved_root)
826
+
827
+ for root in module_search_roots or []:
828
+ _append_root(root)
829
+ _append_root(resolved.parent)
830
+ _append_root(REPO_ROOT)
831
+
832
+ last_error: Exception | None = None
833
+ module: types.ModuleType | None = None
834
+
835
+ for module_name, namespace_root in _possible_module_names(resolved, search_roots):
836
+ try:
837
+ module = _import_task_app_module(
838
+ resolved,
839
+ module_name,
840
+ namespace_root=namespace_root,
841
+ sys_path_roots=search_roots,
842
+ ensure_namespace=True,
843
+ )
844
+ break
845
+ except Exception as exc: # pragma: no cover - best-effort fallbacks
846
+ last_error = exc
847
+ continue
848
+
849
+ if module is None:
850
+ hashed_name = f"_synth_task_app_{hashlib.md5(str(resolved).encode(), usedforsecurity=False).hexdigest()}"
851
+ try:
852
+ module = _import_task_app_module(
853
+ resolved,
854
+ hashed_name,
855
+ namespace_root=None,
856
+ sys_path_roots=search_roots,
857
+ ensure_namespace=False,
858
+ )
859
+ except Exception as exc: # pragma: no cover - propagate meaningful error
860
+ detail = last_error or exc
861
+ raise click.ClickException(f"Failed to import {resolved}: {detail}") from detail
545
862
 
546
863
  config_obj: TaskAppConfig | None = None
547
864
  factory_callable: Callable[[], TaskAppConfig] | None = None
@@ -572,7 +889,11 @@ def _load_entry_from_path(path: Path, app_id: str) -> TaskAppEntry:
572
889
  continue
573
890
  has_required = False
574
891
  for param in sig.parameters.values():
575
- if param.kind in (inspect.Parameter.POSITIONAL_ONLY, inspect.Parameter.POSITIONAL_OR_KEYWORD) and param.default is inspect._empty:
892
+ if (
893
+ param.kind
894
+ in (inspect.Parameter.POSITIONAL_ONLY, inspect.Parameter.POSITIONAL_OR_KEYWORD)
895
+ and param.default is inspect._empty
896
+ ):
576
897
  has_required = True
577
898
  break
578
899
  if has_required:
@@ -582,9 +903,13 @@ def _load_entry_from_path(path: Path, app_id: str) -> TaskAppEntry:
582
903
  except Exception:
583
904
  continue
584
905
  if isinstance(result, TaskAppConfig) and result.app_id == app_id:
585
- def _factory() -> TaskAppConfig:
586
- return attr() # type: ignore[call-arg]
587
- factory_callable = _factory
906
+ # Bind attr to a local and close over it without exposing parameters
907
+ _bound_func: Callable[[], TaskAppConfig] = cast(Callable[[], TaskAppConfig], attr) # type: ignore[assignment]
908
+
909
+ def _factory_noargs() -> TaskAppConfig:
910
+ return _bound_func()
911
+
912
+ factory_callable = _factory_noargs
588
913
  config_obj = result
589
914
  break
590
915
 
@@ -608,7 +933,7 @@ def _load_entry_from_path(path: Path, app_id: str) -> TaskAppEntry:
608
933
  if isinstance(attr, ModalDeploymentConfig):
609
934
  modal_cfg = attr
610
935
  break
611
-
936
+
612
937
  # If no ModalDeploymentConfig found, try to detect it via AST parsing
613
938
  if modal_cfg is None:
614
939
  modal_cfg = _extract_modal_config_from_file(resolved)
@@ -640,31 +965,31 @@ def _resolve_env_paths_for_script(script_path: Path, explicit: Sequence[str]) ->
640
965
  # Always prompt for env file selection instead of auto-loading defaults
641
966
  script_dir = script_path.parent.resolve()
642
967
  cwd = Path.cwd()
643
-
968
+
644
969
  # Look for env files in current working directory first, then repo root
645
970
  env_candidates = []
646
-
971
+
647
972
  # Add CWD env files first (prioritized)
648
- cwd_env_files = sorted(cwd.glob('**/*.env'))
973
+ cwd_env_files = sorted(cwd.glob("**/*.env"))
649
974
  env_candidates.extend(cwd_env_files)
650
-
975
+
651
976
  # Add repo root env files
652
- repo_env_files = sorted(REPO_ROOT.glob('**/*.env'))
977
+ repo_env_files = sorted(REPO_ROOT.glob("**/*.env"))
653
978
  # Avoid duplicates
654
979
  for repo_file in repo_env_files:
655
980
  if repo_file not in env_candidates:
656
981
  env_candidates.append(repo_file)
657
-
982
+
658
983
  if not env_candidates:
659
984
  created = _interactive_create_env(script_dir)
660
985
  if created is None:
661
986
  raise click.ClickException("Env file required (--env-file) for this task app")
662
987
  return [created]
663
988
 
664
- click.echo('Select env file to load:')
989
+ click.echo("Select env file to load:")
665
990
  for idx, path in enumerate(env_candidates, start=1):
666
- click.echo(f" {idx}) {path}")
667
- choice = click.prompt('Enter choice', type=click.IntRange(1, len(env_candidates)))
991
+ click.echo(f" {idx}) {path.resolve()}")
992
+ choice = click.prompt("Enter choice", type=click.IntRange(1, len(env_candidates)), default=1)
668
993
  return [env_candidates[choice - 1]]
669
994
 
670
995
 
@@ -694,17 +1019,47 @@ def _run_modal_script(
694
1019
  click.echo("Dry run: " + " ".join(cmd))
695
1020
  return
696
1021
  try:
697
- subprocess.run(cmd, check=True)
1022
+ # Capture output to extract URL
1023
+ result = subprocess.run(cmd, check=True, capture_output=True, text=True)
1024
+ # Print output as it would normally appear
1025
+ if result.stdout:
1026
+ click.echo(result.stdout, nl=False)
1027
+ if result.stderr:
1028
+ click.echo(result.stderr, nl=False, err=True)
1029
+
1030
+ # Extract and save task app URL from output
1031
+ task_app_url = None
1032
+ for line in result.stdout.splitlines():
1033
+ # Look for lines containing modal.run URLs
1034
+ if "modal.run" in line and "=>" in line:
1035
+ # Extract URL from lines like: "└── 🔨 Created web function fastapi_app => https://...modal.run"
1036
+ parts = line.split("=>")
1037
+ if len(parts) >= 2:
1038
+ task_app_url = parts[-1].strip()
1039
+ break
1040
+
1041
+ # Save URL to .env file if found
1042
+ if task_app_url and env_paths_list:
1043
+ env_file = env_paths_list[0] # Use the first .env file
1044
+ _save_to_env_file(env_file, "TASK_APP_BASE_URL", task_app_url)
1045
+ click.echo(f"\n✓ Task app URL: {task_app_url}")
1046
+
698
1047
  except subprocess.CalledProcessError as exc:
699
- raise click.ClickException(f"modal {command} failed with exit code {exc.returncode}") from exc
1048
+ raise click.ClickException(
1049
+ f"modal {command} failed with exit code {exc.returncode}"
1050
+ ) from exc
700
1051
 
701
1052
 
702
1053
  def _preflight_env_key(crash_on_failure: bool = False) -> None:
703
1054
  try:
704
- raw_backend = os.environ.get("BACKEND_BASE_URL") or os.environ.get("SYNTH_BASE_URL") or "http://localhost:8000/api"
705
- backend_base = raw_backend.rstrip('/')
706
- if not backend_base.endswith('/api'):
707
- backend_base = backend_base + '/api'
1055
+ raw_backend = (
1056
+ os.environ.get("BACKEND_BASE_URL")
1057
+ or os.environ.get("SYNTH_BASE_URL")
1058
+ or f"{PROD_BASE_URL_DEFAULT}/api"
1059
+ )
1060
+ backend_base = raw_backend.rstrip("/")
1061
+ if not backend_base.endswith("/api"):
1062
+ backend_base = backend_base + "/api"
708
1063
  synth_key = os.environ.get("SYNTH_API_KEY") or ""
709
1064
  env_api_key = (
710
1065
  os.environ.get("ENVIRONMENT_API_KEY")
@@ -727,20 +1082,49 @@ def _preflight_env_key(crash_on_failure: bool = False) -> None:
727
1082
 
728
1083
  pub = PublicKey(base64.b64decode(pk, validate=True))
729
1084
  sb = SealedBox(pub)
730
- ct_b64 = base64.b64encode(sb.encrypt(env_api_key.encode('utf-8'))).decode()
1085
+ ct_b64 = base64.b64encode(sb.encrypt(env_api_key.encode("utf-8"))).decode()
731
1086
  payload = {"name": "ENVIRONMENT_API_KEY", "ciphertext_b64": ct_b64}
732
- with httpx.Client(timeout=15.0, headers={"Authorization": f"Bearer {synth_key}", "Content-Type": "application/json"}) as c:
1087
+ with httpx.Client(
1088
+ timeout=15.0,
1089
+ headers={
1090
+ "Authorization": f"Bearer {synth_key}",
1091
+ "Content-Type": "application/json",
1092
+ },
1093
+ ) as c:
733
1094
  click.echo("[preflight] upserting env key…")
734
1095
  up = c.post(f"{backend_base.rstrip('/')}/v1/env-keys", json=payload)
735
1096
  click.echo(f"[preflight] upsert status={up.status_code}")
736
- click.echo("[preflight] verifying env key presence…")
737
- ver = c.get(f"{backend_base.rstrip('/')}/v1/env-keys/verify")
738
- if ver.status_code == 200 and (ver.json() or {}).get("present"):
739
- # Show first and last 5 chars of the API key for verification
740
- key_preview = f"{env_api_key[:5]}...{env_api_key[-5:]}" if len(env_api_key) > 10 else env_api_key
741
- click.echo(f"✅ ENVIRONMENT_API_KEY upserted and verified in backend ({key_preview})")
1097
+
1098
+ # If upload succeeded (2xx), consider it successful even if verification fails
1099
+ # This handles cases where verification endpoint has issues
1100
+ if 200 <= up.status_code < 300:
1101
+ key_preview = (
1102
+ f"{env_api_key[:5]}...{env_api_key[-5:]}"
1103
+ if len(env_api_key) > 10
1104
+ else env_api_key
1105
+ )
1106
+ click.echo(
1107
+ f"✅ ENVIRONMENT_API_KEY uploaded successfully ({key_preview})"
1108
+ )
1109
+
1110
+ # Try verification, but don't fail if it doesn't work
1111
+ click.echo("[preflight] verifying env key presence…")
1112
+ try:
1113
+ ver = c.get(f"{backend_base.rstrip('/')}/v1/env-keys/verify")
1114
+ if ver.status_code == 200 and (ver.json() or {}).get("present"):
1115
+ click.echo("✅ Key verified in backend")
1116
+ else:
1117
+ click.echo(
1118
+ f"⚠️ Verification returned {ver.status_code}, but upload succeeded - proceeding"
1119
+ )
1120
+ except Exception as verify_err:
1121
+ click.echo(
1122
+ f"⚠️ Verification check failed ({verify_err}), but upload succeeded - proceeding"
1123
+ )
742
1124
  else:
743
- error_msg = "ENVIRONMENT_API_KEY verification failed"
1125
+ error_msg = (
1126
+ f"ENVIRONMENT_API_KEY upload failed with status {up.status_code}"
1127
+ )
744
1128
  if crash_on_failure:
745
1129
  raise click.ClickException(f"[CRITICAL] {error_msg}")
746
1130
  click.echo(f"[WARN] {error_msg}; proceeding anyway")
@@ -794,15 +1178,39 @@ def _run_modal_with_entry(
794
1178
  return
795
1179
 
796
1180
  try:
797
- subprocess.run(cmd, check=True)
1181
+ # Capture output to extract URL
1182
+ result = subprocess.run(cmd, check=True, capture_output=True, text=True)
1183
+ # Print output as it would normally appear
1184
+ if result.stdout:
1185
+ click.echo(result.stdout, nl=False)
1186
+ if result.stderr:
1187
+ click.echo(result.stderr, nl=False, err=True)
1188
+
1189
+ # Extract and save task app URL from output
1190
+ task_app_url = None
1191
+ for line in result.stdout.splitlines():
1192
+ # Look for lines containing modal.run URLs
1193
+ if "modal.run" in line and "=>" in line:
1194
+ # Extract URL from lines like: "└── 🔨 Created web function fastapi_app => https://...modal.run"
1195
+ parts = line.split("=>")
1196
+ if len(parts) >= 2:
1197
+ task_app_url = parts[-1].strip()
1198
+ break
1199
+
1200
+ # Save URL to .env file if found
1201
+ if task_app_url and env_paths_list:
1202
+ env_file = env_paths_list[0] # Use the first .env file
1203
+ _save_to_env_file(env_file, "TASK_APP_BASE_URL", task_app_url)
1204
+ click.echo(f"\n✓ Task app URL: {task_app_url}")
1205
+
798
1206
  except subprocess.CalledProcessError as exc:
799
- raise click.ClickException(f"modal {command} failed with exit code {exc.returncode}") from exc
1207
+ raise click.ClickException(
1208
+ f"modal {command} failed with exit code {exc.returncode}"
1209
+ ) from exc
800
1210
  finally:
801
1211
  script_path.unlink(missing_ok=True)
802
1212
 
803
1213
 
804
-
805
-
806
1214
  def _load_env_values(paths: list[Path], *, allow_empty: bool = False) -> dict[str, str]:
807
1215
  values: dict[str, str] = {}
808
1216
  for p in paths:
@@ -811,15 +1219,17 @@ def _load_env_values(paths: list[Path], *, allow_empty: bool = False) -> dict[st
811
1219
  except FileNotFoundError:
812
1220
  continue
813
1221
  for line in content.splitlines():
814
- if not line or line.lstrip().startswith('#') or '=' not in line:
1222
+ if not line or line.lstrip().startswith("#") or "=" not in line:
815
1223
  continue
816
- key, value = line.split('=', 1)
1224
+ key, value = line.split("=", 1)
817
1225
  if key and key not in values:
818
1226
  values[key.strip()] = value.strip()
819
1227
  if not allow_empty and not values:
820
1228
  raise click.ClickException("No environment values found")
821
1229
  os.environ.update({k: v for k, v in values.items() if k and v})
822
1230
  return values
1231
+
1232
+
823
1233
  def _interactive_create_env(target_dir: Path) -> Path | None:
824
1234
  env_path = (target_dir / ".env").resolve()
825
1235
  if env_path.exists():
@@ -838,9 +1248,9 @@ def _parse_env_file(path: Path) -> dict[str, str]:
838
1248
  data: dict[str, str] = {}
839
1249
  try:
840
1250
  for line in path.read_text(encoding="utf-8").splitlines():
841
- if not line or line.lstrip().startswith('#') or '=' not in line:
1251
+ if not line or line.lstrip().startswith("#") or "=" not in line:
842
1252
  continue
843
- key, value = line.split('=', 1)
1253
+ key, value = line.split("=", 1)
844
1254
  data[key.strip()] = value.strip()
845
1255
  except FileNotFoundError:
846
1256
  pass
@@ -853,7 +1263,9 @@ def _interactive_fill_env(env_path: Path) -> Path | None:
853
1263
  def _prompt(label: str, *, default: str = "", required: bool) -> str | None:
854
1264
  while True:
855
1265
  try:
856
- value = click.prompt(label, default=default, show_default=bool(default) or not required).strip()
1266
+ value = click.prompt(
1267
+ label, default=default, show_default=bool(default) or not required
1268
+ ).strip()
857
1269
  except (click.exceptions.Abort, EOFError, KeyboardInterrupt):
858
1270
  click.echo("Aborted env creation.")
859
1271
  return None
@@ -904,11 +1316,22 @@ def _deploy_entry(
904
1316
  ) -> None:
905
1317
  modal_cfg = entry.modal
906
1318
  if modal_cfg is None:
907
- raise click.ClickException(f"Task app '{entry.app_id}' does not define Modal deployment settings")
1319
+ raise click.ClickException(
1320
+ f"Task app '{entry.app_id}' does not define Modal deployment settings"
1321
+ )
908
1322
 
909
1323
  env_paths = _determine_env_files(entry, env_file)
910
- click.echo('Using env file(s): ' + ', '.join(str(p) for p in env_paths))
911
- _run_modal_with_entry(entry, modal_cfg, modal_cli, modal_name, env_paths, command="deploy", dry_run=dry_run, original_path=original_path)
1324
+ click.echo("Using env file(s): " + ", ".join(str(p.resolve()) for p in env_paths))
1325
+ _run_modal_with_entry(
1326
+ entry,
1327
+ modal_cfg,
1328
+ modal_cli,
1329
+ modal_name,
1330
+ env_paths,
1331
+ command="deploy",
1332
+ dry_run=dry_run,
1333
+ original_path=original_path,
1334
+ )
912
1335
 
913
1336
 
914
1337
  def _modal_serve_entry(
@@ -920,21 +1343,29 @@ def _modal_serve_entry(
920
1343
  ) -> None:
921
1344
  modal_cfg = entry.modal
922
1345
  if modal_cfg is None:
923
- raise click.ClickException(f"Task app '{entry.app_id}' does not define Modal deployment settings")
1346
+ raise click.ClickException(
1347
+ f"Task app '{entry.app_id}' does not define Modal deployment settings"
1348
+ )
924
1349
 
925
1350
  env_paths = _determine_env_files(entry, env_file)
926
- click.echo('Using env file(s): ' + ', '.join(str(p) for p in env_paths))
927
- _run_modal_with_entry(entry, modal_cfg, modal_cli, modal_name, env_paths, command="serve", original_path=original_path)
1351
+ click.echo("Using env file(s): " + ", ".join(str(p.resolve()) for p in env_paths))
1352
+ _run_modal_with_entry(
1353
+ entry,
1354
+ modal_cfg,
1355
+ modal_cli,
1356
+ modal_name,
1357
+ env_paths,
1358
+ command="serve",
1359
+ original_path=original_path,
1360
+ )
928
1361
 
929
- @click.group(
930
- name='task-app',
931
- help='Utilities for serving and deploying Synth task apps.'
932
- )
1362
+
1363
@click.group(
    name="task-app",
    help="Utilities for serving and deploying Synth task apps.",
)
def task_app_group() -> None:
    """Root ``task-app`` click group; it performs no work of its own.

    Subcommands are attached elsewhere in this module through
    ``@task_app_group.command(...)``.
    """
935
1366
 
936
1367
 
937
- @task_app_group.command('list')
1368
+ @task_app_group.command("list")
938
1369
  def list_apps() -> None:
939
1370
  """List registered task apps."""
940
1371
 
@@ -945,6 +1376,8 @@ def list_apps() -> None:
945
1376
  for entry in entries:
946
1377
  aliases = f" (aliases: {', '.join(entry.aliases)})" if entry.aliases else ""
947
1378
  click.echo(f"- {entry.app_id}{aliases}: {entry.description}")
1379
+
1380
+
948
1381
  def _load_env_files_into_process(paths: Sequence[str]) -> None:
949
1382
  for p in paths:
950
1383
  try:
@@ -952,9 +1385,9 @@ def _load_env_files_into_process(paths: Sequence[str]) -> None:
952
1385
  except Exception:
953
1386
  continue
954
1387
  for line in txt.splitlines():
955
- if not line or line.startswith('#') or '=' not in line:
1388
+ if not line or line.startswith("#") or "=" not in line:
956
1389
  continue
957
- k, v = line.split('=', 1)
1390
+ k, v = line.split("=", 1)
958
1391
  key = k.strip()
959
1392
  val = v.strip().strip('"').strip("'")
960
1393
  # Load into process, but allow overriding if the current value is empty
@@ -964,53 +1397,181 @@ def _load_env_files_into_process(paths: Sequence[str]) -> None:
964
1397
  os.environ[key] = val
965
1398
 
966
1399
 
967
-
968
- @click.command('serve')
969
- @click.argument('app_id', type=str, required=False)
970
- @click.option('--host', default='0.0.0.0', show_default=True)
971
- @click.option('--port', default=8001, show_default=True, type=int)
972
- @click.option('--env-file', multiple=True, type=click.Path(), help='Extra .env files to load')
973
- @click.option('--reload/--no-reload', 'reload_flag', default=False, help='Enable uvicorn auto-reload')
974
- @click.option('--force/--no-force', 'force', default=False, help='Kill any process already bound to the selected port before starting')
975
- @click.option('--trace', 'trace_dir', type=click.Path(), default=None, help='Enable tracing and write SFT JSONL files to this directory')
976
- @click.option('--trace-db', 'trace_db', type=click.Path(), default=None, help='Override local trace DB path (maps to SQLD_DB_PATH)')
1400
def _enter_demo_dir_for_serve() -> None:
    """Switch the process into the stored demo directory, if one is recorded.

    Raises:
        click.ClickException: a demo directory is recorded but is not a directory.
    """
    # Imported lazily, exactly as the command bodies did before the refactor.
    from synth_ai.demos.demo_task_apps.core import load_demo_dir

    demo_dir = load_demo_dir()
    if not demo_dir:
        return

    demo_path = Path(demo_dir)
    if not demo_path.is_dir():
        raise click.ClickException(
            f"Demo directory not found: {demo_dir}\nRun 'synth-ai setup' to create a demo."
        )
    # Resolve BEFORE changing directory: resolving a relative path after
    # os.chdir() would anchor it to the new working directory instead.
    resolved_demo = demo_path.resolve()
    os.chdir(demo_dir)
    click.echo(f"Using demo directory: {demo_dir}\n")
    # Store demo directory for path resolution
    os.environ["SYNTH_DEMO_DIR"] = str(resolved_demo)


def _resolve_serve_options(
    port: int | None,
    trace_dir: str | None,
    trace_db: str | None,
) -> tuple[int, str | None, str | None]:
    """Interactively fill in any serve option that was not given on the CLI.

    Returns:
        ``(port, trace_dir, trace_db)``. ``trace_dir`` stays ``None`` when the
        user declines tracing; ``trace_db`` is only prompted for when a trace
        directory is in effect.
    """
    if port is None:
        port = click.prompt("Port to serve on", type=int, default=8001)

    # Defaults are anchored to the demo directory when known, else the cwd.
    demo_base = Path(os.environ.get("SYNTH_DEMO_DIR") or Path.cwd())

    if trace_dir is None:
        click.echo(
            "\nTracing captures rollout data (actions, rewards, model outputs) to a local SQLite DB."
        )
        click.echo("This data can be exported to JSONL for supervised fine-tuning (SFT).")
        if click.confirm("Enable tracing?", default=True):
            default_trace_dir = str((demo_base / "traces/v3").resolve())
            trace_dir = click.prompt(
                "Trace directory", type=str, default=default_trace_dir, show_default=True
            )

    if trace_dir and trace_db is None:
        default_trace_db = str((demo_base / "traces/v3/synth_ai.db").resolve())
        trace_db = click.prompt(
            "Trace DB path", type=str, default=default_trace_db, show_default=True
        )

    return port, trace_dir, trace_db


def _serve_app_interactive(
    app_id: str | None,
    host: str,
    port: int | None,
    env_file: Sequence[str],
    reload_flag: bool,
    force: bool,
    trace_dir: str | None,
    trace_db: str | None,
) -> None:
    """Shared implementation behind both ``serve`` commands."""
    # Change to demo directory if stored (REQUIRED for demo isolation)
    _enter_demo_dir_for_serve()
    port, trace_dir, trace_db = _resolve_serve_options(port, trace_dir, trace_db)

    choice = _select_app_choice(app_id, purpose="serve")
    entry = choice.ensure_entry()
    _serve_entry(
        entry, host, port, env_file, reload_flag, force, trace_dir=trace_dir, trace_db=trace_db
    )


@click.command("serve")
@click.argument("app_id", type=str, required=False)
@click.option("--host", default="0.0.0.0", show_default=True)
@click.option("--port", default=None, type=int, help="Port to serve on (default: 8001)")
@click.option("--env-file", multiple=True, type=click.Path(), help="Extra .env files to load")
@click.option(
    "--reload/--no-reload", "reload_flag", default=False, help="Enable uvicorn auto-reload"
)
@click.option(
    "--force/--no-force",
    "force",
    default=False,
    help="Kill any process already bound to the selected port before starting",
)
@click.option(
    "--trace",
    "trace_dir",
    type=click.Path(),
    default=None,
    help="Enable tracing and write SFT JSONL files to this directory (default: traces/v3)",
)
@click.option(
    "--trace-db",
    "trace_db",
    type=click.Path(),
    default=None,
    help="Override local trace DB path (default: traces/v3/synth_ai.db)",
)
def serve_command(
    app_id: str | None,
    host: str,
    port: int | None,
    env_file: Sequence[str],
    reload_flag: bool,
    force: bool,
    trace_dir: str | None,
    trace_db: str | None,
) -> None:
    # Standalone ``serve`` command; prompts for any option left unset.
    _serve_app_interactive(
        app_id, host, port, env_file, reload_flag, force, trace_dir, trace_db
    )


@task_app_group.command("serve")
@click.argument("app_id", type=str, required=False)
@click.option("--host", default="0.0.0.0", show_default=True)
@click.option("--port", default=None, type=int, help="Port to serve on (default: 8001)")
@click.option("--env-file", multiple=True, type=click.Path(), help="Extra .env files to load")
@click.option(
    "--reload/--no-reload", "reload_flag", default=False, help="Enable uvicorn auto-reload"
)
@click.option(
    "--force/--no-force",
    "force",
    default=False,
    help="Kill any process already bound to the selected port before starting",
)
@click.option(
    "--trace",
    "trace_dir",
    type=click.Path(),
    default=None,
    help="Enable tracing and write SFT JSONL files to this directory (default: traces/v3)",
)
@click.option(
    "--trace-db",
    "trace_db",
    type=click.Path(),
    default=None,
    help="Override local trace DB path (default: traces/v3/synth_ai.db)",
)
def serve_task_group(
    app_id: str | None,
    host: str,
    port: int | None,
    env_file: Sequence[str],
    reload_flag: bool,
    force: bool,
    trace_dir: str | None,
    trace_db: str | None,
) -> None:
    # ``task-app serve`` subcommand; same behaviour as the standalone command.
    _serve_app_interactive(
        app_id, host, port, env_file, reload_flag, force, trace_dir, trace_db
    )
1574
+
1014
1575
 
1015
1576
  def _determine_env_files(entry: TaskAppEntry, user_env_files: Sequence[str]) -> list[Path]:
1016
1577
  resolved: list[Path] = []
@@ -1026,25 +1587,25 @@ def _determine_env_files(entry: TaskAppEntry, user_env_files: Sequence[str]) ->
1026
1587
  # Look for env files in current working directory first, then repo root
1027
1588
  cwd = Path.cwd()
1028
1589
  env_candidates = []
1029
-
1590
+
1030
1591
  # Add CWD env files first (prioritized)
1031
- cwd_env_files = sorted(cwd.glob('**/*.env'))
1592
+ cwd_env_files = sorted(cwd.glob("**/*.env"))
1032
1593
  env_candidates.extend(cwd_env_files)
1033
-
1594
+
1034
1595
  # Add repo root env files
1035
- repo_env_files = sorted(REPO_ROOT.glob('**/*.env'))
1596
+ repo_env_files = sorted(REPO_ROOT.glob("**/*.env"))
1036
1597
  # Avoid duplicates
1037
1598
  for repo_file in repo_env_files:
1038
1599
  if repo_file not in env_candidates:
1039
1600
  env_candidates.append(repo_file)
1040
-
1601
+
1041
1602
  if not env_candidates:
1042
- raise click.ClickException('No env file found. Pass --env-file explicitly.')
1603
+ raise click.ClickException("No env file found. Pass --env-file explicitly.")
1043
1604
 
1044
- click.echo('Select env file to load:')
1605
+ click.echo("Select env file to load:")
1045
1606
  for idx, path in enumerate(env_candidates, start=1):
1046
- click.echo(f" {idx}) {path}")
1047
- choice = click.prompt('Enter choice', type=click.IntRange(1, len(env_candidates)))
1607
+ click.echo(f" {idx}) {path.resolve()}")
1608
+ choice = click.prompt("Enter choice", type=click.IntRange(1, len(env_candidates)), default=1)
1048
1609
  return [env_candidates[choice - 1]]
1049
1610
 
1050
1611
 
@@ -1060,7 +1621,9 @@ def _ensure_port_free(port: int, host: str, *, force: bool) -> None:
1060
1621
  return
1061
1622
 
1062
1623
  try:
1063
- out = subprocess.run(["lsof", "-ti", f"TCP:{port}"], capture_output=True, text=True, check=False)
1624
+ out = subprocess.run(
1625
+ ["lsof", "-ti", f"TCP:{port}"], capture_output=True, text=True, check=False
1626
+ )
1064
1627
  pids = [pid for pid in out.stdout.strip().splitlines() if pid]
1065
1628
  except FileNotFoundError:
1066
1629
  pids = []
@@ -1075,7 +1638,7 @@ def _ensure_port_free(port: int, host: str, *, force: bool) -> None:
1075
1638
  try:
1076
1639
  os.kill(int(pid), signal.SIGTERM)
1077
1640
  except Exception as exc:
1078
- raise click.ClickException(f'Failed to terminate PID {pid}: {exc}')
1641
+ raise click.ClickException(f"Failed to terminate PID {pid}: {exc}")
1079
1642
 
1080
1643
  time.sleep(0.5)
1081
1644
 
@@ -1087,13 +1650,113 @@ def _ensure_port_free(port: int, host: str, *, force: bool) -> None:
1087
1650
  try:
1088
1651
  os.kill(int(pid), signal.SIGKILL)
1089
1652
  except Exception as exc:
1090
- raise click.ClickException(f'Failed to force terminate PID {pid}: {exc}')
1653
+ raise click.ClickException(f"Failed to force terminate PID {pid}: {exc}")
1091
1654
  time.sleep(0.5)
1092
1655
 
1093
1656
  with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
1094
1657
  in_use_after = s.connect_ex((host, port)) == 0
1095
1658
  if in_use_after:
1096
- raise click.ClickException(f'Port {port} is still in use after attempting to terminate processes.')
1659
+ raise click.ClickException(
1660
+ f"Port {port} is still in use after attempting to terminate processes."
1661
+ )
1662
+
1663
+
1664
def _save_to_env_file(env_path: Path, key: str, value: str) -> None:
    """Save or update a ``KEY=value`` pair in a .env file (best effort).

    Every existing line whose stripped text starts with ``KEY=`` is replaced;
    if there is none, the pair is appended. Failures are reported as a warning
    on stderr and never raised, so callers can treat this as optional.

    Args:
        env_path: The .env file to modify; created on append if missing.
        key: Variable name to set.
        value: Value written verbatim after ``=``.
    """
    assignment = f"{key}={value}"
    marker = f"{key}="
    try:
        # Read as UTF-8, matching the other .env readers in this module.
        original = env_path.read_text(encoding="utf-8") if env_path.exists() else ""
        lines = original.splitlines()

        if any(line.strip().startswith(marker) for line in lines):
            rewritten = [
                assignment if line.strip().startswith(marker) else line for line in lines
            ]
            env_path.write_text("\n".join(rewritten) + "\n", encoding="utf-8")
            click.echo(f"Updated {key} in {env_path}")
        else:
            # Only add a separator when the file does not already end with a
            # newline. splitlines() hides the trailing newline, so inspecting
            # the split lines would insert a spurious blank line on each append.
            separator = "" if not original or original.endswith("\n") else "\n"
            with open(env_path, "a", encoding="utf-8") as f:
                f.write(f"{separator}{assignment}\n")
            click.echo(f"Saved {key} to {env_path}")
    except Exception as e:
        # Deliberately broad: persisting the value must never abort the command.
        click.echo(f"Warning: Could not save {key} to .env: {e}", err=True)
1699
+
1700
+
1701
def _validate_required_env_keys() -> None:
    """Ensure the API keys needed to serve are present, asking on stdin if not.

    The ``.env`` file is looked up under ``SYNTH_DEMO_DIR`` when that variable
    is set, otherwise under the current working directory. If it exists it is
    loaded through ``dotenv.load_dotenv(..., override=False)``; any failure
    there is ignored.

    ``ENVIRONMENT_API_KEY`` is mandatory, ``GROQ_API_KEY`` may be skipped.
    Values typed by the user are exported into ``os.environ`` and written to
    the ``.env`` file.

    Raises:
        click.ClickException: no ``ENVIRONMENT_API_KEY`` was supplied.
    """
    base_dir = Path(os.environ.get("SYNTH_DEMO_DIR") or Path.cwd())
    dotenv_path = base_dir / ".env"

    if dotenv_path.exists():
        try:
            from dotenv import load_dotenv

            load_dotenv(dotenv_path, override=False)
        except Exception:
            pass  # Best effort

    # Mandatory key: prompt once and fail if the answer is blank.
    if not os.environ.get("ENVIRONMENT_API_KEY", "").strip():
        entered_env_key = input("Please enter your RL Environment API key:\n> ").strip()
        if not entered_env_key:
            raise click.ClickException("RL Environment API key is required to start the server")
        os.environ["ENVIRONMENT_API_KEY"] = entered_env_key
        _save_to_env_file(dotenv_path, "ENVIRONMENT_API_KEY", entered_env_key)

    # Optional key: nothing more to do when it is already configured.
    if os.environ.get("GROQ_API_KEY", "").strip():
        return

    click.echo("\nInference API key configuration:")
    click.echo("This workflow requires a Groq API key.")
    entered_groq_key = input("Groq API key (or press Enter to skip): ").strip()
    if not entered_groq_key:
        return
    os.environ["GROQ_API_KEY"] = entered_groq_key
    _save_to_env_file(dotenv_path, "GROQ_API_KEY", entered_groq_key)
1734
+
1735
+
1736
def _print_demo_next_steps_if_applicable() -> None:
    """Show a "next step" hint when the cwd is the stored demo directory.

    The hint is printed only if the stored demo directory resolves to the
    current working directory and that directory contains
    ``run_local_rollout_traced.py``. Any exception is swallowed because the
    message is purely informational.
    """
    try:
        from synth_ai.demos.demo_task_apps.core import load_demo_dir

        here = Path.cwd().resolve()
        stored_demo_dir = load_demo_dir()

        # Guard: only speak up from inside the recorded demo directory.
        if not stored_demo_dir or Path(stored_demo_dir).resolve() != here:
            return
        # Guard: the rollout script is what marks this as the crafter demo.
        if not (here / "run_local_rollout_traced.py").exists():
            return

        banner_lines = (
            "\n" + "=" * 60,
            "Next step: Collect traced rollouts",
            "=" * 60,
            "\nIn another terminal, run:",
            f" cd {here}",
            " uv run python run_local_rollout_traced.py",
            "\nRun this 5-10 times to collect diverse traces.",
            "=" * 60 + "\n",
        )
        for text in banner_lines:
            click.echo(text)
    except Exception:
        # Silently fail - this is just a helpful printout
        pass
1759
+
1097
1760
 
1098
1761
  def _serve_entry(
1099
1762
  entry: TaskAppEntry,
@@ -1111,34 +1774,51 @@ def _serve_entry(
1111
1774
 
1112
1775
  trace_enabled = trace_dir is not None or trace_db is not None
1113
1776
  if trace_enabled:
1114
- os.environ['TASKAPP_TRACING_ENABLED'] = '1'
1777
+ os.environ["TASKAPP_TRACING_ENABLED"] = "1"
1778
+
1779
+ # Ensure paths are absolute relative to demo directory
1780
+ demo_base = Path(os.environ.get("SYNTH_DEMO_DIR") or Path.cwd())
1781
+
1115
1782
  if trace_dir is not None:
1116
1783
  dir_path = Path(trace_dir).expanduser()
1784
+ if not dir_path.is_absolute():
1785
+ dir_path = (demo_base / dir_path).resolve()
1117
1786
  try:
1118
1787
  dir_path.mkdir(parents=True, exist_ok=True)
1119
1788
  except Exception as exc:
1120
- raise click.ClickException(f"Failed to create trace directory {dir_path}: {exc}") from exc
1121
- os.environ['TASKAPP_SFT_OUTPUT_DIR'] = str(dir_path)
1789
+ raise click.ClickException(
1790
+ f"Failed to create trace directory {dir_path}: {exc}"
1791
+ ) from exc
1792
+ os.environ["TASKAPP_SFT_OUTPUT_DIR"] = str(dir_path)
1122
1793
  click.echo(f"Tracing enabled. SFT JSONL will be written to {dir_path}")
1123
1794
  if trace_db is not None:
1124
1795
  db_path = Path(trace_db).expanduser()
1125
- os.environ['SQLD_DB_PATH'] = str(db_path)
1126
- os.environ.pop('TURSO_LOCAL_DB_URL', None)
1796
+ if not db_path.is_absolute():
1797
+ db_path = (demo_base / db_path).resolve()
1798
+ # Construct the sqlite URL from the absolute path
1799
+ db_url = f"sqlite+aiosqlite:///{db_path}"
1800
+ os.environ["SQLD_DB_PATH"] = str(db_path)
1801
+ os.environ["TURSO_LOCAL_DB_URL"] = db_url
1127
1802
  click.echo(f"Tracing DB path set to {db_path}")
1128
1803
  from synth_ai.tracing_v3.config import CONFIG as TRACE_CONFIG
1129
- # recompute db_url based on current environment
1130
- new_db_url = os.getenv('TURSO_LOCAL_DB_URL') or TRACE_CONFIG.db_url
1804
+
1805
+ # Use the explicitly set URL if available
1806
+ new_db_url = os.getenv("TURSO_LOCAL_DB_URL") or TRACE_CONFIG.db_url
1131
1807
  TRACE_CONFIG.db_url = new_db_url
1132
1808
  if new_db_url:
1133
- os.environ['TURSO_LOCAL_DB_URL'] = new_db_url
1134
1809
  click.echo(f"Tracing DB URL resolved to {new_db_url}")
1135
- elif os.getenv('TASKAPP_TRACING_ENABLED'):
1810
+ elif os.getenv("TASKAPP_TRACING_ENABLED"):
1136
1811
  click.echo("Tracing enabled via environment variables")
1137
1812
 
1138
1813
  _ensure_port_free(port, host, force=force)
1139
1814
 
1815
+ _validate_required_env_keys()
1140
1816
  _preflight_env_key()
1141
1817
 
1818
+ # Print next steps if in demo context
1819
+ if trace_enabled:
1820
+ _print_demo_next_steps_if_applicable()
1821
+
1142
1822
  run_task_app(
1143
1823
  entry.config_factory,
1144
1824
  host=host,
@@ -1148,37 +1828,76 @@ def _serve_entry(
1148
1828
  )
1149
1829
 
1150
1830
 
1151
- @task_app_group.command('deploy')
1831
+ @task_app_group.command("deploy")
1152
1832
  @click.argument("app_id", type=str, required=False)
1153
1833
  @click.option("--name", "modal_name", default=None, help="Override Modal app name")
1154
1834
  @click.option("--dry-run", is_flag=True, help="Print modal deploy command without executing")
1155
1835
  @click.option("--modal-cli", default="modal", help="Path to modal CLI executable")
1156
- @click.option('--env-file', multiple=True, type=click.Path(), help='Env file to load into the container (can be repeated)')
1157
- def deploy_app(app_id: str | None, modal_name: str | None, dry_run: bool, modal_cli: str, env_file: Sequence[str]) -> None:
1836
+ @click.option(
1837
+ "--env-file",
1838
+ multiple=True,
1839
+ type=click.Path(),
1840
+ help="Env file to load into the container (can be repeated)",
1841
+ )
1842
+ def deploy_app(
1843
+ app_id: str | None,
1844
+ modal_name: str | None,
1845
+ dry_run: bool,
1846
+ modal_cli: str,
1847
+ env_file: Sequence[str],
1848
+ ) -> None:
1158
1849
  """Deploy a task app to Modal."""
1159
1850
 
1851
+ # Change to demo directory if stored (for consistent discovery)
1852
+ from synth_ai.demos.demo_task_apps.core import load_demo_dir
1853
+
1854
+ demo_dir = load_demo_dir()
1855
+ if demo_dir:
1856
+ demo_path = Path(demo_dir)
1857
+ if not demo_path.is_dir():
1858
+ raise click.ClickException(
1859
+ f"Demo directory not found: {demo_dir}\nRun 'synth-ai demo' to create a demo."
1860
+ )
1861
+ os.chdir(demo_dir)
1862
+ click.echo(f"Using demo directory: {demo_dir}\n")
1863
+
1160
1864
  choice = _select_app_choice(app_id, purpose="deploy")
1161
1865
 
1162
1866
  if choice.modal_script:
1163
1867
  env_paths = _resolve_env_paths_for_script(choice.modal_script, env_file)
1164
- click.echo('Using env file(s): ' + ', '.join(str(p) for p in env_paths))
1165
- _run_modal_script(choice.modal_script, modal_cli, "deploy", env_paths, modal_name=modal_name, dry_run=dry_run)
1868
+ click.echo("Using env file(s): " + ", ".join(str(p.resolve()) for p in env_paths))
1869
+ _run_modal_script(
1870
+ choice.modal_script,
1871
+ modal_cli,
1872
+ "deploy",
1873
+ env_paths,
1874
+ modal_name=modal_name,
1875
+ dry_run=dry_run,
1876
+ )
1166
1877
  return
1167
1878
 
1168
1879
  entry = choice.ensure_entry()
1169
1880
  _deploy_entry(entry, modal_name, dry_run, modal_cli, env_file, original_path=choice.path)
1170
1881
 
1171
- @task_app_group.command('modal-serve')
1172
- @click.argument('app_id', type=str, required=False)
1173
- @click.option('--modal-cli', default='modal', help='Path to modal CLI executable')
1174
- @click.option('--name', 'modal_name', default=None, help='Override Modal app name (optional)')
1175
- @click.option('--env-file', multiple=True, type=click.Path(), help='Env file to load into the container (can be repeated)')
1176
- def modal_serve_app(app_id: str | None, modal_cli: str, modal_name: str | None, env_file: Sequence[str]) -> None:
1882
+
1883
+ @task_app_group.command("modal-serve")
1884
+ @click.argument("app_id", type=str, required=False)
1885
+ @click.option("--modal-cli", default="modal", help="Path to modal CLI executable")
1886
+ @click.option("--name", "modal_name", default=None, help="Override Modal app name (optional)")
1887
+ @click.option(
1888
+ "--env-file",
1889
+ multiple=True,
1890
+ type=click.Path(),
1891
+ help="Env file to load into the container (can be repeated)",
1892
+ )
1893
+ def modal_serve_app(
1894
+ app_id: str | None, modal_cli: str, modal_name: str | None, env_file: Sequence[str]
1895
+ ) -> None:
1177
1896
  choice = _select_app_choice(app_id, purpose="modal-serve")
1178
1897
 
1179
1898
  if choice.modal_script:
1180
1899
  env_paths = _resolve_env_paths_for_script(choice.modal_script, env_file)
1181
- click.echo('Using env file(s): ' + ', '.join(str(p) for p in env_paths))
1900
+ click.echo("Using env file(s): " + ", ".join(str(p.resolve()) for p in env_paths))
1182
1901
  _run_modal_script(choice.modal_script, modal_cli, "serve", env_paths, modal_name=modal_name)
1183
1902
  return
1184
1903
 
@@ -1203,7 +1922,8 @@ def _write_modal_entrypoint(
1203
1922
  try:
1204
1923
  # Build lookup of local->remote mounts
1205
1924
  mount_map: list[tuple[Path, Path]] = [
1206
- (Path(local).resolve(), Path(remote)) for (local, remote) in modal_cfg.extra_local_dirs
1925
+ (Path(local).resolve(), Path(remote))
1926
+ for (local, remote) in modal_cfg.extra_local_dirs
1207
1927
  ]
1208
1928
  orig = Path(original_path).resolve()
1209
1929
  for local_src, remote_dst in mount_map:
@@ -1220,12 +1940,51 @@ def _write_modal_entrypoint(
1220
1940
  except Exception:
1221
1941
  remote_file_str = None
1222
1942
  module_name = entry.config_factory.__module__
1223
-
1943
+
1944
+ # Prefer a guaranteed mount for the discovered file to avoid package import issues
1945
+ guaranteed_file_str: str | None = None
1946
+ if original_path:
1947
+ guaranteed_file_str = str(
1948
+ (Path("/opt/synth_ai_repo/__local_task_app__") / Path(original_path).stem).with_suffix(
1949
+ ".py"
1950
+ )
1951
+ )
1952
+
1224
1953
  dotenv_paths = [str(Path(path)) for path in (dotenv_paths or [])]
1225
1954
 
1226
1955
  pip_packages = list(modal_cfg.pip_packages)
1956
+ # Ensure synth-ai (matching host version if available) is installed in the container
1957
+ synth_pkg = "synth-ai"
1958
+ try:
1959
+ import synth_ai as _host_synth
1960
+
1961
+ host_ver = getattr(_host_synth, "__version__", None)
1962
+ if host_ver:
1963
+ synth_pkg = f"synth-ai=={host_ver}"
1964
+ except Exception:
1965
+ pass
1966
+ if not any(str(p).startswith("synth-ai") for p in pip_packages):
1967
+ pip_packages.insert(0, synth_pkg)
1227
1968
 
1228
1969
  local_dirs = [(str(Path(src)), dst) for src, dst in modal_cfg.extra_local_dirs]
1970
+ # Also mount the host synth_ai source if available to ensure latest code is used
1971
+ try:
1972
+ import synth_ai as _host_synth
1973
+
1974
+ host_synth_dir = Path(_host_synth.__file__).resolve().parent
1975
+ # host_synth_dir points to .../synth_ai; mount that directory
1976
+ sy_dst = "/opt/synth_ai_repo/synth_ai"
1977
+ candidate = (str(host_synth_dir), sy_dst)
1978
+ if candidate not in local_dirs:
1979
+ local_dirs.insert(0, candidate)
1980
+ except Exception:
1981
+ pass
1982
+ # Ensure the discovered app directory is mounted, regardless of modal_cfg
1983
+ if original_path:
1984
+ discovered_dir = str(Path(original_path).resolve().parent)
1985
+ mount_dst = "/opt/synth_ai_repo/__local_task_app__"
1986
+ if (discovered_dir, mount_dst) not in local_dirs:
1987
+ local_dirs.append((discovered_dir, mount_dst))
1229
1988
  secret_names = list(modal_cfg.secret_names)
1230
1989
  volume_mounts = [(name, mount) for name, mount in modal_cfg.volume_mounts]
1231
1990
 
@@ -1234,17 +1993,21 @@ def _write_modal_entrypoint(
1234
1993
  import importlib
1235
1994
  import importlib.util
1236
1995
  import sys
1996
+ import os
1997
+ import shutil
1998
+ import tempfile
1999
+ from pathlib import Path as _Path
2000
+ import fnmatch
1237
2001
  sys.path.insert(0, '/opt/synth_ai_repo')
1238
2002
 
1239
2003
  from modal import App, Image, Secret, Volume, asgi_app
1240
2004
 
1241
- from synth_ai.task.apps import registry
1242
- from synth_ai.task.server import create_task_app
2005
+ # Defer importing synth_ai until inside fastapi_app to avoid local import errors
1243
2006
 
1244
2007
  ENTRY_ID = {entry.app_id!r}
1245
2008
  MODAL_APP_NAME = {modal_name!r}
1246
2009
  MODULE_NAME = {module_name!r}
1247
- MODULE_FILE = {remote_file_str!r}
2010
+ MODULE_FILE = {guaranteed_file_str or remote_file_str!r}
1248
2011
  DOTENV_PATHS = {dotenv_paths!r}
1249
2012
 
1250
2013
  image = Image.debian_slim(python_version={modal_cfg.python_version!r})
@@ -1254,8 +2017,37 @@ if pip_packages:
1254
2017
  image = image.pip_install(*pip_packages)
1255
2018
 
1256
2019
  local_dirs = {local_dirs!r}
2020
+
2021
+ def _copy_tree_filtered(src_dir: str) -> str:
2022
+ src = _Path(src_dir)
2023
+ temp_dir = _Path(tempfile.mkdtemp(prefix='synth_mount_'))
2024
+
2025
+ exclude_dirs = {".cache", ".git", "__pycache__"}
2026
+ exclude_globs = ['*.db', '*.db-journal', '*-wal', '*-shm']
2027
+
2028
+ for root, dirs, files in os.walk(src):
2029
+ rel_root = _Path(root).relative_to(src)
2030
+ # filter dirs in-place
2031
+ dirs[:] = [d for d in dirs if d not in exclude_dirs]
2032
+ # ensure target directory exists
2033
+ target_dir = (temp_dir / rel_root)
2034
+ target_dir.mkdir(parents=True, exist_ok=True)
2035
+ # copy files with filtering
2036
+ for name in files:
2037
+ if any(fnmatch.fnmatch(name, pat) for pat in exclude_globs):
2038
+ continue
2039
+ src_file = _Path(root) / name
2040
+ dst_file = target_dir / name
2041
+ try:
2042
+ shutil.copy2(src_file, dst_file)
2043
+ except Exception:
2044
+ # ignore problematic files
2045
+ continue
2046
+ return str(temp_dir)
2047
+
1257
2048
  for local_src, remote_dst in local_dirs:
1258
- image = image.add_local_dir(local_src, remote_dst)
2049
+ safe_src = _copy_tree_filtered(local_src)
2050
+ image = image.add_local_dir(safe_src, remote_dst)
1259
2051
 
1260
2052
  secrets = {secret_names!r}
1261
2053
  secret_objs = [Secret.from_name(name) for name in secrets]
@@ -1268,24 +2060,6 @@ volume_map = {{}}
1268
2060
  for vol_name, mount_path in volume_mounts:
1269
2061
  volume_map[mount_path] = Volume.from_name(vol_name, create_if_missing=True)
1270
2062
 
1271
- # Import the module to trigger registration
1272
- if MODULE_FILE:
1273
- spec = importlib.util.spec_from_file_location(MODULE_NAME or 'task_app_module', MODULE_FILE)
1274
- if spec and spec.loader:
1275
- mod = importlib.util.module_from_spec(spec)
1276
- sys.modules[MODULE_NAME or 'task_app_module'] = mod
1277
- spec.loader.exec_module(mod)
1278
- else:
1279
- raise RuntimeError("Failed to import task app from file: " + str(MODULE_FILE))
1280
- else:
1281
- importlib.import_module(MODULE_NAME)
1282
-
1283
- # Get the entry from registry (now that it's registered)
1284
- entry = registry.get(ENTRY_ID)
1285
- modal_cfg = entry.modal
1286
- if modal_cfg is None:
1287
- raise RuntimeError("Modal configuration missing for task app {entry.app_id}")
1288
-
1289
2063
  app = App(MODAL_APP_NAME)
1290
2064
 
1291
2065
  @app.function(
@@ -1300,6 +2074,47 @@ app = App(MODAL_APP_NAME)
1300
2074
  )
1301
2075
  @asgi_app()
1302
2076
  def fastapi_app():
2077
+ # Import the module to trigger registration (inside container)
2078
+ import os
2079
+ # Prefer mounted source over any preinstalled site-packages version
2080
+ import sys as _sys
2081
+ for k in list(_sys.modules.keys()):
2082
+ if k == 'synth_ai' or k.startswith('synth_ai.'):
2083
+ _sys.modules.pop(k, None)
2084
+ import importlib as _importlib
2085
+ _importlib.invalidate_caches()
2086
+ try:
2087
+ if MODULE_FILE and os.path.exists(MODULE_FILE):
2088
+ spec = importlib.util.spec_from_file_location(MODULE_NAME or 'task_app_module', MODULE_FILE)
2089
+ if not spec or not spec.loader:
2090
+ raise RuntimeError("Failed to prepare spec for: " + str(MODULE_FILE))
2091
+ mod = importlib.util.module_from_spec(spec)
2092
+ sys.modules[MODULE_NAME or 'task_app_module'] = mod
2093
+ spec.loader.exec_module(mod)
2094
+ else:
2095
+ try:
2096
+ importlib.import_module(MODULE_NAME)
2097
+ except Exception:
2098
+ fallback_file = '/opt/synth_ai_repo/__local_task_app__/' + (MODULE_NAME.split('.')[-1] if MODULE_NAME else 'task_app') + '.py'
2099
+ if os.path.exists(fallback_file):
2100
+ spec = importlib.util.spec_from_file_location(MODULE_NAME or 'task_app_module', fallback_file)
2101
+ if not spec or not spec.loader:
2102
+ raise RuntimeError("Failed to prepare fallback spec for: " + str(fallback_file))
2103
+ mod = importlib.util.module_from_spec(spec)
2104
+ sys.modules[MODULE_NAME or 'task_app_module'] = mod
2105
+ spec.loader.exec_module(mod)
2106
+ else:
2107
+ raise
2108
+ except Exception as e:
2109
+ raise RuntimeError("Task app import failed: " + str(e))
2110
+
2111
+ # Get the entry from registry (now that it's registered)
2112
+ from synth_ai.task.apps import registry
2113
+ from synth_ai.task.server import create_task_app
2114
+ entry = registry.get(ENTRY_ID)
2115
+ cfg = entry.modal
2116
+ if cfg is None:
2117
+ raise RuntimeError("Modal configuration missing for task app " + ENTRY_ID)
1303
2118
  config = entry.config_factory()
1304
2119
  return create_task_app(config)
1305
2120
  """
@@ -1314,3 +2129,304 @@ def fastapi_app():
1314
2129
  def register(cli: click.Group) -> None:
1315
2130
  cli.add_command(serve_command)
1316
2131
  cli.add_command(task_app_group)
2132
+ cli.add_command(eval_command)
2133
+
2134
+
2135
+ @click.command("eval")
2136
+ @click.argument("app_id", type=str, required=False)
2137
+ @click.option("--config", type=click.Path(), default=None, help="Path to eval TOML (short schema)")
2138
+ @click.option(
2139
+ "--url",
2140
+ "task_app_url",
2141
+ type=str,
2142
+ default=None,
2143
+ help="Base URL of a running task app (skip in-process server)",
2144
+ )
2145
+ @click.option("--seeds", default="0,1,2,3,4", help="Comma-separated seeds/indices to evaluate")
2146
+ @click.option("--split", default="train", show_default=True, help="Dataset split to use")
2147
+ @click.option("--model", default=None, help="Model identifier (prompted if omitted)")
2148
+ @click.option("--env-file", multiple=True, type=click.Path(), help="Env file(s) for keys")
2149
+ def eval_command(
2150
+ app_id: str | None,
2151
+ config: str | None,
2152
+ task_app_url: str | None,
2153
+ seeds: str,
2154
+ split: str,
2155
+ model: str | None,
2156
+ env_file: Sequence[str],
2157
+ ) -> None:
2158
+ """Run local rollouts against a task app using in-process ASGI and summarize results."""
2159
+ cfg: dict[str, Any] = {}
2160
+ config_path: Path | None = None
2161
+ if config:
2162
+ config_path = Path(config)
2163
+ else:
2164
+ auto_configs = _discover_eval_config_paths()
2165
+ if auto_configs:
2166
+ config_path = auto_configs[0]
2167
+ click.echo(f"Using eval config: {config_path}")
2168
+
2169
+ if config_path:
2170
+ if _toml is None:
2171
+ raise click.ClickException(
2172
+ "TOML parser not available; use Python 3.11+ or install tomli"
2173
+ )
2174
+ if not config_path.exists():
2175
+ raise click.ClickException(f"Eval config not found: {config_path}")
2176
+ try:
2177
+ data = config_path.read_bytes()
2178
+ parsed = _toml.loads(data.decode("utf-8"))
2179
+ if isinstance(parsed, dict):
2180
+ section = parsed.get("eval")
2181
+ if isinstance(section, dict):
2182
+ cfg = dict(section)
2183
+ else:
2184
+ cfg = dict(parsed)
2185
+ except Exception as exc:
2186
+ raise click.ClickException(f"Failed to parse TOML '{config_path}': {exc}")
2187
+
2188
+ app_id = app_id or (cfg.get("app_id") if isinstance(cfg.get("app_id"), str) else None) # type: ignore
2189
+
2190
+ # Determine selection params (CLI takes precedence; TOML only fills unset model/seeds/env)
2191
+ if cfg.get("model") and not model:
2192
+ model = str(cfg["model"]) # type: ignore[index]
2193
+ if cfg.get("seeds") and seeds == "0,1,2,3,4":
2194
+ val = cfg["seeds"]
2195
+ if isinstance(val, list):
2196
+ try:
2197
+ seeds = ",".join(str(int(x)) for x in val)
2198
+ except Exception:
2199
+ pass
2200
+ elif isinstance(val, str):
2201
+ seeds = val
2202
+ elif isinstance(val, int):
2203
+ seeds = str(val)
2204
+ if cfg.get("env_file") and not env_file:
2205
+ ef = cfg["env_file"]
2206
+ if isinstance(ef, str):
2207
+ env_file = (ef,) # type: ignore[assignment]
2208
+ elif isinstance(ef, list):
2209
+ env_file = tuple(str(x) for x in ef) # type: ignore[assignment]
2210
+
2211
+ entry: TaskAppEntry | None = None
2212
+ if task_app_url is None:
2213
+ choice = _select_app_choice(app_id, purpose="eval")
2214
+ entry = choice.ensure_entry()
2215
+
2216
+ env_paths: list[Path] = []
2217
+ if entry is not None:
2218
+ env_paths = _determine_env_files(entry, env_file)
2219
+ else:
2220
+ if not env_file:
2221
+ raise click.ClickException("--env-file is required when using --url")
2222
+ for candidate in env_file:
2223
+ p = Path(candidate).expanduser()
2224
+ if not p.exists():
2225
+ raise click.ClickException(f"Env file not found: {p}")
2226
+ env_paths.append(p)
2227
+
2228
+ click.echo("Using env file(s): " + ", ".join(str(p) for p in env_paths))
2229
+ _load_env_files_into_process([str(Path(p)) for p in env_paths])
2230
+
2231
+ if task_app_url is None:
2232
+ config = entry.config_factory() # type: ignore[union-attr]
2233
+ # Help the type checker; runtime check also enforced in server.run_task_app
2234
+ if not isinstance(config, TaskAppConfig):
2235
+ raise click.ClickException(
2236
+ "Invalid task app: config_factory did not return TaskAppConfig"
2237
+ )
2238
+ app = create_task_app(config)
2239
+
2240
+ # Determine supported models
2241
+ supported: list[str] = []
2242
+ if task_app_url is None:
2243
+ try:
2244
+ supported = list((config.base_task_info.inference or {}).get("models") or []) # type: ignore[union-attr]
2245
+ except Exception:
2246
+ supported = []
2247
+ else:
2248
+ try:
2249
+ import httpx as _hx
2250
+
2251
+ headers = {}
2252
+ api_key = (os.environ.get("ENVIRONMENT_API_KEY") or "").strip()
2253
+ if api_key:
2254
+ headers["X-API-Key"] = api_key
2255
+ with _hx.Client(base_url=task_app_url, headers=headers, timeout=15.0) as c:
2256
+ info = c.get("/info").json()
2257
+ inf = info.get("inference") if isinstance(info, dict) else None
2258
+ if isinstance(inf, dict):
2259
+ m = inf.get("models")
2260
+ if isinstance(m, list):
2261
+ supported = [str(x) for x in m]
2262
+ if not supported:
2263
+ providers = inf.get("providers")
2264
+ if isinstance(providers, list):
2265
+ if "openai" in providers:
2266
+ supported.append("gpt-5")
2267
+ if "groq" in providers:
2268
+ supported.append("groq:llama-3.1-70b-versatile")
2269
+ supported.append("synth:qwen-0.6b")
2270
+ except Exception:
2271
+ supported = []
2272
+ if not supported:
2273
+ # Only fall back to local config-derived providers when running in-process
2274
+ if task_app_url is None:
2275
+ try:
2276
+ providers = list((config.base_task_info.inference or {}).get("providers") or []) # type: ignore[union-attr]
2277
+ except Exception:
2278
+ providers = []
2279
+ if "openai" in providers:
2280
+ supported.append("gpt-5")
2281
+ if "groq" in providers:
2282
+ supported.append("groq:llama-3.1-70b-versatile")
2283
+ # Always include a local synth model option for smoke tests
2284
+ supported.append("synth:qwen-0.6b")
2285
+
2286
+ selected_model = model
2287
+ if not selected_model:
2288
+ if not supported:
2289
+ raise click.ClickException(
2290
+ "No supported models; supply --model or add base_task_info.inference.models"
2291
+ )
2292
+ click.echo("Select model to evaluate:")
2293
+ for idx, m in enumerate(supported, start=1):
2294
+ click.echo(f" {idx}) {m}")
2295
+ choice_idx = click.prompt("Enter choice", type=click.IntRange(1, len(supported)))
2296
+ selected_model = supported[choice_idx - 1]
2297
+
2298
+ try:
2299
+ seed_values = [int(s.strip()) for s in seeds.split(",") if s.strip()]
2300
+ except Exception:
2301
+ raise click.ClickException("Invalid --seeds; expected comma-separated integers")
2302
+
2303
+ import httpx
2304
+
2305
+ headers = {}
2306
+ api_key = (os.environ.get("ENVIRONMENT_API_KEY") or "").strip()
2307
+ if api_key:
2308
+ headers["X-API-Key"] = api_key
2309
+
2310
+ successes = 0
2311
+ failures = 0
2312
+ # Aggregate outcome stats across successful seeds
2313
+ outcome_sum: float = 0.0
2314
+ outcome_count: int = 0
2315
+ outcome_correct: int = 0
2316
+ if task_app_url is None:
2317
+ transport = httpx.ASGITransport(app=app) # type: ignore[name-defined]
2318
+ # Newer httpx types consider ASGITransport under httpx._transports; cast to satisfy type checker
2319
+ client = httpx.Client(
2320
+ transport=cast(Any, transport),
2321
+ base_url="http://eval.local",
2322
+ timeout=60.0,
2323
+ headers=headers,
2324
+ )
2325
+ else:
2326
+ client = httpx.Client(base_url=task_app_url, timeout=60.0, headers=headers)
2327
+ with client as client:
2328
+ try:
2329
+ client.get("/task_info")
2330
+ except Exception:
2331
+ pass
2332
+ # Precompute optional policy overrides from TOML
2333
+ policy_overrides: dict[str, Any] = {}
2334
+ try:
2335
+ # Accept [eval.policy] table or top-level keys for convenience
2336
+ if isinstance(cfg.get("policy"), dict):
2337
+ policy_overrides.update(dict(cfg["policy"]))
2338
+ # Back-compat: allow temperature/max_tokens at top level
2339
+ for k in (
2340
+ "temperature",
2341
+ "max_tokens",
2342
+ "reasoning_effort",
2343
+ "system_hint",
2344
+ "tool_choice",
2345
+ ):
2346
+ if k in cfg and k not in policy_overrides:
2347
+ policy_overrides[k] = cfg.get(k)
2348
+ except Exception:
2349
+ policy_overrides = {}
2350
+
2351
+ for seed_val in seed_values:
2352
+ body = {
2353
+ "run_id": str(uuid.uuid4()),
2354
+ "env": {"config": {"split": split, "index": seed_val}, "seed": seed_val},
2355
+ "policy": {
2356
+ "policy_name": selected_model,
2357
+ "config": {"model": selected_model, **policy_overrides},
2358
+ },
2359
+ "ops": [],
2360
+ }
2361
+ try:
2362
+ resp = client.post("/rollout", json=body)
2363
+ ok = 200 <= resp.status_code < 300
2364
+ if ok:
2365
+ successes += 1
2366
+ else:
2367
+ failures += 1
2368
+
2369
+ # Print summary with any available metrics/tool calls
2370
+ summary = [f"seed={seed_val}", f"status={resp.status_code}"]
2371
+ try:
2372
+ data = resp.json()
2373
+ except Exception:
2374
+ data = None
2375
+ if isinstance(data, dict):
2376
+ metrics = data.get("metrics") if isinstance(data.get("metrics"), dict) else None
2377
+ if metrics:
2378
+ mean_return = metrics.get("mean_return") or metrics.get("total_reward")
2379
+ outcome = metrics.get("outcome_score")
2380
+ if mean_return is not None:
2381
+ summary.append(f"mean_return={mean_return}")
2382
+ if outcome is not None:
2383
+ summary.append(f"outcome={outcome}")
2384
+ # Aggregate outcome stats
2385
+ try:
2386
+ val = float(outcome)
2387
+ outcome_sum += val
2388
+ outcome_count += 1
2389
+ if val >= 0.5:
2390
+ outcome_correct += 1
2391
+ except Exception:
2392
+ pass
2393
+ # Try to infer tool call count from first trajectory step
2394
+ trajs = (
2395
+ data.get("trajectories")
2396
+ if isinstance(data.get("trajectories"), list)
2397
+ else None
2398
+ )
2399
+ if trajs:
2400
+ first = trajs[0] if trajs else None
2401
+ steps = first.get("steps") if isinstance(first, dict) else None
2402
+ if isinstance(steps, list) and steps:
2403
+ step0 = steps[0]
2404
+ tool_calls = step0.get("tool_calls") or step0.get("tools") or []
2405
+ if isinstance(tool_calls, list):
2406
+ summary.append(f"tool_calls={len(tool_calls)}")
2407
+ click.echo(" ".join(summary))
2408
+ # Print the full response JSON (trace, trajectories, metrics)
2409
+ try:
2410
+ click.echo(json.dumps(data, indent=2))
2411
+ except Exception:
2412
+ pass
2413
+ else:
2414
+ click.echo(" ".join(summary))
2415
+ except Exception as exc:
2416
+ failures += 1
2417
+ click.echo(f"seed={seed_val} error={exc}")
2418
+
2419
+ click.echo(
2420
+ f"Eval complete: {successes} ok, {failures} failed; model={selected_model}, split={split}"
2421
+ )
2422
+ # Print outcome summary if any successes
2423
+ if outcome_count > 0:
2424
+ mean_outcome = outcome_sum / float(outcome_count)
2425
+ frac_right = outcome_correct / float(outcome_count)
2426
+ click.echo(
2427
+ f"Outcome summary: correct={outcome_correct}/{outcome_count} ({frac_right:.2%}), mean_outcome={mean_outcome:.3f}"
2428
+ )
2429
+
2430
+
2431
+ def register_eval(cli: click.Group) -> None:
2432
+ cli.add_command(eval_command)