synth-ai 0.2.14__py3-none-any.whl → 0.2.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (236) hide show
  1. examples/README.md +1 -0
  2. examples/multi_step/SFT_README.md +147 -0
  3. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +9 -9
  4. examples/multi_step/configs/crafter_sft_qwen30b_lora.toml +62 -0
  5. examples/multi_step/convert_traces_to_sft.py +84 -0
  6. examples/multi_step/run_sft_qwen30b.sh +45 -0
  7. examples/qwen_coder/configs/coder_lora_30b.toml +2 -1
  8. examples/qwen_coder/configs/coder_lora_4b.toml +2 -1
  9. examples/qwen_coder/configs/coder_lora_small.toml +2 -1
  10. examples/qwen_vl/BUGS_AND_FIXES.md +232 -0
  11. examples/qwen_vl/IMAGE_VALIDATION_COMPLETE.md +271 -0
  12. examples/qwen_vl/IMAGE_VALIDATION_SUMMARY.md +260 -0
  13. examples/qwen_vl/INFERENCE_SFT_TESTS.md +412 -0
  14. examples/qwen_vl/NEXT_STEPS_2B.md +325 -0
  15. examples/qwen_vl/QUICKSTART.md +327 -0
  16. examples/qwen_vl/QUICKSTART_RL_VISION.md +110 -0
  17. examples/qwen_vl/README.md +154 -0
  18. examples/qwen_vl/RL_VISION_COMPLETE.md +475 -0
  19. examples/qwen_vl/RL_VISION_TESTING.md +333 -0
  20. examples/qwen_vl/SDK_VISION_INTEGRATION.md +328 -0
  21. examples/qwen_vl/SETUP_COMPLETE.md +275 -0
  22. examples/qwen_vl/VISION_TESTS_COMPLETE.md +490 -0
  23. examples/qwen_vl/VLM_PIPELINE_COMPLETE.md +242 -0
  24. examples/qwen_vl/__init__.py +2 -0
  25. examples/qwen_vl/collect_data_via_cli.md +423 -0
  26. examples/qwen_vl/collect_vision_traces.py +368 -0
  27. examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +127 -0
  28. examples/qwen_vl/configs/crafter_vlm_sft_example.toml +60 -0
  29. examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +43 -0
  30. examples/qwen_vl/configs/eval_gpt4o_vision_proper.toml +29 -0
  31. examples/qwen_vl/configs/eval_gpt5nano_vision.toml +45 -0
  32. examples/qwen_vl/configs/eval_qwen2vl_vision.toml +44 -0
  33. examples/qwen_vl/configs/filter_qwen2vl_sft.toml +50 -0
  34. examples/qwen_vl/configs/filter_vision_sft.toml +53 -0
  35. examples/qwen_vl/configs/filter_vision_test.toml +8 -0
  36. examples/qwen_vl/configs/sft_qwen3_vl_2b_test.toml +54 -0
  37. examples/qwen_vl/crafter_gpt5nano_agent.py +308 -0
  38. examples/qwen_vl/crafter_qwen_vl_agent.py +300 -0
  39. examples/qwen_vl/run_vision_comparison.sh +62 -0
  40. examples/qwen_vl/run_vision_sft_pipeline.sh +175 -0
  41. examples/qwen_vl/test_image_validation.py +201 -0
  42. examples/qwen_vl/test_sft_vision_data.py +110 -0
  43. examples/rl/README.md +1 -1
  44. examples/rl/configs/eval_base_qwen.toml +17 -0
  45. examples/rl/configs/eval_rl_qwen.toml +13 -0
  46. examples/rl/configs/rl_from_base_qwen.toml +37 -0
  47. examples/rl/configs/rl_from_base_qwen17.toml +76 -0
  48. examples/rl/configs/rl_from_ft_qwen.toml +37 -0
  49. examples/rl/run_eval.py +436 -0
  50. examples/rl/run_rl_and_save.py +111 -0
  51. examples/rl/task_app/README.md +22 -0
  52. examples/rl/task_app/math_single_step.py +990 -0
  53. examples/rl/task_app/math_task_app.py +111 -0
  54. examples/sft/README.md +5 -5
  55. examples/sft/configs/crafter_fft_qwen0p6b.toml +4 -2
  56. examples/sft/configs/crafter_lora_qwen0p6b.toml +4 -3
  57. examples/sft/evaluate.py +2 -4
  58. examples/sft/export_dataset.py +7 -4
  59. examples/swe/task_app/README.md +1 -1
  60. examples/swe/task_app/grpo_swe_mini.py +0 -1
  61. examples/swe/task_app/grpo_swe_mini_task_app.py +0 -12
  62. examples/swe/task_app/hosted/envs/mini_swe/environment.py +13 -13
  63. examples/swe/task_app/hosted/policy_routes.py +0 -2
  64. examples/swe/task_app/hosted/rollout.py +0 -8
  65. examples/task_apps/crafter/task_app/grpo_crafter.py +4 -7
  66. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +59 -1
  67. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +30 -0
  68. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +62 -31
  69. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +16 -14
  70. examples/task_apps/enron/__init__.py +1 -0
  71. examples/vlm/README.md +3 -3
  72. examples/vlm/configs/crafter_vlm_gpt4o.toml +2 -0
  73. examples/vlm/crafter_openai_vlm_agent.py +3 -5
  74. examples/vlm/filter_image_rows.py +1 -1
  75. examples/vlm/run_crafter_vlm_benchmark.py +2 -2
  76. examples/warming_up_to_rl/_utils.py +92 -0
  77. examples/warming_up_to_rl/analyze_trace_db.py +1 -1
  78. examples/warming_up_to_rl/configs/crafter_fft.toml +2 -0
  79. examples/warming_up_to_rl/configs/crafter_fft_4b.toml +2 -0
  80. examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +2 -0
  81. examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +2 -0
  82. examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +2 -1
  83. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +2 -1
  84. examples/warming_up_to_rl/configs/rl_from_ft.toml +2 -0
  85. examples/warming_up_to_rl/export_trace_sft.py +174 -60
  86. examples/warming_up_to_rl/readme.md +63 -132
  87. examples/warming_up_to_rl/run_fft_and_save.py +1 -1
  88. examples/warming_up_to_rl/run_rl_and_save.py +1 -1
  89. examples/warming_up_to_rl/task_app/README.md +42 -0
  90. examples/warming_up_to_rl/task_app/grpo_crafter.py +696 -0
  91. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +135 -0
  92. examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
  93. examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
  94. examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +143 -0
  95. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1226 -0
  96. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
  97. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
  98. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
  99. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +522 -0
  100. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +478 -0
  101. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +108 -0
  102. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
  103. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
  104. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +204 -0
  105. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
  106. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +618 -0
  107. examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +100 -0
  108. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +1081 -0
  109. examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +195 -0
  110. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1861 -0
  111. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
  112. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +211 -0
  113. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +161 -0
  114. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +137 -0
  115. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +62 -0
  116. synth_ai/__init__.py +44 -30
  117. synth_ai/_utils/__init__.py +47 -0
  118. synth_ai/_utils/base_url.py +10 -0
  119. synth_ai/_utils/http.py +10 -0
  120. synth_ai/_utils/prompts.py +10 -0
  121. synth_ai/_utils/task_app_state.py +12 -0
  122. synth_ai/_utils/user_config.py +10 -0
  123. synth_ai/api/models/supported.py +144 -7
  124. synth_ai/api/train/__init__.py +13 -1
  125. synth_ai/api/train/cli.py +30 -7
  126. synth_ai/api/train/config_finder.py +18 -11
  127. synth_ai/api/train/env_resolver.py +13 -10
  128. synth_ai/cli/__init__.py +62 -78
  129. synth_ai/cli/_modal_wrapper.py +7 -5
  130. synth_ai/cli/_typer_patch.py +0 -2
  131. synth_ai/cli/_validate_task_app.py +22 -4
  132. synth_ai/cli/legacy_root_backup.py +3 -1
  133. synth_ai/cli/lib/__init__.py +10 -0
  134. synth_ai/cli/lib/task_app_discovery.py +7 -0
  135. synth_ai/cli/lib/task_app_env.py +518 -0
  136. synth_ai/cli/recent.py +2 -1
  137. synth_ai/cli/setup.py +266 -0
  138. synth_ai/cli/status.py +1 -1
  139. synth_ai/cli/task_app_deploy.py +16 -0
  140. synth_ai/cli/task_app_list.py +25 -0
  141. synth_ai/cli/task_app_modal_serve.py +16 -0
  142. synth_ai/cli/task_app_serve.py +18 -0
  143. synth_ai/cli/task_apps.py +71 -31
  144. synth_ai/cli/traces.py +1 -1
  145. synth_ai/cli/train.py +18 -0
  146. synth_ai/cli/tui.py +7 -2
  147. synth_ai/cli/turso.py +1 -1
  148. synth_ai/cli/watch.py +1 -1
  149. synth_ai/demos/__init__.py +10 -0
  150. synth_ai/demos/core/__init__.py +28 -1
  151. synth_ai/demos/crafter/__init__.py +1 -0
  152. synth_ai/demos/crafter/crafter_fft_4b.toml +55 -0
  153. synth_ai/demos/crafter/grpo_crafter_task_app.py +185 -0
  154. synth_ai/demos/crafter/rl_from_base_qwen4b.toml +74 -0
  155. synth_ai/demos/demo_registry.py +176 -0
  156. synth_ai/demos/math/__init__.py +1 -0
  157. synth_ai/demos/math/_common.py +16 -0
  158. synth_ai/demos/math/app.py +38 -0
  159. synth_ai/demos/math/config.toml +76 -0
  160. synth_ai/demos/math/deploy_modal.py +54 -0
  161. synth_ai/demos/math/modal_task_app.py +702 -0
  162. synth_ai/demos/math/task_app_entry.py +51 -0
  163. synth_ai/environments/environment/core.py +7 -1
  164. synth_ai/environments/examples/bandit/engine.py +0 -1
  165. synth_ai/environments/examples/bandit/environment.py +0 -1
  166. synth_ai/environments/examples/wordle/environment.py +0 -1
  167. synth_ai/evals/base.py +16 -5
  168. synth_ai/evals/client.py +1 -1
  169. synth_ai/inference/client.py +1 -1
  170. synth_ai/judge_schemas.py +8 -8
  171. synth_ai/learning/client.py +1 -1
  172. synth_ai/learning/health.py +1 -1
  173. synth_ai/learning/jobs.py +1 -1
  174. synth_ai/learning/rl/client.py +1 -1
  175. synth_ai/learning/rl/env_keys.py +1 -1
  176. synth_ai/learning/rl/secrets.py +1 -1
  177. synth_ai/learning/sft/client.py +1 -1
  178. synth_ai/learning/sft/data.py +407 -4
  179. synth_ai/learning/validators.py +4 -1
  180. synth_ai/task/apps/__init__.py +4 -2
  181. synth_ai/task/config.py +6 -4
  182. synth_ai/task/rubrics/__init__.py +1 -2
  183. synth_ai/task/rubrics/loaders.py +14 -10
  184. synth_ai/task/rubrics.py +219 -0
  185. synth_ai/task/trace_correlation_helpers.py +24 -11
  186. synth_ai/task/tracing_utils.py +14 -3
  187. synth_ai/task/validators.py +2 -3
  188. synth_ai/tracing_v3/abstractions.py +3 -3
  189. synth_ai/tracing_v3/config.py +15 -13
  190. synth_ai/tracing_v3/constants.py +21 -0
  191. synth_ai/tracing_v3/db_config.py +3 -1
  192. synth_ai/tracing_v3/decorators.py +10 -7
  193. synth_ai/tracing_v3/llm_call_record_helpers.py +5 -5
  194. synth_ai/tracing_v3/session_tracer.py +7 -7
  195. synth_ai/tracing_v3/storage/base.py +29 -29
  196. synth_ai/tracing_v3/storage/config.py +3 -3
  197. synth_ai/tracing_v3/turso/daemon.py +8 -9
  198. synth_ai/tracing_v3/turso/native_manager.py +80 -72
  199. synth_ai/tracing_v3/utils.py +2 -2
  200. synth_ai/tui/cli/query_experiments.py +4 -4
  201. synth_ai/tui/cli/query_experiments_v3.py +4 -4
  202. synth_ai/tui/dashboard.py +14 -9
  203. synth_ai/utils/__init__.py +101 -0
  204. synth_ai/utils/base_url.py +94 -0
  205. synth_ai/utils/cli.py +131 -0
  206. synth_ai/utils/env.py +287 -0
  207. synth_ai/utils/http.py +169 -0
  208. synth_ai/utils/modal.py +308 -0
  209. synth_ai/utils/process.py +212 -0
  210. synth_ai/utils/prompts.py +39 -0
  211. synth_ai/utils/sqld.py +122 -0
  212. synth_ai/utils/task_app_discovery.py +882 -0
  213. synth_ai/utils/task_app_env.py +186 -0
  214. synth_ai/utils/task_app_state.py +318 -0
  215. synth_ai/utils/user_config.py +137 -0
  216. synth_ai/v0/config/__init__.py +1 -5
  217. synth_ai/v0/config/base_url.py +1 -7
  218. synth_ai/v0/tracing/config.py +1 -1
  219. synth_ai/v0/tracing/decorators.py +1 -1
  220. synth_ai/v0/tracing/upload.py +1 -1
  221. synth_ai/v0/tracing_v1/config.py +1 -1
  222. synth_ai/v0/tracing_v1/decorators.py +1 -1
  223. synth_ai/v0/tracing_v1/upload.py +1 -1
  224. {synth_ai-0.2.14.dist-info → synth_ai-0.2.16.dist-info}/METADATA +85 -31
  225. {synth_ai-0.2.14.dist-info → synth_ai-0.2.16.dist-info}/RECORD +229 -117
  226. synth_ai/cli/man.py +0 -106
  227. synth_ai/compound/cais.py +0 -0
  228. synth_ai/core/experiment.py +0 -13
  229. synth_ai/core/system.py +0 -15
  230. synth_ai/demo_registry.py +0 -295
  231. synth_ai/handshake.py +0 -109
  232. synth_ai/http.py +0 -26
  233. {synth_ai-0.2.14.dist-info → synth_ai-0.2.16.dist-info}/WHEEL +0 -0
  234. {synth_ai-0.2.14.dist-info → synth_ai-0.2.16.dist-info}/entry_points.txt +0 -0
  235. {synth_ai-0.2.14.dist-info → synth_ai-0.2.16.dist-info}/licenses/LICENSE +0 -0
  236. {synth_ai-0.2.14.dist-info → synth_ai-0.2.16.dist-info}/top_level.txt +0 -0
synth_ai/cli/tui.py CHANGED
@@ -3,6 +3,7 @@
3
3
  CLI: Interactive TUI dashboard for Synth AI.
4
4
  """
5
5
 
6
+ import importlib
6
7
  import os
7
8
 
8
9
  import click
@@ -24,19 +25,23 @@ def register(cli):
24
25
 
25
26
  # Import here to avoid circular imports and handle optional dependencies
26
27
  try:
27
- from synth_ai.tui.dashboard import main as tui_main
28
+ module = importlib.import_module("synth_ai.tui.dashboard")
28
29
  except (ImportError, ModuleNotFoundError) as e:
29
30
  console.print("[red]Error:[/red] TUI dashboard not available.")
30
31
  console.print(f"Missing dependencies: {e}")
31
32
  console.print("Install with: pip install textual")
32
33
  return
33
- except Exception as e:
34
+ except Exception:
34
35
  # Handle other import errors (like missing libsql, type annotation issues, etc.)
35
36
  console.print("[red]Error:[/red] TUI dashboard not available.")
36
37
  console.print("This may be due to missing dependencies or Python version compatibility.")
37
38
  console.print("Try: pip install textual libsql")
38
39
  console.print("If using Python < 3.10, you may need to update Python or install eval_type_backport.")
39
40
  return
41
+ tui_main = getattr(module, "main", None)
42
+ if not callable(tui_main):
43
+ console.print("[red]Error:[/red] TUI dashboard entrypoint not available.")
44
+ return
40
45
 
41
46
  # Set environment variables for the TUI to use
42
47
  os.environ.setdefault("TUI_DB_URL", db_url)
synth_ai/cli/turso.py CHANGED
@@ -6,7 +6,7 @@ import subprocess
6
6
 
7
7
  import click
8
8
 
9
- from synth_ai.cli.root import SQLD_VERSION, find_sqld_binary, install_sqld
9
+ from .root import SQLD_VERSION, find_sqld_binary, install_sqld
10
10
 
11
11
 
12
12
  def register(cli: click.Group) -> None:
synth_ai/cli/watch.py CHANGED
@@ -16,7 +16,7 @@ from rich.console import Console, Group
16
16
  from rich.panel import Panel
17
17
  from rich.table import Table
18
18
 
19
- from synth_ai.cli._storage import load_storage
19
+ from ._storage import load_storage
20
20
 
21
21
 
22
22
  def _open_db(db_url: str):
@@ -0,0 +1,10 @@
1
+ """Namespace for demo task apps (math, crafter, etc.)."""
2
+
3
+ import contextlib
4
+
5
+ # Ensure registry entries are loaded for CLI discovery.
6
+ with contextlib.suppress(Exception): # pragma: no cover - optional on downstream installs
7
+ from synth_ai.demos.math import task_app_entry # noqa: F401
8
+
9
+ with contextlib.suppress(Exception): # pragma: no cover - optional on downstream installs
10
+ from synth_ai.demos.crafter import grpo_crafter_task_app # noqa: F401
@@ -1 +1,28 @@
1
- # Core demo CLI and helpers package
1
+ """
2
+ Compatibility layer exposing the legacy demo helpers.
3
+
4
+ Historically these utilities lived in ``synth_ai.demos.core`` as a module.
5
+ Upstream refactors moved the implementation under
6
+ ``synth_ai.demos.demo_task_apps.core``. Several call sites (including the new
7
+ vision tests) still import the older path, so we re-export everything here.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ from synth_ai.demos.demo_task_apps import core as _demo_core
13
+
14
+ __all__ = [
15
+ name
16
+ for name in dir(_demo_core)
17
+ if not name.startswith("_")
18
+ ]
19
+
20
+ globals().update({name: getattr(_demo_core, name) for name in __all__})
21
+
22
+
23
+ def __getattr__(name: str):
24
+ if name in __all__:
25
+ value = getattr(_demo_core, name)
26
+ globals()[name] = value
27
+ return value
28
+ raise AttributeError(name)
@@ -0,0 +1 @@
1
+ # Crafter demo task app
@@ -0,0 +1,55 @@
1
+ # FFT job config for Qwen/Qwen3-4B on Crafter SFT dataset
2
+
3
+ type = "sft"
4
+
5
+ [algorithm]
6
+ type = "offline"
7
+ method = "supervised_finetune"
8
+ variety = "fft"
9
+
10
+
11
+ [job]
12
+ model = "Qwen/Qwen3-4B"
13
+ data = "ft_data/crafter_sft.jsonl"
14
+
15
+ [compute]
16
+ # Adjust as needed for your quota
17
+ gpu_type = "H100"
18
+ gpu_count = 1
19
+ nodes = 1
20
+
21
+ [data]
22
+ # Optional topology metadata (left empty for now)
23
+ topology = {}
24
+
25
+ # Optional local validation dataset path (JSONL). If set, the client will upload
26
+ # this file and wire up validation so the frontend can display val.loss.
27
+ # validation_path = "../ft_data/crafter_validation.jsonl"
28
+
29
+ [training]
30
+ mode = "sft_offline"
31
+ use_qlora = false
32
+
33
+ # Validation settings to emit val.loss on the frontend
34
+ [training.validation]
35
+ enabled = true
36
+ evaluation_strategy = "steps"
37
+ eval_steps = 20
38
+ save_best_model_at_end = true
39
+ metric_for_best_model = "val.loss"
40
+ greater_is_better = false
41
+
42
+ [hyperparameters]
43
+ # Minimal safe defaults; backend can override
44
+ n_epochs = 1
45
+ batch_size = 1
46
+ gradient_accumulation_steps = 64
47
+ sequence_length = 4096
48
+ learning_rate = 5e-6
49
+ warmup_ratio = 0.03
50
+ train_kind = "fft"
51
+
52
+ # Optional parallelism block example
53
+ #[hyperparameters.parallelism]
54
+ # tensor_parallel_size = 1
55
+ # pipeline_parallel_size = 1
@@ -0,0 +1,185 @@
1
+ """Compatibility wrapper for the GRPO Crafter task app.
2
+
3
+ This module now delegates to the TaskAppConfig defined in the local example at
4
+ `examples/warming_up_to_rl/task_app/grpo_crafter.py`. It is kept for legacy usage
5
+ (running the file directly or targeting `fastapi_app` from external tooling).
6
+ Prefer using `uvx synth-ai serve grpo-crafter` for local development and testing.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import argparse
12
+ import importlib.util
13
+ from contextlib import suppress
14
+ from pathlib import Path
15
+
16
+ from fastapi.exceptions import RequestValidationError
17
+ from fastapi.responses import JSONResponse
18
+ from starlette.requests import Request
19
+ from synth_ai.task.apps import ModalDeploymentConfig, TaskAppEntry, registry
20
+ from synth_ai.task.auth import is_api_key_header_authorized, normalize_environment_api_key
21
+ from synth_ai.task.server import TaskAppConfig, create_task_app, run_task_app
22
+
23
+
24
+ def _load_build_config():
25
+ import synth_ai
26
+
27
+ synth_ai_path = Path(synth_ai.__file__).resolve().parent.parent
28
+ module_path = synth_ai_path / "examples" / "warming_up_to_rl" / "task_app" / "grpo_crafter.py"
29
+
30
+ if not module_path.exists():
31
+ raise ImportError(
32
+ f"Could not find task app module at {module_path}. Make sure you're running from the synth-ai repository."
33
+ )
34
+
35
+ spec = importlib.util.spec_from_file_location(
36
+ "warming_up_to_rl.task_app.grpo_crafter", module_path
37
+ )
38
+ if spec is None or spec.loader is None:
39
+ raise ImportError(f"Could not load task app module at {module_path}")
40
+ module = importlib.util.module_from_spec(spec)
41
+ import sys
42
+
43
+ sys.modules.setdefault(spec.name, module)
44
+
45
+ from synth_ai.task import apps as task_apps
46
+
47
+ original_register = task_apps.registry.register
48
+
49
+ def _safe_register(entry):
50
+ with suppress(ValueError):
51
+ original_register(entry)
52
+
53
+ task_apps.registry.register = _safe_register
54
+ try:
55
+ spec.loader.exec_module(module)
56
+ finally:
57
+ task_apps.registry.register = original_register
58
+ return module.build_config
59
+
60
+
61
+ build_config = _load_build_config()
62
+
63
+
64
+ APP_ID = "grpo-crafter"
65
+
66
+
67
+ def _build_base_config() -> TaskAppConfig:
68
+ return build_config()
69
+
70
+
71
+ try:
72
+ _REGISTERED_ENTRY = registry.get(APP_ID)
73
+ except Exception: # pragma: no cover - registry unavailable in some contexts
74
+ MODAL_DEPLOYMENT: ModalDeploymentConfig | None = None
75
+ else:
76
+ MODAL_DEPLOYMENT = _REGISTERED_ENTRY.modal
77
+
78
+
79
+ def build_task_app_config() -> TaskAppConfig:
80
+ base = _build_base_config()
81
+ return base.clone()
82
+
83
+
84
+ def fastapi_app():
85
+ app = create_task_app(build_task_app_config())
86
+
87
+ filtered_routes = []
88
+ for route in app.router.routes:
89
+ path = getattr(route, "path", None)
90
+ methods = getattr(route, "methods", set()) or set()
91
+ if path in {"/health", "/health/rollout"} and "GET" in methods:
92
+ continue
93
+ filtered_routes.append(route)
94
+ app.router.routes = filtered_routes
95
+
96
+ def _log_env_key_prefix(source: str, env_key: str | None) -> str | None:
97
+ if not env_key:
98
+ return None
99
+ prefix = env_key[: max(1, len(env_key) // 2)]
100
+ print(f"[{source}] expected ENVIRONMENT_API_KEY prefix: {prefix}")
101
+ return prefix
102
+
103
+ @app.get("/health")
104
+ async def health(request: Request):
105
+ env_key = normalize_environment_api_key()
106
+ if not env_key:
107
+ return JSONResponse(
108
+ status_code=503,
109
+ content={"status": "unhealthy", "detail": "Missing ENVIRONMENT_API_KEY"},
110
+ )
111
+ if not is_api_key_header_authorized(request):
112
+ prefix = _log_env_key_prefix("health", env_key)
113
+ content = {"status": "healthy", "authorized": False}
114
+ if prefix:
115
+ content["expected_api_key_prefix"] = prefix
116
+ return JSONResponse(status_code=200, content=content)
117
+ return {"status": "healthy", "authorized": True}
118
+
119
+ @app.get("/health/rollout")
120
+ async def health_rollout(request: Request):
121
+ env_key = normalize_environment_api_key()
122
+ if not env_key:
123
+ return JSONResponse(
124
+ status_code=503,
125
+ content={"status": "unhealthy", "detail": "Missing ENVIRONMENT_API_KEY"},
126
+ )
127
+ if not is_api_key_header_authorized(request):
128
+ prefix = _log_env_key_prefix("health/rollout", env_key)
129
+ content = {"status": "healthy", "authorized": False}
130
+ if prefix:
131
+ content["expected_api_key_prefix"] = prefix
132
+ return JSONResponse(status_code=200, content=content)
133
+ return {"ok": True, "authorized": True}
134
+
135
+ @app.exception_handler(RequestValidationError)
136
+ async def _on_validation_error(request: Request, exc: RequestValidationError):
137
+ try:
138
+ hdr = request.headers
139
+ snapshot = {
140
+ "path": str(request.url.path),
141
+ "have_x_api_key": bool(hdr.get("x-api-key")),
142
+ "have_x_api_keys": bool(hdr.get("x-api-keys")),
143
+ "have_authorization": bool(hdr.get("authorization")),
144
+ "errors": exc.errors()[:5],
145
+ }
146
+ print("[422] validation", snapshot, flush=True)
147
+ except Exception:
148
+ pass
149
+ return JSONResponse(
150
+ status_code=422,
151
+ content={"status": "invalid", "detail": exc.errors()[:5]},
152
+ )
153
+
154
+ return app
155
+
156
+
157
+ def register_demo_entry() -> None:
158
+ description = "Crafter demo task app"
159
+ entry = TaskAppEntry(
160
+ app_id="crafter-demo",
161
+ description=description,
162
+ config_factory=build_task_app_config,
163
+ aliases=("crafter",),
164
+ modal=MODAL_DEPLOYMENT,
165
+ )
166
+ with suppress(ValueError):
167
+ registry.register(entry)
168
+
169
+
170
+ register_demo_entry()
171
+
172
+
173
+ if __name__ == "__main__":
174
+ parser = argparse.ArgumentParser(description="Run the Crafter task app locally")
175
+ parser.add_argument("--host", default="0.0.0.0")
176
+ parser.add_argument("--port", type=int, default=8001)
177
+ parser.add_argument("--reload", action="store_true", help="Enable uvicorn autoreload")
178
+ args = parser.parse_args()
179
+
180
+ run_task_app(
181
+ build_task_app_config,
182
+ host=args.host,
183
+ port=args.port,
184
+ reload=args.reload,
185
+ )
@@ -0,0 +1,74 @@
1
+ # RL training starting from base Qwen/Qwen3-4B (TOML-only model selection)
2
+
3
+ type = "rl"
4
+
5
+ [algorithm]
6
+ type = "online"
7
+ method = "policy_gradient"
8
+ variety = "gspo"
9
+
10
+ [services]
11
+ task_url = "https://synth-laboratories--grpo-crafter-task-app-final-warming--ceb5b2.modal.run"
12
+
13
+ [compute]
14
+ # Cluster shape for RL pipeline
15
+ gpu_type = "H100"
16
+ gpu_count = 8
17
+
18
+ [topology]
19
+ # Split GPUs across vLLM, training, and reference
20
+ # gpus_for_vllm + gpus_for_training + gpus_for_ref must sum to compute.gpu_count
21
+ type = "single_node_split"
22
+ gpus_for_vllm = 4
23
+ gpus_for_training = 3
24
+ gpus_for_ref = 1
25
+ tensor_parallel = 4
26
+
27
+ [vllm]
28
+ # Serving tensor parallel size
29
+ tensor_parallel_size = 4
30
+ max_model_len = 8192
31
+
32
+ [reference]
33
+ # Required by trainer/runtime; ensures dedicated/scoped scoring server config exists
34
+ placement = "dedicated"
35
+ port = 8002
36
+ tp = 1
37
+ health_max_wait_s = 180
38
+ health_interval_ms = 300
39
+
40
+ [model]
41
+ # Base model start
42
+ base = "Qwen/Qwen3-4B"
43
+ label = "crafter-rl-from-base"
44
+
45
+ [rollout]
46
+ max_turns = 10
47
+ episodes_per_batch = 64
48
+ policy_name = "crafter"
49
+
50
+ [evaluation]
51
+ # Run baseline evaluation over the 10 seeds listed below every 10 training iterations
52
+ instances = 10
53
+ every_n_iters = 10
54
+ seeds = [
55
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
56
+ ]
57
+
58
+ [training]
59
+ log_interval = 1
60
+ weight_sync_interval = 1
61
+ # Additional RL hyperparameters can go here
62
+
63
+ # Stepwise rewards (Crafter decision-level)
64
+ step_rewards_enabled = true
65
+ step_rewards_mode = "decision_stepwise" # "off" | "decision_stepwise" | "env_sparse"
66
+ step_rewards_beta = 0.0
67
+ step_rewards_indicator_lambda = 1.0
68
+ # Optional selector for decision scalar: "unique" | "absolute" (default unique)
69
+ event_rewards_kind = "unique"
70
+
71
+ [training.weight_sync]
72
+ enable = true
73
+ targets = ["policy"]
74
+ weight_sync_interval = 1
@@ -0,0 +1,176 @@
1
+ from __future__ import annotations
2
+
3
+ from collections.abc import Callable, Iterator
4
+ from dataclasses import dataclass
5
+ from pathlib import Path
6
+
7
+ REPO_ROOT = Path(__file__).resolve().parents[2]
8
+
9
+
10
+ @dataclass(slots=True)
11
+ class CopySpec:
12
+ source: str
13
+ destination: str
14
+ make_executable: bool = False
15
+
16
+ def absolute_source(self) -> Path:
17
+ return (REPO_ROOT / self.source).resolve()
18
+
19
+
20
+ @dataclass(slots=True)
21
+ class DemoTemplate:
22
+ template_id: str
23
+ name: str
24
+ description: str
25
+ copy_specs: tuple[CopySpec, ...]
26
+ default_subdir: str | None = None
27
+ env_lines: tuple[str, ...] = ()
28
+ config_source: str | None = None
29
+ config_destination: str = "demo_config.toml"
30
+ requires_modal: bool = False
31
+ post_copy: Callable[[Path], None] | None = None
32
+ default_secret_name: str | None = None
33
+
34
+ def iter_copy_specs(self) -> Iterator[CopySpec]:
35
+ yield from self.copy_specs
36
+
37
+ def config_source_path(self) -> Path | None:
38
+ if not self.config_source:
39
+ return None
40
+ return (REPO_ROOT / self.config_source).resolve()
41
+
42
+
43
+ def _postprocess_math_modal(root: Path) -> None:
44
+ task_path = (root / "task_app.py").resolve()
45
+ if not task_path.exists():
46
+ return
47
+ text = task_path.read_text(encoding="utf-8")
48
+ text = text.replace('App("hendrycks-math-task-app")', 'App("hendrycks-math-task-app-demo")')
49
+ text = text.replace(
50
+ 'DEFAULT_TASK_APP_SECRET_NAME = "hendrycks-math-task-app-secret"',
51
+ 'DEFAULT_TASK_APP_SECRET_NAME = "hendrycks-math-task-app-demo-secret"',
52
+ )
53
+ task_path.write_text(text, encoding="utf-8")
54
+
55
+ deploy_script = root / "deploy_task_app.sh"
56
+ if deploy_script.exists():
57
+ import stat
58
+
59
+ mode = deploy_script.stat().st_mode
60
+ deploy_script.chmod(mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)
61
+
62
+
63
+ DEMO_TEMPLATES: dict[str, DemoTemplate] = {
64
+ "math-modal": DemoTemplate(
65
+ template_id="math-modal",
66
+ name="Math Single-Step (Modal deployment)",
67
+ description="Packaged modal task app matching examples/rl math environment.",
68
+ copy_specs=(
69
+ CopySpec(
70
+ "synth_ai/demos/math/modal_task_app.py",
71
+ "task_app.py",
72
+ ),
73
+ CopySpec(
74
+ "synth_ai/demos/math/README.md",
75
+ "README.md",
76
+ ),
77
+ CopySpec(
78
+ "synth_ai/demos/math/deploy_task_app.sh",
79
+ "deploy_task_app.sh",
80
+ make_executable=True,
81
+ ),
82
+ CopySpec(
83
+ "synth_ai/demos/math/config.toml",
84
+ "configs/rl_from_base_qwen17.toml",
85
+ ),
86
+ ),
87
+ default_subdir="math_demo",
88
+ env_lines=(
89
+ "# Required for task app auth to environment service",
90
+ "ENVIRONMENT_API_KEY=",
91
+ "",
92
+ "# Optional: for CLI job submission and proxying OpenAI models",
93
+ "SYNTH_API_KEY=",
94
+ "OPENAI_API_KEY=",
95
+ "",
96
+ "# Optional: set to 'prod' to use production names",
97
+ "ENVIRONMENT=",
98
+ ),
99
+ config_source="synth_ai/demos/math/config.toml",
100
+ requires_modal=True,
101
+ post_copy=lambda root: _postprocess_math_modal(root),
102
+ default_secret_name="hendrycks-math-task-app-demo-secret",
103
+ ),
104
+ "crafter-local": DemoTemplate(
105
+ template_id="crafter-local",
106
+ name="Crafter GRPO (local FastAPI)",
107
+ description="Lightweight wrapper around examples/warming_up_to_rl/task_app/grpo_crafter for local experimentation.",
108
+ copy_specs=(
109
+ CopySpec(
110
+ "synth_ai/demos/crafter/grpo_crafter_task_app.py",
111
+ "task_app.py",
112
+ ),
113
+ CopySpec(
114
+ "synth_ai/demos/crafter/README.md",
115
+ "README.md",
116
+ ),
117
+ CopySpec(
118
+ "synth_ai/demos/crafter/configs/rl_from_base_qwen4b.toml",
119
+ "configs/rl_from_base_qwen4b.toml",
120
+ ),
121
+ CopySpec(
122
+ "synth_ai/demos/crafter/configs/crafter_fft_4b.toml",
123
+ "configs/crafter_fft_4b.toml",
124
+ ),
125
+ CopySpec(
126
+ "examples/warming_up_to_rl/task_app/grpo_crafter.py",
127
+ "grpo_crafter.py",
128
+ ),
129
+ CopySpec(
130
+ "examples/warming_up_to_rl/task_app/synth_envs_hosted",
131
+ "synth_envs_hosted",
132
+ ),
133
+ CopySpec(
134
+ "examples/warming_up_to_rl/run_local_rollout.py",
135
+ "run_local_rollout.py",
136
+ ),
137
+ CopySpec(
138
+ "examples/warming_up_to_rl/run_local_rollout_traced.py",
139
+ "run_local_rollout_traced.py",
140
+ ),
141
+ CopySpec(
142
+ "examples/warming_up_to_rl/shared.py",
143
+ "shared.py",
144
+ ),
145
+ CopySpec(
146
+ "examples/warming_up_to_rl/export_trace_sft.py",
147
+ "export_trace_sft.py",
148
+ ),
149
+ CopySpec(
150
+ "examples/warming_up_to_rl/run_fft_and_save.py",
151
+ "run_fft_and_save.py",
152
+ ),
153
+ CopySpec(
154
+ "examples/warming_up_to_rl/run_local_rollout_modal.py",
155
+ "run_local_rollout_modal.py",
156
+ ),
157
+ ),
158
+ default_subdir="crafter_demo",
159
+ env_lines=(
160
+ "ENVIRONMENT_API_KEY=",
161
+ "SYNTH_API_KEY=",
162
+ "",
163
+ "# Optional: URL for existing Crafter task app",
164
+ "TASK_APP_BASE_URL=",
165
+ ),
166
+ default_secret_name="grpo-crafter-demo-secret",
167
+ ),
168
+ }
169
+
170
+
171
+ def list_demo_templates() -> tuple[DemoTemplate, ...]:
172
+ return tuple(DEMO_TEMPLATES.values())
173
+
174
+
175
+ def get_demo_template(template_id: str) -> DemoTemplate | None:
176
+ return DEMO_TEMPLATES.get(template_id)
@@ -0,0 +1 @@
1
+ # Package namespace for Math demo task app
@@ -0,0 +1,16 @@
1
+ from __future__ import annotations
2
+
3
+ """Minimal helpers for the math task app.
4
+
5
+ This module provides a local fallback for install_problem_bank_into_shared so
6
+ the modal task app can import it without requiring an external math_rl package.
7
+ """
8
+
9
+
10
+ def install_problem_bank_into_shared() -> None:
11
+ """No-op placeholder for installing the Hendrycks MATH problem bank.
12
+
13
+ In production deployments, this can download or unpack the problem bank
14
+ into a shared directory. For the demo scaffold, it is a no-op.
15
+ """
16
+ return None
@@ -0,0 +1,38 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+
5
+ from fastapi import FastAPI
6
+ from starlette.middleware.cors import CORSMiddleware
7
+
8
+ # Reuse the examples/rl task_app routes if available
9
+ try:
10
+ from synth_ai.examples.rl.task_app import make_app as make_rl_app # type: ignore
11
+ except Exception: # fallback path when imported from repo root
12
+ try:
13
+ from examples.rl.task_app import make_app as make_rl_app # type: ignore
14
+ except Exception as e: # pragma: no cover
15
+ raise ImportError(f"Unable to import RL task app: {e}") from e
16
+
17
+
18
+ def create_app() -> FastAPI:
19
+ # Configure math defaults via env (consumed by RL task_app helpers)
20
+ os.environ.setdefault("DEMO_ENV_NAME", "math")
21
+ os.environ.setdefault("DEMO_POLICY_NAME", "math-react")
22
+ # Build base app
23
+ app = make_rl_app()
24
+ # CORS for local demo
25
+ app.add_middleware(
26
+ CORSMiddleware,
27
+ allow_origins=["*"],
28
+ allow_credentials=True,
29
+ allow_methods=["*"],
30
+ allow_headers=["*"],
31
+ )
32
+ return app
33
+
34
+
35
+ def run(host: str = "127.0.0.1", port: int = 8080):
36
+ import uvicorn
37
+
38
+ uvicorn.run(create_app(), host=host, port=int(os.getenv("PORT", port)))