synth-ai 0.2.16__py3-none-any.whl → 0.2.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of synth-ai has been flagged as a potentially problematic release.
Files changed (192)
  1. examples/analyze_semantic_words.sh +2 -2
  2. examples/blog_posts/pokemon_vl/README.md +98 -0
  3. examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +25 -0
  4. examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
  5. examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
  6. examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +42 -0
  7. examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
  8. examples/blog_posts/warming_up_to_rl/README.md +158 -0
  9. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
  10. examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
  11. examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
  12. examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
  13. examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +41 -0
  14. examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
  15. examples/dev/qwen3_32b_qlora_4xh100.toml +5 -0
  16. examples/multi_step/configs/crafter_rl_outcome.toml +1 -1
  17. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +65 -107
  18. examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +1 -1
  19. examples/multi_step/configs/crafter_rl_stepwise_simple.toml +1 -1
  20. examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
  21. examples/multi_step/configs/verilog_rl_lora.toml +80 -123
  22. examples/qwen_coder/configs/coder_lora_30b.toml +1 -3
  23. examples/qwen_coder/configs/coder_lora_4b.toml +4 -1
  24. examples/qwen_coder/configs/coder_lora_small.toml +1 -3
  25. examples/qwen_vl/README.md +10 -12
  26. examples/qwen_vl/SETUP_COMPLETE.md +7 -8
  27. examples/qwen_vl/VISION_TESTS_COMPLETE.md +2 -3
  28. examples/qwen_vl/collect_data_via_cli.md +76 -84
  29. examples/qwen_vl/collect_vision_traces.py +4 -4
  30. examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +40 -57
  31. examples/qwen_vl/configs/crafter_vlm_sft_example.toml +1 -2
  32. examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +20 -37
  33. examples/qwen_vl/configs/eval_gpt5nano_vision.toml +21 -40
  34. examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
  35. examples/qwen_vl/configs/{filter_qwen2vl_sft.toml → filter_qwen3vl_sft.toml} +4 -5
  36. examples/qwen_vl/configs/filter_vision_sft.toml +2 -3
  37. examples/qwen_vl/crafter_qwen_vl_agent.py +5 -5
  38. examples/qwen_vl/run_vision_comparison.sh +6 -7
  39. examples/rl/README.md +5 -5
  40. examples/rl/configs/rl_from_base_qwen.toml +26 -1
  41. examples/rl/configs/rl_from_base_qwen17.toml +5 -2
  42. examples/rl/task_app/README.md +1 -2
  43. examples/rl/task_app/math_single_step.py +2 -2
  44. examples/run_crafter_demo.sh +2 -2
  45. examples/sft/README.md +1 -1
  46. examples/sft/configs/crafter_fft_qwen0p6b.toml +4 -1
  47. examples/sft/configs/crafter_lora_qwen0p6b.toml +4 -1
  48. examples/swe/task_app/README.md +32 -2
  49. examples/swe/task_app/grpo_swe_mini.py +4 -0
  50. examples/swe/task_app/hosted/envs/crafter/react_agent.py +1 -1
  51. examples/swe/task_app/hosted/envs/mini_swe/environment.py +37 -10
  52. examples/swe/task_app/hosted/inference/openai_client.py +4 -4
  53. examples/swe/task_app/morph_backend.py +178 -0
  54. examples/task_apps/crafter/task_app/README.md +1 -1
  55. examples/task_apps/crafter/task_app/grpo_crafter.py +66 -3
  56. examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +1 -1
  57. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +4 -26
  58. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -2
  59. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +17 -49
  60. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +13 -5
  61. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +15 -1
  62. examples/task_apps/enron/task_app/grpo_enron_task_app.py +1 -1
  63. examples/task_apps/math/README.md +1 -2
  64. examples/task_apps/pokemon_red/README.md +3 -4
  65. examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +6 -5
  66. examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +1 -2
  67. examples/task_apps/pokemon_red/task_app.py +36 -5
  68. examples/task_apps/sokoban/README.md +2 -3
  69. examples/task_apps/verilog/eval_groq_qwen32b.toml +12 -14
  70. examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +1 -1
  71. examples/vlm/configs/crafter_vlm_gpt4o.toml +4 -1
  72. examples/warming_up_to_rl/configs/crafter_fft.toml +4 -1
  73. examples/warming_up_to_rl/configs/crafter_fft_4b.toml +0 -2
  74. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +2 -2
  75. examples/warming_up_to_rl/run_local_rollout_traced.py +1 -1
  76. examples/warming_up_to_rl/task_app/README.md +1 -1
  77. examples/warming_up_to_rl/task_app/grpo_crafter.py +134 -3
  78. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +1 -1
  79. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +3 -27
  80. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -1
  81. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +4 -4
  82. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +6 -3
  83. examples/workflows/math_rl/configs/rl_from_base_qwen.toml +27 -0
  84. examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +5 -0
  85. synth_ai/api/train/builders.py +9 -3
  86. synth_ai/api/train/cli.py +125 -10
  87. synth_ai/api/train/configs/__init__.py +8 -1
  88. synth_ai/api/train/configs/rl.py +32 -7
  89. synth_ai/api/train/configs/sft.py +6 -2
  90. synth_ai/api/train/configs/shared.py +59 -2
  91. synth_ai/auth/credentials.py +119 -0
  92. synth_ai/cli/__init__.py +12 -4
  93. synth_ai/cli/commands/__init__.py +17 -0
  94. synth_ai/cli/commands/demo/__init__.py +6 -0
  95. synth_ai/cli/commands/demo/core.py +163 -0
  96. synth_ai/cli/commands/deploy/__init__.py +23 -0
  97. synth_ai/cli/commands/deploy/core.py +614 -0
  98. synth_ai/cli/commands/deploy/errors.py +72 -0
  99. synth_ai/cli/commands/deploy/validation.py +11 -0
  100. synth_ai/cli/commands/eval/__init__.py +19 -0
  101. synth_ai/cli/commands/eval/core.py +1109 -0
  102. synth_ai/cli/commands/eval/errors.py +81 -0
  103. synth_ai/cli/commands/eval/validation.py +133 -0
  104. synth_ai/cli/commands/filter/__init__.py +12 -0
  105. synth_ai/cli/commands/filter/core.py +388 -0
  106. synth_ai/cli/commands/filter/errors.py +55 -0
  107. synth_ai/cli/commands/filter/validation.py +77 -0
  108. synth_ai/cli/commands/help/__init__.py +177 -0
  109. synth_ai/cli/commands/help/core.py +73 -0
  110. synth_ai/cli/commands/status/__init__.py +64 -0
  111. synth_ai/cli/commands/status/client.py +192 -0
  112. synth_ai/cli/commands/status/config.py +92 -0
  113. synth_ai/cli/commands/status/errors.py +20 -0
  114. synth_ai/cli/commands/status/formatters.py +164 -0
  115. synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
  116. synth_ai/cli/commands/status/subcommands/files.py +79 -0
  117. synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
  118. synth_ai/cli/commands/status/subcommands/models.py +79 -0
  119. synth_ai/cli/commands/status/subcommands/runs.py +81 -0
  120. synth_ai/cli/commands/status/subcommands/summary.py +47 -0
  121. synth_ai/cli/commands/status/utils.py +114 -0
  122. synth_ai/cli/commands/train/__init__.py +53 -0
  123. synth_ai/cli/commands/train/core.py +21 -0
  124. synth_ai/cli/commands/train/errors.py +117 -0
  125. synth_ai/cli/commands/train/judge_schemas.py +199 -0
  126. synth_ai/cli/commands/train/judge_validation.py +304 -0
  127. synth_ai/cli/commands/train/validation.py +443 -0
  128. synth_ai/cli/demo.py +2 -162
  129. synth_ai/cli/deploy/__init__.py +28 -0
  130. synth_ai/cli/deploy/core.py +5 -0
  131. synth_ai/cli/deploy/errors.py +23 -0
  132. synth_ai/cli/deploy/validation.py +5 -0
  133. synth_ai/cli/eval/__init__.py +36 -0
  134. synth_ai/cli/eval/core.py +5 -0
  135. synth_ai/cli/eval/errors.py +31 -0
  136. synth_ai/cli/eval/validation.py +5 -0
  137. synth_ai/cli/filter/__init__.py +28 -0
  138. synth_ai/cli/filter/core.py +5 -0
  139. synth_ai/cli/filter/errors.py +23 -0
  140. synth_ai/cli/filter/validation.py +5 -0
  141. synth_ai/cli/modal_serve/__init__.py +12 -0
  142. synth_ai/cli/modal_serve/core.py +14 -0
  143. synth_ai/cli/modal_serve/errors.py +8 -0
  144. synth_ai/cli/modal_serve/validation.py +11 -0
  145. synth_ai/cli/serve/__init__.py +12 -0
  146. synth_ai/cli/serve/core.py +14 -0
  147. synth_ai/cli/serve/errors.py +8 -0
  148. synth_ai/cli/serve/validation.py +11 -0
  149. synth_ai/cli/setup.py +20 -265
  150. synth_ai/cli/status.py +7 -126
  151. synth_ai/cli/task_app_deploy.py +1 -10
  152. synth_ai/cli/task_app_modal_serve.py +4 -9
  153. synth_ai/cli/task_app_serve.py +4 -11
  154. synth_ai/cli/task_apps.py +58 -1487
  155. synth_ai/cli/train/__init__.py +12 -0
  156. synth_ai/cli/train/core.py +21 -0
  157. synth_ai/cli/train/errors.py +8 -0
  158. synth_ai/cli/train/validation.py +24 -0
  159. synth_ai/cli/train.py +1 -14
  160. synth_ai/demos/crafter/grpo_crafter_task_app.py +1 -1
  161. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
  162. synth_ai/environments/examples/red/engine.py +33 -12
  163. synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
  164. synth_ai/environments/examples/red/environment.py +26 -0
  165. synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
  166. synth_ai/http.py +12 -0
  167. synth_ai/judge_schemas.py +10 -11
  168. synth_ai/learning/rl/client.py +3 -1
  169. synth_ai/streaming/__init__.py +29 -0
  170. synth_ai/streaming/config.py +94 -0
  171. synth_ai/streaming/handlers.py +469 -0
  172. synth_ai/streaming/streamer.py +301 -0
  173. synth_ai/streaming/types.py +95 -0
  174. synth_ai/task/validators.py +2 -2
  175. synth_ai/tracing_v3/migration_helper.py +1 -2
  176. synth_ai/utils/env.py +25 -18
  177. synth_ai/utils/http.py +4 -1
  178. synth_ai/utils/modal.py +2 -2
  179. {synth_ai-0.2.16.dist-info → synth_ai-0.2.17.dist-info}/METADATA +8 -3
  180. {synth_ai-0.2.16.dist-info → synth_ai-0.2.17.dist-info}/RECORD +184 -109
  181. examples/qwen_vl/configs/eval_qwen2vl_vision.toml +0 -44
  182. synth_ai/cli/tui.py +0 -62
  183. synth_ai/tui/__init__.py +0 -5
  184. synth_ai/tui/__main__.py +0 -13
  185. synth_ai/tui/cli/__init__.py +0 -1
  186. synth_ai/tui/cli/query_experiments.py +0 -164
  187. synth_ai/tui/cli/query_experiments_v3.py +0 -164
  188. synth_ai/tui/dashboard.py +0 -911
  189. {synth_ai-0.2.16.dist-info → synth_ai-0.2.17.dist-info}/WHEEL +0 -0
  190. {synth_ai-0.2.16.dist-info → synth_ai-0.2.17.dist-info}/entry_points.txt +0 -0
  191. {synth_ai-0.2.16.dist-info → synth_ai-0.2.17.dist-info}/licenses/LICENSE +0 -0
  192. {synth_ai-0.2.16.dist-info → synth_ai-0.2.17.dist-info}/top_level.txt +0 -0
@@ -129,7 +129,7 @@ async def main():
         print("✓ Server is healthy")
     except Exception as e:
         print(f"❌ Server not responding: {e}")
-        print(f" Start it with: uv run -m synth_ai task-app serve pokemon_red --port 8913")
+        print(f" Start it with: uv run -m synth_ai task-app deploy --runtime uvicorn pokemon_red --port 8913")
         return
 
     # Check API key
@@ -222,4 +222,3 @@ async def main():
 
 if __name__ == "__main__":
     asyncio.run(main())
-
@@ -12,7 +12,7 @@ from synth_ai.environments.examples.red.taskset import INSTANCE as RED_DEFAULT_I
 from synth_ai.environments.examples.red.engine_helpers.reward_library.pallet_town_progression import (
     PalletTownProgressionCompositeReward,
 )
-from synth_ai.task.apps import TaskAppEntry, register_task_app
+from synth_ai.task.apps import ModalDeploymentConfig, TaskAppEntry, register_task_app
 from synth_ai.task.contracts import (
     RolloutMetrics,
     RolloutRequest,
@@ -260,8 +260,10 @@ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) ->
         {
             "role": "system",
             "content": (
-                "You are controlling Pokémon Red. Respond with a single tool call named 'press_button' "
-                "with JSON arguments {button: 'A|B|UP|DOWN|LEFT|RIGHT|START|SELECT', frames: 1-120}."
+                "You are controlling Pokémon Red, a classic Game Boy game. You can see the game screen in the images provided. "
+                "Your goal is to make progress in the game. Use the execute_sequence tool to press buttons. "
+                "Choose appropriate button presses based on what you see in the game screen. "
+                "Always respond with exactly one tool call in the format: <tool_call>{\"name\": \"execute_sequence\", \"arguments\": {...}}</tool_call>"
             ),
         },
         {
@@ -788,11 +790,40 @@ def build_config() -> TaskAppConfig:
 register_task_app(
     entry=TaskAppEntry(
         app_id="pokemon_red",
-        description="Pokémon Red demo task app",
+        description="Pokémon Red demo task app with vision support",
        config_factory=build_config,
         aliases=("pokemon_red_demo",),
         env_files=(),
-        modal=None,
+        modal=ModalDeploymentConfig(
+            app_name="pokemon-red-vision-task-app",
+            python_version="3.11",
+            pip_packages=(
+                "fastapi>=0.100.0",
+                "uvicorn>=0.23.0",
+                "pydantic>=2.0.0",
+                "numpy>=1.24.0",
+                "aiohttp>=3.8.0",
+                "httpx>=0.24.0",
+                "python-dotenv>=1.0.1",
+                # Tracing/DB runtime deps
+                "sqlalchemy>=2.0.42",
+                "aiosqlite>=0.21.0",
+                "greenlet>=3.2.3",
+                # Pokemon Red environment
+                "pyboy>=2.0.0",
+                "pillow>=9.0.0",
+            ),
+            extra_local_dirs=(
+                # Mount repo root so local modules resolve when deployed on Modal
+                ("/Users/joshpurtell/Documents/GitHub/synth-ai", "/opt/synth_ai_repo"),
+                ("/Users/joshpurtell/Documents/GitHub/synth-ai/synth_ai", "/opt/synth_ai_repo/synth_ai"),
+                ("/Users/joshpurtell/Documents/GitHub/synth-ai/examples/task_apps/pokemon_red", "/opt/synth_ai_repo/examples/task_apps/pokemon_red"),
+            ),
+            secret_names=("openai-api-key", "groq-api-key"),
+            memory=16384,
+            cpu=4.0,
+            max_containers=10,
+        ),
     )
 )
 
@@ -20,7 +20,7 @@ Sokoban is a classic puzzle game where the player must push boxes onto target lo
 cd /path/to/synth-ai
 
 # Start the Sokoban task app on port 8911
-uvx synth-ai task-app serve sokoban --port 8911
+uvx synth-ai task-app deploy --runtime uvicorn sokoban --port 8911
 ```
 
 The server will be available at `http://localhost:8911`.
@@ -283,7 +283,7 @@ lsof -i :8911
 kill -9 $(lsof -ti :8911)
 
 # Restart
-uvx synth-ai task-app serve sokoban --port 8911
+uvx synth-ai task-app deploy --runtime uvicorn sokoban --port 8911
 ```
 
 ## Examples
@@ -304,4 +304,3 @@ To add new features:
 ## License
 
 MIT
-
@@ -1,24 +1,22 @@
 # Verilog Eval Config for Groq Qwen3-32B
-# Quick eval to test Verilog task app before RL training
-
-[task_app]
-# Update this with your Modal URL after deployment
-url = "https://synth-laboratories--grpo-verilog-task-app-fastapi-app-dev.modal.run"
+# Quick eval to test the Verilog task app before RL training
 
 [eval]
-num_episodes = 3  # Quick test with 3 seeds
+app_id = "grpo-verilog"
+task_app_url = "https://synth-laboratories--grpo-verilog-task-app-fastapi-app-dev.modal.run"
+model = "groq:qwen3-32b"
 seeds = [0, 1, 2]
-max_steps = 15  # More steps for Verilog compilation chains
+max_turns = 15
+concurrency = 1
+return_trace = true
+trace_format = "structured"
+
+[eval.env_config]
+difficulty = "medium"
 
-[policy]
+[eval.policy_config]
 provider = "groq"
 model = "qwen/qwen3-32b"
 temperature = 0.2
 max_tokens = 768
 inference_url = "https://api.groq.com/openai/v1/chat/completions"
-
-[env]
-difficulty = "medium"  # Can be "easy", "medium", or "hard"
-
-
-
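For readers who want the end state rather than the hunk, the Verilog eval settings now live entirely under `[eval]`, with environment and policy overrides in nested tables. A sketch of the resulting file, assembled only from the added lines above:

```toml
[eval]
app_id = "grpo-verilog"
task_app_url = "https://synth-laboratories--grpo-verilog-task-app-fastapi-app-dev.modal.run"
model = "groq:qwen3-32b"
seeds = [0, 1, 2]
max_turns = 15
concurrency = 1
return_trace = true
trace_format = "structured"

[eval.env_config]
difficulty = "medium"

[eval.policy_config]
provider = "groq"
model = "qwen/qwen3-32b"
temperature = 0.2
max_tokens = 768
inference_url = "https://api.groq.com/openai/v1/chat/completions"
```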
@@ -1,7 +1,7 @@
 """Compatibility wrapper for the GRPO Verilog task app.
 
 This mirrors the Crafter task app wrapper while delegating configuration to
-`grpo_verilog.py`. Normal usage should prefer `uvx synth-ai serve grpo-verilog`,
+`grpo_verilog.py`. Normal usage should prefer `uvx synth-ai deploy --runtime uvicorn grpo-verilog`,
 but the module remains for direct execution or importing the FastAPI app.
 """
 
@@ -1,4 +1,7 @@
-type = "sft"
+[algorithm]
+type = "offline"
+method = "sft"
+variety = "fft"
 
 [job]
 model = "openai/gpt-4o-mini-2024-07-18"
@@ -1,7 +1,10 @@
 # Crafter Full Finetune (FFT) example on H100
 # Adjust paths and hyperparameters to your environment before running.
 
-type = "sft"
+[algorithm]
+type = "offline"
+method = "sft"
+variety = "fft"
 
 [job]
 model = "Qwen/Qwen3-4B"  # base model to finetune
@@ -1,7 +1,5 @@
 # FFT job config for Qwen/Qwen3-4B on Crafter SFT dataset
 
-type = "sft"
-
 [algorithm]
 type = "offline"
 method = "supervised_finetune"
@@ -1,7 +1,5 @@
 # RL training starting from base Qwen/Qwen3-4B (TOML-only model selection)
 
-type = "rl"
-
 [algorithm]
 type = "online"
 method = "policy_gradient"
@@ -40,6 +38,7 @@ health_interval_ms = 300
 [model]
 # Base model start
 base = "Qwen/Qwen3-4B"
+trainer_mode = "full"
 label = "crafter-rl-from-base"
 
 [rollout]
@@ -62,6 +61,7 @@ seeds = [
 [training]
 num_epochs = 1
 iterations_per_epoch = 10
+max_turns = 10
 batch_size = 16
 group_size = 4
 gradient_accumulation_steps = 1
@@ -448,7 +448,7 @@ async def main() -> None:
 
         print(f"Ops executed: {ops}")
         print(
-            "Tip: export TASKAPP_TRACING_ENABLED=1 and optionally TASKAPP_SFT_OUTPUT_DIR before running `uvx synth-ai serve …` to persist traces/SFT."
+            "Tip: export TASKAPP_TRACING_ENABLED=1 and optionally TASKAPP_SFT_OUTPUT_DIR before running `uvx synth-ai deploy --runtime uvicorn …` to persist traces/SFT."
         )
     except httpx.HTTPStatusError as exc:
         detail = (
@@ -6,7 +6,7 @@ underlying FastAPI plumbing.
 
 ## Local development
 ```bash
-uvx synth-ai serve grpo-crafter --port 8001
+uvx synth-ai deploy --runtime uvicorn grpo-crafter --port 8001
 # Optional extras:
 # --env-file path/to/.env  # load additional environment variables
 # --reload                 # enable uvicorn auto-reload
@@ -8,11 +8,17 @@ import sys
 from collections.abc import Iterable, Sequence
 from contextlib import suppress
 from dataclasses import dataclass
+from datetime import UTC, datetime
 from pathlib import Path
 from typing import Any
 
+from fastapi import HTTPException
+from pydantic import BaseModel
+
+from pydantic import BaseModel
+
 from synth_ai.task.apps import ModalDeploymentConfig, TaskAppEntry, register_task_app
-from synth_ai.task.contracts import RolloutMetrics, RolloutRequest, RolloutResponse, TaskInfo
+from synth_ai.task.contracts import RolloutMetrics, RolloutMode, RolloutRequest, RolloutResponse, TaskInfo
 from synth_ai.task.datasets import TaskDatasetRegistry, TaskDatasetSpec
 from synth_ai.task.json import to_jsonable  # noqa: F401 (imported for side-effect compatibility)
 from synth_ai.task.rubrics import load_rubric
@@ -115,6 +121,18 @@ try:
 except Exception:
     pass
 
+try:
+    from .synth_envs_hosted.utils import (
+        ensure_chat_completions_url,
+        extract_trace_correlation_id,
+    )
+except Exception:  # pragma: no cover - fallback when optional deps missing
+    def ensure_chat_completions_url(raw_url, mode=None):
+        return raw_url
+
+    def extract_trace_correlation_id(_raw_url):
+        return None
+
 HAS_HOSTED = True
 try:
     import crafter  # type: ignore
@@ -306,7 +324,7 @@ def build_dataset() -> tuple[TaskDatasetRegistry, CrafterDataset]:
 def _base_task_info(dataset: CrafterDataset) -> TaskInfo:
     return TaskInfo(
         task={"id": "crafter_classic", "name": "Crafter Classic", "version": "1.0.0"},
-        environments=["crafter"],
+        environment="crafter",
         action_space={
             "type": "discrete",
             "size": len(crafter_constants.actions),
@@ -402,7 +420,7 @@ def provide_task_instances(
         infos.append(
             TaskInfo(
                 task=base_info.task,
-                environments=base_info.environments,
+                environment=base_info.environment,
                 action_space=base_info.action_space,
                 observation={
                     **base_info.observation,
@@ -536,7 +554,47 @@ async def rollout_executor(request: RolloutRequest, fastapi_request) -> RolloutR
 
     request = _coerce_math_to_crafter(request)
 
+    record_cfg = request.record.model_copy(
+        update={
+            "return_trace": True,
+            "trace_format": "structured",
+        }
+    )
+    request = request.model_copy(update={"record": record_cfg})
+
     policy_cfg = dict(request.policy.config or {})
+    logger.info(
+        "ROLLOUT_EXEC: incoming policy config keys=%s inference_url=%s run_id=%s mode=%s",
+        sorted(policy_cfg.keys()),
+        policy_cfg.get("inference_url"),
+        request.run_id,
+        request.mode,
+    )
+    inferred_url = ensure_chat_completions_url(policy_cfg.get("inference_url"), mode=request.mode)
+    if isinstance(inferred_url, str) and inferred_url:
+        policy_cfg["inference_url"] = inferred_url
+    else:
+        logger.warning(
+            "ROLLOUT_EXEC: inference_url missing or not normalized run_id=%s raw=%s",
+            request.run_id,
+            policy_cfg.get("inference_url"),
+        )
+
+    trace_correlation_id = extract_trace_correlation_id(policy_cfg.get("inference_url"))
+    if request.mode == RolloutMode.RL:
+        assert trace_correlation_id, (
+            f"FATAL: trace_correlation_id extraction failed for run_id={request.run_id}. "
+            f"policy_cfg_keys={sorted(policy_cfg.keys())} inference_url={policy_cfg.get('inference_url')}"
+        )
+    if trace_correlation_id:
+        policy_cfg["trace_correlation_id"] = trace_correlation_id
+
+    pipeline_metadata: dict[str, Any] = {}
+    if trace_correlation_id:
+        pipeline_metadata["trace_correlation_id"] = trace_correlation_id
+    if isinstance(policy_cfg.get("inference_url"), str) and policy_cfg["inference_url"]:
+        pipeline_metadata.setdefault("inference_url", policy_cfg["inference_url"])
+
     try:
         max_llm_calls = int(policy_cfg.get("max_llm_calls") or 10)
     except Exception:
@@ -585,17 +643,90 @@ async def rollout_executor(request: RolloutRequest, fastapi_request) -> RolloutR
         safety=LegacyRolloutSafetyConfig(**request.safety.model_dump()),
         training_session_id=request.training_session_id,
         synth_base_url=request.synth_base_url,
+        mode=request.mode,
     )
 
     legacy_response: LegacyRolloutResponse = await legacy_execute_rollout(
         legacy_request, fastapi_request
     )
     data = legacy_response.model_dump()
+    logger.debug(
+        "ROLLOUT_EXEC: legacy response keys=%s has_trace=%s",
+        sorted(data.keys()),
+        bool(data.get("trace")),
+    )
     metrics = data.get("metrics", {}) or {}
     metrics.setdefault("outcome_score", None)
     metrics.setdefault("events_score", None)
     metrics.setdefault("details", {})
     data["metrics"] = metrics
+
+    if data.get("trace") is None:
+        legacy_trace = getattr(legacy_response, "trace", None)
+        if legacy_trace is not None:
+            data["trace"] = legacy_trace
+        else:
+            tracer_factory = getattr(fastapi_request.app.state, "session_tracer_factory", None)
+            if callable(tracer_factory):
+                tracer = tracer_factory()
+                logger.debug(
+                    "ROLLOUT_EXEC: trace backfill factory=%s", type(tracer)
+                )
+                if isinstance(tracer, SessionTracer):
+                    try:
+                        await tracer.initialize()
+                        if tracer.db is not None:
+                            trace_row = await tracer.db.get_session_trace(request.run_id)
+                            if trace_row is not None:
+                                data["trace"] = trace_row
+                    except Exception as exc:
+                        logger.warning("TRACE_BACKFILL_FAIL: %s", exc)
+                    finally:
+                        with suppress(Exception):
+                            await tracer.close()
+
+    final_cid = trace_correlation_id or f"trace_{request.run_id}"
+    data["trace_correlation_id"] = final_cid
+
+    existing_meta = data.get("pipeline_metadata")
+    if not isinstance(existing_meta, dict):
+        existing_meta = {}
+    existing_meta.setdefault("trace_correlation_id", final_cid)
+    if isinstance(policy_cfg.get("inference_url"), str) and policy_cfg["inference_url"]:
+        existing_meta.setdefault("inference_url", policy_cfg["inference_url"])
+    data["pipeline_metadata"] = existing_meta
+
+    # Propagate inference_url into each legacy trajectory entry for downstream tooling.
+    inferred_url = policy_cfg.get("inference_url")
+
+    if "trajectories" in data:
+        normalized_trajs: list[dict[str, Any]] = []
+        for traj in data.get("trajectories", []):
+            if isinstance(traj, BaseModel):
+                traj_dict = traj.model_dump()
+            elif isinstance(traj, dict):
+                traj_dict = dict(traj)
+            else:
+                continue
+            traj_dict.setdefault("trace_correlation_id", final_cid)
+            if isinstance(inferred_url, str) and inferred_url and not traj_dict.get("inference_url"):
+                traj_dict["inference_url"] = inferred_url
+            normalized_trajs.append(traj_dict)
+        if normalized_trajs:
+            data["trajectories"] = normalized_trajs
+
+    if data.get("trace") is None:
+        data["trace"] = {
+            "session_id": request.run_id,
+            "created_at": datetime.now(UTC).isoformat(),
+            "metadata": dict(existing_meta),
+            "event_history": [],
+            "markov_blanket_message_history": [],
+        }
+        raise HTTPException(
+            status_code=500, detail="trace_payload_missing: task app did not emit a SessionTrace"
+        )
+
     return RolloutResponse.model_validate(data)
 
 
@@ -3,7 +3,7 @@
 This module now delegates to the TaskAppConfig defined in the colocated example at
 `examples/warming_up_to_rl/task_app/grpo_crafter.py`. It is kept for legacy usage
 (running the file directly or targeting `fastapi_app` from external tooling). Prefer using
-`uvx synth-ai serve grpo-crafter` for local development and testing.
+`uvx synth-ai deploy --runtime uvicorn grpo-crafter` for local development and testing.
 """
 
 from __future__ import annotations
@@ -148,8 +148,8 @@ class CrafterPolicy(Policy):
         if self.use_tools:
             payload["tools"] = TOOLS_SCHEMA
             payload["tool_choice"] = "required"
-            # Ensure the inference server injects family-specific stop sequences
-            # to terminate immediately after the first tool call for compliance.
+            payload["function_call"] = {"name": "interact_many"}
+            payload["parallel_tool_calls"] = False
             payload["stop_after_tool_calls"] = 1
         return payload
 
@@ -158,13 +158,7 @@ class CrafterPolicy(Policy):
         response: dict[str, Any],
         use_tools: bool = True,
     ) -> list[dict[str, Any]]:
-        """Turn an inference response into environment tool calls.
-
-        - If tools were used, expect tool_calls-compatible output and forward as-is
-          in our simple JSON format: {"tool_name": str, "arguments": {...}}.
-        - If no tools, parse plain-text actions using CrafterReActAgent parser and
-          wrap them into a single interact_many tool call.
-        """
+        """Turn an inference response into environment tool calls."""
         # First check if we got actual tool calls
         choices = response.get("choices", [])
         tool_calls: list[dict[str, Any]] = []
@@ -223,24 +217,6 @@ class CrafterPolicy(Policy):
                 normalized.append(tc)
             return normalized
 
-        # Otherwise, parse plain text content for actions
-        text = ""
-        for choice in choices:
-            msg = choice.get("message", {})
-            content = msg.get("content", "")
-            if content:
-                text = content
-                break
-
-        if text:
-            # Try to parse actions from the text
-            from .shared import parse_actions
-
-            actions = parse_actions(text)
-            if actions:
-                # Wrap actions in interact_many tool call
-                return [{"tool_name": "interact_many", "arguments": {"actions": actions}}]
-
         # No actions found
         return []
 
@@ -46,7 +46,7 @@ class CrafterReActAgent:
         "- Always return a single tool call: interact_many({actions: [...]})\n"
         "- Use 2–5 actions per call; prefer long movement sequences to explore.\n"
         "- Mix in 'do' only when it makes sense (tree, stone, animal, enemy nearby).\n"
-        "- Do not spam the same exact sequence twice in a row—explore in varied directions.\n\n"
+        "\n"
         "Available actions: noop, move_up, move_down, move_left, move_right, do (interact), sleep, "
         "place_stone, place_table, place_furnace, place_plant, make_wood_pickaxe, make_stone_pickaxe, "
         "make_iron_pickaxe, make_wood_sword, make_stone_sword, make_iron_sword\n"
@@ -156,13 +156,13 @@ class OpenAIClient:
         keys_preview = sorted(processed_request.keys())
         logger.info(f"Request keys: {keys_preview}")
 
-        # Final hard-guard for OpenAI: ensure unsupported field is not present
+        # Final hard-guard for OpenAI/Groq: ensure unsupported field is not present
         try:
-            if "openai" in url.lower() and "stop_after_tool_calls" in processed_request:
+            low_url = url.lower()
+            if ("openai" in low_url or "groq.com" in low_url or "/proxy/groq" in low_url) and "stop_after_tool_calls" in processed_request:
                 processed_request.pop("stop_after_tool_calls", None)
-                logger.info("Removed stop_after_tool_calls for OpenAI request")
+                logger.info("Removed stop_after_tool_calls for Groq/OpenAI request")
             # Groq-specific requirement: when using JSON mode, one of the messages must contain the word 'json'
-            low_url = url.lower()
             if ("groq.com" in low_url or "/openai" in low_url) and isinstance(
                 processed_request, dict
             ):
@@ -692,9 +692,10 @@ async def step_policy(
         "sokoban-react",
         "crafter-react",
     ) and getattr(policy, "use_tools", True):
-        req_tools = meta["inference_request"]["tools"]
-        req_tool_choice = meta["inference_request"]["tool_choice"]
-        req_stop_after = meta["inference_request"]["stop_after_tool_calls"]
+        inf_req = meta.get("inference_request", {})
+        req_tools = inf_req.get("tools")
+        req_tool_choice = inf_req.get("tool_choice")
+        req_stop_after = inf_req.get("stop_after_tool_calls")
         logger.info(
             f"TOOLCALL_CONFIG: policy={policy_name} tools_present={bool(req_tools)} tool_choice={req_tool_choice} stop_after={req_stop_after}"
         )
@@ -703,6 +704,8 @@
                 status_code=500,
                 detail=f"TOOLCALL_ASSERTION_FAIL: Missing tools or tool_choice!=required for policy {policy_name}",
             )
+        if req_stop_after is None:
+            inf_req["stop_after_tool_calls"] = 1
 
         # Call inference service with retries for Flash cold-start (503)
         import time as _t
@@ -1,8 +1,15 @@
+[algorithm]
+type = "online"
+method = "policy_gradient"
+variety = "gspo"
+
 [services]
 task_url = "https://your-math-task.modal.run"
 
 [model]
 base = "Qwen/Qwen3-4B"
+trainer_mode = "full"
+label = "math-single-step-qwen3-4b"
 
 [policy]
 model = "Qwen/Qwen3-4B"
@@ -18,6 +25,8 @@ evaluation_split = "validation"
 evaluation_episodes = 256
 
 [training]
+num_epochs = 1
+iterations_per_epoch = 20
 max_turns = 1
 ops = ["agent", "env"]
 batch_size = 128
@@ -31,5 +40,23 @@ learning_rate = 5e-6
 gpu_type = "A10G"
 gpu_count = 4
 
+[topology]
+type = "single_node_split"
+gpus_for_vllm = 2
+gpus_for_training = 2
+gpus_for_ref = 0
+tensor_parallel = 1
+
+[rollout]
+env_name = "math"
+policy_name = "math-single-step"
+max_turns = 1
+episodes_per_batch = 256
+
+[evaluation]
+instances = 256
+every_n_iters = 10
+seeds = [0, 1, 2, 3, 4]
+
 [tags]
 experiment = "math_single_step"
@@ -8,6 +8,8 @@ task_url = "http://localhost:8101"
 
 [model]
 base = "Qwen/Qwen3-1.7B"
+trainer_mode = "full"
+label = "math-single-step-qwen3-1.7b"
 
 [policy]
 model = "Qwen/Qwen3-1.7B"
@@ -23,6 +25,8 @@ evaluation_split = "validation"
 evaluation_episodes = 50
 
 [training]
+num_epochs = 1
+iterations_per_epoch = 20
 max_turns = 1
 ops = ["agent", "env"]
 batch_size = 2
@@ -61,6 +65,7 @@ health_max_wait_s = 180
 health_interval_ms = 300
 
 [rollout]
+env_name = "math"
 policy_name = "math-single-step"
 max_turns = 1
 episodes_per_batch = 32  # group_size * batch_size
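The same additions recur across the RL example configs in this release: an `[algorithm]` table instead of a bare top-level `type = "rl"`, a `trainer_mode` (and usually a `label`) under `[model]`, epoch and iteration counts under `[training]`, and an explicit `env_name` under `[rollout]`. A consolidated sketch assembled from the hunks above; the values shown are the ones from these examples, not universal defaults:

```toml
[algorithm]
type = "online"
method = "policy_gradient"
variety = "gspo"

[model]
base = "Qwen/Qwen3-1.7B"
trainer_mode = "full"
label = "math-single-step-qwen3-1.7b"

[training]
num_epochs = 1
iterations_per_epoch = 20

[rollout]
env_name = "math"
policy_name = "math-single-step"
```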
@@ -74,8 +74,14 @@ def build_rl_payload(
     idempotency: str | None,
     allow_experimental: bool | None = None,
 ) -> RLBuildResult:
+    # Load and validate config with SDK-level checks
+    from synth_ai.api.train.utils import load_toml
+    from synth_ai.cli.commands.train.validation import validate_rl_config
+
     try:
-        rl_cfg = RLConfig.from_path(config_path)
+        raw_config = load_toml(config_path)
+        validated_config = validate_rl_config(raw_config)  # Adds defaults & validates
+        rl_cfg = RLConfig.from_mapping(validated_config)
     except ValidationError as exc:
         raise click.ClickException(_format_validation_error(config_path, exc)) from exc
 
@@ -110,8 +116,8 @@ def build_rl_payload(
             "Task app URL required (provide --task-url or set services.task_url in TOML)"
         )
 
-    model_source = (model_cfg.source or "").strip()
-    model_base = (model_cfg.base or "").strip()
+    model_source = (model_cfg.source or "").strip() if model_cfg else ""
+    model_base = (model_cfg.base or "").strip() if model_cfg else ""
     override_model = (overrides.get("model") or "").strip()
     if override_model:
         model_source = override_model