synth-ai 0.2.16__py3-none-any.whl → 0.2.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. (The original page links to more details here.)

Files changed (192)
  1. examples/analyze_semantic_words.sh +2 -2
  2. examples/blog_posts/pokemon_vl/README.md +98 -0
  3. examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +25 -0
  4. examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
  5. examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
  6. examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +42 -0
  7. examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
  8. examples/blog_posts/warming_up_to_rl/README.md +158 -0
  9. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
  10. examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
  11. examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
  12. examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
  13. examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +41 -0
  14. examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
  15. examples/dev/qwen3_32b_qlora_4xh100.toml +5 -0
  16. examples/multi_step/configs/crafter_rl_outcome.toml +1 -1
  17. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +65 -107
  18. examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +1 -1
  19. examples/multi_step/configs/crafter_rl_stepwise_simple.toml +1 -1
  20. examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
  21. examples/multi_step/configs/verilog_rl_lora.toml +80 -123
  22. examples/qwen_coder/configs/coder_lora_30b.toml +1 -3
  23. examples/qwen_coder/configs/coder_lora_4b.toml +4 -1
  24. examples/qwen_coder/configs/coder_lora_small.toml +1 -3
  25. examples/qwen_vl/README.md +10 -12
  26. examples/qwen_vl/SETUP_COMPLETE.md +7 -8
  27. examples/qwen_vl/VISION_TESTS_COMPLETE.md +2 -3
  28. examples/qwen_vl/collect_data_via_cli.md +76 -84
  29. examples/qwen_vl/collect_vision_traces.py +4 -4
  30. examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +40 -57
  31. examples/qwen_vl/configs/crafter_vlm_sft_example.toml +1 -2
  32. examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +20 -37
  33. examples/qwen_vl/configs/eval_gpt5nano_vision.toml +21 -40
  34. examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
  35. examples/qwen_vl/configs/{filter_qwen2vl_sft.toml → filter_qwen3vl_sft.toml} +4 -5
  36. examples/qwen_vl/configs/filter_vision_sft.toml +2 -3
  37. examples/qwen_vl/crafter_qwen_vl_agent.py +5 -5
  38. examples/qwen_vl/run_vision_comparison.sh +6 -7
  39. examples/rl/README.md +5 -5
  40. examples/rl/configs/rl_from_base_qwen.toml +26 -1
  41. examples/rl/configs/rl_from_base_qwen17.toml +5 -2
  42. examples/rl/task_app/README.md +1 -2
  43. examples/rl/task_app/math_single_step.py +2 -2
  44. examples/run_crafter_demo.sh +2 -2
  45. examples/sft/README.md +1 -1
  46. examples/sft/configs/crafter_fft_qwen0p6b.toml +4 -1
  47. examples/sft/configs/crafter_lora_qwen0p6b.toml +4 -1
  48. examples/swe/task_app/README.md +32 -2
  49. examples/swe/task_app/grpo_swe_mini.py +4 -0
  50. examples/swe/task_app/hosted/envs/crafter/react_agent.py +1 -1
  51. examples/swe/task_app/hosted/envs/mini_swe/environment.py +37 -10
  52. examples/swe/task_app/hosted/inference/openai_client.py +4 -4
  53. examples/swe/task_app/morph_backend.py +178 -0
  54. examples/task_apps/crafter/task_app/README.md +1 -1
  55. examples/task_apps/crafter/task_app/grpo_crafter.py +66 -3
  56. examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +1 -1
  57. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +4 -26
  58. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -2
  59. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +17 -49
  60. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +13 -5
  61. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +15 -1
  62. examples/task_apps/enron/task_app/grpo_enron_task_app.py +1 -1
  63. examples/task_apps/math/README.md +1 -2
  64. examples/task_apps/pokemon_red/README.md +3 -4
  65. examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +6 -5
  66. examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +1 -2
  67. examples/task_apps/pokemon_red/task_app.py +36 -5
  68. examples/task_apps/sokoban/README.md +2 -3
  69. examples/task_apps/verilog/eval_groq_qwen32b.toml +12 -14
  70. examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +1 -1
  71. examples/vlm/configs/crafter_vlm_gpt4o.toml +4 -1
  72. examples/warming_up_to_rl/configs/crafter_fft.toml +4 -1
  73. examples/warming_up_to_rl/configs/crafter_fft_4b.toml +0 -2
  74. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +2 -2
  75. examples/warming_up_to_rl/run_local_rollout_traced.py +1 -1
  76. examples/warming_up_to_rl/task_app/README.md +1 -1
  77. examples/warming_up_to_rl/task_app/grpo_crafter.py +134 -3
  78. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +1 -1
  79. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +3 -27
  80. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -1
  81. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +4 -4
  82. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +6 -3
  83. examples/workflows/math_rl/configs/rl_from_base_qwen.toml +27 -0
  84. examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +5 -0
  85. synth_ai/api/train/builders.py +9 -3
  86. synth_ai/api/train/cli.py +125 -10
  87. synth_ai/api/train/configs/__init__.py +8 -1
  88. synth_ai/api/train/configs/rl.py +32 -7
  89. synth_ai/api/train/configs/sft.py +6 -2
  90. synth_ai/api/train/configs/shared.py +59 -2
  91. synth_ai/auth/credentials.py +119 -0
  92. synth_ai/cli/__init__.py +12 -4
  93. synth_ai/cli/commands/__init__.py +17 -0
  94. synth_ai/cli/commands/demo/__init__.py +6 -0
  95. synth_ai/cli/commands/demo/core.py +163 -0
  96. synth_ai/cli/commands/deploy/__init__.py +23 -0
  97. synth_ai/cli/commands/deploy/core.py +614 -0
  98. synth_ai/cli/commands/deploy/errors.py +72 -0
  99. synth_ai/cli/commands/deploy/validation.py +11 -0
  100. synth_ai/cli/commands/eval/__init__.py +19 -0
  101. synth_ai/cli/commands/eval/core.py +1109 -0
  102. synth_ai/cli/commands/eval/errors.py +81 -0
  103. synth_ai/cli/commands/eval/validation.py +133 -0
  104. synth_ai/cli/commands/filter/__init__.py +12 -0
  105. synth_ai/cli/commands/filter/core.py +388 -0
  106. synth_ai/cli/commands/filter/errors.py +55 -0
  107. synth_ai/cli/commands/filter/validation.py +77 -0
  108. synth_ai/cli/commands/help/__init__.py +177 -0
  109. synth_ai/cli/commands/help/core.py +73 -0
  110. synth_ai/cli/commands/status/__init__.py +64 -0
  111. synth_ai/cli/commands/status/client.py +192 -0
  112. synth_ai/cli/commands/status/config.py +92 -0
  113. synth_ai/cli/commands/status/errors.py +20 -0
  114. synth_ai/cli/commands/status/formatters.py +164 -0
  115. synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
  116. synth_ai/cli/commands/status/subcommands/files.py +79 -0
  117. synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
  118. synth_ai/cli/commands/status/subcommands/models.py +79 -0
  119. synth_ai/cli/commands/status/subcommands/runs.py +81 -0
  120. synth_ai/cli/commands/status/subcommands/summary.py +47 -0
  121. synth_ai/cli/commands/status/utils.py +114 -0
  122. synth_ai/cli/commands/train/__init__.py +53 -0
  123. synth_ai/cli/commands/train/core.py +21 -0
  124. synth_ai/cli/commands/train/errors.py +117 -0
  125. synth_ai/cli/commands/train/judge_schemas.py +199 -0
  126. synth_ai/cli/commands/train/judge_validation.py +304 -0
  127. synth_ai/cli/commands/train/validation.py +443 -0
  128. synth_ai/cli/demo.py +2 -162
  129. synth_ai/cli/deploy/__init__.py +28 -0
  130. synth_ai/cli/deploy/core.py +5 -0
  131. synth_ai/cli/deploy/errors.py +23 -0
  132. synth_ai/cli/deploy/validation.py +5 -0
  133. synth_ai/cli/eval/__init__.py +36 -0
  134. synth_ai/cli/eval/core.py +5 -0
  135. synth_ai/cli/eval/errors.py +31 -0
  136. synth_ai/cli/eval/validation.py +5 -0
  137. synth_ai/cli/filter/__init__.py +28 -0
  138. synth_ai/cli/filter/core.py +5 -0
  139. synth_ai/cli/filter/errors.py +23 -0
  140. synth_ai/cli/filter/validation.py +5 -0
  141. synth_ai/cli/modal_serve/__init__.py +12 -0
  142. synth_ai/cli/modal_serve/core.py +14 -0
  143. synth_ai/cli/modal_serve/errors.py +8 -0
  144. synth_ai/cli/modal_serve/validation.py +11 -0
  145. synth_ai/cli/serve/__init__.py +12 -0
  146. synth_ai/cli/serve/core.py +14 -0
  147. synth_ai/cli/serve/errors.py +8 -0
  148. synth_ai/cli/serve/validation.py +11 -0
  149. synth_ai/cli/setup.py +20 -265
  150. synth_ai/cli/status.py +7 -126
  151. synth_ai/cli/task_app_deploy.py +1 -10
  152. synth_ai/cli/task_app_modal_serve.py +4 -9
  153. synth_ai/cli/task_app_serve.py +4 -11
  154. synth_ai/cli/task_apps.py +58 -1487
  155. synth_ai/cli/train/__init__.py +12 -0
  156. synth_ai/cli/train/core.py +21 -0
  157. synth_ai/cli/train/errors.py +8 -0
  158. synth_ai/cli/train/validation.py +24 -0
  159. synth_ai/cli/train.py +1 -14
  160. synth_ai/demos/crafter/grpo_crafter_task_app.py +1 -1
  161. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
  162. synth_ai/environments/examples/red/engine.py +33 -12
  163. synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
  164. synth_ai/environments/examples/red/environment.py +26 -0
  165. synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
  166. synth_ai/http.py +12 -0
  167. synth_ai/judge_schemas.py +10 -11
  168. synth_ai/learning/rl/client.py +3 -1
  169. synth_ai/streaming/__init__.py +29 -0
  170. synth_ai/streaming/config.py +94 -0
  171. synth_ai/streaming/handlers.py +469 -0
  172. synth_ai/streaming/streamer.py +301 -0
  173. synth_ai/streaming/types.py +95 -0
  174. synth_ai/task/validators.py +2 -2
  175. synth_ai/tracing_v3/migration_helper.py +1 -2
  176. synth_ai/utils/env.py +25 -18
  177. synth_ai/utils/http.py +4 -1
  178. synth_ai/utils/modal.py +2 -2
  179. {synth_ai-0.2.16.dist-info → synth_ai-0.2.17.dist-info}/METADATA +8 -3
  180. {synth_ai-0.2.16.dist-info → synth_ai-0.2.17.dist-info}/RECORD +184 -109
  181. examples/qwen_vl/configs/eval_qwen2vl_vision.toml +0 -44
  182. synth_ai/cli/tui.py +0 -62
  183. synth_ai/tui/__init__.py +0 -5
  184. synth_ai/tui/__main__.py +0 -13
  185. synth_ai/tui/cli/__init__.py +0 -1
  186. synth_ai/tui/cli/query_experiments.py +0 -164
  187. synth_ai/tui/cli/query_experiments_v3.py +0 -164
  188. synth_ai/tui/dashboard.py +0 -911
  189. {synth_ai-0.2.16.dist-info → synth_ai-0.2.17.dist-info}/WHEEL +0 -0
  190. {synth_ai-0.2.16.dist-info → synth_ai-0.2.17.dist-info}/entry_points.txt +0 -0
  191. {synth_ai-0.2.16.dist-info → synth_ai-0.2.17.dist-info}/licenses/LICENSE +0 -0
  192. {synth_ai-0.2.16.dist-info → synth_ai-0.2.17.dist-info}/top_level.txt +0 -0
@@ -1,43 +1,26 @@
1
1
  # Evaluation config for gpt-4o-mini with vision
2
- # Stronger teacher than gpt-5-nano, use for high-quality distillation
2
+ # Higher-quality teacher for Crafter SFT distillation
3
3
 
4
4
  [eval]
5
- model = "gpt-4o-mini-2024-07-18"
6
- provider = "openai" # Use OpenAI API
7
-
8
- # Task app endpoint
5
+ app_id = "grpo-crafter-task-app"
9
6
  task_app_url = "https://synth-laboratories--grpo-crafter-task-app.modal.run"
10
-
11
- # Vision settings (auto-detected from "gpt-4o" in model name)
12
- use_vision = true
13
- image_only_mode = false # Include both text + images
14
-
15
- # Rollout settings
16
- num_episodes = 100
17
- max_steps_per_episode = 50
18
- seeds = "200-299" # Different seeds for comparison
19
-
20
- # Sampling parameters
21
- temperature = 0.6 # Lower temperature for more consistent behavior
7
+ model = "gpt-4o-mini-2024-07-18"
8
+ seeds = "200-299"
9
+ max_turns = 50
10
+ concurrency = 5
11
+ env_name = "crafter"
12
+ policy_name = "crafter-react"
13
+ trace_format = "structured"
14
+ return_trace = true
15
+
16
+ [eval.env_config]
17
+ env_params = {max_steps_per_episode = 50}
18
+
19
+ [eval.policy_config]
20
+ provider = "openai"
21
+ model = "gpt-4o-mini-2024-07-18"
22
+ temperature = 0.6
22
23
  max_tokens = 512
23
-
24
- # Trace collection
25
- collect_traces = true
26
- trace_db = "traces/gpt4o_mini_vision/rollouts.db"
27
-
28
- # Tools
24
+ use_vision = true
25
+ image_only_mode = false
29
26
  use_tools = true
30
-
31
- # Parallel rollouts
32
- parallel_episodes = 5
33
-
34
- [task]
35
- name = "crafter"
36
- environment = "crafter-classic"
37
-
38
- # Task-specific settings
39
- [task.config]
40
- seed_start = 200
41
- max_episode_length = 256
42
- render_size = [64, 64] # 64x64 PNG images
43
-
@@ -1,45 +1,26 @@
1
- # Evaluation config for gpt-4o-mini with vision
2
- # Collects vision traces for SFT training
3
- # Note: gpt-5-nano doesn't support tool calling yet, use gpt-4o-mini instead
1
+ # Evaluation config for gpt-4o-mini (vision)
2
+ # Collects traces for SFT training; legacy gpt-5-nano naming kept for convenience
4
3
 
5
4
  [eval]
6
- model = "gpt-4o-mini-2024-07-18" # Changed from gpt-5-nano (no tool support)
7
- provider = "openai" # Use OpenAI API
8
-
9
- # Task app endpoint (local or hosted)
10
- # task_app_url = "http://localhost:8000" # Local
11
- task_app_url = "https://synth-laboratories--grpo-crafter-task-app.modal.run" # Hosted
12
-
13
- # Vision settings (auto-detected from "gpt-5" in model name)
14
- use_vision = true
15
- image_only_mode = false # Include both text + images
16
-
17
- # Rollout settings
18
- num_episodes = 100
19
- max_steps_per_episode = 50
20
- seeds = "0-99" # Seeds 0 through 99
21
-
22
- # Sampling parameters
5
+ app_id = "grpo-crafter-task-app"
6
+ task_app_url = "https://synth-laboratories--grpo-crafter-task-app.modal.run"
7
+ model = "gpt-4o-mini-2024-07-18"
8
+ seeds = "0-99"
9
+ max_turns = 50
10
+ concurrency = 5
11
+ env_name = "crafter"
12
+ policy_name = "crafter-react"
13
+ trace_format = "structured"
14
+ return_trace = true
15
+
16
+ [eval.env_config]
17
+ env_params = {max_steps_per_episode = 50}
18
+
19
+ [eval.policy_config]
20
+ provider = "openai"
21
+ model = "gpt-4o-mini-2024-07-18"
23
22
  temperature = 0.7
24
23
  max_tokens = 512
25
-
26
- # Trace collection
27
- collect_traces = true
28
- trace_db = "traces/gpt5nano_vision/rollouts.db"
29
-
30
- # Tools
24
+ use_vision = true
25
+ image_only_mode = false
31
26
  use_tools = true
32
-
33
- # Parallel rollouts (speeds up collection)
34
- parallel_episodes = 5 # Run 5 episodes in parallel
35
-
36
- [task]
37
- name = "crafter"
38
- environment = "crafter-classic"
39
-
40
- # Task-specific settings
41
- [task.config]
42
- seed_start = 0
43
- max_episode_length = 256
44
- render_size = [64, 64] # 64x64 PNG images
45
-
@@ -0,0 +1,26 @@
1
+ # Evaluation config for Qwen3-VL vision rollouts
2
+ # Collects traces for SFT training via synth-ai hosted inference
3
+
4
+ [eval]
5
+ app_id = "grpo-crafter-task-app"
6
+ task_app_url = "https://synth-laboratories--grpo-crafter-task-app.modal.run"
7
+ model = "Qwen/Qwen3-VL-8B-Instruct"
8
+ seeds = "100-199"
9
+ max_turns = 50
10
+ concurrency = 5
11
+ env_name = "crafter"
12
+ policy_name = "crafter-react"
13
+ trace_format = "structured"
14
+ return_trace = true
15
+
16
+ [eval.env_config]
17
+ env_params = {max_steps_per_episode = 50}
18
+
19
+ [eval.policy_config]
20
+ provider = "synth"
21
+ model = "Qwen/Qwen3-VL-8B-Instruct"
22
+ temperature = 0.7
23
+ max_tokens = 512
24
+ use_vision = true
25
+ image_only_mode = false
26
+ use_tools = true
@@ -1,9 +1,9 @@
1
- # Filter Qwen2-VL vision traces for SFT training
2
- # Same settings as gpt5nano filter but for Qwen2-VL traces
1
+ # Filter Qwen3-VL vision traces for SFT training
2
+ # Mirrors the GPT-4o mini filter configuration for vision data
3
3
 
4
4
  [filter]
5
- input_db = "traces/qwen2vl_vision/rollouts.db"
6
- output_dir = "traces/qwen2vl_vision/sft"
5
+ input_db = "traces/qwen3vl_vision/rollouts.db"
6
+ output_dir = "traces/qwen3vl_vision/sft"
7
7
 
8
8
  # Quality filters
9
9
  min_steps_per_episode = 5
@@ -47,4 +47,3 @@ val_file = "val.jsonl"
47
47
  save_stats = true
48
48
  stats_file = "filter_stats.json"
49
49
  save_filtered_episode_ids = true
50
-
@@ -2,8 +2,8 @@
2
2
  # Applies quality filters and exports to SFT JSONL format
3
3
 
4
4
  [filter]
5
- input_db = "traces/gpt4o_vision_test/rollouts.db"
6
- output_dir = "traces/gpt4o_vision_test/sft"
5
+ input_db = "traces/gpt4omini_vision/rollouts.db"
6
+ output_dir = "traces/gpt4omini_vision/sft"
7
7
 
8
8
  # Quality filters
9
9
  min_steps_per_episode = 5 # Remove very short episodes
@@ -50,4 +50,3 @@ val_file = "val.jsonl"
50
50
  save_stats = true
51
51
  stats_file = "filter_stats.json"
52
52
  save_filtered_episode_ids = true
53
-
@@ -2,7 +2,7 @@
2
2
  """
3
3
  Crafter agent using Qwen-VL models via synth-ai's hosted inference.
4
4
 
5
- This demonstrates vision-language models (Qwen2-VL, Qwen3-VL) playing Crafter
5
+ This demonstrates vision-language models (Qwen3-VL family) playing Crafter
6
6
  with image observations. The CrafterPolicy automatically detects vision capability
7
7
  from the model name and includes base64-encoded PNG frames in the prompt.
8
8
 
@@ -12,7 +12,7 @@ Requirements:
12
12
 
13
13
  Usage:
14
14
  uv run python examples/qwen_vl/crafter_qwen_vl_agent.py \
15
- --model Qwen/Qwen2-VL-7B-Instruct --seeds 10 --steps 20
15
+ --model Qwen/Qwen3-VL-8B-Instruct --seeds 10 --steps 20
16
16
  """
17
17
 
18
18
  from __future__ import annotations
@@ -142,7 +142,7 @@ async def _run_episode(
142
142
  env = CrafterClassicEnvironment(task_instance)
143
143
  wrapper = CrafterEnvironmentWrapper(env, seed=seed)
144
144
 
145
- # Policy will auto-detect vision from model name (qwen-vl, qwen2-vl, qwen3-vl)
145
+ # Policy will auto-detect vision from model name (qwen-vl and qwen3-vl tokens)
146
146
  policy = CrafterPolicy(inference_url="synth://inference", model=model)
147
147
  await policy.initialize({
148
148
  "use_tools": True,
@@ -235,8 +235,8 @@ async def main() -> None:
235
235
  parser = argparse.ArgumentParser(description=__doc__)
236
236
  parser.add_argument(
237
237
  "--model",
238
- default="Qwen/Qwen2-VL-7B-Instruct",
239
- help="Qwen-VL model name (e.g., Qwen/Qwen2-VL-7B-Instruct, Qwen/Qwen3-VL-8B)",
238
+ default="Qwen/Qwen3-VL-8B-Instruct",
239
+ help="Qwen-VL model name (e.g., Qwen/Qwen3-VL-2B-Instruct, Qwen/Qwen3-VL-8B-Instruct)",
240
240
  )
241
241
  parser.add_argument("--seeds", type=int, default=10, help="Number of random seeds to evaluate")
242
242
  parser.add_argument("--steps", type=int, default=20, help="Max steps per seed")
@@ -37,13 +37,13 @@ uv run python examples/qwen_vl/crafter_gpt5nano_agent.py \
37
37
 
38
38
  echo ""
39
39
  echo "======================================"
40
- echo "2. Running Qwen2-VL-7B (synth-ai)"
40
+ echo "2. Running Qwen3-VL-8B (synth-ai)"
41
41
  echo "======================================"
42
42
  uv run python examples/qwen_vl/crafter_qwen_vl_agent.py \
43
- --model Qwen/Qwen2-VL-7B-Instruct \
43
+ --model Qwen/Qwen3-VL-8B-Instruct \
44
44
  --seeds $SEEDS \
45
45
  --steps $STEPS \
46
- --output-dir "$OUTPUT_DIR/qwen2vl"
46
+ --output-dir "$OUTPUT_DIR/qwen3vl"
47
47
 
48
48
  echo ""
49
49
  echo "======================================"
@@ -53,10 +53,9 @@ echo ""
53
53
  echo "gpt-5-nano (OpenAI):"
54
54
  cat "$OUTPUT_DIR/gpt5nano/gpt5nano_summary.json" | python -m json.tool
55
55
  echo ""
56
- echo "Qwen2-VL-7B (synth-ai):"
57
- cat "$OUTPUT_DIR/qwen2vl/qwen_vl_summary.json" | python -m json.tool
56
+ echo "Qwen3-VL-8B (synth-ai):"
57
+ cat "$OUTPUT_DIR/qwen3vl/qwen_vl_summary.json" | python -m json.tool
58
58
  echo ""
59
59
  echo "Frames saved in:"
60
60
  echo " - $OUTPUT_DIR/gpt5nano/gpt5nano_frames/"
61
- echo " - $OUTPUT_DIR/qwen2vl/qwen_vl_frames/"
62
-
61
+ echo " - $OUTPUT_DIR/qwen3vl/qwen_vl_frames/"
examples/rl/README.md CHANGED
@@ -5,8 +5,8 @@ This example trains a reinforcement learning policy on single-step math problems
5
5
  ## Quick Commands
6
6
 
7
7
  ```bash
8
- # Serve locally with tracing
9
- uvx synth-ai serve math-single-step --port 8101 --env-file examples/rl/.env --trace traces/math
8
+ # Serve locally with tracing (uvicorn runtime)
9
+ uvx synth-ai deploy --runtime uvicorn math-single-step --port 8101 --env-file examples/rl/.env --trace traces/math
10
10
 
11
11
  # Modal deployment
12
12
  uvx synth-ai deploy --name synth-math-single-step --env-file examples/rl/.env
@@ -45,10 +45,10 @@ The task app is defined in `synth_ai/task/apps/math_single_step.py` and register
45
45
  - `-0.5` if the tool call omits an answer or uses the wrong tool
46
46
  - `-1.0` when no tool call is provided
47
47
 
48
- Serve locally with tracing to capture trajectories:
48
+ Run locally (uvicorn runtime) with tracing to capture trajectories:
49
49
 
50
50
  ```bash
51
- uvx synth-ai serve math-single-step \
51
+ uvx synth-ai deploy --runtime uvicorn math-single-step \
52
52
  --port 8101 \
53
53
  --env-file examples/rl/.env \
54
54
  --trace traces/math \
@@ -162,7 +162,7 @@ For broader background on Synth task apps, CLI commands, and tracing, see the ne
162
162
 
163
163
 
164
164
  uv run python examples/rl/run_eval.py --toml examples/rl/configs/eval_base_qwen.toml
165
- uvx synth-ai serve math-single-step \
165
+ uvx synth-ai deploy --runtime uvicorn math-single-step \
166
166
  --port 8101 \
167
167
  --env-file examples/rl/.env \
168
168
  --trace traces/math \
@@ -1,10 +1,15 @@
1
- type = "rl"
1
+ [algorithm]
2
+ type = "online"
3
+ method = "policy_gradient"
4
+ variety = "gspo"
2
5
 
3
6
  [services]
4
7
  task_url = "https://your-math-task.modal.run"
5
8
 
6
9
  [model]
7
10
  base = "Qwen/Qwen3-4B"
11
+ trainer_mode = "full"
12
+ label = "math-single-step-qwen3-4b"
8
13
 
9
14
  [policy]
10
15
  model = "Qwen/Qwen3-4B"
@@ -20,6 +25,8 @@ evaluation_split = "validation"
20
25
  evaluation_episodes = 256
21
26
 
22
27
  [training]
28
+ num_epochs = 1
29
+ iterations_per_epoch = 20
23
30
  max_turns = 1
24
31
  ops = ["agent", "env"]
25
32
  batch_size = 128
@@ -33,5 +40,23 @@ learning_rate = 5e-6
33
40
  gpu_type = "A10G"
34
41
  gpu_count = 4
35
42
 
43
+ [topology]
44
+ type = "single_node_split"
45
+ gpus_for_vllm = 2
46
+ gpus_for_training = 2
47
+ gpus_for_ref = 0
48
+ tensor_parallel = 1
49
+
50
+ [rollout]
51
+ env_name = "math"
52
+ policy_name = "math-single-step"
53
+ max_turns = 1
54
+ episodes_per_batch = 256
55
+
56
+ [evaluation]
57
+ instances = 256
58
+ every_n_iters = 10
59
+ seeds = [0, 1, 2, 3, 4]
60
+
36
61
  [tags]
37
62
  experiment = "math_single_step"
@@ -1,5 +1,3 @@
1
- type = "rl"
2
-
3
1
  [algorithm]
4
2
  type = "online"
5
3
  method = "policy_gradient"
@@ -10,6 +8,8 @@ task_url = "http://localhost:8101"
10
8
 
11
9
  [model]
12
10
  base = "Qwen/Qwen3-1.7B"
11
+ trainer_mode = "full"
12
+ label = "math-single-step-qwen3-1.7b"
13
13
 
14
14
  [policy]
15
15
  model = "Qwen/Qwen3-1.7B"
@@ -25,6 +25,8 @@ evaluation_split = "validation"
25
25
  evaluation_episodes = 50
26
26
 
27
27
  [training]
28
+ num_epochs = 1
29
+ iterations_per_epoch = 20
28
30
  max_turns = 1
29
31
  ops = ["agent", "env"]
30
32
  batch_size = 2
@@ -63,6 +65,7 @@ health_max_wait_s = 180
63
65
  health_interval_ms = 300
64
66
 
65
67
  [rollout]
68
+ env_name = "math"
66
69
  policy_name = "math-single-step"
67
70
  max_turns = 1
68
71
  episodes_per_batch = 32 # group_size * batch_size
@@ -3,7 +3,7 @@
3
3
  This directory hosts the legacy entrypoint for the math single-step task app. Prefer starting the app via:
4
4
 
5
5
  ```bash
6
- uvx synth-ai serve math-single-step --env-file examples/rl/.env --port 8101
6
+ uvx synth-ai deploy --runtime uvicorn math-single-step --env-file examples/rl/.env --port 8101
7
7
  ```
8
8
 
9
9
  If you need to run it directly (e.g., for Modal `modal deploy` compatibility), use:
@@ -19,4 +19,3 @@ Environment variables:
19
19
  - `MATH_DATASET_DEFAULT_SPLIT`, `MATH_DATASET_VALIDATION_SPLIT`, `MATH_DATASET_TEST_SPLIT`
20
20
 
21
21
  The task app enforces a single `math_submit` tool call per episode, enabling RL to reward correct final answers and penalise missing or malformed submissions.
22
-
@@ -800,7 +800,7 @@ def build_dataset() -> tuple[TaskDatasetRegistry, MathDataset]:
800
800
  def _base_task_info() -> TaskInfo:
801
801
  return TaskInfo(
802
802
  task={"id": "math_single_step", "name": "Math Single Step", "version": "1.0.0"},
803
- environments=["math"],
803
+ environment="math",
804
804
  action_space={
805
805
  "type": "tool_call",
806
806
  "tools": [
@@ -891,7 +891,7 @@ def provide_task_instances(dataset: MathDataset, seeds: Sequence[int]) -> Iterab
891
891
  sample = dataset.sample(split=DEFAULT_SPLIT, index=seed)
892
892
  yield TaskInfo(
893
893
  task=info.task,
894
- environments=info.environments,
894
+ environment=info.environment,
895
895
  action_space=info.action_space,
896
896
  observation={**info.observation, "sample_index": sample["index"]},
897
897
  dataset={
@@ -4,7 +4,7 @@
4
4
  # This script demonstrates a reactive agent in the Crafter environment
5
5
 
6
6
  echo "🚀 Starting Crafter agent demo with Gemini 1.5 Flash..."
7
- echo "Make sure the synth-ai service is running: uvx synth-ai serve"
7
+ echo "Make sure the synth-ai service is running: uvx synth-ai deploy --runtime uvicorn"
8
8
  echo ""
9
9
 
10
- uv run python -m synth_ai.environments.examples.crafter_classic.agent_demos.test_crafter_react_agent --model gemini-1.5-flash
10
+ uv run python -m synth_ai.environments.examples.crafter_classic.agent_demos.test_crafter_react_agent --model gemini-1.5-flash
examples/sft/README.md CHANGED
@@ -25,7 +25,7 @@ You can generate traces with the Crafter task app and then export them to SFT JS
25
25
 
26
26
  ```bash
27
27
  # Serve the task app locally with tracing enabled (example)
28
- uvx synth-ai serve grpo-crafter \
28
+ uvx synth-ai deploy --runtime uvicorn grpo-crafter \
29
29
  --trace traces/v3 \
30
30
  --trace-db traces/v3/task_app_traces_<timestamp>.db \
31
31
  --port 8001
@@ -1,4 +1,7 @@
1
- type = "sft"
1
+ [algorithm]
2
+ type = "offline"
3
+ method = "sft"
4
+ variety = "fft"
2
5
 
3
6
  [job]
4
7
  model = "Qwen/Qwen3-0.6B"
@@ -1,4 +1,7 @@
1
- type = "sft"
1
+ [algorithm]
2
+ type = "offline"
3
+ method = "sft"
4
+ variety = "qlora"
2
5
 
3
6
  [job]
4
7
  model = "Qwen/Qwen3-0.6B"
@@ -28,13 +28,13 @@ endpoints.
28
28
  ## Using the task app
29
29
 
30
30
  ```
31
- uvx synth-ai serve swe-mini --port 8020
31
+ uvx synth-ai deploy --runtime uvicorn swe-mini --port 8020
32
32
  ```
33
33
 
34
34
  ### Recommended: non-interactive serve + .env
35
35
 
36
36
  ```bash
37
- uvx synth-ai serve swe-mini \
37
+ uvx synth-ai deploy --runtime uvicorn swe-mini \
38
38
  --port 8020 \
39
39
  --env-file .env \
40
40
  --trace traces/v3 \
@@ -60,6 +60,36 @@ Execution is handled by mini-swe's environment classes. Configure execution via
60
60
  `SWE_MINI_ENVIRONMENT_CLASS` (`local`, `docker`, `singularity`, …) and pass
61
61
  additional keyword arguments with `SWE_MINI_ENVIRONMENT_KWARGS` (JSON).
62
62
 
63
+ ### Morph Cloud backend
64
+
65
+ The task app now ships with a Morph-powered environment class so you can run
66
+ mini-SWE rollouts in managed sandboxes. When `MORPH_API_KEY` is present the app
67
+ defaults to this backend automatically unless you override
68
+ `SWE_MINI_ENVIRONMENT_CLASS`.
69
+
70
+ 1. Install the optional dependencies: `pip install "synth-ai[swe]"`.
71
+ 2. Export your API key: `export MORPH_API_KEY=...`.
72
+ 3. Point the task app at Morph by setting:
73
+
74
+ ```bash
75
+ export SWE_MINI_ENVIRONMENT_CLASS=morph
76
+ export SWE_MINI_ENVIRONMENT_KWARGS='{
77
+ "snapshot_id": "snap_your_pre_baked_swebench_image",
78
+ "cwd": "/workspace/swebench",
79
+ "env": {"PIP_PROGRESS_BAR": "off"},
80
+ "metadata": {"project": "synth-ai", "task": "swe-mini"}
81
+ }'
82
+ ```
83
+
84
+ If you do not have a pre-built snapshot, provide `"image_id"` (defaults to
85
+ `morphvm-minimal`) along with resource hints (`"vcpus"`, `"memory_mb"`,
86
+ `"disk_mb"`). You can also set `SWE_MINI_MORPH_SNAPSHOT_ID` globally.
87
+
88
+ During cleanup the backend deletes the remote workspace and stops the Morph
89
+ instance automatically. All shell commands (including submissions) now execute
90
+ inside the Morph sandbox, enabling RL workflows that require persistent remote
91
+ compute.
92
+
63
93
  ### Tracing & SFT
64
94
 
65
95
  Tracing works the same as Crafter; pass `--trace` / `--trace-db` to the CLI or
@@ -404,6 +404,10 @@ def _ensure_env_has_task(
404
404
  if not instance_id:
405
405
  raise ValueError("mini-swe rollout request requires env.config.instance_id")
406
406
  config["task"] = dataset.get(instance_id)
407
+ env_cfg = dict(config.get("environment") or {})
408
+ if "environment_class" not in env_cfg and os.getenv("MORPH_API_KEY"):
409
+ env_cfg["environment_class"] = "morph"
410
+ config["environment"] = env_cfg
407
411
  return env_spec.model_copy(update={"config": config})
408
412
 
409
413
 
@@ -46,7 +46,7 @@ class CrafterReActAgent:
46
46
  "- Always return a single tool call: interact_many({actions: [...]})\n"
47
47
  "- Use 2–5 actions per call; prefer long movement sequences to explore.\n"
48
48
  "- Mix in 'do' only when it makes sense (tree, stone, animal, enemy nearby).\n"
49
- "- Do not spam the same exact sequence twice in a row—explore in varied directions.\n\n"
49
+ "\n"
50
50
  "Available actions: noop, move_up, move_down, move_left, move_right, do (interact), sleep, "
51
51
  "place_stone, place_table, place_furnace, place_plant, make_wood_pickaxe, make_stone_pickaxe, "
52
52
  "make_iron_pickaxe, make_wood_sword, make_stone_sword, make_iron_sword\n"
@@ -18,6 +18,7 @@ from typing import Any
18
18
  from minisweagent.environments import get_environment
19
19
  from synth_ai.environments.environment.tools import EnvToolCall
20
20
 
21
+ from examples.swe.task_app.morph_backend import MorphSandboxBackend
21
22
  from .shared import summarise_history
22
23
  from .tools import TOOLS_SCHEMA
23
24
 
@@ -25,8 +26,9 @@ logger = logging.getLogger(__name__)
25
26
 
26
27
 
27
28
  def _environment_type_from_config(config: dict[str, Any]) -> str:
29
+ default = "morph" if os.getenv("MORPH_API_KEY") else "local"
28
30
  value = (config or {}).get("environment_class") or os.getenv(
29
- "SWE_MINI_ENVIRONMENT_CLASS", "local"
31
+ "SWE_MINI_ENVIRONMENT_CLASS", default
30
32
  )
31
33
  return str(value).strip() or "local"
32
34
 
@@ -91,6 +93,7 @@ class MiniSweEnvironmentWrapper:
91
93
  self._local_workspace_dir: Path | None = None
92
94
  self._remote_workspace: str | None = None
93
95
  self._cleanup_workspace = False
96
+ self._using_morph_backend = False
94
97
 
95
98
  if self.environment_type == "local":
96
99
  workspace = self._prepare_local_workspace(kwargs)
@@ -117,11 +120,11 @@ class MiniSweEnvironmentWrapper:
117
120
  timeout = self.env_config.get("timeout")
118
121
  if timeout and "timeout" not in kwargs:
119
122
  kwargs["timeout"] = int(timeout)
120
- if self.repo_url and "image" not in kwargs:
123
+ if self.environment_type in {"docker", "bubblewrap"} and self.repo_url and "image" not in kwargs:
121
124
  image = self.metadata.get("image_name") or os.getenv("SWE_MINI_DOCKER_IMAGE")
122
125
  if image:
123
126
  kwargs["image"] = image
124
- if self.environment_type in {"docker", "bubblewrap"}:
127
+ if self.environment_type in {"docker", "bubblewrap", "morph"}:
125
128
  remote_env = dict(kwargs.get("env") or {})
126
129
  remote_env.setdefault("GIT_TERMINAL_PROMPT", "0")
127
130
  kwargs["env"] = remote_env
@@ -131,13 +134,34 @@ class MiniSweEnvironmentWrapper:
131
134
  self.environment_type,
132
135
  kwargs,
133
136
  )
134
- self.env = get_environment(
135
- {
136
- "environment_class": self.environment_type,
137
- **kwargs,
138
- },
139
- default_type="local",
140
- )
137
+ if self.environment_type == "morph":
138
+ morph_kwargs = dict(kwargs)
139
+ image_value = morph_kwargs.pop("image", None)
140
+ if image_value and "image_id" not in morph_kwargs:
141
+ morph_kwargs["image_id"] = image_value
142
+ timeout_value = morph_kwargs.pop("timeout", None)
143
+ if timeout_value is not None and "startup_timeout" not in morph_kwargs:
144
+ try:
145
+ morph_kwargs["startup_timeout"] = int(timeout_value)
146
+ except Exception:
147
+ logger.warning("Invalid timeout value for morph backend: %r", timeout_value)
148
+ metadata_override = morph_kwargs.pop("metadata", {}) or {}
149
+ metadata_payload = {
150
+ "app": "swe-mini",
151
+ "instance_id": self.instance_id,
152
+ }
153
+ metadata_payload.update({str(k): str(v) for k, v in dict(metadata_override).items()})
154
+ morph_kwargs["metadata"] = metadata_payload
155
+ self.env = MorphSandboxBackend(**morph_kwargs)
156
+ self._using_morph_backend = True
157
+ else:
158
+ self.env = get_environment(
159
+ {
160
+ "environment_class": self.environment_type,
161
+ **kwargs,
162
+ },
163
+ default_type="local",
164
+ )
141
165
 
142
166
  if self.environment_type != "local":
143
167
  self._bootstrap_remote_workspace()
@@ -181,6 +205,9 @@ class MiniSweEnvironmentWrapper:
181
205
  with contextlib.suppress(Exception):
182
206
  self.env.execute(f"rm -rf {shlex.quote(self._remote_workspace)}")
183
207
  self._remote_workspace = None
208
+ if self._using_morph_backend and hasattr(self.env, "close"):
209
+ with contextlib.suppress(Exception):
210
+ self.env.close()
184
211
 
185
212
  def _resolve_repo_url(self, metadata: dict[str, Any]) -> str | None:
186
213
  candidates = [
@@ -156,13 +156,13 @@ class OpenAIClient:
156
156
  keys_preview = sorted(processed_request.keys())
157
157
  logger.info(f"Request keys: {keys_preview}")
158
158
 
159
- # Final hard-guard for OpenAI: ensure unsupported field is not present
159
+ # Final hard-guard for OpenAI/Groq: ensure unsupported field is not present
160
160
  try:
161
- if "openai" in url.lower() and "stop_after_tool_calls" in processed_request:
161
+ low_url = url.lower()
162
+ if ("openai" in low_url or "groq.com" in low_url or "/proxy/groq" in low_url) and "stop_after_tool_calls" in processed_request:
162
163
  processed_request.pop("stop_after_tool_calls", None)
163
- logger.info("Removed stop_after_tool_calls for OpenAI request")
164
+ logger.info("Removed stop_after_tool_calls for Groq/OpenAI request")
164
165
  # Groq-specific requirement: when using JSON mode, one of the messages must contain the word 'json'
165
- low_url = url.lower()
166
166
  if ("groq.com" in low_url or "/openai" in low_url) and isinstance(
167
167
  processed_request, dict
168
168
  ):