synth-ai 0.2.16__py3-none-any.whl → 0.2.19__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (299)
  1. examples/analyze_semantic_words.sh +2 -2
  2. examples/baseline/banking77_baseline.py +204 -0
  3. examples/baseline/crafter_baseline.py +407 -0
  4. examples/baseline/pokemon_red_baseline.py +326 -0
  5. examples/baseline/simple_baseline.py +56 -0
  6. examples/baseline/warming_up_to_rl_baseline.py +239 -0
  7. examples/blog_posts/gepa/README.md +355 -0
  8. examples/blog_posts/gepa/configs/banking77_gepa_local.toml +95 -0
  9. examples/blog_posts/gepa/configs/banking77_gepa_test.toml +82 -0
  10. examples/blog_posts/gepa/configs/banking77_mipro_local.toml +52 -0
  11. examples/blog_posts/gepa/configs/hotpotqa_gepa_local.toml +59 -0
  12. examples/blog_posts/gepa/configs/hotpotqa_gepa_qwen.toml +36 -0
  13. examples/blog_posts/gepa/configs/hotpotqa_mipro_local.toml +53 -0
  14. examples/blog_posts/gepa/configs/hover_gepa_local.toml +59 -0
  15. examples/blog_posts/gepa/configs/hover_gepa_qwen.toml +36 -0
  16. examples/blog_posts/gepa/configs/hover_mipro_local.toml +53 -0
  17. examples/blog_posts/gepa/configs/ifbench_gepa_local.toml +59 -0
  18. examples/blog_posts/gepa/configs/ifbench_gepa_qwen.toml +36 -0
  19. examples/blog_posts/gepa/configs/ifbench_mipro_local.toml +53 -0
  20. examples/blog_posts/gepa/configs/pupa_gepa_local.toml +60 -0
  21. examples/blog_posts/gepa/configs/pupa_mipro_local.toml +54 -0
  22. examples/blog_posts/gepa/deploy_banking77_task_app.sh +41 -0
  23. examples/blog_posts/gepa/gepa_baseline.py +204 -0
  24. examples/blog_posts/gepa/query_prompts_example.py +97 -0
  25. examples/blog_posts/gepa/run_gepa_banking77.sh +87 -0
  26. examples/blog_posts/gepa/task_apps.py +105 -0
  27. examples/blog_posts/gepa/test_gepa_local.sh +67 -0
  28. examples/blog_posts/gepa/verify_banking77_setup.sh +123 -0
  29. examples/blog_posts/pokemon_vl/README.md +98 -0
  30. examples/blog_posts/pokemon_vl/configs/eval_gpt5nano.toml +26 -0
  31. examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +27 -0
  32. examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
  33. examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
  34. examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +43 -0
  35. examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
  36. examples/blog_posts/pokemon_vl/extract_images.py +239 -0
  37. examples/blog_posts/pokemon_vl/pokemon_vl_baseline.py +326 -0
  38. examples/blog_posts/pokemon_vl/run_eval_extract_images.py +209 -0
  39. examples/blog_posts/pokemon_vl/run_qwen_eval_extract_images.py +212 -0
  40. examples/blog_posts/pokemon_vl/text_box_analysis.md +106 -0
  41. examples/blog_posts/warming_up_to_rl/ARCHITECTURE.md +195 -0
  42. examples/blog_posts/warming_up_to_rl/FINAL_TEST_RESULTS.md +127 -0
  43. examples/blog_posts/warming_up_to_rl/INFERENCE_SUCCESS.md +132 -0
  44. examples/blog_posts/warming_up_to_rl/README.md +158 -0
  45. examples/blog_posts/warming_up_to_rl/SMOKE_TESTING.md +164 -0
  46. examples/blog_posts/warming_up_to_rl/SMOKE_TEST_COMPLETE.md +253 -0
  47. examples/blog_posts/warming_up_to_rl/configs/eval_baseline_qwen32b_10x20.toml +25 -0
  48. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
  49. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b_10x20.toml +26 -0
  50. examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
  51. examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
  52. examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
  53. examples/blog_posts/warming_up_to_rl/configs/smoke_test.toml +75 -0
  54. examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +91 -0
  55. examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
  56. examples/blog_posts/warming_up_to_rl/warming_up_to_rl_baseline.py +187 -0
  57. examples/dev/qwen3_32b_qlora_4xh100.toml +5 -0
  58. examples/multi_step/configs/VERILOG_REWARDS.md +4 -0
  59. examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +4 -0
  60. examples/multi_step/configs/crafter_rl_outcome.toml +2 -1
  61. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +65 -107
  62. examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +2 -1
  63. examples/multi_step/configs/crafter_rl_stepwise_simple.toml +2 -1
  64. examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
  65. examples/multi_step/configs/verilog_rl_lora.toml +80 -123
  66. examples/qwen_coder/configs/coder_lora_30b.toml +1 -3
  67. examples/qwen_coder/configs/coder_lora_4b.toml +4 -1
  68. examples/qwen_coder/configs/coder_lora_small.toml +1 -3
  69. examples/qwen_vl/README.md +10 -12
  70. examples/qwen_vl/SETUP_COMPLETE.md +7 -8
  71. examples/qwen_vl/VISION_TESTS_COMPLETE.md +2 -3
  72. examples/qwen_vl/collect_data_via_cli.md +76 -84
  73. examples/qwen_vl/collect_vision_traces.py +4 -4
  74. examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +40 -57
  75. examples/qwen_vl/configs/crafter_vlm_sft_example.toml +1 -2
  76. examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +20 -37
  77. examples/qwen_vl/configs/eval_gpt5nano_vision.toml +21 -40
  78. examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
  79. examples/qwen_vl/configs/{filter_qwen2vl_sft.toml → filter_qwen3vl_sft.toml} +4 -5
  80. examples/qwen_vl/configs/filter_vision_sft.toml +2 -3
  81. examples/qwen_vl/crafter_qwen_vl_agent.py +5 -5
  82. examples/qwen_vl/run_vision_comparison.sh +6 -7
  83. examples/rl/README.md +5 -5
  84. examples/rl/configs/rl_from_base_qwen.toml +26 -1
  85. examples/rl/configs/rl_from_base_qwen17.toml +6 -2
  86. examples/rl/task_app/README.md +1 -2
  87. examples/rl/task_app/math_single_step.py +2 -2
  88. examples/run_crafter_demo.sh +2 -2
  89. examples/sft/README.md +1 -1
  90. examples/sft/configs/crafter_fft_qwen0p6b.toml +4 -1
  91. examples/sft/configs/crafter_lora_qwen0p6b.toml +4 -1
  92. examples/swe/task_app/README.md +32 -2
  93. examples/swe/task_app/grpo_swe_mini.py +4 -0
  94. examples/swe/task_app/hosted/envs/crafter/react_agent.py +1 -1
  95. examples/swe/task_app/hosted/envs/mini_swe/environment.py +37 -10
  96. examples/swe/task_app/hosted/inference/openai_client.py +4 -38
  97. examples/swe/task_app/hosted/policy_routes.py +17 -0
  98. examples/swe/task_app/hosted/rollout.py +4 -2
  99. examples/swe/task_app/morph_backend.py +178 -0
  100. examples/task_apps/banking77/__init__.py +6 -0
  101. examples/task_apps/banking77/banking77_task_app.py +841 -0
  102. examples/task_apps/banking77/deploy_wrapper.py +46 -0
  103. examples/task_apps/crafter/CREATE_SFT_DATASET.md +4 -0
  104. examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +4 -0
  105. examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +4 -0
  106. examples/task_apps/crafter/task_app/README.md +1 -1
  107. examples/task_apps/crafter/task_app/grpo_crafter.py +90 -5
  108. examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +1 -1
  109. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +4 -26
  110. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -2
  111. examples/task_apps/crafter/task_app/synth_envs_hosted/hosted_app.py +49 -0
  112. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +372 -107
  113. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +81 -12
  114. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +82 -11
  115. examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +194 -1
  116. examples/task_apps/enron/task_app/grpo_enron_task_app.py +1 -1
  117. examples/task_apps/gepa_benchmarks/__init__.py +7 -0
  118. examples/task_apps/gepa_benchmarks/common.py +260 -0
  119. examples/task_apps/gepa_benchmarks/hotpotqa_task_app.py +507 -0
  120. examples/task_apps/gepa_benchmarks/hover_task_app.py +436 -0
  121. examples/task_apps/gepa_benchmarks/ifbench_task_app.py +563 -0
  122. examples/task_apps/gepa_benchmarks/pupa_task_app.py +460 -0
  123. examples/task_apps/math/README.md +1 -2
  124. examples/task_apps/pokemon_red/README.md +3 -4
  125. examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +4 -0
  126. examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +6 -5
  127. examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +1 -2
  128. examples/task_apps/pokemon_red/task_app.py +288 -39
  129. examples/task_apps/sokoban/README.md +2 -3
  130. examples/task_apps/verilog/eval_groq_qwen32b.toml +12 -14
  131. examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +1 -1
  132. examples/vlm/configs/crafter_vlm_gpt4o.toml +4 -1
  133. examples/warming_up_to_rl/configs/crafter_fft.toml +4 -1
  134. examples/warming_up_to_rl/configs/crafter_fft_4b.toml +0 -2
  135. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +3 -2
  136. examples/warming_up_to_rl/run_local_rollout_traced.py +1 -1
  137. examples/warming_up_to_rl/task_app/README.md +1 -1
  138. examples/warming_up_to_rl/task_app/grpo_crafter.py +185 -5
  139. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +1 -1
  140. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +3 -27
  141. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -1
  142. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +49 -0
  143. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +156 -45
  144. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +37 -4
  145. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +33 -3
  146. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +67 -0
  147. examples/workflows/math_rl/configs/rl_from_base_qwen.toml +27 -0
  148. examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +6 -0
  149. synth_ai/api/train/builders.py +99 -4
  150. synth_ai/api/train/cli.py +516 -26
  151. synth_ai/api/train/config_finder.py +13 -2
  152. synth_ai/api/train/configs/__init__.py +23 -2
  153. synth_ai/api/train/configs/prompt_learning.py +442 -0
  154. synth_ai/api/train/configs/rl.py +61 -7
  155. synth_ai/api/train/configs/sft.py +6 -2
  156. synth_ai/api/train/configs/shared.py +59 -2
  157. synth_ai/api/train/task_app.py +1 -1
  158. synth_ai/api/train/validators.py +277 -0
  159. synth_ai/auth/credentials.py +119 -0
  160. synth_ai/baseline/__init__.py +25 -0
  161. synth_ai/baseline/config.py +209 -0
  162. synth_ai/baseline/discovery.py +214 -0
  163. synth_ai/baseline/execution.py +146 -0
  164. synth_ai/cli/__init__.py +94 -18
  165. synth_ai/cli/__main__.py +0 -0
  166. synth_ai/cli/claude.py +70 -0
  167. synth_ai/cli/codex.py +84 -0
  168. synth_ai/cli/commands/__init__.py +18 -0
  169. synth_ai/cli/commands/baseline/__init__.py +12 -0
  170. synth_ai/cli/commands/baseline/core.py +637 -0
  171. synth_ai/cli/commands/baseline/list.py +93 -0
  172. synth_ai/cli/commands/demo/__init__.py +6 -0
  173. synth_ai/cli/commands/demo/core.py +163 -0
  174. synth_ai/cli/commands/eval/__init__.py +19 -0
  175. synth_ai/cli/commands/eval/core.py +1112 -0
  176. synth_ai/cli/commands/eval/errors.py +81 -0
  177. synth_ai/cli/commands/eval/validation.py +133 -0
  178. synth_ai/cli/commands/filter/__init__.py +12 -0
  179. synth_ai/cli/commands/filter/core.py +424 -0
  180. synth_ai/cli/commands/filter/errors.py +55 -0
  181. synth_ai/cli/commands/filter/validation.py +77 -0
  182. synth_ai/cli/commands/help/__init__.py +177 -0
  183. synth_ai/cli/commands/help/core.py +72 -0
  184. synth_ai/cli/commands/smoke/__init__.py +7 -0
  185. synth_ai/cli/commands/smoke/core.py +1436 -0
  186. synth_ai/cli/commands/status/__init__.py +64 -0
  187. synth_ai/cli/commands/status/client.py +192 -0
  188. synth_ai/cli/commands/status/config.py +92 -0
  189. synth_ai/cli/commands/status/errors.py +20 -0
  190. synth_ai/cli/commands/status/formatters.py +164 -0
  191. synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
  192. synth_ai/cli/commands/status/subcommands/files.py +79 -0
  193. synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
  194. synth_ai/cli/commands/status/subcommands/models.py +79 -0
  195. synth_ai/cli/commands/status/subcommands/pricing.py +22 -0
  196. synth_ai/cli/commands/status/subcommands/runs.py +81 -0
  197. synth_ai/cli/commands/status/subcommands/summary.py +47 -0
  198. synth_ai/cli/commands/status/subcommands/usage.py +203 -0
  199. synth_ai/cli/commands/status/utils.py +114 -0
  200. synth_ai/cli/commands/train/__init__.py +53 -0
  201. synth_ai/cli/commands/train/core.py +21 -0
  202. synth_ai/cli/commands/train/errors.py +117 -0
  203. synth_ai/cli/commands/train/judge_schemas.py +200 -0
  204. synth_ai/cli/commands/train/judge_validation.py +305 -0
  205. synth_ai/cli/commands/train/validation.py +386 -0
  206. synth_ai/cli/demo.py +30 -158
  207. synth_ai/cli/deploy/__init__.py +43 -0
  208. synth_ai/cli/deploy.py +162 -0
  209. synth_ai/cli/eval/__init__.py +36 -0
  210. synth_ai/cli/eval/core.py +5 -0
  211. synth_ai/cli/eval/errors.py +31 -0
  212. synth_ai/cli/eval/validation.py +5 -0
  213. synth_ai/cli/filter/__init__.py +28 -0
  214. synth_ai/cli/filter/core.py +5 -0
  215. synth_ai/cli/filter/errors.py +23 -0
  216. synth_ai/cli/filter/validation.py +5 -0
  217. synth_ai/cli/legacy_root_backup.py +14 -8
  218. synth_ai/cli/modal_serve/__init__.py +12 -0
  219. synth_ai/cli/modal_serve/core.py +14 -0
  220. synth_ai/cli/modal_serve/errors.py +8 -0
  221. synth_ai/cli/modal_serve/validation.py +11 -0
  222. synth_ai/cli/opencode.py +107 -0
  223. synth_ai/cli/root.py +9 -5
  224. synth_ai/cli/serve/__init__.py +12 -0
  225. synth_ai/cli/serve/core.py +14 -0
  226. synth_ai/cli/serve/errors.py +8 -0
  227. synth_ai/cli/serve/validation.py +11 -0
  228. synth_ai/cli/setup.py +20 -265
  229. synth_ai/cli/status.py +7 -126
  230. synth_ai/cli/task_app_deploy.py +1 -10
  231. synth_ai/cli/task_app_modal_serve.py +4 -9
  232. synth_ai/cli/task_app_serve.py +4 -11
  233. synth_ai/cli/task_apps.py +51 -1480
  234. synth_ai/cli/train/__init__.py +12 -0
  235. synth_ai/cli/train/core.py +21 -0
  236. synth_ai/cli/train/errors.py +8 -0
  237. synth_ai/cli/train/validation.py +24 -0
  238. synth_ai/cli/train.py +1 -14
  239. synth_ai/demos/crafter/grpo_crafter_task_app.py +1 -1
  240. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
  241. synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +7 -4
  242. synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +9 -5
  243. synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +4 -3
  244. synth_ai/environments/examples/red/engine.py +33 -12
  245. synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
  246. synth_ai/environments/examples/red/environment.py +26 -0
  247. synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
  248. synth_ai/http.py +12 -0
  249. synth_ai/judge_schemas.py +10 -10
  250. synth_ai/learning/__init__.py +10 -0
  251. synth_ai/learning/prompt_learning_client.py +276 -0
  252. synth_ai/learning/prompt_learning_types.py +184 -0
  253. synth_ai/learning/rl/client.py +3 -1
  254. synth_ai/pricing/__init__.py +2 -0
  255. synth_ai/pricing/model_pricing.py +57 -0
  256. synth_ai/streaming/__init__.py +29 -0
  257. synth_ai/streaming/config.py +94 -0
  258. synth_ai/streaming/handlers.py +518 -0
  259. synth_ai/streaming/streamer.py +320 -0
  260. synth_ai/streaming/types.py +95 -0
  261. synth_ai/task/apps/__init__.py +1 -0
  262. synth_ai/task/config.py +2 -0
  263. synth_ai/task/tracing_utils.py +25 -25
  264. synth_ai/task/validators.py +45 -9
  265. synth_ai/task_app_cfgs.py +21 -0
  266. synth_ai/tracing_v3/config.py +162 -19
  267. synth_ai/tracing_v3/constants.py +1 -1
  268. synth_ai/tracing_v3/db_config.py +24 -38
  269. synth_ai/tracing_v3/migration_helper.py +1 -2
  270. synth_ai/tracing_v3/storage/config.py +47 -13
  271. synth_ai/tracing_v3/storage/factory.py +3 -3
  272. synth_ai/tracing_v3/turso/daemon.py +113 -11
  273. synth_ai/tracing_v3/turso/native_manager.py +92 -16
  274. synth_ai/types.py +8 -0
  275. synth_ai/urls.py +11 -0
  276. synth_ai/utils/__init__.py +30 -1
  277. synth_ai/utils/agents.py +74 -0
  278. synth_ai/utils/bin.py +39 -0
  279. synth_ai/utils/cli.py +149 -5
  280. synth_ai/utils/env.py +40 -33
  281. synth_ai/utils/http.py +4 -1
  282. synth_ai/utils/json.py +72 -0
  283. synth_ai/utils/modal.py +285 -3
  284. synth_ai/utils/paths.py +48 -0
  285. synth_ai/utils/uvicorn.py +113 -0
  286. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/METADATA +109 -6
  287. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/RECORD +291 -142
  288. examples/qwen_vl/configs/eval_qwen2vl_vision.toml +0 -44
  289. synth_ai/cli/tui.py +0 -62
  290. synth_ai/tui/__init__.py +0 -5
  291. synth_ai/tui/__main__.py +0 -13
  292. synth_ai/tui/cli/__init__.py +0 -1
  293. synth_ai/tui/cli/query_experiments.py +0 -164
  294. synth_ai/tui/cli/query_experiments_v3.py +0 -164
  295. synth_ai/tui/dashboard.py +0 -911
  296. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/WHEEL +0 -0
  297. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/entry_points.txt +0 -0
  298. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/licenses/LICENSE +0 -0
  299. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,12 @@
1
+ from __future__ import annotations
2
+
3
+ from .core import register, train_command
4
+ from .errors import TrainCliError
5
+ from .validation import validate_train_environment
6
+
7
+ __all__ = [
8
+ "register",
9
+ "train_command",
10
+ "TrainCliError",
11
+ "validate_train_environment",
12
+ ]
@@ -0,0 +1,21 @@
1
+ from __future__ import annotations
2
+
3
+ import click
4
+ from synth_ai.api.train.cli import (
5
+ register as _register_with_cli,
6
+ )
7
+ from synth_ai.api.train.cli import (
8
+ train_command as _train_command,
9
+ )
10
+
11
+ __all__ = ["register", "train_command"]
12
+
13
+
14
+ def register(cli: click.Group) -> None:
15
+ """Attach the train command to the root CLI."""
16
+ _register_with_cli(cli)
17
+
18
+
19
+ def train_command(*args, **kwargs):
20
+ """Entrypoint used by the train CLI command."""
21
+ return _train_command(*args, **kwargs)
@@ -0,0 +1,8 @@
1
+ from __future__ import annotations
2
+
3
+
4
+ class TrainCliError(RuntimeError):
5
+ """Base exception for train CLI failures."""
6
+
7
+
8
+ __all__ = ["TrainCliError"]
@@ -0,0 +1,24 @@
1
+ from __future__ import annotations
2
+
3
+ from collections.abc import Iterable
4
+ from pathlib import Path
5
+ from typing import Dict, Tuple
6
+
7
+ from synth_ai.api.train.env_resolver import KeySpec, resolve_env
8
+
9
+ __all__ = ["validate_train_environment"]
10
+
11
+
12
+ def validate_train_environment(
13
+ *,
14
+ config_path: Path | None,
15
+ explicit_env_paths: Iterable[str],
16
+ required_keys: list[KeySpec],
17
+ ) -> Tuple[Path, Dict[str, str]]:
18
+ """Validate and resolve environment secrets used by the train command."""
19
+ resolved_path, resolved_keys = resolve_env(
20
+ config_path=config_path,
21
+ explicit_env_paths=explicit_env_paths,
22
+ required_keys=required_keys,
23
+ )
24
+ return resolved_path, resolved_keys
synth_ai/cli/train.py CHANGED
@@ -1,18 +1,5 @@
1
1
  from __future__ import annotations
2
2
 
3
- from typing import Any
4
-
5
- from synth_ai.api.train.cli import register as _register
6
- from synth_ai.api.train.cli import train_command as _train_command
3
+ from synth_ai.cli.commands.train.core import register, train_command
7
4
 
8
5
  __all__ = ["register", "train_command"]
9
-
10
-
11
- def register(cli: Any) -> None:
12
- """Compatibility wrapper for the legacy train CLI location."""
13
-
14
- _register(cli)
15
-
16
-
17
- def train_command(*args: Any, **kwargs: Any) -> Any:
18
- return _train_command(*args, **kwargs)
@@ -3,7 +3,7 @@
3
3
  This module now delegates to the TaskAppConfig defined in the local example at
4
4
  `examples/warming_up_to_rl/task_app/grpo_crafter.py`. It is kept for legacy usage
5
5
  (running the file directly or targeting `fastapi_app` from external tooling).
6
- Prefer using `uvx synth-ai serve grpo-crafter` for local development and testing.
6
+ Prefer using `uvx synth-ai deploy --runtime uvicorn grpo-crafter` for local development and testing.
7
7
  """
8
8
 
9
9
  from __future__ import annotations
@@ -3,7 +3,7 @@
3
3
  This module now delegates to the TaskAppConfig defined in the local example at
4
4
  `examples/task_apps/crafter/task_app/grpo_crafter.py`. It is kept for legacy usage
5
5
  (running the file directly or targeting `fastapi_app` from external tooling).
6
- Prefer using `uvx synth-ai serve grpo-crafter` for local development and testing.
6
+ Prefer using `uvx synth-ai deploy --runtime uvicorn grpo-crafter` for local development and testing.
7
7
  """
8
8
 
9
9
  from __future__ import annotations
@@ -6,16 +6,18 @@ the hash-based set-iteration nondeterminism that caused the drift.
6
6
  """
7
7
 
8
8
  import collections
9
+ import os
9
10
 
10
11
  import crafter
11
12
 
12
- print("[PATCH] Attempting to apply Crafter deterministic patch...")
13
+ # Patch messages permanently disabled to reduce noise
14
+ # print("[PATCH] Attempting to apply Crafter deterministic patch...")
13
15
 
14
16
  # -----------------------------------------------------------------------------
15
17
  # 1. Make per–chunk object order stable
16
18
  # -----------------------------------------------------------------------------
17
19
  if not hasattr(crafter.Env, "_orig_balance_object"):
18
- print("[PATCH] Patching crafter.Env._balance_object...")
20
+ # print("[PATCH] Patching crafter.Env._balance_object...")
19
21
  crafter.Env._orig_balance_object = crafter.Env._balance_object
20
22
 
21
23
  def _balance_object_det(self, chunk, objs, *args, **kwargs):
@@ -25,9 +27,10 @@ if not hasattr(crafter.Env, "_orig_balance_object"):
25
27
  return crafter.Env._orig_balance_object(self, chunk, objs, *args, **kwargs)
26
28
 
27
29
  crafter.Env._balance_object = _balance_object_det
28
- print("[PATCH] crafter.Env._balance_object patched.")
30
+ # print("[PATCH] crafter.Env._balance_object patched.")
29
31
  else:
30
- print("[PATCH] crafter.Env._balance_object already patched or _orig_balance_object exists.")
32
+ pass
33
+ # print("[PATCH] crafter.Env._balance_object already patched or _orig_balance_object exists.")
31
34
 
32
35
  # -----------------------------------------------------------------------------
33
36
  # 2. Make *chunk* iteration order stable
@@ -4,6 +4,7 @@ This version handles player references for Zombie and Skeleton objects.
4
4
  """
5
5
 
6
6
  import collections
7
+ import os
7
8
  import pickle
8
9
  from typing import Any, Dict, Optional, Set
9
10
 
@@ -11,11 +12,12 @@ import crafter
11
12
  import numpy as np
12
13
  from crafter import objects
13
14
 
14
- print("[PATCH] Attempting to apply Crafter serialization patch v3...")
15
+ # Patch messages permanently disabled
16
+ # print("[PATCH] Attempting to apply Crafter serialization patch v3...")
15
17
 
16
18
  # Check if already patched
17
19
  if not hasattr(crafter.Env, "save"):
18
- print("[PATCH] Adding enhanced save/load methods to crafter.Env...")
20
+ # print("[PATCH] Adding enhanced save/load methods to crafter.Env...")
19
21
 
20
22
  def _save(self) -> Dict[str, Any]:
21
23
  """Save complete environment state including all details."""
@@ -260,8 +262,10 @@ if not hasattr(crafter.Env, "save"):
260
262
  crafter.Env.save = _save
261
263
  crafter.Env.load = _load
262
264
 
263
- print("[PATCH] crafter.Env.save() and load() methods added (v3).")
265
+ pass
266
+ # print("[PATCH] crafter.Env.save() and load() methods added (v3).")
264
267
  else:
265
- print("[PATCH] crafter.Env already has save/load methods.")
268
+ pass
269
+ # print("[PATCH] crafter.Env already has save/load methods.")
266
270
 
267
- print("[PATCH] Crafter serialization patch v3 complete.")
271
+ # print("[PATCH] Crafter serialization patch v3 complete.")
@@ -9,7 +9,8 @@ from typing import Any, Dict, Optional
9
9
 
10
10
  import crafter
11
11
 
12
- print("[PATCH] Attempting to apply simplified Crafter world configuration patch...")
12
+ # Patch messages permanently disabled
13
+ # print("[PATCH] Attempting to apply simplified Crafter world configuration patch...")
13
14
 
14
15
  # World configuration presets
15
16
  WORLD_CONFIGS = {
@@ -279,8 +280,8 @@ def patched_env_init(
279
280
 
280
281
  crafter.Env.__init__ = patched_env_init
281
282
 
282
- print("[PATCH] Simplified Crafter world configuration patch complete.")
283
- print("[PATCH] Available configs: easy, normal, hard, peaceful")
283
+ # print("[PATCH] Simplified Crafter world configuration patch complete.")
284
+ # print("[PATCH] Available configs: easy, normal, hard, peaceful")
284
285
 
285
286
  # Example custom config
286
287
  EXAMPLE_CUSTOM_CONFIG = {
@@ -14,12 +14,15 @@ from synth_ai.environments.stateful.engine import StatefulEngine, StatefulEngine
14
14
  from synth_ai.environments.tasks.core import TaskInstance
15
15
 
16
16
  from .engine_helpers.reward_components import (
17
- BadgeRewardComponent,
18
- BattleVictoryComponent,
19
- LevelUpComponent,
20
- MapTransitionComponent,
17
+ RouteExplorationReward,
18
+ StrategicTrainingReward,
19
+ BattleProgressionReward,
20
+ GymPreparationReward,
21
+ ItemCollectionReward,
22
+ HealingManagementReward,
23
+ EfficientExplorationReward,
24
+ BadgeVictoryReward,
21
25
  StepPenaltyComponent,
22
- XPGainComponent,
23
26
  )
24
27
  from .engine_helpers.state_extraction import extract_game_state
25
28
 
@@ -268,15 +271,27 @@ class PokemonRedEngine(StatefulEngine, IReproducibleEngine):
268
271
  # For testing purposes, use None emulator
269
272
  self.emulator = None
270
273
 
271
- # Initialize reward stack with dense components
274
+ # Initialize reward stack with comprehensive progress-based components
272
275
  self.reward_stack = RewardStack(
273
276
  components=[
274
- BadgeRewardComponent(),
275
- MapTransitionComponent(),
276
- BattleVictoryComponent(),
277
- LevelUpComponent(),
278
- XPGainComponent(),
279
- StepPenaltyComponent(),
277
+ # Major progress rewards
278
+ BadgeVictoryReward(), # +50.0 for Boulder Badge (main goal)
279
+ RouteExplorationReward(), # +1.0-5.0 for reaching key areas
280
+ GymPreparationReward(), # +3.0 for being gym-ready
281
+
282
+ # Training and battle rewards
283
+ StrategicTrainingReward(), # +0.2-3.0 for level ups and milestones
284
+ BattleProgressionReward(), # +0.1-1.0 for battles
285
+
286
+ # Resource management rewards
287
+ ItemCollectionReward(), # +0.1-0.5 for collecting items
288
+ HealingManagementReward(), # +0.05-0.8 for healing Pokemon
289
+
290
+ # Exploration efficiency
291
+ EfficientExplorationReward(), # +0.02 for discovering new positions
292
+
293
+ # No penalty for unproductive actions
294
+ StepPenaltyComponent(penalty=0.0), # 0.0 per step
280
295
  ]
281
296
  )
282
297
 
@@ -640,6 +655,12 @@ class PokemonRedEngine(StatefulEngine, IReproducibleEngine):
640
655
  "prev_text_box_active": bool(prev_state.get("text_box_active", False)),
641
656
  "prev_enemy_hp_current": int(prev_state.get("enemy_hp_current", 0)),
642
657
  "prev_enemy_hp_percentage": float(prev_state.get("enemy_hp_percentage", 0.0)),
658
+ "prev_player_x": int(prev_state.get("player_x", 0)),
659
+ "prev_player_y": int(prev_state.get("player_y", 0)),
660
+ "prev_party": prev_state.get("party", []),
661
+ "prev_inventory": prev_state.get("inventory", []),
662
+ "prev_party_hp_current": int(prev_state.get("party_hp_current", 0)),
663
+ "prev_party_hp_max": int(prev_state.get("party_hp_max", 0)),
643
664
  },
644
665
  )
645
666
  except Exception as e: