synth-ai 0.2.13.dev2__py3-none-any.whl → 0.2.16__py3-none-any.whl

This diff compares the contents of two publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (293)
  1. examples/README.md +1 -0
  2. examples/multi_step/SFT_README.md +147 -0
  3. examples/multi_step/configs/README_verilog_rl.md +77 -0
  4. examples/multi_step/configs/VERILOG_REWARDS.md +90 -0
  5. examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +183 -0
  6. examples/multi_step/configs/crafter_eval_synth_qwen4b.toml +35 -0
  7. examples/multi_step/configs/crafter_eval_text_only_groq_qwen32b.toml +36 -0
  8. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +12 -11
  9. examples/multi_step/configs/crafter_sft_qwen30b_lora.toml +62 -0
  10. examples/multi_step/configs/crafter_synth_backend.md +40 -0
  11. examples/multi_step/configs/verilog_eval_groq_qwen32b.toml +31 -0
  12. examples/multi_step/configs/verilog_eval_synth_qwen8b.toml +33 -0
  13. examples/multi_step/configs/verilog_rl_lora.toml +190 -0
  14. examples/multi_step/convert_traces_to_sft.py +84 -0
  15. examples/multi_step/judges/crafter_backend_judge.py +220 -0
  16. examples/multi_step/judges/verilog_backend_judge.py +234 -0
  17. examples/multi_step/readme.md +48 -0
  18. examples/multi_step/run_sft_qwen30b.sh +45 -0
  19. examples/multi_step/verilog_rl_lora.md +218 -0
  20. examples/qwen_coder/configs/coder_lora_30b.toml +3 -2
  21. examples/qwen_coder/configs/coder_lora_4b.toml +2 -1
  22. examples/qwen_coder/configs/coder_lora_small.toml +2 -1
  23. examples/qwen_vl/BUGS_AND_FIXES.md +232 -0
  24. examples/qwen_vl/IMAGE_VALIDATION_COMPLETE.md +271 -0
  25. examples/qwen_vl/IMAGE_VALIDATION_SUMMARY.md +260 -0
  26. examples/qwen_vl/INFERENCE_SFT_TESTS.md +412 -0
  27. examples/qwen_vl/NEXT_STEPS_2B.md +325 -0
  28. examples/qwen_vl/QUICKSTART.md +327 -0
  29. examples/qwen_vl/QUICKSTART_RL_VISION.md +110 -0
  30. examples/qwen_vl/README.md +154 -0
  31. examples/qwen_vl/RL_VISION_COMPLETE.md +475 -0
  32. examples/qwen_vl/RL_VISION_TESTING.md +333 -0
  33. examples/qwen_vl/SDK_VISION_INTEGRATION.md +328 -0
  34. examples/qwen_vl/SETUP_COMPLETE.md +275 -0
  35. examples/qwen_vl/VISION_TESTS_COMPLETE.md +490 -0
  36. examples/qwen_vl/VLM_PIPELINE_COMPLETE.md +242 -0
  37. examples/qwen_vl/__init__.py +2 -0
  38. examples/qwen_vl/collect_data_via_cli.md +423 -0
  39. examples/qwen_vl/collect_vision_traces.py +368 -0
  40. examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +127 -0
  41. examples/qwen_vl/configs/crafter_vlm_sft_example.toml +60 -0
  42. examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +43 -0
  43. examples/qwen_vl/configs/eval_gpt4o_vision_proper.toml +29 -0
  44. examples/qwen_vl/configs/eval_gpt5nano_vision.toml +45 -0
  45. examples/qwen_vl/configs/eval_qwen2vl_vision.toml +44 -0
  46. examples/qwen_vl/configs/filter_qwen2vl_sft.toml +50 -0
  47. examples/qwen_vl/configs/filter_vision_sft.toml +53 -0
  48. examples/qwen_vl/configs/filter_vision_test.toml +8 -0
  49. examples/qwen_vl/configs/sft_qwen3_vl_2b_test.toml +54 -0
  50. examples/qwen_vl/crafter_gpt5nano_agent.py +308 -0
  51. examples/qwen_vl/crafter_qwen_vl_agent.py +300 -0
  52. examples/qwen_vl/run_vision_comparison.sh +62 -0
  53. examples/qwen_vl/run_vision_sft_pipeline.sh +175 -0
  54. examples/qwen_vl/test_image_validation.py +201 -0
  55. examples/qwen_vl/test_sft_vision_data.py +110 -0
  56. examples/rl/README.md +1 -1
  57. examples/rl/configs/eval_base_qwen.toml +17 -0
  58. examples/rl/configs/eval_rl_qwen.toml +13 -0
  59. examples/rl/configs/rl_from_base_qwen.toml +37 -0
  60. examples/rl/configs/rl_from_base_qwen17.toml +76 -0
  61. examples/rl/configs/rl_from_ft_qwen.toml +37 -0
  62. examples/rl/run_eval.py +436 -0
  63. examples/rl/run_rl_and_save.py +111 -0
  64. examples/rl/task_app/README.md +22 -0
  65. examples/rl/task_app/math_single_step.py +990 -0
  66. examples/rl/task_app/math_task_app.py +111 -0
  67. examples/sft/README.md +5 -5
  68. examples/sft/configs/crafter_fft_qwen0p6b.toml +4 -2
  69. examples/sft/configs/crafter_lora_qwen0p6b.toml +4 -3
  70. examples/sft/evaluate.py +4 -4
  71. examples/sft/export_dataset.py +7 -4
  72. examples/sft/generate_traces.py +2 -0
  73. examples/swe/task_app/README.md +1 -1
  74. examples/swe/task_app/grpo_swe_mini.py +1 -1
  75. examples/swe/task_app/grpo_swe_mini_task_app.py +0 -12
  76. examples/swe/task_app/hosted/envs/mini_swe/environment.py +13 -13
  77. examples/swe/task_app/hosted/policy_routes.py +0 -2
  78. examples/swe/task_app/hosted/rollout.py +2 -8
  79. examples/task_apps/IMAGE_ONLY_EVAL_QUICKSTART.md +258 -0
  80. examples/task_apps/crafter/CREATE_SFT_DATASET.md +273 -0
  81. examples/task_apps/crafter/EVAL_IMAGE_ONLY_RESULTS.md +152 -0
  82. examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +174 -0
  83. examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +268 -0
  84. examples/task_apps/crafter/QUERY_EXAMPLES.md +203 -0
  85. examples/task_apps/crafter/README_IMAGE_ONLY_EVAL.md +316 -0
  86. examples/task_apps/crafter/eval_image_only_gpt4o.toml +28 -0
  87. examples/task_apps/crafter/eval_text_only_groq_llama.toml +36 -0
  88. examples/task_apps/crafter/filter_sft_dataset.toml +16 -0
  89. examples/task_apps/crafter/task_app/__init__.py +3 -0
  90. examples/task_apps/crafter/task_app/grpo_crafter.py +309 -14
  91. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/environment.py +10 -0
  92. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +75 -4
  93. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +17 -2
  94. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +55 -3
  95. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +114 -32
  96. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +127 -27
  97. examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +156 -0
  98. examples/task_apps/enron/__init__.py +1 -0
  99. examples/task_apps/enron/filter_sft.toml +5 -0
  100. examples/task_apps/enron/tests/__init__.py +2 -0
  101. examples/task_apps/enron/tests/integration/__init__.py +2 -0
  102. examples/task_apps/enron/tests/integration/test_enron_eval.py +2 -0
  103. examples/task_apps/enron/tests/unit/__init__.py +2 -0
  104. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_COMPLETE.md +283 -0
  105. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_STATUS.md +155 -0
  106. examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +415 -0
  107. examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +29 -0
  108. examples/task_apps/pokemon_red/pallet_town_rl_config.toml +2 -0
  109. examples/task_apps/pokemon_red/task_app.py +199 -6
  110. examples/task_apps/pokemon_red/test_pallet_town_rewards.py +2 -0
  111. examples/task_apps/sokoban/filter_sft.toml +5 -0
  112. examples/task_apps/sokoban/tests/__init__.py +2 -0
  113. examples/task_apps/sokoban/tests/integration/__init__.py +2 -0
  114. examples/task_apps/sokoban/tests/unit/__init__.py +2 -0
  115. examples/task_apps/verilog/eval_groq_qwen32b.toml +8 -4
  116. examples/task_apps/verilog/filter_sft.toml +5 -0
  117. examples/task_apps/verilog/task_app/grpo_verilog.py +258 -23
  118. examples/task_apps/verilog/tests/__init__.py +2 -0
  119. examples/task_apps/verilog/tests/integration/__init__.py +2 -0
  120. examples/task_apps/verilog/tests/integration/test_verilog_eval.py +2 -0
  121. examples/task_apps/verilog/tests/unit/__init__.py +2 -0
  122. examples/vlm/README.md +3 -3
  123. examples/vlm/configs/crafter_vlm_gpt4o.toml +2 -0
  124. examples/vlm/crafter_openai_vlm_agent.py +3 -5
  125. examples/vlm/filter_image_rows.py +1 -1
  126. examples/vlm/run_crafter_vlm_benchmark.py +2 -2
  127. examples/warming_up_to_rl/_utils.py +92 -0
  128. examples/warming_up_to_rl/analyze_trace_db.py +1 -1
  129. examples/warming_up_to_rl/configs/crafter_fft.toml +2 -0
  130. examples/warming_up_to_rl/configs/crafter_fft_4b.toml +2 -0
  131. examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +2 -0
  132. examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +2 -0
  133. examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +2 -1
  134. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +2 -1
  135. examples/warming_up_to_rl/configs/rl_from_ft.toml +2 -0
  136. examples/warming_up_to_rl/export_trace_sft.py +174 -60
  137. examples/warming_up_to_rl/groq_test.py +2 -0
  138. examples/warming_up_to_rl/readme.md +63 -132
  139. examples/warming_up_to_rl/run_fft_and_save.py +1 -1
  140. examples/warming_up_to_rl/run_local_rollout.py +2 -0
  141. examples/warming_up_to_rl/run_local_rollout_modal.py +2 -0
  142. examples/warming_up_to_rl/run_local_rollout_parallel.py +2 -0
  143. examples/warming_up_to_rl/run_local_rollout_traced.py +2 -0
  144. examples/warming_up_to_rl/run_rl_and_save.py +1 -1
  145. examples/warming_up_to_rl/run_rollout_remote.py +2 -0
  146. examples/warming_up_to_rl/task_app/README.md +42 -0
  147. examples/warming_up_to_rl/task_app/grpo_crafter.py +696 -0
  148. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +135 -0
  149. examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
  150. examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
  151. examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +143 -0
  152. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1226 -0
  153. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
  154. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
  155. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
  156. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +522 -0
  157. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +478 -0
  158. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +108 -0
  159. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
  160. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
  161. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +204 -0
  162. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
  163. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +618 -0
  164. examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +100 -0
  165. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +1081 -0
  166. examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +195 -0
  167. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1861 -0
  168. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
  169. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +211 -0
  170. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +161 -0
  171. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +137 -0
  172. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +62 -0
  173. synth_ai/__init__.py +44 -30
  174. synth_ai/_utils/__init__.py +47 -0
  175. synth_ai/_utils/base_url.py +10 -0
  176. synth_ai/_utils/http.py +10 -0
  177. synth_ai/_utils/prompts.py +10 -0
  178. synth_ai/_utils/task_app_state.py +12 -0
  179. synth_ai/_utils/user_config.py +10 -0
  180. synth_ai/api/models/supported.py +145 -7
  181. synth_ai/api/train/__init__.py +13 -1
  182. synth_ai/api/train/cli.py +30 -7
  183. synth_ai/api/train/config_finder.py +18 -11
  184. synth_ai/api/train/env_resolver.py +13 -10
  185. synth_ai/cli/__init__.py +66 -49
  186. synth_ai/cli/_modal_wrapper.py +9 -6
  187. synth_ai/cli/_typer_patch.py +0 -2
  188. synth_ai/cli/_validate_task_app.py +22 -4
  189. synth_ai/cli/legacy_root_backup.py +3 -1
  190. synth_ai/cli/lib/__init__.py +10 -0
  191. synth_ai/cli/lib/task_app_discovery.py +7 -0
  192. synth_ai/cli/lib/task_app_env.py +518 -0
  193. synth_ai/cli/recent.py +1 -0
  194. synth_ai/cli/setup.py +266 -0
  195. synth_ai/cli/task_app_deploy.py +16 -0
  196. synth_ai/cli/task_app_list.py +25 -0
  197. synth_ai/cli/task_app_modal_serve.py +16 -0
  198. synth_ai/cli/task_app_serve.py +18 -0
  199. synth_ai/cli/task_apps.py +392 -141
  200. synth_ai/cli/train.py +18 -0
  201. synth_ai/cli/tui.py +62 -0
  202. synth_ai/demos/__init__.py +10 -0
  203. synth_ai/demos/core/__init__.py +28 -1
  204. synth_ai/demos/crafter/__init__.py +1 -0
  205. synth_ai/demos/crafter/crafter_fft_4b.toml +55 -0
  206. synth_ai/demos/crafter/grpo_crafter_task_app.py +185 -0
  207. synth_ai/demos/crafter/rl_from_base_qwen4b.toml +74 -0
  208. synth_ai/demos/demo_registry.py +176 -0
  209. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
  210. synth_ai/demos/math/__init__.py +1 -0
  211. synth_ai/demos/math/_common.py +16 -0
  212. synth_ai/demos/math/app.py +38 -0
  213. synth_ai/demos/math/config.toml +76 -0
  214. synth_ai/demos/math/deploy_modal.py +54 -0
  215. synth_ai/demos/math/modal_task_app.py +702 -0
  216. synth_ai/demos/math/task_app_entry.py +51 -0
  217. synth_ai/environments/environment/core.py +7 -1
  218. synth_ai/environments/examples/bandit/engine.py +0 -1
  219. synth_ai/environments/examples/bandit/environment.py +0 -1
  220. synth_ai/environments/examples/crafter_classic/environment.py +1 -1
  221. synth_ai/environments/examples/verilog/engine.py +76 -10
  222. synth_ai/environments/examples/wordle/environment.py +0 -1
  223. synth_ai/evals/base.py +16 -5
  224. synth_ai/evals/client.py +1 -1
  225. synth_ai/inference/client.py +1 -1
  226. synth_ai/learning/client.py +1 -1
  227. synth_ai/learning/health.py +1 -1
  228. synth_ai/learning/jobs.py +1 -1
  229. synth_ai/learning/rl/client.py +1 -1
  230. synth_ai/learning/rl/env_keys.py +1 -1
  231. synth_ai/learning/rl/secrets.py +1 -1
  232. synth_ai/learning/sft/client.py +1 -1
  233. synth_ai/learning/sft/data.py +407 -4
  234. synth_ai/learning/validators.py +4 -1
  235. synth_ai/task/__init__.py +11 -1
  236. synth_ai/task/apps/__init__.py +5 -2
  237. synth_ai/task/config.py +259 -0
  238. synth_ai/task/contracts.py +15 -2
  239. synth_ai/task/rubrics/__init__.py +4 -2
  240. synth_ai/task/rubrics/loaders.py +27 -4
  241. synth_ai/task/rubrics/scoring.py +3 -0
  242. synth_ai/task/rubrics.py +219 -0
  243. synth_ai/task/trace_correlation_helpers.py +328 -0
  244. synth_ai/task/tracing_utils.py +14 -3
  245. synth_ai/task/validators.py +145 -2
  246. synth_ai/tracing_v3/config.py +15 -13
  247. synth_ai/tracing_v3/constants.py +21 -0
  248. synth_ai/tracing_v3/db_config.py +3 -1
  249. synth_ai/tracing_v3/decorators.py +10 -7
  250. synth_ai/tracing_v3/session_tracer.py +10 -0
  251. synth_ai/tracing_v3/turso/daemon.py +2 -2
  252. synth_ai/tracing_v3/turso/native_manager.py +108 -77
  253. synth_ai/tracing_v3/utils.py +1 -1
  254. synth_ai/tui/__init__.py +5 -0
  255. synth_ai/tui/__main__.py +13 -0
  256. synth_ai/tui/cli/__init__.py +1 -0
  257. synth_ai/tui/cli/query_experiments.py +164 -0
  258. synth_ai/tui/cli/query_experiments_v3.py +164 -0
  259. synth_ai/tui/dashboard.py +911 -0
  260. synth_ai/utils/__init__.py +101 -0
  261. synth_ai/utils/base_url.py +94 -0
  262. synth_ai/utils/cli.py +131 -0
  263. synth_ai/utils/env.py +287 -0
  264. synth_ai/utils/http.py +169 -0
  265. synth_ai/utils/modal.py +308 -0
  266. synth_ai/utils/process.py +212 -0
  267. synth_ai/utils/prompts.py +39 -0
  268. synth_ai/utils/sqld.py +122 -0
  269. synth_ai/utils/task_app_discovery.py +882 -0
  270. synth_ai/utils/task_app_env.py +186 -0
  271. synth_ai/utils/task_app_state.py +318 -0
  272. synth_ai/utils/user_config.py +137 -0
  273. synth_ai/v0/config/__init__.py +1 -5
  274. synth_ai/v0/config/base_url.py +1 -7
  275. synth_ai/v0/tracing/config.py +1 -1
  276. synth_ai/v0/tracing/decorators.py +1 -1
  277. synth_ai/v0/tracing/upload.py +1 -1
  278. synth_ai/v0/tracing_v1/config.py +1 -1
  279. synth_ai/v0/tracing_v1/decorators.py +1 -1
  280. synth_ai/v0/tracing_v1/upload.py +1 -1
  281. {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/METADATA +85 -31
  282. {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/RECORD +286 -135
  283. synth_ai/cli/man.py +0 -106
  284. synth_ai/compound/cais.py +0 -0
  285. synth_ai/core/experiment.py +0 -13
  286. synth_ai/core/system.py +0 -15
  287. synth_ai/demo_registry.py +0 -295
  288. synth_ai/handshake.py +0 -109
  289. synth_ai/http.py +0 -26
  290. {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/WHEEL +0 -0
  291. {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/entry_points.txt +0 -0
  292. {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/licenses/LICENSE +0 -0
  293. {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,51 @@
1
+ """Task app registry entry for the math demo Modal deployment."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from contextlib import suppress
6
+ from importlib import import_module
7
+
8
+ from synth_ai.task.apps import ModalDeploymentConfig, TaskAppEntry, register_task_app
9
+
10
+ try:
11
+ from synth_ai.task.apps.math_single_step import build_config as base_build_config
12
+ except ModuleNotFoundError:
13
+ base_module = import_module("examples.rl.task_app.math_single_step")
14
+ base_build_config = base_module.build_config
15
+
16
+ DEMO_MODAL_CONFIG = ModalDeploymentConfig(
17
+ app_name="hendrycks-math-task-app",
18
+ pip_packages=(
19
+ "fastapi>=0.110.0",
20
+ "uvicorn>=0.23.0",
21
+ "pydantic>=2.6.0",
22
+ "httpx>=0.24.0",
23
+ "numpy>=1.24.0",
24
+ "aiohttp>=3.8.0",
25
+ "datasets>=2.16.0",
26
+ "synth-ai",
27
+ ),
28
+ )
29
+
30
+
31
+ def build_config():
32
+ """Reuse the shared math single-step TaskAppConfig."""
33
+
34
+ return base_build_config()
35
+
36
+
37
+ def register_demo_entry() -> None:
38
+ entry = TaskAppEntry(
39
+ app_id="hendrycks-math-demo",
40
+ description="Demo math task app (Modal-focused) shipping with synth-ai demos.",
41
+ config_factory=build_config,
42
+ modal=DEMO_MODAL_CONFIG,
43
+ )
44
+ with suppress(ValueError):
45
+ register_task_app(entry=entry)
46
+
47
+
48
+ register_demo_entry()
49
+
50
+
51
+ __all__ = ["DEMO_MODAL_CONFIG", "build_config", "register_demo_entry"]
@@ -1,4 +1,10 @@
1
- from synth_ai.core.system import System
1
+ class System:
2
+ """Minimal base data structure shared by environment types."""
3
+
4
+ id: str
5
+ name: str
6
+ description: str
7
+ pass
2
8
 
3
9
 
4
10
  class Environment(System):
@@ -4,7 +4,6 @@ from dataclasses import dataclass
4
4
  from typing import Any
5
5
 
6
6
  import numpy as np
7
-
8
7
  from synth_ai.environments.environment.shared_engine import (
9
8
  GetObservationCallable,
10
9
  InternalObservation,
@@ -3,7 +3,6 @@ from __future__ import annotations
3
3
  from typing import Any
4
4
 
5
5
  from pydantic import BaseModel, Field, ValidationError
6
-
7
6
  from synth_ai.environments.environment.shared_engine import (
8
7
  GetObservationCallable,
9
8
  InternalObservation,
@@ -397,7 +397,7 @@ class CrafterClassicEnvironment(StatefulEnvironment, ReproducibleEnvironment[Cra
397
397
  priv_state, pub_state, self.custom_step_observation_callable
398
398
  )
399
399
  total_step_time = time.time() - step_start_time
400
- logger.info(
400
+ logger.debug(
401
401
  f"CrafterClassic step completed in {total_step_time:.3f}s (interact: {interact_time:.3f}s)"
402
402
  )
403
403
  return obs
@@ -46,7 +46,7 @@ class VerilogCompileSuccessComponent(RewardComponent):
46
46
  if hasattr(action, "get") and action.get("type") == "compile":
47
47
  # Check if compilation was successful (returncode 0)
48
48
  if action.get("returncode") == 0:
49
- return 0.1
49
+ return 0.01 # Normalized: 0.1 / 10.0 = 0.01
50
50
  return 0.0
51
51
 
52
52
 
@@ -55,12 +55,12 @@ class VerilogSimulationPassComponent(RewardComponent):
55
55
  if hasattr(action, "get") and action.get("type") == "simulate":
56
56
  # Check if simulation passed
57
57
  if action.get("passed", False):
58
- return 1.0
58
+ return 0.1 # Normalized: 1.0 / 10.0 = 0.1
59
59
  return 0.0
60
60
 
61
61
 
62
62
  class VerilogStepPenaltyComponent(RewardComponent):
63
- def __init__(self, penalty: float = -0.01):
63
+ def __init__(self, penalty: float = 0.0): # No per-step reward - only reward accomplishments
64
64
  self.penalty = penalty
65
65
 
66
66
  async def score(self, state: Any, action: Any) -> float:
@@ -68,12 +68,12 @@ class VerilogStepPenaltyComponent(RewardComponent):
68
68
 
69
69
 
70
70
  class VerilogSubmitSuccessComponent(RewardComponent):
71
- """Reward for successful submission (tests passed)."""
71
+ """Reward for successful submission (tests passed). Max reward = 1.0 (normalized)."""
72
72
  async def score(self, state: VerilogPublicState, action: Any) -> float:
73
73
  if hasattr(action, "get") and action.get("type") == "submit":
74
74
  # Check if submission passed
75
75
  if action.get("passed", False):
76
- return 10.0 # Large reward for completing the task correctly
76
+ return 1.0 # Normalized: Maximum reward is now 1.0
77
77
  return 0.0
78
78
 
79
79
 
@@ -83,6 +83,9 @@ class VerilogEngine(StatefulEngine):
83
83
  """
84
84
 
85
85
  def __init__(self, task_instance: TaskInstance):
86
+ # Validate required Verilog tools are available
87
+ self._validate_verilog_tools()
88
+
86
89
  self.task_instance = task_instance
87
90
  self._total_reward = 0.0
88
91
  self._current_action_for_reward: Optional[Dict[str, Any]] = None
@@ -92,7 +95,7 @@ class VerilogEngine(StatefulEngine):
92
95
  VerilogCompileSuccessComponent(),
93
96
  VerilogSimulationPassComponent(),
94
97
  VerilogSubmitSuccessComponent(),
95
- VerilogStepPenaltyComponent(penalty=-0.01),
98
+ VerilogStepPenaltyComponent(penalty=0.0), # No per-step reward
96
99
  ]
97
100
  )
98
101
 
@@ -103,6 +106,39 @@ class VerilogEngine(StatefulEngine):
103
106
  # Track last compile/simulate outputs
104
107
  self._last_compile_output: Optional[str] = None
105
108
  self._last_simulate_output: Optional[str] = None
109
+
110
+ @staticmethod
111
+ def _validate_verilog_tools() -> None:
112
+ """Validate that required Verilog tools (iverilog, vvp) are available."""
113
+ missing_tools = []
114
+
115
+ if not shutil.which("iverilog"):
116
+ missing_tools.append("iverilog")
117
+ if not shutil.which("vvp"):
118
+ missing_tools.append("vvp")
119
+
120
+ if missing_tools:
121
+ error_msg = (
122
+ f"🚨🚨🚨 CRITICAL CONFIGURATION ERROR 🚨🚨🚨\n"
123
+ f"\n"
124
+ f"Missing required Verilog tools: {', '.join(missing_tools)}\n"
125
+ f"\n"
126
+ f"The Verilog environment CANNOT function without these tools.\n"
127
+ f"ALL compile/simulate operations will FAIL.\n"
128
+ f"ALL rewards will be ZERO.\n"
129
+ f"Training or evaluation will be COMPLETELY BROKEN.\n"
130
+ f"\n"
131
+ f"🔧 FIX THIS NOW:\n"
132
+ f"1. Add 'iverilog' to apt_packages in Modal deployment config\n"
133
+ f"2. Location: examples/task_apps/verilog/task_app/grpo_verilog.py\n"
134
+ f"3. Look for: modal=ModalDeploymentConfig(\n"
135
+ f"4. Add: apt_packages=('iverilog',) # Provides both iverilog and vvp\n"
136
+ f"5. Redeploy: uvx synth-ai modal-serve grpo-verilog\n"
137
+ f"\n"
138
+ f"{'='*80}"
139
+ )
140
+ print(f"\n{'='*80}\n{error_msg}\n{'='*80}\n", flush=True)
141
+ raise RuntimeError(error_msg)
106
142
 
107
143
  async def _reset_engine(
108
144
  self, *, seed: Optional[int] = None
@@ -133,6 +169,13 @@ class VerilogEngine(StatefulEngine):
133
169
  ) -> Tuple[VerilogPrivateState, VerilogPublicState]:
134
170
  """Process an action result and update engine state."""
135
171
  self._current_action_for_reward = action_result
172
+
173
+ # DEBUG: Print action_result
174
+ print(f"\n[ENGINE DEBUG] _step_engine called")
175
+ print(f" action_result: {action_result}")
176
+ print(f" action_result.type: {action_result.get('type')}")
177
+ print(f" action_result.returncode: {action_result.get('returncode')}")
178
+ print(f" action_result.ok: {action_result.get('ok')}")
136
179
 
137
180
  # Update last outputs if this is a compile or simulate action
138
181
  if action_result.get("type") == "compile":
@@ -147,18 +190,21 @@ class VerilogEngine(StatefulEngine):
147
190
  current_pub_state = VerilogPublicState(
148
191
  files=self._get_file_contents(),
149
192
  build_dir=str(self.build_dir),
150
- task_completed=action_result.get("passed", False),
193
+ task_completed=action_result.get("submitted", False) and action_result.get("passed", False),
151
194
  )
152
195
 
153
196
  reward_from_stack = await self.reward_stack.step_reward(
154
197
  state=current_pub_state, action=self._current_action_for_reward
155
198
  )
156
199
  self._current_action_for_reward = None
200
+
201
+ # DEBUG: Print reward
202
+ print(f"[ENGINE DEBUG] reward_from_stack: {reward_from_stack}")
157
203
 
158
204
  self._total_reward += reward_from_stack
159
205
 
160
- # Check termination conditions
161
- terminated = action_result.get("passed", False) or action_result.get("submitted", False)
206
+ # Check termination conditions - only terminate if submitted (regardless of pass/fail)
207
+ terminated = action_result.get("submitted", False)
162
208
 
163
209
  priv = VerilogPrivateState(
164
210
  reward_last=reward_from_stack,
@@ -170,7 +216,7 @@ class VerilogEngine(StatefulEngine):
170
216
  pub = VerilogPublicState(
171
217
  files=self._get_file_contents(),
172
218
  build_dir=str(self.build_dir),
173
- task_completed=action_result.get("passed", False),
219
+ task_completed=action_result.get("submitted", False) and action_result.get("passed", False),
174
220
  last_compile_output=self._last_compile_output,
175
221
  last_simulate_output=self._last_simulate_output,
176
222
  )
@@ -259,6 +305,16 @@ class VerilogEngine(StatefulEngine):
259
305
  }
260
306
  except subprocess.TimeoutExpired:
261
307
  return {"ok": False, "error": "Compilation timeout", "type": "compile"}
308
+ except FileNotFoundError:
309
+ error_msg = (
310
+ "🚨 CRITICAL ERROR: 'iverilog' executable not found! 🚨\n"
311
+ "The Verilog compiler (iverilog) is not installed in this environment.\n"
312
+ "This will cause ALL compile operations to fail and result in ZERO rewards.\n"
313
+ "Fix: Add 'iverilog' to apt_packages in the Modal deployment config.\n"
314
+ "Location: examples/task_apps/verilog/task_app/grpo_verilog.py -> modal=ModalDeploymentConfig(apt_packages=('iverilog',))"
315
+ )
316
+ print(f"\n{'='*80}\n{error_msg}\n{'='*80}\n", flush=True)
317
+ raise RuntimeError(error_msg) from None
262
318
  except Exception as e:
263
319
  return {"ok": False, "error": str(e), "type": "compile"}
264
320
 
@@ -290,6 +346,16 @@ class VerilogEngine(StatefulEngine):
290
346
  }
291
347
  except subprocess.TimeoutExpired:
292
348
  return {"ok": False, "error": "Simulation timeout", "type": "simulate"}
349
+ except FileNotFoundError:
350
+ error_msg = (
351
+ "🚨 CRITICAL ERROR: 'vvp' executable not found! 🚨\n"
352
+ "The Verilog simulator (vvp) is not installed in this environment.\n"
353
+ "This will cause ALL simulate operations to fail and result in ZERO rewards.\n"
354
+ "Fix: Add 'iverilog' to apt_packages in the Modal deployment config (provides both iverilog and vvp).\n"
355
+ "Location: examples/task_apps/verilog/task_app/grpo_verilog.py -> modal=ModalDeploymentConfig(apt_packages=('iverilog',))"
356
+ )
357
+ print(f"\n{'='*80}\n{error_msg}\n{'='*80}\n", flush=True)
358
+ raise RuntimeError(error_msg) from None
293
359
  except Exception as e:
294
360
  return {"ok": False, "error": str(e), "type": "simulate"}
295
361
 
@@ -3,7 +3,6 @@ from __future__ import annotations
3
3
  from typing import Any
4
4
 
5
5
  from pydantic import BaseModel, Field
6
-
7
6
  from synth_ai.environments.environment.shared_engine import (
8
7
  GetObservationCallable,
9
8
  InternalObservation,
synth_ai/evals/base.py CHANGED
@@ -1,13 +1,24 @@
1
+ from __future__ import annotations
2
+
3
+ from abc import ABC, abstractmethod
4
+ from typing import Any
5
+
6
+
1
7
  class Judgement:
2
8
  def __init__(
3
- self, criteria: str, score: float, reasoning: str = "", evidence: list[str] = None
4
- ):
9
+ self,
10
+ criteria: str,
11
+ score: float,
12
+ reasoning: str = "",
13
+ evidence: list[str] | None = None,
14
+ ) -> None:
5
15
  self.criteria = criteria
6
16
  self.score = score
7
17
  self.reasoning = reasoning
8
18
  self.evidence = evidence or []
9
19
 
10
20
 
11
- class BaseEval:
12
- async def run(self, data: any) -> list[Judgement]:
13
- pass
21
+ class BaseEval(ABC):
22
+ @abstractmethod
23
+ async def run(self, data: Any) -> list[Judgement]:
24
+ """Execute the evaluation and return a list of judgements."""
synth_ai/evals/client.py CHANGED
@@ -10,7 +10,7 @@ import os
10
10
  import warnings
11
11
  from typing import Any, Literal, TypedDict
12
12
 
13
- from synth_ai.http import AsyncHttpClient, HTTPError
13
+ from synth_ai.http_client import AsyncHttpClient, HTTPError
14
14
  from synth_ai.tracing_v3.serialization import normalize_for_json
15
15
 
16
16
  Provider = Literal["groq", "gemini"]
@@ -7,7 +7,7 @@ from synth_ai.api.models.supported import (
7
7
  normalize_model_identifier,
8
8
  )
9
9
 
10
- from ..http import AsyncHttpClient
10
+ from .._utils.http import AsyncHttpClient
11
11
 
12
12
 
13
13
  class InferenceClient:
@@ -11,7 +11,7 @@ from synth_ai.api.models.supported import (
11
11
  )
12
12
  from synth_ai.learning.sft.config import prepare_sft_job_payload
13
13
 
14
- from ..http import AsyncHttpClient, HTTPError, sleep
14
+ from .._utils.http import AsyncHttpClient, HTTPError, sleep
15
15
 
16
16
 
17
17
  class LearningClient:
@@ -2,7 +2,7 @@ from __future__ import annotations
2
2
 
3
3
  from typing import Any
4
4
 
5
- from ..http import AsyncHttpClient
5
+ from .._utils.http import AsyncHttpClient
6
6
 
7
7
 
8
8
  def _api_base(b: str) -> str:
synth_ai/learning/jobs.py CHANGED
@@ -5,7 +5,7 @@ from collections.abc import Callable
5
5
  from contextlib import suppress
6
6
  from typing import Any
7
7
 
8
- from ..http import AsyncHttpClient, sleep
8
+ from .._utils.http import AsyncHttpClient, sleep
9
9
  from .constants import TERMINAL_EVENT_FAILURE, TERMINAL_EVENT_SUCCESS, TERMINAL_STATUSES
10
10
 
11
11
 
@@ -10,7 +10,7 @@ from synth_ai.api.models.supported import (
10
10
  normalize_model_identifier,
11
11
  )
12
12
 
13
- from ...http import AsyncHttpClient, HTTPError, sleep
13
+ from ..._utils.http import AsyncHttpClient, HTTPError, sleep
14
14
 
15
15
 
16
16
  def _api_base(b: str) -> str:
@@ -1,4 +1,4 @@
1
- """Helpers for uploading RL environment credentials to the backend."""
1
+ """Helpers for uploading Environment credentials to the backend."""
2
2
 
3
3
  from __future__ import annotations
4
4
 
@@ -1,4 +1,4 @@
1
- """Helpers for generating RL environment credentials."""
1
+ """Helpers for generating Environment credentials."""
2
2
 
3
3
  from __future__ import annotations
4
4
 
@@ -3,7 +3,7 @@ from __future__ import annotations
3
3
  from pathlib import Path
4
4
  from typing import Any
5
5
 
6
- from ...http import AsyncHttpClient, HTTPError
6
+ from ..._utils.http import AsyncHttpClient, HTTPError
7
7
  from .config import prepare_sft_job_payload
8
8
  from .data import validate_jsonl_or_raise
9
9