synth-ai 0.2.9.dev7__py3-none-any.whl → 0.2.9.dev8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (327)
  1. examples/__init__.py +16 -0
  2. examples/crafter_debug_render.py +8 -11
  3. examples/qwen_coder/README.md +102 -0
  4. examples/qwen_coder/_shared.py +113 -0
  5. examples/qwen_coder/configs/coder_lora_30b.toml +61 -0
  6. examples/qwen_coder/configs/coder_lora_4b.toml +57 -0
  7. examples/qwen_coder/configs/coder_lora_small.toml +58 -0
  8. examples/qwen_coder/generate_dataset.py +98 -0
  9. examples/qwen_coder/infer_ft_smoke.py +64 -0
  10. examples/qwen_coder/infer_prod_proxy.py +73 -0
  11. examples/qwen_coder/infer_via_synth.py +87 -0
  12. examples/qwen_coder/scripts/infer_coder.sh +18 -0
  13. examples/qwen_coder/scripts/train_coder_30b.sh +21 -0
  14. examples/qwen_coder/sft_full_17b.py +103 -0
  15. examples/qwen_coder/sft_lora_30b.py +110 -0
  16. examples/qwen_coder/subset_jsonl.py +38 -0
  17. examples/qwen_coder/validate_jsonl.py +59 -0
  18. examples/rl/run_eval.py +36 -37
  19. examples/rl/run_rl_and_save.py +5 -5
  20. examples/rl/task_app/math_single_step.py +65 -43
  21. examples/rl/task_app/math_task_app.py +3 -3
  22. examples/sft/README.md +139 -0
  23. examples/sft/configs/crafter_fft_qwen0p6b.toml +44 -0
  24. examples/sft/configs/crafter_lora_qwen0p6b.toml +45 -0
  25. examples/sft/evaluate.py +117 -0
  26. examples/sft/export_dataset.py +117 -0
  27. examples/sft/generate_traces.py +162 -0
  28. examples/swe/__init__.py +12 -0
  29. examples/swe/task_app/README.md +105 -0
  30. examples/swe/task_app/__init__.py +2 -0
  31. examples/swe/task_app/grpo_swe_mini.py +571 -0
  32. examples/swe/task_app/grpo_swe_mini_task_app.py +136 -0
  33. examples/swe/task_app/hosted/README.md +173 -0
  34. examples/swe/task_app/hosted/__init__.py +5 -0
  35. examples/swe/task_app/hosted/branching.py +143 -0
  36. examples/swe/task_app/hosted/environment_routes.py +1289 -0
  37. examples/swe/task_app/hosted/envs/__init__.py +1 -0
  38. examples/swe/task_app/hosted/envs/crafter/__init__.py +6 -0
  39. examples/swe/task_app/hosted/envs/crafter/app.py +1 -0
  40. examples/swe/task_app/hosted/envs/crafter/environment.py +522 -0
  41. examples/swe/task_app/hosted/envs/crafter/policy.py +478 -0
  42. examples/swe/task_app/hosted/envs/crafter/react_agent.py +108 -0
  43. examples/swe/task_app/hosted/envs/crafter/shared.py +305 -0
  44. examples/swe/task_app/hosted/envs/crafter/tools.py +47 -0
  45. examples/swe/task_app/hosted/envs/mini_swe/__init__.py +8 -0
  46. examples/swe/task_app/hosted/envs/mini_swe/environment.py +1164 -0
  47. examples/swe/task_app/hosted/envs/mini_swe/policy.py +355 -0
  48. examples/swe/task_app/hosted/envs/mini_swe/shared.py +83 -0
  49. examples/swe/task_app/hosted/envs/mini_swe/tools.py +96 -0
  50. examples/swe/task_app/hosted/hosted_app.py +204 -0
  51. examples/swe/task_app/hosted/inference/__init__.py +5 -0
  52. examples/swe/task_app/hosted/inference/openai_client.py +618 -0
  53. examples/swe/task_app/hosted/main.py +100 -0
  54. examples/swe/task_app/hosted/policy_routes.py +1079 -0
  55. examples/swe/task_app/hosted/registry.py +195 -0
  56. examples/swe/task_app/hosted/rollout.py +1869 -0
  57. examples/swe/task_app/hosted/storage/__init__.py +5 -0
  58. examples/swe/task_app/hosted/storage/volume.py +211 -0
  59. examples/swe/task_app/hosted/test_agents.py +161 -0
  60. examples/swe/task_app/hosted/test_service.py +137 -0
  61. examples/swe/task_app/hosted/utils.py +62 -0
  62. examples/vlm/README.md +68 -0
  63. examples/vlm/configs/crafter_vlm_gpt4o.toml +44 -0
  64. examples/vlm/crafter_image_only_agent.py +207 -0
  65. examples/vlm/crafter_openai_vlm_agent.py +277 -0
  66. examples/vlm/filter_image_rows.py +63 -0
  67. examples/vlm/run_crafter_vlm_benchmark.py +316 -0
  68. examples/warming_up_to_rl/analyze_trace_db.py +5 -5
  69. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +11 -1
  70. examples/warming_up_to_rl/export_trace_sft.py +78 -21
  71. examples/warming_up_to_rl/groq_test.py +4 -4
  72. examples/warming_up_to_rl/manage_secrets.py +13 -18
  73. examples/warming_up_to_rl/run_eval.py +42 -44
  74. examples/warming_up_to_rl/run_fft_and_save.py +11 -16
  75. examples/warming_up_to_rl/run_local_rollout.py +1 -3
  76. examples/warming_up_to_rl/run_local_rollout_modal.py +2 -4
  77. examples/warming_up_to_rl/run_local_rollout_parallel.py +1 -4
  78. examples/warming_up_to_rl/run_local_rollout_traced.py +3 -5
  79. examples/warming_up_to_rl/run_rl_and_save.py +5 -6
  80. examples/warming_up_to_rl/run_rollout_remote.py +8 -10
  81. examples/warming_up_to_rl/task_app/README.md +6 -2
  82. examples/warming_up_to_rl/task_app/grpo_crafter.py +234 -35
  83. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +2 -3
  84. examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +1 -1
  85. examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +9 -11
  86. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +131 -114
  87. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +101 -41
  88. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +73 -51
  89. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +14 -6
  90. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +16 -16
  91. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +32 -34
  92. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +94 -31
  93. examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +0 -2
  94. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +303 -203
  95. examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +21 -23
  96. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +328 -225
  97. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +13 -13
  98. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +1 -0
  99. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +1 -0
  100. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +4 -3
  101. synth/__init__.py +14 -0
  102. synth_ai/__init__.py +26 -4
  103. synth_ai/api/models/supported.py +376 -0
  104. synth_ai/api/train/builders.py +128 -21
  105. synth_ai/api/train/cli.py +80 -64
  106. synth_ai/api/train/config_finder.py +7 -2
  107. synth_ai/api/train/env_resolver.py +1 -1
  108. synth_ai/api/train/pollers.py +2 -1
  109. synth_ai/api/train/supported_algos.py +139 -0
  110. synth_ai/api/train/task_app.py +1 -2
  111. synth_ai/api/train/utils.py +13 -44
  112. synth_ai/cli/__init__.py +8 -0
  113. synth_ai/cli/_modal_wrapper.py +28 -0
  114. synth_ai/cli/_typer_patch.py +49 -0
  115. synth_ai/cli/balance.py +1 -2
  116. synth_ai/cli/calc.py +1 -1
  117. synth_ai/cli/demo.py +2 -1
  118. synth_ai/cli/recent.py +2 -2
  119. synth_ai/cli/rl_demo.py +2 -1
  120. synth_ai/cli/root.py +11 -13
  121. synth_ai/cli/status.py +2 -2
  122. synth_ai/cli/task_apps.py +529 -179
  123. synth_ai/cli/traces.py +6 -4
  124. synth_ai/cli/watch.py +12 -18
  125. synth_ai/demo_registry.py +1 -1
  126. synth_ai/demos/core/cli.py +36 -43
  127. synth_ai/demos/demo_task_apps/__init__.py +3 -3
  128. synth_ai/demos/demo_task_apps/core.py +17 -25
  129. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +3 -4
  130. synth_ai/demos/demo_task_apps/math/app.py +2 -1
  131. synth_ai/demos/demo_task_apps/math/deploy_modal.py +3 -4
  132. synth_ai/demos/demo_task_apps/math/modal_task_app.py +16 -18
  133. synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -1
  134. synth_ai/environments/examples/crafter_classic/environment.py +76 -1
  135. synth_ai/environments/reproducibility/tree.py +2 -5
  136. synth_ai/environments/service/app.py +11 -12
  137. synth_ai/environments/service/core_routes.py +4 -7
  138. synth_ai/environments/stateful/engine.py +1 -1
  139. synth_ai/environments/tasks/core.py +1 -0
  140. synth_ai/environments/tasks/filters.py +5 -6
  141. synth_ai/environments/tasks/utils.py +4 -5
  142. synth_ai/handshake.py +9 -9
  143. synth_ai/http.py +1 -1
  144. synth_ai/http_client.py +18 -10
  145. synth_ai/inference/client.py +15 -5
  146. synth_ai/jobs/client.py +78 -83
  147. synth_ai/learning/__init__.py +41 -6
  148. synth_ai/learning/algorithms.py +14 -0
  149. synth_ai/learning/client.py +91 -24
  150. synth_ai/learning/config.py +2 -38
  151. synth_ai/learning/ft_client.py +4 -59
  152. synth_ai/learning/health.py +5 -6
  153. synth_ai/learning/jobs.py +31 -47
  154. synth_ai/{rl → learning/rl}/__init__.py +14 -4
  155. synth_ai/learning/rl/client.py +267 -0
  156. synth_ai/learning/rl/config.py +31 -0
  157. synth_ai/{rl → learning/rl}/contracts.py +5 -8
  158. synth_ai/{rl → learning/rl}/env_keys.py +39 -15
  159. synth_ai/learning/rl/secrets.py +13 -0
  160. synth_ai/learning/rl_client.py +2 -281
  161. synth_ai/learning/sft/__init__.py +29 -0
  162. synth_ai/learning/sft/client.py +68 -0
  163. synth_ai/learning/sft/config.py +270 -0
  164. synth_ai/learning/sft/data.py +295 -0
  165. synth_ai/learning/sse.py +25 -24
  166. synth_ai/learning/validators.py +25 -28
  167. synth_ai/lm/__init__.py +21 -47
  168. synth_ai/main.py +4 -0
  169. synth_ai/task/__init__.py +25 -27
  170. synth_ai/task/apps/__init__.py +7 -8
  171. synth_ai/task/auth.py +8 -8
  172. synth_ai/task/client.py +14 -14
  173. synth_ai/task/contracts.py +36 -35
  174. synth_ai/task/datasets.py +6 -5
  175. synth_ai/task/errors.py +10 -10
  176. synth_ai/task/health.py +17 -9
  177. synth_ai/task/json.py +58 -23
  178. synth_ai/task/proxy.py +13 -9
  179. synth_ai/task/rubrics.py +16 -15
  180. synth_ai/task/server.py +12 -12
  181. synth_ai/task/tracing_utils.py +4 -4
  182. synth_ai/task/vendors.py +5 -6
  183. synth_ai/tracing_v3/__init__.py +2 -0
  184. synth_ai/tracing_v3/abstractions.py +21 -4
  185. synth_ai/tracing_v3/decorators.py +18 -16
  186. synth_ai/tracing_v3/hooks.py +5 -5
  187. synth_ai/tracing_v3/llm_call_record_helpers.py +6 -6
  188. synth_ai/tracing_v3/session_tracer.py +40 -14
  189. synth_ai/tracing_v3/storage/base.py +85 -0
  190. synth_ai/tracing_v3/storage/config.py +21 -8
  191. synth_ai/tracing_v3/storage/factory.py +10 -7
  192. synth_ai/tracing_v3/storage/utils.py +4 -2
  193. synth_ai/tracing_v3/turso/daemon.py +7 -2
  194. synth_ai/tracing_v3/turso/models.py +2 -2
  195. synth_ai/tracing_v3/turso/native_manager.py +1173 -0
  196. synth_ai/tracing_v3/utils.py +4 -4
  197. synth_ai/v0/api/__init__.py +8 -0
  198. synth_ai/v0/api/models/__init__.py +8 -0
  199. synth_ai/v0/api/models/supported.py +8 -0
  200. synth_ai/v0/config/__init__.py +15 -0
  201. synth_ai/v0/config/base_url.py +12 -0
  202. synth_ai/v0/lm/__init__.py +51 -0
  203. synth_ai/{lm → v0/lm}/caching/ephemeral.py +2 -2
  204. synth_ai/{lm → v0/lm}/caching/handler.py +4 -4
  205. synth_ai/{lm → v0/lm}/caching/initialize.py +1 -1
  206. synth_ai/{lm → v0/lm}/caching/persistent.py +1 -1
  207. synth_ai/{lm → v0/lm}/config.py +6 -1
  208. synth_ai/{lm → v0/lm}/core/all.py +9 -9
  209. synth_ai/{lm → v0/lm}/core/main.py +6 -6
  210. synth_ai/{lm → v0/lm}/core/main_v3.py +10 -10
  211. synth_ai/{lm → v0/lm}/core/synth_models.py +2 -14
  212. synth_ai/{lm → v0/lm}/core/vendor_clients.py +2 -2
  213. synth_ai/{lm → v0/lm}/overrides.py +2 -2
  214. synth_ai/{lm → v0/lm}/provider_support/anthropic.py +4 -4
  215. synth_ai/{lm → v0/lm}/provider_support/openai.py +5 -5
  216. synth_ai/{lm → v0/lm}/structured_outputs/handler.py +5 -5
  217. synth_ai/{lm → v0/lm}/structured_outputs/rehabilitate.py +1 -1
  218. synth_ai/{lm → v0/lm}/vendors/core/anthropic_api.py +9 -9
  219. synth_ai/{lm → v0/lm}/vendors/core/gemini_api.py +5 -5
  220. synth_ai/{lm → v0/lm}/vendors/core/mistral_api.py +5 -5
  221. synth_ai/{lm → v0/lm}/vendors/core/openai_api.py +10 -10
  222. synth_ai/{lm → v0/lm}/vendors/openai_standard.py +8 -8
  223. synth_ai/{lm → v0/lm}/vendors/openai_standard_responses.py +2 -2
  224. synth_ai/{lm → v0/lm}/vendors/supported/custom_endpoint.py +3 -3
  225. synth_ai/{lm → v0/lm}/vendors/supported/deepseek.py +2 -2
  226. synth_ai/{lm → v0/lm}/vendors/supported/grok.py +2 -2
  227. synth_ai/{lm → v0/lm}/vendors/supported/groq.py +1 -1
  228. synth_ai/{lm → v0/lm}/vendors/supported/ollama.py +1 -1
  229. synth_ai/{lm → v0/lm}/vendors/supported/openrouter.py +3 -3
  230. synth_ai/{lm → v0/lm}/vendors/supported/together.py +1 -1
  231. synth_ai/{lm → v0/lm}/vendors/synth_client.py +1 -1
  232. synth_ai/v0/tracing_v3/__init__.py +10 -0
  233. synth_ai/v0/tracing_v3/abstractions.py +3 -0
  234. synth_ai/v0/tracing_v3/decorators.py +3 -0
  235. synth_ai/v0/tracing_v3/llm_call_record_helpers.py +3 -0
  236. synth_ai/v0/tracing_v3/session_tracer.py +3 -0
  237. synth_ai-0.2.9.dev8.dist-info/METADATA +191 -0
  238. {synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.9.dev8.dist-info}/RECORD +268 -238
  239. {synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.9.dev8.dist-info}/top_level.txt +1 -0
  240. examples/common_old/backend.py +0 -20
  241. examples/evals_old/README.md +0 -98
  242. examples/evals_old/__init__.py +0 -6
  243. examples/evals_old/compare_models.py +0 -1038
  244. examples/evals_old/example_log.md +0 -145
  245. examples/evals_old/run_demo.sh +0 -126
  246. examples/evals_old/trace_analysis.py +0 -270
  247. examples/finetuning_old/_backup_synth_qwen/config.toml +0 -29
  248. examples/finetuning_old/_backup_synth_qwen/example_log.md +0 -324
  249. examples/finetuning_old/_backup_synth_qwen/filter_traces.py +0 -60
  250. examples/finetuning_old/_backup_synth_qwen/filter_traces_achievements.py +0 -243
  251. examples/finetuning_old/_backup_synth_qwen/purge_v3_traces.py +0 -109
  252. examples/finetuning_old/_backup_synth_qwen/react_agent_lm.py +0 -1924
  253. examples/finetuning_old/_backup_synth_qwen/readme.md +0 -49
  254. examples/finetuning_old/_backup_synth_qwen/run_crafter_qwen4b.py +0 -114
  255. examples/finetuning_old/_backup_synth_qwen/run_demo.sh +0 -195
  256. examples/finetuning_old/_backup_synth_qwen/sft_kickoff.py +0 -119
  257. examples/finetuning_old/synth_qwen_v1/README.md +0 -68
  258. examples/finetuning_old/synth_qwen_v1/filter_traces.py +0 -60
  259. examples/finetuning_old/synth_qwen_v1/filter_traces_achievements.py +0 -243
  260. examples/finetuning_old/synth_qwen_v1/finetune.py +0 -46
  261. examples/finetuning_old/synth_qwen_v1/hello_ft_model.py +0 -71
  262. examples/finetuning_old/synth_qwen_v1/infer.py +0 -36
  263. examples/finetuning_old/synth_qwen_v1/poll.py +0 -46
  264. examples/finetuning_old/synth_qwen_v1/prepare_data.py +0 -35
  265. examples/finetuning_old/synth_qwen_v1/purge_v3_traces.py +0 -109
  266. examples/finetuning_old/synth_qwen_v1/react_agent_lm.py +0 -1933
  267. examples/finetuning_old/synth_qwen_v1/run_crafter_sft_job.py +0 -210
  268. examples/finetuning_old/synth_qwen_v1/run_ft_job.py +0 -237
  269. examples/finetuning_old/synth_qwen_v1/upload_data.py +0 -34
  270. examples/finetuning_old/synth_qwen_v1/util.py +0 -152
  271. examples/rl_old/task_app.py +0 -1131
  272. examples/warming_up_to_rl/old/event_rewards.md +0 -234
  273. examples/warming_up_to_rl/old/notes.md +0 -73
  274. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_turso.py +0 -738
  275. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_turso.py +0 -580
  276. synth_ai/experimental/synth_oss.py +0 -445
  277. synth_ai/learning/filtering.py +0 -0
  278. synth_ai/learning/offline/dpo.py +0 -0
  279. synth_ai/learning/offline/providers.py +0 -7
  280. synth_ai/learning/offline/sft.py +0 -0
  281. synth_ai/learning/offline/shared.py +0 -0
  282. synth_ai/learning/online/grpo.py +0 -0
  283. synth_ai/learning/online/irft.py +0 -0
  284. synth_ai/learning/prompts/banking77_injection_eval.py +0 -168
  285. synth_ai/learning/prompts/gepa.py +0 -0
  286. synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +0 -211
  287. synth_ai/learning/prompts/mipro.py +0 -289
  288. synth_ai/learning/prompts/random_search.py +0 -249
  289. synth_ai/learning/prompts/run_mipro_banking77.py +0 -172
  290. synth_ai/learning/prompts/run_random_search_banking77.py +0 -329
  291. synth_ai/rl/secrets.py +0 -19
  292. synth_ai/scripts/verify_rewards.py +0 -100
  293. synth_ai/tracing/__init__.py +0 -30
  294. synth_ai/tracing_v1/__init__.py +0 -33
  295. synth_ai/tracing_v3/turso/__init__.py +0 -25
  296. synth_ai/tracing_v3/turso/manager.py +0 -838
  297. synth_ai/zyk/__init__.py +0 -30
  298. synth_ai-0.2.9.dev7.dist-info/METADATA +0 -131
  299. /synth_ai/{lm → v0/lm}/caching/__init__.py +0 -0
  300. /synth_ai/{lm → v0/lm}/caching/constants.py +0 -0
  301. /synth_ai/{lm → v0/lm}/caching/dbs.py +0 -0
  302. /synth_ai/{lm → v0/lm}/constants.py +0 -0
  303. /synth_ai/{lm → v0/lm}/core/__init__.py +0 -0
  304. /synth_ai/{lm → v0/lm}/core/exceptions.py +0 -0
  305. /synth_ai/{lm → v0/lm}/cost/__init__.py +0 -0
  306. /synth_ai/{lm → v0/lm}/cost/monitor.py +0 -0
  307. /synth_ai/{lm → v0/lm}/cost/statefulness.py +0 -0
  308. /synth_ai/{lm → v0/lm}/injection.py +0 -0
  309. /synth_ai/{lm → v0/lm}/provider_support/__init__.py +0 -0
  310. /synth_ai/{lm → v0/lm}/provider_support/suppress_logging.py +0 -0
  311. /synth_ai/{lm → v0/lm}/structured_outputs/__init__.py +0 -0
  312. /synth_ai/{lm → v0/lm}/structured_outputs/inject.py +0 -0
  313. /synth_ai/{lm → v0/lm}/tools/__init__.py +0 -0
  314. /synth_ai/{lm → v0/lm}/tools/base.py +0 -0
  315. /synth_ai/{lm → v0/lm}/unified_interface.py +0 -0
  316. /synth_ai/{lm → v0/lm}/vendors/__init__.py +0 -0
  317. /synth_ai/{lm → v0/lm}/vendors/base.py +0 -0
  318. /synth_ai/{lm → v0/lm}/vendors/core/__init__.py +0 -0
  319. /synth_ai/{lm → v0/lm}/vendors/core/synth_dev_api.py +0 -0
  320. /synth_ai/{lm → v0/lm}/vendors/local/__init__.py +0 -0
  321. /synth_ai/{lm → v0/lm}/vendors/local/ollama.py +0 -0
  322. /synth_ai/{lm → v0/lm}/vendors/retries.py +0 -0
  323. /synth_ai/{lm → v0/lm}/vendors/supported/__init__.py +0 -0
  324. /synth_ai/{lm → v0/lm}/warmup.py +0 -0
  325. {synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.9.dev8.dist-info}/WHEEL +0 -0
  326. {synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.9.dev8.dist-info}/entry_points.txt +0 -0
  327. {synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.9.dev8.dist-info}/licenses/LICENSE +0 -0
synth_ai/learning/health.py CHANGED
@@ -1,7 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
- from typing import Any, Dict, Optional
4
- import aiohttp
3
+ from typing import Any
5
4
 
6
5
  from ..http import AsyncHttpClient
7
6
 
@@ -11,13 +10,13 @@ def _api_base(b: str) -> str:
11
10
  return b if b.endswith("/api") else f"{b}/api"
12
11
 
13
12
 
14
- async def backend_health(base_url: str, api_key: str) -> Dict[str, Any]:
13
+ async def backend_health(base_url: str, api_key: str) -> dict[str, Any]:
15
14
  async with AsyncHttpClient(base_url, api_key, timeout=15.0) as http:
16
15
  js = await http.get(f"{_api_base(base_url)}/health")
17
16
  return {"ok": True, "raw": js}
18
17
 
19
18
 
20
- async def task_app_health(task_app_url: str) -> Dict[str, Any]:
19
+ async def task_app_health(task_app_url: str) -> dict[str, Any]:
21
20
  # Delegate to central task module for consistency
22
21
  from synth_ai.task.health import task_app_health as _th
23
22
 
@@ -32,7 +31,7 @@ async def pricing_preflight(
32
31
  gpu_type: str,
33
32
  estimated_seconds: float,
34
33
  container_count: int,
35
- ) -> Dict[str, Any]:
34
+ ) -> dict[str, Any]:
36
35
  body = {
37
36
  "job_type": job_type,
38
37
  "gpu_type": gpu_type,
@@ -44,7 +43,7 @@ async def pricing_preflight(
44
43
  return js if isinstance(js, dict) else {"raw": js}
45
44
 
46
45
 
47
- async def balance_autumn_normalized(base_url: str, api_key: str) -> Dict[str, Any]:
46
+ async def balance_autumn_normalized(base_url: str, api_key: str) -> dict[str, Any]:
48
47
  async with AsyncHttpClient(base_url, api_key, timeout=30.0) as http:
49
48
  js = await http.get(f"{_api_base(base_url)}/v1/balance/autumn-normalized")
50
49
  return js if isinstance(js, dict) else {"raw": js}
synth_ai/learning/jobs.py CHANGED
@@ -1,10 +1,12 @@
1
1
  from __future__ import annotations
2
2
 
3
- from typing import Any, Callable, Dict, List, Optional
4
3
  import time
4
+ from collections.abc import Callable
5
+ from contextlib import suppress
6
+ from typing import Any
5
7
 
6
- from .constants import TERMINAL_EVENT_FAILURE, TERMINAL_EVENT_SUCCESS, TERMINAL_STATUSES
7
8
  from ..http import AsyncHttpClient, sleep
9
+ from .constants import TERMINAL_EVENT_FAILURE, TERMINAL_EVENT_SUCCESS, TERMINAL_STATUSES
8
10
 
9
11
 
10
12
  def _api_base(b: str) -> str:
@@ -17,7 +19,7 @@ class JobsApiResolver:
17
19
  self._base = _api_base(base_url)
18
20
  self._strict = strict
19
21
 
20
- def status_urls(self, job_id: str) -> List[str]:
22
+ def status_urls(self, job_id: str) -> list[str]:
21
23
  if self._strict:
22
24
  return [f"{self._base}/learning/jobs/{job_id}"]
23
25
  return [
@@ -26,7 +28,7 @@ class JobsApiResolver:
26
28
  f"{self._base}/orchestration/jobs/{job_id}",
27
29
  ]
28
30
 
29
- def events_urls(self, job_id: str, since: int) -> List[str]:
31
+ def events_urls(self, job_id: str, since: int) -> list[str]:
30
32
  if self._strict:
31
33
  return [f"{self._base}/learning/jobs/{job_id}/events?since_seq={since}&limit=200"]
32
34
  return [
@@ -62,23 +64,23 @@ class JobHandle:
62
64
  max_seconds: float | None = None,
63
65
  empty_polls_threshold: int = 5,
64
66
  startup_deadline_s: int = 45,
65
- on_event: Optional[Callable[[Dict[str, Any]], None]] = None,
66
- on_metric: Optional[Callable[[Dict[str, Any]], None]] = None,
67
- ) -> Dict[str, Any]:
68
- last_seq_by_stream: Dict[str, int] = {}
69
- events_job_id: Optional[str] = None
70
- last_status: Optional[str] = None
71
- last_step_by_name: Dict[str, int] = {}
67
+ on_event: Callable[[dict[str, Any]], None] | None = None,
68
+ on_metric: Callable[[dict[str, Any]], None] | None = None,
69
+ ) -> dict[str, Any]:
70
+ last_seq_by_stream: dict[str, int] = {}
71
+ events_job_id: str | None = None
72
+ last_status: str | None = None
73
+ last_step_by_name: dict[str, int] = {}
72
74
  empty_polls = 0
73
75
  saw_any_event = False
74
76
  start_t = time.time()
75
77
  resolver = JobsApiResolver(self.base_url, strict=self.strict)
76
- detected_fine_tuned_model: Optional[str] = None
78
+ detected_fine_tuned_model: str | None = None
77
79
 
78
80
  async with AsyncHttpClient(self.base_url, self.api_key, timeout=self.timeout) as http:
79
81
  while True:
80
82
  # Status
81
- status_data: Optional[Dict[str, Any]] = None
83
+ status_data: dict[str, Any] | None = None
82
84
  for su in resolver.status_urls(self.job_id):
83
85
  try:
84
86
  status_data = await http.get(su)
@@ -99,10 +101,8 @@ class JobHandle:
99
101
  if status and status != last_status:
100
102
  last_status = status
101
103
  if on_event:
102
- try:
104
+ with suppress(Exception):
103
105
  on_event({"type": "job.status", "message": status})
104
- except Exception:
105
- pass
106
106
 
107
107
  # Events
108
108
  stream_ids = [self.job_id]
@@ -110,7 +110,7 @@ class JobHandle:
110
110
  stream_ids.append(events_job_id)
111
111
  total_events_this_cycle = 0
112
112
  terminal_event_seen = False
113
- terminal_event_status: Optional[str] = None
113
+ terminal_event_status: str | None = None
114
114
  for ev_id in stream_ids:
115
115
  since = last_seq_by_stream.get(ev_id, 0)
116
116
  for eu in resolver.events_urls(ev_id, since):
@@ -118,11 +118,8 @@ class JobHandle:
118
118
  ev_js = await http.get(eu)
119
119
  except Exception:
120
120
  continue
121
- try:
122
- events = (ev_js or {}).get("events") or (ev_js or {}).get("data") or []
123
- if not isinstance(events, list):
124
- events = []
125
- except Exception:
121
+ events = (ev_js or {}).get("events") or (ev_js or {}).get("data") or []
122
+ if not isinstance(events, list):
126
123
  events = []
127
124
  total_events_this_cycle += len(events)
128
125
  if events:
@@ -133,24 +130,16 @@ class JobHandle:
133
130
  continue
134
131
  last_seq_by_stream[ev_id] = seq_val
135
132
  if on_event:
136
- try:
133
+ with suppress(Exception):
137
134
  on_event(e)
138
- except Exception:
139
- pass
140
135
  et = str(e.get("type") or e.get("event_type") or "").lower()
141
136
  # Capture fine_tuned_model from event data when available
142
137
  if not detected_fine_tuned_model:
143
- try:
144
- data_obj = e.get("data") or {}
145
- ftm = (
146
- data_obj.get("fine_tuned_model")
147
- if isinstance(data_obj, dict)
148
- else None
149
- )
138
+ data_obj = e.get("data") or {}
139
+ if isinstance(data_obj, dict):
140
+ ftm = data_obj.get("fine_tuned_model")
150
141
  if isinstance(ftm, str) and ftm:
151
142
  detected_fine_tuned_model = ftm
152
- except Exception:
153
- pass
154
143
  if et in TERMINAL_EVENT_SUCCESS:
155
144
  terminal_event_seen = True
156
145
  terminal_event_status = "succeeded"
@@ -170,10 +159,8 @@ class JobHandle:
170
159
  continue
171
160
  last_step_by_name[name] = step
172
161
  if on_metric:
173
- try:
162
+ with suppress(Exception):
174
163
  on_metric(p)
175
- except Exception:
176
- pass
177
164
  except Exception:
178
165
  pass
179
166
 
@@ -181,20 +168,17 @@ class JobHandle:
181
168
  if terminal_event_seen or (status and status in TERMINAL_STATUSES):
182
169
  # Best-effort enrichment of final result with fine_tuned_model
183
170
  result_status = terminal_event_status or status or "completed"
184
- final_res: Dict[str, Any] = {"status": result_status, "job_id": self.job_id}
171
+ final_res: dict[str, Any] = {"status": result_status, "job_id": self.job_id}
185
172
  if not detected_fine_tuned_model:
186
173
  # Briefly try to re-fetch status to see if fine_tuned_model is persisted
187
174
  try:
188
175
  for su in resolver.status_urls(self.job_id):
189
- try:
190
- final_status = await http.get(su)
191
- if isinstance(final_status, dict):
192
- ftm2 = final_status.get("fine_tuned_model")
193
- if isinstance(ftm2, str) and ftm2:
194
- detected_fine_tuned_model = ftm2
195
- break
196
- except Exception:
197
- continue
176
+ final_status = await http.get(su)
177
+ if isinstance(final_status, dict):
178
+ ftm2 = final_status.get("fine_tuned_model")
179
+ if isinstance(ftm2, str) and ftm2:
180
+ detected_fine_tuned_model = ftm2
181
+ break
198
182
  except Exception:
199
183
  pass
200
184
  if detected_fine_tuned_model:
synth_ai/{rl → learning/rl}/__init__.py CHANGED
@@ -1,18 +1,28 @@
1
+ from __future__ import annotations
2
+
3
+ from .client import RlClient
4
+ from .config import RLJobConfig
1
5
  from .contracts import (
2
6
  RolloutEnvSpec,
7
+ RolloutMetrics,
3
8
  RolloutPolicySpec,
4
9
  RolloutRecordConfig,
5
- RolloutSafetyConfig,
6
10
  RolloutRequest,
11
+ RolloutResponse,
12
+ RolloutSafetyConfig,
7
13
  RolloutStep,
8
14
  RolloutTrajectory,
9
- RolloutMetrics,
10
- RolloutResponse,
11
15
  )
12
- from .env_keys import MAX_ENVIRONMENT_API_KEY_BYTES, encrypt_for_backend, setup_environment_api_key
16
+ from .env_keys import (
17
+ MAX_ENVIRONMENT_API_KEY_BYTES,
18
+ encrypt_for_backend,
19
+ setup_environment_api_key,
20
+ )
13
21
  from .secrets import mint_environment_api_key
14
22
 
15
23
  __all__ = [
24
+ "RlClient",
25
+ "RLJobConfig",
16
26
  "RolloutEnvSpec",
17
27
  "RolloutPolicySpec",
18
28
  "RolloutRecordConfig",
@@ -0,0 +1,267 @@
1
+ from __future__ import annotations
2
+
3
+ import time
4
+ from collections.abc import Callable
5
+ from contextlib import suppress
6
+ from typing import Any
7
+
8
+ from synth_ai.api.models.supported import (
9
+ UnsupportedModelError,
10
+ normalize_model_identifier,
11
+ )
12
+
13
+ from ...http import AsyncHttpClient, HTTPError, sleep
14
+
15
+
16
+ def _api_base(b: str) -> str:
17
+ b = (b or "").rstrip("/")
18
+ return b if b.endswith("/api") else f"{b}/api"
19
+
20
+
21
class RlClient:
    """Lightweight RL client for provider-agnostic job control.

    Every method opens its own ``AsyncHttpClient`` for the duration of a
    single request. ``timeout`` (seconds) is only applied to ``create_job``;
    all other requests use a fixed 30 second timeout.
    """

    def __init__(self, base_url: str, api_key: str, *, timeout: float = 600.0) -> None:
        """Store connection settings; trailing slashes are stripped from *base_url*."""
        self._base_url = base_url.rstrip("/")
        self._api_key = api_key
        self._timeout = timeout

    async def resolve_trainer_start_url(self, trainer_id: str) -> str:
        """Look up the ``training_start_url`` advertised for *trainer_id*.

        Raises:
            HTTPError: with ``status=500`` when the response is not a dict or
                does not contain a non-empty string ``training_start_url``.
        """
        path = f"/api/rl/services/{trainer_id}"
        async with AsyncHttpClient(self._base_url, self._api_key, timeout=30.0) as http:
            js = await http.get(path)
        if not isinstance(js, dict):
            raise HTTPError(
                status=500,
                url=path,
                message="invalid_service_response",
                body_snippet=str(js)[:200],
            )
        start_url = js.get("training_start_url")
        if not isinstance(start_url, str) or not start_url:
            raise HTTPError(
                status=500,
                url=path,
                message="missing_training_start_url",
                body_snippet=str(js)[:200],
            )
        return start_url

    async def create_job(
        self,
        *,
        model: str,
        task_app_url: str,
        trainer: dict[str, Any],
        trainer_id: str | None = None,
        job_config_id: str | None = None,
        inline_config: dict[str, Any] | None = None,
    ) -> dict[str, Any]:
        """Create an RL job and return the decoded response body.

        Args:
            model: Model identifier; normalized via ``normalize_model_identifier``.
            task_app_url: Sent as ``endpoint_base_url`` in the request body.
            trainer: Mapping read for ``batch_size`` (default 1) and
                ``group_size`` (default 2, clamped to a minimum of 2).
            trainer_id: Accepted for interface compatibility; not used in the
                request body.
            job_config_id: Included in the body only when truthy.
            inline_config: Included in the body as ``config`` only when truthy.

        Raises:
            ValueError: if *model* is rejected with ``UnsupportedModelError``.
            HTTPError: with ``status=500`` when the response is not a dict.
        """
        try:
            normalized_model = normalize_model_identifier(model)
        except UnsupportedModelError as exc:
            raise ValueError(str(exc)) from exc

        body = {
            "job_type": "rl",
            "data": {
                "model": normalized_model,
                "endpoint_base_url": task_app_url,
                **({"job_config_id": job_config_id} if job_config_id else {}),
                **({"config": inline_config} if inline_config else {}),
                "trainer": {
                    "batch_size": int(trainer.get("batch_size", 1)),
                    "group_size": max(2, int(trainer.get("group_size", 2))),
                },
            },
        }
        async with AsyncHttpClient(self._base_url, self._api_key, timeout=self._timeout) as http:
            js = await http.post_json(f"{_api_base(self._base_url)}/rl/jobs", json=body)
        if not isinstance(js, dict):
            raise HTTPError(
                status=500,
                url="/api/rl/jobs",
                message="invalid_create_response",
                body_snippet=str(js)[:200],
            )
        return js

    async def start_job_if_supported(self, job_id: str) -> dict[str, Any] | None:
        """POST to the job's ``/start`` endpoint.

        Returns ``None`` when the endpoint answers 404; any other ``HTTPError``
        propagates to the caller.
        """
        path = f"{_api_base(self._base_url)}/rl/jobs/{job_id}/start"
        try:
            async with AsyncHttpClient(self._base_url, self._api_key, timeout=30.0) as http:
                return await http.post_json(path, json={})
        except HTTPError as he:  # noqa: PERF203
            if he.status == 404:
                return None
            raise

    async def get_job(self, job_id: str) -> dict[str, Any]:
        """Fetch the job record from the ``/learning/jobs/{job_id}`` endpoint."""
        async with AsyncHttpClient(self._base_url, self._api_key, timeout=30.0) as http:
            return await http.get(f"{_api_base(self._base_url)}/learning/jobs/{job_id}")

    async def get_events(
        self, job_id: str, *, since_seq: int = 0, limit: int = 200
    ) -> list[dict[str, Any]]:
        """Fetch job events, passing *since_seq* and *limit* as query parameters.

        Returns the list found under ``events`` (or ``data``) in the response,
        or an empty list when the response has neither. ``HTTPError`` is
        printed for diagnostics and then re-raised.
        """
        params = {"since_seq": since_seq, "limit": limit}
        async with AsyncHttpClient(self._base_url, self._api_key, timeout=30.0) as http:
            try:
                js = await http.get(
                    f"{_api_base(self._base_url)}/learning/jobs/{job_id}/events", params=params
                )
            except HTTPError as he:
                with suppress(Exception):
                    print(
                        f"[poll] events HTTPError status={he.status} url={he.url} since_seq={since_seq} body={(he.body_snippet or '')[:200]}"
                    )
                raise
        if isinstance(js, dict):
            evs = js.get("events") or js.get("data")
            if isinstance(evs, list):
                return evs
        return []

    async def get_metrics(
        self, job_id: str, *, after_step: int = -1, limit: int = 200
    ) -> list[dict[str, Any]]:
        """Fetch metric points, passing *after_step* and *limit* as query parameters.

        Returns the list under ``points`` in the response, or an empty list
        when the response does not have that shape.
        """
        params = {"after_step": after_step, "limit": limit}
        async with AsyncHttpClient(self._base_url, self._api_key, timeout=30.0) as http:
            js = await http.get(
                f"{_api_base(self._base_url)}/learning/jobs/{job_id}/metrics", params=params
            )
        if isinstance(js, dict) and isinstance(js.get("points"), list):
            return js["points"]
        return []

    async def poll_until_terminal(
        self,
        job_id: str,
        *,
        interval_seconds: float = 2.0,
        max_seconds: float | None = None,
        empty_polls_threshold: int = 5,
        startup_deadline_s: int = 45,
        on_event: Callable[[dict[str, Any]], None] | None = None,
        on_metric: Callable[[dict[str, Any]], None] | None = None,
    ) -> dict[str, Any]:
        """Poll job status, events and metrics until the job reaches a terminal state.

        Each cycle fetches the job record, new events for the job (and for its
        ``linked_job_id`` stream once discovered), and new metric points.
        Failures of individual requests are printed and tolerated.

        Args:
            job_id: Job to poll.
            interval_seconds: Sleep between cycles.
            max_seconds: Overall budget, checked after each sleep; ``None``
                disables the timeout.
            empty_polls_threshold: Number of consecutive cycles without events
                (minimum 1) after which polling is aborted.
            startup_deadline_s: Abort if no event at all has been seen once
                this many seconds have elapsed.
            on_event: Called for each new event and for each status change
                (as ``{"type": "rl.status", "message": status}``); exceptions
                raised by the callback are suppressed.
            on_metric: Called for each new metric point; exceptions raised by
                the callback are suppressed.

        Returns:
            ``{"status": ..., "job_id": job_id}`` once a terminal event or a
            terminal status is observed.

        Raises:
            AssertionError: when the empty-poll threshold or the startup
                window is exceeded.
            TimeoutError: when *max_seconds* elapses.
        """
        last_seq_by_stream: dict[str, int] = {}
        events_job_id: str | None = None
        last_status: str | None = None
        last_step_by_name: dict[str, int] = {}
        empty_polls = 0
        saw_any_event = False
        start_t = time.time()
        terminal = {"succeeded", "failed", "cancelled", "canceled", "error", "completed"}

        while True:
            # --- Job status -------------------------------------------------
            status_data: dict[str, Any] | None = None
            try:
                status_data = await self.get_job(job_id)
            except Exception:
                status_data = None
            if status_data is None:
                with suppress(Exception):
                    print(f"[poll] get_job returned None base={self._base_url} job_id={job_id}")
            status = str((status_data or {}).get("status") or "").lower()
            if status_data:
                linked = status_data.get("linked_job_id")
                if isinstance(linked, str) and linked and linked != events_job_id:
                    events_job_id = linked
                    with suppress(Exception):
                        print(f"[poll] discovered linked_job_id stream={events_job_id}")
            if status and status != last_status:
                last_status = status
                if on_event:
                    with suppress(Exception):
                        on_event({"type": "rl.status", "message": status})

            # --- Events -----------------------------------------------------
            stream_ids = [job_id]
            if events_job_id and events_job_id not in stream_ids:
                stream_ids.append(events_job_id)
            with suppress(Exception):
                print(
                    f"[poll] streams={stream_ids} intervals={interval_seconds}s since_map={last_seq_by_stream} empty_polls={empty_polls}"
                )
            total_events_this_cycle = 0
            terminal_event_seen = False
            terminal_event_status: str | None = None
            for ev_id in stream_ids:
                since = last_seq_by_stream.get(ev_id, 0)
                try:
                    events = await self.get_events(ev_id, since_seq=since, limit=200)
                except HTTPError as he:
                    with suppress(Exception):
                        print(
                            f"[poll] get_events error status={he.status} url={he.url} since={since} body={(he.body_snippet or '')[:200]}"
                        )
                    events = []
                except Exception as e:
                    with suppress(Exception):
                        print(
                            f"[poll] get_events unexpected error ev_id={ev_id} since={since} err={type(e).__name__}: {e}"
                        )
                    events = []
                total_events_this_cycle += len(events)
                if events:
                    saw_any_event = True
                for event in events:
                    seq_val = int(event.get("seq") or 0)
                    # Skip anything at or below the last sequence already delivered.
                    if seq_val <= last_seq_by_stream.get(ev_id, 0):
                        continue
                    last_seq_by_stream[ev_id] = seq_val
                    if on_event:
                        with suppress(Exception):
                            on_event(event)
                    et = str(event.get("type") or event.get("event_type") or "").lower()
                    if et in ("rl.job.completed", "workflow.completed", "rl.train.completed"):
                        terminal_event_seen = True
                        terminal_event_status = "succeeded"
                    elif et in ("rl.job.failed", "workflow.failed"):
                        terminal_event_seen = True
                        terminal_event_status = "failed"

            # --- Metrics (best-effort) ---------------------------------------
            try:
                after = max(last_step_by_name.values()) if last_step_by_name else -1
                points = await self.get_metrics(job_id, after_step=after, limit=200)
                for p in points:
                    name = str(p.get("name") or "")
                    raw_step = p.get("step")
                    # Only a missing/empty step maps to -1. Step 0 is a valid
                    # first step and must not be collapsed to -1 (which the
                    # dedupe check below would silently drop).
                    if raw_step is None or raw_step == "":
                        step = -1
                    else:
                        step = int(raw_step)
                    if step <= last_step_by_name.get(name, -1):
                        continue
                    last_step_by_name[name] = step
                    if on_metric:
                        with suppress(Exception):
                            on_metric(p)
            except Exception as metrics_exc:
                # Metrics must never abort polling, but report the failure
                # instead of hiding it completely.
                with suppress(Exception):
                    print(
                        f"[poll] get_metrics error job_id={job_id} err={type(metrics_exc).__name__}: {metrics_exc}"
                    )

            # --- Termination checks -------------------------------------------
            if terminal_event_seen:
                return {"status": terminal_event_status or status or "completed", "job_id": job_id}
            if status and status in terminal:
                return {"status": status, "job_id": job_id}

            if total_events_this_cycle == 0:
                empty_polls += 1
            else:
                empty_polls = 0
            if empty_polls >= max(1, int(empty_polls_threshold)):
                with suppress(Exception):
                    print(
                        f"[poll] threshold hit: empty_polls={empty_polls} >= {empty_polls_threshold} streams={stream_ids} last_seq_map={last_seq_by_stream}"
                    )
                raise AssertionError(
                    f"No new events detected for {empty_polls_threshold} consecutive polls. Check event ingestion."
                )

            if not saw_any_event and (time.time() - start_t) > int(startup_deadline_s):
                with suppress(Exception):
                    print(
                        f"[poll] startup window exceeded: {startup_deadline_s}s base={self._base_url} job={job_id} streams={stream_ids} last_seq_map={last_seq_by_stream}"
                    )
                raise AssertionError(
                    f"No events observed within startup window ({startup_deadline_s}s). Investigate event streaming."
                )

            await sleep(interval_seconds)
            if max_seconds is not None and (time.time() - start_t) >= max_seconds:
                raise TimeoutError(f"Polling timed out after {max_seconds}s for job {job_id}")
@@ -0,0 +1,31 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ from typing import Any
5
+
6
+
7
+ def _ensure_positive(value: Any, *, name: str) -> int:
8
+ try:
9
+ ivalue = int(value)
10
+ except (TypeError, ValueError) as exc:
11
+ raise ValueError(f"{name} must be an integer") from exc
12
+ if ivalue < 1:
13
+ raise ValueError(f"{name} must be >= 1")
14
+ return ivalue
15
+
16
+
17
+ @dataclass(slots=True)
18
+ class RLJobConfig:
19
+ model: str
20
+ task_app_url: str
21
+ trainer_id: str
22
+ batch_size: int = 1
23
+ group_size: int = 2
24
+ job_config_id: str | None = None
25
+ inline_config: dict[str, Any] | None = None
26
+
27
+ def trainer_dict(self) -> dict[str, Any]:
28
+ return {
29
+ "batch_size": _ensure_positive(self.batch_size, name="trainer.batch_size"),
30
+ "group_size": _ensure_positive(self.group_size, name="trainer.group_size"),
31
+ }
@@ -1,20 +1,17 @@
1
- from __future__ import annotations
1
+ """Compatibility re-export for rollout contracts used by RL tooling."""
2
2
 
3
- """
4
- Compatibility layer: re-export Task App rollout contracts from synth_ai.task.contracts
5
- so existing imports continue to work while consolidating under synth_ai.task.
6
- """
3
+ from __future__ import annotations
7
4
 
8
5
  from synth_ai.task.contracts import (
9
6
  RolloutEnvSpec,
7
+ RolloutMetrics,
10
8
  RolloutPolicySpec,
11
9
  RolloutRecordConfig,
12
- RolloutSafetyConfig,
13
10
  RolloutRequest,
11
+ RolloutResponse,
12
+ RolloutSafetyConfig,
14
13
  RolloutStep,
15
14
  RolloutTrajectory,
16
- RolloutMetrics,
17
- RolloutResponse,
18
15
  )
19
16
 
20
17
  __all__ = [