synth-ai 0.2.9.dev7__py3-none-any.whl → 0.2.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (323) hide show
  1. examples/__init__.py +16 -0
  2. examples/crafter_debug_render.py +8 -11
  3. examples/dev/qwen3_32b_qlora_4xh100.toml +40 -0
  4. examples/multi_step/crafter_rl_lora.md +29 -0
  5. examples/qwen_coder/README.md +102 -0
  6. examples/qwen_coder/_shared.py +113 -0
  7. examples/qwen_coder/configs/coder_lora_30b.toml +61 -0
  8. examples/qwen_coder/configs/coder_lora_4b.toml +57 -0
  9. examples/qwen_coder/configs/coder_lora_small.toml +58 -0
  10. examples/qwen_coder/generate_dataset.py +98 -0
  11. examples/qwen_coder/infer_ft_smoke.py +65 -0
  12. examples/qwen_coder/infer_prod_proxy.py +73 -0
  13. examples/qwen_coder/infer_via_synth.py +87 -0
  14. examples/qwen_coder/scripts/infer_coder.sh +19 -0
  15. examples/qwen_coder/scripts/train_coder_30b.sh +22 -0
  16. examples/qwen_coder/sft_full_17b.py +103 -0
  17. examples/qwen_coder/sft_lora_30b.py +110 -0
  18. examples/qwen_coder/subset_jsonl.py +39 -0
  19. examples/qwen_coder/todos.md +38 -0
  20. examples/qwen_coder/validate_jsonl.py +60 -0
  21. examples/rl/run_eval.py +36 -37
  22. examples/rl/run_rl_and_save.py +5 -5
  23. examples/rl/task_app/math_single_step.py +65 -43
  24. examples/rl/task_app/math_task_app.py +3 -3
  25. examples/sft/README.md +139 -0
  26. examples/sft/configs/crafter_fft_qwen0p6b.toml +44 -0
  27. examples/sft/configs/crafter_lora_qwen0p6b.toml +45 -0
  28. examples/sft/evaluate.py +117 -0
  29. examples/sft/export_dataset.py +117 -0
  30. examples/sft/generate_traces.py +162 -0
  31. examples/swe/__init__.py +12 -0
  32. examples/swe/task_app/README.md +105 -0
  33. examples/swe/task_app/__init__.py +2 -0
  34. examples/swe/task_app/grpo_swe_mini.py +571 -0
  35. examples/swe/task_app/grpo_swe_mini_task_app.py +136 -0
  36. examples/swe/task_app/hosted/README.md +173 -0
  37. examples/swe/task_app/hosted/__init__.py +5 -0
  38. examples/swe/task_app/hosted/branching.py +143 -0
  39. examples/swe/task_app/hosted/environment_routes.py +1289 -0
  40. examples/swe/task_app/hosted/envs/__init__.py +1 -0
  41. examples/swe/task_app/hosted/envs/crafter/__init__.py +6 -0
  42. examples/swe/task_app/hosted/envs/crafter/app.py +1 -0
  43. examples/swe/task_app/hosted/envs/crafter/environment.py +522 -0
  44. examples/swe/task_app/hosted/envs/crafter/policy.py +478 -0
  45. examples/swe/task_app/hosted/envs/crafter/react_agent.py +108 -0
  46. examples/swe/task_app/hosted/envs/crafter/shared.py +305 -0
  47. examples/swe/task_app/hosted/envs/crafter/tools.py +47 -0
  48. examples/swe/task_app/hosted/envs/mini_swe/__init__.py +8 -0
  49. examples/swe/task_app/hosted/envs/mini_swe/environment.py +1164 -0
  50. examples/swe/task_app/hosted/envs/mini_swe/policy.py +355 -0
  51. examples/swe/task_app/hosted/envs/mini_swe/shared.py +83 -0
  52. examples/swe/task_app/hosted/envs/mini_swe/tools.py +96 -0
  53. examples/swe/task_app/hosted/hosted_app.py +204 -0
  54. examples/swe/task_app/hosted/inference/__init__.py +5 -0
  55. examples/swe/task_app/hosted/inference/openai_client.py +618 -0
  56. examples/swe/task_app/hosted/main.py +100 -0
  57. examples/swe/task_app/hosted/policy_routes.py +1079 -0
  58. examples/swe/task_app/hosted/registry.py +195 -0
  59. examples/swe/task_app/hosted/rollout.py +1869 -0
  60. examples/swe/task_app/hosted/storage/__init__.py +5 -0
  61. examples/swe/task_app/hosted/storage/volume.py +211 -0
  62. examples/swe/task_app/hosted/test_agents.py +161 -0
  63. examples/swe/task_app/hosted/test_service.py +137 -0
  64. examples/swe/task_app/hosted/utils.py +62 -0
  65. examples/vlm/PROPOSAL.md +53 -0
  66. examples/vlm/README.md +68 -0
  67. examples/vlm/configs/crafter_vlm_gpt4o.toml +44 -0
  68. examples/vlm/crafter_image_only_agent.py +207 -0
  69. examples/vlm/crafter_openai_vlm_agent.py +277 -0
  70. examples/vlm/filter_image_rows.py +63 -0
  71. examples/vlm/run_crafter_vlm_benchmark.py +316 -0
  72. examples/warming_up_to_rl/analyze_trace_db.py +5 -5
  73. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +11 -1
  74. examples/warming_up_to_rl/export_trace_sft.py +78 -21
  75. examples/warming_up_to_rl/groq_test.py +4 -4
  76. examples/warming_up_to_rl/manage_secrets.py +13 -18
  77. examples/warming_up_to_rl/run_eval.py +42 -44
  78. examples/warming_up_to_rl/run_fft_and_save.py +11 -16
  79. examples/warming_up_to_rl/run_local_rollout.py +1 -3
  80. examples/warming_up_to_rl/run_local_rollout_modal.py +2 -4
  81. examples/warming_up_to_rl/run_local_rollout_parallel.py +1 -4
  82. examples/warming_up_to_rl/run_local_rollout_traced.py +3 -5
  83. examples/warming_up_to_rl/run_rl_and_save.py +5 -6
  84. examples/warming_up_to_rl/run_rollout_remote.py +8 -10
  85. examples/warming_up_to_rl/task_app/README.md +6 -2
  86. examples/warming_up_to_rl/task_app/grpo_crafter.py +234 -35
  87. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +2 -3
  88. examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +1 -1
  89. examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +9 -11
  90. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +131 -114
  91. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +101 -41
  92. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +73 -51
  93. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +14 -6
  94. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +16 -16
  95. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +32 -34
  96. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +94 -31
  97. examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +0 -2
  98. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +303 -203
  99. examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +21 -23
  100. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +328 -225
  101. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +13 -13
  102. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +1 -0
  103. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +1 -0
  104. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +4 -3
  105. synth_ai/api/models/supported.py +376 -0
  106. synth_ai/api/train/builders.py +128 -21
  107. synth_ai/api/train/cli.py +80 -64
  108. synth_ai/api/train/config_finder.py +7 -2
  109. synth_ai/api/train/env_resolver.py +1 -1
  110. synth_ai/api/train/pollers.py +2 -1
  111. synth_ai/api/train/supported_algos.py +139 -0
  112. synth_ai/api/train/task_app.py +1 -2
  113. synth_ai/api/train/utils.py +13 -44
  114. synth_ai/cli/__init__.py +8 -0
  115. synth_ai/cli/_modal_wrapper.py +28 -0
  116. synth_ai/cli/_typer_patch.py +49 -0
  117. synth_ai/cli/balance.py +1 -2
  118. synth_ai/cli/calc.py +1 -1
  119. synth_ai/cli/demo.py +2 -1
  120. synth_ai/cli/recent.py +2 -2
  121. synth_ai/cli/rl_demo.py +2 -1
  122. synth_ai/cli/root.py +11 -13
  123. synth_ai/cli/status.py +2 -2
  124. synth_ai/cli/task_apps.py +529 -179
  125. synth_ai/cli/traces.py +6 -4
  126. synth_ai/cli/watch.py +12 -18
  127. synth_ai/demo_registry.py +1 -1
  128. synth_ai/demos/core/cli.py +36 -43
  129. synth_ai/demos/demo_task_apps/__init__.py +3 -3
  130. synth_ai/demos/demo_task_apps/core.py +17 -25
  131. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +3 -4
  132. synth_ai/demos/demo_task_apps/math/app.py +2 -1
  133. synth_ai/demos/demo_task_apps/math/deploy_modal.py +3 -4
  134. synth_ai/demos/demo_task_apps/math/modal_task_app.py +16 -18
  135. synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -1
  136. synth_ai/environments/examples/crafter_classic/environment.py +76 -1
  137. synth_ai/environments/reproducibility/tree.py +2 -5
  138. synth_ai/environments/service/app.py +11 -12
  139. synth_ai/environments/service/core_routes.py +4 -7
  140. synth_ai/environments/stateful/engine.py +1 -1
  141. synth_ai/environments/tasks/core.py +1 -0
  142. synth_ai/environments/tasks/filters.py +5 -6
  143. synth_ai/environments/tasks/utils.py +4 -5
  144. synth_ai/handshake.py +9 -9
  145. synth_ai/http.py +1 -1
  146. synth_ai/http_client.py +18 -10
  147. synth_ai/inference/client.py +15 -5
  148. synth_ai/jobs/client.py +78 -83
  149. synth_ai/learning/__init__.py +41 -6
  150. synth_ai/learning/algorithms.py +14 -0
  151. synth_ai/learning/client.py +91 -24
  152. synth_ai/learning/config.py +2 -38
  153. synth_ai/learning/ft_client.py +4 -59
  154. synth_ai/learning/health.py +5 -6
  155. synth_ai/learning/jobs.py +31 -47
  156. synth_ai/{rl → learning/rl}/__init__.py +14 -4
  157. synth_ai/learning/rl/client.py +267 -0
  158. synth_ai/learning/rl/config.py +31 -0
  159. synth_ai/{rl → learning/rl}/contracts.py +5 -8
  160. synth_ai/{rl → learning/rl}/env_keys.py +39 -15
  161. synth_ai/learning/rl/secrets.py +13 -0
  162. synth_ai/learning/rl_client.py +2 -281
  163. synth_ai/learning/sft/__init__.py +29 -0
  164. synth_ai/learning/sft/client.py +68 -0
  165. synth_ai/learning/sft/config.py +270 -0
  166. synth_ai/learning/sft/data.py +295 -0
  167. synth_ai/learning/sse.py +25 -24
  168. synth_ai/learning/validators.py +25 -28
  169. synth_ai/lm/__init__.py +21 -47
  170. synth_ai/task/__init__.py +25 -27
  171. synth_ai/task/apps/__init__.py +7 -8
  172. synth_ai/task/auth.py +8 -8
  173. synth_ai/task/client.py +14 -14
  174. synth_ai/task/contracts.py +36 -35
  175. synth_ai/task/datasets.py +6 -5
  176. synth_ai/task/errors.py +10 -10
  177. synth_ai/task/health.py +17 -9
  178. synth_ai/task/json.py +58 -23
  179. synth_ai/task/proxy.py +13 -9
  180. synth_ai/task/rubrics.py +16 -15
  181. synth_ai/task/server.py +12 -12
  182. synth_ai/task/tracing_utils.py +4 -4
  183. synth_ai/task/vendors.py +5 -6
  184. synth_ai/tracing_v3/__init__.py +2 -0
  185. synth_ai/tracing_v3/abstractions.py +21 -4
  186. synth_ai/tracing_v3/decorators.py +18 -16
  187. synth_ai/tracing_v3/hooks.py +5 -5
  188. synth_ai/tracing_v3/llm_call_record_helpers.py +6 -6
  189. synth_ai/tracing_v3/session_tracer.py +40 -14
  190. synth_ai/tracing_v3/storage/base.py +85 -0
  191. synth_ai/tracing_v3/storage/config.py +21 -8
  192. synth_ai/tracing_v3/storage/factory.py +10 -7
  193. synth_ai/tracing_v3/storage/utils.py +4 -2
  194. synth_ai/tracing_v3/turso/daemon.py +7 -2
  195. synth_ai/tracing_v3/turso/models.py +2 -2
  196. synth_ai/tracing_v3/turso/native_manager.py +1173 -0
  197. synth_ai/tracing_v3/utils.py +4 -4
  198. synth_ai/v0/api/__init__.py +8 -0
  199. synth_ai/v0/api/models/__init__.py +8 -0
  200. synth_ai/v0/api/models/supported.py +8 -0
  201. synth_ai/v0/config/__init__.py +15 -0
  202. synth_ai/v0/config/base_url.py +12 -0
  203. synth_ai/v0/lm/__init__.py +51 -0
  204. synth_ai/{lm → v0/lm}/caching/ephemeral.py +2 -2
  205. synth_ai/{lm → v0/lm}/caching/handler.py +4 -4
  206. synth_ai/{lm → v0/lm}/caching/initialize.py +1 -1
  207. synth_ai/{lm → v0/lm}/caching/persistent.py +1 -1
  208. synth_ai/{lm → v0/lm}/config.py +6 -1
  209. synth_ai/{lm → v0/lm}/core/all.py +9 -9
  210. synth_ai/{lm → v0/lm}/core/main.py +6 -6
  211. synth_ai/{lm → v0/lm}/core/main_v3.py +10 -10
  212. synth_ai/{lm → v0/lm}/core/synth_models.py +2 -14
  213. synth_ai/{lm → v0/lm}/core/vendor_clients.py +2 -2
  214. synth_ai/{lm → v0/lm}/overrides.py +2 -2
  215. synth_ai/{lm → v0/lm}/provider_support/anthropic.py +4 -4
  216. synth_ai/{lm → v0/lm}/provider_support/openai.py +5 -5
  217. synth_ai/{lm → v0/lm}/structured_outputs/handler.py +5 -5
  218. synth_ai/{lm → v0/lm}/structured_outputs/rehabilitate.py +1 -1
  219. synth_ai/{lm → v0/lm}/vendors/core/anthropic_api.py +9 -9
  220. synth_ai/{lm → v0/lm}/vendors/core/gemini_api.py +5 -5
  221. synth_ai/{lm → v0/lm}/vendors/core/mistral_api.py +5 -5
  222. synth_ai/{lm → v0/lm}/vendors/core/openai_api.py +10 -10
  223. synth_ai/{lm → v0/lm}/vendors/openai_standard.py +8 -8
  224. synth_ai/{lm → v0/lm}/vendors/openai_standard_responses.py +2 -2
  225. synth_ai/{lm → v0/lm}/vendors/supported/custom_endpoint.py +3 -3
  226. synth_ai/{lm → v0/lm}/vendors/supported/deepseek.py +2 -2
  227. synth_ai/{lm → v0/lm}/vendors/supported/grok.py +2 -2
  228. synth_ai/{lm → v0/lm}/vendors/supported/groq.py +1 -1
  229. synth_ai/{lm → v0/lm}/vendors/supported/ollama.py +1 -1
  230. synth_ai/{lm → v0/lm}/vendors/supported/openrouter.py +3 -3
  231. synth_ai/{lm → v0/lm}/vendors/supported/together.py +1 -1
  232. synth_ai/{lm → v0/lm}/vendors/synth_client.py +1 -1
  233. synth_ai/v0/tracing_v3/__init__.py +10 -0
  234. synth_ai/v0/tracing_v3/abstractions.py +3 -0
  235. synth_ai/v0/tracing_v3/decorators.py +3 -0
  236. synth_ai/v0/tracing_v3/llm_call_record_helpers.py +3 -0
  237. synth_ai/v0/tracing_v3/session_tracer.py +3 -0
  238. {synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.10.dist-info}/METADATA +10 -7
  239. {synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.10.dist-info}/RECORD +269 -233
  240. examples/common_old/backend.py +0 -20
  241. examples/evals_old/README.md +0 -98
  242. examples/evals_old/__init__.py +0 -6
  243. examples/evals_old/compare_models.py +0 -1038
  244. examples/evals_old/example_log.md +0 -145
  245. examples/evals_old/run_demo.sh +0 -126
  246. examples/evals_old/trace_analysis.py +0 -270
  247. examples/finetuning_old/_backup_synth_qwen/config.toml +0 -29
  248. examples/finetuning_old/_backup_synth_qwen/example_log.md +0 -324
  249. examples/finetuning_old/_backup_synth_qwen/filter_traces.py +0 -60
  250. examples/finetuning_old/_backup_synth_qwen/filter_traces_achievements.py +0 -243
  251. examples/finetuning_old/_backup_synth_qwen/purge_v3_traces.py +0 -109
  252. examples/finetuning_old/_backup_synth_qwen/react_agent_lm.py +0 -1924
  253. examples/finetuning_old/_backup_synth_qwen/readme.md +0 -49
  254. examples/finetuning_old/_backup_synth_qwen/run_crafter_qwen4b.py +0 -114
  255. examples/finetuning_old/_backup_synth_qwen/run_demo.sh +0 -195
  256. examples/finetuning_old/_backup_synth_qwen/sft_kickoff.py +0 -119
  257. examples/finetuning_old/synth_qwen_v1/README.md +0 -68
  258. examples/finetuning_old/synth_qwen_v1/filter_traces.py +0 -60
  259. examples/finetuning_old/synth_qwen_v1/filter_traces_achievements.py +0 -243
  260. examples/finetuning_old/synth_qwen_v1/finetune.py +0 -46
  261. examples/finetuning_old/synth_qwen_v1/hello_ft_model.py +0 -71
  262. examples/finetuning_old/synth_qwen_v1/infer.py +0 -36
  263. examples/finetuning_old/synth_qwen_v1/poll.py +0 -46
  264. examples/finetuning_old/synth_qwen_v1/prepare_data.py +0 -35
  265. examples/finetuning_old/synth_qwen_v1/purge_v3_traces.py +0 -109
  266. examples/finetuning_old/synth_qwen_v1/react_agent_lm.py +0 -1933
  267. examples/finetuning_old/synth_qwen_v1/run_crafter_sft_job.py +0 -210
  268. examples/finetuning_old/synth_qwen_v1/run_ft_job.py +0 -237
  269. examples/finetuning_old/synth_qwen_v1/upload_data.py +0 -34
  270. examples/finetuning_old/synth_qwen_v1/util.py +0 -152
  271. examples/rl_old/task_app.py +0 -1131
  272. synth_ai/experimental/synth_oss.py +0 -445
  273. synth_ai/learning/filtering.py +0 -0
  274. synth_ai/learning/offline/dpo.py +0 -0
  275. synth_ai/learning/offline/providers.py +0 -7
  276. synth_ai/learning/offline/sft.py +0 -0
  277. synth_ai/learning/offline/shared.py +0 -0
  278. synth_ai/learning/online/grpo.py +0 -0
  279. synth_ai/learning/online/irft.py +0 -0
  280. synth_ai/learning/prompts/banking77_injection_eval.py +0 -168
  281. synth_ai/learning/prompts/gepa.py +0 -0
  282. synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +0 -211
  283. synth_ai/learning/prompts/mipro.py +0 -289
  284. synth_ai/learning/prompts/random_search.py +0 -249
  285. synth_ai/learning/prompts/run_mipro_banking77.py +0 -172
  286. synth_ai/learning/prompts/run_random_search_banking77.py +0 -329
  287. synth_ai/rl/secrets.py +0 -19
  288. synth_ai/scripts/verify_rewards.py +0 -100
  289. synth_ai/tracing/__init__.py +0 -30
  290. synth_ai/tracing_v1/__init__.py +0 -33
  291. synth_ai/tracing_v3/turso/__init__.py +0 -25
  292. synth_ai/tracing_v3/turso/manager.py +0 -838
  293. synth_ai/zyk/__init__.py +0 -30
  294. /synth_ai/{lm → v0/lm}/caching/__init__.py +0 -0
  295. /synth_ai/{lm → v0/lm}/caching/constants.py +0 -0
  296. /synth_ai/{lm → v0/lm}/caching/dbs.py +0 -0
  297. /synth_ai/{lm → v0/lm}/constants.py +0 -0
  298. /synth_ai/{lm → v0/lm}/core/__init__.py +0 -0
  299. /synth_ai/{lm → v0/lm}/core/exceptions.py +0 -0
  300. /synth_ai/{lm → v0/lm}/cost/__init__.py +0 -0
  301. /synth_ai/{lm → v0/lm}/cost/monitor.py +0 -0
  302. /synth_ai/{lm → v0/lm}/cost/statefulness.py +0 -0
  303. /synth_ai/{lm → v0/lm}/injection.py +0 -0
  304. /synth_ai/{lm → v0/lm}/provider_support/__init__.py +0 -0
  305. /synth_ai/{lm → v0/lm}/provider_support/suppress_logging.py +0 -0
  306. /synth_ai/{lm → v0/lm}/structured_outputs/__init__.py +0 -0
  307. /synth_ai/{lm → v0/lm}/structured_outputs/inject.py +0 -0
  308. /synth_ai/{lm → v0/lm}/tools/__init__.py +0 -0
  309. /synth_ai/{lm → v0/lm}/tools/base.py +0 -0
  310. /synth_ai/{lm → v0/lm}/unified_interface.py +0 -0
  311. /synth_ai/{lm → v0/lm}/vendors/__init__.py +0 -0
  312. /synth_ai/{lm → v0/lm}/vendors/base.py +0 -0
  313. /synth_ai/{lm → v0/lm}/vendors/core/__init__.py +0 -0
  314. /synth_ai/{lm → v0/lm}/vendors/core/synth_dev_api.py +0 -0
  315. /synth_ai/{lm → v0/lm}/vendors/local/__init__.py +0 -0
  316. /synth_ai/{lm → v0/lm}/vendors/local/ollama.py +0 -0
  317. /synth_ai/{lm → v0/lm}/vendors/retries.py +0 -0
  318. /synth_ai/{lm → v0/lm}/vendors/supported/__init__.py +0 -0
  319. /synth_ai/{lm → v0/lm}/warmup.py +0 -0
  320. {synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.10.dist-info}/WHEEL +0 -0
  321. {synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.10.dist-info}/entry_points.txt +0 -0
  322. {synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.10.dist-info}/licenses/LICENSE +0 -0
  323. {synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.10.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,195 @@
1
from __future__ import annotations

import uuid
from dataclasses import dataclass, field
from datetime import datetime, timezone
from typing import Any
7
+
8
+
9
@dataclass
class EnvHandle:
    """In-memory handle for an active environment instance.

    ``created_at`` defaults to the moment the handle is constructed,
    expressed as a naive (tzinfo-free) UTC ``datetime``.
    """

    env_id: str
    env: Any  # StatefulEnvironment or wrapper
    last_observation: dict[str, Any] | None
    last_info: dict[str, Any] | None
    step_idx: int
    seed: int | None
    rl_run_id: str
    # datetime.utcnow() is deprecated since Python 3.12. Take an aware UTC
    # "now" and strip tzinfo so the stored value stays naive UTC, exactly as
    # the previous default produced.
    created_at: datetime = field(
        default_factory=lambda: datetime.now(timezone.utc).replace(tzinfo=None)
    )
21
+
22
+
23
@dataclass
class PolicyHandle:
    """In-memory handle for an active policy instance.

    ``bound_env_id`` may be ``None``. ``created_at`` defaults to the moment
    the handle is constructed, expressed as a naive (tzinfo-free) UTC
    ``datetime``.
    """

    policy_id: str
    policy: Any  # Policy instance
    bound_env_id: str | None
    rl_run_id: str
    # datetime.utcnow() is deprecated since Python 3.12. Take an aware UTC
    # "now" and strip tzinfo so the stored value stays naive UTC, exactly as
    # the previous default produced.
    created_at: datetime = field(
        default_factory=lambda: datetime.now(timezone.utc).replace(tzinfo=None)
    )
32
+
33
+
34
@dataclass
class RunHandle:
    """Track run status for abort support.

    Attributes:
        run_id: Identifier of the run.
        status: One of "running", "aborted" or "completed".
        started_at: When the run was registered.
        finished_at: When the run was aborted or completed; ``None`` until
            then.
    """

    run_id: str
    # Allowed values: "running" | "aborted" | "completed"
    status: str
    started_at: datetime
    finished_at: datetime | None = None
42
+
43
+
44
@dataclass
class SnapshotMeta:
    """Metadata for a stored snapshot.

    Attributes:
        snapshot_id: Identifier of the snapshot.
        kind: Either "env" or "policy".
        rl_run_id: Identifier of the run the snapshot belongs to.
        parent_snapshot_id: Identifier of the parent snapshot, or ``None``.
        size: Size of the snapshot (unit not shown here; presumably bytes,
            TODO confirm).
        created_at: When the snapshot was recorded.
        path: Location of the stored snapshot.
    """

    snapshot_id: str
    # Allowed values: "env" | "policy"
    kind: str
    rl_run_id: str
    parent_snapshot_id: str | None
    size: int
    created_at: datetime
    path: str
55
+
56
+
57
class Registry:
    """In-memory registries for the service.

    Keeps four dictionaries keyed by string ID: environment handles, policy
    handles, run handles and snapshot metadata. Entries live only in these
    dictionaries.
    """

    def __init__(self) -> None:
        """Create the four empty registries."""
        self.envs: dict[str, EnvHandle] = {}
        self.policies: dict[str, PolicyHandle] = {}
        self.runs: dict[str, RunHandle] = {}
        self.snapshots: dict[str, SnapshotMeta] = {}

    @staticmethod
    def _utcnow() -> datetime:
        """Return the current UTC time as a naive ``datetime``.

        ``datetime.utcnow()`` is deprecated since Python 3.12, so an aware
        UTC timestamp is taken and its tzinfo stripped; stored timestamps
        therefore stay naive UTC as before.
        """
        return datetime.now(timezone.utc).replace(tzinfo=None)

    def _finish_run(self, run_id: str, status: str) -> bool:
        """Set a terminal *status* and finish time on a known run.

        Returns ``False`` (and changes nothing) when *run_id* is unknown.
        """
        handle = self.runs.get(run_id)
        if handle is None:
            return False
        handle.status = status
        handle.finished_at = self._utcnow()
        return True

    def generate_id(self) -> str:
        """Generate a UUID4 string for unique identification."""
        return str(uuid.uuid4())

    def register_env(
        self,
        env: Any,
        seed: int | None,
        rl_run_id: str,
        last_observation: dict[str, Any] | None = None,
        last_info: dict[str, Any] | None = None,
    ) -> str:
        """Register a new environment instance and return its generated ID.

        The handle starts with ``step_idx`` set to 0.
        """
        env_id = self.generate_id()
        self.envs[env_id] = EnvHandle(
            env_id=env_id,
            env=env,
            last_observation=last_observation,
            last_info=last_info,
            step_idx=0,
            seed=seed,
            rl_run_id=rl_run_id,
        )
        return env_id

    def register_policy(
        self,
        policy: Any,
        rl_run_id: str,
        bound_env_id: str | None = None,
    ) -> str:
        """Register a new policy instance and return its generated ID."""
        policy_id = self.generate_id()
        self.policies[policy_id] = PolicyHandle(
            policy_id=policy_id,
            policy=policy,
            bound_env_id=bound_env_id,
            rl_run_id=rl_run_id,
        )
        return policy_id

    def register_run(self, run_id: str | None = None) -> str:
        """Register a new run in the "running" state and return its ID.

        When *run_id* is ``None`` a fresh ID is generated. Registering an ID
        that already exists replaces the previous handle.
        """
        if run_id is None:
            run_id = self.generate_id()
        self.runs[run_id] = RunHandle(
            run_id=run_id,
            status="running",
            started_at=self._utcnow(),
        )
        return run_id

    def abort_run(self, run_id: str) -> bool:
        """Mark a run as aborted; return ``False`` if the run is unknown."""
        return self._finish_run(run_id, "aborted")

    def complete_run(self, run_id: str) -> bool:
        """Mark a run as completed; return ``False`` if the run is unknown."""
        return self._finish_run(run_id, "completed")

    def is_run_aborted(self, run_id: str) -> bool:
        """Return ``True`` only for a known run whose status is "aborted"."""
        handle = self.runs.get(run_id)
        return handle is not None and handle.status == "aborted"

    def register_snapshot(
        self,
        kind: str,
        rl_run_id: str,
        size: int,
        path: str,
        parent_snapshot_id: str | None = None,
    ) -> str:
        """Record metadata for a new snapshot and return its generated ID."""
        snapshot_id = self.generate_id()
        self.snapshots[snapshot_id] = SnapshotMeta(
            snapshot_id=snapshot_id,
            kind=kind,
            rl_run_id=rl_run_id,
            parent_snapshot_id=parent_snapshot_id,
            size=size,
            created_at=self._utcnow(),
            path=path,
        )
        return snapshot_id

    def get_env(self, env_id: str) -> EnvHandle | None:
        """Get an environment handle by ID, or ``None`` if unknown."""
        return self.envs.get(env_id)

    def get_policy(self, policy_id: str) -> PolicyHandle | None:
        """Get a policy handle by ID, or ``None`` if unknown."""
        return self.policies.get(policy_id)

    def get_run(self, run_id: str) -> RunHandle | None:
        """Get a run handle by ID, or ``None`` if unknown."""
        return self.runs.get(run_id)

    def get_snapshot(self, snapshot_id: str) -> SnapshotMeta | None:
        """Get snapshot metadata by ID, or ``None`` if unknown."""
        return self.snapshots.get(snapshot_id)

    def remove_env(self, env_id: str) -> bool:
        """Remove an environment from the registry.

        Returns ``True`` if an entry was removed, ``False`` if it was absent.
        """
        try:
            del self.envs[env_id]
        except KeyError:
            return False
        return True

    def remove_policy(self, policy_id: str) -> bool:
        """Remove a policy from the registry.

        Returns ``True`` if an entry was removed, ``False`` if it was absent.
        """
        try:
            del self.policies[policy_id]
        except KeyError:
            return False
        return True
192
+
193
+
194
# Module-level Registry instance, created once when this module is imported.
registry: Registry = Registry()