synth-ai 0.2.9.dev4__py3-none-any.whl → 0.2.9.dev6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synth-ai might be problematic. Click here for more details.
- examples/__init__.py +16 -0
- examples/crafter_debug_render.py +23 -17
- examples/qwen_coder/README.md +102 -0
- examples/qwen_coder/_shared.py +113 -0
- examples/qwen_coder/configs/coder_lora_30b.toml +61 -0
- examples/qwen_coder/configs/coder_lora_4b.toml +57 -0
- examples/qwen_coder/configs/coder_lora_small.toml +58 -0
- examples/qwen_coder/generate_dataset.py +98 -0
- examples/qwen_coder/infer_ft_smoke.py +64 -0
- examples/qwen_coder/infer_prod_proxy.py +73 -0
- examples/qwen_coder/infer_via_synth.py +87 -0
- examples/qwen_coder/scripts/infer_coder.sh +18 -0
- examples/qwen_coder/scripts/train_coder_30b.sh +21 -0
- examples/qwen_coder/sft_full_17b.py +103 -0
- examples/qwen_coder/sft_lora_30b.py +110 -0
- examples/qwen_coder/subset_jsonl.py +38 -0
- examples/qwen_coder/validate_jsonl.py +59 -0
- examples/rl/configs/eval_base_qwen.toml +1 -1
- examples/rl/configs/rl_from_base_qwen17.toml +1 -1
- examples/rl/download_dataset.py +26 -10
- examples/rl/run_eval.py +53 -52
- examples/rl/run_rl_and_save.py +29 -12
- examples/rl/task_app/math_single_step.py +180 -41
- examples/rl/task_app/math_task_app.py +14 -6
- examples/sft/README.md +139 -0
- examples/sft/configs/crafter_fft_qwen0p6b.toml +44 -0
- examples/sft/configs/crafter_lora_qwen0p6b.toml +45 -0
- examples/sft/evaluate.py +117 -0
- examples/sft/export_dataset.py +117 -0
- examples/sft/generate_traces.py +162 -0
- examples/swe/__init__.py +12 -0
- examples/swe/task_app/README.md +105 -0
- examples/swe/task_app/__init__.py +2 -0
- examples/swe/task_app/grpo_swe_mini.py +571 -0
- examples/swe/task_app/grpo_swe_mini_task_app.py +136 -0
- examples/swe/task_app/hosted/README.md +173 -0
- examples/swe/task_app/hosted/__init__.py +5 -0
- examples/swe/task_app/hosted/branching.py +143 -0
- examples/swe/task_app/hosted/environment_routes.py +1289 -0
- examples/swe/task_app/hosted/envs/__init__.py +1 -0
- examples/swe/task_app/hosted/envs/crafter/__init__.py +6 -0
- examples/swe/task_app/hosted/envs/crafter/app.py +1 -0
- examples/swe/task_app/hosted/envs/crafter/environment.py +522 -0
- examples/swe/task_app/hosted/envs/crafter/policy.py +478 -0
- examples/swe/task_app/hosted/envs/crafter/react_agent.py +108 -0
- examples/swe/task_app/hosted/envs/crafter/shared.py +305 -0
- examples/swe/task_app/hosted/envs/crafter/tools.py +47 -0
- examples/swe/task_app/hosted/envs/mini_swe/__init__.py +8 -0
- examples/swe/task_app/hosted/envs/mini_swe/environment.py +1164 -0
- examples/swe/task_app/hosted/envs/mini_swe/policy.py +355 -0
- examples/swe/task_app/hosted/envs/mini_swe/shared.py +83 -0
- examples/swe/task_app/hosted/envs/mini_swe/tools.py +96 -0
- examples/swe/task_app/hosted/hosted_app.py +204 -0
- examples/swe/task_app/hosted/inference/__init__.py +5 -0
- examples/swe/task_app/hosted/inference/openai_client.py +618 -0
- examples/swe/task_app/hosted/main.py +100 -0
- examples/swe/task_app/hosted/policy_routes.py +1079 -0
- examples/swe/task_app/hosted/registry.py +195 -0
- examples/swe/task_app/hosted/rollout.py +1869 -0
- examples/swe/task_app/hosted/storage/__init__.py +5 -0
- examples/swe/task_app/hosted/storage/volume.py +211 -0
- examples/swe/task_app/hosted/test_agents.py +161 -0
- examples/swe/task_app/hosted/test_service.py +137 -0
- examples/swe/task_app/hosted/utils.py +62 -0
- examples/vlm/README.md +68 -0
- examples/vlm/configs/crafter_vlm_gpt4o.toml +44 -0
- examples/vlm/crafter_image_only_agent.py +207 -0
- examples/vlm/crafter_openai_vlm_agent.py +277 -0
- examples/vlm/filter_image_rows.py +63 -0
- examples/vlm/run_crafter_vlm_benchmark.py +316 -0
- examples/warming_up_to_rl/analyze_trace_db.py +12 -10
- examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +11 -1
- examples/warming_up_to_rl/export_trace_sft.py +218 -36
- examples/warming_up_to_rl/groq_test.py +15 -8
- examples/warming_up_to_rl/manage_secrets.py +29 -25
- examples/warming_up_to_rl/readme.md +9 -2
- examples/warming_up_to_rl/run_eval.py +137 -61
- examples/warming_up_to_rl/run_fft_and_save.py +131 -60
- examples/warming_up_to_rl/run_local_rollout.py +88 -39
- examples/warming_up_to_rl/run_local_rollout_modal.py +114 -28
- examples/warming_up_to_rl/run_local_rollout_parallel.py +81 -20
- examples/warming_up_to_rl/run_local_rollout_traced.py +126 -23
- examples/warming_up_to_rl/run_rl_and_save.py +35 -12
- examples/warming_up_to_rl/run_rollout_remote.py +44 -19
- examples/warming_up_to_rl/task_app/README.md +6 -2
- examples/warming_up_to_rl/task_app/grpo_crafter.py +319 -57
- examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +11 -30
- examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +9 -11
- examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +137 -182
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +150 -57
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +105 -69
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +19 -7
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +45 -42
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +47 -45
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +198 -92
- examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +0 -2
- examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +361 -263
- examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +21 -23
- examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +394 -274
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +56 -62
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +6 -15
- examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +4 -3
- synth/__init__.py +14 -0
- synth_ai/__init__.py +20 -4
- synth_ai/api/models/supported.py +376 -0
- synth_ai/api/train/builders.py +157 -26
- synth_ai/api/train/cli.py +213 -57
- synth_ai/api/train/config_finder.py +65 -5
- synth_ai/api/train/env_resolver.py +33 -15
- synth_ai/api/train/pollers.py +13 -4
- synth_ai/api/train/supported_algos.py +139 -0
- synth_ai/api/train/task_app.py +5 -3
- synth_ai/api/train/utils.py +33 -48
- synth_ai/cli/__init__.py +19 -4
- synth_ai/cli/_modal_wrapper.py +28 -0
- synth_ai/cli/_typer_patch.py +49 -0
- synth_ai/cli/balance.py +2 -3
- synth_ai/cli/calc.py +1 -1
- synth_ai/cli/demo.py +21 -6
- synth_ai/cli/recent.py +2 -2
- synth_ai/cli/rl_demo.py +77 -17
- synth_ai/cli/root.py +116 -39
- synth_ai/cli/status.py +2 -2
- synth_ai/cli/task_apps.py +1709 -243
- synth_ai/cli/traces.py +7 -4
- synth_ai/cli/turso.py +73 -0
- synth_ai/cli/watch.py +12 -18
- synth_ai/core/experiment.py +0 -2
- synth_ai/demo_registry.py +68 -31
- synth_ai/demos/core/cli.py +516 -194
- synth_ai/demos/demo_task_apps/__init__.py +3 -3
- synth_ai/demos/demo_task_apps/core.py +64 -28
- synth_ai/demos/demo_task_apps/crafter/configs/crafter_fft_4b.toml +2 -3
- synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +37 -30
- synth_ai/demos/demo_task_apps/math/_common.py +1 -2
- synth_ai/demos/demo_task_apps/math/app.py +2 -1
- synth_ai/demos/demo_task_apps/math/deploy_modal.py +3 -6
- synth_ai/demos/demo_task_apps/math/modal_task_app.py +183 -82
- synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -2
- synth_ai/environments/examples/bandit/engine.py +12 -4
- synth_ai/environments/examples/bandit/taskset.py +4 -4
- synth_ai/environments/examples/crafter_classic/environment.py +76 -1
- synth_ai/environments/reproducibility/tree.py +5 -6
- synth_ai/environments/service/app.py +11 -12
- synth_ai/environments/service/core_routes.py +10 -9
- synth_ai/environments/stateful/engine.py +1 -1
- synth_ai/environments/tasks/core.py +1 -0
- synth_ai/environments/tasks/filters.py +5 -6
- synth_ai/environments/tasks/utils.py +4 -5
- synth_ai/evals/base.py +0 -2
- synth_ai/handshake.py +11 -9
- synth_ai/http.py +1 -1
- synth_ai/http_client.py +43 -11
- synth_ai/inference/__init__.py +0 -2
- synth_ai/inference/client.py +20 -6
- synth_ai/jobs/client.py +103 -78
- synth_ai/learning/__init__.py +41 -6
- synth_ai/learning/algorithms.py +14 -0
- synth_ai/learning/client.py +121 -29
- synth_ai/learning/config.py +2 -40
- synth_ai/learning/constants.py +0 -2
- synth_ai/learning/ft_client.py +4 -56
- synth_ai/learning/health.py +13 -7
- synth_ai/learning/jobs.py +43 -47
- synth_ai/{rl → learning/rl}/__init__.py +14 -5
- synth_ai/learning/rl/client.py +267 -0
- synth_ai/learning/rl/config.py +31 -0
- synth_ai/{rl → learning/rl}/contracts.py +5 -10
- synth_ai/{rl → learning/rl}/env_keys.py +45 -16
- synth_ai/learning/rl/secrets.py +13 -0
- synth_ai/learning/rl_client.py +2 -253
- synth_ai/learning/sft/__init__.py +29 -0
- synth_ai/learning/sft/client.py +68 -0
- synth_ai/learning/sft/config.py +270 -0
- synth_ai/learning/sft/data.py +295 -0
- synth_ai/learning/sse.py +25 -26
- synth_ai/learning/validators.py +25 -24
- synth_ai/lm/__init__.py +21 -47
- synth_ai/task/__init__.py +26 -27
- synth_ai/task/apps/__init__.py +18 -19
- synth_ai/task/auth.py +35 -23
- synth_ai/task/client.py +15 -13
- synth_ai/task/contracts.py +37 -35
- synth_ai/task/datasets.py +9 -6
- synth_ai/task/errors.py +11 -10
- synth_ai/task/health.py +17 -11
- synth_ai/task/json.py +58 -24
- synth_ai/task/proxy.py +15 -14
- synth_ai/task/rubrics.py +22 -15
- synth_ai/task/server.py +43 -17
- synth_ai/task/tracing_utils.py +12 -7
- synth_ai/task/validators.py +0 -1
- synth_ai/task/vendors.py +5 -7
- synth_ai/tracing_v3/__init__.py +2 -0
- synth_ai/tracing_v3/abstractions.py +21 -4
- synth_ai/tracing_v3/db_config.py +26 -1
- synth_ai/tracing_v3/decorators.py +18 -15
- synth_ai/tracing_v3/examples/basic_usage.py +3 -2
- synth_ai/tracing_v3/hooks.py +6 -4
- synth_ai/tracing_v3/llm_call_record_helpers.py +6 -6
- synth_ai/tracing_v3/replica_sync.py +1 -0
- synth_ai/tracing_v3/session_tracer.py +63 -16
- synth_ai/tracing_v3/storage/base.py +89 -1
- synth_ai/tracing_v3/storage/config.py +21 -8
- synth_ai/tracing_v3/storage/factory.py +10 -8
- synth_ai/tracing_v3/storage/utils.py +4 -2
- synth_ai/tracing_v3/turso/daemon.py +7 -2
- synth_ai/tracing_v3/turso/models.py +5 -2
- synth_ai/tracing_v3/turso/native_manager.py +1173 -0
- synth_ai/tracing_v3/utils.py +4 -3
- synth_ai/v0/api/__init__.py +8 -0
- synth_ai/v0/api/models/__init__.py +8 -0
- synth_ai/v0/api/models/supported.py +8 -0
- synth_ai/v0/config/__init__.py +15 -0
- synth_ai/v0/config/base_url.py +12 -0
- synth_ai/v0/lm/__init__.py +51 -0
- synth_ai/{lm → v0/lm}/caching/ephemeral.py +3 -5
- synth_ai/{lm → v0/lm}/caching/handler.py +4 -4
- synth_ai/{lm → v0/lm}/caching/initialize.py +1 -1
- synth_ai/{lm → v0/lm}/caching/persistent.py +1 -1
- synth_ai/{lm → v0/lm}/config.py +6 -1
- synth_ai/{lm → v0/lm}/core/all.py +9 -9
- synth_ai/{lm → v0/lm}/core/exceptions.py +0 -2
- synth_ai/{lm → v0/lm}/core/main.py +19 -7
- synth_ai/{lm → v0/lm}/core/main_v3.py +10 -10
- synth_ai/{lm → v0/lm}/core/synth_models.py +2 -15
- synth_ai/{lm → v0/lm}/core/vendor_clients.py +6 -4
- synth_ai/{lm → v0/lm}/overrides.py +4 -4
- synth_ai/{lm → v0/lm}/provider_support/anthropic.py +4 -4
- synth_ai/{lm → v0/lm}/provider_support/openai.py +5 -5
- synth_ai/{lm → v0/lm}/structured_outputs/handler.py +5 -5
- synth_ai/{lm → v0/lm}/structured_outputs/rehabilitate.py +1 -1
- synth_ai/{lm → v0/lm}/vendors/core/anthropic_api.py +16 -16
- synth_ai/{lm → v0/lm}/vendors/core/gemini_api.py +5 -5
- synth_ai/{lm → v0/lm}/vendors/core/mistral_api.py +5 -5
- synth_ai/{lm → v0/lm}/vendors/core/openai_api.py +12 -10
- synth_ai/{lm → v0/lm}/vendors/openai_standard.py +11 -9
- synth_ai/{lm → v0/lm}/vendors/openai_standard_responses.py +8 -5
- synth_ai/{lm → v0/lm}/vendors/supported/custom_endpoint.py +4 -6
- synth_ai/{lm → v0/lm}/vendors/supported/deepseek.py +2 -2
- synth_ai/{lm → v0/lm}/vendors/supported/grok.py +2 -2
- synth_ai/{lm → v0/lm}/vendors/supported/groq.py +1 -1
- synth_ai/{lm → v0/lm}/vendors/supported/ollama.py +1 -1
- synth_ai/{lm → v0/lm}/vendors/supported/openrouter.py +3 -3
- synth_ai/{lm → v0/lm}/vendors/supported/together.py +1 -1
- synth_ai/{lm → v0/lm}/vendors/synth_client.py +38 -11
- synth_ai/v0/tracing/upload.py +32 -135
- synth_ai/v0/tracing_v3/__init__.py +10 -0
- synth_ai/v0/tracing_v3/abstractions.py +3 -0
- synth_ai/v0/tracing_v3/decorators.py +3 -0
- synth_ai/v0/tracing_v3/llm_call_record_helpers.py +3 -0
- synth_ai/v0/tracing_v3/session_tracer.py +3 -0
- synth_ai-0.2.9.dev6.dist-info/METADATA +191 -0
- {synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev6.dist-info}/RECORD +291 -264
- {synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev6.dist-info}/top_level.txt +1 -0
- examples/common_old/backend.py +0 -21
- examples/evals_old/README.md +0 -98
- examples/evals_old/__init__.py +0 -6
- examples/evals_old/compare_models.py +0 -1037
- examples/evals_old/example_log.md +0 -145
- examples/evals_old/run_demo.sh +0 -126
- examples/evals_old/trace_analysis.py +0 -270
- examples/finetuning_old/_backup_synth_qwen/config.toml +0 -29
- examples/finetuning_old/_backup_synth_qwen/example_log.md +0 -324
- examples/finetuning_old/_backup_synth_qwen/filter_traces.py +0 -60
- examples/finetuning_old/_backup_synth_qwen/filter_traces_achievements.py +0 -239
- examples/finetuning_old/_backup_synth_qwen/purge_v3_traces.py +0 -109
- examples/finetuning_old/_backup_synth_qwen/react_agent_lm.py +0 -1924
- examples/finetuning_old/_backup_synth_qwen/readme.md +0 -49
- examples/finetuning_old/_backup_synth_qwen/run_crafter_qwen4b.py +0 -114
- examples/finetuning_old/_backup_synth_qwen/run_demo.sh +0 -195
- examples/finetuning_old/_backup_synth_qwen/sft_kickoff.py +0 -118
- examples/finetuning_old/synth_qwen_v1/README.md +0 -68
- examples/finetuning_old/synth_qwen_v1/filter_traces.py +0 -60
- examples/finetuning_old/synth_qwen_v1/filter_traces_achievements.py +0 -239
- examples/finetuning_old/synth_qwen_v1/finetune.py +0 -46
- examples/finetuning_old/synth_qwen_v1/hello_ft_model.py +0 -71
- examples/finetuning_old/synth_qwen_v1/infer.py +0 -37
- examples/finetuning_old/synth_qwen_v1/poll.py +0 -44
- examples/finetuning_old/synth_qwen_v1/prepare_data.py +0 -35
- examples/finetuning_old/synth_qwen_v1/purge_v3_traces.py +0 -109
- examples/finetuning_old/synth_qwen_v1/react_agent_lm.py +0 -1932
- examples/finetuning_old/synth_qwen_v1/run_crafter_sft_job.py +0 -207
- examples/finetuning_old/synth_qwen_v1/run_ft_job.py +0 -232
- examples/finetuning_old/synth_qwen_v1/upload_data.py +0 -34
- examples/finetuning_old/synth_qwen_v1/util.py +0 -147
- examples/rl_old/task_app.py +0 -962
- examples/warming_up_to_rl/old/event_rewards.md +0 -234
- examples/warming_up_to_rl/old/notes.md +0 -73
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_stepwise_rewards.py +0 -58
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_turso.py +0 -738
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_turso.py +0 -580
- synth_ai/environments/examples/sokoban/units/astar_common.py +0 -95
- synth_ai/experimental/synth_oss.py +0 -446
- synth_ai/install_sqld.sh +0 -40
- synth_ai/learning/filtering.py +0 -0
- synth_ai/learning/offline/dpo.py +0 -0
- synth_ai/learning/offline/providers.py +0 -7
- synth_ai/learning/offline/sft.py +0 -0
- synth_ai/learning/offline/shared.py +0 -0
- synth_ai/learning/online/grpo.py +0 -0
- synth_ai/learning/online/irft.py +0 -0
- synth_ai/learning/prompts/banking77_injection_eval.py +0 -168
- synth_ai/learning/prompts/gepa.py +0 -0
- synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +0 -213
- synth_ai/learning/prompts/mipro.py +0 -289
- synth_ai/learning/prompts/random_search.py +0 -246
- synth_ai/learning/prompts/run_mipro_banking77.py +0 -172
- synth_ai/learning/prompts/run_random_search_banking77.py +0 -324
- synth_ai/rl/secrets.py +0 -19
- synth_ai/scripts/verify_rewards.py +0 -100
- synth_ai/tracing/__init__.py +0 -30
- synth_ai/tracing_v1/__init__.py +0 -33
- synth_ai/tracing_v3/turso/__init__.py +0 -25
- synth_ai/tracing_v3/turso/manager.py +0 -774
- synth_ai/zyk/__init__.py +0 -30
- synth_ai-0.2.9.dev4.dist-info/METADATA +0 -131
- /synth_ai/{lm → v0/lm}/caching/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/caching/constants.py +0 -0
- /synth_ai/{lm → v0/lm}/caching/dbs.py +0 -0
- /synth_ai/{lm → v0/lm}/constants.py +0 -0
- /synth_ai/{lm → v0/lm}/core/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/cost/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/cost/monitor.py +0 -0
- /synth_ai/{lm → v0/lm}/cost/statefulness.py +0 -0
- /synth_ai/{lm → v0/lm}/injection.py +0 -0
- /synth_ai/{lm → v0/lm}/provider_support/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/provider_support/suppress_logging.py +0 -0
- /synth_ai/{lm → v0/lm}/structured_outputs/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/structured_outputs/inject.py +0 -0
- /synth_ai/{lm → v0/lm}/tools/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/tools/base.py +0 -0
- /synth_ai/{lm → v0/lm}/unified_interface.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/base.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/core/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/core/synth_dev_api.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/local/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/local/ollama.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/retries.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/supported/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/warmup.py +0 -0
- {synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev6.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev6.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev6.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
import
|
|
4
|
-
from typing import Any, Dict, List, Optional
|
|
3
|
+
import contextlib
|
|
5
4
|
import json
|
|
5
|
+
import logging
|
|
6
|
+
from typing import Any
|
|
7
|
+
from uuid import uuid4
|
|
6
8
|
|
|
7
9
|
from fastapi import APIRouter, HTTPException
|
|
8
10
|
from pydantic import BaseModel
|
|
9
11
|
|
|
10
|
-
from uuid import uuid4
|
|
11
|
-
|
|
12
12
|
# Import the actual classes from synth-ai
|
|
13
13
|
from synth_ai.environments.examples.crafter_classic.environment import (
|
|
14
14
|
CrafterClassicEnvironment,
|
|
@@ -58,9 +58,7 @@ async def validate_environment_observation(observation: Any, context: str) -> No
|
|
|
58
58
|
"terminated",
|
|
59
59
|
}
|
|
60
60
|
if wordle_keys.issubset(set(observation.keys())):
|
|
61
|
-
logger.info(
|
|
62
|
-
f"🔍 ENV_ROUTES: Validating Wordle observation structure in {context}"
|
|
63
|
-
)
|
|
61
|
+
logger.info(f"🔍 ENV_ROUTES: Validating Wordle observation structure in {context}")
|
|
64
62
|
logger.info(f"🔍 ENV_ROUTES: Observation keys: {list(observation.keys())}")
|
|
65
63
|
|
|
66
64
|
missing_keys = wordle_keys - set(observation.keys())
|
|
@@ -100,41 +98,41 @@ async def validate_environment_observation(observation: Any, context: str) -> No
|
|
|
100
98
|
|
|
101
99
|
class EnvCreateRequest(BaseModel):
|
|
102
100
|
env_name: str
|
|
103
|
-
config:
|
|
104
|
-
seed:
|
|
105
|
-
parent_env_id:
|
|
101
|
+
config: dict[str, Any] = {}
|
|
102
|
+
seed: int | None = None
|
|
103
|
+
parent_env_id: str | None = None
|
|
106
104
|
rl_run_id: str
|
|
107
105
|
|
|
108
106
|
|
|
109
107
|
class EnvCreateResponse(BaseModel):
|
|
110
108
|
env_id: str
|
|
111
|
-
observation:
|
|
112
|
-
info:
|
|
109
|
+
observation: dict[str, Any]
|
|
110
|
+
info: dict[str, Any] | None = None
|
|
113
111
|
step_idx: int
|
|
114
112
|
|
|
115
113
|
|
|
116
114
|
class EnvResetRequest(BaseModel):
|
|
117
115
|
env_id: str
|
|
118
|
-
seed:
|
|
116
|
+
seed: int | None = None
|
|
119
117
|
|
|
120
118
|
|
|
121
119
|
class EnvResetResponse(BaseModel):
|
|
122
|
-
observation:
|
|
123
|
-
info:
|
|
120
|
+
observation: dict[str, Any]
|
|
121
|
+
info: dict[str, Any] | None = None
|
|
124
122
|
step_idx: int
|
|
125
123
|
|
|
126
124
|
|
|
127
125
|
class EnvStepRequest(BaseModel):
|
|
128
126
|
env_id: str
|
|
129
|
-
tool_calls:
|
|
127
|
+
tool_calls: list[dict[str, Any]]
|
|
130
128
|
|
|
131
129
|
|
|
132
130
|
class EnvStepResponse(BaseModel):
|
|
133
|
-
observation:
|
|
131
|
+
observation: dict[str, Any]
|
|
134
132
|
done: bool
|
|
135
|
-
info:
|
|
136
|
-
reward:
|
|
137
|
-
truncated:
|
|
133
|
+
info: dict[str, Any] | None = None
|
|
134
|
+
reward: float | None = None
|
|
135
|
+
truncated: bool | None = None
|
|
138
136
|
step_idx: int
|
|
139
137
|
|
|
140
138
|
|
|
@@ -155,8 +153,8 @@ class EnvRestoreRequest(BaseModel):
|
|
|
155
153
|
|
|
156
154
|
class EnvRestoreResponse(BaseModel):
|
|
157
155
|
env_id: str
|
|
158
|
-
observation:
|
|
159
|
-
info:
|
|
156
|
+
observation: dict[str, Any]
|
|
157
|
+
info: dict[str, Any] | None = None
|
|
160
158
|
step_idx: int
|
|
161
159
|
|
|
162
160
|
|
|
@@ -215,7 +213,8 @@ async def create_environment(request: EnvCreateRequest) -> EnvCreateResponse:
|
|
|
215
213
|
# Log a world signature for sanity: seed + starting public state hash
|
|
216
214
|
try:
|
|
217
215
|
pub_state = base_env.engine._get_public_state_from_env() # type: ignore[attr-defined]
|
|
218
|
-
import hashlib
|
|
216
|
+
import hashlib
|
|
217
|
+
import json as _json
|
|
219
218
|
|
|
220
219
|
sig_src = {
|
|
221
220
|
"player_position": list(pub_state.player_position),
|
|
@@ -270,27 +269,27 @@ async def create_environment(request: EnvCreateRequest) -> EnvCreateResponse:
|
|
|
270
269
|
elif env_name_lower == "wordle":
|
|
271
270
|
# Defer imports to avoid hard dependency when not used
|
|
272
271
|
try:
|
|
272
|
+
from synth_ai.environments.examples.wordle.environment import (
|
|
273
|
+
WordleEnvironment,
|
|
274
|
+
)
|
|
273
275
|
from synth_ai.environments.examples.wordle.taskset import (
|
|
274
276
|
WordleTaskInstance,
|
|
275
277
|
WordleTaskInstanceMetadata,
|
|
276
278
|
)
|
|
277
|
-
from synth_ai.environments.examples.wordle.environment import (
|
|
278
|
-
WordleEnvironment,
|
|
279
|
-
)
|
|
280
279
|
except Exception as e:
|
|
281
280
|
raise HTTPException(
|
|
282
281
|
status_code=500, detail=f"Wordle modules unavailable: {e}"
|
|
283
|
-
)
|
|
282
|
+
) from e
|
|
284
283
|
|
|
285
284
|
# Lazy import of wrapper within branch
|
|
286
285
|
try:
|
|
287
|
-
from .envs.wordle.environment import
|
|
288
|
-
WordleEnvironmentWrapper as _WordleWrapper,
|
|
289
|
-
)
|
|
286
|
+
from .envs.wordle.environment import WordleEnvironmentWrapper
|
|
290
287
|
except Exception as e:
|
|
291
288
|
raise HTTPException(
|
|
292
289
|
status_code=500, detail=f"Wordle wrapper unavailable: {e}"
|
|
293
|
-
)
|
|
290
|
+
) from e
|
|
291
|
+
else:
|
|
292
|
+
wordle_wrapper_cls = WordleEnvironmentWrapper
|
|
294
293
|
|
|
295
294
|
cfg = request.config or {}
|
|
296
295
|
word_length = int(cfg.get("word_length", 5))
|
|
@@ -307,12 +306,8 @@ async def create_environment(request: EnvCreateRequest) -> EnvCreateResponse:
|
|
|
307
306
|
)
|
|
308
307
|
instance = WordleTaskInstance(
|
|
309
308
|
id=uuid4(),
|
|
310
|
-
impetus=Impetus(
|
|
311
|
-
|
|
312
|
-
),
|
|
313
|
-
intent=Intent(
|
|
314
|
-
rubric="guess the word", gold_trajectories=None, gold_state_diff={}
|
|
315
|
-
),
|
|
309
|
+
impetus=Impetus(instructions="Play Wordle. Submit one 5-letter word per turn."),
|
|
310
|
+
intent=Intent(rubric="guess the word", gold_trajectories=None, gold_state_diff={}),
|
|
316
311
|
metadata=md,
|
|
317
312
|
is_reproducible=True,
|
|
318
313
|
initial_engine_snapshot=None,
|
|
@@ -322,7 +317,7 @@ async def create_environment(request: EnvCreateRequest) -> EnvCreateResponse:
|
|
|
322
317
|
# Try to preserve the exact puzzle snapshot for reproducibility
|
|
323
318
|
init_snap = getattr(instance, "initial_engine_snapshot", None)
|
|
324
319
|
|
|
325
|
-
wrapper =
|
|
320
|
+
wrapper = wordle_wrapper_cls(
|
|
326
321
|
env=base_env,
|
|
327
322
|
seed=request.seed,
|
|
328
323
|
word_length=word_length,
|
|
@@ -345,9 +340,7 @@ async def create_environment(request: EnvCreateRequest) -> EnvCreateResponse:
|
|
|
345
340
|
if key in observation_for_registry:
|
|
346
341
|
del observation_for_registry[key]
|
|
347
342
|
|
|
348
|
-
await validate_environment_observation(
|
|
349
|
-
observation_for_registry, "initialize"
|
|
350
|
-
)
|
|
343
|
+
await validate_environment_observation(observation_for_registry, "initialize")
|
|
351
344
|
|
|
352
345
|
env_id = registry.register_env(
|
|
353
346
|
env=wrapper,
|
|
@@ -368,27 +361,25 @@ async def create_environment(request: EnvCreateRequest) -> EnvCreateResponse:
|
|
|
368
361
|
|
|
369
362
|
elif env_name_lower == "sokoban":
|
|
370
363
|
try:
|
|
364
|
+
from synth_ai.environments.examples.sokoban.environment import (
|
|
365
|
+
SokobanEnvironment,
|
|
366
|
+
)
|
|
371
367
|
from synth_ai.environments.examples.sokoban.taskset import (
|
|
372
368
|
SokobanTaskInstance,
|
|
373
369
|
SokobanTaskInstanceMetadata,
|
|
374
370
|
)
|
|
375
|
-
from synth_ai.environments.examples.sokoban.environment import (
|
|
376
|
-
SokobanEnvironment,
|
|
377
|
-
)
|
|
378
371
|
except Exception as e:
|
|
379
372
|
raise HTTPException(
|
|
380
373
|
status_code=500, detail=f"Sokoban modules unavailable: {e}"
|
|
381
|
-
)
|
|
374
|
+
) from e
|
|
382
375
|
|
|
383
376
|
# Lazy import of wrapper within branch
|
|
384
377
|
try:
|
|
385
|
-
from .envs.sokoban.environment import
|
|
386
|
-
SokobanEnvironmentWrapper as _SokobanWrapper,
|
|
387
|
-
)
|
|
378
|
+
from .envs.sokoban.environment import SokobanEnvironmentWrapper
|
|
388
379
|
except Exception as e:
|
|
389
380
|
raise HTTPException(
|
|
390
381
|
status_code=500, detail=f"Sokoban wrapper unavailable: {e}"
|
|
391
|
-
)
|
|
382
|
+
) from e
|
|
392
383
|
|
|
393
384
|
cfg = request.config or {}
|
|
394
385
|
difficulty = cfg.get("difficulty", "easy")
|
|
@@ -411,9 +402,7 @@ async def create_environment(request: EnvCreateRequest) -> EnvCreateResponse:
|
|
|
411
402
|
)
|
|
412
403
|
base_env = SokobanEnvironment(task_instance=instance)
|
|
413
404
|
|
|
414
|
-
wrapper =
|
|
415
|
-
env=base_env, seed=request.seed, config=cfg
|
|
416
|
-
)
|
|
405
|
+
wrapper = SokobanEnvironmentWrapper(env=base_env, seed=request.seed, config=cfg)
|
|
417
406
|
result = await wrapper.initialize()
|
|
418
407
|
|
|
419
408
|
# Handle the observation structure consistently for Sokoban
|
|
@@ -449,22 +438,22 @@ async def create_environment(request: EnvCreateRequest) -> EnvCreateResponse:
|
|
|
449
438
|
cfg = request.config or {}
|
|
450
439
|
# Lazy import of wrapper within branch
|
|
451
440
|
try:
|
|
452
|
-
from .envs.math.environment import
|
|
453
|
-
MathEnvironmentWrapper as _MathWrapper,
|
|
454
|
-
)
|
|
441
|
+
from .envs.math.environment import MathEnvironmentWrapper
|
|
455
442
|
except Exception as e:
|
|
456
|
-
raise HTTPException(
|
|
457
|
-
status_code=500, detail=f"Math wrapper unavailable: {e}"
|
|
458
|
-
)
|
|
443
|
+
raise HTTPException(status_code=500, detail=f"Math wrapper unavailable: {e}") from e
|
|
459
444
|
|
|
460
|
-
wrapper =
|
|
445
|
+
wrapper = MathEnvironmentWrapper(
|
|
461
446
|
seed=request.seed,
|
|
462
447
|
problem_id=cfg.get("problem_id"),
|
|
463
448
|
problem_text=cfg.get("problem_text"),
|
|
464
449
|
)
|
|
465
450
|
result = await wrapper.initialize()
|
|
466
451
|
|
|
467
|
-
observation_for_registry =
|
|
452
|
+
observation_for_registry = (
|
|
453
|
+
result["observation"].copy()
|
|
454
|
+
if isinstance(result, dict) and "observation" in result
|
|
455
|
+
else result.copy()
|
|
456
|
+
)
|
|
468
457
|
for key in ["step_idx", "info"]:
|
|
469
458
|
if key in observation_for_registry:
|
|
470
459
|
del observation_for_registry[key]
|
|
@@ -493,7 +482,7 @@ async def create_environment(request: EnvCreateRequest) -> EnvCreateResponse:
|
|
|
493
482
|
|
|
494
483
|
except Exception as e:
|
|
495
484
|
logger.error(f"Failed to create environment: {e}")
|
|
496
|
-
raise HTTPException(status_code=500, detail=str(e))
|
|
485
|
+
raise HTTPException(status_code=500, detail=str(e)) from e
|
|
497
486
|
|
|
498
487
|
|
|
499
488
|
# --- Compatibility routes for existing eval scripts that expect CrafterClassic paths ---
|
|
@@ -509,7 +498,9 @@ async def compat_initialize(payload: dict) -> EnvCreateResponse:
|
|
|
509
498
|
difficulty = str(wc.get("difficulty"))
|
|
510
499
|
elif isinstance(cfg, dict) and cfg.get("difficulty"):
|
|
511
500
|
difficulty = str(cfg.get("difficulty"))
|
|
512
|
-
req = EnvCreateRequest(
|
|
501
|
+
req = EnvCreateRequest(
|
|
502
|
+
env_name="crafter", config={"difficulty": difficulty}, seed=seed, rl_run_id="eval"
|
|
503
|
+
)
|
|
513
504
|
return await create_environment(req)
|
|
514
505
|
|
|
515
506
|
|
|
@@ -525,10 +516,12 @@ async def compat_step(payload: dict) -> EnvStepResponse:
|
|
|
525
516
|
actions_list = action.get("actions") if isinstance(action, dict) else None
|
|
526
517
|
if isinstance(actions_list, list) and actions_list:
|
|
527
518
|
for a in actions_list:
|
|
528
|
-
tool_calls.append(
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
519
|
+
tool_calls.append(
|
|
520
|
+
{
|
|
521
|
+
"tool": "interact",
|
|
522
|
+
"args": {"action": a},
|
|
523
|
+
}
|
|
524
|
+
)
|
|
532
525
|
req = EnvStepRequest(env_id=env_id, tool_calls=tool_calls)
|
|
533
526
|
return await step_environment(req)
|
|
534
527
|
|
|
@@ -545,9 +538,7 @@ async def reset_environment(request: EnvResetRequest) -> EnvResetResponse:
|
|
|
545
538
|
"""Reset an environment to its initial state."""
|
|
546
539
|
handle = registry.get_env(request.env_id)
|
|
547
540
|
if not handle:
|
|
548
|
-
raise HTTPException(
|
|
549
|
-
status_code=404, detail=f"Environment {request.env_id} not found"
|
|
550
|
-
)
|
|
541
|
+
raise HTTPException(status_code=404, detail=f"Environment {request.env_id} not found")
|
|
551
542
|
|
|
552
543
|
try:
|
|
553
544
|
# Determine wrapper type and rebuild base env if a new seed is provided
|
|
@@ -586,29 +577,28 @@ async def reset_environment(request: EnvResetRequest) -> EnvResetResponse:
|
|
|
586
577
|
|
|
587
578
|
elif True:
|
|
588
579
|
# Try to dynamically import Wordle wrapper and check instance safely
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
_WordleWrapper = None # type: ignore
|
|
580
|
+
wordle_wrapper_cls = None
|
|
581
|
+
with contextlib.suppress(Exception):
|
|
582
|
+
from .envs.wordle.environment import WordleEnvironmentWrapper
|
|
583
|
+
|
|
584
|
+
wordle_wrapper_cls = WordleEnvironmentWrapper # type: ignore[assignment]
|
|
595
585
|
|
|
596
|
-
if
|
|
586
|
+
if wordle_wrapper_cls is not None and isinstance(wrapper, wordle_wrapper_cls):
|
|
597
587
|
# Rebuild Wordle env with the same configuration; if we have a preserved
|
|
598
588
|
# initial_engine_snapshot, prefer constructing the instance directly.
|
|
599
589
|
try:
|
|
590
|
+
from synth_ai.environments.examples.wordle.environment import (
|
|
591
|
+
WordleEnvironment,
|
|
592
|
+
)
|
|
600
593
|
from synth_ai.environments.examples.wordle.taskset import (
|
|
601
|
-
create_wordle_taskset,
|
|
602
594
|
WordleTaskInstance,
|
|
603
595
|
WordleTaskInstanceMetadata,
|
|
604
|
-
|
|
605
|
-
from synth_ai.environments.examples.wordle.environment import (
|
|
606
|
-
WordleEnvironment,
|
|
596
|
+
create_wordle_taskset,
|
|
607
597
|
)
|
|
608
598
|
except Exception as e:
|
|
609
599
|
raise HTTPException(
|
|
610
600
|
status_code=500, detail=f"Wordle modules unavailable: {e}"
|
|
611
|
-
)
|
|
601
|
+
) from e
|
|
612
602
|
|
|
613
603
|
init_snap = getattr(wrapper, "initial_engine_snapshot", None)
|
|
614
604
|
if init_snap is not None:
|
|
@@ -646,18 +636,18 @@ async def reset_environment(request: EnvResetRequest) -> EnvResetResponse:
|
|
|
646
636
|
# Rebuild Wordle env with the same configuration; if we have a preserved
|
|
647
637
|
# initial_engine_snapshot, prefer constructing the instance directly.
|
|
648
638
|
try:
|
|
639
|
+
from synth_ai.environments.examples.wordle.environment import (
|
|
640
|
+
WordleEnvironment,
|
|
641
|
+
)
|
|
649
642
|
from synth_ai.environments.examples.wordle.taskset import (
|
|
650
|
-
create_wordle_taskset,
|
|
651
643
|
WordleTaskInstance,
|
|
652
644
|
WordleTaskInstanceMetadata,
|
|
653
|
-
|
|
654
|
-
from synth_ai.environments.examples.wordle.environment import (
|
|
655
|
-
WordleEnvironment,
|
|
645
|
+
create_wordle_taskset,
|
|
656
646
|
)
|
|
657
647
|
except Exception as e:
|
|
658
648
|
raise HTTPException(
|
|
659
649
|
status_code=500, detail=f"Wordle modules unavailable: {e}"
|
|
660
|
-
)
|
|
650
|
+
) from e
|
|
661
651
|
|
|
662
652
|
init_snap = getattr(wrapper, "initial_engine_snapshot", None)
|
|
663
653
|
if init_snap is not None:
|
|
@@ -693,27 +683,26 @@ async def reset_environment(request: EnvResetRequest) -> EnvResetResponse:
|
|
|
693
683
|
|
|
694
684
|
elif True:
|
|
695
685
|
# Try to dynamically import Sokoban wrapper and check instance safely
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
)
|
|
700
|
-
except Exception:
|
|
701
|
-
_SokobanWrapper = None # type: ignore
|
|
686
|
+
sokoban_wrapper_cls = None
|
|
687
|
+
with contextlib.suppress(Exception):
|
|
688
|
+
from .envs.sokoban.environment import SokobanEnvironmentWrapper
|
|
702
689
|
|
|
703
|
-
|
|
690
|
+
sokoban_wrapper_cls = SokobanEnvironmentWrapper # type: ignore[assignment]
|
|
691
|
+
|
|
692
|
+
if sokoban_wrapper_cls is not None and isinstance(wrapper, sokoban_wrapper_cls):
|
|
704
693
|
# Rebuild Sokoban env using stored config snapshot
|
|
705
694
|
try:
|
|
695
|
+
from synth_ai.environments.examples.sokoban.environment import (
|
|
696
|
+
SokobanEnvironment,
|
|
697
|
+
)
|
|
706
698
|
from synth_ai.environments.examples.sokoban.taskset import (
|
|
707
699
|
SokobanTaskInstance,
|
|
708
700
|
SokobanTaskInstanceMetadata,
|
|
709
701
|
)
|
|
710
|
-
from synth_ai.environments.examples.sokoban.environment import (
|
|
711
|
-
SokobanEnvironment,
|
|
712
|
-
)
|
|
713
702
|
except Exception as e:
|
|
714
703
|
raise HTTPException(
|
|
715
704
|
status_code=500, detail=f"Sokoban modules unavailable: {e}"
|
|
716
|
-
)
|
|
705
|
+
) from e
|
|
717
706
|
|
|
718
707
|
cfg = dict(wrapper.config or {})
|
|
719
708
|
metadata = SokobanTaskInstanceMetadata(
|
|
@@ -738,17 +727,17 @@ async def reset_environment(request: EnvResetRequest) -> EnvResetResponse:
|
|
|
738
727
|
pass
|
|
739
728
|
# Rebuild Sokoban env using stored config snapshot
|
|
740
729
|
try:
|
|
730
|
+
from synth_ai.environments.examples.sokoban.environment import (
|
|
731
|
+
SokobanEnvironment,
|
|
732
|
+
)
|
|
741
733
|
from synth_ai.environments.examples.sokoban.taskset import (
|
|
742
734
|
SokobanTaskInstance,
|
|
743
735
|
SokobanTaskInstanceMetadata,
|
|
744
736
|
)
|
|
745
|
-
from synth_ai.environments.examples.sokoban.environment import (
|
|
746
|
-
SokobanEnvironment,
|
|
747
|
-
)
|
|
748
737
|
except Exception as e:
|
|
749
738
|
raise HTTPException(
|
|
750
739
|
status_code=500, detail=f"Sokoban modules unavailable: {e}"
|
|
751
|
-
)
|
|
740
|
+
) from e
|
|
752
741
|
|
|
753
742
|
cfg = dict(wrapper.config or {})
|
|
754
743
|
metadata = SokobanTaskInstanceMetadata(
|
|
@@ -757,9 +746,7 @@ async def reset_environment(request: EnvResetRequest) -> EnvResetResponse:
|
|
|
757
746
|
instance = SokobanTaskInstance(
|
|
758
747
|
id=uuid4(),
|
|
759
748
|
impetus=Impetus(instructions="Reset"),
|
|
760
|
-
intent=Intent(
|
|
761
|
-
rubric={"goal": "Reset"}, gold_trajectories=None, gold_state_diff={}
|
|
762
|
-
),
|
|
749
|
+
intent=Intent(rubric={"goal": "Reset"}, gold_trajectories=None, gold_state_diff={}),
|
|
763
750
|
metadata=metadata,
|
|
764
751
|
is_reproducible=True,
|
|
765
752
|
initial_engine_snapshot=cfg.get("initial_state"),
|
|
@@ -777,7 +764,8 @@ async def reset_environment(request: EnvResetRequest) -> EnvResetResponse:
|
|
|
777
764
|
try:
|
|
778
765
|
base_env = handle.env.env # type: ignore[attr-defined]
|
|
779
766
|
pub_state = base_env.engine._get_public_state_from_env() # type: ignore[attr-defined]
|
|
780
|
-
import hashlib
|
|
767
|
+
import hashlib
|
|
768
|
+
import json as _json
|
|
781
769
|
|
|
782
770
|
sig_src = {
|
|
783
771
|
"player_position": list(pub_state.player_position),
|
|
@@ -810,7 +798,7 @@ async def reset_environment(request: EnvResetRequest) -> EnvResetResponse:
|
|
|
810
798
|
|
|
811
799
|
except Exception as e:
|
|
812
800
|
logger.error(f"Failed to reset environment {request.env_id}: {e}")
|
|
813
|
-
raise HTTPException(status_code=500, detail=str(e))
|
|
801
|
+
raise HTTPException(status_code=500, detail=str(e)) from e
|
|
814
802
|
|
|
815
803
|
|
|
816
804
|
@router.post("/step", response_model=EnvStepResponse)
|
|
@@ -818,30 +806,22 @@ async def step_environment(request: EnvStepRequest) -> EnvStepResponse:
|
|
|
818
806
|
"""Execute a step in the environment."""
|
|
819
807
|
handle = registry.get_env(request.env_id)
|
|
820
808
|
if not handle:
|
|
821
|
-
raise HTTPException(
|
|
822
|
-
status_code=404, detail=f"Environment {request.env_id} not found"
|
|
823
|
-
)
|
|
809
|
+
raise HTTPException(status_code=404, detail=f"Environment {request.env_id} not found")
|
|
824
810
|
|
|
825
811
|
try:
|
|
826
812
|
# Execute the step, pre-normalizing invalid Wordle guesses to avoid hard failures
|
|
827
813
|
wrapper = handle.env
|
|
828
|
-
|
|
829
|
-
|
|
830
|
-
|
|
831
|
-
)
|
|
832
|
-
except Exception:
|
|
833
|
-
_WordleWrapper = None # type: ignore
|
|
814
|
+
wordle_wrapper_cls = None
|
|
815
|
+
with contextlib.suppress(Exception):
|
|
816
|
+
from .envs.wordle.environment import WordleEnvironmentWrapper
|
|
834
817
|
|
|
835
|
-
|
|
818
|
+
wordle_wrapper_cls = WordleEnvironmentWrapper # type: ignore[assignment]
|
|
819
|
+
|
|
820
|
+
if wordle_wrapper_cls is not None and isinstance(wrapper, wordle_wrapper_cls):
|
|
836
821
|
expected_len = int(getattr(wrapper, "word_length", 5))
|
|
837
|
-
normalized:
|
|
822
|
+
normalized: list[dict[str, Any]] = []
|
|
838
823
|
for tc in request.tool_calls or []:
|
|
839
|
-
tool = (
|
|
840
|
-
tc.get("tool")
|
|
841
|
-
or tc.get("tool_name")
|
|
842
|
-
or tc.get("name")
|
|
843
|
-
or "interact"
|
|
844
|
-
)
|
|
824
|
+
tool = tc.get("tool") or tc.get("tool_name") or tc.get("name") or "interact"
|
|
845
825
|
args = tc.get("arguments") or tc.get("args") or {}
|
|
846
826
|
if isinstance(args, str):
|
|
847
827
|
try:
|
|
@@ -861,9 +841,7 @@ async def step_environment(request: EnvStepRequest) -> EnvStepResponse:
|
|
|
861
841
|
# Preserve the original tool name (interact or submit) for the environment to handle
|
|
862
842
|
normalized.append({"tool": tool, "args": {"guess": g}})
|
|
863
843
|
else:
|
|
864
|
-
normalized.append(
|
|
865
|
-
{"tool": "invalid_guess", "args": {"original_guess": guess}}
|
|
866
|
-
)
|
|
844
|
+
normalized.append({"tool": "invalid_guess", "args": {"original_guess": guess}})
|
|
867
845
|
result = await wrapper.step(normalized)
|
|
868
846
|
else:
|
|
869
847
|
result = await handle.env.step(request.tool_calls)
|
|
@@ -913,21 +891,18 @@ async def step_environment(request: EnvStepRequest) -> EnvStepResponse:
|
|
|
913
891
|
logger.error(f"Failed to step environment {request.env_id}: {e}")
|
|
914
892
|
# Fallback for Wordle: convert invalid guesses into 'invalid_guess' tool calls and retry once
|
|
915
893
|
try:
|
|
916
|
-
|
|
917
|
-
|
|
918
|
-
|
|
894
|
+
wordle_wrapper_cls = None
|
|
895
|
+
with contextlib.suppress(Exception):
|
|
896
|
+
from .envs.wordle.environment import WordleEnvironmentWrapper
|
|
897
|
+
|
|
898
|
+
wordle_wrapper_cls = WordleEnvironmentWrapper # type: ignore[assignment]
|
|
919
899
|
|
|
920
900
|
wrapper = handle.env
|
|
921
|
-
if isinstance(wrapper,
|
|
901
|
+
if wordle_wrapper_cls is not None and isinstance(wrapper, wordle_wrapper_cls):
|
|
922
902
|
expected_len = int(getattr(wrapper, "word_length", 5))
|
|
923
|
-
normalized:
|
|
903
|
+
normalized: list[dict[str, Any]] = []
|
|
924
904
|
for tc in request.tool_calls or []:
|
|
925
|
-
tool = (
|
|
926
|
-
tc.get("tool")
|
|
927
|
-
or tc.get("tool_name")
|
|
928
|
-
or tc.get("name")
|
|
929
|
-
or "interact"
|
|
930
|
-
)
|
|
905
|
+
tool = tc.get("tool") or tc.get("tool_name") or tc.get("name") or "interact"
|
|
931
906
|
args = tc.get("arguments") or tc.get("args") or {}
|
|
932
907
|
if isinstance(args, str):
|
|
933
908
|
try:
|
|
@@ -947,9 +922,7 @@ async def step_environment(request: EnvStepRequest) -> EnvStepResponse:
|
|
|
947
922
|
}
|
|
948
923
|
)
|
|
949
924
|
else:
|
|
950
|
-
normalized.append(
|
|
951
|
-
{"tool": "interact", "args": {"guess": g}}
|
|
952
|
-
)
|
|
925
|
+
normalized.append({"tool": "interact", "args": {"guess": g}})
|
|
953
926
|
else:
|
|
954
927
|
normalized.append(
|
|
955
928
|
{"tool": "invalid_guess", "args": {"original_guess": guess}}
|
|
@@ -981,7 +954,7 @@ async def step_environment(request: EnvStepRequest) -> EnvStepResponse:
|
|
|
981
954
|
# Ignore fallback errors; fall through to generic error
|
|
982
955
|
pass
|
|
983
956
|
|
|
984
|
-
raise HTTPException(status_code=500, detail=f"{type(e).__name__}: {e}")
|
|
957
|
+
raise HTTPException(status_code=500, detail=f"{type(e).__name__}: {e}") from e
|
|
985
958
|
|
|
986
959
|
|
|
987
960
|
@router.post("/snapshot", response_model=EnvSnapshotResponse)
|
|
@@ -989,9 +962,7 @@ async def snapshot_environment(request: EnvSnapshotRequest) -> EnvSnapshotRespon
|
|
|
989
962
|
"""Create a snapshot of the environment state."""
|
|
990
963
|
handle = registry.get_env(request.env_id)
|
|
991
964
|
if not handle:
|
|
992
|
-
raise HTTPException(
|
|
993
|
-
status_code=404, detail=f"Environment {request.env_id} not found"
|
|
994
|
-
)
|
|
965
|
+
raise HTTPException(status_code=404, detail=f"Environment {request.env_id} not found")
|
|
995
966
|
|
|
996
967
|
try:
|
|
997
968
|
# Serialize environment state
|
|
@@ -1022,7 +993,7 @@ async def snapshot_environment(request: EnvSnapshotRequest) -> EnvSnapshotRespon
|
|
|
1022
993
|
|
|
1023
994
|
except Exception as e:
|
|
1024
995
|
logger.error(f"Failed to snapshot environment {request.env_id}: {e}")
|
|
1025
|
-
raise HTTPException(status_code=500, detail=str(e))
|
|
996
|
+
raise HTTPException(status_code=500, detail=str(e)) from e
|
|
1026
997
|
|
|
1027
998
|
|
|
1028
999
|
@router.post("/restore", response_model=EnvRestoreResponse)
|
|
@@ -1030,9 +1001,7 @@ async def restore_environment(request: EnvRestoreRequest) -> EnvRestoreResponse:
|
|
|
1030
1001
|
"""Restore an environment from a snapshot."""
|
|
1031
1002
|
snapshot = registry.get_snapshot(request.snapshot_id)
|
|
1032
1003
|
if not snapshot:
|
|
1033
|
-
raise HTTPException(
|
|
1034
|
-
status_code=404, detail=f"Snapshot {request.snapshot_id} not found"
|
|
1035
|
-
)
|
|
1004
|
+
raise HTTPException(status_code=404, detail=f"Snapshot {request.snapshot_id} not found")
|
|
1036
1005
|
|
|
1037
1006
|
if snapshot.kind != "env":
|
|
1038
1007
|
raise HTTPException(
|
|
@@ -1104,18 +1073,18 @@ async def restore_environment(request: EnvRestoreRequest) -> EnvRestoreResponse:
|
|
|
1104
1073
|
)
|
|
1105
1074
|
elif name_lower == "wordle":
|
|
1106
1075
|
try:
|
|
1076
|
+
from synth_ai.environments.examples.wordle.environment import (
|
|
1077
|
+
WordleEnvironment,
|
|
1078
|
+
)
|
|
1107
1079
|
from synth_ai.environments.examples.wordle.taskset import (
|
|
1108
|
-
create_wordle_taskset,
|
|
1109
1080
|
WordleTaskInstance,
|
|
1110
1081
|
WordleTaskInstanceMetadata,
|
|
1111
|
-
|
|
1112
|
-
from synth_ai.environments.examples.wordle.environment import (
|
|
1113
|
-
WordleEnvironment,
|
|
1082
|
+
create_wordle_taskset,
|
|
1114
1083
|
)
|
|
1115
1084
|
except Exception as e:
|
|
1116
1085
|
raise HTTPException(
|
|
1117
1086
|
status_code=500, detail=f"Wordle modules unavailable: {e}"
|
|
1118
|
-
)
|
|
1087
|
+
) from e
|
|
1119
1088
|
|
|
1120
1089
|
cfg = state_dict.get("config", {}) or {}
|
|
1121
1090
|
word_length = int(cfg.get("word_length", 5))
|
|
@@ -1146,16 +1115,12 @@ async def restore_environment(request: EnvRestoreRequest) -> EnvRestoreResponse:
|
|
|
1146
1115
|
base_env = WordleEnvironment(task_instance=instance)
|
|
1147
1116
|
# Lazy import of wrapper only when needed
|
|
1148
1117
|
try:
|
|
1149
|
-
from .envs.wordle.environment import
|
|
1150
|
-
WordleEnvironmentWrapper as _WordleWrapper,
|
|
1151
|
-
)
|
|
1118
|
+
from .envs.wordle.environment import WordleEnvironmentWrapper
|
|
1152
1119
|
except Exception as e:
|
|
1153
1120
|
raise HTTPException(
|
|
1154
1121
|
status_code=500, detail=f"Wordle wrapper unavailable: {e}"
|
|
1155
|
-
)
|
|
1156
|
-
wrapper = await
|
|
1157
|
-
payload=state_dict, env=base_env
|
|
1158
|
-
)
|
|
1122
|
+
) from e
|
|
1123
|
+
wrapper = await WordleEnvironmentWrapper.deserialize(payload=state_dict, env=base_env)
|
|
1159
1124
|
|
|
1160
1125
|
env_id = registry.register_env(
|
|
1161
1126
|
env=wrapper,
|
|
@@ -1176,22 +1141,20 @@ async def restore_environment(request: EnvRestoreRequest) -> EnvRestoreResponse:
|
|
|
1176
1141
|
|
|
1177
1142
|
elif name_lower == "sokoban":
|
|
1178
1143
|
try:
|
|
1144
|
+
from synth_ai.environments.examples.sokoban.environment import (
|
|
1145
|
+
SokobanEnvironment,
|
|
1146
|
+
)
|
|
1179
1147
|
from synth_ai.environments.examples.sokoban.taskset import (
|
|
1180
1148
|
SokobanTaskInstance,
|
|
1181
1149
|
SokobanTaskInstanceMetadata,
|
|
1182
1150
|
)
|
|
1183
|
-
from synth_ai.environments.examples.sokoban.environment import (
|
|
1184
|
-
SokobanEnvironment,
|
|
1185
|
-
)
|
|
1186
1151
|
except Exception as e:
|
|
1187
1152
|
raise HTTPException(
|
|
1188
1153
|
status_code=500, detail=f"Sokoban modules unavailable: {e}"
|
|
1189
|
-
)
|
|
1154
|
+
) from e
|
|
1190
1155
|
|
|
1191
1156
|
cfg = state_dict.get("config", {}) or {}
|
|
1192
|
-
metadata = SokobanTaskInstanceMetadata(
|
|
1193
|
-
difficulty=cfg.get("difficulty", "easy")
|
|
1194
|
-
)
|
|
1157
|
+
metadata = SokobanTaskInstanceMetadata(difficulty=cfg.get("difficulty", "easy"))
|
|
1195
1158
|
instance = SokobanTaskInstance(
|
|
1196
1159
|
id=uuid4(),
|
|
1197
1160
|
impetus=Impetus(instructions="Restore"),
|
|
@@ -1207,16 +1170,12 @@ async def restore_environment(request: EnvRestoreRequest) -> EnvRestoreResponse:
|
|
|
1207
1170
|
base_env = SokobanEnvironment(task_instance=instance)
|
|
1208
1171
|
# Lazy import of wrapper only when needed
|
|
1209
1172
|
try:
|
|
1210
|
-
from .envs.sokoban.environment import
|
|
1211
|
-
SokobanEnvironmentWrapper as _SokobanWrapper,
|
|
1212
|
-
)
|
|
1173
|
+
from .envs.sokoban.environment import SokobanEnvironmentWrapper
|
|
1213
1174
|
except Exception as e:
|
|
1214
1175
|
raise HTTPException(
|
|
1215
1176
|
status_code=500, detail=f"Sokoban wrapper unavailable: {e}"
|
|
1216
|
-
)
|
|
1217
|
-
wrapper = await
|
|
1218
|
-
payload=state_dict, env=base_env
|
|
1219
|
-
)
|
|
1177
|
+
) from e
|
|
1178
|
+
wrapper = await SokobanEnvironmentWrapper.deserialize(payload=state_dict, env=base_env)
|
|
1220
1179
|
|
|
1221
1180
|
env_id = registry.register_env(
|
|
1222
1181
|
env=wrapper,
|
|
@@ -1242,10 +1201,8 @@ async def restore_environment(request: EnvRestoreRequest) -> EnvRestoreResponse:
|
|
|
1242
1201
|
)
|
|
1243
1202
|
|
|
1244
1203
|
except Exception as e:
|
|
1245
|
-
logger.error(
|
|
1246
|
-
|
|
1247
|
-
)
|
|
1248
|
-
raise HTTPException(status_code=500, detail=str(e))
|
|
1204
|
+
logger.error(f"Failed to restore environment from snapshot {request.snapshot_id}: {e}")
|
|
1205
|
+
raise HTTPException(status_code=500, detail=str(e)) from e
|
|
1249
1206
|
|
|
1250
1207
|
|
|
1251
1208
|
@router.post("/terminate", response_model=EnvTerminateResponse)
|
|
@@ -1253,9 +1210,7 @@ async def terminate_environment(request: EnvTerminateRequest) -> EnvTerminateRes
|
|
|
1253
1210
|
"""Terminate an environment and clean up resources."""
|
|
1254
1211
|
handle = registry.get_env(request.env_id)
|
|
1255
1212
|
if not handle:
|
|
1256
|
-
raise HTTPException(
|
|
1257
|
-
status_code=404, detail=f"Environment {request.env_id} not found"
|
|
1258
|
-
)
|
|
1213
|
+
raise HTTPException(status_code=404, detail=f"Environment {request.env_id} not found")
|
|
1259
1214
|
|
|
1260
1215
|
try:
|
|
1261
1216
|
# Call terminate on the environment
|
|
@@ -1268,4 +1223,4 @@ async def terminate_environment(request: EnvTerminateRequest) -> EnvTerminateRes
|
|
|
1268
1223
|
|
|
1269
1224
|
except Exception as e:
|
|
1270
1225
|
logger.error(f"Failed to terminate environment {request.env_id}: {e}")
|
|
1271
|
-
raise HTTPException(status_code=500, detail=str(e))
|
|
1226
|
+
raise HTTPException(status_code=500, detail=str(e)) from e
|