synth-ai 0.2.9.dev4__py3-none-any.whl → 0.2.9.dev6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synth-ai might be problematic.
- examples/__init__.py +16 -0
- examples/crafter_debug_render.py +23 -17
- examples/qwen_coder/README.md +102 -0
- examples/qwen_coder/_shared.py +113 -0
- examples/qwen_coder/configs/coder_lora_30b.toml +61 -0
- examples/qwen_coder/configs/coder_lora_4b.toml +57 -0
- examples/qwen_coder/configs/coder_lora_small.toml +58 -0
- examples/qwen_coder/generate_dataset.py +98 -0
- examples/qwen_coder/infer_ft_smoke.py +64 -0
- examples/qwen_coder/infer_prod_proxy.py +73 -0
- examples/qwen_coder/infer_via_synth.py +87 -0
- examples/qwen_coder/scripts/infer_coder.sh +18 -0
- examples/qwen_coder/scripts/train_coder_30b.sh +21 -0
- examples/qwen_coder/sft_full_17b.py +103 -0
- examples/qwen_coder/sft_lora_30b.py +110 -0
- examples/qwen_coder/subset_jsonl.py +38 -0
- examples/qwen_coder/validate_jsonl.py +59 -0
- examples/rl/configs/eval_base_qwen.toml +1 -1
- examples/rl/configs/rl_from_base_qwen17.toml +1 -1
- examples/rl/download_dataset.py +26 -10
- examples/rl/run_eval.py +53 -52
- examples/rl/run_rl_and_save.py +29 -12
- examples/rl/task_app/math_single_step.py +180 -41
- examples/rl/task_app/math_task_app.py +14 -6
- examples/sft/README.md +139 -0
- examples/sft/configs/crafter_fft_qwen0p6b.toml +44 -0
- examples/sft/configs/crafter_lora_qwen0p6b.toml +45 -0
- examples/sft/evaluate.py +117 -0
- examples/sft/export_dataset.py +117 -0
- examples/sft/generate_traces.py +162 -0
- examples/swe/__init__.py +12 -0
- examples/swe/task_app/README.md +105 -0
- examples/swe/task_app/__init__.py +2 -0
- examples/swe/task_app/grpo_swe_mini.py +571 -0
- examples/swe/task_app/grpo_swe_mini_task_app.py +136 -0
- examples/swe/task_app/hosted/README.md +173 -0
- examples/swe/task_app/hosted/__init__.py +5 -0
- examples/swe/task_app/hosted/branching.py +143 -0
- examples/swe/task_app/hosted/environment_routes.py +1289 -0
- examples/swe/task_app/hosted/envs/__init__.py +1 -0
- examples/swe/task_app/hosted/envs/crafter/__init__.py +6 -0
- examples/swe/task_app/hosted/envs/crafter/app.py +1 -0
- examples/swe/task_app/hosted/envs/crafter/environment.py +522 -0
- examples/swe/task_app/hosted/envs/crafter/policy.py +478 -0
- examples/swe/task_app/hosted/envs/crafter/react_agent.py +108 -0
- examples/swe/task_app/hosted/envs/crafter/shared.py +305 -0
- examples/swe/task_app/hosted/envs/crafter/tools.py +47 -0
- examples/swe/task_app/hosted/envs/mini_swe/__init__.py +8 -0
- examples/swe/task_app/hosted/envs/mini_swe/environment.py +1164 -0
- examples/swe/task_app/hosted/envs/mini_swe/policy.py +355 -0
- examples/swe/task_app/hosted/envs/mini_swe/shared.py +83 -0
- examples/swe/task_app/hosted/envs/mini_swe/tools.py +96 -0
- examples/swe/task_app/hosted/hosted_app.py +204 -0
- examples/swe/task_app/hosted/inference/__init__.py +5 -0
- examples/swe/task_app/hosted/inference/openai_client.py +618 -0
- examples/swe/task_app/hosted/main.py +100 -0
- examples/swe/task_app/hosted/policy_routes.py +1079 -0
- examples/swe/task_app/hosted/registry.py +195 -0
- examples/swe/task_app/hosted/rollout.py +1869 -0
- examples/swe/task_app/hosted/storage/__init__.py +5 -0
- examples/swe/task_app/hosted/storage/volume.py +211 -0
- examples/swe/task_app/hosted/test_agents.py +161 -0
- examples/swe/task_app/hosted/test_service.py +137 -0
- examples/swe/task_app/hosted/utils.py +62 -0
- examples/vlm/README.md +68 -0
- examples/vlm/configs/crafter_vlm_gpt4o.toml +44 -0
- examples/vlm/crafter_image_only_agent.py +207 -0
- examples/vlm/crafter_openai_vlm_agent.py +277 -0
- examples/vlm/filter_image_rows.py +63 -0
- examples/vlm/run_crafter_vlm_benchmark.py +316 -0
- examples/warming_up_to_rl/analyze_trace_db.py +12 -10
- examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +11 -1
- examples/warming_up_to_rl/export_trace_sft.py +218 -36
- examples/warming_up_to_rl/groq_test.py +15 -8
- examples/warming_up_to_rl/manage_secrets.py +29 -25
- examples/warming_up_to_rl/readme.md +9 -2
- examples/warming_up_to_rl/run_eval.py +137 -61
- examples/warming_up_to_rl/run_fft_and_save.py +131 -60
- examples/warming_up_to_rl/run_local_rollout.py +88 -39
- examples/warming_up_to_rl/run_local_rollout_modal.py +114 -28
- examples/warming_up_to_rl/run_local_rollout_parallel.py +81 -20
- examples/warming_up_to_rl/run_local_rollout_traced.py +126 -23
- examples/warming_up_to_rl/run_rl_and_save.py +35 -12
- examples/warming_up_to_rl/run_rollout_remote.py +44 -19
- examples/warming_up_to_rl/task_app/README.md +6 -2
- examples/warming_up_to_rl/task_app/grpo_crafter.py +319 -57
- examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +11 -30
- examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +9 -11
- examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +137 -182
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +150 -57
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +105 -69
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +19 -7
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +45 -42
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +47 -45
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +198 -92
- examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +0 -2
- examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +361 -263
- examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +21 -23
- examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +394 -274
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +56 -62
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +6 -15
- examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +4 -3
- synth/__init__.py +14 -0
- synth_ai/__init__.py +20 -4
- synth_ai/api/models/supported.py +376 -0
- synth_ai/api/train/builders.py +157 -26
- synth_ai/api/train/cli.py +213 -57
- synth_ai/api/train/config_finder.py +65 -5
- synth_ai/api/train/env_resolver.py +33 -15
- synth_ai/api/train/pollers.py +13 -4
- synth_ai/api/train/supported_algos.py +139 -0
- synth_ai/api/train/task_app.py +5 -3
- synth_ai/api/train/utils.py +33 -48
- synth_ai/cli/__init__.py +19 -4
- synth_ai/cli/_modal_wrapper.py +28 -0
- synth_ai/cli/_typer_patch.py +49 -0
- synth_ai/cli/balance.py +2 -3
- synth_ai/cli/calc.py +1 -1
- synth_ai/cli/demo.py +21 -6
- synth_ai/cli/recent.py +2 -2
- synth_ai/cli/rl_demo.py +77 -17
- synth_ai/cli/root.py +116 -39
- synth_ai/cli/status.py +2 -2
- synth_ai/cli/task_apps.py +1709 -243
- synth_ai/cli/traces.py +7 -4
- synth_ai/cli/turso.py +73 -0
- synth_ai/cli/watch.py +12 -18
- synth_ai/core/experiment.py +0 -2
- synth_ai/demo_registry.py +68 -31
- synth_ai/demos/core/cli.py +516 -194
- synth_ai/demos/demo_task_apps/__init__.py +3 -3
- synth_ai/demos/demo_task_apps/core.py +64 -28
- synth_ai/demos/demo_task_apps/crafter/configs/crafter_fft_4b.toml +2 -3
- synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +37 -30
- synth_ai/demos/demo_task_apps/math/_common.py +1 -2
- synth_ai/demos/demo_task_apps/math/app.py +2 -1
- synth_ai/demos/demo_task_apps/math/deploy_modal.py +3 -6
- synth_ai/demos/demo_task_apps/math/modal_task_app.py +183 -82
- synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -2
- synth_ai/environments/examples/bandit/engine.py +12 -4
- synth_ai/environments/examples/bandit/taskset.py +4 -4
- synth_ai/environments/examples/crafter_classic/environment.py +76 -1
- synth_ai/environments/reproducibility/tree.py +5 -6
- synth_ai/environments/service/app.py +11 -12
- synth_ai/environments/service/core_routes.py +10 -9
- synth_ai/environments/stateful/engine.py +1 -1
- synth_ai/environments/tasks/core.py +1 -0
- synth_ai/environments/tasks/filters.py +5 -6
- synth_ai/environments/tasks/utils.py +4 -5
- synth_ai/evals/base.py +0 -2
- synth_ai/handshake.py +11 -9
- synth_ai/http.py +1 -1
- synth_ai/http_client.py +43 -11
- synth_ai/inference/__init__.py +0 -2
- synth_ai/inference/client.py +20 -6
- synth_ai/jobs/client.py +103 -78
- synth_ai/learning/__init__.py +41 -6
- synth_ai/learning/algorithms.py +14 -0
- synth_ai/learning/client.py +121 -29
- synth_ai/learning/config.py +2 -40
- synth_ai/learning/constants.py +0 -2
- synth_ai/learning/ft_client.py +4 -56
- synth_ai/learning/health.py +13 -7
- synth_ai/learning/jobs.py +43 -47
- synth_ai/{rl → learning/rl}/__init__.py +14 -5
- synth_ai/learning/rl/client.py +267 -0
- synth_ai/learning/rl/config.py +31 -0
- synth_ai/{rl → learning/rl}/contracts.py +5 -10
- synth_ai/{rl → learning/rl}/env_keys.py +45 -16
- synth_ai/learning/rl/secrets.py +13 -0
- synth_ai/learning/rl_client.py +2 -253
- synth_ai/learning/sft/__init__.py +29 -0
- synth_ai/learning/sft/client.py +68 -0
- synth_ai/learning/sft/config.py +270 -0
- synth_ai/learning/sft/data.py +295 -0
- synth_ai/learning/sse.py +25 -26
- synth_ai/learning/validators.py +25 -24
- synth_ai/lm/__init__.py +21 -47
- synth_ai/task/__init__.py +26 -27
- synth_ai/task/apps/__init__.py +18 -19
- synth_ai/task/auth.py +35 -23
- synth_ai/task/client.py +15 -13
- synth_ai/task/contracts.py +37 -35
- synth_ai/task/datasets.py +9 -6
- synth_ai/task/errors.py +11 -10
- synth_ai/task/health.py +17 -11
- synth_ai/task/json.py +58 -24
- synth_ai/task/proxy.py +15 -14
- synth_ai/task/rubrics.py +22 -15
- synth_ai/task/server.py +43 -17
- synth_ai/task/tracing_utils.py +12 -7
- synth_ai/task/validators.py +0 -1
- synth_ai/task/vendors.py +5 -7
- synth_ai/tracing_v3/__init__.py +2 -0
- synth_ai/tracing_v3/abstractions.py +21 -4
- synth_ai/tracing_v3/db_config.py +26 -1
- synth_ai/tracing_v3/decorators.py +18 -15
- synth_ai/tracing_v3/examples/basic_usage.py +3 -2
- synth_ai/tracing_v3/hooks.py +6 -4
- synth_ai/tracing_v3/llm_call_record_helpers.py +6 -6
- synth_ai/tracing_v3/replica_sync.py +1 -0
- synth_ai/tracing_v3/session_tracer.py +63 -16
- synth_ai/tracing_v3/storage/base.py +89 -1
- synth_ai/tracing_v3/storage/config.py +21 -8
- synth_ai/tracing_v3/storage/factory.py +10 -8
- synth_ai/tracing_v3/storage/utils.py +4 -2
- synth_ai/tracing_v3/turso/daemon.py +7 -2
- synth_ai/tracing_v3/turso/models.py +5 -2
- synth_ai/tracing_v3/turso/native_manager.py +1173 -0
- synth_ai/tracing_v3/utils.py +4 -3
- synth_ai/v0/api/__init__.py +8 -0
- synth_ai/v0/api/models/__init__.py +8 -0
- synth_ai/v0/api/models/supported.py +8 -0
- synth_ai/v0/config/__init__.py +15 -0
- synth_ai/v0/config/base_url.py +12 -0
- synth_ai/v0/lm/__init__.py +51 -0
- synth_ai/{lm → v0/lm}/caching/ephemeral.py +3 -5
- synth_ai/{lm → v0/lm}/caching/handler.py +4 -4
- synth_ai/{lm → v0/lm}/caching/initialize.py +1 -1
- synth_ai/{lm → v0/lm}/caching/persistent.py +1 -1
- synth_ai/{lm → v0/lm}/config.py +6 -1
- synth_ai/{lm → v0/lm}/core/all.py +9 -9
- synth_ai/{lm → v0/lm}/core/exceptions.py +0 -2
- synth_ai/{lm → v0/lm}/core/main.py +19 -7
- synth_ai/{lm → v0/lm}/core/main_v3.py +10 -10
- synth_ai/{lm → v0/lm}/core/synth_models.py +2 -15
- synth_ai/{lm → v0/lm}/core/vendor_clients.py +6 -4
- synth_ai/{lm → v0/lm}/overrides.py +4 -4
- synth_ai/{lm → v0/lm}/provider_support/anthropic.py +4 -4
- synth_ai/{lm → v0/lm}/provider_support/openai.py +5 -5
- synth_ai/{lm → v0/lm}/structured_outputs/handler.py +5 -5
- synth_ai/{lm → v0/lm}/structured_outputs/rehabilitate.py +1 -1
- synth_ai/{lm → v0/lm}/vendors/core/anthropic_api.py +16 -16
- synth_ai/{lm → v0/lm}/vendors/core/gemini_api.py +5 -5
- synth_ai/{lm → v0/lm}/vendors/core/mistral_api.py +5 -5
- synth_ai/{lm → v0/lm}/vendors/core/openai_api.py +12 -10
- synth_ai/{lm → v0/lm}/vendors/openai_standard.py +11 -9
- synth_ai/{lm → v0/lm}/vendors/openai_standard_responses.py +8 -5
- synth_ai/{lm → v0/lm}/vendors/supported/custom_endpoint.py +4 -6
- synth_ai/{lm → v0/lm}/vendors/supported/deepseek.py +2 -2
- synth_ai/{lm → v0/lm}/vendors/supported/grok.py +2 -2
- synth_ai/{lm → v0/lm}/vendors/supported/groq.py +1 -1
- synth_ai/{lm → v0/lm}/vendors/supported/ollama.py +1 -1
- synth_ai/{lm → v0/lm}/vendors/supported/openrouter.py +3 -3
- synth_ai/{lm → v0/lm}/vendors/supported/together.py +1 -1
- synth_ai/{lm → v0/lm}/vendors/synth_client.py +38 -11
- synth_ai/v0/tracing/upload.py +32 -135
- synth_ai/v0/tracing_v3/__init__.py +10 -0
- synth_ai/v0/tracing_v3/abstractions.py +3 -0
- synth_ai/v0/tracing_v3/decorators.py +3 -0
- synth_ai/v0/tracing_v3/llm_call_record_helpers.py +3 -0
- synth_ai/v0/tracing_v3/session_tracer.py +3 -0
- synth_ai-0.2.9.dev6.dist-info/METADATA +191 -0
- {synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev6.dist-info}/RECORD +291 -264
- {synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev6.dist-info}/top_level.txt +1 -0
- examples/common_old/backend.py +0 -21
- examples/evals_old/README.md +0 -98
- examples/evals_old/__init__.py +0 -6
- examples/evals_old/compare_models.py +0 -1037
- examples/evals_old/example_log.md +0 -145
- examples/evals_old/run_demo.sh +0 -126
- examples/evals_old/trace_analysis.py +0 -270
- examples/finetuning_old/_backup_synth_qwen/config.toml +0 -29
- examples/finetuning_old/_backup_synth_qwen/example_log.md +0 -324
- examples/finetuning_old/_backup_synth_qwen/filter_traces.py +0 -60
- examples/finetuning_old/_backup_synth_qwen/filter_traces_achievements.py +0 -239
- examples/finetuning_old/_backup_synth_qwen/purge_v3_traces.py +0 -109
- examples/finetuning_old/_backup_synth_qwen/react_agent_lm.py +0 -1924
- examples/finetuning_old/_backup_synth_qwen/readme.md +0 -49
- examples/finetuning_old/_backup_synth_qwen/run_crafter_qwen4b.py +0 -114
- examples/finetuning_old/_backup_synth_qwen/run_demo.sh +0 -195
- examples/finetuning_old/_backup_synth_qwen/sft_kickoff.py +0 -118
- examples/finetuning_old/synth_qwen_v1/README.md +0 -68
- examples/finetuning_old/synth_qwen_v1/filter_traces.py +0 -60
- examples/finetuning_old/synth_qwen_v1/filter_traces_achievements.py +0 -239
- examples/finetuning_old/synth_qwen_v1/finetune.py +0 -46
- examples/finetuning_old/synth_qwen_v1/hello_ft_model.py +0 -71
- examples/finetuning_old/synth_qwen_v1/infer.py +0 -37
- examples/finetuning_old/synth_qwen_v1/poll.py +0 -44
- examples/finetuning_old/synth_qwen_v1/prepare_data.py +0 -35
- examples/finetuning_old/synth_qwen_v1/purge_v3_traces.py +0 -109
- examples/finetuning_old/synth_qwen_v1/react_agent_lm.py +0 -1932
- examples/finetuning_old/synth_qwen_v1/run_crafter_sft_job.py +0 -207
- examples/finetuning_old/synth_qwen_v1/run_ft_job.py +0 -232
- examples/finetuning_old/synth_qwen_v1/upload_data.py +0 -34
- examples/finetuning_old/synth_qwen_v1/util.py +0 -147
- examples/rl_old/task_app.py +0 -962
- examples/warming_up_to_rl/old/event_rewards.md +0 -234
- examples/warming_up_to_rl/old/notes.md +0 -73
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_stepwise_rewards.py +0 -58
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_turso.py +0 -738
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_turso.py +0 -580
- synth_ai/environments/examples/sokoban/units/astar_common.py +0 -95
- synth_ai/experimental/synth_oss.py +0 -446
- synth_ai/install_sqld.sh +0 -40
- synth_ai/learning/filtering.py +0 -0
- synth_ai/learning/offline/dpo.py +0 -0
- synth_ai/learning/offline/providers.py +0 -7
- synth_ai/learning/offline/sft.py +0 -0
- synth_ai/learning/offline/shared.py +0 -0
- synth_ai/learning/online/grpo.py +0 -0
- synth_ai/learning/online/irft.py +0 -0
- synth_ai/learning/prompts/banking77_injection_eval.py +0 -168
- synth_ai/learning/prompts/gepa.py +0 -0
- synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +0 -213
- synth_ai/learning/prompts/mipro.py +0 -289
- synth_ai/learning/prompts/random_search.py +0 -246
- synth_ai/learning/prompts/run_mipro_banking77.py +0 -172
- synth_ai/learning/prompts/run_random_search_banking77.py +0 -324
- synth_ai/rl/secrets.py +0 -19
- synth_ai/scripts/verify_rewards.py +0 -100
- synth_ai/tracing/__init__.py +0 -30
- synth_ai/tracing_v1/__init__.py +0 -33
- synth_ai/tracing_v3/turso/__init__.py +0 -25
- synth_ai/tracing_v3/turso/manager.py +0 -774
- synth_ai/zyk/__init__.py +0 -30
- synth_ai-0.2.9.dev4.dist-info/METADATA +0 -131
- /synth_ai/{lm → v0/lm}/caching/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/caching/constants.py +0 -0
- /synth_ai/{lm → v0/lm}/caching/dbs.py +0 -0
- /synth_ai/{lm → v0/lm}/constants.py +0 -0
- /synth_ai/{lm → v0/lm}/core/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/cost/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/cost/monitor.py +0 -0
- /synth_ai/{lm → v0/lm}/cost/statefulness.py +0 -0
- /synth_ai/{lm → v0/lm}/injection.py +0 -0
- /synth_ai/{lm → v0/lm}/provider_support/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/provider_support/suppress_logging.py +0 -0
- /synth_ai/{lm → v0/lm}/structured_outputs/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/structured_outputs/inject.py +0 -0
- /synth_ai/{lm → v0/lm}/tools/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/tools/base.py +0 -0
- /synth_ai/{lm → v0/lm}/unified_interface.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/base.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/core/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/core/synth_dev_api.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/local/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/local/ollama.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/retries.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/supported/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/warmup.py +0 -0
- {synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev6.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev6.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev6.dist-info}/licenses/LICENSE +0 -0

examples/swe/task_app/hosted/storage/volume.py
ADDED

@@ -0,0 +1,211 @@
from __future__ import annotations

import gzip
import hashlib
import json
import os
import tarfile
import tempfile
from datetime import datetime
from pathlib import Path
from typing import Any


class VolumeStorage:
    """Helpers for Modal Volume storage operations."""

    def __init__(self, base_path: str = "/data/state") -> None:
        self.base_path = Path(base_path)

    def get_snapshot_path(
        self,
        rl_run_id: str,
        kind: str,
        snapshot_id: str,
    ) -> Path:
        """Build the path for a snapshot file."""
        # Use first 2 chars of snapshot_id for sharding
        shard1 = snapshot_id[:2] if len(snapshot_id) >= 2 else "00"
        shard2 = snapshot_id[2:4] if len(snapshot_id) >= 4 else "00"

        return (
            self.base_path / "runs" / rl_run_id / kind / shard1 / shard2 / f"{snapshot_id}.tar.gz"
        )

    def get_index_path(self, rl_run_id: str) -> Path:
        """Get the index file path for a run."""
        return self.base_path / "runs" / rl_run_id / "index" / "meta.jsonl"

    def write_snapshot_atomic(
        self,
        path: Path,
        archive_bytes: bytes,
    ) -> None:
        """Atomically write a snapshot archive to disk."""
        # Ensure parent directory exists
        path.parent.mkdir(parents=True, exist_ok=True)

        # Write to temp file first
        tmp_path = path.with_suffix(".tmp")
        with open(tmp_path, "wb") as f:
            f.write(archive_bytes)
            f.flush()
            os.fsync(f.fileno())

        # Atomic rename
        os.replace(tmp_path, path)

    def create_archive(
        self,
        state_dict: dict[str, Any],
        meta: dict[str, Any],
    ) -> bytes:
        """Create a tar.gz archive with state and metadata."""
        with tempfile.TemporaryDirectory() as tmpdir:
            tmppath = Path(tmpdir)

            # Write state.json
            state_path = tmppath / "state.json"
            with open(state_path, "w") as f:
                json.dump(state_dict, f, sort_keys=True, indent=2)

            # Write meta.json
            meta_path = tmppath / "meta.json"
            with open(meta_path, "w") as f:
                json.dump(meta, f, sort_keys=True, indent=2)

            # Create tar archive
            tar_path = tmppath / "archive.tar"
            with tarfile.open(tar_path, "w") as tar:
                tar.add(state_path, arcname="state.json")
                tar.add(meta_path, arcname="meta.json")

            # Compress with gzip
            with open(tar_path, "rb") as f:
                tar_bytes = f.read()

            compressed = gzip.compress(tar_bytes, compresslevel=6)

            return compressed

    def extract_archive(self, archive_bytes: bytes) -> tuple[dict[str, Any], dict[str, Any]]:
        """Extract state and metadata from a tar.gz archive."""
        # Decompress
        tar_bytes = gzip.decompress(archive_bytes)

        with tempfile.TemporaryDirectory() as tmpdir:
            tmppath = Path(tmpdir)

            # Write tar bytes to temp file
            tar_path = tmppath / "archive.tar"
            with open(tar_path, "wb") as f:
                f.write(tar_bytes)

            # Extract tar
            with tarfile.open(tar_path, "r") as tar:
                tar.extractall(tmppath)

            # Read state and meta
            with open(tmppath / "state.json") as f:
                state = json.load(f)

            with open(tmppath / "meta.json") as f:
                meta = json.load(f)

            return state, meta

    def compute_snapshot_id(self, archive_bytes: bytes) -> str:
        """Compute content-addressed snapshot ID."""
        return hashlib.sha256(archive_bytes).hexdigest()

    def save_snapshot(
        self,
        rl_run_id: str,
        kind: str,
        state_dict: dict[str, Any],
        config: dict[str, Any] | None = None,
        parent_snapshot_id: str | None = None,
    ) -> tuple[str, str, int]:
        """Save a snapshot and return (snapshot_id, path, size)."""
        # Build metadata
        meta = {
            "kind": kind,
            "rl_run_id": rl_run_id,
            "schema_version": "1.0",
            "created_at": datetime.utcnow().isoformat(),
        }

        if parent_snapshot_id:
            meta["parent_snapshot_id"] = parent_snapshot_id

        if config:
            config_str = json.dumps(config, sort_keys=True)
            meta["config_hash"] = hashlib.sha256(config_str.encode()).hexdigest()

        # Create archive
        archive_bytes = self.create_archive(state_dict, meta)

        # Compute snapshot ID
        snapshot_id = self.compute_snapshot_id(archive_bytes)
        meta["snapshot_id"] = snapshot_id

        # Recreate archive with snapshot_id in metadata
        archive_bytes = self.create_archive(state_dict, meta)

        # Get path and write
        path = self.get_snapshot_path(rl_run_id, kind, snapshot_id)
        self.write_snapshot_atomic(path, archive_bytes)

        # Append to index
        self.append_to_index(rl_run_id, meta)

        return snapshot_id, str(path), len(archive_bytes)

    def load_snapshot(
        self,
        rl_run_id: str,
        kind: str,
        snapshot_id: str,
    ) -> tuple[dict[str, Any], dict[str, Any]]:
        """Load a snapshot and return (state_dict, meta)."""
        path = self.get_snapshot_path(rl_run_id, kind, snapshot_id)

        if not path.exists():
            raise FileNotFoundError(f"Snapshot not found: {path}")

        with open(path, "rb") as f:
            archive_bytes = f.read()

        state, meta = self.extract_archive(archive_bytes)
        return state, meta

    def append_to_index(
        self,
        rl_run_id: str,
        meta: dict[str, Any],
    ) -> None:
        """Append metadata to the run's index file."""
        index_path = self.get_index_path(rl_run_id)
        index_path.parent.mkdir(parents=True, exist_ok=True)

        with open(index_path, "a") as f:
            f.write(json.dumps(meta) + "\n")

    def read_index(self, rl_run_id: str) -> list[dict[str, Any]]:
        """Read all entries from a run's index file."""
        index_path = self.get_index_path(rl_run_id)

        if not index_path.exists():
            return []

        entries = []
        with open(index_path) as f:
            for line in f:
                if line.strip():
                    entries.append(json.loads(line))

        return entries


# Global storage instance
storage = VolumeStorage()
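For orientation, a minimal round-trip sketch of the `VolumeStorage` helpers defined above. The import path, run id, and state payload are assumptions for illustration, not part of the package.

```python
# Illustrative round trip for the VolumeStorage helpers above.
# The import path and payload are assumptions; adjust to your layout.
import tempfile
from pathlib import Path

from examples.swe.task_app.hosted.storage.volume import VolumeStorage  # path assumed

with tempfile.TemporaryDirectory() as tmp:
    store = VolumeStorage(base_path=str(Path(tmp) / "state"))

    snapshot_id, path, size = store.save_snapshot(
        rl_run_id="example-run",
        kind="env",
        state_dict={"step": 3, "score": 1.5},
        config={"difficulty": "easy"},
    )
    # Content-addressed id, shard-prefixed .tar.gz path, archive size in bytes.
    print(snapshot_id, path, size)

    state, meta = store.load_snapshot("example-run", "env", snapshot_id)
    assert state == {"step": 3, "score": 1.5}
    assert meta["snapshot_id"] == snapshot_id

    # The per-run index is an append-only JSONL of snapshot metadata.
    print(store.read_index("example-run"))
```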

examples/swe/task_app/hosted/test_agents.py
ADDED

@@ -0,0 +1,161 @@
#!/usr/bin/env python3
"""
Smoke test for Wordle and Sokoban ReAct agents using the hosted service.

Prereqs:
- Run the service: python examples/swe/task_app/hosted/main.py
- Run an OpenAI-compatible inference server (e.g., Flash/vLLM) at VLLM_BASE_URL
  that serves model "gpt-5-nano" or adjust MODEL below.

This script will:
- Create a Wordle/Sokoban env
- Create corresponding *-react policy with tools
- Ask the policy for tool_calls via /policy/step (which calls the model)
- Apply tool_calls to the env via /env/step
"""

import asyncio
import os

import httpx

BASE_URL = os.environ.get("SYNTH_ENVS_HOSTED_URL", "http://localhost:8000")
INFER_URL = os.environ.get("VLLM_BASE_URL", "http://localhost:8001")
MODEL = os.environ.get("MODEL", "gpt-5-nano")


async def run_wordle(rounds: int = 3) -> None:
    async with httpx.AsyncClient() as client:
        # Create env
        resp = await client.post(
            f"{BASE_URL}/env/create",
            json={
                "env_name": "Wordle",
                "config": {"word_length": 5, "max_guesses": 6},
                "seed": 0,
                "rl_run_id": "agents-smoke",
            },
        )
        resp.raise_for_status()
        data = resp.json()
        env_id = data["env_id"]
        obs = data["observation"]
        print("Wordle env created:", env_id)

        # Create policy
        resp = await client.post(
            f"{BASE_URL}/policy/create",
            json={
                "policy_name": "wordle-react",
                "config": {
                    "inference_url": INFER_URL,
                    "model": MODEL,
                    "use_tools": True,
                    "word_length": 5,
                    "max_guesses": 6,
                },
                "rl_run_id": "agents-smoke",
                "bound_env_id": env_id,
            },
        )
        resp.raise_for_status()
        policy_id = resp.json()["policy_id"]
        print("Wordle policy:", policy_id)

        # Loop a few rounds
        for i in range(rounds):
            print(f"[Wordle] Round {i + 1}")
            step_req = {"policy_id": policy_id, "observation": obs, "dry_run": False}
            resp = await client.post(f"{BASE_URL}/policy/step", json=step_req)
            resp.raise_for_status()
            step_out = resp.json()
            tool_calls = step_out.get("tool_calls", [])
            print(" tool_calls:", tool_calls)
            if not tool_calls:
                break
            resp = await client.post(
                f"{BASE_URL}/env/step",
                json={"env_id": env_id, "tool_calls": tool_calls},
            )
            resp.raise_for_status()
            env_step = resp.json()
            obs = env_step["observation"]
            print(" done:", env_step.get("done"), "reward:", env_step.get("reward"))
            if env_step.get("done"):
                break


async def run_sokoban(rounds: int = 3) -> None:
    async with httpx.AsyncClient() as client:
        # Create env (no initial_state provided; relies on env default)
        resp = await client.post(
            f"{BASE_URL}/env/create",
            json={
                "env_name": "Sokoban",
                "config": {"difficulty": "easy"},
                "seed": 0,
                "rl_run_id": "agents-smoke",
            },
        )
        if resp.status_code != 200:
            print("Sokoban create failed:", resp.status_code, resp.text)
            return
        data = resp.json()
        env_id = data["env_id"]
        obs = data["observation"]
        print("Sokoban env created:", env_id)

        resp = await client.post(
            f"{BASE_URL}/policy/create",
            json={
                "policy_name": "sokoban-react",
                "config": {
                    "inference_url": INFER_URL,
                    "model": MODEL,
                    "use_tools": True,
                },
                "rl_run_id": "agents-smoke",
                "bound_env_id": env_id,
            },
        )
        if resp.status_code != 200:
            print("Sokoban policy create failed:", resp.status_code, resp.text)
            return
        policy_id = resp.json()["policy_id"]
        print("Sokoban policy:", policy_id)

        for i in range(rounds):
            print(f"[Sokoban] Round {i + 1}")
            step_req = {"policy_id": policy_id, "observation": obs, "dry_run": False}
            resp = await client.post(f"{BASE_URL}/policy/step", json=step_req)
            if resp.status_code != 200:
                print(" policy step failed:", resp.status_code, resp.text)
                break
            step_out = resp.json()
            tool_calls = step_out.get("tool_calls", [])
            print(" tool_calls:", tool_calls)
            if not tool_calls:
                break
            resp = await client.post(
                f"{BASE_URL}/env/step",
                json={"env_id": env_id, "tool_calls": tool_calls},
            )
            if resp.status_code != 200:
                print(" env step failed:", resp.status_code, resp.text)
                break
            env_step = resp.json()
            obs = env_step["observation"]
            print(" done:", env_step.get("done"), "reward:", env_step.get("reward"))
            if env_step.get("done"):
                break


async def main():
    print("Testing Wordle agent with model:", MODEL)
    await run_wordle(rounds=3)
    print("\nTesting Sokoban agent with model:", MODEL)
    await run_sokoban(rounds=3)


if __name__ == "__main__":
    asyncio.run(main())

examples/swe/task_app/hosted/test_service.py
ADDED

@@ -0,0 +1,137 @@
#!/usr/bin/env python3
"""
Simple test script for the GRPO Synth Envs Hosted Service.

Run this after starting the service with:
    python main.py
"""

import asyncio
import json

import httpx


async def test_service():
    """Test basic service functionality."""
    base_url = "http://localhost:8000"

    async with httpx.AsyncClient() as client:
        # Test 1: Service info
        print("1. Testing /info endpoint...")
        response = await client.get(f"{base_url}/info")
        assert response.status_code == 200
        info = response.json()
        print(f" Service info: {json.dumps(info, indent=2)}")

        # Test 2: Health check
        print("\n2. Testing /health endpoint...")
        response = await client.get(f"{base_url}/health")
        assert response.status_code == 200
        print(f" Health: {response.json()}")

        # Test 3: Create environment
        print("\n3. Creating environment...")
        response = await client.post(
            f"{base_url}/env/create",
            json={
                "env_name": "crafter",
                "config": {},
                "seed": 42,
                "rl_run_id": "test-run-001",
            },
        )
        if response.status_code != 200:
            print(f" Error: {response.status_code} - {response.text}")
            return
        env_data = response.json()
        env_id = env_data["env_id"]
        print(f" Created env: {env_id}")
        print(f" Initial observation keys: {list(env_data['observation'].keys())}")

        # Test 4: Create policy
        print("\n4. Creating policy...")
        response = await client.post(
            f"{base_url}/policy/create",
            json={
                "policy_name": "crafter-react",
                "config": {
                    "inference_url": "http://localhost:8001",
                    "model": "test-model",
                },
                "rl_run_id": "test-run-001",
                "bound_env_id": env_id,
            },
        )
        if response.status_code != 200:
            print(f" Error: {response.status_code} - {response.text}")
            return
        policy_data = response.json()
        policy_id = policy_data["policy_id"]
        print(f" Created policy: {policy_id}")

        # Test 5: Environment step with dummy tool calls
        print("\n5. Testing environment step...")
        response = await client.post(
            f"{base_url}/env/step",
            json={
                "env_id": env_id,
                "tool_calls": [{"tool": "interact", "args": {"action": "move_left"}}],
            },
        )
        if response.status_code != 200:
            print(f" Error: {response.status_code} - {response.text}")
        else:
            step_data = response.json()
            print(f" Step result - done: {step_data['done']}, reward: {step_data.get('reward')}")

        # Test 6: Environment snapshot
        print("\n6. Creating environment snapshot...")
        response = await client.post(f"{base_url}/env/snapshot", json={"env_id": env_id})
        if response.status_code != 200:
            print(f" Error: {response.status_code} - {response.text}")
        else:
            snapshot_data = response.json()
            print(f" Snapshot ID: {snapshot_data['snapshot_id']}")
            print(f" Size: {snapshot_data['size']} bytes")

        # Test 7: Policy snapshot
        print("\n7. Creating policy snapshot...")
        response = await client.post(f"{base_url}/policy/snapshot", json={"policy_id": policy_id})
        if response.status_code != 200:
            print(f" Error: {response.status_code} - {response.text}")
        else:
            snapshot_data = response.json()
            print(f" Snapshot ID: {snapshot_data['snapshot_id']}")
            print(f" Size: {snapshot_data['size']} bytes")

        # Test 8: Run status
        print("\n8. Testing run status...")
        response = await client.get(f"{base_url}/run/status/test-run-001")
        if response.status_code != 200:
            print(f" Error: {response.status_code} - {response.text}")
        else:
            status_data = response.json()
            print(f" Run status: {status_data['status']}")

        # Test 9: Terminate environment
        print("\n9. Terminating environment...")
        response = await client.post(f"{base_url}/env/terminate", json={"env_id": env_id})
        if response.status_code != 200:
            print(f" Error: {response.status_code} - {response.text}")
        else:
            print(f" Environment terminated: {response.json()['ok']}")

        # Test 10: Terminate policy
        print("\n10. Terminating policy...")
        response = await client.post(f"{base_url}/policy/terminate", json={"policy_id": policy_id})
        if response.status_code != 200:
            print(f" Error: {response.status_code} - {response.text}")
        else:
            print(f" Policy terminated: {response.json()['ok']}")

        print("\n✅ All basic tests completed!")


if __name__ == "__main__":
    asyncio.run(test_service())

examples/swe/task_app/hosted/utils.py
ADDED

@@ -0,0 +1,62 @@
"""Utility functions for the task service."""

from typing import Any

import numpy as np


def convert_numpy_to_python(obj: Any) -> Any:
    """
    Recursively convert numpy types to Python native types for JSON serialization.

    Args:
        obj: Object that may contain numpy types

    Returns:
        Object with numpy types converted to Python native types
    """
    if isinstance(obj, np.integer):
        return int(obj)
    elif isinstance(obj, np.floating):
        return float(obj)
    elif isinstance(obj, np.ndarray):
        return obj.tolist()
    elif isinstance(obj, dict):
        return {key: convert_numpy_to_python(value) for key, value in obj.items()}
    elif isinstance(obj, list | tuple):
        return [convert_numpy_to_python(item) for item in obj]
    else:
        return obj


def sanitize_observation(observation: dict[str, Any]) -> dict[str, Any]:
    """
    Sanitize observation data for JSON serialization.

    Converts numpy types and removes non-serializable objects.

    Args:
        observation: Raw observation from environment

    Returns:
        Sanitized observation safe for JSON serialization
    """
    if not isinstance(observation, dict):
        return observation

    sanitized = {}
    for key, value in observation.items():
        # Skip non-serializable keys or convert them
        if key in ["semantic_map", "world_material_map", "observation_image"]:
            # These are likely numpy arrays - convert to lists or skip
            if isinstance(value, np.ndarray):
                # For large arrays, we might want to skip or compress
                # For now, skip them as they're likely debug info
                continue
        elif key == "player_position" and isinstance(value, tuple):
            # Convert tuple with potential numpy types
            sanitized[key] = [convert_numpy_to_python(v) for v in value]
        else:
            sanitized[key] = convert_numpy_to_python(value)

    return sanitized
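A short sketch of how these two helpers behave on a Crafter-style observation. The import path and the observation payload are assumptions for illustration, not part of the diff.

```python
# Illustrative use of the helpers above; the observation payload is made up.
import json

import numpy as np

from examples.swe.task_app.hosted.utils import (  # import path assumed
    convert_numpy_to_python,
    sanitize_observation,
)

raw_obs = {
    "inventory": {"wood": np.int64(3)},
    "player_position": (np.int32(4), np.int32(7)),
    "semantic_map": np.zeros((64, 64), dtype=np.uint8),  # dropped by sanitize_observation
    "health": np.float32(0.875),
}

clean = sanitize_observation(raw_obs)
print(json.dumps(clean))            # now JSON-serializable
assert "semantic_map" not in clean  # large arrays are skipped
assert clean["player_position"] == [4, 7]

# convert_numpy_to_python also works on nested structures directly.
print(convert_numpy_to_python({"scores": np.array([1.0, 2.0])}))
```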
examples/vlm/README.md
ADDED

@@ -0,0 +1,68 @@
# Crafter VLM Pipeline

This folder captures the reference workflow for fine-tuning Crafter policies with
multimodal (text + image) prompts. It stitches together the new image-aware tracing
plumbing with lightweight utilities for dataset curation and training.

## Quick Start

1. **Verify image capture**
   ```
   uv run python examples/vlm/crafter_image_only_agent.py --seed 7 --steps 5
   ```
   This writes PNG frames to `examples/vlm/output/frames/` and produces a JSONL preview
   of OpenAI-style image-only user messages.

2. **Collect traced rollouts**
   Use the Crafter task app (or your existing pipeline) with tracing enabled. The new
   tracing schema automatically records `observation_image_base64` and stores image parts
   in LM call records.

3. **Export multimodal SFT rows**
   ```
   uv run python examples/warming_up_to_rl/export_trace_sft.py \
     --db traces/v3/synth_ai.db \
     --output examples/vlm/output/crafter_traces_full.jsonl
   ```
   The exporter now emits `metadata.has_image`, `metadata.user_has_image`, and
   `metadata.assistant_has_image` flags per turn.

4. **Filter to image-rich turns**
   ```
   uv run python examples/vlm/filter_image_rows.py \
     --input examples/vlm/output/crafter_traces_full.jsonl \
     --output examples/vlm/output/crafter_vlm_dataset.jsonl
   ```

5. **(Optional) Split validation or augment**, then upload using the standard CLI:
   ```
   uv run python examples/warming_up_to_rl/run_fft_and_save.py \
     --toml examples/vlm/configs/crafter_vlm_gpt4o.toml \
     --data examples/vlm/output/crafter_vlm_dataset.jsonl
   ```

## Config & Utilities

| File | Purpose |
| --- | --- |
| `configs/crafter_vlm_gpt4o.toml` | Sample Synth job targeting an image-capable model (`openai/gpt-4o-mini`). Set `job.data` or pass `--data` explicitly. |
| `crafter_image_only_agent.py` | Captures frames and builds image-only prompts for sanity checks. |
| `filter_image_rows.py` | Extracts rows with image parts from exported JSONL datasets. |

## Notes & Next Steps

- The training config assumes full-finetuning (`mode = "sft_offline"`). Adjust the
  model id, hardware, or hyperparameters to match available infrastructure.
- Dataset rows emitted by `export_trace_sft.py` already contain OpenAI multimodal
  content parts like:
  ```json
  {
    "role": "user",
    "content": [
      {"type": "text", "text": "..."},
      {"type": "image_url", "image_url": {"url": "data:image/png;base64,..."}}
    ]
  }
  ```
- See `PROPOSAL.md` for a deeper dive into outstanding work (longer rollouts,
  richer multimodal augmentations, evaluation ideas).
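To make the image flags from step 3 concrete, here is a small sketch that splits an exported JSONL into image-bearing and text-only rows. It is a rough, hypothetical stand-in for what step 4 accomplishes; the real `filter_image_rows.py` may use different criteria, and the paths are the example paths from the Quick Start.

```python
# Hypothetical filter over the exported dataset: keep rows whose exporter
# metadata marks an image part (see metadata.has_image above). Paths reuse
# the Quick Start examples and are not hard requirements.
import json
from pathlib import Path

src = Path("examples/vlm/output/crafter_traces_full.jsonl")
dst = Path("examples/vlm/output/crafter_vlm_dataset.jsonl")

kept = dropped = 0
with src.open() as fin, dst.open("w") as fout:
    for line in fin:
        if not line.strip():
            continue
        row = json.loads(line)
        if row.get("metadata", {}).get("has_image"):
            fout.write(json.dumps(row) + "\n")
            kept += 1
        else:
            dropped += 1

print(f"kept {kept} image-bearing rows, dropped {dropped} text-only rows -> {dst}")
```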

examples/vlm/configs/crafter_vlm_gpt4o.toml
ADDED

@@ -0,0 +1,44 @@
[job]
model = "openai/gpt-4o-mini-2024-07-18"
modalities = ["text", "image"]
# data = "examples/vlm/output/crafter_vlm_dataset.jsonl"
description = "Crafter VLM SFT (text + image prompts)"

[compute]
gpu_type = "A100"
gpu_count = 1
nodes = 1

[data]
topology = {}
# validation_path = "examples/vlm/output/crafter_vlm_dataset.val.jsonl"

[training]
mode = "sft_offline"
use_qlora = false

[training.validation]
enabled = true
evaluation_strategy = "steps"
eval_steps = 50
save_best_model_at_end = true
metric_for_best_model = "val.loss"
greater_is_better = false

[hyperparameters]
n_epochs = 1
train_kind = "fft"
per_device_batch = 1
gradient_accumulation_steps = 32
sequence_length = 4096
learning_rate = 1e-5
warmup_ratio = 0.03
weight_decay = 0.01

[hyperparameters.parallelism]
use_deepspeed = true
deepspeed_stage = 2
fsdp = false
bf16 = true
fp16 = false
activation_checkpointing = true