synth-ai 0.2.9.dev4__py3-none-any.whl → 0.2.9.dev6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synth-ai might be problematic. Click here for more details.
- examples/__init__.py +16 -0
- examples/crafter_debug_render.py +23 -17
- examples/qwen_coder/README.md +102 -0
- examples/qwen_coder/_shared.py +113 -0
- examples/qwen_coder/configs/coder_lora_30b.toml +61 -0
- examples/qwen_coder/configs/coder_lora_4b.toml +57 -0
- examples/qwen_coder/configs/coder_lora_small.toml +58 -0
- examples/qwen_coder/generate_dataset.py +98 -0
- examples/qwen_coder/infer_ft_smoke.py +64 -0
- examples/qwen_coder/infer_prod_proxy.py +73 -0
- examples/qwen_coder/infer_via_synth.py +87 -0
- examples/qwen_coder/scripts/infer_coder.sh +18 -0
- examples/qwen_coder/scripts/train_coder_30b.sh +21 -0
- examples/qwen_coder/sft_full_17b.py +103 -0
- examples/qwen_coder/sft_lora_30b.py +110 -0
- examples/qwen_coder/subset_jsonl.py +38 -0
- examples/qwen_coder/validate_jsonl.py +59 -0
- examples/rl/configs/eval_base_qwen.toml +1 -1
- examples/rl/configs/rl_from_base_qwen17.toml +1 -1
- examples/rl/download_dataset.py +26 -10
- examples/rl/run_eval.py +53 -52
- examples/rl/run_rl_and_save.py +29 -12
- examples/rl/task_app/math_single_step.py +180 -41
- examples/rl/task_app/math_task_app.py +14 -6
- examples/sft/README.md +139 -0
- examples/sft/configs/crafter_fft_qwen0p6b.toml +44 -0
- examples/sft/configs/crafter_lora_qwen0p6b.toml +45 -0
- examples/sft/evaluate.py +117 -0
- examples/sft/export_dataset.py +117 -0
- examples/sft/generate_traces.py +162 -0
- examples/swe/__init__.py +12 -0
- examples/swe/task_app/README.md +105 -0
- examples/swe/task_app/__init__.py +2 -0
- examples/swe/task_app/grpo_swe_mini.py +571 -0
- examples/swe/task_app/grpo_swe_mini_task_app.py +136 -0
- examples/swe/task_app/hosted/README.md +173 -0
- examples/swe/task_app/hosted/__init__.py +5 -0
- examples/swe/task_app/hosted/branching.py +143 -0
- examples/swe/task_app/hosted/environment_routes.py +1289 -0
- examples/swe/task_app/hosted/envs/__init__.py +1 -0
- examples/swe/task_app/hosted/envs/crafter/__init__.py +6 -0
- examples/swe/task_app/hosted/envs/crafter/app.py +1 -0
- examples/swe/task_app/hosted/envs/crafter/environment.py +522 -0
- examples/swe/task_app/hosted/envs/crafter/policy.py +478 -0
- examples/swe/task_app/hosted/envs/crafter/react_agent.py +108 -0
- examples/swe/task_app/hosted/envs/crafter/shared.py +305 -0
- examples/swe/task_app/hosted/envs/crafter/tools.py +47 -0
- examples/swe/task_app/hosted/envs/mini_swe/__init__.py +8 -0
- examples/swe/task_app/hosted/envs/mini_swe/environment.py +1164 -0
- examples/swe/task_app/hosted/envs/mini_swe/policy.py +355 -0
- examples/swe/task_app/hosted/envs/mini_swe/shared.py +83 -0
- examples/swe/task_app/hosted/envs/mini_swe/tools.py +96 -0
- examples/swe/task_app/hosted/hosted_app.py +204 -0
- examples/swe/task_app/hosted/inference/__init__.py +5 -0
- examples/swe/task_app/hosted/inference/openai_client.py +618 -0
- examples/swe/task_app/hosted/main.py +100 -0
- examples/swe/task_app/hosted/policy_routes.py +1079 -0
- examples/swe/task_app/hosted/registry.py +195 -0
- examples/swe/task_app/hosted/rollout.py +1869 -0
- examples/swe/task_app/hosted/storage/__init__.py +5 -0
- examples/swe/task_app/hosted/storage/volume.py +211 -0
- examples/swe/task_app/hosted/test_agents.py +161 -0
- examples/swe/task_app/hosted/test_service.py +137 -0
- examples/swe/task_app/hosted/utils.py +62 -0
- examples/vlm/README.md +68 -0
- examples/vlm/configs/crafter_vlm_gpt4o.toml +44 -0
- examples/vlm/crafter_image_only_agent.py +207 -0
- examples/vlm/crafter_openai_vlm_agent.py +277 -0
- examples/vlm/filter_image_rows.py +63 -0
- examples/vlm/run_crafter_vlm_benchmark.py +316 -0
- examples/warming_up_to_rl/analyze_trace_db.py +12 -10
- examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +11 -1
- examples/warming_up_to_rl/export_trace_sft.py +218 -36
- examples/warming_up_to_rl/groq_test.py +15 -8
- examples/warming_up_to_rl/manage_secrets.py +29 -25
- examples/warming_up_to_rl/readme.md +9 -2
- examples/warming_up_to_rl/run_eval.py +137 -61
- examples/warming_up_to_rl/run_fft_and_save.py +131 -60
- examples/warming_up_to_rl/run_local_rollout.py +88 -39
- examples/warming_up_to_rl/run_local_rollout_modal.py +114 -28
- examples/warming_up_to_rl/run_local_rollout_parallel.py +81 -20
- examples/warming_up_to_rl/run_local_rollout_traced.py +126 -23
- examples/warming_up_to_rl/run_rl_and_save.py +35 -12
- examples/warming_up_to_rl/run_rollout_remote.py +44 -19
- examples/warming_up_to_rl/task_app/README.md +6 -2
- examples/warming_up_to_rl/task_app/grpo_crafter.py +319 -57
- examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +11 -30
- examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +9 -11
- examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +137 -182
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +150 -57
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +105 -69
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +19 -7
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +45 -42
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +47 -45
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +198 -92
- examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +0 -2
- examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +361 -263
- examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +21 -23
- examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +394 -274
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +56 -62
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +6 -15
- examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +4 -3
- synth/__init__.py +14 -0
- synth_ai/__init__.py +20 -4
- synth_ai/api/models/supported.py +376 -0
- synth_ai/api/train/builders.py +157 -26
- synth_ai/api/train/cli.py +213 -57
- synth_ai/api/train/config_finder.py +65 -5
- synth_ai/api/train/env_resolver.py +33 -15
- synth_ai/api/train/pollers.py +13 -4
- synth_ai/api/train/supported_algos.py +139 -0
- synth_ai/api/train/task_app.py +5 -3
- synth_ai/api/train/utils.py +33 -48
- synth_ai/cli/__init__.py +19 -4
- synth_ai/cli/_modal_wrapper.py +28 -0
- synth_ai/cli/_typer_patch.py +49 -0
- synth_ai/cli/balance.py +2 -3
- synth_ai/cli/calc.py +1 -1
- synth_ai/cli/demo.py +21 -6
- synth_ai/cli/recent.py +2 -2
- synth_ai/cli/rl_demo.py +77 -17
- synth_ai/cli/root.py +116 -39
- synth_ai/cli/status.py +2 -2
- synth_ai/cli/task_apps.py +1709 -243
- synth_ai/cli/traces.py +7 -4
- synth_ai/cli/turso.py +73 -0
- synth_ai/cli/watch.py +12 -18
- synth_ai/core/experiment.py +0 -2
- synth_ai/demo_registry.py +68 -31
- synth_ai/demos/core/cli.py +516 -194
- synth_ai/demos/demo_task_apps/__init__.py +3 -3
- synth_ai/demos/demo_task_apps/core.py +64 -28
- synth_ai/demos/demo_task_apps/crafter/configs/crafter_fft_4b.toml +2 -3
- synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +37 -30
- synth_ai/demos/demo_task_apps/math/_common.py +1 -2
- synth_ai/demos/demo_task_apps/math/app.py +2 -1
- synth_ai/demos/demo_task_apps/math/deploy_modal.py +3 -6
- synth_ai/demos/demo_task_apps/math/modal_task_app.py +183 -82
- synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -2
- synth_ai/environments/examples/bandit/engine.py +12 -4
- synth_ai/environments/examples/bandit/taskset.py +4 -4
- synth_ai/environments/examples/crafter_classic/environment.py +76 -1
- synth_ai/environments/reproducibility/tree.py +5 -6
- synth_ai/environments/service/app.py +11 -12
- synth_ai/environments/service/core_routes.py +10 -9
- synth_ai/environments/stateful/engine.py +1 -1
- synth_ai/environments/tasks/core.py +1 -0
- synth_ai/environments/tasks/filters.py +5 -6
- synth_ai/environments/tasks/utils.py +4 -5
- synth_ai/evals/base.py +0 -2
- synth_ai/handshake.py +11 -9
- synth_ai/http.py +1 -1
- synth_ai/http_client.py +43 -11
- synth_ai/inference/__init__.py +0 -2
- synth_ai/inference/client.py +20 -6
- synth_ai/jobs/client.py +103 -78
- synth_ai/learning/__init__.py +41 -6
- synth_ai/learning/algorithms.py +14 -0
- synth_ai/learning/client.py +121 -29
- synth_ai/learning/config.py +2 -40
- synth_ai/learning/constants.py +0 -2
- synth_ai/learning/ft_client.py +4 -56
- synth_ai/learning/health.py +13 -7
- synth_ai/learning/jobs.py +43 -47
- synth_ai/{rl → learning/rl}/__init__.py +14 -5
- synth_ai/learning/rl/client.py +267 -0
- synth_ai/learning/rl/config.py +31 -0
- synth_ai/{rl → learning/rl}/contracts.py +5 -10
- synth_ai/{rl → learning/rl}/env_keys.py +45 -16
- synth_ai/learning/rl/secrets.py +13 -0
- synth_ai/learning/rl_client.py +2 -253
- synth_ai/learning/sft/__init__.py +29 -0
- synth_ai/learning/sft/client.py +68 -0
- synth_ai/learning/sft/config.py +270 -0
- synth_ai/learning/sft/data.py +295 -0
- synth_ai/learning/sse.py +25 -26
- synth_ai/learning/validators.py +25 -24
- synth_ai/lm/__init__.py +21 -47
- synth_ai/task/__init__.py +26 -27
- synth_ai/task/apps/__init__.py +18 -19
- synth_ai/task/auth.py +35 -23
- synth_ai/task/client.py +15 -13
- synth_ai/task/contracts.py +37 -35
- synth_ai/task/datasets.py +9 -6
- synth_ai/task/errors.py +11 -10
- synth_ai/task/health.py +17 -11
- synth_ai/task/json.py +58 -24
- synth_ai/task/proxy.py +15 -14
- synth_ai/task/rubrics.py +22 -15
- synth_ai/task/server.py +43 -17
- synth_ai/task/tracing_utils.py +12 -7
- synth_ai/task/validators.py +0 -1
- synth_ai/task/vendors.py +5 -7
- synth_ai/tracing_v3/__init__.py +2 -0
- synth_ai/tracing_v3/abstractions.py +21 -4
- synth_ai/tracing_v3/db_config.py +26 -1
- synth_ai/tracing_v3/decorators.py +18 -15
- synth_ai/tracing_v3/examples/basic_usage.py +3 -2
- synth_ai/tracing_v3/hooks.py +6 -4
- synth_ai/tracing_v3/llm_call_record_helpers.py +6 -6
- synth_ai/tracing_v3/replica_sync.py +1 -0
- synth_ai/tracing_v3/session_tracer.py +63 -16
- synth_ai/tracing_v3/storage/base.py +89 -1
- synth_ai/tracing_v3/storage/config.py +21 -8
- synth_ai/tracing_v3/storage/factory.py +10 -8
- synth_ai/tracing_v3/storage/utils.py +4 -2
- synth_ai/tracing_v3/turso/daemon.py +7 -2
- synth_ai/tracing_v3/turso/models.py +5 -2
- synth_ai/tracing_v3/turso/native_manager.py +1173 -0
- synth_ai/tracing_v3/utils.py +4 -3
- synth_ai/v0/api/__init__.py +8 -0
- synth_ai/v0/api/models/__init__.py +8 -0
- synth_ai/v0/api/models/supported.py +8 -0
- synth_ai/v0/config/__init__.py +15 -0
- synth_ai/v0/config/base_url.py +12 -0
- synth_ai/v0/lm/__init__.py +51 -0
- synth_ai/{lm → v0/lm}/caching/ephemeral.py +3 -5
- synth_ai/{lm → v0/lm}/caching/handler.py +4 -4
- synth_ai/{lm → v0/lm}/caching/initialize.py +1 -1
- synth_ai/{lm → v0/lm}/caching/persistent.py +1 -1
- synth_ai/{lm → v0/lm}/config.py +6 -1
- synth_ai/{lm → v0/lm}/core/all.py +9 -9
- synth_ai/{lm → v0/lm}/core/exceptions.py +0 -2
- synth_ai/{lm → v0/lm}/core/main.py +19 -7
- synth_ai/{lm → v0/lm}/core/main_v3.py +10 -10
- synth_ai/{lm → v0/lm}/core/synth_models.py +2 -15
- synth_ai/{lm → v0/lm}/core/vendor_clients.py +6 -4
- synth_ai/{lm → v0/lm}/overrides.py +4 -4
- synth_ai/{lm → v0/lm}/provider_support/anthropic.py +4 -4
- synth_ai/{lm → v0/lm}/provider_support/openai.py +5 -5
- synth_ai/{lm → v0/lm}/structured_outputs/handler.py +5 -5
- synth_ai/{lm → v0/lm}/structured_outputs/rehabilitate.py +1 -1
- synth_ai/{lm → v0/lm}/vendors/core/anthropic_api.py +16 -16
- synth_ai/{lm → v0/lm}/vendors/core/gemini_api.py +5 -5
- synth_ai/{lm → v0/lm}/vendors/core/mistral_api.py +5 -5
- synth_ai/{lm → v0/lm}/vendors/core/openai_api.py +12 -10
- synth_ai/{lm → v0/lm}/vendors/openai_standard.py +11 -9
- synth_ai/{lm → v0/lm}/vendors/openai_standard_responses.py +8 -5
- synth_ai/{lm → v0/lm}/vendors/supported/custom_endpoint.py +4 -6
- synth_ai/{lm → v0/lm}/vendors/supported/deepseek.py +2 -2
- synth_ai/{lm → v0/lm}/vendors/supported/grok.py +2 -2
- synth_ai/{lm → v0/lm}/vendors/supported/groq.py +1 -1
- synth_ai/{lm → v0/lm}/vendors/supported/ollama.py +1 -1
- synth_ai/{lm → v0/lm}/vendors/supported/openrouter.py +3 -3
- synth_ai/{lm → v0/lm}/vendors/supported/together.py +1 -1
- synth_ai/{lm → v0/lm}/vendors/synth_client.py +38 -11
- synth_ai/v0/tracing/upload.py +32 -135
- synth_ai/v0/tracing_v3/__init__.py +10 -0
- synth_ai/v0/tracing_v3/abstractions.py +3 -0
- synth_ai/v0/tracing_v3/decorators.py +3 -0
- synth_ai/v0/tracing_v3/llm_call_record_helpers.py +3 -0
- synth_ai/v0/tracing_v3/session_tracer.py +3 -0
- synth_ai-0.2.9.dev6.dist-info/METADATA +191 -0
- {synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev6.dist-info}/RECORD +291 -264
- {synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev6.dist-info}/top_level.txt +1 -0
- examples/common_old/backend.py +0 -21
- examples/evals_old/README.md +0 -98
- examples/evals_old/__init__.py +0 -6
- examples/evals_old/compare_models.py +0 -1037
- examples/evals_old/example_log.md +0 -145
- examples/evals_old/run_demo.sh +0 -126
- examples/evals_old/trace_analysis.py +0 -270
- examples/finetuning_old/_backup_synth_qwen/config.toml +0 -29
- examples/finetuning_old/_backup_synth_qwen/example_log.md +0 -324
- examples/finetuning_old/_backup_synth_qwen/filter_traces.py +0 -60
- examples/finetuning_old/_backup_synth_qwen/filter_traces_achievements.py +0 -239
- examples/finetuning_old/_backup_synth_qwen/purge_v3_traces.py +0 -109
- examples/finetuning_old/_backup_synth_qwen/react_agent_lm.py +0 -1924
- examples/finetuning_old/_backup_synth_qwen/readme.md +0 -49
- examples/finetuning_old/_backup_synth_qwen/run_crafter_qwen4b.py +0 -114
- examples/finetuning_old/_backup_synth_qwen/run_demo.sh +0 -195
- examples/finetuning_old/_backup_synth_qwen/sft_kickoff.py +0 -118
- examples/finetuning_old/synth_qwen_v1/README.md +0 -68
- examples/finetuning_old/synth_qwen_v1/filter_traces.py +0 -60
- examples/finetuning_old/synth_qwen_v1/filter_traces_achievements.py +0 -239
- examples/finetuning_old/synth_qwen_v1/finetune.py +0 -46
- examples/finetuning_old/synth_qwen_v1/hello_ft_model.py +0 -71
- examples/finetuning_old/synth_qwen_v1/infer.py +0 -37
- examples/finetuning_old/synth_qwen_v1/poll.py +0 -44
- examples/finetuning_old/synth_qwen_v1/prepare_data.py +0 -35
- examples/finetuning_old/synth_qwen_v1/purge_v3_traces.py +0 -109
- examples/finetuning_old/synth_qwen_v1/react_agent_lm.py +0 -1932
- examples/finetuning_old/synth_qwen_v1/run_crafter_sft_job.py +0 -207
- examples/finetuning_old/synth_qwen_v1/run_ft_job.py +0 -232
- examples/finetuning_old/synth_qwen_v1/upload_data.py +0 -34
- examples/finetuning_old/synth_qwen_v1/util.py +0 -147
- examples/rl_old/task_app.py +0 -962
- examples/warming_up_to_rl/old/event_rewards.md +0 -234
- examples/warming_up_to_rl/old/notes.md +0 -73
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_stepwise_rewards.py +0 -58
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_turso.py +0 -738
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_turso.py +0 -580
- synth_ai/environments/examples/sokoban/units/astar_common.py +0 -95
- synth_ai/experimental/synth_oss.py +0 -446
- synth_ai/install_sqld.sh +0 -40
- synth_ai/learning/filtering.py +0 -0
- synth_ai/learning/offline/dpo.py +0 -0
- synth_ai/learning/offline/providers.py +0 -7
- synth_ai/learning/offline/sft.py +0 -0
- synth_ai/learning/offline/shared.py +0 -0
- synth_ai/learning/online/grpo.py +0 -0
- synth_ai/learning/online/irft.py +0 -0
- synth_ai/learning/prompts/banking77_injection_eval.py +0 -168
- synth_ai/learning/prompts/gepa.py +0 -0
- synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +0 -213
- synth_ai/learning/prompts/mipro.py +0 -289
- synth_ai/learning/prompts/random_search.py +0 -246
- synth_ai/learning/prompts/run_mipro_banking77.py +0 -172
- synth_ai/learning/prompts/run_random_search_banking77.py +0 -324
- synth_ai/rl/secrets.py +0 -19
- synth_ai/scripts/verify_rewards.py +0 -100
- synth_ai/tracing/__init__.py +0 -30
- synth_ai/tracing_v1/__init__.py +0 -33
- synth_ai/tracing_v3/turso/__init__.py +0 -25
- synth_ai/tracing_v3/turso/manager.py +0 -774
- synth_ai/zyk/__init__.py +0 -30
- synth_ai-0.2.9.dev4.dist-info/METADATA +0 -131
- /synth_ai/{lm → v0/lm}/caching/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/caching/constants.py +0 -0
- /synth_ai/{lm → v0/lm}/caching/dbs.py +0 -0
- /synth_ai/{lm → v0/lm}/constants.py +0 -0
- /synth_ai/{lm → v0/lm}/core/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/cost/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/cost/monitor.py +0 -0
- /synth_ai/{lm → v0/lm}/cost/statefulness.py +0 -0
- /synth_ai/{lm → v0/lm}/injection.py +0 -0
- /synth_ai/{lm → v0/lm}/provider_support/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/provider_support/suppress_logging.py +0 -0
- /synth_ai/{lm → v0/lm}/structured_outputs/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/structured_outputs/inject.py +0 -0
- /synth_ai/{lm → v0/lm}/tools/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/tools/base.py +0 -0
- /synth_ai/{lm → v0/lm}/unified_interface.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/base.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/core/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/core/synth_dev_api.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/local/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/local/ollama.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/retries.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/supported/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/warmup.py +0 -0
- {synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev6.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev6.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev6.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Run a minimal Crafter agent that emits image-only prompts and saves rendered frames.
|
|
4
|
+
|
|
5
|
+
This script demonstrates the multimodal observation pipeline by:
|
|
6
|
+
1. Initialising a `CrafterClassicEnvironment` with a deterministic seed.
|
|
7
|
+
2. Capturing `observation_image_base64` at each step and writing PNG frames.
|
|
8
|
+
3. Building OpenAI-style user messages that contain only an image part.
|
|
9
|
+
4. Emitting a small JSONL preview of the messages so they can be inspected or fed
|
|
10
|
+
directly into the fine-tuning dataset builder.
|
|
11
|
+
|
|
12
|
+
Usage:
|
|
13
|
+
uv run python examples/vlm/crafter_image_only_agent.py --seed 7 --steps 5
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
import argparse
|
|
19
|
+
import asyncio
|
|
20
|
+
import base64
|
|
21
|
+
import json
|
|
22
|
+
import random
|
|
23
|
+
from collections.abc import Iterable
|
|
24
|
+
from pathlib import Path
|
|
25
|
+
from typing import Any
|
|
26
|
+
from uuid import uuid4
|
|
27
|
+
|
|
28
|
+
from synth_ai.environments.environment.tools import EnvToolCall
|
|
29
|
+
from synth_ai.environments.examples.crafter_classic.environment import CrafterClassicEnvironment
|
|
30
|
+
from synth_ai.environments.examples.crafter_classic.taskset import (
|
|
31
|
+
CrafterTaskInstance,
|
|
32
|
+
CrafterTaskInstanceMetadata,
|
|
33
|
+
)
|
|
34
|
+
from synth_ai.environments.tasks.core import Impetus, Intent
|
|
35
|
+
|
|
36
|
+
# Crafter action names listed in id order: each name's position in this tuple
# is the integer id stored for it in ACTION_NAME_TO_ID below.
_ACTION_NAMES_IN_ID_ORDER = (
    "noop",
    "move_left",
    "move_right",
    "move_up",
    "move_down",
    "do",
    "sleep",
    "place_stone",
    "place_table",
    "place_furnace",
    "place_plant",
    "make_wood_pickaxe",
    "make_stone_pickaxe",
    "make_iron_pickaxe",
    "make_wood_sword",
    "make_stone_sword",
    "make_iron_sword",
)

# Lookup table from action name to integer action id (0..16).
ACTION_NAME_TO_ID = {
    action_name: action_id
    for action_id, action_name in enumerate(_ACTION_NAMES_IN_ID_ORDER)
}
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def _build_task_instance(seed: int) -> CrafterTaskInstance:
    """Build a bare-bones Crafter task instance for the given seed.

    The instance carries a fixed instruction/rubric pair, "custom" difficulty
    metadata with zeroed radius counts, and a ``config`` dict holding the seed,
    an episode length of 256 and a 64x64 area.
    """

    task = CrafterTaskInstance(
        id=uuid4(),
        impetus=Impetus(instructions="Explore the world and survive."),
        intent=Intent(
            rubric={"goal": "Unlock achievements and stay alive."},
            gold_trajectories=None,
            gold_state_diff={},
        ),
        metadata=CrafterTaskInstanceMetadata(
            difficulty="custom",
            seed=seed,
            num_trees_radius=0,
            num_cows_radius=0,
            num_hostiles_radius=0,
        ),
        is_reproducible=True,
        initial_engine_snapshot=None,
    )
    # The engine expects its environment config to be attached to the instance.
    task.config = {"seed": seed, "length": 256, "area": [64, 64]}
    return task
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def _select_actions(action_names: Iterable[str], steps: int) -> list[int]:
    """Return ``steps`` action ids by cycling through ``action_names``.

    An empty ``action_names`` falls back to a built-in five-action cycle.

    Raises:
        ValueError: if a name reached while cycling is not a key of
            ``ACTION_NAME_TO_ID``.
    """
    cycle = list(action_names) or ["move_right", "move_down", "move_left", "move_up", "do"]
    chosen: list[int] = []
    for position in range(steps):
        label = cycle[position % len(cycle)]
        try:
            chosen.append(ACTION_NAME_TO_ID[label])
        except KeyError:
            raise ValueError(f"Unknown Crafter action: {label}") from None
    return chosen
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def _save_base64_png(data: str, path: Path) -> None:
|
|
101
|
+
"""Decode a base64 string (with or without data URL prefix) and write to disk."""
|
|
102
|
+
|
|
103
|
+
if data.startswith("data:"):
|
|
104
|
+
_, _, encoded = data.partition(",")
|
|
105
|
+
else:
|
|
106
|
+
encoded = data
|
|
107
|
+
path.write_bytes(base64.b64decode(encoded))
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def _build_image_only_message(data_url: str) -> dict[str, Any]:
|
|
111
|
+
return {
|
|
112
|
+
"role": "user",
|
|
113
|
+
"content": [{"type": "image_url", "image_url": {"url": data_url}}],
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
async def run(args: argparse.Namespace) -> None:
    """Roll out a scripted Crafter episode, saving frames and image-only messages.

    Reads ``args.seed``, ``args.steps``, ``args.actions`` and ``args.output_dir``.
    Before each step, the current observation's ``observation_image_base64``
    (when present) is written to ``<output_dir>/frames/step_NNN.png`` and a
    user message is built from ``observation_image_data_url`` (or a text
    placeholder when that key is missing). After the rollout, one JSON record
    per step is written to ``<output_dir>/image_only_messages.jsonl``.

    The environment is terminated even if a step raises; in that case the
    exception propagates and no JSONL preview is written.
    """
    output_dir = Path(args.output_dir).resolve()
    frames_dir = output_dir / "frames"
    frames_dir.mkdir(parents=True, exist_ok=True)
    messages_path = output_dir / "image_only_messages.jsonl"

    task_instance = _build_task_instance(args.seed)
    env = CrafterClassicEnvironment(task_instance)

    # Initialise environment
    raw_obs = await env.initialize()

    records: list[dict[str, Any]] = []
    frames_written = 0  # counts PNGs actually saved, which may be fewer than records

    try:
        # Some return values wrap the dict in an object with `.observation`; accept both.
        observation = getattr(raw_obs, "observation", raw_obs)
        action_ids = _select_actions(args.actions, args.steps)

        for step_idx, action_id in enumerate(action_ids):
            obs_dict = observation if isinstance(observation, dict) else {}
            image_b64 = obs_dict.get("observation_image_base64")
            data_url = obs_dict.get("observation_image_data_url")

            if image_b64:
                frame_path = frames_dir / f"step_{step_idx:03d}.png"
                _save_base64_png(image_b64, frame_path)
                frames_written += 1

            if data_url:
                message = _build_image_only_message(data_url)
            else:
                message = {
                    "role": "user",
                    "content": [{"type": "text", "text": "Image missing from observation."}],
                }

            records.append(
                {
                    "step": step_idx,
                    "action_id": action_id,
                    "message": message,
                    "observation_keys": sorted(obs_dict.keys()),
                }
            )

            # For the very first step, show the message structure
            if step_idx == 0:
                print("=== Image-only message example ===")
                print(json.dumps(message, indent=2))

            tool_call = EnvToolCall(tool="interact", args={"action": int(action_id)})
            env_step = await env.step(tool_call)
            observation = getattr(env_step, "observation", env_step)
    finally:
        # Always release the environment, including when a step fails mid-rollout.
        await env.terminate()

    # Dump the preview JSONL
    with messages_path.open("w", encoding="utf-8") as fh:
        for record in records:
            fh.write(json.dumps(record, ensure_ascii=False) + "\n")

    print(f"Saved {frames_written} frames -> {frames_dir}")
    print(f"Saved image-only message preview -> {messages_path}")
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Parse command-line options for the image-only Crafter agent.

    Args:
        argv: Argument strings to parse. ``None`` (the default) lets argparse
            read ``sys.argv[1:]``, which is what the zero-argument call did.

    Returns:
        Namespace with ``seed``, ``steps``, ``actions``, ``output_dir`` and
        ``randomise``. When ``--randomise`` is given, ``actions`` is a shuffled
        copy of the parsed sequence.
    """
    default_output = Path("examples/vlm/temp")

    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--seed", type=int, default=7, help="Crafter environment seed")
    parser.add_argument("--steps", type=int, default=5, help="Number of env steps to capture")
    parser.add_argument(
        "--actions",
        nargs="*",
        default=["move_right", "move_down", "move_left", "move_up", "do"],
        help="Sequence of Crafter action names to cycle through",
    )
    parser.add_argument(
        "--output-dir",
        type=Path,
        default=default_output,
        help=f"Directory for frames and message preview (default: {default_output})",
    )
    parser.add_argument(
        "--randomise",
        action="store_true",
        help="Shuffle the provided action sequence before running",
    )

    args = parser.parse_args(argv)
    if args.randomise:
        # Shuffle a copy: when --actions is omitted, args.actions is the very
        # list object registered as the argparse default.
        shuffled = list(args.actions)
        random.shuffle(shuffled)
        args.actions = shuffled
    return args
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
if __name__ == "__main__":
    # Script entry point: parse the CLI flags, then drive the async rollout to completion.
    cli_args = parse_args()
    asyncio.run(run(cli_args))
|
|
@@ -0,0 +1,277 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Crafter agent that calls the OpenAI Chat Completions API with image + text prompts.
|
|
4
|
+
|
|
5
|
+
The harness mirrors the text-based agent workflow from `examples/warming_up_to_rl`:
|
|
6
|
+
* Uses the Crafter policy to build prompts, maintain history, and parse tool calls.
|
|
7
|
+
* Executes actions against the Synth Crafter environment (no HTTP task app required).
|
|
8
|
+
* Persists every rendered frame to `examples/vlm/temp/` so you can inspect exactly
|
|
9
|
+
what the VLM saw.
|
|
10
|
+
|
|
11
|
+
Requirements:
|
|
12
|
+
- `OPENAI_API_KEY` environment variable.
|
|
13
|
+
- `openai` Python package (installed via project dependencies).
|
|
14
|
+
|
|
15
|
+
Usage:
|
|
16
|
+
uv run python examples/vlm/crafter_openai_vlm_agent.py \
|
|
17
|
+
--model gpt-4o-mini-2024-07-18 --seeds 10 --steps 10
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from __future__ import annotations
|
|
21
|
+
|
|
22
|
+
import argparse
|
|
23
|
+
import asyncio
|
|
24
|
+
import base64
|
|
25
|
+
import json
|
|
26
|
+
import os
|
|
27
|
+
from pathlib import Path
|
|
28
|
+
from typing import Any
|
|
29
|
+
from uuid import uuid4
|
|
30
|
+
|
|
31
|
+
from examples.warming_up_to_rl.task_app.synth_envs_hosted.envs.crafter.environment import (
|
|
32
|
+
CrafterEnvironmentWrapper,
|
|
33
|
+
)
|
|
34
|
+
from examples.warming_up_to_rl.task_app.synth_envs_hosted.envs.crafter.policy import CrafterPolicy
|
|
35
|
+
from openai import OpenAI
|
|
36
|
+
from synth_ai.environments.examples.crafter_classic.environment import CrafterClassicEnvironment
|
|
37
|
+
from synth_ai.environments.examples.crafter_classic.taskset import (
|
|
38
|
+
CrafterTaskInstance,
|
|
39
|
+
CrafterTaskInstanceMetadata,
|
|
40
|
+
)
|
|
41
|
+
from synth_ai.environments.tasks.core import Impetus, Intent
|
|
42
|
+
|
|
43
|
+
# Relative path "examples/vlm/temp"; presumably the default output root for
# this script (its use is outside the visible part of the file — confirm).
DEFAULT_OUTPUT = Path("examples", "vlm", "temp")
# Name of the sub-directory, under the output directory, that holds saved frames.
FRAME_SUBDIR = "openai_agent_frames"
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class EpisodeResult:
    """Running tally for one seeded episode.

    Tracks the seed, the number of steps and tool calls, the summed reward and
    the names of every achievement seen unlocked so far.
    """

    def __init__(self, seed: int) -> None:
        self.seed = seed
        # Counters start at zero; nothing in this class increments steps_taken
        # or tool_calls.
        self.steps_taken: int = 0
        self.tool_calls: int = 0
        self.total_reward: float = 0.0
        self.achievements: set[str] = set()

    def record_observation(self, observation: dict[str, Any]) -> None:
        """Fold one observation packet into the tallies.

        The packet must be a dict holding a dict under ``"observation"``;
        anything else is ignored. Truthy entries of ``achievements_status``
        are added (as strings) to ``achievements``; a numeric
        ``reward_last_step`` is added to ``total_reward``.
        """
        if not isinstance(observation, dict):
            return
        inner = observation.get("observation")
        if not isinstance(inner, dict):
            return

        statuses = inner.get("achievements_status")
        if isinstance(statuses, dict):
            self.achievements.update(str(key) for key, flag in statuses.items() if flag)

        last_reward = inner.get("reward_last_step")
        if isinstance(last_reward, (int, float)):
            self.total_reward += float(last_reward)
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def _ensure_client() -> OpenAI:
    """Create an OpenAI client from the ``OPENAI_API_KEY`` environment variable.

    Raises:
        RuntimeError: if the variable is unset or empty.
    """
    key = os.getenv("OPENAI_API_KEY")
    if key:
        return OpenAI(api_key=key)
    raise RuntimeError("OPENAI_API_KEY must be set for OpenAI calls")
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def _build_task_instance(seed: int) -> CrafterTaskInstance:
    """Assemble the Crafter task instance used for one seeded episode.

    Uses a fixed instruction/rubric pair, "custom" difficulty metadata with
    zeroed radius counts, and attaches a ``config`` dict with the seed, an
    episode length of 256 and a 64x64 area.
    """
    metadata_fields = {
        "difficulty": "custom",
        "seed": seed,
        "num_trees_radius": 0,
        "num_cows_radius": 0,
        "num_hostiles_radius": 0,
    }
    task = CrafterTaskInstance(
        id=uuid4(),
        impetus=Impetus(instructions="Explore, survive, and unlock achievements."),
        intent=Intent(
            rubric={"goal": "Maximise Crafter achievements."},
            gold_trajectories=None,
            gold_state_diff={},
        ),
        metadata=CrafterTaskInstanceMetadata(**metadata_fields),
        is_reproducible=True,
        initial_engine_snapshot=None,
    )
    task.config = {"seed": seed, "length": 256, "area": [64, 64]}
    return task
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def _decode_and_save_image(observation: dict[str, Any], path: Path) -> None:
|
|
103
|
+
obs = observation.get("observation") if isinstance(observation, dict) else None
|
|
104
|
+
if not isinstance(obs, dict):
|
|
105
|
+
return
|
|
106
|
+
base64_data = obs.get("observation_image_base64")
|
|
107
|
+
if not isinstance(base64_data, str) or not base64_data:
|
|
108
|
+
return
|
|
109
|
+
path.parent.mkdir(parents=True, exist_ok=True)
|
|
110
|
+
try:
|
|
111
|
+
path.write_bytes(base64.b64decode(base64_data))
|
|
112
|
+
except Exception:
|
|
113
|
+
# Best-effort; corrupted frames should not halt rollout
|
|
114
|
+
pass
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def _normalise_openai_request(payload: dict[str, Any], model: str, temperature: float) -> dict[str, Any]:
    """Return a copy of ``payload`` adjusted for an OpenAI chat-completions call.

    The input dict is not mutated. On the copy:

    * ``model`` is always overwritten with ``model``.
    * ``temperature`` is filled in only when the payload does not set it.
    * ``stop_after_tool_calls``, ``thinking_mode`` and ``thinking_budget`` are
      removed.
    * ``max_completion_tokens`` is removed and, when it is not ``None`` and no
      explicit ``max_tokens`` is present, its value becomes ``max_tokens``.
    * ``max_tokens`` falls back to 512 when neither key supplied a value.

    Args:
        payload: Request produced by the policy.
        model: Model id to send the request to.
        temperature: Default sampling temperature.

    Returns:
        A new request dict.
    """
    request = dict(payload)
    request["model"] = model
    request.setdefault("temperature", temperature)

    # Remove vendor-specific knobs unsupported by OpenAI
    for vendor_key in ("stop_after_tool_calls", "thinking_mode", "thinking_budget"):
        request.pop(vendor_key, None)

    # Translate max_completion_tokens *before* applying the 512 default.
    # Applying the default first would make this setdefault a no-op and
    # silently discard the limit the policy asked for.
    max_completion = request.pop("max_completion_tokens", None)
    if max_completion is not None:
        request.setdefault("max_tokens", max_completion)
    request.setdefault("max_tokens", 512)
    return request
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
async def _run_episode(
    *,
    seed: int,
    client: OpenAI,
    model: str,
    max_steps: int,
    output_dir: Path,
    temperature: float,
) -> EpisodeResult:
    """Play one Crafter episode with an OpenAI model choosing the actions.

    Args:
        seed: Seed used for the task instance, the environment wrapper and
            the per-seed frame directory name.
        client: OpenAI client used for the chat-completion requests.
        model: Model id written into every inference request.
        max_steps: Upper bound on policy/environment iterations.
        output_dir: Root directory; frames are written under
            ``output_dir / FRAME_SUBDIR / seed_XXXX``.
        temperature: Sampling temperature used when the policy's request does
            not already carry one.

    Returns:
        The ``EpisodeResult`` accumulated over the episode.

    Raises:
        RuntimeError: If the environment wrapper returns a non-dict response.
    """
    task_instance = _build_task_instance(seed)
    env = CrafterClassicEnvironment(task_instance)
    wrapper = CrafterEnvironmentWrapper(env, seed=seed)
    policy = CrafterPolicy(inference_url="openai://chat-completions", model=model)
    await policy.initialize({"use_tools": True, "model": model})

    episode_result = EpisodeResult(seed=seed)

    observation_packet = await wrapper.initialize()
    # From here on the environment is live: make sure it is terminated even
    # if the model call, response parsing or an environment step raises.
    try:
        episode_result.record_observation(observation_packet)

        frames_root = output_dir / FRAME_SUBDIR / f"seed_{seed:04d}"
        _decode_and_save_image(observation_packet, frames_root / "step_000.png")

        for step_idx in range(max_steps):
            obs_dict = observation_packet.get("observation")
            if not isinstance(obs_dict, dict):
                break

            # The policy only prepares the request here; the model call
            # itself is issued below through ``client``.
            obs_text = policy._format_observation_for_llm(observation_packet)  # noqa: SLF001
            tool_calls, meta = await policy.step(
                observation_text=obs_text,
                metadata={"raw_observation": observation_packet},
            )
            if "inference_request" not in meta:
                break

            episode_result.steps_taken += 1
            inference_request = _normalise_openai_request(
                meta["inference_request"],
                model=model,
                temperature=temperature,
            )

            # This call is not awaited, so the coroutine does not yield
            # while the request is in flight.
            response = client.chat.completions.create(**inference_request)
            response_dict = response.model_dump()

            assistant_tool_calls = CrafterPolicy.parse_response_to_tool_calls(
                response_dict,
                use_tools=policy.use_tools,
            )
            if not assistant_tool_calls:
                print(
                    f"Seed {seed}: no tool calls returned by model; ending episode early at step {step_idx}."
                )
                break

            episode_result.tool_calls += len(assistant_tool_calls)

            assistant_message = response_dict["choices"][0].get("message") or {}
            assistant_text = assistant_message.get("content")

            env_response = await wrapper.step(assistant_tool_calls)
            if not isinstance(env_response, dict):
                raise RuntimeError(
                    f"Unexpected environment response type: {type(env_response)!r}"
                )
            episode_result.record_observation(env_response)

            # Feed the assistant turn and its outcome back into the policy.
            policy._append_assistant_turn(  # noqa: SLF001
                assistant_text,
                assistant_tool_calls,
                env_response,
            )

            # step_000 is the initial frame, so frame N is the state after step N.
            frame_path = frames_root / f"step_{step_idx + 1:03d}.png"
            _decode_and_save_image(env_response, frame_path)

            if env_response.get("done"):
                break
            observation_packet = env_response
    finally:
        await wrapper.terminate()

    return episode_result
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
async def main() -> None:
    """Run the Crafter agent over a range of seeds and write a JSON summary.

    Parses the command line, builds the OpenAI client, runs one episode per
    seed in ``range(--seeds)`` sequentially, prints a line of stats per seed,
    then writes ``openai_agent_summary.json`` into ``--output-dir`` and prints
    the same summary to stdout.
    """
    cli = argparse.ArgumentParser(description=__doc__)
    cli.add_argument("--model", default="gpt-4o-mini-2024-07-18", help="OpenAI model id")
    cli.add_argument("--seeds", type=int, default=10, help="Number of random seeds to evaluate")
    cli.add_argument("--steps", type=int, default=10, help="Max steps per seed")
    cli.add_argument("--temperature", type=float, default=0.6, help="Sampling temperature")
    cli.add_argument(
        "--output-dir",
        type=Path,
        default=DEFAULT_OUTPUT,
        help=f"Directory for saved frames and summaries (default: {DEFAULT_OUTPUT})",
    )
    args = cli.parse_args()

    client = _ensure_client()
    results: list[EpisodeResult] = []

    seed_range = range(args.seeds)
    print(f"Running {len(seed_range)} Crafter episodes with model={args.model}")

    for seed in seed_range:
        outcome = await _run_episode(
            seed=seed,
            client=client,
            model=args.model,
            max_steps=args.steps,
            output_dir=args.output_dir,
            temperature=args.temperature,
        )
        results.append(outcome)
        print(
            f"Seed {seed:02d}: steps={outcome.steps_taken}, "
            f"achievements={len(outcome.achievements)}, "
            f"tool_calls={outcome.tool_calls}, reward≈{outcome.total_reward:.3f}"
        )

    # Guard the averages against a zero-episode run.
    episode_count = len(results)
    divisor = max(episode_count, 1)
    step_total = sum(res.steps_taken for res in results)
    achievement_total = sum(len(res.achievements) for res in results)
    frames_dir = str(args.output_dir / FRAME_SUBDIR)

    summary = {
        "model": args.model,
        "episodes": episode_count,
        "mean_steps": round(step_total / divisor, 2),
        "mean_achievements": round(achievement_total / divisor, 2),
        "total_tool_calls": sum(res.tool_calls for res in results),
        "output_dir": frames_dir,
    }
    rendered = json.dumps(summary, indent=2)

    args.output_dir.mkdir(parents=True, exist_ok=True)
    (args.output_dir / "openai_agent_summary.json").write_text(rendered, encoding="utf-8")

    print("\nSummary")
    print("-------")
    print(rendered)
    print(f"\nFrames saved in: {frames_dir}")
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
# Script entry point: drive the async main() coroutine via asyncio.run when
# this file is executed directly (not when it is imported).
if __name__ == "__main__":
    asyncio.run(main())
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Filter SFT JSONL rows to those that contain image content.
|
|
4
|
+
|
|
5
|
+
This is a convenience wrapper around `examples/warming_up_to_rl/export_trace_sft.py`
|
|
6
|
+
output now that each record's metadata includes `has_image`, `user_has_image`, and
|
|
7
|
+
`assistant_has_image`.
|
|
8
|
+
|
|
9
|
+
Usage:
|
|
10
|
+
uv run python examples/vlm/filter_image_rows.py \
|
|
11
|
+
--input examples/sft/ft_data/crafter_traces.jsonl \
|
|
12
|
+
--output examples/vlm/output/crafter_vlm_dataset.jsonl
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import argparse
|
|
18
|
+
import json
|
|
19
|
+
from pathlib import Path
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def parse_args() -> argparse.Namespace:
    """Parse the command-line options of the image-row filter.

    Returns:
        A namespace with ``input`` and ``output`` (both required ``Path``
        values) and the boolean ``include_assistant`` flag, which is ``False``
        unless ``--include-assistant`` is passed.
    """
    cli = argparse.ArgumentParser(description=__doc__)
    required_paths = (
        ("--input", "Source JSONL dataset"),
        ("--output", "Filtered JSONL path"),
    )
    for flag, help_text in required_paths:
        cli.add_argument(flag, type=Path, required=True, help=help_text)
    cli.add_argument(
        "--include-assistant",
        action="store_true",
        help="Require the assistant message to include an image as well",
    )
    return cli.parse_args()
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def main() -> None:
    """Copy the JSONL rows whose metadata flags a user image to the output file.

    Reads ``--input`` line by line and writes to ``--output`` (its parent
    directory is created if needed) every record whose ``metadata`` has a
    truthy ``user_has_image``. With ``--include-assistant`` the record must
    also have a truthy ``assistant_has_image``. Lines that are not valid JSON,
    are not JSON objects, or whose ``metadata`` is not an object are skipped
    rather than aborting the run. Every input line counts towards the total
    reported at the end.
    """
    args = parse_args()
    src = args.input
    dst = args.output
    dst.parent.mkdir(parents=True, exist_ok=True)

    kept = 0
    total = 0
    with src.open("r", encoding="utf-8") as reader, dst.open("w", encoding="utf-8") as writer:
        for line in reader:
            total += 1
            try:
                record = json.loads(line)
            except json.JSONDecodeError:
                continue
            # Valid JSON that is not an object (null, list, string, number)
            # has no metadata; skip it instead of crashing on ``.get``.
            if not isinstance(record, dict):
                continue
            metadata = record.get("metadata")
            # Missing, null or non-object metadata cannot flag an image.
            if not isinstance(metadata, dict):
                continue
            if not metadata.get("user_has_image"):
                continue
            if args.include_assistant and not metadata.get("assistant_has_image"):
                continue
            writer.write(json.dumps(record, ensure_ascii=False) + "\n")
            kept += 1

    print(f"Filtered {kept} / {total} rows with user images -> {dst}")
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
# Script entry point: run the filter only when this file is executed directly
# (not when it is imported).
if __name__ == "__main__":
    main()
|