synth-ai 0.2.9.dev7__py3-none-any.whl → 0.2.9.dev8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synth-ai might be problematic. Click here for more details.
- examples/__init__.py +16 -0
- examples/crafter_debug_render.py +8 -11
- examples/qwen_coder/README.md +102 -0
- examples/qwen_coder/_shared.py +113 -0
- examples/qwen_coder/configs/coder_lora_30b.toml +61 -0
- examples/qwen_coder/configs/coder_lora_4b.toml +57 -0
- examples/qwen_coder/configs/coder_lora_small.toml +58 -0
- examples/qwen_coder/generate_dataset.py +98 -0
- examples/qwen_coder/infer_ft_smoke.py +64 -0
- examples/qwen_coder/infer_prod_proxy.py +73 -0
- examples/qwen_coder/infer_via_synth.py +87 -0
- examples/qwen_coder/scripts/infer_coder.sh +18 -0
- examples/qwen_coder/scripts/train_coder_30b.sh +21 -0
- examples/qwen_coder/sft_full_17b.py +103 -0
- examples/qwen_coder/sft_lora_30b.py +110 -0
- examples/qwen_coder/subset_jsonl.py +38 -0
- examples/qwen_coder/validate_jsonl.py +59 -0
- examples/rl/run_eval.py +36 -37
- examples/rl/run_rl_and_save.py +5 -5
- examples/rl/task_app/math_single_step.py +65 -43
- examples/rl/task_app/math_task_app.py +3 -3
- examples/sft/README.md +139 -0
- examples/sft/configs/crafter_fft_qwen0p6b.toml +44 -0
- examples/sft/configs/crafter_lora_qwen0p6b.toml +45 -0
- examples/sft/evaluate.py +117 -0
- examples/sft/export_dataset.py +117 -0
- examples/sft/generate_traces.py +162 -0
- examples/swe/__init__.py +12 -0
- examples/swe/task_app/README.md +105 -0
- examples/swe/task_app/__init__.py +2 -0
- examples/swe/task_app/grpo_swe_mini.py +571 -0
- examples/swe/task_app/grpo_swe_mini_task_app.py +136 -0
- examples/swe/task_app/hosted/README.md +173 -0
- examples/swe/task_app/hosted/__init__.py +5 -0
- examples/swe/task_app/hosted/branching.py +143 -0
- examples/swe/task_app/hosted/environment_routes.py +1289 -0
- examples/swe/task_app/hosted/envs/__init__.py +1 -0
- examples/swe/task_app/hosted/envs/crafter/__init__.py +6 -0
- examples/swe/task_app/hosted/envs/crafter/app.py +1 -0
- examples/swe/task_app/hosted/envs/crafter/environment.py +522 -0
- examples/swe/task_app/hosted/envs/crafter/policy.py +478 -0
- examples/swe/task_app/hosted/envs/crafter/react_agent.py +108 -0
- examples/swe/task_app/hosted/envs/crafter/shared.py +305 -0
- examples/swe/task_app/hosted/envs/crafter/tools.py +47 -0
- examples/swe/task_app/hosted/envs/mini_swe/__init__.py +8 -0
- examples/swe/task_app/hosted/envs/mini_swe/environment.py +1164 -0
- examples/swe/task_app/hosted/envs/mini_swe/policy.py +355 -0
- examples/swe/task_app/hosted/envs/mini_swe/shared.py +83 -0
- examples/swe/task_app/hosted/envs/mini_swe/tools.py +96 -0
- examples/swe/task_app/hosted/hosted_app.py +204 -0
- examples/swe/task_app/hosted/inference/__init__.py +5 -0
- examples/swe/task_app/hosted/inference/openai_client.py +618 -0
- examples/swe/task_app/hosted/main.py +100 -0
- examples/swe/task_app/hosted/policy_routes.py +1079 -0
- examples/swe/task_app/hosted/registry.py +195 -0
- examples/swe/task_app/hosted/rollout.py +1869 -0
- examples/swe/task_app/hosted/storage/__init__.py +5 -0
- examples/swe/task_app/hosted/storage/volume.py +211 -0
- examples/swe/task_app/hosted/test_agents.py +161 -0
- examples/swe/task_app/hosted/test_service.py +137 -0
- examples/swe/task_app/hosted/utils.py +62 -0
- examples/vlm/README.md +68 -0
- examples/vlm/configs/crafter_vlm_gpt4o.toml +44 -0
- examples/vlm/crafter_image_only_agent.py +207 -0
- examples/vlm/crafter_openai_vlm_agent.py +277 -0
- examples/vlm/filter_image_rows.py +63 -0
- examples/vlm/run_crafter_vlm_benchmark.py +316 -0
- examples/warming_up_to_rl/analyze_trace_db.py +5 -5
- examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +11 -1
- examples/warming_up_to_rl/export_trace_sft.py +78 -21
- examples/warming_up_to_rl/groq_test.py +4 -4
- examples/warming_up_to_rl/manage_secrets.py +13 -18
- examples/warming_up_to_rl/run_eval.py +42 -44
- examples/warming_up_to_rl/run_fft_and_save.py +11 -16
- examples/warming_up_to_rl/run_local_rollout.py +1 -3
- examples/warming_up_to_rl/run_local_rollout_modal.py +2 -4
- examples/warming_up_to_rl/run_local_rollout_parallel.py +1 -4
- examples/warming_up_to_rl/run_local_rollout_traced.py +3 -5
- examples/warming_up_to_rl/run_rl_and_save.py +5 -6
- examples/warming_up_to_rl/run_rollout_remote.py +8 -10
- examples/warming_up_to_rl/task_app/README.md +6 -2
- examples/warming_up_to_rl/task_app/grpo_crafter.py +234 -35
- examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +2 -3
- examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +9 -11
- examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +131 -114
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +101 -41
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +73 -51
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +14 -6
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +16 -16
- examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +32 -34
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +94 -31
- examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +0 -2
- examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +303 -203
- examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +21 -23
- examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +328 -225
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +13 -13
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +4 -3
- synth/__init__.py +14 -0
- synth_ai/__init__.py +26 -4
- synth_ai/api/models/supported.py +376 -0
- synth_ai/api/train/builders.py +128 -21
- synth_ai/api/train/cli.py +80 -64
- synth_ai/api/train/config_finder.py +7 -2
- synth_ai/api/train/env_resolver.py +1 -1
- synth_ai/api/train/pollers.py +2 -1
- synth_ai/api/train/supported_algos.py +139 -0
- synth_ai/api/train/task_app.py +1 -2
- synth_ai/api/train/utils.py +13 -44
- synth_ai/cli/__init__.py +8 -0
- synth_ai/cli/_modal_wrapper.py +28 -0
- synth_ai/cli/_typer_patch.py +49 -0
- synth_ai/cli/balance.py +1 -2
- synth_ai/cli/calc.py +1 -1
- synth_ai/cli/demo.py +2 -1
- synth_ai/cli/recent.py +2 -2
- synth_ai/cli/rl_demo.py +2 -1
- synth_ai/cli/root.py +11 -13
- synth_ai/cli/status.py +2 -2
- synth_ai/cli/task_apps.py +529 -179
- synth_ai/cli/traces.py +6 -4
- synth_ai/cli/watch.py +12 -18
- synth_ai/demo_registry.py +1 -1
- synth_ai/demos/core/cli.py +36 -43
- synth_ai/demos/demo_task_apps/__init__.py +3 -3
- synth_ai/demos/demo_task_apps/core.py +17 -25
- synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +3 -4
- synth_ai/demos/demo_task_apps/math/app.py +2 -1
- synth_ai/demos/demo_task_apps/math/deploy_modal.py +3 -4
- synth_ai/demos/demo_task_apps/math/modal_task_app.py +16 -18
- synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -1
- synth_ai/environments/examples/crafter_classic/environment.py +76 -1
- synth_ai/environments/reproducibility/tree.py +2 -5
- synth_ai/environments/service/app.py +11 -12
- synth_ai/environments/service/core_routes.py +4 -7
- synth_ai/environments/stateful/engine.py +1 -1
- synth_ai/environments/tasks/core.py +1 -0
- synth_ai/environments/tasks/filters.py +5 -6
- synth_ai/environments/tasks/utils.py +4 -5
- synth_ai/handshake.py +9 -9
- synth_ai/http.py +1 -1
- synth_ai/http_client.py +18 -10
- synth_ai/inference/client.py +15 -5
- synth_ai/jobs/client.py +78 -83
- synth_ai/learning/__init__.py +41 -6
- synth_ai/learning/algorithms.py +14 -0
- synth_ai/learning/client.py +91 -24
- synth_ai/learning/config.py +2 -38
- synth_ai/learning/ft_client.py +4 -59
- synth_ai/learning/health.py +5 -6
- synth_ai/learning/jobs.py +31 -47
- synth_ai/{rl → learning/rl}/__init__.py +14 -4
- synth_ai/learning/rl/client.py +267 -0
- synth_ai/learning/rl/config.py +31 -0
- synth_ai/{rl → learning/rl}/contracts.py +5 -8
- synth_ai/{rl → learning/rl}/env_keys.py +39 -15
- synth_ai/learning/rl/secrets.py +13 -0
- synth_ai/learning/rl_client.py +2 -281
- synth_ai/learning/sft/__init__.py +29 -0
- synth_ai/learning/sft/client.py +68 -0
- synth_ai/learning/sft/config.py +270 -0
- synth_ai/learning/sft/data.py +295 -0
- synth_ai/learning/sse.py +25 -24
- synth_ai/learning/validators.py +25 -28
- synth_ai/lm/__init__.py +21 -47
- synth_ai/main.py +4 -0
- synth_ai/task/__init__.py +25 -27
- synth_ai/task/apps/__init__.py +7 -8
- synth_ai/task/auth.py +8 -8
- synth_ai/task/client.py +14 -14
- synth_ai/task/contracts.py +36 -35
- synth_ai/task/datasets.py +6 -5
- synth_ai/task/errors.py +10 -10
- synth_ai/task/health.py +17 -9
- synth_ai/task/json.py +58 -23
- synth_ai/task/proxy.py +13 -9
- synth_ai/task/rubrics.py +16 -15
- synth_ai/task/server.py +12 -12
- synth_ai/task/tracing_utils.py +4 -4
- synth_ai/task/vendors.py +5 -6
- synth_ai/tracing_v3/__init__.py +2 -0
- synth_ai/tracing_v3/abstractions.py +21 -4
- synth_ai/tracing_v3/decorators.py +18 -16
- synth_ai/tracing_v3/hooks.py +5 -5
- synth_ai/tracing_v3/llm_call_record_helpers.py +6 -6
- synth_ai/tracing_v3/session_tracer.py +40 -14
- synth_ai/tracing_v3/storage/base.py +85 -0
- synth_ai/tracing_v3/storage/config.py +21 -8
- synth_ai/tracing_v3/storage/factory.py +10 -7
- synth_ai/tracing_v3/storage/utils.py +4 -2
- synth_ai/tracing_v3/turso/daemon.py +7 -2
- synth_ai/tracing_v3/turso/models.py +2 -2
- synth_ai/tracing_v3/turso/native_manager.py +1173 -0
- synth_ai/tracing_v3/utils.py +4 -4
- synth_ai/v0/api/__init__.py +8 -0
- synth_ai/v0/api/models/__init__.py +8 -0
- synth_ai/v0/api/models/supported.py +8 -0
- synth_ai/v0/config/__init__.py +15 -0
- synth_ai/v0/config/base_url.py +12 -0
- synth_ai/v0/lm/__init__.py +51 -0
- synth_ai/{lm → v0/lm}/caching/ephemeral.py +2 -2
- synth_ai/{lm → v0/lm}/caching/handler.py +4 -4
- synth_ai/{lm → v0/lm}/caching/initialize.py +1 -1
- synth_ai/{lm → v0/lm}/caching/persistent.py +1 -1
- synth_ai/{lm → v0/lm}/config.py +6 -1
- synth_ai/{lm → v0/lm}/core/all.py +9 -9
- synth_ai/{lm → v0/lm}/core/main.py +6 -6
- synth_ai/{lm → v0/lm}/core/main_v3.py +10 -10
- synth_ai/{lm → v0/lm}/core/synth_models.py +2 -14
- synth_ai/{lm → v0/lm}/core/vendor_clients.py +2 -2
- synth_ai/{lm → v0/lm}/overrides.py +2 -2
- synth_ai/{lm → v0/lm}/provider_support/anthropic.py +4 -4
- synth_ai/{lm → v0/lm}/provider_support/openai.py +5 -5
- synth_ai/{lm → v0/lm}/structured_outputs/handler.py +5 -5
- synth_ai/{lm → v0/lm}/structured_outputs/rehabilitate.py +1 -1
- synth_ai/{lm → v0/lm}/vendors/core/anthropic_api.py +9 -9
- synth_ai/{lm → v0/lm}/vendors/core/gemini_api.py +5 -5
- synth_ai/{lm → v0/lm}/vendors/core/mistral_api.py +5 -5
- synth_ai/{lm → v0/lm}/vendors/core/openai_api.py +10 -10
- synth_ai/{lm → v0/lm}/vendors/openai_standard.py +8 -8
- synth_ai/{lm → v0/lm}/vendors/openai_standard_responses.py +2 -2
- synth_ai/{lm → v0/lm}/vendors/supported/custom_endpoint.py +3 -3
- synth_ai/{lm → v0/lm}/vendors/supported/deepseek.py +2 -2
- synth_ai/{lm → v0/lm}/vendors/supported/grok.py +2 -2
- synth_ai/{lm → v0/lm}/vendors/supported/groq.py +1 -1
- synth_ai/{lm → v0/lm}/vendors/supported/ollama.py +1 -1
- synth_ai/{lm → v0/lm}/vendors/supported/openrouter.py +3 -3
- synth_ai/{lm → v0/lm}/vendors/supported/together.py +1 -1
- synth_ai/{lm → v0/lm}/vendors/synth_client.py +1 -1
- synth_ai/v0/tracing_v3/__init__.py +10 -0
- synth_ai/v0/tracing_v3/abstractions.py +3 -0
- synth_ai/v0/tracing_v3/decorators.py +3 -0
- synth_ai/v0/tracing_v3/llm_call_record_helpers.py +3 -0
- synth_ai/v0/tracing_v3/session_tracer.py +3 -0
- synth_ai-0.2.9.dev8.dist-info/METADATA +191 -0
- {synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.9.dev8.dist-info}/RECORD +268 -238
- {synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.9.dev8.dist-info}/top_level.txt +1 -0
- examples/common_old/backend.py +0 -20
- examples/evals_old/README.md +0 -98
- examples/evals_old/__init__.py +0 -6
- examples/evals_old/compare_models.py +0 -1038
- examples/evals_old/example_log.md +0 -145
- examples/evals_old/run_demo.sh +0 -126
- examples/evals_old/trace_analysis.py +0 -270
- examples/finetuning_old/_backup_synth_qwen/config.toml +0 -29
- examples/finetuning_old/_backup_synth_qwen/example_log.md +0 -324
- examples/finetuning_old/_backup_synth_qwen/filter_traces.py +0 -60
- examples/finetuning_old/_backup_synth_qwen/filter_traces_achievements.py +0 -243
- examples/finetuning_old/_backup_synth_qwen/purge_v3_traces.py +0 -109
- examples/finetuning_old/_backup_synth_qwen/react_agent_lm.py +0 -1924
- examples/finetuning_old/_backup_synth_qwen/readme.md +0 -49
- examples/finetuning_old/_backup_synth_qwen/run_crafter_qwen4b.py +0 -114
- examples/finetuning_old/_backup_synth_qwen/run_demo.sh +0 -195
- examples/finetuning_old/_backup_synth_qwen/sft_kickoff.py +0 -119
- examples/finetuning_old/synth_qwen_v1/README.md +0 -68
- examples/finetuning_old/synth_qwen_v1/filter_traces.py +0 -60
- examples/finetuning_old/synth_qwen_v1/filter_traces_achievements.py +0 -243
- examples/finetuning_old/synth_qwen_v1/finetune.py +0 -46
- examples/finetuning_old/synth_qwen_v1/hello_ft_model.py +0 -71
- examples/finetuning_old/synth_qwen_v1/infer.py +0 -36
- examples/finetuning_old/synth_qwen_v1/poll.py +0 -46
- examples/finetuning_old/synth_qwen_v1/prepare_data.py +0 -35
- examples/finetuning_old/synth_qwen_v1/purge_v3_traces.py +0 -109
- examples/finetuning_old/synth_qwen_v1/react_agent_lm.py +0 -1933
- examples/finetuning_old/synth_qwen_v1/run_crafter_sft_job.py +0 -210
- examples/finetuning_old/synth_qwen_v1/run_ft_job.py +0 -237
- examples/finetuning_old/synth_qwen_v1/upload_data.py +0 -34
- examples/finetuning_old/synth_qwen_v1/util.py +0 -152
- examples/rl_old/task_app.py +0 -1131
- examples/warming_up_to_rl/old/event_rewards.md +0 -234
- examples/warming_up_to_rl/old/notes.md +0 -73
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_turso.py +0 -738
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_turso.py +0 -580
- synth_ai/experimental/synth_oss.py +0 -445
- synth_ai/learning/filtering.py +0 -0
- synth_ai/learning/offline/dpo.py +0 -0
- synth_ai/learning/offline/providers.py +0 -7
- synth_ai/learning/offline/sft.py +0 -0
- synth_ai/learning/offline/shared.py +0 -0
- synth_ai/learning/online/grpo.py +0 -0
- synth_ai/learning/online/irft.py +0 -0
- synth_ai/learning/prompts/banking77_injection_eval.py +0 -168
- synth_ai/learning/prompts/gepa.py +0 -0
- synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +0 -211
- synth_ai/learning/prompts/mipro.py +0 -289
- synth_ai/learning/prompts/random_search.py +0 -249
- synth_ai/learning/prompts/run_mipro_banking77.py +0 -172
- synth_ai/learning/prompts/run_random_search_banking77.py +0 -329
- synth_ai/rl/secrets.py +0 -19
- synth_ai/scripts/verify_rewards.py +0 -100
- synth_ai/tracing/__init__.py +0 -30
- synth_ai/tracing_v1/__init__.py +0 -33
- synth_ai/tracing_v3/turso/__init__.py +0 -25
- synth_ai/tracing_v3/turso/manager.py +0 -838
- synth_ai/zyk/__init__.py +0 -30
- synth_ai-0.2.9.dev7.dist-info/METADATA +0 -131
- /synth_ai/{lm → v0/lm}/caching/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/caching/constants.py +0 -0
- /synth_ai/{lm → v0/lm}/caching/dbs.py +0 -0
- /synth_ai/{lm → v0/lm}/constants.py +0 -0
- /synth_ai/{lm → v0/lm}/core/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/core/exceptions.py +0 -0
- /synth_ai/{lm → v0/lm}/cost/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/cost/monitor.py +0 -0
- /synth_ai/{lm → v0/lm}/cost/statefulness.py +0 -0
- /synth_ai/{lm → v0/lm}/injection.py +0 -0
- /synth_ai/{lm → v0/lm}/provider_support/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/provider_support/suppress_logging.py +0 -0
- /synth_ai/{lm → v0/lm}/structured_outputs/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/structured_outputs/inject.py +0 -0
- /synth_ai/{lm → v0/lm}/tools/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/tools/base.py +0 -0
- /synth_ai/{lm → v0/lm}/unified_interface.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/base.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/core/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/core/synth_dev_api.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/local/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/local/ollama.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/retries.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/supported/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/warmup.py +0 -0
- {synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.9.dev8.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.9.dev8.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.9.dev8.dist-info}/licenses/LICENSE +0 -0
|
@@ -8,10 +8,11 @@ rendered surroundings appeared only as iron/stone due to a mismatched
|
|
|
8
8
|
hardcoded mapping.
|
|
9
9
|
"""
|
|
10
10
|
|
|
11
|
-
from typing import Dict, Any, List, Set
|
|
12
|
-
import numpy as np
|
|
13
|
-
import re
|
|
14
11
|
import itertools
|
|
12
|
+
import re
|
|
13
|
+
from typing import Any
|
|
14
|
+
|
|
15
|
+
import numpy as np
|
|
15
16
|
|
|
16
17
|
VIEW_SIZE = 5 # Default view size for the map (match eval_rollout_table)
|
|
17
18
|
|
|
@@ -58,9 +59,9 @@ ACTION_ALIASES = {
|
|
|
58
59
|
"craft_iron_sword": "make_iron_sword",
|
|
59
60
|
}
|
|
60
61
|
|
|
61
|
-
VALID_PRIMARY_ACTIONS:
|
|
62
|
-
VALID_ACTION_ALIASES:
|
|
63
|
-
ALL_VALID_ACTION_STRINGS:
|
|
62
|
+
VALID_PRIMARY_ACTIONS: set[str] = set(CRAFTER_ACTIONS.keys())
|
|
63
|
+
VALID_ACTION_ALIASES: set[str] = set(ACTION_ALIASES.keys())
|
|
64
|
+
ALL_VALID_ACTION_STRINGS: set[str] = VALID_PRIMARY_ACTIONS | VALID_ACTION_ALIASES
|
|
64
65
|
|
|
65
66
|
|
|
66
67
|
def validate_action(action: str) -> bool:
|
|
@@ -69,7 +70,7 @@ def validate_action(action: str) -> bool:
|
|
|
69
70
|
return normalized in ALL_VALID_ACTION_STRINGS
|
|
70
71
|
|
|
71
72
|
|
|
72
|
-
def parse_actions(action_text: str) ->
|
|
73
|
+
def parse_actions(action_text: str) -> list[str]:
|
|
73
74
|
"""Extract actions from response text.
|
|
74
75
|
|
|
75
76
|
Tries multiple parsing strategies:
|
|
@@ -79,7 +80,6 @@ def parse_actions(action_text: str) -> List[str]:
|
|
|
79
80
|
4. Plain action names if they match valid actions
|
|
80
81
|
5. Newline-separated actions
|
|
81
82
|
"""
|
|
82
|
-
import json
|
|
83
83
|
|
|
84
84
|
# First try the original <action> tag format
|
|
85
85
|
matches = re.findall(r"<action>(.*?)</action>", action_text, re.IGNORECASE)
|
|
@@ -132,7 +132,7 @@ def parse_actions(action_text: str) -> List[str]:
|
|
|
132
132
|
return actions
|
|
133
133
|
|
|
134
134
|
|
|
135
|
-
def format_observation(obs_data:
|
|
135
|
+
def format_observation(obs_data: dict[str, Any], step_count: int = 0, max_steps: int = 100) -> str:
|
|
136
136
|
"""Format a Crafter observation dictionary into a human-readable string.
|
|
137
137
|
|
|
138
138
|
This is critical for preventing massive token counts when observations
|
|
@@ -154,11 +154,11 @@ def format_observation(obs_data: Dict[str, Any], step_count: int = 0, max_steps:
|
|
|
154
154
|
if obs_data.get("steps") is not None
|
|
155
155
|
else obs_data.get("num_steps_taken")
|
|
156
156
|
)
|
|
157
|
-
if isinstance(step_from_obs,
|
|
157
|
+
if isinstance(step_from_obs, int | float) and step_from_obs >= 0:
|
|
158
158
|
step_count = int(step_from_obs)
|
|
159
159
|
|
|
160
160
|
max_steps_from_obs = obs_data.get("max_steps_episode") or obs_data.get("max_steps")
|
|
161
|
-
if isinstance(max_steps_from_obs,
|
|
161
|
+
if isinstance(max_steps_from_obs, int | float) and max_steps_from_obs > 0:
|
|
162
162
|
max_steps = int(max_steps_from_obs)
|
|
163
163
|
|
|
164
164
|
# Format inventory (skip health as it's shown separately)
|
|
@@ -255,7 +255,7 @@ _FALLBACK_ID_TO_NAME = {
|
|
|
255
255
|
}
|
|
256
256
|
|
|
257
257
|
|
|
258
|
-
def _format_semantic_map_view(obs_data:
|
|
258
|
+
def _format_semantic_map_view(obs_data: dict[str, Any], view_size: int = VIEW_SIZE) -> str:
|
|
259
259
|
"""Format the semantic map into a text representation using dynamic IDs.
|
|
260
260
|
|
|
261
261
|
Shows a local view around the player with nearby objects.
|
|
@@ -280,9 +280,9 @@ def _format_semantic_map_view(obs_data: Dict[str, Any], view_size: int = VIEW_SI
|
|
|
280
280
|
use_list = isinstance(_ID_TO_NAME, list) and len(_ID_TO_NAME) > 0
|
|
281
281
|
|
|
282
282
|
# Build matrix centered at player, then transpose for human-friendly view
|
|
283
|
-
matrix:
|
|
283
|
+
matrix: list[list[str]] = []
|
|
284
284
|
for dy in range(-half, half + 1):
|
|
285
|
-
row_tokens:
|
|
285
|
+
row_tokens: list[str] = []
|
|
286
286
|
for dx in range(-half, half + 1):
|
|
287
287
|
x, y = px + dx, py + dy
|
|
288
288
|
if not (0 <= x < sem_arr.shape[0] and 0 <= y < sem_arr.shape[1]):
|
|
@@ -298,8 +298,8 @@ def _format_semantic_map_view(obs_data: Dict[str, Any], view_size: int = VIEW_SI
|
|
|
298
298
|
row_tokens.append(name)
|
|
299
299
|
matrix.append(row_tokens)
|
|
300
300
|
|
|
301
|
-
transposed = list(zip(*matrix))
|
|
302
|
-
grid_rows:
|
|
301
|
+
transposed = list(zip(*matrix, strict=False))
|
|
302
|
+
grid_rows: list[str] = [" ".join(row) for row in transposed]
|
|
303
303
|
return (
|
|
304
304
|
"\nLocal Map View (" + str(view_size) + "x" + str(view_size) + "):\n" + "\n".join(grid_rows)
|
|
305
305
|
)
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
import contextlib
|
|
3
4
|
import os
|
|
4
|
-
from typing import Optional
|
|
5
5
|
|
|
6
6
|
from fastapi import FastAPI
|
|
7
7
|
from fastapi.middleware.cors import CORSMiddleware
|
|
@@ -15,9 +15,9 @@ class TaskApp:
|
|
|
15
15
|
|
|
16
16
|
def __init__(
|
|
17
17
|
self,
|
|
18
|
-
service_base_url:
|
|
19
|
-
vllm_base_url:
|
|
20
|
-
default_model:
|
|
18
|
+
service_base_url: str | None = None,
|
|
19
|
+
vllm_base_url: str | None = None,
|
|
20
|
+
default_model: str | None = None,
|
|
21
21
|
) -> None:
|
|
22
22
|
self.service_base_url = service_base_url or os.getenv(
|
|
23
23
|
"SERVICE_BASE_URL", "http://localhost:8000"
|
|
@@ -67,38 +67,39 @@ def create_app(allowed_environments: list[str] = None) -> FastAPI:
|
|
|
67
67
|
@app.middleware("http")
|
|
68
68
|
async def validate_environment(request, call_next):
|
|
69
69
|
# Check if this is an environment-related request
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
70
|
+
path = request.url.path
|
|
71
|
+
if (
|
|
72
|
+
path.startswith("/env/") or path.startswith("/rollout")
|
|
73
|
+
) and request.method == "POST":
|
|
74
|
+
# We need to read the body to check env_name
|
|
75
|
+
body = await request.body()
|
|
76
|
+
try:
|
|
77
|
+
import json
|
|
78
|
+
|
|
79
|
+
data = json.loads(body) if body else {}
|
|
80
|
+
env_name = data.get("env_name", "").lower()
|
|
81
|
+
|
|
82
|
+
# Check if environment is allowed
|
|
83
|
+
if env_name and env_name not in [e.lower() for e in allowed_environments]:
|
|
84
|
+
from fastapi import HTTPException
|
|
85
|
+
|
|
86
|
+
raise HTTPException(
|
|
87
|
+
status_code=403,
|
|
88
|
+
detail=f"Environment '{env_name}' not allowed. This service only handles: {allowed_environments}",
|
|
89
|
+
)
|
|
90
|
+
except json.JSONDecodeError:
|
|
91
|
+
pass # Invalid JSON, let the endpoint handle it
|
|
92
|
+
|
|
93
|
+
# Recreate request with the body we consumed
|
|
94
|
+
request._body = body
|
|
94
95
|
|
|
95
96
|
response = await call_next(request)
|
|
96
97
|
return response
|
|
97
98
|
|
|
98
99
|
# Mount routers
|
|
100
|
+
from .branching import router as branching_router
|
|
99
101
|
from .environment_routes import router as env_router
|
|
100
102
|
from .rollout import router as rollout_router
|
|
101
|
-
from .branching import router as branching_router
|
|
102
103
|
|
|
103
104
|
app.include_router(env_router, prefix="/env", tags=["environment"])
|
|
104
105
|
|
|
@@ -109,10 +110,8 @@ def create_app(allowed_environments: list[str] = None) -> FastAPI:
|
|
|
109
110
|
app.include_router(policy_router, prefix="/policy", tags=["policy"])
|
|
110
111
|
except Exception as _e:
|
|
111
112
|
# Log lightweight message; policy endpoints will be unavailable
|
|
112
|
-
|
|
113
|
+
with contextlib.suppress(Exception):
|
|
113
114
|
print(f"[hosted_app] Skipping policy routes: {_e}", flush=True)
|
|
114
|
-
except Exception:
|
|
115
|
-
pass
|
|
116
115
|
|
|
117
116
|
app.include_router(rollout_router, tags=["rollout"])
|
|
118
117
|
app.include_router(branching_router, tags=["branching"])
|
|
@@ -148,7 +147,6 @@ def create_app(allowed_environments: list[str] = None) -> FastAPI:
|
|
|
148
147
|
- If X-API-Key header is provided and mismatches, returns 401.
|
|
149
148
|
- Otherwise returns 200 with basic info.
|
|
150
149
|
"""
|
|
151
|
-
import os as _os
|
|
152
150
|
|
|
153
151
|
# Check if any environment API keys are configured
|
|
154
152
|
from synth_ai.task.auth import allowed_environment_api_keys
|
|
@@ -190,7 +188,7 @@ def create_app(allowed_environments: list[str] = None) -> FastAPI:
|
|
|
190
188
|
try:
|
|
191
189
|
hdr = request.headers
|
|
192
190
|
snapshot = {
|
|
193
|
-
"path": str(
|
|
191
|
+
"path": str(request.url.path),
|
|
194
192
|
"have_x_api_key": bool(hdr.get("x-api-key")),
|
|
195
193
|
"have_x_api_keys": bool(hdr.get("x-api-keys")),
|
|
196
194
|
"have_authorization": bool(hdr.get("authorization")),
|
|
@@ -1,8 +1,9 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import asyncio
|
|
4
|
+
import contextlib
|
|
4
5
|
import logging
|
|
5
|
-
from typing import Any
|
|
6
|
+
from typing import Any
|
|
6
7
|
|
|
7
8
|
import httpx
|
|
8
9
|
|
|
@@ -15,7 +16,7 @@ class OpenAIClient:
|
|
|
15
16
|
def __init__(
|
|
16
17
|
self,
|
|
17
18
|
base_url: str,
|
|
18
|
-
api_key:
|
|
19
|
+
api_key: str | None = None,
|
|
19
20
|
timeout_s: float = 120.0,
|
|
20
21
|
) -> None:
|
|
21
22
|
self.base_url = base_url.rstrip("/")
|
|
@@ -27,8 +28,8 @@ class OpenAIClient:
|
|
|
27
28
|
self.headers["Authorization"] = f"Bearer {api_key}"
|
|
28
29
|
|
|
29
30
|
def _fix_model_parameters(
|
|
30
|
-
self, request:
|
|
31
|
-
) ->
|
|
31
|
+
self, request: dict[str, Any], target_url: str | None = None
|
|
32
|
+
) -> dict[str, Any]:
|
|
32
33
|
"""
|
|
33
34
|
Fix parameter compatibility for newer OpenAI models.
|
|
34
35
|
|
|
@@ -91,9 +92,25 @@ class OpenAIClient:
|
|
|
91
92
|
try:
|
|
92
93
|
tools = fixed_request.get("tools")
|
|
93
94
|
if isinstance(tools, list) and tools:
|
|
95
|
+
# Choose the first provided function name from tools schema (e.g., run_command)
|
|
96
|
+
func_name = None
|
|
97
|
+
for t in tools:
|
|
98
|
+
try:
|
|
99
|
+
cand = None
|
|
100
|
+
if isinstance(t, dict):
|
|
101
|
+
f = t.get("function")
|
|
102
|
+
if isinstance(f, dict):
|
|
103
|
+
cand = f.get("name")
|
|
104
|
+
if isinstance(cand, str) and cand:
|
|
105
|
+
func_name = cand
|
|
106
|
+
break
|
|
107
|
+
except Exception:
|
|
108
|
+
continue
|
|
109
|
+
if not func_name:
|
|
110
|
+
func_name = "run_command"
|
|
94
111
|
fixed_request["tool_choice"] = {
|
|
95
112
|
"type": "function",
|
|
96
|
-
"function": {"name":
|
|
113
|
+
"function": {"name": func_name},
|
|
97
114
|
}
|
|
98
115
|
fixed_request["parallel_tool_calls"] = False
|
|
99
116
|
except Exception:
|
|
@@ -103,11 +120,11 @@ class OpenAIClient:
|
|
|
103
120
|
|
|
104
121
|
async def generate(
|
|
105
122
|
self,
|
|
106
|
-
request:
|
|
107
|
-
base_url:
|
|
108
|
-
timeout_s:
|
|
109
|
-
extra_headers:
|
|
110
|
-
) ->
|
|
123
|
+
request: dict[str, Any],
|
|
124
|
+
base_url: str | None = None,
|
|
125
|
+
timeout_s: float | None = None,
|
|
126
|
+
extra_headers: dict[str, str] | None = None,
|
|
127
|
+
) -> dict[str, Any]:
|
|
111
128
|
"""
|
|
112
129
|
Send a chat completion request to the inference server.
|
|
113
130
|
|
|
@@ -135,18 +152,15 @@ class OpenAIClient:
|
|
|
135
152
|
logger.info(f"Inference POST target: {url}")
|
|
136
153
|
if extra_headers:
|
|
137
154
|
logger.info(f"Extra headers: {extra_headers}")
|
|
138
|
-
|
|
139
|
-
keys_preview = sorted(
|
|
155
|
+
with contextlib.suppress(Exception):
|
|
156
|
+
keys_preview = sorted(processed_request.keys())
|
|
140
157
|
logger.info(f"Request keys: {keys_preview}")
|
|
141
|
-
except Exception:
|
|
142
|
-
pass
|
|
143
158
|
|
|
144
159
|
# Final hard-guard for OpenAI: ensure unsupported field is not present
|
|
145
160
|
try:
|
|
146
|
-
if "openai" in url.lower():
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
logger.info("Removed stop_after_tool_calls for OpenAI request")
|
|
161
|
+
if "openai" in url.lower() and "stop_after_tool_calls" in processed_request:
|
|
162
|
+
processed_request.pop("stop_after_tool_calls", None)
|
|
163
|
+
logger.info("Removed stop_after_tool_calls for OpenAI request")
|
|
150
164
|
# Groq-specific requirement: when using JSON mode, one of the messages must contain the word 'json'
|
|
151
165
|
low_url = url.lower()
|
|
152
166
|
if ("groq.com" in low_url or "/openai" in low_url) and isinstance(
|
|
@@ -284,7 +298,7 @@ class OpenAIClient:
|
|
|
284
298
|
processed_request.pop(k, None)
|
|
285
299
|
# Force structured tool choice
|
|
286
300
|
if processed_request.get("tool_choice") == "required":
|
|
287
|
-
func_name = "
|
|
301
|
+
func_name = "run_command"
|
|
288
302
|
try:
|
|
289
303
|
tools_arr = processed_request.get("tools") or []
|
|
290
304
|
if isinstance(tools_arr, list) and tools_arr:
|
|
@@ -367,9 +381,9 @@ class OpenAIClient:
|
|
|
367
381
|
|
|
368
382
|
async def check_health(
|
|
369
383
|
self,
|
|
370
|
-
base_url:
|
|
371
|
-
timeout_s:
|
|
372
|
-
) ->
|
|
384
|
+
base_url: str | None = None,
|
|
385
|
+
timeout_s: float | None = None,
|
|
386
|
+
) -> dict[str, Any]:
|
|
373
387
|
"""
|
|
374
388
|
Check if the inference service is healthy.
|
|
375
389
|
|
|
@@ -403,13 +417,13 @@ class OpenAIClient:
|
|
|
403
417
|
|
|
404
418
|
async def generate_with_retries(
|
|
405
419
|
self,
|
|
406
|
-
request:
|
|
407
|
-
base_url:
|
|
408
|
-
timeout_s:
|
|
420
|
+
request: dict[str, Any],
|
|
421
|
+
base_url: str | None = None,
|
|
422
|
+
timeout_s: float | None = None,
|
|
409
423
|
max_retries: int = 4,
|
|
410
424
|
backoff_factor: float = 2.0,
|
|
411
|
-
extra_headers:
|
|
412
|
-
) ->
|
|
425
|
+
extra_headers: dict[str, str] | None = None,
|
|
426
|
+
) -> dict[str, Any]:
|
|
413
427
|
"""
|
|
414
428
|
Generate with exponential backoff retries for transient errors.
|
|
415
429
|
|
|
@@ -482,7 +496,7 @@ class OpenAIClient:
|
|
|
482
496
|
) from e
|
|
483
497
|
except Exception:
|
|
484
498
|
# If we can't parse the response, don't retry 400 errors
|
|
485
|
-
|
|
499
|
+
with contextlib.suppress(Exception):
|
|
486
500
|
logger.error(
|
|
487
501
|
{
|
|
488
502
|
"non_overload_400_unparsed": True,
|
|
@@ -490,8 +504,6 @@ class OpenAIClient:
|
|
|
490
504
|
"payload": processed_request,
|
|
491
505
|
}
|
|
492
506
|
)
|
|
493
|
-
except Exception:
|
|
494
|
-
pass
|
|
495
507
|
raise RuntimeError(
|
|
496
508
|
f"Inference 400 response (unparsed): {e.response.text if e.response is not None else 'Bad Request'}"
|
|
497
509
|
) from e
|
|
@@ -528,7 +540,7 @@ class OpenAIClient:
|
|
|
528
540
|
|
|
529
541
|
def create_inference_client(
|
|
530
542
|
task_app: Any,
|
|
531
|
-
api_key:
|
|
543
|
+
api_key: str | None = None,
|
|
532
544
|
) -> OpenAIClient:
|
|
533
545
|
"""
|
|
534
546
|
Create an inference client using TaskApp configuration.
|
|
@@ -549,6 +561,57 @@ def create_inference_client(
|
|
|
549
561
|
except Exception:
|
|
550
562
|
api_key = None
|
|
551
563
|
|
|
564
|
+
import json as _json
|
|
565
|
+
import os as _os
|
|
566
|
+
import time as _time
|
|
567
|
+
|
|
568
|
+
if _os.getenv("SYNTH_FAKE_INFERENCE", "").strip():
|
|
569
|
+
|
|
570
|
+
class _DummyClient:
|
|
571
|
+
async def generate_with_retries(
|
|
572
|
+
self,
|
|
573
|
+
request: dict[str, Any],
|
|
574
|
+
base_url: str | None = None,
|
|
575
|
+
max_retries: int = 0,
|
|
576
|
+
backoff_factor: float = 1.0,
|
|
577
|
+
extra_headers: dict[str, str] | None = None,
|
|
578
|
+
) -> dict[str, Any]:
|
|
579
|
+
tool_call = {
|
|
580
|
+
"id": "call_dummy",
|
|
581
|
+
"type": "function",
|
|
582
|
+
"function": {
|
|
583
|
+
"name": "interact_many",
|
|
584
|
+
"arguments": _json.dumps({"actions": ["move_right"]}),
|
|
585
|
+
},
|
|
586
|
+
}
|
|
587
|
+
return {
|
|
588
|
+
"id": f"cmpl-{int(_time.time())}",
|
|
589
|
+
"object": "chat.completion",
|
|
590
|
+
"created": int(_time.time()),
|
|
591
|
+
"model": request.get("model") or "dummy-model",
|
|
592
|
+
"choices": [
|
|
593
|
+
{
|
|
594
|
+
"index": 0,
|
|
595
|
+
"message": {
|
|
596
|
+
"role": "assistant",
|
|
597
|
+
"content": "",
|
|
598
|
+
"tool_calls": [tool_call],
|
|
599
|
+
},
|
|
600
|
+
"finish_reason": "tool_calls",
|
|
601
|
+
}
|
|
602
|
+
],
|
|
603
|
+
"usage": {"prompt_tokens": 10, "completion_tokens": 5, "total_tokens": 15},
|
|
604
|
+
}
|
|
605
|
+
|
|
606
|
+
async def check_health(
|
|
607
|
+
self,
|
|
608
|
+
base_url: str | None = None,
|
|
609
|
+
timeout_s: float | None = None,
|
|
610
|
+
) -> dict[str, Any]:
|
|
611
|
+
return {"status": "ok", "dummy": True}
|
|
612
|
+
|
|
613
|
+
return _DummyClient()
|
|
614
|
+
|
|
552
615
|
return OpenAIClient(
|
|
553
616
|
base_url=task_app.vllm_base_url,
|
|
554
617
|
api_key=api_key,
|
|
@@ -12,7 +12,6 @@ For Modal deployment:
|
|
|
12
12
|
from __future__ import annotations
|
|
13
13
|
|
|
14
14
|
import os
|
|
15
|
-
from typing import Optional
|
|
16
15
|
|
|
17
16
|
import modal
|
|
18
17
|
|
|
@@ -26,7 +25,6 @@ except ImportError:
|
|
|
26
25
|
|
|
27
26
|
from synth_envs_hosted.hosted_app import create_app
|
|
28
27
|
|
|
29
|
-
|
|
30
28
|
# Local development mode
|
|
31
29
|
if __name__ == "__main__":
|
|
32
30
|
import uvicorn
|