PyPI - synth-ai - Versions diffs - 0.2.14__py3-none-any.whl → 0.2.17__py3-none-any.whl - Mend

synth-ai 0.2.14__py3-none-any.whl → 0.2.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of synth-ai might be problematic. Click here for more details.

Files changed (354) hide show

examples/README.md +1 -0
examples/analyze_semantic_words.sh +2 -2
examples/blog_posts/pokemon_vl/README.md +98 -0
examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +25 -0
examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +42 -0
examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
examples/blog_posts/warming_up_to_rl/README.md +158 -0
examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +41 -0
examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
examples/dev/qwen3_32b_qlora_4xh100.toml +5 -0
examples/multi_step/SFT_README.md +147 -0
examples/multi_step/configs/crafter_rl_outcome.toml +1 -1
examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +73 -115
examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +1 -1
examples/multi_step/configs/crafter_rl_stepwise_simple.toml +1 -1
examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
examples/multi_step/configs/crafter_sft_qwen30b_lora.toml +62 -0
examples/multi_step/configs/verilog_rl_lora.toml +80 -123
examples/multi_step/convert_traces_to_sft.py +84 -0
examples/multi_step/run_sft_qwen30b.sh +45 -0
examples/qwen_coder/configs/coder_lora_30b.toml +1 -2
examples/qwen_coder/configs/coder_lora_4b.toml +5 -1
examples/qwen_coder/configs/coder_lora_small.toml +1 -2
examples/qwen_vl/BUGS_AND_FIXES.md +232 -0
examples/qwen_vl/IMAGE_VALIDATION_COMPLETE.md +271 -0
examples/qwen_vl/IMAGE_VALIDATION_SUMMARY.md +260 -0
examples/qwen_vl/INFERENCE_SFT_TESTS.md +412 -0
examples/qwen_vl/NEXT_STEPS_2B.md +325 -0
examples/qwen_vl/QUICKSTART.md +327 -0
examples/qwen_vl/QUICKSTART_RL_VISION.md +110 -0
examples/qwen_vl/README.md +152 -0
examples/qwen_vl/RL_VISION_COMPLETE.md +475 -0
examples/qwen_vl/RL_VISION_TESTING.md +333 -0
examples/qwen_vl/SDK_VISION_INTEGRATION.md +328 -0
examples/qwen_vl/SETUP_COMPLETE.md +274 -0
examples/qwen_vl/VISION_TESTS_COMPLETE.md +489 -0
examples/qwen_vl/VLM_PIPELINE_COMPLETE.md +242 -0
examples/qwen_vl/__init__.py +2 -0
examples/qwen_vl/collect_data_via_cli.md +415 -0
examples/qwen_vl/collect_vision_traces.py +368 -0
examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +110 -0
examples/qwen_vl/configs/crafter_vlm_sft_example.toml +59 -0
examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +26 -0
examples/qwen_vl/configs/eval_gpt4o_vision_proper.toml +29 -0
examples/qwen_vl/configs/eval_gpt5nano_vision.toml +26 -0
examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
examples/qwen_vl/configs/filter_qwen3vl_sft.toml +49 -0
examples/qwen_vl/configs/filter_vision_sft.toml +52 -0
examples/qwen_vl/configs/filter_vision_test.toml +8 -0
examples/qwen_vl/configs/sft_qwen3_vl_2b_test.toml +54 -0
examples/qwen_vl/crafter_gpt5nano_agent.py +308 -0
examples/qwen_vl/crafter_qwen_vl_agent.py +300 -0
examples/qwen_vl/run_vision_comparison.sh +61 -0
examples/qwen_vl/run_vision_sft_pipeline.sh +175 -0
examples/qwen_vl/test_image_validation.py +201 -0
examples/qwen_vl/test_sft_vision_data.py +110 -0
examples/rl/README.md +6 -6
examples/rl/configs/eval_base_qwen.toml +17 -0
examples/rl/configs/eval_rl_qwen.toml +13 -0
examples/rl/configs/rl_from_base_qwen.toml +62 -0
examples/rl/configs/rl_from_base_qwen17.toml +79 -0
examples/rl/configs/rl_from_ft_qwen.toml +37 -0
examples/rl/run_eval.py +436 -0
examples/rl/run_rl_and_save.py +111 -0
examples/rl/task_app/README.md +21 -0
examples/rl/task_app/math_single_step.py +990 -0
examples/rl/task_app/math_task_app.py +111 -0
examples/run_crafter_demo.sh +2 -2
examples/sft/README.md +6 -6
examples/sft/configs/crafter_fft_qwen0p6b.toml +7 -2
examples/sft/configs/crafter_lora_qwen0p6b.toml +7 -3
examples/sft/evaluate.py +2 -4
examples/sft/export_dataset.py +7 -4
examples/swe/task_app/README.md +33 -3
examples/swe/task_app/grpo_swe_mini.py +4 -1
examples/swe/task_app/grpo_swe_mini_task_app.py +0 -12
examples/swe/task_app/hosted/envs/crafter/react_agent.py +1 -1
examples/swe/task_app/hosted/envs/mini_swe/environment.py +50 -23
examples/swe/task_app/hosted/inference/openai_client.py +4 -4
examples/swe/task_app/hosted/policy_routes.py +0 -2
examples/swe/task_app/hosted/rollout.py +0 -8
examples/swe/task_app/morph_backend.py +178 -0
examples/task_apps/crafter/task_app/README.md +1 -1
examples/task_apps/crafter/task_app/grpo_crafter.py +70 -10
examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +1 -1
examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +63 -27
examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -2
examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +48 -50
examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +75 -36
examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +31 -15
examples/task_apps/enron/__init__.py +1 -0
examples/task_apps/enron/task_app/grpo_enron_task_app.py +1 -1
examples/task_apps/math/README.md +1 -2
examples/task_apps/pokemon_red/README.md +3 -4
examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +6 -5
examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +1 -2
examples/task_apps/pokemon_red/task_app.py +36 -5
examples/task_apps/sokoban/README.md +2 -3
examples/task_apps/verilog/eval_groq_qwen32b.toml +12 -14
examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +1 -1
examples/vlm/README.md +3 -3
examples/vlm/configs/crafter_vlm_gpt4o.toml +5 -0
examples/vlm/crafter_openai_vlm_agent.py +3 -5
examples/vlm/filter_image_rows.py +1 -1
examples/vlm/run_crafter_vlm_benchmark.py +2 -2
examples/warming_up_to_rl/_utils.py +92 -0
examples/warming_up_to_rl/analyze_trace_db.py +1 -1
examples/warming_up_to_rl/configs/crafter_fft.toml +5 -0
examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +2 -0
examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +2 -0
examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +2 -1
examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +2 -1
examples/warming_up_to_rl/configs/rl_from_ft.toml +2 -0
examples/warming_up_to_rl/export_trace_sft.py +174 -60
examples/warming_up_to_rl/readme.md +63 -132
examples/warming_up_to_rl/run_fft_and_save.py +1 -1
examples/warming_up_to_rl/run_local_rollout_traced.py +1 -1
examples/warming_up_to_rl/run_rl_and_save.py +1 -1
examples/warming_up_to_rl/task_app/README.md +42 -0
examples/warming_up_to_rl/task_app/grpo_crafter.py +827 -0
examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +135 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +143 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1226 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +522 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +454 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +108 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +204 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +618 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +100 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +1084 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +195 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1861 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +211 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +161 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +137 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +62 -0
examples/workflows/math_rl/configs/rl_from_base_qwen.toml +27 -0
examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +5 -0
synth_ai/__init__.py +44 -30
synth_ai/_utils/__init__.py +47 -0
synth_ai/_utils/base_url.py +10 -0
synth_ai/_utils/http.py +10 -0
synth_ai/_utils/prompts.py +10 -0
synth_ai/_utils/task_app_state.py +12 -0
synth_ai/_utils/user_config.py +10 -0
synth_ai/api/models/supported.py +144 -7
synth_ai/api/train/__init__.py +13 -1
synth_ai/api/train/builders.py +9 -3
synth_ai/api/train/cli.py +155 -17
synth_ai/api/train/config_finder.py +18 -11
synth_ai/api/train/configs/__init__.py +8 -1
synth_ai/api/train/configs/rl.py +32 -7
synth_ai/api/train/configs/sft.py +6 -2
synth_ai/api/train/configs/shared.py +59 -2
synth_ai/api/train/env_resolver.py +13 -10
synth_ai/auth/credentials.py +119 -0
synth_ai/cli/__init__.py +61 -69
synth_ai/cli/_modal_wrapper.py +7 -5
synth_ai/cli/_typer_patch.py +0 -2
synth_ai/cli/_validate_task_app.py +22 -4
synth_ai/cli/commands/__init__.py +17 -0
synth_ai/cli/commands/demo/__init__.py +6 -0
synth_ai/cli/commands/demo/core.py +163 -0
synth_ai/cli/commands/deploy/__init__.py +23 -0
synth_ai/cli/commands/deploy/core.py +614 -0
synth_ai/cli/commands/deploy/errors.py +72 -0
synth_ai/cli/commands/deploy/validation.py +11 -0
synth_ai/cli/commands/eval/__init__.py +19 -0
synth_ai/cli/commands/eval/core.py +1109 -0
synth_ai/cli/commands/eval/errors.py +81 -0
synth_ai/cli/commands/eval/validation.py +133 -0
synth_ai/cli/commands/filter/__init__.py +12 -0
synth_ai/cli/commands/filter/core.py +388 -0
synth_ai/cli/commands/filter/errors.py +55 -0
synth_ai/cli/commands/filter/validation.py +77 -0
synth_ai/cli/commands/help/__init__.py +177 -0
synth_ai/cli/commands/help/core.py +73 -0
synth_ai/cli/commands/status/__init__.py +64 -0
synth_ai/cli/commands/status/client.py +192 -0
synth_ai/cli/commands/status/config.py +92 -0
synth_ai/cli/commands/status/errors.py +20 -0
synth_ai/cli/commands/status/formatters.py +164 -0
synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
synth_ai/cli/commands/status/subcommands/files.py +79 -0
synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
synth_ai/cli/commands/status/subcommands/models.py +79 -0
synth_ai/cli/commands/status/subcommands/runs.py +81 -0
synth_ai/cli/commands/status/subcommands/summary.py +47 -0
synth_ai/cli/commands/status/utils.py +114 -0
synth_ai/cli/commands/train/__init__.py +53 -0
synth_ai/cli/commands/train/core.py +21 -0
synth_ai/cli/commands/train/errors.py +117 -0
synth_ai/cli/commands/train/judge_schemas.py +199 -0
synth_ai/cli/commands/train/judge_validation.py +304 -0
synth_ai/cli/commands/train/validation.py +443 -0
synth_ai/cli/demo.py +2 -162
synth_ai/cli/deploy/__init__.py +28 -0
synth_ai/cli/deploy/core.py +5 -0
synth_ai/cli/deploy/errors.py +23 -0
synth_ai/cli/deploy/validation.py +5 -0
synth_ai/cli/eval/__init__.py +36 -0
synth_ai/cli/eval/core.py +5 -0
synth_ai/cli/eval/errors.py +31 -0
synth_ai/cli/eval/validation.py +5 -0
synth_ai/cli/filter/__init__.py +28 -0
synth_ai/cli/filter/core.py +5 -0
synth_ai/cli/filter/errors.py +23 -0
synth_ai/cli/filter/validation.py +5 -0
synth_ai/cli/legacy_root_backup.py +3 -1
synth_ai/cli/lib/__init__.py +10 -0
synth_ai/cli/lib/task_app_discovery.py +7 -0
synth_ai/cli/lib/task_app_env.py +518 -0
synth_ai/cli/modal_serve/__init__.py +12 -0
synth_ai/cli/modal_serve/core.py +14 -0
synth_ai/cli/modal_serve/errors.py +8 -0
synth_ai/cli/modal_serve/validation.py +11 -0
synth_ai/cli/recent.py +2 -1
synth_ai/cli/serve/__init__.py +12 -0
synth_ai/cli/serve/core.py +14 -0
synth_ai/cli/serve/errors.py +8 -0
synth_ai/cli/serve/validation.py +11 -0
synth_ai/cli/setup.py +21 -0
synth_ai/cli/status.py +7 -126
synth_ai/cli/task_app_deploy.py +7 -0
synth_ai/cli/task_app_list.py +25 -0
synth_ai/cli/task_app_modal_serve.py +11 -0
synth_ai/cli/task_app_serve.py +11 -0
synth_ai/cli/task_apps.py +110 -1499
synth_ai/cli/traces.py +1 -1
synth_ai/cli/train/__init__.py +12 -0
synth_ai/cli/train/core.py +21 -0
synth_ai/cli/train/errors.py +8 -0
synth_ai/cli/train/validation.py +24 -0
synth_ai/cli/train.py +5 -0
synth_ai/cli/turso.py +1 -1
synth_ai/cli/watch.py +1 -1
synth_ai/demos/__init__.py +10 -0
synth_ai/demos/core/__init__.py +28 -1
synth_ai/demos/crafter/__init__.py +1 -0
synth_ai/demos/crafter/crafter_fft_4b.toml +55 -0
synth_ai/demos/crafter/grpo_crafter_task_app.py +185 -0
synth_ai/demos/crafter/rl_from_base_qwen4b.toml +74 -0
synth_ai/demos/demo_registry.py +176 -0
synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
synth_ai/demos/math/__init__.py +1 -0
synth_ai/demos/math/_common.py +16 -0
synth_ai/demos/math/app.py +38 -0
synth_ai/demos/math/config.toml +76 -0
synth_ai/demos/math/deploy_modal.py +54 -0
synth_ai/demos/math/modal_task_app.py +702 -0
synth_ai/demos/math/task_app_entry.py +51 -0
synth_ai/environments/environment/core.py +7 -1
synth_ai/environments/examples/bandit/engine.py +0 -1
synth_ai/environments/examples/bandit/environment.py +0 -1
synth_ai/environments/examples/red/engine.py +33 -12
synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
synth_ai/environments/examples/red/environment.py +26 -0
synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
synth_ai/environments/examples/wordle/environment.py +0 -1
synth_ai/evals/base.py +16 -5
synth_ai/evals/client.py +1 -1
synth_ai/http.py +8 -22
synth_ai/inference/client.py +1 -1
synth_ai/judge_schemas.py +4 -5
synth_ai/learning/client.py +1 -1
synth_ai/learning/health.py +1 -1
synth_ai/learning/jobs.py +1 -1
synth_ai/learning/rl/client.py +4 -2
synth_ai/learning/rl/env_keys.py +1 -1
synth_ai/learning/rl/secrets.py +1 -1
synth_ai/learning/sft/client.py +1 -1
synth_ai/learning/sft/data.py +407 -4
synth_ai/learning/validators.py +4 -1
synth_ai/streaming/__init__.py +29 -0
synth_ai/streaming/config.py +94 -0
synth_ai/streaming/handlers.py +469 -0
synth_ai/streaming/streamer.py +301 -0
synth_ai/streaming/types.py +95 -0
synth_ai/task/apps/__init__.py +4 -2
synth_ai/task/config.py +6 -4
synth_ai/task/rubrics/__init__.py +1 -2
synth_ai/task/rubrics/loaders.py +14 -10
synth_ai/task/rubrics.py +219 -0
synth_ai/task/trace_correlation_helpers.py +24 -11
synth_ai/task/tracing_utils.py +14 -3
synth_ai/task/validators.py +0 -1
synth_ai/tracing_v3/abstractions.py +3 -3
synth_ai/tracing_v3/config.py +15 -13
synth_ai/tracing_v3/constants.py +21 -0
synth_ai/tracing_v3/db_config.py +3 -1
synth_ai/tracing_v3/decorators.py +10 -7
synth_ai/tracing_v3/llm_call_record_helpers.py +5 -5
synth_ai/tracing_v3/migration_helper.py +1 -2
synth_ai/tracing_v3/session_tracer.py +7 -7
synth_ai/tracing_v3/storage/base.py +29 -29
synth_ai/tracing_v3/storage/config.py +3 -3
synth_ai/tracing_v3/turso/daemon.py +8 -9
synth_ai/tracing_v3/turso/native_manager.py +80 -72
synth_ai/tracing_v3/utils.py +2 -2
synth_ai/utils/__init__.py +101 -0
synth_ai/utils/base_url.py +94 -0
synth_ai/utils/cli.py +131 -0
synth_ai/utils/env.py +294 -0
synth_ai/utils/http.py +172 -0
synth_ai/utils/modal.py +308 -0
synth_ai/utils/process.py +212 -0
synth_ai/utils/prompts.py +39 -0
synth_ai/utils/sqld.py +122 -0
synth_ai/utils/task_app_discovery.py +882 -0
synth_ai/utils/task_app_env.py +186 -0
synth_ai/utils/task_app_state.py +318 -0
synth_ai/utils/user_config.py +137 -0
synth_ai/v0/config/__init__.py +1 -5
synth_ai/v0/config/base_url.py +1 -7
synth_ai/v0/tracing/config.py +1 -1
synth_ai/v0/tracing/decorators.py +1 -1
synth_ai/v0/tracing/upload.py +1 -1
synth_ai/v0/tracing_v1/config.py +1 -1
synth_ai/v0/tracing_v1/decorators.py +1 -1
synth_ai/v0/tracing_v1/upload.py +1 -1
{synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/METADATA +91 -32
{synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/RECORD +341 -154
synth_ai/cli/man.py +0 -106
synth_ai/cli/tui.py +0 -57
synth_ai/compound/cais.py +0 -0
synth_ai/core/experiment.py +0 -13
synth_ai/core/system.py +0 -15
synth_ai/demo_registry.py +0 -295
synth_ai/handshake.py +0 -109
synth_ai/tui/__init__.py +0 -5
synth_ai/tui/__main__.py +0 -13
synth_ai/tui/cli/__init__.py +0 -1
synth_ai/tui/cli/query_experiments.py +0 -164
synth_ai/tui/cli/query_experiments_v3.py +0 -164
synth_ai/tui/dashboard.py +0 -906
{synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/WHEEL +0 -0
{synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/entry_points.txt +0 -0
{synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/licenses/LICENSE +0 -0
{synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/top_level.txt +0 -0

examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py ADDED Viewed

@@ -0,0 +1,305 @@
+"""Shared utilities for Crafter environment and policy.
+This module formats Crafter observations for the LLM and parses actions.
+It now mirrors the ludic_private implementation for semantic map rendering
+by dynamically deriving the id->name mapping from the actual Crafter env
+when available, with a sensible fallback. This fixes the issue where the
+rendered surroundings appeared only as iron/stone due to a mismatched
+hardcoded mapping.
+"""
+import itertools
+import re
+from typing import Any
+import numpy as np
+VIEW_SIZE = 5  # Default side length, in tiles, of the local map view (match eval_rollout_table)
+# Canonical Crafter action names mapped to the integer action ids from the game
+CRAFTER_ACTIONS = {
+    "noop": 0,
+    "move_left": 1,
+    "move_right": 2,
+    "move_up": 3,
+    "move_down": 4,
+    "do": 5,
+    "sleep": 6,
+    "place_stone": 7,
+    "place_table": 8,
+    "place_furnace": 9,
+    "place_plant": 10,
+    "make_wood_pickaxe": 11,
+    "make_stone_pickaxe": 12,
+    "make_iron_pickaxe": 13,
+    "make_wood_sword": 14,
+    "make_stone_sword": 15,
+    "make_iron_sword": 16,
+}
+# Common action aliases; every value is a canonical key of CRAFTER_ACTIONS
+ACTION_ALIASES = {
+    # Movement aliases
+    "left": "move_left",
+    "right": "move_right",
+    "up": "move_up",
+    "down": "move_down",
+    # Interaction aliases
+    "interact": "do",
+    "use": "do",
+    "action": "do",
+    # Sleep
+    "rest": "sleep",
+    # Crafting
+    "craft_wood_pickaxe": "make_wood_pickaxe",
+    "craft_stone_pickaxe": "make_stone_pickaxe",
+    "craft_iron_pickaxe": "make_iron_pickaxe",
+    "craft_wood_sword": "make_wood_sword",
+    "craft_stone_sword": "make_stone_sword",
+    "craft_iron_sword": "make_iron_sword",
+}
+# Name sets derived from the two tables above; ALL_VALID_ACTION_STRINGS is the
+# set that validate_action() checks membership against
+VALID_PRIMARY_ACTIONS: set[str] = set(CRAFTER_ACTIONS.keys())
+VALID_ACTION_ALIASES: set[str] = set(ACTION_ALIASES.keys())
+ALL_VALID_ACTION_STRINGS: set[str] = VALID_PRIMARY_ACTIONS | VALID_ACTION_ALIASES
+def validate_action(action: str) -> bool:
+    """Check if an action string is valid."""
+    normalized = action.strip().lower().replace(" ", "_")
+    return normalized in ALL_VALID_ACTION_STRINGS
+def parse_actions(action_text: str) -> list[str]:
+    """Extract actions from response text.
+    Tries multiple parsing strategies:
+    1. <action>...</action> tags (original format)
+    2. [action]...[/action] or [action]... format
+    3. ACTION: prefix format
+    4. Plain action names if they match valid actions
+    5. Newline-separated actions
+    """
+    # First try the original <action> tag format
+    matches = re.findall(r"<action>(.*?)</action>", action_text, re.IGNORECASE)
+    if matches:
+        return [m.strip() for m in matches if validate_action(m.strip())]
+    # Try [action] format
+    matches = re.findall(r"\[action\](.*?)(?:\[/action\]|\n|$)", action_text, re.IGNORECASE)
+    if matches:
+        return [m.strip() for m in matches if validate_action(m.strip())]
+    # If no tags found, try to parse plain text
+    text = action_text.strip()
+    # Check if the entire text is a valid action
+    if validate_action(text):
+        return [text]
+    # Try splitting by newlines and checking each line
+    lines = text.split("\n")
+    actions = []
+    for line in lines:
+        line = line.strip()
+        # Remove various prefixes
+        for prefix in ["ACTION:", "Action:", "action:", "ACTION", "-", "*", "•", "**ACTION:**"]:
+            if line.startswith(prefix):
+                line = line[len(prefix) :].strip()
+                break
+        # Also handle numbered lists
+        if re.match(r"^\d+\.\s*", line):
+            line = re.sub(r"^\d+\.\s*", "", line)
+        # Split by common separators to handle multiple actions on one line
+        parts = re.split(r"[,;]|\s+and\s+|\s+then\s+", line)
+        for part in parts:
+            part = part.strip()
+            # Remove quotes if present
+            if part.startswith('"') and part.endswith('"'):
+                part = part[1:-1]
+            if part.startswith("'") and part.endswith("'"):
+                part = part[1:-1]
+            # Check if it's a valid action
+            if part and validate_action(part):
+                actions.append(part)
+    return actions
+def format_observation(obs_data: dict[str, Any], step_count: int = 0, max_steps: int = 100) -> str:
+    """Format a Crafter observation dictionary into a human-readable string.
+    This is critical for preventing massive token counts when observations
+    contain large numpy arrays or deeply nested structures.
+    Args:
+        obs_data: Observation dictionary. Keys read here: "health",
+            "inventory", "player_position", "player_direction",
+            "achievements_status", "steps" / "num_steps_taken",
+            "max_steps_episode" / "max_steps", plus whatever
+            _format_semantic_map_view() reads.
+        step_count: Step number to display when the observation carries none.
+        max_steps: Episode length to display when the observation carries none.
+    Returns:
+        A multi-line summary of the game state, or "" when obs_data is empty
+        or None.
+    """
+    if not obs_data:
+        return ""
+    # Extract key information
+    # A falsy top-level "health" (missing, None or 0) falls back to inventory["health"]
+    health = obs_data.get("health") or obs_data.get("inventory", {}).get("health", 0)
+    inventory_dict = obs_data.get("inventory", {})
+    pos = obs_data.get("player_position", [0, 0])
+    direction = obs_data.get("player_direction", [0, 1])
+    achievements = obs_data.get("achievements_status", {})
+    # Prefer step/max from observation if provided by the env
+    step_from_obs = (
+        obs_data.get("steps")
+        if obs_data.get("steps") is not None
+        else obs_data.get("num_steps_taken")
+    )
+    # Only a non-negative number overrides the step_count argument
+    if isinstance(step_from_obs, int | float) and step_from_obs >= 0:
+        step_count = int(step_from_obs)
+    max_steps_from_obs = obs_data.get("max_steps_episode") or obs_data.get("max_steps")
+    # Only a positive number overrides the max_steps argument
+    if isinstance(max_steps_from_obs, int | float) and max_steps_from_obs > 0:
+        max_steps = int(max_steps_from_obs)
+    # Format inventory (skip health as it's shown separately)
+    # Items with a count of zero or less are omitted
+    inv_items = [f"{k}:{v}" for k, v in inventory_dict.items() if v > 0 and k != "health"]
+    inventory_str = ", ".join(inv_items) if inv_items else "empty"
+    # Format achievements: list only the names whose status is truthy
+    achieved_list = [k for k, v in achievements.items() if v]
+    achievements_str = ", ".join(achieved_list) if achieved_list else "none"
+    # Format semantic map view (simplified version)
+    map_view = _format_semantic_map_view(obs_data, VIEW_SIZE)
+    return (
+        f"=== CRAFTER GAME STATE ===\n"
+        f"Step: {step_count}/{max_steps}\n"
+        f"Health: {health}\n"
+        f"Position: {pos}\n"
+        f"Facing: {direction}\n"
+        f"Inventory: {inventory_str}\n"
+        f"Achievements: {achievements_str}\n"
+        f"{map_view}\n\n"
+        f"Choose your next actions.\n"
+    )
+def _try_build_dynamic_mapping():
+    """Attempt to build id->name mapping from a real Crafter env.
+    Returns a list where index is semantic ID and value is the lowercase name.
+    IDs that have no entry in either source table keep the placeholder "void".
+    On failure (crafter not installed or internal API changed), returns None.
+    """
+    try:
+        import crafter  # type: ignore
+    except Exception:
+        return None
+    dummyenv = None
+    try:
+        # A throwaway env is created only to read its id tables; it is closed in `finally`
+        dummyenv = crafter.Env()
+        # Combine material IDs and semantic view object IDs
+        # These are private attributes of the env, hence the defensive getattr calls
+        world_ids = getattr(dummyenv, "_world", None)
+        sem_view = getattr(dummyenv, "_sem_view", None)
+        if world_ids is None or sem_view is None:
+            return None
+        mat_ids = getattr(world_ids, "_mat_ids", None)
+        obj_ids = getattr(sem_view, "_obj_ids", None)
+        if not isinstance(mat_ids, dict) or not isinstance(obj_ids, dict):
+            return None
+        # Size the list so the largest id in either table is a valid index
+        max_id = max(max(mat_ids.values()), max(obj_ids.values())) + 1
+        id_to_item = ["void"] * max_id
+        for name, idx in itertools.chain(mat_ids.items(), obj_ids.items()):
+            # Keys are handled as None, as objects exposing __name__, or as anything str() accepts
+            if name is None:
+                clean = "none"
+            elif hasattr(name, "__name__"):
+                clean = name.__name__.lower()
+            else:
+                clean = str(name).lower()
+            if 0 <= idx < len(id_to_item):
+                id_to_item[idx] = clean
+        return id_to_item
+    except Exception:
+        # Best effort: any failure while introspecting the env means "no dynamic mapping"
+        return None
+    finally:
+        try:
+            if dummyenv is not None:
+                dummyenv.close()
+        except Exception:
+            # Errors while closing the throwaway env are deliberately ignored
+            pass
+# Build dynamic mapping if possible; otherwise fall back to a basic map.
+# This call runs once, when the module is imported; the result is None when
+# the mapping could not be built.
+_ID_TO_NAME = _try_build_dynamic_mapping()
+# Static semantic id -> name table, consulted by _format_semantic_map_view()
+# when the dynamic mapping is unavailable or does not cover an id
+_FALLBACK_ID_TO_NAME = {
+    0: "none",  # None from materials
+    1: "water",
+    2: "grass",
+    3: "stone",
+    4: "path",
+    5: "sand",
+    6: "tree",
+    7: "lava",
+    8: "coal",
+    9: "iron",
+    10: "diamond",
+    11: "table",
+    12: "furnace",
+    13: "player",
+    14: "cow",
+    15: "zombie",
+    16: "skeleton",
+    17: "arrow",
+    18: "plant",
+}
+def _format_semantic_map_view(obs_data: dict[str, Any], view_size: int = VIEW_SIZE) -> str:
+    """Format the semantic map into a text representation using dynamic IDs.
+    Shows a local view around the player with nearby objects.
+    Args:
+        obs_data: Observation dictionary; reads "semantic_map" and
+            "player_position" (defaulting to [0, 0]).
+        view_size: Side length of the square window, in tiles.
+    Returns:
+        A header line followed by one line of space-separated tile names per
+        row of the window, or "Map view unavailable" when the observation has
+        no semantic map. Cells outside the map are shown as "void" and ids
+        missing from both mappings are shown as their number.
+    """
+    semantic_map = obs_data.get("semantic_map")
+    player_position = obs_data.get("player_position", [0, 0])
+    if semantic_map is None:
+        return "Map view unavailable"
+    # Convert to numpy array if needed
+    sem_arr = np.asarray(semantic_map)
+    if sem_arr.ndim == 1:
+        # Reshape flat array to 2D
+        # The flat map is taken to be square; reshape raises if the length is not a perfect square
+        side = int(len(sem_arr) ** 0.5)
+        sem_arr = sem_arr.reshape(side, side)
+    px, py = map(int, player_position)
+    half = view_size // 2
+    # Choose mapping source
+    use_list = isinstance(_ID_TO_NAME, list) and len(_ID_TO_NAME) > 0
+    # Build matrix centered at player, then transpose for human-friendly view
+    # Before the transpose each inner list holds one dy offset, with dx varying along it
+    matrix: list[list[str]] = []
+    for dy in range(-half, half + 1):
+        row_tokens: list[str] = []
+        for dx in range(-half, half + 1):
+            x, y = px + dx, py + dy
+            # The array is indexed as [x, y], so axis 0 is bounded by x and axis 1 by y
+            if not (0 <= x < sem_arr.shape[0] and 0 <= y < sem_arr.shape[1]):
+                row_tokens.append("void")
+            elif dx == 0 and dy == 0:
+                # The centre cell is always labelled "player", whatever id the map holds there
+                row_tokens.append("player")
+            else:
+                obj_id = int(sem_arr[x, y])
+                if use_list and 0 <= obj_id < len(_ID_TO_NAME):
+                    name = _ID_TO_NAME[obj_id]  # type: ignore[index]
+                else:
+                    name = _FALLBACK_ID_TO_NAME.get(obj_id, str(obj_id))
+                row_tokens.append(name)
+        matrix.append(row_tokens)
+    # After the transpose each printed line holds one dx offset, with dy increasing left to right
+    transposed = list(zip(*matrix, strict=False))
+    grid_rows: list[str] = [" ".join(row) for row in transposed]
+    return (
+        "\nLocal Map View (" + str(view_size) + "x" + str(view_size) + "):\n" + "\n".join(grid_rows)
+    )

examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py ADDED Viewed

@@ -0,0 +1,47 @@
+"""OpenAI tools schema for Crafter, defined in Python."""
+# Pass this list directly to OpenAI/vLLM `tools=`.
+# It declares a single function tool, `interact_many`, whose only argument is
+# `actions`: an array of 1 to 8 action-name strings restricted to the enum below.
+TOOLS_SCHEMA = [
+    {
+        "type": "function",
+        "function": {
+            "name": "interact_many",
+            "description": "Execute a short sequence of Crafter actions in order (1-8).",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "actions": {
+                        "type": "array",
+                        "description": "List of Crafter actions to execute sequentially.",
+                        "items": {
+                            "type": "string",
+                            # Canonical action names only; no aliases are listed here
+                            "enum": [
+                                "noop",
+                                "move_left",
+                                "move_right",
+                                "move_up",
+                                "move_down",
+                                "do",
+                                "sleep",
+                                "place_stone",
+                                "place_table",
+                                "place_furnace",
+                                "place_plant",
+                                "make_wood_pickaxe",
+                                "make_stone_pickaxe",
+                                "make_iron_pickaxe",
+                                "make_wood_sword",
+                                "make_stone_sword",
+                                "make_iron_sword",
+                            ],
+                        },
+                        "minItems": 1,
+                        "maxItems": 8,
+                    }
+                },
+                "required": ["actions"],
+                "additionalProperties": False,
+            },
+        },
+    }
+]

examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py ADDED Viewed

@@ -0,0 +1,204 @@
+from __future__ import annotations
+import contextlib
+import os
+from fastapi import FastAPI
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import JSONResponse
+from pydantic import BaseModel
+from starlette.requests import Request
+class TaskApp:
+    """Holds service configuration and shared state."""
+    def __init__(
+        self,
+        service_base_url: str | None = None,
+        vllm_base_url: str | None = None,
+        default_model: str | None = None,
+    ) -> None:
+        self.service_base_url = service_base_url or os.getenv(
+            "SERVICE_BASE_URL", "http://localhost:8000"
+        )
+        self.vllm_base_url = vllm_base_url or os.getenv("VLLM_BASE_URL", "http://localhost:8001")
+        self.default_model = default_model or os.getenv("DEFAULT_MODEL")
+class ServiceInfo(BaseModel):
+    """Service discovery response, used as the response model of ``/info``."""
+    # Task-service details; ``/info`` fills in "base_url" and an "endpoints" map.
+    service: dict
+    # Inference-backend details; ``/info`` fills in "base_url", "endpoints" and "default_model".
+    inference: dict
+def create_app(allowed_environments: list[str] = None) -> FastAPI:
+    """FastAPI app factory.
+    Args:
+        allowed_environments: List of environment names this service is allowed to handle.
+                            If None, all environments are allowed (for backward compatibility).
+    """
+    env_filter = f" ({', '.join(allowed_environments)})" if allowed_environments else ""
+    app = FastAPI(
+        title=f"GRPO Synth Envs Hosted Service{env_filter}",
+        description=f"Hosted environment and policy service for GRPO training{env_filter}",
+        version="0.1.0",
+    )
+    # Add CORS middleware for development
+    app.add_middleware(
+        CORSMiddleware,
+        allow_origins=["*"],
+        allow_credentials=True,
+        allow_methods=["*"],
+        allow_headers=["*"],
+    )
+    # Initialize task app configuration
+    task_app = TaskApp()
+    app.state.task_app = task_app
+    app.state.allowed_environments = allowed_environments
+    # Add environment validation middleware
+    if allowed_environments:
+        @app.middleware("http")
+        async def validate_environment(request, call_next):
+            # Check if this is an environment-related request
+            path = request.url.path
+            if (
+                path.startswith("/env/") or path.startswith("/rollout")
+            ) and request.method == "POST":
+                # We need to read the body to check env_name
+                body = await request.body()
+                try:
+                    import json
+                    data = json.loads(body) if body else {}
+                    env_name = data.get("env_name", "").lower()
+                    # Check if environment is allowed
+                    if env_name and env_name not in [e.lower() for e in allowed_environments]:
+                        from fastapi import HTTPException
+                        raise HTTPException(
+                            status_code=403,
+                            detail=f"Environment '{env_name}' not allowed. This service only handles: {allowed_environments}",
+                        )
+                except json.JSONDecodeError:
+                    pass  # Invalid JSON, let the endpoint handle it
+                # Recreate request with the body we consumed
+                request._body = body
+            response = await call_next(request)
+            return response
+    # Mount routers
+    from .branching import router as branching_router
+    from .environment_routes import router as env_router
+    from .rollout import router as rollout_router
+    app.include_router(env_router, prefix="/env", tags=["environment"])
+    # Policy routes are optional; skip if optional envs are missing in this build
+    try:
+        from .policy_routes import router as policy_router
+        app.include_router(policy_router, prefix="/policy", tags=["policy"])
+    except Exception as _e:
+        # Log lightweight message; policy endpoints will be unavailable
+        with contextlib.suppress(Exception):
+            print(f"[hosted_app] Skipping policy routes: {_e}", flush=True)
+    app.include_router(rollout_router, tags=["rollout"])
+    app.include_router(branching_router, tags=["branching"])
+    @app.get("/info", response_model=ServiceInfo)
+    async def get_info() -> ServiceInfo:
+        """Service discovery endpoint."""
+        return ServiceInfo(
+            service={
+                "base_url": task_app.service_base_url,
+                "endpoints": {
+                    "env": "/env/*",
+                    "policy": "/policy/*",
+                    "rollout": "/rollout",
+                    "branch": "/branch",
+                    "run": "/run/*",
+                },
+            },
+            inference={
+                "base_url": task_app.vllm_base_url,
+                "endpoints": {
+                    "chat_completions": "/v1/chat/completions",
+                },
+                "default_model": task_app.default_model,
+            },
+        )
+    @app.get("/health")
+    async def health_check(request: Request) -> dict:
+        """Health and auth sanity check.
+        - Returns 503 if server missing ENVIRONMENT_API_KEY (misconfigured container).
+        - If X-API-Key header is provided and mismatches, returns 401.
+        - Otherwise returns 200 with basic info.
+        """
+        # Check if any environment API keys are configured
+        from synth_ai.task.auth import allowed_environment_api_keys
+        allowed_keys = allowed_environment_api_keys()
+        if not allowed_keys:
+            # Server-side misconfiguration; rollout would fail with 503
+            return JSONResponse(
+                status_code=503,
+                content={
+                    "status": "unhealthy",
+                    "detail": "Auth not configured: missing ENVIRONMENT_API_KEY in task service environment",
+                },
+            )
+        # Authorize using all header variants without typed Header params (avoid 422s)
+        from synth_ai.task.auth import is_api_key_header_authorized
+        authorized = is_api_key_header_authorized(request)
+        if not authorized:
+            # Soft-pass 200 with authorized=False to avoid failing CLI preflight
+            primary_key = list(allowed_keys)[0] if allowed_keys else None
+            prefix = primary_key[: max(1, len(primary_key) // 2)] if primary_key else None
+            content = {"status": "healthy", "authorized": False}
+            if prefix:
+                content["expected_api_key_prefix"] = prefix
+            return JSONResponse(status_code=200, content=content)
+        return {
+            "status": "healthy",
+            "authorized": True,
+            "service": {"base_url": task_app.service_base_url},
+        }
+    # Log and surface 422 validation errors with header presence
+    from fastapi.exceptions import RequestValidationError
+    @app.exception_handler(RequestValidationError)
+    async def _on_validation_error(request: Request, exc: RequestValidationError):
+        try:
+            hdr = request.headers
+            snapshot = {
+                "path": str(request.url.path),
+                "have_x_api_key": bool(hdr.get("x-api-key")),
+                "have_x_api_keys": bool(hdr.get("x-api-keys")),
+                "have_authorization": bool(hdr.get("authorization")),
+                "errors": exc.errors()[:5],
+            }
+            print("[422] validation", snapshot, flush=True)
+        except Exception:
+            pass
+        return JSONResponse(
+            status_code=422, content={"status": "invalid", "detail": exc.errors()[:5]}
+        )
+    return app

examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py ADDED Viewed

@@ -0,0 +1,5 @@
+"""Inference module for OpenAI-compatible API clients."""
+from .openai_client import OpenAIClient, create_inference_client
+# Public API of this package: the two names imported from .openai_client.
+__all__ = ["OpenAIClient", "create_inference_client"]

synth-ai 0.2.14__py3-none-any.whl → 0.2.17__py3-none-any.whl

Potentially problematic release.

synth-ai 0.2.14py3-none-any.whl → 0.2.17py3-none-any.whl