synth-ai 0.2.9.dev5__py3-none-any.whl → 0.2.9.dev6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synth-ai might be problematic. Click here for more details.
- examples/__init__.py +16 -0
- examples/crafter_debug_render.py +23 -17
- examples/qwen_coder/README.md +102 -0
- examples/qwen_coder/_shared.py +113 -0
- examples/qwen_coder/configs/coder_lora_30b.toml +61 -0
- examples/qwen_coder/configs/coder_lora_4b.toml +57 -0
- examples/qwen_coder/configs/coder_lora_small.toml +58 -0
- examples/qwen_coder/generate_dataset.py +98 -0
- examples/qwen_coder/infer_ft_smoke.py +64 -0
- examples/qwen_coder/infer_prod_proxy.py +73 -0
- examples/qwen_coder/infer_via_synth.py +87 -0
- examples/qwen_coder/scripts/infer_coder.sh +18 -0
- examples/qwen_coder/scripts/train_coder_30b.sh +21 -0
- examples/qwen_coder/sft_full_17b.py +103 -0
- examples/qwen_coder/sft_lora_30b.py +110 -0
- examples/qwen_coder/subset_jsonl.py +38 -0
- examples/qwen_coder/validate_jsonl.py +59 -0
- examples/rl/configs/eval_base_qwen.toml +1 -1
- examples/rl/configs/rl_from_base_qwen17.toml +1 -1
- examples/rl/download_dataset.py +26 -10
- examples/rl/run_eval.py +53 -52
- examples/rl/run_rl_and_save.py +29 -12
- examples/rl/task_app/math_single_step.py +180 -41
- examples/rl/task_app/math_task_app.py +14 -6
- examples/sft/README.md +139 -0
- examples/sft/configs/crafter_fft_qwen0p6b.toml +44 -0
- examples/sft/configs/crafter_lora_qwen0p6b.toml +45 -0
- examples/sft/evaluate.py +117 -0
- examples/sft/export_dataset.py +117 -0
- examples/sft/generate_traces.py +162 -0
- examples/swe/__init__.py +12 -0
- examples/swe/task_app/README.md +105 -0
- examples/swe/task_app/__init__.py +2 -0
- examples/swe/task_app/grpo_swe_mini.py +571 -0
- examples/swe/task_app/grpo_swe_mini_task_app.py +136 -0
- examples/swe/task_app/hosted/README.md +173 -0
- examples/swe/task_app/hosted/__init__.py +5 -0
- examples/swe/task_app/hosted/branching.py +143 -0
- examples/swe/task_app/hosted/environment_routes.py +1289 -0
- examples/swe/task_app/hosted/envs/__init__.py +1 -0
- examples/swe/task_app/hosted/envs/crafter/__init__.py +6 -0
- examples/swe/task_app/hosted/envs/crafter/app.py +1 -0
- examples/swe/task_app/hosted/envs/crafter/environment.py +522 -0
- examples/swe/task_app/hosted/envs/crafter/policy.py +478 -0
- examples/swe/task_app/hosted/envs/crafter/react_agent.py +108 -0
- examples/swe/task_app/hosted/envs/crafter/shared.py +305 -0
- examples/swe/task_app/hosted/envs/crafter/tools.py +47 -0
- examples/swe/task_app/hosted/envs/mini_swe/__init__.py +8 -0
- examples/swe/task_app/hosted/envs/mini_swe/environment.py +1164 -0
- examples/swe/task_app/hosted/envs/mini_swe/policy.py +355 -0
- examples/swe/task_app/hosted/envs/mini_swe/shared.py +83 -0
- examples/swe/task_app/hosted/envs/mini_swe/tools.py +96 -0
- examples/swe/task_app/hosted/hosted_app.py +204 -0
- examples/swe/task_app/hosted/inference/__init__.py +5 -0
- examples/swe/task_app/hosted/inference/openai_client.py +618 -0
- examples/swe/task_app/hosted/main.py +100 -0
- examples/swe/task_app/hosted/policy_routes.py +1079 -0
- examples/swe/task_app/hosted/registry.py +195 -0
- examples/swe/task_app/hosted/rollout.py +1869 -0
- examples/swe/task_app/hosted/storage/__init__.py +5 -0
- examples/swe/task_app/hosted/storage/volume.py +211 -0
- examples/swe/task_app/hosted/test_agents.py +161 -0
- examples/swe/task_app/hosted/test_service.py +137 -0
- examples/swe/task_app/hosted/utils.py +62 -0
- examples/vlm/README.md +68 -0
- examples/vlm/configs/crafter_vlm_gpt4o.toml +44 -0
- examples/vlm/crafter_image_only_agent.py +207 -0
- examples/vlm/crafter_openai_vlm_agent.py +277 -0
- examples/vlm/filter_image_rows.py +63 -0
- examples/vlm/run_crafter_vlm_benchmark.py +316 -0
- examples/warming_up_to_rl/analyze_trace_db.py +12 -10
- examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +11 -1
- examples/warming_up_to_rl/export_trace_sft.py +218 -36
- examples/warming_up_to_rl/groq_test.py +15 -8
- examples/warming_up_to_rl/manage_secrets.py +29 -25
- examples/warming_up_to_rl/readme.md +9 -2
- examples/warming_up_to_rl/run_eval.py +137 -61
- examples/warming_up_to_rl/run_fft_and_save.py +131 -60
- examples/warming_up_to_rl/run_local_rollout.py +88 -39
- examples/warming_up_to_rl/run_local_rollout_modal.py +114 -28
- examples/warming_up_to_rl/run_local_rollout_parallel.py +81 -20
- examples/warming_up_to_rl/run_local_rollout_traced.py +126 -23
- examples/warming_up_to_rl/run_rl_and_save.py +35 -12
- examples/warming_up_to_rl/run_rollout_remote.py +44 -19
- examples/warming_up_to_rl/task_app/README.md +6 -2
- examples/warming_up_to_rl/task_app/grpo_crafter.py +319 -57
- examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +11 -30
- examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +9 -11
- examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +137 -182
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +150 -57
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +105 -69
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +19 -7
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +45 -42
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +47 -45
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +198 -92
- examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +0 -2
- examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +361 -263
- examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +21 -23
- examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +394 -274
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +56 -62
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +6 -15
- examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +4 -3
- synth/__init__.py +14 -0
- synth_ai/__init__.py +20 -4
- synth_ai/api/models/supported.py +376 -0
- synth_ai/api/train/builders.py +157 -26
- synth_ai/api/train/cli.py +213 -57
- synth_ai/api/train/config_finder.py +65 -5
- synth_ai/api/train/env_resolver.py +33 -15
- synth_ai/api/train/pollers.py +13 -4
- synth_ai/api/train/supported_algos.py +139 -0
- synth_ai/api/train/task_app.py +5 -3
- synth_ai/api/train/utils.py +33 -48
- synth_ai/cli/__init__.py +19 -4
- synth_ai/cli/_modal_wrapper.py +28 -0
- synth_ai/cli/_typer_patch.py +49 -0
- synth_ai/cli/balance.py +2 -3
- synth_ai/cli/calc.py +1 -1
- synth_ai/cli/demo.py +21 -6
- synth_ai/cli/recent.py +2 -2
- synth_ai/cli/rl_demo.py +77 -17
- synth_ai/cli/root.py +116 -39
- synth_ai/cli/status.py +2 -2
- synth_ai/cli/task_apps.py +1699 -259
- synth_ai/cli/traces.py +7 -4
- synth_ai/cli/turso.py +73 -0
- synth_ai/cli/watch.py +12 -18
- synth_ai/core/experiment.py +0 -2
- synth_ai/demo_registry.py +68 -31
- synth_ai/demos/core/cli.py +516 -194
- synth_ai/demos/demo_task_apps/__init__.py +3 -3
- synth_ai/demos/demo_task_apps/core.py +64 -28
- synth_ai/demos/demo_task_apps/crafter/configs/crafter_fft_4b.toml +2 -3
- synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +37 -30
- synth_ai/demos/demo_task_apps/math/_common.py +1 -2
- synth_ai/demos/demo_task_apps/math/app.py +2 -1
- synth_ai/demos/demo_task_apps/math/deploy_modal.py +3 -6
- synth_ai/demos/demo_task_apps/math/modal_task_app.py +183 -82
- synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -2
- synth_ai/environments/examples/bandit/engine.py +12 -4
- synth_ai/environments/examples/bandit/taskset.py +4 -4
- synth_ai/environments/examples/crafter_classic/environment.py +76 -1
- synth_ai/environments/reproducibility/tree.py +5 -6
- synth_ai/environments/service/app.py +11 -12
- synth_ai/environments/service/core_routes.py +10 -9
- synth_ai/environments/stateful/engine.py +1 -1
- synth_ai/environments/tasks/core.py +1 -0
- synth_ai/environments/tasks/filters.py +5 -6
- synth_ai/environments/tasks/utils.py +4 -5
- synth_ai/evals/base.py +0 -2
- synth_ai/handshake.py +11 -9
- synth_ai/http.py +1 -1
- synth_ai/http_client.py +43 -11
- synth_ai/inference/__init__.py +0 -2
- synth_ai/inference/client.py +20 -6
- synth_ai/jobs/client.py +103 -78
- synth_ai/learning/__init__.py +41 -6
- synth_ai/learning/algorithms.py +14 -0
- synth_ai/learning/client.py +121 -29
- synth_ai/learning/config.py +2 -40
- synth_ai/learning/constants.py +0 -2
- synth_ai/learning/ft_client.py +4 -56
- synth_ai/learning/health.py +13 -7
- synth_ai/learning/jobs.py +43 -47
- synth_ai/{rl → learning/rl}/__init__.py +14 -5
- synth_ai/learning/rl/client.py +267 -0
- synth_ai/learning/rl/config.py +31 -0
- synth_ai/{rl → learning/rl}/contracts.py +5 -10
- synth_ai/{rl → learning/rl}/env_keys.py +45 -16
- synth_ai/learning/rl/secrets.py +13 -0
- synth_ai/learning/rl_client.py +2 -253
- synth_ai/learning/sft/__init__.py +29 -0
- synth_ai/learning/sft/client.py +68 -0
- synth_ai/learning/sft/config.py +270 -0
- synth_ai/learning/sft/data.py +295 -0
- synth_ai/learning/sse.py +25 -26
- synth_ai/learning/validators.py +25 -24
- synth_ai/lm/__init__.py +21 -47
- synth_ai/task/__init__.py +26 -27
- synth_ai/task/apps/__init__.py +18 -19
- synth_ai/task/auth.py +35 -23
- synth_ai/task/client.py +15 -13
- synth_ai/task/contracts.py +37 -35
- synth_ai/task/datasets.py +9 -6
- synth_ai/task/errors.py +11 -10
- synth_ai/task/health.py +17 -11
- synth_ai/task/json.py +58 -24
- synth_ai/task/proxy.py +15 -14
- synth_ai/task/rubrics.py +22 -15
- synth_ai/task/server.py +43 -17
- synth_ai/task/tracing_utils.py +12 -7
- synth_ai/task/validators.py +0 -1
- synth_ai/task/vendors.py +5 -7
- synth_ai/tracing_v3/__init__.py +2 -0
- synth_ai/tracing_v3/abstractions.py +21 -4
- synth_ai/tracing_v3/db_config.py +26 -1
- synth_ai/tracing_v3/decorators.py +18 -15
- synth_ai/tracing_v3/examples/basic_usage.py +3 -2
- synth_ai/tracing_v3/hooks.py +6 -4
- synth_ai/tracing_v3/llm_call_record_helpers.py +6 -6
- synth_ai/tracing_v3/replica_sync.py +1 -0
- synth_ai/tracing_v3/session_tracer.py +63 -16
- synth_ai/tracing_v3/storage/base.py +89 -1
- synth_ai/tracing_v3/storage/config.py +21 -8
- synth_ai/tracing_v3/storage/factory.py +10 -8
- synth_ai/tracing_v3/storage/utils.py +4 -2
- synth_ai/tracing_v3/turso/daemon.py +7 -2
- synth_ai/tracing_v3/turso/models.py +5 -2
- synth_ai/tracing_v3/turso/native_manager.py +1173 -0
- synth_ai/tracing_v3/utils.py +4 -3
- synth_ai/v0/api/__init__.py +8 -0
- synth_ai/v0/api/models/__init__.py +8 -0
- synth_ai/v0/api/models/supported.py +8 -0
- synth_ai/v0/config/__init__.py +15 -0
- synth_ai/v0/config/base_url.py +12 -0
- synth_ai/v0/lm/__init__.py +51 -0
- synth_ai/{lm → v0/lm}/caching/ephemeral.py +3 -5
- synth_ai/{lm → v0/lm}/caching/handler.py +4 -4
- synth_ai/{lm → v0/lm}/caching/initialize.py +1 -1
- synth_ai/{lm → v0/lm}/caching/persistent.py +1 -1
- synth_ai/{lm → v0/lm}/config.py +6 -1
- synth_ai/{lm → v0/lm}/core/all.py +9 -9
- synth_ai/{lm → v0/lm}/core/exceptions.py +0 -2
- synth_ai/{lm → v0/lm}/core/main.py +19 -7
- synth_ai/{lm → v0/lm}/core/main_v3.py +10 -10
- synth_ai/{lm → v0/lm}/core/synth_models.py +2 -15
- synth_ai/{lm → v0/lm}/core/vendor_clients.py +6 -4
- synth_ai/{lm → v0/lm}/overrides.py +4 -4
- synth_ai/{lm → v0/lm}/provider_support/anthropic.py +4 -4
- synth_ai/{lm → v0/lm}/provider_support/openai.py +5 -5
- synth_ai/{lm → v0/lm}/structured_outputs/handler.py +5 -5
- synth_ai/{lm → v0/lm}/structured_outputs/rehabilitate.py +1 -1
- synth_ai/{lm → v0/lm}/vendors/core/anthropic_api.py +16 -16
- synth_ai/{lm → v0/lm}/vendors/core/gemini_api.py +5 -5
- synth_ai/{lm → v0/lm}/vendors/core/mistral_api.py +5 -5
- synth_ai/{lm → v0/lm}/vendors/core/openai_api.py +12 -10
- synth_ai/{lm → v0/lm}/vendors/openai_standard.py +11 -9
- synth_ai/{lm → v0/lm}/vendors/openai_standard_responses.py +8 -5
- synth_ai/{lm → v0/lm}/vendors/supported/custom_endpoint.py +4 -6
- synth_ai/{lm → v0/lm}/vendors/supported/deepseek.py +2 -2
- synth_ai/{lm → v0/lm}/vendors/supported/grok.py +2 -2
- synth_ai/{lm → v0/lm}/vendors/supported/groq.py +1 -1
- synth_ai/{lm → v0/lm}/vendors/supported/ollama.py +1 -1
- synth_ai/{lm → v0/lm}/vendors/supported/openrouter.py +3 -3
- synth_ai/{lm → v0/lm}/vendors/supported/together.py +1 -1
- synth_ai/{lm → v0/lm}/vendors/synth_client.py +38 -11
- synth_ai/v0/tracing/upload.py +32 -135
- synth_ai/v0/tracing_v3/__init__.py +10 -0
- synth_ai/v0/tracing_v3/abstractions.py +3 -0
- synth_ai/v0/tracing_v3/decorators.py +3 -0
- synth_ai/v0/tracing_v3/llm_call_record_helpers.py +3 -0
- synth_ai/v0/tracing_v3/session_tracer.py +3 -0
- synth_ai-0.2.9.dev6.dist-info/METADATA +191 -0
- {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.9.dev6.dist-info}/RECORD +291 -262
- {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.9.dev6.dist-info}/top_level.txt +1 -0
- examples/common_old/backend.py +0 -21
- examples/evals_old/README.md +0 -98
- examples/evals_old/__init__.py +0 -6
- examples/evals_old/compare_models.py +0 -1037
- examples/evals_old/example_log.md +0 -145
- examples/evals_old/run_demo.sh +0 -126
- examples/evals_old/trace_analysis.py +0 -270
- examples/finetuning_old/_backup_synth_qwen/config.toml +0 -29
- examples/finetuning_old/_backup_synth_qwen/example_log.md +0 -324
- examples/finetuning_old/_backup_synth_qwen/filter_traces.py +0 -60
- examples/finetuning_old/_backup_synth_qwen/filter_traces_achievements.py +0 -239
- examples/finetuning_old/_backup_synth_qwen/purge_v3_traces.py +0 -109
- examples/finetuning_old/_backup_synth_qwen/react_agent_lm.py +0 -1924
- examples/finetuning_old/_backup_synth_qwen/readme.md +0 -49
- examples/finetuning_old/_backup_synth_qwen/run_crafter_qwen4b.py +0 -114
- examples/finetuning_old/_backup_synth_qwen/run_demo.sh +0 -195
- examples/finetuning_old/_backup_synth_qwen/sft_kickoff.py +0 -118
- examples/finetuning_old/synth_qwen_v1/README.md +0 -68
- examples/finetuning_old/synth_qwen_v1/filter_traces.py +0 -60
- examples/finetuning_old/synth_qwen_v1/filter_traces_achievements.py +0 -239
- examples/finetuning_old/synth_qwen_v1/finetune.py +0 -46
- examples/finetuning_old/synth_qwen_v1/hello_ft_model.py +0 -71
- examples/finetuning_old/synth_qwen_v1/infer.py +0 -37
- examples/finetuning_old/synth_qwen_v1/poll.py +0 -44
- examples/finetuning_old/synth_qwen_v1/prepare_data.py +0 -35
- examples/finetuning_old/synth_qwen_v1/purge_v3_traces.py +0 -109
- examples/finetuning_old/synth_qwen_v1/react_agent_lm.py +0 -1932
- examples/finetuning_old/synth_qwen_v1/run_crafter_sft_job.py +0 -207
- examples/finetuning_old/synth_qwen_v1/run_ft_job.py +0 -232
- examples/finetuning_old/synth_qwen_v1/upload_data.py +0 -34
- examples/finetuning_old/synth_qwen_v1/util.py +0 -147
- examples/rl_old/task_app.py +0 -962
- examples/warming_up_to_rl/old/event_rewards.md +0 -234
- examples/warming_up_to_rl/old/notes.md +0 -73
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_turso.py +0 -738
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_turso.py +0 -580
- synth_ai/experimental/synth_oss.py +0 -446
- synth_ai/install_sqld.sh +0 -40
- synth_ai/learning/filtering.py +0 -0
- synth_ai/learning/offline/dpo.py +0 -0
- synth_ai/learning/offline/providers.py +0 -7
- synth_ai/learning/offline/sft.py +0 -0
- synth_ai/learning/offline/shared.py +0 -0
- synth_ai/learning/online/grpo.py +0 -0
- synth_ai/learning/online/irft.py +0 -0
- synth_ai/learning/prompts/banking77_injection_eval.py +0 -168
- synth_ai/learning/prompts/gepa.py +0 -0
- synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +0 -213
- synth_ai/learning/prompts/mipro.py +0 -289
- synth_ai/learning/prompts/random_search.py +0 -246
- synth_ai/learning/prompts/run_mipro_banking77.py +0 -172
- synth_ai/learning/prompts/run_random_search_banking77.py +0 -324
- synth_ai/rl/secrets.py +0 -19
- synth_ai/scripts/verify_rewards.py +0 -100
- synth_ai/tracing/__init__.py +0 -30
- synth_ai/tracing_v1/__init__.py +0 -33
- synth_ai/tracing_v3/turso/__init__.py +0 -25
- synth_ai/tracing_v3/turso/manager.py +0 -774
- synth_ai/zyk/__init__.py +0 -30
- synth_ai-0.2.9.dev5.dist-info/METADATA +0 -131
- /synth_ai/{lm → v0/lm}/caching/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/caching/constants.py +0 -0
- /synth_ai/{lm → v0/lm}/caching/dbs.py +0 -0
- /synth_ai/{lm → v0/lm}/constants.py +0 -0
- /synth_ai/{lm → v0/lm}/core/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/cost/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/cost/monitor.py +0 -0
- /synth_ai/{lm → v0/lm}/cost/statefulness.py +0 -0
- /synth_ai/{lm → v0/lm}/injection.py +0 -0
- /synth_ai/{lm → v0/lm}/provider_support/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/provider_support/suppress_logging.py +0 -0
- /synth_ai/{lm → v0/lm}/structured_outputs/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/structured_outputs/inject.py +0 -0
- /synth_ai/{lm → v0/lm}/tools/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/tools/base.py +0 -0
- /synth_ai/{lm → v0/lm}/unified_interface.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/base.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/core/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/core/synth_dev_api.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/local/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/local/ollama.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/retries.py +0 -0
- /synth_ai/{lm → v0/lm}/vendors/supported/__init__.py +0 -0
- /synth_ai/{lm → v0/lm}/warmup.py +0 -0
- {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.9.dev6.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.9.dev6.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.9.dev6.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,305 @@
|
|
|
1
|
+
"""Shared utilities for Crafter environment and policy.
|
|
2
|
+
|
|
3
|
+
This module formats Crafter observations for the LLM and parses actions.
|
|
4
|
+
It now mirrors the ludic_private implementation for semantic map rendering
|
|
5
|
+
by dynamically deriving the id->name mapping from the actual Crafter env
|
|
6
|
+
when available, with a sensible fallback. This fixes the issue where the
|
|
7
|
+
rendered surroundings appeared only as iron/stone due to a mismatched
|
|
8
|
+
hardcoded mapping.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import itertools
|
|
12
|
+
import re
|
|
13
|
+
from typing import Any
|
|
14
|
+
|
|
15
|
+
import numpy as np
|
|
16
|
+
|
|
17
|
+
# Default view size for the local map window (match eval_rollout_table).
VIEW_SIZE = 5

# Action mappings from the game: each name maps to its position in this tuple.
CRAFTER_ACTIONS = {
    name: action_id
    for action_id, name in enumerate(
        (
            "noop",
            "move_left",
            "move_right",
            "move_up",
            "move_down",
            "do",
            "sleep",
            "place_stone",
            "place_table",
            "place_furnace",
            "place_plant",
            "make_wood_pickaxe",
            "make_stone_pickaxe",
            "make_iron_pickaxe",
            "make_wood_sword",
            "make_stone_sword",
            "make_iron_sword",
        )
    )
}

# Common action aliases; every value is a key of CRAFTER_ACTIONS.
ACTION_ALIASES = {
    # Movement aliases: "<direction>" -> "move_<direction>"
    **{direction: f"move_{direction}" for direction in ("left", "right", "up", "down")},
    # Interaction aliases all resolve to "do"
    **dict.fromkeys(("interact", "use", "action"), "do"),
    # Sleep
    "rest": "sleep",
    # Crafting: "craft_<item>" -> "make_<item>"
    **{
        f"craft_{item}": f"make_{item}"
        for item in (
            "wood_pickaxe",
            "stone_pickaxe",
            "iron_pickaxe",
            "wood_sword",
            "stone_sword",
            "iron_sword",
        )
    },
}

# Lookup sets used when validating free-form action text.
VALID_PRIMARY_ACTIONS: set[str] = set(CRAFTER_ACTIONS)
VALID_ACTION_ALIASES: set[str] = set(ACTION_ALIASES)
ALL_VALID_ACTION_STRINGS: set[str] = VALID_PRIMARY_ACTIONS.union(VALID_ACTION_ALIASES)
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def validate_action(action: str) -> bool:
    """Return True when *action* names a known primary action or alias.

    The text is trimmed, lower-cased, and has its spaces replaced with
    underscores before being looked up in ``ALL_VALID_ACTION_STRINGS``.
    """
    candidate = action.strip()
    candidate = candidate.lower()
    candidate = candidate.replace(" ", "_")
    return candidate in ALL_VALID_ACTION_STRINGS
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def parse_actions(action_text: str) -> list[str]:
    """Extract valid Crafter action strings from a model response.

    Strategies are tried in order; the first one whose markup is present wins:

    1. ``<action>...</action>`` tags (original format).
    2. ``[action]...[/action]`` tags, or ``[action]...`` up to the end of line.
    3. Plain text: the whole response as a single action; otherwise every
       line is stripped of one leading label or bullet (``**ACTION:**``,
       ``ACTION:``, ``Action:``, ``action:``, ``ACTION``, ``-``, ``*``, ``•``)
       and of a list number (``1.``), split on ``,``, ``;``, ``and`` or
       ``then``, and each piece is unquoted.

    Args:
        action_text: Raw response text. ``None`` or an empty string gives ``[]``.

    Returns:
        The candidates accepted by ``validate_action``, in order of
        appearance. Each one is whitespace-stripped but otherwise returned as
        written (it is not lower-cased or alias-resolved here). When tags are
        present but none wraps a valid action the result is empty; the
        plain-text strategy is not attempted in that case.
    """
    if not action_text:
        return []

    # First try the original <action> tag format
    tagged = re.findall(r"<action>(.*?)</action>", action_text, re.IGNORECASE)
    if tagged:
        return [m.strip() for m in tagged if validate_action(m.strip())]

    # Try [action] format (closing tag optional; end of line also terminates)
    bracketed = re.findall(r"\[action\](.*?)(?:\[/action\]|\n|$)", action_text, re.IGNORECASE)
    if bracketed:
        return [m.strip() for m in bracketed if validate_action(m.strip())]

    # If no tags found, try to parse plain text
    text = action_text.strip()

    # Check if the entire text is a valid action
    if validate_action(text):
        return [text]

    # Only the first matching prefix is removed, so longer prefixes must come
    # before the shorter ones they start with: "**ACTION:**" before the bare
    # "*" bullet, and "ACTION:" before "ACTION". With "*" tested first the
    # bold label lost a single star and the action behind it was discarded.
    prefixes = ("**ACTION:**", "ACTION:", "Action:", "action:", "ACTION", "-", "*", "•")

    actions: list[str] = []
    for line in text.split("\n"):
        line = line.strip()

        for prefix in prefixes:
            if line.startswith(prefix):
                line = line[len(prefix) :].strip()
                break

        # Also handle numbered lists ("1. move_left")
        line = re.sub(r"^\d+\.\s*", "", line)

        # Split by common separators to handle multiple actions on one line
        for part in re.split(r"[,;]|\s+and\s+|\s+then\s+", line):
            part = part.strip()
            # Remove one layer of matching quotes if present
            if part.startswith('"') and part.endswith('"'):
                part = part[1:-1]
            if part.startswith("'") and part.endswith("'"):
                part = part[1:-1]

            if part and validate_action(part):
                actions.append(part)

    return actions
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def format_observation(obs_data: dict[str, Any], step_count: int = 0, max_steps: int = 100) -> str:
    """Format a Crafter observation dictionary into a human-readable string.

    Only a fixed set of fields is rendered (step counter, health, position,
    facing, inventory, unlocked achievements and the local map), which keeps
    token counts bounded when observations contain large numpy arrays or
    deeply nested structures.

    Args:
        obs_data: Observation mapping. Keys read here are ``health``,
            ``inventory``, ``player_position``, ``player_direction``,
            ``achievements_status``, ``steps`` / ``num_steps_taken`` and
            ``max_steps_episode`` / ``max_steps``. The map section is produced
            by ``_format_semantic_map_view`` from the same mapping.
        step_count: Step number shown when the observation carries none.
        max_steps: Episode length shown when the observation carries none.

    Returns:
        The multi-line state text, or ``""`` when ``obs_data`` is empty.
    """
    if not obs_data:
        return ""

    # Extract key information. A key that is present but None is treated like
    # a missing key instead of failing on ``None.get`` / ``None.items``.
    inventory_dict = obs_data.get("inventory") or {}
    achievements = obs_data.get("achievements_status") or {}
    health = obs_data.get("health") or inventory_dict.get("health", 0)
    pos = obs_data.get("player_position", [0, 0])
    direction = obs_data.get("player_direction", [0, 1])

    # Prefer step/max from observation if provided by the env
    step_from_obs = obs_data.get("steps")
    if step_from_obs is None:
        step_from_obs = obs_data.get("num_steps_taken")
    if isinstance(step_from_obs, (int, float)) and step_from_obs >= 0:
        step_count = int(step_from_obs)

    max_steps_from_obs = obs_data.get("max_steps_episode") or obs_data.get("max_steps")
    if isinstance(max_steps_from_obs, (int, float)) and max_steps_from_obs > 0:
        max_steps = int(max_steps_from_obs)

    # Format inventory (skip health as it's shown separately). Entries whose
    # count is None are skipped rather than compared against 0.
    inv_items = [
        f"{k}:{v}"
        for k, v in inventory_dict.items()
        if k != "health" and v is not None and v > 0
    ]
    inventory_str = ", ".join(inv_items) if inv_items else "empty"

    # Format achievements: only the ones with a truthy status are listed
    achieved_list = [k for k, v in achievements.items() if v]
    achievements_str = ", ".join(achieved_list) if achieved_list else "none"

    # Format semantic map view (simplified version)
    map_view = _format_semantic_map_view(obs_data, VIEW_SIZE)

    return (
        "=== CRAFTER GAME STATE ===\n"
        f"Step: {step_count}/{max_steps}\n"
        f"Health: {health}\n"
        f"Position: {pos}\n"
        f"Facing: {direction}\n"
        f"Inventory: {inventory_str}\n"
        f"Achievements: {achievements_str}\n"
        f"{map_view}\n\n"
        "Choose your next actions.\n"
    )
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def _try_build_dynamic_mapping():
|
|
189
|
+
"""Attempt to build id->name mapping from a real Crafter env.
|
|
190
|
+
|
|
191
|
+
Returns a list where index is semantic ID and value is the lowercase name.
|
|
192
|
+
On failure (crafter not installed or internal API changed), returns None.
|
|
193
|
+
"""
|
|
194
|
+
try:
|
|
195
|
+
import crafter # type: ignore
|
|
196
|
+
except Exception:
|
|
197
|
+
return None
|
|
198
|
+
|
|
199
|
+
dummyenv = None
|
|
200
|
+
try:
|
|
201
|
+
dummyenv = crafter.Env()
|
|
202
|
+
# Combine material IDs and semantic view object IDs
|
|
203
|
+
world_ids = getattr(dummyenv, "_world", None)
|
|
204
|
+
sem_view = getattr(dummyenv, "_sem_view", None)
|
|
205
|
+
if world_ids is None or sem_view is None:
|
|
206
|
+
return None
|
|
207
|
+
mat_ids = getattr(world_ids, "_mat_ids", None)
|
|
208
|
+
obj_ids = getattr(sem_view, "_obj_ids", None)
|
|
209
|
+
if not isinstance(mat_ids, dict) or not isinstance(obj_ids, dict):
|
|
210
|
+
return None
|
|
211
|
+
max_id = max(max(mat_ids.values()), max(obj_ids.values())) + 1
|
|
212
|
+
id_to_item = ["void"] * max_id
|
|
213
|
+
for name, idx in itertools.chain(mat_ids.items(), obj_ids.items()):
|
|
214
|
+
if name is None:
|
|
215
|
+
clean = "none"
|
|
216
|
+
elif hasattr(name, "__name__"):
|
|
217
|
+
clean = name.__name__.lower()
|
|
218
|
+
else:
|
|
219
|
+
clean = str(name).lower()
|
|
220
|
+
if 0 <= idx < len(id_to_item):
|
|
221
|
+
id_to_item[idx] = clean
|
|
222
|
+
return id_to_item
|
|
223
|
+
except Exception:
|
|
224
|
+
return None
|
|
225
|
+
finally:
|
|
226
|
+
try:
|
|
227
|
+
if dummyenv is not None:
|
|
228
|
+
dummyenv.close()
|
|
229
|
+
except Exception:
|
|
230
|
+
pass
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
# Build dynamic mapping if possible (None when it cannot be built); the basic
# map below is the fallback.
_ID_TO_NAME = _try_build_dynamic_mapping()

# Basic id -> name map: each name's position in the tuple is its semantic id.
_FALLBACK_ID_TO_NAME = dict(
    enumerate(
        (
            "none",  # None from materials
            "water",
            "grass",
            "stone",
            "path",
            "sand",
            "tree",
            "lava",
            "coal",
            "iron",
            "diamond",
            "table",
            "furnace",
            "player",
            "cow",
            "zombie",
            "skeleton",
            "arrow",
            "plant",
        )
    )
)
|
|
256
|
+
|
|
257
|
+
|
|
258
|
+
def _format_semantic_map_view(obs_data: dict[str, Any], view_size: int = VIEW_SIZE) -> str:
    """Format the semantic map into a text representation using dynamic IDs.

    Shows a local ``view_size`` x ``view_size`` window around the player with
    one name per cell. Cells outside the map are ``"void"`` and the centre
    cell is always ``"player"``. Ids are resolved through ``_ID_TO_NAME`` when
    it is a non-empty list covering the id, otherwise through
    ``_FALLBACK_ID_TO_NAME``; unknown ids are shown as their number.

    Args:
        obs_data: Observation mapping; ``semantic_map`` and
            ``player_position`` are read.
        view_size: Number of cells per side of the window.

    Returns:
        A header line followed by the grid rows, or ``"Map view unavailable"``
        when the map is missing, cannot be laid out as a 2D grid, or the
        player position is not a pair of integers.
    """
    unavailable = "Map view unavailable"

    semantic_map = obs_data.get("semantic_map")
    if semantic_map is None:
        return unavailable

    # Convert to numpy array if needed
    sem_arr = np.asarray(semantic_map)
    if sem_arr.ndim == 1:
        # Reshape flat array to 2D; only a perfect-square length can be
        # reshaped, anything else would make reshape() raise.
        side = int(len(sem_arr) ** 0.5)
        if side * side != len(sem_arr):
            return unavailable
        sem_arr = sem_arr.reshape(side, side)
    if sem_arr.ndim < 2:
        # A 0-d array has no axes to index.
        return unavailable

    player_position = obs_data.get("player_position", [0, 0])
    try:
        px, py = map(int, player_position)
    except (TypeError, ValueError):
        # None, wrong length, or non-numeric entries: no centre to render around.
        return unavailable
    half = view_size // 2

    # Choose mapping source
    use_list = isinstance(_ID_TO_NAME, list) and len(_ID_TO_NAME) > 0

    # Build matrix centered at player: outer loop over dy, inner over dx,
    # with the array indexed as [x, y].
    matrix: list[list[str]] = []
    for dy in range(-half, half + 1):
        row_tokens: list[str] = []
        for dx in range(-half, half + 1):
            x, y = px + dx, py + dy
            if not (0 <= x < sem_arr.shape[0] and 0 <= y < sem_arr.shape[1]):
                row_tokens.append("void")
            elif dx == 0 and dy == 0:
                row_tokens.append("player")
            else:
                obj_id = int(sem_arr[x, y])
                if use_list and 0 <= obj_id < len(_ID_TO_NAME):
                    name = _ID_TO_NAME[obj_id]  # type: ignore[index]
                else:
                    name = _FALLBACK_ID_TO_NAME.get(obj_id, str(obj_id))
                row_tokens.append(name)
        matrix.append(row_tokens)

    # After the transpose each printed line holds one x offset, with the y
    # offset increasing from left to right.
    # NOTE(review): whether that orientation matches what the agent should
    # see depends on how the env lays out semantic_map - confirm.
    grid_rows = [" ".join(column) for column in zip(*matrix)]
    return f"\nLocal Map View ({view_size}x{view_size}):\n" + "\n".join(grid_rows)
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
"""OpenAI tools schema for Crafter, defined in Python."""
|
|
2
|
+
|
|
3
|
+
# Action names accepted for each element of the `actions` array.
_ACTION_NAME_ENUM = [
    "noop",
    "move_left",
    "move_right",
    "move_up",
    "move_down",
    "do",
    "sleep",
    "place_stone",
    "place_table",
    "place_furnace",
    "place_plant",
    "make_wood_pickaxe",
    "make_stone_pickaxe",
    "make_iron_pickaxe",
    "make_wood_sword",
    "make_stone_sword",
    "make_iron_sword",
]

# JSON-schema for the single `actions` argument: 1 to 8 action names.
_ACTIONS_PROPERTY = {
    "type": "array",
    "description": "List of Crafter actions to execute sequentially.",
    "items": {
        "type": "string",
        "enum": _ACTION_NAME_ENUM,
    },
    "minItems": 1,
    "maxItems": 8,
}

# Definition of the `interact_many` function tool.
_INTERACT_MANY_FUNCTION = {
    "name": "interact_many",
    "description": "Execute a short sequence of Crafter actions in order (1-8).",
    "parameters": {
        "type": "object",
        "properties": {"actions": _ACTIONS_PROPERTY},
        "required": ["actions"],
        "additionalProperties": False,
    },
}

# Pass this list directly to OpenAI/vLLM `tools=`
TOOLS_SCHEMA = [
    {
        "type": "function",
        "function": _INTERACT_MANY_FUNCTION,
    }
]
|