PyPI - synth-ai - Versions diffs - 0.2.9.dev5__py3-none-any.whl → 0.2.9.dev6__py3-none-any.whl - Mend

synth-ai 0.2.9.dev5__py3-none-any.whl → 0.2.9.dev6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of synth-ai might be problematic. Click here for more details.

Files changed (351)

examples/__init__.py +16 -0
examples/crafter_debug_render.py +23 -17
examples/qwen_coder/README.md +102 -0
examples/qwen_coder/_shared.py +113 -0
examples/qwen_coder/configs/coder_lora_30b.toml +61 -0
examples/qwen_coder/configs/coder_lora_4b.toml +57 -0
examples/qwen_coder/configs/coder_lora_small.toml +58 -0
examples/qwen_coder/generate_dataset.py +98 -0
examples/qwen_coder/infer_ft_smoke.py +64 -0
examples/qwen_coder/infer_prod_proxy.py +73 -0
examples/qwen_coder/infer_via_synth.py +87 -0
examples/qwen_coder/scripts/infer_coder.sh +18 -0
examples/qwen_coder/scripts/train_coder_30b.sh +21 -0
examples/qwen_coder/sft_full_17b.py +103 -0
examples/qwen_coder/sft_lora_30b.py +110 -0
examples/qwen_coder/subset_jsonl.py +38 -0
examples/qwen_coder/validate_jsonl.py +59 -0
examples/rl/configs/eval_base_qwen.toml +1 -1
examples/rl/configs/rl_from_base_qwen17.toml +1 -1
examples/rl/download_dataset.py +26 -10
examples/rl/run_eval.py +53 -52
examples/rl/run_rl_and_save.py +29 -12
examples/rl/task_app/math_single_step.py +180 -41
examples/rl/task_app/math_task_app.py +14 -6
examples/sft/README.md +139 -0
examples/sft/configs/crafter_fft_qwen0p6b.toml +44 -0
examples/sft/configs/crafter_lora_qwen0p6b.toml +45 -0
examples/sft/evaluate.py +117 -0
examples/sft/export_dataset.py +117 -0
examples/sft/generate_traces.py +162 -0
examples/swe/__init__.py +12 -0
examples/swe/task_app/README.md +105 -0
examples/swe/task_app/__init__.py +2 -0
examples/swe/task_app/grpo_swe_mini.py +571 -0
examples/swe/task_app/grpo_swe_mini_task_app.py +136 -0
examples/swe/task_app/hosted/README.md +173 -0
examples/swe/task_app/hosted/__init__.py +5 -0
examples/swe/task_app/hosted/branching.py +143 -0
examples/swe/task_app/hosted/environment_routes.py +1289 -0
examples/swe/task_app/hosted/envs/__init__.py +1 -0
examples/swe/task_app/hosted/envs/crafter/__init__.py +6 -0
examples/swe/task_app/hosted/envs/crafter/app.py +1 -0
examples/swe/task_app/hosted/envs/crafter/environment.py +522 -0
examples/swe/task_app/hosted/envs/crafter/policy.py +478 -0
examples/swe/task_app/hosted/envs/crafter/react_agent.py +108 -0
examples/swe/task_app/hosted/envs/crafter/shared.py +305 -0
examples/swe/task_app/hosted/envs/crafter/tools.py +47 -0
examples/swe/task_app/hosted/envs/mini_swe/__init__.py +8 -0
examples/swe/task_app/hosted/envs/mini_swe/environment.py +1164 -0
examples/swe/task_app/hosted/envs/mini_swe/policy.py +355 -0
examples/swe/task_app/hosted/envs/mini_swe/shared.py +83 -0
examples/swe/task_app/hosted/envs/mini_swe/tools.py +96 -0
examples/swe/task_app/hosted/hosted_app.py +204 -0
examples/swe/task_app/hosted/inference/__init__.py +5 -0
examples/swe/task_app/hosted/inference/openai_client.py +618 -0
examples/swe/task_app/hosted/main.py +100 -0
examples/swe/task_app/hosted/policy_routes.py +1079 -0
examples/swe/task_app/hosted/registry.py +195 -0
examples/swe/task_app/hosted/rollout.py +1869 -0
examples/swe/task_app/hosted/storage/__init__.py +5 -0
examples/swe/task_app/hosted/storage/volume.py +211 -0
examples/swe/task_app/hosted/test_agents.py +161 -0
examples/swe/task_app/hosted/test_service.py +137 -0
examples/swe/task_app/hosted/utils.py +62 -0
examples/vlm/README.md +68 -0
examples/vlm/configs/crafter_vlm_gpt4o.toml +44 -0
examples/vlm/crafter_image_only_agent.py +207 -0
examples/vlm/crafter_openai_vlm_agent.py +277 -0
examples/vlm/filter_image_rows.py +63 -0
examples/vlm/run_crafter_vlm_benchmark.py +316 -0
examples/warming_up_to_rl/analyze_trace_db.py +12 -10
examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +11 -1
examples/warming_up_to_rl/export_trace_sft.py +218 -36
examples/warming_up_to_rl/groq_test.py +15 -8
examples/warming_up_to_rl/manage_secrets.py +29 -25
examples/warming_up_to_rl/readme.md +9 -2
examples/warming_up_to_rl/run_eval.py +137 -61
examples/warming_up_to_rl/run_fft_and_save.py +131 -60
examples/warming_up_to_rl/run_local_rollout.py +88 -39
examples/warming_up_to_rl/run_local_rollout_modal.py +114 -28
examples/warming_up_to_rl/run_local_rollout_parallel.py +81 -20
examples/warming_up_to_rl/run_local_rollout_traced.py +126 -23
examples/warming_up_to_rl/run_rl_and_save.py +35 -12
examples/warming_up_to_rl/run_rollout_remote.py +44 -19
examples/warming_up_to_rl/task_app/README.md +6 -2
examples/warming_up_to_rl/task_app/grpo_crafter.py +319 -57
examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +11 -30
examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +1 -1
examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +9 -11
examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +137 -182
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -1
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +1 -1
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -1
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +150 -57
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +105 -69
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +19 -7
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +45 -42
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +1 -1
examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +47 -45
examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +1 -1
examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +198 -92
examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +0 -2
examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +361 -263
examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +21 -23
examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +394 -274
examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +1 -1
examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +56 -62
examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +1 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +6 -15
examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +4 -3
synth/__init__.py +14 -0
synth_ai/__init__.py +20 -4
synth_ai/api/models/supported.py +376 -0
synth_ai/api/train/builders.py +157 -26
synth_ai/api/train/cli.py +213 -57
synth_ai/api/train/config_finder.py +65 -5
synth_ai/api/train/env_resolver.py +33 -15
synth_ai/api/train/pollers.py +13 -4
synth_ai/api/train/supported_algos.py +139 -0
synth_ai/api/train/task_app.py +5 -3
synth_ai/api/train/utils.py +33 -48
synth_ai/cli/__init__.py +19 -4
synth_ai/cli/_modal_wrapper.py +28 -0
synth_ai/cli/_typer_patch.py +49 -0
synth_ai/cli/balance.py +2 -3
synth_ai/cli/calc.py +1 -1
synth_ai/cli/demo.py +21 -6
synth_ai/cli/recent.py +2 -2
synth_ai/cli/rl_demo.py +77 -17
synth_ai/cli/root.py +116 -39
synth_ai/cli/status.py +2 -2
synth_ai/cli/task_apps.py +1699 -259
synth_ai/cli/traces.py +7 -4
synth_ai/cli/turso.py +73 -0
synth_ai/cli/watch.py +12 -18
synth_ai/core/experiment.py +0 -2
synth_ai/demo_registry.py +68 -31
synth_ai/demos/core/cli.py +516 -194
synth_ai/demos/demo_task_apps/__init__.py +3 -3
synth_ai/demos/demo_task_apps/core.py +64 -28
synth_ai/demos/demo_task_apps/crafter/configs/crafter_fft_4b.toml +2 -3
synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +37 -30
synth_ai/demos/demo_task_apps/math/_common.py +1 -2
synth_ai/demos/demo_task_apps/math/app.py +2 -1
synth_ai/demos/demo_task_apps/math/deploy_modal.py +3 -6
synth_ai/demos/demo_task_apps/math/modal_task_app.py +183 -82
synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -2
synth_ai/environments/examples/bandit/engine.py +12 -4
synth_ai/environments/examples/bandit/taskset.py +4 -4
synth_ai/environments/examples/crafter_classic/environment.py +76 -1
synth_ai/environments/reproducibility/tree.py +5 -6
synth_ai/environments/service/app.py +11 -12
synth_ai/environments/service/core_routes.py +10 -9
synth_ai/environments/stateful/engine.py +1 -1
synth_ai/environments/tasks/core.py +1 -0
synth_ai/environments/tasks/filters.py +5 -6
synth_ai/environments/tasks/utils.py +4 -5
synth_ai/evals/base.py +0 -2
synth_ai/handshake.py +11 -9
synth_ai/http.py +1 -1
synth_ai/http_client.py +43 -11
synth_ai/inference/__init__.py +0 -2
synth_ai/inference/client.py +20 -6
synth_ai/jobs/client.py +103 -78
synth_ai/learning/__init__.py +41 -6
synth_ai/learning/algorithms.py +14 -0
synth_ai/learning/client.py +121 -29
synth_ai/learning/config.py +2 -40
synth_ai/learning/constants.py +0 -2
synth_ai/learning/ft_client.py +4 -56
synth_ai/learning/health.py +13 -7
synth_ai/learning/jobs.py +43 -47
synth_ai/{rl → learning/rl}/__init__.py +14 -5
synth_ai/learning/rl/client.py +267 -0
synth_ai/learning/rl/config.py +31 -0
synth_ai/{rl → learning/rl}/contracts.py +5 -10
synth_ai/{rl → learning/rl}/env_keys.py +45 -16
synth_ai/learning/rl/secrets.py +13 -0
synth_ai/learning/rl_client.py +2 -253
synth_ai/learning/sft/__init__.py +29 -0
synth_ai/learning/sft/client.py +68 -0
synth_ai/learning/sft/config.py +270 -0
synth_ai/learning/sft/data.py +295 -0
synth_ai/learning/sse.py +25 -26
synth_ai/learning/validators.py +25 -24
synth_ai/lm/__init__.py +21 -47
synth_ai/task/__init__.py +26 -27
synth_ai/task/apps/__init__.py +18 -19
synth_ai/task/auth.py +35 -23
synth_ai/task/client.py +15 -13
synth_ai/task/contracts.py +37 -35
synth_ai/task/datasets.py +9 -6
synth_ai/task/errors.py +11 -10
synth_ai/task/health.py +17 -11
synth_ai/task/json.py +58 -24
synth_ai/task/proxy.py +15 -14
synth_ai/task/rubrics.py +22 -15
synth_ai/task/server.py +43 -17
synth_ai/task/tracing_utils.py +12 -7
synth_ai/task/validators.py +0 -1
synth_ai/task/vendors.py +5 -7
synth_ai/tracing_v3/__init__.py +2 -0
synth_ai/tracing_v3/abstractions.py +21 -4
synth_ai/tracing_v3/db_config.py +26 -1
synth_ai/tracing_v3/decorators.py +18 -15
synth_ai/tracing_v3/examples/basic_usage.py +3 -2
synth_ai/tracing_v3/hooks.py +6 -4
synth_ai/tracing_v3/llm_call_record_helpers.py +6 -6
synth_ai/tracing_v3/replica_sync.py +1 -0
synth_ai/tracing_v3/session_tracer.py +63 -16
synth_ai/tracing_v3/storage/base.py +89 -1
synth_ai/tracing_v3/storage/config.py +21 -8
synth_ai/tracing_v3/storage/factory.py +10 -8
synth_ai/tracing_v3/storage/utils.py +4 -2
synth_ai/tracing_v3/turso/daemon.py +7 -2
synth_ai/tracing_v3/turso/models.py +5 -2
synth_ai/tracing_v3/turso/native_manager.py +1173 -0
synth_ai/tracing_v3/utils.py +4 -3
synth_ai/v0/api/__init__.py +8 -0
synth_ai/v0/api/models/__init__.py +8 -0
synth_ai/v0/api/models/supported.py +8 -0
synth_ai/v0/config/__init__.py +15 -0
synth_ai/v0/config/base_url.py +12 -0
synth_ai/v0/lm/__init__.py +51 -0
synth_ai/{lm → v0/lm}/caching/ephemeral.py +3 -5
synth_ai/{lm → v0/lm}/caching/handler.py +4 -4
synth_ai/{lm → v0/lm}/caching/initialize.py +1 -1
synth_ai/{lm → v0/lm}/caching/persistent.py +1 -1
synth_ai/{lm → v0/lm}/config.py +6 -1
synth_ai/{lm → v0/lm}/core/all.py +9 -9
synth_ai/{lm → v0/lm}/core/exceptions.py +0 -2
synth_ai/{lm → v0/lm}/core/main.py +19 -7
synth_ai/{lm → v0/lm}/core/main_v3.py +10 -10
synth_ai/{lm → v0/lm}/core/synth_models.py +2 -15
synth_ai/{lm → v0/lm}/core/vendor_clients.py +6 -4
synth_ai/{lm → v0/lm}/overrides.py +4 -4
synth_ai/{lm → v0/lm}/provider_support/anthropic.py +4 -4
synth_ai/{lm → v0/lm}/provider_support/openai.py +5 -5
synth_ai/{lm → v0/lm}/structured_outputs/handler.py +5 -5
synth_ai/{lm → v0/lm}/structured_outputs/rehabilitate.py +1 -1
synth_ai/{lm → v0/lm}/vendors/core/anthropic_api.py +16 -16
synth_ai/{lm → v0/lm}/vendors/core/gemini_api.py +5 -5
synth_ai/{lm → v0/lm}/vendors/core/mistral_api.py +5 -5
synth_ai/{lm → v0/lm}/vendors/core/openai_api.py +12 -10
synth_ai/{lm → v0/lm}/vendors/openai_standard.py +11 -9
synth_ai/{lm → v0/lm}/vendors/openai_standard_responses.py +8 -5
synth_ai/{lm → v0/lm}/vendors/supported/custom_endpoint.py +4 -6
synth_ai/{lm → v0/lm}/vendors/supported/deepseek.py +2 -2
synth_ai/{lm → v0/lm}/vendors/supported/grok.py +2 -2
synth_ai/{lm → v0/lm}/vendors/supported/groq.py +1 -1
synth_ai/{lm → v0/lm}/vendors/supported/ollama.py +1 -1
synth_ai/{lm → v0/lm}/vendors/supported/openrouter.py +3 -3
synth_ai/{lm → v0/lm}/vendors/supported/together.py +1 -1
synth_ai/{lm → v0/lm}/vendors/synth_client.py +38 -11
synth_ai/v0/tracing/upload.py +32 -135
synth_ai/v0/tracing_v3/__init__.py +10 -0
synth_ai/v0/tracing_v3/abstractions.py +3 -0
synth_ai/v0/tracing_v3/decorators.py +3 -0
synth_ai/v0/tracing_v3/llm_call_record_helpers.py +3 -0
synth_ai/v0/tracing_v3/session_tracer.py +3 -0
synth_ai-0.2.9.dev6.dist-info/METADATA +191 -0
{synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.9.dev6.dist-info}/RECORD +291 -262
{synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.9.dev6.dist-info}/top_level.txt +1 -0
examples/common_old/backend.py +0 -21
examples/evals_old/README.md +0 -98
examples/evals_old/__init__.py +0 -6
examples/evals_old/compare_models.py +0 -1037
examples/evals_old/example_log.md +0 -145
examples/evals_old/run_demo.sh +0 -126
examples/evals_old/trace_analysis.py +0 -270
examples/finetuning_old/_backup_synth_qwen/config.toml +0 -29
examples/finetuning_old/_backup_synth_qwen/example_log.md +0 -324
examples/finetuning_old/_backup_synth_qwen/filter_traces.py +0 -60
examples/finetuning_old/_backup_synth_qwen/filter_traces_achievements.py +0 -239
examples/finetuning_old/_backup_synth_qwen/purge_v3_traces.py +0 -109
examples/finetuning_old/_backup_synth_qwen/react_agent_lm.py +0 -1924
examples/finetuning_old/_backup_synth_qwen/readme.md +0 -49
examples/finetuning_old/_backup_synth_qwen/run_crafter_qwen4b.py +0 -114
examples/finetuning_old/_backup_synth_qwen/run_demo.sh +0 -195
examples/finetuning_old/_backup_synth_qwen/sft_kickoff.py +0 -118
examples/finetuning_old/synth_qwen_v1/README.md +0 -68
examples/finetuning_old/synth_qwen_v1/filter_traces.py +0 -60
examples/finetuning_old/synth_qwen_v1/filter_traces_achievements.py +0 -239
examples/finetuning_old/synth_qwen_v1/finetune.py +0 -46
examples/finetuning_old/synth_qwen_v1/hello_ft_model.py +0 -71
examples/finetuning_old/synth_qwen_v1/infer.py +0 -37
examples/finetuning_old/synth_qwen_v1/poll.py +0 -44
examples/finetuning_old/synth_qwen_v1/prepare_data.py +0 -35
examples/finetuning_old/synth_qwen_v1/purge_v3_traces.py +0 -109
examples/finetuning_old/synth_qwen_v1/react_agent_lm.py +0 -1932
examples/finetuning_old/synth_qwen_v1/run_crafter_sft_job.py +0 -207
examples/finetuning_old/synth_qwen_v1/run_ft_job.py +0 -232
examples/finetuning_old/synth_qwen_v1/upload_data.py +0 -34
examples/finetuning_old/synth_qwen_v1/util.py +0 -147
examples/rl_old/task_app.py +0 -962
examples/warming_up_to_rl/old/event_rewards.md +0 -234
examples/warming_up_to_rl/old/notes.md +0 -73
synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_turso.py +0 -738
synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_turso.py +0 -580
synth_ai/experimental/synth_oss.py +0 -446
synth_ai/install_sqld.sh +0 -40
synth_ai/learning/filtering.py +0 -0
synth_ai/learning/offline/dpo.py +0 -0
synth_ai/learning/offline/providers.py +0 -7
synth_ai/learning/offline/sft.py +0 -0
synth_ai/learning/offline/shared.py +0 -0
synth_ai/learning/online/grpo.py +0 -0
synth_ai/learning/online/irft.py +0 -0
synth_ai/learning/prompts/banking77_injection_eval.py +0 -168
synth_ai/learning/prompts/gepa.py +0 -0
synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +0 -213
synth_ai/learning/prompts/mipro.py +0 -289
synth_ai/learning/prompts/random_search.py +0 -246
synth_ai/learning/prompts/run_mipro_banking77.py +0 -172
synth_ai/learning/prompts/run_random_search_banking77.py +0 -324
synth_ai/rl/secrets.py +0 -19
synth_ai/scripts/verify_rewards.py +0 -100
synth_ai/tracing/__init__.py +0 -30
synth_ai/tracing_v1/__init__.py +0 -33
synth_ai/tracing_v3/turso/__init__.py +0 -25
synth_ai/tracing_v3/turso/manager.py +0 -774
synth_ai/zyk/__init__.py +0 -30
synth_ai-0.2.9.dev5.dist-info/METADATA +0 -131
/synth_ai/{lm → v0/lm}/caching/__init__.py +0 -0
/synth_ai/{lm → v0/lm}/caching/constants.py +0 -0
/synth_ai/{lm → v0/lm}/caching/dbs.py +0 -0
/synth_ai/{lm → v0/lm}/constants.py +0 -0
/synth_ai/{lm → v0/lm}/core/__init__.py +0 -0
/synth_ai/{lm → v0/lm}/cost/__init__.py +0 -0
/synth_ai/{lm → v0/lm}/cost/monitor.py +0 -0
/synth_ai/{lm → v0/lm}/cost/statefulness.py +0 -0
/synth_ai/{lm → v0/lm}/injection.py +0 -0
/synth_ai/{lm → v0/lm}/provider_support/__init__.py +0 -0
/synth_ai/{lm → v0/lm}/provider_support/suppress_logging.py +0 -0
/synth_ai/{lm → v0/lm}/structured_outputs/__init__.py +0 -0
/synth_ai/{lm → v0/lm}/structured_outputs/inject.py +0 -0
/synth_ai/{lm → v0/lm}/tools/__init__.py +0 -0
/synth_ai/{lm → v0/lm}/tools/base.py +0 -0
/synth_ai/{lm → v0/lm}/unified_interface.py +0 -0
/synth_ai/{lm → v0/lm}/vendors/__init__.py +0 -0
/synth_ai/{lm → v0/lm}/vendors/base.py +0 -0
/synth_ai/{lm → v0/lm}/vendors/core/__init__.py +0 -0
/synth_ai/{lm → v0/lm}/vendors/core/synth_dev_api.py +0 -0
/synth_ai/{lm → v0/lm}/vendors/local/__init__.py +0 -0
/synth_ai/{lm → v0/lm}/vendors/local/ollama.py +0 -0
/synth_ai/{lm → v0/lm}/vendors/retries.py +0 -0
/synth_ai/{lm → v0/lm}/vendors/supported/__init__.py +0 -0
/synth_ai/{lm → v0/lm}/warmup.py +0 -0
{synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.9.dev6.dist-info}/WHEEL +0 -0
{synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.9.dev6.dist-info}/entry_points.txt +0 -0
{synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.9.dev6.dist-info}/licenses/LICENSE +0 -0

examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py CHANGED Viewed

	@@ -1 +1 @@
1	- """Environment implementations."""
1	+ """Environment implementations."""

examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py CHANGED Viewed

@@ -3,4 +3,4 @@
 from .environment import CrafterEnvironmentWrapper
 from .policy import CrafterPolicy
-__all__ = ["CrafterEnvironmentWrapper", "CrafterPolicy"]
+__all__ = ["CrafterEnvironmentWrapper", "CrafterPolicy"]

examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py CHANGED Viewed

	@@ -1 +1 @@
1	- # wraps hosted app
1	+ # wraps hosted app

examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py CHANGED Viewed

@@ -1,19 +1,56 @@
 from __future__ import annotations
-from typing import Any, Dict, List, Optional
+import base64
 import logging
+from io import BytesIO
+from typing import Any
-from synth_ai.environments.stateful.core import StatefulEnvironment
+import numpy as np
+from PIL import Image
 from synth_ai.environments.environment.tools import EnvToolCall
+from synth_ai.environments.stateful.core import StatefulEnvironment
 from ...utils import convert_numpy_to_python
-from .tools import TOOLS_SCHEMA
 from .shared import CRAFTER_ACTIONS, _format_semantic_map_view
+from .tools import TOOLS_SCHEMA
 logger = logging.getLogger(__name__)
+def _encode_image_to_base64(image_array: Any) -> dict[str, Any] | None:
+    """Encode an RGB ndarray into a base64 PNG payload with metadata."""
+    if not isinstance(image_array, np.ndarray):
+        return None
+    if image_array.ndim != 3 or image_array.shape[-1] not in (1, 3, 4):
+        return None
+    try:
+        # Ensure uint8 for PIL compatibility
+        array_uint8 = (
+            image_array.astype("uint8")
+            if image_array.dtype != np.uint8
+            else image_array  # pragma: no cover - fast path
+        )
+        mode = "L" if array_uint8.shape[-1] == 1 else "RGB"
+        if array_uint8.shape[-1] == 4:
+            mode = "RGBA"
+        img = Image.fromarray(array_uint8, mode=mode)
+        buffer = BytesIO()
+        img.save(buffer, format="PNG")
+        encoded = base64.b64encode(buffer.getvalue()).decode("ascii")
+        width = int(array_uint8.shape[1])
+        height = int(array_uint8.shape[0])
+        return {
+            "format": "png",
+            "width": width,
+            "height": height,
+            "data": encoded,
+            "data_url": f"data:image/png;base64,{encoded}",
+        }
+    except Exception:
+        return None
 class CrafterEnvironmentWrapper:
     """Host-side environment wrapper matching the sketch contract.
@@ -25,20 +62,20 @@ class CrafterEnvironmentWrapper:
       - snapshot()/restore() handled at route level; this wrapper exposes checkpoint via synth-ai
     """
-    def __init__(self, env: StatefulEnvironment, seed: Optional[int] = None) -> None:
+    def __init__(self, env: StatefulEnvironment, seed: int | None = None) -> None:
         self.env = env
         self.seed = seed
         self.step_idx = 0
-        self.last_observation: Optional[Dict[str, Any]] = None
-        self.last_info: Optional[Dict[str, Any]] = None
+        self.last_observation: dict[str, Any] | None = None
+        self.last_info: dict[str, Any] | None = None
-    async def initialize(self) -> Dict[str, Any]:
+    async def initialize(self) -> dict[str, Any]:
         obs = await self.env.initialize()
         # synth-ai InternalObservation expected to expose .observation (dict-like)
         self.step_idx = 0
         self.last_observation = getattr(obs, "observation", obs)  # tolerate dict-like
         self.last_info = getattr(obs, "info", None)
-        out_obs: Dict[str, Any] = convert_numpy_to_python(self.last_observation) or {}
+        out_obs = self._prepare_observation(self.last_observation)
         # Attach a 7x7 semantic map patch centered on player for client-side rendering
         try:
             pub = self.env.engine._get_public_state_from_env()  # type: ignore[attr-defined]
@@ -47,13 +84,13 @@ class CrafterEnvironmentWrapper:
             size = 7
             half = size // 2
             patch = []
-            H = len(sem) if hasattr(sem, "__len__") else 0
-            W = len(sem[0]) if H and hasattr(sem[0], "__len__") else 0
+            height = len(sem) if hasattr(sem, "__len__") else 0
+            width = len(sem[0]) if height and hasattr(sem[0], "__len__") else 0
             for dy in range(-half, half + 1):
                 row = []
                 for dx in range(-half, half + 1):
                     x, y = int(px) + dx, int(py) + dy
-                    if 0 <= x < H and 0 <= y < W:
+                    if 0 <= x < height and 0 <= y < width:
                         row.append(int(sem[x][y]))
                     else:
                         row.append(0)
@@ -68,7 +105,7 @@ class CrafterEnvironmentWrapper:
             "step_idx": self.step_idx,
         }
-    async def step(self, tool_calls: List[Dict[str, Any]] | List[EnvToolCall]) -> Dict[str, Any]:
+    async def step(self, tool_calls: list[dict[str, Any]] | list[EnvToolCall]) -> dict[str, Any]:
         # Normalize JSON tool_calls into EnvToolCall instances if needed
         # Underlying synth-ai environment expects only tool="interact" with args={"action": <action_name>}.
         # LLM may emit:
@@ -79,9 +116,9 @@ class CrafterEnvironmentWrapper:
         allowed_actions = set(
             TOOLS_SCHEMA[0]["function"]["parameters"]["properties"]["actions"]["items"]["enum"]
         )
-        normalized: List[EnvToolCall] = []
+        normalized: list[EnvToolCall] = []
-        def _action_to_int(action: Any) -> Optional[int]:
+        def _action_to_int(action: Any) -> int | None:
             # Handle invalid actions gracefully instead of failing
             if isinstance(action, int):
                 return action
@@ -90,6 +127,7 @@ class CrafterEnvironmentWrapper:
                 logger.warning("Unknown Crafter action: %s - ignoring", action_str)
                 return None  # Signal to skip this action
             return CRAFTER_ACTIONS[action_str]
         for tc in tool_calls:
             if isinstance(tc, EnvToolCall):
                 # Expand interact_many; otherwise coerce non-interact tools into interact(action=tool)
@@ -103,12 +141,12 @@ class CrafterEnvironmentWrapper:
                             )
                 elif tc.tool != "interact":
                     candidate_action = tc.args.get("action") if isinstance(tc.args, dict) else None
-                    resolved_action = candidate_action if candidate_action in allowed_actions else tc.tool
+                    resolved_action = (
+                        candidate_action if candidate_action in allowed_actions else tc.tool
+                    )
                     action_int = _action_to_int(resolved_action)
                     if action_int is not None:  # Skip invalid actions
-                        normalized.append(
-                            EnvToolCall(tool="interact", args={"action": action_int})
-                        )
+                        normalized.append(EnvToolCall(tool="interact", args={"action": action_int}))
                 else:
                     normalized.append(tc)
             else:
@@ -120,13 +158,14 @@ class CrafterEnvironmentWrapper:
                 args = tc.get("arguments") or tc.get("args") or {}
                 if isinstance(args, str):
                     import json as _json
                     try:
                         args = _json.loads(args)
                     except Exception:
                         args = {}
                 # Expand interact_many into multiple interacts
                 if tool_name == "interact_many":
-                    for action in (args.get("actions") or []):
+                    for action in args.get("actions") or []:
                         action_int = _action_to_int(action)
                         if action_int is not None:  # Skip invalid actions
                             normalized.append(
@@ -135,26 +174,35 @@ class CrafterEnvironmentWrapper:
                 else:
                     # For any non-interact tool, resolve to an interact action.
                     # Support a packed list of actions under 'actions' for convenience.
-                    if isinstance(args, dict) and isinstance(args.get("actions"), list) and args.get("actions"):
+                    if (
+                        isinstance(args, dict)
+                        and isinstance(args.get("actions"), list)
+                        and args.get("actions")
+                    ):
                         for action in args.get("actions"):
                             action_int = _action_to_int(action)
                             if action_int is not None:
-                                normalized.append(EnvToolCall(tool="interact", args={"action": action_int}))
+                                normalized.append(
+                                    EnvToolCall(tool="interact", args={"action": action_int})
+                                )
                     else:
                         candidate_action = None
                         if isinstance(args, dict) and "action" in args:
                             candidate_action = args["action"]
                         # If the caller provided a numeric action id, accept it directly
-                        action_int: Optional[int]
-                        if isinstance(candidate_action, int):
-                            action_int = _action_to_int(candidate_action)
-                        elif isinstance(candidate_action, str) and candidate_action in allowed_actions:
+                        action_int: int | None
+                        if isinstance(candidate_action, int) or (
+                            isinstance(candidate_action, str)
+                            and candidate_action in allowed_actions
+                        ):
                             action_int = _action_to_int(candidate_action)
                         else:
                             # Fallback: interpret the tool name itself as the action label
                             action_int = _action_to_int(tool_name)
                         if action_int is not None:
-                            normalized.append(EnvToolCall(tool="interact", args={"action": action_int}))
+                            normalized.append(
+                                EnvToolCall(tool="interact", args={"action": action_int})
+                            )
         # Ensure we have at least one valid action; default to noop if none provided
         if not normalized:
@@ -162,7 +210,7 @@ class CrafterEnvironmentWrapper:
             normalized.append(EnvToolCall(tool="interact", args={"action": 0}))  # noop action
         # Pre-step logging: capture current public state and print concise summary
-        before_state: Optional[Dict[str, Any]] = None
+        before_state: dict[str, Any] | None = None
         try:
             pub_before = self.env.engine._get_public_state_from_env()  # type: ignore[attr-defined]
             before_state = {
@@ -173,7 +221,9 @@ class CrafterEnvironmentWrapper:
                 "semantic_map": pub_before.semantic_map,
             }
             actions_printable = [
-                (tc.args.get("action") if isinstance(tc.args, dict) else None) if isinstance(tc, EnvToolCall) else None
+                (tc.args.get("action") if isinstance(tc.args, dict) else None)
+                if isinstance(tc, EnvToolCall)
+                else None
                 for tc in normalized
             ]
             logger.info(
@@ -185,7 +235,11 @@ class CrafterEnvironmentWrapper:
                 [k for k, v in before_state["achievements_status"].items() if v],
                 actions_printable,
             )
-            logger.info("Surroundings BEFORE (seed=%s):\n%s", str(self.seed), _format_semantic_map_view(before_state))
+            logger.info(
+                "Surroundings BEFORE (seed=%s):\n%s",
+                str(self.seed),
+                _format_semantic_map_view(before_state),
+            )
         except Exception as _:
             # Logging should not interfere with stepping; fail-fast elsewhere
             pass
@@ -212,7 +266,7 @@ class CrafterEnvironmentWrapper:
         ach_added_latest: list[str] | None = None
         try:
             pub_after = self.env.engine._get_public_state_from_env()  # type: ignore[attr-defined]
-            after_dict: Dict[str, Any] = {
+            after_dict: dict[str, Any] = {
                 "inventory": pub_after.inventory,
                 "achievements_status": pub_after.achievements_status,
                 "player_position": list(pub_after.player_position),
@@ -236,8 +290,8 @@ class CrafterEnvironmentWrapper:
                     # Position delta
                     pb = before_state.get("player_position", [0, 0])
                     pa = after_dict.get("player_position", [0, 0])
-                    pb_t = (int(pb[0]), int(pb[1])) if isinstance(pb, (list, tuple)) else (0, 0)
-                    pa_t = (int(pa[0]), int(pa[1])) if isinstance(pa, (list, tuple)) else (0, 0)
+                    pb_t = (int(pb[0]), int(pb[1])) if isinstance(pb, list | tuple) else (0, 0)
+                    pa_t = (int(pa[0]), int(pa[1])) if isinstance(pa, list | tuple) else (0, 0)
                     delta = (pa_t[0] - pb_t[0], pa_t[1] - pb_t[1])
                     # Inventory changes
@@ -253,11 +307,17 @@ class CrafterEnvironmentWrapper:
                     inv_changes = ", ".join(changed_items) if changed_items else "none"
                     # Achievements gained/lost
-                    ach_b = {k for k, v in (before_state.get("achievements_status", {}) or {}).items() if v}
-                    ach_a = {k for k, v in (after_dict.get("achievements_status", {}) or {}).items() if v}
-                    ach_added = sorted(list(ach_a - ach_b))
+                    ach_b = {
+                        k
+                        for k, v in (before_state.get("achievements_status", {}) or {}).items()
+                        if v
+                    }
+                    ach_a = {
+                        k for k, v in (after_dict.get("achievements_status", {}) or {}).items() if v
+                    }
+                    ach_added = sorted(ach_a - ach_b)
                     ach_added_latest = ach_added
-                    ach_removed = sorted(list(ach_b - ach_a))
+                    ach_removed = sorted(ach_b - ach_a)
                     logger.info(
                         "Changes: pos %s->%s Δ=%s | inv %s | ach +%s -%s",
@@ -272,16 +332,23 @@ class CrafterEnvironmentWrapper:
                     if reward is None and ach_added_latest:
                         try:
                             reward = float(len(ach_added_latest))
-                            logger.info("Reward shaping applied: +%s (achievements added)", len(ach_added_latest))
+                            logger.info(
+                                "Reward shaping applied: +%s (achievements added)",
+                                len(ach_added_latest),
+                            )
                         except Exception:
                             pass
                 except Exception:
                     pass
-            logger.info("Surroundings AFTER (seed=%s):\n%s", str(self.seed), _format_semantic_map_view(after_dict))
+            logger.info(
+                "Surroundings AFTER (seed=%s):\n%s",
+                str(self.seed),
+                _format_semantic_map_view(after_dict),
+            )
         except Exception as _:
             pass
-        result: Dict[str, Any] = {
-            "observation": convert_numpy_to_python(observation),
+        result: dict[str, Any] = {
+            "observation": self._prepare_observation(observation),
             "step_idx": self.step_idx,
             "done": bool(done) if done is not None else False,  # Ensure boolean
         }
@@ -293,13 +360,13 @@ class CrafterEnvironmentWrapper:
             size = 7
             half = size // 2
             patch = []
-            H = len(sem) if hasattr(sem, "__len__") else 0
-            W = len(sem[0]) if H and hasattr(sem[0], "__len__") else 0
+            height = len(sem) if hasattr(sem, "__len__") else 0
+            width = len(sem[0]) if height and hasattr(sem[0], "__len__") else 0
             for dy in range(-half, half + 1):
                 row = []
                 for dx in range(-half, half + 1):
                     x, y = px + dx, py + dy
-                    if 0 <= x < H and 0 <= y < W:
+                    if 0 <= x < height and 0 <= y < width:
                         row.append(int(sem[x][y]))
                     else:
                         row.append(0)
@@ -309,10 +376,7 @@ class CrafterEnvironmentWrapper:
                 obs_out["semantic_map_patch7"] = patch
         except Exception:
             pass
-        if info is not None:
-            result_info = convert_numpy_to_python(info)
-        else:
-            result_info = {}
+        result_info = convert_numpy_to_python(info) if info is not None else {}
         # Attach achievements delta for downstream metrics if useful
         if ach_added_latest is not None:
             try:
@@ -340,6 +404,7 @@ class CrafterEnvironmentWrapper:
             # Build reverse action map for readability
             int_to_action = {v: k for k, v in CRAFTER_ACTIONS.items()}
             from collections import Counter
             action_ids = []
             for tc in normalized:
                 if isinstance(tc, EnvToolCall) and isinstance(tc.args, dict):
@@ -371,29 +436,57 @@ class CrafterEnvironmentWrapper:
             )
         except Exception:
             pass
         return result
-    async def checkpoint(self) -> Dict[str, Any]:
+    def _prepare_observation(self, observation: Any) -> dict[str, Any]:
+        """Convert raw observation into a JSON-serializable dict with encoded image."""
+        obs_dict: dict[str, Any]
+        image_payload: dict[str, Any] | None = None
+        if isinstance(observation, dict):
+            image_payload = _encode_image_to_base64(observation.get("observation_image"))
+            # Work on a shallow copy to avoid mutating engine state
+            sanitized = dict(observation)
+            sanitized.pop("observation_image", None)
+            obs_dict = convert_numpy_to_python(sanitized) or {}
+        else:
+            obs_dict = convert_numpy_to_python(observation) or {}
+        if not isinstance(obs_dict, dict):
+            obs_dict = {"value": obs_dict}
+        if image_payload:
+            obs_dict["observation_image_base64"] = image_payload["data"]
+            obs_dict["observation_image_format"] = image_payload["format"]
+            obs_dict["observation_image_width"] = image_payload["width"]
+            obs_dict["observation_image_height"] = image_payload["height"]
+            obs_dict["observation_image_data_url"] = image_payload["data_url"]
+        return obs_dict
+    async def checkpoint(self) -> dict[str, Any]:
         obs = await self.env.checkpoint()
         observation = getattr(obs, "observation", obs)
         info = getattr(obs, "info", None)
         return {
             "observation": convert_numpy_to_python(observation),
             "info": convert_numpy_to_python(info) if info else None,
-            "step_idx": self.step_idx
+            "step_idx": self.step_idx,
         }
-    async def terminate(self) -> Dict[str, Any]:
+    async def terminate(self) -> dict[str, Any]:
         obs = await self.env.terminate()
         observation = getattr(obs, "observation", obs)
         info = getattr(obs, "info", None)
         return {
             "observation": convert_numpy_to_python(observation),
             "info": convert_numpy_to_python(info) if info else None,
-            "step_idx": self.step_idx
+            "step_idx": self.step_idx,
         }
-    def state_dict(self) -> Dict[str, Any]:
+    def state_dict(self) -> dict[str, Any]:
         return {
             "seed": self.seed,
             "step_idx": self.step_idx,
@@ -401,13 +494,13 @@ class CrafterEnvironmentWrapper:
             "last_info": self.last_info,
         }
-    def load_state_dict(self, state: Dict[str, Any]) -> None:
+    def load_state_dict(self, state: dict[str, Any]) -> None:
         self.seed = state["seed"]
         self.step_idx = int(state["step_idx"])
         self.last_observation = state["last_observation"]
         self.last_info = state["last_info"]
-    async def serialize(self) -> Dict[str, Any]:
+    async def serialize(self) -> dict[str, Any]:
         return {
             "name": "crafter",
             "config": {"seed": self.seed},
@@ -417,9 +510,9 @@ class CrafterEnvironmentWrapper:
     @classmethod
     async def deserialize(
         cls,
-        payload: Dict[str, Any],
+        payload: dict[str, Any],
         env: StatefulEnvironment,
-    ) -> "CrafterEnvironmentWrapper":
+    ) -> CrafterEnvironmentWrapper:
         seed = payload["config"]["seed"]
         wrapper = cls(env=env, seed=seed)
         wrapper.load_state_dict(payload["state"])

synth-ai 0.2.9.dev5__py3-none-any.whl → 0.2.9.dev6__py3-none-any.whl

Potentially problematic release.

synth-ai 0.2.9.dev5__py3-none-any.whl → 0.2.9.dev6__py3-none-any.whl