PyPI - synth-ai - Versions diffs - 0.2.9.dev7__py3-none-any.whl → 0.2.10__py3-none-any.whl - Mend

synth-ai 0.2.9.dev7 (py3-none-any.whl) → 0.2.10 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of synth-ai might be problematic. Click here for more details.

Files changed (323)

examples/__init__.py +16 -0
examples/crafter_debug_render.py +8 -11
examples/dev/qwen3_32b_qlora_4xh100.toml +40 -0
examples/multi_step/crafter_rl_lora.md +29 -0
examples/qwen_coder/README.md +102 -0
examples/qwen_coder/_shared.py +113 -0
examples/qwen_coder/configs/coder_lora_30b.toml +61 -0
examples/qwen_coder/configs/coder_lora_4b.toml +57 -0
examples/qwen_coder/configs/coder_lora_small.toml +58 -0
examples/qwen_coder/generate_dataset.py +98 -0
examples/qwen_coder/infer_ft_smoke.py +65 -0
examples/qwen_coder/infer_prod_proxy.py +73 -0
examples/qwen_coder/infer_via_synth.py +87 -0
examples/qwen_coder/scripts/infer_coder.sh +19 -0
examples/qwen_coder/scripts/train_coder_30b.sh +22 -0
examples/qwen_coder/sft_full_17b.py +103 -0
examples/qwen_coder/sft_lora_30b.py +110 -0
examples/qwen_coder/subset_jsonl.py +39 -0
examples/qwen_coder/todos.md +38 -0
examples/qwen_coder/validate_jsonl.py +60 -0
examples/rl/run_eval.py +36 -37
examples/rl/run_rl_and_save.py +5 -5
examples/rl/task_app/math_single_step.py +65 -43
examples/rl/task_app/math_task_app.py +3 -3
examples/sft/README.md +139 -0
examples/sft/configs/crafter_fft_qwen0p6b.toml +44 -0
examples/sft/configs/crafter_lora_qwen0p6b.toml +45 -0
examples/sft/evaluate.py +117 -0
examples/sft/export_dataset.py +117 -0
examples/sft/generate_traces.py +162 -0
examples/swe/__init__.py +12 -0
examples/swe/task_app/README.md +105 -0
examples/swe/task_app/__init__.py +2 -0
examples/swe/task_app/grpo_swe_mini.py +571 -0
examples/swe/task_app/grpo_swe_mini_task_app.py +136 -0
examples/swe/task_app/hosted/README.md +173 -0
examples/swe/task_app/hosted/__init__.py +5 -0
examples/swe/task_app/hosted/branching.py +143 -0
examples/swe/task_app/hosted/environment_routes.py +1289 -0
examples/swe/task_app/hosted/envs/__init__.py +1 -0
examples/swe/task_app/hosted/envs/crafter/__init__.py +6 -0
examples/swe/task_app/hosted/envs/crafter/app.py +1 -0
examples/swe/task_app/hosted/envs/crafter/environment.py +522 -0
examples/swe/task_app/hosted/envs/crafter/policy.py +478 -0
examples/swe/task_app/hosted/envs/crafter/react_agent.py +108 -0
examples/swe/task_app/hosted/envs/crafter/shared.py +305 -0
examples/swe/task_app/hosted/envs/crafter/tools.py +47 -0
examples/swe/task_app/hosted/envs/mini_swe/__init__.py +8 -0
examples/swe/task_app/hosted/envs/mini_swe/environment.py +1164 -0
examples/swe/task_app/hosted/envs/mini_swe/policy.py +355 -0
examples/swe/task_app/hosted/envs/mini_swe/shared.py +83 -0
examples/swe/task_app/hosted/envs/mini_swe/tools.py +96 -0
examples/swe/task_app/hosted/hosted_app.py +204 -0
examples/swe/task_app/hosted/inference/__init__.py +5 -0
examples/swe/task_app/hosted/inference/openai_client.py +618 -0
examples/swe/task_app/hosted/main.py +100 -0
examples/swe/task_app/hosted/policy_routes.py +1079 -0
examples/swe/task_app/hosted/registry.py +195 -0
examples/swe/task_app/hosted/rollout.py +1869 -0
examples/swe/task_app/hosted/storage/__init__.py +5 -0
examples/swe/task_app/hosted/storage/volume.py +211 -0
examples/swe/task_app/hosted/test_agents.py +161 -0
examples/swe/task_app/hosted/test_service.py +137 -0
examples/swe/task_app/hosted/utils.py +62 -0
examples/vlm/PROPOSAL.md +53 -0
examples/vlm/README.md +68 -0
examples/vlm/configs/crafter_vlm_gpt4o.toml +44 -0
examples/vlm/crafter_image_only_agent.py +207 -0
examples/vlm/crafter_openai_vlm_agent.py +277 -0
examples/vlm/filter_image_rows.py +63 -0
examples/vlm/run_crafter_vlm_benchmark.py +316 -0
examples/warming_up_to_rl/analyze_trace_db.py +5 -5
examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +11 -1
examples/warming_up_to_rl/export_trace_sft.py +78 -21
examples/warming_up_to_rl/groq_test.py +4 -4
examples/warming_up_to_rl/manage_secrets.py +13 -18
examples/warming_up_to_rl/run_eval.py +42 -44
examples/warming_up_to_rl/run_fft_and_save.py +11 -16
examples/warming_up_to_rl/run_local_rollout.py +1 -3
examples/warming_up_to_rl/run_local_rollout_modal.py +2 -4
examples/warming_up_to_rl/run_local_rollout_parallel.py +1 -4
examples/warming_up_to_rl/run_local_rollout_traced.py +3 -5
examples/warming_up_to_rl/run_rl_and_save.py +5 -6
examples/warming_up_to_rl/run_rollout_remote.py +8 -10
examples/warming_up_to_rl/task_app/README.md +6 -2
examples/warming_up_to_rl/task_app/grpo_crafter.py +234 -35
examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +2 -3
examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +1 -1
examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +9 -11
examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +131 -114
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +101 -41
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +73 -51
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +14 -6
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +16 -16
examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +32 -34
examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +94 -31
examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +0 -2
examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +303 -203
examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +21 -23
examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +328 -225
examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +13 -13
examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +1 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +1 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +4 -3
synth_ai/api/models/supported.py +376 -0
synth_ai/api/train/builders.py +128 -21
synth_ai/api/train/cli.py +80 -64
synth_ai/api/train/config_finder.py +7 -2
synth_ai/api/train/env_resolver.py +1 -1
synth_ai/api/train/pollers.py +2 -1
synth_ai/api/train/supported_algos.py +139 -0
synth_ai/api/train/task_app.py +1 -2
synth_ai/api/train/utils.py +13 -44
synth_ai/cli/__init__.py +8 -0
synth_ai/cli/_modal_wrapper.py +28 -0
synth_ai/cli/_typer_patch.py +49 -0
synth_ai/cli/balance.py +1 -2
synth_ai/cli/calc.py +1 -1
synth_ai/cli/demo.py +2 -1
synth_ai/cli/recent.py +2 -2
synth_ai/cli/rl_demo.py +2 -1
synth_ai/cli/root.py +11 -13
synth_ai/cli/status.py +2 -2
synth_ai/cli/task_apps.py +529 -179
synth_ai/cli/traces.py +6 -4
synth_ai/cli/watch.py +12 -18
synth_ai/demo_registry.py +1 -1
synth_ai/demos/core/cli.py +36 -43
synth_ai/demos/demo_task_apps/__init__.py +3 -3
synth_ai/demos/demo_task_apps/core.py +17 -25
synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +3 -4
synth_ai/demos/demo_task_apps/math/app.py +2 -1
synth_ai/demos/demo_task_apps/math/deploy_modal.py +3 -4
synth_ai/demos/demo_task_apps/math/modal_task_app.py +16 -18
synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -1
synth_ai/environments/examples/crafter_classic/environment.py +76 -1
synth_ai/environments/reproducibility/tree.py +2 -5
synth_ai/environments/service/app.py +11 -12
synth_ai/environments/service/core_routes.py +4 -7
synth_ai/environments/stateful/engine.py +1 -1
synth_ai/environments/tasks/core.py +1 -0
synth_ai/environments/tasks/filters.py +5 -6
synth_ai/environments/tasks/utils.py +4 -5
synth_ai/handshake.py +9 -9
synth_ai/http.py +1 -1
synth_ai/http_client.py +18 -10
synth_ai/inference/client.py +15 -5
synth_ai/jobs/client.py +78 -83
synth_ai/learning/__init__.py +41 -6
synth_ai/learning/algorithms.py +14 -0
synth_ai/learning/client.py +91 -24
synth_ai/learning/config.py +2 -38
synth_ai/learning/ft_client.py +4 -59
synth_ai/learning/health.py +5 -6
synth_ai/learning/jobs.py +31 -47
synth_ai/{rl → learning/rl}/__init__.py +14 -4
synth_ai/learning/rl/client.py +267 -0
synth_ai/learning/rl/config.py +31 -0
synth_ai/{rl → learning/rl}/contracts.py +5 -8
synth_ai/{rl → learning/rl}/env_keys.py +39 -15
synth_ai/learning/rl/secrets.py +13 -0
synth_ai/learning/rl_client.py +2 -281
synth_ai/learning/sft/__init__.py +29 -0
synth_ai/learning/sft/client.py +68 -0
synth_ai/learning/sft/config.py +270 -0
synth_ai/learning/sft/data.py +295 -0
synth_ai/learning/sse.py +25 -24
synth_ai/learning/validators.py +25 -28
synth_ai/lm/__init__.py +21 -47
synth_ai/task/__init__.py +25 -27
synth_ai/task/apps/__init__.py +7 -8
synth_ai/task/auth.py +8 -8
synth_ai/task/client.py +14 -14
synth_ai/task/contracts.py +36 -35
synth_ai/task/datasets.py +6 -5
synth_ai/task/errors.py +10 -10
synth_ai/task/health.py +17 -9
synth_ai/task/json.py +58 -23
synth_ai/task/proxy.py +13 -9
synth_ai/task/rubrics.py +16 -15
synth_ai/task/server.py +12 -12
synth_ai/task/tracing_utils.py +4 -4
synth_ai/task/vendors.py +5 -6
synth_ai/tracing_v3/__init__.py +2 -0
synth_ai/tracing_v3/abstractions.py +21 -4
synth_ai/tracing_v3/decorators.py +18 -16
synth_ai/tracing_v3/hooks.py +5 -5
synth_ai/tracing_v3/llm_call_record_helpers.py +6 -6
synth_ai/tracing_v3/session_tracer.py +40 -14
synth_ai/tracing_v3/storage/base.py +85 -0
synth_ai/tracing_v3/storage/config.py +21 -8
synth_ai/tracing_v3/storage/factory.py +10 -7
synth_ai/tracing_v3/storage/utils.py +4 -2
synth_ai/tracing_v3/turso/daemon.py +7 -2
synth_ai/tracing_v3/turso/models.py +2 -2
synth_ai/tracing_v3/turso/native_manager.py +1173 -0
synth_ai/tracing_v3/utils.py +4 -4
synth_ai/v0/api/__init__.py +8 -0
synth_ai/v0/api/models/__init__.py +8 -0
synth_ai/v0/api/models/supported.py +8 -0
synth_ai/v0/config/__init__.py +15 -0
synth_ai/v0/config/base_url.py +12 -0
synth_ai/v0/lm/__init__.py +51 -0
synth_ai/{lm → v0/lm}/caching/ephemeral.py +2 -2
synth_ai/{lm → v0/lm}/caching/handler.py +4 -4
synth_ai/{lm → v0/lm}/caching/initialize.py +1 -1
synth_ai/{lm → v0/lm}/caching/persistent.py +1 -1
synth_ai/{lm → v0/lm}/config.py +6 -1
synth_ai/{lm → v0/lm}/core/all.py +9 -9
synth_ai/{lm → v0/lm}/core/main.py +6 -6
synth_ai/{lm → v0/lm}/core/main_v3.py +10 -10
synth_ai/{lm → v0/lm}/core/synth_models.py +2 -14
synth_ai/{lm → v0/lm}/core/vendor_clients.py +2 -2
synth_ai/{lm → v0/lm}/overrides.py +2 -2
synth_ai/{lm → v0/lm}/provider_support/anthropic.py +4 -4
synth_ai/{lm → v0/lm}/provider_support/openai.py +5 -5
synth_ai/{lm → v0/lm}/structured_outputs/handler.py +5 -5
synth_ai/{lm → v0/lm}/structured_outputs/rehabilitate.py +1 -1
synth_ai/{lm → v0/lm}/vendors/core/anthropic_api.py +9 -9
synth_ai/{lm → v0/lm}/vendors/core/gemini_api.py +5 -5
synth_ai/{lm → v0/lm}/vendors/core/mistral_api.py +5 -5
synth_ai/{lm → v0/lm}/vendors/core/openai_api.py +10 -10
synth_ai/{lm → v0/lm}/vendors/openai_standard.py +8 -8
synth_ai/{lm → v0/lm}/vendors/openai_standard_responses.py +2 -2
synth_ai/{lm → v0/lm}/vendors/supported/custom_endpoint.py +3 -3
synth_ai/{lm → v0/lm}/vendors/supported/deepseek.py +2 -2
synth_ai/{lm → v0/lm}/vendors/supported/grok.py +2 -2
synth_ai/{lm → v0/lm}/vendors/supported/groq.py +1 -1
synth_ai/{lm → v0/lm}/vendors/supported/ollama.py +1 -1
synth_ai/{lm → v0/lm}/vendors/supported/openrouter.py +3 -3
synth_ai/{lm → v0/lm}/vendors/supported/together.py +1 -1
synth_ai/{lm → v0/lm}/vendors/synth_client.py +1 -1
synth_ai/v0/tracing_v3/__init__.py +10 -0
synth_ai/v0/tracing_v3/abstractions.py +3 -0
synth_ai/v0/tracing_v3/decorators.py +3 -0
synth_ai/v0/tracing_v3/llm_call_record_helpers.py +3 -0
synth_ai/v0/tracing_v3/session_tracer.py +3 -0
{synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.10.dist-info}/METADATA +10 -7
{synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.10.dist-info}/RECORD +269 -233
examples/common_old/backend.py +0 -20
examples/evals_old/README.md +0 -98
examples/evals_old/__init__.py +0 -6
examples/evals_old/compare_models.py +0 -1038
examples/evals_old/example_log.md +0 -145
examples/evals_old/run_demo.sh +0 -126
examples/evals_old/trace_analysis.py +0 -270
examples/finetuning_old/_backup_synth_qwen/config.toml +0 -29
examples/finetuning_old/_backup_synth_qwen/example_log.md +0 -324
examples/finetuning_old/_backup_synth_qwen/filter_traces.py +0 -60
examples/finetuning_old/_backup_synth_qwen/filter_traces_achievements.py +0 -243
examples/finetuning_old/_backup_synth_qwen/purge_v3_traces.py +0 -109
examples/finetuning_old/_backup_synth_qwen/react_agent_lm.py +0 -1924
examples/finetuning_old/_backup_synth_qwen/readme.md +0 -49
examples/finetuning_old/_backup_synth_qwen/run_crafter_qwen4b.py +0 -114
examples/finetuning_old/_backup_synth_qwen/run_demo.sh +0 -195
examples/finetuning_old/_backup_synth_qwen/sft_kickoff.py +0 -119
examples/finetuning_old/synth_qwen_v1/README.md +0 -68
examples/finetuning_old/synth_qwen_v1/filter_traces.py +0 -60
examples/finetuning_old/synth_qwen_v1/filter_traces_achievements.py +0 -243
examples/finetuning_old/synth_qwen_v1/finetune.py +0 -46
examples/finetuning_old/synth_qwen_v1/hello_ft_model.py +0 -71
examples/finetuning_old/synth_qwen_v1/infer.py +0 -36
examples/finetuning_old/synth_qwen_v1/poll.py +0 -46
examples/finetuning_old/synth_qwen_v1/prepare_data.py +0 -35
examples/finetuning_old/synth_qwen_v1/purge_v3_traces.py +0 -109
examples/finetuning_old/synth_qwen_v1/react_agent_lm.py +0 -1933
examples/finetuning_old/synth_qwen_v1/run_crafter_sft_job.py +0 -210
examples/finetuning_old/synth_qwen_v1/run_ft_job.py +0 -237
examples/finetuning_old/synth_qwen_v1/upload_data.py +0 -34
examples/finetuning_old/synth_qwen_v1/util.py +0 -152
examples/rl_old/task_app.py +0 -1131
synth_ai/experimental/synth_oss.py +0 -445
synth_ai/learning/filtering.py +0 -0
synth_ai/learning/offline/dpo.py +0 -0
synth_ai/learning/offline/providers.py +0 -7
synth_ai/learning/offline/sft.py +0 -0
synth_ai/learning/offline/shared.py +0 -0
synth_ai/learning/online/grpo.py +0 -0
synth_ai/learning/online/irft.py +0 -0
synth_ai/learning/prompts/banking77_injection_eval.py +0 -168
synth_ai/learning/prompts/gepa.py +0 -0
synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +0 -211
synth_ai/learning/prompts/mipro.py +0 -289
synth_ai/learning/prompts/random_search.py +0 -249
synth_ai/learning/prompts/run_mipro_banking77.py +0 -172
synth_ai/learning/prompts/run_random_search_banking77.py +0 -329
synth_ai/rl/secrets.py +0 -19
synth_ai/scripts/verify_rewards.py +0 -100
synth_ai/tracing/__init__.py +0 -30
synth_ai/tracing_v1/__init__.py +0 -33
synth_ai/tracing_v3/turso/__init__.py +0 -25
synth_ai/tracing_v3/turso/manager.py +0 -838
synth_ai/zyk/__init__.py +0 -30
/synth_ai/{lm → v0/lm}/caching/__init__.py +0 -0
/synth_ai/{lm → v0/lm}/caching/constants.py +0 -0
/synth_ai/{lm → v0/lm}/caching/dbs.py +0 -0
/synth_ai/{lm → v0/lm}/constants.py +0 -0
/synth_ai/{lm → v0/lm}/core/__init__.py +0 -0
/synth_ai/{lm → v0/lm}/core/exceptions.py +0 -0
/synth_ai/{lm → v0/lm}/cost/__init__.py +0 -0
/synth_ai/{lm → v0/lm}/cost/monitor.py +0 -0
/synth_ai/{lm → v0/lm}/cost/statefulness.py +0 -0
/synth_ai/{lm → v0/lm}/injection.py +0 -0
/synth_ai/{lm → v0/lm}/provider_support/__init__.py +0 -0
/synth_ai/{lm → v0/lm}/provider_support/suppress_logging.py +0 -0
/synth_ai/{lm → v0/lm}/structured_outputs/__init__.py +0 -0
/synth_ai/{lm → v0/lm}/structured_outputs/inject.py +0 -0
/synth_ai/{lm → v0/lm}/tools/__init__.py +0 -0
/synth_ai/{lm → v0/lm}/tools/base.py +0 -0
/synth_ai/{lm → v0/lm}/unified_interface.py +0 -0
/synth_ai/{lm → v0/lm}/vendors/__init__.py +0 -0
/synth_ai/{lm → v0/lm}/vendors/base.py +0 -0
/synth_ai/{lm → v0/lm}/vendors/core/__init__.py +0 -0
/synth_ai/{lm → v0/lm}/vendors/core/synth_dev_api.py +0 -0
/synth_ai/{lm → v0/lm}/vendors/local/__init__.py +0 -0
/synth_ai/{lm → v0/lm}/vendors/local/ollama.py +0 -0
/synth_ai/{lm → v0/lm}/vendors/retries.py +0 -0
/synth_ai/{lm → v0/lm}/vendors/supported/__init__.py +0 -0
/synth_ai/{lm → v0/lm}/warmup.py +0 -0
{synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.10.dist-info}/WHEEL +0 -0
{synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.10.dist-info}/entry_points.txt +0 -0
{synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.10.dist-info}/licenses/LICENSE +0 -0
{synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.10.dist-info}/top_level.txt +0 -0

examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py CHANGED Viewed

@@ -1,19 +1,56 @@
 from __future__ import annotations
-from typing import Any, Dict, List, Optional
+import base64
 import logging
+from io import BytesIO
+from typing import Any
-from synth_ai.environments.stateful.core import StatefulEnvironment
+import numpy as np
+from PIL import Image
 from synth_ai.environments.environment.tools import EnvToolCall
+from synth_ai.environments.stateful.core import StatefulEnvironment
 from ...utils import convert_numpy_to_python
-from .tools import TOOLS_SCHEMA
 from .shared import CRAFTER_ACTIONS, _format_semantic_map_view
+from .tools import TOOLS_SCHEMA
 logger = logging.getLogger(__name__)
+def _encode_image_to_base64(image_array: Any) -> dict[str, Any] | None:
+    """Encode an RGB ndarray into a base64 PNG payload with metadata."""
+    if not isinstance(image_array, np.ndarray):
+        return None
+    if image_array.ndim != 3 or image_array.shape[-1] not in (1, 3, 4):
+        return None
+    try:
+        # Ensure uint8 for PIL compatibility
+        array_uint8 = (
+            image_array.astype("uint8")
+            if image_array.dtype != np.uint8
+            else image_array  # pragma: no cover - fast path
+        )
+        mode = "L" if array_uint8.shape[-1] == 1 else "RGB"
+        if array_uint8.shape[-1] == 4:
+            mode = "RGBA"
+        img = Image.fromarray(array_uint8, mode=mode)
+        buffer = BytesIO()
+        img.save(buffer, format="PNG")
+        encoded = base64.b64encode(buffer.getvalue()).decode("ascii")
+        width = int(array_uint8.shape[1])
+        height = int(array_uint8.shape[0])
+        return {
+            "format": "png",
+            "width": width,
+            "height": height,
+            "data": encoded,
+            "data_url": f"data:image/png;base64,{encoded}",
+        }
+    except Exception:
+        return None
 class CrafterEnvironmentWrapper:
     """Host-side environment wrapper matching the sketch contract.
@@ -25,20 +62,20 @@ class CrafterEnvironmentWrapper:
       - snapshot()/restore() handled at route level; this wrapper exposes checkpoint via synth-ai
     """
-    def __init__(self, env: StatefulEnvironment, seed: Optional[int] = None) -> None:
+    def __init__(self, env: StatefulEnvironment, seed: int | None = None) -> None:
         self.env = env
         self.seed = seed
         self.step_idx = 0
-        self.last_observation: Optional[Dict[str, Any]] = None
-        self.last_info: Optional[Dict[str, Any]] = None
+        self.last_observation: dict[str, Any] | None = None
+        self.last_info: dict[str, Any] | None = None
-    async def initialize(self) -> Dict[str, Any]:
+    async def initialize(self) -> dict[str, Any]:
         obs = await self.env.initialize()
         # synth-ai InternalObservation expected to expose .observation (dict-like)
         self.step_idx = 0
         self.last_observation = getattr(obs, "observation", obs)  # tolerate dict-like
         self.last_info = getattr(obs, "info", None)
-        out_obs: Dict[str, Any] = convert_numpy_to_python(self.last_observation) or {}
+        out_obs = self._prepare_observation(self.last_observation)
         # Attach a 7x7 semantic map patch centered on player for client-side rendering
         try:
             pub = self.env.engine._get_public_state_from_env()  # type: ignore[attr-defined]
@@ -47,13 +84,13 @@ class CrafterEnvironmentWrapper:
             size = 7
             half = size // 2
             patch = []
-            H = len(sem) if hasattr(sem, "__len__") else 0
-            W = len(sem[0]) if H and hasattr(sem[0], "__len__") else 0
+            height = len(sem) if hasattr(sem, "__len__") else 0
+            width = len(sem[0]) if height and hasattr(sem[0], "__len__") else 0
             for dy in range(-half, half + 1):
                 row = []
                 for dx in range(-half, half + 1):
                     x, y = int(px) + dx, int(py) + dy
-                    if 0 <= x < H and 0 <= y < W:
+                    if 0 <= x < height and 0 <= y < width:
                         row.append(int(sem[x][y]))
                     else:
                         row.append(0)
@@ -68,7 +105,7 @@ class CrafterEnvironmentWrapper:
             "step_idx": self.step_idx,
         }
-    async def step(self, tool_calls: List[Dict[str, Any]] | List[EnvToolCall]) -> Dict[str, Any]:
+    async def step(self, tool_calls: list[dict[str, Any]] | list[EnvToolCall]) -> dict[str, Any]:
         # Normalize JSON tool_calls into EnvToolCall instances if needed
         # Underlying synth-ai environment expects only tool="interact" with args={"action": <action_name>}.
         # LLM may emit:
@@ -79,9 +116,9 @@ class CrafterEnvironmentWrapper:
         allowed_actions = set(
             TOOLS_SCHEMA[0]["function"]["parameters"]["properties"]["actions"]["items"]["enum"]
         )
-        normalized: List[EnvToolCall] = []
+        normalized: list[EnvToolCall] = []
-        def _action_to_int(action: Any) -> Optional[int]:
+        def _action_to_int(action: Any) -> int | None:
             # Handle invalid actions gracefully instead of failing
             if isinstance(action, int):
                 return action
@@ -153,10 +190,8 @@ class CrafterEnvironmentWrapper:
                         if isinstance(args, dict) and "action" in args:
                             candidate_action = args["action"]
                         # If the caller provided a numeric action id, accept it directly
-                        action_int: Optional[int]
-                        if isinstance(candidate_action, int):
-                            action_int = _action_to_int(candidate_action)
-                        elif (
+                        action_int: int | None
+                        if isinstance(candidate_action, int) or (
                             isinstance(candidate_action, str)
                             and candidate_action in allowed_actions
                         ):
@@ -175,7 +210,7 @@ class CrafterEnvironmentWrapper:
             normalized.append(EnvToolCall(tool="interact", args={"action": 0}))  # noop action
         # Pre-step logging: capture current public state and print concise summary
-        before_state: Optional[Dict[str, Any]] = None
+        before_state: dict[str, Any] | None = None
         try:
             pub_before = self.env.engine._get_public_state_from_env()  # type: ignore[attr-defined]
             before_state = {
@@ -231,7 +266,7 @@ class CrafterEnvironmentWrapper:
         ach_added_latest: list[str] | None = None
         try:
             pub_after = self.env.engine._get_public_state_from_env()  # type: ignore[attr-defined]
-            after_dict: Dict[str, Any] = {
+            after_dict: dict[str, Any] = {
                 "inventory": pub_after.inventory,
                 "achievements_status": pub_after.achievements_status,
                 "player_position": list(pub_after.player_position),
@@ -255,8 +290,8 @@ class CrafterEnvironmentWrapper:
                     # Position delta
                     pb = before_state.get("player_position", [0, 0])
                     pa = after_dict.get("player_position", [0, 0])
-                    pb_t = (int(pb[0]), int(pb[1])) if isinstance(pb, (list, tuple)) else (0, 0)
-                    pa_t = (int(pa[0]), int(pa[1])) if isinstance(pa, (list, tuple)) else (0, 0)
+                    pb_t = (int(pb[0]), int(pb[1])) if isinstance(pb, list | tuple) else (0, 0)
+                    pa_t = (int(pa[0]), int(pa[1])) if isinstance(pa, list | tuple) else (0, 0)
                     delta = (pa_t[0] - pb_t[0], pa_t[1] - pb_t[1])
                     # Inventory changes
@@ -280,9 +315,9 @@ class CrafterEnvironmentWrapper:
                     ach_a = {
                         k for k, v in (after_dict.get("achievements_status", {}) or {}).items() if v
                     }
-                    ach_added = sorted(list(ach_a - ach_b))
+                    ach_added = sorted(ach_a - ach_b)
                     ach_added_latest = ach_added
-                    ach_removed = sorted(list(ach_b - ach_a))
+                    ach_removed = sorted(ach_b - ach_a)
                     logger.info(
                         "Changes: pos %s->%s Δ=%s | inv %s | ach +%s -%s",
@@ -312,8 +347,8 @@ class CrafterEnvironmentWrapper:
             )
         except Exception as _:
             pass
-        result: Dict[str, Any] = {
-            "observation": convert_numpy_to_python(observation),
+        result: dict[str, Any] = {
+            "observation": self._prepare_observation(observation),
             "step_idx": self.step_idx,
             "done": bool(done) if done is not None else False,  # Ensure boolean
         }
@@ -325,13 +360,13 @@ class CrafterEnvironmentWrapper:
             size = 7
             half = size // 2
             patch = []
-            H = len(sem) if hasattr(sem, "__len__") else 0
-            W = len(sem[0]) if H and hasattr(sem[0], "__len__") else 0
+            height = len(sem) if hasattr(sem, "__len__") else 0
+            width = len(sem[0]) if height and hasattr(sem[0], "__len__") else 0
             for dy in range(-half, half + 1):
                 row = []
                 for dx in range(-half, half + 1):
                     x, y = px + dx, py + dy
-                    if 0 <= x < H and 0 <= y < W:
+                    if 0 <= x < height and 0 <= y < width:
                         row.append(int(sem[x][y]))
                     else:
                         row.append(0)
@@ -341,10 +376,7 @@ class CrafterEnvironmentWrapper:
                 obs_out["semantic_map_patch7"] = patch
         except Exception:
             pass
-        if info is not None:
-            result_info = convert_numpy_to_python(info)
-        else:
-            result_info = {}
+        result_info = convert_numpy_to_python(info) if info is not None else {}
         # Attach achievements delta for downstream metrics if useful
         if ach_added_latest is not None:
             try:
@@ -404,9 +436,37 @@ class CrafterEnvironmentWrapper:
             )
         except Exception:
             pass
         return result
-    async def checkpoint(self) -> Dict[str, Any]:
+    def _prepare_observation(self, observation: Any) -> dict[str, Any]:
+        """Convert raw observation into a JSON-serializable dict with encoded image."""
+        obs_dict: dict[str, Any]
+        image_payload: dict[str, Any] | None = None
+        if isinstance(observation, dict):
+            image_payload = _encode_image_to_base64(observation.get("observation_image"))
+            # Work on a shallow copy to avoid mutating engine state
+            sanitized = dict(observation)
+            sanitized.pop("observation_image", None)
+            obs_dict = convert_numpy_to_python(sanitized) or {}
+        else:
+            obs_dict = convert_numpy_to_python(observation) or {}
+        if not isinstance(obs_dict, dict):
+            obs_dict = {"value": obs_dict}
+        if image_payload:
+            obs_dict["observation_image_base64"] = image_payload["data"]
+            obs_dict["observation_image_format"] = image_payload["format"]
+            obs_dict["observation_image_width"] = image_payload["width"]
+            obs_dict["observation_image_height"] = image_payload["height"]
+            obs_dict["observation_image_data_url"] = image_payload["data_url"]
+        return obs_dict
+    async def checkpoint(self) -> dict[str, Any]:
         obs = await self.env.checkpoint()
         observation = getattr(obs, "observation", obs)
         info = getattr(obs, "info", None)
@@ -416,7 +476,7 @@ class CrafterEnvironmentWrapper:
             "step_idx": self.step_idx,
         }
-    async def terminate(self) -> Dict[str, Any]:
+    async def terminate(self) -> dict[str, Any]:
         obs = await self.env.terminate()
         observation = getattr(obs, "observation", obs)
         info = getattr(obs, "info", None)
@@ -426,7 +486,7 @@ class CrafterEnvironmentWrapper:
             "step_idx": self.step_idx,
         }
-    def state_dict(self) -> Dict[str, Any]:
+    def state_dict(self) -> dict[str, Any]:
         return {
             "seed": self.seed,
             "step_idx": self.step_idx,
@@ -434,13 +494,13 @@ class CrafterEnvironmentWrapper:
             "last_info": self.last_info,
         }
-    def load_state_dict(self, state: Dict[str, Any]) -> None:
+    def load_state_dict(self, state: dict[str, Any]) -> None:
         self.seed = state["seed"]
         self.step_idx = int(state["step_idx"])
         self.last_observation = state["last_observation"]
         self.last_info = state["last_info"]
-    async def serialize(self) -> Dict[str, Any]:
+    async def serialize(self) -> dict[str, Any]:
         return {
             "name": "crafter",
             "config": {"seed": self.seed},
@@ -450,9 +510,9 @@ class CrafterEnvironmentWrapper:
     @classmethod
     async def deserialize(
         cls,
-        payload: Dict[str, Any],
+        payload: dict[str, Any],
         env: StatefulEnvironment,
-    ) -> "CrafterEnvironmentWrapper":
+    ) -> CrafterEnvironmentWrapper:
         seed = payload["config"]["seed"]
         wrapper = cls(env=env, seed=seed)
         wrapper.load_state_dict(payload["state"])

examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py CHANGED Viewed

@@ -1,7 +1,8 @@
 from __future__ import annotations
-from typing import Any, Dict, List, Optional, Tuple
 from abc import ABC, abstractmethod
+from typing import Any
 from .react_agent import CrafterReActAgent
 from .tools import TOOLS_SCHEMA
@@ -12,15 +13,15 @@ class Policy(ABC):
     @abstractmethod
     def prepare_inference_request(
-        self, observation: Dict[str, Any], history: List[Dict[str, Any]] = None
-    ) -> Tuple[List[Dict[str, Any]], Optional[List[Dict[str, Any]]]]:
+        self, observation: dict[str, Any], history: list[dict[str, Any]] = None
+    ) -> tuple[list[dict[str, Any]], list[dict[str, Any]] | None]:
         """Prepare an inference request."""
         pass
     @abstractmethod
     def parse_model_response(
-        self, response: str, observation: Dict[str, Any]
-    ) -> List[Dict[str, Any]]:
+        self, response: str, observation: dict[str, Any]
+    ) -> list[dict[str, Any]]:
         """Parse model response into tool calls."""
         pass
@@ -39,23 +40,23 @@ class CrafterPolicy(Policy):
     name: str = "crafter-react"
-    def __init__(self, inference_url: str, model: Optional[str] = None) -> None:
+    def __init__(self, inference_url: str, model: str | None = None) -> None:
         self.inference_url = inference_url
         self.model = model
         self.use_tools = True
         # Sampling parameters (populated via initialize(config))
-        self.temperature: Optional[float] = None
-        self.top_p: Optional[float] = None
-        self.max_tokens: Optional[int] = None
+        self.temperature: float | None = None
+        self.top_p: float | None = None
+        self.max_tokens: int | None = None
         # Thinking controls (populated via initialize(config))
-        self.thinking_mode: Optional[str] = None
-        self.thinking_budget: Optional[int] = None
+        self.thinking_mode: str | None = None
+        self.thinking_budget: int | None = None
         # Rolling conversation and action history for non-Markov policies
-        self.history_messages: List[Dict[str, str]] = []  # chat-style without system
+        self.history_messages: list[dict[str, str]] = []  # chat-style without system
         self.turn_index: int = 0
-        self.trajectory_history: List[Dict[str, Any]] = []  # env/policy step records
+        self.trajectory_history: list[dict[str, Any]] = []  # env/policy step records
-    async def initialize(self, config: Dict[str, Any]) -> None:
+    async def initialize(self, config: dict[str, Any]) -> None:
         if "inference_url" in config:
             self.inference_url = config["inference_url"]
         if "model" in config:
@@ -91,15 +92,15 @@ class CrafterPolicy(Policy):
     def _append_assistant_turn(
         self,
-        assistant_text: Optional[str],
-        tool_calls: Optional[List[Dict[str, Any]]],
-        env_result: Optional[Dict[str, Any]],
+        assistant_text: str | None,
+        tool_calls: list[dict[str, Any]] | None,
+        env_result: dict[str, Any] | None,
     ) -> None:
         # Record assistant content (if any)
         if assistant_text is not None:
             self.history_messages.append({"role": "assistant", "content": assistant_text})
         # Keep structured step record for training/analysis
-        record: Dict[str, Any] = {"turn": self.turn_index}
+        record: dict[str, Any] = {"turn": self.turn_index}
         if tool_calls is not None:
             record["tool_calls"] = tool_calls
         if env_result is not None:
@@ -109,13 +110,17 @@ class CrafterPolicy(Policy):
     def build_inference_request(
         self,
         observation_text: str,
-        history: Optional[List[Dict[str, str]]] = None,
-        turn: Optional[int] = None,
-    ) -> Dict[str, Any]:
+        history: list[dict[str, Any]] | None = None,
+        turn: int | None = None,
+        image_parts: list[dict[str, Any]] | None = None,
+    ) -> dict[str, Any]:
         messages = CrafterReActAgent.build_messages(
-            observation=observation_text, history=history, turn=turn
+            observation=observation_text,
+            history=history,
+            turn=turn,
+            image_parts=image_parts,
         )
-        payload: Dict[str, Any] = {
+        payload: dict[str, Any] = {
             "messages": messages,
         }
         if self.model is not None:
@@ -150,9 +155,9 @@ class CrafterPolicy(Policy):
     @staticmethod
     def parse_response_to_tool_calls(
-        response: Dict[str, Any],
+        response: dict[str, Any],
         use_tools: bool = True,
-    ) -> List[Dict[str, Any]]:
+    ) -> list[dict[str, Any]]:
         """Turn an inference response into environment tool calls.
         - If tools were used, expect tool_calls-compatible output and forward as-is
@@ -162,7 +167,7 @@ class CrafterPolicy(Policy):
         """
         # First check if we got actual tool calls
         choices = response.get("choices", [])
-        tool_calls: List[Dict[str, Any]] = []
+        tool_calls: list[dict[str, Any]] = []
         for choice in choices:
             msg = choice.get("message", {})
@@ -192,7 +197,7 @@ class CrafterPolicy(Policy):
         if tool_calls:
             # Normalize common degenerate pattern ["move_right", "do"] when nothing is nearby.
             # If previous env_result indicates no interaction target, drop trailing 'do'.
-            normalized: List[Dict[str, Any]] = []
+            normalized: list[dict[str, Any]] = []
             for tc in tool_calls:
                 if tc and isinstance(tc, dict) and tc.get("tool_name") == "interact_many":
                     args = tc.get("arguments")
@@ -242,9 +247,9 @@ class CrafterPolicy(Policy):
     async def step(
         self,
         observation_text: str,
-        state: Optional[Dict[str, Any]] = None,
-        metadata: Optional[Dict[str, Any]] = None,
-    ) -> Tuple[List[Dict[str, Any]], Dict[str, Any]]:
+        state: dict[str, Any] | None = None,
+        metadata: dict[str, Any] | None = None,
+    ) -> tuple[list[dict[str, Any]], dict[str, Any]]:
         """Stateful step: update policy history and prepare inference request.
         Inputs (via metadata, optional):
@@ -261,9 +266,9 @@ class CrafterPolicy(Policy):
         """
         # If caller provided results from previous cycle, record them first
         if metadata is not None:
-            prev_assistant_text: Optional[str] = None
-            prev_tool_calls: Optional[List[Dict[str, Any]]] = None
-            prev_env_result: Optional[Dict[str, Any]] = None
+            prev_assistant_text: str | None = None
+            prev_tool_calls: list[dict[str, Any]] | None = None
+            prev_env_result: dict[str, Any] | None = None
             if "prev_assistant_text" in metadata:
                 prev_assistant_text = metadata["prev_assistant_text"]
             if "prev_tool_calls" in metadata:
@@ -283,7 +288,7 @@ class CrafterPolicy(Policy):
         # Build user message by combining the current observation text
         # (formatted surroundings/inventory) with the previous 3 tool calls as context.
         # Most recent first.
-        lines: List[str] = []
+        lines: list[str] = []
         def _format_tool_call_line_for_context(
             tool_name: str, arguments: Any, max_chars: int = 500
@@ -291,7 +296,7 @@ class CrafterPolicy(Policy):
             import json as _json
             # Render arguments compactly, then clip to max_chars
-            if isinstance(arguments, (dict, list)):
+            if isinstance(arguments, dict | list):
                 try:
                     rendered = _json.dumps(arguments, ensure_ascii=False, separators=(",", ":"))
                 except Exception:
@@ -321,7 +326,7 @@ class CrafterPolicy(Policy):
         # If trajectory history is empty (first few turns), fall back to metadata once
         if not lines and metadata is not None and metadata.get("prev_tool_calls"):
-            calls: List[Dict[str, Any]] = metadata["prev_tool_calls"]
+            calls: list[dict[str, Any]] = metadata["prev_tool_calls"]
             for call in reversed(calls):
                 if len(lines) >= 3:
                     break
@@ -338,10 +343,18 @@ class CrafterPolicy(Policy):
         # Combine observation with context so the model always sees surroundings/inventory
         combined_text = f"{observation_text}\n\n{context_text}"
+        raw_observation: dict[str, Any] | None = None
+        if metadata is not None:
+            raw_candidate = metadata.get("raw_observation")
+            if isinstance(raw_candidate, dict):
+                raw_observation = raw_candidate
+        image_parts = self._extract_image_parts(raw_observation)
         payload = self.build_inference_request(
             combined_text,
             history=[],  # no prior user/assistant history
             turn=self.turn_index,
+            image_parts=image_parts,
         )
         # print("Debugging only:; ", payload)
         meta_out = {
@@ -352,19 +365,19 @@ class CrafterPolicy(Policy):
         }
         return [], meta_out
-    def state_dict(self) -> Dict[str, Any]:
+    def state_dict(self) -> dict[str, Any]:
         return {
             "turn_index": self.turn_index,
             "history_messages": self.history_messages,
             "trajectory_history": self.trajectory_history,
         }
-    def load_state_dict(self, state: Dict[str, Any]) -> None:
+    def load_state_dict(self, state: dict[str, Any]) -> None:
         self.turn_index = int(state["turn_index"])
         self.history_messages = state["history_messages"]
         self.trajectory_history = state["trajectory_history"]
-    async def serialize(self) -> Dict[str, Any]:
+    async def serialize(self) -> dict[str, Any]:
         return {
             "name": self.name,
             "config": {
@@ -376,7 +389,7 @@ class CrafterPolicy(Policy):
         }
     @classmethod
-    async def deserialize(cls, payload: Dict[str, Any]) -> "CrafterPolicy":
+    async def deserialize(cls, payload: dict[str, Any]) -> CrafterPolicy:
         config = payload["config"]
         state = payload["state"]
         policy = cls(
@@ -391,22 +404,26 @@ class CrafterPolicy(Policy):
         return None
     def prepare_inference_request(
-        self, observation: Dict[str, Any], history: List[Dict[str, Any]] = None
-    ) -> Tuple[List[Dict[str, Any]], Optional[List[Dict[str, Any]]]]:
+        self, observation: dict[str, Any], history: list[dict[str, Any]] = None
+    ) -> tuple[list[dict[str, Any]], list[dict[str, Any]] | None]:
         """Prepare an inference request (implementing abstract method)."""
         # Format observation with rich contextual information
         observation_text = self._format_observation_for_llm(observation)
+        image_parts = self._extract_image_parts(observation)
         # Build messages (observation_text already formatted; no raw matrices)
         messages = CrafterReActAgent.build_messages(
-            observation=observation_text, history=history, turn=self.turn_index
+            observation=observation_text,
+            history=history,
+            turn=self.turn_index,
+            image_parts=image_parts,
         )
         # Return messages and tools schema
         tools = TOOLS_SCHEMA if self.use_tools else None
         return messages, tools
-    def _format_observation_for_llm(self, observation: Dict[str, Any]) -> str:
+    def _format_observation_for_llm(self, observation: dict[str, Any]) -> str:
         """Format observation with rich contextual information for the LLM using the shared formatter."""
         from .shared import format_observation
@@ -423,17 +440,22 @@ class CrafterPolicy(Policy):
         # Get additional info from the observation wrapper
         info = observation.get("info", {})
-        if isinstance(info, dict):
-            # Merge health from info into obs_data for the formatter
-            if "health" in info and "health" not in obs_data:
-                obs_data = dict(obs_data)  # Make a copy
-                obs_data["health"] = info["health"]
+        if isinstance(info, dict) and "health" in info and "health" not in obs_data:
+            obs_data = dict(obs_data)  # Make a copy
+            obs_data["health"] = info["health"]
         return format_observation(obs_data, step_count=step_idx, max_steps=max_steps)
+    def _extract_image_parts(
+        self, observation: dict[str, Any] | None
+    ) -> list[dict[str, Any]]:
+        """Crafter policy uses text-only prompts; do not attach image parts."""
+        return []
     def parse_model_response(
-        self, response: str, observation: Dict[str, Any]
-    ) -> List[Dict[str, Any]]:
+        self, response: str, observation: dict[str, Any]
+    ) -> list[dict[str, Any]]:
         """Parse model response into tool calls (implementing abstract method).
         Note: Despite the type hint, vLLM actually returns a dict response,

examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py CHANGED Viewed

@@ -7,7 +7,7 @@ utilities to keep a single parser.
 from __future__ import annotations
-from typing import Dict, List, Optional
+from typing import Any
 from .shared import parse_actions
@@ -81,19 +81,27 @@ class CrafterReActAgent:
     @staticmethod
     def build_messages(
-        observation: str, history: Optional[List[Dict[str, str]]] = None, turn: Optional[int] = None
-    ) -> List[Dict[str, str]]:
+        observation: str,
+        history: list[dict[str, Any]] | None = None,
+        turn: int | None = None,
+        image_parts: list[dict[str, Any]] | None = None,
+    ) -> list[dict[str, Any]]:
         """Construct OpenAI-style messages list for vLLM generation."""
-        msgs: List[Dict[str, str]] = [
+        msgs: list[dict[str, Any]] = [
             {"role": "system", "content": CrafterReActAgent.get_system_prompt()}
         ]
         if history:
             msgs.extend(history)
-        msgs.append({"role": "user", "content": observation})
+        user_content: Any
+        if image_parts:
+            user_content = [{"type": "text", "text": observation}] + list(image_parts)
+        else:
+            user_content = observation
+        msgs.append({"role": "user", "content": user_content})
         return msgs
     @staticmethod
-    def parse_actions_from_response(response_text: str) -> List[str]:
+    def parse_actions_from_response(response_text: str) -> list[str]:
         return parse_actions(response_text)

synth-ai 0.2.9.dev7__py3-none-any.whl → 0.2.10__py3-none-any.whl

Potentially problematic release.

synth-ai 0.2.9.dev7__py3-none-any.whl → 0.2.10__py3-none-any.whl