PyPI - synth-ai - Versions diffs - 0.2.9.dev7__py3-none-any.whl → 0.2.9.dev9__py3-none-any.whl - Mend

synth-ai 0.2.9.dev7__py3-none-any.whl → 0.2.9.dev9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of synth-ai might be problematic. Click here for more details.

Files changed (327) hide show

examples/__init__.py +16 -0
examples/crafter_debug_render.py +8 -11
examples/qwen_coder/README.md +102 -0
examples/qwen_coder/_shared.py +113 -0
examples/qwen_coder/configs/coder_lora_30b.toml +61 -0
examples/qwen_coder/configs/coder_lora_4b.toml +57 -0
examples/qwen_coder/configs/coder_lora_small.toml +58 -0
examples/qwen_coder/generate_dataset.py +98 -0
examples/qwen_coder/infer_ft_smoke.py +64 -0
examples/qwen_coder/infer_prod_proxy.py +73 -0
examples/qwen_coder/infer_via_synth.py +87 -0
examples/qwen_coder/scripts/infer_coder.sh +18 -0
examples/qwen_coder/scripts/train_coder_30b.sh +21 -0
examples/qwen_coder/sft_full_17b.py +103 -0
examples/qwen_coder/sft_lora_30b.py +110 -0
examples/qwen_coder/subset_jsonl.py +38 -0
examples/qwen_coder/validate_jsonl.py +59 -0
examples/rl/run_eval.py +36 -37
examples/rl/run_rl_and_save.py +5 -5
examples/rl/task_app/math_single_step.py +65 -43
examples/rl/task_app/math_task_app.py +3 -3
examples/sft/README.md +139 -0
examples/sft/configs/crafter_fft_qwen0p6b.toml +44 -0
examples/sft/configs/crafter_lora_qwen0p6b.toml +45 -0
examples/sft/evaluate.py +117 -0
examples/sft/export_dataset.py +117 -0
examples/sft/generate_traces.py +162 -0
examples/swe/__init__.py +12 -0
examples/swe/task_app/README.md +105 -0
examples/swe/task_app/__init__.py +2 -0
examples/swe/task_app/grpo_swe_mini.py +571 -0
examples/swe/task_app/grpo_swe_mini_task_app.py +136 -0
examples/swe/task_app/hosted/README.md +173 -0
examples/swe/task_app/hosted/__init__.py +5 -0
examples/swe/task_app/hosted/branching.py +143 -0
examples/swe/task_app/hosted/environment_routes.py +1289 -0
examples/swe/task_app/hosted/envs/__init__.py +1 -0
examples/swe/task_app/hosted/envs/crafter/__init__.py +6 -0
examples/swe/task_app/hosted/envs/crafter/app.py +1 -0
examples/swe/task_app/hosted/envs/crafter/environment.py +522 -0
examples/swe/task_app/hosted/envs/crafter/policy.py +478 -0
examples/swe/task_app/hosted/envs/crafter/react_agent.py +108 -0
examples/swe/task_app/hosted/envs/crafter/shared.py +305 -0
examples/swe/task_app/hosted/envs/crafter/tools.py +47 -0
examples/swe/task_app/hosted/envs/mini_swe/__init__.py +8 -0
examples/swe/task_app/hosted/envs/mini_swe/environment.py +1164 -0
examples/swe/task_app/hosted/envs/mini_swe/policy.py +355 -0
examples/swe/task_app/hosted/envs/mini_swe/shared.py +83 -0
examples/swe/task_app/hosted/envs/mini_swe/tools.py +96 -0
examples/swe/task_app/hosted/hosted_app.py +204 -0
examples/swe/task_app/hosted/inference/__init__.py +5 -0
examples/swe/task_app/hosted/inference/openai_client.py +618 -0
examples/swe/task_app/hosted/main.py +100 -0
examples/swe/task_app/hosted/policy_routes.py +1079 -0
examples/swe/task_app/hosted/registry.py +195 -0
examples/swe/task_app/hosted/rollout.py +1869 -0
examples/swe/task_app/hosted/storage/__init__.py +5 -0
examples/swe/task_app/hosted/storage/volume.py +211 -0
examples/swe/task_app/hosted/test_agents.py +161 -0
examples/swe/task_app/hosted/test_service.py +137 -0
examples/swe/task_app/hosted/utils.py +62 -0
examples/vlm/README.md +68 -0
examples/vlm/configs/crafter_vlm_gpt4o.toml +44 -0
examples/vlm/crafter_image_only_agent.py +207 -0
examples/vlm/crafter_openai_vlm_agent.py +277 -0
examples/vlm/filter_image_rows.py +63 -0
examples/vlm/run_crafter_vlm_benchmark.py +316 -0
examples/warming_up_to_rl/analyze_trace_db.py +5 -5
examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +11 -1
examples/warming_up_to_rl/export_trace_sft.py +78 -21
examples/warming_up_to_rl/groq_test.py +4 -4
examples/warming_up_to_rl/manage_secrets.py +13 -18
examples/warming_up_to_rl/run_eval.py +42 -44
examples/warming_up_to_rl/run_fft_and_save.py +11 -16
examples/warming_up_to_rl/run_local_rollout.py +1 -3
examples/warming_up_to_rl/run_local_rollout_modal.py +2 -4
examples/warming_up_to_rl/run_local_rollout_parallel.py +1 -4
examples/warming_up_to_rl/run_local_rollout_traced.py +3 -5
examples/warming_up_to_rl/run_rl_and_save.py +5 -6
examples/warming_up_to_rl/run_rollout_remote.py +8 -10
examples/warming_up_to_rl/task_app/README.md +6 -2
examples/warming_up_to_rl/task_app/grpo_crafter.py +234 -35
examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +2 -3
examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +1 -1
examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +9 -11
examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +131 -114
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +101 -41
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +73 -51
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +14 -6
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +16 -16
examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +32 -34
examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +94 -31
examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +0 -2
examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +303 -203
examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +21 -23
examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +328 -225
examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +13 -13
examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +1 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +1 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +4 -3
synth/__init__.py +14 -0
synth_ai/__init__.py +26 -4
synth_ai/api/models/supported.py +376 -0
synth_ai/api/train/builders.py +128 -21
synth_ai/api/train/cli.py +80 -64
synth_ai/api/train/config_finder.py +7 -2
synth_ai/api/train/env_resolver.py +1 -1
synth_ai/api/train/pollers.py +2 -1
synth_ai/api/train/supported_algos.py +139 -0
synth_ai/api/train/task_app.py +1 -2
synth_ai/api/train/utils.py +13 -44
synth_ai/cli/__init__.py +8 -0
synth_ai/cli/_modal_wrapper.py +28 -0
synth_ai/cli/_typer_patch.py +49 -0
synth_ai/cli/balance.py +1 -2
synth_ai/cli/calc.py +1 -1
synth_ai/cli/demo.py +2 -1
synth_ai/cli/recent.py +2 -2
synth_ai/cli/rl_demo.py +2 -1
synth_ai/cli/root.py +11 -13
synth_ai/cli/status.py +2 -2
synth_ai/cli/task_apps.py +529 -179
synth_ai/cli/traces.py +6 -4
synth_ai/cli/watch.py +12 -18
synth_ai/demo_registry.py +1 -1
synth_ai/demos/core/cli.py +36 -43
synth_ai/demos/demo_task_apps/__init__.py +3 -3
synth_ai/demos/demo_task_apps/core.py +17 -25
synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +3 -4
synth_ai/demos/demo_task_apps/math/app.py +2 -1
synth_ai/demos/demo_task_apps/math/deploy_modal.py +3 -4
synth_ai/demos/demo_task_apps/math/modal_task_app.py +16 -18
synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -1
synth_ai/environments/examples/crafter_classic/environment.py +76 -1
synth_ai/environments/reproducibility/tree.py +2 -5
synth_ai/environments/service/app.py +11 -12
synth_ai/environments/service/core_routes.py +4 -7
synth_ai/environments/stateful/engine.py +1 -1
synth_ai/environments/tasks/core.py +1 -0
synth_ai/environments/tasks/filters.py +5 -6
synth_ai/environments/tasks/utils.py +4 -5
synth_ai/handshake.py +9 -9
synth_ai/http.py +1 -1
synth_ai/http_client.py +18 -10
synth_ai/inference/client.py +15 -5
synth_ai/jobs/client.py +78 -83
synth_ai/learning/__init__.py +41 -6
synth_ai/learning/algorithms.py +14 -0
synth_ai/learning/client.py +91 -24
synth_ai/learning/config.py +2 -38
synth_ai/learning/ft_client.py +4 -59
synth_ai/learning/health.py +5 -6
synth_ai/learning/jobs.py +31 -47
synth_ai/{rl → learning/rl}/__init__.py +14 -4
synth_ai/learning/rl/client.py +267 -0
synth_ai/learning/rl/config.py +31 -0
synth_ai/{rl → learning/rl}/contracts.py +5 -8
synth_ai/{rl → learning/rl}/env_keys.py +39 -15
synth_ai/learning/rl/secrets.py +13 -0
synth_ai/learning/rl_client.py +2 -281
synth_ai/learning/sft/__init__.py +29 -0
synth_ai/learning/sft/client.py +68 -0
synth_ai/learning/sft/config.py +270 -0
synth_ai/learning/sft/data.py +295 -0
synth_ai/learning/sse.py +25 -24
synth_ai/learning/validators.py +25 -28
synth_ai/lm/__init__.py +21 -47
synth_ai/main.py +6 -0
synth_ai/task/__init__.py +25 -27
synth_ai/task/apps/__init__.py +7 -8
synth_ai/task/auth.py +8 -8
synth_ai/task/client.py +14 -14
synth_ai/task/contracts.py +36 -35
synth_ai/task/datasets.py +6 -5
synth_ai/task/errors.py +10 -10
synth_ai/task/health.py +17 -9
synth_ai/task/json.py +58 -23
synth_ai/task/proxy.py +13 -9
synth_ai/task/rubrics.py +16 -15
synth_ai/task/server.py +12 -12
synth_ai/task/tracing_utils.py +4 -4
synth_ai/task/vendors.py +5 -6
synth_ai/tracing_v3/__init__.py +2 -0
synth_ai/tracing_v3/abstractions.py +21 -4
synth_ai/tracing_v3/decorators.py +18 -16
synth_ai/tracing_v3/hooks.py +5 -5
synth_ai/tracing_v3/llm_call_record_helpers.py +6 -6
synth_ai/tracing_v3/session_tracer.py +40 -14
synth_ai/tracing_v3/storage/base.py +85 -0
synth_ai/tracing_v3/storage/config.py +21 -8
synth_ai/tracing_v3/storage/factory.py +10 -7
synth_ai/tracing_v3/storage/utils.py +4 -2
synth_ai/tracing_v3/turso/daemon.py +7 -2
synth_ai/tracing_v3/turso/models.py +2 -2
synth_ai/tracing_v3/turso/native_manager.py +1173 -0
synth_ai/tracing_v3/utils.py +4 -4
synth_ai/v0/api/__init__.py +8 -0
synth_ai/v0/api/models/__init__.py +8 -0
synth_ai/v0/api/models/supported.py +8 -0
synth_ai/v0/config/__init__.py +15 -0
synth_ai/v0/config/base_url.py +12 -0
synth_ai/v0/lm/__init__.py +51 -0
synth_ai/{lm → v0/lm}/caching/ephemeral.py +2 -2
synth_ai/{lm → v0/lm}/caching/handler.py +4 -4
synth_ai/{lm → v0/lm}/caching/initialize.py +1 -1
synth_ai/{lm → v0/lm}/caching/persistent.py +1 -1
synth_ai/{lm → v0/lm}/config.py +6 -1
synth_ai/{lm → v0/lm}/core/all.py +9 -9
synth_ai/{lm → v0/lm}/core/main.py +6 -6
synth_ai/{lm → v0/lm}/core/main_v3.py +10 -10
synth_ai/{lm → v0/lm}/core/synth_models.py +2 -14
synth_ai/{lm → v0/lm}/core/vendor_clients.py +2 -2
synth_ai/{lm → v0/lm}/overrides.py +2 -2
synth_ai/{lm → v0/lm}/provider_support/anthropic.py +4 -4
synth_ai/{lm → v0/lm}/provider_support/openai.py +5 -5
synth_ai/{lm → v0/lm}/structured_outputs/handler.py +5 -5
synth_ai/{lm → v0/lm}/structured_outputs/rehabilitate.py +1 -1
synth_ai/{lm → v0/lm}/vendors/core/anthropic_api.py +9 -9
synth_ai/{lm → v0/lm}/vendors/core/gemini_api.py +5 -5
synth_ai/{lm → v0/lm}/vendors/core/mistral_api.py +5 -5
synth_ai/{lm → v0/lm}/vendors/core/openai_api.py +10 -10
synth_ai/{lm → v0/lm}/vendors/openai_standard.py +8 -8
synth_ai/{lm → v0/lm}/vendors/openai_standard_responses.py +2 -2
synth_ai/{lm → v0/lm}/vendors/supported/custom_endpoint.py +3 -3
synth_ai/{lm → v0/lm}/vendors/supported/deepseek.py +2 -2
synth_ai/{lm → v0/lm}/vendors/supported/grok.py +2 -2
synth_ai/{lm → v0/lm}/vendors/supported/groq.py +1 -1
synth_ai/{lm → v0/lm}/vendors/supported/ollama.py +1 -1
synth_ai/{lm → v0/lm}/vendors/supported/openrouter.py +3 -3
synth_ai/{lm → v0/lm}/vendors/supported/together.py +1 -1
synth_ai/{lm → v0/lm}/vendors/synth_client.py +1 -1
synth_ai/v0/tracing_v3/__init__.py +10 -0
synth_ai/v0/tracing_v3/abstractions.py +3 -0
synth_ai/v0/tracing_v3/decorators.py +3 -0
synth_ai/v0/tracing_v3/llm_call_record_helpers.py +3 -0
synth_ai/v0/tracing_v3/session_tracer.py +3 -0
synth_ai-0.2.9.dev9.dist-info/METADATA +191 -0
{synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.9.dev9.dist-info}/RECORD +268 -238
{synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.9.dev9.dist-info}/top_level.txt +1 -0
examples/common_old/backend.py +0 -20
examples/evals_old/README.md +0 -98
examples/evals_old/__init__.py +0 -6
examples/evals_old/compare_models.py +0 -1038
examples/evals_old/example_log.md +0 -145
examples/evals_old/run_demo.sh +0 -126
examples/evals_old/trace_analysis.py +0 -270
examples/finetuning_old/_backup_synth_qwen/config.toml +0 -29
examples/finetuning_old/_backup_synth_qwen/example_log.md +0 -324
examples/finetuning_old/_backup_synth_qwen/filter_traces.py +0 -60
examples/finetuning_old/_backup_synth_qwen/filter_traces_achievements.py +0 -243
examples/finetuning_old/_backup_synth_qwen/purge_v3_traces.py +0 -109
examples/finetuning_old/_backup_synth_qwen/react_agent_lm.py +0 -1924
examples/finetuning_old/_backup_synth_qwen/readme.md +0 -49
examples/finetuning_old/_backup_synth_qwen/run_crafter_qwen4b.py +0 -114
examples/finetuning_old/_backup_synth_qwen/run_demo.sh +0 -195
examples/finetuning_old/_backup_synth_qwen/sft_kickoff.py +0 -119
examples/finetuning_old/synth_qwen_v1/README.md +0 -68
examples/finetuning_old/synth_qwen_v1/filter_traces.py +0 -60
examples/finetuning_old/synth_qwen_v1/filter_traces_achievements.py +0 -243
examples/finetuning_old/synth_qwen_v1/finetune.py +0 -46
examples/finetuning_old/synth_qwen_v1/hello_ft_model.py +0 -71
examples/finetuning_old/synth_qwen_v1/infer.py +0 -36
examples/finetuning_old/synth_qwen_v1/poll.py +0 -46
examples/finetuning_old/synth_qwen_v1/prepare_data.py +0 -35
examples/finetuning_old/synth_qwen_v1/purge_v3_traces.py +0 -109
examples/finetuning_old/synth_qwen_v1/react_agent_lm.py +0 -1933
examples/finetuning_old/synth_qwen_v1/run_crafter_sft_job.py +0 -210
examples/finetuning_old/synth_qwen_v1/run_ft_job.py +0 -237
examples/finetuning_old/synth_qwen_v1/upload_data.py +0 -34
examples/finetuning_old/synth_qwen_v1/util.py +0 -152
examples/rl_old/task_app.py +0 -1131
examples/warming_up_to_rl/old/event_rewards.md +0 -234
examples/warming_up_to_rl/old/notes.md +0 -73
synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_turso.py +0 -738
synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_turso.py +0 -580
synth_ai/experimental/synth_oss.py +0 -445
synth_ai/learning/filtering.py +0 -0
synth_ai/learning/offline/dpo.py +0 -0
synth_ai/learning/offline/providers.py +0 -7
synth_ai/learning/offline/sft.py +0 -0
synth_ai/learning/offline/shared.py +0 -0
synth_ai/learning/online/grpo.py +0 -0
synth_ai/learning/online/irft.py +0 -0
synth_ai/learning/prompts/banking77_injection_eval.py +0 -168
synth_ai/learning/prompts/gepa.py +0 -0
synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +0 -211
synth_ai/learning/prompts/mipro.py +0 -289
synth_ai/learning/prompts/random_search.py +0 -249
synth_ai/learning/prompts/run_mipro_banking77.py +0 -172
synth_ai/learning/prompts/run_random_search_banking77.py +0 -329
synth_ai/rl/secrets.py +0 -19
synth_ai/scripts/verify_rewards.py +0 -100
synth_ai/tracing/__init__.py +0 -30
synth_ai/tracing_v1/__init__.py +0 -33
synth_ai/tracing_v3/turso/__init__.py +0 -25
synth_ai/tracing_v3/turso/manager.py +0 -838
synth_ai/zyk/__init__.py +0 -30
synth_ai-0.2.9.dev7.dist-info/METADATA +0 -131
/synth_ai/{lm → v0/lm}/caching/__init__.py +0 -0
/synth_ai/{lm → v0/lm}/caching/constants.py +0 -0
/synth_ai/{lm → v0/lm}/caching/dbs.py +0 -0
/synth_ai/{lm → v0/lm}/constants.py +0 -0
/synth_ai/{lm → v0/lm}/core/__init__.py +0 -0
/synth_ai/{lm → v0/lm}/core/exceptions.py +0 -0
/synth_ai/{lm → v0/lm}/cost/__init__.py +0 -0
/synth_ai/{lm → v0/lm}/cost/monitor.py +0 -0
/synth_ai/{lm → v0/lm}/cost/statefulness.py +0 -0
/synth_ai/{lm → v0/lm}/injection.py +0 -0
/synth_ai/{lm → v0/lm}/provider_support/__init__.py +0 -0
/synth_ai/{lm → v0/lm}/provider_support/suppress_logging.py +0 -0
/synth_ai/{lm → v0/lm}/structured_outputs/__init__.py +0 -0
/synth_ai/{lm → v0/lm}/structured_outputs/inject.py +0 -0
/synth_ai/{lm → v0/lm}/tools/__init__.py +0 -0
/synth_ai/{lm → v0/lm}/tools/base.py +0 -0
/synth_ai/{lm → v0/lm}/unified_interface.py +0 -0
/synth_ai/{lm → v0/lm}/vendors/__init__.py +0 -0
/synth_ai/{lm → v0/lm}/vendors/base.py +0 -0
/synth_ai/{lm → v0/lm}/vendors/core/__init__.py +0 -0
/synth_ai/{lm → v0/lm}/vendors/core/synth_dev_api.py +0 -0
/synth_ai/{lm → v0/lm}/vendors/local/__init__.py +0 -0
/synth_ai/{lm → v0/lm}/vendors/local/ollama.py +0 -0
/synth_ai/{lm → v0/lm}/vendors/retries.py +0 -0
/synth_ai/{lm → v0/lm}/vendors/supported/__init__.py +0 -0
/synth_ai/{lm → v0/lm}/warmup.py +0 -0
{synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.9.dev9.dist-info}/WHEEL +0 -0
{synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.9.dev9.dist-info}/entry_points.txt +0 -0
{synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.9.dev9.dist-info}/licenses/LICENSE +0 -0

examples/warming_up_to_rl/run_eval.py CHANGED Viewed

@@ -7,16 +7,19 @@ Baseline evaluation script (public-friendly skeleton)
 """
 from __future__ import annotations
-import os
+import argparse
+import asyncio
+import contextlib
 import json
+import os
 import re
-from typing import Any, Dict, List, Optional
-from collections import Counter
-import asyncio
-import httpx
-import argparse
 import tomllib
+from collections import Counter
 from pathlib import Path
+from typing import Any
+import httpx
 class TaskAppClient:
@@ -25,12 +28,12 @@ class TaskAppClient:
     This is a public-friendly shim for examples, pending SDK surface consolidation.
     """
-    def __init__(self, base_url: str, api_key: Optional[str] = None) -> None:
+    def __init__(self, base_url: str, api_key: str | None = None) -> None:
         self.base_url = base_url.rstrip("/")
         self.api_key = api_key
-        self._client: Optional[httpx.AsyncClient] = None
+        self._client: httpx.AsyncClient | None = None
-    async def __aenter__(self) -> "TaskAppClient":
+    async def __aenter__(self) -> TaskAppClient:
         headers = {}
         if self.api_key:
             headers["X-API-Key"] = self.api_key
@@ -56,9 +59,9 @@ class TaskAppClient:
             )
         return self._client
-    async def initialize(self, env_name: str, config: Dict[str, Any]) -> Dict[str, Any]:
+    async def initialize(self, env_name: str, config: dict[str, Any]) -> dict[str, Any]:
         """POST /env/{env_name}/initialize (compat route supported in task app)."""
-        payload: Dict[str, Any] = {
+        payload: dict[str, Any] = {
             "seed": config.get("seed"),
         }
         # Allow both world_config and config inputs; env routes will normalize difficulty
@@ -71,30 +74,30 @@ class TaskAppClient:
         return resp.json()
     async def step(
-        self, env_name: str, env_id: str, tool_calls: List[Dict[str, Any]]
-    ) -> Dict[str, Any]:
+        self, env_name: str, env_id: str, tool_calls: list[dict[str, Any]]
+    ) -> dict[str, Any]:
         """POST /env/{env_name}/step with wrapped tool_calls in action."""
         payload = {"env_id": env_id, "action": {"tool_calls": tool_calls}}
         resp = await self.client.post(f"/env/{env_name}/step", json=payload)
         resp.raise_for_status()
         return resp.json()
-    async def terminate(self, env_name: str, env_id: str) -> Dict[str, Any]:
+    async def terminate(self, env_name: str, env_id: str) -> dict[str, Any]:
         resp = await self.client.post(f"/env/{env_name}/terminate", json={"env_id": env_id})
         resp.raise_for_status()
         return resp.json()
-    async def get_info(self) -> Dict[str, Any]:
+    async def get_info(self) -> dict[str, Any]:
         resp = await self.client.get("/info")
         resp.raise_for_status()
         return resp.json()
-    async def proxy_groq_chat(self, payload: Dict[str, Any]) -> Dict[str, Any]:
+    async def proxy_groq_chat(self, payload: dict[str, Any]) -> dict[str, Any]:
         resp = await self.client.post("/proxy/groq/v1/chat/completions", json=payload)
         resp.raise_for_status()
         return resp.json()
-    async def vllm_chat(self, vllm_base_url: str, payload: Dict[str, Any]) -> Dict[str, Any]:
+    async def vllm_chat(self, vllm_base_url: str, payload: dict[str, Any]) -> dict[str, Any]:
         async with httpx.AsyncClient(base_url=vllm_base_url.rstrip("/"), timeout=60.0) as c:
             resp = await c.post("/v1/chat/completions", json=payload)
             # Do not raise for status to surface body in errors
@@ -114,13 +117,13 @@ class TaskAppClient:
         seed: int,
         difficulty: str,
         policy_name: str,
-        policy_config: Dict[str, Any],
+        policy_config: dict[str, Any],
         max_turns: int,
-    ) -> Dict[str, Any]:
-        ops: List[str] = []
+    ) -> dict[str, Any]:
+        ops: list[str] = []
         for _ in range(max_turns):
             ops.extend(["agent", "env"])
-        payload: Dict[str, Any] = {
+        payload: dict[str, Any] = {
             "run_id": run_id,
             "env": {
                 "env_name": env_name,
@@ -150,7 +153,7 @@ MAX_TURNS = int(os.getenv("MAX_TURNS", "10"))
 CONCURRENCY = int(os.getenv("CONCURRENCY", "1"))
-def _interact_tool_schema() -> List[Dict[str, Any]]:
+def _interact_tool_schema() -> list[dict[str, Any]]:
     return [
         {
             "type": "function",
@@ -171,13 +174,12 @@ def _interact_tool_schema() -> List[Dict[str, Any]]:
 def _build_messages_from_observation(
-    observation: Dict[str, Any], history: List[Dict[str, Any]]
-) -> List[Dict[str, Any]]:
+    observation: dict[str, Any], history: list[dict[str, Any]]
+) -> list[dict[str, Any]]:
     inv = observation.get("inventory") or {}
     pos = observation.get("player_position") or []
     ach = observation.get("achievements_status") or {}
-    turns_taken = observation.get("num_steps_taken") or 0
-    user_lines: List[str] = []
+    user_lines: list[str] = []
     user_lines.append("Environment: CrafterClassic")
     user_lines.append(f"Player position: {pos}")
     user_lines.append(f"Inventory: {json.dumps(inv, ensure_ascii=False)}")
@@ -193,7 +195,7 @@ def _build_messages_from_observation(
     return [{"role": "user", "content": content}]
-def _parse_tool_calls_from_openai_response(data: Dict[str, Any]) -> List[str]:
+def _parse_tool_calls_from_openai_response(data: dict[str, Any]) -> list[str]:
     try:
         choices = data.get("choices")
         if isinstance(choices, list) and choices:
@@ -248,11 +250,11 @@ async def _choose_actions_via_llm(
     client: TaskAppClient,
     provider: str,
     model: str,
-    observation: Dict[str, Any],
-    history: List[Dict[str, Any]],
-) -> List[str]:
+    observation: dict[str, Any],
+    history: list[dict[str, Any]],
+) -> list[str]:
     messages = _build_messages_from_observation(observation, history)
-    payload: Dict[str, Any] = {
+    payload: dict[str, Any] = {
         "model": model,
         "messages": messages,
         "tools": _interact_tool_schema(),
@@ -279,8 +281,8 @@ async def _choose_actions_via_llm(
     return actions or []
-def _expand_actions_to_tool_calls(actions: List[str]) -> List[Dict[str, Any]]:
-    out: List[Dict[str, Any]] = []
+def _expand_actions_to_tool_calls(actions: list[str]) -> list[dict[str, Any]]:
+    out: list[dict[str, Any]] = []
     for a in actions[:5]:
         out.append({"tool": "interact", "args": {"action": a}})
     return out
@@ -293,9 +295,7 @@ def _detect_provider(model: str) -> str:
     return "vllm"
-def _rollout_inference_url_from_cfg(
-    cfg: Dict[str, Any], default_vllm: Optional[str]
-) -> Optional[str]:
+def _rollout_inference_url_from_cfg(cfg: dict[str, Any], default_vllm: str | None) -> str | None:
     # Prefer explicit inference_url in TOML; else fall back to discovered vLLM base
     url = cfg.get("inference_url")
     if isinstance(url, str) and url:
@@ -303,14 +303,14 @@ def _rollout_inference_url_from_cfg(
     return default_vllm
-async def eval_episode(client: TaskAppClient, seed: int) -> Dict[str, Any]:
+async def eval_episode(client: TaskAppClient, seed: int) -> dict[str, Any]:
     env_name = "CrafterClassic"
-    history: List[Dict[str, Any]] = []
+    history: list[dict[str, Any]] = []
     achievements: set[str] = set()
     turns = 0
     # Initialize environment
-    init_cfg: Dict[str, Any] = {
+    init_cfg: dict[str, Any] = {
         "seed": seed,
         "world_config": {"difficulty": os.getenv("DIFFICULTY", "easy")},
     }
@@ -343,10 +343,8 @@ async def eval_episode(client: TaskAppClient, seed: int) -> Dict[str, Any]:
                 if isinstance(nxt, dict):
                     observation = nxt
     finally:
-        try:
+        with contextlib.suppress(Exception):
             await client.terminate(env_name, env_id)
-        except Exception:
-            pass
     return {"seed": seed, "turns": turns, "achievements": sorted(achievements)}
@@ -377,7 +375,7 @@ async def main() -> None:
     args = parser.parse_args()
     global TASK_APP_URL, MODEL, NUM_EPISODES, MAX_TURNS, CONCURRENCY
-    cfg: Dict[str, Any] = {}
+    cfg: dict[str, Any] = {}
     if args.toml:
         with open(args.toml, "rb") as f:
             cfg = tomllib.load(f)
@@ -418,7 +416,7 @@ async def main() -> None:
                     try:
                         run_id = f"eval-{seed}"
                         # Build policy config from TOML (explicit control; no server-side guessing)
-                        policy_cfg: Dict[str, Any] = {
+                        policy_cfg: dict[str, Any] = {
                             "model": cfg.get("model", MODEL),
                             "inference_url": inf_url,
                         }

examples/warming_up_to_rl/run_fft_and_save.py CHANGED Viewed

@@ -6,14 +6,12 @@ import json
 import os
 import sys
 import time
+import tomllib
 from pathlib import Path
-from typing import Any, Dict, Tuple, List
+from typing import Any
-import tomllib
-import re
 import requests
 from dotenv import load_dotenv
 from synth_ai.config.base_url import PROD_BASE_URL_DEFAULT
@@ -25,7 +23,7 @@ def mask(val: str) -> str:
 def post_multipart(
     base: str, api_key: str, path: str, file_field: str, filepath: Path
-) -> Dict[str, Any]:
+) -> dict[str, Any]:
     """Upload a file, trying backend-specific endpoints with fallbacks.
     Priority:
@@ -40,7 +38,7 @@ def post_multipart(
         f"{base.rstrip('/')}/{path.lstrip('/')}",  # e.g., /learning/files
         f"{base.rstrip('/')}/files",  # OpenAI-style
     ]
-    last_err: Dict[str, Any] | None = None
+    last_err: dict[str, Any] | None = None
     for ep in endpoints:
         try:
             r = requests.post(ep, headers=headers, files=files, data=data, timeout=300)
@@ -72,7 +70,7 @@ def post_multipart(
     return last_err or {"error": True, "detail": "upload_failed_all_endpoints"}
-def post_json(base: str, api_key: str, path: str, body: Dict[str, Any]) -> Dict[str, Any]:
+def post_json(base: str, api_key: str, path: str, body: dict[str, Any]) -> dict[str, Any]:
     url = f"{base.rstrip('/')}/{path.lstrip('/')}"
     headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
     r = requests.post(url, headers=headers, data=json.dumps(body), timeout=120)
@@ -82,7 +80,7 @@ def post_json(base: str, api_key: str, path: str, body: Dict[str, Any]) -> Dict[
         return {"status": r.status_code, "text": r.text[:400]}
-def get_json(base: str, api_key: str, path: str) -> Dict[str, Any]:
+def get_json(base: str, api_key: str, path: str) -> dict[str, Any]:
     url = f"{base.rstrip('/')}/{path.lstrip('/')}"
     headers = {"Authorization": f"Bearer {api_key}"}
     r = requests.get(url, headers=headers, timeout=30)
@@ -92,9 +90,9 @@ def get_json(base: str, api_key: str, path: str) -> Dict[str, Any]:
         return {"status": r.status_code, "text": r.text[:400]}
-def _find_fft_configs() -> List[Path]:
+def _find_fft_configs() -> list[Path]:
     """Find FFT TOML configs in standard locations."""
-    candidates: List[Path] = []
+    candidates: list[Path] = []
     # Check current directory configs/
     cwd = Path.cwd()
@@ -206,10 +204,7 @@ def main() -> None:
             # Try relative to cwd first, then relative to config directory
             cwd_relative = Path.cwd() / p
             config_relative = config_path.parent / p
-            if cwd_relative.exists():
-                p = cwd_relative.resolve()
-            else:
-                p = config_relative.resolve()
+            p = cwd_relative.resolve() if cwd_relative.exists() else config_relative.resolve()
         data_file = p
     if data_file is None:
         print("Missing dataset path in --data or [job].data", file=sys.stderr)
@@ -274,7 +269,7 @@ def main() -> None:
                 )
     # 2) Build job payload
-    hp_block: Dict[str, Any] = {
+    hp_block: dict[str, Any] = {
         "n_epochs": int(hp_cfg.get("n_epochs") or 1),
     }
     # Optional extras if present
@@ -295,7 +290,7 @@ def main() -> None:
     if parallel:
         hp_block["parallelism"] = parallel
-    compute_block: Dict[str, Any] = {}
+    compute_block: dict[str, Any] = {}
     for k in ("gpu_type", "gpu_count", "nodes"):
         if k in compute_cfg:
             compute_block[k] = compute_cfg[k]

examples/warming_up_to_rl/run_local_rollout.py CHANGED Viewed

@@ -7,14 +7,12 @@ import argparse
 import asyncio
 import json
 import os
+import sys
 from pathlib import Path
 from typing import Any
-import sys
 import httpx
 from dotenv import load_dotenv
 from synth_ai.task import (
     RolloutEnvSpec,
     RolloutPolicySpec,

examples/warming_up_to_rl/run_local_rollout_modal.py CHANGED Viewed

@@ -7,14 +7,12 @@ import argparse
 import asyncio
 import json
 import os
+import sys
 from pathlib import Path
 from typing import Any
-import sys
 import httpx
 from dotenv import load_dotenv
 from synth_ai.task import (
     RolloutEnvSpec,
     RolloutPolicySpec,
@@ -122,7 +120,7 @@ async def main() -> None:
     base_url = args.base_url
     if args.base_url == "http://localhost:8010":
         print("\nTask app configuration:")
-        base_url_input = input(f"Task app base URL [http://localhost:8001]: ").strip()
+        base_url_input = input("Task app base URL [http://localhost:8001]: ").strip()
         base_url = base_url_input if base_url_input else "http://localhost:8001"
     model = args.model

examples/warming_up_to_rl/run_local_rollout_parallel.py CHANGED Viewed

@@ -5,7 +5,6 @@ from __future__ import annotations
 import argparse
 import asyncio
-import json
 import os
 from collections import Counter
 from pathlib import Path
@@ -13,15 +12,13 @@ from statistics import mean, median
 from typing import Any
 from dotenv import load_dotenv
-from synth_ai.task import TaskAppClient
 from synth_ai.task import (
     RolloutEnvSpec,
     RolloutPolicySpec,
     RolloutRecordConfig,
     RolloutRequest,
     RolloutSafetyConfig,
+    TaskAppClient,
 )

examples/warming_up_to_rl/run_local_rollout_traced.py CHANGED Viewed

@@ -7,13 +7,11 @@ import argparse
 import asyncio
 import json
 import os
+import sys
 from pathlib import Path
 from typing import Any
-import sys
 import httpx
 from synth_ai.task import (
     RolloutEnvSpec,
     RolloutPolicySpec,
@@ -331,7 +329,7 @@ async def main() -> None:
     base_url = args.base_url
     if args.base_url == "http://localhost:8001":
         print("\nTask app configuration:")
-        base_url_input = input(f"Task app base URL [http://localhost:8001]: ").strip()
+        base_url_input = input("Task app base URL [http://localhost:8001]: ").strip()
         base_url = base_url_input if base_url_input else "http://localhost:8001"
     api_key = args.api_key or os.getenv("ENVIRONMENT_API_KEY")
@@ -379,7 +377,7 @@ async def main() -> None:
     print("\nRollout configuration:")
     max_llm_calls = args.max_llm_calls
     if args.max_llm_calls == 1:
-        max_llm_calls_input = input(f"Max LLM calls [10]: ").strip()
+        max_llm_calls_input = input("Max LLM calls [10]: ").strip()
         max_llm_calls = int(max_llm_calls_input) if max_llm_calls_input else 10
     # Override args with prompted values

examples/warming_up_to_rl/run_rl_and_save.py CHANGED Viewed

@@ -5,16 +5,15 @@ import argparse
 import json
 import os
 import sys
+import tomllib
 from pathlib import Path
-from typing import Any, Dict
+from typing import Any
-import tomllib
 import requests
 from synth_ai.config.base_url import PROD_BASE_URL_DEFAULT
-def _load_toml(path: Path) -> Dict[str, Any]:
+def _load_toml(path: Path) -> dict[str, Any]:
     if not path.exists():
         print(f"config not found: {path}", file=sys.stderr)
         sys.exit(2)
@@ -74,7 +73,7 @@ def main() -> None:
         sys.exit(2)
     # Build create-job payload. Send full TOML under data.config, plus endpoint_base_url.
-    payload: Dict[str, Any] = {
+    payload: dict[str, Any] = {
         "job_type": "rl",
         # Optional: compute pass-through
         "compute": cfg.get("compute", {}) if isinstance(cfg.get("compute"), dict) else {},
@@ -87,7 +86,7 @@ def main() -> None:
     backend = str(args.backend).rstrip("/")
     url = f"{backend}/rl/jobs"
-    api_key = (os.getenv("SYNTH_API_KEY") or os.getenv("synth_key") or "").strip()
+    api_key = (os.getenv("SYNTH_API_KEY") or os.getenv("SYNTH_KEY") or "").strip()
     if not api_key:
         print("Missing SYNTH_API_KEY in env", file=sys.stderr)
         sys.exit(2)

examples/warming_up_to_rl/run_rollout_remote.py CHANGED Viewed

@@ -10,6 +10,14 @@ import os
 import sys
 import httpx
+from synth_ai.task import (
+    RolloutEnvSpec,
+    RolloutPolicySpec,
+    RolloutRecordConfig,
+    RolloutRequest,
+    RolloutSafetyConfig,
+    TaskAppClient,
+)
 def check_health(base_url: str, api_key: str) -> None:
@@ -30,16 +38,6 @@ def check_health(base_url: str, api_key: str) -> None:
         print(f"warning: failed to call /health: {exc}")
-from synth_ai.task import (
-    RolloutEnvSpec,
-    RolloutPolicySpec,
-    RolloutRecordConfig,
-    RolloutRequest,
-    RolloutSafetyConfig,
-    TaskAppClient,
-)
 def build_request(
     *,
     run_id: str,

examples/warming_up_to_rl/task_app/README.md CHANGED Viewed

@@ -25,8 +25,12 @@ uvx synth-ai deploy grpo-crafter --name grpo-crafter-task-app
 Requirements:
 - Modal CLI installed and authenticated (`modal token new`).
-- Secrets `crafter-environment-sdk`, `groq-api-key`, and `openai-api-key`
-  available in your Modal account.
+- Either provide an `.env` with `ENVIRONMENT_API_KEY`, `GROQ_API_KEY`, and `OPENAI_API_KEY`
+  (recommended; pass via `--env-file`). The deploy command injects these values via an inline
+  Modal secret plus `Secret.from_dotenv`, so the minted environment key stays in sync with
+  what the CLI sends.
+- Or ensure Modal secrets `groq-api-key` and `openai-api-key` exist and continue to supply
+  model vendor credentials that way.
 The CLI generates a Modal entrypoint on the fly using the shared
 `TaskAppConfig`, ensuring the container matches the local FastAPI behavior.

synth-ai 0.2.9.dev7__py3-none-any.whl → 0.2.9.dev9__py3-none-any.whl

Potentially problematic release.

synth-ai 0.2.9.dev7__py3-none-any.whl → 0.2.9.dev9__py3-none-any.whl