PyPI - synth-ai - Versions diffs - 0.2.9.dev7__py3-none-any.whl → 0.2.10__py3-none-any.whl - Mend

synth-ai: 0.2.9.dev7 (py3-none-any.whl) → 0.2.10 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of synth-ai might be problematic. Click here for more details.

Files changed (323) [hide / show]

examples/__init__.py +16 -0
examples/crafter_debug_render.py +8 -11
examples/dev/qwen3_32b_qlora_4xh100.toml +40 -0
examples/multi_step/crafter_rl_lora.md +29 -0
examples/qwen_coder/README.md +102 -0
examples/qwen_coder/_shared.py +113 -0
examples/qwen_coder/configs/coder_lora_30b.toml +61 -0
examples/qwen_coder/configs/coder_lora_4b.toml +57 -0
examples/qwen_coder/configs/coder_lora_small.toml +58 -0
examples/qwen_coder/generate_dataset.py +98 -0
examples/qwen_coder/infer_ft_smoke.py +65 -0
examples/qwen_coder/infer_prod_proxy.py +73 -0
examples/qwen_coder/infer_via_synth.py +87 -0
examples/qwen_coder/scripts/infer_coder.sh +19 -0
examples/qwen_coder/scripts/train_coder_30b.sh +22 -0
examples/qwen_coder/sft_full_17b.py +103 -0
examples/qwen_coder/sft_lora_30b.py +110 -0
examples/qwen_coder/subset_jsonl.py +39 -0
examples/qwen_coder/todos.md +38 -0
examples/qwen_coder/validate_jsonl.py +60 -0
examples/rl/run_eval.py +36 -37
examples/rl/run_rl_and_save.py +5 -5
examples/rl/task_app/math_single_step.py +65 -43
examples/rl/task_app/math_task_app.py +3 -3
examples/sft/README.md +139 -0
examples/sft/configs/crafter_fft_qwen0p6b.toml +44 -0
examples/sft/configs/crafter_lora_qwen0p6b.toml +45 -0
examples/sft/evaluate.py +117 -0
examples/sft/export_dataset.py +117 -0
examples/sft/generate_traces.py +162 -0
examples/swe/__init__.py +12 -0
examples/swe/task_app/README.md +105 -0
examples/swe/task_app/__init__.py +2 -0
examples/swe/task_app/grpo_swe_mini.py +571 -0
examples/swe/task_app/grpo_swe_mini_task_app.py +136 -0
examples/swe/task_app/hosted/README.md +173 -0
examples/swe/task_app/hosted/__init__.py +5 -0
examples/swe/task_app/hosted/branching.py +143 -0
examples/swe/task_app/hosted/environment_routes.py +1289 -0
examples/swe/task_app/hosted/envs/__init__.py +1 -0
examples/swe/task_app/hosted/envs/crafter/__init__.py +6 -0
examples/swe/task_app/hosted/envs/crafter/app.py +1 -0
examples/swe/task_app/hosted/envs/crafter/environment.py +522 -0
examples/swe/task_app/hosted/envs/crafter/policy.py +478 -0
examples/swe/task_app/hosted/envs/crafter/react_agent.py +108 -0
examples/swe/task_app/hosted/envs/crafter/shared.py +305 -0
examples/swe/task_app/hosted/envs/crafter/tools.py +47 -0
examples/swe/task_app/hosted/envs/mini_swe/__init__.py +8 -0
examples/swe/task_app/hosted/envs/mini_swe/environment.py +1164 -0
examples/swe/task_app/hosted/envs/mini_swe/policy.py +355 -0
examples/swe/task_app/hosted/envs/mini_swe/shared.py +83 -0
examples/swe/task_app/hosted/envs/mini_swe/tools.py +96 -0
examples/swe/task_app/hosted/hosted_app.py +204 -0
examples/swe/task_app/hosted/inference/__init__.py +5 -0
examples/swe/task_app/hosted/inference/openai_client.py +618 -0
examples/swe/task_app/hosted/main.py +100 -0
examples/swe/task_app/hosted/policy_routes.py +1079 -0
examples/swe/task_app/hosted/registry.py +195 -0
examples/swe/task_app/hosted/rollout.py +1869 -0
examples/swe/task_app/hosted/storage/__init__.py +5 -0
examples/swe/task_app/hosted/storage/volume.py +211 -0
examples/swe/task_app/hosted/test_agents.py +161 -0
examples/swe/task_app/hosted/test_service.py +137 -0
examples/swe/task_app/hosted/utils.py +62 -0
examples/vlm/PROPOSAL.md +53 -0
examples/vlm/README.md +68 -0
examples/vlm/configs/crafter_vlm_gpt4o.toml +44 -0
examples/vlm/crafter_image_only_agent.py +207 -0
examples/vlm/crafter_openai_vlm_agent.py +277 -0
examples/vlm/filter_image_rows.py +63 -0
examples/vlm/run_crafter_vlm_benchmark.py +316 -0
examples/warming_up_to_rl/analyze_trace_db.py +5 -5
examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +11 -1
examples/warming_up_to_rl/export_trace_sft.py +78 -21
examples/warming_up_to_rl/groq_test.py +4 -4
examples/warming_up_to_rl/manage_secrets.py +13 -18
examples/warming_up_to_rl/run_eval.py +42 -44
examples/warming_up_to_rl/run_fft_and_save.py +11 -16
examples/warming_up_to_rl/run_local_rollout.py +1 -3
examples/warming_up_to_rl/run_local_rollout_modal.py +2 -4
examples/warming_up_to_rl/run_local_rollout_parallel.py +1 -4
examples/warming_up_to_rl/run_local_rollout_traced.py +3 -5
examples/warming_up_to_rl/run_rl_and_save.py +5 -6
examples/warming_up_to_rl/run_rollout_remote.py +8 -10
examples/warming_up_to_rl/task_app/README.md +6 -2
examples/warming_up_to_rl/task_app/grpo_crafter.py +234 -35
examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +2 -3
examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +1 -1
examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +9 -11
examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +131 -114
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +101 -41
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +73 -51
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +14 -6
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +16 -16
examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +32 -34
examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +94 -31
examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +0 -2
examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +303 -203
examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +21 -23
examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +328 -225
examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +13 -13
examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +1 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +1 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +4 -3
synth_ai/api/models/supported.py +376 -0
synth_ai/api/train/builders.py +128 -21
synth_ai/api/train/cli.py +80 -64
synth_ai/api/train/config_finder.py +7 -2
synth_ai/api/train/env_resolver.py +1 -1
synth_ai/api/train/pollers.py +2 -1
synth_ai/api/train/supported_algos.py +139 -0
synth_ai/api/train/task_app.py +1 -2
synth_ai/api/train/utils.py +13 -44
synth_ai/cli/__init__.py +8 -0
synth_ai/cli/_modal_wrapper.py +28 -0
synth_ai/cli/_typer_patch.py +49 -0
synth_ai/cli/balance.py +1 -2
synth_ai/cli/calc.py +1 -1
synth_ai/cli/demo.py +2 -1
synth_ai/cli/recent.py +2 -2
synth_ai/cli/rl_demo.py +2 -1
synth_ai/cli/root.py +11 -13
synth_ai/cli/status.py +2 -2
synth_ai/cli/task_apps.py +529 -179
synth_ai/cli/traces.py +6 -4
synth_ai/cli/watch.py +12 -18
synth_ai/demo_registry.py +1 -1
synth_ai/demos/core/cli.py +36 -43
synth_ai/demos/demo_task_apps/__init__.py +3 -3
synth_ai/demos/demo_task_apps/core.py +17 -25
synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +3 -4
synth_ai/demos/demo_task_apps/math/app.py +2 -1
synth_ai/demos/demo_task_apps/math/deploy_modal.py +3 -4
synth_ai/demos/demo_task_apps/math/modal_task_app.py +16 -18
synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -1
synth_ai/environments/examples/crafter_classic/environment.py +76 -1
synth_ai/environments/reproducibility/tree.py +2 -5
synth_ai/environments/service/app.py +11 -12
synth_ai/environments/service/core_routes.py +4 -7
synth_ai/environments/stateful/engine.py +1 -1
synth_ai/environments/tasks/core.py +1 -0
synth_ai/environments/tasks/filters.py +5 -6
synth_ai/environments/tasks/utils.py +4 -5
synth_ai/handshake.py +9 -9
synth_ai/http.py +1 -1
synth_ai/http_client.py +18 -10
synth_ai/inference/client.py +15 -5
synth_ai/jobs/client.py +78 -83
synth_ai/learning/__init__.py +41 -6
synth_ai/learning/algorithms.py +14 -0
synth_ai/learning/client.py +91 -24
synth_ai/learning/config.py +2 -38
synth_ai/learning/ft_client.py +4 -59
synth_ai/learning/health.py +5 -6
synth_ai/learning/jobs.py +31 -47
synth_ai/{rl → learning/rl}/__init__.py +14 -4
synth_ai/learning/rl/client.py +267 -0
synth_ai/learning/rl/config.py +31 -0
synth_ai/{rl → learning/rl}/contracts.py +5 -8
synth_ai/{rl → learning/rl}/env_keys.py +39 -15
synth_ai/learning/rl/secrets.py +13 -0
synth_ai/learning/rl_client.py +2 -281
synth_ai/learning/sft/__init__.py +29 -0
synth_ai/learning/sft/client.py +68 -0
synth_ai/learning/sft/config.py +270 -0
synth_ai/learning/sft/data.py +295 -0
synth_ai/learning/sse.py +25 -24
synth_ai/learning/validators.py +25 -28
synth_ai/lm/__init__.py +21 -47
synth_ai/task/__init__.py +25 -27
synth_ai/task/apps/__init__.py +7 -8
synth_ai/task/auth.py +8 -8
synth_ai/task/client.py +14 -14
synth_ai/task/contracts.py +36 -35
synth_ai/task/datasets.py +6 -5
synth_ai/task/errors.py +10 -10
synth_ai/task/health.py +17 -9
synth_ai/task/json.py +58 -23
synth_ai/task/proxy.py +13 -9
synth_ai/task/rubrics.py +16 -15
synth_ai/task/server.py +12 -12
synth_ai/task/tracing_utils.py +4 -4
synth_ai/task/vendors.py +5 -6
synth_ai/tracing_v3/__init__.py +2 -0
synth_ai/tracing_v3/abstractions.py +21 -4
synth_ai/tracing_v3/decorators.py +18 -16
synth_ai/tracing_v3/hooks.py +5 -5
synth_ai/tracing_v3/llm_call_record_helpers.py +6 -6
synth_ai/tracing_v3/session_tracer.py +40 -14
synth_ai/tracing_v3/storage/base.py +85 -0
synth_ai/tracing_v3/storage/config.py +21 -8
synth_ai/tracing_v3/storage/factory.py +10 -7
synth_ai/tracing_v3/storage/utils.py +4 -2
synth_ai/tracing_v3/turso/daemon.py +7 -2
synth_ai/tracing_v3/turso/models.py +2 -2
synth_ai/tracing_v3/turso/native_manager.py +1173 -0
synth_ai/tracing_v3/utils.py +4 -4
synth_ai/v0/api/__init__.py +8 -0
synth_ai/v0/api/models/__init__.py +8 -0
synth_ai/v0/api/models/supported.py +8 -0
synth_ai/v0/config/__init__.py +15 -0
synth_ai/v0/config/base_url.py +12 -0
synth_ai/v0/lm/__init__.py +51 -0
synth_ai/{lm → v0/lm}/caching/ephemeral.py +2 -2
synth_ai/{lm → v0/lm}/caching/handler.py +4 -4
synth_ai/{lm → v0/lm}/caching/initialize.py +1 -1
synth_ai/{lm → v0/lm}/caching/persistent.py +1 -1
synth_ai/{lm → v0/lm}/config.py +6 -1
synth_ai/{lm → v0/lm}/core/all.py +9 -9
synth_ai/{lm → v0/lm}/core/main.py +6 -6
synth_ai/{lm → v0/lm}/core/main_v3.py +10 -10
synth_ai/{lm → v0/lm}/core/synth_models.py +2 -14
synth_ai/{lm → v0/lm}/core/vendor_clients.py +2 -2
synth_ai/{lm → v0/lm}/overrides.py +2 -2
synth_ai/{lm → v0/lm}/provider_support/anthropic.py +4 -4
synth_ai/{lm → v0/lm}/provider_support/openai.py +5 -5
synth_ai/{lm → v0/lm}/structured_outputs/handler.py +5 -5
synth_ai/{lm → v0/lm}/structured_outputs/rehabilitate.py +1 -1
synth_ai/{lm → v0/lm}/vendors/core/anthropic_api.py +9 -9
synth_ai/{lm → v0/lm}/vendors/core/gemini_api.py +5 -5
synth_ai/{lm → v0/lm}/vendors/core/mistral_api.py +5 -5
synth_ai/{lm → v0/lm}/vendors/core/openai_api.py +10 -10
synth_ai/{lm → v0/lm}/vendors/openai_standard.py +8 -8
synth_ai/{lm → v0/lm}/vendors/openai_standard_responses.py +2 -2
synth_ai/{lm → v0/lm}/vendors/supported/custom_endpoint.py +3 -3
synth_ai/{lm → v0/lm}/vendors/supported/deepseek.py +2 -2
synth_ai/{lm → v0/lm}/vendors/supported/grok.py +2 -2
synth_ai/{lm → v0/lm}/vendors/supported/groq.py +1 -1
synth_ai/{lm → v0/lm}/vendors/supported/ollama.py +1 -1
synth_ai/{lm → v0/lm}/vendors/supported/openrouter.py +3 -3
synth_ai/{lm → v0/lm}/vendors/supported/together.py +1 -1
synth_ai/{lm → v0/lm}/vendors/synth_client.py +1 -1
synth_ai/v0/tracing_v3/__init__.py +10 -0
synth_ai/v0/tracing_v3/abstractions.py +3 -0
synth_ai/v0/tracing_v3/decorators.py +3 -0
synth_ai/v0/tracing_v3/llm_call_record_helpers.py +3 -0
synth_ai/v0/tracing_v3/session_tracer.py +3 -0
{synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.10.dist-info}/METADATA +10 -7
{synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.10.dist-info}/RECORD +269 -233
examples/common_old/backend.py +0 -20
examples/evals_old/README.md +0 -98
examples/evals_old/__init__.py +0 -6
examples/evals_old/compare_models.py +0 -1038
examples/evals_old/example_log.md +0 -145
examples/evals_old/run_demo.sh +0 -126
examples/evals_old/trace_analysis.py +0 -270
examples/finetuning_old/_backup_synth_qwen/config.toml +0 -29
examples/finetuning_old/_backup_synth_qwen/example_log.md +0 -324
examples/finetuning_old/_backup_synth_qwen/filter_traces.py +0 -60
examples/finetuning_old/_backup_synth_qwen/filter_traces_achievements.py +0 -243
examples/finetuning_old/_backup_synth_qwen/purge_v3_traces.py +0 -109
examples/finetuning_old/_backup_synth_qwen/react_agent_lm.py +0 -1924
examples/finetuning_old/_backup_synth_qwen/readme.md +0 -49
examples/finetuning_old/_backup_synth_qwen/run_crafter_qwen4b.py +0 -114
examples/finetuning_old/_backup_synth_qwen/run_demo.sh +0 -195
examples/finetuning_old/_backup_synth_qwen/sft_kickoff.py +0 -119
examples/finetuning_old/synth_qwen_v1/README.md +0 -68
examples/finetuning_old/synth_qwen_v1/filter_traces.py +0 -60
examples/finetuning_old/synth_qwen_v1/filter_traces_achievements.py +0 -243
examples/finetuning_old/synth_qwen_v1/finetune.py +0 -46
examples/finetuning_old/synth_qwen_v1/hello_ft_model.py +0 -71
examples/finetuning_old/synth_qwen_v1/infer.py +0 -36
examples/finetuning_old/synth_qwen_v1/poll.py +0 -46
examples/finetuning_old/synth_qwen_v1/prepare_data.py +0 -35
examples/finetuning_old/synth_qwen_v1/purge_v3_traces.py +0 -109
examples/finetuning_old/synth_qwen_v1/react_agent_lm.py +0 -1933
examples/finetuning_old/synth_qwen_v1/run_crafter_sft_job.py +0 -210
examples/finetuning_old/synth_qwen_v1/run_ft_job.py +0 -237
examples/finetuning_old/synth_qwen_v1/upload_data.py +0 -34
examples/finetuning_old/synth_qwen_v1/util.py +0 -152
examples/rl_old/task_app.py +0 -1131
synth_ai/experimental/synth_oss.py +0 -445
synth_ai/learning/filtering.py +0 -0
synth_ai/learning/offline/dpo.py +0 -0
synth_ai/learning/offline/providers.py +0 -7
synth_ai/learning/offline/sft.py +0 -0
synth_ai/learning/offline/shared.py +0 -0
synth_ai/learning/online/grpo.py +0 -0
synth_ai/learning/online/irft.py +0 -0
synth_ai/learning/prompts/banking77_injection_eval.py +0 -168
synth_ai/learning/prompts/gepa.py +0 -0
synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +0 -211
synth_ai/learning/prompts/mipro.py +0 -289
synth_ai/learning/prompts/random_search.py +0 -249
synth_ai/learning/prompts/run_mipro_banking77.py +0 -172
synth_ai/learning/prompts/run_random_search_banking77.py +0 -329
synth_ai/rl/secrets.py +0 -19
synth_ai/scripts/verify_rewards.py +0 -100
synth_ai/tracing/__init__.py +0 -30
synth_ai/tracing_v1/__init__.py +0 -33
synth_ai/tracing_v3/turso/__init__.py +0 -25
synth_ai/tracing_v3/turso/manager.py +0 -838
synth_ai/zyk/__init__.py +0 -30
/synth_ai/{lm → v0/lm}/caching/__init__.py +0 -0
/synth_ai/{lm → v0/lm}/caching/constants.py +0 -0
/synth_ai/{lm → v0/lm}/caching/dbs.py +0 -0
/synth_ai/{lm → v0/lm}/constants.py +0 -0
/synth_ai/{lm → v0/lm}/core/__init__.py +0 -0
/synth_ai/{lm → v0/lm}/core/exceptions.py +0 -0
/synth_ai/{lm → v0/lm}/cost/__init__.py +0 -0
/synth_ai/{lm → v0/lm}/cost/monitor.py +0 -0
/synth_ai/{lm → v0/lm}/cost/statefulness.py +0 -0
/synth_ai/{lm → v0/lm}/injection.py +0 -0
/synth_ai/{lm → v0/lm}/provider_support/__init__.py +0 -0
/synth_ai/{lm → v0/lm}/provider_support/suppress_logging.py +0 -0
/synth_ai/{lm → v0/lm}/structured_outputs/__init__.py +0 -0
/synth_ai/{lm → v0/lm}/structured_outputs/inject.py +0 -0
/synth_ai/{lm → v0/lm}/tools/__init__.py +0 -0
/synth_ai/{lm → v0/lm}/tools/base.py +0 -0
/synth_ai/{lm → v0/lm}/unified_interface.py +0 -0
/synth_ai/{lm → v0/lm}/vendors/__init__.py +0 -0
/synth_ai/{lm → v0/lm}/vendors/base.py +0 -0
/synth_ai/{lm → v0/lm}/vendors/core/__init__.py +0 -0
/synth_ai/{lm → v0/lm}/vendors/core/synth_dev_api.py +0 -0
/synth_ai/{lm → v0/lm}/vendors/local/__init__.py +0 -0
/synth_ai/{lm → v0/lm}/vendors/local/ollama.py +0 -0
/synth_ai/{lm → v0/lm}/vendors/retries.py +0 -0
/synth_ai/{lm → v0/lm}/vendors/supported/__init__.py +0 -0
/synth_ai/{lm → v0/lm}/warmup.py +0 -0
{synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.10.dist-info}/WHEEL +0 -0
{synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.10.dist-info}/entry_points.txt +0 -0
{synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.10.dist-info}/licenses/LICENSE +0 -0
{synth_ai-0.2.9.dev7.dist-info → synth_ai-0.2.10.dist-info}/top_level.txt +0 -0

examples/rl/run_eval.py CHANGED Viewed

@@ -5,24 +5,24 @@ from __future__ import annotations
 import argparse
 import asyncio
+import contextlib
 import json
 import os
-from pathlib import Path
-from typing import Any, Dict, List, Optional, Tuple
+import tomllib
+from typing import Any
 import httpx
-import tomllib
 class TaskAppClient:
     """Minimal async client for math single-step task app."""
-    def __init__(self, base_url: str, api_key: Optional[str] = None) -> None:
+    def __init__(self, base_url: str, api_key: str | None = None) -> None:
         self.base_url = base_url.rstrip("/")
         self.api_key = api_key
-        self._client: Optional[httpx.AsyncClient] = None
+        self._client: httpx.AsyncClient | None = None
-    async def __aenter__(self) -> "TaskAppClient":
+    async def __aenter__(self) -> TaskAppClient:
         headers = {"X-API-Key": self.api_key} if self.api_key else {}
         self._client = httpx.AsyncClient(
             base_url=self.base_url,
@@ -49,32 +49,30 @@ class TaskAppClient:
             )
         return self._client
-    async def initialize(self, split: str, seed: int | None) -> Dict[str, Any]:
-        payload: Dict[str, Any] = {"config": {"split": split}}
+    async def initialize(self, split: str, seed: int | None) -> dict[str, Any]:
+        payload: dict[str, Any] = {"config": {"split": split}}
         if seed is not None:
             payload["seed"] = seed
         resp = await self.client.post("/env/math/initialize", json=payload)
         resp.raise_for_status()
         return resp.json()
-    async def step(self, env_id: str, tool_calls: List[Dict[str, Any]]) -> Dict[str, Any]:
+    async def step(self, env_id: str, tool_calls: list[dict[str, Any]]) -> dict[str, Any]:
         payload = {"env_id": env_id, "action": {"tool_calls": tool_calls}}
         resp = await self.client.post("/env/math/step", json=payload)
         resp.raise_for_status()
         return resp.json()
     async def terminate(self, env_id: str) -> None:
-        try:
+        with contextlib.suppress(Exception):
             await self.client.post("/env/math/terminate", json={"env_id": env_id})
-        except Exception:
-            pass
-    async def get_info(self) -> Dict[str, Any]:
+    async def get_info(self) -> dict[str, Any]:
         resp = await self.client.get("/info")
         resp.raise_for_status()
         return resp.json()
-    async def rollout(self, payload: Dict[str, Any]) -> Dict[str, Any]:
+    async def rollout(self, payload: dict[str, Any]) -> dict[str, Any]:
         resp = await self.client.post("/rollout", json=payload)
         resp.raise_for_status()
         return resp.json()
@@ -82,10 +80,10 @@ class TaskAppClient:
     async def post_inference(
         self,
         url: str,
-        payload: Dict[str, Any],
+        payload: dict[str, Any],
         *,
-        headers: Dict[str, str] | None = None,
-    ) -> Dict[str, Any]:
+        headers: dict[str, str] | None = None,
+    ) -> dict[str, Any]:
         async with httpx.AsyncClient(timeout=httpx.Timeout(60.0)) as c:
             resp = await c.post(url, json=payload, headers=headers)
         resp.raise_for_status()
@@ -96,7 +94,7 @@ TOOL_NAME = "math_submit"
 DEFAULT_SPLIT = os.getenv("MATH_EVAL_DEFAULT_SPLIT", "validation")
-def _math_tool_schema() -> List[Dict[str, Any]]:
+def _math_tool_schema() -> list[dict[str, Any]]:
     return [
         {
             "type": "function",
@@ -123,7 +121,7 @@ def _math_tool_schema() -> List[Dict[str, Any]]:
     ]
-def _build_messages(problem: str) -> List[Dict[str, Any]]:
+def _build_messages(problem: str) -> list[dict[str, Any]]:
     return [
         {
             "role": "system",
@@ -139,18 +137,18 @@ def _build_messages(problem: str) -> List[Dict[str, Any]]:
     ]
-def _parse_tool_calls(data: Dict[str, Any]) -> List[Dict[str, Any]]:
+def _parse_tool_calls(data: dict[str, Any]) -> list[dict[str, Any]]:
     choices = data.get("choices") or []
     if not choices:
         return []
     message = choices[0].get("message") or {}
     raw_calls = message.get("tool_calls") or []
-    tool_calls: List[Dict[str, Any]] = []
+    tool_calls: list[dict[str, Any]] = []
     for call in raw_calls:
         function = call.get("function") or {}
         name = function.get("name")
         arguments = function.get("arguments")
-        parsed_args: Dict[str, Any]
+        parsed_args: dict[str, Any]
         if isinstance(arguments, str):
             try:
                 parsed_args = json.loads(arguments)
@@ -164,7 +162,7 @@ def _parse_tool_calls(data: Dict[str, Any]) -> List[Dict[str, Any]]:
     return tool_calls
-def _detect_provider(model: str, hint: Optional[str]) -> str:
+def _detect_provider(model: str, hint: str | None) -> str:
     if hint:
         return hint.lower()
     lowered = (model or "").lower()
@@ -193,10 +191,10 @@ async def _choose_actions(
     provider: str,
     model: str,
     problem: str,
-    policy_cfg: Dict[str, Any],
-) -> Tuple[List[Dict[str, Any]], Dict[str, Any]]:
+    policy_cfg: dict[str, Any],
+) -> tuple[list[dict[str, Any]], dict[str, Any]]:
     messages = _build_messages(problem)
-    payload: Dict[str, Any] = {
+    payload: dict[str, Any] = {
         "model": model,
         "messages": messages,
         "tools": _math_tool_schema(),
@@ -239,7 +237,7 @@ async def _choose_actions(
     return tool_calls, body
-def _tool_to_answer(tool_calls: List[Dict[str, Any]]) -> str:
+def _tool_to_answer(tool_calls: list[dict[str, Any]]) -> str:
     if not tool_calls:
         return ""
     args = tool_calls[0].get("args") or {}
@@ -251,11 +249,11 @@ async def eval_episode(
     client: TaskAppClient,
     *,
     split: str,
-    seed: Optional[int],
+    seed: int | None,
     model: str,
     provider: str,
-    policy_cfg: Dict[str, Any],
-) -> Dict[str, Any]:
+    policy_cfg: dict[str, Any],
+) -> dict[str, Any]:
     created = await client.initialize(split, seed)
     env_id = created["env_id"]
     observation = created.get("observation") or {}
@@ -288,10 +286,10 @@ async def eval_via_rollout(
     *,
     run_id: str,
     split: str,
-    seed: Optional[int],
+    seed: int | None,
     model: str,
-    policy_cfg: Dict[str, Any],
-) -> Dict[str, Any]:
+    policy_cfg: dict[str, Any],
+) -> dict[str, Any]:
     payload = {
         "run_id": run_id,
         "env": {
@@ -314,6 +312,7 @@ async def eval_via_rollout(
     steps = traj.get("steps") or []
     step = steps[0] if steps else {}
     info = step.get("info") or {}
+    observation = step.get("obs") or {}
     return {
         "seed": seed,
         "split": split,
@@ -328,14 +327,14 @@ async def eval_via_rollout(
     }
-def _load_config(path: Optional[str]) -> Dict[str, Any]:
+def _load_config(path: str | None) -> dict[str, Any]:
     if not path:
         return {}
     with open(path, "rb") as fh:
         return tomllib.load(fh)
-def _default_policy_cfg(cfg: Dict[str, Any]) -> Dict[str, Any]:
+def _default_policy_cfg(cfg: dict[str, Any]) -> dict[str, Any]:
     policy = dict(cfg.get("policy") or {})
     if "inference_url" not in policy:
         env_url = os.getenv("INFERENCE_URL")
@@ -380,8 +379,8 @@ async def main() -> None:
     api_key = os.getenv("ENVIRONMENT_API_KEY")
     successes = 0
-    failures: Dict[str, int] = {}
-    results: List[Dict[str, Any]] = []
+    failures: dict[str, int] = {}
+    results: list[dict[str, Any]] = []
     async with TaskAppClient(task_app_url, api_key=api_key) as client:
         for episode in range(episodes):

examples/rl/run_rl_and_save.py CHANGED Viewed

@@ -7,14 +7,14 @@ import argparse
 import json
 import os
 import sys
+import tomllib
 from pathlib import Path
-from typing import Any, Dict
+from typing import Any
 import requests
-import tomllib
-def _load_toml(path: Path) -> Dict[str, Any]:
+def _load_toml(path: Path) -> dict[str, Any]:
     if not path.exists():
         print(f"config not found: {path}", file=sys.stderr)
         sys.exit(2)
@@ -65,7 +65,7 @@ def main() -> None:
         )
         sys.exit(2)
-    payload: Dict[str, Any] = {
+    payload: dict[str, Any] = {
         "job_type": "rl",
         "compute": cfg.get("compute", {}),
         "data": {
@@ -77,7 +77,7 @@ def main() -> None:
     backend = str(args.backend).rstrip("/")
     url = f"{backend}/rl/jobs"
-    api_key = (os.getenv("SYNTH_API_KEY") or os.getenv("synth_key") or "").strip()
+    api_key = (os.getenv("SYNTH_API_KEY") or os.getenv("SYNTH_KEY") or "").strip()
     if not api_key:
         print("Missing SYNTH_API_KEY in env", file=sys.stderr)
         sys.exit(2)

examples/rl/task_app/math_single_step.py CHANGED Viewed

@@ -1,21 +1,22 @@
-from __future__ import annotations
 """Task app configuration for a single-step math reasoning environment."""
+from __future__ import annotations
 import contextlib
 import os
 import random
 import re
 import uuid
+from collections.abc import Iterable, Mapping, MutableMapping, Sequence
 from dataclasses import dataclass
 from pathlib import Path
-from typing import Any, Dict, Iterable, Mapping, MutableMapping, Optional, Sequence, cast
+from typing import Any, cast
 import httpx
 from datasets import load_dataset
 from fastapi import APIRouter, HTTPException, Request
 from pydantic import BaseModel, Field
+from synth_ai.task.apps import ModalDeploymentConfig, TaskAppEntry, register_task_app
 from synth_ai.task.contracts import (
     RolloutMetrics,
     RolloutRequest,
@@ -25,9 +26,9 @@ from synth_ai.task.contracts import (
     TaskInfo,
 )
 from synth_ai.task.datasets import TaskDatasetRegistry, TaskDatasetSpec
+from synth_ai.task.errors import http_exception
 from synth_ai.task.rubrics import Rubric, load_rubric
 from synth_ai.task.server import ProxyConfig, RubricBundle, TaskAppConfig
-from synth_ai.task.errors import http_exception
 from synth_ai.task.tracing_utils import (
     build_tracer_factory,
     resolve_sft_output_dir,
@@ -35,7 +36,6 @@ from synth_ai.task.tracing_utils import (
     tracing_env_enabled,
 )
 from synth_ai.task.vendors import normalize_vendor_keys
-from synth_ai.task.apps import ModalDeploymentConfig, TaskAppEntry, register_task_app
 from synth_ai.tracing_v3.session_tracer import SessionTracer
 REPO_ROOT = Path(__file__).resolve().parents[3]
@@ -43,7 +43,7 @@ REPO_ROOT = Path(__file__).resolve().parents[3]
 _modal_volume_candidate = Path(
     os.getenv("MATH_MODAL_DATASET_DIR", "/modal_volumes/math_dataset")
 ).expanduser()
-_modal_volume_root: Optional[Path] = None
+_modal_volume_root: Path | None = None
 try:
     _modal_volume_candidate.mkdir(parents=True, exist_ok=True)
     _modal_volume_root = _modal_volume_candidate
@@ -105,7 +105,7 @@ MATH_DATASET_SPEC = TaskDatasetSpec(
 _BOXED_MARKERS: tuple[str, ...] = ("\\boxed", "boxed")
-def _extract_boxed(text: str) -> Optional[str]:
+def _extract_boxed(text: str) -> str | None:
     if not text:
         return None
     for marker in _BOXED_MARKERS:
@@ -174,9 +174,9 @@ class MathDataset:
         self.name = name
         self.config = config
         self.splits = [split for split in splits if split]
-        self._cache: Dict[str, Any] = {}
+        self._cache: dict[str, Any] = {}
         self._local_dir = os.getenv("MATH_DATASET_LOCAL_DIR")
-        self._hf_token: Optional[str] = None
+        self._hf_token: str | None = None
         for key in HF_TOKEN_ENV_KEYS:
             value = os.getenv(key)
             if value:
@@ -186,7 +186,7 @@ class MathDataset:
                     break
         # No multi-candidate fallback: enforce explicit dataset id
-    def _local_file_for_split(self, split: str) -> Optional[Path]:
+    def _local_file_for_split(self, split: str) -> Path | None:
         specific = os.getenv(f"MATH_DATASET_LOCAL_{split.upper()}_FILE")
         if specific:
             path = Path(specific).expanduser()
@@ -213,7 +213,7 @@ class MathDataset:
                 self._cache[split] = dataset["train"]
             else:
                 try:
-                    load_kwargs: Dict[str, Any] = {"split": split}
+                    load_kwargs: dict[str, Any] = {"split": split}
                     if self.config:
                         load_kwargs["name"] = self.config
                     if self._hf_token:
@@ -227,7 +227,7 @@ class MathDataset:
                             tmp_path = target.with_name(target.name + ".tmp")
                             try:
                                 local_dir.mkdir(parents=True, exist_ok=True)
-                                getattr(ds, "to_json")(str(tmp_path))
+                                ds.to_json(str(tmp_path))
                                 tmp_path.replace(target)
                             except Exception:
                                 with contextlib.suppress(FileNotFoundError):
@@ -241,7 +241,7 @@ class MathDataset:
                     raise RuntimeError(" ".join(hints)) from exc
         return self._cache[split]
-    def sample(self, *, split: str, index: Optional[int] = None) -> Dict[str, Any]:
+    def sample(self, *, split: str, index: int | None = None) -> dict[str, Any]:
         dataset = self._load_split(split)
         if len(dataset) == 0:
             raise RuntimeError(f"Dataset split '{split}' is empty")
@@ -326,9 +326,9 @@ class MathEnvironmentManager:
     def __init__(self, dataset: MathDataset) -> None:
         self.dataset = dataset
-        self._states: Dict[str, MathEnvState] = {}
+        self._states: dict[str, MathEnvState] = {}
-    def create(self, *, split: str, index: Optional[int], seed: Optional[int]) -> MathEnvState:
+    def create(self, *, split: str, index: int | None, seed: int | None) -> MathEnvState:
         if index is None and seed is not None:
             index = seed
         sample = self.dataset.sample(split=split, index=index)
@@ -354,11 +354,11 @@ class MathEnvironmentManager:
 class InitializePayload(BaseModel):
-    seed: Optional[int] = None
-    config: Dict[str, Any] = Field(default_factory=dict)
+    seed: int | None = None
+    config: dict[str, Any] = Field(default_factory=dict)
-def _observation_from_state(state: MathEnvState) -> Dict[str, Any]:
+def _observation_from_state(state: MathEnvState) -> dict[str, Any]:
     return {
         "problem": state.problem,
         "split": state.split,
@@ -390,12 +390,12 @@ def _score_submission(
 math_router = APIRouter()
-def _preview_tool_calls(tool_calls: Sequence[Mapping[str, Any]]) -> list[Dict[str, Any]]:
+def _preview_tool_calls(tool_calls: Sequence[Mapping[str, Any]]) -> list[dict[str, Any]]:
     """Return a compact, log-friendly preview of tool calls.
     Truncates long fields to avoid noisy logs and leaking excessive content.
     """
-    preview: list[Dict[str, Any]] = []
+    preview: list[dict[str, Any]] = []
     for call in list(tool_calls or [])[:3]:
         args = dict(call.get("args") or {})
         answer = str(args.get("answer") or "")
@@ -412,7 +412,7 @@ def _preview_tool_calls(tool_calls: Sequence[Mapping[str, Any]]) -> list[Dict[st
 def _event_and_outcome_components(
     tool_calls: Sequence[Mapping[str, Any]], *, correct: bool, reward: float
-) -> Dict[str, float]:
+) -> dict[str, float]:
     """Approximate component-wise scores for RL-style logs.
     - env:     task-level scalar reward (our single-step outcome)
@@ -434,7 +434,7 @@ def _event_and_outcome_components(
 @math_router.post("/env/math/initialize")
-async def initialize_env(request: Request, payload: InitializePayload) -> Dict[str, Any]:
+async def initialize_env(request: Request, payload: InitializePayload) -> dict[str, Any]:
     manager: MathEnvironmentManager = request.app.state.math_env_manager
     split = str(payload.config.get("split") or DEFAULT_SPLIT)
     seed = payload.seed
@@ -450,7 +450,7 @@ async def initialize_env(request: Request, payload: InitializePayload) -> Dict[s
 @math_router.post("/env/math/step")
-async def step_env(request: Request, payload: Dict[str, Any]) -> Dict[str, Any]:
+async def step_env(request: Request, payload: dict[str, Any]) -> dict[str, Any]:
     manager: MathEnvironmentManager = request.app.state.math_env_manager
     env_id = str(payload.get("env_id") or "")
     if not env_id:
@@ -463,7 +463,7 @@ async def step_env(request: Request, payload: Dict[str, Any]) -> Dict[str, Any]:
     action = payload.get("action") or {}
     tool_calls = action.get("tool_calls") or payload.get("tool_calls") or []
     reward, status, correct = _score_submission(state, tool_calls)
-    try:
+    with contextlib.suppress(Exception):
         print(
             "[MATH_STEP] env_id=",
             state.env_id,
@@ -483,8 +483,6 @@ async def step_env(request: Request, payload: Dict[str, Any]) -> Dict[str, Any]:
             _event_and_outcome_components(tool_calls, correct=correct, reward=reward),
             flush=True,
         )
-    except Exception:
-        pass
     state.done = True
     observation = _observation_from_state(state)
@@ -502,7 +500,7 @@ async def step_env(request: Request, payload: Dict[str, Any]) -> Dict[str, Any]:
 @math_router.post("/env/math/terminate")
-async def terminate_env(request: Request, payload: Dict[str, Any]) -> Dict[str, Any]:
+async def terminate_env(request: Request, payload: dict[str, Any]) -> dict[str, Any]:
     manager: MathEnvironmentManager = request.app.state.math_env_manager
     env_id = str(payload.get("env_id") or "")
     if env_id:
@@ -525,7 +523,7 @@ def _resolve_inference_url(base_url: str) -> str:
 async def _call_inference(
     policy_config: Mapping[str, Any], observation: Mapping[str, Any]
-) -> tuple[list[Dict[str, Any]], Dict[str, Any]]:
+) -> tuple[list[dict[str, Any]], dict[str, Any]]:
     inference_url = str(policy_config.get("inference_url") or "").rstrip("/")
     if not inference_url:
         raise RuntimeError("policy.config.inference_url required for rollout")
@@ -557,7 +555,7 @@ async def _call_inference(
         },
     ]
-    payload: Dict[str, Any] = {
+    payload: dict[str, Any] = {
         "model": model,
         "messages": messages,
         "tools": [
@@ -626,7 +624,7 @@ async def _call_inference(
             function = call.get("function") or {}
             name = function.get("name")
             arguments = function.get("arguments")
-            parsed_args: Dict[str, Any]
+            parsed_args: dict[str, Any]
             if isinstance(arguments, str):
                 try:
                     import json
@@ -640,7 +638,7 @@ async def _call_inference(
                 parsed_args = {}
             tool_calls.append({"tool": name, "args": parsed_args})
     # Lightweight provider-side logging
-    try:
+    with contextlib.suppress(Exception):
         print(
             "[MATH_INFER] model=",
             model,
@@ -648,8 +646,6 @@ async def _call_inference(
             _preview_tool_calls(tool_calls),
             flush=True,
         )
-    except Exception:
-        pass
     return tool_calls, data
@@ -664,9 +660,9 @@ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) ->
         "index": sample["index"],
     }
-    tool_calls: list[Dict[str, Any]] = []
-    inference_payload: Dict[str, Any] | None = None
-    error_info: Dict[str, Any] = {}
+    tool_calls: list[dict[str, Any]] = []
+    inference_payload: dict[str, Any] | None = None
+    error_info: dict[str, Any] = {}
     try:
         tool_calls, inference_payload = await _call_inference(
             request.policy.config or {}, observation
@@ -691,7 +687,7 @@ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) ->
     )
     # Log a concise summary so we can debug reward=0 issues in production
-    try:
+    with contextlib.suppress(Exception):
         print(
             "[MATH_ROLLOUT] run=",
             request.run_id,
@@ -711,8 +707,6 @@ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) ->
             _event_and_outcome_components(tool_calls, correct=correct, reward=reward),
             flush=True,
         )
-    except Exception:
-        pass
     step = RolloutStep(
         obs=observation,
@@ -749,6 +743,34 @@ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) ->
         details={"status": status, "correct": correct},
     )
+    # Include a minimal trace when requested or tracing is enabled via env
+    include_trace = bool(
+        (request.record and getattr(request.record, "return_trace", False))
+        or os.getenv("TASKAPP_TRACING_ENABLED")
+    )
+    trace_payload = None
+    if include_trace:
+        try:
+            # Minimal structured trace for assertions
+            trace_payload = {
+                "session_id": str(uuid.uuid4()),
+                "events_count": 1,
+                "decision_rewards": [reward],
+                "lm_calls": (
+                    [{"prompt": str(observation.get("problem", "")), "response": str(tool_calls)}]
+                    if tool_calls
+                    else []
+                ),
+                "metadata": {
+                    "env": "math_single_step",
+                    "split": sample["split"],
+                    "index": sample["index"],
+                    "status": status,
+                },
+            }
+        except Exception:
+            trace_payload = None
     return RolloutResponse(
         run_id=request.run_id,
         trajectories=[trajectory],
@@ -756,7 +778,7 @@ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) ->
         metrics=metrics,
         aborted=False,
         ops_executed=2,
-        trace=None,
+        trace=trace_payload,
     )
@@ -854,7 +876,7 @@ EVENTS_RUBRIC: Rubric = cast(
 )
-def describe_taskset(dataset: MathDataset) -> Dict[str, Any]:
+def describe_taskset(dataset: MathDataset) -> dict[str, Any]:
     return {
         **MATH_DATASET_SPEC.model_dump(),
         "hf_dataset": DATASET_NAME,
@@ -895,7 +917,7 @@ def build_config() -> TaskAppConfig:
     )
     sft_output_dir = resolve_sft_output_dir()
-    app_state: Dict[str, Any] = {
+    app_state: dict[str, Any] = {
         "math_dataset": dataset,
         "math_env_manager": MathEnvironmentManager(dataset),
         "tracing_enabled": tracing_enabled,

examples/rl/task_app/math_task_app.py CHANGED Viewed

@@ -8,10 +8,10 @@ from pathlib import Path
 from fastapi.exceptions import RequestValidationError
 from fastapi.responses import JSONResponse
 from starlette.requests import Request
+from synth_ai.task.auth import is_api_key_header_authorized, normalize_environment_api_key
 from synth_ai.task.server import create_task_app, run_task_app
 from .math_single_step import build_config
-from synth_ai.task.auth import is_api_key_header_authorized, normalize_environment_api_key
 def fastapi_app():
@@ -73,7 +73,7 @@ def fastapi_app():
         try:
             hdr = request.headers
             snapshot = {
-                "path": str(getattr(request, "url").path),
+                "path": str(request.url.path),
                 "have_x_api_key": bool(hdr.get("x-api-key")),
                 "have_x_api_keys": bool(hdr.get("x-api-keys")),
                 "have_authorization": bool(hdr.get("authorization")),

synth-ai 0.2.9.dev7__py3-none-any.whl → 0.2.10__py3-none-any.whl

Potentially problematic release.

synth-ai 0.2.9.dev7__py3-none-any.whl → 0.2.10__py3-none-any.whl