PyPI - synth-ai - Versions diffs - 0.2.16__py3-none-any.whl → 0.2.19__py3-none-any.whl - Mend

synth-ai 0.2.16__py3-none-any.whl → 0.2.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of synth-ai might be problematic. Click here for more details.

Files changed (299) hide show

examples/analyze_semantic_words.sh +2 -2
examples/baseline/banking77_baseline.py +204 -0
examples/baseline/crafter_baseline.py +407 -0
examples/baseline/pokemon_red_baseline.py +326 -0
examples/baseline/simple_baseline.py +56 -0
examples/baseline/warming_up_to_rl_baseline.py +239 -0
examples/blog_posts/gepa/README.md +355 -0
examples/blog_posts/gepa/configs/banking77_gepa_local.toml +95 -0
examples/blog_posts/gepa/configs/banking77_gepa_test.toml +82 -0
examples/blog_posts/gepa/configs/banking77_mipro_local.toml +52 -0
examples/blog_posts/gepa/configs/hotpotqa_gepa_local.toml +59 -0
examples/blog_posts/gepa/configs/hotpotqa_gepa_qwen.toml +36 -0
examples/blog_posts/gepa/configs/hotpotqa_mipro_local.toml +53 -0
examples/blog_posts/gepa/configs/hover_gepa_local.toml +59 -0
examples/blog_posts/gepa/configs/hover_gepa_qwen.toml +36 -0
examples/blog_posts/gepa/configs/hover_mipro_local.toml +53 -0
examples/blog_posts/gepa/configs/ifbench_gepa_local.toml +59 -0
examples/blog_posts/gepa/configs/ifbench_gepa_qwen.toml +36 -0
examples/blog_posts/gepa/configs/ifbench_mipro_local.toml +53 -0
examples/blog_posts/gepa/configs/pupa_gepa_local.toml +60 -0
examples/blog_posts/gepa/configs/pupa_mipro_local.toml +54 -0
examples/blog_posts/gepa/deploy_banking77_task_app.sh +41 -0
examples/blog_posts/gepa/gepa_baseline.py +204 -0
examples/blog_posts/gepa/query_prompts_example.py +97 -0
examples/blog_posts/gepa/run_gepa_banking77.sh +87 -0
examples/blog_posts/gepa/task_apps.py +105 -0
examples/blog_posts/gepa/test_gepa_local.sh +67 -0
examples/blog_posts/gepa/verify_banking77_setup.sh +123 -0
examples/blog_posts/pokemon_vl/README.md +98 -0
examples/blog_posts/pokemon_vl/configs/eval_gpt5nano.toml +26 -0
examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +27 -0
examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +43 -0
examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
examples/blog_posts/pokemon_vl/extract_images.py +239 -0
examples/blog_posts/pokemon_vl/pokemon_vl_baseline.py +326 -0
examples/blog_posts/pokemon_vl/run_eval_extract_images.py +209 -0
examples/blog_posts/pokemon_vl/run_qwen_eval_extract_images.py +212 -0
examples/blog_posts/pokemon_vl/text_box_analysis.md +106 -0
examples/blog_posts/warming_up_to_rl/ARCHITECTURE.md +195 -0
examples/blog_posts/warming_up_to_rl/FINAL_TEST_RESULTS.md +127 -0
examples/blog_posts/warming_up_to_rl/INFERENCE_SUCCESS.md +132 -0
examples/blog_posts/warming_up_to_rl/README.md +158 -0
examples/blog_posts/warming_up_to_rl/SMOKE_TESTING.md +164 -0
examples/blog_posts/warming_up_to_rl/SMOKE_TEST_COMPLETE.md +253 -0
examples/blog_posts/warming_up_to_rl/configs/eval_baseline_qwen32b_10x20.toml +25 -0
examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b_10x20.toml +26 -0
examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
examples/blog_posts/warming_up_to_rl/configs/smoke_test.toml +75 -0
examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +91 -0
examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
examples/blog_posts/warming_up_to_rl/warming_up_to_rl_baseline.py +187 -0
examples/dev/qwen3_32b_qlora_4xh100.toml +5 -0
examples/multi_step/configs/VERILOG_REWARDS.md +4 -0
examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +4 -0
examples/multi_step/configs/crafter_rl_outcome.toml +2 -1
examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +65 -107
examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +2 -1
examples/multi_step/configs/crafter_rl_stepwise_simple.toml +2 -1
examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
examples/multi_step/configs/verilog_rl_lora.toml +80 -123
examples/qwen_coder/configs/coder_lora_30b.toml +1 -3
examples/qwen_coder/configs/coder_lora_4b.toml +4 -1
examples/qwen_coder/configs/coder_lora_small.toml +1 -3
examples/qwen_vl/README.md +10 -12
examples/qwen_vl/SETUP_COMPLETE.md +7 -8
examples/qwen_vl/VISION_TESTS_COMPLETE.md +2 -3
examples/qwen_vl/collect_data_via_cli.md +76 -84
examples/qwen_vl/collect_vision_traces.py +4 -4
examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +40 -57
examples/qwen_vl/configs/crafter_vlm_sft_example.toml +1 -2
examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +20 -37
examples/qwen_vl/configs/eval_gpt5nano_vision.toml +21 -40
examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
examples/qwen_vl/configs/{filter_qwen2vl_sft.toml → filter_qwen3vl_sft.toml} +4 -5
examples/qwen_vl/configs/filter_vision_sft.toml +2 -3
examples/qwen_vl/crafter_qwen_vl_agent.py +5 -5
examples/qwen_vl/run_vision_comparison.sh +6 -7
examples/rl/README.md +5 -5
examples/rl/configs/rl_from_base_qwen.toml +26 -1
examples/rl/configs/rl_from_base_qwen17.toml +6 -2
examples/rl/task_app/README.md +1 -2
examples/rl/task_app/math_single_step.py +2 -2
examples/run_crafter_demo.sh +2 -2
examples/sft/README.md +1 -1
examples/sft/configs/crafter_fft_qwen0p6b.toml +4 -1
examples/sft/configs/crafter_lora_qwen0p6b.toml +4 -1
examples/swe/task_app/README.md +32 -2
examples/swe/task_app/grpo_swe_mini.py +4 -0
examples/swe/task_app/hosted/envs/crafter/react_agent.py +1 -1
examples/swe/task_app/hosted/envs/mini_swe/environment.py +37 -10
examples/swe/task_app/hosted/inference/openai_client.py +4 -38
examples/swe/task_app/hosted/policy_routes.py +17 -0
examples/swe/task_app/hosted/rollout.py +4 -2
examples/swe/task_app/morph_backend.py +178 -0
examples/task_apps/banking77/__init__.py +6 -0
examples/task_apps/banking77/banking77_task_app.py +841 -0
examples/task_apps/banking77/deploy_wrapper.py +46 -0
examples/task_apps/crafter/CREATE_SFT_DATASET.md +4 -0
examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +4 -0
examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +4 -0
examples/task_apps/crafter/task_app/README.md +1 -1
examples/task_apps/crafter/task_app/grpo_crafter.py +90 -5
examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +1 -1
examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +4 -26
examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -2
examples/task_apps/crafter/task_app/synth_envs_hosted/hosted_app.py +49 -0
examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +372 -107
examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +81 -12
examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +82 -11
examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +194 -1
examples/task_apps/enron/task_app/grpo_enron_task_app.py +1 -1
examples/task_apps/gepa_benchmarks/__init__.py +7 -0
examples/task_apps/gepa_benchmarks/common.py +260 -0
examples/task_apps/gepa_benchmarks/hotpotqa_task_app.py +507 -0
examples/task_apps/gepa_benchmarks/hover_task_app.py +436 -0
examples/task_apps/gepa_benchmarks/ifbench_task_app.py +563 -0
examples/task_apps/gepa_benchmarks/pupa_task_app.py +460 -0
examples/task_apps/math/README.md +1 -2
examples/task_apps/pokemon_red/README.md +3 -4
examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +4 -0
examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +6 -5
examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +1 -2
examples/task_apps/pokemon_red/task_app.py +288 -39
examples/task_apps/sokoban/README.md +2 -3
examples/task_apps/verilog/eval_groq_qwen32b.toml +12 -14
examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +1 -1
examples/vlm/configs/crafter_vlm_gpt4o.toml +4 -1
examples/warming_up_to_rl/configs/crafter_fft.toml +4 -1
examples/warming_up_to_rl/configs/crafter_fft_4b.toml +0 -2
examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +3 -2
examples/warming_up_to_rl/run_local_rollout_traced.py +1 -1
examples/warming_up_to_rl/task_app/README.md +1 -1
examples/warming_up_to_rl/task_app/grpo_crafter.py +185 -5
examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +1 -1
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +3 -27
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -1
examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +49 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +156 -45
examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +37 -4
examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +33 -3
examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +67 -0
examples/workflows/math_rl/configs/rl_from_base_qwen.toml +27 -0
examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +6 -0
synth_ai/api/train/builders.py +99 -4
synth_ai/api/train/cli.py +516 -26
synth_ai/api/train/config_finder.py +13 -2
synth_ai/api/train/configs/__init__.py +23 -2
synth_ai/api/train/configs/prompt_learning.py +442 -0
synth_ai/api/train/configs/rl.py +61 -7
synth_ai/api/train/configs/sft.py +6 -2
synth_ai/api/train/configs/shared.py +59 -2
synth_ai/api/train/task_app.py +1 -1
synth_ai/api/train/validators.py +277 -0
synth_ai/auth/credentials.py +119 -0
synth_ai/baseline/__init__.py +25 -0
synth_ai/baseline/config.py +209 -0
synth_ai/baseline/discovery.py +214 -0
synth_ai/baseline/execution.py +146 -0
synth_ai/cli/__init__.py +94 -18
synth_ai/cli/__main__.py +0 -0
synth_ai/cli/claude.py +70 -0
synth_ai/cli/codex.py +84 -0
synth_ai/cli/commands/__init__.py +18 -0
synth_ai/cli/commands/baseline/__init__.py +12 -0
synth_ai/cli/commands/baseline/core.py +637 -0
synth_ai/cli/commands/baseline/list.py +93 -0
synth_ai/cli/commands/demo/__init__.py +6 -0
synth_ai/cli/commands/demo/core.py +163 -0
synth_ai/cli/commands/eval/__init__.py +19 -0
synth_ai/cli/commands/eval/core.py +1112 -0
synth_ai/cli/commands/eval/errors.py +81 -0
synth_ai/cli/commands/eval/validation.py +133 -0
synth_ai/cli/commands/filter/__init__.py +12 -0
synth_ai/cli/commands/filter/core.py +424 -0
synth_ai/cli/commands/filter/errors.py +55 -0
synth_ai/cli/commands/filter/validation.py +77 -0
synth_ai/cli/commands/help/__init__.py +177 -0
synth_ai/cli/commands/help/core.py +72 -0
synth_ai/cli/commands/smoke/__init__.py +7 -0
synth_ai/cli/commands/smoke/core.py +1436 -0
synth_ai/cli/commands/status/__init__.py +64 -0
synth_ai/cli/commands/status/client.py +192 -0
synth_ai/cli/commands/status/config.py +92 -0
synth_ai/cli/commands/status/errors.py +20 -0
synth_ai/cli/commands/status/formatters.py +164 -0
synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
synth_ai/cli/commands/status/subcommands/files.py +79 -0
synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
synth_ai/cli/commands/status/subcommands/models.py +79 -0
synth_ai/cli/commands/status/subcommands/pricing.py +22 -0
synth_ai/cli/commands/status/subcommands/runs.py +81 -0
synth_ai/cli/commands/status/subcommands/summary.py +47 -0
synth_ai/cli/commands/status/subcommands/usage.py +203 -0
synth_ai/cli/commands/status/utils.py +114 -0
synth_ai/cli/commands/train/__init__.py +53 -0
synth_ai/cli/commands/train/core.py +21 -0
synth_ai/cli/commands/train/errors.py +117 -0
synth_ai/cli/commands/train/judge_schemas.py +200 -0
synth_ai/cli/commands/train/judge_validation.py +305 -0
synth_ai/cli/commands/train/validation.py +386 -0
synth_ai/cli/demo.py +30 -158
synth_ai/cli/deploy/__init__.py +43 -0
synth_ai/cli/deploy.py +162 -0
synth_ai/cli/eval/__init__.py +36 -0
synth_ai/cli/eval/core.py +5 -0
synth_ai/cli/eval/errors.py +31 -0
synth_ai/cli/eval/validation.py +5 -0
synth_ai/cli/filter/__init__.py +28 -0
synth_ai/cli/filter/core.py +5 -0
synth_ai/cli/filter/errors.py +23 -0
synth_ai/cli/filter/validation.py +5 -0
synth_ai/cli/legacy_root_backup.py +14 -8
synth_ai/cli/modal_serve/__init__.py +12 -0
synth_ai/cli/modal_serve/core.py +14 -0
synth_ai/cli/modal_serve/errors.py +8 -0
synth_ai/cli/modal_serve/validation.py +11 -0
synth_ai/cli/opencode.py +107 -0
synth_ai/cli/root.py +9 -5
synth_ai/cli/serve/__init__.py +12 -0
synth_ai/cli/serve/core.py +14 -0
synth_ai/cli/serve/errors.py +8 -0
synth_ai/cli/serve/validation.py +11 -0
synth_ai/cli/setup.py +20 -265
synth_ai/cli/status.py +7 -126
synth_ai/cli/task_app_deploy.py +1 -10
synth_ai/cli/task_app_modal_serve.py +4 -9
synth_ai/cli/task_app_serve.py +4 -11
synth_ai/cli/task_apps.py +51 -1480
synth_ai/cli/train/__init__.py +12 -0
synth_ai/cli/train/core.py +21 -0
synth_ai/cli/train/errors.py +8 -0
synth_ai/cli/train/validation.py +24 -0
synth_ai/cli/train.py +1 -14
synth_ai/demos/crafter/grpo_crafter_task_app.py +1 -1
synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +7 -4
synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +9 -5
synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +4 -3
synth_ai/environments/examples/red/engine.py +33 -12
synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
synth_ai/environments/examples/red/environment.py +26 -0
synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
synth_ai/http.py +12 -0
synth_ai/judge_schemas.py +10 -10
synth_ai/learning/__init__.py +10 -0
synth_ai/learning/prompt_learning_client.py +276 -0
synth_ai/learning/prompt_learning_types.py +184 -0
synth_ai/learning/rl/client.py +3 -1
synth_ai/pricing/__init__.py +2 -0
synth_ai/pricing/model_pricing.py +57 -0
synth_ai/streaming/__init__.py +29 -0
synth_ai/streaming/config.py +94 -0
synth_ai/streaming/handlers.py +518 -0
synth_ai/streaming/streamer.py +320 -0
synth_ai/streaming/types.py +95 -0
synth_ai/task/apps/__init__.py +1 -0
synth_ai/task/config.py +2 -0
synth_ai/task/tracing_utils.py +25 -25
synth_ai/task/validators.py +45 -9
synth_ai/task_app_cfgs.py +21 -0
synth_ai/tracing_v3/config.py +162 -19
synth_ai/tracing_v3/constants.py +1 -1
synth_ai/tracing_v3/db_config.py +24 -38
synth_ai/tracing_v3/migration_helper.py +1 -2
synth_ai/tracing_v3/storage/config.py +47 -13
synth_ai/tracing_v3/storage/factory.py +3 -3
synth_ai/tracing_v3/turso/daemon.py +113 -11
synth_ai/tracing_v3/turso/native_manager.py +92 -16
synth_ai/types.py +8 -0
synth_ai/urls.py +11 -0
synth_ai/utils/__init__.py +30 -1
synth_ai/utils/agents.py +74 -0
synth_ai/utils/bin.py +39 -0
synth_ai/utils/cli.py +149 -5
synth_ai/utils/env.py +40 -33
synth_ai/utils/http.py +4 -1
synth_ai/utils/json.py +72 -0
synth_ai/utils/modal.py +285 -3
synth_ai/utils/paths.py +48 -0
synth_ai/utils/uvicorn.py +113 -0
{synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/METADATA +109 -6
{synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/RECORD +291 -142
examples/qwen_vl/configs/eval_qwen2vl_vision.toml +0 -44
synth_ai/cli/tui.py +0 -62
synth_ai/tui/__init__.py +0 -5
synth_ai/tui/__main__.py +0 -13
synth_ai/tui/cli/__init__.py +0 -1
synth_ai/tui/cli/query_experiments.py +0 -164
synth_ai/tui/cli/query_experiments_v3.py +0 -164
synth_ai/tui/dashboard.py +0 -911
{synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/WHEEL +0 -0
{synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/entry_points.txt +0 -0
{synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/licenses/LICENSE +0 -0
{synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/top_level.txt +0 -0

examples/task_apps/banking77/deploy_wrapper.py ADDED Viewed

@@ -0,0 +1,46 @@
+"""Lightweight Modal deploy wrapper for Banking77 task app (web)."""
+from __future__ import annotations
+import os
+from pathlib import Path
+try:
+    import modal  # type: ignore
+except Exception as exc:  # pragma: no cover
+    raise SystemExit(f"Modal is required to deploy: {exc}")
+_here = Path(__file__).resolve()
+_parents = list(_here.parents)
+REPO_ROOT = _parents[3] if len(_parents) > 3 else Path.cwd()
+app = modal.App("synth-banking77-web")
+_image = (
+    modal.Image.debian_slim(python_version="3.11")
+    .pip_install(
+        "synth-ai",
+        "datasets>=2.14.0",
+        "fastapi>=0.115.0",
+        "pydantic>=2.0.0",
+        "httpx>=0.26.0",
+        "python-dotenv>=1.0.0",
+    )
+    .env({"PYTHONPATH": "/opt/synth_ai_repo"})
+    .add_local_dir(str(REPO_ROOT / "synth_ai"), "/opt/synth_ai_repo/synth_ai", copy=True)
+    .add_local_dir(str(REPO_ROOT / "examples"), "/opt/synth_ai_repo/examples", copy=True)
+)
+_env_file = REPO_ROOT / ".env"
+if _env_file.exists():
+    _image = _image.add_local_file(str(_env_file), "/opt/synth_ai_repo/.env")
+@app.function(image=_image, timeout=600)
+@modal.asgi_app()
+def web():
+    # Lazy import the task app to avoid local heavy deps
+    import contextlib
+    with contextlib.suppress(Exception):
+        from dotenv import load_dotenv  # type: ignore
+        load_dotenv(str(REPO_ROOT / ".env"), override=False)
+    from examples.task_apps.banking77.banking77_task_app import fastapi_app  # type: ignore
+    return fastapi_app()

examples/task_apps/crafter/CREATE_SFT_DATASET.md CHANGED Viewed

	@@ -271,3 +271,7 @@ min_official_score = 0.01 # Filter by outcome_rewards
271 271	- `QUERY_EXAMPLES.md` - SQL queries for trace analysis
272 272
273 273
274	+
275	+
276	+
277	+

examples/task_apps/crafter/FILTER_COMMAND_STATUS.md CHANGED Viewed

	@@ -172,3 +172,7 @@ cat ft_data/crafter_image_only_sft.jsonl | jq .
172 172	Action Required: Debug why messages aren't being saved to the database despite correct code path.
173 173
174 174
175	+
176	+
177	+
178	+

examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md CHANGED Viewed

	@@ -266,3 +266,7 @@ sqlite3 traces/v3/crafter_eval.db \
266 266	Status: 🎉 WORKING END-TO-END!
267 267
268 268
269	+
270	+
271	+
272	+

examples/task_apps/crafter/task_app/README.md CHANGED Viewed

@@ -6,7 +6,7 @@ underlying FastAPI plumbing.
 ## Local development
 ```bash
-uvx synth-ai serve grpo-crafter --port 8001
+uvx synth-ai deploy --runtime uvicorn grpo-crafter --port 8001
 # Optional extras:
 #   --env-file path/to/.env    # load additional environment variables
 #   --reload                   # enable uvicorn auto-reload

examples/task_apps/crafter/task_app/grpo_crafter.py CHANGED Viewed

@@ -6,12 +6,17 @@ import json
 import logging
 import os
 import sys
+from urllib.parse import parse_qs, urlparse
 from collections.abc import Iterable, Sequence
 from contextlib import suppress
 from dataclasses import dataclass
+from datetime import UTC, datetime
 from pathlib import Path
 from typing import Any
+from fastapi import HTTPException
+from pydantic import BaseModel
 from synth_ai.task.apps import ModalDeploymentConfig, TaskAppEntry, register_task_app
 from synth_ai.task.contracts import RolloutMetrics, RolloutMode, RolloutRequest, RolloutResponse, TaskInfo
 from synth_ai.task.datasets import TaskDatasetRegistry, TaskDatasetSpec
@@ -37,7 +42,16 @@ except Exception:  # pragma: no cover - utils unavailable if optional deps missi
         """Fallback to shared utility for URL normalization."""
         return normalize_inference_url(raw_url) if raw_url else raw_url
-    def extract_trace_correlation_id(_raw_url):
+    def extract_trace_correlation_id(_raw_url, mode=None):
+        if not isinstance(_raw_url, str):
+            return None
+        parsed = urlparse(_raw_url)
+        query_params = parse_qs(parsed.query or "")
+        for key in ("cid", "trace", "trace_correlation_id"):
+            values = query_params.get(key) or []
+            for value in values:
+                if isinstance(value, str) and value.strip():
+                    return value.strip()
         return None
 logger = logging.getLogger(__name__)
@@ -651,12 +665,20 @@ def _resolve_trace_correlation_id(policy_cfg: dict[str, Any], mode: Any = None)
             if stripped:
                 return stripped
-    return extract_trace_correlation_id(policy_cfg.get("inference_url"))
+    return extract_trace_correlation_id(policy_cfg.get("inference_url"), mode=mode)
 async def rollout_executor(request: RolloutRequest, fastapi_request) -> RolloutResponse:
     request = _coerce_math_to_crafter(request)
+    record_cfg = request.record.model_copy(
+        update={
+            "return_trace": True,
+            "trace_format": "structured",
+        }
+    )
+    request = request.model_copy(update={"record": record_cfg})
     policy_cfg = dict(request.policy.config or {})
     logger.info(
         "ROLLOUT_EXEC: incoming policy config keys=%s inference_url=%s run_id=%s mode=%s",
@@ -800,11 +822,49 @@ async def rollout_executor(request: RolloutRequest, fastapi_request) -> RolloutR
         trace_correlation_id,
     )
     data = legacy_response.model_dump()
+    legacy_trace = getattr(legacy_response, "trace", None)
+    if legacy_trace is not None:
+        if isinstance(legacy_trace, dict):
+            legacy_trace_preview = list(legacy_trace.keys())[:5]
+        else:
+            legacy_trace_preview = type(legacy_trace)
+        logger.info(
+            "ROLLOUT_EXEC: legacy response trace present type=%s preview=%s",
+            type(legacy_trace),
+            legacy_trace_preview,
+        )
+    logger.debug(
+        "ROLLOUT_EXEC: legacy response keys=%s has_trace=%s",
+        sorted(data.keys()),
+        bool(data.get("trace")),
+    )
     metrics = data.get("metrics", {}) or {}
     metrics.setdefault("outcome_score", None)
     metrics.setdefault("events_score", None)
     metrics.setdefault("details", {})
     data["metrics"] = metrics
+    if data.get("trace") is None:
+        legacy_trace = getattr(legacy_response, "trace", None)
+        if legacy_trace is not None:
+            data["trace"] = legacy_trace
+        else:
+            tracer_factory = getattr(fastapi_request.app.state, "session_tracer_factory", None)
+            if callable(tracer_factory):
+                tracer = tracer_factory()
+                logger.debug("ROLLOUT_EXEC: trace backfill factory=%s", type(tracer))
+                if isinstance(tracer, SessionTracer):
+                    try:
+                        await tracer.initialize()
+                        if tracer.db is not None:
+                            trace_row = await tracer.db.get_session_trace(request.run_id)
+                            if trace_row is not None:
+                                data["trace"] = trace_row
+                    except Exception as exc:
+                        logger.warning("TRACE_BACKFILL_FAIL: %s", exc)
+                    finally:
+                        with suppress(Exception):
+                            await tracer.close()
     # Add trace_correlation_id at TOP-LEVEL (REQUIRED for RL training pipeline)
     # Use fallback if somehow missing
@@ -820,12 +880,30 @@ async def rollout_executor(request: RolloutRequest, fastapi_request) -> RolloutR
     if isinstance(policy_cfg.get("inference_url"), str) and policy_cfg["inference_url"]:
         existing_meta.setdefault("inference_url", policy_cfg["inference_url"])
     data["pipeline_metadata"] = existing_meta
     # Add trace_correlation_id to each trajectory (required for RL training pipeline)
     if "trajectories" in data:
+        normalized_trajs: list[dict[str, Any]] = []
         for traj in data.get("trajectories", []):
-            if isinstance(traj, dict):
-                traj["trace_correlation_id"] = final_cid
+            if isinstance(traj, BaseModel):
+                traj_dict = traj.model_dump()
+            elif isinstance(traj, dict):
+                traj_dict = dict(traj)
+            else:
+                continue
+            traj_dict["trace_correlation_id"] = final_cid
+            if not traj_dict.get("inference_url"):
+                inferred_url = policy_cfg.get("inference_url")
+                if inferred_url:
+                    traj_dict["inference_url"] = inferred_url
+            normalized_trajs.append(traj_dict)
+        if normalized_trajs:
+            data["trajectories"] = normalized_trajs
+            logger.info(
+                "ROLLOUT_EXEC: normalized trajectory sample run_id=%s inference_url=%s",
+                request.run_id,
+                normalized_trajs[0].get("inference_url") if normalized_trajs else None,
+            )
     logger.info(
         "ROLLOUT_EXEC: final pipeline metadata run_id=%s metadata=%s",
         request.run_id,
@@ -844,6 +922,12 @@ async def rollout_executor(request: RolloutRequest, fastapi_request) -> RolloutR
             request.run_id,
             existing_meta,
         )
+    if data.get("trace") is None:
+        raise HTTPException(
+            status_code=500,
+            detail="trace_payload_missing: task app did not emit a SessionTrace",
+        )
     # ASSERTION: Verify trace_correlation_id is present in response at all required levels
     assert "trace_correlation_id" in data, (
@@ -962,6 +1046,7 @@ register_task_app(
                 (str(RUBRICS_ROOT), "/opt/synth_ai_repo/examples/multi_step/rubrics"),
             ),
             secret_names=("groq-api-key", "openai-api-key"),
+            env_vars={"SERVICE": "MODAL"},
             memory=16384,
             cpu=4.0,
             max_containers=10,

examples/task_apps/crafter/task_app/grpo_crafter_task_app.py CHANGED Viewed

@@ -3,7 +3,7 @@
 This module now delegates to the TaskAppConfig defined in the colocated example at
 `examples/task_apps/crafter/task_app/grpo_crafter.py`. It is kept for legacy usage
 (running the file directly or targeting `fastapi_app` from external tooling). Prefer using
-`uvx synth-ai serve grpo-crafter` for local development and testing.
+`uvx synth-ai deploy --runtime uvicorn grpo-crafter` for local development and testing.
 """
 from __future__ import annotations

examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py CHANGED Viewed

@@ -197,6 +197,8 @@ class CrafterPolicy(Policy):
         if self.use_tools:
             payload["tools"] = TOOLS_SCHEMA
             payload["tool_choice"] = "required"
+            payload["function_call"] = {"name": "interact_many"}
+            payload["parallel_tool_calls"] = False
             # Ensure the inference server injects family-specific stop sequences
             # to terminate immediately after the first tool call for compliance.
             payload["stop_after_tool_calls"] = 1
@@ -207,13 +209,7 @@ class CrafterPolicy(Policy):
         response: dict[str, Any],
         use_tools: bool = True,
     ) -> list[dict[str, Any]]:
-        """Turn an inference response into environment tool calls.
-        - If tools were used, expect tool_calls-compatible output and forward as-is
-          in our simple JSON format: {"tool_name": str, "arguments": {...}}.
-        - If no tools, parse plain-text actions using CrafterReActAgent parser and
-          wrap them into a single interact_many tool call.
-        """
+        """Turn an inference response into environment tool calls."""
         # First check if we got actual tool calls
         choices = response.get("choices", [])
         tool_calls: list[dict[str, Any]] = []
@@ -272,24 +268,6 @@ class CrafterPolicy(Policy):
                     normalized.append(tc)
             return normalized
-        # Otherwise, parse plain text content for actions
-        text = ""
-        for choice in choices:
-            msg = choice.get("message", {})
-            content = msg.get("content", "")
-            if content:
-                text = content
-                break
-        if text:
-            # Try to parse actions from the text
-            from .shared import parse_actions
-            actions = parse_actions(text)
-            if actions:
-                # Wrap actions in interact_many tool call
-                return [{"tool_name": "interact_many", "arguments": {"actions": actions}}]
         # No actions found
         return []
@@ -542,7 +520,7 @@ class CrafterPolicy(Policy):
             "claude-3",         # All Claude 3 models support vision
             "gemini",           # Gemini models
             "qwen-vl",          # Qwen Vision-Language models
-            "qwen2-vl",         # Qwen2 VL
+            "qwen3-vl",         # Qwen3 VL
             "pixtral",          # Mistral's vision model
             "llava",            # LLaVA models
             "phi-3-vision",     # Microsoft Phi-3 Vision

examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py CHANGED Viewed

@@ -45,8 +45,7 @@ class CrafterReActAgent:
             "Action policy:\n"
             "- Always return a single tool call: interact_many({actions: [...]})\n"
             "- Use 2–5 actions per call; prefer long movement sequences to explore.\n"
-            "- Mix in 'do' only when it makes sense (tree, stone, animal, enemy nearby).\n"
-            "- Do not spam the same exact sequence twice in a row—explore in varied directions.\n\n"
+            "- Mix in 'do' only when it makes sense (tree, stone, animal, enemy nearby).\n\n"
             "Available actions: noop, move_up, move_down, move_left, move_right, do (interact), sleep, "
             "place_stone, place_table, place_furnace, place_plant, make_wood_pickaxe, make_stone_pickaxe, "
             "make_iron_pickaxe, make_wood_sword, make_stone_sword, make_iron_sword\n"

examples/task_apps/crafter/task_app/synth_envs_hosted/hosted_app.py CHANGED Viewed

@@ -1,6 +1,7 @@
 from __future__ import annotations
 import contextlib
+import logging
 import os
 from fastapi import FastAPI
@@ -9,6 +10,52 @@ from fastapi.responses import JSONResponse
 from pydantic import BaseModel
 from starlette.requests import Request
+logger = logging.getLogger(__name__)
+_VERSION_LOGGED = False
+def _resolve_task_app_version() -> str:
+    env_version = os.getenv("TASK_APP_VERSION")
+    if isinstance(env_version, str) and env_version.strip():
+        return env_version.strip()
+    try:
+        import importlib.metadata as importlib_metadata  # python 3.11 stdlib
+        pkg_version = importlib_metadata.version("synth-ai")
+        if isinstance(pkg_version, str) and pkg_version.strip():
+            return pkg_version.strip()
+    except Exception:
+        pass
+    try:
+        import synth_ai
+        attr_version = getattr(synth_ai, "__version__", None)
+        if isinstance(attr_version, str) and attr_version.strip():
+            return attr_version.strip()
+    except Exception:
+        pass
+    return "unknown"
+def _log_task_app_version_once() -> None:
+    global _VERSION_LOGGED
+    if _VERSION_LOGGED:
+        return
+    version = _resolve_task_app_version()
+    build_id = os.getenv("TASK_APP_BUILD_ID")
+    if build_id:
+        logger.info("TASK_APP_VERSION: %s (build=%s)", version, build_id)
+    else:
+        logger.info("TASK_APP_VERSION: %s", version)
+    _VERSION_LOGGED = True
 class TaskApp:
     """Holds service configuration and shared state."""
@@ -56,6 +103,8 @@ def create_app(allowed_environments: list[str] = None) -> FastAPI:
         allow_headers=["*"],
     )
+    _log_task_app_version_once()
     # Initialize task app configuration
     task_app = TaskApp()
     app.state.task_app = task_app

synth-ai 0.2.16__py3-none-any.whl → 0.2.19__py3-none-any.whl

Potentially problematic release.

synth-ai 0.2.16__py3-none-any.whl → 0.2.19__py3-none-any.whl