PyPI - synth-ai - Versions diffs - 0.2.16__py3-none-any.whl → 0.2.19__py3-none-any.whl - Mend

synth-ai 0.2.16 (py3-none-any.whl) → 0.2.19 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of synth-ai might be problematic. Click here for more details.

Files changed (299)

examples/analyze_semantic_words.sh +2 -2
examples/baseline/banking77_baseline.py +204 -0
examples/baseline/crafter_baseline.py +407 -0
examples/baseline/pokemon_red_baseline.py +326 -0
examples/baseline/simple_baseline.py +56 -0
examples/baseline/warming_up_to_rl_baseline.py +239 -0
examples/blog_posts/gepa/README.md +355 -0
examples/blog_posts/gepa/configs/banking77_gepa_local.toml +95 -0
examples/blog_posts/gepa/configs/banking77_gepa_test.toml +82 -0
examples/blog_posts/gepa/configs/banking77_mipro_local.toml +52 -0
examples/blog_posts/gepa/configs/hotpotqa_gepa_local.toml +59 -0
examples/blog_posts/gepa/configs/hotpotqa_gepa_qwen.toml +36 -0
examples/blog_posts/gepa/configs/hotpotqa_mipro_local.toml +53 -0
examples/blog_posts/gepa/configs/hover_gepa_local.toml +59 -0
examples/blog_posts/gepa/configs/hover_gepa_qwen.toml +36 -0
examples/blog_posts/gepa/configs/hover_mipro_local.toml +53 -0
examples/blog_posts/gepa/configs/ifbench_gepa_local.toml +59 -0
examples/blog_posts/gepa/configs/ifbench_gepa_qwen.toml +36 -0
examples/blog_posts/gepa/configs/ifbench_mipro_local.toml +53 -0
examples/blog_posts/gepa/configs/pupa_gepa_local.toml +60 -0
examples/blog_posts/gepa/configs/pupa_mipro_local.toml +54 -0
examples/blog_posts/gepa/deploy_banking77_task_app.sh +41 -0
examples/blog_posts/gepa/gepa_baseline.py +204 -0
examples/blog_posts/gepa/query_prompts_example.py +97 -0
examples/blog_posts/gepa/run_gepa_banking77.sh +87 -0
examples/blog_posts/gepa/task_apps.py +105 -0
examples/blog_posts/gepa/test_gepa_local.sh +67 -0
examples/blog_posts/gepa/verify_banking77_setup.sh +123 -0
examples/blog_posts/pokemon_vl/README.md +98 -0
examples/blog_posts/pokemon_vl/configs/eval_gpt5nano.toml +26 -0
examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +27 -0
examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +43 -0
examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
examples/blog_posts/pokemon_vl/extract_images.py +239 -0
examples/blog_posts/pokemon_vl/pokemon_vl_baseline.py +326 -0
examples/blog_posts/pokemon_vl/run_eval_extract_images.py +209 -0
examples/blog_posts/pokemon_vl/run_qwen_eval_extract_images.py +212 -0
examples/blog_posts/pokemon_vl/text_box_analysis.md +106 -0
examples/blog_posts/warming_up_to_rl/ARCHITECTURE.md +195 -0
examples/blog_posts/warming_up_to_rl/FINAL_TEST_RESULTS.md +127 -0
examples/blog_posts/warming_up_to_rl/INFERENCE_SUCCESS.md +132 -0
examples/blog_posts/warming_up_to_rl/README.md +158 -0
examples/blog_posts/warming_up_to_rl/SMOKE_TESTING.md +164 -0
examples/blog_posts/warming_up_to_rl/SMOKE_TEST_COMPLETE.md +253 -0
examples/blog_posts/warming_up_to_rl/configs/eval_baseline_qwen32b_10x20.toml +25 -0
examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b_10x20.toml +26 -0
examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
examples/blog_posts/warming_up_to_rl/configs/smoke_test.toml +75 -0
examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +91 -0
examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
examples/blog_posts/warming_up_to_rl/warming_up_to_rl_baseline.py +187 -0
examples/dev/qwen3_32b_qlora_4xh100.toml +5 -0
examples/multi_step/configs/VERILOG_REWARDS.md +4 -0
examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +4 -0
examples/multi_step/configs/crafter_rl_outcome.toml +2 -1
examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +65 -107
examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +2 -1
examples/multi_step/configs/crafter_rl_stepwise_simple.toml +2 -1
examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
examples/multi_step/configs/verilog_rl_lora.toml +80 -123
examples/qwen_coder/configs/coder_lora_30b.toml +1 -3
examples/qwen_coder/configs/coder_lora_4b.toml +4 -1
examples/qwen_coder/configs/coder_lora_small.toml +1 -3
examples/qwen_vl/README.md +10 -12
examples/qwen_vl/SETUP_COMPLETE.md +7 -8
examples/qwen_vl/VISION_TESTS_COMPLETE.md +2 -3
examples/qwen_vl/collect_data_via_cli.md +76 -84
examples/qwen_vl/collect_vision_traces.py +4 -4
examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +40 -57
examples/qwen_vl/configs/crafter_vlm_sft_example.toml +1 -2
examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +20 -37
examples/qwen_vl/configs/eval_gpt5nano_vision.toml +21 -40
examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
examples/qwen_vl/configs/{filter_qwen2vl_sft.toml → filter_qwen3vl_sft.toml} +4 -5
examples/qwen_vl/configs/filter_vision_sft.toml +2 -3
examples/qwen_vl/crafter_qwen_vl_agent.py +5 -5
examples/qwen_vl/run_vision_comparison.sh +6 -7
examples/rl/README.md +5 -5
examples/rl/configs/rl_from_base_qwen.toml +26 -1
examples/rl/configs/rl_from_base_qwen17.toml +6 -2
examples/rl/task_app/README.md +1 -2
examples/rl/task_app/math_single_step.py +2 -2
examples/run_crafter_demo.sh +2 -2
examples/sft/README.md +1 -1
examples/sft/configs/crafter_fft_qwen0p6b.toml +4 -1
examples/sft/configs/crafter_lora_qwen0p6b.toml +4 -1
examples/swe/task_app/README.md +32 -2
examples/swe/task_app/grpo_swe_mini.py +4 -0
examples/swe/task_app/hosted/envs/crafter/react_agent.py +1 -1
examples/swe/task_app/hosted/envs/mini_swe/environment.py +37 -10
examples/swe/task_app/hosted/inference/openai_client.py +4 -38
examples/swe/task_app/hosted/policy_routes.py +17 -0
examples/swe/task_app/hosted/rollout.py +4 -2
examples/swe/task_app/morph_backend.py +178 -0
examples/task_apps/banking77/__init__.py +6 -0
examples/task_apps/banking77/banking77_task_app.py +841 -0
examples/task_apps/banking77/deploy_wrapper.py +46 -0
examples/task_apps/crafter/CREATE_SFT_DATASET.md +4 -0
examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +4 -0
examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +4 -0
examples/task_apps/crafter/task_app/README.md +1 -1
examples/task_apps/crafter/task_app/grpo_crafter.py +90 -5
examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +1 -1
examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +4 -26
examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -2
examples/task_apps/crafter/task_app/synth_envs_hosted/hosted_app.py +49 -0
examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +372 -107
examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +81 -12
examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +82 -11
examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +194 -1
examples/task_apps/enron/task_app/grpo_enron_task_app.py +1 -1
examples/task_apps/gepa_benchmarks/__init__.py +7 -0
examples/task_apps/gepa_benchmarks/common.py +260 -0
examples/task_apps/gepa_benchmarks/hotpotqa_task_app.py +507 -0
examples/task_apps/gepa_benchmarks/hover_task_app.py +436 -0
examples/task_apps/gepa_benchmarks/ifbench_task_app.py +563 -0
examples/task_apps/gepa_benchmarks/pupa_task_app.py +460 -0
examples/task_apps/math/README.md +1 -2
examples/task_apps/pokemon_red/README.md +3 -4
examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +4 -0
examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +6 -5
examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +1 -2
examples/task_apps/pokemon_red/task_app.py +288 -39
examples/task_apps/sokoban/README.md +2 -3
examples/task_apps/verilog/eval_groq_qwen32b.toml +12 -14
examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +1 -1
examples/vlm/configs/crafter_vlm_gpt4o.toml +4 -1
examples/warming_up_to_rl/configs/crafter_fft.toml +4 -1
examples/warming_up_to_rl/configs/crafter_fft_4b.toml +0 -2
examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +3 -2
examples/warming_up_to_rl/run_local_rollout_traced.py +1 -1
examples/warming_up_to_rl/task_app/README.md +1 -1
examples/warming_up_to_rl/task_app/grpo_crafter.py +185 -5
examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +1 -1
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +3 -27
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -1
examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +49 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +156 -45
examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +37 -4
examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +33 -3
examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +67 -0
examples/workflows/math_rl/configs/rl_from_base_qwen.toml +27 -0
examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +6 -0
synth_ai/api/train/builders.py +99 -4
synth_ai/api/train/cli.py +516 -26
synth_ai/api/train/config_finder.py +13 -2
synth_ai/api/train/configs/__init__.py +23 -2
synth_ai/api/train/configs/prompt_learning.py +442 -0
synth_ai/api/train/configs/rl.py +61 -7
synth_ai/api/train/configs/sft.py +6 -2
synth_ai/api/train/configs/shared.py +59 -2
synth_ai/api/train/task_app.py +1 -1
synth_ai/api/train/validators.py +277 -0
synth_ai/auth/credentials.py +119 -0
synth_ai/baseline/__init__.py +25 -0
synth_ai/baseline/config.py +209 -0
synth_ai/baseline/discovery.py +214 -0
synth_ai/baseline/execution.py +146 -0
synth_ai/cli/__init__.py +94 -18
synth_ai/cli/__main__.py +0 -0
synth_ai/cli/claude.py +70 -0
synth_ai/cli/codex.py +84 -0
synth_ai/cli/commands/__init__.py +18 -0
synth_ai/cli/commands/baseline/__init__.py +12 -0
synth_ai/cli/commands/baseline/core.py +637 -0
synth_ai/cli/commands/baseline/list.py +93 -0
synth_ai/cli/commands/demo/__init__.py +6 -0
synth_ai/cli/commands/demo/core.py +163 -0
synth_ai/cli/commands/eval/__init__.py +19 -0
synth_ai/cli/commands/eval/core.py +1112 -0
synth_ai/cli/commands/eval/errors.py +81 -0
synth_ai/cli/commands/eval/validation.py +133 -0
synth_ai/cli/commands/filter/__init__.py +12 -0
synth_ai/cli/commands/filter/core.py +424 -0
synth_ai/cli/commands/filter/errors.py +55 -0
synth_ai/cli/commands/filter/validation.py +77 -0
synth_ai/cli/commands/help/__init__.py +177 -0
synth_ai/cli/commands/help/core.py +72 -0
synth_ai/cli/commands/smoke/__init__.py +7 -0
synth_ai/cli/commands/smoke/core.py +1436 -0
synth_ai/cli/commands/status/__init__.py +64 -0
synth_ai/cli/commands/status/client.py +192 -0
synth_ai/cli/commands/status/config.py +92 -0
synth_ai/cli/commands/status/errors.py +20 -0
synth_ai/cli/commands/status/formatters.py +164 -0
synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
synth_ai/cli/commands/status/subcommands/files.py +79 -0
synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
synth_ai/cli/commands/status/subcommands/models.py +79 -0
synth_ai/cli/commands/status/subcommands/pricing.py +22 -0
synth_ai/cli/commands/status/subcommands/runs.py +81 -0
synth_ai/cli/commands/status/subcommands/summary.py +47 -0
synth_ai/cli/commands/status/subcommands/usage.py +203 -0
synth_ai/cli/commands/status/utils.py +114 -0
synth_ai/cli/commands/train/__init__.py +53 -0
synth_ai/cli/commands/train/core.py +21 -0
synth_ai/cli/commands/train/errors.py +117 -0
synth_ai/cli/commands/train/judge_schemas.py +200 -0
synth_ai/cli/commands/train/judge_validation.py +305 -0
synth_ai/cli/commands/train/validation.py +386 -0
synth_ai/cli/demo.py +30 -158
synth_ai/cli/deploy/__init__.py +43 -0
synth_ai/cli/deploy.py +162 -0
synth_ai/cli/eval/__init__.py +36 -0
synth_ai/cli/eval/core.py +5 -0
synth_ai/cli/eval/errors.py +31 -0
synth_ai/cli/eval/validation.py +5 -0
synth_ai/cli/filter/__init__.py +28 -0
synth_ai/cli/filter/core.py +5 -0
synth_ai/cli/filter/errors.py +23 -0
synth_ai/cli/filter/validation.py +5 -0
synth_ai/cli/legacy_root_backup.py +14 -8
synth_ai/cli/modal_serve/__init__.py +12 -0
synth_ai/cli/modal_serve/core.py +14 -0
synth_ai/cli/modal_serve/errors.py +8 -0
synth_ai/cli/modal_serve/validation.py +11 -0
synth_ai/cli/opencode.py +107 -0
synth_ai/cli/root.py +9 -5
synth_ai/cli/serve/__init__.py +12 -0
synth_ai/cli/serve/core.py +14 -0
synth_ai/cli/serve/errors.py +8 -0
synth_ai/cli/serve/validation.py +11 -0
synth_ai/cli/setup.py +20 -265
synth_ai/cli/status.py +7 -126
synth_ai/cli/task_app_deploy.py +1 -10
synth_ai/cli/task_app_modal_serve.py +4 -9
synth_ai/cli/task_app_serve.py +4 -11
synth_ai/cli/task_apps.py +51 -1480
synth_ai/cli/train/__init__.py +12 -0
synth_ai/cli/train/core.py +21 -0
synth_ai/cli/train/errors.py +8 -0
synth_ai/cli/train/validation.py +24 -0
synth_ai/cli/train.py +1 -14
synth_ai/demos/crafter/grpo_crafter_task_app.py +1 -1
synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +7 -4
synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +9 -5
synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +4 -3
synth_ai/environments/examples/red/engine.py +33 -12
synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
synth_ai/environments/examples/red/environment.py +26 -0
synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
synth_ai/http.py +12 -0
synth_ai/judge_schemas.py +10 -10
synth_ai/learning/__init__.py +10 -0
synth_ai/learning/prompt_learning_client.py +276 -0
synth_ai/learning/prompt_learning_types.py +184 -0
synth_ai/learning/rl/client.py +3 -1
synth_ai/pricing/__init__.py +2 -0
synth_ai/pricing/model_pricing.py +57 -0
synth_ai/streaming/__init__.py +29 -0
synth_ai/streaming/config.py +94 -0
synth_ai/streaming/handlers.py +518 -0
synth_ai/streaming/streamer.py +320 -0
synth_ai/streaming/types.py +95 -0
synth_ai/task/apps/__init__.py +1 -0
synth_ai/task/config.py +2 -0
synth_ai/task/tracing_utils.py +25 -25
synth_ai/task/validators.py +45 -9
synth_ai/task_app_cfgs.py +21 -0
synth_ai/tracing_v3/config.py +162 -19
synth_ai/tracing_v3/constants.py +1 -1
synth_ai/tracing_v3/db_config.py +24 -38
synth_ai/tracing_v3/migration_helper.py +1 -2
synth_ai/tracing_v3/storage/config.py +47 -13
synth_ai/tracing_v3/storage/factory.py +3 -3
synth_ai/tracing_v3/turso/daemon.py +113 -11
synth_ai/tracing_v3/turso/native_manager.py +92 -16
synth_ai/types.py +8 -0
synth_ai/urls.py +11 -0
synth_ai/utils/__init__.py +30 -1
synth_ai/utils/agents.py +74 -0
synth_ai/utils/bin.py +39 -0
synth_ai/utils/cli.py +149 -5
synth_ai/utils/env.py +40 -33
synth_ai/utils/http.py +4 -1
synth_ai/utils/json.py +72 -0
synth_ai/utils/modal.py +285 -3
synth_ai/utils/paths.py +48 -0
synth_ai/utils/uvicorn.py +113 -0
{synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/METADATA +109 -6
{synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/RECORD +291 -142
examples/qwen_vl/configs/eval_qwen2vl_vision.toml +0 -44
synth_ai/cli/tui.py +0 -62
synth_ai/tui/__init__.py +0 -5
synth_ai/tui/__main__.py +0 -13
synth_ai/tui/cli/__init__.py +0 -1
synth_ai/tui/cli/query_experiments.py +0 -164
synth_ai/tui/cli/query_experiments_v3.py +0 -164
synth_ai/tui/dashboard.py +0 -911
{synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/WHEEL +0 -0
{synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/entry_points.txt +0 -0
{synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/licenses/LICENSE +0 -0
{synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/top_level.txt +0 -0

examples/task_apps/pokemon_red/task_app.py CHANGED Viewed

@@ -12,7 +12,7 @@ from synth_ai.environments.examples.red.taskset import INSTANCE as RED_DEFAULT_I
 from synth_ai.environments.examples.red.engine_helpers.reward_library.pallet_town_progression import (
     PalletTownProgressionCompositeReward,
 )
-from synth_ai.task.apps import TaskAppEntry, register_task_app
+from synth_ai.task.apps import ModalDeploymentConfig, TaskAppEntry, register_task_app
 from synth_ai.task.contracts import (
     RolloutMetrics,
     RolloutRequest,
@@ -29,6 +29,8 @@ from synth_ai.task.tracing_utils import (
     tracing_env_enabled,
 )
 from synth_ai.tracing_v3.session_tracer import SessionTracer
+from synth_ai.tracing_v3.abstractions import EnvironmentEvent, TimeRecord
+from datetime import datetime, UTC
 logger = logging.getLogger(__name__)
@@ -260,8 +262,14 @@ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) ->
             {
                 "role": "system",
                 "content": (
-                    "You are controlling Pokémon Red. Respond with a single tool call named 'press_button' "
-                    "with JSON arguments {button: 'A|B|UP|DOWN|LEFT|RIGHT|START|SELECT', frames: 1-120}."
+                    "You are controlling Pokémon Red, a classic Game Boy game. You can see the game screen in the images provided. "
+                    "Your goal is to make progress in the game. "
+                    "IMPORTANT: Always use the 'execute_sequence' tool to submit 5-10 actions per call. "
+                    "Do not reason about which tool to use - execute_sequence is the only tool available. "
+                    "Choose appropriate button presses based on what you see in the game screen. "
+                    "Plan 5-10 actions ahead to play efficiently. "
+                    "CRITICAL: If stuck in a text box (text_box_active=True), try pressing B button first, then try A. "
+                    "Always respond with exactly one tool call containing 5-10 actions."
                 ),
             },
             {
@@ -277,7 +285,7 @@ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) ->
                     "type": "function",
                     "function": {
                         "name": "execute_sequence",
-                        "description": "Execute multiple button presses in sequence. More efficient than separate calls. Recommended: 5-10 actions per call.",
+                        "description": "Execute multiple button presses in sequence. More efficient than separate calls. ALWAYS use this tool. Plan 5-10 actions ahead to play efficiently.",
                         "parameters": {
                             "type": "object",
                             "properties": {
@@ -300,31 +308,15 @@ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) ->
                                         },
                                         "required": ["button", "frames"]
                                     },
-                                    "minItems": 1,
-                                    "maxItems": 20,
-                                    "description": "Sequence of button presses to execute"
+                                    "minItems": 5,
+                                    "maxItems": 10,
+                                    "description": "Sequence of 5-10 button presses to execute. Plan ahead to navigate efficiently."
                                 }
                             },
                             "required": ["actions"],
                             "additionalProperties": False,
                         },
                     },
-                },
-                {
-                    "type": "function",
-                    "function": {
-                        "name": "press_button",
-                        "description": "Press a single Game Boy button for N frames (use execute_sequence for multiple actions)",
-                        "parameters": {
-                            "type": "object",
-                            "properties": {
-                                "button": {"type": "string", "enum": ["UP", "DOWN", "LEFT", "RIGHT", "A", "B", "START", "SELECT"]},
-                                "frames": {"type": "integer", "minimum": 1, "maximum": 120},
-                            },
-                            "required": ["button"],
-                            "additionalProperties": False,
-                        },
-                    },
                 }
             ],
             "tool_choice": {"type": "function", "function": {"name": "execute_sequence"}},
@@ -350,35 +342,154 @@ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) ->
             if "api.openai.com" in inference_url and not inference_url.endswith("/chat/completions"):
                 inference_url = inference_url + "/v1/chat/completions"
+        # Debug: print exact payload being sent
+        import json as _json_debug
+        print(f"\n{'='*80}")
+        print(f"[pokemon_red] INFERENCE REQUEST DEBUG")
+        print(f"{'='*80}")
+        print(f"Inference URL: {inference_url}")
+        print(f"Payload keys: {list(payload.keys())}")
+        print(f"Payload (formatted):")
+        print(_json_debug.dumps(payload, indent=2)[:2000])
+        print(f"{'='*80}\n")
         if is_external:
             # External API: use direct HTTP client with auth header
             headers = {}
+            import os
             if "api.openai.com" in inference_url:
-                import os
                 api_key = os.getenv("OPENAI_API_KEY")
                 if api_key:
                     headers["Authorization"] = f"Bearer {api_key}"
+            elif "modal.run" in inference_url or "synth" in inference_url.lower():
+                # Synth API: use SYNTH_API_KEY
+                api_key = os.getenv("SYNTH_API_KEY")
+                if api_key:
+                    headers["Authorization"] = f"Bearer {api_key}"
+                print(f"[pokemon_red] Using Synth API auth: {'Bearer ' + api_key[:10] + '...' if api_key else 'NONE'}")
+                # For 30B-A3B models, require H200 (A100 doesn't have enough memory)
+                model_id = payload.get("model", "")
+                if "30B-A3B" in model_id or "A3B" in model_id:
+                    headers["X-GPU-Preference"] = "H200"
+                    print(f"[pokemon_red] Setting X-GPU-Preference: H200 (required for A3B MoE)")
-            async with httpx.AsyncClient(timeout=httpx.Timeout(60.0)) as client:
+            async with httpx.AsyncClient(timeout=httpx.Timeout(connect=30.0, read=1800.0, write=60.0, pool=60.0)) as client:  # 30 min read timeout for cold starts
                 resp = await client.post(inference_url, json=payload, headers=headers)
         else:
             # Internal proxy: use local base_url
             async with httpx.AsyncClient(
                 base_url="http://127.0.0.1:" + str(fastapi_request.url.port or 8913),
-                timeout=httpx.Timeout(60.0)
+                timeout=httpx.Timeout(connect=30.0, read=1800.0, write=60.0, pool=60.0)  # 30 min read timeout for cold starts
             ) as client:
                 resp = await client.post(inference_url, json=payload)
         resp.raise_for_status()
         data = resp.json()
-        # Extract first tool call
+        # Record user message (system + user)
+        if tracer_instance is not None:
+            try:
+                print(f"[pokemon_red] Recording messages: tracer_instance={tracer_instance is not None}", flush=True)
+                # Record system message
+                await tracer_instance.record_message(
+                    content=messages[0].get("content", ""),
+                    message_type="system",
+                )
+                # Record user message
+                user_msg_content = messages[1].get("content", "")
+                if isinstance(user_msg_content, list):
+                    # For multimodal content, extract text summary
+                    text_parts = [item.get("text", "") for item in user_msg_content if item.get("type") == "text"]
+                    user_msg_content = " ".join(text_parts) if text_parts else str(user_msg_content)
+                await tracer_instance.record_message(
+                    content=user_msg_content,
+                    message_type="user",
+                )
+                print(f"[pokemon_red] Recorded user messages", flush=True)
+            except Exception as exc:
+                logger.debug(f"[pokemon_red] Failed to record user messages: {exc}")
+                print(f"[pokemon_red] ERROR recording user messages: {exc}", flush=True)
+        # Debug logging for tool calls
+        print(f"\n{'='*80}")
+        print(f"[pokemon_red] INFERENCE RESPONSE DEBUG")
+        print(f"{'='*80}")
+        print(f"Response status: {resp.status_code}")
+        print(f"Response keys: {list(data.keys())}")
         choices = data.get("choices") or []
+        if choices:
+            message = choices[0].get("message") or {}
+            print(f"Message keys: {list(message.keys())}")
+            print(f"Message content preview: {str(message.get('content', ''))[:200]}")
+            print(f"Tool calls: {message.get('tool_calls', [])}")
+            print(f"Full message (formatted):")
+            print(_json_debug.dumps(message, indent=2)[:1500])
+        print(f"{'='*80}\n")
+        # Record assistant message/tool calls
+        if tracer_instance is not None:
+            try:
+                message = choices[0].get("message", {}) if choices else {}
+                tool_calls = message.get("tool_calls", [])
+                content = message.get("content", "")
+                if tool_calls:
+                    # Record tool calls as assistant message
+                    import json as _json_record
+                    await tracer_instance.record_message(
+                        content=_json_record.dumps(tool_calls) if tool_calls else (content or ""),
+                        message_type="assistant",
+                        metadata={"is_tool_call": True} if tool_calls else {},
+                    )
+                elif content:
+                    # Record text content as assistant message
+                    await tracer_instance.record_message(
+                        content=content,
+                        message_type="assistant",
+                    )
+            except Exception as exc:
+                logger.debug(f"[pokemon_red] Failed to record assistant message: {exc}")
+        # Extract first tool call
         if not choices:
+            print("[pokemon_red] WARNING: No choices in inference response")
             return {}
         message = choices[0].get("message") or {}
         raw_calls = message.get("tool_calls") or []
+        # If no structured tool_calls, try parsing XML tool calls from content
+        if not raw_calls:
+            content = message.get("content", "")
+            if content and "<tool_call>" in content:
+                import re as _re
+                import json as _json_parse
+                # Parse XML tool calls: <tool_call>{...}</tool_call>
+                xml_pattern = r'<tool_call>\s*({.*?})\s*</tool_call>'
+                matches = _re.findall(xml_pattern, content, _re.DOTALL)
+                if matches:
+                    print(f"[pokemon_red] Parsed {len(matches)} XML tool call(s) from content")
+                    try:
+                        tool_data = _json_parse.loads(matches[0])
+                        tool_name = tool_data.get("name", "")
+                        args = tool_data.get("arguments", {})
+                        print(f"[pokemon_red] Parsed tool: {tool_name}, args: {str(args)[:200]}")
+                        # Handle execute_sequence tool
+                        if tool_name == "execute_sequence":
+                            return {"actions": args.get("actions", [])}
+                        # Handle press_button tool (legacy single action)
+                        if tool_name == "press_button":
+                            return {"button": args.get("button"), "frames": int(args.get("frames") or 30)}
+                    except Exception as parse_err:
+                        print(f"[pokemon_red] Error parsing XML tool call: {parse_err}")
         if not raw_calls:
+            print(f"[pokemon_red] WARNING: No tool_calls in response. Content: {message.get('content', '')[:200]}")
             return {}
         f = raw_calls[0].get("function") or {}
         tool_name = f.get("name", "")
         args = f.get("arguments")
@@ -437,6 +548,23 @@ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) ->
                     action_context = _build_action_context(prev_state, current_state)
                     step_reward = await reward_fn.score(current_state, action_context)
+                    # Record environment event
+                    if tracer_instance is not None:
+                        try:
+                            event = EnvironmentEvent(
+                                system_instance_id="environment:pokemon_red",
+                                time_record=TimeRecord(event_time=datetime.now(UTC).timestamp()),
+                                reward=step_reward,
+                                terminated=False,
+                                truncated=False,
+                                system_state_before={"map_id": prev_state.get("map_id"), "position": f"({prev_state.get('player_x')},{prev_state.get('player_y')})"},
+                                system_state_after={"map_id": current_state.get("map_id"), "position": f"({current_state.get('player_x')},{current_state.get('player_y')})"},
+                                metadata={"step": step_idx + 1, "button": button, "run_id": request.run_id},
+                            )
+                            await tracer_instance.record_event(event)
+                        except Exception as exc:
+                            logger.debug(f"[pokemon_red] Failed to record environment event: {exc}")
                     sequence_reward += step_reward
                     sequence_tool_calls.append({"tool": "press_button", "args": {"button": button, "frames": frames}})
@@ -488,6 +616,23 @@ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) ->
                 current_state = dict(obs1) if isinstance(obs1, Mapping) else {}
                 action_context = _build_action_context(prev_state, current_state)
                 step_reward = await reward_fn.score(current_state, action_context)
+                # Record environment event
+                if tracer_instance is not None:
+                    try:
+                        event = EnvironmentEvent(
+                            system_instance_id="environment:pokemon_red",
+                            time_record=TimeRecord(event_time=datetime.now(UTC).timestamp()),
+                            reward=step_reward,
+                            terminated=False,
+                            truncated=False,
+                            system_state_before={"map_id": prev_state.get("map_id"), "position": f"({prev_state.get('player_x')},{prev_state.get('player_y')})"},
+                            system_state_after={"map_id": current_state.get("map_id"), "position": f"({current_state.get('player_x')},{current_state.get('player_y')})"},
+                            metadata={"step": step_idx + 1, "button": button, "run_id": request.run_id},
+                        )
+                        await tracer_instance.record_event(event)
+                    except Exception as exc:
+                        logger.debug(f"[pokemon_red] Failed to record environment event: {exc}")
                 total_reward += step_reward
                 # Track reward components if non-zero
@@ -528,6 +673,7 @@ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) ->
             # Attempt policy-driven step if policy.config present
             policy_cfg = request.policy.config or {}
             if policy_cfg:
+                print(f"[pokemon_red] Calling _call_inference: tracer_instance={tracer_instance is not None}", flush=True)
                 try:
                     action = await _call_inference(policy_cfg, final_obs if isinstance(final_obs, Mapping) else {})
@@ -546,6 +692,23 @@ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) ->
                             action_context = _build_action_context(prev_state, current_state)
                             step_reward = await reward_fn.score(current_state, action_context)
+                            # Record environment event
+                            if tracer_instance is not None:
+                                try:
+                                    event = EnvironmentEvent(
+                                        system_instance_id="environment:pokemon_red",
+                                        time_record=TimeRecord(event_time=datetime.now(UTC).timestamp()),
+                                        reward=step_reward,
+                                        terminated=False,
+                                        truncated=False,
+                                        system_state_before={"map_id": prev_state.get("map_id"), "position": f"({prev_state.get('player_x')},{prev_state.get('player_y')})"},
+                                        system_state_after={"map_id": current_state.get("map_id"), "position": f"({current_state.get('player_x')},{current_state.get('player_y')})"},
+                                        metadata={"step": step_idx + 1, "button": button, "run_id": request.run_id},
+                                    )
+                                    await tracer_instance.record_event(event)
+                                except Exception as exc:
+                                    logger.debug(f"[pokemon_red] Failed to record environment event: {exc}")
                             sequence_reward += step_reward
                             sequence_tool_calls.append({"tool": "press_button", "args": {"button": button, "frames": frames}})
@@ -684,23 +847,58 @@ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) ->
             # End session and get trace
             session_trace = await tracer_instance.end_session()
-            # Build trace payload if requested
+            # Build trace payload if requested - ALWAYS use full format when return_trace=True
+            # This ensures markov_blanket_message_history is always included
             record_config = getattr(request, 'record', None)
+            print(f"[pokemon_red] TRACE DEBUG: record_config={record_config}, return_trace={getattr(record_config, 'return_trace', None) if record_config else None}, session_trace={session_trace is not None}", flush=True)
+            if session_trace:
+                print(f"[pokemon_red] TRACE DEBUG: IMMEDIATELY AFTER end_session: session_trace has {len(session_trace.markov_blanket_message_history)} messages, {len(session_trace.event_history)} events", flush=True)
+                print(f"[pokemon_red] TRACE DEBUG: session_trace.markov_blanket_message_history type: {type(session_trace.markov_blanket_message_history)}", flush=True)
+                if session_trace.markov_blanket_message_history:
+                    print(f"[pokemon_red] TRACE DEBUG: First message type: {type(session_trace.markov_blanket_message_history[0])}, content: {str(session_trace.markov_blanket_message_history[0].content)[:100]}", flush=True)
+                else:
+                    print(f"[pokemon_red] TRACE DEBUG: WARNING - markov_blanket_message_history is EMPTY RIGHT AFTER end_session!", flush=True)
             if record_config and getattr(record_config, 'return_trace', False) and session_trace:
-                trace_payload = {
-                    "session_id": session_trace.session_id,
-                    "created_at": session_trace.created_at.isoformat() if session_trace.created_at else None,
-                    "metadata": dict(session_trace.metadata or {}),
-                    "num_timesteps": session_trace.num_timesteps,
-                    "num_events": session_trace.num_events,
-                    "num_messages": session_trace.num_messages,
-                }
+                # Always return full trace with all messages and events (no compact format)
+                import dataclasses
+                trace_payload = session_trace.to_dict()
+                print(f"[pokemon_red] TRACE DEBUG: to_dict() returned keys: {list(trace_payload.keys())}", flush=True)
+                print(f"[pokemon_red] TRACE DEBUG: to_dict() markov_blanket_message_history length: {len(trace_payload.get('markov_blanket_message_history', []))}", flush=True)
+                # Always manually serialize messages and events to ensure they're included
+                # asdict() may not recursively serialize nested dataclasses correctly
+                from synth_ai.tracing_v3.abstractions import SessionEventMarkovBlanketMessage, BaseEvent
+                if session_trace.markov_blanket_message_history:
+                    print(f"[pokemon_red] TRACE DEBUG: Manually serializing {len(session_trace.markov_blanket_message_history)} messages", flush=True)
+                    trace_payload["markov_blanket_message_history"] = [
+                        dataclasses.asdict(msg) if isinstance(msg, SessionEventMarkovBlanketMessage) else (msg if isinstance(msg, dict) else str(msg))
+                        for msg in session_trace.markov_blanket_message_history
+                    ]
+                else:
+                    print(f"[pokemon_red] TRACE DEBUG: WARNING - session_trace.markov_blanket_message_history is EMPTY!", flush=True)
+                if session_trace.event_history:
+                    print(f"[pokemon_red] TRACE DEBUG: Manually serializing {len(session_trace.event_history)} events", flush=True)
+                    trace_payload["event_history"] = [
+                        dataclasses.asdict(evt) if isinstance(evt, BaseEvent) else (evt if isinstance(evt, dict) else str(evt))
+                        for evt in session_trace.event_history
+                    ]
+                else:
+                    print(f"[pokemon_red] TRACE DEBUG: WARNING - session_trace.event_history is EMPTY!", flush=True)
+                print(f"[pokemon_red] TRACE DEBUG: Final trace payload has {len(trace_payload.get('markov_blanket_message_history', []))} messages, {len(trace_payload.get('event_history', []))} events", flush=True)
+                print(f"[pokemon_red] TRACE DEBUG: Final trace payload keys: {list(trace_payload.keys())}", flush=True)
+            else:
+                print(f"[pokemon_red] TRACE DEBUG: SKIPPING trace payload build - record_config={record_config}, return_trace={getattr(record_config, 'return_trace', None) if record_config else None}, session_trace={session_trace is not None}", flush=True)
         except Exception as exc:
             logger.warning(f"[pokemon_red] tracing finalization failed: {exc}")
+            print(f"[pokemon_red] TRACE DEBUG EXCEPTION: {exc}", flush=True)
+            import traceback
+            print(f"[pokemon_red] TRACE DEBUG EXCEPTION TRACEBACK: {traceback.format_exc()}", flush=True)
     # Fallback trace payload if no tracer but CLI needs it
     if trace_payload is None:
         record_config = getattr(request, 'record', None)
+        print(f"[pokemon_red] TRACE DEBUG: trace_payload is None, using fallback. record_config={record_config}, return_trace={getattr(record_config, 'return_trace', None) if record_config else None}", flush=True)
         if record_config and getattr(record_config, 'return_trace', False):
             trace_payload = {
                 "session_id": request.run_id,
@@ -718,8 +916,22 @@ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) ->
                 "num_events": len(steps),
                 "num_messages": len(steps) * 2,
             }
+            print(f"[pokemon_red] TRACE DEBUG: Created fallback trace_payload with keys: {list(trace_payload.keys())}", flush=True)
+    print(f"[pokemon_red] TRACE DEBUG: About to return RolloutResponse with trace_payload={trace_payload is not None}, keys={list(trace_payload.keys()) if trace_payload else []}", flush=True)
+    if trace_payload:
+        import json as _json_final
+        markov_msgs = trace_payload.get('markov_blanket_message_history', [])
+        event_history = trace_payload.get('event_history', [])
+        print(f"[pokemon_red] TRACE DEBUG: trace_payload markov_blanket_message_history length: {len(markov_msgs)}", flush=True)
+        print(f"[pokemon_red] TRACE DEBUG: trace_payload event_history length: {len(event_history)}", flush=True)
+        if markov_msgs:
+            print(f"[pokemon_red] TRACE DEBUG: First markov message type: {type(markov_msgs[0]) if markov_msgs else None}", flush=True)
+            print(f"[pokemon_red] TRACE DEBUG: First markov message (first 500 chars): {_json_final.dumps(markov_msgs[0] if markov_msgs else {}, indent=2, default=str)[:500]}", flush=True)
+        else:
+            print(f"[pokemon_red] TRACE DEBUG: WARNING - markov_blanket_message_history is EMPTY in final trace_payload!", flush=True)
-    return RolloutResponse(
+    response = RolloutResponse(
         run_id=request.run_id,
         trajectories=[trajectory],
         branches={},
@@ -728,6 +940,14 @@ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) ->
         ops_executed=len(request.ops or []),
         trace=trace_payload,
     )
+    # Final check: inspect what's actually in the response
+    if response.trace:
+        import json as _json_response
+        resp_markov = response.trace.get('markov_blanket_message_history', []) if isinstance(response.trace, dict) else []
+        print(f"[pokemon_red] TRACE DEBUG: Response.trace markov_blanket_message_history length: {len(resp_markov)}", flush=True)
+    return response
 def import_datetime():
@@ -788,11 +1008,40 @@ def build_config() -> TaskAppConfig:
 register_task_app(
     entry=TaskAppEntry(
         app_id="pokemon_red",
-        description="Pokémon Red demo task app",
+        description="Pokémon Red demo task app with vision support",
         config_factory=build_config,
         aliases=("pokemon_red_demo",),
         env_files=(),
-        modal=None,
+        modal=ModalDeploymentConfig(
+            app_name="pokemon-red-vision-task-app",
+            python_version="3.11",
+            pip_packages=(
+                "fastapi>=0.100.0",
+                "uvicorn>=0.23.0",
+                "pydantic>=2.0.0",
+                "numpy>=1.24.0",
+                "aiohttp>=3.8.0",
+                "httpx>=0.24.0",
+                "python-dotenv>=1.0.1",
+                # Tracing/DB runtime deps
+                "sqlalchemy>=2.0.42",
+                "aiosqlite>=0.21.0",
+                "greenlet>=3.2.3",
+                # Pokemon Red environment
+                "pyboy>=2.0.0",
+                "pillow>=9.0.0",
+            ),
+            extra_local_dirs=(
+                # Mount repo root so local modules resolve when deployed on Modal
+                ("/Users/joshpurtell/Documents/GitHub/synth-ai", "/opt/synth_ai_repo"),
+                ("/Users/joshpurtell/Documents/GitHub/synth-ai/synth_ai", "/opt/synth_ai_repo/synth_ai"),
+                ("/Users/joshpurtell/Documents/GitHub/synth-ai/examples/task_apps/pokemon_red", "/opt/synth_ai_repo/examples/task_apps/pokemon_red"),
+            ),
+            secret_names=("openai-api-key", "groq-api-key"),
+            memory=16384,
+            cpu=4.0,
+            max_containers=10,
+        ),
     )
 )

examples/task_apps/sokoban/README.md CHANGED Viewed

@@ -20,7 +20,7 @@ Sokoban is a classic puzzle game where the player must push boxes onto target lo
 cd /path/to/synth-ai
 # Start the Sokoban task app on port 8911
-uvx synth-ai task-app serve sokoban --port 8911
+uvx synth-ai task-app deploy --runtime uvicorn sokoban --port 8911
 ```
 The server will be available at `http://localhost:8911`.
@@ -283,7 +283,7 @@ lsof -i :8911
 kill -9 $(lsof -ti :8911)
 # Restart
-uvx synth-ai task-app serve sokoban --port 8911
+uvx synth-ai task-app deploy --runtime uvicorn sokoban --port 8911
 ```
 ## Examples
@@ -304,4 +304,3 @@ To add new features:
 ## License
 MIT

examples/task_apps/verilog/eval_groq_qwen32b.toml CHANGED Viewed

@@ -1,24 +1,22 @@
 # Verilog Eval Config for Groq Qwen3-32B
-# Quick eval to test Verilog task app before RL training
-[task_app]
-# Update this with your Modal URL after deployment
-url = "https://synth-laboratories--grpo-verilog-task-app-fastapi-app-dev.modal.run"
+# Quick eval to test the Verilog task app before RL training
 [eval]
-num_episodes = 3  # Quick test with 3 seeds
+app_id = "grpo-verilog"
+task_app_url = "https://synth-laboratories--grpo-verilog-task-app-fastapi-app-dev.modal.run"
+model = "groq:qwen3-32b"
 seeds = [0, 1, 2]
-max_steps = 15    # More steps for Verilog compilation chains
+max_turns = 15
+concurrency = 1
+return_trace = true
+trace_format = "structured"
+[eval.env_config]
+difficulty = "medium"
-[policy]
+[eval.policy_config]
 provider = "groq"
 model = "qwen/qwen3-32b"
 temperature = 0.2
 max_tokens = 768
 inference_url = "https://api.groq.com/openai/v1/chat/completions"
-[env]
-difficulty = "medium"  # Can be "easy", "medium", or "hard"

examples/task_apps/verilog/task_app/grpo_verilog_task_app.py CHANGED Viewed

@@ -1,7 +1,7 @@
 """Compatibility wrapper for the GRPO Verilog task app.
 This mirrors the Crafter task app wrapper while delegating configuration to
-`grpo_verilog.py`. Normal usage should prefer `uvx synth-ai serve grpo-verilog`,
+`grpo_verilog.py`. Normal usage should prefer `uvx synth-ai deploy --runtime uvicorn grpo-verilog`,
 but the module remains for direct execution or importing the FastAPI app.
 """

examples/vlm/configs/crafter_vlm_gpt4o.toml CHANGED Viewed

@@ -1,4 +1,7 @@
-type = "sft"
+[algorithm]
+type = "offline"
+method = "sft"
+variety = "fft"
 [job]
 model = "openai/gpt-4o-mini-2024-07-18"

examples/warming_up_to_rl/configs/crafter_fft.toml CHANGED Viewed

@@ -1,7 +1,10 @@
 # Crafter Full Finetune (FFT) example on H100
 # Adjust paths and hyperparameters to your environment before running.
-type = "sft"
+[algorithm]
+type = "offline"
+method = "sft"
+variety = "fft"
 [job]
 model = "Qwen/Qwen3-4B"               # base model to finetune

examples/warming_up_to_rl/configs/crafter_fft_4b.toml CHANGED Viewed

@@ -1,7 +1,5 @@
 # FFT job config for Qwen/Qwen3-4B on Crafter SFT dataset
-type = "sft"
 [algorithm]
 type = "offline"
 method = "supervised_finetune"

examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml CHANGED Viewed

@@ -1,7 +1,5 @@
 # RL training starting from base Qwen/Qwen3-4B (TOML-only model selection)
-type = "rl"
 [algorithm]
 type = "online"
 method = "policy_gradient"
@@ -40,6 +38,7 @@ health_interval_ms = 300
 [model]
 # Base model start
 base = "Qwen/Qwen3-4B"
+trainer_mode = "full"
 label = "crafter-rl-from-base"
 [rollout]
@@ -50,6 +49,7 @@ policy_name = "crafter-react"
 max_concurrent_rollouts = 8
 batches_per_step = 2
 ops = ["agent", "env"]
+task_app_origin_rewards_only = true
 [evaluation]
 # Run baseline evaluation over the first 100 seeds every 20 training iterations
@@ -62,6 +62,7 @@ seeds = [
 [training]
 num_epochs = 1
 iterations_per_epoch = 10
+max_turns = 10
 batch_size = 16
 group_size = 4
 gradient_accumulation_steps = 1

examples/warming_up_to_rl/run_local_rollout_traced.py CHANGED Viewed

@@ -448,7 +448,7 @@ async def main() -> None:
             print(f"Ops executed: {ops}")
             print(
-                "Tip: export TASKAPP_TRACING_ENABLED=1 and optionally TASKAPP_SFT_OUTPUT_DIR before running `uvx synth-ai serve …` to persist traces/SFT."
+                "Tip: export TASKAPP_TRACING_ENABLED=1 and optionally TASKAPP_SFT_OUTPUT_DIR before running `uvx synth-ai deploy --runtime uvicorn …` to persist traces/SFT."
             )
         except httpx.HTTPStatusError as exc:
             detail = (

examples/warming_up_to_rl/task_app/README.md CHANGED Viewed

@@ -6,7 +6,7 @@ underlying FastAPI plumbing.
 ## Local development
 ```bash
-uvx synth-ai serve grpo-crafter --port 8001
+uvx synth-ai deploy --runtime uvicorn grpo-crafter --port 8001
 # Optional extras:
 #   --env-file path/to/.env    # load additional environment variables
 #   --reload                   # enable uvicorn auto-reload

synth-ai 0.2.16__py3-none-any.whl → 0.2.19__py3-none-any.whl

Potentially problematic release.

synth-ai 0.2.16__py3-none-any.whl → 0.2.19__py3-none-any.whl