PyPI - synth-ai - Versions diffs - 0.2.16__py3-none-any.whl → 0.2.19__py3-none-any.whl - Mend

synth-ai 0.2.16__py3-none-any.whl → 0.2.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of synth-ai might be problematic. Click here for more details.

Files changed (299) hide show

examples/analyze_semantic_words.sh +2 -2
examples/baseline/banking77_baseline.py +204 -0
examples/baseline/crafter_baseline.py +407 -0
examples/baseline/pokemon_red_baseline.py +326 -0
examples/baseline/simple_baseline.py +56 -0
examples/baseline/warming_up_to_rl_baseline.py +239 -0
examples/blog_posts/gepa/README.md +355 -0
examples/blog_posts/gepa/configs/banking77_gepa_local.toml +95 -0
examples/blog_posts/gepa/configs/banking77_gepa_test.toml +82 -0
examples/blog_posts/gepa/configs/banking77_mipro_local.toml +52 -0
examples/blog_posts/gepa/configs/hotpotqa_gepa_local.toml +59 -0
examples/blog_posts/gepa/configs/hotpotqa_gepa_qwen.toml +36 -0
examples/blog_posts/gepa/configs/hotpotqa_mipro_local.toml +53 -0
examples/blog_posts/gepa/configs/hover_gepa_local.toml +59 -0
examples/blog_posts/gepa/configs/hover_gepa_qwen.toml +36 -0
examples/blog_posts/gepa/configs/hover_mipro_local.toml +53 -0
examples/blog_posts/gepa/configs/ifbench_gepa_local.toml +59 -0
examples/blog_posts/gepa/configs/ifbench_gepa_qwen.toml +36 -0
examples/blog_posts/gepa/configs/ifbench_mipro_local.toml +53 -0
examples/blog_posts/gepa/configs/pupa_gepa_local.toml +60 -0
examples/blog_posts/gepa/configs/pupa_mipro_local.toml +54 -0
examples/blog_posts/gepa/deploy_banking77_task_app.sh +41 -0
examples/blog_posts/gepa/gepa_baseline.py +204 -0
examples/blog_posts/gepa/query_prompts_example.py +97 -0
examples/blog_posts/gepa/run_gepa_banking77.sh +87 -0
examples/blog_posts/gepa/task_apps.py +105 -0
examples/blog_posts/gepa/test_gepa_local.sh +67 -0
examples/blog_posts/gepa/verify_banking77_setup.sh +123 -0
examples/blog_posts/pokemon_vl/README.md +98 -0
examples/blog_posts/pokemon_vl/configs/eval_gpt5nano.toml +26 -0
examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +27 -0
examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +43 -0
examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
examples/blog_posts/pokemon_vl/extract_images.py +239 -0
examples/blog_posts/pokemon_vl/pokemon_vl_baseline.py +326 -0
examples/blog_posts/pokemon_vl/run_eval_extract_images.py +209 -0
examples/blog_posts/pokemon_vl/run_qwen_eval_extract_images.py +212 -0
examples/blog_posts/pokemon_vl/text_box_analysis.md +106 -0
examples/blog_posts/warming_up_to_rl/ARCHITECTURE.md +195 -0
examples/blog_posts/warming_up_to_rl/FINAL_TEST_RESULTS.md +127 -0
examples/blog_posts/warming_up_to_rl/INFERENCE_SUCCESS.md +132 -0
examples/blog_posts/warming_up_to_rl/README.md +158 -0
examples/blog_posts/warming_up_to_rl/SMOKE_TESTING.md +164 -0
examples/blog_posts/warming_up_to_rl/SMOKE_TEST_COMPLETE.md +253 -0
examples/blog_posts/warming_up_to_rl/configs/eval_baseline_qwen32b_10x20.toml +25 -0
examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b_10x20.toml +26 -0
examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
examples/blog_posts/warming_up_to_rl/configs/smoke_test.toml +75 -0
examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +91 -0
examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
examples/blog_posts/warming_up_to_rl/warming_up_to_rl_baseline.py +187 -0
examples/dev/qwen3_32b_qlora_4xh100.toml +5 -0
examples/multi_step/configs/VERILOG_REWARDS.md +4 -0
examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +4 -0
examples/multi_step/configs/crafter_rl_outcome.toml +2 -1
examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +65 -107
examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +2 -1
examples/multi_step/configs/crafter_rl_stepwise_simple.toml +2 -1
examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
examples/multi_step/configs/verilog_rl_lora.toml +80 -123
examples/qwen_coder/configs/coder_lora_30b.toml +1 -3
examples/qwen_coder/configs/coder_lora_4b.toml +4 -1
examples/qwen_coder/configs/coder_lora_small.toml +1 -3
examples/qwen_vl/README.md +10 -12
examples/qwen_vl/SETUP_COMPLETE.md +7 -8
examples/qwen_vl/VISION_TESTS_COMPLETE.md +2 -3
examples/qwen_vl/collect_data_via_cli.md +76 -84
examples/qwen_vl/collect_vision_traces.py +4 -4
examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +40 -57
examples/qwen_vl/configs/crafter_vlm_sft_example.toml +1 -2
examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +20 -37
examples/qwen_vl/configs/eval_gpt5nano_vision.toml +21 -40
examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
examples/qwen_vl/configs/{filter_qwen2vl_sft.toml → filter_qwen3vl_sft.toml} +4 -5
examples/qwen_vl/configs/filter_vision_sft.toml +2 -3
examples/qwen_vl/crafter_qwen_vl_agent.py +5 -5
examples/qwen_vl/run_vision_comparison.sh +6 -7
examples/rl/README.md +5 -5
examples/rl/configs/rl_from_base_qwen.toml +26 -1
examples/rl/configs/rl_from_base_qwen17.toml +6 -2
examples/rl/task_app/README.md +1 -2
examples/rl/task_app/math_single_step.py +2 -2
examples/run_crafter_demo.sh +2 -2
examples/sft/README.md +1 -1
examples/sft/configs/crafter_fft_qwen0p6b.toml +4 -1
examples/sft/configs/crafter_lora_qwen0p6b.toml +4 -1
examples/swe/task_app/README.md +32 -2
examples/swe/task_app/grpo_swe_mini.py +4 -0
examples/swe/task_app/hosted/envs/crafter/react_agent.py +1 -1
examples/swe/task_app/hosted/envs/mini_swe/environment.py +37 -10
examples/swe/task_app/hosted/inference/openai_client.py +4 -38
examples/swe/task_app/hosted/policy_routes.py +17 -0
examples/swe/task_app/hosted/rollout.py +4 -2
examples/swe/task_app/morph_backend.py +178 -0
examples/task_apps/banking77/__init__.py +6 -0
examples/task_apps/banking77/banking77_task_app.py +841 -0
examples/task_apps/banking77/deploy_wrapper.py +46 -0
examples/task_apps/crafter/CREATE_SFT_DATASET.md +4 -0
examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +4 -0
examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +4 -0
examples/task_apps/crafter/task_app/README.md +1 -1
examples/task_apps/crafter/task_app/grpo_crafter.py +90 -5
examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +1 -1
examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +4 -26
examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -2
examples/task_apps/crafter/task_app/synth_envs_hosted/hosted_app.py +49 -0
examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +372 -107
examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +81 -12
examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +82 -11
examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +194 -1
examples/task_apps/enron/task_app/grpo_enron_task_app.py +1 -1
examples/task_apps/gepa_benchmarks/__init__.py +7 -0
examples/task_apps/gepa_benchmarks/common.py +260 -0
examples/task_apps/gepa_benchmarks/hotpotqa_task_app.py +507 -0
examples/task_apps/gepa_benchmarks/hover_task_app.py +436 -0
examples/task_apps/gepa_benchmarks/ifbench_task_app.py +563 -0
examples/task_apps/gepa_benchmarks/pupa_task_app.py +460 -0
examples/task_apps/math/README.md +1 -2
examples/task_apps/pokemon_red/README.md +3 -4
examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +4 -0
examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +6 -5
examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +1 -2
examples/task_apps/pokemon_red/task_app.py +288 -39
examples/task_apps/sokoban/README.md +2 -3
examples/task_apps/verilog/eval_groq_qwen32b.toml +12 -14
examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +1 -1
examples/vlm/configs/crafter_vlm_gpt4o.toml +4 -1
examples/warming_up_to_rl/configs/crafter_fft.toml +4 -1
examples/warming_up_to_rl/configs/crafter_fft_4b.toml +0 -2
examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +3 -2
examples/warming_up_to_rl/run_local_rollout_traced.py +1 -1
examples/warming_up_to_rl/task_app/README.md +1 -1
examples/warming_up_to_rl/task_app/grpo_crafter.py +185 -5
examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +1 -1
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +3 -27
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -1
examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +49 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +156 -45
examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +37 -4
examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +33 -3
examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +67 -0
examples/workflows/math_rl/configs/rl_from_base_qwen.toml +27 -0
examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +6 -0
synth_ai/api/train/builders.py +99 -4
synth_ai/api/train/cli.py +516 -26
synth_ai/api/train/config_finder.py +13 -2
synth_ai/api/train/configs/__init__.py +23 -2
synth_ai/api/train/configs/prompt_learning.py +442 -0
synth_ai/api/train/configs/rl.py +61 -7
synth_ai/api/train/configs/sft.py +6 -2
synth_ai/api/train/configs/shared.py +59 -2
synth_ai/api/train/task_app.py +1 -1
synth_ai/api/train/validators.py +277 -0
synth_ai/auth/credentials.py +119 -0
synth_ai/baseline/__init__.py +25 -0
synth_ai/baseline/config.py +209 -0
synth_ai/baseline/discovery.py +214 -0
synth_ai/baseline/execution.py +146 -0
synth_ai/cli/__init__.py +94 -18
synth_ai/cli/__main__.py +0 -0
synth_ai/cli/claude.py +70 -0
synth_ai/cli/codex.py +84 -0
synth_ai/cli/commands/__init__.py +18 -0
synth_ai/cli/commands/baseline/__init__.py +12 -0
synth_ai/cli/commands/baseline/core.py +637 -0
synth_ai/cli/commands/baseline/list.py +93 -0
synth_ai/cli/commands/demo/__init__.py +6 -0
synth_ai/cli/commands/demo/core.py +163 -0
synth_ai/cli/commands/eval/__init__.py +19 -0
synth_ai/cli/commands/eval/core.py +1112 -0
synth_ai/cli/commands/eval/errors.py +81 -0
synth_ai/cli/commands/eval/validation.py +133 -0
synth_ai/cli/commands/filter/__init__.py +12 -0
synth_ai/cli/commands/filter/core.py +424 -0
synth_ai/cli/commands/filter/errors.py +55 -0
synth_ai/cli/commands/filter/validation.py +77 -0
synth_ai/cli/commands/help/__init__.py +177 -0
synth_ai/cli/commands/help/core.py +72 -0
synth_ai/cli/commands/smoke/__init__.py +7 -0
synth_ai/cli/commands/smoke/core.py +1436 -0
synth_ai/cli/commands/status/__init__.py +64 -0
synth_ai/cli/commands/status/client.py +192 -0
synth_ai/cli/commands/status/config.py +92 -0
synth_ai/cli/commands/status/errors.py +20 -0
synth_ai/cli/commands/status/formatters.py +164 -0
synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
synth_ai/cli/commands/status/subcommands/files.py +79 -0
synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
synth_ai/cli/commands/status/subcommands/models.py +79 -0
synth_ai/cli/commands/status/subcommands/pricing.py +22 -0
synth_ai/cli/commands/status/subcommands/runs.py +81 -0
synth_ai/cli/commands/status/subcommands/summary.py +47 -0
synth_ai/cli/commands/status/subcommands/usage.py +203 -0
synth_ai/cli/commands/status/utils.py +114 -0
synth_ai/cli/commands/train/__init__.py +53 -0
synth_ai/cli/commands/train/core.py +21 -0
synth_ai/cli/commands/train/errors.py +117 -0
synth_ai/cli/commands/train/judge_schemas.py +200 -0
synth_ai/cli/commands/train/judge_validation.py +305 -0
synth_ai/cli/commands/train/validation.py +386 -0
synth_ai/cli/demo.py +30 -158
synth_ai/cli/deploy/__init__.py +43 -0
synth_ai/cli/deploy.py +162 -0
synth_ai/cli/eval/__init__.py +36 -0
synth_ai/cli/eval/core.py +5 -0
synth_ai/cli/eval/errors.py +31 -0
synth_ai/cli/eval/validation.py +5 -0
synth_ai/cli/filter/__init__.py +28 -0
synth_ai/cli/filter/core.py +5 -0
synth_ai/cli/filter/errors.py +23 -0
synth_ai/cli/filter/validation.py +5 -0
synth_ai/cli/legacy_root_backup.py +14 -8
synth_ai/cli/modal_serve/__init__.py +12 -0
synth_ai/cli/modal_serve/core.py +14 -0
synth_ai/cli/modal_serve/errors.py +8 -0
synth_ai/cli/modal_serve/validation.py +11 -0
synth_ai/cli/opencode.py +107 -0
synth_ai/cli/root.py +9 -5
synth_ai/cli/serve/__init__.py +12 -0
synth_ai/cli/serve/core.py +14 -0
synth_ai/cli/serve/errors.py +8 -0
synth_ai/cli/serve/validation.py +11 -0
synth_ai/cli/setup.py +20 -265
synth_ai/cli/status.py +7 -126
synth_ai/cli/task_app_deploy.py +1 -10
synth_ai/cli/task_app_modal_serve.py +4 -9
synth_ai/cli/task_app_serve.py +4 -11
synth_ai/cli/task_apps.py +51 -1480
synth_ai/cli/train/__init__.py +12 -0
synth_ai/cli/train/core.py +21 -0
synth_ai/cli/train/errors.py +8 -0
synth_ai/cli/train/validation.py +24 -0
synth_ai/cli/train.py +1 -14
synth_ai/demos/crafter/grpo_crafter_task_app.py +1 -1
synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +7 -4
synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +9 -5
synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +4 -3
synth_ai/environments/examples/red/engine.py +33 -12
synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
synth_ai/environments/examples/red/environment.py +26 -0
synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
synth_ai/http.py +12 -0
synth_ai/judge_schemas.py +10 -10
synth_ai/learning/__init__.py +10 -0
synth_ai/learning/prompt_learning_client.py +276 -0
synth_ai/learning/prompt_learning_types.py +184 -0
synth_ai/learning/rl/client.py +3 -1
synth_ai/pricing/__init__.py +2 -0
synth_ai/pricing/model_pricing.py +57 -0
synth_ai/streaming/__init__.py +29 -0
synth_ai/streaming/config.py +94 -0
synth_ai/streaming/handlers.py +518 -0
synth_ai/streaming/streamer.py +320 -0
synth_ai/streaming/types.py +95 -0
synth_ai/task/apps/__init__.py +1 -0
synth_ai/task/config.py +2 -0
synth_ai/task/tracing_utils.py +25 -25
synth_ai/task/validators.py +45 -9
synth_ai/task_app_cfgs.py +21 -0
synth_ai/tracing_v3/config.py +162 -19
synth_ai/tracing_v3/constants.py +1 -1
synth_ai/tracing_v3/db_config.py +24 -38
synth_ai/tracing_v3/migration_helper.py +1 -2
synth_ai/tracing_v3/storage/config.py +47 -13
synth_ai/tracing_v3/storage/factory.py +3 -3
synth_ai/tracing_v3/turso/daemon.py +113 -11
synth_ai/tracing_v3/turso/native_manager.py +92 -16
synth_ai/types.py +8 -0
synth_ai/urls.py +11 -0
synth_ai/utils/__init__.py +30 -1
synth_ai/utils/agents.py +74 -0
synth_ai/utils/bin.py +39 -0
synth_ai/utils/cli.py +149 -5
synth_ai/utils/env.py +40 -33
synth_ai/utils/http.py +4 -1
synth_ai/utils/json.py +72 -0
synth_ai/utils/modal.py +285 -3
synth_ai/utils/paths.py +48 -0
synth_ai/utils/uvicorn.py +113 -0
{synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/METADATA +109 -6
{synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/RECORD +291 -142
examples/qwen_vl/configs/eval_qwen2vl_vision.toml +0 -44
synth_ai/cli/tui.py +0 -62
synth_ai/tui/__init__.py +0 -5
synth_ai/tui/__main__.py +0 -13
synth_ai/tui/cli/__init__.py +0 -1
synth_ai/tui/cli/query_experiments.py +0 -164
synth_ai/tui/cli/query_experiments_v3.py +0 -164
synth_ai/tui/dashboard.py +0 -911
{synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/WHEEL +0 -0
{synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/entry_points.txt +0 -0
{synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/licenses/LICENSE +0 -0
{synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/top_level.txt +0 -0

examples/blog_posts/warming_up_to_rl/configs/eval_baseline_qwen32b_10x20.toml ADDED Viewed

@@ -0,0 +1,25 @@
+[eval]
+app_id = "grpo-crafter"
+task_app_url = "https://synth-laboratories--grpo-crafter-task-app-fastapi-app-dev.modal.run"
+model = "Qwen/Qwen3-4B"
+seeds = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
+max_turns = 20
+concurrency = 1
+env_name = "crafter"
+policy_name = "crafter-react"
+trace_format = "structured"
+return_trace = true
+[eval.policy_config]
+provider = "synth"
+model = "Qwen/Qwen3-4B"
+inference_url = "https://synth-laboratories-dev--learning-v2-service-fastapi-app.modal.run"
+temperature = 0.6
+top_p = 0.95
+max_tokens = 2048
+use_vision = false
+image_only_mode = false
+max_llm_calls = 10
+[eval.env_config.env_params]
+max_steps_per_episode = 20

examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml ADDED Viewed

@@ -0,0 +1,25 @@
+# Evaluate the finetuned Qwen3-4B checkpoint on Crafter.
+# Replace model with the fft: job id returned by the SFT run.
+[eval]
+app_id = "grpo-crafter"
+task_app_url = "https://synth-laboratories--grpo-crafter-task-app-fastapi-app-dev.modal.run"
+model = "fft:REPLACE-WITH-SFT-JOB-ID"
+seeds = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
+max_turns = 10
+concurrency = 4
+env_name = "crafter"
+policy_name = "crafter-react"
+trace_format = "compact"
+return_trace = false
+[eval.policy_config]
+provider = "synth"
+model = "fft:REPLACE-WITH-SFT-JOB-ID"
+temperature = 0.2
+top_p = 0.8
+max_tokens = 512
+use_vision = true
+image_only_mode = false
+max_llm_calls = 10
+tool_choice = "auto"

examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b_10x20.toml ADDED Viewed

@@ -0,0 +1,26 @@
+[eval]
+app_id = "grpo-crafter"
+task_app_url = "https://synth-laboratories--grpo-crafter-task-app-fastapi-app-dev.modal.run"
+model = "peft:Qwen/Qwen3-4B:job_f774218e6c954517"
+seeds = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
+max_turns = 20
+concurrency = 2
+env_name = "crafter"
+policy_name = "crafter-react"
+trace_format = "structured"
+return_trace = true
+[eval.policy_config]
+provider = "synth"
+model = "peft:Qwen/Qwen3-4B:job_f774218e6c954517"
+inference_url = "https://synth-laboratories-dev--learning-v2-service-fastapi-app.modal.run"
+temperature = 0.2
+top_p = 0.8
+max_tokens = 1024
+use_vision = false
+image_only_mode = false
+max_llm_calls = 10
+tool_choice = "auto"
+[eval.env_config.env_params]
+max_steps_per_episode = 20

examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml ADDED Viewed

@@ -0,0 +1,25 @@
+[eval]
+app_id = "grpo-crafter"
+task_app_url = "https://synth-laboratories--grpo-crafter-task-app-fastapi-app-dev.modal.run"
+model = "qwen/qwen3-32b"
+seeds = [ 0, 1, 2,]
+max_turns = 10
+concurrency = 1
+env_name = "crafter"
+policy_name = "crafter-react"
+trace_format = "full"
+return_trace = true
+[eval.policy_config]
+provider = "groq"
+model = "qwen/qwen3-32b"
+inference_url = "https://api.groq.com/openai"
+temperature = 0.6
+top_p = 0.95
+max_tokens = 8192
+use_vision = false
+image_only_mode = false
+max_llm_calls = 10
+[eval.env_config.env_params]
+max_steps_per_episode = 10

examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml ADDED Viewed

@@ -0,0 +1,29 @@
+# Crafter rollout config for GPT-OSS-120B served from OpenAI-compatible APIs.
+# Replace the task_app_url with your deployed Crafter task app URL.
+# The run stores full traces so we can keep the LLM reasoning for fine-tuning.
+[eval]
+app_id = "grpo-crafter"
+task_app_url = "https://synth-laboratories--grpo-crafter-task-app-fastapi-app-dev.modal.run"
+model = "openai/gpt-oss-120b"
+seeds = [0, 1, 2]
+max_turns = 10
+concurrency = 1
+env_name = "crafter"
+policy_name = "crafter-react"
+trace_format = "full"
+return_trace = true
+[eval.env_config]
+env_params = { max_steps_per_episode = 10 }
+[eval.policy_config]
+provider = "groq"
+model = "openai/gpt-oss-120b"
+inference_url = "https://api.groq.com/openai"
+temperature = 0.6
+top_p = 0.9
+max_tokens = 768
+use_vision = false
+image_only_mode = false
+max_llm_calls = 10

examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml ADDED Viewed

@@ -0,0 +1,10 @@
+# Filters Crafter traces into an instruction-tuning dataset.
+# Assumes you stored rollouts in traces/v3/crafter_blog.db via `uvx synth-ai eval`.
+[filter]
+db = "sqlite+libsql://http://127.0.0.1:8080"
+output = "examples/blog_posts/warming_up_to_rl/ft_data/crafter_blog_high_reward.jsonl"
+min_official_score = 0.1
+models = ["qwen/qwen3-32b", "openai/gpt-oss-120b"]
+shuffle = true
+shuffle_seed = 42

examples/blog_posts/warming_up_to_rl/configs/smoke_test.toml ADDED Viewed

@@ -0,0 +1,75 @@
+# Example RL config with smoke testing enabled
+# This config demonstrates auto-starting task app and sqld for easy smoke testing
+type = "rl"
+# Smoke testing configuration - AUTO-STARTS services in background!
+[smoke]
+# Auto-start the task app server
+task_app_name = "grpo-crafter"  # Your task app name (use "synth-ai task-app list" to see available apps)
+task_app_port = 8765
+task_app_env_file = ".env"  # Required for this task app
+task_app_force = true  # Kill any existing process on this port
+# Auto-start sqld for tracing
+sqld_auto_start = true
+sqld_db_path = "./traces/local.db"
+sqld_hrana_port = 8080
+sqld_http_port = 8081
+# Test parameters
+env_name = "crafter"
+policy_name = "crafter-react"
+max_steps = 10
+policy = "gpt-5-nano"  # Use gpt-5-nano policy with mock backend
+model = "gpt-4o-mini"  # Real model to use via OpenAI
+mock_backend = "openai"  # Use OpenAI backend for real inference and tool calls
+return_trace = true
+use_mock = true  # Use mock proxy that routes to OpenAI
+# RL Training Configuration (used by actual training, not smoke tests)
+[algorithm]
+type = "online"
+method = "policy_gradient"
+variety = "gspo"
+[policy]
+model_name = "Qwen/Qwen3-4B"
+trainer_mode = "full"
+label = "crafter-rl-demo"
+[compute]
+gpu_type = "H100"
+gpu_count = 2
+[compute.topology]
+type = "single_node_split"
+gpus_for_vllm = 1
+gpus_for_training = 1
+[services]
+task_url = "http://localhost:8765"
+[rollout]
+env_name = "crafter"
+policy_name = "crafter-react"
+max_turns = 10
+episodes_per_batch = 16
+max_concurrent_rollouts = 4
+task_app_origin_rewards_only = true
+[training]
+num_epochs = 1
+iterations_per_epoch = 10
+max_turns = 10
+batch_size = 4
+group_size = 4
+learning_rate = 5e-5
+weight_sync_interval = 1
+log_interval = 1
+[evaluation]
+instances = 2
+every_n_iters = 1
+seeds = [0, 1]

examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml ADDED Viewed

@@ -0,0 +1,91 @@
+# Continue training the finetuned Crafter policy with GRPO-style RL.
+# Fill in task_url with your deployed task app and set model.source to the
+# finetuned model id returned by `uvx synth-ai train --type sft`.
+type = "rl"
+# [smoke] section is OPTIONAL and only used by `synth-ai smoke` command for local testing.
+# This section is completely IGNORED by the RL trainer and will not affect training jobs.
+# It allows you to quickly test your task app without passing many CLI arguments:
+#   uvx synth-ai smoke --config this-file.toml
+# All values are optional; CLI args override TOML values.
+[smoke]
+task_url = "https://synth-laboratories--crafter-blogpost-fastapi-app-dev.modal.run"
+env_name = "crafter"
+policy_name = "crafter-react"
+max_steps = 10
+policy = "mock"  # mock, gpt-5-nano, openai, groq
+model = "gpt-5-nano"
+mock_backend = "openai"  # synthetic or openai
+mock_port = 0  # 0 = auto-assign
+return_trace = true
+use_mock = true
+[algorithm]
+type = "online"
+method = "policy_gradient"
+variety = "gspo"
+[services]
+task_url = "https://synth-laboratories--crafter-blogpost-fastapi-app-dev.modal.run"
+judge_url = "https://synth-backend-dev-docker.onrender.com/api"
+[compute]
+gpu_type = "H200"
+gpu_count = 2
+[compute.topology]
+reference_placement = "none"
+[topology]
+type = "single_node_split"
+reference_placement = "none"
+gpus_for_vllm = 1
+gpus_for_training = 1
+gpus_for_ref = 0
+tensor_parallel = 1
+[vllm]
+tensor_parallel_size = 1
+max_model_len = 8192
+[reference]
+placement = "none"
+[model]
+base = "Qwen/Qwen3-4B"
+trainer_mode = "lora"
+label = "crafter-rl-baseline"
+[rollout]
+env_name = "crafter"
+policy_name = "crafter-react"
+max_turns = 10
+episodes_per_batch = 20
+max_concurrent_rollouts = 8
+rubric_rewards_only = false
+task_app_origin_rewards_only = true
+[evaluation]
+instances = 100
+every_n_iters = 20
+seeds = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99]
+[training]
+num_epochs = 1
+iterations_per_epoch = 1
+max_turns = 10
+batch_size = 2
+group_size = 2
+learning_rate = 5e-6
+weight_sync_interval = 1
+log_interval = 1
+max_completion_tokens = 256
+async_semaphore_max = 4
+[training.weight_sync]
+enable = true
+targets = ["policy"]
+weight_sync_interval = 1
+[rubric]
+enabled = false

examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml ADDED Viewed

@@ -0,0 +1,40 @@
+# Full-finetune Qwen3-4B on filtered Crafter traces.
+# Update the `data` path once `uvx synth-ai filter` produces your JSONL.
+[algorithm]
+type = "offline"
+method = "sft"
+variety = "fft"
+[job]
+model = "Qwen/Qwen3-4B"
+data = "examples/blog_posts/warming_up_to_rl/ft_data/crafter_blog_high_reward.jsonl"
+poll_seconds = 1800
+[compute]
+gpu_type = "H100"
+gpu_count = 4
+nodes = 1
+[data.topology]
+container_count = 4
+[training]
+mode = "full_finetune"
+use_qlora = false
+[hyperparameters]
+n_epochs = 2
+world_size = 4
+sequence_length = 2048
+per_device_batch = 2
+gradient_accumulation_steps = 64
+learning_rate = 8e-6
+warmup_ratio = 0.03
+[hyperparameters.parallelism]
+use_deepspeed = true
+deepspeed_stage = 3
+fsdp = false
+bf16 = true
+fp16 = false

examples/blog_posts/warming_up_to_rl/warming_up_to_rl_baseline.py ADDED Viewed

@@ -0,0 +1,187 @@
+"""Warming Up to RL baseline for Crafter.
+This baseline demonstrates how to evaluate an LLM agent on the Crafter survival game
+without requiring a deployed task app. This is the recommended starting point for coding
+agents to get a baseline score before making changes.
+Quick Start:
+    # Run a quick 3-task baseline
+    uvx synth-ai baseline warming_up_to_rl --split train --seeds 0,1,2
+    # Full train evaluation
+    uvx synth-ai baseline warming_up_to_rl --split train
+    # Compare models
+    uvx synth-ai baseline warming_up_to_rl --model groq:openai/gpt-oss-20b
+"""
+from __future__ import annotations
+import json
+from typing import Any
+try:
+    import crafter
+    CRAFTER_AVAILABLE = True
+except ImportError:
+    CRAFTER_AVAILABLE = False
+from synth_ai.baseline import BaselineConfig, BaselineTaskRunner, DataSplit, TaskResult
+from synth_ai.types import EventReward, OutcomeReward
+class CrafterRunner(BaselineTaskRunner):
+    """Task runner for Crafter environment."""
+    def __init__(self, policy_config: dict[str, Any], env_config: dict[str, Any]):
+        super().__init__(policy_config, env_config)
+        self.max_steps = env_config.get("max_steps", 1000)
+    async def run_task(self, seed: int) -> TaskResult:
+        """Run a single Crafter episode."""
+        if not CRAFTER_AVAILABLE:
+            raise ImportError(
+                "Crafter not installed. Install with: pip install crafter"
+            )
+        # Create environment
+        env = crafter.Env()
+        env.reset()
+        # Initialize tracking
+        event_rewards: list[EventReward] = []
+        achievements = {}
+        step_count = 0
+        # Get model configuration
+        from synth_ai.inference.client import InferenceClient
+        client = InferenceClient()
+        model = self.policy_config.get("model", "gpt-4o-mini")
+        temperature = self.policy_config.get("temperature", 0.7)
+        # Define action tool
+        actions = [
+            "noop", "move_left", "move_right", "move_up", "move_down",
+            "do", "sleep", "place_stone", "place_table", "place_furnace",
+            "place_plant", "make_wood_pickaxe", "make_stone_pickaxe",
+            "make_iron_pickaxe", "make_wood_sword", "make_stone_sword",
+            "make_iron_sword"
+        ]
+        action_tool = {
+            "type": "function",
+            "function": {
+                "name": "take_action",
+                "description": "Take an action in the Crafter world",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "action": {
+                            "type": "string",
+                            "enum": actions,
+                            "description": f"Action to take. Available: {', '.join(actions)}",
+                        }
+                    },
+                    "required": ["action"],
+                },
+            },
+        }
+        # Run episode
+        done = False
+        while not done and step_count < self.max_steps:
+            # Get observation (would include visual state in full implementation)
+            obs_str = f"Crafter Step {step_count}\n"
+            obs_str += f"Current achievements: {achievements}\n"
+            obs_str += "What action should you take to survive and progress?"
+            # Get action from model
+            try:
+                response = await client.generate(
+                    model=model,
+                    messages=[
+                        {
+                            "role": "system",
+                            "content": "You are an expert at survival games. Use the take_action tool to survive and achieve goals in Crafter.",
+                        },
+                        {"role": "user", "content": obs_str},
+                    ],
+                    tools=[action_tool],
+                    temperature=temperature,
+                    max_tokens=100,
+                )
+                # Extract action
+                action_name = "noop"
+                if response.get("tool_calls"):
+                    tool_call = response["tool_calls"][0]
+                    args = json.loads(tool_call["function"]["arguments"])
+                    action_name = args.get("action", "noop")
+                action_idx = actions.index(action_name) if action_name in actions else 0
+                # Take step
+                obs, reward, done, info = env.step(action_idx)
+                # Update achievements
+                if "achievements" in info:
+                    achievements.update(info["achievements"])
+                # Track rewards
+                if reward > 0:
+                    event_rewards.append(
+                        EventReward(
+                            event_id=f"step_{step_count}",
+                            reward=reward,
+                            metadata={"action": action_name, "achievements": achievements.copy()},
+                        )
+                    )
+                step_count += 1
+            except Exception as e:
+                done = True
+                break
+        # Calculate outcome reward based on achievements
+        total_achievements = sum(achievements.values())
+        success = total_achievements >= 3  # At least 3 achievements
+        return TaskResult(
+            success=success,
+            outcome_reward=OutcomeReward(
+                reward=float(total_achievements),
+                metadata={
+                    "steps": step_count,
+                    "achievements": achievements,
+                    "seed": seed,
+                },
+            ),
+            event_rewards=event_rewards,
+            total_steps=step_count,
+            metadata={"achievements": achievements},
+        )
+# Define baseline configuration (only if Crafter is available)
+if CRAFTER_AVAILABLE:
+    warming_up_to_rl_baseline = BaselineConfig(
+        baseline_id="warming_up_to_rl",
+        name="Warming Up to RL - Crafter",
+        description="Crafter survival game baseline for comparing agent performance on RL tasks",
+        task_runner=CrafterRunner,
+        splits={
+            "train": DataSplit(name="train", seeds=list(range(20))),
+            "val": DataSplit(name="val", seeds=list(range(20, 25))),
+            "test": DataSplit(name="test", seeds=list(range(25, 30))),
+        },
+        default_policy_config={
+            "model": "gpt-4o-mini",
+            "temperature": 0.7,
+        },
+        default_env_config={
+            "max_steps": 1000,
+        },
+        tags=["rl", "survival", "achievements", "blog-post"],
+    )

examples/dev/qwen3_32b_qlora_4xh100.toml CHANGED Viewed

@@ -1,3 +1,8 @@
+[algorithm]
+type = "offline"
+method = "sft"
+variety = "qlora"
 [job]
 model = "Qwen/Qwen3-32B"
 # Optionally set here; you can also pass --dataset

examples/multi_step/configs/VERILOG_REWARDS.md CHANGED Viewed

	@@ -88,3 +88,7 @@ Expected output for successful rollout:
88 88	- `mean_return` ≈ 1.0+ (if full submit success)
89 89
90 90
91	+
92	+
93	+
94	+

examples/multi_step/configs/VERILOG_RL_CHECKLIST.md CHANGED Viewed

	@@ -181,3 +181,7 @@ Before starting RL training, verify:
181 181	- [verilog_rl_lora.toml](./verilog_rl_lora.toml) - Training configuration
182 182
183 183
184	+
185	+
186	+
187	+

examples/multi_step/configs/crafter_rl_outcome.toml CHANGED Viewed

@@ -6,7 +6,7 @@ method = "policy_gradient"
 variety = "gspo"
 [services]
-# Replace with the Modal URL printed by `uvx synth-ai modal-serve grpo-crafter`
+# Replace with the Modal URL printed by `uvx synth-ai deploy --runtime modal --modal-mode serve grpo-crafter`
 task_url = "https://YOUR-MODAL-TASK-APP.modal.run"
 [compute]
@@ -46,6 +46,7 @@ policy_name = "crafter-react"
 max_concurrent_rollouts = 12
 batches_per_step = 2
 ops = ["agent", "env"]
+task_app_origin_rewards_only = true
 [evaluation]
 instances = 10

synth-ai 0.2.16__py3-none-any.whl → 0.2.19__py3-none-any.whl

Potentially problematic release.

synth-ai 0.2.16__py3-none-any.whl → 0.2.19__py3-none-any.whl