synth-ai 0.2.14__py3-none-any.whl → 0.2.16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of synth-ai has been flagged as potentially problematic; see the registry's advisory page for details.
- examples/README.md +1 -0
- examples/multi_step/SFT_README.md +147 -0
- examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +9 -9
- examples/multi_step/configs/crafter_sft_qwen30b_lora.toml +62 -0
- examples/multi_step/convert_traces_to_sft.py +84 -0
- examples/multi_step/run_sft_qwen30b.sh +45 -0
- examples/qwen_coder/configs/coder_lora_30b.toml +2 -1
- examples/qwen_coder/configs/coder_lora_4b.toml +2 -1
- examples/qwen_coder/configs/coder_lora_small.toml +2 -1
- examples/qwen_vl/BUGS_AND_FIXES.md +232 -0
- examples/qwen_vl/IMAGE_VALIDATION_COMPLETE.md +271 -0
- examples/qwen_vl/IMAGE_VALIDATION_SUMMARY.md +260 -0
- examples/qwen_vl/INFERENCE_SFT_TESTS.md +412 -0
- examples/qwen_vl/NEXT_STEPS_2B.md +325 -0
- examples/qwen_vl/QUICKSTART.md +327 -0
- examples/qwen_vl/QUICKSTART_RL_VISION.md +110 -0
- examples/qwen_vl/README.md +154 -0
- examples/qwen_vl/RL_VISION_COMPLETE.md +475 -0
- examples/qwen_vl/RL_VISION_TESTING.md +333 -0
- examples/qwen_vl/SDK_VISION_INTEGRATION.md +328 -0
- examples/qwen_vl/SETUP_COMPLETE.md +275 -0
- examples/qwen_vl/VISION_TESTS_COMPLETE.md +490 -0
- examples/qwen_vl/VLM_PIPELINE_COMPLETE.md +242 -0
- examples/qwen_vl/__init__.py +2 -0
- examples/qwen_vl/collect_data_via_cli.md +423 -0
- examples/qwen_vl/collect_vision_traces.py +368 -0
- examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +127 -0
- examples/qwen_vl/configs/crafter_vlm_sft_example.toml +60 -0
- examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +43 -0
- examples/qwen_vl/configs/eval_gpt4o_vision_proper.toml +29 -0
- examples/qwen_vl/configs/eval_gpt5nano_vision.toml +45 -0
- examples/qwen_vl/configs/eval_qwen2vl_vision.toml +44 -0
- examples/qwen_vl/configs/filter_qwen2vl_sft.toml +50 -0
- examples/qwen_vl/configs/filter_vision_sft.toml +53 -0
- examples/qwen_vl/configs/filter_vision_test.toml +8 -0
- examples/qwen_vl/configs/sft_qwen3_vl_2b_test.toml +54 -0
- examples/qwen_vl/crafter_gpt5nano_agent.py +308 -0
- examples/qwen_vl/crafter_qwen_vl_agent.py +300 -0
- examples/qwen_vl/run_vision_comparison.sh +62 -0
- examples/qwen_vl/run_vision_sft_pipeline.sh +175 -0
- examples/qwen_vl/test_image_validation.py +201 -0
- examples/qwen_vl/test_sft_vision_data.py +110 -0
- examples/rl/README.md +1 -1
- examples/rl/configs/eval_base_qwen.toml +17 -0
- examples/rl/configs/eval_rl_qwen.toml +13 -0
- examples/rl/configs/rl_from_base_qwen.toml +37 -0
- examples/rl/configs/rl_from_base_qwen17.toml +76 -0
- examples/rl/configs/rl_from_ft_qwen.toml +37 -0
- examples/rl/run_eval.py +436 -0
- examples/rl/run_rl_and_save.py +111 -0
- examples/rl/task_app/README.md +22 -0
- examples/rl/task_app/math_single_step.py +990 -0
- examples/rl/task_app/math_task_app.py +111 -0
- examples/sft/README.md +5 -5
- examples/sft/configs/crafter_fft_qwen0p6b.toml +4 -2
- examples/sft/configs/crafter_lora_qwen0p6b.toml +4 -3
- examples/sft/evaluate.py +2 -4
- examples/sft/export_dataset.py +7 -4
- examples/swe/task_app/README.md +1 -1
- examples/swe/task_app/grpo_swe_mini.py +0 -1
- examples/swe/task_app/grpo_swe_mini_task_app.py +0 -12
- examples/swe/task_app/hosted/envs/mini_swe/environment.py +13 -13
- examples/swe/task_app/hosted/policy_routes.py +0 -2
- examples/swe/task_app/hosted/rollout.py +0 -8
- examples/task_apps/crafter/task_app/grpo_crafter.py +4 -7
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +59 -1
- examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +30 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +62 -31
- examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +16 -14
- examples/task_apps/enron/__init__.py +1 -0
- examples/vlm/README.md +3 -3
- examples/vlm/configs/crafter_vlm_gpt4o.toml +2 -0
- examples/vlm/crafter_openai_vlm_agent.py +3 -5
- examples/vlm/filter_image_rows.py +1 -1
- examples/vlm/run_crafter_vlm_benchmark.py +2 -2
- examples/warming_up_to_rl/_utils.py +92 -0
- examples/warming_up_to_rl/analyze_trace_db.py +1 -1
- examples/warming_up_to_rl/configs/crafter_fft.toml +2 -0
- examples/warming_up_to_rl/configs/crafter_fft_4b.toml +2 -0
- examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +2 -0
- examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +2 -0
- examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +2 -1
- examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +2 -1
- examples/warming_up_to_rl/configs/rl_from_ft.toml +2 -0
- examples/warming_up_to_rl/export_trace_sft.py +174 -60
- examples/warming_up_to_rl/readme.md +63 -132
- examples/warming_up_to_rl/run_fft_and_save.py +1 -1
- examples/warming_up_to_rl/run_rl_and_save.py +1 -1
- examples/warming_up_to_rl/task_app/README.md +42 -0
- examples/warming_up_to_rl/task_app/grpo_crafter.py +696 -0
- examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +135 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +143 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1226 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +522 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +478 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +108 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +204 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +618 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +100 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +1081 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +195 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1861 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +211 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +161 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +137 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +62 -0
- synth_ai/__init__.py +44 -30
- synth_ai/_utils/__init__.py +47 -0
- synth_ai/_utils/base_url.py +10 -0
- synth_ai/_utils/http.py +10 -0
- synth_ai/_utils/prompts.py +10 -0
- synth_ai/_utils/task_app_state.py +12 -0
- synth_ai/_utils/user_config.py +10 -0
- synth_ai/api/models/supported.py +144 -7
- synth_ai/api/train/__init__.py +13 -1
- synth_ai/api/train/cli.py +30 -7
- synth_ai/api/train/config_finder.py +18 -11
- synth_ai/api/train/env_resolver.py +13 -10
- synth_ai/cli/__init__.py +62 -78
- synth_ai/cli/_modal_wrapper.py +7 -5
- synth_ai/cli/_typer_patch.py +0 -2
- synth_ai/cli/_validate_task_app.py +22 -4
- synth_ai/cli/legacy_root_backup.py +3 -1
- synth_ai/cli/lib/__init__.py +10 -0
- synth_ai/cli/lib/task_app_discovery.py +7 -0
- synth_ai/cli/lib/task_app_env.py +518 -0
- synth_ai/cli/recent.py +2 -1
- synth_ai/cli/setup.py +266 -0
- synth_ai/cli/status.py +1 -1
- synth_ai/cli/task_app_deploy.py +16 -0
- synth_ai/cli/task_app_list.py +25 -0
- synth_ai/cli/task_app_modal_serve.py +16 -0
- synth_ai/cli/task_app_serve.py +18 -0
- synth_ai/cli/task_apps.py +71 -31
- synth_ai/cli/traces.py +1 -1
- synth_ai/cli/train.py +18 -0
- synth_ai/cli/tui.py +7 -2
- synth_ai/cli/turso.py +1 -1
- synth_ai/cli/watch.py +1 -1
- synth_ai/demos/__init__.py +10 -0
- synth_ai/demos/core/__init__.py +28 -1
- synth_ai/demos/crafter/__init__.py +1 -0
- synth_ai/demos/crafter/crafter_fft_4b.toml +55 -0
- synth_ai/demos/crafter/grpo_crafter_task_app.py +185 -0
- synth_ai/demos/crafter/rl_from_base_qwen4b.toml +74 -0
- synth_ai/demos/demo_registry.py +176 -0
- synth_ai/demos/math/__init__.py +1 -0
- synth_ai/demos/math/_common.py +16 -0
- synth_ai/demos/math/app.py +38 -0
- synth_ai/demos/math/config.toml +76 -0
- synth_ai/demos/math/deploy_modal.py +54 -0
- synth_ai/demos/math/modal_task_app.py +702 -0
- synth_ai/demos/math/task_app_entry.py +51 -0
- synth_ai/environments/environment/core.py +7 -1
- synth_ai/environments/examples/bandit/engine.py +0 -1
- synth_ai/environments/examples/bandit/environment.py +0 -1
- synth_ai/environments/examples/wordle/environment.py +0 -1
- synth_ai/evals/base.py +16 -5
- synth_ai/evals/client.py +1 -1
- synth_ai/inference/client.py +1 -1
- synth_ai/judge_schemas.py +8 -8
- synth_ai/learning/client.py +1 -1
- synth_ai/learning/health.py +1 -1
- synth_ai/learning/jobs.py +1 -1
- synth_ai/learning/rl/client.py +1 -1
- synth_ai/learning/rl/env_keys.py +1 -1
- synth_ai/learning/rl/secrets.py +1 -1
- synth_ai/learning/sft/client.py +1 -1
- synth_ai/learning/sft/data.py +407 -4
- synth_ai/learning/validators.py +4 -1
- synth_ai/task/apps/__init__.py +4 -2
- synth_ai/task/config.py +6 -4
- synth_ai/task/rubrics/__init__.py +1 -2
- synth_ai/task/rubrics/loaders.py +14 -10
- synth_ai/task/rubrics.py +219 -0
- synth_ai/task/trace_correlation_helpers.py +24 -11
- synth_ai/task/tracing_utils.py +14 -3
- synth_ai/task/validators.py +2 -3
- synth_ai/tracing_v3/abstractions.py +3 -3
- synth_ai/tracing_v3/config.py +15 -13
- synth_ai/tracing_v3/constants.py +21 -0
- synth_ai/tracing_v3/db_config.py +3 -1
- synth_ai/tracing_v3/decorators.py +10 -7
- synth_ai/tracing_v3/llm_call_record_helpers.py +5 -5
- synth_ai/tracing_v3/session_tracer.py +7 -7
- synth_ai/tracing_v3/storage/base.py +29 -29
- synth_ai/tracing_v3/storage/config.py +3 -3
- synth_ai/tracing_v3/turso/daemon.py +8 -9
- synth_ai/tracing_v3/turso/native_manager.py +80 -72
- synth_ai/tracing_v3/utils.py +2 -2
- synth_ai/tui/cli/query_experiments.py +4 -4
- synth_ai/tui/cli/query_experiments_v3.py +4 -4
- synth_ai/tui/dashboard.py +14 -9
- synth_ai/utils/__init__.py +101 -0
- synth_ai/utils/base_url.py +94 -0
- synth_ai/utils/cli.py +131 -0
- synth_ai/utils/env.py +287 -0
- synth_ai/utils/http.py +169 -0
- synth_ai/utils/modal.py +308 -0
- synth_ai/utils/process.py +212 -0
- synth_ai/utils/prompts.py +39 -0
- synth_ai/utils/sqld.py +122 -0
- synth_ai/utils/task_app_discovery.py +882 -0
- synth_ai/utils/task_app_env.py +186 -0
- synth_ai/utils/task_app_state.py +318 -0
- synth_ai/utils/user_config.py +137 -0
- synth_ai/v0/config/__init__.py +1 -5
- synth_ai/v0/config/base_url.py +1 -7
- synth_ai/v0/tracing/config.py +1 -1
- synth_ai/v0/tracing/decorators.py +1 -1
- synth_ai/v0/tracing/upload.py +1 -1
- synth_ai/v0/tracing_v1/config.py +1 -1
- synth_ai/v0/tracing_v1/decorators.py +1 -1
- synth_ai/v0/tracing_v1/upload.py +1 -1
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.16.dist-info}/METADATA +85 -31
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.16.dist-info}/RECORD +229 -117
- synth_ai/cli/man.py +0 -106
- synth_ai/compound/cais.py +0 -0
- synth_ai/core/experiment.py +0 -13
- synth_ai/core/system.py +0 -15
- synth_ai/demo_registry.py +0 -295
- synth_ai/handshake.py +0 -109
- synth_ai/http.py +0 -26
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.16.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.16.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.16.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.16.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Quick test script to demonstrate image validation.
|
|
4
|
+
|
|
5
|
+
Run from synth-ai root:
|
|
6
|
+
uv run python examples/qwen_vl/test_image_validation.py
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from synth_ai.learning.sft.data import coerce_example, validate_vision_example
|
|
10
|
+
|
|
11
|
+
# Test cases
# Each entry pairs a raw chat-format payload ("data") with the expected
# outcome ("should_pass") of validate_vision_example(..., require_images=True).
# The invalid cases each exercise one specific malformed image_url shape.
test_cases = [
    {
        "name": "Valid - HTTP URL",
        "data": {
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": "Describe this"},
                        {"type": "image_url", "image_url": {"url": "https://example.com/image.jpg"}},
                    ],
                },
                {"role": "assistant", "content": "A beautiful image"},
            ]
        },
        "should_pass": True,
    },
    {
        "name": "Valid - Base64",
        "data": {
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {"type": "image_url", "image_url": {"url": "data:image/png;base64,iVBORw0KGgo..."}},
                    ],
                },
                {"role": "assistant", "content": "An image"},
            ]
        },
        "should_pass": True,
    },
    {
        "name": "Invalid - Empty URL",
        "data": {
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": "What's this?"},
                        {"type": "image_url", "image_url": {"url": ""}},  # Empty!
                    ],
                },
                {"role": "assistant", "content": "Response"},
            ]
        },
        "should_pass": False,
    },
    {
        "name": "Invalid - Missing URL field",
        "data": {
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {"type": "image_url", "image_url": {}},  # No url field!
                    ],
                },
                {"role": "assistant", "content": "Response"},
            ]
        },
        "should_pass": False,
    },
    {
        "name": "Invalid - Null URL",
        "data": {
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {"type": "image_url", "image_url": {"url": None}},  # Null!
                    ],
                },
                {"role": "assistant", "content": "Response"},
            ]
        },
        "should_pass": False,
    },
    {
        "name": "Invalid - Whitespace URL",
        "data": {
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {"type": "image_url", "image_url": {"url": "   "}},  # Whitespace!
                    ],
                },
                {"role": "assistant", "content": "Response"},
            ]
        },
        "should_pass": False,
    },
    {
        "name": "Invalid - Mixed valid and invalid",
        "data": {
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {"type": "image_url", "image_url": {"url": "https://example.com/valid.jpg"}},
                        {"type": "image_url", "image_url": {"url": ""}},  # One invalid!
                    ],
                },
                {"role": "assistant", "content": "Response"},
            ]
        },
        "should_pass": False,
    },
    {
        "name": "Invalid - Non-string URL",
        "data": {
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {"type": "image_url", "image_url": {"url": 12345}},  # Integer!
                    ],
                },
                {"role": "assistant", "content": "Response"},
            ]
        },
        "should_pass": False,
    },
]
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def main() -> int:
    """Run every entry in ``test_cases`` and report a pass/fail summary.

    A case passes when ``validate_vision_example`` agrees with the case's
    ``should_pass`` flag; an exception counts as a rejection, so it passes
    exactly when the case was expected to fail.

    Returns:
        0 when all cases behave as expected, 1 otherwise (usable directly
        as a process exit code).
    """
    print("=" * 80)
    print("IMAGE VALIDATION TEST")
    print("=" * 80)
    print()

    passed = 0
    failed = 0

    for test in test_cases:
        name = test["name"]
        data = test["data"]
        should_pass = test["should_pass"]

        try:
            example = coerce_example(data)
            is_valid, error = validate_vision_example(example, require_images=True)
        except Exception as exc:  # any raise is treated as a rejection of the example
            if should_pass:
                print(f"❌ FAIL: {name}")
                print(f"   → Unexpected exception: {exc}")
                failed += 1
            else:
                print(f"✅ PASS: {name}")
                print(f"   → Correctly raised exception: {exc}")
                passed += 1
            print()
            continue

        if is_valid == should_pass:
            print(f"✅ PASS: {name}")
            if should_pass:
                # no f-prefix: these messages have no placeholders (ruff F541)
                print("   → Correctly accepted valid example")
            else:
                print(f"   → Correctly rejected: {error}")
            passed += 1
        else:
            print(f"❌ FAIL: {name}")
            if should_pass:
                print(f"   → Should pass but got error: {error}")
            else:
                print("   → Should fail but passed validation")
            failed += 1
        print()

    print("=" * 80)
    print(f"RESULTS: {passed}/{len(test_cases)} passed, {failed}/{len(test_cases)} failed")
    print("=" * 80)

    if failed == 0:
        print("🎉 All tests passed!")
        return 0
    print(f"⚠️ {failed} test(s) failed")
    return 1


if __name__ == "__main__":
    # raise SystemExit rather than the interactive-only builtin exit()
    raise SystemExit(main())
|
|
201
|
+
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
"""Generate test vision SFT dataset for Qwen3-VL-2B."""
|
|
2
|
+
|
|
3
|
+
import base64
|
|
4
|
+
import json
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from io import BytesIO
|
|
7
|
+
|
|
8
|
+
try:
|
|
9
|
+
from PIL import Image
|
|
10
|
+
except ImportError:
|
|
11
|
+
print("❌ PIL not available")
|
|
12
|
+
exit(1)
|
|
13
|
+
|
|
14
|
+
BASE_DIR = Path(__file__).resolve().parent
|
|
15
|
+
|
|
16
|
+
def create_test_image(color: str) -> str:
    """Render a solid 64x64 square of *color* and return it as a PNG data URL.

    *color* must be one of the five named colors below; an unknown name
    raises ``KeyError``.
    """
    palette = {
        "red": (255, 0, 0),
        "blue": (0, 0, 255),
        "green": (0, 255, 0),
        "yellow": (255, 255, 0),
        "purple": (128, 0, 128),
    }

    raw = BytesIO()
    Image.new('RGB', (64, 64), color=palette[color]).save(raw, format='PNG')
    encoded = base64.b64encode(raw.getvalue()).decode('utf-8')
    return f"data:image/png;base64,{encoded}"
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def main():
    """Build a small vision SFT JSONL dataset and sanity-check it with the SDK.

    Writes ``test_data/vision_sft_test.jsonl`` next to this script: two
    examples (color identification + description) per color in the palette.
    SDK validation at the end is best-effort and skipped when synth_ai is
    not installed.
    """
    output_dir = BASE_DIR / "test_data"
    output_dir.mkdir(parents=True, exist_ok=True)

    output_file = output_dir / "vision_sft_test.jsonl"

    # Create 10 training examples with different colored images
    examples = []
    colors = ["red", "blue", "green", "yellow", "purple"]

    for i, color in enumerate(colors):
        # Simple color identification
        examples.append({
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": "What color is this image? Answer in one word."},
                        {"type": "image_url", "image_url": {"url": create_test_image(color)}},
                    ],
                },
                {
                    "role": "assistant",
                    "content": color.capitalize(),
                },
            ],
            "metadata": {"example_id": f"color_{i}", "type": "color_id"},
        })

        # Describe the image
        examples.append({
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": "Describe this image briefly."},
                        {"type": "image_url", "image_url": {"url": create_test_image(color)}},
                    ],
                },
                {
                    "role": "assistant",
                    "content": f"This is a {color} colored square image.",
                },
            ],
            "metadata": {"example_id": f"describe_{i}", "type": "description"},
        })

    # Write JSONL in one batched call instead of per-example writes
    with output_file.open("w", encoding="utf-8") as f:
        f.writelines(json.dumps(example) + "\n" for example in examples)

    print(f"✅ Created {len(examples)} vision SFT examples")
    print(f"   Output: {output_file}")
    print(f"   Size: {output_file.stat().st_size / 1024:.1f} KB")

    # Validate with SDK. Keep the try body limited to the import itself so
    # only a missing synth_ai package is swallowed — an ImportError raised
    # *inside* load_jsonl/validation must not be misreported as "not
    # available".
    try:
        from synth_ai.learning.sft.data import load_jsonl, validate_vision_example
    except ImportError:
        print("   (SDK validation skipped - synth_ai not available)")
    else:
        loaded = load_jsonl(output_file, min_messages=1)
        print(f"   Loaded: {len(loaded)} examples")

        valid_count = 0
        for ex in loaded:
            is_valid, error = validate_vision_example(ex, require_images=True)
            if is_valid:
                valid_count += 1
            else:
                print(f"   ⚠️ Invalid example: {error}")

        print(f"   Valid: {valid_count}/{len(loaded)}")


if __name__ == "__main__":
    main()
|
examples/rl/README.md
CHANGED
|
@@ -52,7 +52,7 @@ uvx synth-ai serve math-single-step \
|
|
|
52
52
|
--port 8101 \
|
|
53
53
|
--env-file examples/rl/.env \
|
|
54
54
|
--trace traces/math \
|
|
55
|
-
--trace-db traces/math/
|
|
55
|
+
--trace-db traces/math/task_app_traces_<timestamp>.db
|
|
56
56
|
```
|
|
57
57
|
|
|
58
58
|
Deploy or serve on Modal using the same env file; the registration includes a `ModalDeploymentConfig` that installs the `datasets` package automatically.
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
type = "rl"
|
|
2
|
+
|
|
3
|
+
provider = "synth"
|
|
4
|
+
task_app_url = "http://localhost:8101"
|
|
5
|
+
model = "Qwen/Qwen3-1.7B"
|
|
6
|
+
split = "validation"
|
|
7
|
+
num_episodes = 50
|
|
8
|
+
seed_start = 0
|
|
9
|
+
|
|
10
|
+
[policy]
|
|
11
|
+
inference_url = "https://agent-learning.onrender.com/api/inference"
|
|
12
|
+
max_tokens = 128
|
|
13
|
+
temperature = 0.0
|
|
14
|
+
|
|
15
|
+
# Optionally supply custom headers
|
|
16
|
+
# [policy.headers]
|
|
17
|
+
# Authorization = "Bearer ..."
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
type = "rl"
|
|
2
|
+
|
|
3
|
+
provider = "synth"
|
|
4
|
+
task_app_url = "https://your-math-task.modal.run"
|
|
5
|
+
model = "rl:REPLACE_WITH_JOB_ID"
|
|
6
|
+
split = "test"
|
|
7
|
+
num_episodes = 200
|
|
8
|
+
seed_start = 100000
|
|
9
|
+
|
|
10
|
+
[policy]
|
|
11
|
+
inference_url = "https://your-inference-host"
|
|
12
|
+
max_tokens = 128
|
|
13
|
+
temperature = 0.0
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
type = "rl"
|
|
2
|
+
|
|
3
|
+
[services]
|
|
4
|
+
task_url = "https://your-math-task.modal.run"
|
|
5
|
+
|
|
6
|
+
[model]
|
|
7
|
+
base = "Qwen/Qwen3-4B"
|
|
8
|
+
|
|
9
|
+
[policy]
|
|
10
|
+
model = "Qwen/Qwen3-4B"
|
|
11
|
+
inference_url = "https://your-inference-host"
|
|
12
|
+
max_tokens = 128
|
|
13
|
+
temperature = 0.0
|
|
14
|
+
|
|
15
|
+
[data]
|
|
16
|
+
split = "train"
|
|
17
|
+
seed_start = 0
|
|
18
|
+
episodes_per_iteration = 2048
|
|
19
|
+
evaluation_split = "validation"
|
|
20
|
+
evaluation_episodes = 256
|
|
21
|
+
|
|
22
|
+
[training]
|
|
23
|
+
max_turns = 1
|
|
24
|
+
ops = ["agent", "env"]
|
|
25
|
+
batch_size = 128
|
|
26
|
+
group_size = 1024
|
|
27
|
+
reward_positive = 1.0
|
|
28
|
+
reward_negative_no_tool = -1.0
|
|
29
|
+
reward_negative_no_answer = -0.5
|
|
30
|
+
learning_rate = 5e-6
|
|
31
|
+
|
|
32
|
+
[compute]
|
|
33
|
+
gpu_type = "A10G"
|
|
34
|
+
gpu_count = 4
|
|
35
|
+
|
|
36
|
+
[tags]
|
|
37
|
+
experiment = "math_single_step"
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
type = "rl"
|
|
2
|
+
|
|
3
|
+
[algorithm]
|
|
4
|
+
type = "online"
|
|
5
|
+
method = "policy_gradient"
|
|
6
|
+
variety = "gspo"
|
|
7
|
+
|
|
8
|
+
[services]
|
|
9
|
+
task_url = "http://localhost:8101"
|
|
10
|
+
|
|
11
|
+
[model]
|
|
12
|
+
base = "Qwen/Qwen3-1.7B"
|
|
13
|
+
|
|
14
|
+
[policy]
|
|
15
|
+
model = "Qwen/Qwen3-1.7B"
|
|
16
|
+
inference_url = "https://agent-learning.onrender.com/api/inference"
|
|
17
|
+
max_tokens = 1028
|
|
18
|
+
temperature = 0.2
|
|
19
|
+
|
|
20
|
+
[data]
|
|
21
|
+
split = "train"
|
|
22
|
+
seed_start = 0
|
|
23
|
+
episodes_per_iteration = 1280 # 8 per group * 4 groups per batch * 2 batches per step * 20 steps
|
|
24
|
+
evaluation_split = "validation"
|
|
25
|
+
evaluation_episodes = 50
|
|
26
|
+
|
|
27
|
+
[training]
|
|
28
|
+
max_turns = 1
|
|
29
|
+
ops = ["agent", "env"]
|
|
30
|
+
batch_size = 2
|
|
31
|
+
group_size = 16
|
|
32
|
+
reward_positive = 1.0
|
|
33
|
+
reward_negative_no_tool = -1.0
|
|
34
|
+
reward_negative_no_answer = -0.5
|
|
35
|
+
learning_rate = 5e-6
|
|
36
|
+
log_interval = 1
|
|
37
|
+
weight_sync_interval = 1
|
|
38
|
+
|
|
39
|
+
[training.weight_sync]
|
|
40
|
+
enable = true
|
|
41
|
+
targets = ["policy"]
|
|
42
|
+
|
|
43
|
+
[compute]
|
|
44
|
+
gpu_type = "H100"
|
|
45
|
+
gpu_count = 4
|
|
46
|
+
|
|
47
|
+
[topology]
|
|
48
|
+
type = "single_node_split"
|
|
49
|
+
gpus_for_vllm = 2
|
|
50
|
+
gpus_for_training = 1
|
|
51
|
+
gpus_for_ref = 1
|
|
52
|
+
tensor_parallel = 1
|
|
53
|
+
|
|
54
|
+
[vllm]
|
|
55
|
+
tensor_parallel_size = 1
|
|
56
|
+
max_model_len = 4096
|
|
57
|
+
|
|
58
|
+
[reference]
|
|
59
|
+
placement = "dedicated"
|
|
60
|
+
port = 8002
|
|
61
|
+
tp = 1
|
|
62
|
+
health_max_wait_s = 180
|
|
63
|
+
health_interval_ms = 300
|
|
64
|
+
|
|
65
|
+
[rollout]
|
|
66
|
+
policy_name = "math-single-step"
|
|
67
|
+
max_turns = 1
|
|
68
|
+
episodes_per_batch = 32 # group_size * batch_size
|
|
69
|
+
|
|
70
|
+
[evaluation]
|
|
71
|
+
instances = 32
|
|
72
|
+
every_n_iters = 10
|
|
73
|
+
seeds = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
|
|
74
|
+
|
|
75
|
+
[tags]
|
|
76
|
+
experiment = "math_single_step_qwen17"
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
type = "rl"
|
|
2
|
+
|
|
3
|
+
[services]
|
|
4
|
+
task_url = "https://your-math-task.modal.run"
|
|
5
|
+
|
|
6
|
+
[model]
|
|
7
|
+
source = "ft:REPLACE_WITH_MODEL_ID"
|
|
8
|
+
|
|
9
|
+
[policy]
|
|
10
|
+
model = "ft:REPLACE_WITH_MODEL_ID"
|
|
11
|
+
inference_url = "https://your-inference-host"
|
|
12
|
+
max_tokens = 128
|
|
13
|
+
temperature = 0.0
|
|
14
|
+
|
|
15
|
+
[data]
|
|
16
|
+
split = "train"
|
|
17
|
+
seed_start = 0
|
|
18
|
+
episodes_per_iteration = 2048
|
|
19
|
+
evaluation_split = "validation"
|
|
20
|
+
evaluation_episodes = 256
|
|
21
|
+
|
|
22
|
+
[training]
|
|
23
|
+
max_turns = 1
|
|
24
|
+
ops = ["agent", "env"]
|
|
25
|
+
batch_size = 128
|
|
26
|
+
group_size = 1024
|
|
27
|
+
reward_positive = 1.0
|
|
28
|
+
reward_negative_no_tool = -1.0
|
|
29
|
+
reward_negative_no_answer = -0.5
|
|
30
|
+
learning_rate = 5e-6
|
|
31
|
+
|
|
32
|
+
[compute]
|
|
33
|
+
gpu_type = "A10G"
|
|
34
|
+
gpu_count = 4
|
|
35
|
+
|
|
36
|
+
[tags]
|
|
37
|
+
experiment = "math_single_step_from_fft"
|