synth-ai 0.2.14__py3-none-any.whl → 0.2.16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synth-ai might be problematic. Click here for more details.
- examples/README.md +1 -0
- examples/multi_step/SFT_README.md +147 -0
- examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +9 -9
- examples/multi_step/configs/crafter_sft_qwen30b_lora.toml +62 -0
- examples/multi_step/convert_traces_to_sft.py +84 -0
- examples/multi_step/run_sft_qwen30b.sh +45 -0
- examples/qwen_coder/configs/coder_lora_30b.toml +2 -1
- examples/qwen_coder/configs/coder_lora_4b.toml +2 -1
- examples/qwen_coder/configs/coder_lora_small.toml +2 -1
- examples/qwen_vl/BUGS_AND_FIXES.md +232 -0
- examples/qwen_vl/IMAGE_VALIDATION_COMPLETE.md +271 -0
- examples/qwen_vl/IMAGE_VALIDATION_SUMMARY.md +260 -0
- examples/qwen_vl/INFERENCE_SFT_TESTS.md +412 -0
- examples/qwen_vl/NEXT_STEPS_2B.md +325 -0
- examples/qwen_vl/QUICKSTART.md +327 -0
- examples/qwen_vl/QUICKSTART_RL_VISION.md +110 -0
- examples/qwen_vl/README.md +154 -0
- examples/qwen_vl/RL_VISION_COMPLETE.md +475 -0
- examples/qwen_vl/RL_VISION_TESTING.md +333 -0
- examples/qwen_vl/SDK_VISION_INTEGRATION.md +328 -0
- examples/qwen_vl/SETUP_COMPLETE.md +275 -0
- examples/qwen_vl/VISION_TESTS_COMPLETE.md +490 -0
- examples/qwen_vl/VLM_PIPELINE_COMPLETE.md +242 -0
- examples/qwen_vl/__init__.py +2 -0
- examples/qwen_vl/collect_data_via_cli.md +423 -0
- examples/qwen_vl/collect_vision_traces.py +368 -0
- examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +127 -0
- examples/qwen_vl/configs/crafter_vlm_sft_example.toml +60 -0
- examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +43 -0
- examples/qwen_vl/configs/eval_gpt4o_vision_proper.toml +29 -0
- examples/qwen_vl/configs/eval_gpt5nano_vision.toml +45 -0
- examples/qwen_vl/configs/eval_qwen2vl_vision.toml +44 -0
- examples/qwen_vl/configs/filter_qwen2vl_sft.toml +50 -0
- examples/qwen_vl/configs/filter_vision_sft.toml +53 -0
- examples/qwen_vl/configs/filter_vision_test.toml +8 -0
- examples/qwen_vl/configs/sft_qwen3_vl_2b_test.toml +54 -0
- examples/qwen_vl/crafter_gpt5nano_agent.py +308 -0
- examples/qwen_vl/crafter_qwen_vl_agent.py +300 -0
- examples/qwen_vl/run_vision_comparison.sh +62 -0
- examples/qwen_vl/run_vision_sft_pipeline.sh +175 -0
- examples/qwen_vl/test_image_validation.py +201 -0
- examples/qwen_vl/test_sft_vision_data.py +110 -0
- examples/rl/README.md +1 -1
- examples/rl/configs/eval_base_qwen.toml +17 -0
- examples/rl/configs/eval_rl_qwen.toml +13 -0
- examples/rl/configs/rl_from_base_qwen.toml +37 -0
- examples/rl/configs/rl_from_base_qwen17.toml +76 -0
- examples/rl/configs/rl_from_ft_qwen.toml +37 -0
- examples/rl/run_eval.py +436 -0
- examples/rl/run_rl_and_save.py +111 -0
- examples/rl/task_app/README.md +22 -0
- examples/rl/task_app/math_single_step.py +990 -0
- examples/rl/task_app/math_task_app.py +111 -0
- examples/sft/README.md +5 -5
- examples/sft/configs/crafter_fft_qwen0p6b.toml +4 -2
- examples/sft/configs/crafter_lora_qwen0p6b.toml +4 -3
- examples/sft/evaluate.py +2 -4
- examples/sft/export_dataset.py +7 -4
- examples/swe/task_app/README.md +1 -1
- examples/swe/task_app/grpo_swe_mini.py +0 -1
- examples/swe/task_app/grpo_swe_mini_task_app.py +0 -12
- examples/swe/task_app/hosted/envs/mini_swe/environment.py +13 -13
- examples/swe/task_app/hosted/policy_routes.py +0 -2
- examples/swe/task_app/hosted/rollout.py +0 -8
- examples/task_apps/crafter/task_app/grpo_crafter.py +4 -7
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +59 -1
- examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +30 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +62 -31
- examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +16 -14
- examples/task_apps/enron/__init__.py +1 -0
- examples/vlm/README.md +3 -3
- examples/vlm/configs/crafter_vlm_gpt4o.toml +2 -0
- examples/vlm/crafter_openai_vlm_agent.py +3 -5
- examples/vlm/filter_image_rows.py +1 -1
- examples/vlm/run_crafter_vlm_benchmark.py +2 -2
- examples/warming_up_to_rl/_utils.py +92 -0
- examples/warming_up_to_rl/analyze_trace_db.py +1 -1
- examples/warming_up_to_rl/configs/crafter_fft.toml +2 -0
- examples/warming_up_to_rl/configs/crafter_fft_4b.toml +2 -0
- examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +2 -0
- examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +2 -0
- examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +2 -1
- examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +2 -1
- examples/warming_up_to_rl/configs/rl_from_ft.toml +2 -0
- examples/warming_up_to_rl/export_trace_sft.py +174 -60
- examples/warming_up_to_rl/readme.md +63 -132
- examples/warming_up_to_rl/run_fft_and_save.py +1 -1
- examples/warming_up_to_rl/run_rl_and_save.py +1 -1
- examples/warming_up_to_rl/task_app/README.md +42 -0
- examples/warming_up_to_rl/task_app/grpo_crafter.py +696 -0
- examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +135 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +143 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1226 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +522 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +478 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +108 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +204 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +618 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +100 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +1081 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +195 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1861 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +211 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +161 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +137 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +62 -0
- synth_ai/__init__.py +44 -30
- synth_ai/_utils/__init__.py +47 -0
- synth_ai/_utils/base_url.py +10 -0
- synth_ai/_utils/http.py +10 -0
- synth_ai/_utils/prompts.py +10 -0
- synth_ai/_utils/task_app_state.py +12 -0
- synth_ai/_utils/user_config.py +10 -0
- synth_ai/api/models/supported.py +144 -7
- synth_ai/api/train/__init__.py +13 -1
- synth_ai/api/train/cli.py +30 -7
- synth_ai/api/train/config_finder.py +18 -11
- synth_ai/api/train/env_resolver.py +13 -10
- synth_ai/cli/__init__.py +62 -78
- synth_ai/cli/_modal_wrapper.py +7 -5
- synth_ai/cli/_typer_patch.py +0 -2
- synth_ai/cli/_validate_task_app.py +22 -4
- synth_ai/cli/legacy_root_backup.py +3 -1
- synth_ai/cli/lib/__init__.py +10 -0
- synth_ai/cli/lib/task_app_discovery.py +7 -0
- synth_ai/cli/lib/task_app_env.py +518 -0
- synth_ai/cli/recent.py +2 -1
- synth_ai/cli/setup.py +266 -0
- synth_ai/cli/status.py +1 -1
- synth_ai/cli/task_app_deploy.py +16 -0
- synth_ai/cli/task_app_list.py +25 -0
- synth_ai/cli/task_app_modal_serve.py +16 -0
- synth_ai/cli/task_app_serve.py +18 -0
- synth_ai/cli/task_apps.py +71 -31
- synth_ai/cli/traces.py +1 -1
- synth_ai/cli/train.py +18 -0
- synth_ai/cli/tui.py +7 -2
- synth_ai/cli/turso.py +1 -1
- synth_ai/cli/watch.py +1 -1
- synth_ai/demos/__init__.py +10 -0
- synth_ai/demos/core/__init__.py +28 -1
- synth_ai/demos/crafter/__init__.py +1 -0
- synth_ai/demos/crafter/crafter_fft_4b.toml +55 -0
- synth_ai/demos/crafter/grpo_crafter_task_app.py +185 -0
- synth_ai/demos/crafter/rl_from_base_qwen4b.toml +74 -0
- synth_ai/demos/demo_registry.py +176 -0
- synth_ai/demos/math/__init__.py +1 -0
- synth_ai/demos/math/_common.py +16 -0
- synth_ai/demos/math/app.py +38 -0
- synth_ai/demos/math/config.toml +76 -0
- synth_ai/demos/math/deploy_modal.py +54 -0
- synth_ai/demos/math/modal_task_app.py +702 -0
- synth_ai/demos/math/task_app_entry.py +51 -0
- synth_ai/environments/environment/core.py +7 -1
- synth_ai/environments/examples/bandit/engine.py +0 -1
- synth_ai/environments/examples/bandit/environment.py +0 -1
- synth_ai/environments/examples/wordle/environment.py +0 -1
- synth_ai/evals/base.py +16 -5
- synth_ai/evals/client.py +1 -1
- synth_ai/inference/client.py +1 -1
- synth_ai/judge_schemas.py +8 -8
- synth_ai/learning/client.py +1 -1
- synth_ai/learning/health.py +1 -1
- synth_ai/learning/jobs.py +1 -1
- synth_ai/learning/rl/client.py +1 -1
- synth_ai/learning/rl/env_keys.py +1 -1
- synth_ai/learning/rl/secrets.py +1 -1
- synth_ai/learning/sft/client.py +1 -1
- synth_ai/learning/sft/data.py +407 -4
- synth_ai/learning/validators.py +4 -1
- synth_ai/task/apps/__init__.py +4 -2
- synth_ai/task/config.py +6 -4
- synth_ai/task/rubrics/__init__.py +1 -2
- synth_ai/task/rubrics/loaders.py +14 -10
- synth_ai/task/rubrics.py +219 -0
- synth_ai/task/trace_correlation_helpers.py +24 -11
- synth_ai/task/tracing_utils.py +14 -3
- synth_ai/task/validators.py +2 -3
- synth_ai/tracing_v3/abstractions.py +3 -3
- synth_ai/tracing_v3/config.py +15 -13
- synth_ai/tracing_v3/constants.py +21 -0
- synth_ai/tracing_v3/db_config.py +3 -1
- synth_ai/tracing_v3/decorators.py +10 -7
- synth_ai/tracing_v3/llm_call_record_helpers.py +5 -5
- synth_ai/tracing_v3/session_tracer.py +7 -7
- synth_ai/tracing_v3/storage/base.py +29 -29
- synth_ai/tracing_v3/storage/config.py +3 -3
- synth_ai/tracing_v3/turso/daemon.py +8 -9
- synth_ai/tracing_v3/turso/native_manager.py +80 -72
- synth_ai/tracing_v3/utils.py +2 -2
- synth_ai/tui/cli/query_experiments.py +4 -4
- synth_ai/tui/cli/query_experiments_v3.py +4 -4
- synth_ai/tui/dashboard.py +14 -9
- synth_ai/utils/__init__.py +101 -0
- synth_ai/utils/base_url.py +94 -0
- synth_ai/utils/cli.py +131 -0
- synth_ai/utils/env.py +287 -0
- synth_ai/utils/http.py +169 -0
- synth_ai/utils/modal.py +308 -0
- synth_ai/utils/process.py +212 -0
- synth_ai/utils/prompts.py +39 -0
- synth_ai/utils/sqld.py +122 -0
- synth_ai/utils/task_app_discovery.py +882 -0
- synth_ai/utils/task_app_env.py +186 -0
- synth_ai/utils/task_app_state.py +318 -0
- synth_ai/utils/user_config.py +137 -0
- synth_ai/v0/config/__init__.py +1 -5
- synth_ai/v0/config/base_url.py +1 -7
- synth_ai/v0/tracing/config.py +1 -1
- synth_ai/v0/tracing/decorators.py +1 -1
- synth_ai/v0/tracing/upload.py +1 -1
- synth_ai/v0/tracing_v1/config.py +1 -1
- synth_ai/v0/tracing_v1/decorators.py +1 -1
- synth_ai/v0/tracing_v1/upload.py +1 -1
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.16.dist-info}/METADATA +85 -31
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.16.dist-info}/RECORD +229 -117
- synth_ai/cli/man.py +0 -106
- synth_ai/compound/cais.py +0 -0
- synth_ai/core/experiment.py +0 -13
- synth_ai/core/system.py +0 -15
- synth_ai/demo_registry.py +0 -295
- synth_ai/handshake.py +0 -109
- synth_ai/http.py +0 -26
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.16.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.16.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.16.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.16.dist-info}/top_level.txt +0 -0
synth_ai/cli/tui.py
CHANGED
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
CLI: Interactive TUI dashboard for Synth AI.
|
|
4
4
|
"""
|
|
5
5
|
|
|
6
|
+
import importlib
|
|
6
7
|
import os
|
|
7
8
|
|
|
8
9
|
import click
|
|
@@ -24,19 +25,23 @@ def register(cli):
|
|
|
24
25
|
|
|
25
26
|
# Import here to avoid circular imports and handle optional dependencies
|
|
26
27
|
try:
|
|
27
|
-
|
|
28
|
+
module = importlib.import_module("synth_ai.tui.dashboard")
|
|
28
29
|
except (ImportError, ModuleNotFoundError) as e:
|
|
29
30
|
console.print("[red]Error:[/red] TUI dashboard not available.")
|
|
30
31
|
console.print(f"Missing dependencies: {e}")
|
|
31
32
|
console.print("Install with: pip install textual")
|
|
32
33
|
return
|
|
33
|
-
except Exception
|
|
34
|
+
except Exception:
|
|
34
35
|
# Handle other import errors (like missing libsql, type annotation issues, etc.)
|
|
35
36
|
console.print("[red]Error:[/red] TUI dashboard not available.")
|
|
36
37
|
console.print("This may be due to missing dependencies or Python version compatibility.")
|
|
37
38
|
console.print("Try: pip install textual libsql")
|
|
38
39
|
console.print("If using Python < 3.10, you may need to update Python or install eval_type_backport.")
|
|
39
40
|
return
|
|
41
|
+
tui_main = getattr(module, "main", None)
|
|
42
|
+
if not callable(tui_main):
|
|
43
|
+
console.print("[red]Error:[/red] TUI dashboard entrypoint not available.")
|
|
44
|
+
return
|
|
40
45
|
|
|
41
46
|
# Set environment variables for the TUI to use
|
|
42
47
|
os.environ.setdefault("TUI_DB_URL", db_url)
|
synth_ai/cli/turso.py
CHANGED
synth_ai/cli/watch.py
CHANGED
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
"""Namespace for demo task apps (math, crafter, etc.)."""
|
|
2
|
+
|
|
3
|
+
import contextlib
|
|
4
|
+
|
|
5
|
+
# Ensure registry entries are loaded for CLI discovery.
|
|
6
|
+
with contextlib.suppress(Exception): # pragma: no cover - optional on downstream installs
|
|
7
|
+
from synth_ai.demos.math import task_app_entry # noqa: F401
|
|
8
|
+
|
|
9
|
+
with contextlib.suppress(Exception): # pragma: no cover - optional on downstream installs
|
|
10
|
+
from synth_ai.demos.crafter import grpo_crafter_task_app # noqa: F401
|
synth_ai/demos/core/__init__.py
CHANGED
|
@@ -1 +1,28 @@
|
|
|
1
|
-
|
|
1
|
+
"""
|
|
2
|
+
Compatibility layer exposing the legacy demo helpers.
|
|
3
|
+
|
|
4
|
+
Historically these utilities lived in ``synth_ai.demos.core`` as a module.
|
|
5
|
+
Upstream refactors moved the implementation under
|
|
6
|
+
``synth_ai.demos.demo_task_apps.core``. Several call sites (including the new
|
|
7
|
+
vision tests) still import the older path, so we re-export everything here.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
from synth_ai.demos.demo_task_apps import core as _demo_core
|
|
13
|
+
|
|
14
|
+
__all__ = [
|
|
15
|
+
name
|
|
16
|
+
for name in dir(_demo_core)
|
|
17
|
+
if not name.startswith("_")
|
|
18
|
+
]
|
|
19
|
+
|
|
20
|
+
globals().update({name: getattr(_demo_core, name) for name in __all__})
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def __getattr__(name: str):
    """Resolve a re-exported legacy name from the wrapped core module.

    This is the module-level attribute hook: it only runs when normal lookup
    in this module's globals fails. Names listed in ``__all__`` are fetched
    from ``_demo_core`` and cached in ``globals()`` so later lookups bypass
    the hook.

    Args:
        name: Attribute being looked up on this module.

    Returns:
        The attribute of the same name from ``_demo_core``.

    Raises:
        AttributeError: If ``name`` is not one of the re-exported names.
    """
    if name in __all__:
        value = getattr(_demo_core, name)
        globals()[name] = value
        return value
    # Mirror the interpreter's own wording so tracebacks identify the module.
    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Crafter demo task app
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
# FFT job config for Qwen/Qwen3-4B on Crafter SFT dataset
|
|
2
|
+
|
|
3
|
+
type = "sft"
|
|
4
|
+
|
|
5
|
+
[algorithm]
|
|
6
|
+
type = "offline"
|
|
7
|
+
method = "supervised_finetune"
|
|
8
|
+
variety = "fft"
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
[job]
|
|
12
|
+
model = "Qwen/Qwen3-4B"
|
|
13
|
+
data = "ft_data/crafter_sft.jsonl"
|
|
14
|
+
|
|
15
|
+
[compute]
|
|
16
|
+
# Adjust as needed for your quota
|
|
17
|
+
gpu_type = "H100"
|
|
18
|
+
gpu_count = 1
|
|
19
|
+
nodes = 1
|
|
20
|
+
|
|
21
|
+
[data]
|
|
22
|
+
# Optional topology metadata (left empty for now)
|
|
23
|
+
topology = {}
|
|
24
|
+
|
|
25
|
+
# Optional local validation dataset path (JSONL). If set, the client will upload
|
|
26
|
+
# this file and wire up validation so the frontend can display val.loss.
|
|
27
|
+
# validation_path = "../ft_data/crafter_validation.jsonl"
|
|
28
|
+
|
|
29
|
+
[training]
|
|
30
|
+
mode = "sft_offline"
|
|
31
|
+
use_qlora = false
|
|
32
|
+
|
|
33
|
+
# Validation settings to emit val.loss on the frontend
|
|
34
|
+
[training.validation]
|
|
35
|
+
enabled = true
|
|
36
|
+
evaluation_strategy = "steps"
|
|
37
|
+
eval_steps = 20
|
|
38
|
+
save_best_model_at_end = true
|
|
39
|
+
metric_for_best_model = "val.loss"
|
|
40
|
+
greater_is_better = false
|
|
41
|
+
|
|
42
|
+
[hyperparameters]
|
|
43
|
+
# Minimal safe defaults; backend can override
|
|
44
|
+
n_epochs = 1
|
|
45
|
+
batch_size = 1
|
|
46
|
+
gradient_accumulation_steps = 64
|
|
47
|
+
sequence_length = 4096
|
|
48
|
+
learning_rate = 5e-6
|
|
49
|
+
warmup_ratio = 0.03
|
|
50
|
+
train_kind = "fft"
|
|
51
|
+
|
|
52
|
+
# Optional parallelism block example
|
|
53
|
+
#[hyperparameters.parallelism]
|
|
54
|
+
# tensor_parallel_size = 1
|
|
55
|
+
# pipeline_parallel_size = 1
|
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
"""Compatibility wrapper for the GRPO Crafter task app.
|
|
2
|
+
|
|
3
|
+
This module now delegates to the TaskAppConfig defined in the local example at
|
|
4
|
+
`examples/warming_up_to_rl/task_app/grpo_crafter.py`. It is kept for legacy usage
|
|
5
|
+
(running the file directly or targeting `fastapi_app` from external tooling).
|
|
6
|
+
Prefer using `uvx synth-ai serve grpo-crafter` for local development and testing.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import argparse
|
|
12
|
+
import importlib.util
|
|
13
|
+
from contextlib import suppress
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
|
|
16
|
+
from fastapi.exceptions import RequestValidationError
|
|
17
|
+
from fastapi.responses import JSONResponse
|
|
18
|
+
from starlette.requests import Request
|
|
19
|
+
from synth_ai.task.apps import ModalDeploymentConfig, TaskAppEntry, registry
|
|
20
|
+
from synth_ai.task.auth import is_api_key_header_authorized, normalize_environment_api_key
|
|
21
|
+
from synth_ai.task.server import TaskAppConfig, create_task_app, run_task_app
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _load_build_config():
    """Load ``build_config`` from the bundled Crafter example module.

    The example file is located relative to the installed ``synth_ai``
    package (``<parent of synth_ai>/examples/warming_up_to_rl/task_app/
    grpo_crafter.py``) and executed under the module name
    ``warming_up_to_rl.task_app.grpo_crafter``.

    While the example module executes, ``registry.register`` is temporarily
    replaced by a wrapper that suppresses ``ValueError``; the original
    attribute is always restored afterwards.

    Returns:
        The ``build_config`` attribute of the loaded example module.

    Raises:
        ImportError: If the example file does not exist or no loader can be
            created for it.
    """
    import sys

    import synth_ai
    from synth_ai.task import apps as task_apps

    synth_ai_path = Path(synth_ai.__file__).resolve().parent.parent
    module_path = synth_ai_path / "examples" / "warming_up_to_rl" / "task_app" / "grpo_crafter.py"

    if not module_path.exists():
        raise ImportError(
            f"Could not find task app module at {module_path}. Make sure you're running from the synth-ai repository."
        )

    spec = importlib.util.spec_from_file_location(
        "warming_up_to_rl.task_app.grpo_crafter", module_path
    )
    if spec is None or spec.loader is None:
        raise ImportError(f"Could not load task app module at {module_path}")
    module = importlib.util.module_from_spec(spec)

    # Remember whether this call is the one that publishes the module, so a
    # failed execution can be rolled back without touching a pre-existing entry.
    newly_registered = spec.name not in sys.modules
    sys.modules.setdefault(spec.name, module)

    original_register = task_apps.registry.register

    def _safe_register(entry):
        # Registration attempts that raise ValueError are ignored on purpose.
        with suppress(ValueError):
            original_register(entry)

    task_apps.registry.register = _safe_register
    try:
        spec.loader.exec_module(module)
    except BaseException:
        # Do not leave a half-initialised module importable by name.
        if newly_registered and sys.modules.get(spec.name) is module:
            del sys.modules[spec.name]
        raise
    finally:
        task_apps.registry.register = original_register
    return module.build_config
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
build_config = _load_build_config()
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
APP_ID = "grpo-crafter"
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def _build_base_config() -> TaskAppConfig:
    """Return whatever the loaded example module's ``build_config`` produces."""
    base_config = build_config()
    return base_config
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
try:
|
|
72
|
+
_REGISTERED_ENTRY = registry.get(APP_ID)
|
|
73
|
+
except Exception: # pragma: no cover - registry unavailable in some contexts
|
|
74
|
+
MODAL_DEPLOYMENT: ModalDeploymentConfig | None = None
|
|
75
|
+
else:
|
|
76
|
+
MODAL_DEPLOYMENT = _REGISTERED_ENTRY.modal
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def build_task_app_config() -> TaskAppConfig:
    """Build the base config and hand back the result of its ``clone()``."""
    return _build_base_config().clone()
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def fastapi_app():
    """Create the Crafter task app with custom health and validation handlers.

    The app is built from ``build_task_app_config()``. Any existing GET routes
    at ``/health`` and ``/health/rollout`` are removed and replaced by the
    handlers defined here, and a ``RequestValidationError`` handler is
    installed that logs a small header snapshot and answers with HTTP 422.

    Returns:
        The configured application object returned by ``create_task_app``.
    """
    app = create_task_app(build_task_app_config())

    # Drop the pre-registered GET health routes so the ones below take over.
    overridden_paths = {"/health", "/health/rollout"}
    app.router.routes = [
        route
        for route in app.router.routes
        if not (
            getattr(route, "path", None) in overridden_paths
            and "GET" in (getattr(route, "methods", set()) or set())
        )
    ]

    def _log_env_key_prefix(source: str, env_key: str | None) -> str | None:
        """Print and return the first half (at least one char) of ``env_key``."""
        if not env_key:
            return None
        prefix = env_key[: max(1, len(env_key) // 2)]
        print(f"[{source}] expected ENVIRONMENT_API_KEY prefix: {prefix}")
        return prefix

    def _health_response(source: str, request: Request, authorized_body: dict):
        """Shared health logic; ``authorized_body`` is returned on success."""
        env_key = normalize_environment_api_key()
        if not env_key:
            return JSONResponse(
                status_code=503,
                content={"status": "unhealthy", "detail": "Missing ENVIRONMENT_API_KEY"},
            )
        if not is_api_key_header_authorized(request):
            # NOTE(review): the key prefix is sent back to a caller that failed
            # authorization; confirm this disclosure is intended.
            prefix = _log_env_key_prefix(source, env_key)
            content = {"status": "healthy", "authorized": False}
            if prefix:
                content["expected_api_key_prefix"] = prefix
            return JSONResponse(status_code=200, content=content)
        return authorized_body

    @app.get("/health")
    async def health(request: Request):
        return _health_response("health", request, {"status": "healthy", "authorized": True})

    @app.get("/health/rollout")
    async def health_rollout(request: Request):
        return _health_response("health/rollout", request, {"ok": True, "authorized": True})

    @app.exception_handler(RequestValidationError)
    async def _on_validation_error(request: Request, exc: RequestValidationError):
        try:
            hdr = request.headers
            snapshot = {
                "path": str(request.url.path),
                "have_x_api_key": bool(hdr.get("x-api-key")),
                "have_x_api_keys": bool(hdr.get("x-api-keys")),
                "have_authorization": bool(hdr.get("authorization")),
                "errors": exc.errors()[:5],
            }
            print("[422] validation", snapshot, flush=True)
        except Exception:
            # Diagnostics are best-effort; never let logging mask the 422.
            pass
        return JSONResponse(
            status_code=422,
            content={"status": "invalid", "detail": exc.errors()[:5]},
        )

    return app
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def register_demo_entry() -> None:
    """Register the ``crafter-demo`` entry (alias ``crafter``) in the registry.

    A ``ValueError`` raised by ``registry.register`` is swallowed.
    """
    demo_entry = TaskAppEntry(
        app_id="crafter-demo",
        description="Crafter demo task app",
        config_factory=build_task_app_config,
        aliases=("crafter",),
        modal=MODAL_DEPLOYMENT,
    )
    try:
        registry.register(demo_entry)
    except ValueError:
        # Presumably raised for an already-registered id; verify against registry.
        pass
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
register_demo_entry()
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
if __name__ == "__main__":
|
|
174
|
+
parser = argparse.ArgumentParser(description="Run the Crafter task app locally")
|
|
175
|
+
parser.add_argument("--host", default="0.0.0.0")
|
|
176
|
+
parser.add_argument("--port", type=int, default=8001)
|
|
177
|
+
parser.add_argument("--reload", action="store_true", help="Enable uvicorn autoreload")
|
|
178
|
+
args = parser.parse_args()
|
|
179
|
+
|
|
180
|
+
run_task_app(
|
|
181
|
+
build_task_app_config,
|
|
182
|
+
host=args.host,
|
|
183
|
+
port=args.port,
|
|
184
|
+
reload=args.reload,
|
|
185
|
+
)
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
# RL training starting from base Qwen/Qwen3-4B (TOML-only model selection)
|
|
2
|
+
|
|
3
|
+
type = "rl"
|
|
4
|
+
|
|
5
|
+
[algorithm]
|
|
6
|
+
type = "online"
|
|
7
|
+
method = "policy_gradient"
|
|
8
|
+
variety = "gspo"
|
|
9
|
+
|
|
10
|
+
[services]
|
|
11
|
+
task_url = "https://synth-laboratories--grpo-crafter-task-app-final-warming--ceb5b2.modal.run"
|
|
12
|
+
|
|
13
|
+
[compute]
|
|
14
|
+
# Cluster shape for RL pipeline
|
|
15
|
+
gpu_type = "H100"
|
|
16
|
+
gpu_count = 8
|
|
17
|
+
|
|
18
|
+
[topology]
|
|
19
|
+
# Split GPUs across vLLM, training, and reference
|
|
20
|
+
# Must sum to compute.gpu_count
|
|
21
|
+
type = "single_node_split"
|
|
22
|
+
gpus_for_vllm = 4
|
|
23
|
+
gpus_for_training = 3
|
|
24
|
+
gpus_for_ref = 1
|
|
25
|
+
tensor_parallel = 4
|
|
26
|
+
|
|
27
|
+
[vllm]
|
|
28
|
+
# Serving tensor parallel size
|
|
29
|
+
tensor_parallel_size = 4
|
|
30
|
+
max_model_len = 8192
|
|
31
|
+
|
|
32
|
+
[reference]
|
|
33
|
+
# Required by trainer/runtime; ensures dedicated/scoped scoring server config exists
|
|
34
|
+
placement = "dedicated"
|
|
35
|
+
port = 8002
|
|
36
|
+
tp = 1
|
|
37
|
+
health_max_wait_s = 180
|
|
38
|
+
health_interval_ms = 300
|
|
39
|
+
|
|
40
|
+
[model]
|
|
41
|
+
# Base model start
|
|
42
|
+
base = "Qwen/Qwen3-4B"
|
|
43
|
+
label = "crafter-rl-from-base"
|
|
44
|
+
|
|
45
|
+
[rollout]
|
|
46
|
+
max_turns = 10
|
|
47
|
+
episodes_per_batch = 64
|
|
48
|
+
policy_name = "crafter"
|
|
49
|
+
|
|
50
|
+
[evaluation]
|
|
51
|
+
# Run baseline evaluation over seeds 0-9 (10 instances) every 10 training iterations
|
|
52
|
+
instances = 10
|
|
53
|
+
every_n_iters = 10
|
|
54
|
+
seeds = [
|
|
55
|
+
0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
|
|
56
|
+
]
|
|
57
|
+
|
|
58
|
+
[training]
|
|
59
|
+
log_interval = 1
|
|
60
|
+
weight_sync_interval = 1
|
|
61
|
+
# Additional RL hyperparameters can go here
|
|
62
|
+
|
|
63
|
+
# Stepwise rewards (Crafter decision-level)
|
|
64
|
+
step_rewards_enabled = true
|
|
65
|
+
step_rewards_mode = "decision_stepwise" # "off" | "decision_stepwise" | "env_sparse"
|
|
66
|
+
step_rewards_beta = 0.0
|
|
67
|
+
step_rewards_indicator_lambda = 1.0
|
|
68
|
+
# Optional selector for decision scalar: "unique" | "absolute" (default unique)
|
|
69
|
+
event_rewards_kind = "unique"
|
|
70
|
+
|
|
71
|
+
[training.weight_sync]
|
|
72
|
+
enable = true
|
|
73
|
+
targets = ["policy"]
|
|
74
|
+
weight_sync_interval = 1
|
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from collections.abc import Callable, Iterator
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
REPO_ROOT = Path(__file__).resolve().parents[2]
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@dataclass(slots=True)
|
|
11
|
+
class CopySpec:
|
|
12
|
+
source: str
|
|
13
|
+
destination: str
|
|
14
|
+
make_executable: bool = False
|
|
15
|
+
|
|
16
|
+
def absolute_source(self) -> Path:
|
|
17
|
+
return (REPO_ROOT / self.source).resolve()
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@dataclass(slots=True)
|
|
21
|
+
class DemoTemplate:
|
|
22
|
+
template_id: str
|
|
23
|
+
name: str
|
|
24
|
+
description: str
|
|
25
|
+
copy_specs: tuple[CopySpec, ...]
|
|
26
|
+
default_subdir: str | None = None
|
|
27
|
+
env_lines: tuple[str, ...] = ()
|
|
28
|
+
config_source: str | None = None
|
|
29
|
+
config_destination: str = "demo_config.toml"
|
|
30
|
+
requires_modal: bool = False
|
|
31
|
+
post_copy: Callable[[Path], None] | None = None
|
|
32
|
+
default_secret_name: str | None = None
|
|
33
|
+
|
|
34
|
+
def iter_copy_specs(self) -> Iterator[CopySpec]:
|
|
35
|
+
yield from self.copy_specs
|
|
36
|
+
|
|
37
|
+
def config_source_path(self) -> Path | None:
|
|
38
|
+
if not self.config_source:
|
|
39
|
+
return None
|
|
40
|
+
return (REPO_ROOT / self.config_source).resolve()
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def _postprocess_math_modal(root: Path) -> None:
    """Rename the app and secret inside a copied math ``task_app.py``.

    Does nothing when ``root / "task_app.py"`` is absent. Otherwise the app
    name and default secret name literals are rewritten to their ``-demo``
    variants, and ``deploy_task_app.sh`` (if present) gains execute bits for
    user, group and other.

    Args:
        root: Directory the demo files were copied into.
    """
    import stat

    task_path = (root / "task_app.py").resolve()
    if not task_path.exists():
        return

    substitutions = (
        ('App("hendrycks-math-task-app")', 'App("hendrycks-math-task-app-demo")'),
        (
            'DEFAULT_TASK_APP_SECRET_NAME = "hendrycks-math-task-app-secret"',
            'DEFAULT_TASK_APP_SECRET_NAME = "hendrycks-math-task-app-demo-secret"',
        ),
    )
    contents = task_path.read_text(encoding="utf-8")
    for old, new in substitutions:
        contents = contents.replace(old, new)
    task_path.write_text(contents, encoding="utf-8")

    deploy_script = root / "deploy_task_app.sh"
    if not deploy_script.exists():
        return
    exec_bits = stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH
    deploy_script.chmod(deploy_script.stat().st_mode | exec_bits)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
# Registry of available demo templates, keyed by template id.
DEMO_TEMPLATES: dict[str, DemoTemplate] = {
    "math-modal": DemoTemplate(
        template_id="math-modal",
        name="Math Single-Step (Modal deployment)",
        description="Packaged modal task app matching examples/rl math environment.",
        copy_specs=(
            CopySpec(
                "synth_ai/demos/math/modal_task_app.py",
                "task_app.py",
            ),
            CopySpec(
                "synth_ai/demos/math/README.md",
                "README.md",
            ),
            CopySpec(
                "synth_ai/demos/math/deploy_task_app.sh",
                "deploy_task_app.sh",
                make_executable=True,
            ),
            CopySpec(
                "synth_ai/demos/math/config.toml",
                "configs/rl_from_base_qwen17.toml",
            ),
        ),
        default_subdir="math_demo",
        env_lines=(
            "# Required for task app auth to environment service",
            "ENVIRONMENT_API_KEY=",
            "",
            "# Optional: for CLI job submission and proxying OpenAI models",
            "SYNTH_API_KEY=",
            "OPENAI_API_KEY=",
            "",
            "# Optional: set to 'prod' to use production names",
            "ENVIRONMENT=",
        ),
        config_source="synth_ai/demos/math/config.toml",
        requires_modal=True,
        post_copy=_postprocess_math_modal,
        default_secret_name="hendrycks-math-task-app-demo-secret",
    ),
    "crafter-local": DemoTemplate(
        template_id="crafter-local",
        name="Crafter GRPO (local FastAPI)",
        description="Lightweight wrapper around examples/warming_up_to_rl/task_app/grpo_crafter for local experimentation.",
        copy_specs=(
            CopySpec(
                "synth_ai/demos/crafter/grpo_crafter_task_app.py",
                "task_app.py",
            ),
            CopySpec(
                "synth_ai/demos/crafter/README.md",
                "README.md",
            ),
            # The packaged Crafter TOMLs sit directly in synth_ai/demos/crafter/
            # (no configs/ subdirectory); only the destination uses configs/.
            CopySpec(
                "synth_ai/demos/crafter/rl_from_base_qwen4b.toml",
                "configs/rl_from_base_qwen4b.toml",
            ),
            CopySpec(
                "synth_ai/demos/crafter/crafter_fft_4b.toml",
                "configs/crafter_fft_4b.toml",
            ),
            CopySpec(
                "examples/warming_up_to_rl/task_app/grpo_crafter.py",
                "grpo_crafter.py",
            ),
            CopySpec(
                "examples/warming_up_to_rl/task_app/synth_envs_hosted",
                "synth_envs_hosted",
            ),
            CopySpec(
                "examples/warming_up_to_rl/run_local_rollout.py",
                "run_local_rollout.py",
            ),
            CopySpec(
                "examples/warming_up_to_rl/run_local_rollout_traced.py",
                "run_local_rollout_traced.py",
            ),
            CopySpec(
                "examples/warming_up_to_rl/shared.py",
                "shared.py",
            ),
            CopySpec(
                "examples/warming_up_to_rl/export_trace_sft.py",
                "export_trace_sft.py",
            ),
            CopySpec(
                "examples/warming_up_to_rl/run_fft_and_save.py",
                "run_fft_and_save.py",
            ),
            CopySpec(
                "examples/warming_up_to_rl/run_local_rollout_modal.py",
                "run_local_rollout_modal.py",
            ),
        ),
        default_subdir="crafter_demo",
        env_lines=(
            "ENVIRONMENT_API_KEY=",
            "SYNTH_API_KEY=",
            "",
            "# Optional: URL for existing Crafter task app",
            "TASK_APP_BASE_URL=",
        ),
        default_secret_name="grpo-crafter-demo-secret",
    ),
}
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def list_demo_templates() -> tuple[DemoTemplate, ...]:
    """Return every registered demo template as a tuple, in registry order."""
    templates = DEMO_TEMPLATES.values()
    return (*templates,)
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
def get_demo_template(template_id: str) -> DemoTemplate | None:
    """Look up a template by id; return ``None`` when the id is unknown."""
    template = DEMO_TEMPLATES.get(template_id, None)
    return template
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Package namespace for Math demo task app
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
"""Minimal helpers for the math task app.

This module provides a local fallback for install_problem_bank_into_shared so
the modal task app can import it without requiring an external math_rl package.
"""

from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def install_problem_bank_into_shared() -> None:
    """Placeholder for installing the Hendrycks MATH problem bank; does nothing.

    A production deployment could download or unpack the problem bank into a
    shared directory here. In this demo scaffold the call has no effect.
    """
    return
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
|
|
5
|
+
from fastapi import FastAPI
|
|
6
|
+
from starlette.middleware.cors import CORSMiddleware
|
|
7
|
+
|
|
8
|
+
# Reuse the examples/rl task_app routes if available
|
|
9
|
+
try:
|
|
10
|
+
from synth_ai.examples.rl.task_app import make_app as make_rl_app # type: ignore
|
|
11
|
+
except Exception: # fallback path when imported from repo root
|
|
12
|
+
try:
|
|
13
|
+
from examples.rl.task_app import make_app as make_rl_app # type: ignore
|
|
14
|
+
except Exception as e: # pragma: no cover
|
|
15
|
+
raise ImportError(f"Unable to import RL task app: {e}") from e
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def create_app() -> FastAPI:
    """Build the RL task app with math defaults and permissive CORS.

    Sets ``DEMO_ENV_NAME`` and ``DEMO_POLICY_NAME`` in the environment unless
    they are already defined, creates the app via ``make_rl_app()``, and adds
    a CORS middleware that allows all origins, methods and headers.
    """
    # Configure math defaults via env (consumed by RL task_app helpers)
    env_defaults = (
        ("DEMO_ENV_NAME", "math"),
        ("DEMO_POLICY_NAME", "math-react"),
    )
    for variable, fallback in env_defaults:
        os.environ.setdefault(variable, fallback)

    app = make_rl_app()

    # CORS for local demo.
    # NOTE(review): wildcard origins together with credentials is very
    # permissive; confirm this is only used for local demos.
    cors_options = {
        "allow_origins": ["*"],
        "allow_credentials": True,
        "allow_methods": ["*"],
        "allow_headers": ["*"],
    }
    app.add_middleware(CORSMiddleware, **cors_options)
    return app
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def run(host: str = "127.0.0.1", port: int = 8080):
    """Serve the math demo app with uvicorn.

    Args:
        host: Interface to bind.
        port: Port used when the ``PORT`` environment variable is not set;
            otherwise ``PORT`` (converted to ``int``) takes precedence.
    """
    import uvicorn

    application = create_app()
    effective_port = int(os.getenv("PORT", port))
    uvicorn.run(application, host=host, port=effective_port)
|