synth-ai 0.2.9.dev1__py3-none-any.whl → 0.2.9.dev3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

synth_ai/demo_registry.py CHANGED
@@ -62,7 +62,7 @@ DEMO_TEMPLATES: tuple[DemoTemplate, ...] = (
62
62
  make_executable=True,
63
63
  ),
64
64
  CopySpec(
65
- "examples/rl/configs/rl_from_base_qwen17.toml",
65
+ "synth_ai/demos/demo_task_apps/math/config.toml",
66
66
  "configs/rl_from_base_qwen17.toml",
67
67
  ),
68
68
  ),
@@ -78,7 +78,7 @@ DEMO_TEMPLATES: tuple[DemoTemplate, ...] = (
78
78
  "# Optional: set to 'prod' to use production names",
79
79
  "ENVIRONMENT=",
80
80
  ),
81
- config_source="examples/rl/configs/rl_from_base_qwen17.toml",
81
+ config_source="synth_ai/demos/demo_task_apps/math/config.toml",
82
82
  requires_modal=True,
83
83
  post_copy=lambda root: _postprocess_math_modal(root),
84
84
  ),
@@ -88,19 +88,19 @@ DEMO_TEMPLATES: tuple[DemoTemplate, ...] = (
88
88
  description="Lightweight wrapper around synth_ai.task.apps.grpo_crafter for local experimentation.",
89
89
  copy_specs=(
90
90
  CopySpec(
91
- "examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py",
91
+ "synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py",
92
92
  "task_app.py",
93
93
  ),
94
94
  CopySpec(
95
- "examples/warming_up_to_rl/task_app/README.md",
95
+ "synth_ai/demos/demo_task_apps/crafter/README.md",
96
96
  "README.md",
97
97
  ),
98
98
  CopySpec(
99
- "examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml",
99
+ "synth_ai/demos/demo_task_apps/crafter/configs/rl_from_base_qwen4b.toml",
100
100
  "configs/rl_from_base_qwen4b.toml",
101
101
  ),
102
102
  CopySpec(
103
- "examples/warming_up_to_rl/configs/crafter_fft_4b.toml",
103
+ "synth_ai/demos/demo_task_apps/crafter/configs/crafter_fft_4b.toml",
104
104
  "configs/crafter_fft_4b.toml",
105
105
  ),
106
106
  ),
@@ -112,7 +112,7 @@ DEMO_TEMPLATES: tuple[DemoTemplate, ...] = (
112
112
  "# Optional: URL for existing Crafter task app",
113
113
  "TASK_APP_BASE_URL=",
114
114
  ),
115
- config_source="examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml",
115
+ config_source="synth_ai/demos/demo_task_apps/crafter/configs/rl_from_base_qwen4b.toml",
116
116
  config_destination="demo_config.toml",
117
117
  requires_modal=False,
118
118
  post_copy=lambda root: _postprocess_crafter_local(root),
@@ -0,0 +1 @@
1
+ # Crafter demo task app
@@ -0,0 +1,54 @@
1
+ # FFT job config for Qwen/Qwen3-4B on Crafter SFT dataset
2
+
3
+ [algorithm]
4
+ type = "offline"
5
+ method = "supervised_finetune"
6
+ variety = "fft"
7
+
8
+
9
+ [job]
10
+ model = "Qwen/Qwen3-4B"
11
+ # Limit training to the first 100 conversations (export a 100-row JSONL and point to it here)
12
+ # data = "../ft_data/qwen3_32b_ach_ge3_raw_filtered.head100.jsonl"
13
+
14
+ [compute]
15
+ # Adjust as needed for your quota
16
+ gpu_type = "H100"
17
+ gpu_count = 1
18
+ nodes = 1
19
+
20
+ [data]
21
+ # Optional topology metadata (left empty for now)
22
+ topology = {}
23
+
24
+ # Optional local validation dataset path (JSONL). If set, the client will upload
25
+ # this file and wire up validation so the frontend can display val.loss.
26
+ validation_path = "../ft_data/qwen3_32b_ach_ge3_raw_filtered.tokens_1000000_seed_123.val_2000.jsonl"
27
+
28
+ [training]
29
+ mode = "sft_offline"
30
+ use_qlora = false
31
+
32
+ # Validation settings to emit val.loss on the frontend
33
+ [training.validation]
34
+ enabled = true
35
+ evaluation_strategy = "steps"
36
+ eval_steps = 20
37
+ save_best_model_at_end = true
38
+ metric_for_best_model = "val.loss"
39
+ greater_is_better = false
40
+
41
+ [hyperparameters]
42
+ # Minimal safe defaults; backend can override
43
+ n_epochs = 1
44
+ batch_size = 1
45
+ gradient_accumulation_steps = 64
46
+ sequence_length = 4096
47
+ learning_rate = 5e-6
48
+ warmup_ratio = 0.03
49
+ train_kind = "fft"
50
+
51
+ # Optional parallelism block example
52
+ #[hyperparameters.parallelism]
53
+ # tensor_parallel_size = 1
54
+ # pipeline_parallel_size = 1
@@ -0,0 +1,73 @@
1
+ # RL training starting from base Qwen/Qwen3-4B (TOML-only model selection)
2
+
3
+ [algorithm]
4
+ type = "online"
5
+ method = "policy_gradient"
6
+ variety = "gspo"
7
+
8
+
9
+ [services]
10
+ task_url = "https://synth-laboratories--grpo-crafter-task-app-final-warming--ceb5b2.modal.run"
11
+
12
+ [compute]
13
+ # Cluster shape for RL pipeline
14
+ gpu_type = "H100"
15
+ gpu_count = 8
16
+
17
+ [topology]
18
+ # Split GPUs across vLLM, training, and reference
19
+ # Must sum to compute.gpu_count
20
+ type = "single_node_split"
21
+ gpus_for_vllm = 4
22
+ gpus_for_training = 3
23
+ gpus_for_ref = 1
24
+ tensor_parallel = 4
25
+
26
+ [vllm]
27
+ # Serving tensor parallel size
28
+ tensor_parallel_size = 4
29
+ max_model_len = 8192
30
+
31
+ [reference]
32
+ # Required by trainer/runtime; ensures dedicated/scoped scoring server config exists
33
+ placement = "dedicated"
34
+ port = 8002
35
+ tp = 1
36
+ health_max_wait_s = 180
37
+ health_interval_ms = 300
38
+
39
+ [model]
40
+ # Base model start
41
+ base = "Qwen/Qwen3-4B"
42
+ label = "crafter-rl-from-base"
43
+
44
+ [rollout]
45
+ max_turns = 10
46
+ episodes_per_batch = 64
47
+ policy_name = "crafter"
48
+
49
+ [evaluation]
50
+ # Run baseline evaluation over the first 10 seeds every 10 training iterations
51
+ instances = 10
52
+ every_n_iters = 10
53
+ seeds = [
54
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
55
+ ]
56
+
57
+ [training]
58
+ log_interval = 1
59
+ weight_sync_interval = 1
60
+ # Additional RL hyperparameters can go here
61
+
62
+ # Stepwise rewards (Crafter decision-level)
63
+ step_rewards_enabled = true
64
+ step_rewards_mode = "decision_stepwise" # "off" | "decision_stepwise" | "env_sparse"
65
+ step_rewards_beta = 0.0
66
+ step_rewards_indicator_lambda = 1.0
67
+ # Optional selector for decision scalar: "unique" | "absolute" (default unique)
68
+ event_rewards_kind = "unique"
69
+
70
+ [training.weight_sync]
71
+ enable = true
72
+ targets = ["policy"]
73
+ weight_sync_interval = 1
@@ -0,0 +1,165 @@
1
+
2
+ """Compatibility wrapper for the GRPO Crafter task app.
3
+
4
+ This module now delegates to the shared TaskAppConfig defined in
5
+ `synth_ai.task.apps.grpo_crafter`. It is kept for legacy usage (running the
6
+ file directly or targeting `fastapi_app` from external tooling). Prefer using
7
+ `uvx synth-ai serve grpo-crafter` for local development and testing.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import argparse
13
+ from pathlib import Path
14
+
15
+ from fastapi.exceptions import RequestValidationError
16
+ from fastapi.responses import JSONResponse
17
+ from starlette.requests import Request
18
+
19
+ from synth_ai.task.apps import ModalDeploymentConfig, registry
20
+ from synth_ai.task.apps.grpo_crafter import build_config
21
+ from synth_ai.task.auth import is_api_key_header_authorized, normalize_environment_api_key
22
+ from synth_ai.task.server import TaskAppConfig, create_task_app, run_task_app
23
+
24
+
25
+ APP_ID = "grpo-crafter"
26
+
27
+
28
+ _BASE_CONFIG = build_config()
29
+ TASK_APP_CONFIG = TaskAppConfig(
30
+ app_id="grpo-crafter",
31
+ name=_BASE_CONFIG.name,
32
+ description=_BASE_CONFIG.description,
33
+ base_task_info=_BASE_CONFIG.base_task_info,
34
+ describe_taskset=_BASE_CONFIG.describe_taskset,
35
+ provide_task_instances=_BASE_CONFIG.provide_task_instances,
36
+ rollout=_BASE_CONFIG.rollout,
37
+ dataset_registry=_BASE_CONFIG.dataset_registry,
38
+ rubrics=_BASE_CONFIG.rubrics,
39
+ proxy=_BASE_CONFIG.proxy,
40
+ routers=_BASE_CONFIG.routers,
41
+ middleware=_BASE_CONFIG.middleware,
42
+ app_state=_BASE_CONFIG.app_state,
43
+ require_api_key=_BASE_CONFIG.require_api_key,
44
+ expose_debug_env=_BASE_CONFIG.expose_debug_env,
45
+ cors_origins=_BASE_CONFIG.cors_origins,
46
+ startup_hooks=_BASE_CONFIG.startup_hooks,
47
+ shutdown_hooks=_BASE_CONFIG.shutdown_hooks,
48
+ )
49
+
50
+ try:
51
+ _REGISTERED_ENTRY = registry.get(APP_ID)
52
+ except Exception: # pragma: no cover - registry unavailable in some contexts
53
+ MODAL_DEPLOYMENT: ModalDeploymentConfig | None = None
54
+ ENV_FILES: tuple[str, ...] = ()
55
+ else:
56
+ MODAL_DEPLOYMENT = _REGISTERED_ENTRY.modal
57
+ ENV_FILES = tuple(_REGISTERED_ENTRY.env_files)
58
+
59
+
60
+ def build_task_app_config() -> TaskAppConfig:
61
+ """Return a fresh TaskAppConfig for this wrapper."""
62
+
63
+ return TASK_APP_CONFIG.clone()
64
+
65
+
66
+ def fastapi_app():
67
+ """Return the FastAPI application for Modal or other ASGI hosts."""
68
+
69
+ app = create_task_app(build_task_app_config())
70
+
71
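+ # Replace the default health endpoints so an unauthorized probe gets a soft 200 (authorized=False); 422s are logged by the validation handler below. NOTE(review): the unauthorized response includes the first half of ENVIRONMENT_API_KEY - confirm this exposure is intended.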
72
+ filtered_routes = []
73
+ for route in app.router.routes:
74
+ path = getattr(route, "path", None)
75
+ methods = getattr(route, "methods", set()) or set()
76
+ if path in {"/health", "/health/rollout"} and "GET" in methods:
77
+ continue
78
+ filtered_routes.append(route)
79
+ app.router.routes = filtered_routes
80
+
81
+ def _log_env_key_prefix(source: str, env_key: str | None) -> str | None:
82
+ if not env_key:
83
+ return None
84
+ prefix = env_key[: max(1, len(env_key) // 2)]
85
+ print(f"[{source}] expected ENVIRONMENT_API_KEY prefix: {prefix}")
86
+ return prefix
87
+
88
+ @app.get("/health")
89
+ async def health(request: Request):
90
+ env_key = normalize_environment_api_key()
91
+ if not env_key:
92
+ return JSONResponse(
93
+ status_code=503,
94
+ content={"status": "unhealthy", "detail": "Missing ENVIRONMENT_API_KEY"},
95
+ )
96
+ if not is_api_key_header_authorized(request):
97
+ prefix = _log_env_key_prefix("health", env_key)
98
+ content = {"status": "healthy", "authorized": False}
99
+ if prefix:
100
+ content["expected_api_key_prefix"] = prefix
101
+ return JSONResponse(status_code=200, content=content)
102
+ return {"status": "healthy", "authorized": True}
103
+
104
+ @app.get("/health/rollout")
105
+ async def health_rollout(request: Request):
106
+ env_key = normalize_environment_api_key()
107
+ if not env_key:
108
+ return JSONResponse(
109
+ status_code=503,
110
+ content={"status": "unhealthy", "detail": "Missing ENVIRONMENT_API_KEY"},
111
+ )
112
+ if not is_api_key_header_authorized(request):
113
+ prefix = _log_env_key_prefix("health/rollout", env_key)
114
+ content = {"status": "healthy", "authorized": False}
115
+ if prefix:
116
+ content["expected_api_key_prefix"] = prefix
117
+ return JSONResponse(status_code=200, content=content)
118
+ return {"ok": True, "authorized": True}
119
+
120
+ @app.exception_handler(RequestValidationError)
121
+ async def _on_validation_error(request: Request, exc: RequestValidationError):
122
+ try:
123
+ hdr = request.headers
124
+ snapshot = {
125
+ "path": str(getattr(request, "url").path),
126
+ "have_x_api_key": bool(hdr.get("x-api-key")),
127
+ "have_x_api_keys": bool(hdr.get("x-api-keys")),
128
+ "have_authorization": bool(hdr.get("authorization")),
129
+ "errors": exc.errors()[:5],
130
+ }
131
+ print("[422] validation", snapshot, flush=True)
132
+ except Exception:
133
+ pass
134
+ return JSONResponse(
135
+ status_code=422,
136
+ content={"status": "invalid", "detail": exc.errors()[:5]},
137
+ )
138
+
139
+ return app
140
+
141
+
142
+ if __name__ == "__main__":
143
+ parser = argparse.ArgumentParser(description="Run the Crafter task app locally")
144
+ parser.add_argument("--host", default="0.0.0.0")
145
+ parser.add_argument("--port", type=int, default=8001)
146
+ parser.add_argument("--reload", action="store_true", help="Enable uvicorn autoreload")
147
+ parser.add_argument(
148
+ "--env-file",
149
+ action="append",
150
+ default=[],
151
+ help="Additional .env files to load before startup",
152
+ )
153
+ args = parser.parse_args()
154
+
155
+ default_env = Path(__file__).resolve().parents[4] / "backend" / ".env.dev"
156
+ env_files = [str(default_env)] if default_env.exists() else []
157
+ env_files.extend(args.env_file or [])
158
+
159
+ run_task_app(
160
+ build_task_app_config,
161
+ host=args.host,
162
+ port=args.port,
163
+ reload=args.reload,
164
+ env_files=env_files,
165
+ )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: synth-ai
3
- Version: 0.2.9.dev1
3
+ Version: 0.2.9.dev3
4
4
  Summary: RL as a service SDK - Core AI functionality and tracing
5
5
  Author-email: Synth AI <josh@usesynth.ai>
6
6
  License-Expression: MIT
@@ -1,6 +1,6 @@
1
1
  synth_ai/__init__.py,sha256=NixuXddy4lS2Wmj0F8eMt0HS_oYCTnq3iVVq5VYwWIc,1341
2
2
  synth_ai/__main__.py,sha256=Kh1xBKkTE5Vs2qNMtDuuOXerHUptMcOiF3YziOpC6DA,146
3
- synth_ai/demo_registry.py,sha256=PhBV3oQpNDTPdPqnUcRchOyYB4BZlQkbhPZfIneLskk,8485
3
+ synth_ai/demo_registry.py,sha256=pH-Pyrocp2vNBcw7J80Nu_525nBtI5G_dC4d4mQDYl8,8531
4
4
  synth_ai/handshake.py,sha256=uzoTOpkf9JQgsyKWrlx8gjfQmK3HpqFQAZY1gZDtiIo,3735
5
5
  synth_ai/http.py,sha256=lqjFXDmAP_xgfywK_rDSOVxuMy4rDH9S3Rtu9k1tLmk,1028
6
6
  synth_ai/http_client.py,sha256=_9J8rUGoItUMnJLGZw7r0uXiJeLWR939kByRkvtP1XM,4429
@@ -34,6 +34,10 @@ synth_ai/demos/core/__init__.py,sha256=A2FjhY7KXGtyzdQXqeTPCkEhHfrH-eQg6bvP8HaYh
34
34
  synth_ai/demos/core/cli.py,sha256=5z89ykvJAP-MipT-RYx7BXwTwi8nIAyUb-RcPDpXL5w,57520
35
35
  synth_ai/demos/demo_task_apps/__init__.py,sha256=LmNLB5oHncmVmavbLb6_1fsFYoNxijMd3ksLScBMxiw,243
36
36
  synth_ai/demos/demo_task_apps/core.py,sha256=Eu7gp0VtZ9tE1HPLG14-pkjC1cD_7brsdl2IRbdSBts,14764
37
+ synth_ai/demos/demo_task_apps/crafter/__init__.py,sha256=3SnNZTzBjGR9eudStcww259vPmzoFBHJL-M0GDUD7Qo,24
38
+ synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py,sha256=SfP7YbGyYDpoY4ZWyMiUSeQAjintKPAkJRZz3U2951U,6064
39
+ synth_ai/demos/demo_task_apps/crafter/configs/crafter_fft_4b.toml,sha256=q_cnU3P-eGG_VFOepw9IA1U-m7L-uJH37EUFR3mNWBI,1358
40
+ synth_ai/demos/demo_task_apps/crafter/configs/rl_from_base_qwen4b.toml,sha256=qVSypYOMUd8g6pmiovi7nsgk4jgMBjJIORpIsrmNV4U,1644
37
41
  synth_ai/demos/demo_task_apps/math/__init__.py,sha256=WBzpZwSn7pRarBmhopQi34i9bEm05-71eM3siboOavY,43
38
42
  synth_ai/demos/demo_task_apps/math/_common.py,sha256=SgtVW1pne4pgwGS2gYYQWkmG9BvU2sQTYzlncmUJ0NM,533
39
43
  synth_ai/demos/demo_task_apps/math/app.py,sha256=gNopoAhwM0vzdKuCa7AwQqSwiV2xagrjMxMH9YIniv4,1160
@@ -436,9 +440,9 @@ synth_ai/v0/tracing_v1/events/manage.py,sha256=ZDXXP-ZwLH9LCsmw7Ru9o55d7bl_diPtJ
436
440
  synth_ai/v0/tracing_v1/events/scope.py,sha256=BuBkhSpVHUJt8iGT9HJZF82rbb88mQcd2vM2shg-w2I,2550
437
441
  synth_ai/v0/tracing_v1/events/store.py,sha256=0342lvAcalyJbVEIzQFaPuMQGgwiFm7M5rE6gr-G0E8,9041
438
442
  synth_ai/zyk/__init__.py,sha256=htVLnzTYQ5rxzYpzSYBm7_o6uNKZ3pB_PrqkBrgTRS4,771
439
- synth_ai-0.2.9.dev1.dist-info/licenses/LICENSE,sha256=ynhjRQUfqA_RdGRATApfFA_fBAy9cno04sLtLUqxVFM,1069
440
- synth_ai-0.2.9.dev1.dist-info/METADATA,sha256=6qtxdQl8lJt417-97ymMMs843CRlGDU_lhGPV0TFd8M,5200
441
- synth_ai-0.2.9.dev1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
442
- synth_ai-0.2.9.dev1.dist-info/entry_points.txt,sha256=Neq-3bT7TAijjgOIR77pKL-WYg6TWBDeO8pp_nL4vGY,91
443
- synth_ai-0.2.9.dev1.dist-info/top_level.txt,sha256=fBmtZyVHuKaGa29oHBaaUkrUIWTqSpoVMPiVdCDP3k8,9
444
- synth_ai-0.2.9.dev1.dist-info/RECORD,,
443
+ synth_ai-0.2.9.dev3.dist-info/licenses/LICENSE,sha256=ynhjRQUfqA_RdGRATApfFA_fBAy9cno04sLtLUqxVFM,1069
444
+ synth_ai-0.2.9.dev3.dist-info/METADATA,sha256=Km3_Hf_1bxFLGaBHdljL_V7Xx9WKKkTRKj29ep7J5p8,5200
445
+ synth_ai-0.2.9.dev3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
446
+ synth_ai-0.2.9.dev3.dist-info/entry_points.txt,sha256=Neq-3bT7TAijjgOIR77pKL-WYg6TWBDeO8pp_nL4vGY,91
447
+ synth_ai-0.2.9.dev3.dist-info/top_level.txt,sha256=fBmtZyVHuKaGa29oHBaaUkrUIWTqSpoVMPiVdCDP3k8,9
448
+ synth_ai-0.2.9.dev3.dist-info/RECORD,,