synth-ai 0.2.16__py3-none-any.whl → 0.2.17__py3-none-any.whl

This diff shows the changes between publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release.



Files changed (192)
  1. examples/analyze_semantic_words.sh +2 -2
  2. examples/blog_posts/pokemon_vl/README.md +98 -0
  3. examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +25 -0
  4. examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
  5. examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
  6. examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +42 -0
  7. examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
  8. examples/blog_posts/warming_up_to_rl/README.md +158 -0
  9. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
  10. examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
  11. examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
  12. examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
  13. examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +41 -0
  14. examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
  15. examples/dev/qwen3_32b_qlora_4xh100.toml +5 -0
  16. examples/multi_step/configs/crafter_rl_outcome.toml +1 -1
  17. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +65 -107
  18. examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +1 -1
  19. examples/multi_step/configs/crafter_rl_stepwise_simple.toml +1 -1
  20. examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
  21. examples/multi_step/configs/verilog_rl_lora.toml +80 -123
  22. examples/qwen_coder/configs/coder_lora_30b.toml +1 -3
  23. examples/qwen_coder/configs/coder_lora_4b.toml +4 -1
  24. examples/qwen_coder/configs/coder_lora_small.toml +1 -3
  25. examples/qwen_vl/README.md +10 -12
  26. examples/qwen_vl/SETUP_COMPLETE.md +7 -8
  27. examples/qwen_vl/VISION_TESTS_COMPLETE.md +2 -3
  28. examples/qwen_vl/collect_data_via_cli.md +76 -84
  29. examples/qwen_vl/collect_vision_traces.py +4 -4
  30. examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +40 -57
  31. examples/qwen_vl/configs/crafter_vlm_sft_example.toml +1 -2
  32. examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +20 -37
  33. examples/qwen_vl/configs/eval_gpt5nano_vision.toml +21 -40
  34. examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
  35. examples/qwen_vl/configs/{filter_qwen2vl_sft.toml → filter_qwen3vl_sft.toml} +4 -5
  36. examples/qwen_vl/configs/filter_vision_sft.toml +2 -3
  37. examples/qwen_vl/crafter_qwen_vl_agent.py +5 -5
  38. examples/qwen_vl/run_vision_comparison.sh +6 -7
  39. examples/rl/README.md +5 -5
  40. examples/rl/configs/rl_from_base_qwen.toml +26 -1
  41. examples/rl/configs/rl_from_base_qwen17.toml +5 -2
  42. examples/rl/task_app/README.md +1 -2
  43. examples/rl/task_app/math_single_step.py +2 -2
  44. examples/run_crafter_demo.sh +2 -2
  45. examples/sft/README.md +1 -1
  46. examples/sft/configs/crafter_fft_qwen0p6b.toml +4 -1
  47. examples/sft/configs/crafter_lora_qwen0p6b.toml +4 -1
  48. examples/swe/task_app/README.md +32 -2
  49. examples/swe/task_app/grpo_swe_mini.py +4 -0
  50. examples/swe/task_app/hosted/envs/crafter/react_agent.py +1 -1
  51. examples/swe/task_app/hosted/envs/mini_swe/environment.py +37 -10
  52. examples/swe/task_app/hosted/inference/openai_client.py +4 -4
  53. examples/swe/task_app/morph_backend.py +178 -0
  54. examples/task_apps/crafter/task_app/README.md +1 -1
  55. examples/task_apps/crafter/task_app/grpo_crafter.py +66 -3
  56. examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +1 -1
  57. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +4 -26
  58. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -2
  59. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +17 -49
  60. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +13 -5
  61. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +15 -1
  62. examples/task_apps/enron/task_app/grpo_enron_task_app.py +1 -1
  63. examples/task_apps/math/README.md +1 -2
  64. examples/task_apps/pokemon_red/README.md +3 -4
  65. examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +6 -5
  66. examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +1 -2
  67. examples/task_apps/pokemon_red/task_app.py +36 -5
  68. examples/task_apps/sokoban/README.md +2 -3
  69. examples/task_apps/verilog/eval_groq_qwen32b.toml +12 -14
  70. examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +1 -1
  71. examples/vlm/configs/crafter_vlm_gpt4o.toml +4 -1
  72. examples/warming_up_to_rl/configs/crafter_fft.toml +4 -1
  73. examples/warming_up_to_rl/configs/crafter_fft_4b.toml +0 -2
  74. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +2 -2
  75. examples/warming_up_to_rl/run_local_rollout_traced.py +1 -1
  76. examples/warming_up_to_rl/task_app/README.md +1 -1
  77. examples/warming_up_to_rl/task_app/grpo_crafter.py +134 -3
  78. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +1 -1
  79. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +3 -27
  80. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -1
  81. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +4 -4
  82. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +6 -3
  83. examples/workflows/math_rl/configs/rl_from_base_qwen.toml +27 -0
  84. examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +5 -0
  85. synth_ai/api/train/builders.py +9 -3
  86. synth_ai/api/train/cli.py +125 -10
  87. synth_ai/api/train/configs/__init__.py +8 -1
  88. synth_ai/api/train/configs/rl.py +32 -7
  89. synth_ai/api/train/configs/sft.py +6 -2
  90. synth_ai/api/train/configs/shared.py +59 -2
  91. synth_ai/auth/credentials.py +119 -0
  92. synth_ai/cli/__init__.py +12 -4
  93. synth_ai/cli/commands/__init__.py +17 -0
  94. synth_ai/cli/commands/demo/__init__.py +6 -0
  95. synth_ai/cli/commands/demo/core.py +163 -0
  96. synth_ai/cli/commands/deploy/__init__.py +23 -0
  97. synth_ai/cli/commands/deploy/core.py +614 -0
  98. synth_ai/cli/commands/deploy/errors.py +72 -0
  99. synth_ai/cli/commands/deploy/validation.py +11 -0
  100. synth_ai/cli/commands/eval/__init__.py +19 -0
  101. synth_ai/cli/commands/eval/core.py +1109 -0
  102. synth_ai/cli/commands/eval/errors.py +81 -0
  103. synth_ai/cli/commands/eval/validation.py +133 -0
  104. synth_ai/cli/commands/filter/__init__.py +12 -0
  105. synth_ai/cli/commands/filter/core.py +388 -0
  106. synth_ai/cli/commands/filter/errors.py +55 -0
  107. synth_ai/cli/commands/filter/validation.py +77 -0
  108. synth_ai/cli/commands/help/__init__.py +177 -0
  109. synth_ai/cli/commands/help/core.py +73 -0
  110. synth_ai/cli/commands/status/__init__.py +64 -0
  111. synth_ai/cli/commands/status/client.py +192 -0
  112. synth_ai/cli/commands/status/config.py +92 -0
  113. synth_ai/cli/commands/status/errors.py +20 -0
  114. synth_ai/cli/commands/status/formatters.py +164 -0
  115. synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
  116. synth_ai/cli/commands/status/subcommands/files.py +79 -0
  117. synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
  118. synth_ai/cli/commands/status/subcommands/models.py +79 -0
  119. synth_ai/cli/commands/status/subcommands/runs.py +81 -0
  120. synth_ai/cli/commands/status/subcommands/summary.py +47 -0
  121. synth_ai/cli/commands/status/utils.py +114 -0
  122. synth_ai/cli/commands/train/__init__.py +53 -0
  123. synth_ai/cli/commands/train/core.py +21 -0
  124. synth_ai/cli/commands/train/errors.py +117 -0
  125. synth_ai/cli/commands/train/judge_schemas.py +199 -0
  126. synth_ai/cli/commands/train/judge_validation.py +304 -0
  127. synth_ai/cli/commands/train/validation.py +443 -0
  128. synth_ai/cli/demo.py +2 -162
  129. synth_ai/cli/deploy/__init__.py +28 -0
  130. synth_ai/cli/deploy/core.py +5 -0
  131. synth_ai/cli/deploy/errors.py +23 -0
  132. synth_ai/cli/deploy/validation.py +5 -0
  133. synth_ai/cli/eval/__init__.py +36 -0
  134. synth_ai/cli/eval/core.py +5 -0
  135. synth_ai/cli/eval/errors.py +31 -0
  136. synth_ai/cli/eval/validation.py +5 -0
  137. synth_ai/cli/filter/__init__.py +28 -0
  138. synth_ai/cli/filter/core.py +5 -0
  139. synth_ai/cli/filter/errors.py +23 -0
  140. synth_ai/cli/filter/validation.py +5 -0
  141. synth_ai/cli/modal_serve/__init__.py +12 -0
  142. synth_ai/cli/modal_serve/core.py +14 -0
  143. synth_ai/cli/modal_serve/errors.py +8 -0
  144. synth_ai/cli/modal_serve/validation.py +11 -0
  145. synth_ai/cli/serve/__init__.py +12 -0
  146. synth_ai/cli/serve/core.py +14 -0
  147. synth_ai/cli/serve/errors.py +8 -0
  148. synth_ai/cli/serve/validation.py +11 -0
  149. synth_ai/cli/setup.py +20 -265
  150. synth_ai/cli/status.py +7 -126
  151. synth_ai/cli/task_app_deploy.py +1 -10
  152. synth_ai/cli/task_app_modal_serve.py +4 -9
  153. synth_ai/cli/task_app_serve.py +4 -11
  154. synth_ai/cli/task_apps.py +58 -1487
  155. synth_ai/cli/train/__init__.py +12 -0
  156. synth_ai/cli/train/core.py +21 -0
  157. synth_ai/cli/train/errors.py +8 -0
  158. synth_ai/cli/train/validation.py +24 -0
  159. synth_ai/cli/train.py +1 -14
  160. synth_ai/demos/crafter/grpo_crafter_task_app.py +1 -1
  161. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
  162. synth_ai/environments/examples/red/engine.py +33 -12
  163. synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
  164. synth_ai/environments/examples/red/environment.py +26 -0
  165. synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
  166. synth_ai/http.py +12 -0
  167. synth_ai/judge_schemas.py +10 -11
  168. synth_ai/learning/rl/client.py +3 -1
  169. synth_ai/streaming/__init__.py +29 -0
  170. synth_ai/streaming/config.py +94 -0
  171. synth_ai/streaming/handlers.py +469 -0
  172. synth_ai/streaming/streamer.py +301 -0
  173. synth_ai/streaming/types.py +95 -0
  174. synth_ai/task/validators.py +2 -2
  175. synth_ai/tracing_v3/migration_helper.py +1 -2
  176. synth_ai/utils/env.py +25 -18
  177. synth_ai/utils/http.py +4 -1
  178. synth_ai/utils/modal.py +2 -2
  179. {synth_ai-0.2.16.dist-info → synth_ai-0.2.17.dist-info}/METADATA +8 -3
  180. {synth_ai-0.2.16.dist-info → synth_ai-0.2.17.dist-info}/RECORD +184 -109
  181. examples/qwen_vl/configs/eval_qwen2vl_vision.toml +0 -44
  182. synth_ai/cli/tui.py +0 -62
  183. synth_ai/tui/__init__.py +0 -5
  184. synth_ai/tui/__main__.py +0 -13
  185. synth_ai/tui/cli/__init__.py +0 -1
  186. synth_ai/tui/cli/query_experiments.py +0 -164
  187. synth_ai/tui/cli/query_experiments_v3.py +0 -164
  188. synth_ai/tui/dashboard.py +0 -911
  189. {synth_ai-0.2.16.dist-info → synth_ai-0.2.17.dist-info}/WHEEL +0 -0
  190. {synth_ai-0.2.16.dist-info → synth_ai-0.2.17.dist-info}/entry_points.txt +0 -0
  191. {synth_ai-0.2.16.dist-info → synth_ai-0.2.17.dist-info}/licenses/LICENSE +0 -0
  192. {synth_ai-0.2.16.dist-info → synth_ai-0.2.17.dist-info}/top_level.txt +0 -0
synth_ai/api/train/cli.py CHANGED
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import asyncio
 import importlib
 import os
 import time
@@ -17,10 +18,18 @@ try:
 except Exception as exc:  # pragma: no cover - critical dependency
     raise RuntimeError("Unable to load backend configuration helpers") from exc
 
+from synth_ai.streaming import (
+    CLIHandler,
+    JobStreamer,
+    LossCurveHandler,
+    StreamConfig,
+    StreamEndpoints,
+    StreamType,
+)
+
 from .builders import build_rl_payload, build_sft_payload
 from .config_finder import discover_configs, prompt_for_config
 from .env_resolver import KeySpec, resolve_env
-from .pollers import RLJobPoller, SFTJobPoller
 from .task_app import check_task_app_health
 from .utils import (
     REPO_ROOT,
@@ -135,6 +144,62 @@ def _default_backend() -> str:
     return f"{base}/api" if not base.endswith("/api") else base
 
 
+_DEFAULT_SFT_HIDDEN_EVENTS = {
+    "sft.created",
+    "sft.pricing.check.requested",
+    "sft.pricing.check.allowed",
+    "sft.stage",
+    "snapshot.fetch",
+    "hatchet.preflight",
+    "hatchet.submission.attempt",
+    "hatchet.submission.result",
+    "sft.running",
+    "sft.status",
+    "sft.worker.alive",
+    "sft.dispatch.selected",
+    "sft.config.prepared",
+    "sft.strategy.selected",
+    "sft.training.args",
+}
+
+_DEFAULT_RL_HIDDEN_SUBSTRINGS = {"modal", "hatchet"}
+
+
+def _build_stream_components(
+    stream_format: str,
+    *,
+    hidden_event_types: set[str] | None = None,
+    hidden_event_substrings: set[str] | None = None,
+) -> tuple[StreamConfig, list]:
+    """Return stream configuration and handlers for the requested format."""
+    if stream_format == "chart":
+        config = StreamConfig(
+            enabled_streams={StreamType.STATUS, StreamType.EVENTS, StreamType.METRICS},
+            event_types={
+                "sft.progress",
+                "sft.training.started",
+                "sft.training.finish",
+                "sft.validation.summary",
+                "rl.train.step",
+                "rl.train.started",
+                "rl.train.completed",
+                "workflow.completed",
+                "workflow.failed",
+            },
+            metric_names={"train.loss"},
+        )
+        handlers = [LossCurveHandler()]
+    else:
+        config = StreamConfig.default()
+        handlers = [
+            CLIHandler(
+                hidden_event_types=hidden_event_types or set(),
+                hidden_event_substrings=hidden_event_substrings or set(),
+            )
+        ]
+    return config, handlers
+
+
 @click.command("train")
 @click.option(
     "--config",
@@ -183,6 +248,13 @@ def _default_backend() -> str:
     "--poll-timeout", default=3600.0, type=float, help="Maximum seconds to poll before timing out"
 )
 @click.option("--poll-interval", default=5.0, type=float, help="Seconds between poll attempts")
+@click.option(
+    "--stream-format",
+    type=click.Choice(["cli", "chart"]),
+    default="cli",
+    show_default=True,
+    help="Streaming output style (cli = line updates, chart = live loss panel)",
+)
 @click.option(
     "--examples",
     "examples_limit",
@@ -204,6 +276,7 @@ def train_command(
     poll: bool,
     poll_timeout: float,
     poll_interval: float,
+    stream_format: str,
     examples_limit: int | None,
 ) -> None:
     """Interactive launcher for RL / SFT jobs."""
@@ -302,6 +375,7 @@ def train_command(
             poll=poll,
             poll_timeout=poll_timeout,
             poll_interval=poll_interval,
+            stream_format=stream_format,
         )
     else:
         dataset_override_path = Path(dataset_path).expanduser().resolve() if dataset_path else None
@@ -315,13 +389,22 @@ def train_command(
             poll=poll,
             poll_timeout=poll_timeout,
             poll_interval=poll_interval,
+            stream_format=stream_format,
             examples_limit=examples_limit,
         )
 
 
 def _wait_for_training_file(
-    backend_base: str, api_key: str, file_id: str, *, timeout: float = 120.0
+    backend_base: str, api_key: str, file_id: str, *, timeout: float = 10.0
 ) -> None:
+    """Wait for training file to be visible after upload.
+
+    Reduced from 120s to 10s because:
+    - POST response already confirms file is uploaded
+    - Backend now forces read-your-writes consistency
+    - By job creation time, replica lag has resolved
+    - Quick sanity check only, not critical path
+    """
     url = f"{backend_base.rstrip('/')}/files/{file_id}"
     headers = {"Authorization": f"Bearer {api_key}"}
     elapsed = 0.0
@@ -400,6 +483,7 @@ def handle_rl(
     poll: bool,
     poll_timeout: float,
     poll_interval: float,
+    stream_format: str,
 ) -> None:
     overrides: dict[str, Any] = {
         "backend": backend_base,
@@ -497,10 +581,25 @@ def handle_rl(
         click.echo(f"Created job {job_id} (polling disabled)")
         return
 
-    poller = RLJobPoller(backend_base, synth_key, interval=poll_interval, timeout=poll_timeout)
-    outcome = poller.poll_job(job_id)
-    click.echo(f"Final status: {outcome.status}")
-    click.echo(preview_json(outcome.payload, limit=600))
+    click.echo("\n=== Streaming Job Progress ===")
+    config, handlers = _build_stream_components(
+        stream_format, hidden_event_substrings=_DEFAULT_RL_HIDDEN_SUBSTRINGS
+    )
+    if stream_format == "chart":
+        click.echo("Using live loss chart (metric=train.loss)")
+    streamer = JobStreamer(
+        base_url=backend_base,
+        api_key=synth_key,
+        job_id=job_id,
+        endpoints=StreamEndpoints.rl(job_id),
+        config=config,
+        handlers=handlers,
+        interval_seconds=poll_interval,
+        timeout_seconds=poll_timeout,
+    )
+    final_status = asyncio.run(streamer.stream_until_terminal())
+    click.echo(f"Final status: {final_status.get('status', 'unknown')}")
+    click.echo(preview_json(final_status, limit=600))
 
 
 def handle_sft(
@@ -514,6 +613,7 @@ def handle_sft(
     poll: bool,
     poll_timeout: float,
     poll_interval: float,
+    stream_format: str,
     examples_limit: int | None,
 ) -> None:
     dataset_path = dataset_override
@@ -641,10 +741,25 @@ def handle_sft(
            click.echo(f"Started job {job_id} (polling disabled)")
            return
 
-        poller = SFTJobPoller(backend_base, synth_key, interval=poll_interval, timeout=poll_timeout)
-        outcome = poller.poll_job(job_id)
-        click.echo(f"Final status: {outcome.status}")
-        click.echo(preview_json(outcome.payload, limit=600))
+        click.echo("\n=== Streaming Job Progress ===")
+        config, handlers = _build_stream_components(
+            stream_format, hidden_event_types=_DEFAULT_SFT_HIDDEN_EVENTS
+        )
+        if stream_format == "chart":
+            click.echo("Using live loss chart (metric=train.loss)")
+        streamer = JobStreamer(
+            base_url=backend_base,
+            api_key=synth_key,
+            job_id=job_id,
+            endpoints=StreamEndpoints.learning(job_id),
+            config=config,
+            handlers=handlers,
+            interval_seconds=poll_interval,
+            timeout_seconds=poll_timeout,
+        )
+        final_status = asyncio.run(streamer.stream_until_terminal())
+        click.echo(f"Final status: {final_status.get('status', 'unknown')}")
+        click.echo(preview_json(final_status, limit=600))
     finally:
        if limited_path is not None:
            try:
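In place of the removed RLJobPoller/SFTJobPoller flow, job progress is now driven by the synth_ai.streaming package. A minimal sketch of following a job outside the CLI, assuming only the constructor arguments shown in the diff above; the backend URL, API key, and job id are placeholders:

import asyncio

from synth_ai.streaming import CLIHandler, JobStreamer, StreamConfig, StreamEndpoints


async def follow_job(backend_base: str, api_key: str, job_id: str) -> dict:
    # Mirrors the CLI's default "cli" stream format: default config plus the
    # line-oriented handler, polling every 5 seconds for up to an hour.
    streamer = JobStreamer(
        base_url=backend_base,
        api_key=api_key,
        job_id=job_id,
        endpoints=StreamEndpoints.learning(job_id),  # StreamEndpoints.rl(job_id) for RL jobs
        config=StreamConfig.default(),
        handlers=[CLIHandler(hidden_event_types=set(), hidden_event_substrings=set())],
        interval_seconds=5.0,
        timeout_seconds=3600.0,
    )
    return await streamer.stream_until_terminal()


# final_status = asyncio.run(follow_job("https://backend.example/api", "sk-...", "job_123"))
# print(final_status.get("status", "unknown"))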
synth_ai/api/train/configs/__init__.py CHANGED
@@ -5,10 +5,12 @@ from .rl import (
     JudgeConfig,
     JudgeOptionsConfig,
     ModelConfig,
+    RewardsConfig,
     RLConfig,
     RLServicesConfig,
     RLTrainingConfig,
     RolloutConfig,
+    RubricConfig,
     WeightSyncConfig,
 )
 from .sft import (
@@ -20,7 +22,7 @@ from .sft import (
     TrainingConfig,
     TrainingValidationConfig,
 )
-from .shared import AlgorithmConfig, ComputeConfig
+from .shared import AlgorithmConfig, ComputeConfig, LoraConfig, PolicyConfig, TopologyConfig
 
 __all__ = [
     "AlgorithmConfig",
@@ -31,13 +33,18 @@ __all__ = [
     "JobConfig",
     "JudgeConfig",
     "JudgeOptionsConfig",
+    "LoraConfig",
     "ModelConfig",
+    "PolicyConfig",
+    "RewardsConfig",
     "RLConfig",
     "RLServicesConfig",
     "RLTrainingConfig",
     "RolloutConfig",
+    "RubricConfig",
     "SFTConfig",
     "SFTDataConfig",
+    "TopologyConfig",
     "TrainingConfig",
     "TrainingValidationConfig",
     "WeightSyncConfig",
synth_ai/api/train/configs/rl.py CHANGED
@@ -7,7 +7,7 @@ from typing import Any
 from pydantic import model_validator
 
 from ..utils import load_toml
-from .shared import AlgorithmConfig, ComputeConfig, ExtraModel
+from .shared import AlgorithmConfig, ComputeConfig, ExtraModel, LoraConfig, PolicyConfig
 
 
 class RLServicesConfig(ExtraModel):
@@ -48,6 +48,16 @@ class WeightSyncConfig(ExtraModel):
     verify_every_k: int | None = None
 
 
+class RewardsConfig(ExtraModel):
+    """Rewards configuration for RL training."""
+    step_rewards_enabled: bool | None = None
+    step_rewards_mode: str | None = None
+    step_rewards_indicator_lambda: float | None = None
+    step_rewards_beta: float | None = None
+    step_rewards_strategy: str | None = None
+    event_rewards_kind: str | None = None
+
+
 class RLTrainingConfig(ExtraModel):
     num_epochs: int
     iterations_per_epoch: int
@@ -59,13 +69,17 @@ class RLTrainingConfig(ExtraModel):
     learning_rate: float
     log_interval: int | None = None
     weight_sync_interval: int | None = None
+    # DEPRECATED: flat reward fields (use rewards.* instead)
     step_rewards_enabled: bool | None = None
     step_rewards_mode: str | None = None
     step_rewards_indicator_lambda: float | None = None
     step_rewards_beta: float | None = None
     step_rewards_strategy: str | None = None
     event_rewards_kind: str | None = None
+    # NEW: nested configs
     weight_sync: WeightSyncConfig | None = None
+    lora: LoraConfig | None = None
+    rewards: RewardsConfig | None = None
 
 
 class EvaluationConfig(ExtraModel):
@@ -86,9 +100,18 @@ class JudgeOptionsConfig(ExtraModel):
     max_concurrency: int | None = None
 
 
+class RubricConfig(ExtraModel):
+    """Rubric configuration for reward blending."""
+    enabled: bool = False
+    reward_blend: dict[str, float] | None = None  # env, event, outcome weights
+
+
 class JudgeConfig(ExtraModel):
     type: str | None = None
     timeout_s: int | None = None
+    enabled: bool | None = None  # Master switch for judge/rubric
+    reward_blend: dict[str, float] | None = None  # NEW: nested reward blending (replaces rubric.weights)
+    rubric: RubricConfig | None = None  # DEPRECATED: use flat fields instead
     options: JudgeOptionsConfig | None = None
 
 
@@ -96,15 +119,16 @@ class RLConfig(ExtraModel):
     algorithm: AlgorithmConfig
     services: RLServicesConfig
     compute: ComputeConfig | None = None
-    topology: dict[str, Any] | None = None
+    topology: dict[str, Any] | None = None  # DEPRECATED: use compute.topology instead
     vllm: dict[str, Any] | None = None
-    reference: dict[str, Any] | None = None
-    model: ModelConfig
-    lora: dict[str, Any] | None = None
+    reference: dict[str, Any] | None = None  # DEPRECATED: use compute.topology.reference_placement instead
+    model: ModelConfig | None = None  # DEPRECATED: use policy instead
+    policy: PolicyConfig | None = None  # NEW: unified policy (preferred)
+    lora: dict[str, Any] | None = None  # DEPRECATED: use training.lora instead
     rollout: RolloutConfig | None = None
     evaluation: EvaluationConfig | None = None
     training: RLTrainingConfig | None = None
-    rubric: dict[str, Any] | None = None
+    rubric: dict[str, Any] | None = None  # DEPRECATED: use judge.reward_blend and judge.enabled instead
     judge: JudgeConfig | None = None
     tags: dict[str, Any] | None = None
 
@@ -113,7 +137,8 @@ class RLConfig(ExtraModel):
 
     @classmethod
     def from_mapping(cls, data: Mapping[str, Any]) -> RLConfig:
-        return cls.model_validate(dict(data))
+        """Load RL config from dict/TOML mapping."""
+        return cls.model_validate(data)
 
     @classmethod
     def from_path(cls, path: Path) -> RLConfig:
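The reward and judge settings that used to live as flat training fields and a [rubric] table now have nested models. A minimal sketch of the new pieces, using only field names from the diff above; the numeric values and blend weights are illustrative placeholders, not documented defaults:

from synth_ai.api.train.configs.rl import JudgeConfig, RewardsConfig

rewards = RewardsConfig(
    step_rewards_enabled=True,
    step_rewards_beta=0.1,  # placeholder value
)
judge = JudgeConfig(
    enabled=True,  # master switch replacing the deprecated [rubric] table
    reward_blend={"env": 0.5, "event": 0.25, "outcome": 0.25},  # placeholder weights
)

print(rewards.model_dump(exclude_none=True))
print(judge.model_dump(exclude_none=True))

In a training TOML these correspond to the [training.rewards] and [judge] tables; the flat training.step_rewards_* fields and [rubric] remain only as deprecated compatibility paths.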
synth_ai/api/train/configs/sft.py CHANGED
@@ -7,7 +7,7 @@ from typing import Any
 from pydantic import Field
 
 from ..utils import load_toml
-from .shared import AlgorithmConfig, ComputeConfig, ExtraModel
+from .shared import AlgorithmConfig, ComputeConfig, ExtraModel, LoraConfig, PolicyConfig
 
 
 class JobConfig(ExtraModel):
@@ -35,6 +35,7 @@ class TrainingConfig(ExtraModel):
     mode: str | None = None
     use_qlora: bool | None = None
     validation: TrainingValidationConfig | None = None
+    lora: LoraConfig | None = None  # NEW: nested LoRA config
 
 
 class HyperparametersParallelism(ExtraModel):
@@ -65,10 +66,12 @@ class HyperparametersConfig(ExtraModel):
 class SFTConfig(ExtraModel):
     algorithm: AlgorithmConfig | None = None
     job: JobConfig
+    policy: PolicyConfig | None = None  # NEW: unified policy section
     compute: ComputeConfig | None = None
     data: SFTDataConfig | None = None
     training: TrainingConfig | None = None
     hyperparameters: HyperparametersConfig = Field(default_factory=HyperparametersConfig)
+    lora: dict[str, Any] | None = None  # DEPRECATED: use training.lora instead
     tags: dict[str, Any] | None = None
 
     def to_dict(self) -> dict[str, Any]:
@@ -76,7 +79,8 @@ class SFTConfig(ExtraModel):
 
     @classmethod
     def from_mapping(cls, data: Mapping[str, Any]) -> SFTConfig:
-        return cls.model_validate(dict(data))
+        """Load SFT config from dict/TOML mapping."""
+        return cls.model_validate(data)
 
     @classmethod
     def from_path(cls, path: Path) -> SFTConfig:
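LoRA settings that previously sat in a top-level [lora] table now nest under [training.lora]. A minimal sketch of the nested form, assuming the TrainingConfig fields stay optional as shown in the diff; the rank, alpha, dropout, and target-module values are illustrative placeholders:

from synth_ai.api.train.configs.shared import LoraConfig
from synth_ai.api.train.configs.sft import TrainingConfig

# Equivalent to a [training.lora] table in an SFT TOML.
training = TrainingConfig(
    use_qlora=False,
    lora=LoraConfig(r=16, alpha=32, dropout=0.05, target_modules=["q_proj", "v_proj"]),
)
print(training.model_dump(exclude_none=True))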
synth_ai/api/train/configs/shared.py CHANGED
@@ -1,6 +1,6 @@
 from __future__ import annotations
 
-from pydantic import BaseModel, ConfigDict
+from pydantic import BaseModel, ConfigDict, model_validator
 
 
 class ExtraModel(BaseModel):
@@ -15,10 +15,67 @@ class AlgorithmConfig(ExtraModel):
     variety: str
 
 
+class TopologyConfig(ExtraModel):
+    """Compute topology configuration - how GPUs are distributed across processes."""
+    type: str | None = None  # e.g., "single_node_split"
+    gpus_for_vllm: int | None = None
+    gpus_for_training: int | None = None
+    gpus_for_ref: int | None = None
+    tensor_parallel: int | None = None
+    reference_placement: str | None = None  # NEW: e.g., "none", "shared", "dedicated"
+
+
+class LoraConfig(ExtraModel):
+    """LoRA (Low-Rank Adaptation) training configuration."""
+    r: int | None = None  # Rank
+    alpha: int | None = None
+    dropout: float | None = None
+    target_modules: list[str] | None = None
+
+
 class ComputeConfig(ExtraModel):
     gpu_type: str
     gpu_count: int
     nodes: int | None = None
+    topology: TopologyConfig | None = None  # NEW: nested topology
+
+
+class PolicyConfig(ExtraModel):
+    """Unified policy configuration for both SFT and RL.
+
+    This is the SINGLE SOURCE OF TRUTH for:
+    - What model to use (model_name or source)
+    - How to sample from it (temperature, max_tokens, etc.)
+    - How to train it (trainer_mode, label)
+    """
+
+    # Model specification (exactly one required)
+    model_name: str | None = None  # e.g., "Qwen/Qwen3-4B"
+    source: str | None = None  # e.g., "ft:abc123" for checkpoints
+
+    # Sampling parameters (with sensible defaults)
+    max_tokens: int = 512
+    temperature: float = 0.7
+    top_p: float = 0.95
+    top_k: int | None = None
+    repetition_penalty: float = 1.0
+    stop_sequences: list[str] | None = None
+
+    # Training-specific
+    trainer_mode: str  # "lora", "full", "qlora"
+    label: str  # Model identifier/name
+
+    # Optional - for distributed inference
+    inference_url: str | None = None
+
+    @model_validator(mode="after")
+    def _ensure_exactly_one_source(self) -> PolicyConfig:
+        """Ensure exactly one of model_name or source is set."""
+        if not (bool(self.model_name) ^ bool(self.source)):
+            raise ValueError(
+                "Must set exactly one: [policy].model_name OR [policy].source"
+            )
+        return self
 
 
-__all__ = ["ExtraModel", "AlgorithmConfig", "ComputeConfig"]
+__all__ = ["ExtraModel", "AlgorithmConfig", "ComputeConfig", "PolicyConfig", "TopologyConfig", "LoraConfig"]
synth_ai/auth/credentials.py ADDED
@@ -0,0 +1,119 @@
+import contextlib
+import os
+import time
+import webbrowser
+
+import requests
+from requests import RequestException
+from synth_ai.utils.env import resolve_env_var, write_env_var_to_dotenv, write_env_var_to_json
+
+
+def fetch_credentials_from_web_browser_session(
+    browser: bool = True,
+    prod: bool = True
+) -> None:
+    synth_api_key = ''
+    env_api_key = ''
+    org_name = ''
+
+    if browser:
+        origin = "https://www.usesynth.ai" if prod else "http://localhost:3000"
+        init_url = f"{origin}/api/sdk/handshake/init"
+        token_url = f"{origin}/api/sdk/handshake/token"
+
+        print(f"\n🌐 Connecting to {origin} to fetch your Synth credentials")
+
+        # 1. Initialize browser handshake
+        try:
+            init_res = requests.post(init_url, timeout=10)
+        except RequestException as exc:
+            raise RuntimeError(f"Failed to reach handshake init endpoint: {exc}") from exc
+
+        if init_res.status_code != 200:
+            body = init_res.text.strip()
+            raise RuntimeError(f"Handshake init failed ({init_res.status_code}): {body or 'no response body'}")
+
+        try:
+            init_data = init_res.json()
+        except ValueError as exc:
+            raise RuntimeError("Handshake init returned malformed JSON.") from exc
+
+        device_code = str(init_data.get("device_code") or "").strip()
+        verification_uri = str(init_data.get("verification_uri") or "").strip()
+        if not device_code or not verification_uri:
+            raise RuntimeError("Handshake init response missing device_code or verification_uri.")
+
+        try:
+            expires_in = int(init_data.get("expires_in") or 600)
+        except (TypeError, ValueError):
+            expires_in = 120
+        try:
+            interval = max(int(init_data.get("interval") or 3), 1)
+        except (TypeError, ValueError):
+            interval = 3
+
+        # 2. Open browser to verification URL
+        with contextlib.suppress(Exception):
+            webbrowser.open(verification_uri)
+
+        deadline = time.time() + expires_in
+        handshake_data = None
+
+        # 3. Poll handshake token endpoint
+        while time.time() <= deadline:
+            try:
+                handshake_res = requests.post(
+                    token_url,
+                    json={"device_code": device_code},
+                    timeout=10,
+                )
+            except RequestException:
+                time.sleep(interval)
+                continue
+
+            if handshake_res.status_code == 200:
+                try:
+                    handshake_data = handshake_res.json()
+                except ValueError as exc:
+                    raise RuntimeError("Handshake token returned malformed JSON.") from exc
+                break
+
+            if handshake_res.status_code in (404, 410):
+                raise RuntimeError("Handshake failed: device code expired or was revoked.")
+
+            time.sleep(interval)
+
+        if handshake_data is None:
+            raise TimeoutError("Handshake timed out before credentials were returned.")
+
+        # 4. Extract credentials from handshake payload
+        org = handshake_data.get("org")
+        if not isinstance(org, dict):
+            org = {}
+        org_name = str(org.get("name") or "your organization").strip()
+
+        credentials = handshake_data.get("keys")
+        if not isinstance(credentials, dict):
+            credentials = {}
+
+        synth_api_key = str(credentials.get("synth") or "").strip()
+        env_api_key = str(credentials.get("rl_env") or "").strip()
+
+        print(f"\n✅ Connected to {org_name}")
+
+    # Load credentials into the process environment and save them to .env and ~/.synth-ai/config.json
+    if synth_api_key:
+        print("\nLoading SYNTH_API_KEY into process environment")
+        os.environ["SYNTH_API_KEY"] = synth_api_key
+        synth_api_key = resolve_env_var("SYNTH_API_KEY")
+    if env_api_key:
+        print("\nLoading ENVIRONMENT_API_KEY into process environment")
+        os.environ["ENVIRONMENT_API_KEY"] = env_api_key
+        env_api_key = resolve_env_var("ENVIRONMENT_API_KEY")
+
+    if browser:
+        print('')
+        write_env_var_to_json("SYNTH_API_KEY", synth_api_key, "~/.synth-ai/config.json")
+        write_env_var_to_dotenv("SYNTH_API_KEY", synth_api_key)
+        write_env_var_to_json("ENVIRONMENT_API_KEY", env_api_key, "~/.synth-ai/config.json")
+        write_env_var_to_dotenv("ENVIRONMENT_API_KEY", env_api_key)
synth_ai/cli/__init__.py CHANGED
@@ -52,9 +52,17 @@ if not _cli_module:
     raise ImportError("synth_ai.cli.root is required for CLI entrypoint")
 cli = _cli_module.cli  # type: ignore[attr-defined]
 
+# Register core commands implemented as standalone modules
+try:
+    from synth_ai.cli.setup import setup_cmd
+
+    cli.add_command(setup_cmd, name="setup")
+except Exception:
+    pass
+
 
 # Register optional subcommands packaged under synth_ai.cli.*
-for _module_path in ("synth_ai.cli.demo", "synth_ai.cli.turso"):
+for _module_path in ("synth_ai.cli.commands.demo", "synth_ai.cli.commands.status", "synth_ai.cli.turso"):
     module = _maybe_import(_module_path)
     if not module:
         continue
@@ -64,6 +72,9 @@ for _module_path in ("synth_ai.cli.demo", "synth_ai.cli.turso"):
     if fn:
         fn(cli)
 
+# Register help command
+_maybe_call("synth_ai.cli.commands.help.core", "register", cli)
+
 # Train CLI lives under synth_ai.api.train
 _maybe_call("synth_ai.api.train", "register", cli)
 
@@ -84,7 +95,4 @@ if _task_apps_module:
     if register_task_apps:
         register_task_apps(cli)
 
-    # Register TUI command if dependencies allow
-    _maybe_call("synth_ai.cli.tui", "register", cli)
-
 # Top-level 'info' alias removed; use `synth-ai task-app info` instead
synth_ai/cli/commands/__init__.py ADDED
@@ -0,0 +1,17 @@
+"""
+Structured CLI command implementations.
+
+Each subpackage under this namespace provides the core command entrypoints,
+validation helpers, and error types for a top-level CLI command (e.g. train,
+eval, deploy).
+"""
+
+from __future__ import annotations
+
+__all__ = [
+    "train",
+    "eval",
+    "filter",
+    "deploy",
+    "status",
+]
synth_ai/cli/commands/demo/__init__.py ADDED
@@ -0,0 +1,6 @@
+from __future__ import annotations
+
+from .core import command, register
+
+__all__ = ["command", "register"]
+