synth-ai: synth_ai-0.2.12-py3-none-any.whl → synth_ai-0.2.13.dev2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (229)
  1. examples/multi_step/configs/crafter_rl_outcome.toml +74 -0
  2. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +186 -0
  3. examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +83 -0
  4. examples/multi_step/configs/crafter_rl_stepwise_simple.toml +78 -0
  5. examples/multi_step/crafter_rl_lora.md +51 -10
  6. examples/multi_step/sse_metrics_streaming_notes.md +357 -0
  7. examples/multi_step/task_app_config_notes.md +7 -1
  8. examples/swe/task_app/grpo_swe_mini.py +55 -26
  9. examples/swe/task_app/hosted/rollout.py +40 -0
  10. examples/swe/task_app/hosted/test_service.py +5 -6
  11. examples/task_apps/TESTING.md +275 -0
  12. examples/task_apps/__init__.py +0 -0
  13. examples/task_apps/crafter/__init__.py +0 -0
  14. examples/task_apps/crafter/task_app/__init__.py +2 -0
  15. examples/{warming_up_to_rl → task_apps/crafter}/task_app/grpo_crafter.py +21 -46
  16. examples/{warming_up_to_rl → task_apps/crafter}/task_app/grpo_crafter_task_app.py +1 -1
  17. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/policy.py +60 -4
  18. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/inference/openai_client.py +109 -45
  19. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/policy_routes.py +67 -49
  20. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/rollout.py +242 -193
  21. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/test_service.py +5 -6
  22. examples/task_apps/dev/pokemon_emerald/__init__.py +2 -0
  23. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/README.md +811 -0
  24. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/__init__.py +120 -0
  25. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/action.py +160 -0
  26. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/memory.py +155 -0
  27. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/perception.py +69 -0
  28. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/planning.py +96 -0
  29. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/simple.py +1502 -0
  30. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/system_prompt.py +4 -0
  31. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/grab_map.py +68 -0
  32. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/manual.py +216 -0
  33. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/__init__.py +35 -0
  34. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emerald_utils.py +631 -0
  35. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emulator.py +1544 -0
  36. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/enums.py +1428 -0
  37. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/memory_reader.py +4848 -0
  38. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/types.py +41 -0
  39. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/utils.py +298 -0
  40. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pyproject.toml +95 -0
  41. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/run.py +204 -0
  42. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/__init__.py +0 -0
  43. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/app.py +2152 -0
  44. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/client.py +429 -0
  45. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/frame_server.py +155 -0
  46. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/README.md +78 -0
  47. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/__init__.py +0 -0
  48. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/run_tests.py +122 -0
  49. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_direct.py +76 -0
  50. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_prompts.py +413 -0
  51. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_battle_state_formatting.py +204 -0
  52. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection.py +133 -0
  53. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection_comprehensive.py +229 -0
  54. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_direct_agent_emulator.py +300 -0
  55. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_fps_adjustment_pytest.py +205 -0
  56. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_direct.py +200 -0
  57. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_transition.py +284 -0
  58. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_map_ground_truth_comparison.py +468 -0
  59. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_memory_map.py +575 -0
  60. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_server_map_validation.py +311 -0
  61. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_torchic_state.py +259 -0
  62. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/__init__.py +0 -0
  63. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/anticheat.py +372 -0
  64. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/checkpoint.py +296 -0
  65. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/error_handler.py +275 -0
  66. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/get_local_ip.py +22 -0
  67. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/helpers.py +44 -0
  68. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/llm_logger.py +514 -0
  69. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_formatter.py +415 -0
  70. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher.py +1763 -0
  71. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher_singleton.py +33 -0
  72. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_trimmer.py +106 -0
  73. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_visualizer.py +334 -0
  74. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/ocr_dialogue.py +1020 -0
  75. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/recording.py +188 -0
  76. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/state_formatter.py +1481 -0
  77. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/vlm.py +862 -0
  78. examples/task_apps/dev/pokemon_emerald/modal_app.py +114 -0
  79. examples/task_apps/dev/pokemon_emerald/task_app/README.md +81 -0
  80. examples/task_apps/dev/pokemon_emerald/task_app/__init__.py +6 -0
  81. examples/task_apps/dev/pokemon_emerald/task_app/pokemon_emerald.py +685 -0
  82. examples/task_apps/enron/__init__.py +1 -0
  83. examples/task_apps/enron/eval_groq_qwen32.toml +16 -0
  84. examples/task_apps/enron/task_app/README.md +14 -0
  85. examples/task_apps/enron/task_app/__init__.py +1 -0
  86. examples/task_apps/enron/task_app/grpo_enron.py +906 -0
  87. examples/task_apps/enron/task_app/grpo_enron_task_app.py +146 -0
  88. examples/task_apps/enron/tests/__init__.py +2 -0
  89. examples/task_apps/enron/tests/conftest.py +115 -0
  90. examples/task_apps/enron/tests/integration/__init__.py +2 -0
  91. examples/task_apps/enron/tests/integration/test_enron_eval.py +177 -0
  92. examples/task_apps/enron/tests/integration/test_enron_rollout.py +135 -0
  93. examples/task_apps/enron/tests/unit/__init__.py +2 -0
  94. examples/task_apps/enron/tests/unit/test_enron_environment.py +126 -0
  95. examples/task_apps/math/__init__.py +0 -0
  96. examples/{rl/task_app → task_apps/math}/math_single_step.py +19 -10
  97. examples/task_apps/pokemon_battle/__init__.py +2 -0
  98. examples/task_apps/pokemon_battle/modal_app.py +104 -0
  99. examples/task_apps/pokemon_battle/task_app/README.md +68 -0
  100. examples/task_apps/pokemon_battle/task_app/__init__.py +6 -0
  101. examples/task_apps/pokemon_battle/task_app/pokemon_showdown.py +932 -0
  102. examples/task_apps/pokemon_red/README.md +357 -0
  103. examples/task_apps/pokemon_red/__init__.py +3 -0
  104. examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +225 -0
  105. examples/task_apps/pokemon_red/pallet_town_rl_config.toml +73 -0
  106. examples/task_apps/pokemon_red/task_app.py +606 -0
  107. examples/task_apps/pokemon_red/test_pallet_town_rewards.py +191 -0
  108. examples/task_apps/sokoban/README.md +307 -0
  109. examples/task_apps/sokoban/__init__.py +3 -0
  110. examples/task_apps/sokoban/eval_groq_qwen32.toml +16 -0
  111. examples/task_apps/sokoban/eval_openai_gpt5.toml +16 -0
  112. examples/task_apps/sokoban/task_app.py +1058 -0
  113. examples/task_apps/sokoban/tests/__init__.py +2 -0
  114. examples/task_apps/sokoban/tests/conftest.py +113 -0
  115. examples/task_apps/sokoban/tests/integration/__init__.py +2 -0
  116. examples/task_apps/sokoban/tests/integration/test_sokoban_eval.py +57 -0
  117. examples/task_apps/sokoban/tests/integration/test_sokoban_rollout.py +198 -0
  118. examples/task_apps/sokoban/tests/unit/__init__.py +2 -0
  119. examples/task_apps/sokoban/tests/unit/test_sokoban_environment.py +114 -0
  120. examples/task_apps/verilog/__init__.py +1 -0
  121. examples/task_apps/verilog/eval_groq_qwen32b.toml +20 -0
  122. examples/task_apps/verilog/task_app/README.md +12 -0
  123. examples/task_apps/verilog/task_app/__init__.py +1 -0
  124. examples/task_apps/verilog/task_app/grpo_verilog.py +931 -0
  125. examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +145 -0
  126. examples/task_apps/verilog/tests/__init__.py +2 -0
  127. examples/task_apps/verilog/tests/conftest.py +115 -0
  128. examples/task_apps/verilog/tests/integration/__init__.py +2 -0
  129. examples/task_apps/verilog/tests/integration/test_verilog_eval.py +179 -0
  130. examples/task_apps/verilog/tests/integration/test_verilog_rollout.py +55 -0
  131. examples/task_apps/verilog/tests/unit/__init__.py +2 -0
  132. examples/task_apps/verilog/tests/unit/test_verilog_scoring.py +118 -0
  133. examples/vlm/crafter_openai_vlm_agent.py +4 -4
  134. examples/vlm/run_crafter_vlm_benchmark.py +4 -4
  135. examples/warming_up_to_rl/configs/eval_stepwise_complex.toml +4 -2
  136. examples/warming_up_to_rl/configs/eval_stepwise_simple.toml +4 -2
  137. examples/warming_up_to_rl/run_eval.py +127 -18
  138. examples/workflows/__init__.py +0 -0
  139. examples/workflows/math_rl/__init__.py +0 -0
  140. examples/workflows/math_rl/download_dataset.py +80 -0
  141. synth_ai/__init__.py +41 -1
  142. synth_ai/api/train/builders.py +73 -29
  143. synth_ai/api/train/cli.py +12 -6
  144. synth_ai/api/train/configs/__init__.py +44 -0
  145. synth_ai/api/train/configs/rl.py +134 -0
  146. synth_ai/api/train/configs/sft.py +95 -0
  147. synth_ai/api/train/configs/shared.py +24 -0
  148. synth_ai/api/train/env_resolver.py +5 -2
  149. synth_ai/api/train/supported_algos.py +10 -5
  150. synth_ai/api/train/utils.py +7 -4
  151. synth_ai/cli/__init__.py +7 -51
  152. synth_ai/cli/_storage.py +4 -3
  153. synth_ai/cli/_validate_task_app.py +11 -0
  154. synth_ai/cli/balance.py +4 -3
  155. synth_ai/cli/calc.py +2 -2
  156. synth_ai/cli/demo.py +49 -43
  157. synth_ai/cli/legacy_root_backup.py +1 -1
  158. synth_ai/cli/rl_demo.py +86 -106
  159. synth_ai/cli/root.py +0 -97
  160. synth_ai/cli/task_apps.py +1710 -186
  161. synth_ai/demos/core/cli.py +121 -159
  162. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +28 -16
  163. synth_ai/environments/examples/crafter_classic/environment.py +16 -0
  164. synth_ai/environments/examples/enron/engine.py +7 -2
  165. synth_ai/environments/examples/enron/environment.py +68 -0
  166. synth_ai/environments/examples/red/engine.py +27 -0
  167. synth_ai/environments/examples/red/engine_helpers/memory_map.py +7 -0
  168. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_progression.py +477 -0
  169. synth_ai/environments/examples/red/engine_helpers/state_extraction.py +32 -0
  170. synth_ai/environments/examples/red/environment.py +60 -0
  171. synth_ai/environments/examples/sokoban/taskset.py +116 -0
  172. synth_ai/environments/examples/verilog/engine.py +30 -4
  173. synth_ai/evals/__init__.py +15 -0
  174. synth_ai/evals/client.py +82 -0
  175. synth_ai/evals/types.py +42 -0
  176. synth_ai/jobs/client.py +16 -4
  177. synth_ai/judge_schemas.py +127 -0
  178. synth_ai/py.typed +0 -0
  179. synth_ai/task/__init__.py +14 -5
  180. synth_ai/task/contracts.py +124 -38
  181. synth_ai/task/proxy.py +48 -56
  182. synth_ai/task/rubrics/__init__.py +53 -0
  183. synth_ai/task/rubrics/loaders.py +133 -0
  184. synth_ai/task/rubrics/models.py +57 -0
  185. synth_ai/task/rubrics/scoring.py +113 -0
  186. synth_ai/task/rubrics/strict.py +149 -0
  187. synth_ai/task/server.py +8 -7
  188. synth_ai/task/validators.py +269 -6
  189. synth_ai/tracing_v3/decorators.py +7 -3
  190. synth_ai/tracing_v3/replica_sync.py +4 -4
  191. synth_ai/tracing_v3/serialization.py +130 -0
  192. synth_ai/tracing_v3/trace_utils.py +317 -0
  193. synth_ai/tracing_v3/turso/native_manager.py +3 -3
  194. {synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev2.dist-info}/METADATA +4 -1
  195. {synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev2.dist-info}/RECORD +228 -89
  196. {synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev2.dist-info}/entry_points.txt +0 -1
  197. synth_ai/task/rubrics.py +0 -219
  198. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/README.md +0 -0
  199. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/README.md +0 -0
  200. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/__init__.py +0 -0
  201. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/branching.py +0 -0
  202. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/environment_routes.py +0 -0
  203. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/__init__.py +0 -0
  204. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/__init__.py +0 -0
  205. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/app.py +0 -0
  206. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/environment.py +0 -0
  207. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/react_agent.py +0 -0
  208. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/shared.py +0 -0
  209. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/tools.py +0 -0
  210. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/hosted_app.py +0 -0
  211. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/inference/__init__.py +0 -0
  212. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/main.py +0 -0
  213. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/registry.py +0 -0
  214. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/storage/__init__.py +0 -0
  215. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/storage/volume.py +0 -0
  216. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/test_agents.py +0 -0
  217. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/utils.py +0 -0
  218. /examples/{rl/task_app → task_apps/math}/README.md +0 -0
  219. /examples/{rl/task_app → task_apps/math}/math_task_app.py +0 -0
  220. /examples/{rl → workflows/math_rl}/configs/eval_base_qwen.toml +0 -0
  221. /examples/{rl → workflows/math_rl}/configs/eval_rl_qwen.toml +0 -0
  222. /examples/{rl → workflows/math_rl}/configs/rl_from_base_qwen.toml +0 -0
  223. /examples/{rl → workflows/math_rl}/configs/rl_from_base_qwen17.toml +0 -0
  224. /examples/{rl → workflows/math_rl}/configs/rl_from_ft_qwen.toml +0 -0
  225. /examples/{rl → workflows/math_rl}/run_eval.py +0 -0
  226. /examples/{rl → workflows/math_rl}/run_rl_and_save.py +0 -0
  227. {synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev2.dist-info}/WHEEL +0 -0
  228. {synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev2.dist-info}/licenses/LICENSE +0 -0
  229. {synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev2.dist-info}/top_level.txt +0 -0
synth_ai/api/train/configs/rl.py ADDED
@@ -0,0 +1,134 @@
1
+ from __future__ import annotations
2
+
3
+ from collections.abc import Mapping
4
+ from pathlib import Path
5
+ from typing import Any
6
+
7
+ from pydantic import model_validator
8
+
9
+ from ..utils import load_toml
10
+ from .shared import AlgorithmConfig, ComputeConfig, ExtraModel
11
+
12
+
13
+ class RLServicesConfig(ExtraModel):
14
+ task_url: str
15
+ judge_url: str | None = None
16
+
17
+
18
+ class ModelConfig(ExtraModel):
19
+ source: str | None = None
20
+ base: str | None = None
21
+ trainer_mode: str
22
+ label: str
23
+
24
+ @model_validator(mode="after")
25
+ def _ensure_exactly_one_source_or_base(self) -> ModelConfig:
26
+ if bool(self.source) == bool(self.base):
27
+ raise ValueError("Config must set exactly one of [model].source or [model].base")
28
+ return self
29
+
30
+
31
+ class RolloutConfig(ExtraModel):
32
+ env_name: str
33
+ policy_name: str
34
+ env_config: dict[str, Any] | None = None
35
+ policy_config: dict[str, Any] | None = None
36
+ max_turns: int
37
+ episodes_per_batch: int
38
+ max_concurrent_rollouts: int
39
+ batches_per_step: int | None = None
40
+ ops: list[str] | None = None
41
+
42
+
43
+ class WeightSyncConfig(ExtraModel):
44
+ enable: bool | None = None
45
+ targets: list[str] | None = None
46
+ mode: str | None = None
47
+ direct: bool | None = None
48
+ verify_every_k: int | None = None
49
+
50
+
51
+ class RLTrainingConfig(ExtraModel):
52
+ num_epochs: int
53
+ iterations_per_epoch: int
54
+ gradient_accumulation_steps: int | None = None
55
+ max_accumulated_minibatch: int | None = None
56
+ max_turns: int
57
+ batch_size: int
58
+ group_size: int
59
+ learning_rate: float
60
+ log_interval: int | None = None
61
+ weight_sync_interval: int | None = None
62
+ step_rewards_enabled: bool | None = None
63
+ step_rewards_mode: str | None = None
64
+ step_rewards_indicator_lambda: float | None = None
65
+ step_rewards_beta: float | None = None
66
+ step_rewards_strategy: str | None = None
67
+ event_rewards_kind: str | None = None
68
+ weight_sync: WeightSyncConfig | None = None
69
+
70
+
71
+ class EvaluationConfig(ExtraModel):
72
+ instances: int
73
+ every_n_iters: int
74
+ seeds: list[int]
75
+
76
+
77
+ class JudgeOptionsConfig(ExtraModel):
78
+ event: bool | None = None
79
+ outcome: bool | None = None
80
+ provider: str | None = None
81
+ model: str | None = None
82
+ rubric_id: str | None = None
83
+ rubric_overrides: dict[str, Any] | None = None
84
+ tracks: list[str] | None = None
85
+ weights: dict[str, float] | None = None
86
+ max_concurrency: int | None = None
87
+
88
+
89
+ class JudgeConfig(ExtraModel):
90
+ type: str | None = None
91
+ timeout_s: int | None = None
92
+ options: JudgeOptionsConfig | None = None
93
+
94
+
95
+ class RLConfig(ExtraModel):
96
+ algorithm: AlgorithmConfig
97
+ services: RLServicesConfig
98
+ compute: ComputeConfig | None = None
99
+ topology: dict[str, Any] | None = None
100
+ vllm: dict[str, Any] | None = None
101
+ reference: dict[str, Any] | None = None
102
+ model: ModelConfig
103
+ lora: dict[str, Any] | None = None
104
+ rollout: RolloutConfig | None = None
105
+ evaluation: EvaluationConfig | None = None
106
+ training: RLTrainingConfig | None = None
107
+ rubric: dict[str, Any] | None = None
108
+ judge: JudgeConfig | None = None
109
+ tags: dict[str, Any] | None = None
110
+
111
+ def to_dict(self) -> dict[str, Any]:
112
+ return self.model_dump(mode="python", exclude_none=True)
113
+
114
+ @classmethod
115
+ def from_mapping(cls, data: Mapping[str, Any]) -> RLConfig:
116
+ return cls.model_validate(dict(data))
117
+
118
+ @classmethod
119
+ def from_path(cls, path: Path) -> RLConfig:
120
+ content = load_toml(path)
121
+ return cls.from_mapping(content)
122
+
123
+
124
+ __all__ = [
125
+ "EvaluationConfig",
126
+ "JudgeConfig",
127
+ "JudgeOptionsConfig",
128
+ "ModelConfig",
129
+ "RLConfig",
130
+ "RLServicesConfig",
131
+ "RLTrainingConfig",
132
+ "RolloutConfig",
133
+ "WeightSyncConfig",
134
+ ]
synth_ai/api/train/configs/sft.py ADDED
@@ -0,0 +1,95 @@
1
+ from __future__ import annotations
2
+
3
+ from collections.abc import Mapping
4
+ from pathlib import Path
5
+ from typing import Any
6
+
7
+ from pydantic import Field
8
+
9
+ from ..utils import load_toml
10
+ from .shared import AlgorithmConfig, ComputeConfig, ExtraModel
11
+
12
+
13
+ class JobConfig(ExtraModel):
14
+ model: str
15
+ data: str | None = None
16
+ data_path: str | None = None
17
+ poll_seconds: int | None = None
18
+
19
+
20
+ class SFTDataConfig(ExtraModel):
21
+ topology: dict[str, Any] | None = None
22
+ validation_path: str | None = None
23
+
24
+
25
+ class TrainingValidationConfig(ExtraModel):
26
+ enabled: bool | None = None
27
+ evaluation_strategy: str | None = None
28
+ eval_steps: int | None = None
29
+ save_best_model_at_end: bool | None = None
30
+ metric_for_best_model: str | None = None
31
+ greater_is_better: bool | None = None
32
+
33
+
34
+ class TrainingConfig(ExtraModel):
35
+ mode: str | None = None
36
+ use_qlora: bool | None = None
37
+ validation: TrainingValidationConfig | None = None
38
+
39
+
40
+ class HyperparametersParallelism(ExtraModel):
41
+ use_deepspeed: bool | None = None
42
+ deepspeed_stage: int | None = None
43
+ fsdp: bool | None = None
44
+ bf16: bool | None = None
45
+ fp16: bool | None = None
46
+ activation_checkpointing: bool | None = None
47
+ tensor_parallel_size: int | None = None
48
+ pipeline_parallel_size: int | None = None
49
+
50
+
51
+ class HyperparametersConfig(ExtraModel):
52
+ n_epochs: int = 1
53
+ batch_size: int | None = None
54
+ global_batch: int | None = None
55
+ per_device_batch: int | None = None
56
+ gradient_accumulation_steps: int | None = None
57
+ sequence_length: int | None = None
58
+ learning_rate: float | None = None
59
+ warmup_ratio: float | None = None
60
+ train_kind: str | None = None
61
+ weight_decay: float | None = None
62
+ parallelism: HyperparametersParallelism | None = None
63
+
64
+
65
+ class SFTConfig(ExtraModel):
66
+ algorithm: AlgorithmConfig | None = None
67
+ job: JobConfig
68
+ compute: ComputeConfig | None = None
69
+ data: SFTDataConfig | None = None
70
+ training: TrainingConfig | None = None
71
+ hyperparameters: HyperparametersConfig = Field(default_factory=HyperparametersConfig)
72
+ tags: dict[str, Any] | None = None
73
+
74
+ def to_dict(self) -> dict[str, Any]:
75
+ return self.model_dump(mode="python", exclude_none=True)
76
+
77
+ @classmethod
78
+ def from_mapping(cls, data: Mapping[str, Any]) -> SFTConfig:
79
+ return cls.model_validate(dict(data))
80
+
81
+ @classmethod
82
+ def from_path(cls, path: Path) -> SFTConfig:
83
+ content = load_toml(path)
84
+ return cls.from_mapping(content)
85
+
86
+
87
+ __all__ = [
88
+ "HyperparametersConfig",
89
+ "HyperparametersParallelism",
90
+ "JobConfig",
91
+ "SFTConfig",
92
+ "SFTDataConfig",
93
+ "TrainingConfig",
94
+ "TrainingValidationConfig",
95
+ ]
synth_ai/api/train/configs/shared.py ADDED
@@ -0,0 +1,24 @@
1
+ from __future__ import annotations
2
+
3
+ from pydantic import BaseModel, ConfigDict
4
+
5
+
6
+ class ExtraModel(BaseModel):
7
+ """Base model that tolerates unknown keys so configs keep forward compatibility."""
8
+
9
+ model_config = ConfigDict(extra="allow")
10
+
11
+
12
+ class AlgorithmConfig(ExtraModel):
13
+ type: str
14
+ method: str
15
+ variety: str
16
+
17
+
18
+ class ComputeConfig(ExtraModel):
19
+ gpu_type: str
20
+ gpu_count: int
21
+ nodes: int | None = None
22
+
23
+
24
+ __all__ = ["ExtraModel", "AlgorithmConfig", "ComputeConfig"]
synth_ai/api/train/env_resolver.py CHANGED
@@ -5,6 +5,7 @@ import os
5
5
  from collections.abc import Callable, Iterable, MutableMapping
6
6
  from dataclasses import dataclass
7
7
  from pathlib import Path
8
+ from typing import Any, cast
8
9
 
9
10
  import click
10
11
 
@@ -14,8 +15,10 @@ from .utils import REPO_ROOT, mask_value, read_env_file, write_env_value
14
15
 
15
16
  def _load_saved_env_path() -> Path | None:
16
17
  try:
17
- module = importlib.import_module("synth_ai.demos.demo_task_apps.core")
18
- loader = module.load_env_file_path
18
+ module = cast(
19
+ Any, importlib.import_module("synth_ai.demos.demo_task_apps.core")
20
+ )
21
+ loader = cast(Callable[[], str | None], module.load_env_file_path)
19
22
  saved_path = loader()
20
23
  if saved_path:
21
24
  return Path(saved_path)
synth_ai/api/train/supported_algos.py CHANGED
@@ -1,14 +1,19 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import importlib
4
- from collections.abc import Mapping
4
+ from collections.abc import Callable, Mapping
5
5
  from dataclasses import dataclass
6
+ from typing import Any, cast
6
7
 
7
8
  try:
8
- _models_module = importlib.import_module("synth_ai.api.models.supported")
9
- RL_SUPPORTED_MODELS = _models_module.RL_SUPPORTED_MODELS
10
- SFT_SUPPORTED_MODELS = _models_module.SFT_SUPPORTED_MODELS
11
- training_modes_for_model = _models_module.training_modes_for_model
9
+ _models_module = cast(
10
+ Any, importlib.import_module("synth_ai.api.models.supported")
11
+ )
12
+ RL_SUPPORTED_MODELS = cast(tuple[str, ...], _models_module.RL_SUPPORTED_MODELS)
13
+ SFT_SUPPORTED_MODELS = cast(tuple[str, ...], _models_module.SFT_SUPPORTED_MODELS)
14
+ training_modes_for_model = cast(
15
+ Callable[[str], tuple[str, ...]], _models_module.training_modes_for_model
16
+ )
12
17
  except Exception as exc: # pragma: no cover - critical dependency
13
18
  raise RuntimeError("Unable to load supported model metadata") from exc
14
19
 
synth_ai/api/train/utils.py CHANGED
@@ -8,15 +8,18 @@ import subprocess
8
8
  import tempfile
9
9
  import time
10
10
  import tomllib
11
- from collections.abc import Iterable, Mapping
11
+ from collections.abc import Callable, Iterable, Mapping
12
12
  from dataclasses import dataclass
13
13
  from pathlib import Path
14
- from typing import Any
14
+ from typing import Any, cast
15
15
 
16
16
  import requests
17
17
 
18
18
  try:
19
- collect_sft_jsonl_errors = importlib.import_module("synth_ai.learning.sft").collect_sft_jsonl_errors
19
+ sft_module = cast(Any, importlib.import_module("synth_ai.learning.sft"))
20
+ collect_sft_jsonl_errors = cast(
21
+ Callable[..., list[dict[str, Any]]], sft_module.collect_sft_jsonl_errors
22
+ )
20
23
  except Exception as exc: # pragma: no cover - critical dependency
21
24
  raise RuntimeError("Unable to load SFT JSONL helpers") from exc
22
25
 
@@ -154,7 +157,7 @@ def validate_sft_jsonl(path: Path, *, max_errors: int = 20) -> None:
154
157
 
155
158
  truncated = max_errors is not None and len(issues) >= max_errors
156
159
  suffix = "" if not truncated else f" (showing first {max_errors} issues)"
157
- details = "\n - ".join(issues)
160
+ details = "\n - ".join(cast("list[str]", issues))
158
161
  raise TrainError(f"{path}: Dataset validation failed{suffix}:\n - {details}")
159
162
 
160
163
 
synth_ai/cli/__init__.py CHANGED
@@ -8,6 +8,8 @@ pyproject entry point `synth_ai.cli:cli`.
8
8
  from __future__ import annotations
9
9
 
10
10
  import importlib
11
+ from collections.abc import Callable
12
+ from typing import Any, cast
11
13
 
12
14
  # Load environment variables from a local .env if present (repo root)
13
15
  try:
@@ -30,48 +32,8 @@ except Exception:
30
32
  from .root import cli # new canonical CLI entrypoint
31
33
 
32
34
  # Register subcommands from this package onto the group
33
- try:
34
- from . import watch as _watch
35
-
36
- _watch.register(cli)
37
- except Exception:
38
- pass
39
- try:
40
- from . import balance as _balance
41
-
42
- _balance.register(cli)
43
- except Exception:
44
- pass
45
- try:
46
- from . import man as _man
47
-
48
- _man.register(cli)
49
- except Exception:
50
- pass
51
- try:
52
- from . import traces as _traces
53
-
54
- _traces.register(cli)
55
- except Exception:
56
- pass
57
- try:
58
- from . import recent as _recent
59
-
60
- _recent.register(cli)
61
- except Exception:
62
- pass
63
- try:
64
- from . import calc as _calc
65
-
66
- _calc.register(cli)
67
- except Exception:
68
- pass
69
- try:
70
- from . import status as _status
71
-
72
- _status.register(cli)
73
- except Exception:
74
- pass
35
+ # Deprecated/legacy commands intentionally not registered: watch/experiments, balance, calc,
36
+ # man, recent, status, traces
75
37
  try:
76
38
  from . import demo as _demo
77
39
 
@@ -85,14 +47,8 @@ try:
85
47
  except Exception:
86
48
  pass
87
49
  try:
88
- from . import rl_demo as _rl_demo
89
-
90
- _rl_demo.register(cli)
91
- except Exception:
92
- pass
93
- try:
94
- _train_module = importlib.import_module("synth_ai.api.train")
95
- _train_register = _train_module.register
50
+ _train_module = cast(Any, importlib.import_module("synth_ai.api.train"))
51
+ _train_register = cast(Callable[[Any], None], _train_module.register)
96
52
  _train_register(cli)
97
53
  except Exception:
98
54
  pass
@@ -114,4 +70,4 @@ cli.add_command(task_app_group.commands["serve"], name="serve")
114
70
  cli.add_command(task_app_group.commands["deploy"], name="deploy")
115
71
 
116
72
  cli.add_command(task_app_group.commands["modal-serve"], name="modal-serve")
117
- cli.add_command(task_app_group.commands["info"], name="info")
73
+ # Top-level 'info' alias removed; use `synth-ai task-app info` instead
synth_ai/cli/_storage.py CHANGED
@@ -8,12 +8,13 @@ allowing type checkers to resolve the symbols dynamically.
8
8
  from __future__ import annotations
9
9
 
10
10
  import importlib
11
- from typing import Any
11
+ from collections.abc import Callable
12
+ from typing import Any, cast
12
13
 
13
14
 
14
15
  def load_storage() -> tuple[Any, Any]:
15
16
  """Return (create_storage, StorageConfig) from tracing_v3.storage."""
16
- storage_module = importlib.import_module("synth_ai.tracing_v3.storage")
17
- create_storage = storage_module.create_storage
17
+ storage_module = cast(Any, importlib.import_module("synth_ai.tracing_v3.storage"))
18
+ create_storage = cast(Callable[..., Any], storage_module.create_storage)
18
19
  storage_config = storage_module.StorageConfig
19
20
  return create_storage, storage_config
synth_ai/cli/_validate_task_app.py ADDED
@@ -0,0 +1,11 @@
1
+ """Task app validation utilities - imported by task_apps.py"""
2
+
3
+ # This module provides the validate_task_app function for CLI use
4
+ # The actual implementation is imported from the task module
5
+
6
+ from synth_ai.task.validators import (
7
+ validate_task_app_endpoint as validate_task_app, # type: ignore[attr-defined]
8
+ )
9
+
10
+ __all__ = ["validate_task_app"]
11
+
synth_ai/cli/balance.py CHANGED
@@ -8,6 +8,7 @@ from __future__ import annotations
8
8
  import importlib
9
9
  import os
10
10
  from collections.abc import Callable
11
+ from typing import Any, cast
11
12
 
12
13
  import click
13
14
  import requests
@@ -19,9 +20,9 @@ from rich.table import Table
19
20
 
20
21
  def _load_base_url_module() -> tuple[str, Callable[[], tuple[str, str]]]:
21
22
  try:
22
- module = importlib.import_module("synth_ai.config.base_url")
23
- default = module.PROD_BASE_URL_DEFAULT
24
- getter = module.get_backend_from_env
23
+ module = cast(Any, importlib.import_module("synth_ai.config.base_url"))
24
+ default = cast(str, module.PROD_BASE_URL_DEFAULT)
25
+ getter = cast(Callable[[], tuple[str, str]], module.get_backend_from_env)
25
26
  return str(default), getter
26
27
  except Exception:
27
28
  return "https://agent-learning.onrender.com", lambda: ("https://agent-learning.onrender.com", "")
synth_ai/cli/calc.py CHANGED
@@ -35,13 +35,13 @@ def _safe_eval(expr: str) -> float:
35
35
  if isinstance(n, ast.Expression):
36
36
  return _eval(n.body)
37
37
  if isinstance(n, ast.Constant):
38
- if isinstance(n.value, (int, float)):
38
+ if isinstance(n.value, int | float):
39
39
  return float(n.value)
40
40
  raise ValueError("Only numeric constants are allowed")
41
41
  num_node = getattr(ast, "Num", None)
42
42
  if num_node is not None and isinstance(n, num_node): # pragma: no cover
43
43
  numeric_value = getattr(n, "n", None)
44
- if isinstance(numeric_value, (int, float)):
44
+ if isinstance(numeric_value, int | float):
45
45
  return float(numeric_value)
46
46
  raise ValueError("Only numeric constants are allowed")
47
47
  if isinstance(n, ast.BinOp):
synth_ai/cli/demo.py CHANGED
@@ -1,18 +1,25 @@
1
1
  #!/usr/bin/env python3
2
2
  """
3
- CLI: interactive launcher for example demos and forwarders for new RL demo.
3
+ CLI: interactive launcher for example demos and RL demo helpers.
4
4
 
5
- - `synth-ai demo` (no subcommand) -> legacy examples/ runner (run_demo.sh picker)
6
- - `synth-ai demo deploy|configure|run` -> forwards to synth_ai.demos.core.cli
5
+ - `synth-ai demo` (no subcommand) -> initialize RL demo files into ./synth_demo/
6
+ - `synth-ai demo deploy|configure|run` -> invoke RL demo helpers directly.
7
7
  """
8
8
 
9
9
  from __future__ import annotations
10
10
 
11
+ import importlib
11
12
  import os
12
13
  import subprocess
13
14
  from pathlib import Path
15
+ from typing import Any, cast
14
16
 
15
17
  import click
18
+ from click.exceptions import Exit
19
+
20
+ demo_commands = cast(
21
+ Any, importlib.import_module("synth_ai.demos.core.cli")
22
+ )
16
23
 
17
24
 
18
25
  def _find_demo_scripts(root: Path) -> list[Path]:
@@ -21,17 +28,23 @@ def _find_demo_scripts(root: Path) -> list[Path]:
21
28
  return sorted([p for p in root.rglob("run_demo.sh") if p.is_file()])
22
29
 
23
30
 
24
- def _forward_to_new(args: list[str]) -> None:
25
- import sys
31
+ def _run_demo_command(func, *args, **kwargs) -> None:
32
+ """Invoke a demo command and exit via Click on non-zero status codes."""
26
33
 
27
34
  try:
28
- from synth_ai.demos.core import cli as demo_cli # type: ignore
29
- except Exception as e: # pragma: no cover
30
- click.echo(f"Failed to import demo CLI: {e}")
31
- sys.exit(1)
32
- rc = int(demo_cli.main(args) or 0)
33
- if rc != 0:
34
- sys.exit(rc)
35
+ result = func(*args, **kwargs)
36
+ except SystemExit as exc: # pragma: no cover - defensive
37
+ raise Exit(exc.code or 1) from exc
38
+
39
+ if result is None:
40
+ return
41
+
42
+ try:
43
+ code = int(result)
44
+ except (TypeError, ValueError):
45
+ return
46
+ if code != 0:
47
+ raise Exit(code)
35
48
 
36
49
 
37
50
  def register(cli):
@@ -92,19 +105,13 @@ def register(cli):
92
105
  click.echo("\n🛑 Demo interrupted by user")
93
106
  return
94
107
 
95
- # Default: forward to RL demo init behavior, optionally with --force
96
- args: list[str] = ["rl_demo.init"]
97
- if force:
98
- args.append("--force")
99
- _forward_to_new(args)
108
+ # Default: initialize RL demo files via new command
109
+ _run_demo_command(demo_commands.init, force=force)
100
110
 
101
111
  # (prepare command removed; configure now prepares baseline TOML)
102
112
 
103
113
  # Help pyright understand dynamic Click group attributes
104
- from typing import Any
105
- from typing import cast as _cast
106
-
107
- _dg = _cast(Any, demo)
114
+ _dg = cast(Any, demo)
108
115
 
109
116
  @_dg.command("deploy")
110
117
  @click.option("--local", is_flag=True, help="Run local FastAPI instead of Modal deploy")
@@ -122,24 +129,21 @@ def register(cli):
122
129
  help="Path to deploy_task_app.sh (optional legacy)",
123
130
  )
124
131
  def demo_deploy(local: bool, app: str | None, name: str, script: str | None):
125
- args: list[str] = ["rl_demo.deploy"]
126
- if local:
127
- args.append("--local")
128
- if app:
129
- args.extend(["--app", app])
130
- if name:
131
- args.extend(["--name", name])
132
- if script:
133
- args.extend(["--script", script])
134
- _forward_to_new(args)
132
+ _run_demo_command(
133
+ demo_commands.deploy,
134
+ local=local,
135
+ app=app,
136
+ name=name,
137
+ script=script,
138
+ )
135
139
 
136
140
  @_dg.command("configure")
137
141
  def demo_configure():
138
- _forward_to_new(["rl_demo.configure"])
142
+ _run_demo_command(demo_commands.run)
139
143
 
140
144
  @_dg.command("setup")
141
145
  def demo_setup():
142
- _forward_to_new(["rl_demo.setup"])
146
+ _run_demo_command(demo_commands.setup)
143
147
 
144
148
  @_dg.command("run")
145
149
  @click.option("--batch-size", type=int, default=None)
@@ -147,13 +151,15 @@ def register(cli):
147
151
  @click.option("--model", type=str, default=None)
148
152
  @click.option("--timeout", type=int, default=600)
149
153
  def demo_run(batch_size: int | None, group_size: int | None, model: str | None, timeout: int):
150
- args = ["rl_demo.run"]
151
- if batch_size is not None:
152
- args.extend(["--batch-size", str(batch_size)])
153
- if group_size is not None:
154
- args.extend(["--group-size", str(group_size)])
155
- if model:
156
- args.extend(["--model", model])
157
- if timeout:
158
- args.extend(["--timeout", str(timeout)])
159
- _forward_to_new(args)
154
+ _run_demo_command(
155
+ demo_commands.run,
156
+ batch_size=batch_size,
157
+ group_size=group_size,
158
+ model=model,
159
+ timeout=timeout,
160
+ )
161
+
162
+ @cli.command("setup")
163
+ def setup_alias():
164
+ """Perform SDK handshake and write keys to .env."""
165
+ _run_demo_command(demo_commands.setup)
@@ -235,7 +235,7 @@ def view(url: str):
235
235
  """Launch the interactive TUI dashboard."""
236
236
  try:
237
237
  module = importlib.import_module(".tui.dashboard", __package__)
238
- synth_dashboard_cls = module.SynthDashboard
238
+ synth_dashboard_cls = getattr(module, "SynthDashboard")
239
239
  app = synth_dashboard_cls(db_url=url)
240
240
  app.run()
241
241
  except ImportError: