synth-ai 0.2.16__py3-none-any.whl → 0.2.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (299) hide show
  1. examples/analyze_semantic_words.sh +2 -2
  2. examples/baseline/banking77_baseline.py +204 -0
  3. examples/baseline/crafter_baseline.py +407 -0
  4. examples/baseline/pokemon_red_baseline.py +326 -0
  5. examples/baseline/simple_baseline.py +56 -0
  6. examples/baseline/warming_up_to_rl_baseline.py +239 -0
  7. examples/blog_posts/gepa/README.md +355 -0
  8. examples/blog_posts/gepa/configs/banking77_gepa_local.toml +95 -0
  9. examples/blog_posts/gepa/configs/banking77_gepa_test.toml +82 -0
  10. examples/blog_posts/gepa/configs/banking77_mipro_local.toml +52 -0
  11. examples/blog_posts/gepa/configs/hotpotqa_gepa_local.toml +59 -0
  12. examples/blog_posts/gepa/configs/hotpotqa_gepa_qwen.toml +36 -0
  13. examples/blog_posts/gepa/configs/hotpotqa_mipro_local.toml +53 -0
  14. examples/blog_posts/gepa/configs/hover_gepa_local.toml +59 -0
  15. examples/blog_posts/gepa/configs/hover_gepa_qwen.toml +36 -0
  16. examples/blog_posts/gepa/configs/hover_mipro_local.toml +53 -0
  17. examples/blog_posts/gepa/configs/ifbench_gepa_local.toml +59 -0
  18. examples/blog_posts/gepa/configs/ifbench_gepa_qwen.toml +36 -0
  19. examples/blog_posts/gepa/configs/ifbench_mipro_local.toml +53 -0
  20. examples/blog_posts/gepa/configs/pupa_gepa_local.toml +60 -0
  21. examples/blog_posts/gepa/configs/pupa_mipro_local.toml +54 -0
  22. examples/blog_posts/gepa/deploy_banking77_task_app.sh +41 -0
  23. examples/blog_posts/gepa/gepa_baseline.py +204 -0
  24. examples/blog_posts/gepa/query_prompts_example.py +97 -0
  25. examples/blog_posts/gepa/run_gepa_banking77.sh +87 -0
  26. examples/blog_posts/gepa/task_apps.py +105 -0
  27. examples/blog_posts/gepa/test_gepa_local.sh +67 -0
  28. examples/blog_posts/gepa/verify_banking77_setup.sh +123 -0
  29. examples/blog_posts/pokemon_vl/README.md +98 -0
  30. examples/blog_posts/pokemon_vl/configs/eval_gpt5nano.toml +26 -0
  31. examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +27 -0
  32. examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
  33. examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
  34. examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +43 -0
  35. examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
  36. examples/blog_posts/pokemon_vl/extract_images.py +239 -0
  37. examples/blog_posts/pokemon_vl/pokemon_vl_baseline.py +326 -0
  38. examples/blog_posts/pokemon_vl/run_eval_extract_images.py +209 -0
  39. examples/blog_posts/pokemon_vl/run_qwen_eval_extract_images.py +212 -0
  40. examples/blog_posts/pokemon_vl/text_box_analysis.md +106 -0
  41. examples/blog_posts/warming_up_to_rl/ARCHITECTURE.md +195 -0
  42. examples/blog_posts/warming_up_to_rl/FINAL_TEST_RESULTS.md +127 -0
  43. examples/blog_posts/warming_up_to_rl/INFERENCE_SUCCESS.md +132 -0
  44. examples/blog_posts/warming_up_to_rl/README.md +158 -0
  45. examples/blog_posts/warming_up_to_rl/SMOKE_TESTING.md +164 -0
  46. examples/blog_posts/warming_up_to_rl/SMOKE_TEST_COMPLETE.md +253 -0
  47. examples/blog_posts/warming_up_to_rl/configs/eval_baseline_qwen32b_10x20.toml +25 -0
  48. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
  49. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b_10x20.toml +26 -0
  50. examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
  51. examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
  52. examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
  53. examples/blog_posts/warming_up_to_rl/configs/smoke_test.toml +75 -0
  54. examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +91 -0
  55. examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
  56. examples/blog_posts/warming_up_to_rl/warming_up_to_rl_baseline.py +187 -0
  57. examples/dev/qwen3_32b_qlora_4xh100.toml +5 -0
  58. examples/multi_step/configs/VERILOG_REWARDS.md +4 -0
  59. examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +4 -0
  60. examples/multi_step/configs/crafter_rl_outcome.toml +2 -1
  61. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +65 -107
  62. examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +2 -1
  63. examples/multi_step/configs/crafter_rl_stepwise_simple.toml +2 -1
  64. examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
  65. examples/multi_step/configs/verilog_rl_lora.toml +80 -123
  66. examples/qwen_coder/configs/coder_lora_30b.toml +1 -3
  67. examples/qwen_coder/configs/coder_lora_4b.toml +4 -1
  68. examples/qwen_coder/configs/coder_lora_small.toml +1 -3
  69. examples/qwen_vl/README.md +10 -12
  70. examples/qwen_vl/SETUP_COMPLETE.md +7 -8
  71. examples/qwen_vl/VISION_TESTS_COMPLETE.md +2 -3
  72. examples/qwen_vl/collect_data_via_cli.md +76 -84
  73. examples/qwen_vl/collect_vision_traces.py +4 -4
  74. examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +40 -57
  75. examples/qwen_vl/configs/crafter_vlm_sft_example.toml +1 -2
  76. examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +20 -37
  77. examples/qwen_vl/configs/eval_gpt5nano_vision.toml +21 -40
  78. examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
  79. examples/qwen_vl/configs/{filter_qwen2vl_sft.toml → filter_qwen3vl_sft.toml} +4 -5
  80. examples/qwen_vl/configs/filter_vision_sft.toml +2 -3
  81. examples/qwen_vl/crafter_qwen_vl_agent.py +5 -5
  82. examples/qwen_vl/run_vision_comparison.sh +6 -7
  83. examples/rl/README.md +5 -5
  84. examples/rl/configs/rl_from_base_qwen.toml +26 -1
  85. examples/rl/configs/rl_from_base_qwen17.toml +6 -2
  86. examples/rl/task_app/README.md +1 -2
  87. examples/rl/task_app/math_single_step.py +2 -2
  88. examples/run_crafter_demo.sh +2 -2
  89. examples/sft/README.md +1 -1
  90. examples/sft/configs/crafter_fft_qwen0p6b.toml +4 -1
  91. examples/sft/configs/crafter_lora_qwen0p6b.toml +4 -1
  92. examples/swe/task_app/README.md +32 -2
  93. examples/swe/task_app/grpo_swe_mini.py +4 -0
  94. examples/swe/task_app/hosted/envs/crafter/react_agent.py +1 -1
  95. examples/swe/task_app/hosted/envs/mini_swe/environment.py +37 -10
  96. examples/swe/task_app/hosted/inference/openai_client.py +4 -38
  97. examples/swe/task_app/hosted/policy_routes.py +17 -0
  98. examples/swe/task_app/hosted/rollout.py +4 -2
  99. examples/swe/task_app/morph_backend.py +178 -0
  100. examples/task_apps/banking77/__init__.py +6 -0
  101. examples/task_apps/banking77/banking77_task_app.py +841 -0
  102. examples/task_apps/banking77/deploy_wrapper.py +46 -0
  103. examples/task_apps/crafter/CREATE_SFT_DATASET.md +4 -0
  104. examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +4 -0
  105. examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +4 -0
  106. examples/task_apps/crafter/task_app/README.md +1 -1
  107. examples/task_apps/crafter/task_app/grpo_crafter.py +90 -5
  108. examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +1 -1
  109. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +4 -26
  110. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -2
  111. examples/task_apps/crafter/task_app/synth_envs_hosted/hosted_app.py +49 -0
  112. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +372 -107
  113. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +81 -12
  114. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +82 -11
  115. examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +194 -1
  116. examples/task_apps/enron/task_app/grpo_enron_task_app.py +1 -1
  117. examples/task_apps/gepa_benchmarks/__init__.py +7 -0
  118. examples/task_apps/gepa_benchmarks/common.py +260 -0
  119. examples/task_apps/gepa_benchmarks/hotpotqa_task_app.py +507 -0
  120. examples/task_apps/gepa_benchmarks/hover_task_app.py +436 -0
  121. examples/task_apps/gepa_benchmarks/ifbench_task_app.py +563 -0
  122. examples/task_apps/gepa_benchmarks/pupa_task_app.py +460 -0
  123. examples/task_apps/math/README.md +1 -2
  124. examples/task_apps/pokemon_red/README.md +3 -4
  125. examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +4 -0
  126. examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +6 -5
  127. examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +1 -2
  128. examples/task_apps/pokemon_red/task_app.py +288 -39
  129. examples/task_apps/sokoban/README.md +2 -3
  130. examples/task_apps/verilog/eval_groq_qwen32b.toml +12 -14
  131. examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +1 -1
  132. examples/vlm/configs/crafter_vlm_gpt4o.toml +4 -1
  133. examples/warming_up_to_rl/configs/crafter_fft.toml +4 -1
  134. examples/warming_up_to_rl/configs/crafter_fft_4b.toml +0 -2
  135. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +3 -2
  136. examples/warming_up_to_rl/run_local_rollout_traced.py +1 -1
  137. examples/warming_up_to_rl/task_app/README.md +1 -1
  138. examples/warming_up_to_rl/task_app/grpo_crafter.py +185 -5
  139. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +1 -1
  140. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +3 -27
  141. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -1
  142. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +49 -0
  143. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +156 -45
  144. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +37 -4
  145. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +33 -3
  146. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +67 -0
  147. examples/workflows/math_rl/configs/rl_from_base_qwen.toml +27 -0
  148. examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +6 -0
  149. synth_ai/api/train/builders.py +99 -4
  150. synth_ai/api/train/cli.py +516 -26
  151. synth_ai/api/train/config_finder.py +13 -2
  152. synth_ai/api/train/configs/__init__.py +23 -2
  153. synth_ai/api/train/configs/prompt_learning.py +442 -0
  154. synth_ai/api/train/configs/rl.py +61 -7
  155. synth_ai/api/train/configs/sft.py +6 -2
  156. synth_ai/api/train/configs/shared.py +59 -2
  157. synth_ai/api/train/task_app.py +1 -1
  158. synth_ai/api/train/validators.py +277 -0
  159. synth_ai/auth/credentials.py +119 -0
  160. synth_ai/baseline/__init__.py +25 -0
  161. synth_ai/baseline/config.py +209 -0
  162. synth_ai/baseline/discovery.py +214 -0
  163. synth_ai/baseline/execution.py +146 -0
  164. synth_ai/cli/__init__.py +94 -18
  165. synth_ai/cli/__main__.py +0 -0
  166. synth_ai/cli/claude.py +70 -0
  167. synth_ai/cli/codex.py +84 -0
  168. synth_ai/cli/commands/__init__.py +18 -0
  169. synth_ai/cli/commands/baseline/__init__.py +12 -0
  170. synth_ai/cli/commands/baseline/core.py +637 -0
  171. synth_ai/cli/commands/baseline/list.py +93 -0
  172. synth_ai/cli/commands/demo/__init__.py +6 -0
  173. synth_ai/cli/commands/demo/core.py +163 -0
  174. synth_ai/cli/commands/eval/__init__.py +19 -0
  175. synth_ai/cli/commands/eval/core.py +1112 -0
  176. synth_ai/cli/commands/eval/errors.py +81 -0
  177. synth_ai/cli/commands/eval/validation.py +133 -0
  178. synth_ai/cli/commands/filter/__init__.py +12 -0
  179. synth_ai/cli/commands/filter/core.py +424 -0
  180. synth_ai/cli/commands/filter/errors.py +55 -0
  181. synth_ai/cli/commands/filter/validation.py +77 -0
  182. synth_ai/cli/commands/help/__init__.py +177 -0
  183. synth_ai/cli/commands/help/core.py +72 -0
  184. synth_ai/cli/commands/smoke/__init__.py +7 -0
  185. synth_ai/cli/commands/smoke/core.py +1436 -0
  186. synth_ai/cli/commands/status/__init__.py +64 -0
  187. synth_ai/cli/commands/status/client.py +192 -0
  188. synth_ai/cli/commands/status/config.py +92 -0
  189. synth_ai/cli/commands/status/errors.py +20 -0
  190. synth_ai/cli/commands/status/formatters.py +164 -0
  191. synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
  192. synth_ai/cli/commands/status/subcommands/files.py +79 -0
  193. synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
  194. synth_ai/cli/commands/status/subcommands/models.py +79 -0
  195. synth_ai/cli/commands/status/subcommands/pricing.py +22 -0
  196. synth_ai/cli/commands/status/subcommands/runs.py +81 -0
  197. synth_ai/cli/commands/status/subcommands/summary.py +47 -0
  198. synth_ai/cli/commands/status/subcommands/usage.py +203 -0
  199. synth_ai/cli/commands/status/utils.py +114 -0
  200. synth_ai/cli/commands/train/__init__.py +53 -0
  201. synth_ai/cli/commands/train/core.py +21 -0
  202. synth_ai/cli/commands/train/errors.py +117 -0
  203. synth_ai/cli/commands/train/judge_schemas.py +200 -0
  204. synth_ai/cli/commands/train/judge_validation.py +305 -0
  205. synth_ai/cli/commands/train/validation.py +386 -0
  206. synth_ai/cli/demo.py +30 -158
  207. synth_ai/cli/deploy/__init__.py +43 -0
  208. synth_ai/cli/deploy.py +162 -0
  209. synth_ai/cli/eval/__init__.py +36 -0
  210. synth_ai/cli/eval/core.py +5 -0
  211. synth_ai/cli/eval/errors.py +31 -0
  212. synth_ai/cli/eval/validation.py +5 -0
  213. synth_ai/cli/filter/__init__.py +28 -0
  214. synth_ai/cli/filter/core.py +5 -0
  215. synth_ai/cli/filter/errors.py +23 -0
  216. synth_ai/cli/filter/validation.py +5 -0
  217. synth_ai/cli/legacy_root_backup.py +14 -8
  218. synth_ai/cli/modal_serve/__init__.py +12 -0
  219. synth_ai/cli/modal_serve/core.py +14 -0
  220. synth_ai/cli/modal_serve/errors.py +8 -0
  221. synth_ai/cli/modal_serve/validation.py +11 -0
  222. synth_ai/cli/opencode.py +107 -0
  223. synth_ai/cli/root.py +9 -5
  224. synth_ai/cli/serve/__init__.py +12 -0
  225. synth_ai/cli/serve/core.py +14 -0
  226. synth_ai/cli/serve/errors.py +8 -0
  227. synth_ai/cli/serve/validation.py +11 -0
  228. synth_ai/cli/setup.py +20 -265
  229. synth_ai/cli/status.py +7 -126
  230. synth_ai/cli/task_app_deploy.py +1 -10
  231. synth_ai/cli/task_app_modal_serve.py +4 -9
  232. synth_ai/cli/task_app_serve.py +4 -11
  233. synth_ai/cli/task_apps.py +51 -1480
  234. synth_ai/cli/train/__init__.py +12 -0
  235. synth_ai/cli/train/core.py +21 -0
  236. synth_ai/cli/train/errors.py +8 -0
  237. synth_ai/cli/train/validation.py +24 -0
  238. synth_ai/cli/train.py +1 -14
  239. synth_ai/demos/crafter/grpo_crafter_task_app.py +1 -1
  240. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
  241. synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +7 -4
  242. synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +9 -5
  243. synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +4 -3
  244. synth_ai/environments/examples/red/engine.py +33 -12
  245. synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
  246. synth_ai/environments/examples/red/environment.py +26 -0
  247. synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
  248. synth_ai/http.py +12 -0
  249. synth_ai/judge_schemas.py +10 -10
  250. synth_ai/learning/__init__.py +10 -0
  251. synth_ai/learning/prompt_learning_client.py +276 -0
  252. synth_ai/learning/prompt_learning_types.py +184 -0
  253. synth_ai/learning/rl/client.py +3 -1
  254. synth_ai/pricing/__init__.py +2 -0
  255. synth_ai/pricing/model_pricing.py +57 -0
  256. synth_ai/streaming/__init__.py +29 -0
  257. synth_ai/streaming/config.py +94 -0
  258. synth_ai/streaming/handlers.py +518 -0
  259. synth_ai/streaming/streamer.py +320 -0
  260. synth_ai/streaming/types.py +95 -0
  261. synth_ai/task/apps/__init__.py +1 -0
  262. synth_ai/task/config.py +2 -0
  263. synth_ai/task/tracing_utils.py +25 -25
  264. synth_ai/task/validators.py +45 -9
  265. synth_ai/task_app_cfgs.py +21 -0
  266. synth_ai/tracing_v3/config.py +162 -19
  267. synth_ai/tracing_v3/constants.py +1 -1
  268. synth_ai/tracing_v3/db_config.py +24 -38
  269. synth_ai/tracing_v3/migration_helper.py +1 -2
  270. synth_ai/tracing_v3/storage/config.py +47 -13
  271. synth_ai/tracing_v3/storage/factory.py +3 -3
  272. synth_ai/tracing_v3/turso/daemon.py +113 -11
  273. synth_ai/tracing_v3/turso/native_manager.py +92 -16
  274. synth_ai/types.py +8 -0
  275. synth_ai/urls.py +11 -0
  276. synth_ai/utils/__init__.py +30 -1
  277. synth_ai/utils/agents.py +74 -0
  278. synth_ai/utils/bin.py +39 -0
  279. synth_ai/utils/cli.py +149 -5
  280. synth_ai/utils/env.py +40 -33
  281. synth_ai/utils/http.py +4 -1
  282. synth_ai/utils/json.py +72 -0
  283. synth_ai/utils/modal.py +285 -3
  284. synth_ai/utils/paths.py +48 -0
  285. synth_ai/utils/uvicorn.py +113 -0
  286. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/METADATA +109 -6
  287. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/RECORD +291 -142
  288. examples/qwen_vl/configs/eval_qwen2vl_vision.toml +0 -44
  289. synth_ai/cli/tui.py +0 -62
  290. synth_ai/tui/__init__.py +0 -5
  291. synth_ai/tui/__main__.py +0 -13
  292. synth_ai/tui/cli/__init__.py +0 -1
  293. synth_ai/tui/cli/query_experiments.py +0 -164
  294. synth_ai/tui/cli/query_experiments_v3.py +0 -164
  295. synth_ai/tui/dashboard.py +0 -911
  296. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/WHEEL +0 -0
  297. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/entry_points.txt +0 -0
  298. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/licenses/LICENSE +0 -0
  299. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,209 @@
1
+ """Core dataclasses for baseline configuration and results."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass, field
6
+ from pathlib import Path
7
+ from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional
8
+
9
+
10
class BaselineTaskRunner:
    """Parent class for class-based baseline task runners.

    A subclass overrides ``run_task``. Callers that prefer the
    function-based approach can supply a standalone async function
    instead of subclassing.
    """

    def __init__(
        self,
        policy_config: Dict[str, Any],
        env_config: Dict[str, Any],
    ):
        """Store the two configuration mappings on the instance.

        Args:
            policy_config: Policy configuration (model, temperature, etc.)
            env_config: Environment configuration (max_steps, difficulty, etc.)
        """
        self.policy_config, self.env_config = policy_config, env_config

    async def run_task(self, seed: int) -> TaskResult:
        """Run one task instance; invoked once per seed of the selected split.

        Args:
            seed: The seed/index identifying this task instance

        Returns:
            TaskResult: Structured result containing success, rewards,
            metadata, trace

        Raises:
            NotImplementedError: Always, in this base class.
        """
        # The base class deliberately provides no behavior of its own.
        raise NotImplementedError("Subclasses must implement run_task method")
46
+
47
+
48
@dataclass
class DataSplit:
    """A named data split (train/val/test) and the seeds it contains.

    Attributes:
        name: Split label, e.g. "train", "val" or "test".
        seeds: Seed/index values belonging to this split.
        metadata: Optional free-form metadata; a fresh dict per instance.
    """

    name: str
    seeds: List[int]
    metadata: Dict[str, Any] = field(default_factory=dict)
55
+
56
+
57
@dataclass
class TaskResult:
    """Outcome of executing one task instance.

    The first three fields are required; the remaining ones are optional
    and default to empty/zero/None.
    """

    # --- required ---
    seed: int  # seed/index that was evaluated
    success: bool  # whether the task completed successfully
    outcome_reward: float  # outcome reward for the episode

    # --- optional ---
    event_rewards: List[Dict[str, Any]] = field(default_factory=list)  # step-level event rewards
    total_steps: int = 0  # total steps/turns taken
    metadata: Dict[str, Any] = field(default_factory=dict)  # achievements, completion info, etc.
    error: Optional[str] = None  # error information if success=False
    trace: Optional[Dict[str, Any]] = None  # v3 trace (SessionTrace dict)
84
+
85
+
86
# Task runner alias: either a BaselineTaskRunner subclass, or a function
# taking (seed, policy_config, env_config).
TaskRunnerType = type[BaselineTaskRunner] | Callable[[int, dict[str, Any], dict[str, Any]], Any]

# Result aggregator alias: either a class with an aggregate() method, or a
# function mapping a list of TaskResult objects to a dict.
AggregatorType = type[Any] | Callable[[list[TaskResult]], dict[str, Any]]
97
+
98
+
99
@dataclass
class BaselineConfig:
    """Describes one baseline defined in a baseline file.

    A baseline file specifies how a task is evaluated without needing a
    deployed task app: the evaluation logic is self-contained and
    train/val/test splits are supported as a first-class concept.

    The task runner may be given in either of two forms:
    - Class-based: a class that inherits from BaselineTaskRunner
    - Function-based: an async function with signature:
        async def task_runner(seed: int, policy_config: Dict[str, Any],
                              env_config: Dict[str, Any]) -> TaskResult
    """

    # ---- required fields ----

    # Unique identifier for this baseline config.
    baseline_id: str

    # Human-readable name.
    name: str

    # Task runner, class or function.
    #   Class-based: a class inheriting from BaselineTaskRunner; it is
    #     instantiated with policy_config and env_config and run_task(seed)
    #     is called for each seed.
    #   Function-based: an async function with signature
    #     async def task_runner(seed: int, policy_config: Dict[str, Any],
    #                           env_config: Dict[str, Any]) -> TaskResult
    task_runner: TaskRunnerType

    # Data splits (train/val/test), keyed by split name.
    splits: Dict[str, DataSplit]

    # ---- optional fields ----

    # Description for documentation.
    description: str = ""

    # Default policy configuration.
    default_policy_config: Dict[str, Any] = field(default_factory=dict)

    # Default environment configuration.
    default_env_config: Dict[str, Any] = field(default_factory=dict)

    # Metadata for filtering/organization.
    metadata: Dict[str, Any] = field(default_factory=dict)

    # Tags for filtering and discovery.
    tags: List[str] = field(default_factory=list)

    # Custom result aggregator, class or function.
    #   Class-based: a class with an aggregate(results: List[TaskResult])
    #     method; the class is instantiated and aggregate() called.
    #   Function-based: a function with signature
    #     def aggregate_results(results: List[TaskResult]) -> Dict[str, Any]
    result_aggregator: Optional[AggregatorType] = None

    # Path to this baseline file (set by discovery).
    _source_path: Optional[Path] = None

    def matches_tag(self, tag: str) -> bool:
        """Return True when ``tag`` equals one of ``self.tags``, ignoring case."""
        wanted = tag.lower()
        return any(candidate.lower() == wanted for candidate in self.tags)

    def matches_metadata(self, key: str, value: Any) -> bool:
        """Return whether ``self.metadata`` maps ``key`` to ``value``.

        A missing key is looked up as ``None``, so it matches ``value=None``.
        """
        stored = self.metadata.get(key)
        return stored == value
164
+
165
+
166
@dataclass
class BaselineResults:
    """Bundle of everything produced by one baseline evaluation.

    Attributes:
        config: Configuration that was used.
        split_name: Split that was evaluated.
        results: Per-seed results.
        aggregate_metrics: Aggregate metrics.
        execution_time_seconds: Execution metadata (duration).
        model_name: Execution metadata (model).
        timestamp: Execution metadata (timestamp string).
    """

    config: BaselineConfig
    split_name: str
    results: List[TaskResult]
    aggregate_metrics: Dict[str, Any]
    execution_time_seconds: float
    model_name: str
    timestamp: str

    def to_dict(self) -> Dict[str, Any]:
        """Build a plain dictionary of these results for JSON output.

        Each per-seed entry carries seed, success, outcome_reward,
        total_steps, metadata and error; other TaskResult fields are
        not included.
        """
        per_seed = []
        for item in self.results:
            per_seed.append(
                {
                    "seed": item.seed,
                    "success": item.success,
                    "outcome_reward": item.outcome_reward,
                    "total_steps": item.total_steps,
                    "metadata": item.metadata,
                    "error": item.error,
                }
            )

        cfg = self.config
        payload: Dict[str, Any] = {
            "baseline_id": cfg.baseline_id,
            "name": cfg.name,
            "split": self.split_name,
            "model": self.model_name,
            "timestamp": self.timestamp,
            "execution_time_seconds": self.execution_time_seconds,
            "aggregate_metrics": self.aggregate_metrics,
        }
        payload["results"] = per_seed
        return payload
209
+
@@ -0,0 +1,214 @@
1
+ """AST-based discovery mechanism for baseline files."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import ast
6
+ import importlib.util
7
+ from dataclasses import dataclass
8
+ from pathlib import Path
9
+ from typing import List, Optional, Tuple
10
+
11
+ from synth_ai.baseline.config import BaselineConfig
12
+
13
# Glob patterns, applied to each search root, that locate baseline files.
BASELINE_FILE_PATTERNS = ["**/baseline/*.py", "**/baselines/*.py", "**/*_baseline.py"]

# Directory names that exclude a path from discovery.
IGNORE_PATTERNS = {
    ".git",
    ".mypy_cache",
    ".pytest_cache",
    ".venv",
    "__pycache__",
    "build",
    "dist",
    "node_modules",
    "venv",
}
32
+
33
+
34
@dataclass
class BaselineChoice:
    """One baseline configuration located by discovery.

    Attributes:
        baseline_id: Identifier of the baseline.
        path: File the baseline was found in.
        lineno: Line number associated with the baseline in that file.
        source: Origin marker, "discovered" or "registered".
        config: The loaded configuration, or None when not set.
    """

    baseline_id: str
    path: Path
    lineno: int
    source: str
    config: Optional[BaselineConfig] = None
43
+
44
+
45
class BaselineConfigVisitor(ast.NodeVisitor):
    """AST visitor that records assignments of ``BaselineConfig(...)`` calls.

    A match is recorded when the right-hand side of a plain assignment
    (``cfg = BaselineConfig(...)``) or an annotated assignment
    (``cfg: BaselineConfig = BaselineConfig(...)``) is a call whose callee
    is named ``BaselineConfig``, either bare or attribute-qualified
    (``module.BaselineConfig``), and whose ``baseline_id`` is a non-empty
    string literal.

    Attributes:
        matches: ``(baseline_id, lineno)`` tuples in visiting order.
    """

    def __init__(self):
        self.matches: List[Tuple[str, int]] = []  # (baseline_id, lineno)

    def visit_Assign(self, node: ast.Assign) -> None:
        """Inspect a plain assignment, then keep walking its children."""
        self._record_if_baseline_config(node)
        self.generic_visit(node)

    def visit_AnnAssign(self, node: ast.AnnAssign) -> None:
        """Inspect an annotated assignment, then keep walking its children."""
        self._record_if_baseline_config(node)
        self.generic_visit(node)

    def _record_if_baseline_config(self, node: ast.AST) -> None:
        """Append a match when ``node.value`` is a usable BaselineConfig call."""
        # AnnAssign.value is None for a bare annotation such as ``cfg: T``.
        call = getattr(node, "value", None)
        if not isinstance(call, ast.Call):
            return
        if not self._is_baseline_config_call(call):
            return

        baseline_id = self._extract_baseline_id(call)
        if baseline_id:
            self.matches.append((baseline_id, node.lineno))

    @staticmethod
    def _is_baseline_config_call(call_node: ast.Call) -> bool:
        """Return True for ``BaselineConfig(...)`` or ``<expr>.BaselineConfig(...)``."""
        func = call_node.func
        if isinstance(func, ast.Name):
            return func.id == "BaselineConfig"
        if isinstance(func, ast.Attribute):
            return func.attr == "BaselineConfig"
        return False

    def _extract_baseline_id(self, call_node: ast.Call) -> Optional[str]:
        """Return the literal ``baseline_id`` passed to the constructor call.

        The ``baseline_id=`` keyword is checked first; otherwise the first
        positional argument is used, since ``baseline_id`` is the first
        parameter of the constructor. Returns None when the value is not a
        string literal (e.g. a variable, an f-string or ``*args``).
        """
        for keyword in call_node.keywords:
            if keyword.arg == "baseline_id":
                return self._literal_str(keyword.value)

        if call_node.args:
            return self._literal_str(call_node.args[0])
        return None

    @staticmethod
    def _literal_str(value_node: ast.AST) -> Optional[str]:
        """Return the string held by a constant node, or None for anything else."""
        if isinstance(value_node, ast.Constant) and isinstance(value_node.value, str):
            return value_node.value
        return None
73
+
74
+
75
def should_ignore_path(path: Path) -> bool:
    """Tell whether discovery should skip ``path``.

    Returns True as soon as one component of ``path.parts`` is found in
    IGNORE_PATTERNS, and False when none is.
    """
    for component in path.parts:
        if component in IGNORE_PATTERNS:
            return True
    return False
78
+
79
+
80
def discover_baseline_files(search_roots: List[Path]) -> List[BaselineChoice]:
    """Discover baseline files via AST scanning.

    Every existing root is globbed with each entry of
    BASELINE_FILE_PATTERNS. Candidate files are parsed (never executed)
    and each ``BaselineConfig(...)`` assignment reported by
    BaselineConfigVisitor becomes one BaselineChoice. Files that cannot be
    read, decoded or parsed are skipped.

    Args:
        search_roots: List of root directories to search in

    Returns:
        List of BaselineChoice objects representing discovered baselines,
        de-duplicated on (baseline_id, resolved path).
    """
    results: List[BaselineChoice] = []
    seen = set()

    for root in search_roots:
        if not root.exists():
            continue

        for pattern in BASELINE_FILE_PATTERNS:
            # Sorted so the output order does not depend on the filesystem.
            for path in sorted(root.glob(pattern)):
                # Only components below the root are tested: a root that
                # itself lives under e.g. "build/" or ".venv/" must not
                # cause every file inside it to be ignored.
                if should_ignore_path(path.relative_to(root)):
                    continue

                try:
                    source = path.read_text(encoding="utf-8")
                    tree = ast.parse(source, filename=str(path))
                except (OSError, SyntaxError, ValueError):
                    # ValueError covers UnicodeDecodeError (non-UTF-8 file)
                    # and the null-byte error some Python versions raise
                    # from ast.parse.
                    continue

                visitor = BaselineConfigVisitor()
                visitor.visit(tree)

                resolved = path.resolve()
                for baseline_id, lineno in visitor.matches:
                    key = (baseline_id, resolved)
                    if key in seen:
                        continue
                    seen.add(key)

                    results.append(
                        BaselineChoice(
                            baseline_id=baseline_id,
                            path=resolved,
                            lineno=lineno,
                            source="discovered",
                        )
                    )

    return results
126
+
127
+
128
def load_baseline_config_from_file(
    baseline_id: str,
    path: Path,
) -> BaselineConfig:
    """Load a BaselineConfig from a Python file.

    The file is executed as a module named "baseline_module" and its
    public attributes are searched for a BaselineConfig whose
    ``baseline_id`` matches. The matching config gets ``_source_path`` set
    to ``path`` before it is returned.

    Args:
        baseline_id: The baseline_id to look for
        path: Path to the Python file

    Returns:
        BaselineConfig instance

    Raises:
        ImportError: If executing the file fails because a module it
            imports is missing
        ValueError: If baseline_id not found or file cannot be loaded
    """
    # Load the module
    spec = importlib.util.spec_from_file_location("baseline_module", path)
    if spec is None or spec.loader is None:
        raise ValueError(f"Cannot load baseline file: {path}")

    module = importlib.util.module_from_spec(spec)
    try:
        spec.loader.exec_module(module)
    except ModuleNotFoundError as e:
        # Prefer the structured attribute; fall back to parsing the message
        # only when the exception was raised without a name.
        missing_module = e.name or (str(e).split("'")[1] if "'" in str(e) else str(e))
        raise ImportError(
            f"❌ Missing dependency for baseline '{baseline_id}'\n"
            f" File: {path}\n"
            f" Missing module: {missing_module}\n"
            f" Fix: pip install {missing_module} (or 'uv add {missing_module}')"
        ) from e
    except SyntaxError as e:
        raise ValueError(
            f"❌ Syntax error in baseline file '{baseline_id}'\n"
            f" File: {path}\n"
            f" Error at line {e.lineno}: {e.msg}\n"
            f" Text: {e.text.strip() if e.text else 'N/A'}\n"
            f" Fix: Check the Python syntax in the baseline file"
        ) from e
    except Exception as e:
        error_type = type(e).__name__
        raise ValueError(
            f"❌ Failed to load baseline '{baseline_id}'\n"
            f" File: {path}\n"
            f" Error type: {error_type}\n"
            f" Message: {str(e)}\n"
            f" This may be due to:\n"
            f" - Missing dependencies (check imports)\n"
            f" - Configuration errors in the baseline file\n"
            f" - Environment variables not set\n"
            f" Tip: Run with --verbose for more details"
        ) from e

    # Single pass over the public attributes: return the matching config,
    # remembering the ids of the others for the error message below.
    found_configs = []
    for attr_name in dir(module):
        if attr_name.startswith("_"):
            continue

        attr = getattr(module, attr_name)
        if not isinstance(attr, BaselineConfig):
            continue

        if attr.baseline_id == baseline_id:
            # Set source path for reference
            attr._source_path = path
            return attr
        found_configs.append(attr.baseline_id)

    # Provide helpful error message
    if found_configs:
        raise ValueError(
            f"❌ Baseline '{baseline_id}' not found in {path}\n"
            f" Found baselines in this file: {', '.join(found_configs)}\n"
            f" Fix: Use one of the above baseline IDs or check the baseline_id parameter"
        )

    raise ValueError(
        f"❌ No BaselineConfig instances found in {path}\n"
        f" Expected to find a BaselineConfig with baseline_id='{baseline_id}'\n"
        f" Fix: Ensure the file defines a BaselineConfig instance with baseline_id='{baseline_id}'"
    )
214
+
@@ -0,0 +1,146 @@
1
+ """Execution engine for baseline evaluations."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ from typing import Any, Dict, List, Optional
7
+
8
+ from synth_ai.baseline.config import (
9
+ BaselineConfig,
10
+ BaselineTaskRunner,
11
+ TaskResult,
12
+ )
13
+
14
+
15
def default_aggregator(results: List[TaskResult]) -> Dict[str, Any]:
    """Summarise per-seed task results into aggregate reward statistics.

    Only results whose ``success`` attribute is truthy contribute to the
    reward statistics. The standard deviation is the population form: the
    squared deviations are divided by the number of successful results.

    Args:
        results: TaskResult objects collected across all seeds.

    Returns:
        Dict holding the mean, std, min and max outcome reward, the success
        rate, and the total / successful / failed task counts. Every reward
        statistic and the success rate are 0.0 when no result succeeded.
    """
    total_count = len(results)
    succeeded = [item for item in results if item.success]
    rewards = [item.outcome_reward for item in succeeded]

    # Guard clause: nothing succeeded (this also covers an empty input list).
    if len(rewards) == 0:
        return {
            "mean_outcome_reward": 0.0,
            "std_outcome_reward": 0.0,
            "min_outcome_reward": 0.0,
            "max_outcome_reward": 0.0,
            "success_rate": 0.0,
            "total_tasks": total_count,
            "successful_tasks": 0,
            "failed_tasks": total_count,
        }

    reward_count = len(rewards)
    average = sum(rewards) / reward_count

    # Population variance, then its square root.
    squared_deviations = [(value - average) ** 2 for value in rewards]
    spread = (sum(squared_deviations) / reward_count) ** 0.5

    success_count = len(succeeded)
    summary: Dict[str, Any] = {}
    summary["mean_outcome_reward"] = average
    summary["std_outcome_reward"] = spread
    summary["min_outcome_reward"] = min(rewards)
    summary["max_outcome_reward"] = max(rewards)
    summary["success_rate"] = success_count / total_count
    summary["total_tasks"] = total_count
    summary["successful_tasks"] = success_count
    summary["failed_tasks"] = total_count - success_count
    return summary
57
+
58
+
59
def _is_class_based_runner(task_runner: Any) -> bool:
    """Tell whether ``task_runner`` is a BaselineTaskRunner subclass.

    Returns False for anything that is not a class object (for example a
    plain function), and otherwise the result of the subclass check.
    """
    if not isinstance(task_runner, type):
        return False
    return issubclass(task_runner, BaselineTaskRunner)
65
+
66
+
67
async def run_baseline_evaluation(
    config: BaselineConfig,
    seeds: List[int],
    policy_config: Dict[str, Any],
    env_config: Dict[str, Any],
    concurrency: int = 4,
) -> List[TaskResult]:
    """Run baseline evaluation for given seeds.

    Each seed is executed as its own coroutine; at most ``concurrency`` of
    them run at a time. A task that raises is converted into a failed
    TaskResult rather than aborting the whole evaluation.

    Args:
        config: BaselineConfig instance whose ``task_runner`` is either a
            BaselineTaskRunner subclass or an async callable taking
            ``(seed, policy_config, env_config)``.
        seeds: List of seeds to evaluate.
        policy_config: Policy configuration (merged from defaults + overrides).
        env_config: Environment configuration (merged from defaults + overrides).
        concurrency: Maximum concurrent task executions. Values below 1 are
            treated as 1.

    Returns:
        List of TaskResult objects, one per seed, in the order of ``seeds``.
    """
    # Class-based runners are instantiated once and shared by every seed.
    runner_instance: Optional[BaselineTaskRunner] = None
    if _is_class_based_runner(config.task_runner):
        runner_instance = config.task_runner(policy_config, env_config)

    # A semaphore created with 0 would make every task wait forever, and a
    # negative value raises ValueError, so always allow at least one slot.
    semaphore = asyncio.Semaphore(max(1, concurrency))

    async def run_task(seed: int) -> TaskResult:
        """Execute a single task with error handling."""
        async with semaphore:
            try:
                # Compare against None explicitly: a runner instance that
                # defines __bool__/__len__ may be falsy yet still valid.
                if runner_instance is not None:
                    return await runner_instance.run_task(seed)
                # Function-based: call the configured callable directly.
                return await config.task_runner(seed, policy_config, env_config)
            except Exception as exc:
                # Some exceptions (e.g. asyncio.TimeoutError()) stringify to
                # "", which would look like "no error"; fall back to the
                # exception class name so the failure stays visible.
                return TaskResult(
                    seed=seed,
                    success=False,
                    outcome_reward=0.0,
                    error=str(exc) or type(exc).__name__,
                )

    # gather() preserves input order, so results line up with ``seeds``.
    results = await asyncio.gather(*(run_task(seed) for seed in seeds))
    return list(results)
120
+
121
+
122
def aggregate_results(
    config: BaselineConfig,
    results: List[TaskResult],
) -> Dict[str, Any]:
    """Reduce task results to aggregate metrics.

    Uses ``config.result_aggregator`` when one is configured and falls back
    to ``default_aggregator`` otherwise.

    Args:
        config: BaselineConfig instance.
        results: List of TaskResult objects.

    Returns:
        Dict with aggregate metrics.
    """
    aggregator = config.result_aggregator

    # No custom aggregator configured: use the built-in statistics.
    if aggregator is None:
        return default_aggregator(results)

    # A class is instantiated with no arguments and asked to aggregate().
    if isinstance(aggregator, type):
        return aggregator().aggregate(results)

    # Anything else is treated as a plain callable.
    return aggregator(results)
146
+
synth_ai/cli/__init__.py CHANGED
@@ -52,9 +52,77 @@ if not _cli_module:
52
52
  raise ImportError("synth_ai.cli.root is required for CLI entrypoint")
53
53
  cli = _cli_module.cli # type: ignore[attr-defined]
54
54
 
55
+ # Register core commands implemented as standalone modules
56
+ try:
57
+ from synth_ai.cli.demo import demo_cmd
58
+ cli.add_command(demo_cmd, name="demo")
59
+ except Exception as e:
60
+ import sys
61
+ print(f"[DEBUG] Failed to register demo command: {e}", file=sys.stderr)
62
+ import traceback
63
+ traceback.print_exc()
64
+ try:
65
+ from synth_ai.cli.setup import setup_cmd
66
+ cli.add_command(setup_cmd, name="setup")
67
+ except Exception as e:
68
+ import sys
69
+ print(f"[DEBUG] Failed to register setup command: {e}", file=sys.stderr)
70
+ import traceback
71
+ traceback.print_exc()
72
+ try:
73
+ from synth_ai.cli.deploy import deploy_cmd # type: ignore[attr-defined]
74
+ cli.add_command(deploy_cmd, name="deploy")
75
+ except Exception as e:
76
+ import sys
77
+ print(f"[DEBUG] Failed to register deploy command: {e}", file=sys.stderr)
78
+ import traceback
79
+ traceback.print_exc()
80
+ try:
81
+ from synth_ai.cli.opencode import opencode_cmd
82
+ cli.add_command(opencode_cmd, name="opencode")
83
+ except Exception as e:
84
+ import sys
85
+ print(f"[DEBUG] Failed to register opencode command: {e}", file=sys.stderr)
86
+ import traceback
87
+ traceback.print_exc()
88
+ try:
89
+ from synth_ai.cli.codex import codex_cmd
90
+ cli.add_command(codex_cmd, name="codex")
91
+ except Exception as e:
92
+ import sys
93
+ print(f"[DEBUG] Failed to register codex command: {e}", file=sys.stderr)
94
+ import traceback
95
+ traceback.print_exc()
96
+ try:
97
+ from synth_ai.cli.eval import command as eval_cmd
98
+ cli.add_command(eval_cmd, name="eval")
99
+ except Exception as e:
100
+ import sys
101
+ print(f"[DEBUG] Failed to register eval command: {e}", file=sys.stderr)
102
+ import traceback
103
+ traceback.print_exc()
104
+ try:
105
+ from synth_ai.cli.claude import claude_cmd
106
+ cli.add_command(claude_cmd, name="claude")
107
+ except Exception as e:
108
+ import sys
109
+ print(f"[DEBUG] Failed to register claude command: {e}", file=sys.stderr)
110
+ import traceback
111
+ traceback.print_exc()
112
+ try:
113
+ from synth_ai.cli.commands.baseline import command as baseline_cmd
114
+ from synth_ai.cli.commands.baseline.list import list_command as baseline_list_cmd
115
+ cli.add_command(baseline_cmd, name="baseline")
116
+ baseline_cmd.add_command(baseline_list_cmd, name="list")
117
+ except Exception as e:
118
+ import sys
119
+ print(f"[DEBUG] Failed to register baseline command: {e}", file=sys.stderr)
120
+ import traceback
121
+ traceback.print_exc()
122
+
55
123
 
56
124
  # Register optional subcommands packaged under synth_ai.cli.*
57
- for _module_path in ("synth_ai.cli.demo", "synth_ai.cli.turso"):
125
+ for _module_path in ("synth_ai.cli.commands.demo", "synth_ai.cli.commands.status", "synth_ai.cli.turso"):
58
126
  module = _maybe_import(_module_path)
59
127
  if not module:
60
128
  continue
@@ -64,27 +132,35 @@ for _module_path in ("synth_ai.cli.demo", "synth_ai.cli.turso"):
64
132
  if fn:
65
133
  fn(cli)
66
134
 
135
+ # Smoke command registration (CLI-only helper)
136
+ try:
137
+ from synth_ai.cli.commands.smoke import register as register_smoke
138
+
139
+ register_smoke(cli)
140
+ except Exception:
141
+ pass
142
+
143
+ # Register help command
144
+ _maybe_call("synth_ai.cli.commands.help.core", "register", cli)
145
+
67
146
  # Train CLI lives under synth_ai.api.train
68
147
  _maybe_call("synth_ai.api.train", "register", cli)
69
148
 
70
149
  # Task app group/commands are optional and have richer API surface
71
150
  _task_apps_module = _maybe_import("synth_ai.cli.task_apps")
72
- if _task_apps_module:
73
- task_app_group = getattr(_task_apps_module, "task_app_group", None)
74
- if task_app_group is not None:
75
- cli.add_command(task_app_group, name="task-app")
76
- # Expose common aliases when present
77
- commands = getattr(task_app_group, "commands", None)
78
- if isinstance(commands, dict):
79
- for alias, name in (("serve", "serve"), ("deploy", "deploy"), ("modal-serve", "modal-serve")):
80
- command = commands.get(name)
81
- if command is not None:
82
- cli.add_command(command, name=alias)
83
- register_task_apps = _callable_from(_task_apps_module, "register")
84
- if register_task_apps:
85
- register_task_apps(cli)
86
-
87
- # Register TUI command if dependencies allow
88
- _maybe_call("synth_ai.cli.tui", "register", cli)
151
+ #if _task_apps_module:
152
+ task_app_group = getattr(_task_apps_module, "task_app_group", None)
153
+ if task_app_group is not None:
154
+ cli.add_command(task_app_group, name="task-app")
155
+ # Expose common aliases when present
156
+ commands = getattr(task_app_group, "commands", None)
157
+ if isinstance(commands, dict):
158
+ for alias, name in (("serve", "serve"), ("deploy", "deploy"), ("modal-serve", "modal-serve")):
159
+ command = commands.get(name)
160
+ if command is not None:
161
+ cli.add_command(command, name=alias)
162
+ register_task_apps = _callable_from(_task_apps_module, "register")
163
+ if register_task_apps:
164
+ register_task_apps(cli)
89
165
 
90
166
  # Top-level 'info' alias removed; use `synth-ai task-app info` instead
File without changes