synth-ai 0.2.14__py3-none-any.whl → 0.4.4__py3-none-any.whl

This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (1086)
  1. synth_ai/__init__.py +25 -46
  2. synth_ai/__main__.py +30 -3
  3. synth_ai/cli/__init__.py +98 -72
  4. synth_ai/cli/__main__.py +42 -0
  5. synth_ai/cli/_internal/__init__.py +5 -0
  6. synth_ai/cli/_internal/modal_wrapper.py +31 -0
  7. synth_ai/cli/_internal/storage.py +20 -0
  8. synth_ai/cli/_internal/typer_patch.py +47 -0
  9. synth_ai/cli/_internal/validate_task_app.py +29 -0
  10. synth_ai/cli/agents/__init__.py +17 -0
  11. synth_ai/cli/agents/claude.py +77 -0
  12. synth_ai/cli/agents/codex.py +265 -0
  13. synth_ai/cli/agents/opencode.py +253 -0
  14. synth_ai/cli/commands/__init__.py +18 -0
  15. synth_ai/cli/commands/artifacts/__init__.py +13 -0
  16. synth_ai/cli/commands/artifacts/client.py +119 -0
  17. synth_ai/cli/commands/artifacts/config.py +57 -0
  18. synth_ai/cli/commands/artifacts/core.py +24 -0
  19. synth_ai/cli/commands/artifacts/download.py +188 -0
  20. synth_ai/cli/commands/artifacts/export.py +186 -0
  21. synth_ai/cli/commands/artifacts/list.py +156 -0
  22. synth_ai/cli/commands/artifacts/parsing.py +250 -0
  23. synth_ai/cli/commands/artifacts/show.py +336 -0
  24. synth_ai/cli/commands/demo/__init__.py +3 -0
  25. synth_ai/cli/commands/demo/core.py +153 -0
  26. synth_ai/cli/commands/eval/__init__.py +10 -0
  27. synth_ai/cli/commands/eval/config.py +338 -0
  28. synth_ai/cli/commands/eval/core.py +258 -0
  29. synth_ai/cli/commands/eval/runner.py +704 -0
  30. synth_ai/cli/commands/eval/validation.py +60 -0
  31. synth_ai/cli/commands/filter/__init__.py +12 -0
  32. synth_ai/cli/commands/filter/core.py +424 -0
  33. synth_ai/cli/commands/filter/errors.py +55 -0
  34. synth_ai/cli/commands/filter/validation.py +77 -0
  35. synth_ai/cli/commands/help/__init__.py +185 -0
  36. synth_ai/cli/commands/help/core.py +72 -0
  37. synth_ai/cli/commands/scan/__init__.py +19 -0
  38. synth_ai/cli/commands/scan/cloudflare_scanner.py +403 -0
  39. synth_ai/cli/commands/scan/core.py +344 -0
  40. synth_ai/cli/commands/scan/health_checker.py +242 -0
  41. synth_ai/cli/commands/scan/local_scanner.py +278 -0
  42. synth_ai/cli/commands/scan/models.py +83 -0
  43. synth_ai/cli/commands/smoke/__init__.py +7 -0
  44. synth_ai/cli/commands/smoke/core.py +1428 -0
  45. synth_ai/cli/commands/status/__init__.py +3 -0
  46. synth_ai/cli/commands/status/client.py +91 -0
  47. synth_ai/cli/commands/status/config.py +12 -0
  48. synth_ai/cli/commands/status/errors.py +11 -0
  49. synth_ai/cli/commands/status/subcommands/__init__.py +3 -0
  50. synth_ai/cli/commands/status/subcommands/config.py +13 -0
  51. synth_ai/cli/commands/status/subcommands/files.py +34 -0
  52. synth_ai/cli/commands/status/subcommands/jobs.py +51 -0
  53. synth_ai/cli/commands/status/subcommands/models.py +35 -0
  54. synth_ai/cli/commands/status/subcommands/runs.py +34 -0
  55. synth_ai/cli/commands/status/subcommands/session.py +77 -0
  56. synth_ai/cli/commands/status/subcommands/summary.py +39 -0
  57. synth_ai/cli/commands/status/subcommands/utils.py +41 -0
  58. synth_ai/cli/commands/status/utils.py +23 -0
  59. synth_ai/cli/commands/train/__init__.py +51 -0
  60. synth_ai/cli/commands/train/core.py +22 -0
  61. synth_ai/cli/commands/train/errors.py +117 -0
  62. synth_ai/cli/commands/train/prompt_learning_validation.py +632 -0
  63. synth_ai/cli/commands/train/validation.py +392 -0
  64. synth_ai/cli/commands/train/verifier_schemas.py +200 -0
  65. synth_ai/cli/commands/train/verifier_validation.py +235 -0
  66. synth_ai/cli/demo_apps/__init__.py +10 -0
  67. synth_ai/cli/demo_apps/core/__init__.py +28 -0
  68. synth_ai/cli/demo_apps/core/cli.py +1735 -0
  69. synth_ai/cli/demo_apps/crafter/crafter_fft_4b.toml +55 -0
  70. synth_ai/cli/demo_apps/crafter/grpo_crafter_task_app.py +186 -0
  71. synth_ai/cli/demo_apps/crafter/rl_from_base_qwen4b.toml +74 -0
  72. synth_ai/cli/demo_apps/demo_registry.py +176 -0
  73. synth_ai/cli/demo_apps/demo_task_apps/core.py +440 -0
  74. synth_ai/cli/demo_apps/demo_task_apps/crafter/__init__.py +1 -0
  75. synth_ai/cli/demo_apps/demo_task_apps/crafter/grpo_crafter_task_app.py +185 -0
  76. synth_ai/cli/demo_apps/demo_task_apps/math/config.toml +73 -0
  77. synth_ai/cli/demo_apps/demo_task_apps/math/modal_task_app.py +738 -0
  78. synth_ai/cli/demo_apps/demo_task_apps/math/task_app_entry.py +39 -0
  79. synth_ai/cli/demo_apps/math/__init__.py +1 -0
  80. synth_ai/cli/demo_apps/math/_common.py +16 -0
  81. synth_ai/cli/demo_apps/math/app.py +38 -0
  82. synth_ai/cli/demo_apps/math/config.toml +75 -0
  83. synth_ai/cli/demo_apps/math/deploy_modal.py +54 -0
  84. synth_ai/cli/demo_apps/math/modal_task_app.py +698 -0
  85. synth_ai/cli/demo_apps/math/task_app_entry.py +53 -0
  86. synth_ai/cli/demo_apps/mipro/main.py +271 -0
  87. synth_ai/cli/demo_apps/mipro/task_app.py +911 -0
  88. synth_ai/cli/demo_apps/mipro/train_cfg.toml +92 -0
  89. synth_ai/cli/demos/__init__.py +12 -0
  90. synth_ai/cli/demos/demo.py +32 -0
  91. synth_ai/cli/demos/rl_demo.py +254 -0
  92. synth_ai/cli/deploy.py +216 -0
  93. synth_ai/cli/infra/__init__.py +14 -0
  94. synth_ai/cli/infra/balance.py +216 -0
  95. synth_ai/cli/infra/mcp.py +35 -0
  96. synth_ai/cli/infra/modal_app.py +36 -0
  97. synth_ai/cli/infra/setup.py +69 -0
  98. synth_ai/cli/infra/status.py +16 -0
  99. synth_ai/cli/infra/turso.py +77 -0
  100. synth_ai/cli/lib/__init__.py +10 -0
  101. synth_ai/cli/lib/agents.py +76 -0
  102. synth_ai/cli/lib/apps/modal_app.py +101 -0
  103. synth_ai/cli/lib/apps/task_app.py +642 -0
  104. synth_ai/cli/lib/bin.py +39 -0
  105. synth_ai/cli/lib/env.py +375 -0
  106. synth_ai/cli/lib/errors.py +85 -0
  107. synth_ai/cli/lib/modal.py +315 -0
  108. synth_ai/cli/lib/plotting.py +126 -0
  109. synth_ai/cli/lib/prompt_args.py +39 -0
  110. synth_ai/cli/lib/prompts.py +284 -0
  111. synth_ai/cli/lib/sqld.py +122 -0
  112. synth_ai/cli/lib/task_app_discovery.py +884 -0
  113. synth_ai/cli/lib/task_app_env.py +295 -0
  114. synth_ai/cli/lib/train_cfgs.py +300 -0
  115. synth_ai/cli/lib/tunnel_records.py +207 -0
  116. synth_ai/cli/local/__init__.py +14 -0
  117. synth_ai/cli/local/experiment_queue/__init__.py +72 -0
  118. synth_ai/cli/local/experiment_queue/api_schemas.py +221 -0
  119. synth_ai/cli/local/experiment_queue/celery_app.py +208 -0
  120. synth_ai/cli/local/experiment_queue/config.py +128 -0
  121. synth_ai/cli/local/experiment_queue/config_utils.py +272 -0
  122. synth_ai/cli/local/experiment_queue/database.py +175 -0
  123. synth_ai/cli/local/experiment_queue/dispatcher.py +119 -0
  124. synth_ai/cli/local/experiment_queue/models.py +231 -0
  125. synth_ai/cli/local/experiment_queue/progress_info.py +160 -0
  126. synth_ai/cli/local/experiment_queue/results.py +373 -0
  127. synth_ai/cli/local/experiment_queue/schemas.py +131 -0
  128. synth_ai/cli/local/experiment_queue/service.py +344 -0
  129. synth_ai/cli/local/experiment_queue/status.py +372 -0
  130. synth_ai/cli/local/experiment_queue/status_tracker.py +360 -0
  131. synth_ai/cli/local/experiment_queue/tasks.py +1984 -0
  132. synth_ai/cli/local/experiment_queue/trace_storage.py +65 -0
  133. synth_ai/cli/local/experiment_queue/validation.py +157 -0
  134. synth_ai/cli/local/session/__init__.py +92 -0
  135. synth_ai/cli/local/session/client.py +383 -0
  136. synth_ai/cli/local/session/constants.py +63 -0
  137. synth_ai/cli/local/session/exceptions.py +105 -0
  138. synth_ai/cli/local/session/manager.py +139 -0
  139. synth_ai/cli/local/session/models.py +89 -0
  140. synth_ai/cli/local/session/query.py +110 -0
  141. synth_ai/cli/root.py +30 -6
  142. synth_ai/cli/task_apps/__init__.py +37 -0
  143. synth_ai/cli/task_apps/commands.py +3145 -0
  144. synth_ai/cli/task_apps/deploy.py +7 -0
  145. synth_ai/cli/task_apps/list.py +26 -0
  146. synth_ai/cli/task_apps/main.py +36 -0
  147. synth_ai/cli/task_apps/modal_serve.py +11 -0
  148. synth_ai/cli/task_apps/serve.py +11 -0
  149. synth_ai/cli/training/__init__.py +8 -0
  150. synth_ai/cli/training/train.py +5 -0
  151. synth_ai/cli/training/train_cfg.py +34 -0
  152. synth_ai/cli/training/watch.py +506 -0
  153. synth_ai/cli/turso.py +34 -55
  154. synth_ai/cli/utils/__init__.py +8 -0
  155. synth_ai/cli/utils/experiments.py +235 -0
  156. synth_ai/cli/utils/queue.py +504 -0
  157. synth_ai/cli/utils/recent.py +133 -0
  158. synth_ai/cli/utils/traces.py +164 -0
  159. synth_ai/contracts/__init__.py +67 -0
  160. synth_ai/core/__init__.py +100 -0
  161. synth_ai/core/_utils/__init__.py +54 -0
  162. synth_ai/core/_utils/base_url.py +10 -0
  163. synth_ai/core/_utils/http.py +10 -0
  164. synth_ai/core/_utils/prompts.py +14 -0
  165. synth_ai/core/_utils/task_app_state.py +12 -0
  166. synth_ai/core/_utils/user_config.py +10 -0
  167. synth_ai/core/apps/common.py +116 -0
  168. synth_ai/core/auth.py +95 -0
  169. synth_ai/core/cfgs.py +240 -0
  170. synth_ai/core/config/__init__.py +16 -0
  171. synth_ai/core/config/base.py +168 -0
  172. synth_ai/core/config/resolver.py +89 -0
  173. synth_ai/core/env.py +231 -0
  174. synth_ai/core/errors.py +125 -0
  175. synth_ai/core/http.py +230 -0
  176. synth_ai/core/integrations/__init__.py +11 -0
  177. synth_ai/core/integrations/cloudflare.py +1886 -0
  178. synth_ai/core/integrations/mcp/__init__.py +6 -0
  179. synth_ai/core/integrations/mcp/__main__.py +8 -0
  180. synth_ai/core/integrations/mcp/claude.py +36 -0
  181. synth_ai/core/integrations/mcp/main.py +254 -0
  182. synth_ai/core/integrations/mcp/setup.py +100 -0
  183. synth_ai/core/integrations/modal.py +277 -0
  184. synth_ai/core/json.py +72 -0
  185. synth_ai/core/log_filter.py +99 -0
  186. synth_ai/core/logging.py +82 -0
  187. synth_ai/core/paths.py +107 -0
  188. synth_ai/core/pricing.py +109 -0
  189. synth_ai/core/process.py +233 -0
  190. synth_ai/core/ssl.py +25 -0
  191. synth_ai/core/storage/__init__.py +71 -0
  192. synth_ai/core/task_app_state.py +318 -0
  193. synth_ai/core/telemetry.py +282 -0
  194. synth_ai/core/tracing_v3/__init__.py +99 -0
  195. synth_ai/core/tracing_v3/abstractions.py +348 -0
  196. synth_ai/core/tracing_v3/config.py +229 -0
  197. synth_ai/core/tracing_v3/constants.py +21 -0
  198. synth_ai/core/tracing_v3/db_config.py +182 -0
  199. synth_ai/core/tracing_v3/decorators.py +401 -0
  200. synth_ai/core/tracing_v3/llm_call_record_helpers.py +437 -0
  201. synth_ai/core/tracing_v3/migration_helper.py +119 -0
  202. synth_ai/core/tracing_v3/session_tracer.py +542 -0
  203. synth_ai/core/tracing_v3/storage/base.py +211 -0
  204. synth_ai/core/tracing_v3/storage/config.py +109 -0
  205. synth_ai/core/tracing_v3/storage/factory.py +39 -0
  206. synth_ai/core/tracing_v3/trace_utils.py +326 -0
  207. synth_ai/core/tracing_v3/turso/daemon.py +278 -0
  208. synth_ai/core/tracing_v3/turso/models.py +470 -0
  209. synth_ai/core/tracing_v3/turso/native_manager.py +1385 -0
  210. synth_ai/core/tracing_v3/utils.py +108 -0
  211. synth_ai/core/urls.py +18 -0
  212. synth_ai/core/user_config.py +137 -0
  213. synth_ai/core/uvicorn.py +222 -0
  214. synth_ai/data/__init__.py +83 -0
  215. synth_ai/data/enums.py +122 -0
  216. synth_ai/data/rewards.py +249 -0
  217. synth_ai/data/traces.py +35 -0
  218. synth_ai/products/__init__.py +6 -0
  219. synth_ai/products/graph_evolve/__init__.py +45 -0
  220. synth_ai/products/graph_evolve/client.py +226 -0
  221. synth_ai/products/graph_evolve/config.py +591 -0
  222. synth_ai/products/graph_evolve/converters/__init__.py +42 -0
  223. synth_ai/products/graph_evolve/converters/openai_sft.py +484 -0
  224. synth_ai/products/graph_evolve/examples/hotpotqa/config.toml +109 -0
  225. synth_ai/products/graph_evolve/run.py +222 -0
  226. synth_ai/products/graph_gepa/__init__.py +23 -0
  227. synth_ai/products/graph_gepa/converters/__init__.py +19 -0
  228. synth_ai/products/graph_gepa/converters/openai_sft.py +29 -0
  229. synth_ai/sdk/__init__.py +129 -0
  230. synth_ai/sdk/api/__init__.py +1 -0
  231. synth_ai/sdk/api/eval/__init__.py +33 -0
  232. synth_ai/sdk/api/eval/job.py +732 -0
  233. synth_ai/sdk/api/models/supported.py +514 -0
  234. synth_ai/sdk/api/research_agent/__init__.py +296 -0
  235. synth_ai/sdk/api/train/__init__.py +85 -0
  236. synth_ai/sdk/api/train/builders.py +1076 -0
  237. synth_ai/sdk/api/train/cli.py +2196 -0
  238. synth_ai/sdk/api/train/config_finder.py +267 -0
  239. synth_ai/sdk/api/train/configs/__init__.py +67 -0
  240. synth_ai/sdk/api/train/configs/prompt_learning.py +1800 -0
  241. synth_ai/sdk/api/train/configs/rl.py +436 -0
  242. synth_ai/sdk/api/train/configs/sft.py +263 -0
  243. synth_ai/sdk/api/train/configs/shared.py +81 -0
  244. synth_ai/sdk/api/train/context_learning.py +312 -0
  245. synth_ai/sdk/api/train/env_resolver.py +418 -0
  246. synth_ai/sdk/api/train/graph_validators.py +216 -0
  247. synth_ai/sdk/api/train/graphgen.py +1102 -0
  248. synth_ai/sdk/api/train/graphgen_models.py +873 -0
  249. synth_ai/sdk/api/train/graphgen_validators.py +109 -0
  250. synth_ai/sdk/api/train/local_api.py +10 -0
  251. synth_ai/sdk/api/train/pollers.py +160 -0
  252. synth_ai/sdk/api/train/progress/__init__.py +97 -0
  253. synth_ai/sdk/api/train/progress/dataclasses.py +569 -0
  254. synth_ai/sdk/api/train/progress/events.py +326 -0
  255. synth_ai/sdk/api/train/progress/results.py +428 -0
  256. synth_ai/sdk/api/train/progress/tracker.py +641 -0
  257. synth_ai/sdk/api/train/prompt_learning.py +800 -0
  258. synth_ai/sdk/api/train/rl.py +478 -0
  259. synth_ai/sdk/api/train/sft.py +398 -0
  260. synth_ai/sdk/api/train/summary.py +522 -0
  261. synth_ai/sdk/api/train/supported_algos.py +147 -0
  262. synth_ai/sdk/api/train/task_app.py +351 -0
  263. synth_ai/sdk/api/train/utils.py +279 -0
  264. synth_ai/sdk/api/train/validators.py +2424 -0
  265. synth_ai/sdk/graphs/__init__.py +15 -0
  266. synth_ai/sdk/graphs/completions.py +776 -0
  267. synth_ai/sdk/graphs/verifier_schemas.py +222 -0
  268. synth_ai/sdk/inference/__init__.py +6 -0
  269. synth_ai/sdk/inference/client.py +128 -0
  270. synth_ai/sdk/jobs/__init__.py +16 -0
  271. synth_ai/sdk/jobs/client.py +371 -0
  272. synth_ai/sdk/learning/__init__.py +99 -0
  273. synth_ai/sdk/learning/client.py +240 -0
  274. synth_ai/sdk/learning/context_learning_client.py +531 -0
  275. synth_ai/sdk/learning/context_learning_types.py +294 -0
  276. synth_ai/sdk/learning/ft_client.py +7 -0
  277. synth_ai/sdk/learning/health.py +49 -0
  278. synth_ai/sdk/learning/jobs.py +202 -0
  279. synth_ai/sdk/learning/prompt_extraction.py +334 -0
  280. synth_ai/sdk/learning/prompt_learning_client.py +455 -0
  281. synth_ai/sdk/learning/prompt_learning_types.py +186 -0
  282. synth_ai/sdk/learning/rl/__init__.py +35 -0
  283. synth_ai/sdk/learning/rl/client.py +268 -0
  284. synth_ai/sdk/learning/rl/contracts.py +23 -0
  285. synth_ai/sdk/learning/rl/env_keys.py +166 -0
  286. synth_ai/sdk/learning/rl/secrets.py +13 -0
  287. synth_ai/sdk/learning/sft/client.py +95 -0
  288. synth_ai/sdk/learning/sft/config.py +270 -0
  289. synth_ai/sdk/learning/sft/data.py +698 -0
  290. synth_ai/sdk/learning/validators.py +52 -0
  291. synth_ai/sdk/localapi/__init__.py +40 -0
  292. synth_ai/sdk/localapi/apps/__init__.py +28 -0
  293. synth_ai/sdk/localapi/client.py +10 -0
  294. synth_ai/sdk/localapi/contracts.py +10 -0
  295. synth_ai/sdk/localapi/helpers.py +519 -0
  296. synth_ai/sdk/localapi/rollouts.py +93 -0
  297. synth_ai/sdk/localapi/server.py +29 -0
  298. synth_ai/sdk/localapi/template.py +49 -0
  299. synth_ai/sdk/streaming/__init__.py +35 -0
  300. synth_ai/sdk/streaming/config.py +94 -0
  301. synth_ai/sdk/streaming/handlers.py +1997 -0
  302. synth_ai/sdk/streaming/streamer.py +708 -0
  303. synth_ai/sdk/streaming/types.py +112 -0
  304. synth_ai/sdk/task/__init__.py +164 -0
  305. synth_ai/sdk/task/apps/__init__.py +169 -0
  306. synth_ai/sdk/task/client.py +175 -0
  307. synth_ai/sdk/task/config.py +256 -0
  308. synth_ai/sdk/task/contracts.py +340 -0
  309. synth_ai/sdk/task/datasets.py +108 -0
  310. synth_ai/sdk/task/in_process.py +1200 -0
  311. synth_ai/sdk/task/in_process_runner.py +314 -0
  312. synth_ai/sdk/task/inference_api.py +299 -0
  313. synth_ai/sdk/task/proxy.py +287 -0
  314. synth_ai/sdk/task/rubrics/__init__.py +54 -0
  315. synth_ai/sdk/task/rubrics/loaders.py +156 -0
  316. synth_ai/sdk/task/rubrics/strict.py +148 -0
  317. synth_ai/sdk/task/rubrics.py +219 -0
  318. synth_ai/sdk/task/server.py +640 -0
  319. synth_ai/sdk/task/trace_correlation_helpers.py +557 -0
  320. synth_ai/sdk/task/tracing_utils.py +95 -0
  321. synth_ai/sdk/task/validators.py +441 -0
  322. synth_ai/sdk/training/__init__.py +93 -0
  323. synth_ai/sdk/tunnels/__init__.py +118 -0
  324. synth_ai/sdk/tunnels/cleanup.py +83 -0
  325. synth_ai/sdk/tunnels/ports.py +120 -0
  326. synth_ai/sdk/tunnels/tunneled_api.py +363 -0
  327. synth_ai/utils/__init__.py +213 -0
  328. synth_ai-0.4.4.dist-info/METADATA +262 -0
  329. synth_ai-0.4.4.dist-info/RECORD +369 -0
  330. synth_ai-0.4.4.dist-info/top_level.txt +1 -0
  331. examples/__init__.py +0 -16
  332. examples/analyze_semantic_words.sh +0 -17
  333. examples/crafter_debug_render.py +0 -186
  334. examples/dev/qwen3_32b_qlora_4xh100.toml +0 -40
  335. examples/multi_step/configs/README_verilog_rl.md +0 -77
  336. examples/multi_step/configs/VERILOG_REWARDS.md +0 -90
  337. examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +0 -183
  338. examples/multi_step/configs/crafter_eval_synth_qwen4b.toml +0 -35
  339. examples/multi_step/configs/crafter_eval_text_only_groq_qwen32b.toml +0 -36
  340. examples/multi_step/configs/crafter_rl_outcome.toml +0 -74
  341. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +0 -187
  342. examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +0 -83
  343. examples/multi_step/configs/crafter_rl_stepwise_simple.toml +0 -78
  344. examples/multi_step/configs/crafter_synth_backend.md +0 -40
  345. examples/multi_step/configs/verilog_eval_groq_qwen32b.toml +0 -31
  346. examples/multi_step/configs/verilog_eval_synth_qwen8b.toml +0 -33
  347. examples/multi_step/configs/verilog_rl_lora.toml +0 -190
  348. examples/multi_step/crafter_rl_lora.md +0 -70
  349. examples/multi_step/judges/crafter_backend_judge.py +0 -220
  350. examples/multi_step/judges/verilog_backend_judge.py +0 -234
  351. examples/multi_step/readme.md +0 -48
  352. examples/multi_step/sse_metrics_streaming_notes.md +0 -357
  353. examples/multi_step/task_app_config_notes.md +0 -494
  354. examples/multi_step/verilog_rl_lora.md +0 -218
  355. examples/qwen_coder/README.md +0 -102
  356. examples/qwen_coder/_shared.py +0 -113
  357. examples/qwen_coder/configs/coder_lora_30b.toml +0 -61
  358. examples/qwen_coder/configs/coder_lora_4b.toml +0 -57
  359. examples/qwen_coder/configs/coder_lora_small.toml +0 -58
  360. examples/qwen_coder/generate_dataset.py +0 -98
  361. examples/qwen_coder/infer_ft_smoke.py +0 -65
  362. examples/qwen_coder/infer_prod_proxy.py +0 -73
  363. examples/qwen_coder/infer_via_synth.py +0 -87
  364. examples/qwen_coder/scripts/infer_coder.sh +0 -19
  365. examples/qwen_coder/scripts/train_coder_30b.sh +0 -22
  366. examples/qwen_coder/sft_full_17b.py +0 -103
  367. examples/qwen_coder/sft_lora_30b.py +0 -110
  368. examples/qwen_coder/subset_jsonl.py +0 -39
  369. examples/qwen_coder/todos.md +0 -38
  370. examples/qwen_coder/validate_jsonl.py +0 -60
  371. examples/rl/README.md +0 -169
  372. examples/rl/download_dataset.py +0 -80
  373. examples/run_crafter_demo.sh +0 -10
  374. examples/sft/README.md +0 -139
  375. examples/sft/configs/crafter_fft_qwen0p6b.toml +0 -44
  376. examples/sft/configs/crafter_lora_qwen0p6b.toml +0 -45
  377. examples/sft/evaluate.py +0 -119
  378. examples/sft/export_dataset.py +0 -117
  379. examples/sft/generate_traces.py +0 -164
  380. examples/swe/__init__.py +0 -12
  381. examples/swe/task_app/README.md +0 -105
  382. examples/swe/task_app/__init__.py +0 -2
  383. examples/swe/task_app/grpo_swe_mini.py +0 -601
  384. examples/swe/task_app/grpo_swe_mini_task_app.py +0 -136
  385. examples/swe/task_app/hosted/README.md +0 -173
  386. examples/swe/task_app/hosted/__init__.py +0 -5
  387. examples/swe/task_app/hosted/branching.py +0 -143
  388. examples/swe/task_app/hosted/environment_routes.py +0 -1289
  389. examples/swe/task_app/hosted/envs/__init__.py +0 -1
  390. examples/swe/task_app/hosted/envs/crafter/__init__.py +0 -6
  391. examples/swe/task_app/hosted/envs/crafter/app.py +0 -1
  392. examples/swe/task_app/hosted/envs/crafter/environment.py +0 -522
  393. examples/swe/task_app/hosted/envs/crafter/policy.py +0 -478
  394. examples/swe/task_app/hosted/envs/crafter/react_agent.py +0 -108
  395. examples/swe/task_app/hosted/envs/crafter/shared.py +0 -305
  396. examples/swe/task_app/hosted/envs/crafter/tools.py +0 -47
  397. examples/swe/task_app/hosted/envs/mini_swe/__init__.py +0 -8
  398. examples/swe/task_app/hosted/envs/mini_swe/environment.py +0 -1164
  399. examples/swe/task_app/hosted/envs/mini_swe/policy.py +0 -355
  400. examples/swe/task_app/hosted/envs/mini_swe/shared.py +0 -83
  401. examples/swe/task_app/hosted/envs/mini_swe/tools.py +0 -96
  402. examples/swe/task_app/hosted/hosted_app.py +0 -204
  403. examples/swe/task_app/hosted/inference/__init__.py +0 -5
  404. examples/swe/task_app/hosted/inference/openai_client.py +0 -618
  405. examples/swe/task_app/hosted/main.py +0 -100
  406. examples/swe/task_app/hosted/policy_routes.py +0 -1079
  407. examples/swe/task_app/hosted/registry.py +0 -195
  408. examples/swe/task_app/hosted/rollout.py +0 -1911
  409. examples/swe/task_app/hosted/storage/__init__.py +0 -5
  410. examples/swe/task_app/hosted/storage/volume.py +0 -211
  411. examples/swe/task_app/hosted/test_agents.py +0 -161
  412. examples/swe/task_app/hosted/test_service.py +0 -136
  413. examples/swe/task_app/hosted/utils.py +0 -62
  414. examples/task_apps/IMAGE_ONLY_EVAL_QUICKSTART.md +0 -258
  415. examples/task_apps/TESTING.md +0 -275
  416. examples/task_apps/crafter/CREATE_SFT_DATASET.md +0 -273
  417. examples/task_apps/crafter/EVAL_IMAGE_ONLY_RESULTS.md +0 -152
  418. examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +0 -174
  419. examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +0 -268
  420. examples/task_apps/crafter/QUERY_EXAMPLES.md +0 -203
  421. examples/task_apps/crafter/README_IMAGE_ONLY_EVAL.md +0 -316
  422. examples/task_apps/crafter/__init__.py +0 -0
  423. examples/task_apps/crafter/eval_image_only_gpt4o.toml +0 -28
  424. examples/task_apps/crafter/eval_text_only_groq_llama.toml +0 -36
  425. examples/task_apps/crafter/filter_sft_dataset.toml +0 -16
  426. examples/task_apps/crafter/task_app/README.md +0 -42
  427. examples/task_apps/crafter/task_app/__init__.py +0 -5
  428. examples/task_apps/crafter/task_app/grpo_crafter.py +0 -973
  429. examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +0 -146
  430. examples/task_apps/crafter/task_app/synth_envs_hosted/README.md +0 -173
  431. examples/task_apps/crafter/task_app/synth_envs_hosted/__init__.py +0 -5
  432. examples/task_apps/crafter/task_app/synth_envs_hosted/branching.py +0 -143
  433. examples/task_apps/crafter/task_app/synth_envs_hosted/environment_routes.py +0 -1226
  434. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/__init__.py +0 -1
  435. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/__init__.py +0 -6
  436. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/app.py +0 -1
  437. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/environment.py +0 -532
  438. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +0 -547
  439. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +0 -123
  440. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/shared.py +0 -305
  441. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/tools.py +0 -47
  442. examples/task_apps/crafter/task_app/synth_envs_hosted/hosted_app.py +0 -204
  443. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/__init__.py +0 -5
  444. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +0 -704
  445. examples/task_apps/crafter/task_app/synth_envs_hosted/main.py +0 -100
  446. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +0 -1152
  447. examples/task_apps/crafter/task_app/synth_envs_hosted/registry.py +0 -195
  448. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +0 -2160
  449. examples/task_apps/crafter/task_app/synth_envs_hosted/storage/__init__.py +0 -5
  450. examples/task_apps/crafter/task_app/synth_envs_hosted/storage/volume.py +0 -211
  451. examples/task_apps/crafter/task_app/synth_envs_hosted/test_agents.py +0 -161
  452. examples/task_apps/crafter/task_app/synth_envs_hosted/test_service.py +0 -136
  453. examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +0 -218
  454. examples/task_apps/dev/pokemon_emerald/__init__.py +0 -2
  455. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/README.md +0 -811
  456. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/__init__.py +0 -120
  457. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/action.py +0 -160
  458. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/memory.py +0 -155
  459. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/perception.py +0 -69
  460. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/planning.py +0 -96
  461. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/simple.py +0 -1502
  462. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/system_prompt.py +0 -4
  463. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/grab_map.py +0 -68
  464. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/manual.py +0 -216
  465. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/__init__.py +0 -35
  466. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emerald_utils.py +0 -631
  467. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emulator.py +0 -1544
  468. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/enums.py +0 -1428
  469. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/memory_reader.py +0 -4848
  470. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/types.py +0 -41
  471. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/utils.py +0 -298
  472. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pyproject.toml +0 -95
  473. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/run.py +0 -204
  474. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/__init__.py +0 -0
  475. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/app.py +0 -2152
  476. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/client.py +0 -429
  477. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/frame_server.py +0 -155
  478. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/README.md +0 -78
  479. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/__init__.py +0 -0
  480. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/run_tests.py +0 -122
  481. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_direct.py +0 -76
  482. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_prompts.py +0 -413
  483. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_battle_state_formatting.py +0 -204
  484. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection.py +0 -133
  485. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection_comprehensive.py +0 -229
  486. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_direct_agent_emulator.py +0 -300
  487. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_fps_adjustment_pytest.py +0 -205
  488. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_direct.py +0 -200
  489. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_transition.py +0 -284
  490. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_map_ground_truth_comparison.py +0 -468
  491. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_memory_map.py +0 -575
  492. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_server_map_validation.py +0 -311
  493. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_torchic_state.py +0 -259
  494. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/__init__.py +0 -0
  495. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/anticheat.py +0 -372
  496. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/checkpoint.py +0 -296
  497. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/error_handler.py +0 -275
  498. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/get_local_ip.py +0 -22
  499. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/helpers.py +0 -44
  500. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/llm_logger.py +0 -514
  501. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_formatter.py +0 -415
  502. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher.py +0 -1763
  503. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher_singleton.py +0 -33
  504. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_trimmer.py +0 -106
  505. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_visualizer.py +0 -334
  506. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/ocr_dialogue.py +0 -1020
  507. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/recording.py +0 -188
  508. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/state_formatter.py +0 -1481
  509. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/vlm.py +0 -862
  510. examples/task_apps/dev/pokemon_emerald/modal_app.py +0 -114
  511. examples/task_apps/dev/pokemon_emerald/task_app/README.md +0 -81
  512. examples/task_apps/dev/pokemon_emerald/task_app/__init__.py +0 -6
  513. examples/task_apps/dev/pokemon_emerald/task_app/pokemon_emerald.py +0 -685
  514. examples/task_apps/enron/__init__.py +0 -1
  515. examples/task_apps/enron/eval_groq_qwen32.toml +0 -16
  516. examples/task_apps/enron/filter_sft.toml +0 -5
  517. examples/task_apps/enron/task_app/README.md +0 -14
  518. examples/task_apps/enron/task_app/__init__.py +0 -1
  519. examples/task_apps/enron/task_app/grpo_enron.py +0 -906
  520. examples/task_apps/enron/task_app/grpo_enron_task_app.py +0 -146
  521. examples/task_apps/enron/tests/__init__.py +0 -4
  522. examples/task_apps/enron/tests/conftest.py +0 -115
  523. examples/task_apps/enron/tests/integration/__init__.py +0 -4
  524. examples/task_apps/enron/tests/integration/test_enron_eval.py +0 -179
  525. examples/task_apps/enron/tests/integration/test_enron_rollout.py +0 -135
  526. examples/task_apps/enron/tests/unit/__init__.py +0 -4
  527. examples/task_apps/enron/tests/unit/test_enron_environment.py +0 -126
  528. examples/task_apps/math/README.md +0 -22
  529. examples/task_apps/math/__init__.py +0 -0
  530. examples/task_apps/math/math_single_step.py +0 -1000
  531. examples/task_apps/math/math_task_app.py +0 -115
  532. examples/task_apps/pokemon_battle/__init__.py +0 -2
  533. examples/task_apps/pokemon_battle/modal_app.py +0 -104
  534. examples/task_apps/pokemon_battle/task_app/README.md +0 -68
  535. examples/task_apps/pokemon_battle/task_app/__init__.py +0 -6
  536. examples/task_apps/pokemon_battle/task_app/pokemon_showdown.py +0 -932
  537. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_COMPLETE.md +0 -283
  538. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_STATUS.md +0 -155
  539. examples/task_apps/pokemon_red/README.md +0 -357
  540. examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +0 -415
  541. examples/task_apps/pokemon_red/__init__.py +0 -3
  542. examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +0 -29
  543. examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +0 -225
  544. examples/task_apps/pokemon_red/pallet_town_rl_config.toml +0 -75
  545. examples/task_apps/pokemon_red/task_app.py +0 -799
  546. examples/task_apps/pokemon_red/test_pallet_town_rewards.py +0 -193
  547. examples/task_apps/sokoban/README.md +0 -307
  548. examples/task_apps/sokoban/__init__.py +0 -3
  549. examples/task_apps/sokoban/eval_groq_qwen32.toml +0 -16
  550. examples/task_apps/sokoban/eval_openai_gpt5.toml +0 -16
  551. examples/task_apps/sokoban/filter_sft.toml +0 -5
  552. examples/task_apps/sokoban/task_app.py +0 -1058
  553. examples/task_apps/sokoban/tests/__init__.py +0 -4
  554. examples/task_apps/sokoban/tests/conftest.py +0 -113
  555. examples/task_apps/sokoban/tests/integration/__init__.py +0 -4
  556. examples/task_apps/sokoban/tests/integration/test_sokoban_eval.py +0 -57
  557. examples/task_apps/sokoban/tests/integration/test_sokoban_rollout.py +0 -198
  558. examples/task_apps/sokoban/tests/unit/__init__.py +0 -4
  559. examples/task_apps/sokoban/tests/unit/test_sokoban_environment.py +0 -114
  560. examples/task_apps/verilog/__init__.py +0 -1
  561. examples/task_apps/verilog/eval_groq_qwen32b.toml +0 -24
  562. examples/task_apps/verilog/filter_sft.toml +0 -5
  563. examples/task_apps/verilog/task_app/README.md +0 -12
  564. examples/task_apps/verilog/task_app/__init__.py +0 -1
  565. examples/task_apps/verilog/task_app/grpo_verilog.py +0 -1166
  566. examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +0 -145
  567. examples/task_apps/verilog/tests/__init__.py +0 -4
  568. examples/task_apps/verilog/tests/conftest.py +0 -115
  569. examples/task_apps/verilog/tests/integration/__init__.py +0 -4
  570. examples/task_apps/verilog/tests/integration/test_verilog_eval.py +0 -181
  571. examples/task_apps/verilog/tests/integration/test_verilog_rollout.py +0 -55
  572. examples/task_apps/verilog/tests/unit/__init__.py +0 -4
  573. examples/task_apps/verilog/tests/unit/test_verilog_scoring.py +0 -118
  574. examples/vlm/PROPOSAL.md +0 -53
  575. examples/vlm/README.md +0 -68
  576. examples/vlm/configs/crafter_vlm_gpt4o.toml +0 -44
  577. examples/vlm/crafter_image_only_agent.py +0 -207
  578. examples/vlm/crafter_openai_vlm_agent.py +0 -277
  579. examples/vlm/filter_image_rows.py +0 -63
  580. examples/vlm/run_crafter_vlm_benchmark.py +0 -316
  581. examples/warming_up_to_rl/analyze_trace_db.py +0 -422
  582. examples/warming_up_to_rl/configs/crafter_fft.toml +0 -48
  583. examples/warming_up_to_rl/configs/crafter_fft_4b.toml +0 -54
  584. examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +0 -20
  585. examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +0 -13
  586. examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +0 -23
  587. examples/warming_up_to_rl/configs/eval_stepwise_complex.toml +0 -35
  588. examples/warming_up_to_rl/configs/eval_stepwise_consistent.toml +0 -26
  589. examples/warming_up_to_rl/configs/eval_stepwise_per_achievement.toml +0 -36
  590. examples/warming_up_to_rl/configs/eval_stepwise_simple.toml +0 -32
  591. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +0 -83
  592. examples/warming_up_to_rl/configs/rl_from_ft.toml +0 -56
  593. examples/warming_up_to_rl/export_trace_sft.py +0 -723
  594. examples/warming_up_to_rl/groq_test.py +0 -97
  595. examples/warming_up_to_rl/manage_secrets.py +0 -131
  596. examples/warming_up_to_rl/old/event_rewards.md +0 -234
  597. examples/warming_up_to_rl/old/notes.md +0 -73
  598. examples/warming_up_to_rl/readme.md +0 -179
  599. examples/warming_up_to_rl/run_eval.py +0 -736
  600. examples/warming_up_to_rl/run_fft_and_save.py +0 -380
  601. examples/warming_up_to_rl/run_local_rollout.py +0 -239
  602. examples/warming_up_to_rl/run_local_rollout_modal.py +0 -248
  603. examples/warming_up_to_rl/run_local_rollout_parallel.py +0 -405
  604. examples/warming_up_to_rl/run_local_rollout_traced.py +0 -477
  605. examples/warming_up_to_rl/run_rl_and_save.py +0 -124
  606. examples/warming_up_to_rl/run_rollout_remote.py +0 -156
  607. examples/workflows/__init__.py +0 -0
  608. examples/workflows/math_rl/__init__.py +0 -0
  609. examples/workflows/math_rl/configs/eval_base_qwen.toml +0 -15
  610. examples/workflows/math_rl/configs/eval_rl_qwen.toml +0 -11
  611. examples/workflows/math_rl/configs/rl_from_base_qwen.toml +0 -35
  612. examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +0 -74
  613. examples/workflows/math_rl/configs/rl_from_ft_qwen.toml +0 -35
  614. examples/workflows/math_rl/download_dataset.py +0 -80
  615. examples/workflows/math_rl/run_eval.py +0 -436
  616. examples/workflows/math_rl/run_rl_and_save.py +0 -111
  617. synth_ai/api/models/supported.py +0 -377
  618. synth_ai/api/train/__init__.py +0 -5
  619. synth_ai/api/train/builders.py +0 -351
  620. synth_ai/api/train/cli.py +0 -635
  621. synth_ai/api/train/config_finder.py +0 -228
  622. synth_ai/api/train/configs/__init__.py +0 -44
  623. synth_ai/api/train/configs/rl.py +0 -134
  624. synth_ai/api/train/configs/sft.py +0 -95
  625. synth_ai/api/train/configs/shared.py +0 -24
  626. synth_ai/api/train/env_resolver.py +0 -349
  627. synth_ai/api/train/pollers.py +0 -75
  628. synth_ai/api/train/supported_algos.py +0 -147
  629. synth_ai/api/train/task_app.py +0 -195
  630. synth_ai/api/train/utils.py +0 -225
  631. synth_ai/cli/_modal_wrapper.py +0 -29
  632. synth_ai/cli/_storage.py +0 -20
  633. synth_ai/cli/_typer_patch.py +0 -49
  634. synth_ai/cli/_validate_task_app.py +0 -11
  635. synth_ai/cli/balance.py +0 -216
  636. synth_ai/cli/calc.py +0 -84
  637. synth_ai/cli/demo.py +0 -165
  638. synth_ai/cli/legacy_root_backup.py +0 -468
  639. synth_ai/cli/man.py +0 -106
  640. synth_ai/cli/recent.py +0 -132
  641. synth_ai/cli/rl_demo.py +0 -254
  642. synth_ai/cli/status.py +0 -134
  643. synth_ai/cli/task_apps.py +0 -4523
  644. synth_ai/cli/traces.py +0 -164
  645. synth_ai/cli/tui.py +0 -57
  646. synth_ai/cli/watch.py +0 -506
  647. synth_ai/compound/cais.py +0 -0
  648. synth_ai/config/base_url.py +0 -107
  649. synth_ai/core/experiment.py +0 -13
  650. synth_ai/core/system.py +0 -15
  651. synth_ai/demo_registry.py +0 -295
  652. synth_ai/demos/core/__init__.py +0 -1
  653. synth_ai/demos/core/cli.py +0 -1718
  654. synth_ai/demos/demo_task_apps/core.py +0 -440
  655. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +0 -184
  656. synth_ai/demos/demo_task_apps/math/config.toml +0 -74
  657. synth_ai/demos/demo_task_apps/math/deploy_task_app.sh +0 -22
  658. synth_ai/demos/demo_task_apps/math/modal_task_app.py +0 -739
  659. synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -37
  660. synth_ai/environments/__init__.py +0 -31
  661. synth_ai/environments/environment/__init__.py +0 -1
  662. synth_ai/environments/environment/artifacts/__init__.py +0 -1
  663. synth_ai/environments/environment/artifacts/base.py +0 -52
  664. synth_ai/environments/environment/core.py +0 -67
  665. synth_ai/environments/environment/db/__init__.py +0 -1
  666. synth_ai/environments/environment/db/sqlite.py +0 -45
  667. synth_ai/environments/environment/registry.py +0 -233
  668. synth_ai/environments/environment/resources/sqlite.py +0 -45
  669. synth_ai/environments/environment/results.py +0 -1
  670. synth_ai/environments/environment/rewards/__init__.py +0 -1
  671. synth_ai/environments/environment/rewards/core.py +0 -29
  672. synth_ai/environments/environment/shared_engine.py +0 -26
  673. synth_ai/environments/environment/tools/__init__.py +0 -200
  674. synth_ai/environments/examples/__init__.py +0 -1
  675. synth_ai/environments/examples/bandit/__init__.py +0 -33
  676. synth_ai/environments/examples/bandit/engine.py +0 -302
  677. synth_ai/environments/examples/bandit/environment.py +0 -194
  678. synth_ai/environments/examples/bandit/taskset.py +0 -200
  679. synth_ai/environments/examples/crafter_classic/__init__.py +0 -8
  680. synth_ai/environments/examples/crafter_classic/agent_demos/analyze_semantic_words_markdown.py +0 -250
  681. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_comprehensive_evaluation.py +0 -59
  682. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_browser.py +0 -152
  683. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_config.toml +0 -24
  684. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_framework.py +0 -1194
  685. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/crafter_synth_config.toml +0 -56
  686. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_config_modal.toml +0 -32
  687. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_turso.py +0 -738
  688. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/kick_off_ft_modal.py +0 -384
  689. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_action_results.py +0 -53
  690. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_agent_actions.py +0 -178
  691. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_latest_run.py +0 -222
  692. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_lm_traces.py +0 -183
  693. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_no_rewards.py +0 -210
  694. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_trace_issue.py +0 -206
  695. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/check_db_schema.py +0 -49
  696. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/check_latest_results.py +0 -64
  697. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/debug_agent_responses.py +0 -88
  698. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/quick_trace_check.py +0 -77
  699. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/compare_experiments.py +0 -324
  700. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_turso.py +0 -580
  701. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/kick_off_ft_oai.py +0 -362
  702. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/multi_model_config.toml +0 -49
  703. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_enhanced_hooks.py +0 -332
  704. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_hook_events.py +0 -97
  705. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_hook_results.py +0 -217
  706. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/check_hook_storage.py +0 -87
  707. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/check_seeds.py +0 -88
  708. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/compare_seed_performance.py +0 -195
  709. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/custom_eval_pipelines.py +0 -400
  710. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/plot_hook_frequency.py +0 -195
  711. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/seed_analysis_summary.py +0 -56
  712. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/run_rollouts_for_models_and_compare_v3.py +0 -858
  713. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_quick_evaluation.py +0 -52
  714. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_react_agent.py +0 -874
  715. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_trace_evaluation.py +0 -1412
  716. synth_ai/environments/examples/crafter_classic/agent_demos/example_v3_usage.py +0 -216
  717. synth_ai/environments/examples/crafter_classic/agent_demos/old/compare_traces.py +0 -296
  718. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_comprehensive_evaluation.py +0 -58
  719. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_env_serialization.py +0 -464
  720. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_evaluation_browser.py +0 -152
  721. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_quick_evaluation.py +0 -51
  722. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_trace_evaluation.py +0 -1412
  723. synth_ai/environments/examples/crafter_classic/agent_demos/old/debug_player_loss.py +0 -112
  724. synth_ai/environments/examples/crafter_classic/agent_demos/old/diagnose_service.py +0 -203
  725. synth_ai/environments/examples/crafter_classic/agent_demos/old/diagnose_slowness.py +0 -305
  726. synth_ai/environments/examples/crafter_classic/agent_demos/old/eval_by_difficulty.py +0 -126
  727. synth_ai/environments/examples/crafter_classic/agent_demos/old/eval_example.py +0 -94
  728. synth_ai/environments/examples/crafter_classic/agent_demos/old/explore_saved_states.py +0 -142
  729. synth_ai/environments/examples/crafter_classic/agent_demos/old/filter_traces_sft.py +0 -26
  730. synth_ai/environments/examples/crafter_classic/agent_demos/old/filter_traces_sft_OLD.py +0 -984
  731. synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_data_gemini.py +0 -724
  732. synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_data_modal.py +0 -386
  733. synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_metadata.py +0 -205
  734. synth_ai/environments/examples/crafter_classic/agent_demos/old/kick_off_ft_gemini.py +0 -150
  735. synth_ai/environments/examples/crafter_classic/agent_demos/old/kick_off_ft_modal.py +0 -283
  736. synth_ai/environments/examples/crafter_classic/agent_demos/old/prepare_vertex_ft.py +0 -280
  737. synth_ai/environments/examples/crafter_classic/agent_demos/old/profile_env_slowness.py +0 -456
  738. synth_ai/environments/examples/crafter_classic/agent_demos/old/replicate_issue.py +0 -166
  739. synth_ai/environments/examples/crafter_classic/agent_demos/old/run_and_eval.py +0 -102
  740. synth_ai/environments/examples/crafter_classic/agent_demos/old/run_comparison.py +0 -128
  741. synth_ai/environments/examples/crafter_classic/agent_demos/old/run_qwen_rollouts.py +0 -655
  742. synth_ai/environments/examples/crafter_classic/agent_demos/old/trace_eval_OLD.py +0 -202
  743. synth_ai/environments/examples/crafter_classic/agent_demos/old/validate_openai_format.py +0 -166
  744. synth_ai/environments/examples/crafter_classic/config_logging.py +0 -111
  745. synth_ai/environments/examples/crafter_classic/debug_translation.py +0 -0
  746. synth_ai/environments/examples/crafter_classic/engine.py +0 -579
  747. synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +0 -64
  748. synth_ai/environments/examples/crafter_classic/engine_helpers/action_map.py +0 -6
  749. synth_ai/environments/examples/crafter_classic/engine_helpers/serialization.py +0 -75
  750. synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +0 -267
  751. synth_ai/environments/examples/crafter_classic/environment.py +0 -495
  752. synth_ai/environments/examples/crafter_classic/taskset.py +0 -233
  753. synth_ai/environments/examples/crafter_classic/trace_hooks_v3.py +0 -228
  754. synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +0 -299
  755. synth_ai/environments/examples/crafter_custom/__init__.py +0 -4
  756. synth_ai/environments/examples/crafter_custom/agent_demos/__init__.py +0 -1
  757. synth_ai/environments/examples/crafter_custom/agent_demos/trace_eval.py +0 -202
  758. synth_ai/environments/examples/crafter_custom/crafter/__init__.py +0 -7
  759. synth_ai/environments/examples/crafter_custom/crafter/config.py +0 -182
  760. synth_ai/environments/examples/crafter_custom/crafter/constants.py +0 -8
  761. synth_ai/environments/examples/crafter_custom/crafter/engine.py +0 -269
  762. synth_ai/environments/examples/crafter_custom/crafter/env.py +0 -262
  763. synth_ai/environments/examples/crafter_custom/crafter/objects.py +0 -417
  764. synth_ai/environments/examples/crafter_custom/crafter/recorder.py +0 -187
  765. synth_ai/environments/examples/crafter_custom/crafter/worldgen.py +0 -118
  766. synth_ai/environments/examples/crafter_custom/dataset_builder.py +0 -373
  767. synth_ai/environments/examples/crafter_custom/environment.py +0 -312
  768. synth_ai/environments/examples/crafter_custom/old/analyze_diamond_issue.py +0 -159
  769. synth_ai/environments/examples/crafter_custom/old/analyze_diamond_spawning.py +0 -158
  770. synth_ai/environments/examples/crafter_custom/old/compare_worlds.py +0 -71
  771. synth_ai/environments/examples/crafter_custom/old/dataset_stats.py +0 -105
  772. synth_ai/environments/examples/crafter_custom/old/diamond_spawning_summary.py +0 -119
  773. synth_ai/environments/examples/crafter_custom/old/example_dataset_usage.py +0 -52
  774. synth_ai/environments/examples/crafter_custom/run_dataset.py +0 -305
  775. synth_ai/environments/examples/enron/art_helpers/email_search_tools.py +0 -156
  776. synth_ai/environments/examples/enron/art_helpers/local_email_db.py +0 -281
  777. synth_ai/environments/examples/enron/art_helpers/types_enron.py +0 -25
  778. synth_ai/environments/examples/enron/engine.py +0 -300
  779. synth_ai/environments/examples/enron/environment.py +0 -234
  780. synth_ai/environments/examples/enron/taskset.py +0 -112
  781. synth_ai/environments/examples/enron/units/keyword_stats.py +0 -112
  782. synth_ai/environments/examples/minigrid/__init__.py +0 -48
  783. synth_ai/environments/examples/minigrid/agent_demos/minigrid_evaluation_framework.py +0 -1188
  784. synth_ai/environments/examples/minigrid/agent_demos/minigrid_quick_evaluation.py +0 -48
  785. synth_ai/environments/examples/minigrid/agent_demos/minigrid_react_agent.py +0 -562
  786. synth_ai/environments/examples/minigrid/agent_demos/minigrid_trace_evaluation.py +0 -221
  787. synth_ai/environments/examples/minigrid/engine.py +0 -589
  788. synth_ai/environments/examples/minigrid/environment.py +0 -274
  789. synth_ai/environments/examples/minigrid/environment_mapping.py +0 -242
  790. synth_ai/environments/examples/minigrid/puzzle_loader.py +0 -417
  791. synth_ai/environments/examples/minigrid/taskset.py +0 -583
  792. synth_ai/environments/examples/nethack/__init__.py +0 -7
  793. synth_ai/environments/examples/nethack/achievements.py +0 -337
  794. synth_ai/environments/examples/nethack/agent_demos/nethack_evaluation_framework.py +0 -981
  795. synth_ai/environments/examples/nethack/agent_demos/nethack_quick_evaluation.py +0 -74
  796. synth_ai/environments/examples/nethack/agent_demos/nethack_react_agent.py +0 -831
  797. synth_ai/environments/examples/nethack/engine.py +0 -739
  798. synth_ai/environments/examples/nethack/environment.py +0 -256
  799. synth_ai/environments/examples/nethack/helpers/__init__.py +0 -41
  800. synth_ai/environments/examples/nethack/helpers/action_mapping.py +0 -301
  801. synth_ai/environments/examples/nethack/helpers/nle_wrapper.py +0 -402
  802. synth_ai/environments/examples/nethack/helpers/observation_utils.py +0 -433
  803. synth_ai/environments/examples/nethack/helpers/recording_wrapper.py +0 -200
  804. synth_ai/environments/examples/nethack/helpers/trajectory_recorder.py +0 -269
  805. synth_ai/environments/examples/nethack/helpers/visualization/replay_viewer.py +0 -308
  806. synth_ai/environments/examples/nethack/helpers/visualization/visualizer.py +0 -431
  807. synth_ai/environments/examples/nethack/taskset.py +0 -323
  808. synth_ai/environments/examples/red/__init__.py +0 -7
  809. synth_ai/environments/examples/red/agent_demos/__init__.py +0 -1
  810. synth_ai/environments/examples/red/config_logging.py +0 -110
  811. synth_ai/environments/examples/red/engine.py +0 -721
  812. synth_ai/environments/examples/red/engine_helpers/__init__.py +0 -1
  813. synth_ai/environments/examples/red/engine_helpers/memory_map.py +0 -35
  814. synth_ai/environments/examples/red/engine_helpers/reward_components.py +0 -276
  815. synth_ai/environments/examples/red/engine_helpers/reward_library/__init__.py +0 -142
  816. synth_ai/environments/examples/red/engine_helpers/reward_library/adaptive_rewards.py +0 -57
  817. synth_ai/environments/examples/red/engine_helpers/reward_library/battle_rewards.py +0 -284
  818. synth_ai/environments/examples/red/engine_helpers/reward_library/composite_rewards.py +0 -150
  819. synth_ai/environments/examples/red/engine_helpers/reward_library/economy_rewards.py +0 -138
  820. synth_ai/environments/examples/red/engine_helpers/reward_library/efficiency_rewards.py +0 -57
  821. synth_ai/environments/examples/red/engine_helpers/reward_library/exploration_rewards.py +0 -331
  822. synth_ai/environments/examples/red/engine_helpers/reward_library/novelty_rewards.py +0 -121
  823. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_progression.py +0 -477
  824. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_rewards.py +0 -559
  825. synth_ai/environments/examples/red/engine_helpers/reward_library/pokemon_rewards.py +0 -313
  826. synth_ai/environments/examples/red/engine_helpers/reward_library/social_rewards.py +0 -148
  827. synth_ai/environments/examples/red/engine_helpers/reward_library/story_rewards.py +0 -247
  828. synth_ai/environments/examples/red/engine_helpers/screen_analysis.py +0 -368
  829. synth_ai/environments/examples/red/engine_helpers/state_extraction.py +0 -172
  830. synth_ai/environments/examples/red/environment.py +0 -298
  831. synth_ai/environments/examples/red/taskset.py +0 -79
  832. synth_ai/environments/examples/red/units/__init__.py +0 -1
  833. synth_ai/environments/examples/sokoban/__init__.py +0 -1
  834. synth_ai/environments/examples/sokoban/agent_demos/sokoban_full_eval.py +0 -899
  835. synth_ai/environments/examples/sokoban/engine.py +0 -678
  836. synth_ai/environments/examples/sokoban/engine_helpers/__init__.py +0 -1
  837. synth_ai/environments/examples/sokoban/engine_helpers/room_utils.py +0 -657
  838. synth_ai/environments/examples/sokoban/engine_helpers/vendored/__init__.py +0 -18
  839. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/__init__.py +0 -3
  840. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/boxoban_env.py +0 -131
  841. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/render_utils.py +0 -370
  842. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/room_utils.py +0 -332
  843. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env.py +0 -306
  844. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_fixed_targets.py +0 -67
  845. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_pull.py +0 -115
  846. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_two_player.py +0 -123
  847. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_variations.py +0 -394
  848. synth_ai/environments/examples/sokoban/environment.py +0 -229
  849. synth_ai/environments/examples/sokoban/generate_verified_puzzles.py +0 -440
  850. synth_ai/environments/examples/sokoban/puzzle_loader.py +0 -312
  851. synth_ai/environments/examples/sokoban/taskset.py +0 -544
  852. synth_ai/environments/examples/tictactoe/__init__.py +0 -1
  853. synth_ai/environments/examples/tictactoe/engine.py +0 -368
  854. synth_ai/environments/examples/tictactoe/environment.py +0 -240
  855. synth_ai/environments/examples/tictactoe/taskset.py +0 -215
  856. synth_ai/environments/examples/verilog/__init__.py +0 -10
  857. synth_ai/environments/examples/verilog/engine.py +0 -421
  858. synth_ai/environments/examples/verilog/environment.py +0 -350
  859. synth_ai/environments/examples/verilog/taskset.py +0 -420
  860. synth_ai/environments/examples/wordle/__init__.py +0 -29
  861. synth_ai/environments/examples/wordle/engine.py +0 -398
  862. synth_ai/environments/examples/wordle/environment.py +0 -159
  863. synth_ai/environments/examples/wordle/helpers/generate_instances_wordfreq.py +0 -75
  864. synth_ai/environments/examples/wordle/taskset.py +0 -230
  865. synth_ai/environments/reproducibility/core.py +0 -42
  866. synth_ai/environments/reproducibility/helpers.py +0 -0
  867. synth_ai/environments/reproducibility/tree.py +0 -363
  868. synth_ai/environments/service/app.py +0 -97
  869. synth_ai/environments/service/core_routes.py +0 -1021
  870. synth_ai/environments/service/external_registry.py +0 -56
  871. synth_ai/environments/service/registry.py +0 -9
  872. synth_ai/environments/stateful/__init__.py +0 -1
  873. synth_ai/environments/stateful/core.py +0 -163
  874. synth_ai/environments/stateful/engine.py +0 -21
  875. synth_ai/environments/stateful/state.py +0 -7
  876. synth_ai/environments/tasks/api.py +0 -19
  877. synth_ai/environments/tasks/core.py +0 -81
  878. synth_ai/environments/tasks/filters.py +0 -40
  879. synth_ai/environments/tasks/utils.py +0 -90
  880. synth_ai/environments/v0_observability/history.py +0 -3
  881. synth_ai/environments/v0_observability/log.py +0 -2
  882. synth_ai/evals/__init__.py +0 -15
  883. synth_ai/evals/base.py +0 -13
  884. synth_ai/evals/client.py +0 -82
  885. synth_ai/evals/types.py +0 -42
  886. synth_ai/handshake.py +0 -109
  887. synth_ai/http.py +0 -26
  888. synth_ai/http_client.py +0 -136
  889. synth_ai/inference/__init__.py +0 -5
  890. synth_ai/inference/client.py +0 -34
  891. synth_ai/jobs/client.py +0 -295
  892. synth_ai/judge_schemas.py +0 -127
  893. synth_ai/learning/__init__.py +0 -59
  894. synth_ai/learning/client.py +0 -241
  895. synth_ai/learning/ft_client.py +0 -7
  896. synth_ai/learning/health.py +0 -49
  897. synth_ai/learning/jobs.py +0 -201
  898. synth_ai/learning/rl/__init__.py +0 -39
  899. synth_ai/learning/rl/client.py +0 -267
  900. synth_ai/learning/rl/contracts.py +0 -27
  901. synth_ai/learning/rl/env_keys.py +0 -166
  902. synth_ai/learning/rl/secrets.py +0 -13
  903. synth_ai/learning/sft/client.py +0 -68
  904. synth_ai/learning/sft/config.py +0 -270
  905. synth_ai/learning/sft/data.py +0 -295
  906. synth_ai/learning/validators.py +0 -49
  907. synth_ai/lm/__init__.py +0 -25
  908. synth_ai/task/__init__.py +0 -121
  909. synth_ai/task/apps/__init__.py +0 -129
  910. synth_ai/task/client.py +0 -167
  911. synth_ai/task/config.py +0 -257
  912. synth_ai/task/contracts.py +0 -236
  913. synth_ai/task/datasets.py +0 -108
  914. synth_ai/task/proxy.py +0 -251
  915. synth_ai/task/rubrics/__init__.py +0 -56
  916. synth_ai/task/rubrics/loaders.py +0 -152
  917. synth_ai/task/rubrics/strict.py +0 -149
  918. synth_ai/task/server.py +0 -432
  919. synth_ai/task/trace_correlation_helpers.py +0 -315
  920. synth_ai/task/tracing_utils.py +0 -84
  921. synth_ai/task/validators.py +0 -418
  922. synth_ai/tracing_v3/__init__.py +0 -97
  923. synth_ai/tracing_v3/abstractions.py +0 -302
  924. synth_ai/tracing_v3/config.py +0 -84
  925. synth_ai/tracing_v3/db_config.py +0 -194
  926. synth_ai/tracing_v3/decorators.py +0 -398
  927. synth_ai/tracing_v3/llm_call_record_helpers.py +0 -391
  928. synth_ai/tracing_v3/migration_helper.py +0 -120
  929. synth_ai/tracing_v3/session_tracer.py +0 -540
  930. synth_ai/tracing_v3/storage/base.py +0 -210
  931. synth_ai/tracing_v3/storage/config.py +0 -75
  932. synth_ai/tracing_v3/storage/factory.py +0 -39
  933. synth_ai/tracing_v3/trace_utils.py +0 -317
  934. synth_ai/tracing_v3/turso/daemon.py +0 -151
  935. synth_ai/tracing_v3/turso/models.py +0 -469
  936. synth_ai/tracing_v3/turso/native_manager.py +0 -1209
  937. synth_ai/tracing_v3/utils.py +0 -108
  938. synth_ai/tui/__init__.py +0 -5
  939. synth_ai/tui/__main__.py +0 -13
  940. synth_ai/tui/cli/__init__.py +0 -1
  941. synth_ai/tui/cli/query_experiments.py +0 -164
  942. synth_ai/tui/cli/query_experiments_v3.py +0 -164
  943. synth_ai/tui/dashboard.py +0 -906
  944. synth_ai/v0/api/__init__.py +0 -8
  945. synth_ai/v0/api/models/__init__.py +0 -8
  946. synth_ai/v0/api/models/supported.py +0 -8
  947. synth_ai/v0/config/__init__.py +0 -15
  948. synth_ai/v0/config/base_url.py +0 -12
  949. synth_ai/v0/lm/__init__.py +0 -51
  950. synth_ai/v0/lm/caching/__init__.py +0 -0
  951. synth_ai/v0/lm/caching/constants.py +0 -6
  952. synth_ai/v0/lm/caching/dbs.py +0 -0
  953. synth_ai/v0/lm/caching/ephemeral.py +0 -100
  954. synth_ai/v0/lm/caching/handler.py +0 -137
  955. synth_ai/v0/lm/caching/initialize.py +0 -11
  956. synth_ai/v0/lm/caching/persistent.py +0 -114
  957. synth_ai/v0/lm/config.py +0 -115
  958. synth_ai/v0/lm/constants.py +0 -32
  959. synth_ai/v0/lm/core/__init__.py +0 -8
  960. synth_ai/v0/lm/core/all.py +0 -73
  961. synth_ai/v0/lm/core/exceptions.py +0 -5
  962. synth_ai/v0/lm/core/main.py +0 -331
  963. synth_ai/v0/lm/core/main_v3.py +0 -594
  964. synth_ai/v0/lm/core/synth_models.py +0 -35
  965. synth_ai/v0/lm/core/vendor_clients.py +0 -190
  966. synth_ai/v0/lm/cost/__init__.py +0 -0
  967. synth_ai/v0/lm/cost/monitor.py +0 -1
  968. synth_ai/v0/lm/cost/statefulness.py +0 -1
  969. synth_ai/v0/lm/injection.py +0 -80
  970. synth_ai/v0/lm/overrides.py +0 -206
  971. synth_ai/v0/lm/provider_support/__init__.py +0 -8
  972. synth_ai/v0/lm/provider_support/anthropic.py +0 -972
  973. synth_ai/v0/lm/provider_support/openai.py +0 -1139
  974. synth_ai/v0/lm/provider_support/suppress_logging.py +0 -31
  975. synth_ai/v0/lm/structured_outputs/__init__.py +0 -0
  976. synth_ai/v0/lm/structured_outputs/handler.py +0 -440
  977. synth_ai/v0/lm/structured_outputs/inject.py +0 -297
  978. synth_ai/v0/lm/structured_outputs/rehabilitate.py +0 -185
  979. synth_ai/v0/lm/tools/__init__.py +0 -3
  980. synth_ai/v0/lm/tools/base.py +0 -172
  981. synth_ai/v0/lm/unified_interface.py +0 -202
  982. synth_ai/v0/lm/vendors/__init__.py +0 -0
  983. synth_ai/v0/lm/vendors/base.py +0 -81
  984. synth_ai/v0/lm/vendors/core/__init__.py +0 -0
  985. synth_ai/v0/lm/vendors/core/anthropic_api.py +0 -387
  986. synth_ai/v0/lm/vendors/core/gemini_api.py +0 -292
  987. synth_ai/v0/lm/vendors/core/mistral_api.py +0 -322
  988. synth_ai/v0/lm/vendors/core/openai_api.py +0 -227
  989. synth_ai/v0/lm/vendors/core/synth_dev_api.py +0 -0
  990. synth_ai/v0/lm/vendors/local/__init__.py +0 -0
  991. synth_ai/v0/lm/vendors/local/ollama.py +0 -0
  992. synth_ai/v0/lm/vendors/openai_standard.py +0 -782
  993. synth_ai/v0/lm/vendors/openai_standard_responses.py +0 -259
  994. synth_ai/v0/lm/vendors/retries.py +0 -22
  995. synth_ai/v0/lm/vendors/supported/__init__.py +0 -0
  996. synth_ai/v0/lm/vendors/supported/custom_endpoint.py +0 -415
  997. synth_ai/v0/lm/vendors/supported/deepseek.py +0 -69
  998. synth_ai/v0/lm/vendors/supported/grok.py +0 -75
  999. synth_ai/v0/lm/vendors/supported/groq.py +0 -16
  1000. synth_ai/v0/lm/vendors/supported/ollama.py +0 -15
  1001. synth_ai/v0/lm/vendors/supported/openrouter.py +0 -74
  1002. synth_ai/v0/lm/vendors/supported/together.py +0 -11
  1003. synth_ai/v0/lm/vendors/synth_client.py +0 -835
  1004. synth_ai/v0/lm/warmup.py +0 -186
  1005. synth_ai/v0/tracing/__init__.py +0 -0
  1006. synth_ai/v0/tracing/abstractions.py +0 -224
  1007. synth_ai/v0/tracing/base_client.py +0 -91
  1008. synth_ai/v0/tracing/client_manager.py +0 -131
  1009. synth_ai/v0/tracing/config.py +0 -142
  1010. synth_ai/v0/tracing/context.py +0 -146
  1011. synth_ai/v0/tracing/decorators.py +0 -682
  1012. synth_ai/v0/tracing/events/__init__.py +0 -0
  1013. synth_ai/v0/tracing/events/manage.py +0 -147
  1014. synth_ai/v0/tracing/events/scope.py +0 -86
  1015. synth_ai/v0/tracing/events/store.py +0 -228
  1016. synth_ai/v0/tracing/immediate_client.py +0 -151
  1017. synth_ai/v0/tracing/local.py +0 -18
  1018. synth_ai/v0/tracing/log_client_base.py +0 -73
  1019. synth_ai/v0/tracing/retry_queue.py +0 -186
  1020. synth_ai/v0/tracing/trackers.py +0 -515
  1021. synth_ai/v0/tracing/upload.py +0 -409
  1022. synth_ai/v0/tracing/utils.py +0 -9
  1023. synth_ai/v0/tracing_v1/__init__.py +0 -16
  1024. synth_ai/v0/tracing_v1/abstractions.py +0 -224
  1025. synth_ai/v0/tracing_v1/base_client.py +0 -91
  1026. synth_ai/v0/tracing_v1/client_manager.py +0 -131
  1027. synth_ai/v0/tracing_v1/config.py +0 -142
  1028. synth_ai/v0/tracing_v1/context.py +0 -146
  1029. synth_ai/v0/tracing_v1/decorators.py +0 -703
  1030. synth_ai/v0/tracing_v1/events/__init__.py +0 -0
  1031. synth_ai/v0/tracing_v1/events/manage.py +0 -147
  1032. synth_ai/v0/tracing_v1/events/scope.py +0 -86
  1033. synth_ai/v0/tracing_v1/events/store.py +0 -228
  1034. synth_ai/v0/tracing_v1/immediate_client.py +0 -151
  1035. synth_ai/v0/tracing_v1/local.py +0 -18
  1036. synth_ai/v0/tracing_v1/log_client_base.py +0 -73
  1037. synth_ai/v0/tracing_v1/retry_queue.py +0 -186
  1038. synth_ai/v0/tracing_v1/trackers.py +0 -515
  1039. synth_ai/v0/tracing_v1/upload.py +0 -527
  1040. synth_ai/v0/tracing_v1/utils.py +0 -9
  1041. synth_ai/v0/tracing_v3/__init__.py +0 -10
  1042. synth_ai/v0/tracing_v3/abstractions.py +0 -3
  1043. synth_ai/v0/tracing_v3/decorators.py +0 -3
  1044. synth_ai/v0/tracing_v3/llm_call_record_helpers.py +0 -3
  1045. synth_ai/v0/tracing_v3/session_tracer.py +0 -3
  1046. synth_ai-0.2.14.dist-info/METADATA +0 -139
  1047. synth_ai-0.2.14.dist-info/RECORD +0 -762
  1048. synth_ai-0.2.14.dist-info/top_level.txt +0 -2
  1049. /synth_ai/{demos/demo_task_apps → cli/demo_apps}/crafter/__init__.py +0 -0
  1050. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/__init__.py +0 -0
  1051. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/crafter/configs/crafter_fft_4b.toml +0 -0
  1052. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/crafter/configs/rl_from_base_qwen4b.toml +0 -0
  1053. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/__init__.py +0 -0
  1054. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/_common.py +0 -0
  1055. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/app.py +0 -0
  1056. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/deploy_modal.py +0 -0
  1057. {examples/task_apps → synth_ai/core/apps}/__init__.py +0 -0
  1058. /synth_ai/{tracing_v3 → core/tracing_v3}/examples/basic_usage.py +0 -0
  1059. /synth_ai/{tracing_v3 → core/tracing_v3}/hooks.py +0 -0
  1060. /synth_ai/{tracing_v3 → core/tracing_v3}/lm_call_record_abstractions.py +0 -0
  1061. /synth_ai/{tracing_v3 → core/tracing_v3}/replica_sync.py +0 -0
  1062. /synth_ai/{tracing_v3 → core/tracing_v3}/serialization.py +0 -0
  1063. /synth_ai/{tracing_v3 → core/tracing_v3}/storage/__init__.py +0 -0
  1064. /synth_ai/{tracing_v3 → core/tracing_v3}/storage/exceptions.py +0 -0
  1065. /synth_ai/{tracing_v3 → core/tracing_v3}/storage/types.py +0 -0
  1066. /synth_ai/{tracing_v3 → core/tracing_v3}/storage/utils.py +0 -0
  1067. /synth_ai/{tracing_v3 → core/tracing_v3}/turso/__init__.py +0 -0
  1068. /synth_ai/{learning → sdk/learning}/algorithms.py +0 -0
  1069. /synth_ai/{learning → sdk/learning}/config.py +0 -0
  1070. /synth_ai/{learning → sdk/learning}/constants.py +0 -0
  1071. /synth_ai/{learning → sdk/learning}/core.py +0 -0
  1072. /synth_ai/{learning → sdk/learning}/gateway.py +0 -0
  1073. /synth_ai/{learning → sdk/learning}/rl/config.py +0 -0
  1074. /synth_ai/{learning → sdk/learning}/rl_client.py +0 -0
  1075. /synth_ai/{learning → sdk/learning}/sft/__init__.py +0 -0
  1076. /synth_ai/{learning → sdk/learning}/sse.py +0 -0
  1077. /synth_ai/{task → sdk/task}/auth.py +0 -0
  1078. /synth_ai/{task → sdk/task}/errors.py +0 -0
  1079. /synth_ai/{task → sdk/task}/health.py +0 -0
  1080. /synth_ai/{task → sdk/task}/json.py +0 -0
  1081. /synth_ai/{task → sdk/task}/rubrics/models.py +0 -0
  1082. /synth_ai/{task → sdk/task}/rubrics/scoring.py +0 -0
  1083. /synth_ai/{task → sdk/task}/vendors.py +0 -0
  1084. {synth_ai-0.2.14.dist-info → synth_ai-0.4.4.dist-info}/WHEEL +0 -0
  1085. {synth_ai-0.2.14.dist-info → synth_ai-0.4.4.dist-info}/entry_points.txt +0 -0
  1086. {synth_ai-0.2.14.dist-info → synth_ai-0.4.4.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,1428 @@
1
+ from __future__ import annotations
2
+
3
+ import asyncio
4
+ import contextlib
5
+ import logging
6
+ import os
7
+ import subprocess
8
+ import sys
9
+ import time
10
+ import tomllib
11
+ import uuid
12
+ from pathlib import Path
13
+ from typing import Any
14
+ from urllib.parse import parse_qsl, urlencode, urlparse, urlunparse
15
+
16
+ import click
17
+ import httpx
18
+
19
+ from synth_ai.core.tracing_v3.config import resolve_trace_db_settings
20
+ from synth_ai.core.tracing_v3.turso.daemon import start_sqld
21
+ from synth_ai.sdk.localapi.client import LocalAPIClient
22
+ from synth_ai.sdk.task.contracts import (
23
+ RolloutEnvSpec,
24
+ RolloutMode,
25
+ RolloutPolicySpec,
26
+ RolloutRecordConfig,
27
+ RolloutRequest,
28
+ )
29
+ from synth_ai.sdk.task.validators import (
30
+ normalize_inference_url,
31
+ validate_rollout_response_for_rl,
32
+ validate_task_app_url,
33
+ )
34
+
35
+
36
+ def _append_query_param(url: str, key: str, value: str) -> str:
37
+ parsed = urlparse(url)
38
+ params = dict(parse_qsl(parsed.query, keep_blank_values=True))
39
+ params[key] = value
40
+ new_query = urlencode(params)
41
+ result = urlunparse(parsed._replace(query=new_query))
42
+ return str(result)
43
+
44
+
45
def _ensure_local_libsql() -> None:
    """Start (or reuse) a local sqld/libSQL instance, aborting on failure.

    Directories and ports come from ``SYNTH_TRACES_DIR``, ``SQLD_DB_PATH`` and
    ``SQLD_HTTP_PORT``. On success the function exports ``LIBSQL_URL`` and
    ``SYNTH_TRACES_DB`` pointing at the HTTP API and removes any auth-token
    variables from the environment.

    Raises:
        click.ClickException: if sqld cannot be started for a reason other
            than the address being in use, or if the health endpoint does not
            answer 200 within five seconds.
    """
    fallback_traces_dir = str((Path.cwd() / "traces" / "v3").resolve())
    traces_root = Path(os.getenv("SYNTH_TRACES_DIR", fallback_traces_dir))
    traces_root.mkdir(parents=True, exist_ok=True)

    fallback_db = str(traces_root / "local.db")
    local_db_path = Path(os.getenv("SQLD_DB_PATH", fallback_db)).resolve()
    local_db_path.parent.mkdir(parents=True, exist_ok=True)

    # SQLD_HTTP_PORT holds the Hrana port here; the HTTP API is one port above.
    hrana_port = int(os.getenv("SQLD_HTTP_PORT", "8080"))
    http_port = hrana_port + 1
    os.environ["SQLD_DB_PATH"] = str(local_db_path)
    os.environ["SQLD_HTTP_PORT"] = str(hrana_port)

    started_new = True
    try:
        start_sqld(db_path=str(local_db_path), hrana_port=hrana_port, http_port=http_port)
    except Exception as exc:
        if "Address already in use" not in str(exc):
            raise click.ClickException(
                f"Failed to start local sqld on 127.0.0.1:{hrana_port}: {exc}"
            ) from exc
        # Port taken: assume another sqld owns it and let the health probe decide.
        started_new = False
        click.echo(
            f"[libsql] sqld already running on 127.0.0.1:{hrana_port} (hrana) and 127.0.0.1:{http_port} (http); attempting to reuse", err=True
        )

    health_url = f"http://127.0.0.1:{http_port}/health"
    give_up_at = time.time() + 5.0
    while time.time() < give_up_at:
        try:
            if httpx.get(health_url, timeout=0.5).status_code == 200:
                break
        except Exception:
            # Not accepting connections yet; keep polling.
            pass
        time.sleep(0.1)
    else:
        # Loop ran out without ever seeing a 200.
        raise click.ClickException(
            f"Tracing backend not reachable at {health_url}. "
            "Start sqld manually or disable tracing (TASKAPP_TRACING_ENABLED=0)."
        )

    click.echo(
        f"[libsql] sqld ready on libsql://127.0.0.1:{hrana_port} with HTTP API on :{http_port} (started_new={started_new})",
        err=True,
    )

    # Python libsql client uses HTTP API port, not Hrana WebSocket port
    local_dsn = f"http://127.0.0.1:{http_port}"
    for env_name in ("LIBSQL_URL", "SYNTH_TRACES_DB"):
        os.environ[env_name] = local_dsn
    for token_name in ("LIBSQL_AUTH_TOKEN", "TURSO_AUTH_TOKEN"):
        os.environ.pop(token_name, None)
105
+
106
+
107
def _refresh_tracing_config() -> None:
    """Re-create the module-level tracing/storage configs from the environment.

    Replaces ``CONFIG`` in the tracing config module with a fresh
    ``TursoConfig`` and ``STORAGE_CONFIG`` in the storage config module with a
    ``StorageConfig`` bound to ``SYNTH_TRACES_DB``.

    Raises:
        KeyError: if ``SYNTH_TRACES_DB`` is not set.
    """
    from synth_ai.core.tracing_v3 import config as tracing_cfg
    from synth_ai.core.tracing_v3.storage import config as storage_cfg

    tracing_cfg.CONFIG = tracing_cfg.TursoConfig()  # type: ignore[assignment]

    traces_dsn = os.environ["SYNTH_TRACES_DB"]
    native_backend = storage_cfg.StorageBackend.TURSO_NATIVE
    storage_cfg.STORAGE_CONFIG = storage_cfg.StorageConfig(  # type: ignore[assignment]
        connection_string=traces_dsn,
        backend=native_backend,
    )
118
+
119
+
120
+ def _load_smoke_config(config_path: Path | None) -> dict[str, Any]:
121
+ """Load [smoke] section from TOML config file.
122
+
123
+ Returns an empty dict if no config file or no [smoke] section.
124
+ """
125
+ if not config_path:
126
+ return {}
127
+
128
+ try:
129
+ with open(config_path, "rb") as f:
130
+ full_config = tomllib.load(f)
131
+
132
+ smoke_config = full_config.get("smoke", {})
133
+
134
+ if smoke_config:
135
+ click.echo(f"[smoke] Loaded configuration from {config_path}", err=True)
136
+ click.echo(f"[smoke] Config keys: {', '.join(smoke_config.keys())}", err=True)
137
+
138
+ return smoke_config
139
+ except Exception as exc:
140
+ click.echo(f"[smoke] Warning: Failed to load config from {config_path}: {exc}", err=True)
141
+ return {}
142
+
143
+
144
def _kill_process_on_port(port: int) -> None:
    """Kill processes that are *listening* on TCP ``port``.

    ``lsof`` is restricted to TCP sockets in the LISTEN state. A bare
    ``lsof -i :PORT`` also matches client connections that merely talk to the
    port, which could kill unrelated processes or this one. The current
    process is never killed.

    The function is best-effort: failures are reported as warnings on stderr
    and never raised. When at least one PID was targeted it sleeps two seconds
    so the OS can release the port.

    Args:
        port: TCP port number to free.
    """
    try:
        listing = subprocess.run(
            ["lsof", "-t", f"-iTCP:{port}", "-sTCP:LISTEN"],
            capture_output=True,
            text=True,
            timeout=2,
        )
    except Exception as exc:
        click.echo(f"[smoke] Warning: Could not check/kill port {port}: {exc}", err=True)
        return

    own_pid = str(os.getpid())
    pids = [pid for pid in listing.stdout.split() if pid != own_pid]
    if not pids:
        return

    for pid in pids:
        try:
            # check=True so success is only reported when kill actually worked.
            subprocess.run(["kill", "-9", pid], timeout=2, check=True)
        except (OSError, subprocess.SubprocessError) as exc:
            click.echo(
                f"[smoke] Warning: Could not kill process {pid} on port {port}: {exc}",
                err=True,
            )
        else:
            click.echo(f"[smoke] Killed existing process {pid} on port {port}", err=True)

    time.sleep(2.0)  # Give OS time to release port
165
+
166
+
167
def _latest_log_line(log_path: Path) -> str | None:
    """Return the newest non-empty line among the last 10 lines of *log_path*.

    Returns ``None`` when the file cannot be read or has no non-empty line in
    that window.
    """
    try:
        with open(log_path, encoding="utf-8", errors="replace") as handle:
            tail = handle.readlines()[-10:]
    except OSError:
        return None
    return next((text for text in (raw.strip() for raw in reversed(tail)) if text), None)


def _start_task_app_server(
    task_app_name: str,
    port: int,
    env_file: str | None,
    force: bool
) -> tuple[Any, str]:
    """Start a task app server in the background using ``task-app serve``.

    The server is launched under ``nohup`` from the synth-ai root directory,
    with its output redirected to ``nohup_task_app.out`` there. The function
    then polls ``/health`` until it answers 200 or 400, the child exits, or
    120 seconds elapse.

    Args:
        task_app_name: Name passed to ``task-app serve``.
        port: Port the task app should listen on.
        env_file: Optional path forwarded as ``--env-file``.
        force: When true, ``--force`` is appended to the command.

    Returns:
        A ``(process, url)`` tuple for the running server.

    Raises:
        click.ClickException: if the child process exits before becoming
            healthy, or if it is not healthy within 120 seconds (the child is
            killed and reaped in that case).
    """
    startup_timeout = 120.0

    # The user-facing command; also shown verbatim in the failure message.
    serve_cmd = ["uvx", "synth-ai", "task-app", "serve", task_app_name, "--port", str(port)]
    if env_file:
        serve_cmd.extend(["--env-file", env_file])
    if force:
        serve_cmd.append("--force")
    cmd = ["nohup", *serve_cmd]

    # Resolve the synth-ai root directory
    import synth_ai
    synth_ai_root = Path(synth_ai.__file__ or Path(__file__).resolve()).resolve().parent.parent

    click.echo(f"[smoke] Starting task app '{task_app_name}' on port {port}...", err=True)
    click.echo(f"[smoke] Command: {' '.join(cmd)}", err=True)
    click.echo(f"[smoke] Working directory: {synth_ai_root}", err=True)

    # nohup requires output redirection to a file. Run from the synth-ai root
    # so task app discovery works.
    nohup_log = Path(synth_ai_root) / "nohup_task_app.out"

    # Inherit SYNTH_QUIET environment variable to suppress patch messages
    env = os.environ.copy()
    if os.getenv("SYNTH_QUIET"):
        env["SYNTH_QUIET"] = "1"

    with open(nohup_log, "w") as log_file:
        proc = subprocess.Popen(
            cmd,
            stdout=log_file,
            stderr=subprocess.STDOUT,
            text=True,
            cwd=str(synth_ai_root),
            env=env,
        )
    # Our handle is closed here; the child keeps its own copy of the descriptor.

    url = f"http://localhost:{port}"
    click.echo(f"[smoke] Waiting for task app to be ready at {url}...", err=True)

    # Monotonic clock: the timeout must not be affected by wall-clock changes.
    started_at = time.monotonic()
    deadline = started_at + startup_timeout
    attempt = 0
    last_log_line: str | None = None
    while time.monotonic() < deadline:
        attempt += 1
        try:
            resp = httpx.get(f"{url}/health", timeout=1.0)
            # Accept both 200 and 400 - 400 means server is up but auth is
            # failing (which is fine for smoke test)
            if resp.status_code in (200, 400):
                click.echo(f"[smoke] Task app ready at {url} (status={resp.status_code})", err=True)
                return proc, url
        except Exception:
            # Server is not accepting connections yet; keep polling.
            pass

        # Report progress on every 10th attempt, preferring fresh log output.
        if attempt % 10 == 0:
            elapsed = int(time.monotonic() - started_at)
            newest = _latest_log_line(nohup_log)
            if newest and newest != last_log_line:
                last_log_line = newest
                shown = newest if len(newest) <= 80 else newest[:77] + "..."
                click.echo(f"[smoke] Waiting ({elapsed}s): {shown}", err=True)
            else:
                click.echo(f"[smoke] Still waiting for task app... ({elapsed}s elapsed)", err=True)

        # Check if process died
        if proc.poll() is not None:
            raise click.ClickException(
                f"Task app '{task_app_name}' process exited unexpectedly (code={proc.returncode}). "
                f"Check that the task app name is correct and .env has required keys. "
                f"Try running manually: {' '.join(serve_cmd)}"
            )

        time.sleep(0.5)

    proc.kill()
    # Reap the child so it does not linger as a zombie.
    with contextlib.suppress(subprocess.TimeoutExpired):
        proc.wait(timeout=5)
    raise click.ClickException("Task app failed to start within 120 seconds")
287
+
288
+
289
def _start_sqld_server(
    db_path: str,
    hrana_port: int,
    http_port: int
) -> Any:
    """Launch ``sqld`` in the background and wait briefly for it to be healthy.

    Any processes found on the two ports are killed first. When the health
    endpoint answers 200, the tracing environment variables are exported.

    Args:
        db_path: Database path; ``~`` is expanded and the parent is created.
        hrana_port: Port for the Hrana listener.
        http_port: Port for the HTTP listener (also used for the health check).

    Returns:
        The process handle. ``None`` is returned when ``sqld`` is not on PATH
        or the process exits during startup. If the 10-second health check
        times out, the handle is still returned.
    """
    import shutil

    sqld_bin = shutil.which("sqld")
    if not sqld_bin:
        click.echo("[smoke] Warning: sqld not found in PATH, skipping auto-start", err=True)
        click.echo("[smoke] Install sqld: brew install sqld", err=True)
        return None

    # Make sure the directory that will hold the database exists.
    resolved_db = Path(db_path).expanduser().resolve()
    resolved_db.parent.mkdir(parents=True, exist_ok=True)

    # Free both ports before binding to them.
    _kill_process_on_port(hrana_port)
    _kill_process_on_port(http_port)

    cmd = [sqld_bin]
    cmd += ["--db-path", str(resolved_db)]
    cmd += ["--hrana-listen-addr", f"127.0.0.1:{hrana_port}"]
    cmd += ["--http-listen-addr", f"127.0.0.1:{http_port}"]

    click.echo("[smoke] Starting sqld server...", err=True)
    click.echo(f"[smoke] DB path: {resolved_db}", err=True)
    click.echo(f"[smoke] Hrana port: {hrana_port}, HTTP port: {http_port}", err=True)
    click.echo(f"[smoke] Command: {' '.join(cmd)}", err=True)

    # Output goes to devnull so the child cannot block on a full pipe.
    proc = subprocess.Popen(
        cmd,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
        text=True,
    )

    health_url = f"http://127.0.0.1:{http_port}/health"
    click.echo(f"[smoke] Waiting for sqld to be ready at {health_url}...", err=True)

    give_up_at = time.time() + 10.0
    while time.time() < give_up_at:
        try:
            if httpx.get(health_url, timeout=0.5).status_code == 200:
                click.echo("[smoke] sqld ready", err=True)
                # Export the settings the tracing layer reads.
                http_dsn = f"http://127.0.0.1:{http_port}"
                os.environ["SQLD_DB_PATH"] = str(resolved_db)
                os.environ["SQLD_HTTP_PORT"] = str(hrana_port)
                os.environ["LIBSQL_URL"] = http_dsn
                os.environ["SYNTH_TRACES_DB"] = http_dsn
                return proc
        except Exception:
            # Not listening yet; fall through to the liveness check.
            pass

        exit_code = proc.poll()
        if exit_code is not None:
            click.echo(f"[smoke] Warning: sqld process exited with code {proc.returncode}", err=True)
            return None

        time.sleep(0.2)

    click.echo("[smoke] Warning: sqld health check timed out, continuing anyway...", err=True)
    return proc
364
+
365
class MockRLTrainer:
    """Minimal trainer emulator with a local FastAPI mock for GPT-5-Nano.

    In ``synthetic`` mode it emits deterministic tool calls so the rollout can
    progress without relying on external inference. In ``openai`` mode it acts
    as a thin proxy around the real OpenAI chat completions endpoint (useful to
    reproduce production behaviour locally).
    """

    # Simple heuristic actions to move/explore then interact.
    _SCRIPTED_ACTIONS = ("move_right", "move_right", "move_down", "move_left", "do")
    # Tool name used for injected calls when the request declared no function tool.
    _FALLBACK_TOOL_NAME = "interact_many"

    def __init__(self, *, port: int = 0, backend: str = "synthetic") -> None:
        """Configure the mock.

        Args:
            port: Preferred port; ``0`` lets the OS pick a free one at start.
            backend: ``"synthetic"`` or ``"openai"`` (case/whitespace
                insensitive; blank falls back to ``"synthetic"``).
        """
        self.port = port
        self.backend = backend.lower().strip() or "synthetic"
        self._server = None
        self._task: asyncio.Task | None = None
        self._openai_endpoint = os.getenv(
            "SMOKE_OPENAI_ENDPOINT", "https://api.openai.com/v1/chat/completions"
        )
        self._openai_api_key = (
            os.getenv("SMOKE_OPENAI_API_KEY") or os.getenv("OPENAI_API_KEY") or ""
        )

    @staticmethod
    def _first_function_tool_name(tools: Any) -> str:
        """Return the name of the first ``type == "function"`` tool, or ``""``.

        Malformed entries (non-dict tools, non-dict ``function``) are skipped.
        """
        if not isinstance(tools, list):
            return ""
        for tool in tools:
            if not isinstance(tool, dict) or tool.get("type") != "function":
                continue
            fn = tool.get("function")
            name = fn.get("name") if isinstance(fn, dict) else None
            if name:
                return name
        return ""

    @staticmethod
    def _tool_call_message(tool_name: str, actions: Any) -> dict[str, Any]:
        """Build an assistant message invoking *tool_name* with *actions*.

        Emits BOTH legacy ``function_call`` and modern ``tool_calls`` for broad
        compatibility.
        """
        import json

        arguments = json.dumps({"actions": list(actions)})
        return {
            "role": "assistant",
            "content": "",
            "function_call": {"name": tool_name, "arguments": arguments},
            "tool_calls": [
                {
                    "id": f"call_{uuid.uuid4().hex[:8]}",
                    "type": "function",
                    "function": {"name": tool_name, "arguments": arguments},
                }
            ],
        }

    @staticmethod
    def _count_tool_calls(payload: Any) -> int:
        """Return ``len(payload["choices"][0]["message"]["tool_calls"])``, or 0
        when any part of that path is missing or has an unexpected type."""
        if not isinstance(payload, dict):
            return 0
        choices = payload.get("choices")
        first = choices[0] if isinstance(choices, list) and choices else None
        message = first.get("message") if isinstance(first, dict) else None
        tool_calls = message.get("tool_calls") if isinstance(message, dict) else None
        return len(tool_calls) if isinstance(tool_calls, list) else 0

    @classmethod
    def _ensure_tool_calls(cls, payload: dict[str, Any], tool_name: str) -> bool:
        """Inject a scripted tool call into *payload* if its first choice has none.

        Returns ``True`` when an injection happened. *payload* is mutated.
        """
        choices = payload.get("choices")
        first = choices[0] if isinstance(choices, list) and choices else None
        message = first.get("message") if isinstance(first, dict) else None
        if isinstance(message, dict) and message.get("tool_calls"):
            return False

        fallback = dict(message) if isinstance(message, dict) else {}
        fallback.setdefault("role", "assistant")
        fallback["content"] = ""
        scripted = cls._tool_call_message(
            tool_name or cls._FALLBACK_TOOL_NAME, cls._SCRIPTED_ACTIONS
        )
        fallback["tool_calls"] = scripted["tool_calls"]
        fallback["function_call"] = scripted["function_call"]

        if isinstance(first, dict):
            # Keep the upstream choice (index, finish_reason) and swap the message.
            first["message"] = fallback
        else:
            payload["choices"] = [
                {"index": 0, "message": fallback, "finish_reason": "tool_calls"}
            ]
        return True

    @staticmethod
    def _synthetic_completion(
        model: str,
        message_payload: dict[str, Any],
        finish_reason: str,
        cid: str | None,
    ) -> dict[str, Any]:
        """Assemble a chat-completion-shaped response around *message_payload*."""
        return {
            "id": f"cmpl_{uuid.uuid4().hex[:12]}",
            "object": "chat.completion",
            # Unix timestamp in seconds; the event-loop clock is monotonic and
            # is not an epoch time.
            "created": int(time.time()),
            "model": model,
            "choices": [{"index": 0, "message": message_payload, "finish_reason": finish_reason}],
            "usage": {"prompt_tokens": 32, "completion_tokens": 16, "total_tokens": 48},
            "synth": {"cid": cid},
        }

    def _build_app(self):
        """Create the FastAPI app exposing ``POST /v1/chat/completions``."""
        from fastapi import Body, FastAPI
        from fastapi.responses import JSONResponse

        log = logging.getLogger(__name__)
        app = FastAPI()
        backend = self.backend

        @app.post("/v1/chat/completions")
        async def chat_completions(body: dict = Body(...), cid: str | None = None):
            # Explicit Body(...) avoids FastAPI interpreting parameters as query args
            raw_messages = body.get("messages")
            messages = raw_messages if isinstance(raw_messages, list) else []
            click.echo(
                f"[mock-rl] ← request backend={backend} model={body.get('model')} messages={len(messages)} cid={cid}",
                err=True,
            )

            model = body.get("model") or "gpt-5-nano"
            # A declared function tool means we drive env steps with a tool
            # call; otherwise we answer with plain text.
            tool_name = self._first_function_tool_name(body.get("tools"))

            if backend == "openai":
                if not self._openai_api_key:
                    return JSONResponse(
                        {
                            "error": "OPENAI_API_KEY (or SMOKE_OPENAI_API_KEY) is required for mock backend 'openai'"
                        },
                        status_code=500,
                    )
                try:
                    from examples.task_apps.crafter.task_app.synth_envs_hosted.inference.openai_client import (  # type: ignore[import-untyped]
                        OpenAIClient as _HostedOpenAIClient,
                    )

                    hosted_client = _HostedOpenAIClient(
                        base_url=self._openai_endpoint,
                        api_key=self._openai_api_key,
                    )
                except Exception as exc:
                    log.error("MockRLTrainer failed to import HostedOpenAIClient: %s", exc)
                    return JSONResponse(
                        {"error": f"OpenAI proxy unavailable: {exc}"},
                        status_code=500,
                    )

                try:
                    result = await hosted_client.generate_with_retries(  # type: ignore[attr-defined]
                        request=body,
                        base_url=self._openai_endpoint,
                        max_retries=0,
                    )
                except Exception as exc:
                    log.error("MockRLTrainer OpenAI generate failed: %s", exc)
                    return JSONResponse(
                        {"error": f"OpenAI proxy request failed: {exc}"},
                        status_code=502,
                    )

                if not isinstance(result, dict):
                    return JSONResponse(result)

                data_typed = dict(result)
                synth_meta = data_typed.get("synth")
                if not isinstance(synth_meta, dict):
                    synth_meta = {}
                    data_typed["synth"] = synth_meta
                if cid:
                    synth_meta.setdefault("cid", cid)

                # Fallback: if the upstream response failed to emit tool calls,
                # synthesize a deterministic action plan so the rollout can proceed.
                try:
                    if self._ensure_tool_calls(data_typed, tool_name):
                        log.warning(
                            "MockRLTrainer fallback: OpenAI returned no tool calls; injecting deterministic actions."
                        )
                except Exception as exc:
                    log.debug("MockRLTrainer fallback injection failed: %s", exc)

                tool_call_count = self._count_tool_calls(data_typed)
                log.info(
                    "MockRLTrainer proxy returning response with %s tool calls (cid=%s)",
                    tool_call_count,
                    cid,
                )
                if tool_call_count == 0:
                    # NOTE(review): the message says "failing request" but the
                    # payload is still returned with status 200 - confirm intent.
                    log.error(
                        "MockRLTrainer proxy still missing tool_calls after fallback injection (cid=%s)",
                        cid,
                    )
                    click.echo(
                        "[mock-rl] ✗ proxy response missing tool_calls; failing request", err=True
                    )
                return JSONResponse(data_typed)

            if tool_name:
                message_payload = self._tool_call_message(tool_name, self._SCRIPTED_ACTIONS)
                finish_reason = "tool_calls"
            else:
                # Fallback: echo last user content as plain text
                click.echo(
                    f"[mock-rl] ! no tool schema supplied; returning text response (cid={cid})",
                    err=True,
                )
                log.warning(
                    "MockRLTrainer received request without tool schema; responding with text content (cid=%s)",
                    cid,
                )
                last_user = next(
                    (
                        m.get("content", "")
                        for m in reversed(messages)
                        if isinstance(m, dict) and m.get("role") == "user"
                    ),
                    "",
                )
                # Non-string content cannot be echoed; treat it as empty.
                text = last_user.strip() if isinstance(last_user, str) else ""
                if len(text) > 160:
                    text = text[:160] + "..."
                message_payload = {"role": "assistant", "content": f"MOCK(gpt-5-nano): {text or 'ack'}"}
                finish_reason = "stop"

            response = self._synthetic_completion(model, message_payload, finish_reason, cid)
            if finish_reason == "tool_calls":
                tc = self._count_tool_calls(response)
                log.debug(
                    "MockRLTrainer synthetic response emitting %s tool calls (cid=%s)",
                    tc,
                    cid,
                )
                click.echo(
                    f"[mock-rl] → response tool_calls={tc} backend={backend} cid={cid}",
                    err=True,
                )
            else:
                click.echo(
                    f"[mock-rl] → response finish_reason={finish_reason} backend={backend} cid={cid}",
                    err=True,
                )
            return JSONResponse(response)

        return app

    async def start(self) -> None:
        """Start the uvicorn server as a background task on 127.0.0.1.

        Tries ``self.port`` first (when non-zero), then OS-assigned ports, for
        up to six attempts in total. ``self.port`` holds the bound port on
        return.

        Raises:
            RuntimeError: if the server never reports ``started``.
        """
        import socket

        import uvicorn

        def _allocate_port() -> int:
            # Probe the requested port; fall back to an OS-assigned one.
            if self.port:
                with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as probe:
                    probe.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
                    try:
                        probe.bind(("127.0.0.1", self.port))
                        return self.port
                    except OSError:
                        pass
            with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as probe:
                probe.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
                probe.bind(("127.0.0.1", 0))
                self.port = probe.getsockname()[1]
            return self.port

        retries = 0
        while True:
            selected_port = _allocate_port()
            config = uvicorn.Config(
                self._build_app(),
                host="127.0.0.1",
                port=selected_port,
                log_level="warning",
            )
            self._server = uvicorn.Server(config)
            self._task = asyncio.create_task(self._server.serve())

            # Wait up to ~5s for uvicorn to report startup or for serve() to end.
            for _ in range(100):
                if getattr(self._server, "started", False) or self._task.done():
                    break
                await asyncio.sleep(0.05)

            if getattr(self._server, "started", False):
                # Announcing readiness must never break a successful start.
                with contextlib.suppress(Exception):
                    logging.getLogger(__name__).info(
                        "MockRLTrainer started on http://127.0.0.1:%s (backend=%s)",
                        self.port,
                        self.backend,
                    )
                    click.echo(
                        f"[mock-rl] server ready http://127.0.0.1:{self.port} backend={self.backend}",
                        err=True,
                    )
                return

            # Startup failed; stop server and retry on a new port if possible
            await self.stop()
            if retries >= 5:
                raise RuntimeError("MockRLTrainer failed to start after multiple attempts")
            self.port = 0
            retries += 1

    async def stop(self) -> None:
        """Ask the server to exit and wait up to two seconds for its task."""
        if self._server is not None:
            self._server.should_exit = True
        if self._task is not None:
            with contextlib.suppress(Exception):
                await asyncio.wait_for(self._task, timeout=2.0)
        self._task = None
        self._server = None
        click.echo("[mock-rl] server stopped", err=True)
706
+
707
+ async def _run_smoke_async(
708
+ *,
709
+ task_app_url: str,
710
+ api_key: str | None,
711
+ env_name_opt: str | None,
712
+ policy_name: str,
713
+ model: str,
714
+ inference_url_opt: str | None,
715
+ inference_policy: str | None,
716
+ max_steps: int,
717
+ return_trace: bool,
718
+ use_mock: bool,
719
+ mock_port: int,
720
+ mock_backend: str,
721
+ config_path: Path | None,
722
+ rollouts: int = 1,
723
+ group_size: int = 1,
724
+ batch_size: int | None = None,
725
+ ) -> int:
726
+ # If config is provided, derive defaults (URL/env/model)
727
+ cfg: Any | None = None
728
+ if config_path is not None:
729
+ try:
730
+ from synth_ai.sdk.api.train.configs.rl import (
731
+ RLConfig as _RLConfig, # lazy import to avoid heavy deps when unused
732
+ )
733
+ cfg = _RLConfig.from_path(config_path)
734
+ except Exception as exc:
735
+ click.echo(f"Failed to load RL config {config_path}: {exc}", err=True)
736
+ return 2
737
+
738
+ # Prefer explicit CLI --url; only use config services.task_url if URL not provided
739
+ try:
740
+ if not task_app_url and cfg.services and getattr(cfg.services, "task_url", None):
741
+ task_app_url = cfg.services.task_url
742
+ except Exception:
743
+ pass
744
+ # Fill env and model if not explicitly set
745
+ try:
746
+ if not env_name_opt and cfg.rollout and getattr(cfg.rollout, "env_name", None):
747
+ env_name_opt = cfg.rollout.env_name
748
+ except Exception:
749
+ pass
750
+ try:
751
+ if model == "gpt-5-nano":
752
+ # Prefer smoke config model over policy model for smoke tests
753
+ smoke_cfg = getattr(cfg, "smoke", None)
754
+ smoke_model = None
755
+ if smoke_cfg and hasattr(smoke_cfg, "model"):
756
+ smoke_model = smoke_cfg.model
757
+ if smoke_model:
758
+ model = str(smoke_model).strip()
759
+ elif cfg.policy:
760
+ if getattr(cfg.policy, "model_name", None):
761
+ model = str(cfg.policy.model_name).strip()
762
+ elif getattr(cfg.policy, "source", None):
763
+ model = str(cfg.policy.source).strip()
764
+ elif cfg.model and getattr(cfg.model, "source", None):
765
+ model = str(cfg.model.source).strip()
766
+ elif cfg.model and getattr(cfg.model, "base", None):
767
+ model = str(cfg.model.base).strip()
768
+ except Exception:
769
+ pass
770
+
771
+ base = validate_task_app_url(task_app_url)
772
+ mock_backend = (mock_backend or "synthetic").strip().lower()
773
+
774
+ # Discover environment if not provided
775
+ async with LocalAPIClient(base_url=base, api_key=api_key) as client:
776
+ # Probe basic info quickly
777
+ try:
778
+ _ = await client.health()
779
+ except Exception:
780
+ click.echo("Auth or connectivity check failed on /health. If this endpoint requires a key, pass --api-key or set ENVIRONMENT_API_KEY.", err=True)
781
+ # Continue; rollout may still clarify the error
782
+
783
+ # Fetch a sample task instance to infer environment name if not provided
784
+ env_name = env_name_opt
785
+ if not env_name:
786
+ try:
787
+ ti = await client.task_info(seeds=[0])
788
+ # task_info returns TaskInfo or list[TaskInfo]; normalize
789
+ info: Any = ti[0] if isinstance(ti, list) else ti
790
+ env_name = getattr(info, "environment", None) or getattr(info, "task", {}).get("name") # type: ignore[attr-defined]
791
+ except Exception:
792
+ env_name = None
793
+ if not env_name:
794
+ click.echo("Could not infer environment name; pass --env-name.", err=True)
795
+ return 2
796
+
797
+ # Inference URL: user override > preset > local mock > Synth API default
798
+ synth_base = (os.getenv("SYNTH_API_BASE") or os.getenv("SYNTH_BASE_URL") or "https://api.synth.run").rstrip("/")
799
+ # Avoid double '/api' if base already includes it
800
+ if synth_base.endswith("/api"):
801
+ default_infer = f"{synth_base}/inference/v1/chat/completions"
802
+ else:
803
+ default_infer = f"{synth_base}/api/inference/v1/chat/completions"
804
+
805
+ # Helper to execute one or more rollouts and return exit code
806
+ async def __do_rollouts(inference_url_raw: str) -> int:
807
+ successes = 0
808
+ total_steps = 0
809
+ nonzero_returns = 0
810
+ v3_traces = 0
811
+
812
+ # Derive sampling params from config if present
813
+ sampling: dict[str, Any] = {}
814
+ try:
815
+ if cfg and cfg.policy:
816
+ if getattr(cfg.policy, "temperature", None) is not None:
817
+ sampling["temperature"] = cfg.policy.temperature
818
+ if getattr(cfg.policy, "top_p", None) is not None:
819
+ sampling["top_p"] = cfg.policy.top_p
820
+ if getattr(cfg.policy, "max_tokens", None) is not None:
821
+ sampling["max_tokens"] = cfg.policy.max_tokens
822
+ except Exception:
823
+ pass
824
+
825
+ num_outer = batch_size if (batch_size is not None and batch_size > 0) else max(1, int(rollouts))
826
+ for i in range(num_outer):
827
+ for g in range(max(1, int(group_size))):
828
+ if inference_url_raw.startswith("/"):
829
+ inference_url_abs = f"{base}{inference_url_raw}"
830
+ else:
831
+ inference_url_abs = inference_url_raw
832
+ inference_url_norm = normalize_inference_url(inference_url_abs)
833
+ correlation_id = f"smoke-{uuid.uuid4()}"
834
+ inference_url_with_cid = _append_query_param(inference_url_norm, "cid", correlation_id)
835
+
836
+ run_id = correlation_id
837
+ policy_cfg: dict[str, Any] = {
838
+ "model": model,
839
+ "inference_url": inference_url_with_cid,
840
+ }
841
+ if sampling:
842
+ policy_cfg.update(sampling)
843
+
844
+ request = RolloutRequest(
845
+ run_id=run_id,
846
+ env=RolloutEnvSpec(env_name=env_name, config={}, seed=i),
847
+ policy=RolloutPolicySpec(policy_name=policy_name, config=policy_cfg),
848
+ record=RolloutRecordConfig(
849
+ trajectories=True,
850
+ logprobs=False,
851
+ value=False,
852
+ return_trace=return_trace,
853
+ trace_format=("structured" if return_trace else "compact"),
854
+ ),
855
+ on_done="reset",
856
+ training_session_id=None,
857
+ synth_base_url=synth_base,
858
+ mode=RolloutMode.RL,
859
+ )
860
+
861
+ try:
862
+ click.echo(f">> POST /rollout run_id={run_id} env={env_name} policy={policy_name} url={inference_url_with_cid}")
863
+ response = await client.rollout(request)
864
+ except Exception as exc:
865
+ click.echo(f"Rollout[{i}:{g}] failed: {type(exc).__name__}: {exc}", err=True)
866
+ import traceback
867
+ click.echo(f"Traceback: {traceback.format_exc()}", err=True)
868
+ continue
869
+
870
+ successes += 1
871
+ try:
872
+ validate_rollout_response_for_rl(response.model_dump())
873
+ except Exception as vexc:
874
+ click.echo(f" ⚠ RL response validation warning: {vexc}", err=True)
875
+
876
+ pm = response.pipeline_metadata or {}
877
+ inferred_url = pm.get("inference_url") if isinstance(pm, dict) else None
878
+ metrics = response.metrics
879
+ if inferred_url:
880
+ click.echo(f" rollout[{i}:{g}] inference_url: {inferred_url}")
881
+ click.echo(f" rollout[{i}:{g}] episodes={metrics.num_episodes} steps={metrics.num_steps} reward_mean={metrics.reward_mean:.4f}")
882
+
883
+ total_steps += int(metrics.num_steps)
884
+ if (metrics.reward_mean or 0.0) != 0.0:
885
+ nonzero_returns += 1
886
+ if response.trace is not None and isinstance(response.trace, dict):
887
+ v3_traces += 1
888
+
889
+ if i == 0 and g == 0:
890
+ try:
891
+ traj0 = response.trajectories[0]
892
+ step_meta_url = None
893
+ for step in traj0.steps:
894
+ info = getattr(step, "info", None) or {}
895
+ meta = info.get("meta") if isinstance(info, dict) else None
896
+ if isinstance(meta, dict) and meta.get("inference_url"):
897
+ step_meta_url = meta.get("inference_url")
898
+ break
899
+ if step_meta_url:
900
+ click.echo(f" step.meta.inference_url: {str(step_meta_url)[:120]}...")
901
+ except Exception:
902
+ pass
903
+
904
+ try:
905
+ try:
906
+ metrics_dump = response.metrics.model_dump()
907
+ except Exception:
908
+ metrics_dump = {
909
+ "episode_rewards": getattr(response.metrics, "episode_rewards", None),
910
+ "reward_mean": getattr(response.metrics, "reward_mean", None),
911
+ "num_steps": getattr(response.metrics, "num_steps", None),
912
+ "num_episodes": getattr(response.metrics, "num_episodes", None),
913
+ "outcome_score": getattr(response.metrics, "outcome_score", None),
914
+ "events_score": getattr(response.metrics, "events_score", None),
915
+ }
916
+ click.echo(" reward.info (metrics): " + str(metrics_dump))
917
+
918
+ try:
919
+ traj = response.trajectories[0]
920
+ step_rewards = []
921
+ all_achievements = set()
922
+ for st in getattr(traj, "steps", []) or []:
923
+ try:
924
+ step_rewards.append(getattr(st, "reward", None))
925
+ except Exception:
926
+ step_rewards.append(None)
927
+ # Extract achievements from step info
928
+ try:
929
+ step_info = getattr(st, "info", None)
930
+ if isinstance(step_info, dict):
931
+ achievements_status = step_info.get("achievements_status")
932
+ if isinstance(achievements_status, dict):
933
+ for ach_name, ach_val in achievements_status.items():
934
+ if ach_val:
935
+ all_achievements.add(str(ach_name))
936
+ except Exception:
937
+ pass
938
+ click.echo(" reward.per_step: " + str(step_rewards))
939
+ if all_achievements:
940
+ click.echo(f" achievements: {sorted(all_achievements)}")
941
+ else:
942
+ click.echo(" achievements: none")
943
+ except Exception:
944
+ pass
945
+
946
+ # Extract and display tool calls from v3 trace
947
+ #
948
+ # IMPORTANT: Tool calls are extracted from the structured v3 trace format.
949
+ # The trace must be requested with return_trace=True for this to work.
950
+ #
951
+ # Trace structure:
952
+ # trace.event_history[] - list of events (policy calls, env steps)
953
+ # ├─ event.call_records[] - LLM calls made during this event
954
+ # ├─ call_record.output_tool_calls[] - tool calls from LLM response
955
+ # ├─ tool_call.name - function name (e.g., "interact_many")
956
+ # └─ tool_call.arguments_json - JSON string of arguments
957
+ #
958
+ # This provides visibility into what actions the policy is taking,
959
+ # which is critical for debugging RL training issues.
960
+ tr = response.trace if isinstance(response.trace, dict) else None
961
+ if tr:
962
+ event_history = tr.get("event_history", [])
963
+ tool_call_count = 0
964
+
965
+ # Extract tool calls from event_history call_records
966
+ if event_history and isinstance(event_history, list):
967
+ for event in event_history:
968
+ if not isinstance(event, dict):
969
+ continue
970
+ # Policy events contain call_records with LLM interactions
971
+ call_records = event.get("call_records")
972
+ if call_records and isinstance(call_records, list):
973
+ for call_record in call_records:
974
+ if isinstance(call_record, dict):
975
+ # Extract tool calls from this LLM call
976
+ output_tool_calls = call_record.get("output_tool_calls", [])
977
+ if output_tool_calls and isinstance(output_tool_calls, list):
978
+ for tc in output_tool_calls:
979
+ if isinstance(tc, dict):
980
+ fn_name = tc.get("name", "unknown")
981
+ fn_args = tc.get("arguments_json", "{}")
982
+ # Display tool call with truncated args for readability
983
+ click.echo(f" TOOL_CALL[{tool_call_count}]: {fn_name}({fn_args[:100]}{'...' if len(fn_args) > 100 else ''})")
984
+ tool_call_count += 1
985
+
986
+ if tool_call_count > 0:
987
+ click.echo(f" ✓ {tool_call_count} tool calls executed")
988
+ else:
989
+ # No tool calls found - might indicate:
990
+ # 1. return_trace=False (trace not requested)
991
+ # 2. Policy didn't make tool calls (unlikely for most RL tasks)
992
+ # 3. Trace format mismatch (structure changed)
993
+ click.echo(" ⚠ No tool calls found in trace")
994
+ else:
995
+ click.echo(" ⚠ Trace not available")
996
+ except Exception as e:
997
+ click.echo(f" trace error: {e}", err=True)
998
+
999
+ click.echo("✓ Smoke rollouts complete")
1000
+ denom = num_outer * max(1, int(group_size))
1001
+ click.echo(f" successes={successes}/{denom} total_steps={total_steps} v3_traces={v3_traces}/{denom} nonzero_returns={nonzero_returns}/{denom}")
1002
+
1003
+ if successes == 0:
1004
+ click.echo(" ⚠ All rollouts failed", err=True)
1005
+ return 3
1006
+ if v3_traces < successes:
1007
+ click.echo(" ⚠ Some rollouts missing v3 traces (trace field)", err=True)
1008
+ if total_steps == 0:
1009
+ click.echo(" ⚠ No steps executed; check policy config", err=True)
1010
+
1011
+ return 0
1012
+
1013
+ # Initialize to default; policy/flags may override below
1014
+ inference_url_raw = inference_url_opt or default_infer
1015
+ mock: MockRLTrainer | None = None
1016
+ preset = (inference_policy or "").strip().lower()
1017
+
1018
+ # Respect explicit preset overrides
1019
+ if preset == "mock":
1020
+ use_mock = True
1021
+ elif preset == "gpt-5-nano":
1022
+ if not inference_url_opt:
1023
+ inference_url_raw = default_infer
1024
+ if not model:
1025
+ model = "gpt-5-nano"
1026
+ elif preset == "openai":
1027
+ inference_url_raw = "https://api.openai.com/v1/chat/completions"
1028
+ elif preset == "groq":
1029
+ inference_url_raw = "https://api.groq.com/openai/v1/chat/completions"
1030
+
1031
+ # Start mock proxy only when explicitly requested
1032
+ if use_mock:
1033
+ backend_choice = mock_backend
1034
+ if backend_choice == "openai" and not (
1035
+ os.getenv("SMOKE_OPENAI_API_KEY") or os.getenv("OPENAI_API_KEY")
1036
+ ):
1037
+ click.echo(
1038
+ " ⚠ OPENAI_API_KEY not configured; falling back to synthetic mock.",
1039
+ err=True,
1040
+ )
1041
+ backend_choice = "synthetic"
1042
+ mock = MockRLTrainer(port=mock_port, backend=backend_choice)
1043
+ await mock.start()
1044
+ inference_url_raw = f"http://127.0.0.1:{mock.port}"
1045
+
1046
+ try:
1047
+ result = await __do_rollouts(inference_url_raw)
1048
+ finally:
1049
+ if mock is not None:
1050
+ with contextlib.suppress(Exception):
1051
+ await mock.stop()
1052
+ return result
1053
async def _run_train_step(
    *,
    task_app_url: str,
    api_key: str | None,
    env_name_opt: str | None,
    policy_name: str,
    model: str,
    inference_policy: str | None,
    inference_url_opt: str | None,
    max_steps: int,
    return_trace: bool,
    use_mock: bool,
    mock_backend: str,
    mock_port: int,
    config_path: Path | None,
    parallel: int,
) -> int:
    """Emulate one trainer step by running several smoke rollouts concurrently.

    Launches ``max(1, parallel)`` concurrent calls to ``_run_smoke_async``,
    each configured for a single rollout (``rollouts=1``, ``group_size=1``,
    ``batch_size=None``), then prints a timing/success summary.

    All keyword arguments other than ``parallel`` are forwarded unchanged to
    ``_run_smoke_async``.

    Returns:
        ``0`` when every concurrent rollout exited with code 0, otherwise ``3``.
    """
    import time
    from collections import Counter

    start = time.perf_counter()

    async def one(seed_idx: int) -> dict[str, Any]:
        # NOTE: ``seed_idx`` is not forwarded; every call asks for a single
        # rollout, so all concurrent rollouts run the same (first) seed.
        # NOTE(review): with ``use_mock`` and a fixed non-zero ``mock_port``
        # each concurrent call starts its own mock server on that port --
        # confirm this is supported, or keep the port at 0 (auto).
        t0 = time.perf_counter()
        try:
            code = await _run_smoke_async(
                task_app_url=task_app_url,
                api_key=api_key,
                env_name_opt=env_name_opt,
                policy_name=policy_name,
                model=model,
                inference_policy=inference_policy,
                inference_url_opt=inference_url_opt,
                max_steps=max_steps,
                return_trace=return_trace,
                use_mock=use_mock,
                mock_backend=mock_backend,
                mock_port=mock_port,
                config_path=config_path,
                rollouts=1,
                group_size=1,
                batch_size=None,
            )
            wall_ms = (time.perf_counter() - t0) * 1000.0
            return {"exit": int(code), "wall_ms": wall_ms}
        except Exception as e:
            # Convert a crash into a synthetic exit code so one bad rollout
            # does not cancel its siblings; the message is reported below.
            wall_ms = (time.perf_counter() - t0) * 1000.0
            return {"exit": 99, "wall_ms": wall_ms, "error": f"{type(e).__name__}: {e}"}

    # Launch N rollouts concurrently
    tasks = [one(i) for i in range(max(1, int(parallel)))]
    results = await asyncio.gather(*tasks, return_exceptions=False)
    total_wall_ms = (time.perf_counter() - start) * 1000.0

    def _exit_code(result: dict[str, Any]) -> int:
        """Normalise a result's ``exit`` entry to an int (1 when unusable)."""
        value = result.get("exit")
        if isinstance(value, (int, float)):
            return int(value)
        if isinstance(value, str) and value.strip():
            try:
                return int(value.strip())
            except ValueError:
                return 1
        return 1

    # Compute each exit code once and reuse it for the tally and the histogram.
    exit_codes = [_exit_code(r) for r in results]
    successes = sum(1 for c in exit_codes if c == 0)
    avg_wall = sum(float(r.get("wall_ms", 0.0)) for r in results) / max(len(results), 1)

    # Print summary
    click.echo("✓ Train-step emulation complete")
    click.echo(f" parallel={parallel} successes={successes}/{len(results)} total_wall_ms={total_wall_ms:.1f} avg_rollout_wall_ms={avg_wall:.1f}")

    # Show brief failure codes to aid diagnosis
    if successes < len(results):
        # Only non-zero codes belong in the failure histogram.
        codes = dict(Counter(c for c in exit_codes if c != 0))
        click.echo(f" failure_codes={codes}")
        # Surface exceptions captured by ``one`` instead of dropping them.
        for idx, r in enumerate(results):
            error = r.get("error")
            if error:
                click.echo(f" rollout[{idx}] error: {error}", err=True)

    return 0 if successes == len(results) else 3
1133
+
1134
+
1135
@click.command("smoke")
@click.option("--url", "task_app_url", type=str, default=lambda: os.getenv("TASK_APP_URL", "http://localhost:8765"), help="Task app base URL.")
@click.option(
    "--api-key",
    type=str,
    default=lambda: os.getenv("ENVIRONMENT_API_KEY", ""),
    envvar="ENVIRONMENT_API_KEY",
    help="Environment API key (X-API-Key).",
)
@click.option("--env-name", type=str, default=None, help="Environment name to roll out (auto-detected if possible).")
@click.option("--policy-name", type=str, default="react", help="Policy name to pass to task app.")
@click.option("--model", type=str, default="gpt-5-nano", help="Model id to route in inference payload.")
@click.option(
    "--policy",
    "inference_policy",
    type=click.Choice(["mock", "gpt-5-nano", "openai", "groq"], case_sensitive=False),
    default=None,
    help="Inference route preset (mock, gpt-5-nano via Synth, OpenAI or Groq).",
)
@click.option("--inference-url", type=str, default=None, help="Override inference URL (default: Synth API chat completions).")
@click.option("--max-steps", type=int, default=3, show_default=True, help="Number of agent/env step pairs.")
@click.option("--return-trace", is_flag=True, help="Request v3 trace in response if supported.")
@click.option("--use-mock/--no-mock", default=True, show_default=True, help="Use local mock inference server (GPT-5-Nano emulation).")
@click.option(
    "--mock-backend",
    type=click.Choice(["synthetic", "openai"], case_sensitive=False),
    default="synthetic",
    show_default=True,
    help="Mock inference backend: synthetic deterministic tooling or OpenAI passthrough.",
)
@click.option("--mock-port", type=int, default=0, show_default=True, help="Port for local mock inference server (0 = auto).")
@click.option("--config", type=click.Path(exists=True, dir_okay=False, path_type=Path), default=None, help="RL TOML config to derive URL/env/model.")
@click.option("--env-file", type=click.Path(exists=True, dir_okay=False, path_type=Path), default=None, help="Path to .env to load before running.")
@click.option("--rollouts", type=int, default=1, show_default=True, help="Number of rollouts (seeds 0..N-1).")
@click.option("--group-size", type=int, default=1, show_default=True, help="Completions per seed to emulate GRPO grouping.")
@click.option("--batch-size", type=int, default=None, help="Alias for rollouts; when set, overrides --rollouts.")
@click.option(
    "--parallel",
    type=int,
    default=0,
    show_default=True,
    help="Emulate a train step by running this many rollouts concurrently (0 = sequential).",
)
def command(
    task_app_url: str,
    api_key: str,
    env_name: str | None,
    policy_name: str,
    model: str,
    inference_policy: str | None,
    inference_url: str | None,
    max_steps: int,
    return_trace: bool,
    use_mock: bool,
    mock_backend: str,
    mock_port: int,
    config: Path | None,
    env_file: Path | None,
    rollouts: int,
    group_size: int,
    batch_size: int | None,
    parallel: int,
) -> None:
    """Smoke-test a Task App by emulating a trainer rollout using GPT-5-Nano.

    This command posts a minimal RL rollout to the task app, with a valid
    OpenAI-compatible inference URL including a trace correlation id, and
    validates that the response contains the fields required by the RL trainer
    (e.g. pipeline_metadata.inference_url and per-step info.meta.inference_url).

    If --config is provided, loads settings from the [smoke] section in the TOML file.
    CLI arguments override TOML values.
    """
    # NOTE: the docstring above is also the text click shows for
    # ``smoke --help``, so maintainer notes live in comments instead.
    #
    # Flow: (1) optionally auto-start sqld and the task app, (2) merge TOML
    # [smoke] values with CLI options, (3) set tracing env vars, (4) load a
    # .env file, (5) run the rollouts, (6) stop anything started in (1).
    # The process always ends via ``sys.exit`` (or ``ClickException`` when
    # auto-start fails).

    # Load [smoke] section from TOML if config is provided
    smoke_config = _load_smoke_config(config)

    # Track background processes for cleanup as (label, process) pairs.
    background_procs: list[tuple[str, Any]] = []

    def _stop_background(verb: str, timeout: float) -> None:
        """Terminate every still-running background process, killing stragglers."""
        for proc_name, proc in background_procs:
            if proc and proc.poll() is None:
                click.echo(f"[smoke] {verb} {proc_name}...", err=True)
                proc.terminate()
                try:
                    proc.wait(timeout=timeout)
                except Exception:
                    proc.kill()

    task_app_override_url = None
    try:
        # Auto-start sqld if configured
        if smoke_config.get("sqld_auto_start"):
            sqld_db_path = smoke_config.get("sqld_db_path", "./traces/local.db")
            sqld_hrana_port = smoke_config.get("sqld_hrana_port", 8080)
            sqld_http_port = smoke_config.get("sqld_http_port", 8081)

            sqld_proc = _start_sqld_server(
                db_path=sqld_db_path,
                hrana_port=sqld_hrana_port,
                http_port=sqld_http_port,
            )
            if sqld_proc:
                background_procs.append(("sqld", sqld_proc))

        # Auto-start task app if configured
        if smoke_config.get("task_app_name"):
            task_app_name = smoke_config["task_app_name"]
            task_app_port = smoke_config.get("task_app_port", 8765)
            task_app_env_file = smoke_config.get("task_app_env_file")
            task_app_force = smoke_config.get("task_app_force", True)

            task_app_proc, task_app_url = _start_task_app_server(
                task_app_name=task_app_name,
                port=task_app_port,
                env_file=task_app_env_file,
                force=task_app_force,
            )
            background_procs.append(("task_app", task_app_proc))
            task_app_override_url = task_app_url
            click.echo(f"[smoke] Task app started, will use URL: {task_app_url}", err=True)
    except Exception as exc:
        # Cleanup any processes that did start
        _stop_background("Cleaning up", 3)
        click.echo(f"[smoke] ERROR: Auto-start failed: {exc}", err=True)
        raise click.ClickException(f"Auto-start failed: {exc}") from exc

    # Everything from here on runs under one try/finally so auto-started
    # services are stopped even if configuration is interrupted.
    try:
        ctx = click.get_current_context()
        # ``ParameterSource`` only exists in click >= 8.0; fall back to the
        # default-value comparison on older versions.
        parameter_source = getattr(click.core, "ParameterSource", None)

        def _given_on_command_line(param_name: str, cli_value: Any) -> bool:
            """Return True when ``param_name`` was explicitly passed by the user."""
            if parameter_source is not None and hasattr(ctx, "get_parameter_source"):
                source = ctx.get_parameter_source(param_name)
                if source is not None:
                    # Env-var and default sources remain overridable by TOML.
                    return source is parameter_source.COMMANDLINE
            param = next((p for p in ctx.command.params if p.name == param_name), None)
            if not param:
                return True
            param_default = param.default() if callable(param.default) else param.default
            return cli_value != param_default

        def use_toml_default(param_name: str, cli_value: Any, toml_key: str) -> Any:
            """Use TOML value unless the CLI param was explicitly provided."""
            if not smoke_config or toml_key not in smoke_config:
                return cli_value
            if _given_on_command_line(param_name, cli_value):
                return cli_value
            toml_value = smoke_config[toml_key]
            click.echo(f"[smoke] Using {toml_key}={toml_value} from config", err=True)
            return toml_value

        # Apply TOML defaults (CLI args take precedence).
        if task_app_override_url:
            # The auto-started task app always wins over a configured task_url.
            task_app_url = task_app_override_url
        else:
            task_app_url = use_toml_default("task_app_url", task_app_url, "task_url")
        env_name = use_toml_default("env_name", env_name, "env_name")
        policy_name = use_toml_default("policy_name", policy_name, "policy_name")
        model = use_toml_default("model", model, "model")
        inference_policy = use_toml_default("inference_policy", inference_policy, "policy")
        inference_url = use_toml_default("inference_url", inference_url, "inference_url")
        max_steps = use_toml_default("max_steps", max_steps, "max_steps")
        return_trace = use_toml_default("return_trace", return_trace, "return_trace")
        use_mock = use_toml_default("use_mock", use_mock, "use_mock")
        mock_backend = use_toml_default("mock_backend", mock_backend, "mock_backend")
        mock_port = use_toml_default("mock_port", mock_port, "mock_port")
        api_key = use_toml_default("api_key", api_key, "api_key")

        # Auto-configure tracing to avoid interactive prompts
        try:
            os.environ.setdefault("CI", "true")
            os.environ.setdefault("SYNTH_TRACING_AUTO_YES", "1")
            # Derive a default traces directory relative to CWD
            traces_dir = os.environ.get("SYNTH_TRACES_DIR")
            if not traces_dir:
                traces_dir = str((Path.cwd() / "traces" / "v3").resolve())
                os.environ["SYNTH_TRACES_DIR"] = traces_dir
            with contextlib.suppress(Exception):
                Path(traces_dir).mkdir(parents=True, exist_ok=True)
            _ensure_local_libsql()
            # Prefer a libsql/turso/sqld URL when provided to enable concurrent writes
            libsql_url = (
                os.getenv("TRACING_DB_URL")
                or os.getenv("LIBSQL_URL")
                or os.getenv("TURSO_DATABASE_URL")
                or os.getenv("LIBSQL_HTTP_URL")
            )
            if libsql_url:
                os.environ.setdefault("LIBSQL_URL", libsql_url)

            auth_hint = (
                os.getenv("TRACING_DB_AUTH_TOKEN")
                or os.getenv("LIBSQL_AUTH_TOKEN")
                or os.getenv("TURSO_AUTH_TOKEN")
            )
            if auth_hint:
                os.environ.setdefault("LIBSQL_AUTH_TOKEN", auth_hint)

            resolved_url, resolved_token = resolve_trace_db_settings()
            os.environ.setdefault("SYNTH_TRACES_DB", resolved_url)
            if resolved_token and not (
                os.getenv("LIBSQL_AUTH_TOKEN") or os.getenv("TURSO_AUTH_TOKEN")
            ):
                os.environ["LIBSQL_AUTH_TOKEN"] = resolved_token

            _refresh_tracing_config()
        except Exception:
            # Tracing setup is best-effort; the smoke test runs without it.
            pass

        # Load env file(s) before resolving API key
        try:
            if env_file is not None:
                # Explicit --env-file takes precedence
                try:
                    from dotenv import load_dotenv as _ld
                    _ld(env_file, override=False)
                except Exception:
                    pass
            else:
                # Best-effort auto-discovery from CWD
                try:
                    from dotenv import find_dotenv as _fd
                    from dotenv import load_dotenv as _ld
                    _ld(_fd(usecwd=True), override=False)
                except Exception:
                    pass

            # If api_key not passed, try to read from env now
            if not api_key:
                api_key = os.getenv("ENVIRONMENT_API_KEY", "")
        except Exception:
            pass

        # Arguments shared by the sequential and parallel runners.
        common_kwargs: dict[str, Any] = {
            "task_app_url": task_app_url,
            "api_key": (api_key or None),
            "env_name_opt": env_name,
            "policy_name": policy_name,
            "model": model,
            "inference_policy": inference_policy,
            "inference_url_opt": inference_url,
            "max_steps": max_steps,
            "return_trace": return_trace,
            "use_mock": use_mock,
            "mock_backend": mock_backend,
            "mock_port": mock_port,
            "config_path": config,
        }
        if parallel and parallel > 0:
            # Parallel mode ignores --rollouts/--group-size/--batch-size.
            runner = _run_train_step(**common_kwargs, parallel=parallel)
        else:
            runner = _run_smoke_async(
                **common_kwargs,
                rollouts=rollouts,
                group_size=group_size,
                batch_size=batch_size,
            )
        exit_code = asyncio.run(runner)
    except KeyboardInterrupt:
        click.echo("Interrupted", err=True)
        sys.exit(130)
    finally:
        # Cleanup background processes
        _stop_background("Stopping", 5)
        if background_procs:
            click.echo("[smoke] Background services stopped", err=True)

    sys.exit(exit_code)
1425
+
1426
+
1427
def register(cli: click.Group) -> None:
    """Attach the ``smoke`` command defined in this module to ``cli``."""
    smoke_command = command
    cli.add_command(smoke_command)