synth-ai 0.2.14__py3-none-any.whl → 0.4.4__py3-none-any.whl

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (1086)
  1. synth_ai/__init__.py +25 -46
  2. synth_ai/__main__.py +30 -3
  3. synth_ai/cli/__init__.py +98 -72
  4. synth_ai/cli/__main__.py +42 -0
  5. synth_ai/cli/_internal/__init__.py +5 -0
  6. synth_ai/cli/_internal/modal_wrapper.py +31 -0
  7. synth_ai/cli/_internal/storage.py +20 -0
  8. synth_ai/cli/_internal/typer_patch.py +47 -0
  9. synth_ai/cli/_internal/validate_task_app.py +29 -0
  10. synth_ai/cli/agents/__init__.py +17 -0
  11. synth_ai/cli/agents/claude.py +77 -0
  12. synth_ai/cli/agents/codex.py +265 -0
  13. synth_ai/cli/agents/opencode.py +253 -0
  14. synth_ai/cli/commands/__init__.py +18 -0
  15. synth_ai/cli/commands/artifacts/__init__.py +13 -0
  16. synth_ai/cli/commands/artifacts/client.py +119 -0
  17. synth_ai/cli/commands/artifacts/config.py +57 -0
  18. synth_ai/cli/commands/artifacts/core.py +24 -0
  19. synth_ai/cli/commands/artifacts/download.py +188 -0
  20. synth_ai/cli/commands/artifacts/export.py +186 -0
  21. synth_ai/cli/commands/artifacts/list.py +156 -0
  22. synth_ai/cli/commands/artifacts/parsing.py +250 -0
  23. synth_ai/cli/commands/artifacts/show.py +336 -0
  24. synth_ai/cli/commands/demo/__init__.py +3 -0
  25. synth_ai/cli/commands/demo/core.py +153 -0
  26. synth_ai/cli/commands/eval/__init__.py +10 -0
  27. synth_ai/cli/commands/eval/config.py +338 -0
  28. synth_ai/cli/commands/eval/core.py +258 -0
  29. synth_ai/cli/commands/eval/runner.py +704 -0
  30. synth_ai/cli/commands/eval/validation.py +60 -0
  31. synth_ai/cli/commands/filter/__init__.py +12 -0
  32. synth_ai/cli/commands/filter/core.py +424 -0
  33. synth_ai/cli/commands/filter/errors.py +55 -0
  34. synth_ai/cli/commands/filter/validation.py +77 -0
  35. synth_ai/cli/commands/help/__init__.py +185 -0
  36. synth_ai/cli/commands/help/core.py +72 -0
  37. synth_ai/cli/commands/scan/__init__.py +19 -0
  38. synth_ai/cli/commands/scan/cloudflare_scanner.py +403 -0
  39. synth_ai/cli/commands/scan/core.py +344 -0
  40. synth_ai/cli/commands/scan/health_checker.py +242 -0
  41. synth_ai/cli/commands/scan/local_scanner.py +278 -0
  42. synth_ai/cli/commands/scan/models.py +83 -0
  43. synth_ai/cli/commands/smoke/__init__.py +7 -0
  44. synth_ai/cli/commands/smoke/core.py +1428 -0
  45. synth_ai/cli/commands/status/__init__.py +3 -0
  46. synth_ai/cli/commands/status/client.py +91 -0
  47. synth_ai/cli/commands/status/config.py +12 -0
  48. synth_ai/cli/commands/status/errors.py +11 -0
  49. synth_ai/cli/commands/status/subcommands/__init__.py +3 -0
  50. synth_ai/cli/commands/status/subcommands/config.py +13 -0
  51. synth_ai/cli/commands/status/subcommands/files.py +34 -0
  52. synth_ai/cli/commands/status/subcommands/jobs.py +51 -0
  53. synth_ai/cli/commands/status/subcommands/models.py +35 -0
  54. synth_ai/cli/commands/status/subcommands/runs.py +34 -0
  55. synth_ai/cli/commands/status/subcommands/session.py +77 -0
  56. synth_ai/cli/commands/status/subcommands/summary.py +39 -0
  57. synth_ai/cli/commands/status/subcommands/utils.py +41 -0
  58. synth_ai/cli/commands/status/utils.py +23 -0
  59. synth_ai/cli/commands/train/__init__.py +51 -0
  60. synth_ai/cli/commands/train/core.py +22 -0
  61. synth_ai/cli/commands/train/errors.py +117 -0
  62. synth_ai/cli/commands/train/prompt_learning_validation.py +632 -0
  63. synth_ai/cli/commands/train/validation.py +392 -0
  64. synth_ai/cli/commands/train/verifier_schemas.py +200 -0
  65. synth_ai/cli/commands/train/verifier_validation.py +235 -0
  66. synth_ai/cli/demo_apps/__init__.py +10 -0
  67. synth_ai/cli/demo_apps/core/__init__.py +28 -0
  68. synth_ai/cli/demo_apps/core/cli.py +1735 -0
  69. synth_ai/cli/demo_apps/crafter/crafter_fft_4b.toml +55 -0
  70. synth_ai/cli/demo_apps/crafter/grpo_crafter_task_app.py +186 -0
  71. synth_ai/cli/demo_apps/crafter/rl_from_base_qwen4b.toml +74 -0
  72. synth_ai/cli/demo_apps/demo_registry.py +176 -0
  73. synth_ai/cli/demo_apps/demo_task_apps/core.py +440 -0
  74. synth_ai/cli/demo_apps/demo_task_apps/crafter/__init__.py +1 -0
  75. synth_ai/cli/demo_apps/demo_task_apps/crafter/grpo_crafter_task_app.py +185 -0
  76. synth_ai/cli/demo_apps/demo_task_apps/math/config.toml +73 -0
  77. synth_ai/cli/demo_apps/demo_task_apps/math/modal_task_app.py +738 -0
  78. synth_ai/cli/demo_apps/demo_task_apps/math/task_app_entry.py +39 -0
  79. synth_ai/cli/demo_apps/math/__init__.py +1 -0
  80. synth_ai/cli/demo_apps/math/_common.py +16 -0
  81. synth_ai/cli/demo_apps/math/app.py +38 -0
  82. synth_ai/cli/demo_apps/math/config.toml +75 -0
  83. synth_ai/cli/demo_apps/math/deploy_modal.py +54 -0
  84. synth_ai/cli/demo_apps/math/modal_task_app.py +698 -0
  85. synth_ai/cli/demo_apps/math/task_app_entry.py +53 -0
  86. synth_ai/cli/demo_apps/mipro/main.py +271 -0
  87. synth_ai/cli/demo_apps/mipro/task_app.py +911 -0
  88. synth_ai/cli/demo_apps/mipro/train_cfg.toml +92 -0
  89. synth_ai/cli/demos/__init__.py +12 -0
  90. synth_ai/cli/demos/demo.py +32 -0
  91. synth_ai/cli/demos/rl_demo.py +254 -0
  92. synth_ai/cli/deploy.py +216 -0
  93. synth_ai/cli/infra/__init__.py +14 -0
  94. synth_ai/cli/infra/balance.py +216 -0
  95. synth_ai/cli/infra/mcp.py +35 -0
  96. synth_ai/cli/infra/modal_app.py +36 -0
  97. synth_ai/cli/infra/setup.py +69 -0
  98. synth_ai/cli/infra/status.py +16 -0
  99. synth_ai/cli/infra/turso.py +77 -0
  100. synth_ai/cli/lib/__init__.py +10 -0
  101. synth_ai/cli/lib/agents.py +76 -0
  102. synth_ai/cli/lib/apps/modal_app.py +101 -0
  103. synth_ai/cli/lib/apps/task_app.py +642 -0
  104. synth_ai/cli/lib/bin.py +39 -0
  105. synth_ai/cli/lib/env.py +375 -0
  106. synth_ai/cli/lib/errors.py +85 -0
  107. synth_ai/cli/lib/modal.py +315 -0
  108. synth_ai/cli/lib/plotting.py +126 -0
  109. synth_ai/cli/lib/prompt_args.py +39 -0
  110. synth_ai/cli/lib/prompts.py +284 -0
  111. synth_ai/cli/lib/sqld.py +122 -0
  112. synth_ai/cli/lib/task_app_discovery.py +884 -0
  113. synth_ai/cli/lib/task_app_env.py +295 -0
  114. synth_ai/cli/lib/train_cfgs.py +300 -0
  115. synth_ai/cli/lib/tunnel_records.py +207 -0
  116. synth_ai/cli/local/__init__.py +14 -0
  117. synth_ai/cli/local/experiment_queue/__init__.py +72 -0
  118. synth_ai/cli/local/experiment_queue/api_schemas.py +221 -0
  119. synth_ai/cli/local/experiment_queue/celery_app.py +208 -0
  120. synth_ai/cli/local/experiment_queue/config.py +128 -0
  121. synth_ai/cli/local/experiment_queue/config_utils.py +272 -0
  122. synth_ai/cli/local/experiment_queue/database.py +175 -0
  123. synth_ai/cli/local/experiment_queue/dispatcher.py +119 -0
  124. synth_ai/cli/local/experiment_queue/models.py +231 -0
  125. synth_ai/cli/local/experiment_queue/progress_info.py +160 -0
  126. synth_ai/cli/local/experiment_queue/results.py +373 -0
  127. synth_ai/cli/local/experiment_queue/schemas.py +131 -0
  128. synth_ai/cli/local/experiment_queue/service.py +344 -0
  129. synth_ai/cli/local/experiment_queue/status.py +372 -0
  130. synth_ai/cli/local/experiment_queue/status_tracker.py +360 -0
  131. synth_ai/cli/local/experiment_queue/tasks.py +1984 -0
  132. synth_ai/cli/local/experiment_queue/trace_storage.py +65 -0
  133. synth_ai/cli/local/experiment_queue/validation.py +157 -0
  134. synth_ai/cli/local/session/__init__.py +92 -0
  135. synth_ai/cli/local/session/client.py +383 -0
  136. synth_ai/cli/local/session/constants.py +63 -0
  137. synth_ai/cli/local/session/exceptions.py +105 -0
  138. synth_ai/cli/local/session/manager.py +139 -0
  139. synth_ai/cli/local/session/models.py +89 -0
  140. synth_ai/cli/local/session/query.py +110 -0
  141. synth_ai/cli/root.py +30 -6
  142. synth_ai/cli/task_apps/__init__.py +37 -0
  143. synth_ai/cli/task_apps/commands.py +3145 -0
  144. synth_ai/cli/task_apps/deploy.py +7 -0
  145. synth_ai/cli/task_apps/list.py +26 -0
  146. synth_ai/cli/task_apps/main.py +36 -0
  147. synth_ai/cli/task_apps/modal_serve.py +11 -0
  148. synth_ai/cli/task_apps/serve.py +11 -0
  149. synth_ai/cli/training/__init__.py +8 -0
  150. synth_ai/cli/training/train.py +5 -0
  151. synth_ai/cli/training/train_cfg.py +34 -0
  152. synth_ai/cli/training/watch.py +506 -0
  153. synth_ai/cli/turso.py +34 -55
  154. synth_ai/cli/utils/__init__.py +8 -0
  155. synth_ai/cli/utils/experiments.py +235 -0
  156. synth_ai/cli/utils/queue.py +504 -0
  157. synth_ai/cli/utils/recent.py +133 -0
  158. synth_ai/cli/utils/traces.py +164 -0
  159. synth_ai/contracts/__init__.py +67 -0
  160. synth_ai/core/__init__.py +100 -0
  161. synth_ai/core/_utils/__init__.py +54 -0
  162. synth_ai/core/_utils/base_url.py +10 -0
  163. synth_ai/core/_utils/http.py +10 -0
  164. synth_ai/core/_utils/prompts.py +14 -0
  165. synth_ai/core/_utils/task_app_state.py +12 -0
  166. synth_ai/core/_utils/user_config.py +10 -0
  167. synth_ai/core/apps/common.py +116 -0
  168. synth_ai/core/auth.py +95 -0
  169. synth_ai/core/cfgs.py +240 -0
  170. synth_ai/core/config/__init__.py +16 -0
  171. synth_ai/core/config/base.py +168 -0
  172. synth_ai/core/config/resolver.py +89 -0
  173. synth_ai/core/env.py +231 -0
  174. synth_ai/core/errors.py +125 -0
  175. synth_ai/core/http.py +230 -0
  176. synth_ai/core/integrations/__init__.py +11 -0
  177. synth_ai/core/integrations/cloudflare.py +1886 -0
  178. synth_ai/core/integrations/mcp/__init__.py +6 -0
  179. synth_ai/core/integrations/mcp/__main__.py +8 -0
  180. synth_ai/core/integrations/mcp/claude.py +36 -0
  181. synth_ai/core/integrations/mcp/main.py +254 -0
  182. synth_ai/core/integrations/mcp/setup.py +100 -0
  183. synth_ai/core/integrations/modal.py +277 -0
  184. synth_ai/core/json.py +72 -0
  185. synth_ai/core/log_filter.py +99 -0
  186. synth_ai/core/logging.py +82 -0
  187. synth_ai/core/paths.py +107 -0
  188. synth_ai/core/pricing.py +109 -0
  189. synth_ai/core/process.py +233 -0
  190. synth_ai/core/ssl.py +25 -0
  191. synth_ai/core/storage/__init__.py +71 -0
  192. synth_ai/core/task_app_state.py +318 -0
  193. synth_ai/core/telemetry.py +282 -0
  194. synth_ai/core/tracing_v3/__init__.py +99 -0
  195. synth_ai/core/tracing_v3/abstractions.py +348 -0
  196. synth_ai/core/tracing_v3/config.py +229 -0
  197. synth_ai/core/tracing_v3/constants.py +21 -0
  198. synth_ai/core/tracing_v3/db_config.py +182 -0
  199. synth_ai/core/tracing_v3/decorators.py +401 -0
  200. synth_ai/core/tracing_v3/llm_call_record_helpers.py +437 -0
  201. synth_ai/core/tracing_v3/migration_helper.py +119 -0
  202. synth_ai/core/tracing_v3/session_tracer.py +542 -0
  203. synth_ai/core/tracing_v3/storage/base.py +211 -0
  204. synth_ai/core/tracing_v3/storage/config.py +109 -0
  205. synth_ai/core/tracing_v3/storage/factory.py +39 -0
  206. synth_ai/core/tracing_v3/trace_utils.py +326 -0
  207. synth_ai/core/tracing_v3/turso/daemon.py +278 -0
  208. synth_ai/core/tracing_v3/turso/models.py +470 -0
  209. synth_ai/core/tracing_v3/turso/native_manager.py +1385 -0
  210. synth_ai/core/tracing_v3/utils.py +108 -0
  211. synth_ai/core/urls.py +18 -0
  212. synth_ai/core/user_config.py +137 -0
  213. synth_ai/core/uvicorn.py +222 -0
  214. synth_ai/data/__init__.py +83 -0
  215. synth_ai/data/enums.py +122 -0
  216. synth_ai/data/rewards.py +249 -0
  217. synth_ai/data/traces.py +35 -0
  218. synth_ai/products/__init__.py +6 -0
  219. synth_ai/products/graph_evolve/__init__.py +45 -0
  220. synth_ai/products/graph_evolve/client.py +226 -0
  221. synth_ai/products/graph_evolve/config.py +591 -0
  222. synth_ai/products/graph_evolve/converters/__init__.py +42 -0
  223. synth_ai/products/graph_evolve/converters/openai_sft.py +484 -0
  224. synth_ai/products/graph_evolve/examples/hotpotqa/config.toml +109 -0
  225. synth_ai/products/graph_evolve/run.py +222 -0
  226. synth_ai/products/graph_gepa/__init__.py +23 -0
  227. synth_ai/products/graph_gepa/converters/__init__.py +19 -0
  228. synth_ai/products/graph_gepa/converters/openai_sft.py +29 -0
  229. synth_ai/sdk/__init__.py +129 -0
  230. synth_ai/sdk/api/__init__.py +1 -0
  231. synth_ai/sdk/api/eval/__init__.py +33 -0
  232. synth_ai/sdk/api/eval/job.py +732 -0
  233. synth_ai/sdk/api/models/supported.py +514 -0
  234. synth_ai/sdk/api/research_agent/__init__.py +296 -0
  235. synth_ai/sdk/api/train/__init__.py +85 -0
  236. synth_ai/sdk/api/train/builders.py +1076 -0
  237. synth_ai/sdk/api/train/cli.py +2196 -0
  238. synth_ai/sdk/api/train/config_finder.py +267 -0
  239. synth_ai/sdk/api/train/configs/__init__.py +67 -0
  240. synth_ai/sdk/api/train/configs/prompt_learning.py +1800 -0
  241. synth_ai/sdk/api/train/configs/rl.py +436 -0
  242. synth_ai/sdk/api/train/configs/sft.py +263 -0
  243. synth_ai/sdk/api/train/configs/shared.py +81 -0
  244. synth_ai/sdk/api/train/context_learning.py +312 -0
  245. synth_ai/sdk/api/train/env_resolver.py +418 -0
  246. synth_ai/sdk/api/train/graph_validators.py +216 -0
  247. synth_ai/sdk/api/train/graphgen.py +1102 -0
  248. synth_ai/sdk/api/train/graphgen_models.py +873 -0
  249. synth_ai/sdk/api/train/graphgen_validators.py +109 -0
  250. synth_ai/sdk/api/train/local_api.py +10 -0
  251. synth_ai/sdk/api/train/pollers.py +160 -0
  252. synth_ai/sdk/api/train/progress/__init__.py +97 -0
  253. synth_ai/sdk/api/train/progress/dataclasses.py +569 -0
  254. synth_ai/sdk/api/train/progress/events.py +326 -0
  255. synth_ai/sdk/api/train/progress/results.py +428 -0
  256. synth_ai/sdk/api/train/progress/tracker.py +641 -0
  257. synth_ai/sdk/api/train/prompt_learning.py +800 -0
  258. synth_ai/sdk/api/train/rl.py +478 -0
  259. synth_ai/sdk/api/train/sft.py +398 -0
  260. synth_ai/sdk/api/train/summary.py +522 -0
  261. synth_ai/sdk/api/train/supported_algos.py +147 -0
  262. synth_ai/sdk/api/train/task_app.py +351 -0
  263. synth_ai/sdk/api/train/utils.py +279 -0
  264. synth_ai/sdk/api/train/validators.py +2424 -0
  265. synth_ai/sdk/graphs/__init__.py +15 -0
  266. synth_ai/sdk/graphs/completions.py +776 -0
  267. synth_ai/sdk/graphs/verifier_schemas.py +222 -0
  268. synth_ai/sdk/inference/__init__.py +6 -0
  269. synth_ai/sdk/inference/client.py +128 -0
  270. synth_ai/sdk/jobs/__init__.py +16 -0
  271. synth_ai/sdk/jobs/client.py +371 -0
  272. synth_ai/sdk/learning/__init__.py +99 -0
  273. synth_ai/sdk/learning/client.py +240 -0
  274. synth_ai/sdk/learning/context_learning_client.py +531 -0
  275. synth_ai/sdk/learning/context_learning_types.py +294 -0
  276. synth_ai/sdk/learning/ft_client.py +7 -0
  277. synth_ai/sdk/learning/health.py +49 -0
  278. synth_ai/sdk/learning/jobs.py +202 -0
  279. synth_ai/sdk/learning/prompt_extraction.py +334 -0
  280. synth_ai/sdk/learning/prompt_learning_client.py +455 -0
  281. synth_ai/sdk/learning/prompt_learning_types.py +186 -0
  282. synth_ai/sdk/learning/rl/__init__.py +35 -0
  283. synth_ai/sdk/learning/rl/client.py +268 -0
  284. synth_ai/sdk/learning/rl/contracts.py +23 -0
  285. synth_ai/sdk/learning/rl/env_keys.py +166 -0
  286. synth_ai/sdk/learning/rl/secrets.py +13 -0
  287. synth_ai/sdk/learning/sft/client.py +95 -0
  288. synth_ai/sdk/learning/sft/config.py +270 -0
  289. synth_ai/sdk/learning/sft/data.py +698 -0
  290. synth_ai/sdk/learning/validators.py +52 -0
  291. synth_ai/sdk/localapi/__init__.py +40 -0
  292. synth_ai/sdk/localapi/apps/__init__.py +28 -0
  293. synth_ai/sdk/localapi/client.py +10 -0
  294. synth_ai/sdk/localapi/contracts.py +10 -0
  295. synth_ai/sdk/localapi/helpers.py +519 -0
  296. synth_ai/sdk/localapi/rollouts.py +93 -0
  297. synth_ai/sdk/localapi/server.py +29 -0
  298. synth_ai/sdk/localapi/template.py +49 -0
  299. synth_ai/sdk/streaming/__init__.py +35 -0
  300. synth_ai/sdk/streaming/config.py +94 -0
  301. synth_ai/sdk/streaming/handlers.py +1997 -0
  302. synth_ai/sdk/streaming/streamer.py +708 -0
  303. synth_ai/sdk/streaming/types.py +112 -0
  304. synth_ai/sdk/task/__init__.py +164 -0
  305. synth_ai/sdk/task/apps/__init__.py +169 -0
  306. synth_ai/sdk/task/client.py +175 -0
  307. synth_ai/sdk/task/config.py +256 -0
  308. synth_ai/sdk/task/contracts.py +340 -0
  309. synth_ai/sdk/task/datasets.py +108 -0
  310. synth_ai/sdk/task/in_process.py +1200 -0
  311. synth_ai/sdk/task/in_process_runner.py +314 -0
  312. synth_ai/sdk/task/inference_api.py +299 -0
  313. synth_ai/sdk/task/proxy.py +287 -0
  314. synth_ai/sdk/task/rubrics/__init__.py +54 -0
  315. synth_ai/sdk/task/rubrics/loaders.py +156 -0
  316. synth_ai/sdk/task/rubrics/strict.py +148 -0
  317. synth_ai/sdk/task/rubrics.py +219 -0
  318. synth_ai/sdk/task/server.py +640 -0
  319. synth_ai/sdk/task/trace_correlation_helpers.py +557 -0
  320. synth_ai/sdk/task/tracing_utils.py +95 -0
  321. synth_ai/sdk/task/validators.py +441 -0
  322. synth_ai/sdk/training/__init__.py +93 -0
  323. synth_ai/sdk/tunnels/__init__.py +118 -0
  324. synth_ai/sdk/tunnels/cleanup.py +83 -0
  325. synth_ai/sdk/tunnels/ports.py +120 -0
  326. synth_ai/sdk/tunnels/tunneled_api.py +363 -0
  327. synth_ai/utils/__init__.py +213 -0
  328. synth_ai-0.4.4.dist-info/METADATA +262 -0
  329. synth_ai-0.4.4.dist-info/RECORD +369 -0
  330. synth_ai-0.4.4.dist-info/top_level.txt +1 -0
  331. examples/__init__.py +0 -16
  332. examples/analyze_semantic_words.sh +0 -17
  333. examples/crafter_debug_render.py +0 -186
  334. examples/dev/qwen3_32b_qlora_4xh100.toml +0 -40
  335. examples/multi_step/configs/README_verilog_rl.md +0 -77
  336. examples/multi_step/configs/VERILOG_REWARDS.md +0 -90
  337. examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +0 -183
  338. examples/multi_step/configs/crafter_eval_synth_qwen4b.toml +0 -35
  339. examples/multi_step/configs/crafter_eval_text_only_groq_qwen32b.toml +0 -36
  340. examples/multi_step/configs/crafter_rl_outcome.toml +0 -74
  341. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +0 -187
  342. examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +0 -83
  343. examples/multi_step/configs/crafter_rl_stepwise_simple.toml +0 -78
  344. examples/multi_step/configs/crafter_synth_backend.md +0 -40
  345. examples/multi_step/configs/verilog_eval_groq_qwen32b.toml +0 -31
  346. examples/multi_step/configs/verilog_eval_synth_qwen8b.toml +0 -33
  347. examples/multi_step/configs/verilog_rl_lora.toml +0 -190
  348. examples/multi_step/crafter_rl_lora.md +0 -70
  349. examples/multi_step/judges/crafter_backend_judge.py +0 -220
  350. examples/multi_step/judges/verilog_backend_judge.py +0 -234
  351. examples/multi_step/readme.md +0 -48
  352. examples/multi_step/sse_metrics_streaming_notes.md +0 -357
  353. examples/multi_step/task_app_config_notes.md +0 -494
  354. examples/multi_step/verilog_rl_lora.md +0 -218
  355. examples/qwen_coder/README.md +0 -102
  356. examples/qwen_coder/_shared.py +0 -113
  357. examples/qwen_coder/configs/coder_lora_30b.toml +0 -61
  358. examples/qwen_coder/configs/coder_lora_4b.toml +0 -57
  359. examples/qwen_coder/configs/coder_lora_small.toml +0 -58
  360. examples/qwen_coder/generate_dataset.py +0 -98
  361. examples/qwen_coder/infer_ft_smoke.py +0 -65
  362. examples/qwen_coder/infer_prod_proxy.py +0 -73
  363. examples/qwen_coder/infer_via_synth.py +0 -87
  364. examples/qwen_coder/scripts/infer_coder.sh +0 -19
  365. examples/qwen_coder/scripts/train_coder_30b.sh +0 -22
  366. examples/qwen_coder/sft_full_17b.py +0 -103
  367. examples/qwen_coder/sft_lora_30b.py +0 -110
  368. examples/qwen_coder/subset_jsonl.py +0 -39
  369. examples/qwen_coder/todos.md +0 -38
  370. examples/qwen_coder/validate_jsonl.py +0 -60
  371. examples/rl/README.md +0 -169
  372. examples/rl/download_dataset.py +0 -80
  373. examples/run_crafter_demo.sh +0 -10
  374. examples/sft/README.md +0 -139
  375. examples/sft/configs/crafter_fft_qwen0p6b.toml +0 -44
  376. examples/sft/configs/crafter_lora_qwen0p6b.toml +0 -45
  377. examples/sft/evaluate.py +0 -119
  378. examples/sft/export_dataset.py +0 -117
  379. examples/sft/generate_traces.py +0 -164
  380. examples/swe/__init__.py +0 -12
  381. examples/swe/task_app/README.md +0 -105
  382. examples/swe/task_app/__init__.py +0 -2
  383. examples/swe/task_app/grpo_swe_mini.py +0 -601
  384. examples/swe/task_app/grpo_swe_mini_task_app.py +0 -136
  385. examples/swe/task_app/hosted/README.md +0 -173
  386. examples/swe/task_app/hosted/__init__.py +0 -5
  387. examples/swe/task_app/hosted/branching.py +0 -143
  388. examples/swe/task_app/hosted/environment_routes.py +0 -1289
  389. examples/swe/task_app/hosted/envs/__init__.py +0 -1
  390. examples/swe/task_app/hosted/envs/crafter/__init__.py +0 -6
  391. examples/swe/task_app/hosted/envs/crafter/app.py +0 -1
  392. examples/swe/task_app/hosted/envs/crafter/environment.py +0 -522
  393. examples/swe/task_app/hosted/envs/crafter/policy.py +0 -478
  394. examples/swe/task_app/hosted/envs/crafter/react_agent.py +0 -108
  395. examples/swe/task_app/hosted/envs/crafter/shared.py +0 -305
  396. examples/swe/task_app/hosted/envs/crafter/tools.py +0 -47
  397. examples/swe/task_app/hosted/envs/mini_swe/__init__.py +0 -8
  398. examples/swe/task_app/hosted/envs/mini_swe/environment.py +0 -1164
  399. examples/swe/task_app/hosted/envs/mini_swe/policy.py +0 -355
  400. examples/swe/task_app/hosted/envs/mini_swe/shared.py +0 -83
  401. examples/swe/task_app/hosted/envs/mini_swe/tools.py +0 -96
  402. examples/swe/task_app/hosted/hosted_app.py +0 -204
  403. examples/swe/task_app/hosted/inference/__init__.py +0 -5
  404. examples/swe/task_app/hosted/inference/openai_client.py +0 -618
  405. examples/swe/task_app/hosted/main.py +0 -100
  406. examples/swe/task_app/hosted/policy_routes.py +0 -1079
  407. examples/swe/task_app/hosted/registry.py +0 -195
  408. examples/swe/task_app/hosted/rollout.py +0 -1911
  409. examples/swe/task_app/hosted/storage/__init__.py +0 -5
  410. examples/swe/task_app/hosted/storage/volume.py +0 -211
  411. examples/swe/task_app/hosted/test_agents.py +0 -161
  412. examples/swe/task_app/hosted/test_service.py +0 -136
  413. examples/swe/task_app/hosted/utils.py +0 -62
  414. examples/task_apps/IMAGE_ONLY_EVAL_QUICKSTART.md +0 -258
  415. examples/task_apps/TESTING.md +0 -275
  416. examples/task_apps/crafter/CREATE_SFT_DATASET.md +0 -273
  417. examples/task_apps/crafter/EVAL_IMAGE_ONLY_RESULTS.md +0 -152
  418. examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +0 -174
  419. examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +0 -268
  420. examples/task_apps/crafter/QUERY_EXAMPLES.md +0 -203
  421. examples/task_apps/crafter/README_IMAGE_ONLY_EVAL.md +0 -316
  422. examples/task_apps/crafter/__init__.py +0 -0
  423. examples/task_apps/crafter/eval_image_only_gpt4o.toml +0 -28
  424. examples/task_apps/crafter/eval_text_only_groq_llama.toml +0 -36
  425. examples/task_apps/crafter/filter_sft_dataset.toml +0 -16
  426. examples/task_apps/crafter/task_app/README.md +0 -42
  427. examples/task_apps/crafter/task_app/__init__.py +0 -5
  428. examples/task_apps/crafter/task_app/grpo_crafter.py +0 -973
  429. examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +0 -146
  430. examples/task_apps/crafter/task_app/synth_envs_hosted/README.md +0 -173
  431. examples/task_apps/crafter/task_app/synth_envs_hosted/__init__.py +0 -5
  432. examples/task_apps/crafter/task_app/synth_envs_hosted/branching.py +0 -143
  433. examples/task_apps/crafter/task_app/synth_envs_hosted/environment_routes.py +0 -1226
  434. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/__init__.py +0 -1
  435. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/__init__.py +0 -6
  436. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/app.py +0 -1
  437. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/environment.py +0 -532
  438. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +0 -547
  439. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +0 -123
  440. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/shared.py +0 -305
  441. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/tools.py +0 -47
  442. examples/task_apps/crafter/task_app/synth_envs_hosted/hosted_app.py +0 -204
  443. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/__init__.py +0 -5
  444. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +0 -704
  445. examples/task_apps/crafter/task_app/synth_envs_hosted/main.py +0 -100
  446. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +0 -1152
  447. examples/task_apps/crafter/task_app/synth_envs_hosted/registry.py +0 -195
  448. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +0 -2160
  449. examples/task_apps/crafter/task_app/synth_envs_hosted/storage/__init__.py +0 -5
  450. examples/task_apps/crafter/task_app/synth_envs_hosted/storage/volume.py +0 -211
  451. examples/task_apps/crafter/task_app/synth_envs_hosted/test_agents.py +0 -161
  452. examples/task_apps/crafter/task_app/synth_envs_hosted/test_service.py +0 -136
  453. examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +0 -218
  454. examples/task_apps/dev/pokemon_emerald/__init__.py +0 -2
  455. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/README.md +0 -811
  456. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/__init__.py +0 -120
  457. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/action.py +0 -160
  458. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/memory.py +0 -155
  459. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/perception.py +0 -69
  460. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/planning.py +0 -96
  461. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/simple.py +0 -1502
  462. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/system_prompt.py +0 -4
  463. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/grab_map.py +0 -68
  464. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/manual.py +0 -216
  465. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/__init__.py +0 -35
  466. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emerald_utils.py +0 -631
  467. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emulator.py +0 -1544
  468. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/enums.py +0 -1428
  469. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/memory_reader.py +0 -4848
  470. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/types.py +0 -41
  471. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/utils.py +0 -298
  472. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pyproject.toml +0 -95
  473. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/run.py +0 -204
  474. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/__init__.py +0 -0
  475. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/app.py +0 -2152
  476. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/client.py +0 -429
  477. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/frame_server.py +0 -155
  478. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/README.md +0 -78
  479. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/__init__.py +0 -0
  480. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/run_tests.py +0 -122
  481. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_direct.py +0 -76
  482. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_prompts.py +0 -413
  483. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_battle_state_formatting.py +0 -204
  484. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection.py +0 -133
  485. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection_comprehensive.py +0 -229
  486. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_direct_agent_emulator.py +0 -300
  487. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_fps_adjustment_pytest.py +0 -205
  488. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_direct.py +0 -200
  489. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_transition.py +0 -284
  490. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_map_ground_truth_comparison.py +0 -468
  491. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_memory_map.py +0 -575
  492. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_server_map_validation.py +0 -311
  493. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_torchic_state.py +0 -259
  494. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/__init__.py +0 -0
  495. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/anticheat.py +0 -372
  496. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/checkpoint.py +0 -296
  497. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/error_handler.py +0 -275
  498. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/get_local_ip.py +0 -22
  499. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/helpers.py +0 -44
  500. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/llm_logger.py +0 -514
  501. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_formatter.py +0 -415
  502. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher.py +0 -1763
  503. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher_singleton.py +0 -33
  504. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_trimmer.py +0 -106
  505. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_visualizer.py +0 -334
  506. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/ocr_dialogue.py +0 -1020
  507. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/recording.py +0 -188
  508. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/state_formatter.py +0 -1481
  509. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/vlm.py +0 -862
  510. examples/task_apps/dev/pokemon_emerald/modal_app.py +0 -114
  511. examples/task_apps/dev/pokemon_emerald/task_app/README.md +0 -81
  512. examples/task_apps/dev/pokemon_emerald/task_app/__init__.py +0 -6
  513. examples/task_apps/dev/pokemon_emerald/task_app/pokemon_emerald.py +0 -685
  514. examples/task_apps/enron/__init__.py +0 -1
  515. examples/task_apps/enron/eval_groq_qwen32.toml +0 -16
  516. examples/task_apps/enron/filter_sft.toml +0 -5
  517. examples/task_apps/enron/task_app/README.md +0 -14
  518. examples/task_apps/enron/task_app/__init__.py +0 -1
  519. examples/task_apps/enron/task_app/grpo_enron.py +0 -906
  520. examples/task_apps/enron/task_app/grpo_enron_task_app.py +0 -146
  521. examples/task_apps/enron/tests/__init__.py +0 -4
  522. examples/task_apps/enron/tests/conftest.py +0 -115
  523. examples/task_apps/enron/tests/integration/__init__.py +0 -4
  524. examples/task_apps/enron/tests/integration/test_enron_eval.py +0 -179
  525. examples/task_apps/enron/tests/integration/test_enron_rollout.py +0 -135
  526. examples/task_apps/enron/tests/unit/__init__.py +0 -4
  527. examples/task_apps/enron/tests/unit/test_enron_environment.py +0 -126
  528. examples/task_apps/math/README.md +0 -22
  529. examples/task_apps/math/__init__.py +0 -0
  530. examples/task_apps/math/math_single_step.py +0 -1000
  531. examples/task_apps/math/math_task_app.py +0 -115
  532. examples/task_apps/pokemon_battle/__init__.py +0 -2
  533. examples/task_apps/pokemon_battle/modal_app.py +0 -104
  534. examples/task_apps/pokemon_battle/task_app/README.md +0 -68
  535. examples/task_apps/pokemon_battle/task_app/__init__.py +0 -6
  536. examples/task_apps/pokemon_battle/task_app/pokemon_showdown.py +0 -932
  537. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_COMPLETE.md +0 -283
  538. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_STATUS.md +0 -155
  539. examples/task_apps/pokemon_red/README.md +0 -357
  540. examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +0 -415
  541. examples/task_apps/pokemon_red/__init__.py +0 -3
  542. examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +0 -29
  543. examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +0 -225
  544. examples/task_apps/pokemon_red/pallet_town_rl_config.toml +0 -75
  545. examples/task_apps/pokemon_red/task_app.py +0 -799
  546. examples/task_apps/pokemon_red/test_pallet_town_rewards.py +0 -193
  547. examples/task_apps/sokoban/README.md +0 -307
  548. examples/task_apps/sokoban/__init__.py +0 -3
  549. examples/task_apps/sokoban/eval_groq_qwen32.toml +0 -16
  550. examples/task_apps/sokoban/eval_openai_gpt5.toml +0 -16
  551. examples/task_apps/sokoban/filter_sft.toml +0 -5
  552. examples/task_apps/sokoban/task_app.py +0 -1058
  553. examples/task_apps/sokoban/tests/__init__.py +0 -4
  554. examples/task_apps/sokoban/tests/conftest.py +0 -113
  555. examples/task_apps/sokoban/tests/integration/__init__.py +0 -4
  556. examples/task_apps/sokoban/tests/integration/test_sokoban_eval.py +0 -57
  557. examples/task_apps/sokoban/tests/integration/test_sokoban_rollout.py +0 -198
  558. examples/task_apps/sokoban/tests/unit/__init__.py +0 -4
  559. examples/task_apps/sokoban/tests/unit/test_sokoban_environment.py +0 -114
  560. examples/task_apps/verilog/__init__.py +0 -1
  561. examples/task_apps/verilog/eval_groq_qwen32b.toml +0 -24
  562. examples/task_apps/verilog/filter_sft.toml +0 -5
  563. examples/task_apps/verilog/task_app/README.md +0 -12
  564. examples/task_apps/verilog/task_app/__init__.py +0 -1
  565. examples/task_apps/verilog/task_app/grpo_verilog.py +0 -1166
  566. examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +0 -145
  567. examples/task_apps/verilog/tests/__init__.py +0 -4
  568. examples/task_apps/verilog/tests/conftest.py +0 -115
  569. examples/task_apps/verilog/tests/integration/__init__.py +0 -4
  570. examples/task_apps/verilog/tests/integration/test_verilog_eval.py +0 -181
  571. examples/task_apps/verilog/tests/integration/test_verilog_rollout.py +0 -55
  572. examples/task_apps/verilog/tests/unit/__init__.py +0 -4
  573. examples/task_apps/verilog/tests/unit/test_verilog_scoring.py +0 -118
  574. examples/vlm/PROPOSAL.md +0 -53
  575. examples/vlm/README.md +0 -68
  576. examples/vlm/configs/crafter_vlm_gpt4o.toml +0 -44
  577. examples/vlm/crafter_image_only_agent.py +0 -207
  578. examples/vlm/crafter_openai_vlm_agent.py +0 -277
  579. examples/vlm/filter_image_rows.py +0 -63
  580. examples/vlm/run_crafter_vlm_benchmark.py +0 -316
  581. examples/warming_up_to_rl/analyze_trace_db.py +0 -422
  582. examples/warming_up_to_rl/configs/crafter_fft.toml +0 -48
  583. examples/warming_up_to_rl/configs/crafter_fft_4b.toml +0 -54
  584. examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +0 -20
  585. examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +0 -13
  586. examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +0 -23
  587. examples/warming_up_to_rl/configs/eval_stepwise_complex.toml +0 -35
  588. examples/warming_up_to_rl/configs/eval_stepwise_consistent.toml +0 -26
  589. examples/warming_up_to_rl/configs/eval_stepwise_per_achievement.toml +0 -36
  590. examples/warming_up_to_rl/configs/eval_stepwise_simple.toml +0 -32
  591. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +0 -83
  592. examples/warming_up_to_rl/configs/rl_from_ft.toml +0 -56
  593. examples/warming_up_to_rl/export_trace_sft.py +0 -723
  594. examples/warming_up_to_rl/groq_test.py +0 -97
  595. examples/warming_up_to_rl/manage_secrets.py +0 -131
  596. examples/warming_up_to_rl/old/event_rewards.md +0 -234
  597. examples/warming_up_to_rl/old/notes.md +0 -73
  598. examples/warming_up_to_rl/readme.md +0 -179
  599. examples/warming_up_to_rl/run_eval.py +0 -736
  600. examples/warming_up_to_rl/run_fft_and_save.py +0 -380
  601. examples/warming_up_to_rl/run_local_rollout.py +0 -239
  602. examples/warming_up_to_rl/run_local_rollout_modal.py +0 -248
  603. examples/warming_up_to_rl/run_local_rollout_parallel.py +0 -405
  604. examples/warming_up_to_rl/run_local_rollout_traced.py +0 -477
  605. examples/warming_up_to_rl/run_rl_and_save.py +0 -124
  606. examples/warming_up_to_rl/run_rollout_remote.py +0 -156
  607. examples/workflows/__init__.py +0 -0
  608. examples/workflows/math_rl/__init__.py +0 -0
  609. examples/workflows/math_rl/configs/eval_base_qwen.toml +0 -15
  610. examples/workflows/math_rl/configs/eval_rl_qwen.toml +0 -11
  611. examples/workflows/math_rl/configs/rl_from_base_qwen.toml +0 -35
  612. examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +0 -74
  613. examples/workflows/math_rl/configs/rl_from_ft_qwen.toml +0 -35
  614. examples/workflows/math_rl/download_dataset.py +0 -80
  615. examples/workflows/math_rl/run_eval.py +0 -436
  616. examples/workflows/math_rl/run_rl_and_save.py +0 -111
  617. synth_ai/api/models/supported.py +0 -377
  618. synth_ai/api/train/__init__.py +0 -5
  619. synth_ai/api/train/builders.py +0 -351
  620. synth_ai/api/train/cli.py +0 -635
  621. synth_ai/api/train/config_finder.py +0 -228
  622. synth_ai/api/train/configs/__init__.py +0 -44
  623. synth_ai/api/train/configs/rl.py +0 -134
  624. synth_ai/api/train/configs/sft.py +0 -95
  625. synth_ai/api/train/configs/shared.py +0 -24
  626. synth_ai/api/train/env_resolver.py +0 -349
  627. synth_ai/api/train/pollers.py +0 -75
  628. synth_ai/api/train/supported_algos.py +0 -147
  629. synth_ai/api/train/task_app.py +0 -195
  630. synth_ai/api/train/utils.py +0 -225
  631. synth_ai/cli/_modal_wrapper.py +0 -29
  632. synth_ai/cli/_storage.py +0 -20
  633. synth_ai/cli/_typer_patch.py +0 -49
  634. synth_ai/cli/_validate_task_app.py +0 -11
  635. synth_ai/cli/balance.py +0 -216
  636. synth_ai/cli/calc.py +0 -84
  637. synth_ai/cli/demo.py +0 -165
  638. synth_ai/cli/legacy_root_backup.py +0 -468
  639. synth_ai/cli/man.py +0 -106
  640. synth_ai/cli/recent.py +0 -132
  641. synth_ai/cli/rl_demo.py +0 -254
  642. synth_ai/cli/status.py +0 -134
  643. synth_ai/cli/task_apps.py +0 -4523
  644. synth_ai/cli/traces.py +0 -164
  645. synth_ai/cli/tui.py +0 -57
  646. synth_ai/cli/watch.py +0 -506
  647. synth_ai/compound/cais.py +0 -0
  648. synth_ai/config/base_url.py +0 -107
  649. synth_ai/core/experiment.py +0 -13
  650. synth_ai/core/system.py +0 -15
  651. synth_ai/demo_registry.py +0 -295
  652. synth_ai/demos/core/__init__.py +0 -1
  653. synth_ai/demos/core/cli.py +0 -1718
  654. synth_ai/demos/demo_task_apps/core.py +0 -440
  655. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +0 -184
  656. synth_ai/demos/demo_task_apps/math/config.toml +0 -74
  657. synth_ai/demos/demo_task_apps/math/deploy_task_app.sh +0 -22
  658. synth_ai/demos/demo_task_apps/math/modal_task_app.py +0 -739
  659. synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -37
  660. synth_ai/environments/__init__.py +0 -31
  661. synth_ai/environments/environment/__init__.py +0 -1
  662. synth_ai/environments/environment/artifacts/__init__.py +0 -1
  663. synth_ai/environments/environment/artifacts/base.py +0 -52
  664. synth_ai/environments/environment/core.py +0 -67
  665. synth_ai/environments/environment/db/__init__.py +0 -1
  666. synth_ai/environments/environment/db/sqlite.py +0 -45
  667. synth_ai/environments/environment/registry.py +0 -233
  668. synth_ai/environments/environment/resources/sqlite.py +0 -45
  669. synth_ai/environments/environment/results.py +0 -1
  670. synth_ai/environments/environment/rewards/__init__.py +0 -1
  671. synth_ai/environments/environment/rewards/core.py +0 -29
  672. synth_ai/environments/environment/shared_engine.py +0 -26
  673. synth_ai/environments/environment/tools/__init__.py +0 -200
  674. synth_ai/environments/examples/__init__.py +0 -1
  675. synth_ai/environments/examples/bandit/__init__.py +0 -33
  676. synth_ai/environments/examples/bandit/engine.py +0 -302
  677. synth_ai/environments/examples/bandit/environment.py +0 -194
  678. synth_ai/environments/examples/bandit/taskset.py +0 -200
  679. synth_ai/environments/examples/crafter_classic/__init__.py +0 -8
  680. synth_ai/environments/examples/crafter_classic/agent_demos/analyze_semantic_words_markdown.py +0 -250
  681. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_comprehensive_evaluation.py +0 -59
  682. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_browser.py +0 -152
  683. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_config.toml +0 -24
  684. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_framework.py +0 -1194
  685. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/crafter_synth_config.toml +0 -56
  686. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_config_modal.toml +0 -32
  687. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_turso.py +0 -738
  688. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/kick_off_ft_modal.py +0 -384
  689. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_action_results.py +0 -53
  690. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_agent_actions.py +0 -178
  691. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_latest_run.py +0 -222
  692. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_lm_traces.py +0 -183
  693. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_no_rewards.py +0 -210
  694. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_trace_issue.py +0 -206
  695. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/check_db_schema.py +0 -49
  696. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/check_latest_results.py +0 -64
  697. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/debug_agent_responses.py +0 -88
  698. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/quick_trace_check.py +0 -77
  699. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/compare_experiments.py +0 -324
  700. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_turso.py +0 -580
  701. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/kick_off_ft_oai.py +0 -362
  702. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/multi_model_config.toml +0 -49
  703. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_enhanced_hooks.py +0 -332
  704. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_hook_events.py +0 -97
  705. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_hook_results.py +0 -217
  706. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/check_hook_storage.py +0 -87
  707. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/check_seeds.py +0 -88
  708. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/compare_seed_performance.py +0 -195
  709. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/custom_eval_pipelines.py +0 -400
  710. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/plot_hook_frequency.py +0 -195
  711. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/seed_analysis_summary.py +0 -56
  712. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/run_rollouts_for_models_and_compare_v3.py +0 -858
  713. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_quick_evaluation.py +0 -52
  714. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_react_agent.py +0 -874
  715. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_trace_evaluation.py +0 -1412
  716. synth_ai/environments/examples/crafter_classic/agent_demos/example_v3_usage.py +0 -216
  717. synth_ai/environments/examples/crafter_classic/agent_demos/old/compare_traces.py +0 -296
  718. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_comprehensive_evaluation.py +0 -58
  719. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_env_serialization.py +0 -464
  720. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_evaluation_browser.py +0 -152
  721. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_quick_evaluation.py +0 -51
  722. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_trace_evaluation.py +0 -1412
  723. synth_ai/environments/examples/crafter_classic/agent_demos/old/debug_player_loss.py +0 -112
  724. synth_ai/environments/examples/crafter_classic/agent_demos/old/diagnose_service.py +0 -203
  725. synth_ai/environments/examples/crafter_classic/agent_demos/old/diagnose_slowness.py +0 -305
  726. synth_ai/environments/examples/crafter_classic/agent_demos/old/eval_by_difficulty.py +0 -126
  727. synth_ai/environments/examples/crafter_classic/agent_demos/old/eval_example.py +0 -94
  728. synth_ai/environments/examples/crafter_classic/agent_demos/old/explore_saved_states.py +0 -142
  729. synth_ai/environments/examples/crafter_classic/agent_demos/old/filter_traces_sft.py +0 -26
  730. synth_ai/environments/examples/crafter_classic/agent_demos/old/filter_traces_sft_OLD.py +0 -984
  731. synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_data_gemini.py +0 -724
  732. synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_data_modal.py +0 -386
  733. synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_metadata.py +0 -205
  734. synth_ai/environments/examples/crafter_classic/agent_demos/old/kick_off_ft_gemini.py +0 -150
  735. synth_ai/environments/examples/crafter_classic/agent_demos/old/kick_off_ft_modal.py +0 -283
  736. synth_ai/environments/examples/crafter_classic/agent_demos/old/prepare_vertex_ft.py +0 -280
  737. synth_ai/environments/examples/crafter_classic/agent_demos/old/profile_env_slowness.py +0 -456
  738. synth_ai/environments/examples/crafter_classic/agent_demos/old/replicate_issue.py +0 -166
  739. synth_ai/environments/examples/crafter_classic/agent_demos/old/run_and_eval.py +0 -102
  740. synth_ai/environments/examples/crafter_classic/agent_demos/old/run_comparison.py +0 -128
  741. synth_ai/environments/examples/crafter_classic/agent_demos/old/run_qwen_rollouts.py +0 -655
  742. synth_ai/environments/examples/crafter_classic/agent_demos/old/trace_eval_OLD.py +0 -202
  743. synth_ai/environments/examples/crafter_classic/agent_demos/old/validate_openai_format.py +0 -166
  744. synth_ai/environments/examples/crafter_classic/config_logging.py +0 -111
  745. synth_ai/environments/examples/crafter_classic/debug_translation.py +0 -0
  746. synth_ai/environments/examples/crafter_classic/engine.py +0 -579
  747. synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +0 -64
  748. synth_ai/environments/examples/crafter_classic/engine_helpers/action_map.py +0 -6
  749. synth_ai/environments/examples/crafter_classic/engine_helpers/serialization.py +0 -75
  750. synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +0 -267
  751. synth_ai/environments/examples/crafter_classic/environment.py +0 -495
  752. synth_ai/environments/examples/crafter_classic/taskset.py +0 -233
  753. synth_ai/environments/examples/crafter_classic/trace_hooks_v3.py +0 -228
  754. synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +0 -299
  755. synth_ai/environments/examples/crafter_custom/__init__.py +0 -4
  756. synth_ai/environments/examples/crafter_custom/agent_demos/__init__.py +0 -1
  757. synth_ai/environments/examples/crafter_custom/agent_demos/trace_eval.py +0 -202
  758. synth_ai/environments/examples/crafter_custom/crafter/__init__.py +0 -7
  759. synth_ai/environments/examples/crafter_custom/crafter/config.py +0 -182
  760. synth_ai/environments/examples/crafter_custom/crafter/constants.py +0 -8
  761. synth_ai/environments/examples/crafter_custom/crafter/engine.py +0 -269
  762. synth_ai/environments/examples/crafter_custom/crafter/env.py +0 -262
  763. synth_ai/environments/examples/crafter_custom/crafter/objects.py +0 -417
  764. synth_ai/environments/examples/crafter_custom/crafter/recorder.py +0 -187
  765. synth_ai/environments/examples/crafter_custom/crafter/worldgen.py +0 -118
  766. synth_ai/environments/examples/crafter_custom/dataset_builder.py +0 -373
  767. synth_ai/environments/examples/crafter_custom/environment.py +0 -312
  768. synth_ai/environments/examples/crafter_custom/old/analyze_diamond_issue.py +0 -159
  769. synth_ai/environments/examples/crafter_custom/old/analyze_diamond_spawning.py +0 -158
  770. synth_ai/environments/examples/crafter_custom/old/compare_worlds.py +0 -71
  771. synth_ai/environments/examples/crafter_custom/old/dataset_stats.py +0 -105
  772. synth_ai/environments/examples/crafter_custom/old/diamond_spawning_summary.py +0 -119
  773. synth_ai/environments/examples/crafter_custom/old/example_dataset_usage.py +0 -52
  774. synth_ai/environments/examples/crafter_custom/run_dataset.py +0 -305
  775. synth_ai/environments/examples/enron/art_helpers/email_search_tools.py +0 -156
  776. synth_ai/environments/examples/enron/art_helpers/local_email_db.py +0 -281
  777. synth_ai/environments/examples/enron/art_helpers/types_enron.py +0 -25
  778. synth_ai/environments/examples/enron/engine.py +0 -300
  779. synth_ai/environments/examples/enron/environment.py +0 -234
  780. synth_ai/environments/examples/enron/taskset.py +0 -112
  781. synth_ai/environments/examples/enron/units/keyword_stats.py +0 -112
  782. synth_ai/environments/examples/minigrid/__init__.py +0 -48
  783. synth_ai/environments/examples/minigrid/agent_demos/minigrid_evaluation_framework.py +0 -1188
  784. synth_ai/environments/examples/minigrid/agent_demos/minigrid_quick_evaluation.py +0 -48
  785. synth_ai/environments/examples/minigrid/agent_demos/minigrid_react_agent.py +0 -562
  786. synth_ai/environments/examples/minigrid/agent_demos/minigrid_trace_evaluation.py +0 -221
  787. synth_ai/environments/examples/minigrid/engine.py +0 -589
  788. synth_ai/environments/examples/minigrid/environment.py +0 -274
  789. synth_ai/environments/examples/minigrid/environment_mapping.py +0 -242
  790. synth_ai/environments/examples/minigrid/puzzle_loader.py +0 -417
  791. synth_ai/environments/examples/minigrid/taskset.py +0 -583
  792. synth_ai/environments/examples/nethack/__init__.py +0 -7
  793. synth_ai/environments/examples/nethack/achievements.py +0 -337
  794. synth_ai/environments/examples/nethack/agent_demos/nethack_evaluation_framework.py +0 -981
  795. synth_ai/environments/examples/nethack/agent_demos/nethack_quick_evaluation.py +0 -74
  796. synth_ai/environments/examples/nethack/agent_demos/nethack_react_agent.py +0 -831
  797. synth_ai/environments/examples/nethack/engine.py +0 -739
  798. synth_ai/environments/examples/nethack/environment.py +0 -256
  799. synth_ai/environments/examples/nethack/helpers/__init__.py +0 -41
  800. synth_ai/environments/examples/nethack/helpers/action_mapping.py +0 -301
  801. synth_ai/environments/examples/nethack/helpers/nle_wrapper.py +0 -402
  802. synth_ai/environments/examples/nethack/helpers/observation_utils.py +0 -433
  803. synth_ai/environments/examples/nethack/helpers/recording_wrapper.py +0 -200
  804. synth_ai/environments/examples/nethack/helpers/trajectory_recorder.py +0 -269
  805. synth_ai/environments/examples/nethack/helpers/visualization/replay_viewer.py +0 -308
  806. synth_ai/environments/examples/nethack/helpers/visualization/visualizer.py +0 -431
  807. synth_ai/environments/examples/nethack/taskset.py +0 -323
  808. synth_ai/environments/examples/red/__init__.py +0 -7
  809. synth_ai/environments/examples/red/agent_demos/__init__.py +0 -1
  810. synth_ai/environments/examples/red/config_logging.py +0 -110
  811. synth_ai/environments/examples/red/engine.py +0 -721
  812. synth_ai/environments/examples/red/engine_helpers/__init__.py +0 -1
  813. synth_ai/environments/examples/red/engine_helpers/memory_map.py +0 -35
  814. synth_ai/environments/examples/red/engine_helpers/reward_components.py +0 -276
  815. synth_ai/environments/examples/red/engine_helpers/reward_library/__init__.py +0 -142
  816. synth_ai/environments/examples/red/engine_helpers/reward_library/adaptive_rewards.py +0 -57
  817. synth_ai/environments/examples/red/engine_helpers/reward_library/battle_rewards.py +0 -284
  818. synth_ai/environments/examples/red/engine_helpers/reward_library/composite_rewards.py +0 -150
  819. synth_ai/environments/examples/red/engine_helpers/reward_library/economy_rewards.py +0 -138
  820. synth_ai/environments/examples/red/engine_helpers/reward_library/efficiency_rewards.py +0 -57
  821. synth_ai/environments/examples/red/engine_helpers/reward_library/exploration_rewards.py +0 -331
  822. synth_ai/environments/examples/red/engine_helpers/reward_library/novelty_rewards.py +0 -121
  823. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_progression.py +0 -477
  824. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_rewards.py +0 -559
  825. synth_ai/environments/examples/red/engine_helpers/reward_library/pokemon_rewards.py +0 -313
  826. synth_ai/environments/examples/red/engine_helpers/reward_library/social_rewards.py +0 -148
  827. synth_ai/environments/examples/red/engine_helpers/reward_library/story_rewards.py +0 -247
  828. synth_ai/environments/examples/red/engine_helpers/screen_analysis.py +0 -368
  829. synth_ai/environments/examples/red/engine_helpers/state_extraction.py +0 -172
  830. synth_ai/environments/examples/red/environment.py +0 -298
  831. synth_ai/environments/examples/red/taskset.py +0 -79
  832. synth_ai/environments/examples/red/units/__init__.py +0 -1
  833. synth_ai/environments/examples/sokoban/__init__.py +0 -1
  834. synth_ai/environments/examples/sokoban/agent_demos/sokoban_full_eval.py +0 -899
  835. synth_ai/environments/examples/sokoban/engine.py +0 -678
  836. synth_ai/environments/examples/sokoban/engine_helpers/__init__.py +0 -1
  837. synth_ai/environments/examples/sokoban/engine_helpers/room_utils.py +0 -657
  838. synth_ai/environments/examples/sokoban/engine_helpers/vendored/__init__.py +0 -18
  839. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/__init__.py +0 -3
  840. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/boxoban_env.py +0 -131
  841. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/render_utils.py +0 -370
  842. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/room_utils.py +0 -332
  843. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env.py +0 -306
  844. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_fixed_targets.py +0 -67
  845. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_pull.py +0 -115
  846. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_two_player.py +0 -123
  847. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_variations.py +0 -394
  848. synth_ai/environments/examples/sokoban/environment.py +0 -229
  849. synth_ai/environments/examples/sokoban/generate_verified_puzzles.py +0 -440
  850. synth_ai/environments/examples/sokoban/puzzle_loader.py +0 -312
  851. synth_ai/environments/examples/sokoban/taskset.py +0 -544
  852. synth_ai/environments/examples/tictactoe/__init__.py +0 -1
  853. synth_ai/environments/examples/tictactoe/engine.py +0 -368
  854. synth_ai/environments/examples/tictactoe/environment.py +0 -240
  855. synth_ai/environments/examples/tictactoe/taskset.py +0 -215
  856. synth_ai/environments/examples/verilog/__init__.py +0 -10
  857. synth_ai/environments/examples/verilog/engine.py +0 -421
  858. synth_ai/environments/examples/verilog/environment.py +0 -350
  859. synth_ai/environments/examples/verilog/taskset.py +0 -420
  860. synth_ai/environments/examples/wordle/__init__.py +0 -29
  861. synth_ai/environments/examples/wordle/engine.py +0 -398
  862. synth_ai/environments/examples/wordle/environment.py +0 -159
  863. synth_ai/environments/examples/wordle/helpers/generate_instances_wordfreq.py +0 -75
  864. synth_ai/environments/examples/wordle/taskset.py +0 -230
  865. synth_ai/environments/reproducibility/core.py +0 -42
  866. synth_ai/environments/reproducibility/helpers.py +0 -0
  867. synth_ai/environments/reproducibility/tree.py +0 -363
  868. synth_ai/environments/service/app.py +0 -97
  869. synth_ai/environments/service/core_routes.py +0 -1021
  870. synth_ai/environments/service/external_registry.py +0 -56
  871. synth_ai/environments/service/registry.py +0 -9
  872. synth_ai/environments/stateful/__init__.py +0 -1
  873. synth_ai/environments/stateful/core.py +0 -163
  874. synth_ai/environments/stateful/engine.py +0 -21
  875. synth_ai/environments/stateful/state.py +0 -7
  876. synth_ai/environments/tasks/api.py +0 -19
  877. synth_ai/environments/tasks/core.py +0 -81
  878. synth_ai/environments/tasks/filters.py +0 -40
  879. synth_ai/environments/tasks/utils.py +0 -90
  880. synth_ai/environments/v0_observability/history.py +0 -3
  881. synth_ai/environments/v0_observability/log.py +0 -2
  882. synth_ai/evals/__init__.py +0 -15
  883. synth_ai/evals/base.py +0 -13
  884. synth_ai/evals/client.py +0 -82
  885. synth_ai/evals/types.py +0 -42
  886. synth_ai/handshake.py +0 -109
  887. synth_ai/http.py +0 -26
  888. synth_ai/http_client.py +0 -136
  889. synth_ai/inference/__init__.py +0 -5
  890. synth_ai/inference/client.py +0 -34
  891. synth_ai/jobs/client.py +0 -295
  892. synth_ai/judge_schemas.py +0 -127
  893. synth_ai/learning/__init__.py +0 -59
  894. synth_ai/learning/client.py +0 -241
  895. synth_ai/learning/ft_client.py +0 -7
  896. synth_ai/learning/health.py +0 -49
  897. synth_ai/learning/jobs.py +0 -201
  898. synth_ai/learning/rl/__init__.py +0 -39
  899. synth_ai/learning/rl/client.py +0 -267
  900. synth_ai/learning/rl/contracts.py +0 -27
  901. synth_ai/learning/rl/env_keys.py +0 -166
  902. synth_ai/learning/rl/secrets.py +0 -13
  903. synth_ai/learning/sft/client.py +0 -68
  904. synth_ai/learning/sft/config.py +0 -270
  905. synth_ai/learning/sft/data.py +0 -295
  906. synth_ai/learning/validators.py +0 -49
  907. synth_ai/lm/__init__.py +0 -25
  908. synth_ai/task/__init__.py +0 -121
  909. synth_ai/task/apps/__init__.py +0 -129
  910. synth_ai/task/client.py +0 -167
  911. synth_ai/task/config.py +0 -257
  912. synth_ai/task/contracts.py +0 -236
  913. synth_ai/task/datasets.py +0 -108
  914. synth_ai/task/proxy.py +0 -251
  915. synth_ai/task/rubrics/__init__.py +0 -56
  916. synth_ai/task/rubrics/loaders.py +0 -152
  917. synth_ai/task/rubrics/strict.py +0 -149
  918. synth_ai/task/server.py +0 -432
  919. synth_ai/task/trace_correlation_helpers.py +0 -315
  920. synth_ai/task/tracing_utils.py +0 -84
  921. synth_ai/task/validators.py +0 -418
  922. synth_ai/tracing_v3/__init__.py +0 -97
  923. synth_ai/tracing_v3/abstractions.py +0 -302
  924. synth_ai/tracing_v3/config.py +0 -84
  925. synth_ai/tracing_v3/db_config.py +0 -194
  926. synth_ai/tracing_v3/decorators.py +0 -398
  927. synth_ai/tracing_v3/llm_call_record_helpers.py +0 -391
  928. synth_ai/tracing_v3/migration_helper.py +0 -120
  929. synth_ai/tracing_v3/session_tracer.py +0 -540
  930. synth_ai/tracing_v3/storage/base.py +0 -210
  931. synth_ai/tracing_v3/storage/config.py +0 -75
  932. synth_ai/tracing_v3/storage/factory.py +0 -39
  933. synth_ai/tracing_v3/trace_utils.py +0 -317
  934. synth_ai/tracing_v3/turso/daemon.py +0 -151
  935. synth_ai/tracing_v3/turso/models.py +0 -469
  936. synth_ai/tracing_v3/turso/native_manager.py +0 -1209
  937. synth_ai/tracing_v3/utils.py +0 -108
  938. synth_ai/tui/__init__.py +0 -5
  939. synth_ai/tui/__main__.py +0 -13
  940. synth_ai/tui/cli/__init__.py +0 -1
  941. synth_ai/tui/cli/query_experiments.py +0 -164
  942. synth_ai/tui/cli/query_experiments_v3.py +0 -164
  943. synth_ai/tui/dashboard.py +0 -906
  944. synth_ai/v0/api/__init__.py +0 -8
  945. synth_ai/v0/api/models/__init__.py +0 -8
  946. synth_ai/v0/api/models/supported.py +0 -8
  947. synth_ai/v0/config/__init__.py +0 -15
  948. synth_ai/v0/config/base_url.py +0 -12
  949. synth_ai/v0/lm/__init__.py +0 -51
  950. synth_ai/v0/lm/caching/__init__.py +0 -0
  951. synth_ai/v0/lm/caching/constants.py +0 -6
  952. synth_ai/v0/lm/caching/dbs.py +0 -0
  953. synth_ai/v0/lm/caching/ephemeral.py +0 -100
  954. synth_ai/v0/lm/caching/handler.py +0 -137
  955. synth_ai/v0/lm/caching/initialize.py +0 -11
  956. synth_ai/v0/lm/caching/persistent.py +0 -114
  957. synth_ai/v0/lm/config.py +0 -115
  958. synth_ai/v0/lm/constants.py +0 -32
  959. synth_ai/v0/lm/core/__init__.py +0 -8
  960. synth_ai/v0/lm/core/all.py +0 -73
  961. synth_ai/v0/lm/core/exceptions.py +0 -5
  962. synth_ai/v0/lm/core/main.py +0 -331
  963. synth_ai/v0/lm/core/main_v3.py +0 -594
  964. synth_ai/v0/lm/core/synth_models.py +0 -35
  965. synth_ai/v0/lm/core/vendor_clients.py +0 -190
  966. synth_ai/v0/lm/cost/__init__.py +0 -0
  967. synth_ai/v0/lm/cost/monitor.py +0 -1
  968. synth_ai/v0/lm/cost/statefulness.py +0 -1
  969. synth_ai/v0/lm/injection.py +0 -80
  970. synth_ai/v0/lm/overrides.py +0 -206
  971. synth_ai/v0/lm/provider_support/__init__.py +0 -8
  972. synth_ai/v0/lm/provider_support/anthropic.py +0 -972
  973. synth_ai/v0/lm/provider_support/openai.py +0 -1139
  974. synth_ai/v0/lm/provider_support/suppress_logging.py +0 -31
  975. synth_ai/v0/lm/structured_outputs/__init__.py +0 -0
  976. synth_ai/v0/lm/structured_outputs/handler.py +0 -440
  977. synth_ai/v0/lm/structured_outputs/inject.py +0 -297
  978. synth_ai/v0/lm/structured_outputs/rehabilitate.py +0 -185
  979. synth_ai/v0/lm/tools/__init__.py +0 -3
  980. synth_ai/v0/lm/tools/base.py +0 -172
  981. synth_ai/v0/lm/unified_interface.py +0 -202
  982. synth_ai/v0/lm/vendors/__init__.py +0 -0
  983. synth_ai/v0/lm/vendors/base.py +0 -81
  984. synth_ai/v0/lm/vendors/core/__init__.py +0 -0
  985. synth_ai/v0/lm/vendors/core/anthropic_api.py +0 -387
  986. synth_ai/v0/lm/vendors/core/gemini_api.py +0 -292
  987. synth_ai/v0/lm/vendors/core/mistral_api.py +0 -322
  988. synth_ai/v0/lm/vendors/core/openai_api.py +0 -227
  989. synth_ai/v0/lm/vendors/core/synth_dev_api.py +0 -0
  990. synth_ai/v0/lm/vendors/local/__init__.py +0 -0
  991. synth_ai/v0/lm/vendors/local/ollama.py +0 -0
  992. synth_ai/v0/lm/vendors/openai_standard.py +0 -782
  993. synth_ai/v0/lm/vendors/openai_standard_responses.py +0 -259
  994. synth_ai/v0/lm/vendors/retries.py +0 -22
  995. synth_ai/v0/lm/vendors/supported/__init__.py +0 -0
  996. synth_ai/v0/lm/vendors/supported/custom_endpoint.py +0 -415
  997. synth_ai/v0/lm/vendors/supported/deepseek.py +0 -69
  998. synth_ai/v0/lm/vendors/supported/grok.py +0 -75
  999. synth_ai/v0/lm/vendors/supported/groq.py +0 -16
  1000. synth_ai/v0/lm/vendors/supported/ollama.py +0 -15
  1001. synth_ai/v0/lm/vendors/supported/openrouter.py +0 -74
  1002. synth_ai/v0/lm/vendors/supported/together.py +0 -11
  1003. synth_ai/v0/lm/vendors/synth_client.py +0 -835
  1004. synth_ai/v0/lm/warmup.py +0 -186
  1005. synth_ai/v0/tracing/__init__.py +0 -0
  1006. synth_ai/v0/tracing/abstractions.py +0 -224
  1007. synth_ai/v0/tracing/base_client.py +0 -91
  1008. synth_ai/v0/tracing/client_manager.py +0 -131
  1009. synth_ai/v0/tracing/config.py +0 -142
  1010. synth_ai/v0/tracing/context.py +0 -146
  1011. synth_ai/v0/tracing/decorators.py +0 -682
  1012. synth_ai/v0/tracing/events/__init__.py +0 -0
  1013. synth_ai/v0/tracing/events/manage.py +0 -147
  1014. synth_ai/v0/tracing/events/scope.py +0 -86
  1015. synth_ai/v0/tracing/events/store.py +0 -228
  1016. synth_ai/v0/tracing/immediate_client.py +0 -151
  1017. synth_ai/v0/tracing/local.py +0 -18
  1018. synth_ai/v0/tracing/log_client_base.py +0 -73
  1019. synth_ai/v0/tracing/retry_queue.py +0 -186
  1020. synth_ai/v0/tracing/trackers.py +0 -515
  1021. synth_ai/v0/tracing/upload.py +0 -409
  1022. synth_ai/v0/tracing/utils.py +0 -9
  1023. synth_ai/v0/tracing_v1/__init__.py +0 -16
  1024. synth_ai/v0/tracing_v1/abstractions.py +0 -224
  1025. synth_ai/v0/tracing_v1/base_client.py +0 -91
  1026. synth_ai/v0/tracing_v1/client_manager.py +0 -131
  1027. synth_ai/v0/tracing_v1/config.py +0 -142
  1028. synth_ai/v0/tracing_v1/context.py +0 -146
  1029. synth_ai/v0/tracing_v1/decorators.py +0 -703
  1030. synth_ai/v0/tracing_v1/events/__init__.py +0 -0
  1031. synth_ai/v0/tracing_v1/events/manage.py +0 -147
  1032. synth_ai/v0/tracing_v1/events/scope.py +0 -86
  1033. synth_ai/v0/tracing_v1/events/store.py +0 -228
  1034. synth_ai/v0/tracing_v1/immediate_client.py +0 -151
  1035. synth_ai/v0/tracing_v1/local.py +0 -18
  1036. synth_ai/v0/tracing_v1/log_client_base.py +0 -73
  1037. synth_ai/v0/tracing_v1/retry_queue.py +0 -186
  1038. synth_ai/v0/tracing_v1/trackers.py +0 -515
  1039. synth_ai/v0/tracing_v1/upload.py +0 -527
  1040. synth_ai/v0/tracing_v1/utils.py +0 -9
  1041. synth_ai/v0/tracing_v3/__init__.py +0 -10
  1042. synth_ai/v0/tracing_v3/abstractions.py +0 -3
  1043. synth_ai/v0/tracing_v3/decorators.py +0 -3
  1044. synth_ai/v0/tracing_v3/llm_call_record_helpers.py +0 -3
  1045. synth_ai/v0/tracing_v3/session_tracer.py +0 -3
  1046. synth_ai-0.2.14.dist-info/METADATA +0 -139
  1047. synth_ai-0.2.14.dist-info/RECORD +0 -762
  1048. synth_ai-0.2.14.dist-info/top_level.txt +0 -2
  1049. /synth_ai/{demos/demo_task_apps → cli/demo_apps}/crafter/__init__.py +0 -0
  1050. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/__init__.py +0 -0
  1051. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/crafter/configs/crafter_fft_4b.toml +0 -0
  1052. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/crafter/configs/rl_from_base_qwen4b.toml +0 -0
  1053. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/__init__.py +0 -0
  1054. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/_common.py +0 -0
  1055. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/app.py +0 -0
  1056. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/deploy_modal.py +0 -0
  1057. {examples/task_apps → synth_ai/core/apps}/__init__.py +0 -0
  1058. /synth_ai/{tracing_v3 → core/tracing_v3}/examples/basic_usage.py +0 -0
  1059. /synth_ai/{tracing_v3 → core/tracing_v3}/hooks.py +0 -0
  1060. /synth_ai/{tracing_v3 → core/tracing_v3}/lm_call_record_abstractions.py +0 -0
  1061. /synth_ai/{tracing_v3 → core/tracing_v3}/replica_sync.py +0 -0
  1062. /synth_ai/{tracing_v3 → core/tracing_v3}/serialization.py +0 -0
  1063. /synth_ai/{tracing_v3 → core/tracing_v3}/storage/__init__.py +0 -0
  1064. /synth_ai/{tracing_v3 → core/tracing_v3}/storage/exceptions.py +0 -0
  1065. /synth_ai/{tracing_v3 → core/tracing_v3}/storage/types.py +0 -0
  1066. /synth_ai/{tracing_v3 → core/tracing_v3}/storage/utils.py +0 -0
  1067. /synth_ai/{tracing_v3 → core/tracing_v3}/turso/__init__.py +0 -0
  1068. /synth_ai/{learning → sdk/learning}/algorithms.py +0 -0
  1069. /synth_ai/{learning → sdk/learning}/config.py +0 -0
  1070. /synth_ai/{learning → sdk/learning}/constants.py +0 -0
  1071. /synth_ai/{learning → sdk/learning}/core.py +0 -0
  1072. /synth_ai/{learning → sdk/learning}/gateway.py +0 -0
  1073. /synth_ai/{learning → sdk/learning}/rl/config.py +0 -0
  1074. /synth_ai/{learning → sdk/learning}/rl_client.py +0 -0
  1075. /synth_ai/{learning → sdk/learning}/sft/__init__.py +0 -0
  1076. /synth_ai/{learning → sdk/learning}/sse.py +0 -0
  1077. /synth_ai/{task → sdk/task}/auth.py +0 -0
  1078. /synth_ai/{task → sdk/task}/errors.py +0 -0
  1079. /synth_ai/{task → sdk/task}/health.py +0 -0
  1080. /synth_ai/{task → sdk/task}/json.py +0 -0
  1081. /synth_ai/{task → sdk/task}/rubrics/models.py +0 -0
  1082. /synth_ai/{task → sdk/task}/rubrics/scoring.py +0 -0
  1083. /synth_ai/{task → sdk/task}/vendors.py +0 -0
  1084. {synth_ai-0.2.14.dist-info → synth_ai-0.4.4.dist-info}/WHEEL +0 -0
  1085. {synth_ai-0.2.14.dist-info → synth_ai-0.4.4.dist-info}/entry_points.txt +0 -0
  1086. {synth_ai-0.2.14.dist-info → synth_ai-0.4.4.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,1102 @@
1
+ """First-class SDK API for GraphGen (Graph Opt).
2
+
3
+ **Status:** Alpha
4
+
5
+ GraphGen is a simplified "Workflows API" for prompt optimization that:
6
+ - Uses a simple JSON dataset format (GraphGenTaskSet) instead of TOML configs
7
+ - Auto-generates task apps from the dataset (no user-managed task apps)
8
+ - Has built-in verifier configurations (rubric, contrastive, gold_examples)
9
+ - Wraps GEPA internally for the actual optimization
10
+
11
+ Example CLI usage:
12
+ uvx synth-ai train --type graphgen --dataset my_tasks.json --poll
13
+
14
+ Example SDK usage:
15
+ from synth_ai.sdk.api.train.graphgen import GraphGenJob
16
+ from synth_ai.sdk.api.train.graphgen_models import GraphGenGoldOutput, GraphGenTask, GraphGenTaskSet, GraphGenTaskSetMetadata
17
+
18
+ # From a dataset file
19
+ job = GraphGenJob.from_dataset("my_tasks.json")
20
+ job.submit()
21
+ result = job.stream_until_complete()
22
+ print(f"Best score: {result.get('best_score')}")
23
+
24
+ # Or programmatically
25
+ dataset = GraphGenTaskSet(
26
+ metadata=GraphGenTaskSetMetadata(name="My Tasks"),
27
+ tasks=[GraphGenTask(id="t1", input={"question": "What is 2+2?"})],
28
+ gold_outputs=[GraphGenGoldOutput(output={"answer": "4"}, task_id="t1")],
29
+ )
30
+ job = GraphGenJob.from_dataset(dataset, policy_model="gpt-4o-mini", problem_spec="You are a helpful assistant.")
31
+ job.submit()
32
+ """
33
+
34
+ from __future__ import annotations
35
+
36
+ import asyncio
37
+ import json
38
+ import os
39
+ from dataclasses import dataclass, field
40
+ from pathlib import Path
41
+ from typing import Any, Callable, Dict, List, Literal, Optional, Sequence, cast
42
+
43
+ from synth_ai.core.telemetry import log_info
44
+
45
+ from .graphgen_models import (
46
+ GraphGenJobConfig,
47
+ GraphGenTaskSet,
48
+ load_graphgen_taskset,
49
+ parse_graphgen_taskset,
50
+ SessionTraceInput,
51
+ GraphGenGraphVerifierResponse,
52
+ )
53
+ from .utils import ensure_api_base, http_get, http_post
54
+
55
+
56
+ @dataclass
57
+ class GraphGenJobResult:
58
+ """Result from a GraphGen job.
59
+
60
+ Contains the final status and results of a completed GraphGen workflow
61
+ optimization job, including the best score and snapshot ID for the
62
+ optimized graph.
63
+
64
+ Attributes:
65
+ graphgen_job_id: Unique identifier for the GraphGen job (e.g.,
66
+ "graphgen_abc123def456").
67
+ status: Current job status. One of: "pending", "running", "succeeded",
68
+ "failed", "cancelled".
69
+ best_score: Best evaluation score achieved during optimization. Higher
70
+ is better. None if job hasn't completed successfully.
71
+ best_snapshot_id: ID of the graph snapshot with the best score. Use this
72
+ to download or deploy the optimized graph.
73
+ error: Error message if the job failed, None otherwise.
74
+ dataset_name: Name of the dataset used for optimization.
75
+ task_count: Number of tasks in the dataset.
76
+ graph_evolve_job_id: ID of the underlying graph evolution job, if applicable.
77
+
78
+ Example:
79
+ >>> result = job.get_result()
80
+ >>> if result.status == "succeeded":
81
+ ... print(f"Best score: {result.best_score}")
82
+ ... print(f"Snapshot ID: {result.best_snapshot_id}")
83
+ """
84
+
85
+ graphgen_job_id: str
86
+ status: str
87
+ best_score: Optional[float] = None
88
+ best_snapshot_id: Optional[str] = None
89
+ error: Optional[str] = None
90
+ dataset_name: Optional[str] = None
91
+ task_count: Optional[int] = None
92
+ graph_evolve_job_id: Optional[str] = None
93
+
94
+
95
+ @dataclass
96
+ class GraphGenSubmitResult:
97
+ """Result from submitting a GraphGen job.
98
+
99
+ Returned immediately after job submission with initial job metadata
100
+ and configuration details.
101
+
102
+ Attributes:
103
+ graphgen_job_id: Unique identifier for the GraphGen job.
104
+ status: Initial job status (typically "queued", "pending", or "running").
105
+ dataset_name: Name of the dataset being used for optimization.
106
+ task_count: Number of tasks in the dataset.
107
+ rollout_budget: Total number of rollouts (evaluations) budgeted for
108
+ this optimization job.
109
+ policy_model: Name of the LLM model being used for the policy
110
+ (e.g., "gpt-4o-mini", "claude-3-5-sonnet").
111
+ verifier_mode: Evaluation mode being used. One of: "rubric", "contrastive",
112
+ "gold_examples", "verifier_graph".
113
+ graph_evolve_job_id: ID of the underlying graph evolution job, if applicable.
114
+
115
+ Example:
116
+ >>> submit_result = job.submit()
117
+ >>> print(f"Job {submit_result.graphgen_job_id} started")
118
+ >>> print(f"Optimizing {submit_result.task_count} tasks with {submit_result.rollout_budget} rollouts")
119
+ """
120
+
121
+ graphgen_job_id: str
122
+ status: str
123
+ dataset_name: str
124
+ task_count: int
125
+ rollout_budget: int
126
+ policy_model: str
127
+ verifier_mode: str
128
+ graph_evolve_job_id: Optional[str] = None
129
+
130
+
131
+ class GraphGenJob:
132
+ """High-level SDK class for running GraphGen workflow optimization jobs.
133
+
134
+ GraphGen (Graph Opt) provides a simplified API for
135
+ graph/workflow optimization that doesn't require users to manage task apps.
136
+
137
+ Key differences from PromptLearningJob:
138
+ - Uses JSON dataset format (GraphGenTaskSet) instead of TOML configs
139
+ - No task app management required - GraphGen builds it internally
140
+ - Built-in verifier modes (rubric, contrastive, gold_examples)
141
+ - Graph-first: trains multi-node workflows by default (Graph-GEPA)
142
+ - Public graph downloads are redacted `.txt` exports only
143
+ - Simpler configuration with sensible defaults
144
+
145
+ Example:
146
+ >>> from synth_ai.sdk.api.train.graphgen import GraphGenJob
147
+ >>>
148
+ >>> # Create job from dataset file
149
+ >>> job = GraphGenJob.from_dataset(
150
+ ... dataset="my_tasks.json",
151
+ ... policy_model="gpt-4o-mini",
152
+ ... rollout_budget=100,
153
+ ... )
154
+ >>>
155
+ >>> # Train a verifier graph
156
+ >>> verifier_job = GraphGenJob.from_dataset(
157
+ ... dataset="verifier_dataset.json",
158
+ ... graph_type="verifier",
159
+ ... rollout_budget=200,
160
+ ... )
161
+ >>>
162
+ >>> # Train an RLM graph (massive context via tools)
163
+ >>> rlm_job = GraphGenJob.from_dataset(
164
+ ... dataset="rlm_dataset.json",
165
+ ... graph_type="rlm",
166
+ ... configured_tools=[
167
+ ... {"name": "materialize_context", "kind": "rlm_materialize", "stateful": True},
168
+ ... {"name": "local_grep", "kind": "rlm_local_grep", "stateful": False},
169
+ ... ],
170
+ ... rollout_budget=100,
171
+ ... )
172
+ >>>
173
+ >>> # Submit and stream
174
+ >>> job.submit()
175
+ >>> result = job.stream_until_complete(timeout=3600.0)
176
+ >>> print(f"Best score: {result.get('best_score')}")
177
+ >>>
178
+ >>> # Download public graph export
179
+ >>> export_txt = job.download_graph_txt()
180
+ >>> print(export_txt)
181
+ >>>
182
+ >>> # Run inference with optimized prompt
183
+ >>> output = job.run_inference({"question": "What is 2+2?"})
184
+ >>>
185
+ >>> # Run verifier with optimized verifier graph
186
+ >>> verification = verifier_job.run_verifier(trace_data)
187
+ >>> print(f"Outcome reward: {verification.outcome_reward}")
188
+ """
189
+
190
+ def __init__(
191
+ self,
192
+ *,
193
+ dataset: GraphGenTaskSet,
194
+ config: GraphGenJobConfig,
195
+ backend_url: str,
196
+ api_key: str,
197
+ auto_start: bool = True,
198
+ metadata: Optional[Dict[str, Any]] = None,
199
+ ) -> None:
200
+ """Initialize a GraphGen job.
201
+
202
+ Args:
203
+ dataset: The GraphGenTaskSet containing tasks and evaluation config
204
+ config: Job configuration (policy model, budget, etc.)
205
+ backend_url: Backend API URL
206
+ api_key: Synth API key
207
+ auto_start: Whether to start the job immediately after creation
208
+ metadata: Additional metadata for the job
209
+ """
210
+ self.dataset = dataset
211
+ self.config = config
212
+ self.backend_url = ensure_api_base(backend_url)
213
+ self.api_key = api_key
214
+ self.auto_start = auto_start
215
+ self.metadata = metadata or {}
216
+
217
+ self._graphgen_job_id: Optional[str] = None
218
+ self._graph_evolve_job_id: Optional[str] = None
219
+ self._submit_result: Optional[GraphGenSubmitResult] = None
220
+
221
+ @classmethod
222
+ def from_dataset(
223
+ cls,
224
+ dataset: str | Path | Dict[str, Any] | GraphGenTaskSet,
225
+ *,
226
+ graph_type: Literal["policy", "verifier", "rlm"] = "policy",
227
+ policy_model: str = "gpt-4o-mini",
228
+ rollout_budget: int = 100,
229
+ proposer_effort: Literal["low", "medium", "high"] = "medium",
230
+ verifier_model: Optional[str] = None,
231
+ verifier_provider: Optional[str] = None,
232
+ population_size: int = 4,
233
+ num_generations: Optional[int] = None,
234
+ problem_spec: Optional[str] = None,
235
+ target_llm_calls: Optional[int] = None,
236
+ configured_tools: Optional[List[Dict[str, Any]]] = None,
237
+ backend_url: Optional[str] = None,
238
+ api_key: Optional[str] = None,
239
+ auto_start: bool = True,
240
+ metadata: Optional[Dict[str, Any]] = None,
241
+ ) -> GraphGenJob:
242
+ """Create a GraphGen job from a dataset.
243
+
244
+ Args:
245
+ dataset: Dataset as file path, dict, or GraphGenTaskSet object
246
+ graph_type: Type of graph to train:
247
+ - "policy": Maps inputs to outputs (default).
248
+ - "verifier": Verifies/scores traces (requires verifier-compliant dataset).
249
+ - "rlm": Recursive Language Model - handles massive contexts via tool-based search
250
+ and recursive LLM calls. Requires configured_tools parameter.
251
+ policy_model: Model to use for policy inference
252
+ rollout_budget: Total number of rollouts for optimization
253
+ proposer_effort: Proposer effort level ("medium" or "high").
254
+ "low" is not allowed as gpt-4.1-mini is too weak for graph generation.
255
+ verifier_model: Override verifier model from dataset
256
+ verifier_provider: Override verifier provider from dataset
257
+ population_size: Population size for GEPA
258
+ num_generations: Number of generations (auto-calculated if not specified)
259
+ problem_spec: Detailed problem specification for the graph proposer.
260
+ Include domain-specific info like valid output labels for classification.
261
+ target_llm_calls: Target number of LLM calls for the graph (1-10).
262
+ Controls how many LLM nodes the graph should use. Defaults to 5.
263
+ configured_tools: Optional list of tool bindings for RLM graphs.
264
+ Required for graph_type="rlm". Each tool should be a dict with 'name', 'kind', and 'stateful'.
265
+ Example: [{'name': 'materialize_context', 'kind': 'rlm_materialize', 'stateful': True}]
266
+ backend_url: Backend API URL (defaults to env or production)
267
+ api_key: API key (defaults to SYNTH_API_KEY env var)
268
+ auto_start: Whether to start the job immediately
269
+ metadata: Additional metadata for the job
270
+
271
+ Returns:
272
+ GraphGenJob instance
273
+
274
+ Example:
275
+ >>> # From file
276
+ >>> job = GraphGenJob.from_dataset("tasks.json")
277
+ >>>
278
+ >>> # From dict
279
+ >>> job = GraphGenJob.from_dataset({
280
+ ... "metadata": {"name": "My Tasks"},
281
+ ... "tasks": [{"id": "t1", "input": {"q": "Hi"}}],
282
+ ... }, problem_spec="You are helpful.")
283
+ >>>
284
+ >>> # From GraphGenTaskSet object
285
+ >>> job = GraphGenJob.from_dataset(my_taskset, policy_model="gpt-4o")
286
+ """
287
+ from synth_ai.core.env import get_backend_from_env
288
+
289
+ # Parse dataset
290
+ if isinstance(dataset, (str, Path)):
291
+ parsed_dataset = load_graphgen_taskset(dataset)
292
+ elif isinstance(dataset, dict):
293
+ parsed_dataset = parse_graphgen_taskset(dataset)
294
+ elif isinstance(dataset, GraphGenTaskSet):
295
+ parsed_dataset = dataset
296
+ else:
297
+ raise TypeError(
298
+ f"dataset must be a file path, dict, or GraphGenTaskSet, got {type(dataset)}"
299
+ )
300
+
301
+ # Resolve backend URL
302
+ if not backend_url:
303
+ backend_url = os.environ.get("BACKEND_BASE_URL", "").strip()
304
+ if not backend_url:
305
+ base, _ = get_backend_from_env()
306
+ backend_url = f"{base}/api" if not base.endswith("/api") else base
307
+
308
+ # Resolve API key
309
+ if not api_key:
310
+ api_key = os.environ.get("SYNTH_API_KEY")
311
+ if not api_key:
312
+ raise ValueError(
313
+ "api_key is required (provide explicitly or set SYNTH_API_KEY env var)"
314
+ )
315
+
316
+ # Build config
317
+ config = GraphGenJobConfig(
318
+ graph_type=graph_type,
319
+ policy_model=policy_model,
320
+ rollout_budget=rollout_budget,
321
+ proposer_effort=proposer_effort,
322
+ verifier_model=verifier_model,
323
+ verifier_provider=verifier_provider,
324
+ population_size=population_size,
325
+ num_generations=num_generations,
326
+ problem_spec=problem_spec,
327
+ target_llm_calls=target_llm_calls,
328
+ configured_tools=configured_tools,
329
+ )
330
+
331
+ return cls(
332
+ dataset=parsed_dataset,
333
+ config=config,
334
+ backend_url=backend_url,
335
+ api_key=api_key,
336
+ auto_start=auto_start,
337
+ metadata=metadata,
338
+ )
339
+
340
+ @classmethod
341
+ def from_job_id(
342
+ cls,
343
+ job_id: str,
344
+ backend_url: Optional[str] = None,
345
+ api_key: Optional[str] = None,
346
+ ) -> GraphGenJob:
347
+ """Resume an existing GraphGen job by ID.
348
+
349
+ Args:
350
+ job_id: GraphGen job ID ("graphgen_*"), graph-evolve job ID ("graph_evolve_*"), or underlying GEPA job ID ("pl_*")
351
+ backend_url: Backend API URL (defaults to env or production)
352
+ api_key: API key (defaults to SYNTH_API_KEY env var)
353
+
354
+ Returns:
355
+ GraphGenJob instance for the existing job
356
+ """
357
+ from synth_ai.core.env import get_backend_from_env
358
+
359
+ # Resolve backend URL
360
+ if not backend_url:
361
+ backend_url = os.environ.get("BACKEND_BASE_URL", "").strip()
362
+ if not backend_url:
363
+ base, _ = get_backend_from_env()
364
+ backend_url = f"{base}/api" if not base.endswith("/api") else base
365
+
366
+ # Resolve API key
367
+ if not api_key:
368
+ api_key = os.environ.get("SYNTH_API_KEY")
369
+ if not api_key:
370
+ raise ValueError(
371
+ "api_key is required (provide explicitly or set SYNTH_API_KEY env var)"
372
+ )
373
+
374
+ # Create minimal instance - dataset will be fetched from backend if needed
375
+ # For now, create a placeholder dataset
376
+ from .graphgen_models import GraphGenTaskSetMetadata, GraphGenTask
377
+ placeholder_dataset = GraphGenTaskSet(
378
+ metadata=GraphGenTaskSetMetadata(name="(resumed job)"),
379
+ tasks=[GraphGenTask(id="placeholder", input={})],
380
+ )
381
+
382
+ job = cls(
383
+ dataset=placeholder_dataset,
384
+ config=GraphGenJobConfig(),
385
+ backend_url=backend_url,
386
+ api_key=api_key,
387
+ auto_start=False,
388
+ )
389
+
390
+ # Accept GraphGen or graph_evolve/GEPA job IDs - backend handles resolution internally
391
+ valid_prefixes = ("graphgen_", "graphgen_", "graph_evolve_", "graph_evolve_", "pl_")
392
+ if not any(job_id.startswith(p) for p in valid_prefixes):
393
+ raise ValueError(
394
+ f"Unsupported job ID format: {job_id!r}. "
395
+ f"Expected one of: {valid_prefixes}"
396
+ )
397
+ job._graphgen_job_id = job_id
398
+ if job_id.startswith("pl_"):
399
+ job._graph_evolve_job_id = job_id
400
+ return job
401
+
402
+ @classmethod
403
+ def from_graph_evolve_job_id(
404
+ cls,
405
+ graph_evolve_job_id: str,
406
+ backend_url: Optional[str] = None,
407
+ api_key: Optional[str] = None,
408
+ ) -> GraphGenJob:
409
+ """Alias for resuming a GraphGen job from a GEPA job ID."""
410
+ return cls.from_job_id(graph_evolve_job_id, backend_url=backend_url, api_key=api_key)
411
+
412
+ @property
413
+ def job_id(self) -> Optional[str]:
414
+ """Get the GraphGen job ID (None if not yet submitted)."""
415
+ return self._graphgen_job_id
416
+
417
+ @property
418
+ def graph_evolve_job_id(self) -> Optional[str]:
419
+ """Get the underlying GEPA job ID if known."""
420
+ if self._graph_evolve_job_id:
421
+ return self._graph_evolve_job_id
422
+ if self._submit_result and self._submit_result.graph_evolve_job_id:
423
+ return self._submit_result.graph_evolve_job_id
424
+ return None
425
+
426
+ def _build_payload(self) -> Dict[str, Any]:
427
+ """Build the job creation payload."""
428
+ # Merge config num_generations into metadata if provided
429
+ metadata = dict(self.metadata) if self.metadata else {}
430
+ if self.config.num_generations is not None:
431
+ metadata["num_generations"] = self.config.num_generations
432
+ if self.config.population_size != 4: # Only include if non-default
433
+ metadata["population_size"] = self.config.population_size
434
+ if self.config.num_parents != 2:
435
+ metadata["num_parents"] = self.config.num_parents
436
+ if self.config.evaluation_seeds is not None:
437
+ metadata["evaluation_seeds"] = self.config.evaluation_seeds
438
+
439
+ # Extract eval/feedback sample sizes from metadata as direct fields
440
+ eval_sample_size = metadata.pop("eval_sample_size", None)
441
+ feedback_sample_size = metadata.pop("feedback_sample_size", None)
442
+
443
+ # Build dataset dict and ensure it has an initial_prompt to satisfy legacy backend validation
444
+ # GraphGen is graph-first and doesn't really use this, so we use a placeholder or problem_spec
445
+ dataset_dict = self.dataset.model_dump()
446
+ if "initial_prompt" not in dataset_dict:
447
+ dataset_dict["initial_prompt"] = self.config.problem_spec or "Optimizing prompt graph..."
448
+
449
+ payload: Dict[str, Any] = {
450
+ "dataset": dataset_dict,
451
+ "initial_prompt": None, # Top-level initial_prompt is ignored in favor of dataset.initial_prompt
452
+ "graph_type": self.config.graph_type,
453
+ "policy_model": self.config.policy_model,
454
+ "policy_provider": self.config.policy_provider,
455
+ "rollout_budget": self.config.rollout_budget,
456
+ "proposer_effort": self.config.proposer_effort,
457
+ "verifier_model": self.config.verifier_model,
458
+ "verifier_provider": self.config.verifier_provider,
459
+ "problem_spec": self.config.problem_spec,
460
+ "target_llm_calls": self.config.target_llm_calls,
461
+ "configured_tools": self.config.configured_tools,
462
+ "eval_sample_size": eval_sample_size,
463
+ "feedback_sample_size": feedback_sample_size,
464
+ "metadata": metadata,
465
+ "auto_start": self.auto_start,
466
+ }
467
+
468
+ # Strip unset optional fields so we don't send nulls to strict backends.
469
+ if payload.get("eval_sample_size") is None:
470
+ payload.pop("eval_sample_size", None)
471
+ if payload.get("feedback_sample_size") is None:
472
+ payload.pop("feedback_sample_size", None)
473
+ if payload.get("policy_provider") is None:
474
+ payload.pop("policy_provider", None)
475
+ if payload.get("verifier_model") is None:
476
+ payload.pop("verifier_model", None)
477
+ if payload.get("verifier_provider") is None:
478
+ payload.pop("verifier_provider", None)
479
+ if payload.get("problem_spec") is None:
480
+ payload.pop("problem_spec", None)
481
+ if payload.get("target_llm_calls") is None:
482
+ payload.pop("target_llm_calls", None)
483
+ if payload.get("configured_tools") is None:
484
+ payload.pop("configured_tools", None)
485
+
486
+ return payload
487
+
488
+ def submit(self) -> GraphGenSubmitResult:
489
+ """Submit the job to the backend.
490
+
491
+ Returns:
492
+ GraphGenSubmitResult with job IDs and initial status
493
+
494
+ Raises:
495
+ RuntimeError: If job submission fails
496
+ """
497
+ from .graphgen_validators import validate_graphgen_job_config
498
+
499
+ ctx: Dict[str, Any] = {"dataset_name": self.dataset.metadata.name}
500
+ log_info("GraphGenJob.submit invoked", ctx=ctx)
501
+
502
+ if self._graphgen_job_id:
503
+ raise RuntimeError(f"Job already submitted: {self._graphgen_job_id}")
504
+
505
+ # Validate config + dataset before expensive API call.
506
+ validate_graphgen_job_config(self.config, self.dataset)
507
+
508
+ payload = self._build_payload()
509
+
510
+ # Submit job - use /graphgen/jobs endpoint
511
+ create_url = f"{self.backend_url}/graphgen/jobs"
512
+ headers = {
513
+ "X-API-Key": self.api_key,
514
+ "Content-Type": "application/json",
515
+ }
516
+
517
+ import logging
518
+ logger = logging.getLogger(__name__)
519
+ logger.debug(f"Submitting GraphGen job to: {create_url}")
520
+
521
+ resp = http_post(create_url, headers=headers, json_body=payload, timeout=180.0)
522
+
523
+ if resp.status_code not in (200, 201):
524
+ error_msg = f"Job submission failed with status {resp.status_code}: {resp.text[:500]}"
525
+ if resp.status_code == 404:
526
+ error_msg += (
527
+ f"\n\nPossible causes:"
528
+ f"\n1. Backend route /api/graphgen/jobs not registered"
529
+ f"\n2. GraphGen feature may not be enabled on this backend"
530
+ f"\n3. Verify backend is running at: {self.backend_url}"
531
+ )
532
+ raise RuntimeError(error_msg)
533
+
534
+ try:
535
+ js = resp.json()
536
+ except Exception as e:
537
+ raise RuntimeError(f"Failed to parse response: {e}") from e
538
+
539
+ self._graphgen_job_id = js.get("graphgen_job_id")
540
+
541
+ if not self._graphgen_job_id:
542
+ raise RuntimeError("Response missing graphgen_job_id")
543
+
544
+ self._graph_evolve_job_id = js.get("graph_evolve_job_id")
545
+
546
+ self._submit_result = GraphGenSubmitResult(
547
+ graphgen_job_id=self._graphgen_job_id,
548
+ status=js.get("status", "queued"),
549
+ dataset_name=js.get("dataset_name", self.dataset.metadata.name),
550
+ task_count=js.get("task_count", len(self.dataset.tasks)),
551
+ rollout_budget=js.get("rollout_budget", self.config.rollout_budget),
552
+ policy_model=js.get("policy_model", self.config.policy_model),
553
+ verifier_mode=js.get("verifier_mode", self.dataset.verifier_config.mode),
554
+ graph_evolve_job_id=self._graph_evolve_job_id,
555
+ )
556
+
557
+ ctx["graphgen_job_id"] = self._graphgen_job_id
558
+ log_info("GraphGenJob.submit completed", ctx=ctx)
559
+
560
+ return self._submit_result
561
+
562
+ def get_status(self) -> Dict[str, Any]:
563
+ """Get current job status.
564
+
565
+ Returns:
566
+ Job status dictionary containing 'status', 'best_score', etc.
567
+
568
+ Raises:
569
+ RuntimeError: If job hasn't been submitted yet or API call fails.
570
+ """
571
+ if not self.job_id:
572
+ raise RuntimeError("Job not yet submitted. Call submit() first.")
573
+
574
+ url = f"{self.backend_url}/graphgen/jobs/{self.job_id}"
575
+ headers = {
576
+ "X-API-Key": self.api_key,
577
+ }
578
+
579
+ resp = http_get(url, headers=headers, timeout=30.0)
580
+
581
+ if resp.status_code != 200:
582
+ raise RuntimeError(
583
+ f"Failed to get job status: {resp.status_code} - {resp.text[:500]}"
584
+ )
585
+
586
+ data: Dict[str, Any] = resp.json()
587
+ gepa_id = data.get("graph_evolve_job_id")
588
+ if gepa_id:
589
+ self._graph_evolve_job_id = gepa_id
590
+ return data
591
+
592
+ def start(self) -> Dict[str, Any]:
593
+ """Start a queued GraphGen job.
594
+
595
+ This is only needed if the job was created with auto_start=False or ended up queued.
596
+
597
+ Returns:
598
+ Updated job status dictionary.
599
+ """
600
+ if not self.job_id:
601
+ raise RuntimeError("Job not yet submitted. Call submit() first.")
602
+
603
+ url = f"{self.backend_url}/graphgen/jobs/{self.job_id}/start"
604
+ headers = {
605
+ "X-API-Key": self.api_key,
606
+ "Content-Type": "application/json",
607
+ }
608
+
609
+ resp = http_post(url, headers=headers, json_body=None, timeout=60.0)
610
+ if resp.status_code != 200:
611
+ raise RuntimeError(
612
+ f"Failed to start job: {resp.status_code} - {resp.text[:500]}"
613
+ )
614
+ data: Dict[str, Any] = resp.json()
615
+ if self._submit_result and "status" in data:
616
+ self._submit_result.status = data.get("status", self._submit_result.status)
617
+ return data
618
+
619
+ def get_events(self, *, since_seq: int = 0, limit: int = 1000) -> Dict[str, Any]:
620
+ """Fetch events for this GraphGen job.
621
+
622
+ Args:
623
+ since_seq: Return events with sequence number greater than this.
624
+ limit: Maximum number of events to return.
625
+
626
+ Returns:
627
+ Backend envelope: {"events": [...], "has_more": bool, "next_seq": int}.
628
+ """
629
+ if not self.job_id:
630
+ raise RuntimeError("Job not yet submitted. Call submit() first.")
631
+
632
+ base = f"{self.backend_url}/graphgen/jobs/{self.job_id}/events"
633
+ url = f"{base}?since_seq={since_seq}&limit={limit}"
634
+ headers = {"X-API-Key": self.api_key}
635
+
636
+ resp = http_get(url, headers=headers, timeout=30.0)
637
+ if resp.status_code != 200:
638
+ raise RuntimeError(
639
+ f"Failed to get events: {resp.status_code} - {resp.text[:500]}"
640
+ )
641
+ return cast(Dict[str, Any], resp.json())
642
+
643
+ def get_metrics(
644
+ self,
645
+ *,
646
+ name: Optional[str] = None,
647
+ after_step: Optional[int] = None,
648
+ limit: int = 500,
649
+ run_id: Optional[str] = None,
650
+ ) -> Dict[str, Any]:
651
+ """Fetch metrics for this GraphGen job.
652
+
653
+ Args:
654
+ name: Optional metric name filter.
655
+ after_step: Optional step filter.
656
+ limit: Maximum number of metrics to return.
657
+ run_id: Optional run identifier filter.
658
+
659
+ Returns:
660
+ Dictionary containing 'metrics' list.
661
+ """
662
+ if not self.job_id:
663
+ raise RuntimeError("Job not yet submitted. Call submit() first.")
664
+
665
+ from urllib.parse import urlencode
666
+
667
+ params: Dict[str, Any] = {"limit": limit}
668
+ if name is not None:
669
+ params["name"] = name
670
+ if after_step is not None:
671
+ params["after_step"] = after_step
672
+ if run_id is not None:
673
+ params["run_id"] = run_id
674
+
675
+ qs = urlencode(params)
676
+ url = f"{self.backend_url}/graphgen/jobs/{self.job_id}/metrics?{qs}"
677
+ headers = {"X-API-Key": self.api_key}
678
+
679
+ resp = http_get(url, headers=headers, timeout=30.0)
680
+ if resp.status_code != 200:
681
+ raise RuntimeError(
682
+ f"Failed to get metrics: {resp.status_code} - {resp.text[:500]}"
683
+ )
684
+ return cast(Dict[str, Any], resp.json())
685
+
686
def stream_until_complete(
    self,
    *,
    timeout: float = 3600.0,
    interval: float = 5.0,
    handlers: Optional[Sequence[Any]] = None,
    on_event: Optional[Callable[[Dict[str, Any]], None]] = None,
) -> Dict[str, Any]:
    """Stream job updates until the job reaches a terminal state.

    Builds a ``JobStreamer`` over the GraphGen endpoints for this job with
    the STATUS, EVENTS and METRICS streams enabled, then runs its
    ``stream_until_terminal()`` coroutine to completion via ``asyncio.run``.

    Event types a handler may see include:
        - job_started: Job execution began
        - generation_started: New generation of candidates started
        - candidate_evaluated: A candidate graph was evaluated
        - generation_completed: Generation finished
        - optimization_completed: Job finished successfully
        - job_failed: Job encountered an error

    Args:
        timeout: Forwarded to the streamer as ``timeout_seconds``.
        interval: Forwarded to the streamer as ``interval_seconds``.
        handlers: Stream handler instances to receive messages. When
            ``None``, a single ``GraphGenHandler`` is used.
        on_event: Accepted as part of the signature, but the code in this
            method does not reference it. Pass a handler via ``handlers``
            to observe events.

    Returns:
        Whatever ``stream_until_terminal()`` resolves to (the final job
        status dictionary).

    Raises:
        RuntimeError: If the job has no ``job_id`` yet.
        TimeoutError: Not raised directly here; presumably raised by the
            streamer when ``timeout`` elapses first. TODO confirm.

    Example:
        >>> job.submit()
        >>> result = job.stream_until_complete(timeout=1800.0)
        >>> print(f"Best score: {result.get('best_score')}")
    """
    if not self.job_id:
        raise RuntimeError("Job not yet submitted. Call submit() first.")

    from synth_ai.sdk.streaming import (
        GraphGenHandler,
        JobStreamer,
        StreamConfig,
        StreamEndpoints,
        StreamType,
    )

    stream_config = StreamConfig(
        enabled_streams={StreamType.STATUS, StreamType.EVENTS, StreamType.METRICS},
        max_events_per_poll=500,
        deduplicate=True,
    )

    # Fall back to the default CLI-style handler when none were supplied;
    # either way the streamer receives its own list copy.
    active_handlers = [GraphGenHandler()] if handlers is None else list(handlers)

    # Only the GraphGen job ID is passed; per the original notes the backend
    # resolves GraphGen -> GEPA internally.
    job_streamer = JobStreamer(
        base_url=self.backend_url,
        api_key=self.api_key,
        job_id=self.job_id,
        endpoints=StreamEndpoints.graphgen(self.job_id),
        config=stream_config,
        handlers=active_handlers,
        interval_seconds=interval,
        timeout_seconds=timeout,
    )

    return asyncio.run(job_streamer.stream_until_terminal())
766
+
767
def poll_until_complete(
    self,
    *,
    timeout: float = 3600.0,
    interval: float = 5.0,
    progress: bool = False,
    on_status: Optional[Callable[[Dict[str, Any]], None]] = None,
) -> Dict[str, Any]:
    """Poll job until it reaches a terminal state.

    Calls ``self.get_status()`` repeatedly instead of streaming. Errors
    raised by ``get_status()`` are logged and retried (they may be
    transient); errors raised by ``on_status`` are NOT swallowed and
    propagate to the caller.

    Args:
        timeout: Maximum seconds to wait (default: 3600 = 1 hour)
        interval: Seconds between poll attempts (default: 5)
        progress: If True, print a status line on each successful poll
        on_status: Optional callback called with each status dict

    Returns:
        The last status dictionary, once its ``status`` is one of
        ``succeeded``, ``completed``, ``failed``, ``error`` or ``cancelled``.

    Raises:
        RuntimeError: If job hasn't been submitted yet
        TimeoutError: If timeout is exceeded

    Example:
        >>> result = job.poll_until_complete(progress=True)
        [00:15] running | score: 0.72
        [00:30] running | score: 0.78
        [00:45] succeeded | score: 0.85
    """
    if not self.job_id:
        raise RuntimeError("Job not yet submitted. Call submit() first.")

    import logging
    import time

    logger = logging.getLogger(__name__)
    terminal_states = ("succeeded", "completed", "failed", "error", "cancelled")

    # Monotonic clock: elapsed time must not jump if the wall clock is adjusted.
    start_time = time.monotonic()
    elapsed = 0.0
    last_data: Dict[str, Any] = {}

    while elapsed <= timeout:
        # Only the backend call is guarded; a formatting or callback failure
        # must not be mistaken for a transient network error and retried.
        try:
            status_data = self.get_status()
        except Exception as e:
            logger.warning("Error polling job status: %s", e)
        else:
            last_data = dict(status_data) if isinstance(status_data, dict) else {}
            status = last_data.get("status", "unknown")

            if progress:
                best_score = last_data.get("best_score")
                if best_score is None:
                    score_str = "score: --"
                else:
                    # The backend value may not be numeric; never let the
                    # progress line prevent a terminal state being returned.
                    try:
                        score_str = f"score: {float(best_score):.2f}"
                    except (TypeError, ValueError):
                        score_str = f"score: {best_score}"
                mins, secs = divmod(int(elapsed), 60)
                print(f"[{mins:02d}:{secs:02d}] {status} | {score_str}")

            if on_status:
                on_status(last_data)

            if status in terminal_states:
                return last_data

        time.sleep(interval)
        elapsed = time.monotonic() - start_time

    raise TimeoutError(
        f"Job {self.job_id} did not complete within {timeout}s timeout. "
        f"Current status: {last_data.get('status', 'unknown')}"
    )
848
+
849
def download_prompt(self) -> str:
    """Download the optimized prompt from a completed job.

    For graph-first jobs, prefer `download_graph_txt()`; this method is
    mainly useful for legacy single-node prompt workflows.

    Returns:
        The ``prompt`` field of the download response. An empty string is
        returned when the field is missing or null, or when the response
        body is not a JSON object.

    Raises:
        RuntimeError: If job hasn't been submitted, or the backend answers
            with a status code other than 200.
    """
    if not self.job_id:
        raise RuntimeError("Job not yet submitted. Call submit() first.")

    url = f"{self.backend_url}/graphgen/jobs/{self.job_id}/download"
    headers = {
        "X-API-Key": self.api_key,
    }

    resp = http_get(url, headers=headers, timeout=30.0)

    if resp.status_code != 200:
        raise RuntimeError(
            f"Failed to download prompt: {resp.status_code} - {resp.text[:500]}"
        )

    data = resp.json()
    # Guard against a non-object payload and an explicit JSON null so the
    # declared ``str`` return type always holds.
    prompt = data.get("prompt") if isinstance(data, dict) else None
    return "" if prompt is None else prompt
878
+
879
def download_graph_txt(self) -> str:
    """Fetch the PUBLIC (redacted) text export of this job's graph.

    Graph-first GraphGen jobs produce multi-node graphs whose internal
    YAML/spec is proprietary and is not exposed. This helper issues a GET to
    ``{backend_url}/graphgen/jobs/{job_id}/graph.txt`` and returns the body.

    Returns:
        The raw response text of the ``.txt`` export.

    Raises:
        RuntimeError: If the job has no ``job_id`` yet, or the backend
            answers with a status code other than 200.
    """
    if not self.job_id:
        raise RuntimeError("Job not yet submitted. Call submit() first.")

    export_url = f"{self.backend_url}/graphgen/jobs/{self.job_id}/graph.txt"
    response = http_get(
        export_url, headers={"X-API-Key": self.api_key}, timeout=30.0
    )
    if response.status_code == 200:
        return response.text

    raise RuntimeError(
        f"Failed to download graph export: {response.status_code} - {response.text[:500]}"
    )
899
+
900
def run_inference(
    self,
    input_data: Dict[str, Any],
    *,
    model: Optional[str] = None,
    prompt_snapshot_id: Optional[str] = None,
    graph_snapshot_id: Optional[str] = None,
    timeout: float = 120.0,
) -> Dict[str, Any]:
    """Execute the job's optimized graph/workflow on a single input.

    Args:
        input_data: Sent to the backend as the ``input`` field.
        model: When truthy, sent as ``model`` to override the job's model.
        prompt_snapshot_id: Legacy alias for selecting a specific snapshot.
        graph_snapshot_id: Snapshot to use; preferred for graph-first jobs.
            Whichever of the two IDs is supplied is sent under the
            ``prompt_snapshot_id`` key for backward-compatible routing.
        timeout: Request timeout in seconds (default: 120.0).

    Returns:
        The decoded JSON body of the completions endpoint response.

    Raises:
        RuntimeError: If the job has no ``job_id`` yet, or the backend
            answers with a status code other than 200.
        ValueError: If both prompt_snapshot_id and graph_snapshot_id are provided.
    """
    if not self.job_id:
        raise RuntimeError("Job not yet submitted. Call submit() first.")
    if prompt_snapshot_id and graph_snapshot_id:
        raise ValueError("Provide only one of prompt_snapshot_id or graph_snapshot_id.")

    endpoint = f"{self.backend_url}/graphgen/graph/completions"
    request_headers = {
        "X-API-Key": self.api_key,
        "Content-Type": "application/json",
    }

    body: Dict[str, Any] = {"job_id": self.job_id, "input": input_data}
    if model:
        body["model"] = model
    # At most one of the two IDs is set here (checked above).
    chosen_snapshot = graph_snapshot_id or prompt_snapshot_id
    if chosen_snapshot:
        body["prompt_snapshot_id"] = chosen_snapshot

    response = http_post(
        endpoint, headers=request_headers, json_body=body, timeout=timeout
    )
    if response.status_code == 200:
        return cast(Dict[str, Any], response.json())

    raise RuntimeError(
        f"Inference failed: {response.status_code} - {response.text[:500]}"
    )
958
+
959
def run_inference_output(
    self,
    input_data: Dict[str, Any],
    *,
    model: Optional[str] = None,
    prompt_snapshot_id: Optional[str] = None,
    graph_snapshot_id: Optional[str] = None,
    timeout: Optional[float] = None,
) -> Any:
    """Convenience wrapper returning only the model output.

    Args:
        input_data: Forwarded to ``run_inference``.
        model: Forwarded to ``run_inference``.
        prompt_snapshot_id: Forwarded to ``run_inference``.
        graph_snapshot_id: Forwarded to ``run_inference``.
        timeout: Optional request timeout in seconds. Forwarded to
            ``run_inference`` only when given, so its own default applies
            otherwise.

    Returns:
        The ``output`` entry of the inference result, or ``None`` when the
        result is not a dict or has no such entry.
    """
    kwargs: Dict[str, Any] = {
        "model": model,
        "prompt_snapshot_id": prompt_snapshot_id,
        "graph_snapshot_id": graph_snapshot_id,
    }
    # Only pass ``timeout`` when requested, keeping the call identical to
    # the historical one for existing callers.
    if timeout is not None:
        kwargs["timeout"] = timeout

    result = self.run_inference(input_data, **kwargs)
    if isinstance(result, dict):
        return result.get("output")
    return None
977
+
978
def run_verifier(
    self,
    session_trace: Dict[str, Any] | SessionTraceInput,
    *,
    context: Optional[Dict[str, Any]] = None,
    prompt_snapshot_id: Optional[str] = None,
    graph_snapshot_id: Optional[str] = None,
    timeout: float = 120.0,
) -> GraphGenGraphVerifierResponse:
    """Run a verifier graph on an execution trace.

    This method is specifically for graphs trained with graph_type="verifier".
    It accepts a V3 trace and returns structured rewards.

    Args:
        session_trace: V3 session trace to evaluate. Can be a dict or SessionTraceInput.
        context: Additional context for evaluation (e.g., rubric overrides, task description).
            Always sent, including when ``None``.
        prompt_snapshot_id: Specific snapshot to use (default: best).
        graph_snapshot_id: Specific GraphSnapshot to use (default: best).
            Preferred for graph-first jobs. Whichever ID is supplied is sent
            under the ``prompt_snapshot_id`` key.
        timeout: Request timeout in seconds (default: 120.0).

    Returns:
        GraphGenGraphVerifierResponse validated from the response JSON.

    Raises:
        RuntimeError: If job hasn't been submitted or inference fails.
        ValueError: If both prompt_snapshot_id and graph_snapshot_id are provided.
    """
    if not self.job_id:
        raise RuntimeError("Job not yet submitted. Call submit() first.")

    if prompt_snapshot_id and graph_snapshot_id:
        raise ValueError("Provide only one of prompt_snapshot_id or graph_snapshot_id.")

    url = f"{self.backend_url}/graphgen/graph/verifier"
    headers = {
        "X-API-Key": self.api_key,
        "Content-Type": "application/json",
    }

    # Convert trace to a plain JSON-compatible dict if it's a Pydantic model
    if isinstance(session_trace, SessionTraceInput):
        session_trace_data = session_trace.model_dump(mode="json")
    else:
        session_trace_data = session_trace

    payload: Dict[str, Any] = {
        "job_id": self.job_id,
        "session_trace": session_trace_data,
        "context": context,
    }

    snapshot_id = graph_snapshot_id or prompt_snapshot_id
    if snapshot_id:
        payload["prompt_snapshot_id"] = snapshot_id

    resp = http_post(url, headers=headers, json_body=payload, timeout=timeout)

    if resp.status_code != 200:
        raise RuntimeError(
            f"Verifier inference failed: {resp.status_code} - {resp.text[:500]}"
        )

    return GraphGenGraphVerifierResponse.model_validate(resp.json())
1040
+
1041
def get_graph_record(
    self,
    *,
    prompt_snapshot_id: Optional[str] = None,
    graph_snapshot_id: Optional[str] = None,
) -> Dict[str, Any]:
    """Fetch the optimized graph record (snapshot) of a completed job.

    Note: for graph-first jobs, this record is **redacted** and never
    includes proprietary YAML/spec. Use `download_graph_txt()` for the
    public export.

    Args:
        prompt_snapshot_id: Legacy alias for selecting a specific snapshot.
        graph_snapshot_id: Specific GraphSnapshot to use (default: best).
            Whichever ID is supplied is sent under ``prompt_snapshot_id``.

    Returns:
        The decoded JSON body of the record endpoint. Documented fields:
            - job_id: The job ID
            - snapshot_id: The snapshot ID used
            - prompt: Extracted prompt text (legacy single-node only; may be empty)
            - graph: Public graph record payload (e.g., export metadata)
            - model: Model used for this graph (optional)

    Raises:
        RuntimeError: If the job has no ``job_id`` yet, or the backend
            answers with a status code other than 200.
        ValueError: If both prompt_snapshot_id and graph_snapshot_id are provided.
    """
    if not self.job_id:
        raise RuntimeError("Job not yet submitted. Call submit() first.")
    if prompt_snapshot_id and graph_snapshot_id:
        raise ValueError("Provide only one of prompt_snapshot_id or graph_snapshot_id.")

    endpoint = f"{self.backend_url}/graphgen/graph/record"
    request_headers = {
        "X-API-Key": self.api_key,
        "Content-Type": "application/json",
    }

    body: Dict[str, Any] = {"job_id": self.job_id}
    # At most one of the two IDs is set here (checked above).
    chosen_snapshot = graph_snapshot_id or prompt_snapshot_id
    if chosen_snapshot:
        body["prompt_snapshot_id"] = chosen_snapshot

    response = http_post(
        endpoint, headers=request_headers, json_body=body, timeout=30.0
    )
    if response.status_code == 200:
        return cast(Dict[str, Any], response.json())

    raise RuntimeError(
        f"Failed to get graph record: {response.status_code} - {response.text[:500]}"
    )
1096
+
1097
+
1098
# Names exported by ``from <module> import *``.
__all__ = ["GraphGenJob", "GraphGenJobResult", "GraphGenSubmitResult"]