synth-ai 0.2.6.dev1__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (738)
  1. synth_ai/__init__.py +44 -24
  2. synth_ai/__main__.py +30 -3
  3. synth_ai/cli/__init__.py +103 -48
  4. synth_ai/cli/__main__.py +42 -0
  5. synth_ai/cli/_internal/__init__.py +5 -0
  6. synth_ai/cli/_internal/modal_wrapper.py +31 -0
  7. synth_ai/cli/_internal/storage.py +20 -0
  8. synth_ai/cli/_internal/typer_patch.py +47 -0
  9. synth_ai/cli/_internal/validate_task_app.py +29 -0
  10. synth_ai/cli/agents/__init__.py +17 -0
  11. synth_ai/cli/agents/claude.py +77 -0
  12. synth_ai/cli/agents/codex.py +265 -0
  13. synth_ai/cli/agents/opencode.py +253 -0
  14. synth_ai/cli/commands/__init__.py +18 -0
  15. synth_ai/cli/commands/artifacts/__init__.py +13 -0
  16. synth_ai/cli/commands/artifacts/client.py +119 -0
  17. synth_ai/cli/commands/artifacts/config.py +57 -0
  18. synth_ai/cli/commands/artifacts/core.py +24 -0
  19. synth_ai/cli/commands/artifacts/download.py +188 -0
  20. synth_ai/cli/commands/artifacts/export.py +186 -0
  21. synth_ai/cli/commands/artifacts/list.py +156 -0
  22. synth_ai/cli/commands/artifacts/parsing.py +250 -0
  23. synth_ai/cli/commands/artifacts/show.py +336 -0
  24. synth_ai/cli/commands/demo/__init__.py +3 -0
  25. synth_ai/cli/commands/demo/core.py +153 -0
  26. synth_ai/cli/commands/eval/__init__.py +10 -0
  27. synth_ai/cli/commands/eval/config.py +338 -0
  28. synth_ai/cli/commands/eval/core.py +256 -0
  29. synth_ai/cli/commands/eval/runner.py +704 -0
  30. synth_ai/cli/commands/eval/validation.py +60 -0
  31. synth_ai/cli/commands/filter/__init__.py +12 -0
  32. synth_ai/cli/commands/filter/core.py +424 -0
  33. synth_ai/cli/commands/filter/errors.py +55 -0
  34. synth_ai/cli/commands/filter/validation.py +77 -0
  35. synth_ai/cli/commands/help/__init__.py +185 -0
  36. synth_ai/cli/commands/help/core.py +72 -0
  37. synth_ai/cli/commands/scan/__init__.py +19 -0
  38. synth_ai/cli/commands/scan/cloudflare_scanner.py +403 -0
  39. synth_ai/cli/commands/scan/core.py +344 -0
  40. synth_ai/cli/commands/scan/health_checker.py +242 -0
  41. synth_ai/cli/commands/scan/local_scanner.py +278 -0
  42. synth_ai/cli/commands/scan/models.py +83 -0
  43. synth_ai/cli/commands/smoke/__init__.py +7 -0
  44. synth_ai/cli/commands/smoke/core.py +1428 -0
  45. synth_ai/cli/commands/status/__init__.py +3 -0
  46. synth_ai/cli/commands/status/client.py +91 -0
  47. synth_ai/cli/commands/status/config.py +12 -0
  48. synth_ai/cli/commands/status/errors.py +11 -0
  49. synth_ai/cli/commands/status/subcommands/__init__.py +3 -0
  50. synth_ai/cli/commands/status/subcommands/config.py +13 -0
  51. synth_ai/cli/commands/status/subcommands/files.py +34 -0
  52. synth_ai/cli/commands/status/subcommands/jobs.py +51 -0
  53. synth_ai/cli/commands/status/subcommands/models.py +35 -0
  54. synth_ai/cli/commands/status/subcommands/runs.py +34 -0
  55. synth_ai/cli/commands/status/subcommands/session.py +77 -0
  56. synth_ai/cli/commands/status/subcommands/summary.py +39 -0
  57. synth_ai/cli/commands/status/subcommands/utils.py +41 -0
  58. synth_ai/cli/commands/status/utils.py +23 -0
  59. synth_ai/cli/commands/train/__init__.py +53 -0
  60. synth_ai/cli/commands/train/core.py +22 -0
  61. synth_ai/cli/commands/train/errors.py +117 -0
  62. synth_ai/cli/commands/train/judge_schemas.py +201 -0
  63. synth_ai/cli/commands/train/judge_validation.py +305 -0
  64. synth_ai/cli/commands/train/prompt_learning_validation.py +633 -0
  65. synth_ai/cli/commands/train/validation.py +392 -0
  66. synth_ai/cli/demo_apps/__init__.py +10 -0
  67. synth_ai/cli/demo_apps/core/__init__.py +28 -0
  68. synth_ai/cli/demo_apps/core/cli.py +1735 -0
  69. synth_ai/cli/demo_apps/crafter/__init__.py +1 -0
  70. synth_ai/cli/demo_apps/crafter/crafter_fft_4b.toml +55 -0
  71. synth_ai/cli/demo_apps/crafter/grpo_crafter_task_app.py +186 -0
  72. synth_ai/cli/demo_apps/crafter/rl_from_base_qwen4b.toml +74 -0
  73. synth_ai/cli/demo_apps/demo_registry.py +176 -0
  74. synth_ai/cli/demo_apps/demo_task_apps/__init__.py +7 -0
  75. synth_ai/{demos → cli/demo_apps}/demo_task_apps/core.py +117 -51
  76. synth_ai/cli/demo_apps/demo_task_apps/crafter/__init__.py +1 -0
  77. synth_ai/cli/demo_apps/demo_task_apps/crafter/configs/crafter_fft_4b.toml +53 -0
  78. synth_ai/cli/demo_apps/demo_task_apps/crafter/configs/rl_from_base_qwen4b.toml +73 -0
  79. synth_ai/cli/demo_apps/demo_task_apps/crafter/grpo_crafter_task_app.py +185 -0
  80. synth_ai/cli/demo_apps/demo_task_apps/math/_common.py +16 -0
  81. synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/app.py +2 -1
  82. synth_ai/cli/demo_apps/demo_task_apps/math/config.toml +73 -0
  83. synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/deploy_modal.py +3 -6
  84. synth_ai/cli/demo_apps/demo_task_apps/math/modal_task_app.py +738 -0
  85. synth_ai/cli/demo_apps/demo_task_apps/math/task_app_entry.py +39 -0
  86. synth_ai/cli/demo_apps/math/__init__.py +1 -0
  87. synth_ai/cli/demo_apps/math/_common.py +16 -0
  88. synth_ai/cli/demo_apps/math/app.py +38 -0
  89. synth_ai/cli/demo_apps/math/config.toml +75 -0
  90. synth_ai/cli/demo_apps/math/deploy_modal.py +54 -0
  91. synth_ai/cli/demo_apps/math/modal_task_app.py +698 -0
  92. synth_ai/cli/demo_apps/math/task_app_entry.py +53 -0
  93. synth_ai/cli/demo_apps/mipro/main.py +271 -0
  94. synth_ai/cli/demo_apps/mipro/task_app.py +922 -0
  95. synth_ai/cli/demo_apps/mipro/train_cfg.toml +92 -0
  96. synth_ai/cli/demos/__init__.py +12 -0
  97. synth_ai/cli/demos/demo.py +32 -0
  98. synth_ai/cli/demos/rl_demo.py +254 -0
  99. synth_ai/cli/deploy.py +216 -0
  100. synth_ai/cli/infra/__init__.py +14 -0
  101. synth_ai/cli/{balance.py → infra/balance.py} +21 -3
  102. synth_ai/cli/infra/mcp.py +35 -0
  103. synth_ai/cli/infra/modal_app.py +36 -0
  104. synth_ai/cli/infra/setup.py +69 -0
  105. synth_ai/cli/infra/status.py +16 -0
  106. synth_ai/cli/infra/turso.py +77 -0
  107. synth_ai/cli/lib/__init__.py +10 -0
  108. synth_ai/cli/lib/agents.py +76 -0
  109. synth_ai/cli/lib/apps/modal_app.py +101 -0
  110. synth_ai/cli/lib/apps/task_app.py +642 -0
  111. synth_ai/cli/lib/bin.py +39 -0
  112. synth_ai/cli/lib/env.py +375 -0
  113. synth_ai/cli/lib/errors.py +85 -0
  114. synth_ai/cli/lib/modal.py +315 -0
  115. synth_ai/cli/lib/plotting.py +126 -0
  116. synth_ai/cli/lib/prompt_args.py +39 -0
  117. synth_ai/cli/lib/prompts.py +284 -0
  118. synth_ai/cli/lib/sqld.py +122 -0
  119. synth_ai/cli/lib/task_app_discovery.py +884 -0
  120. synth_ai/cli/lib/task_app_env.py +295 -0
  121. synth_ai/cli/lib/train_cfgs.py +300 -0
  122. synth_ai/cli/lib/tunnel_records.py +207 -0
  123. synth_ai/cli/local/__init__.py +14 -0
  124. synth_ai/cli/local/experiment_queue/__init__.py +72 -0
  125. synth_ai/cli/local/experiment_queue/api_schemas.py +221 -0
  126. synth_ai/cli/local/experiment_queue/celery_app.py +208 -0
  127. synth_ai/cli/local/experiment_queue/config.py +128 -0
  128. synth_ai/cli/local/experiment_queue/config_utils.py +272 -0
  129. synth_ai/cli/local/experiment_queue/database.py +175 -0
  130. synth_ai/cli/local/experiment_queue/dispatcher.py +119 -0
  131. synth_ai/cli/local/experiment_queue/models.py +231 -0
  132. synth_ai/cli/local/experiment_queue/progress_info.py +160 -0
  133. synth_ai/cli/local/experiment_queue/results.py +373 -0
  134. synth_ai/cli/local/experiment_queue/schemas.py +131 -0
  135. synth_ai/cli/local/experiment_queue/service.py +344 -0
  136. synth_ai/cli/local/experiment_queue/status.py +372 -0
  137. synth_ai/cli/local/experiment_queue/status_tracker.py +360 -0
  138. synth_ai/cli/local/experiment_queue/tasks.py +1984 -0
  139. synth_ai/cli/local/experiment_queue/trace_storage.py +65 -0
  140. synth_ai/cli/local/experiment_queue/validation.py +157 -0
  141. synth_ai/cli/local/session/__init__.py +92 -0
  142. synth_ai/cli/local/session/client.py +383 -0
  143. synth_ai/cli/local/session/constants.py +63 -0
  144. synth_ai/cli/local/session/exceptions.py +105 -0
  145. synth_ai/cli/local/session/manager.py +139 -0
  146. synth_ai/cli/local/session/models.py +89 -0
  147. synth_ai/cli/local/session/query.py +110 -0
  148. synth_ai/cli/root.py +150 -102
  149. synth_ai/cli/task_apps/__init__.py +37 -0
  150. synth_ai/cli/task_apps/commands.py +3145 -0
  151. synth_ai/cli/task_apps/deploy.py +7 -0
  152. synth_ai/cli/task_apps/list.py +26 -0
  153. synth_ai/cli/task_apps/main.py +36 -0
  154. synth_ai/cli/task_apps/modal_serve.py +11 -0
  155. synth_ai/cli/task_apps/serve.py +11 -0
  156. synth_ai/cli/training/__init__.py +8 -0
  157. synth_ai/cli/training/train.py +5 -0
  158. synth_ai/cli/training/train_cfg.py +34 -0
  159. synth_ai/cli/{watch.py → training/watch.py} +13 -18
  160. synth_ai/cli/turso.py +52 -0
  161. synth_ai/cli/utils/__init__.py +8 -0
  162. synth_ai/cli/utils/experiments.py +235 -0
  163. synth_ai/cli/utils/queue.py +504 -0
  164. synth_ai/cli/{recent.py → utils/recent.py} +13 -7
  165. synth_ai/cli/{traces.py → utils/traces.py} +9 -5
  166. synth_ai/contracts/__init__.py +67 -0
  167. synth_ai/core/__init__.py +100 -0
  168. synth_ai/core/_utils/__init__.py +54 -0
  169. synth_ai/core/_utils/base_url.py +10 -0
  170. synth_ai/core/_utils/http.py +10 -0
  171. synth_ai/core/_utils/prompts.py +14 -0
  172. synth_ai/core/_utils/task_app_state.py +12 -0
  173. synth_ai/core/_utils/user_config.py +10 -0
  174. synth_ai/core/apps/common.py +116 -0
  175. synth_ai/core/auth.py +95 -0
  176. synth_ai/core/cfgs.py +240 -0
  177. synth_ai/core/config/__init__.py +16 -0
  178. synth_ai/core/config/base.py +168 -0
  179. synth_ai/core/config/resolver.py +89 -0
  180. synth_ai/core/env.py +231 -0
  181. synth_ai/core/errors.py +126 -0
  182. synth_ai/core/http.py +230 -0
  183. synth_ai/core/integrations/__init__.py +11 -0
  184. synth_ai/core/integrations/cloudflare.py +1710 -0
  185. synth_ai/core/integrations/mcp/__init__.py +6 -0
  186. synth_ai/core/integrations/mcp/__main__.py +8 -0
  187. synth_ai/core/integrations/mcp/claude.py +36 -0
  188. synth_ai/core/integrations/mcp/main.py +254 -0
  189. synth_ai/core/integrations/mcp/setup.py +100 -0
  190. synth_ai/core/integrations/modal.py +277 -0
  191. synth_ai/core/json.py +72 -0
  192. synth_ai/core/log_filter.py +99 -0
  193. synth_ai/core/logging.py +82 -0
  194. synth_ai/core/paths.py +107 -0
  195. synth_ai/core/pricing.py +109 -0
  196. synth_ai/core/process.py +233 -0
  197. synth_ai/core/ssl.py +25 -0
  198. synth_ai/core/storage/__init__.py +71 -0
  199. synth_ai/core/task_app_state.py +318 -0
  200. synth_ai/core/telemetry.py +282 -0
  201. synth_ai/{tracing_v3 → core/tracing_v3}/__init__.py +5 -1
  202. synth_ai/{tracing_v3 → core/tracing_v3}/abstractions.py +21 -4
  203. synth_ai/core/tracing_v3/config.py +229 -0
  204. synth_ai/core/tracing_v3/constants.py +21 -0
  205. synth_ai/{tracing_v3 → core/tracing_v3}/db_config.py +42 -29
  206. synth_ai/{tracing_v3 → core/tracing_v3}/decorators.py +80 -45
  207. synth_ai/{tracing_v3 → core/tracing_v3}/examples/basic_usage.py +15 -9
  208. synth_ai/{tracing_v3 → core/tracing_v3}/hooks.py +6 -4
  209. synth_ai/{tracing_v3 → core/tracing_v3}/llm_call_record_helpers.py +161 -61
  210. synth_ai/{tracing_v3 → core/tracing_v3}/migration_helper.py +1 -2
  211. synth_ai/{tracing_v3 → core/tracing_v3}/replica_sync.py +12 -7
  212. synth_ai/core/tracing_v3/serialization.py +130 -0
  213. synth_ai/{tracing_v3 → core/tracing_v3}/session_tracer.py +88 -21
  214. synth_ai/{tracing_v3 → core/tracing_v3}/storage/base.py +99 -12
  215. synth_ai/core/tracing_v3/storage/config.py +109 -0
  216. synth_ai/{tracing_v3 → core/tracing_v3}/storage/factory.py +11 -9
  217. synth_ai/{tracing_v3 → core/tracing_v3}/storage/utils.py +15 -11
  218. synth_ai/core/tracing_v3/trace_utils.py +326 -0
  219. synth_ai/core/tracing_v3/turso/__init__.py +12 -0
  220. synth_ai/core/tracing_v3/turso/daemon.py +278 -0
  221. synth_ai/{tracing_v3 → core/tracing_v3}/turso/models.py +7 -3
  222. synth_ai/core/tracing_v3/turso/native_manager.py +1385 -0
  223. synth_ai/{tracing_v3 → core/tracing_v3}/utils.py +5 -4
  224. synth_ai/core/urls.py +18 -0
  225. synth_ai/core/user_config.py +137 -0
  226. synth_ai/core/uvicorn.py +222 -0
  227. synth_ai/data/__init__.py +83 -0
  228. synth_ai/data/enums.py +123 -0
  229. synth_ai/data/rewards.py +152 -0
  230. synth_ai/data/traces.py +35 -0
  231. synth_ai/products/__init__.py +6 -0
  232. synth_ai/products/graph_evolve/__init__.py +46 -0
  233. synth_ai/products/graph_evolve/client.py +226 -0
  234. synth_ai/products/graph_evolve/config.py +591 -0
  235. synth_ai/products/graph_evolve/converters/__init__.py +42 -0
  236. synth_ai/products/graph_evolve/converters/openai_sft.py +484 -0
  237. synth_ai/products/graph_evolve/examples/hotpotqa/config.toml +109 -0
  238. synth_ai/products/graph_evolve/run.py +222 -0
  239. synth_ai/products/graph_gepa/__init__.py +23 -0
  240. synth_ai/products/graph_gepa/converters/__init__.py +19 -0
  241. synth_ai/products/graph_gepa/converters/openai_sft.py +29 -0
  242. synth_ai/sdk/__init__.py +123 -0
  243. synth_ai/sdk/api/__init__.py +1 -0
  244. synth_ai/sdk/api/models/supported.py +514 -0
  245. synth_ai/sdk/api/research_agent/__init__.py +296 -0
  246. synth_ai/sdk/api/train/__init__.py +85 -0
  247. synth_ai/sdk/api/train/builders.py +895 -0
  248. synth_ai/sdk/api/train/cli.py +2199 -0
  249. synth_ai/sdk/api/train/config_finder.py +267 -0
  250. synth_ai/sdk/api/train/configs/__init__.py +65 -0
  251. synth_ai/sdk/api/train/configs/prompt_learning.py +1706 -0
  252. synth_ai/sdk/api/train/configs/rl.py +187 -0
  253. synth_ai/sdk/api/train/configs/sft.py +99 -0
  254. synth_ai/sdk/api/train/configs/shared.py +81 -0
  255. synth_ai/sdk/api/train/context_learning.py +312 -0
  256. synth_ai/sdk/api/train/env_resolver.py +418 -0
  257. synth_ai/sdk/api/train/graph_validators.py +216 -0
  258. synth_ai/sdk/api/train/graphgen.py +984 -0
  259. synth_ai/sdk/api/train/graphgen_models.py +823 -0
  260. synth_ai/sdk/api/train/graphgen_validators.py +109 -0
  261. synth_ai/sdk/api/train/local_api.py +10 -0
  262. synth_ai/sdk/api/train/pollers.py +124 -0
  263. synth_ai/sdk/api/train/progress/__init__.py +97 -0
  264. synth_ai/sdk/api/train/progress/dataclasses.py +569 -0
  265. synth_ai/sdk/api/train/progress/events.py +326 -0
  266. synth_ai/sdk/api/train/progress/results.py +428 -0
  267. synth_ai/sdk/api/train/progress/tracker.py +641 -0
  268. synth_ai/sdk/api/train/prompt_learning.py +469 -0
  269. synth_ai/sdk/api/train/rl.py +441 -0
  270. synth_ai/sdk/api/train/sft.py +396 -0
  271. synth_ai/sdk/api/train/summary.py +522 -0
  272. synth_ai/sdk/api/train/supported_algos.py +147 -0
  273. synth_ai/sdk/api/train/task_app.py +351 -0
  274. synth_ai/sdk/api/train/utils.py +279 -0
  275. synth_ai/sdk/api/train/validators.py +2424 -0
  276. synth_ai/sdk/graphs/__init__.py +15 -0
  277. synth_ai/sdk/graphs/completions.py +570 -0
  278. synth_ai/{inference → sdk/inference}/__init__.py +0 -1
  279. synth_ai/sdk/inference/client.py +128 -0
  280. synth_ai/sdk/jobs/__init__.py +16 -0
  281. synth_ai/sdk/jobs/client.py +371 -0
  282. synth_ai/sdk/judging/__init__.py +14 -0
  283. synth_ai/sdk/judging/base.py +24 -0
  284. synth_ai/sdk/judging/client.py +40 -0
  285. synth_ai/sdk/judging/schemas.py +222 -0
  286. synth_ai/sdk/judging/types.py +42 -0
  287. synth_ai/sdk/learning/__init__.py +99 -0
  288. synth_ai/sdk/learning/algorithms.py +14 -0
  289. synth_ai/{learning → sdk/learning}/client.py +121 -30
  290. synth_ai/sdk/learning/config.py +5 -0
  291. synth_ai/{learning → sdk/learning}/constants.py +0 -2
  292. synth_ai/sdk/learning/context_learning_client.py +531 -0
  293. synth_ai/sdk/learning/context_learning_types.py +292 -0
  294. synth_ai/sdk/learning/ft_client.py +7 -0
  295. synth_ai/{learning → sdk/learning}/health.py +15 -9
  296. synth_ai/{learning → sdk/learning}/jobs.py +44 -47
  297. synth_ai/sdk/learning/prompt_extraction.py +334 -0
  298. synth_ai/sdk/learning/prompt_learning_client.py +455 -0
  299. synth_ai/sdk/learning/prompt_learning_types.py +186 -0
  300. synth_ai/{rl → sdk/learning/rl}/__init__.py +13 -8
  301. synth_ai/{learning/rl_client.py → sdk/learning/rl/client.py} +89 -77
  302. synth_ai/sdk/learning/rl/config.py +31 -0
  303. synth_ai/{rl → sdk/learning/rl}/contracts.py +5 -14
  304. synth_ai/{rl → sdk/learning/rl}/env_keys.py +45 -16
  305. synth_ai/sdk/learning/rl/secrets.py +13 -0
  306. synth_ai/sdk/learning/rl_client.py +5 -0
  307. synth_ai/sdk/learning/sft/__init__.py +29 -0
  308. synth_ai/sdk/learning/sft/client.py +95 -0
  309. synth_ai/sdk/learning/sft/config.py +270 -0
  310. synth_ai/sdk/learning/sft/data.py +698 -0
  311. synth_ai/sdk/learning/sse.py +57 -0
  312. synth_ai/sdk/learning/validators.py +52 -0
  313. synth_ai/sdk/localapi/__init__.py +40 -0
  314. synth_ai/sdk/localapi/apps/__init__.py +28 -0
  315. synth_ai/sdk/localapi/client.py +10 -0
  316. synth_ai/sdk/localapi/contracts.py +10 -0
  317. synth_ai/sdk/localapi/helpers.py +519 -0
  318. synth_ai/sdk/localapi/rollouts.py +87 -0
  319. synth_ai/sdk/localapi/server.py +29 -0
  320. synth_ai/sdk/localapi/template.py +70 -0
  321. synth_ai/sdk/streaming/__init__.py +35 -0
  322. synth_ai/sdk/streaming/config.py +94 -0
  323. synth_ai/sdk/streaming/handlers.py +1997 -0
  324. synth_ai/sdk/streaming/streamer.py +713 -0
  325. synth_ai/sdk/streaming/types.py +112 -0
  326. synth_ai/sdk/task/__init__.py +164 -0
  327. synth_ai/sdk/task/apps/__init__.py +169 -0
  328. synth_ai/sdk/task/auth.py +165 -0
  329. synth_ai/sdk/task/client.py +175 -0
  330. synth_ai/sdk/task/config.py +257 -0
  331. synth_ai/sdk/task/contracts.py +219 -0
  332. synth_ai/sdk/task/datasets.py +108 -0
  333. synth_ai/sdk/task/errors.py +50 -0
  334. synth_ai/sdk/task/health.py +34 -0
  335. synth_ai/sdk/task/in_process.py +1190 -0
  336. synth_ai/sdk/task/in_process_runner.py +314 -0
  337. synth_ai/sdk/task/inference_api.py +299 -0
  338. synth_ai/sdk/task/json.py +111 -0
  339. synth_ai/sdk/task/proxy.py +287 -0
  340. synth_ai/sdk/task/rubrics/__init__.py +55 -0
  341. synth_ai/sdk/task/rubrics/loaders.py +156 -0
  342. synth_ai/sdk/task/rubrics/models.py +57 -0
  343. synth_ai/sdk/task/rubrics/scoring.py +116 -0
  344. synth_ai/sdk/task/rubrics/strict.py +149 -0
  345. synth_ai/sdk/task/rubrics.py +219 -0
  346. synth_ai/sdk/task/server.py +631 -0
  347. synth_ai/sdk/task/trace_correlation_helpers.py +539 -0
  348. synth_ai/sdk/task/tracing_utils.py +95 -0
  349. synth_ai/sdk/task/validators.py +441 -0
  350. synth_ai/sdk/task/vendors.py +59 -0
  351. synth_ai/sdk/training/__init__.py +102 -0
  352. synth_ai/sdk/tunnels/__init__.py +83 -0
  353. synth_ai/sdk/tunnels/cleanup.py +83 -0
  354. synth_ai/sdk/tunnels/ports.py +120 -0
  355. synth_ai/utils/__init__.py +213 -0
  356. synth_ai-0.4.3.dist-info/METADATA +262 -0
  357. synth_ai-0.4.3.dist-info/RECORD +370 -0
  358. {synth_ai-0.2.6.dev1.dist-info → synth_ai-0.4.3.dist-info}/entry_points.txt +0 -1
  359. synth_ai/cli/calc.py +0 -69
  360. synth_ai/cli/demo.py +0 -131
  361. synth_ai/cli/legacy_root_backup.py +0 -470
  362. synth_ai/cli/man.py +0 -106
  363. synth_ai/cli/rl_demo.py +0 -137
  364. synth_ai/cli/status.py +0 -133
  365. synth_ai/config/base_url.py +0 -98
  366. synth_ai/core/experiment.py +0 -15
  367. synth_ai/core/system.py +0 -15
  368. synth_ai/demos/core/__init__.py +0 -1
  369. synth_ai/demos/core/cli.py +0 -685
  370. synth_ai/demos/demo_task_apps/__init__.py +0 -1
  371. synth_ai/demos/demo_task_apps/math/config.toml +0 -44
  372. synth_ai/demos/demo_task_apps/math/deploy_task_app.sh +0 -22
  373. synth_ai/environments/__init__.py +0 -31
  374. synth_ai/environments/environment/__init__.py +0 -1
  375. synth_ai/environments/environment/artifacts/__init__.py +0 -1
  376. synth_ai/environments/environment/artifacts/base.py +0 -52
  377. synth_ai/environments/environment/core.py +0 -67
  378. synth_ai/environments/environment/db/__init__.py +0 -1
  379. synth_ai/environments/environment/db/sqlite.py +0 -45
  380. synth_ai/environments/environment/registry.py +0 -233
  381. synth_ai/environments/environment/resources/sqlite.py +0 -45
  382. synth_ai/environments/environment/results.py +0 -1
  383. synth_ai/environments/environment/rewards/__init__.py +0 -1
  384. synth_ai/environments/environment/rewards/core.py +0 -29
  385. synth_ai/environments/environment/shared_engine.py +0 -26
  386. synth_ai/environments/environment/tools/__init__.py +0 -200
  387. synth_ai/environments/examples/__init__.py +0 -1
  388. synth_ai/environments/examples/bandit/__init__.py +0 -33
  389. synth_ai/environments/examples/bandit/engine.py +0 -294
  390. synth_ai/environments/examples/bandit/environment.py +0 -194
  391. synth_ai/environments/examples/bandit/taskset.py +0 -200
  392. synth_ai/environments/examples/crafter_classic/__init__.py +0 -8
  393. synth_ai/environments/examples/crafter_classic/agent_demos/analyze_semantic_words_markdown.py +0 -250
  394. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_comprehensive_evaluation.py +0 -59
  395. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_browser.py +0 -152
  396. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_config.toml +0 -24
  397. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_framework.py +0 -1194
  398. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/crafter_synth_config.toml +0 -56
  399. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_config_modal.toml +0 -32
  400. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_turso.py +0 -724
  401. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/kick_off_ft_modal.py +0 -384
  402. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_action_results.py +0 -53
  403. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_agent_actions.py +0 -178
  404. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_latest_run.py +0 -222
  405. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_lm_traces.py +0 -183
  406. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_no_rewards.py +0 -210
  407. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_trace_issue.py +0 -206
  408. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/check_db_schema.py +0 -49
  409. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/check_latest_results.py +0 -64
  410. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/debug_agent_responses.py +0 -88
  411. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/quick_trace_check.py +0 -77
  412. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/compare_experiments.py +0 -324
  413. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_turso.py +0 -580
  414. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/kick_off_ft_oai.py +0 -362
  415. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/multi_model_config.toml +0 -49
  416. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_enhanced_hooks.py +0 -332
  417. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_hook_events.py +0 -97
  418. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_hook_results.py +0 -217
  419. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/check_hook_storage.py +0 -87
  420. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/check_seeds.py +0 -88
  421. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/compare_seed_performance.py +0 -195
  422. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/custom_eval_pipelines.py +0 -400
  423. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/plot_hook_frequency.py +0 -195
  424. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/seed_analysis_summary.py +0 -56
  425. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/run_rollouts_for_models_and_compare_v3.py +0 -858
  426. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_quick_evaluation.py +0 -52
  427. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_react_agent.py +0 -874
  428. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_trace_evaluation.py +0 -1412
  429. synth_ai/environments/examples/crafter_classic/agent_demos/example_v3_usage.py +0 -216
  430. synth_ai/environments/examples/crafter_classic/agent_demos/old/compare_traces.py +0 -296
  431. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_comprehensive_evaluation.py +0 -58
  432. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_env_serialization.py +0 -464
  433. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_evaluation_browser.py +0 -152
  434. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_quick_evaluation.py +0 -51
  435. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_trace_evaluation.py +0 -1412
  436. synth_ai/environments/examples/crafter_classic/agent_demos/old/debug_player_loss.py +0 -112
  437. synth_ai/environments/examples/crafter_classic/agent_demos/old/diagnose_service.py +0 -203
  438. synth_ai/environments/examples/crafter_classic/agent_demos/old/diagnose_slowness.py +0 -305
  439. synth_ai/environments/examples/crafter_classic/agent_demos/old/eval_by_difficulty.py +0 -126
  440. synth_ai/environments/examples/crafter_classic/agent_demos/old/eval_example.py +0 -94
  441. synth_ai/environments/examples/crafter_classic/agent_demos/old/explore_saved_states.py +0 -142
  442. synth_ai/environments/examples/crafter_classic/agent_demos/old/filter_traces_sft.py +0 -26
  443. synth_ai/environments/examples/crafter_classic/agent_demos/old/filter_traces_sft_OLD.py +0 -984
  444. synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_data_gemini.py +0 -724
  445. synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_data_modal.py +0 -386
  446. synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_metadata.py +0 -205
  447. synth_ai/environments/examples/crafter_classic/agent_demos/old/kick_off_ft_gemini.py +0 -150
  448. synth_ai/environments/examples/crafter_classic/agent_demos/old/kick_off_ft_modal.py +0 -283
  449. synth_ai/environments/examples/crafter_classic/agent_demos/old/prepare_vertex_ft.py +0 -280
  450. synth_ai/environments/examples/crafter_classic/agent_demos/old/profile_env_slowness.py +0 -456
  451. synth_ai/environments/examples/crafter_classic/agent_demos/old/replicate_issue.py +0 -166
  452. synth_ai/environments/examples/crafter_classic/agent_demos/old/run_and_eval.py +0 -102
  453. synth_ai/environments/examples/crafter_classic/agent_demos/old/run_comparison.py +0 -128
  454. synth_ai/environments/examples/crafter_classic/agent_demos/old/run_qwen_rollouts.py +0 -655
  455. synth_ai/environments/examples/crafter_classic/agent_demos/old/trace_eval_OLD.py +0 -202
  456. synth_ai/environments/examples/crafter_classic/agent_demos/old/validate_openai_format.py +0 -166
  457. synth_ai/environments/examples/crafter_classic/config_logging.py +0 -111
  458. synth_ai/environments/examples/crafter_classic/debug_translation.py +0 -0
  459. synth_ai/environments/examples/crafter_classic/engine.py +0 -579
  460. synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +0 -64
  461. synth_ai/environments/examples/crafter_classic/engine_helpers/action_map.py +0 -6
  462. synth_ai/environments/examples/crafter_classic/engine_helpers/serialization.py +0 -75
  463. synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +0 -267
  464. synth_ai/environments/examples/crafter_classic/environment.py +0 -404
  465. synth_ai/environments/examples/crafter_classic/taskset.py +0 -233
  466. synth_ai/environments/examples/crafter_classic/trace_hooks_v3.py +0 -228
  467. synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +0 -299
  468. synth_ai/environments/examples/crafter_custom/__init__.py +0 -4
  469. synth_ai/environments/examples/crafter_custom/agent_demos/__init__.py +0 -1
  470. synth_ai/environments/examples/crafter_custom/agent_demos/trace_eval.py +0 -202
  471. synth_ai/environments/examples/crafter_custom/crafter/__init__.py +0 -7
  472. synth_ai/environments/examples/crafter_custom/crafter/config.py +0 -182
  473. synth_ai/environments/examples/crafter_custom/crafter/constants.py +0 -8
  474. synth_ai/environments/examples/crafter_custom/crafter/engine.py +0 -269
  475. synth_ai/environments/examples/crafter_custom/crafter/env.py +0 -262
  476. synth_ai/environments/examples/crafter_custom/crafter/objects.py +0 -417
  477. synth_ai/environments/examples/crafter_custom/crafter/recorder.py +0 -187
  478. synth_ai/environments/examples/crafter_custom/crafter/worldgen.py +0 -118
  479. synth_ai/environments/examples/crafter_custom/dataset_builder.py +0 -373
  480. synth_ai/environments/examples/crafter_custom/environment.py +0 -312
  481. synth_ai/environments/examples/crafter_custom/old/analyze_diamond_issue.py +0 -159
  482. synth_ai/environments/examples/crafter_custom/old/analyze_diamond_spawning.py +0 -158
  483. synth_ai/environments/examples/crafter_custom/old/compare_worlds.py +0 -71
  484. synth_ai/environments/examples/crafter_custom/old/dataset_stats.py +0 -105
  485. synth_ai/environments/examples/crafter_custom/old/diamond_spawning_summary.py +0 -119
  486. synth_ai/environments/examples/crafter_custom/old/example_dataset_usage.py +0 -52
  487. synth_ai/environments/examples/crafter_custom/run_dataset.py +0 -305
  488. synth_ai/environments/examples/enron/art_helpers/email_search_tools.py +0 -156
  489. synth_ai/environments/examples/enron/art_helpers/local_email_db.py +0 -281
  490. synth_ai/environments/examples/enron/art_helpers/types_enron.py +0 -25
  491. synth_ai/environments/examples/enron/engine.py +0 -295
  492. synth_ai/environments/examples/enron/environment.py +0 -166
  493. synth_ai/environments/examples/enron/taskset.py +0 -112
  494. synth_ai/environments/examples/enron/units/keyword_stats.py +0 -112
  495. synth_ai/environments/examples/minigrid/__init__.py +0 -48
  496. synth_ai/environments/examples/minigrid/agent_demos/minigrid_evaluation_framework.py +0 -1188
  497. synth_ai/environments/examples/minigrid/agent_demos/minigrid_quick_evaluation.py +0 -48
  498. synth_ai/environments/examples/minigrid/agent_demos/minigrid_react_agent.py +0 -562
  499. synth_ai/environments/examples/minigrid/agent_demos/minigrid_trace_evaluation.py +0 -221
  500. synth_ai/environments/examples/minigrid/engine.py +0 -589
  501. synth_ai/environments/examples/minigrid/environment.py +0 -274
  502. synth_ai/environments/examples/minigrid/environment_mapping.py +0 -242
  503. synth_ai/environments/examples/minigrid/puzzle_loader.py +0 -417
  504. synth_ai/environments/examples/minigrid/taskset.py +0 -583
  505. synth_ai/environments/examples/nethack/__init__.py +0 -7
  506. synth_ai/environments/examples/nethack/achievements.py +0 -337
  507. synth_ai/environments/examples/nethack/agent_demos/nethack_evaluation_framework.py +0 -981
  508. synth_ai/environments/examples/nethack/agent_demos/nethack_quick_evaluation.py +0 -74
  509. synth_ai/environments/examples/nethack/agent_demos/nethack_react_agent.py +0 -831
  510. synth_ai/environments/examples/nethack/engine.py +0 -739
  511. synth_ai/environments/examples/nethack/environment.py +0 -256
  512. synth_ai/environments/examples/nethack/helpers/__init__.py +0 -41
  513. synth_ai/environments/examples/nethack/helpers/action_mapping.py +0 -301
  514. synth_ai/environments/examples/nethack/helpers/nle_wrapper.py +0 -402
  515. synth_ai/environments/examples/nethack/helpers/observation_utils.py +0 -433
  516. synth_ai/environments/examples/nethack/helpers/recording_wrapper.py +0 -200
  517. synth_ai/environments/examples/nethack/helpers/trajectory_recorder.py +0 -269
  518. synth_ai/environments/examples/nethack/helpers/visualization/replay_viewer.py +0 -308
  519. synth_ai/environments/examples/nethack/helpers/visualization/visualizer.py +0 -431
  520. synth_ai/environments/examples/nethack/taskset.py +0 -323
  521. synth_ai/environments/examples/red/__init__.py +0 -7
  522. synth_ai/environments/examples/red/agent_demos/__init__.py +0 -1
  523. synth_ai/environments/examples/red/config_logging.py +0 -110
  524. synth_ai/environments/examples/red/engine.py +0 -694
  525. synth_ai/environments/examples/red/engine_helpers/__init__.py +0 -1
  526. synth_ai/environments/examples/red/engine_helpers/memory_map.py +0 -28
  527. synth_ai/environments/examples/red/engine_helpers/reward_components.py +0 -276
  528. synth_ai/environments/examples/red/engine_helpers/reward_library/__init__.py +0 -142
  529. synth_ai/environments/examples/red/engine_helpers/reward_library/adaptive_rewards.py +0 -57
  530. synth_ai/environments/examples/red/engine_helpers/reward_library/battle_rewards.py +0 -284
  531. synth_ai/environments/examples/red/engine_helpers/reward_library/composite_rewards.py +0 -150
  532. synth_ai/environments/examples/red/engine_helpers/reward_library/economy_rewards.py +0 -138
  533. synth_ai/environments/examples/red/engine_helpers/reward_library/efficiency_rewards.py +0 -57
  534. synth_ai/environments/examples/red/engine_helpers/reward_library/exploration_rewards.py +0 -331
  535. synth_ai/environments/examples/red/engine_helpers/reward_library/novelty_rewards.py +0 -121
  536. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_rewards.py +0 -559
  537. synth_ai/environments/examples/red/engine_helpers/reward_library/pokemon_rewards.py +0 -313
  538. synth_ai/environments/examples/red/engine_helpers/reward_library/social_rewards.py +0 -148
  539. synth_ai/environments/examples/red/engine_helpers/reward_library/story_rewards.py +0 -247
  540. synth_ai/environments/examples/red/engine_helpers/screen_analysis.py +0 -368
  541. synth_ai/environments/examples/red/engine_helpers/state_extraction.py +0 -140
  542. synth_ai/environments/examples/red/environment.py +0 -238
  543. synth_ai/environments/examples/red/taskset.py +0 -79
  544. synth_ai/environments/examples/red/units/__init__.py +0 -1
  545. synth_ai/environments/examples/sokoban/__init__.py +0 -1
  546. synth_ai/environments/examples/sokoban/agent_demos/sokoban_full_eval.py +0 -899
  547. synth_ai/environments/examples/sokoban/engine.py +0 -678
  548. synth_ai/environments/examples/sokoban/engine_helpers/__init__.py +0 -1
  549. synth_ai/environments/examples/sokoban/engine_helpers/room_utils.py +0 -657
  550. synth_ai/environments/examples/sokoban/engine_helpers/vendored/__init__.py +0 -18
  551. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/__init__.py +0 -3
  552. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/boxoban_env.py +0 -131
  553. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/render_utils.py +0 -370
  554. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/room_utils.py +0 -332
  555. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env.py +0 -306
  556. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_fixed_targets.py +0 -67
  557. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_pull.py +0 -115
  558. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_two_player.py +0 -123
  559. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_variations.py +0 -394
  560. synth_ai/environments/examples/sokoban/environment.py +0 -229
  561. synth_ai/environments/examples/sokoban/generate_verified_puzzles.py +0 -440
  562. synth_ai/environments/examples/sokoban/puzzle_loader.py +0 -312
  563. synth_ai/environments/examples/sokoban/taskset.py +0 -428
  564. synth_ai/environments/examples/sokoban/units/astar_common.py +0 -95
  565. synth_ai/environments/examples/tictactoe/__init__.py +0 -1
  566. synth_ai/environments/examples/tictactoe/engine.py +0 -368
  567. synth_ai/environments/examples/tictactoe/environment.py +0 -240
  568. synth_ai/environments/examples/tictactoe/taskset.py +0 -215
  569. synth_ai/environments/examples/verilog/__init__.py +0 -10
  570. synth_ai/environments/examples/verilog/engine.py +0 -329
  571. synth_ai/environments/examples/verilog/environment.py +0 -350
  572. synth_ai/environments/examples/verilog/taskset.py +0 -420
  573. synth_ai/environments/examples/wordle/__init__.py +0 -29
  574. synth_ai/environments/examples/wordle/engine.py +0 -398
  575. synth_ai/environments/examples/wordle/environment.py +0 -159
  576. synth_ai/environments/examples/wordle/helpers/generate_instances_wordfreq.py +0 -75
  577. synth_ai/environments/examples/wordle/taskset.py +0 -230
  578. synth_ai/environments/reproducibility/core.py +0 -42
  579. synth_ai/environments/reproducibility/helpers.py +0 -0
  580. synth_ai/environments/reproducibility/tree.py +0 -364
  581. synth_ai/environments/service/app.py +0 -91
  582. synth_ai/environments/service/core_routes.py +0 -1020
  583. synth_ai/environments/service/external_registry.py +0 -56
  584. synth_ai/environments/service/registry.py +0 -9
  585. synth_ai/environments/stateful/__init__.py +0 -1
  586. synth_ai/environments/stateful/core.py +0 -163
  587. synth_ai/environments/stateful/engine.py +0 -21
  588. synth_ai/environments/stateful/state.py +0 -7
  589. synth_ai/environments/tasks/api.py +0 -19
  590. synth_ai/environments/tasks/core.py +0 -80
  591. synth_ai/environments/tasks/filters.py +0 -41
  592. synth_ai/environments/tasks/utils.py +0 -91
  593. synth_ai/environments/v0_observability/history.py +0 -3
  594. synth_ai/environments/v0_observability/log.py +0 -2
  595. synth_ai/evals/base.py +0 -15
  596. synth_ai/experimental/synth_oss.py +0 -446
  597. synth_ai/http.py +0 -102
  598. synth_ai/inference/client.py +0 -20
  599. synth_ai/install_sqld.sh +0 -40
  600. synth_ai/jobs/client.py +0 -246
  601. synth_ai/learning/__init__.py +0 -24
  602. synth_ai/learning/config.py +0 -43
  603. synth_ai/learning/filtering.py +0 -0
  604. synth_ai/learning/ft_client.py +0 -59
  605. synth_ai/learning/offline/dpo.py +0 -0
  606. synth_ai/learning/offline/providers.py +0 -7
  607. synth_ai/learning/offline/sft.py +0 -0
  608. synth_ai/learning/offline/shared.py +0 -0
  609. synth_ai/learning/online/grpo.py +0 -0
  610. synth_ai/learning/online/irft.py +0 -0
  611. synth_ai/learning/prompts/banking77_injection_eval.py +0 -168
  612. synth_ai/learning/prompts/gepa.py +0 -0
  613. synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +0 -213
  614. synth_ai/learning/prompts/mipro.py +0 -289
  615. synth_ai/learning/prompts/random_search.py +0 -246
  616. synth_ai/learning/prompts/run_mipro_banking77.py +0 -172
  617. synth_ai/learning/prompts/run_random_search_banking77.py +0 -324
  618. synth_ai/learning/sse.py +0 -58
  619. synth_ai/learning/validators.py +0 -48
  620. synth_ai/lm/__init__.py +0 -51
  621. synth_ai/lm/caching/constants.py +0 -6
  622. synth_ai/lm/caching/dbs.py +0 -0
  623. synth_ai/lm/caching/ephemeral.py +0 -102
  624. synth_ai/lm/caching/handler.py +0 -137
  625. synth_ai/lm/caching/initialize.py +0 -11
  626. synth_ai/lm/caching/persistent.py +0 -114
  627. synth_ai/lm/config.py +0 -110
  628. synth_ai/lm/constants.py +0 -32
  629. synth_ai/lm/core/__init__.py +0 -8
  630. synth_ai/lm/core/all.py +0 -73
  631. synth_ai/lm/core/exceptions.py +0 -7
  632. synth_ai/lm/core/main.py +0 -319
  633. synth_ai/lm/core/main_v3.py +0 -594
  634. synth_ai/lm/core/synth_models.py +0 -48
  635. synth_ai/lm/core/vendor_clients.py +0 -188
  636. synth_ai/lm/cost/__init__.py +0 -0
  637. synth_ai/lm/cost/monitor.py +0 -1
  638. synth_ai/lm/cost/statefulness.py +0 -1
  639. synth_ai/lm/injection.py +0 -80
  640. synth_ai/lm/overrides.py +0 -206
  641. synth_ai/lm/provider_support/__init__.py +0 -8
  642. synth_ai/lm/provider_support/anthropic.py +0 -972
  643. synth_ai/lm/provider_support/openai.py +0 -1139
  644. synth_ai/lm/provider_support/suppress_logging.py +0 -31
  645. synth_ai/lm/structured_outputs/__init__.py +0 -0
  646. synth_ai/lm/structured_outputs/handler.py +0 -440
  647. synth_ai/lm/structured_outputs/inject.py +0 -297
  648. synth_ai/lm/structured_outputs/rehabilitate.py +0 -185
  649. synth_ai/lm/tools/__init__.py +0 -3
  650. synth_ai/lm/tools/base.py +0 -172
  651. synth_ai/lm/unified_interface.py +0 -202
  652. synth_ai/lm/vendors/__init__.py +0 -0
  653. synth_ai/lm/vendors/base.py +0 -81
  654. synth_ai/lm/vendors/core/__init__.py +0 -0
  655. synth_ai/lm/vendors/core/anthropic_api.py +0 -387
  656. synth_ai/lm/vendors/core/gemini_api.py +0 -292
  657. synth_ai/lm/vendors/core/mistral_api.py +0 -322
  658. synth_ai/lm/vendors/core/openai_api.py +0 -220
  659. synth_ai/lm/vendors/core/synth_dev_api.py +0 -0
  660. synth_ai/lm/vendors/local/__init__.py +0 -0
  661. synth_ai/lm/vendors/local/ollama.py +0 -0
  662. synth_ai/lm/vendors/openai_standard.py +0 -780
  663. synth_ai/lm/vendors/openai_standard_responses.py +0 -256
  664. synth_ai/lm/vendors/retries.py +0 -22
  665. synth_ai/lm/vendors/supported/__init__.py +0 -0
  666. synth_ai/lm/vendors/supported/custom_endpoint.py +0 -417
  667. synth_ai/lm/vendors/supported/deepseek.py +0 -69
  668. synth_ai/lm/vendors/supported/grok.py +0 -75
  669. synth_ai/lm/vendors/supported/groq.py +0 -16
  670. synth_ai/lm/vendors/supported/ollama.py +0 -15
  671. synth_ai/lm/vendors/supported/openrouter.py +0 -74
  672. synth_ai/lm/vendors/supported/together.py +0 -11
  673. synth_ai/lm/vendors/synth_client.py +0 -808
  674. synth_ai/lm/warmup.py +0 -186
  675. synth_ai/rl/secrets.py +0 -19
  676. synth_ai/scripts/verify_rewards.py +0 -100
  677. synth_ai/task/__init__.py +0 -10
  678. synth_ai/task/contracts.py +0 -120
  679. synth_ai/task/health.py +0 -28
  680. synth_ai/task/validators.py +0 -12
  681. synth_ai/tracing/__init__.py +0 -30
  682. synth_ai/tracing_v1/__init__.py +0 -33
  683. synth_ai/tracing_v3/config.py +0 -84
  684. synth_ai/tracing_v3/storage/config.py +0 -62
  685. synth_ai/tracing_v3/turso/__init__.py +0 -25
  686. synth_ai/tracing_v3/turso/daemon.py +0 -144
  687. synth_ai/tracing_v3/turso/manager.py +0 -760
  688. synth_ai/v0/tracing/__init__.py +0 -0
  689. synth_ai/v0/tracing/abstractions.py +0 -224
  690. synth_ai/v0/tracing/base_client.py +0 -91
  691. synth_ai/v0/tracing/client_manager.py +0 -131
  692. synth_ai/v0/tracing/config.py +0 -140
  693. synth_ai/v0/tracing/context.py +0 -146
  694. synth_ai/v0/tracing/decorators.py +0 -680
  695. synth_ai/v0/tracing/events/__init__.py +0 -0
  696. synth_ai/v0/tracing/events/manage.py +0 -147
  697. synth_ai/v0/tracing/events/scope.py +0 -86
  698. synth_ai/v0/tracing/events/store.py +0 -228
  699. synth_ai/v0/tracing/immediate_client.py +0 -151
  700. synth_ai/v0/tracing/local.py +0 -18
  701. synth_ai/v0/tracing/log_client_base.py +0 -73
  702. synth_ai/v0/tracing/retry_queue.py +0 -186
  703. synth_ai/v0/tracing/trackers.py +0 -515
  704. synth_ai/v0/tracing/upload.py +0 -510
  705. synth_ai/v0/tracing/utils.py +0 -9
  706. synth_ai/v0/tracing_v1/__init__.py +0 -16
  707. synth_ai/v0/tracing_v1/abstractions.py +0 -224
  708. synth_ai/v0/tracing_v1/base_client.py +0 -91
  709. synth_ai/v0/tracing_v1/client_manager.py +0 -131
  710. synth_ai/v0/tracing_v1/config.py +0 -140
  711. synth_ai/v0/tracing_v1/context.py +0 -146
  712. synth_ai/v0/tracing_v1/decorators.py +0 -701
  713. synth_ai/v0/tracing_v1/events/__init__.py +0 -0
  714. synth_ai/v0/tracing_v1/events/manage.py +0 -147
  715. synth_ai/v0/tracing_v1/events/scope.py +0 -86
  716. synth_ai/v0/tracing_v1/events/store.py +0 -228
  717. synth_ai/v0/tracing_v1/immediate_client.py +0 -151
  718. synth_ai/v0/tracing_v1/local.py +0 -18
  719. synth_ai/v0/tracing_v1/log_client_base.py +0 -73
  720. synth_ai/v0/tracing_v1/retry_queue.py +0 -186
  721. synth_ai/v0/tracing_v1/trackers.py +0 -515
  722. synth_ai/v0/tracing_v1/upload.py +0 -525
  723. synth_ai/v0/tracing_v1/utils.py +0 -9
  724. synth_ai/zyk/__init__.py +0 -30
  725. synth_ai-0.2.6.dev1.dist-info/METADATA +0 -106
  726. synth_ai-0.2.6.dev1.dist-info/RECORD +0 -416
  727. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/__init__.py +0 -0
  728. /synth_ai/{lm/caching → core/apps}/__init__.py +0 -0
  729. /synth_ai/{tracing_v3 → core/tracing_v3}/lm_call_record_abstractions.py +0 -0
  730. /synth_ai/{tracing_v3 → core/tracing_v3}/storage/__init__.py +0 -0
  731. /synth_ai/{tracing_v3 → core/tracing_v3}/storage/exceptions.py +0 -0
  732. /synth_ai/{tracing_v3 → core/tracing_v3}/storage/types.py +0 -0
  733. /synth_ai/{compound/cais.py → py.typed} +0 -0
  734. /synth_ai/{learning → sdk/learning}/core.py +0 -0
  735. /synth_ai/{learning → sdk/learning}/gateway.py +0 -0
  736. {synth_ai-0.2.6.dev1.dist-info → synth_ai-0.4.3.dist-info}/WHEEL +0 -0
  737. {synth_ai-0.2.6.dev1.dist-info → synth_ai-0.4.3.dist-info}/licenses/LICENSE +0 -0
  738. {synth_ai-0.2.6.dev1.dist-info → synth_ai-0.4.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,922 @@
1
+ """Banking77 intent classification task app for Synth prompt optimization benchmarks."""
2
+
3
+ import contextlib
4
+ import inspect
5
+ import json
6
+ import os
7
+ import socket
8
+ from collections.abc import Iterable, Sequence
9
+ from pathlib import Path
10
+ from typing import Any, Mapping, cast
11
+ from urllib.parse import urlparse
12
+
13
+ from fastapi import APIRouter, HTTPException, Request
14
+ from fastapi.exceptions import RequestValidationError
15
+ from fastapi.responses import JSONResponse
16
+ from pydantic import BaseModel
17
+ from starlette.requests import Request as StarletteRequest
18
+ from synth_ai.sdk.task.auth import is_api_key_header_authorized, normalize_environment_api_key
19
+ from synth_ai.sdk.task.contracts import (
20
+ RolloutMetrics,
21
+ RolloutRequest,
22
+ RolloutResponse,
23
+ TaskInfo,
24
+ )
25
+ from synth_ai.sdk.task.datasets import TaskDatasetRegistry, TaskDatasetSpec
26
+ from synth_ai.sdk.task.rubrics import Rubric, load_rubric
27
+ from synth_ai.sdk.task.server import (
28
+ ProxyConfig,
29
+ RubricBundle,
30
+ TaskAppConfig,
31
+ create_task_app,
32
+ run_task_app,
33
+ )
34
+ from synth_ai.sdk.task.trace_correlation_helpers import (
35
+ build_trace_payload,
36
+ extract_trace_correlation_id,
37
+ )
38
+ from synth_ai.sdk.task.vendors import normalize_vendor_keys
39
+
40
# Dataset configuration
# Dataset identifier handed to `load_dataset`; can be overridden through the
# BANKING77_DATASET_NAME environment variable.
DATASET_NAME = os.getenv("BANKING77_DATASET_NAME", "banking77")
# Split used when a caller does not name one.
DEFAULT_SPLIT = "train"
# The only split names Banking77Dataset._load_split accepts.
AVAILABLE_SPLITS: tuple[str, ...] = ("train", "test")
# Function name advertised in the tool schema and required in returned tool calls.
TOOL_NAME = "banking77_classify"
45
+
46
+
47
def get_current_module_code():
    """Return the source text of the module that called this function.

    Returns:
        The caller's module source as a string, or ``None`` when the current
        frame, the caller's frame, or the owning module cannot be determined,
        or when ``inspect.getsource`` raises ``OSError``/``TypeError``.
    """
    current = inspect.currentframe()
    try:
        # Walk one frame up (the caller), tolerating a missing frame at each step.
        caller = current.f_back if current is not None else None
        owner = inspect.getmodule(caller) if caller is not None else None
        if owner is None:
            return None
        try:
            source = inspect.getsource(owner)
        except (OSError, TypeError):
            source = None
        return source
    finally:
        # Release the local frame reference on every exit path.
        del current
65
+
66
+
67
class Banking77Dataset:
    """Banking77 splits loaded through Hugging Face ``datasets`` on first use.

    Each split is fetched at most once and kept in an in-memory cache. Label
    names are captured from the first loaded split whose ``label`` feature
    exposes a ``names`` attribute.
    """

    def __init__(self) -> None:
        self._cache: dict[str, Any] = {}
        self._label_names: list[str] | None = None

    def _load_split(self, split: str):
        """Return the dataset object for ``split``, loading and caching it if needed.

        Raises:
            ValueError: ``split`` is not one of ``AVAILABLE_SPLITS``.
            RuntimeError: importing ``datasets``, loading the split, or reading
                its label feature failed (the original error is chained).
        """
        if split not in AVAILABLE_SPLITS:
            raise ValueError(f"Unknown split: {split}. Available: {AVAILABLE_SPLITS}")

        if split in self._cache:
            return self._cache[split]

        try:
            from datasets import load_dataset as _load_dataset  # lazy import

            loaded = _load_dataset(DATASET_NAME, split=split, trust_remote_code=False)
            # Cache first, then look at the label feature (same order as before).
            self._cache[split] = loaded
            label_feature = loaded.features.get("label")  # type: ignore[attr-defined]
            names_still_unknown = self._label_names is None
            if names_still_unknown and label_feature is not None and hasattr(label_feature, "names"):
                self._label_names = label_feature.names
        except Exception as exc:
            raise RuntimeError(
                f"Dataset preparation failed: {split}: Failed to download Banking77 dataset from Hugging Face. "
                f"Dataset: {DATASET_NAME} | Split: {split}"
            ) from exc
        return self._cache[split]

    def ensure_ready(self, splits: Sequence[str]) -> None:
        """Load every split named in ``splits`` so later calls hit the cache."""
        for name in splits:
            self._load_split(name)

    def size(self, split: str) -> int:
        """Return the number of rows in ``split``."""
        return len(self._load_split(split))

    def sample(self, *, split: str, index: int) -> dict[str, Any]:
        """Return one row of ``split`` as a plain dict.

        ``index`` is wrapped modulo the split size, so any integer is accepted.

        Returns:
            A dict with keys ``index`` (wrapped index), ``split``, ``text``,
            ``label`` (label name) and ``label_idx`` (integer label).

        Raises:
            RuntimeError: the split contains no rows.
        """
        rows = self._load_split(split)
        total = len(rows)
        if total == 0:
            raise RuntimeError(f"Banking77 split '{split}' is empty")

        position = int(index) % total
        record = rows[position]

        label_idx = int(record.get("label", 0))
        label_text = self.get_label_name(label_idx)

        result: dict[str, Any] = {}
        result["index"] = position
        result["split"] = split
        result["text"] = str(record.get("text", ""))
        result["label"] = label_text
        result["label_idx"] = label_idx
        return result

    def get_label_name(self, label_idx: int) -> str:
        """Map an integer label to its name, or ``label_<idx>`` when unknown.

        Loads ``DEFAULT_SPLIT`` first if no label names have been captured yet.
        """
        if self._label_names is None:
            self._load_split(DEFAULT_SPLIT)
        known = self._label_names
        if not known:
            return f"label_{label_idx}"
        if label_idx < 0 or label_idx >= len(known):
            return f"label_{label_idx}"
        return known[label_idx]

    @property
    def label_names(self) -> list[str]:
        """All label names, loading ``DEFAULT_SPLIT`` on demand; empty list if none are known."""
        if self._label_names is None:
            self._load_split(DEFAULT_SPLIT)
        return self._label_names or []
131
+
132
+
133
# Router that carries the Banking77-specific HTTP routes declared in this module.
banking77_router = APIRouter()


# Dataset descriptor; its splits and default split mirror AVAILABLE_SPLITS
# and DEFAULT_SPLIT so the two stay in sync.
BANKING77_DATASET_SPEC = TaskDatasetSpec(
    id="banking77",
    name="Banking77 Intent Classification",
    version="1.0.0",
    splits=list(AVAILABLE_SPLITS),
    default_split=DEFAULT_SPLIT,
    description="Banking customer query intent classification with 77 intent categories.",
)
144
+
145
+
146
class ClassifyReq(BaseModel):
    """Request body accepted by the ``/classify`` route."""

    # Text of the query to classify.
    query: str
148
+
149
+
150
class ClassifyRes(BaseModel):
    """Response body returned by the ``/classify`` route."""

    # Predicted intent label.
    intent: str
    # Optional confidence value; None when no score is supplied.
    confidence: float | None = None
153
+
154
+
155
@banking77_router.post("/classify", response_model=ClassifyRes)
async def classify_endpoint(req: ClassifyReq, request: Request):
    """Stub classifier that always answers ``activate_my_card``.

    The request body is not inspected. The dataset attribute on the
    application state is still looked up (and then ignored), so the lookup
    fails if the app state does not carry ``banking77_dataset``.
    """
    app_state = request.app.state
    _unused_dataset = app_state.banking77_dataset  # read but not used by this stub
    stub_reply = ClassifyRes(intent="activate_my_card", confidence=None)
    return stub_reply
159
+
160
+
161
async def call_chat_completion(
    policy_config: dict[str, Any],
    placeholders: dict[str, Any],
    default_messages: list[dict[str, str]],
    api_key: str | None = None,
) -> tuple[str, dict[str, Any] | None, list[dict[str, Any]]]:
    """Render the prompt, POST it to the policy's chat-completions route, and validate the reply.

    Args:
        policy_config: Policy settings. ``model`` is required. The route is taken
            from ``inference_url`` when set, otherwise from ``api_base`` or
            ``base_url``. ``temperature`` (default 0.7) and
            ``max_completion_tokens`` (default 100) are optional.
        placeholders: Values substituted into each message ``pattern`` via
            ``str.format``.
        default_messages: Message templates, each with a ``role`` and a ``pattern``.
        api_key: Optional key sent to the route as the ``X-API-Key`` header.

    Returns:
        ``(response_text, response_json, tool_calls)``: the first choice's message
        content (``""`` when the content is absent or null), the decoded response
        body (``{}`` if a 200 response was not JSON), and the first choice's tool
        calls normalised to ``id`` / ``type`` / ``function{name, arguments}`` dicts.

    Raises:
        HTTPException: 400 when ``model`` or every route field is missing; 502 when
            the route points directly at api.openai.com / api.groq.com, when the
            POST itself fails, or when the model output is empty or its tool calls
            are malformed; 500 when ``httpx`` cannot be imported or response
            validation hits an unexpected error; the upstream status code when the
            route answers with anything other than 200.
        KeyError / IndexError: propagated from ``str.format`` when a pattern
            references a placeholder that was not supplied.
    """
    # STRICT: require all policy fields to come from TOML (no defaults)
    missing_fields: list[str] = []
    # Always require model; provider optional when routing via proxy
    model_val = policy_config.get("model")
    if not isinstance(model_val, str) or not model_val.strip():
        missing_fields.append("model")

    # Resolve routing base - ALWAYS prioritize inference_url if provided
    # (trainer-provided interceptor URL). If inference_url is set, use it
    # exclusively and ignore api_base/base_url.
    inference_url_raw = policy_config.get("inference_url")
    api_base_raw = policy_config.get("api_base")
    base_url_raw = policy_config.get("base_url")

    if inference_url_raw:
        route_base = str(inference_url_raw).strip()
        if api_base_raw or base_url_raw:
            # Warn that api_base/base_url are present but will be ignored
            with contextlib.suppress(Exception):
                print(
                    f"[TASK_APP] ⚠️ inference_url is set ({route_base}), ignoring api_base/base_url",
                    flush=True,
                )
    else:
        # Fallback: use api_base or base_url if inference_url not provided.
        # str() keeps a non-string config value from crashing on .strip().
        route_base = str(api_base_raw or "").strip() or str(base_url_raw or "").strip()
    if not route_base:
        missing_fields.append("inference_url")
    if missing_fields:
        raise HTTPException(
            status_code=400,
            detail=(
                "Missing policy fields in TOML [prompt_learning.policy]: " + ", ".join(missing_fields)
            ),
        )

    model = policy_config["model"].strip()
    lowered = route_base.lower()
    is_provider_host = ("api.openai.com" in lowered) or ("api.groq.com" in lowered)

    # Normalize inference URL: allow bases like .../v1 and auto-append /chat/completions.
    # Query strings and interceptor URLs carrying a trial ID are preserved.
    def _normalize_chat_url(url: str) -> str:
        from urllib.parse import urlparse, urlunparse

        u = (url or "").rstrip("/")
        if not u:
            return "/chat/completions"

        # Parse URL to separate path from query parameters
        parsed = urlparse(u)
        path = parsed.path.rstrip("/")
        query = parsed.query
        fragment = parsed.fragment

        # Already complete
        if path.endswith("/v1/chat/completions") or path.endswith("/chat/completions"):
            return u

        # Interceptor URLs look like /v1/{trial_id}; they only need /chat/completions
        # appended to the path.
        if "/v1/" in path and not path.endswith("/v1"):
            new_path = f"{path}/chat/completions"
            return urlunparse((parsed.scheme, parsed.netloc, new_path, parsed.params, query, fragment))

        # Standard case: append /v1/chat/completions
        if path.endswith("/v1"):
            new_path = f"{path}/chat/completions"
        elif path.endswith("/completions"):
            new_path = path.rsplit("/", 1)[0] + "/chat/completions"
        else:
            new_path = f"{path}/v1/chat/completions" if path else "/v1/chat/completions"

        # Reconstruct URL with query parameters preserved
        return urlunparse((parsed.scheme, parsed.netloc, new_path, parsed.params, query, fragment))

    inference_url = _normalize_chat_url(str(route_base))
    temperature = policy_config.get("temperature", 0.7)
    max_tokens = policy_config.get("max_completion_tokens", 100)

    # Loud route log
    with contextlib.suppress(Exception):
        print(f"[TASK_APP] POLICY ROUTE → {inference_url}", flush=True)

    messages = []
    for msg_template in default_messages:
        role = msg_template.get("role", "user")
        pattern = msg_template.get("pattern", "")
        content = pattern.format(**placeholders)
        messages.append({"role": role, "content": content})

    # Loud logging of rendered messages (trim for safety)
    preview = [
        {"role": m.get("role"), "len": len(m.get("content", "")), "head": (m.get("content", "")[:160])}
        for m in messages
    ]
    print(f"[TASK_APP] MESSAGES: {preview}", flush=True)

    # Refuse to hit a provider host directly for policy calls
    if is_provider_host:
        # Print full policy config for forensics
        with contextlib.suppress(Exception):
            print(
                f"[TASK_APP] POLICY_CONFIG: {json.dumps(policy_config, ensure_ascii=False)}",
                flush=True,
            )
        raise HTTPException(status_code=502, detail=f"Direct provider URL not allowed for policy: {route_base}")

    # If routing to proxy/interceptor, include task app API key if provided
    headers: dict[str, str] = {"Content-Type": "application/json"}
    if api_key:
        headers["X-API-Key"] = api_key
        # NOTE(review): this logs the first 12 and last 4 characters of the key,
        # which exposes a short key in full - consider masking more aggressively.
        with contextlib.suppress(Exception):
            print(f"[TASK_APP] 🔐 PROXY ROUTING with API key: {api_key[:12]}...{api_key[-4:]} (len={len(api_key)})", flush=True)
            print(f"[TASK_APP] 🔐 Headers being sent to proxy: {list(headers.keys())}", flush=True)
            print("[TASK_APP] ✅ Header validation passed: X-API-Key present", flush=True)
    else:
        with contextlib.suppress(Exception):
            print("[TASK_APP] ⚠️ PROXY ROUTING (NO API KEY PROVIDED!)", flush=True)
            print("[TASK_APP] ⚠️ This will likely fail auth at the proxy endpoint", flush=True)

    # Define tool schema for banking77 classification (no enum to keep payload small)
    classify_tool = {
        "type": "function",
        "function": {
            "name": TOOL_NAME,
            "description": "Return the predicted banking77 intent label in the `intent` field.",
            "parameters": {
                "type": "object",
                "properties": {"intent": {"type": "string"}},
                "required": ["intent"],
            },
        },
    }

    payload = {
        "model": model,
        "messages": messages,
        "temperature": temperature,
        "max_tokens": max_tokens,
        "tools": [classify_tool],
        "tool_choice": "required",
    }

    print(
        f"[TASK_APP] OUTBOUND: model={model} temp={temperature} max={max_tokens} tools=1 choice={TOOL_NAME}",
        flush=True,
    )

    # Lazy import httpx to avoid top-level import during modal code gen
    try:
        import httpx  # type: ignore
    except Exception as _exc:  # pragma: no cover
        raise HTTPException(status_code=500, detail=f"httpx unavailable: {_exc}") from _exc

    # Proxy target diagnostics (no preflight health; we go straight to POST).
    # DNS is resolved through the event loop so this coroutine does not block
    # other requests while the lookup is in flight.
    try:
        import asyncio

        parsed = urlparse(inference_url)
        host = parsed.hostname or ""
        port = parsed.port or (443 if parsed.scheme == "https" else 80)
        print(f"[TASK_APP] PROXY_TARGET: scheme={parsed.scheme} host={host} port={port} path={parsed.path}", flush=True)
        addrinfo = await asyncio.get_running_loop().getaddrinfo(host, None)
        ips = sorted({ai[4][0] for ai in addrinfo})
        print(f"[TASK_APP] PROXY_DNS: ips={ips}", flush=True)
    except Exception as e:
        print(f"[TASK_APP] PROXY_DNS_ERROR: {e}", flush=True)

    async with httpx.AsyncClient(timeout=30.0) as client:
        # Log the actual request about to be sent
        with contextlib.suppress(Exception):
            headers_log = {k: (f"{v[:15]}..." if k == "X-API-Key" and len(v) > 15 else v) for k, v in headers.items()}
            print(f"[TASK_APP] 📤 Sending POST to: {inference_url}", flush=True)
            print(f"[TASK_APP] 📤 With headers: {headers_log}", flush=True)
            print(f"[TASK_APP] 📤 Payload keys: {list(payload.keys())}", flush=True)
            if "X-API-Key" in headers:
                print(f"[TASK_APP] ✅ X-API-Key IS in headers (len={len(headers['X-API-Key'])})", flush=True)
            else:
                print("[TASK_APP] ❌ X-API-Key NOT in headers!", flush=True)

        try:
            response = await client.post(inference_url, json=payload, headers=headers)
        except Exception as e:
            print(f"[TASK_APP] POST_EXCEPTION: {type(e).__name__}: {e}", flush=True)
            raise HTTPException(status_code=502, detail=f"Proxy POST failed: {e}") from e

        # Always print status/headers/body BEFORE any error is raised
        print(f"[TASK_APP] RESPONSE_STATUS: {response.status_code}", flush=True)
        print(f"[TASK_APP] RESPONSE_HEADERS: {dict(response.headers)}", flush=True)

        # Handle error responses from interceptor/provider
        if response.status_code != 200:
            try:
                error_json = response.json()
                error_msg = str(error_json.get("error", {}).get("message", error_json.get("error", "Unknown error")))
                print(f"[TASK_APP] ❌ Error response from interceptor: {error_msg}", flush=True)
                raise HTTPException(
                    status_code=response.status_code,
                    detail=f"Interceptor/provider error: {error_msg}",
                )
            except HTTPException:
                raise
            except Exception as e:
                # Body was not JSON (or not shaped as expected): report raw text
                error_text = response.text[:500]
                print(f"[TASK_APP] ❌ Non-JSON error response: {error_text}", flush=True)
                raise HTTPException(
                    status_code=response.status_code,
                    detail=f"Interceptor/provider returned error: {error_text}",
                ) from e

        # Status is 200 from here on. Try JSON, fall back to text with an empty
        # JSON body (the validation below then rejects it as empty output).
        try:
            response_json = response.json()
            raw = json.dumps(response_json, ensure_ascii=False)
            print(f"[TASK_APP] RESPONSE_JSON ({len(raw)} bytes): {raw}", flush=True)
        except Exception:
            response_text = response.text
            print(f"[TASK_APP] RESPONSE_TEXT ({len(response_text)} bytes): {response_text}", flush=True)
            response_json = {}

    with contextlib.suppress(Exception):
        usage = response_json.get("usage", {}) if isinstance(response_json, dict) else {}
        ch = (response_json.get("choices") or [{}])[0]
        summary_msg = ch.get("message", {}) or {}
        # `content` / `tool_calls` may be JSON null; coerce so len() works.
        txt = summary_msg.get("content") or ""
        tc = summary_msg.get("tool_calls") or []
        print(
            f"[TASK_APP] RESPONSE: usage={usage} choices={len(response_json.get('choices', []))} first_len={len(txt)} tool_calls={len(tc)}",
            flush=True,
        )

    # Hard checks: require either tool_calls or non-empty content
    try:
        choices = response_json.get("choices") or []
        first_msg = ((choices[0] or {}).get("message") or {}) if choices else {}
        tool_calls = first_msg.get("tool_calls", []) or []
        # A null content must count as empty; str(None) would yield "None".
        content_text = str(first_msg.get("content") or "")
        if not tool_calls and not content_text.strip():
            raise HTTPException(status_code=502, detail="Empty model output: no tool_calls and no content")
        # If tool_calls present, validate schema
        for call in tool_calls:
            fn = (call or {}).get("function", {}) or {}
            if fn.get("name") != TOOL_NAME:
                raise HTTPException(status_code=502, detail=f"Unexpected tool name: {fn.get('name')}")
            args_raw = fn.get("arguments", "{}")
            try:
                args = json.loads(args_raw)
            except Exception as e:
                raise HTTPException(status_code=502, detail="Tool call arguments not valid JSON") from e
            if not str(args.get("intent", "")).strip():
                raise HTTPException(status_code=502, detail="Tool call missing 'intent'")
    except HTTPException:
        raise
    except Exception as exc:
        # Convert unexpected errors to HTTP for visibility
        raise HTTPException(status_code=500, detail=f"Response validation failed: {exc}") from exc

    response_text = ""
    tool_calls = []

    if "choices" in response_json and len(response_json["choices"]) > 0:
        choice = response_json["choices"][0]
        message = choice.get("message") or {}
        # Keep the declared `str` contract when the API sends content: null.
        response_text = message.get("content") or ""

        for tc in message.get("tool_calls") or []:
            tool_calls.append({
                "id": tc.get("id", ""),
                "type": tc.get("type", "function"),
                "function": {
                    "name": tc.get("function", {}).get("name", ""),
                    "arguments": tc.get("function", {}).get("arguments", "{}"),
                },
            })

    return response_text, response_json, tool_calls
457
+
458
+
459
async def rollout_executor(request: RolloutRequest, fastapi_request: Request) -> RolloutResponse:
    """Run a single-step Banking77 rollout and score the predicted intent.

    A sample is selected from the dataset stored on ``app.state`` using the
    split from ``request.env.config`` (falling back to ``DEFAULT_SPLIT``) and
    ``request.env.seed`` as the index (falling back to 0). The baseline prompt
    is rendered, the chat-completion helper is invoked with the policy config,
    and the returned tool call (or, failing that, the first whitespace-separated
    token of the text content) is compared against the sample's label.

    Args:
        request: Rollout request carrying ``run_id``, ``env``, ``policy`` and
            ``mode``.
        fastapi_request: Incoming HTTP request; used to reach the dataset on
            ``app.state`` and to read the API key headers forwarded upstream.

    Returns:
        A ``RolloutResponse`` with one-step metrics (reward 1.0 on a match,
        0.0 otherwise), the trace payload and pipeline metadata.

    Raises:
        RuntimeError: If the upstream JSON is missing/empty, has no choices,
            has a malformed message, carries neither tool calls nor content,
            or yields no prediction.

    Exceptions raised by ``call_chat_completion`` propagate unchanged.
    """
    dataset: Banking77Dataset = fastapi_request.app.state.banking77_dataset
    # Inbound snapshot from GEPA (best-effort logging; never fails the rollout)
    with contextlib.suppress(Exception):
        cfg = (request.policy.config or {})
        print(
            f"[TASK_APP] INBOUND_ROLLOUT: run_id={request.run_id} seed={request.env.seed} env={request.env.env_name} "
            f"policy.model={cfg.get('model')} provider={cfg.get('provider')} api_base={cfg.get('inference_url') or cfg.get('api_base') or cfg.get('base_url')}",
            flush=True,
        )

    split = str(((request.env.config or {}).get("split")) or DEFAULT_SPLIT)
    seed = request.env.seed or 0

    sample = dataset.sample(split=split, index=seed)

    # Format available intents as a numbered list for the prompt
    intents_list = "\n".join(f"{i+1}. {label}" for i, label in enumerate(dataset.label_names))
    placeholders = {
        "query": sample["text"],
        "available_intents": intents_list,
    }

    default_messages = [
        {
            "role": "system",
            "pattern": (
                "You are an expert banking assistant that classifies customer queries into banking intents. "
                "Given a customer message, respond with exactly one intent label from the provided list using the `banking77_classify` tool."
            ),
        },
        {
            "role": "user",
            "pattern": "Customer Query: {query}\n\nAvailable Intents:\n{available_intents}\n\nClassify this query into one of the above banking intents using the tool call.",
        },
    ]

    # Render baseline messages for validation/introspection (also fed to the trace)
    rendered_messages: list[dict[str, str]] = [
        {
            "role": msg_template.get("role", "user"),
            "content": msg_template.get("pattern", "").format(**placeholders),
        }
        for msg_template in default_messages
    ]

    # Extract API key from request headers for forwarding to proxy
    api_key = (
        fastapi_request.headers.get("X-API-Key")
        or fastapi_request.headers.get("x-api-key")
        or (fastapi_request.headers.get("Authorization", "").replace("Bearer ", "").strip() if fastapi_request.headers.get("Authorization") else None)
        or None
    )

    # Call proxy - HARD FAILS on any invalid/empty responses. No soft handling.
    response_text, response_json, tool_calls = await call_chat_completion(
        request.policy.config or {},
        placeholders,
        default_messages,
        api_key=api_key,
    )
    # Full upstream JSON must be present and non-empty
    try:
        raw_upstream = json.dumps(response_json, ensure_ascii=False)
    except Exception:
        # Logging only: fall back to repr-style text if the payload is not JSON-serialisable.
        raw_upstream = str(response_json)
    print(f"[TASK_APP] UPSTREAM_RESPONSE_JSON ({len(raw_upstream)} bytes): {raw_upstream}", flush=True)
    if not isinstance(response_json, dict) or not response_json:
        raise RuntimeError("Proxy returned missing/empty JSON")
    # Must have choices
    choices = response_json.get("choices") or []
    if not isinstance(choices, list) or len(choices) == 0:
        raise RuntimeError("Proxy JSON missing choices")
    first_msg = (choices[0] or {}).get("message", {})
    if not isinstance(first_msg, dict):
        raise RuntimeError("Proxy JSON message malformed")
    tc_list = first_msg.get("tool_calls") or []
    # A JSON null content must count as empty; str(None) would yield the
    # non-empty text "None" and defeat the check below.
    content_text = str(first_msg.get("content") or "")
    if not tc_list and not content_text.strip():
        raise RuntimeError("Proxy JSON has neither tool_calls nor content")
    print(f"[TASK_APP] RAW_TOOL_CALLS: {tool_calls}", flush=True)

    predicted_intent = ""
    if tool_calls:
        # Every matching tool call is parsed; the last one that parses wins.
        for tc in tool_calls:
            if tc.get("function", {}).get("name") == TOOL_NAME:
                args_str = tc.get("function", {}).get("arguments", "{}")
                try:
                    args = json.loads(args_str)
                    predicted_intent = args.get("intent", "")
                    print(f"[TASK_APP] PARSED_TOOL_INTENT: {predicted_intent}", flush=True)
                except Exception:
                    print(f"[TASK_APP] TOOL_PARSE_ERROR: {args_str}", flush=True)
    elif response_text:
        # No tool call: fall back to the first whitespace-separated token of the content.
        predicted_intent = response_text.strip().split()[0] if response_text.strip() else ""
        print(f"[TASK_APP] CONTENT_FALLBACK_INTENT: {predicted_intent} text_len={len(response_text or '')}", flush=True)

    # Hard-crash if no prediction produced at this point
    if not str(predicted_intent or "").strip():
        raise RuntimeError("No prediction produced from proxy response")
    # The model may return a non-string intent (e.g. a number); coerce so the
    # comparison below scores it as a mismatch instead of raising AttributeError.
    predicted_intent = str(predicted_intent)

    expected_intent = sample["label"]
    # Case-insensitive comparison that treats underscores and spaces as equivalent.
    is_correct = (predicted_intent.lower().replace("_", " ") == expected_intent.lower().replace("_", " "))
    reward = 1.0 if is_correct else 0.0

    print(
        f"[TASK_APP] PREDICTION: expected={expected_intent} predicted={predicted_intent} correct={is_correct}",
        flush=True,
    )

    with contextlib.suppress(Exception):
        print(
            f"[BANKING77_ROLLOUT] run_id={request.run_id} split={sample['split']} "
            f"index={sample['index']} expected={expected_intent} predicted={predicted_intent} "
            f"reward={reward}",
            flush=True,
        )

    inference_url = (request.policy.config or {}).get("inference_url")

    metrics = RolloutMetrics(
        episode_returns=[reward],
        mean_return=reward,
        num_steps=1,
        num_episodes=1,
        outcome_score=reward,
        events_score=reward,
        details={"correct": is_correct},
    )
    policy_config = request.policy.config or {}
    trace_correlation_id = extract_trace_correlation_id(
        policy_config=policy_config,
        inference_url=str(inference_url or ""),
        mode=request.mode,
    )
    trace_metadata = {
        "env": "banking77",
        "split": sample["split"],
        "index": sample["index"],
        "correct": is_correct,
    }
    trace_payload = build_trace_payload(
        messages=rendered_messages,
        response=response_json if isinstance(response_json, dict) else None,
        correlation_id=trace_correlation_id,
        metadata=trace_metadata,
    )

    pipeline_metadata = {"inference_url": str(inference_url or "")}
    if trace_correlation_id:
        pipeline_metadata["trace_correlation_id"] = trace_correlation_id

    return RolloutResponse(
        run_id=request.run_id,
        branches={},
        metrics=metrics,
        aborted=False,
        trace_correlation_id=trace_correlation_id,
        trace=trace_payload,
        pipeline_metadata=pipeline_metadata,
    )
640
+
641
+
642
def build_dataset() -> tuple[TaskDatasetRegistry, Banking77Dataset]:
    """Create the dataset registry together with the Banking77 dataset it serves.

    Returns:
        ``(registry, dataset)`` where the registry resolves
        ``BANKING77_DATASET_SPEC`` to the very same ``dataset`` instance.
    """
    dataset_registry = TaskDatasetRegistry()
    banking77 = Banking77Dataset()

    def _resolve(_spec):
        # The registry always hands back the one shared instance.
        # NOTE(review): the original comment says the dataset loads lazily on
        # first use to avoid cold-start latency; that depends on
        # Banking77Dataset's constructor, which is not visible here — confirm.
        return banking77

    dataset_registry.register(BANKING77_DATASET_SPEC, _resolve, cache=True)
    return dataset_registry, banking77
648
+
649
+
650
def _base_task_info() -> TaskInfo:
    """Build the ``TaskInfo`` template shared by all Banking77 task instances."""
    task_section = {
        "id": "banking77",
        "name": "Banking77 Intent Classification",
        "version": "1.0.0",
        "action_space": {
            "type": "tool_call",
            "tool_name": TOOL_NAME,
            "description": "Classify banking queries into one of 77 intent categories.",
        },
    }
    # Dataset spec fields, extended with the Hugging Face dataset name.
    dataset_section = {
        **BANKING77_DATASET_SPEC.model_dump(),
        "hf_dataset": DATASET_NAME,
    }
    rubric_section = {
        "version": "1",
        "criteria_count": 1,
        "source": "inline",
    }
    inference_section = {
        "supports_proxy": True,
        "tool": TOOL_NAME,
    }

    return TaskInfo(  # type: ignore[call-overload]
        task=task_section,  # type: ignore[arg-type]
        environment="banking77",
        dataset=dataset_section,  # type: ignore[arg-type]
        rubric=rubric_section,  # type: ignore[arg-type]
        inference=inference_section,  # type: ignore[arg-type]
        limits={"max_turns": 1},  # type: ignore[arg-type]
        task_metadata={"format": "tool_call"},  # type: ignore[arg-type]
    )
679
+
680
+
681
def describe_taskset(dataset: Banking77Dataset) -> Mapping[str, Any]:
    """Summarise the taskset: dataset spec fields, label count and per-split sizes.

    Args:
        dataset: Dataset queried for ``label_names`` and ``size(split)``.

    Returns:
        The dumped ``BANKING77_DATASET_SPEC`` extended with ``hf_dataset``,
        ``num_labels`` and a ``sizes`` mapping keyed by split name.
    """
    summary = {**BANKING77_DATASET_SPEC.model_dump()}
    summary["hf_dataset"] = DATASET_NAME
    summary["num_labels"] = len(dataset.label_names)
    split_sizes = {}
    for split_name in AVAILABLE_SPLITS:
        split_sizes[split_name] = dataset.size(split_name)
    summary["sizes"] = split_sizes
    return summary
688
+
689
+
690
def provide_task_instances(dataset: Banking77Dataset, seeds: Sequence[int]) -> Iterable[TaskInfo]:
    """Yield one ``TaskInfo`` per seed, specialised from the base task info.

    Each seed is used as an index into ``DEFAULT_SPLIT``; the yielded info
    carries the sample's split/index in ``dataset`` and its text as
    ``task_metadata["query"]``.

    Args:
        dataset: Source of samples.
        seeds: Sample indices to materialise.

    Yields:
        A ``TaskInfo`` for every seed, in the order given.
    """

    def _to_plain_dict(section):
        # Objects exposing model_dump() are dumped; anything else goes through dict().
        if hasattr(section, 'model_dump'):
            return section.model_dump()
        return dict(section)

    template = _base_task_info()
    dataset_fields = _to_plain_dict(template.dataset)
    metadata_fields = _to_plain_dict(template.task_metadata)

    for seed in seeds:
        row = dataset.sample(split=DEFAULT_SPLIT, index=seed)
        instance_dataset = dict(dataset_fields)
        instance_dataset["split"] = row["split"]
        instance_dataset["index"] = row["index"]
        instance_metadata = dict(metadata_fields)
        instance_metadata["query"] = row["text"]
        yield TaskInfo(  # type: ignore[call-overload]
            task=template.task,
            environment=template.environment,
            dataset=instance_dataset,  # type: ignore[arg-type]
            rubric=template.rubric,
            inference=template.inference,
            limits=template.limits,
            task_metadata=instance_metadata,
        )
713
+
714
+
715
# Outcome rubric: a single criterion (weight 1.0) for classifying the query
# into the correct intent, combined with weighted-sum aggregation.
OUTCOME_RUBRIC: Rubric = cast(
    Rubric,
    load_rubric(
        dict(
            version="1",
            goal_text="Classify banking customer queries into the correct intent category.",
            aggregation="weighted_sum",
            criteria=[
                dict(
                    id="intent_accuracy",
                    description="Correctly classify the customer query into the appropriate banking intent.",
                    weight=1.0,
                ),
            ],
        )
    ),
)
732
+
733
# Events rubric: a single criterion (weight 1.0) for invoking the
# banking77_classify tool correctly, combined with weighted-sum aggregation.
EVENTS_RUBRIC: Rubric = cast(
    Rubric,
    load_rubric(
        dict(
            version="1",
            goal_text="Use the banking77_classify tool correctly.",
            aggregation="weighted_sum",
            criteria=[
                dict(
                    id="tool_usage",
                    description="Properly invoke the banking77_classify tool with the correct format.",
                    weight=1.0,
                ),
            ],
        )
    ),
)
750
+
751
+
752
def build_config() -> TaskAppConfig:
    """Assemble the ``TaskAppConfig`` for the Banking77 task app.

    Wires together the dataset registry, base task info, rollout executor,
    rubrics, proxy settings and router. A vendor proxy is enabled only when
    the corresponding API key is present in the normalised vendor keys.
    """
    dataset_registry, banking77 = build_dataset()
    task_info = _base_task_info()

    vendor_keys = normalize_vendor_keys()
    has_openai_key = vendor_keys.get("OPENAI_API_KEY") is not None
    has_groq_key = vendor_keys.get("GROQ_API_KEY") is not None
    proxy_settings = ProxyConfig(
        enable_openai=has_openai_key,
        enable_groq=has_groq_key,
        system_hint="Use the banking77_classify tool to classify the customer query.",
    )

    def _describe():
        # Bind the shared dataset so the config callback takes no arguments.
        return describe_taskset(banking77)

    def _instances(seeds):
        # Bind the shared dataset; callers supply only the seeds.
        return provide_task_instances(banking77, seeds)

    return TaskAppConfig(
        app_id="banking77",
        name="Banking77 Intent Classification Task",
        description="Banking77 dataset task app for classifying customer queries into banking intents.",
        base_task_info=task_info,
        describe_taskset=_describe,
        provide_task_instances=_instances,
        rollout=rollout_executor,
        dataset_registry=dataset_registry,
        rubrics=RubricBundle(outcome=OUTCOME_RUBRIC, events=EVENTS_RUBRIC),
        proxy=proxy_settings,
        routers=(banking77_router,),
        app_state={"banking77_dataset": banking77},
        cors_origins=["*"],
    )
779
+
780
+
781
def fastapi_app():
    """Return the FastAPI application for Modal or other ASGI hosts.

    Builds the task app from ``build_config()``, then:

    * replaces the default ``GET /health`` and ``GET /health/rollout`` routes
      with auth-tolerant handlers that answer 200 even when the API key header
      is not authorized (503 only when no ENVIRONMENT_API_KEY is configured);
    * adds ``GET /metadata`` exposing this module's source code and name;
    * installs a ``RequestValidationError`` handler that logs a header
      snapshot and returns a trimmed 422 payload.
    """

    app = create_task_app(build_config())

    # Replace default health endpoints with auth-tolerant handlers.
    # FastAPI matches routes in order, so the old routes must be removed
    # before the replacements are registered.
    health_paths = {"/health", "/health/rollout"}
    routes_to_remove = [
        route
        for route in app.router.routes
        # Only real routes carry both attributes (mounts etc. may not).
        if hasattr(route, "path")
        and hasattr(route, "methods")
        and getattr(route, "path", None) in health_paths
        and "GET" in (getattr(route, "methods", set()) or set())
    ]

    # Remove routes from router
    for route in routes_to_remove:
        app.router.routes.remove(route)
        print(f"[banking77] Removed default route: {getattr(route, 'path', 'unknown')}", flush=True)

    def _log_env_key_prefix(source: str, env_key: str | None) -> str | None:
        """Log and return the first half (at least one character) of ``env_key``."""
        if not env_key:
            return None
        prefix = env_key[: max(1, len(env_key) // 2)]
        print(f"[{source}] expected ENVIRONMENT_API_KEY prefix: {prefix}")
        return prefix

    def _health_response(request: StarletteRequest, source: str, authorized_body: dict[str, Any]):
        """Shared health logic: 503 without an env key, 200 otherwise."""
        env_key = normalize_environment_api_key()
        if not env_key:
            return JSONResponse(
                status_code=503,
                content={"status": "unhealthy", "detail": "Missing ENVIRONMENT_API_KEY"},
            )
        if not is_api_key_header_authorized(request):
            # NOTE(review): the unauthorized response includes up to half of
            # the expected key; confirm this exposure is acceptable.
            prefix = _log_env_key_prefix(source, env_key)
            content: dict[str, Any] = {"status": "healthy", "authorized": False}
            if prefix:
                content["expected_api_key_prefix"] = prefix
            return JSONResponse(status_code=200, content=content)
        return authorized_body

    @app.get("/health")
    async def health(request: StarletteRequest):
        return _health_response(request, "health", {"status": "healthy", "authorized": True})

    @app.get("/health/rollout")
    async def health_rollout(request: StarletteRequest):
        return _health_response(request, "health/rollout", {"ok": True, "authorized": True})

    @app.get("/metadata")
    async def get_metadata(request: StarletteRequest):
        """Return program code and metadata for proposer use.

        This endpoint allows task apps to self-extract their own code using inspect,
        keeping the architecture self-contained.
        """
        # Extract code using inspect
        program_code = get_current_module_code()

        # Report this module's own name. Walking to the caller's frame from
        # inside an async handler lands in the framework code that drives the
        # coroutine, not in this module.
        module_path = __name__

        return {
            "program_code": program_code,  # Full source code of task app
            "module_path": module_path,  # Module path (e.g., "examples.task_apps.banking77.banking77_task_app")
            "extraction_method": "inspect",  # How code was extracted
        }

    @app.exception_handler(RequestValidationError)
    async def _on_validation_error(request: StarletteRequest, exc: RequestValidationError):
        # Best-effort diagnostics: logging must never mask the 422 response.
        with contextlib.suppress(Exception):
            hdr = request.headers
            snapshot = {
                "path": str(request.url.path),
                "have_x_api_key": bool(hdr.get("x-api-key")),  # type: ignore[misc]
                "have_x_api_keys": bool(hdr.get("x-api-keys")),  # type: ignore[misc]
                "have_authorization": bool(hdr.get("authorization")),  # type: ignore[misc]
                "errors": exc.errors()[:5],
            }
            print("[422] validation", snapshot, flush=True)
        # Only the first five errors are returned to keep the payload small.
        return JSONResponse(
            status_code=422,
            content={"status": "invalid", "detail": exc.errors()[:5]},
        )

    return app
894
+
895
+
896
if __name__ == "__main__":
    import argparse

    # Command-line options for running the task app locally.
    cli = argparse.ArgumentParser(description="Run the Banking77 task app locally")
    cli.add_argument("--host", default="0.0.0.0")
    cli.add_argument("--port", type=int, default=8102)
    cli.add_argument("--reload", action="store_true", help="Enable uvicorn autoreload")
    cli.add_argument(
        "--env-file",
        action="append",
        default=[],
        help="Additional .env files to load before startup",
    )
    options = cli.parse_args()

    # Look for .env at repo root (3 levels up: banking77/ -> task_apps/ -> examples/ -> repo_root/)
    repo_env = Path(__file__).resolve().parents[3] / ".env"
    env_files = []
    if repo_env.exists():
        env_files.append(str(repo_env))
    # Files given with --env-file come after the repo-level default.
    env_files.extend(options.env_file or [])

    run_task_app(
        build_config,
        host=options.host,
        port=options.port,
        reload=options.reload,
        env_files=env_files,
    )