synth-ai 0.2.6.dev1__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (738) hide show
  1. synth_ai/__init__.py +44 -24
  2. synth_ai/__main__.py +30 -3
  3. synth_ai/cli/__init__.py +103 -48
  4. synth_ai/cli/__main__.py +42 -0
  5. synth_ai/cli/_internal/__init__.py +5 -0
  6. synth_ai/cli/_internal/modal_wrapper.py +31 -0
  7. synth_ai/cli/_internal/storage.py +20 -0
  8. synth_ai/cli/_internal/typer_patch.py +47 -0
  9. synth_ai/cli/_internal/validate_task_app.py +29 -0
  10. synth_ai/cli/agents/__init__.py +17 -0
  11. synth_ai/cli/agents/claude.py +77 -0
  12. synth_ai/cli/agents/codex.py +265 -0
  13. synth_ai/cli/agents/opencode.py +253 -0
  14. synth_ai/cli/commands/__init__.py +18 -0
  15. synth_ai/cli/commands/artifacts/__init__.py +13 -0
  16. synth_ai/cli/commands/artifacts/client.py +119 -0
  17. synth_ai/cli/commands/artifacts/config.py +57 -0
  18. synth_ai/cli/commands/artifacts/core.py +24 -0
  19. synth_ai/cli/commands/artifacts/download.py +188 -0
  20. synth_ai/cli/commands/artifacts/export.py +186 -0
  21. synth_ai/cli/commands/artifacts/list.py +156 -0
  22. synth_ai/cli/commands/artifacts/parsing.py +250 -0
  23. synth_ai/cli/commands/artifacts/show.py +336 -0
  24. synth_ai/cli/commands/demo/__init__.py +3 -0
  25. synth_ai/cli/commands/demo/core.py +153 -0
  26. synth_ai/cli/commands/eval/__init__.py +10 -0
  27. synth_ai/cli/commands/eval/config.py +338 -0
  28. synth_ai/cli/commands/eval/core.py +256 -0
  29. synth_ai/cli/commands/eval/runner.py +704 -0
  30. synth_ai/cli/commands/eval/validation.py +60 -0
  31. synth_ai/cli/commands/filter/__init__.py +12 -0
  32. synth_ai/cli/commands/filter/core.py +424 -0
  33. synth_ai/cli/commands/filter/errors.py +55 -0
  34. synth_ai/cli/commands/filter/validation.py +77 -0
  35. synth_ai/cli/commands/help/__init__.py +185 -0
  36. synth_ai/cli/commands/help/core.py +72 -0
  37. synth_ai/cli/commands/scan/__init__.py +19 -0
  38. synth_ai/cli/commands/scan/cloudflare_scanner.py +403 -0
  39. synth_ai/cli/commands/scan/core.py +344 -0
  40. synth_ai/cli/commands/scan/health_checker.py +242 -0
  41. synth_ai/cli/commands/scan/local_scanner.py +278 -0
  42. synth_ai/cli/commands/scan/models.py +83 -0
  43. synth_ai/cli/commands/smoke/__init__.py +7 -0
  44. synth_ai/cli/commands/smoke/core.py +1428 -0
  45. synth_ai/cli/commands/status/__init__.py +3 -0
  46. synth_ai/cli/commands/status/client.py +91 -0
  47. synth_ai/cli/commands/status/config.py +12 -0
  48. synth_ai/cli/commands/status/errors.py +11 -0
  49. synth_ai/cli/commands/status/subcommands/__init__.py +3 -0
  50. synth_ai/cli/commands/status/subcommands/config.py +13 -0
  51. synth_ai/cli/commands/status/subcommands/files.py +34 -0
  52. synth_ai/cli/commands/status/subcommands/jobs.py +51 -0
  53. synth_ai/cli/commands/status/subcommands/models.py +35 -0
  54. synth_ai/cli/commands/status/subcommands/runs.py +34 -0
  55. synth_ai/cli/commands/status/subcommands/session.py +77 -0
  56. synth_ai/cli/commands/status/subcommands/summary.py +39 -0
  57. synth_ai/cli/commands/status/subcommands/utils.py +41 -0
  58. synth_ai/cli/commands/status/utils.py +23 -0
  59. synth_ai/cli/commands/train/__init__.py +53 -0
  60. synth_ai/cli/commands/train/core.py +22 -0
  61. synth_ai/cli/commands/train/errors.py +117 -0
  62. synth_ai/cli/commands/train/judge_schemas.py +201 -0
  63. synth_ai/cli/commands/train/judge_validation.py +305 -0
  64. synth_ai/cli/commands/train/prompt_learning_validation.py +633 -0
  65. synth_ai/cli/commands/train/validation.py +392 -0
  66. synth_ai/cli/demo_apps/__init__.py +10 -0
  67. synth_ai/cli/demo_apps/core/__init__.py +28 -0
  68. synth_ai/cli/demo_apps/core/cli.py +1735 -0
  69. synth_ai/cli/demo_apps/crafter/__init__.py +1 -0
  70. synth_ai/cli/demo_apps/crafter/crafter_fft_4b.toml +55 -0
  71. synth_ai/cli/demo_apps/crafter/grpo_crafter_task_app.py +186 -0
  72. synth_ai/cli/demo_apps/crafter/rl_from_base_qwen4b.toml +74 -0
  73. synth_ai/cli/demo_apps/demo_registry.py +176 -0
  74. synth_ai/cli/demo_apps/demo_task_apps/__init__.py +7 -0
  75. synth_ai/{demos → cli/demo_apps}/demo_task_apps/core.py +117 -51
  76. synth_ai/cli/demo_apps/demo_task_apps/crafter/__init__.py +1 -0
  77. synth_ai/cli/demo_apps/demo_task_apps/crafter/configs/crafter_fft_4b.toml +53 -0
  78. synth_ai/cli/demo_apps/demo_task_apps/crafter/configs/rl_from_base_qwen4b.toml +73 -0
  79. synth_ai/cli/demo_apps/demo_task_apps/crafter/grpo_crafter_task_app.py +185 -0
  80. synth_ai/cli/demo_apps/demo_task_apps/math/_common.py +16 -0
  81. synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/app.py +2 -1
  82. synth_ai/cli/demo_apps/demo_task_apps/math/config.toml +73 -0
  83. synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/deploy_modal.py +3 -6
  84. synth_ai/cli/demo_apps/demo_task_apps/math/modal_task_app.py +738 -0
  85. synth_ai/cli/demo_apps/demo_task_apps/math/task_app_entry.py +39 -0
  86. synth_ai/cli/demo_apps/math/__init__.py +1 -0
  87. synth_ai/cli/demo_apps/math/_common.py +16 -0
  88. synth_ai/cli/demo_apps/math/app.py +38 -0
  89. synth_ai/cli/demo_apps/math/config.toml +75 -0
  90. synth_ai/cli/demo_apps/math/deploy_modal.py +54 -0
  91. synth_ai/cli/demo_apps/math/modal_task_app.py +698 -0
  92. synth_ai/cli/demo_apps/math/task_app_entry.py +53 -0
  93. synth_ai/cli/demo_apps/mipro/main.py +271 -0
  94. synth_ai/cli/demo_apps/mipro/task_app.py +922 -0
  95. synth_ai/cli/demo_apps/mipro/train_cfg.toml +92 -0
  96. synth_ai/cli/demos/__init__.py +12 -0
  97. synth_ai/cli/demos/demo.py +32 -0
  98. synth_ai/cli/demos/rl_demo.py +254 -0
  99. synth_ai/cli/deploy.py +216 -0
  100. synth_ai/cli/infra/__init__.py +14 -0
  101. synth_ai/cli/{balance.py → infra/balance.py} +21 -3
  102. synth_ai/cli/infra/mcp.py +35 -0
  103. synth_ai/cli/infra/modal_app.py +36 -0
  104. synth_ai/cli/infra/setup.py +69 -0
  105. synth_ai/cli/infra/status.py +16 -0
  106. synth_ai/cli/infra/turso.py +77 -0
  107. synth_ai/cli/lib/__init__.py +10 -0
  108. synth_ai/cli/lib/agents.py +76 -0
  109. synth_ai/cli/lib/apps/modal_app.py +101 -0
  110. synth_ai/cli/lib/apps/task_app.py +642 -0
  111. synth_ai/cli/lib/bin.py +39 -0
  112. synth_ai/cli/lib/env.py +375 -0
  113. synth_ai/cli/lib/errors.py +85 -0
  114. synth_ai/cli/lib/modal.py +315 -0
  115. synth_ai/cli/lib/plotting.py +126 -0
  116. synth_ai/cli/lib/prompt_args.py +39 -0
  117. synth_ai/cli/lib/prompts.py +284 -0
  118. synth_ai/cli/lib/sqld.py +122 -0
  119. synth_ai/cli/lib/task_app_discovery.py +884 -0
  120. synth_ai/cli/lib/task_app_env.py +295 -0
  121. synth_ai/cli/lib/train_cfgs.py +300 -0
  122. synth_ai/cli/lib/tunnel_records.py +207 -0
  123. synth_ai/cli/local/__init__.py +14 -0
  124. synth_ai/cli/local/experiment_queue/__init__.py +72 -0
  125. synth_ai/cli/local/experiment_queue/api_schemas.py +221 -0
  126. synth_ai/cli/local/experiment_queue/celery_app.py +208 -0
  127. synth_ai/cli/local/experiment_queue/config.py +128 -0
  128. synth_ai/cli/local/experiment_queue/config_utils.py +272 -0
  129. synth_ai/cli/local/experiment_queue/database.py +175 -0
  130. synth_ai/cli/local/experiment_queue/dispatcher.py +119 -0
  131. synth_ai/cli/local/experiment_queue/models.py +231 -0
  132. synth_ai/cli/local/experiment_queue/progress_info.py +160 -0
  133. synth_ai/cli/local/experiment_queue/results.py +373 -0
  134. synth_ai/cli/local/experiment_queue/schemas.py +131 -0
  135. synth_ai/cli/local/experiment_queue/service.py +344 -0
  136. synth_ai/cli/local/experiment_queue/status.py +372 -0
  137. synth_ai/cli/local/experiment_queue/status_tracker.py +360 -0
  138. synth_ai/cli/local/experiment_queue/tasks.py +1984 -0
  139. synth_ai/cli/local/experiment_queue/trace_storage.py +65 -0
  140. synth_ai/cli/local/experiment_queue/validation.py +157 -0
  141. synth_ai/cli/local/session/__init__.py +92 -0
  142. synth_ai/cli/local/session/client.py +383 -0
  143. synth_ai/cli/local/session/constants.py +63 -0
  144. synth_ai/cli/local/session/exceptions.py +105 -0
  145. synth_ai/cli/local/session/manager.py +139 -0
  146. synth_ai/cli/local/session/models.py +89 -0
  147. synth_ai/cli/local/session/query.py +110 -0
  148. synth_ai/cli/root.py +150 -102
  149. synth_ai/cli/task_apps/__init__.py +37 -0
  150. synth_ai/cli/task_apps/commands.py +3145 -0
  151. synth_ai/cli/task_apps/deploy.py +7 -0
  152. synth_ai/cli/task_apps/list.py +26 -0
  153. synth_ai/cli/task_apps/main.py +36 -0
  154. synth_ai/cli/task_apps/modal_serve.py +11 -0
  155. synth_ai/cli/task_apps/serve.py +11 -0
  156. synth_ai/cli/training/__init__.py +8 -0
  157. synth_ai/cli/training/train.py +5 -0
  158. synth_ai/cli/training/train_cfg.py +34 -0
  159. synth_ai/cli/{watch.py → training/watch.py} +13 -18
  160. synth_ai/cli/turso.py +52 -0
  161. synth_ai/cli/utils/__init__.py +8 -0
  162. synth_ai/cli/utils/experiments.py +235 -0
  163. synth_ai/cli/utils/queue.py +504 -0
  164. synth_ai/cli/{recent.py → utils/recent.py} +13 -7
  165. synth_ai/cli/{traces.py → utils/traces.py} +9 -5
  166. synth_ai/contracts/__init__.py +67 -0
  167. synth_ai/core/__init__.py +100 -0
  168. synth_ai/core/_utils/__init__.py +54 -0
  169. synth_ai/core/_utils/base_url.py +10 -0
  170. synth_ai/core/_utils/http.py +10 -0
  171. synth_ai/core/_utils/prompts.py +14 -0
  172. synth_ai/core/_utils/task_app_state.py +12 -0
  173. synth_ai/core/_utils/user_config.py +10 -0
  174. synth_ai/core/apps/common.py +116 -0
  175. synth_ai/core/auth.py +95 -0
  176. synth_ai/core/cfgs.py +240 -0
  177. synth_ai/core/config/__init__.py +16 -0
  178. synth_ai/core/config/base.py +168 -0
  179. synth_ai/core/config/resolver.py +89 -0
  180. synth_ai/core/env.py +231 -0
  181. synth_ai/core/errors.py +126 -0
  182. synth_ai/core/http.py +230 -0
  183. synth_ai/core/integrations/__init__.py +11 -0
  184. synth_ai/core/integrations/cloudflare.py +1710 -0
  185. synth_ai/core/integrations/mcp/__init__.py +6 -0
  186. synth_ai/core/integrations/mcp/__main__.py +8 -0
  187. synth_ai/core/integrations/mcp/claude.py +36 -0
  188. synth_ai/core/integrations/mcp/main.py +254 -0
  189. synth_ai/core/integrations/mcp/setup.py +100 -0
  190. synth_ai/core/integrations/modal.py +277 -0
  191. synth_ai/core/json.py +72 -0
  192. synth_ai/core/log_filter.py +99 -0
  193. synth_ai/core/logging.py +82 -0
  194. synth_ai/core/paths.py +107 -0
  195. synth_ai/core/pricing.py +109 -0
  196. synth_ai/core/process.py +233 -0
  197. synth_ai/core/ssl.py +25 -0
  198. synth_ai/core/storage/__init__.py +71 -0
  199. synth_ai/core/task_app_state.py +318 -0
  200. synth_ai/core/telemetry.py +282 -0
  201. synth_ai/{tracing_v3 → core/tracing_v3}/__init__.py +5 -1
  202. synth_ai/{tracing_v3 → core/tracing_v3}/abstractions.py +21 -4
  203. synth_ai/core/tracing_v3/config.py +229 -0
  204. synth_ai/core/tracing_v3/constants.py +21 -0
  205. synth_ai/{tracing_v3 → core/tracing_v3}/db_config.py +42 -29
  206. synth_ai/{tracing_v3 → core/tracing_v3}/decorators.py +80 -45
  207. synth_ai/{tracing_v3 → core/tracing_v3}/examples/basic_usage.py +15 -9
  208. synth_ai/{tracing_v3 → core/tracing_v3}/hooks.py +6 -4
  209. synth_ai/{tracing_v3 → core/tracing_v3}/llm_call_record_helpers.py +161 -61
  210. synth_ai/{tracing_v3 → core/tracing_v3}/migration_helper.py +1 -2
  211. synth_ai/{tracing_v3 → core/tracing_v3}/replica_sync.py +12 -7
  212. synth_ai/core/tracing_v3/serialization.py +130 -0
  213. synth_ai/{tracing_v3 → core/tracing_v3}/session_tracer.py +88 -21
  214. synth_ai/{tracing_v3 → core/tracing_v3}/storage/base.py +99 -12
  215. synth_ai/core/tracing_v3/storage/config.py +109 -0
  216. synth_ai/{tracing_v3 → core/tracing_v3}/storage/factory.py +11 -9
  217. synth_ai/{tracing_v3 → core/tracing_v3}/storage/utils.py +15 -11
  218. synth_ai/core/tracing_v3/trace_utils.py +326 -0
  219. synth_ai/core/tracing_v3/turso/__init__.py +12 -0
  220. synth_ai/core/tracing_v3/turso/daemon.py +278 -0
  221. synth_ai/{tracing_v3 → core/tracing_v3}/turso/models.py +7 -3
  222. synth_ai/core/tracing_v3/turso/native_manager.py +1385 -0
  223. synth_ai/{tracing_v3 → core/tracing_v3}/utils.py +5 -4
  224. synth_ai/core/urls.py +18 -0
  225. synth_ai/core/user_config.py +137 -0
  226. synth_ai/core/uvicorn.py +222 -0
  227. synth_ai/data/__init__.py +83 -0
  228. synth_ai/data/enums.py +123 -0
  229. synth_ai/data/rewards.py +152 -0
  230. synth_ai/data/traces.py +35 -0
  231. synth_ai/products/__init__.py +6 -0
  232. synth_ai/products/graph_evolve/__init__.py +46 -0
  233. synth_ai/products/graph_evolve/client.py +226 -0
  234. synth_ai/products/graph_evolve/config.py +591 -0
  235. synth_ai/products/graph_evolve/converters/__init__.py +42 -0
  236. synth_ai/products/graph_evolve/converters/openai_sft.py +484 -0
  237. synth_ai/products/graph_evolve/examples/hotpotqa/config.toml +109 -0
  238. synth_ai/products/graph_evolve/run.py +222 -0
  239. synth_ai/products/graph_gepa/__init__.py +23 -0
  240. synth_ai/products/graph_gepa/converters/__init__.py +19 -0
  241. synth_ai/products/graph_gepa/converters/openai_sft.py +29 -0
  242. synth_ai/sdk/__init__.py +123 -0
  243. synth_ai/sdk/api/__init__.py +1 -0
  244. synth_ai/sdk/api/models/supported.py +514 -0
  245. synth_ai/sdk/api/research_agent/__init__.py +296 -0
  246. synth_ai/sdk/api/train/__init__.py +85 -0
  247. synth_ai/sdk/api/train/builders.py +895 -0
  248. synth_ai/sdk/api/train/cli.py +2199 -0
  249. synth_ai/sdk/api/train/config_finder.py +267 -0
  250. synth_ai/sdk/api/train/configs/__init__.py +65 -0
  251. synth_ai/sdk/api/train/configs/prompt_learning.py +1706 -0
  252. synth_ai/sdk/api/train/configs/rl.py +187 -0
  253. synth_ai/sdk/api/train/configs/sft.py +99 -0
  254. synth_ai/sdk/api/train/configs/shared.py +81 -0
  255. synth_ai/sdk/api/train/context_learning.py +312 -0
  256. synth_ai/sdk/api/train/env_resolver.py +418 -0
  257. synth_ai/sdk/api/train/graph_validators.py +216 -0
  258. synth_ai/sdk/api/train/graphgen.py +984 -0
  259. synth_ai/sdk/api/train/graphgen_models.py +823 -0
  260. synth_ai/sdk/api/train/graphgen_validators.py +109 -0
  261. synth_ai/sdk/api/train/local_api.py +10 -0
  262. synth_ai/sdk/api/train/pollers.py +124 -0
  263. synth_ai/sdk/api/train/progress/__init__.py +97 -0
  264. synth_ai/sdk/api/train/progress/dataclasses.py +569 -0
  265. synth_ai/sdk/api/train/progress/events.py +326 -0
  266. synth_ai/sdk/api/train/progress/results.py +428 -0
  267. synth_ai/sdk/api/train/progress/tracker.py +641 -0
  268. synth_ai/sdk/api/train/prompt_learning.py +469 -0
  269. synth_ai/sdk/api/train/rl.py +441 -0
  270. synth_ai/sdk/api/train/sft.py +396 -0
  271. synth_ai/sdk/api/train/summary.py +522 -0
  272. synth_ai/sdk/api/train/supported_algos.py +147 -0
  273. synth_ai/sdk/api/train/task_app.py +351 -0
  274. synth_ai/sdk/api/train/utils.py +279 -0
  275. synth_ai/sdk/api/train/validators.py +2424 -0
  276. synth_ai/sdk/graphs/__init__.py +15 -0
  277. synth_ai/sdk/graphs/completions.py +570 -0
  278. synth_ai/{inference → sdk/inference}/__init__.py +0 -1
  279. synth_ai/sdk/inference/client.py +128 -0
  280. synth_ai/sdk/jobs/__init__.py +16 -0
  281. synth_ai/sdk/jobs/client.py +371 -0
  282. synth_ai/sdk/judging/__init__.py +14 -0
  283. synth_ai/sdk/judging/base.py +24 -0
  284. synth_ai/sdk/judging/client.py +40 -0
  285. synth_ai/sdk/judging/schemas.py +222 -0
  286. synth_ai/sdk/judging/types.py +42 -0
  287. synth_ai/sdk/learning/__init__.py +99 -0
  288. synth_ai/sdk/learning/algorithms.py +14 -0
  289. synth_ai/{learning → sdk/learning}/client.py +121 -30
  290. synth_ai/sdk/learning/config.py +5 -0
  291. synth_ai/{learning → sdk/learning}/constants.py +0 -2
  292. synth_ai/sdk/learning/context_learning_client.py +531 -0
  293. synth_ai/sdk/learning/context_learning_types.py +292 -0
  294. synth_ai/sdk/learning/ft_client.py +7 -0
  295. synth_ai/{learning → sdk/learning}/health.py +15 -9
  296. synth_ai/{learning → sdk/learning}/jobs.py +44 -47
  297. synth_ai/sdk/learning/prompt_extraction.py +334 -0
  298. synth_ai/sdk/learning/prompt_learning_client.py +455 -0
  299. synth_ai/sdk/learning/prompt_learning_types.py +186 -0
  300. synth_ai/{rl → sdk/learning/rl}/__init__.py +13 -8
  301. synth_ai/{learning/rl_client.py → sdk/learning/rl/client.py} +89 -77
  302. synth_ai/sdk/learning/rl/config.py +31 -0
  303. synth_ai/{rl → sdk/learning/rl}/contracts.py +5 -14
  304. synth_ai/{rl → sdk/learning/rl}/env_keys.py +45 -16
  305. synth_ai/sdk/learning/rl/secrets.py +13 -0
  306. synth_ai/sdk/learning/rl_client.py +5 -0
  307. synth_ai/sdk/learning/sft/__init__.py +29 -0
  308. synth_ai/sdk/learning/sft/client.py +95 -0
  309. synth_ai/sdk/learning/sft/config.py +270 -0
  310. synth_ai/sdk/learning/sft/data.py +698 -0
  311. synth_ai/sdk/learning/sse.py +57 -0
  312. synth_ai/sdk/learning/validators.py +52 -0
  313. synth_ai/sdk/localapi/__init__.py +40 -0
  314. synth_ai/sdk/localapi/apps/__init__.py +28 -0
  315. synth_ai/sdk/localapi/client.py +10 -0
  316. synth_ai/sdk/localapi/contracts.py +10 -0
  317. synth_ai/sdk/localapi/helpers.py +519 -0
  318. synth_ai/sdk/localapi/rollouts.py +87 -0
  319. synth_ai/sdk/localapi/server.py +29 -0
  320. synth_ai/sdk/localapi/template.py +70 -0
  321. synth_ai/sdk/streaming/__init__.py +35 -0
  322. synth_ai/sdk/streaming/config.py +94 -0
  323. synth_ai/sdk/streaming/handlers.py +1997 -0
  324. synth_ai/sdk/streaming/streamer.py +713 -0
  325. synth_ai/sdk/streaming/types.py +112 -0
  326. synth_ai/sdk/task/__init__.py +164 -0
  327. synth_ai/sdk/task/apps/__init__.py +169 -0
  328. synth_ai/sdk/task/auth.py +165 -0
  329. synth_ai/sdk/task/client.py +175 -0
  330. synth_ai/sdk/task/config.py +257 -0
  331. synth_ai/sdk/task/contracts.py +219 -0
  332. synth_ai/sdk/task/datasets.py +108 -0
  333. synth_ai/sdk/task/errors.py +50 -0
  334. synth_ai/sdk/task/health.py +34 -0
  335. synth_ai/sdk/task/in_process.py +1190 -0
  336. synth_ai/sdk/task/in_process_runner.py +314 -0
  337. synth_ai/sdk/task/inference_api.py +299 -0
  338. synth_ai/sdk/task/json.py +111 -0
  339. synth_ai/sdk/task/proxy.py +287 -0
  340. synth_ai/sdk/task/rubrics/__init__.py +55 -0
  341. synth_ai/sdk/task/rubrics/loaders.py +156 -0
  342. synth_ai/sdk/task/rubrics/models.py +57 -0
  343. synth_ai/sdk/task/rubrics/scoring.py +116 -0
  344. synth_ai/sdk/task/rubrics/strict.py +149 -0
  345. synth_ai/sdk/task/rubrics.py +219 -0
  346. synth_ai/sdk/task/server.py +631 -0
  347. synth_ai/sdk/task/trace_correlation_helpers.py +539 -0
  348. synth_ai/sdk/task/tracing_utils.py +95 -0
  349. synth_ai/sdk/task/validators.py +441 -0
  350. synth_ai/sdk/task/vendors.py +59 -0
  351. synth_ai/sdk/training/__init__.py +102 -0
  352. synth_ai/sdk/tunnels/__init__.py +83 -0
  353. synth_ai/sdk/tunnels/cleanup.py +83 -0
  354. synth_ai/sdk/tunnels/ports.py +120 -0
  355. synth_ai/utils/__init__.py +213 -0
  356. synth_ai-0.4.3.dist-info/METADATA +262 -0
  357. synth_ai-0.4.3.dist-info/RECORD +370 -0
  358. {synth_ai-0.2.6.dev1.dist-info → synth_ai-0.4.3.dist-info}/entry_points.txt +0 -1
  359. synth_ai/cli/calc.py +0 -69
  360. synth_ai/cli/demo.py +0 -131
  361. synth_ai/cli/legacy_root_backup.py +0 -470
  362. synth_ai/cli/man.py +0 -106
  363. synth_ai/cli/rl_demo.py +0 -137
  364. synth_ai/cli/status.py +0 -133
  365. synth_ai/config/base_url.py +0 -98
  366. synth_ai/core/experiment.py +0 -15
  367. synth_ai/core/system.py +0 -15
  368. synth_ai/demos/core/__init__.py +0 -1
  369. synth_ai/demos/core/cli.py +0 -685
  370. synth_ai/demos/demo_task_apps/__init__.py +0 -1
  371. synth_ai/demos/demo_task_apps/math/config.toml +0 -44
  372. synth_ai/demos/demo_task_apps/math/deploy_task_app.sh +0 -22
  373. synth_ai/environments/__init__.py +0 -31
  374. synth_ai/environments/environment/__init__.py +0 -1
  375. synth_ai/environments/environment/artifacts/__init__.py +0 -1
  376. synth_ai/environments/environment/artifacts/base.py +0 -52
  377. synth_ai/environments/environment/core.py +0 -67
  378. synth_ai/environments/environment/db/__init__.py +0 -1
  379. synth_ai/environments/environment/db/sqlite.py +0 -45
  380. synth_ai/environments/environment/registry.py +0 -233
  381. synth_ai/environments/environment/resources/sqlite.py +0 -45
  382. synth_ai/environments/environment/results.py +0 -1
  383. synth_ai/environments/environment/rewards/__init__.py +0 -1
  384. synth_ai/environments/environment/rewards/core.py +0 -29
  385. synth_ai/environments/environment/shared_engine.py +0 -26
  386. synth_ai/environments/environment/tools/__init__.py +0 -200
  387. synth_ai/environments/examples/__init__.py +0 -1
  388. synth_ai/environments/examples/bandit/__init__.py +0 -33
  389. synth_ai/environments/examples/bandit/engine.py +0 -294
  390. synth_ai/environments/examples/bandit/environment.py +0 -194
  391. synth_ai/environments/examples/bandit/taskset.py +0 -200
  392. synth_ai/environments/examples/crafter_classic/__init__.py +0 -8
  393. synth_ai/environments/examples/crafter_classic/agent_demos/analyze_semantic_words_markdown.py +0 -250
  394. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_comprehensive_evaluation.py +0 -59
  395. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_browser.py +0 -152
  396. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_config.toml +0 -24
  397. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_framework.py +0 -1194
  398. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/crafter_synth_config.toml +0 -56
  399. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_config_modal.toml +0 -32
  400. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_turso.py +0 -724
  401. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/kick_off_ft_modal.py +0 -384
  402. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_action_results.py +0 -53
  403. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_agent_actions.py +0 -178
  404. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_latest_run.py +0 -222
  405. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_lm_traces.py +0 -183
  406. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_no_rewards.py +0 -210
  407. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_trace_issue.py +0 -206
  408. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/check_db_schema.py +0 -49
  409. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/check_latest_results.py +0 -64
  410. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/debug_agent_responses.py +0 -88
  411. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/quick_trace_check.py +0 -77
  412. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/compare_experiments.py +0 -324
  413. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_turso.py +0 -580
  414. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/kick_off_ft_oai.py +0 -362
  415. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/multi_model_config.toml +0 -49
  416. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_enhanced_hooks.py +0 -332
  417. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_hook_events.py +0 -97
  418. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_hook_results.py +0 -217
  419. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/check_hook_storage.py +0 -87
  420. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/check_seeds.py +0 -88
  421. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/compare_seed_performance.py +0 -195
  422. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/custom_eval_pipelines.py +0 -400
  423. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/plot_hook_frequency.py +0 -195
  424. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/seed_analysis_summary.py +0 -56
  425. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/run_rollouts_for_models_and_compare_v3.py +0 -858
  426. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_quick_evaluation.py +0 -52
  427. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_react_agent.py +0 -874
  428. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_trace_evaluation.py +0 -1412
  429. synth_ai/environments/examples/crafter_classic/agent_demos/example_v3_usage.py +0 -216
  430. synth_ai/environments/examples/crafter_classic/agent_demos/old/compare_traces.py +0 -296
  431. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_comprehensive_evaluation.py +0 -58
  432. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_env_serialization.py +0 -464
  433. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_evaluation_browser.py +0 -152
  434. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_quick_evaluation.py +0 -51
  435. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_trace_evaluation.py +0 -1412
  436. synth_ai/environments/examples/crafter_classic/agent_demos/old/debug_player_loss.py +0 -112
  437. synth_ai/environments/examples/crafter_classic/agent_demos/old/diagnose_service.py +0 -203
  438. synth_ai/environments/examples/crafter_classic/agent_demos/old/diagnose_slowness.py +0 -305
  439. synth_ai/environments/examples/crafter_classic/agent_demos/old/eval_by_difficulty.py +0 -126
  440. synth_ai/environments/examples/crafter_classic/agent_demos/old/eval_example.py +0 -94
  441. synth_ai/environments/examples/crafter_classic/agent_demos/old/explore_saved_states.py +0 -142
  442. synth_ai/environments/examples/crafter_classic/agent_demos/old/filter_traces_sft.py +0 -26
  443. synth_ai/environments/examples/crafter_classic/agent_demos/old/filter_traces_sft_OLD.py +0 -984
  444. synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_data_gemini.py +0 -724
  445. synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_data_modal.py +0 -386
  446. synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_metadata.py +0 -205
  447. synth_ai/environments/examples/crafter_classic/agent_demos/old/kick_off_ft_gemini.py +0 -150
  448. synth_ai/environments/examples/crafter_classic/agent_demos/old/kick_off_ft_modal.py +0 -283
  449. synth_ai/environments/examples/crafter_classic/agent_demos/old/prepare_vertex_ft.py +0 -280
  450. synth_ai/environments/examples/crafter_classic/agent_demos/old/profile_env_slowness.py +0 -456
  451. synth_ai/environments/examples/crafter_classic/agent_demos/old/replicate_issue.py +0 -166
  452. synth_ai/environments/examples/crafter_classic/agent_demos/old/run_and_eval.py +0 -102
  453. synth_ai/environments/examples/crafter_classic/agent_demos/old/run_comparison.py +0 -128
  454. synth_ai/environments/examples/crafter_classic/agent_demos/old/run_qwen_rollouts.py +0 -655
  455. synth_ai/environments/examples/crafter_classic/agent_demos/old/trace_eval_OLD.py +0 -202
  456. synth_ai/environments/examples/crafter_classic/agent_demos/old/validate_openai_format.py +0 -166
  457. synth_ai/environments/examples/crafter_classic/config_logging.py +0 -111
  458. synth_ai/environments/examples/crafter_classic/debug_translation.py +0 -0
  459. synth_ai/environments/examples/crafter_classic/engine.py +0 -579
  460. synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +0 -64
  461. synth_ai/environments/examples/crafter_classic/engine_helpers/action_map.py +0 -6
  462. synth_ai/environments/examples/crafter_classic/engine_helpers/serialization.py +0 -75
  463. synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +0 -267
  464. synth_ai/environments/examples/crafter_classic/environment.py +0 -404
  465. synth_ai/environments/examples/crafter_classic/taskset.py +0 -233
  466. synth_ai/environments/examples/crafter_classic/trace_hooks_v3.py +0 -228
  467. synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +0 -299
  468. synth_ai/environments/examples/crafter_custom/__init__.py +0 -4
  469. synth_ai/environments/examples/crafter_custom/agent_demos/__init__.py +0 -1
  470. synth_ai/environments/examples/crafter_custom/agent_demos/trace_eval.py +0 -202
  471. synth_ai/environments/examples/crafter_custom/crafter/__init__.py +0 -7
  472. synth_ai/environments/examples/crafter_custom/crafter/config.py +0 -182
  473. synth_ai/environments/examples/crafter_custom/crafter/constants.py +0 -8
  474. synth_ai/environments/examples/crafter_custom/crafter/engine.py +0 -269
  475. synth_ai/environments/examples/crafter_custom/crafter/env.py +0 -262
  476. synth_ai/environments/examples/crafter_custom/crafter/objects.py +0 -417
  477. synth_ai/environments/examples/crafter_custom/crafter/recorder.py +0 -187
  478. synth_ai/environments/examples/crafter_custom/crafter/worldgen.py +0 -118
  479. synth_ai/environments/examples/crafter_custom/dataset_builder.py +0 -373
  480. synth_ai/environments/examples/crafter_custom/environment.py +0 -312
  481. synth_ai/environments/examples/crafter_custom/old/analyze_diamond_issue.py +0 -159
  482. synth_ai/environments/examples/crafter_custom/old/analyze_diamond_spawning.py +0 -158
  483. synth_ai/environments/examples/crafter_custom/old/compare_worlds.py +0 -71
  484. synth_ai/environments/examples/crafter_custom/old/dataset_stats.py +0 -105
  485. synth_ai/environments/examples/crafter_custom/old/diamond_spawning_summary.py +0 -119
  486. synth_ai/environments/examples/crafter_custom/old/example_dataset_usage.py +0 -52
  487. synth_ai/environments/examples/crafter_custom/run_dataset.py +0 -305
  488. synth_ai/environments/examples/enron/art_helpers/email_search_tools.py +0 -156
  489. synth_ai/environments/examples/enron/art_helpers/local_email_db.py +0 -281
  490. synth_ai/environments/examples/enron/art_helpers/types_enron.py +0 -25
  491. synth_ai/environments/examples/enron/engine.py +0 -295
  492. synth_ai/environments/examples/enron/environment.py +0 -166
  493. synth_ai/environments/examples/enron/taskset.py +0 -112
  494. synth_ai/environments/examples/enron/units/keyword_stats.py +0 -112
  495. synth_ai/environments/examples/minigrid/__init__.py +0 -48
  496. synth_ai/environments/examples/minigrid/agent_demos/minigrid_evaluation_framework.py +0 -1188
  497. synth_ai/environments/examples/minigrid/agent_demos/minigrid_quick_evaluation.py +0 -48
  498. synth_ai/environments/examples/minigrid/agent_demos/minigrid_react_agent.py +0 -562
  499. synth_ai/environments/examples/minigrid/agent_demos/minigrid_trace_evaluation.py +0 -221
  500. synth_ai/environments/examples/minigrid/engine.py +0 -589
  501. synth_ai/environments/examples/minigrid/environment.py +0 -274
  502. synth_ai/environments/examples/minigrid/environment_mapping.py +0 -242
  503. synth_ai/environments/examples/minigrid/puzzle_loader.py +0 -417
  504. synth_ai/environments/examples/minigrid/taskset.py +0 -583
  505. synth_ai/environments/examples/nethack/__init__.py +0 -7
  506. synth_ai/environments/examples/nethack/achievements.py +0 -337
  507. synth_ai/environments/examples/nethack/agent_demos/nethack_evaluation_framework.py +0 -981
  508. synth_ai/environments/examples/nethack/agent_demos/nethack_quick_evaluation.py +0 -74
  509. synth_ai/environments/examples/nethack/agent_demos/nethack_react_agent.py +0 -831
  510. synth_ai/environments/examples/nethack/engine.py +0 -739
  511. synth_ai/environments/examples/nethack/environment.py +0 -256
  512. synth_ai/environments/examples/nethack/helpers/__init__.py +0 -41
  513. synth_ai/environments/examples/nethack/helpers/action_mapping.py +0 -301
  514. synth_ai/environments/examples/nethack/helpers/nle_wrapper.py +0 -402
  515. synth_ai/environments/examples/nethack/helpers/observation_utils.py +0 -433
  516. synth_ai/environments/examples/nethack/helpers/recording_wrapper.py +0 -200
  517. synth_ai/environments/examples/nethack/helpers/trajectory_recorder.py +0 -269
  518. synth_ai/environments/examples/nethack/helpers/visualization/replay_viewer.py +0 -308
  519. synth_ai/environments/examples/nethack/helpers/visualization/visualizer.py +0 -431
  520. synth_ai/environments/examples/nethack/taskset.py +0 -323
  521. synth_ai/environments/examples/red/__init__.py +0 -7
  522. synth_ai/environments/examples/red/agent_demos/__init__.py +0 -1
  523. synth_ai/environments/examples/red/config_logging.py +0 -110
  524. synth_ai/environments/examples/red/engine.py +0 -694
  525. synth_ai/environments/examples/red/engine_helpers/__init__.py +0 -1
  526. synth_ai/environments/examples/red/engine_helpers/memory_map.py +0 -28
  527. synth_ai/environments/examples/red/engine_helpers/reward_components.py +0 -276
  528. synth_ai/environments/examples/red/engine_helpers/reward_library/__init__.py +0 -142
  529. synth_ai/environments/examples/red/engine_helpers/reward_library/adaptive_rewards.py +0 -57
  530. synth_ai/environments/examples/red/engine_helpers/reward_library/battle_rewards.py +0 -284
  531. synth_ai/environments/examples/red/engine_helpers/reward_library/composite_rewards.py +0 -150
  532. synth_ai/environments/examples/red/engine_helpers/reward_library/economy_rewards.py +0 -138
  533. synth_ai/environments/examples/red/engine_helpers/reward_library/efficiency_rewards.py +0 -57
  534. synth_ai/environments/examples/red/engine_helpers/reward_library/exploration_rewards.py +0 -331
  535. synth_ai/environments/examples/red/engine_helpers/reward_library/novelty_rewards.py +0 -121
  536. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_rewards.py +0 -559
  537. synth_ai/environments/examples/red/engine_helpers/reward_library/pokemon_rewards.py +0 -313
  538. synth_ai/environments/examples/red/engine_helpers/reward_library/social_rewards.py +0 -148
  539. synth_ai/environments/examples/red/engine_helpers/reward_library/story_rewards.py +0 -247
  540. synth_ai/environments/examples/red/engine_helpers/screen_analysis.py +0 -368
  541. synth_ai/environments/examples/red/engine_helpers/state_extraction.py +0 -140
  542. synth_ai/environments/examples/red/environment.py +0 -238
  543. synth_ai/environments/examples/red/taskset.py +0 -79
  544. synth_ai/environments/examples/red/units/__init__.py +0 -1
  545. synth_ai/environments/examples/sokoban/__init__.py +0 -1
  546. synth_ai/environments/examples/sokoban/agent_demos/sokoban_full_eval.py +0 -899
  547. synth_ai/environments/examples/sokoban/engine.py +0 -678
  548. synth_ai/environments/examples/sokoban/engine_helpers/__init__.py +0 -1
  549. synth_ai/environments/examples/sokoban/engine_helpers/room_utils.py +0 -657
  550. synth_ai/environments/examples/sokoban/engine_helpers/vendored/__init__.py +0 -18
  551. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/__init__.py +0 -3
  552. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/boxoban_env.py +0 -131
  553. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/render_utils.py +0 -370
  554. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/room_utils.py +0 -332
  555. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env.py +0 -306
  556. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_fixed_targets.py +0 -67
  557. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_pull.py +0 -115
  558. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_two_player.py +0 -123
  559. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_variations.py +0 -394
  560. synth_ai/environments/examples/sokoban/environment.py +0 -229
  561. synth_ai/environments/examples/sokoban/generate_verified_puzzles.py +0 -440
  562. synth_ai/environments/examples/sokoban/puzzle_loader.py +0 -312
  563. synth_ai/environments/examples/sokoban/taskset.py +0 -428
  564. synth_ai/environments/examples/sokoban/units/astar_common.py +0 -95
  565. synth_ai/environments/examples/tictactoe/__init__.py +0 -1
  566. synth_ai/environments/examples/tictactoe/engine.py +0 -368
  567. synth_ai/environments/examples/tictactoe/environment.py +0 -240
  568. synth_ai/environments/examples/tictactoe/taskset.py +0 -215
  569. synth_ai/environments/examples/verilog/__init__.py +0 -10
  570. synth_ai/environments/examples/verilog/engine.py +0 -329
  571. synth_ai/environments/examples/verilog/environment.py +0 -350
  572. synth_ai/environments/examples/verilog/taskset.py +0 -420
  573. synth_ai/environments/examples/wordle/__init__.py +0 -29
  574. synth_ai/environments/examples/wordle/engine.py +0 -398
  575. synth_ai/environments/examples/wordle/environment.py +0 -159
  576. synth_ai/environments/examples/wordle/helpers/generate_instances_wordfreq.py +0 -75
  577. synth_ai/environments/examples/wordle/taskset.py +0 -230
  578. synth_ai/environments/reproducibility/core.py +0 -42
  579. synth_ai/environments/reproducibility/helpers.py +0 -0
  580. synth_ai/environments/reproducibility/tree.py +0 -364
  581. synth_ai/environments/service/app.py +0 -91
  582. synth_ai/environments/service/core_routes.py +0 -1020
  583. synth_ai/environments/service/external_registry.py +0 -56
  584. synth_ai/environments/service/registry.py +0 -9
  585. synth_ai/environments/stateful/__init__.py +0 -1
  586. synth_ai/environments/stateful/core.py +0 -163
  587. synth_ai/environments/stateful/engine.py +0 -21
  588. synth_ai/environments/stateful/state.py +0 -7
  589. synth_ai/environments/tasks/api.py +0 -19
  590. synth_ai/environments/tasks/core.py +0 -80
  591. synth_ai/environments/tasks/filters.py +0 -41
  592. synth_ai/environments/tasks/utils.py +0 -91
  593. synth_ai/environments/v0_observability/history.py +0 -3
  594. synth_ai/environments/v0_observability/log.py +0 -2
  595. synth_ai/evals/base.py +0 -15
  596. synth_ai/experimental/synth_oss.py +0 -446
  597. synth_ai/http.py +0 -102
  598. synth_ai/inference/client.py +0 -20
  599. synth_ai/install_sqld.sh +0 -40
  600. synth_ai/jobs/client.py +0 -246
  601. synth_ai/learning/__init__.py +0 -24
  602. synth_ai/learning/config.py +0 -43
  603. synth_ai/learning/filtering.py +0 -0
  604. synth_ai/learning/ft_client.py +0 -59
  605. synth_ai/learning/offline/dpo.py +0 -0
  606. synth_ai/learning/offline/providers.py +0 -7
  607. synth_ai/learning/offline/sft.py +0 -0
  608. synth_ai/learning/offline/shared.py +0 -0
  609. synth_ai/learning/online/grpo.py +0 -0
  610. synth_ai/learning/online/irft.py +0 -0
  611. synth_ai/learning/prompts/banking77_injection_eval.py +0 -168
  612. synth_ai/learning/prompts/gepa.py +0 -0
  613. synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +0 -213
  614. synth_ai/learning/prompts/mipro.py +0 -289
  615. synth_ai/learning/prompts/random_search.py +0 -246
  616. synth_ai/learning/prompts/run_mipro_banking77.py +0 -172
  617. synth_ai/learning/prompts/run_random_search_banking77.py +0 -324
  618. synth_ai/learning/sse.py +0 -58
  619. synth_ai/learning/validators.py +0 -48
  620. synth_ai/lm/__init__.py +0 -51
  621. synth_ai/lm/caching/constants.py +0 -6
  622. synth_ai/lm/caching/dbs.py +0 -0
  623. synth_ai/lm/caching/ephemeral.py +0 -102
  624. synth_ai/lm/caching/handler.py +0 -137
  625. synth_ai/lm/caching/initialize.py +0 -11
  626. synth_ai/lm/caching/persistent.py +0 -114
  627. synth_ai/lm/config.py +0 -110
  628. synth_ai/lm/constants.py +0 -32
  629. synth_ai/lm/core/__init__.py +0 -8
  630. synth_ai/lm/core/all.py +0 -73
  631. synth_ai/lm/core/exceptions.py +0 -7
  632. synth_ai/lm/core/main.py +0 -319
  633. synth_ai/lm/core/main_v3.py +0 -594
  634. synth_ai/lm/core/synth_models.py +0 -48
  635. synth_ai/lm/core/vendor_clients.py +0 -188
  636. synth_ai/lm/cost/__init__.py +0 -0
  637. synth_ai/lm/cost/monitor.py +0 -1
  638. synth_ai/lm/cost/statefulness.py +0 -1
  639. synth_ai/lm/injection.py +0 -80
  640. synth_ai/lm/overrides.py +0 -206
  641. synth_ai/lm/provider_support/__init__.py +0 -8
  642. synth_ai/lm/provider_support/anthropic.py +0 -972
  643. synth_ai/lm/provider_support/openai.py +0 -1139
  644. synth_ai/lm/provider_support/suppress_logging.py +0 -31
  645. synth_ai/lm/structured_outputs/__init__.py +0 -0
  646. synth_ai/lm/structured_outputs/handler.py +0 -440
  647. synth_ai/lm/structured_outputs/inject.py +0 -297
  648. synth_ai/lm/structured_outputs/rehabilitate.py +0 -185
  649. synth_ai/lm/tools/__init__.py +0 -3
  650. synth_ai/lm/tools/base.py +0 -172
  651. synth_ai/lm/unified_interface.py +0 -202
  652. synth_ai/lm/vendors/__init__.py +0 -0
  653. synth_ai/lm/vendors/base.py +0 -81
  654. synth_ai/lm/vendors/core/__init__.py +0 -0
  655. synth_ai/lm/vendors/core/anthropic_api.py +0 -387
  656. synth_ai/lm/vendors/core/gemini_api.py +0 -292
  657. synth_ai/lm/vendors/core/mistral_api.py +0 -322
  658. synth_ai/lm/vendors/core/openai_api.py +0 -220
  659. synth_ai/lm/vendors/core/synth_dev_api.py +0 -0
  660. synth_ai/lm/vendors/local/__init__.py +0 -0
  661. synth_ai/lm/vendors/local/ollama.py +0 -0
  662. synth_ai/lm/vendors/openai_standard.py +0 -780
  663. synth_ai/lm/vendors/openai_standard_responses.py +0 -256
  664. synth_ai/lm/vendors/retries.py +0 -22
  665. synth_ai/lm/vendors/supported/__init__.py +0 -0
  666. synth_ai/lm/vendors/supported/custom_endpoint.py +0 -417
  667. synth_ai/lm/vendors/supported/deepseek.py +0 -69
  668. synth_ai/lm/vendors/supported/grok.py +0 -75
  669. synth_ai/lm/vendors/supported/groq.py +0 -16
  670. synth_ai/lm/vendors/supported/ollama.py +0 -15
  671. synth_ai/lm/vendors/supported/openrouter.py +0 -74
  672. synth_ai/lm/vendors/supported/together.py +0 -11
  673. synth_ai/lm/vendors/synth_client.py +0 -808
  674. synth_ai/lm/warmup.py +0 -186
  675. synth_ai/rl/secrets.py +0 -19
  676. synth_ai/scripts/verify_rewards.py +0 -100
  677. synth_ai/task/__init__.py +0 -10
  678. synth_ai/task/contracts.py +0 -120
  679. synth_ai/task/health.py +0 -28
  680. synth_ai/task/validators.py +0 -12
  681. synth_ai/tracing/__init__.py +0 -30
  682. synth_ai/tracing_v1/__init__.py +0 -33
  683. synth_ai/tracing_v3/config.py +0 -84
  684. synth_ai/tracing_v3/storage/config.py +0 -62
  685. synth_ai/tracing_v3/turso/__init__.py +0 -25
  686. synth_ai/tracing_v3/turso/daemon.py +0 -144
  687. synth_ai/tracing_v3/turso/manager.py +0 -760
  688. synth_ai/v0/tracing/__init__.py +0 -0
  689. synth_ai/v0/tracing/abstractions.py +0 -224
  690. synth_ai/v0/tracing/base_client.py +0 -91
  691. synth_ai/v0/tracing/client_manager.py +0 -131
  692. synth_ai/v0/tracing/config.py +0 -140
  693. synth_ai/v0/tracing/context.py +0 -146
  694. synth_ai/v0/tracing/decorators.py +0 -680
  695. synth_ai/v0/tracing/events/__init__.py +0 -0
  696. synth_ai/v0/tracing/events/manage.py +0 -147
  697. synth_ai/v0/tracing/events/scope.py +0 -86
  698. synth_ai/v0/tracing/events/store.py +0 -228
  699. synth_ai/v0/tracing/immediate_client.py +0 -151
  700. synth_ai/v0/tracing/local.py +0 -18
  701. synth_ai/v0/tracing/log_client_base.py +0 -73
  702. synth_ai/v0/tracing/retry_queue.py +0 -186
  703. synth_ai/v0/tracing/trackers.py +0 -515
  704. synth_ai/v0/tracing/upload.py +0 -510
  705. synth_ai/v0/tracing/utils.py +0 -9
  706. synth_ai/v0/tracing_v1/__init__.py +0 -16
  707. synth_ai/v0/tracing_v1/abstractions.py +0 -224
  708. synth_ai/v0/tracing_v1/base_client.py +0 -91
  709. synth_ai/v0/tracing_v1/client_manager.py +0 -131
  710. synth_ai/v0/tracing_v1/config.py +0 -140
  711. synth_ai/v0/tracing_v1/context.py +0 -146
  712. synth_ai/v0/tracing_v1/decorators.py +0 -701
  713. synth_ai/v0/tracing_v1/events/__init__.py +0 -0
  714. synth_ai/v0/tracing_v1/events/manage.py +0 -147
  715. synth_ai/v0/tracing_v1/events/scope.py +0 -86
  716. synth_ai/v0/tracing_v1/events/store.py +0 -228
  717. synth_ai/v0/tracing_v1/immediate_client.py +0 -151
  718. synth_ai/v0/tracing_v1/local.py +0 -18
  719. synth_ai/v0/tracing_v1/log_client_base.py +0 -73
  720. synth_ai/v0/tracing_v1/retry_queue.py +0 -186
  721. synth_ai/v0/tracing_v1/trackers.py +0 -515
  722. synth_ai/v0/tracing_v1/upload.py +0 -525
  723. synth_ai/v0/tracing_v1/utils.py +0 -9
  724. synth_ai/zyk/__init__.py +0 -30
  725. synth_ai-0.2.6.dev1.dist-info/METADATA +0 -106
  726. synth_ai-0.2.6.dev1.dist-info/RECORD +0 -416
  727. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/__init__.py +0 -0
  728. /synth_ai/{lm/caching → core/apps}/__init__.py +0 -0
  729. /synth_ai/{tracing_v3 → core/tracing_v3}/lm_call_record_abstractions.py +0 -0
  730. /synth_ai/{tracing_v3 → core/tracing_v3}/storage/__init__.py +0 -0
  731. /synth_ai/{tracing_v3 → core/tracing_v3}/storage/exceptions.py +0 -0
  732. /synth_ai/{tracing_v3 → core/tracing_v3}/storage/types.py +0 -0
  733. /synth_ai/{compound/cais.py → py.typed} +0 -0
  734. /synth_ai/{learning → sdk/learning}/core.py +0 -0
  735. /synth_ai/{learning → sdk/learning}/gateway.py +0 -0
  736. {synth_ai-0.2.6.dev1.dist-info → synth_ai-0.4.3.dist-info}/WHEEL +0 -0
  737. {synth_ai-0.2.6.dev1.dist-info → synth_ai-0.4.3.dist-info}/licenses/LICENSE +0 -0
  738. {synth_ai-0.2.6.dev1.dist-info → synth_ai-0.4.3.dist-info}/top_level.txt +0 -0
@@ -1,858 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- Comprehensive script to run Crafter rollouts for multiple models and compare their performance.
4
- Updated to use tracing_v3 with async architecture.
5
-
6
- Runs experiments for:
7
- - gpt-4o-mini
8
- - gpt-4.1-mini
9
- - gpt-4.1-nano
10
- - gemini-1.5-flash
11
- - gemini-2.5-flash-lite
12
- - qwen3/32b
13
-
14
- Analyzes and compares:
15
- - Invalid action rates
16
- - Achievement frequencies by step
17
- - Achievement counts across models
18
- - Performance metrics
19
- - Cost analysis
20
- """
21
-
22
- import argparse
23
- import asyncio
24
- import json
25
- import logging
26
- import os
27
- import sys
28
- import time
29
- from collections import defaultdict
30
- from datetime import datetime
31
- from pathlib import Path
32
- from typing import Any
33
- from uuid import uuid4
34
-
35
- import numpy as np
36
- import pandas as pd
37
- from tqdm import tqdm
38
- from tqdm.asyncio import tqdm_asyncio as atqdm
39
-
40
- # Disable httpx logging for cleaner output
41
- logging.getLogger("httpx").setLevel(logging.WARNING)
42
-
43
- # Add parent directory to path for imports
44
- sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent.parent.parent))
45
-
46
- # Disable v1 logging to see v3 tracing clearly
47
- os.environ["LANGFUSE_ENABLED"] = "false"
48
- os.environ["SYNTH_LOGGING"] = "false"
49
-
50
- # Import enhanced LM with v3 tracing
51
- from synth_ai.lm.core.main_v3 import LM
52
- from synth_ai.tracing_v3.abstractions import (
53
- EnvironmentEvent,
54
- RuntimeEvent,
55
- SessionEventMarkovBlanketMessage,
56
- TimeRecord,
57
- )
58
- from synth_ai.tracing_v3.decorators import set_turn_number
59
-
60
- # Import session tracer for v3 tracing
61
- from synth_ai.tracing_v3.session_tracer import SessionTracer
62
-
63
- # from synth_ai.tracing_v3.utils import create_experiment_context # Not needed
64
- from synth_ai.tracing_v3.turso.manager import AsyncSQLTraceManager
65
-
66
- # Import Crafter hooks
67
- try:
68
- from synth_ai.environments.examples.crafter_classic.trace_hooks_v3 import CRAFTER_HOOKS
69
- print(f"✅ Loaded {len(CRAFTER_HOOKS.hooks)} Crafter achievement hooks (Easy, Medium, Hard)")
70
- except ImportError:
71
- print("Warning: Could not import CRAFTER_HOOKS for v3")
72
- from synth_ai.tracing_v3.hooks import HookManager
73
- CRAFTER_HOOKS = HookManager()
74
-
75
- import random
76
-
77
- import httpx
78
-
79
- # Global buckets for sessions
80
- _SESSIONS: dict[str, tuple[str, object]] = {} # session_id -> (experiment_id, trace)
81
-
82
- # Configuration
83
- MODELS_TO_TEST = [
84
- "gpt-4o-mini",
85
- "gpt-4.1-mini",
86
- ]
87
-
88
- # Service URLs (modify these based on your setup)
89
- CRAFTER_SERVICE_URL = "http://localhost:8901"
90
-
91
- # Database configuration - uses the centralized config which matches serve.sh
92
- from synth_ai.tracing_v3.db_config import get_default_db_config
93
-
94
- db_config = get_default_db_config()
95
- DATABASE_URL = db_config.database_url
96
-
97
- # Retry configuration for HTTP requests
98
- MAX_RETRIES = 3
99
- BASE_DELAY = 0.1
100
- MAX_DELAY = 2.0
101
- HTTP_TIMEOUT = 30.0
102
-
103
class ExperimentConfig:
    """Configuration for the multi-model experiment.

    Every setting keeps its historical default, so ``ExperimentConfig()``
    produces the same object as before.  Individual settings may now be
    overridden by keyword at construction time instead of being patched
    onto the instance afterwards.

    Args:
        num_episodes: Number of episodes to run per model.
        max_turns: Maximum number of turns per episode.
        difficulty: Difficulty value sent to the Crafter service.
        save_traces: Flag stored on the config as ``save_traces``.
        verbose: Flag stored on the config as ``verbose``.
        quiet: Flag stored on the config as ``quiet`` (defaults to verbose mode).
        enable_v3_tracing: Flag stored on the config as ``enable_v3_tracing``.
        v3_trace_dir: Directory path stored as ``v3_trace_dir``.
        crafter_service_url: Base URL of the Crafter service; ``None`` means
            use the module-level ``CRAFTER_SERVICE_URL``.
        database_url: Trace database URL; ``None`` means use the module-level
            ``DATABASE_URL``.
        base_seed: Base seed; the episode number is added to it per episode.
        turn_timeout: Timeout per turn, in seconds.
        episode_timeout: Total timeout per episode, in seconds.
    """

    def __init__(
        self,
        *,
        num_episodes: int = 10,
        max_turns: int = 100,
        difficulty: str = "easy",
        save_traces: bool = True,
        verbose: bool = True,
        quiet: bool = False,
        enable_v3_tracing: bool = True,
        v3_trace_dir: str = "./traces",
        crafter_service_url: "str | None" = None,
        database_url: "str | None" = None,
        base_seed: int = 1000,
        turn_timeout: float = 30.0,
        episode_timeout: float = 300.0,
    ) -> None:
        self.num_episodes = num_episodes  # Number of episodes per model
        self.max_turns = max_turns  # Max turns per episode
        self.difficulty = difficulty
        self.save_traces = save_traces
        self.verbose = verbose
        self.quiet = quiet  # Default to verbose mode
        self.enable_v3_tracing = enable_v3_tracing
        self.v3_trace_dir = v3_trace_dir
        # The module-level defaults are looked up lazily so that an explicit
        # override never depends on the global configuration being present.
        self.crafter_service_url = (
            CRAFTER_SERVICE_URL if crafter_service_url is None else crafter_service_url
        )
        self.database_url = DATABASE_URL if database_url is None else database_url
        self.base_seed = base_seed  # Base seed for episode generation
        self.turn_timeout = turn_timeout  # Timeout per turn in seconds
        self.episode_timeout = episode_timeout  # Total timeout per episode in seconds
120
-
121
-
122
async def retry_http_request(client: httpx.AsyncClient, method: str, url: str, **kwargs) -> Any:
    """Send an HTTP request, retrying with exponential backoff and jitter.

    Up to ``MAX_RETRIES`` attempts are made.  Any response with a status code
    below 500 is returned immediately (including 4xx responses); 5xx
    responses and raised exceptions trigger another attempt.

    Args:
        client: The ``httpx.AsyncClient`` used to send the request.
        method: HTTP method name, e.g. ``"POST"``.
        url: Target URL.
        **kwargs: Extra keyword arguments forwarded to ``client.request``.
            ``timeout`` defaults to ``HTTP_TIMEOUT`` but may be overridden.

    Returns:
        The first response whose status code is below 500.

    Raises:
        Exception: The last error observed once all attempts are exhausted
            (a generic ``Exception`` for 5xx responses and connection
            failures, otherwise the original exception).
        RuntimeError: If no attempt was made because ``MAX_RETRIES`` is not
            positive.
    """
    # Passing timeout both explicitly and via **kwargs would raise TypeError,
    # so fold the default into kwargs and let a caller-supplied value win.
    kwargs.setdefault("timeout", HTTP_TIMEOUT)
    last_exception = None

    for attempt in range(MAX_RETRIES):
        has_retries_left = attempt < MAX_RETRIES - 1
        try:
            if attempt > 0:
                # Exponential backoff capped at MAX_DELAY, plus up to 10% jitter.
                delay = min(BASE_DELAY * (2 ** (attempt - 1)), MAX_DELAY)
                jitter = random.uniform(0, 0.1 * delay)
                await asyncio.sleep(delay + jitter)

            response = await client.request(method, url, **kwargs)

            if response.status_code < 500:
                return response

            last_exception = Exception(f"HTTP {response.status_code}: {response.text}")

        except httpx.ConnectError as e:
            last_exception = Exception(f"Connection failed to {url}: {e}")
            # Keep the original error reachable from the wrapper's traceback.
            last_exception.__cause__ = e
            if has_retries_left:
                # Connection failures wait longer, on top of the regular backoff.
                await asyncio.sleep(1.0 * (2 ** attempt))
        except httpx.ReadError as e:
            last_exception = e
            if has_retries_left:
                read_error_delay = min(1.0 * (2 ** attempt), 5.0)
                await asyncio.sleep(read_error_delay)
        except Exception as e:
            # Best-effort retry: any other failure is remembered and retried.
            last_exception = e

    if last_exception is None:
        # Only reachable when MAX_RETRIES <= 0; ``raise None`` would be a TypeError.
        last_exception = RuntimeError(f"No attempts were made for {method} {url}")

    print(f" ❌ HTTP request failed after {MAX_RETRIES} attempts: {method} {url}")
    print(f" ❌ Error: {type(last_exception).__name__}: {str(last_exception)[:200]}")
    raise last_exception
156
-
157
-
158
# Crafter action names, ordered so that each name's index is its action ID.
_CRAFTER_ACTION_NAMES = (
    "noop",
    "move_left",
    "move_right",
    "move_up",
    "move_down",
    "do",
    "sleep",
    "place_stone",
    "place_table",
    "place_furnace",
    "place_plant",
    "make_wood_pickaxe",
    "make_stone_pickaxe",
    "make_iron_pickaxe",
    "make_wood_sword",
    "make_stone_sword",
    "make_iron_sword",
    "eat_cow",
    "eat_plant",
)

# Crafter action mapping: action name -> integer action ID.
CRAFTER_ACTIONS = {name: action_id for action_id, name in enumerate(_CRAFTER_ACTION_NAMES)}

# Reverse mapping (action ID -> action name), used for validation.
INT_TO_ACTION_STRING = dict(enumerate(_CRAFTER_ACTION_NAMES))
169
-
170
-
171
def compress_observation_for_trace(obs: dict[str, Any]) -> str:
    """Compress observation data for storage in traces.

    Args:
        obs: Observation mapping.  The keys read are ``inventory``,
            ``nearby``, ``status`` (``health`` and ``food``) and
            ``achievements_status``; missing keys fall back to empty or
            zero values.

    Returns:
        A compact JSON string with the keys ``inv`` (inventory entries with
        a positive count), ``nearby``, ``hp``, ``food`` and ``ach`` (number
        of truthy achievement flags).  If the summary cannot be built, a
        JSON object of the form ``{"error": "<message>"}`` is returned
        instead of raising.
    """
    try:
        summary = {
            "inv": {k: v for k, v in obs.get("inventory", {}).items() if v > 0},
            "nearby": obs.get("nearby", []),
            "hp": obs.get("status", {}).get("health", 0),
            "food": obs.get("status", {}).get("food", 0),
            "ach": sum(1 for v in obs.get("achievements_status", {}).values() if v),
        }
        return json.dumps(summary, separators=(',', ':'))
    except Exception as e:
        # Deliberately best-effort: tracing must never abort an episode.
        # json.dumps escapes quotes and backslashes in the message, so the
        # fallback is always valid JSON (string interpolation was not).
        return json.dumps({"error": str(e)})
183
-
184
-
185
def create_message(content: str, message_type: str, system_id: str, turn: int) -> SessionEventMarkovBlanketMessage:
    """Build a SessionEventMarkovBlanketMessage tagged with its origin and turn.

    Args:
        content: Message text.
        message_type: Message type label passed through unchanged.
        system_id: Identifier stored under ``metadata["system_id"]``.
        turn: Turn number, stored under ``metadata["turn"]`` and used as the
            time record's ``message_time``.

    Returns:
        The new message; its ``event_time`` is the wall-clock time at which
        this function ran.
    """
    stamp = TimeRecord(event_time=time.time(), message_time=turn)
    tags = {"system_id": system_id, "turn": turn}
    return SessionEventMarkovBlanketMessage(
        content=content,
        message_type=message_type,
        metadata=tags,
        time_record=stamp,
    )
196
-
197
-
198
- async def run_episode(config: ExperimentConfig,
199
- model_name: str,
200
- episode_num: int,
201
- experiment_id: str) -> dict[str, Any]:
202
- """Run a single episode with a specific model using v3 tracing."""
203
- # Create a new session tracer for this episode
204
- session_tracer = SessionTracer(hooks=CRAFTER_HOOKS, db_url=config.database_url)
205
-
206
- # Start session with metadata
207
- session_id = await session_tracer.start_session(
208
- metadata={
209
- "model": model_name,
210
- "episode": episode_num,
211
- "experiment_id": experiment_id,
212
- "difficulty": config.difficulty
213
- }
214
- )
215
-
216
- # Started tracing session (output disabled for clean UI)
217
-
218
- # Store session in global bucket
219
- _SESSIONS[session_id] = (experiment_id, session_tracer)
220
-
221
- # Initialize LM with session tracer
222
- lm = LM(
223
- vendor="openai",
224
- model=model_name,
225
- temperature=0.1, # Low temperature for more consistent gameplay
226
- session_tracer=session_tracer,
227
- system_id=f"crafter_agent_{model_name}",
228
- enable_v3_tracing=True
229
- )
230
-
231
- # Create HTTP client
232
- async with httpx.AsyncClient() as client:
233
- try:
234
- # Initialize environment with consecutive seed
235
- seed = config.base_seed + episode_num # Base seed + episode number for consecutive seeds
236
- request_data = {"config": {"difficulty": config.difficulty, "seed": seed}}
237
- init_response = await retry_http_request(
238
- client, "POST", f"{config.crafter_service_url}/env/CrafterClassic/initialize",
239
- json=request_data
240
- )
241
- init_data = init_response.json()
242
-
243
- # Debug the response format (removed for clean output)
244
-
245
- # Handle different possible response formats
246
- if "instance_id" in init_data:
247
- instance_id = init_data["instance_id"]
248
- elif "env_id" in init_data:
249
- instance_id = init_data["env_id"]
250
- elif "id" in init_data:
251
- instance_id = init_data["id"]
252
- else:
253
- # If none of the expected keys exist, print the response and raise a clear error
254
- print(f"❌ Unexpected response format from Crafter service: {init_data}")
255
- raise KeyError(f"Could not find environment ID in response. Available keys: {list(init_data.keys())}")
256
-
257
- # Get initial observation (from initialize response)
258
- obs = init_data["observation"]
259
-
260
- prev_obs = obs
261
- done = False
262
- invalid_actions = 0
263
- total_actions = 0
264
- episode_start_time = time.time()
265
-
266
- for turn in range(config.max_turns):
267
- if done:
268
- break
269
-
270
- # Check episode timeout
271
- if time.time() - episode_start_time > config.episode_timeout:
272
- print(f" ⏰ Episode {episode_num} timed out after {config.episode_timeout}s")
273
- done = True
274
- break
275
-
276
- # Update progress bar
277
- if hasattr(config, '_pbar'):
278
- current_achievements = sum(1 for v in obs.get("achievements_status", {}).values() if v)
279
- config._pbar.set_postfix({
280
- f"ep{episode_num}": f"step {turn+1}/{config.max_turns}, ach: {current_achievements}"
281
- })
282
-
283
- set_turn_number(turn)
284
-
285
- # Start timestep for this turn
286
- await session_tracer.start_timestep(f"turn_{turn}")
287
-
288
- # Prepare context for the agent
289
- inventory_str = ", ".join([f"{k}: {v}" for k, v in obs.get("inventory", {}).items() if v > 0])
290
- if not inventory_str:
291
- inventory_str = "empty"
292
-
293
- nearby_str = ", ".join(obs.get("nearby", []))
294
- if not nearby_str:
295
- nearby_str = "nothing"
296
-
297
- status = obs.get("status", {})
298
- health = status.get("health", 0)
299
- hunger = status.get("food", 0)
300
-
301
- # Get more detailed game state
302
- position = obs.get("position", [0, 0])
303
- achievements = obs.get("achievements_status", {})
304
- unlocked = [name for name, status in achievements.items() if status]
305
- achievements_str = ", ".join(unlocked) if unlocked else "none"
306
-
307
- # Get semantic map if available
308
- semantic_map = obs.get("semantic_map", None)
309
- map_str = ""
310
- if semantic_map is not None:
311
- # Simple 5x5 view around player
312
- try:
313
- px, py = position
314
- view_size = 5
315
- half = view_size // 2
316
- map_lines = []
317
- for dy in range(-half, half + 1):
318
- row = []
319
- for dx in range(-half, half + 1):
320
- x, y = px + dx, py + dy
321
- if dx == 0 and dy == 0:
322
- row.append("@") # Player
323
- elif 0 <= x < len(semantic_map) and 0 <= y < len(semantic_map[0]):
324
- cell = semantic_map[x][y]
325
- # Map common items
326
- if cell == 0:
327
- row.append(".") # Empty/grass
328
- elif cell == 1:
329
- row.append("T") # Tree
330
- elif cell == 2:
331
- row.append("S") # Stone
332
- elif cell == 3:
333
- row.append("C") # Cow
334
- elif cell == 4:
335
- row.append("W") # Water
336
- else:
337
- row.append("?")
338
- else:
339
- row.append("#") # Out of bounds
340
- map_lines.append(" ".join(row))
341
- map_str = "\nMap (5x5 view, @ = you):\n" + "\n".join(map_lines)
342
- except Exception:
343
- map_str = "\nMap view unavailable"
344
-
345
- # Create agent prompt
346
- prompt = f"""Game State (Turn {turn}):
347
- - Position: {position}
348
- - Health: {health}/9
349
- - Hunger: {hunger}/9
350
- - Inventory: {inventory_str}
351
- - Nearby objects: {nearby_str}
352
- - Achievements unlocked: {achievements_str}
353
- {map_str}
354
-
355
- Choose your next actions based on what you see. Use the 'interact' tool with a list of action IDs.
356
-
357
- Tips:
358
- - Look at the map! T=tree (wood), S=stone, C=cow (food), W=water
359
- - To collect resources: move to them (actions 1-4) then use action 5 (do)
360
- - To craft: place table (8) first, then craft tools (11-16)
361
- - If hungry and see cow (C), move to it and eat (17)
362
-
363
- What actions do you want to take?"""
364
-
365
- # Send observation as message
366
- obs_msg = create_message(
367
- f"Observation: {compress_observation_for_trace(obs)}",
368
- "system",
369
- f"crafter_env_{instance_id}",
370
- turn
371
- )
372
- await session_tracer.record_message(
373
- content=obs_msg.content,
374
- message_type=obs_msg.message_type,
375
- event_time=obs_msg.time_record.event_time,
376
- message_time=obs_msg.time_record.message_time,
377
- metadata=obs_msg.metadata
378
- )
379
-
380
- # Get action from LM with tools (with timeout)
381
- turn_start_time = time.time()
382
- try:
383
- # Define the interact tool for Crafter
384
- from pydantic import BaseModel, Field
385
- from synth_ai.lm.tools.base import BaseTool
386
-
387
- class InteractArgs(BaseModel):
388
- actions: list[int] = Field(..., description="List of action IDs to execute")
389
-
390
- interact_tool = BaseTool(
391
- name="interact",
392
- arguments=InteractArgs,
393
- description="Execute actions in the Crafter game"
394
- )
395
-
396
- # Create system message that explains available actions
397
- action_list = "\n".join([f"{action_id}: {action}" for action, action_id in CRAFTER_ACTIONS.items()])
398
- system_message = f"""You are an agent playing Crafter, a 2D survival game. Your goal is to survive and unlock achievements.
399
-
400
- You MUST use the 'interact' tool to execute actions. The tool takes a list of action IDs.
401
-
402
- Action ID mapping:
403
- {action_list}
404
-
405
- Strategy tips:
406
- - Start by collecting wood (move to trees and use action 5)
407
- - Place a crafting table (action 8) to unlock crafting recipes
408
- - Craft tools to collect resources more efficiently
409
- - Eat when hungry, sleep when tired
410
- - Explore to find different resources
411
-
412
- IMPORTANT: Always use the 'interact' tool with a list of action IDs. For example: interact(actions=[2, 2, 5]) to move right twice and collect."""
413
-
414
- # Get actions from LM using tools with timeout
415
- try:
416
- action_response = await asyncio.wait_for(
417
- lm.respond_async(
418
- system_message=system_message,
419
- user_message=prompt,
420
- tools=[interact_tool],
421
- turn_number=turn
422
- ),
423
- timeout=config.turn_timeout
424
- )
425
- except asyncio.TimeoutError:
426
- print(f" ⏰ Turn {turn} timed out for episode {episode_num} after {config.turn_timeout}s")
427
- action_response = None
428
- done = True
429
- break
430
-
431
- # Debug: print response (removed for clean output)
432
-
433
- # Extract tool calls from response
434
- if hasattr(action_response, 'tool_calls') and action_response.tool_calls:
435
- tool_calls = action_response.tool_calls
436
-
437
- # Process each tool call
438
- for tool_call in tool_calls:
439
- if tool_call.get('function', {}).get('name') == 'interact':
440
- # Extract actions from the tool call
441
- import json
442
- args = json.loads(tool_call.get('function', {}).get('arguments', '{}'))
443
- actions = args.get('actions', [])
444
-
445
- if not actions:
446
- # If no actions provided, use noop
447
- actions = [0]
448
-
449
- # Execute each action separately
450
- for action_id in actions:
451
- total_actions += 1
452
-
453
- # Validate action ID
454
- if action_id not in INT_TO_ACTION_STRING:
455
- # Invalid action logging removed for clean output
456
- action_id = 0
457
- invalid_actions += 1
458
-
459
- # Send action to Crafter service with timeout
460
- try:
461
- step_response = await asyncio.wait_for(
462
- retry_http_request(
463
- client, "POST", f"{config.crafter_service_url}/env/CrafterClassic/step",
464
- json={
465
- "env_id": instance_id,
466
- "action": {
467
- "tool_calls": [
468
- {"tool": "interact", "args": {"action": action_id}}
469
- ]
470
- }
471
- }
472
- ),
473
- timeout=5.0 # 5 second timeout for individual action
474
- )
475
- except asyncio.TimeoutError:
476
- print(f" ⏰ Action execution timed out in episode {episode_num}")
477
- done = True
478
- break
479
-
480
- if step_response.status_code != 200:
481
- print(f" ❌ Step failed: {step_response.status_code} - {step_response.text}")
482
- done = True
483
- break
484
-
485
- step_data = step_response.json()
486
-
487
- # Extract data from response
488
- new_obs = step_data["observation"]
489
- reward = step_data["reward"]
490
- done = step_data["done"]
491
-
492
- # Record runtime event for action
493
- action_name = INT_TO_ACTION_STRING.get(action_id, "unknown")
494
- runtime_event = RuntimeEvent(
495
- system_instance_id=f"crafter_env_{instance_id}",
496
- time_record=TimeRecord(
497
- event_time=time.time(),
498
- message_time=turn
499
- ),
500
- actions=[action_id],
501
- metadata={
502
- "action_name": action_name,
503
- "valid": action_name != "noop" or invalid_actions == 0
504
- }
505
- )
506
- await session_tracer.record_event(runtime_event)
507
-
508
- # Record environment event
509
- env_event = EnvironmentEvent(
510
- system_instance_id=f"crafter_env_{instance_id}",
511
- time_record=TimeRecord(
512
- event_time=time.time(),
513
- message_time=turn
514
- ),
515
- reward=reward,
516
- terminated=done,
517
- system_state_before={"observation": prev_obs},
518
- system_state_after={"observation": new_obs, "public_state": {"achievements_status": new_obs.get("achievements_status", {})}}
519
- )
520
- await session_tracer.record_event(env_event)
521
-
522
- # Update for next turn
523
- prev_obs = obs
524
- obs = new_obs
525
-
526
- if done:
527
- break
528
-
529
- # Update progress bar after each action
530
- if hasattr(config, '_pbar'):
531
- config._pbar.update(1)
532
- else:
533
- # No tool calls provided, use noop
534
- action_id = 0
535
- total_actions += 1
536
- invalid_actions += 1
537
-
538
- # Send noop action with timeout
539
- try:
540
- step_response = await asyncio.wait_for(
541
- retry_http_request(
542
- client, "POST", f"{config.crafter_service_url}/env/CrafterClassic/step",
543
- json={
544
- "env_id": instance_id,
545
- "action": {
546
- "tool_calls": [
547
- {"tool": "interact", "args": {"action": action_id}}
548
- ]
549
- }
550
- }
551
- ),
552
- timeout=5.0 # 5 second timeout
553
- )
554
- except asyncio.TimeoutError:
555
- print(f" ⏰ Noop action timed out in episode {episode_num}")
556
- done = True
557
- break
558
-
559
- if step_response.status_code != 200:
560
- print(f" ❌ Step failed: {step_response.status_code} - {step_response.text}")
561
- done = True
562
- else:
563
- step_data = step_response.json()
564
- new_obs = step_data["observation"]
565
- reward = step_data["reward"]
566
- done = step_data["done"]
567
-
568
- # Update observation
569
- prev_obs = obs
570
- obs = new_obs
571
-
572
- # End timestep
573
- await session_tracer.end_timestep(f"turn_{turn}")
574
-
575
- except Exception as e:
576
- print(f" ❌ Environment step error: {e}")
577
- done = True
578
-
579
- # Update progress bar for remaining steps if episode ended early
580
- if hasattr(config, '_pbar') and turn < config.max_turns - 1:
581
- remaining_steps = config.max_turns - turn - 1
582
- config._pbar.update(remaining_steps)
583
-
584
- # Calculate invalid action rate
585
- invalid_rate = invalid_actions / total_actions if total_actions > 0 else 0
586
-
587
- # Calculate achievements
588
- final_achievements = obs.get("achievements_status", {})
589
- total_achievements = sum(1 for v in final_achievements.values() if v)
590
-
591
- # Terminate environment
592
- try:
593
- await retry_http_request(
594
- client, "POST", f"{config.crafter_service_url}/env/CrafterClassic/terminate",
595
- json={"env_id": instance_id}
596
- )
597
- except Exception as e:
598
- print(f" ⚠️ Failed to terminate environment: {e}")
599
-
600
- # End session
601
- await session_tracer.end_session(save=config.save_traces)
602
- # Close the tracer for this episode
603
- await session_tracer.close()
604
-
605
- return {
606
- "model": model_name,
607
- "episode": episode_num,
608
- "total_achievements": total_achievements,
609
- "achievements": final_achievements,
610
- "invalid_action_rate": invalid_rate,
611
- "total_actions": total_actions,
612
- "invalid_actions": invalid_actions,
613
- "session_id": session_id
614
- }
615
-
616
- except Exception as e:
617
- print(f" ❌ Episode failed: {e}")
618
- import traceback
619
- traceback.print_exc()
620
-
621
- # End session even if failed
622
- await session_tracer.end_session(save=config.save_traces)
623
- # Close the tracer for this episode
624
- await session_tracer.close()
625
-
626
- return {
627
- "model": model_name,
628
- "episode": episode_num,
629
- "total_achievements": 0,
630
- "achievements": {},
631
- "invalid_action_rate": 1.0,
632
- "total_actions": 0,
633
- "invalid_actions": 0,
634
- "session_id": session_id,
635
- "error": str(e)
636
- }
637
-
638
-
639
async def run_model_experiment(config: ExperimentConfig, model_name: str, experiment_id: str) -> list[dict[str, Any]]:
    """Launch every episode for one model concurrently and collect the results.

    A single progress bar sized ``num_episodes * max_turns`` is created and
    attached to ``config`` as ``_pbar`` so the episode coroutines can advance
    it. After all episodes finish, averages over the episodes whose result has
    no ``"error"`` key are shown as the bar's postfix. The bar is closed even
    if gathering the episodes raises.

    Args:
        config: Experiment settings; ``num_episodes`` and ``max_turns`` are read
            here and ``_pbar`` is written.
        model_name: Model identifier, used in the banner and as the bar label.
        experiment_id: Passed through unchanged to each ``run_episode`` call.

    Returns:
        One result dict per episode, in episode-index order.
    """
    print(f"\n🚀 Running {config.num_episodes} episodes for {model_name} in parallel...\n")

    # One tick per environment step, summed over all episodes.
    progress = atqdm(
        total=config.num_episodes * config.max_turns,
        desc=f"{model_name}",
        unit="steps",
        leave=True,
    )
    config._pbar = progress  # episodes look this attribute up to report progress

    try:
        # Build one coroutine per episode; gather returns results in this order.
        episode_coros = [
            run_episode(config, model_name, episode_idx, experiment_id)
            for episode_idx in range(config.num_episodes)
        ]
        results = await asyncio.gather(*episode_coros)

        # Summary statistics only consider episodes that did not report an error.
        completed = [res for res in results if "error" not in res]
        if completed:
            n_completed = len(completed)
            mean_achievements = sum(res["total_achievements"] for res in completed) / n_completed
            mean_invalid_rate = sum(res["invalid_action_rate"] for res in completed) / n_completed
            progress.set_postfix({
                "avg_achievements": f"{mean_achievements:.1f}",
                "avg_invalid_rate": f"{mean_invalid_rate:.1%}",
                "success_rate": f"{n_completed}/{len(results)}",
            })
    finally:
        progress.close()

    return results
672
-
673
-
674
async def analyze_results(config: ExperimentConfig, all_results: dict[str, list[dict[str, Any]]]):
    """Print a cross-model comparison and export the raw results to JSON.

    Sections printed, in order:

    1. Per-model summary (mean/std/max achievements, mean invalid-action rate,
       fraction of episodes without an ``"error"`` key), best model first.
    2. Per-achievement unlock counts for each model.
    3. Model usage rows returned by the trace database, restricted to the
       models present in ``all_results``.

    Finally a ``crafter_experiment_results_<timestamp>.json`` file is written
    to the current working directory.

    Args:
        config: Experiment settings; ``database_url``, ``num_episodes``,
            ``max_turns`` and ``difficulty`` are read.
        all_results: Mapping of model name to that model's episode result
            dicts. Episodes carrying an ``"error"`` key are excluded from all
            statistics but are still written to the JSON export.

    Note:
        The database manager is always closed, even if analysis fails midway.
    """
    print("\n📊 Analysis Results:")
    print("=" * 80)

    # Initialize database manager
    db_manager = AsyncSQLTraceManager(config.database_url)
    await db_manager.initialize()

    try:
        # Successful episodes per model. Computed once here and reused by every
        # section below instead of re-filtering inside the table loops.
        valid_by_model = {
            model: [r for r in results if "error" not in r]
            for model, results in all_results.items()
        }

        # Basic statistics by model (models with no successful episode are omitted)
        model_stats = {}
        for model, results in all_results.items():
            valid_results = valid_by_model[model]
            if valid_results:
                achievements = [r["total_achievements"] for r in valid_results]
                invalid_rates = [r["invalid_action_rate"] for r in valid_results]

                model_stats[model] = {
                    "avg_achievements": np.mean(achievements),
                    "std_achievements": np.std(achievements),
                    "max_achievements": max(achievements),
                    "avg_invalid_rate": np.mean(invalid_rates),
                    "success_rate": len(valid_results) / len(results),
                }

        # Print model comparison
        print("\n📈 Model Performance Summary:")
        print(f"{'Model':<20} {'Avg Achievements':<18} {'Max Achievements':<18} {'Invalid Rate':<15} {'Success Rate':<15}")
        print("-" * 86)

        for model, stats in sorted(model_stats.items(), key=lambda x: x[1]["avg_achievements"], reverse=True):
            print(f"{model:<20} {stats['avg_achievements']:>6.2f} ± {stats['std_achievements']:>4.2f} "
                  f"{stats['max_achievements']:>16} {stats['avg_invalid_rate']:>12.2%} {stats['success_rate']:>12.2%}")

        # Achievement frequency analysis
        print("\n🏆 Achievement Frequencies:")
        achievement_counts = defaultdict(lambda: defaultdict(int))

        for model, valid_results in valid_by_model.items():
            for result in valid_results:
                for achievement, unlocked in result["achievements"].items():
                    if unlocked:
                        achievement_counts[model][achievement] += 1

        # Get all unique achievements
        all_achievements = set()
        for model_achievements in achievement_counts.values():
            all_achievements.update(model_achievements.keys())

        # Print achievement table
        if all_achievements:
            # Column order and denominators do not change between rows.
            sorted_models = sorted(all_results)
            totals = {model: len(valid_by_model[model]) for model in sorted_models}

            print(f"\n{'Achievement':<25} " + " ".join(f"{model[:8]:>10}" for model in sorted_models))
            print("-" * (25 + 11 * len(all_results)))

            for achievement in sorted(all_achievements):
                row = f"{achievement:<25}"
                for model in sorted_models:
                    count = achievement_counts[model].get(achievement, 0)
                    total = totals[model]
                    pct = (count / total * 100) if total > 0 else 0
                    row += f" {count:>3}/{total:<3} ({pct:>3.0f}%)"
                print(row)

        # Query model usage from database - filter to only show models used in this experiment
        print("\n💰 Model Usage Statistics from Current Experiment:")
        # NOTE(review): the result is used like a pandas DataFrame (``.empty``,
        # ``.isin``, ``.iterrows``) — confirm against AsyncSQLTraceManager.
        model_usage_df = await db_manager.get_model_usage()

        if model_usage_df is not None and not model_usage_df.empty:
            # Filter to only show models from this experiment
            experiment_models = set(all_results.keys())
            filtered_df = model_usage_df[model_usage_df['model_name'].isin(experiment_models)]

            if not filtered_df.empty:
                # Format model usage statistics as table
                print(f"{'Model':<20} {'Provider':<10} {'Usage Count':<12} {'Avg Latency (ms)':<18} {'Total Cost':<12}")
                print("-" * 72)
                for _, row in filtered_df.iterrows():
                    avg_latency = row['avg_latency_ms']
                    # A missing latency is rendered as a padded "N/A" cell.
                    if pd.notna(avg_latency):
                        latency_cell = f"{avg_latency:<18.2f}"
                    else:
                        latency_cell = f"{'N/A':<18}"
                    print(f"{row['model_name']:<20} {row['provider'] or 'N/A':<10} {row['usage_count']:<12} "
                          f"{latency_cell} ${row['total_cost_usd']:<11.4f}")

        # Export detailed results
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        results_file = f"crafter_experiment_results_{timestamp}.json"

        # json.dump escapes non-ASCII by default, so the explicit encoding only
        # pins the file to a platform-independent result.
        with open(results_file, "w", encoding="utf-8") as f:
            json.dump({
                "config": {
                    "num_episodes": config.num_episodes,
                    "max_turns": config.max_turns,
                    "difficulty": config.difficulty,
                    "models": list(all_results.keys()),
                },
                "results": all_results,
                "statistics": model_stats,
                "timestamp": timestamp,
            }, f, indent=2)

        print(f"\n💾 Detailed results saved to: {results_file}")

    finally:
        await db_manager.close()
782
-
783
-
784
async def main():
    """Parse CLI options, verify the Crafter service, and run the experiment.

    Steps:
        1. Parse command-line flags and copy them onto a fresh
           ``ExperimentConfig``.
        2. Print a banner describing the run.
        3. Probe ``<crafter_service_url>/health``; return early if it is
           unreachable or does not answer with HTTP 200.
        4. Run ``run_model_experiment`` for each requested model, one model at
           a time, then hand all results to ``analyze_results``.
    """
    parser = argparse.ArgumentParser(description="Run Crafter experiments with multiple models")

    # Flag definitions kept in a table; registration order matches the help output.
    arg_specs = (
        ("--episodes", {"type": int, "default": 5, "help": "Number of episodes per model"}),
        ("--max-turns", {"type": int, "default": 100, "help": "Maximum turns per episode"}),
        ("--difficulty", {"choices": ["easy", "medium", "hard"], "default": "easy", "help": "Game difficulty"}),
        ("--models", {"nargs": "+", "default": MODELS_TO_TEST, "help": "Models to test"}),
        ("--no-save", {"action": "store_true", "help": "Don't save traces to database"}),
        ("--quiet", {"action": "store_true", "help": "Reduce output verbosity"}),
        ("--db-url", {"default": DATABASE_URL, "help": "Database URL for tracing"}),
        ("--base-seed", {"type": int, "default": 1000, "help": "Base seed for episodes (episodes use base_seed+episode_num)"}),
        ("--turn-timeout", {"type": float, "default": 30.0, "help": "Timeout per turn in seconds"}),
        ("--episode-timeout", {"type": float, "default": 300.0, "help": "Total timeout per episode in seconds"}),
    )
    for flag, options in arg_specs:
        parser.add_argument(flag, **options)

    args = parser.parse_args()

    # Copy the parsed options onto the experiment configuration.
    config = ExperimentConfig()
    config.num_episodes = args.episodes
    config.max_turns = args.max_turns
    config.difficulty = args.difficulty
    config.save_traces = not args.no_save
    config.verbose = not args.quiet
    config.quiet = args.quiet
    config.database_url = args.db_url
    config.base_seed = args.base_seed
    config.turn_timeout = args.turn_timeout
    config.episode_timeout = args.episode_timeout

    # The experiment ID embeds the launch time.
    experiment_id = f"crafter_multi_model_{datetime.now().strftime('%Y%m%d_%H%M%S')}"

    separator = "=" * 50
    banner_lines = (
        "🎮 Crafter Multi-Model Experiment",
        separator,
        f"Experiment ID: {experiment_id}",
        f"Models: {', '.join(args.models)}",
        f"Episodes per model: {config.num_episodes}",
        f"Max turns per episode: {config.max_turns}",
        f"Difficulty: {config.difficulty}",
        f"Seeds: {config.base_seed} to {config.base_seed + config.num_episodes - 1}",
        f"Turn timeout: {config.turn_timeout}s",
        f"Episode timeout: {config.episode_timeout}s",
        f"Save traces: {config.save_traces}",
        f"Database URL: {config.database_url}",
        separator,
    )
    for banner_line in banner_lines:
        print(banner_line)

    # Refuse to start unless the Crafter service answers its health endpoint.
    try:
        async with httpx.AsyncClient() as http_client:
            health = await http_client.get(f"{config.crafter_service_url}/health", timeout=5.0)
            if health.status_code != 200:
                print(f"❌ Crafter service not healthy at {config.crafter_service_url}")
                return
    except Exception as exc:
        print(f"❌ Cannot connect to Crafter service at {config.crafter_service_url}: {exc}")
        print("Please ensure the Crafter service is running.")
        return

    print("✅ Crafter service is running")

    # Models run sequentially; episodes within a model run concurrently.
    all_results = {}
    for model in args.models:
        all_results[model] = await run_model_experiment(config, model, experiment_id)

    # Analyze and compare results
    await analyze_results(config, all_results)

    print("\n✅ Experiment complete!")
855
-
856
-
857
# Script entry point: drive the async main() to completion when run directly.
if __name__ == "__main__":
    asyncio.run(main())