synth-ai 0.2.14__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (1091):
  1. synth_ai/__init__.py +19 -40
  2. synth_ai/__main__.py +30 -3
  3. synth_ai/cli/__init__.py +105 -70
  4. synth_ai/cli/__main__.py +42 -0
  5. synth_ai/cli/_internal/__init__.py +5 -0
  6. synth_ai/cli/_internal/modal_wrapper.py +31 -0
  7. synth_ai/cli/_internal/storage.py +20 -0
  8. synth_ai/cli/_internal/typer_patch.py +47 -0
  9. synth_ai/cli/_internal/validate_task_app.py +29 -0
  10. synth_ai/cli/agents/__init__.py +17 -0
  11. synth_ai/cli/agents/claude.py +77 -0
  12. synth_ai/cli/agents/codex.py +265 -0
  13. synth_ai/cli/agents/opencode.py +253 -0
  14. synth_ai/cli/commands/__init__.py +18 -0
  15. synth_ai/cli/commands/artifacts/__init__.py +13 -0
  16. synth_ai/cli/commands/artifacts/client.py +119 -0
  17. synth_ai/cli/commands/artifacts/config.py +57 -0
  18. synth_ai/cli/commands/artifacts/core.py +24 -0
  19. synth_ai/cli/commands/artifacts/download.py +188 -0
  20. synth_ai/cli/commands/artifacts/export.py +186 -0
  21. synth_ai/cli/commands/artifacts/list.py +156 -0
  22. synth_ai/cli/commands/artifacts/parsing.py +250 -0
  23. synth_ai/cli/commands/artifacts/show.py +336 -0
  24. synth_ai/cli/commands/baseline/__init__.py +12 -0
  25. synth_ai/cli/commands/baseline/core.py +636 -0
  26. synth_ai/cli/commands/baseline/list.py +94 -0
  27. synth_ai/cli/commands/demo/__init__.py +3 -0
  28. synth_ai/cli/commands/demo/core.py +153 -0
  29. synth_ai/cli/commands/eval/__init__.py +19 -0
  30. synth_ai/cli/commands/eval/core.py +1113 -0
  31. synth_ai/cli/commands/eval/errors.py +81 -0
  32. synth_ai/cli/commands/eval/validation.py +133 -0
  33. synth_ai/cli/commands/filter/__init__.py +12 -0
  34. synth_ai/cli/commands/filter/core.py +424 -0
  35. synth_ai/cli/commands/filter/errors.py +55 -0
  36. synth_ai/cli/commands/filter/validation.py +77 -0
  37. synth_ai/cli/commands/help/__init__.py +185 -0
  38. synth_ai/cli/commands/help/core.py +72 -0
  39. synth_ai/cli/commands/scan/__init__.py +19 -0
  40. synth_ai/cli/commands/scan/cloudflare_scanner.py +403 -0
  41. synth_ai/cli/commands/scan/core.py +344 -0
  42. synth_ai/cli/commands/scan/health_checker.py +242 -0
  43. synth_ai/cli/commands/scan/local_scanner.py +278 -0
  44. synth_ai/cli/commands/scan/models.py +83 -0
  45. synth_ai/cli/commands/smoke/__init__.py +7 -0
  46. synth_ai/cli/commands/smoke/core.py +1438 -0
  47. synth_ai/cli/commands/status/__init__.py +66 -0
  48. synth_ai/cli/commands/status/client.py +192 -0
  49. synth_ai/cli/commands/status/config.py +92 -0
  50. synth_ai/cli/commands/status/errors.py +20 -0
  51. synth_ai/cli/commands/status/formatters.py +164 -0
  52. synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
  53. synth_ai/cli/commands/status/subcommands/files.py +79 -0
  54. synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
  55. synth_ai/cli/commands/status/subcommands/models.py +79 -0
  56. synth_ai/cli/commands/status/subcommands/pricing.py +23 -0
  57. synth_ai/cli/commands/status/subcommands/runs.py +81 -0
  58. synth_ai/cli/commands/status/subcommands/session.py +182 -0
  59. synth_ai/cli/commands/status/subcommands/summary.py +47 -0
  60. synth_ai/cli/commands/status/subcommands/usage.py +203 -0
  61. synth_ai/cli/commands/status/utils.py +114 -0
  62. synth_ai/cli/commands/train/__init__.py +53 -0
  63. synth_ai/cli/commands/train/core.py +22 -0
  64. synth_ai/cli/commands/train/errors.py +117 -0
  65. synth_ai/cli/commands/train/judge_schemas.py +201 -0
  66. synth_ai/cli/commands/train/judge_validation.py +305 -0
  67. synth_ai/cli/commands/train/prompt_learning_validation.py +633 -0
  68. synth_ai/cli/commands/train/validation.py +392 -0
  69. synth_ai/cli/demo_apps/__init__.py +10 -0
  70. synth_ai/cli/demo_apps/core/__init__.py +28 -0
  71. synth_ai/cli/demo_apps/core/cli.py +1735 -0
  72. synth_ai/cli/demo_apps/crafter/crafter_fft_4b.toml +55 -0
  73. synth_ai/cli/demo_apps/crafter/grpo_crafter_task_app.py +186 -0
  74. synth_ai/cli/demo_apps/crafter/rl_from_base_qwen4b.toml +74 -0
  75. synth_ai/cli/demo_apps/demo_registry.py +176 -0
  76. synth_ai/cli/demo_apps/demo_task_apps/core.py +440 -0
  77. synth_ai/cli/demo_apps/demo_task_apps/crafter/__init__.py +1 -0
  78. synth_ai/cli/demo_apps/demo_task_apps/crafter/grpo_crafter_task_app.py +185 -0
  79. synth_ai/cli/demo_apps/demo_task_apps/math/modal_task_app.py +742 -0
  80. synth_ai/cli/demo_apps/demo_task_apps/math/task_app_entry.py +39 -0
  81. synth_ai/cli/demo_apps/math/__init__.py +1 -0
  82. synth_ai/cli/demo_apps/math/_common.py +16 -0
  83. synth_ai/cli/demo_apps/math/app.py +38 -0
  84. synth_ai/cli/demo_apps/math/config.toml +76 -0
  85. synth_ai/cli/demo_apps/math/deploy_modal.py +54 -0
  86. synth_ai/cli/demo_apps/math/modal_task_app.py +702 -0
  87. synth_ai/cli/demo_apps/math/task_app_entry.py +53 -0
  88. synth_ai/cli/demo_apps/mipro/main.py +271 -0
  89. synth_ai/cli/demo_apps/mipro/task_app.py +933 -0
  90. synth_ai/cli/demo_apps/mipro/train_cfg.toml +92 -0
  91. synth_ai/cli/demos/__init__.py +12 -0
  92. synth_ai/cli/demos/demo.py +32 -0
  93. synth_ai/cli/demos/rl_demo.py +254 -0
  94. synth_ai/cli/deploy.py +216 -0
  95. synth_ai/cli/infra/__init__.py +14 -0
  96. synth_ai/cli/infra/balance.py +216 -0
  97. synth_ai/cli/infra/mcp.py +35 -0
  98. synth_ai/cli/infra/modal_app.py +36 -0
  99. synth_ai/cli/infra/setup.py +69 -0
  100. synth_ai/cli/infra/status.py +16 -0
  101. synth_ai/cli/infra/turso.py +77 -0
  102. synth_ai/cli/lib/__init__.py +10 -0
  103. synth_ai/cli/lib/agents.py +76 -0
  104. synth_ai/cli/lib/apps/modal_app.py +101 -0
  105. synth_ai/cli/lib/apps/task_app.py +643 -0
  106. synth_ai/cli/lib/bin.py +39 -0
  107. synth_ai/cli/lib/env.py +375 -0
  108. synth_ai/cli/lib/errors.py +85 -0
  109. synth_ai/cli/lib/modal.py +315 -0
  110. synth_ai/cli/lib/plotting.py +126 -0
  111. synth_ai/cli/lib/prompt_args.py +39 -0
  112. synth_ai/cli/lib/prompts.py +284 -0
  113. synth_ai/cli/lib/sqld.py +122 -0
  114. synth_ai/cli/lib/task_app_discovery.py +884 -0
  115. synth_ai/cli/lib/task_app_env.py +295 -0
  116. synth_ai/cli/lib/train_cfgs.py +300 -0
  117. synth_ai/cli/lib/tunnel_records.py +207 -0
  118. synth_ai/cli/local/__init__.py +14 -0
  119. synth_ai/cli/local/experiment_queue/__init__.py +72 -0
  120. synth_ai/cli/local/experiment_queue/api_schemas.py +221 -0
  121. synth_ai/cli/local/experiment_queue/celery_app.py +208 -0
  122. synth_ai/cli/local/experiment_queue/config.py +128 -0
  123. synth_ai/cli/local/experiment_queue/config_utils.py +272 -0
  124. synth_ai/cli/local/experiment_queue/database.py +175 -0
  125. synth_ai/cli/local/experiment_queue/dispatcher.py +119 -0
  126. synth_ai/cli/local/experiment_queue/models.py +231 -0
  127. synth_ai/cli/local/experiment_queue/progress_info.py +160 -0
  128. synth_ai/cli/local/experiment_queue/results.py +373 -0
  129. synth_ai/cli/local/experiment_queue/schemas.py +131 -0
  130. synth_ai/cli/local/experiment_queue/service.py +344 -0
  131. synth_ai/cli/local/experiment_queue/status.py +372 -0
  132. synth_ai/cli/local/experiment_queue/status_tracker.py +360 -0
  133. synth_ai/cli/local/experiment_queue/tasks.py +1984 -0
  134. synth_ai/cli/local/experiment_queue/trace_storage.py +65 -0
  135. synth_ai/cli/local/experiment_queue/validation.py +157 -0
  136. synth_ai/cli/local/session/__init__.py +92 -0
  137. synth_ai/cli/local/session/client.py +383 -0
  138. synth_ai/cli/local/session/constants.py +63 -0
  139. synth_ai/cli/local/session/exceptions.py +105 -0
  140. synth_ai/cli/local/session/manager.py +139 -0
  141. synth_ai/cli/local/session/models.py +89 -0
  142. synth_ai/cli/local/session/query.py +110 -0
  143. synth_ai/cli/root.py +30 -6
  144. synth_ai/cli/task_apps/__init__.py +26 -0
  145. synth_ai/cli/task_apps/commands.py +3153 -0
  146. synth_ai/cli/task_apps/deploy.py +7 -0
  147. synth_ai/cli/task_apps/list.py +26 -0
  148. synth_ai/cli/task_apps/main.py +36 -0
  149. synth_ai/cli/task_apps/modal_serve.py +11 -0
  150. synth_ai/cli/task_apps/serve.py +11 -0
  151. synth_ai/cli/training/__init__.py +8 -0
  152. synth_ai/cli/training/train.py +5 -0
  153. synth_ai/cli/training/train_cfg.py +34 -0
  154. synth_ai/cli/training/watch.py +506 -0
  155. synth_ai/cli/turso.py +34 -55
  156. synth_ai/cli/usage.py +159 -0
  157. synth_ai/cli/utils/__init__.py +8 -0
  158. synth_ai/cli/utils/experiments.py +235 -0
  159. synth_ai/cli/utils/queue.py +504 -0
  160. synth_ai/cli/utils/recent.py +133 -0
  161. synth_ai/cli/utils/traces.py +164 -0
  162. synth_ai/contracts/__init__.py +67 -0
  163. synth_ai/core/__init__.py +100 -0
  164. synth_ai/core/_utils/__init__.py +54 -0
  165. synth_ai/core/_utils/base_url.py +10 -0
  166. synth_ai/core/_utils/http.py +10 -0
  167. synth_ai/core/_utils/prompts.py +14 -0
  168. synth_ai/core/_utils/task_app_state.py +12 -0
  169. synth_ai/core/_utils/user_config.py +10 -0
  170. synth_ai/core/apps/common.py +116 -0
  171. synth_ai/core/auth.py +95 -0
  172. synth_ai/core/cfgs.py +240 -0
  173. synth_ai/core/config/__init__.py +16 -0
  174. synth_ai/core/config/base.py +168 -0
  175. synth_ai/core/config/resolver.py +89 -0
  176. synth_ai/core/env.py +220 -0
  177. synth_ai/core/errors.py +126 -0
  178. synth_ai/core/http.py +230 -0
  179. synth_ai/core/integrations/__init__.py +11 -0
  180. synth_ai/core/integrations/cloudflare.py +1710 -0
  181. synth_ai/core/integrations/mcp/__init__.py +6 -0
  182. synth_ai/core/integrations/mcp/__main__.py +8 -0
  183. synth_ai/core/integrations/mcp/claude.py +36 -0
  184. synth_ai/core/integrations/mcp/main.py +254 -0
  185. synth_ai/core/integrations/mcp/setup.py +100 -0
  186. synth_ai/core/integrations/modal.py +277 -0
  187. synth_ai/core/json.py +72 -0
  188. synth_ai/core/log_filter.py +99 -0
  189. synth_ai/core/logging.py +82 -0
  190. synth_ai/core/paths.py +107 -0
  191. synth_ai/core/pricing.py +109 -0
  192. synth_ai/core/process.py +233 -0
  193. synth_ai/core/ssl.py +25 -0
  194. synth_ai/core/storage/__init__.py +71 -0
  195. synth_ai/core/task_app_state.py +318 -0
  196. synth_ai/core/telemetry.py +282 -0
  197. synth_ai/core/tracing_v3/__init__.py +99 -0
  198. synth_ai/core/tracing_v3/abstractions.py +302 -0
  199. synth_ai/core/tracing_v3/config.py +229 -0
  200. synth_ai/core/tracing_v3/constants.py +21 -0
  201. synth_ai/core/tracing_v3/db_config.py +182 -0
  202. synth_ai/core/tracing_v3/decorators.py +401 -0
  203. synth_ai/core/tracing_v3/llm_call_record_helpers.py +437 -0
  204. synth_ai/core/tracing_v3/migration_helper.py +119 -0
  205. synth_ai/core/tracing_v3/session_tracer.py +542 -0
  206. synth_ai/core/tracing_v3/storage/base.py +211 -0
  207. synth_ai/core/tracing_v3/storage/config.py +109 -0
  208. synth_ai/core/tracing_v3/storage/factory.py +39 -0
  209. synth_ai/core/tracing_v3/trace_utils.py +326 -0
  210. synth_ai/core/tracing_v3/turso/daemon.py +278 -0
  211. synth_ai/core/tracing_v3/turso/models.py +470 -0
  212. synth_ai/core/tracing_v3/turso/native_manager.py +1385 -0
  213. synth_ai/core/tracing_v3/utils.py +108 -0
  214. synth_ai/core/urls.py +18 -0
  215. synth_ai/core/user_config.py +137 -0
  216. synth_ai/core/uvicorn.py +222 -0
  217. synth_ai/data/__init__.py +110 -0
  218. synth_ai/data/enums.py +141 -0
  219. synth_ai/data/rewards.py +152 -0
  220. synth_ai/data/specs.py +36 -0
  221. synth_ai/data/traces.py +35 -0
  222. synth_ai/products/__init__.py +6 -0
  223. synth_ai/products/graph_evolve/__init__.py +46 -0
  224. synth_ai/products/graph_evolve/client.py +226 -0
  225. synth_ai/products/graph_evolve/config.py +591 -0
  226. synth_ai/products/graph_evolve/converters/__init__.py +42 -0
  227. synth_ai/products/graph_evolve/converters/openai_sft.py +484 -0
  228. synth_ai/products/graph_evolve/examples/hotpotqa/config.toml +109 -0
  229. synth_ai/products/graph_evolve/run.py +222 -0
  230. synth_ai/sdk/__init__.py +119 -0
  231. synth_ai/sdk/api/__init__.py +1 -0
  232. synth_ai/sdk/api/models/supported.py +514 -0
  233. synth_ai/sdk/api/research_agent/__init__.py +86 -0
  234. synth_ai/sdk/api/research_agent/cli.py +428 -0
  235. synth_ai/sdk/api/research_agent/config.py +357 -0
  236. synth_ai/sdk/api/research_agent/job.py +717 -0
  237. synth_ai/sdk/api/train/__init__.py +85 -0
  238. synth_ai/sdk/api/train/builders.py +895 -0
  239. synth_ai/sdk/api/train/cli.py +2188 -0
  240. synth_ai/sdk/api/train/config_finder.py +267 -0
  241. synth_ai/sdk/api/train/configs/__init__.py +65 -0
  242. synth_ai/sdk/api/train/configs/prompt_learning.py +1706 -0
  243. synth_ai/sdk/api/train/configs/rl.py +188 -0
  244. synth_ai/sdk/api/train/configs/sft.py +99 -0
  245. synth_ai/sdk/api/train/configs/shared.py +81 -0
  246. synth_ai/sdk/api/train/context_learning.py +312 -0
  247. synth_ai/sdk/api/train/env_resolver.py +418 -0
  248. synth_ai/sdk/api/train/graph_validators.py +216 -0
  249. synth_ai/sdk/api/train/graphgen.py +984 -0
  250. synth_ai/sdk/api/train/graphgen_models.py +823 -0
  251. synth_ai/sdk/api/train/graphgen_validators.py +109 -0
  252. synth_ai/sdk/api/train/pollers.py +124 -0
  253. synth_ai/sdk/api/train/progress/__init__.py +97 -0
  254. synth_ai/sdk/api/train/progress/dataclasses.py +569 -0
  255. synth_ai/sdk/api/train/progress/events.py +326 -0
  256. synth_ai/sdk/api/train/progress/results.py +428 -0
  257. synth_ai/sdk/api/train/progress/tracker.py +641 -0
  258. synth_ai/sdk/api/train/prompt_learning.py +470 -0
  259. synth_ai/sdk/api/train/rl.py +442 -0
  260. synth_ai/sdk/api/train/sft.py +396 -0
  261. synth_ai/sdk/api/train/summary.py +522 -0
  262. synth_ai/sdk/api/train/supported_algos.py +147 -0
  263. synth_ai/sdk/api/train/task_app.py +331 -0
  264. synth_ai/sdk/api/train/utils.py +279 -0
  265. synth_ai/sdk/api/train/validators.py +2424 -0
  266. synth_ai/sdk/baseline/__init__.py +25 -0
  267. synth_ai/sdk/baseline/config.py +209 -0
  268. synth_ai/sdk/baseline/discovery.py +216 -0
  269. synth_ai/sdk/baseline/execution.py +154 -0
  270. synth_ai/sdk/graphs/__init__.py +15 -0
  271. synth_ai/sdk/graphs/completions.py +570 -0
  272. synth_ai/sdk/inference/__init__.py +6 -0
  273. synth_ai/sdk/inference/client.py +128 -0
  274. synth_ai/sdk/jobs/__init__.py +16 -0
  275. synth_ai/sdk/jobs/client.py +371 -0
  276. synth_ai/sdk/judging/__init__.py +15 -0
  277. synth_ai/sdk/judging/base.py +24 -0
  278. synth_ai/sdk/judging/client.py +191 -0
  279. synth_ai/sdk/judging/schemas.py +222 -0
  280. synth_ai/sdk/learning/__init__.py +69 -0
  281. synth_ai/sdk/learning/client.py +240 -0
  282. synth_ai/sdk/learning/ft_client.py +7 -0
  283. synth_ai/sdk/learning/health.py +49 -0
  284. synth_ai/sdk/learning/jobs.py +202 -0
  285. synth_ai/sdk/learning/prompt_extraction.py +334 -0
  286. synth_ai/sdk/learning/prompt_learning_client.py +455 -0
  287. synth_ai/sdk/learning/prompt_learning_types.py +185 -0
  288. synth_ai/sdk/learning/rl/client.py +268 -0
  289. synth_ai/sdk/learning/rl/contracts.py +27 -0
  290. synth_ai/sdk/learning/rl/env_keys.py +166 -0
  291. synth_ai/sdk/learning/rl/secrets.py +13 -0
  292. synth_ai/sdk/learning/sft/client.py +95 -0
  293. synth_ai/sdk/learning/sft/config.py +270 -0
  294. synth_ai/sdk/learning/sft/data.py +698 -0
  295. synth_ai/sdk/learning/validators.py +52 -0
  296. synth_ai/sdk/research_agent/__init__.py +34 -0
  297. synth_ai/sdk/research_agent/container_builder.py +328 -0
  298. synth_ai/sdk/research_agent/container_spec.py +198 -0
  299. synth_ai/sdk/research_agent/defaults.py +34 -0
  300. synth_ai/sdk/research_agent/results_collector.py +69 -0
  301. synth_ai/sdk/specs/__init__.py +46 -0
  302. synth_ai/sdk/specs/dataclasses.py +149 -0
  303. synth_ai/sdk/specs/loader.py +144 -0
  304. synth_ai/sdk/specs/serializer.py +199 -0
  305. synth_ai/sdk/specs/validation.py +250 -0
  306. synth_ai/sdk/streaming/__init__.py +35 -0
  307. synth_ai/sdk/streaming/config.py +94 -0
  308. synth_ai/sdk/streaming/handlers.py +1997 -0
  309. synth_ai/sdk/streaming/streamer.py +704 -0
  310. synth_ai/sdk/streaming/types.py +112 -0
  311. synth_ai/sdk/task/__init__.py +151 -0
  312. synth_ai/sdk/task/apps/__init__.py +133 -0
  313. synth_ai/sdk/task/config.py +261 -0
  314. synth_ai/sdk/task/contracts.py +298 -0
  315. synth_ai/sdk/task/datasets.py +108 -0
  316. synth_ai/sdk/task/in_process.py +1190 -0
  317. synth_ai/sdk/task/in_process_runner.py +309 -0
  318. synth_ai/sdk/task/inference_api.py +299 -0
  319. synth_ai/sdk/task/proxy.py +287 -0
  320. synth_ai/sdk/task/rubrics/__init__.py +55 -0
  321. synth_ai/sdk/task/rubrics/loaders.py +156 -0
  322. synth_ai/sdk/task/rubrics.py +219 -0
  323. synth_ai/sdk/task/server.py +580 -0
  324. synth_ai/sdk/task/trace_correlation_helpers.py +506 -0
  325. synth_ai/sdk/task/tracing_utils.py +95 -0
  326. synth_ai/sdk/task/validators.py +456 -0
  327. synth_ai/sdk/tracing/__init__.py +39 -0
  328. synth_ai/sdk/training/__init__.py +102 -0
  329. synth_ai/sdk/usage/__init__.py +37 -0
  330. synth_ai/sdk/usage/client.py +171 -0
  331. synth_ai/sdk/usage/models.py +261 -0
  332. synth_ai/utils/__init__.py +213 -0
  333. synth_ai-0.4.1.dist-info/METADATA +195 -0
  334. synth_ai-0.4.1.dist-info/RECORD +379 -0
  335. synth_ai-0.4.1.dist-info/top_level.txt +1 -0
  336. examples/__init__.py +0 -16
  337. examples/analyze_semantic_words.sh +0 -17
  338. examples/crafter_debug_render.py +0 -186
  339. examples/dev/qwen3_32b_qlora_4xh100.toml +0 -40
  340. examples/multi_step/configs/README_verilog_rl.md +0 -77
  341. examples/multi_step/configs/VERILOG_REWARDS.md +0 -90
  342. examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +0 -183
  343. examples/multi_step/configs/crafter_eval_synth_qwen4b.toml +0 -35
  344. examples/multi_step/configs/crafter_eval_text_only_groq_qwen32b.toml +0 -36
  345. examples/multi_step/configs/crafter_rl_outcome.toml +0 -74
  346. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +0 -187
  347. examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +0 -83
  348. examples/multi_step/configs/crafter_rl_stepwise_simple.toml +0 -78
  349. examples/multi_step/configs/crafter_synth_backend.md +0 -40
  350. examples/multi_step/configs/verilog_eval_groq_qwen32b.toml +0 -31
  351. examples/multi_step/configs/verilog_eval_synth_qwen8b.toml +0 -33
  352. examples/multi_step/configs/verilog_rl_lora.toml +0 -190
  353. examples/multi_step/crafter_rl_lora.md +0 -70
  354. examples/multi_step/judges/crafter_backend_judge.py +0 -220
  355. examples/multi_step/judges/verilog_backend_judge.py +0 -234
  356. examples/multi_step/readme.md +0 -48
  357. examples/multi_step/sse_metrics_streaming_notes.md +0 -357
  358. examples/multi_step/task_app_config_notes.md +0 -494
  359. examples/multi_step/verilog_rl_lora.md +0 -218
  360. examples/qwen_coder/README.md +0 -102
  361. examples/qwen_coder/_shared.py +0 -113
  362. examples/qwen_coder/configs/coder_lora_30b.toml +0 -61
  363. examples/qwen_coder/configs/coder_lora_4b.toml +0 -57
  364. examples/qwen_coder/configs/coder_lora_small.toml +0 -58
  365. examples/qwen_coder/generate_dataset.py +0 -98
  366. examples/qwen_coder/infer_ft_smoke.py +0 -65
  367. examples/qwen_coder/infer_prod_proxy.py +0 -73
  368. examples/qwen_coder/infer_via_synth.py +0 -87
  369. examples/qwen_coder/scripts/infer_coder.sh +0 -19
  370. examples/qwen_coder/scripts/train_coder_30b.sh +0 -22
  371. examples/qwen_coder/sft_full_17b.py +0 -103
  372. examples/qwen_coder/sft_lora_30b.py +0 -110
  373. examples/qwen_coder/subset_jsonl.py +0 -39
  374. examples/qwen_coder/todos.md +0 -38
  375. examples/qwen_coder/validate_jsonl.py +0 -60
  376. examples/rl/README.md +0 -169
  377. examples/rl/download_dataset.py +0 -80
  378. examples/run_crafter_demo.sh +0 -10
  379. examples/sft/README.md +0 -139
  380. examples/sft/configs/crafter_fft_qwen0p6b.toml +0 -44
  381. examples/sft/configs/crafter_lora_qwen0p6b.toml +0 -45
  382. examples/sft/evaluate.py +0 -119
  383. examples/sft/export_dataset.py +0 -117
  384. examples/sft/generate_traces.py +0 -164
  385. examples/swe/__init__.py +0 -12
  386. examples/swe/task_app/README.md +0 -105
  387. examples/swe/task_app/__init__.py +0 -2
  388. examples/swe/task_app/grpo_swe_mini.py +0 -601
  389. examples/swe/task_app/grpo_swe_mini_task_app.py +0 -136
  390. examples/swe/task_app/hosted/README.md +0 -173
  391. examples/swe/task_app/hosted/__init__.py +0 -5
  392. examples/swe/task_app/hosted/branching.py +0 -143
  393. examples/swe/task_app/hosted/environment_routes.py +0 -1289
  394. examples/swe/task_app/hosted/envs/__init__.py +0 -1
  395. examples/swe/task_app/hosted/envs/crafter/__init__.py +0 -6
  396. examples/swe/task_app/hosted/envs/crafter/app.py +0 -1
  397. examples/swe/task_app/hosted/envs/crafter/environment.py +0 -522
  398. examples/swe/task_app/hosted/envs/crafter/policy.py +0 -478
  399. examples/swe/task_app/hosted/envs/crafter/react_agent.py +0 -108
  400. examples/swe/task_app/hosted/envs/crafter/shared.py +0 -305
  401. examples/swe/task_app/hosted/envs/crafter/tools.py +0 -47
  402. examples/swe/task_app/hosted/envs/mini_swe/__init__.py +0 -8
  403. examples/swe/task_app/hosted/envs/mini_swe/environment.py +0 -1164
  404. examples/swe/task_app/hosted/envs/mini_swe/policy.py +0 -355
  405. examples/swe/task_app/hosted/envs/mini_swe/shared.py +0 -83
  406. examples/swe/task_app/hosted/envs/mini_swe/tools.py +0 -96
  407. examples/swe/task_app/hosted/hosted_app.py +0 -204
  408. examples/swe/task_app/hosted/inference/__init__.py +0 -5
  409. examples/swe/task_app/hosted/inference/openai_client.py +0 -618
  410. examples/swe/task_app/hosted/main.py +0 -100
  411. examples/swe/task_app/hosted/policy_routes.py +0 -1079
  412. examples/swe/task_app/hosted/registry.py +0 -195
  413. examples/swe/task_app/hosted/rollout.py +0 -1911
  414. examples/swe/task_app/hosted/storage/__init__.py +0 -5
  415. examples/swe/task_app/hosted/storage/volume.py +0 -211
  416. examples/swe/task_app/hosted/test_agents.py +0 -161
  417. examples/swe/task_app/hosted/test_service.py +0 -136
  418. examples/swe/task_app/hosted/utils.py +0 -62
  419. examples/task_apps/IMAGE_ONLY_EVAL_QUICKSTART.md +0 -258
  420. examples/task_apps/TESTING.md +0 -275
  421. examples/task_apps/crafter/CREATE_SFT_DATASET.md +0 -273
  422. examples/task_apps/crafter/EVAL_IMAGE_ONLY_RESULTS.md +0 -152
  423. examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +0 -174
  424. examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +0 -268
  425. examples/task_apps/crafter/QUERY_EXAMPLES.md +0 -203
  426. examples/task_apps/crafter/README_IMAGE_ONLY_EVAL.md +0 -316
  427. examples/task_apps/crafter/__init__.py +0 -0
  428. examples/task_apps/crafter/eval_image_only_gpt4o.toml +0 -28
  429. examples/task_apps/crafter/eval_text_only_groq_llama.toml +0 -36
  430. examples/task_apps/crafter/filter_sft_dataset.toml +0 -16
  431. examples/task_apps/crafter/task_app/README.md +0 -42
  432. examples/task_apps/crafter/task_app/__init__.py +0 -5
  433. examples/task_apps/crafter/task_app/grpo_crafter.py +0 -973
  434. examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +0 -146
  435. examples/task_apps/crafter/task_app/synth_envs_hosted/README.md +0 -173
  436. examples/task_apps/crafter/task_app/synth_envs_hosted/__init__.py +0 -5
  437. examples/task_apps/crafter/task_app/synth_envs_hosted/branching.py +0 -143
  438. examples/task_apps/crafter/task_app/synth_envs_hosted/environment_routes.py +0 -1226
  439. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/__init__.py +0 -1
  440. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/__init__.py +0 -6
  441. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/app.py +0 -1
  442. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/environment.py +0 -532
  443. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +0 -547
  444. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +0 -123
  445. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/shared.py +0 -305
  446. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/tools.py +0 -47
  447. examples/task_apps/crafter/task_app/synth_envs_hosted/hosted_app.py +0 -204
  448. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/__init__.py +0 -5
  449. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +0 -704
  450. examples/task_apps/crafter/task_app/synth_envs_hosted/main.py +0 -100
  451. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +0 -1152
  452. examples/task_apps/crafter/task_app/synth_envs_hosted/registry.py +0 -195
  453. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +0 -2160
  454. examples/task_apps/crafter/task_app/synth_envs_hosted/storage/__init__.py +0 -5
  455. examples/task_apps/crafter/task_app/synth_envs_hosted/storage/volume.py +0 -211
  456. examples/task_apps/crafter/task_app/synth_envs_hosted/test_agents.py +0 -161
  457. examples/task_apps/crafter/task_app/synth_envs_hosted/test_service.py +0 -136
  458. examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +0 -218
  459. examples/task_apps/dev/pokemon_emerald/__init__.py +0 -2
  460. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/README.md +0 -811
  461. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/__init__.py +0 -120
  462. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/action.py +0 -160
  463. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/memory.py +0 -155
  464. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/perception.py +0 -69
  465. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/planning.py +0 -96
  466. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/simple.py +0 -1502
  467. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/system_prompt.py +0 -4
  468. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/grab_map.py +0 -68
  469. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/manual.py +0 -216
  470. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/__init__.py +0 -35
  471. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emerald_utils.py +0 -631
  472. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emulator.py +0 -1544
  473. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/enums.py +0 -1428
  474. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/memory_reader.py +0 -4848
  475. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/types.py +0 -41
  476. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/utils.py +0 -298
  477. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pyproject.toml +0 -95
  478. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/run.py +0 -204
  479. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/__init__.py +0 -0
  480. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/app.py +0 -2152
  481. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/client.py +0 -429
  482. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/frame_server.py +0 -155
  483. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/README.md +0 -78
  484. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/__init__.py +0 -0
  485. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/run_tests.py +0 -122
  486. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_direct.py +0 -76
  487. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_prompts.py +0 -413
  488. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_battle_state_formatting.py +0 -204
  489. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection.py +0 -133
  490. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection_comprehensive.py +0 -229
  491. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_direct_agent_emulator.py +0 -300
  492. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_fps_adjustment_pytest.py +0 -205
  493. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_direct.py +0 -200
  494. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_transition.py +0 -284
  495. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_map_ground_truth_comparison.py +0 -468
  496. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_memory_map.py +0 -575
  497. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_server_map_validation.py +0 -311
  498. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_torchic_state.py +0 -259
  499. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/__init__.py +0 -0
  500. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/anticheat.py +0 -372
  501. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/checkpoint.py +0 -296
  502. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/error_handler.py +0 -275
  503. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/get_local_ip.py +0 -22
  504. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/helpers.py +0 -44
  505. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/llm_logger.py +0 -514
  506. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_formatter.py +0 -415
  507. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher.py +0 -1763
  508. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher_singleton.py +0 -33
  509. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_trimmer.py +0 -106
  510. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_visualizer.py +0 -334
  511. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/ocr_dialogue.py +0 -1020
  512. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/recording.py +0 -188
  513. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/state_formatter.py +0 -1481
  514. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/vlm.py +0 -862
  515. examples/task_apps/dev/pokemon_emerald/modal_app.py +0 -114
  516. examples/task_apps/dev/pokemon_emerald/task_app/README.md +0 -81
  517. examples/task_apps/dev/pokemon_emerald/task_app/__init__.py +0 -6
  518. examples/task_apps/dev/pokemon_emerald/task_app/pokemon_emerald.py +0 -685
  519. examples/task_apps/enron/__init__.py +0 -1
  520. examples/task_apps/enron/eval_groq_qwen32.toml +0 -16
  521. examples/task_apps/enron/filter_sft.toml +0 -5
  522. examples/task_apps/enron/task_app/README.md +0 -14
  523. examples/task_apps/enron/task_app/__init__.py +0 -1
  524. examples/task_apps/enron/task_app/grpo_enron.py +0 -906
  525. examples/task_apps/enron/task_app/grpo_enron_task_app.py +0 -146
  526. examples/task_apps/enron/tests/__init__.py +0 -4
  527. examples/task_apps/enron/tests/conftest.py +0 -115
  528. examples/task_apps/enron/tests/integration/__init__.py +0 -4
  529. examples/task_apps/enron/tests/integration/test_enron_eval.py +0 -179
  530. examples/task_apps/enron/tests/integration/test_enron_rollout.py +0 -135
  531. examples/task_apps/enron/tests/unit/__init__.py +0 -4
  532. examples/task_apps/enron/tests/unit/test_enron_environment.py +0 -126
  533. examples/task_apps/math/README.md +0 -22
  534. examples/task_apps/math/__init__.py +0 -0
  535. examples/task_apps/math/math_single_step.py +0 -1000
  536. examples/task_apps/math/math_task_app.py +0 -115
  537. examples/task_apps/pokemon_battle/__init__.py +0 -2
  538. examples/task_apps/pokemon_battle/modal_app.py +0 -104
  539. examples/task_apps/pokemon_battle/task_app/README.md +0 -68
  540. examples/task_apps/pokemon_battle/task_app/__init__.py +0 -6
  541. examples/task_apps/pokemon_battle/task_app/pokemon_showdown.py +0 -932
  542. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_COMPLETE.md +0 -283
  543. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_STATUS.md +0 -155
  544. examples/task_apps/pokemon_red/README.md +0 -357
  545. examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +0 -415
  546. examples/task_apps/pokemon_red/__init__.py +0 -3
  547. examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +0 -29
  548. examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +0 -225
  549. examples/task_apps/pokemon_red/pallet_town_rl_config.toml +0 -75
  550. examples/task_apps/pokemon_red/task_app.py +0 -799
  551. examples/task_apps/pokemon_red/test_pallet_town_rewards.py +0 -193
  552. examples/task_apps/sokoban/README.md +0 -307
  553. examples/task_apps/sokoban/__init__.py +0 -3
  554. examples/task_apps/sokoban/eval_groq_qwen32.toml +0 -16
  555. examples/task_apps/sokoban/eval_openai_gpt5.toml +0 -16
  556. examples/task_apps/sokoban/filter_sft.toml +0 -5
  557. examples/task_apps/sokoban/task_app.py +0 -1058
  558. examples/task_apps/sokoban/tests/__init__.py +0 -4
  559. examples/task_apps/sokoban/tests/conftest.py +0 -113
  560. examples/task_apps/sokoban/tests/integration/__init__.py +0 -4
  561. examples/task_apps/sokoban/tests/integration/test_sokoban_eval.py +0 -57
  562. examples/task_apps/sokoban/tests/integration/test_sokoban_rollout.py +0 -198
  563. examples/task_apps/sokoban/tests/unit/__init__.py +0 -4
  564. examples/task_apps/sokoban/tests/unit/test_sokoban_environment.py +0 -114
  565. examples/task_apps/verilog/__init__.py +0 -1
  566. examples/task_apps/verilog/eval_groq_qwen32b.toml +0 -24
  567. examples/task_apps/verilog/filter_sft.toml +0 -5
  568. examples/task_apps/verilog/task_app/README.md +0 -12
  569. examples/task_apps/verilog/task_app/__init__.py +0 -1
  570. examples/task_apps/verilog/task_app/grpo_verilog.py +0 -1166
  571. examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +0 -145
  572. examples/task_apps/verilog/tests/__init__.py +0 -4
  573. examples/task_apps/verilog/tests/conftest.py +0 -115
  574. examples/task_apps/verilog/tests/integration/__init__.py +0 -4
  575. examples/task_apps/verilog/tests/integration/test_verilog_eval.py +0 -181
  576. examples/task_apps/verilog/tests/integration/test_verilog_rollout.py +0 -55
  577. examples/task_apps/verilog/tests/unit/__init__.py +0 -4
  578. examples/task_apps/verilog/tests/unit/test_verilog_scoring.py +0 -118
  579. examples/vlm/PROPOSAL.md +0 -53
  580. examples/vlm/README.md +0 -68
  581. examples/vlm/configs/crafter_vlm_gpt4o.toml +0 -44
  582. examples/vlm/crafter_image_only_agent.py +0 -207
  583. examples/vlm/crafter_openai_vlm_agent.py +0 -277
  584. examples/vlm/filter_image_rows.py +0 -63
  585. examples/vlm/run_crafter_vlm_benchmark.py +0 -316
  586. examples/warming_up_to_rl/analyze_trace_db.py +0 -422
  587. examples/warming_up_to_rl/configs/crafter_fft.toml +0 -48
  588. examples/warming_up_to_rl/configs/crafter_fft_4b.toml +0 -54
  589. examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +0 -20
  590. examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +0 -13
  591. examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +0 -23
  592. examples/warming_up_to_rl/configs/eval_stepwise_complex.toml +0 -35
  593. examples/warming_up_to_rl/configs/eval_stepwise_consistent.toml +0 -26
  594. examples/warming_up_to_rl/configs/eval_stepwise_per_achievement.toml +0 -36
  595. examples/warming_up_to_rl/configs/eval_stepwise_simple.toml +0 -32
  596. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +0 -83
  597. examples/warming_up_to_rl/configs/rl_from_ft.toml +0 -56
  598. examples/warming_up_to_rl/export_trace_sft.py +0 -723
  599. examples/warming_up_to_rl/groq_test.py +0 -97
  600. examples/warming_up_to_rl/manage_secrets.py +0 -131
  601. examples/warming_up_to_rl/old/event_rewards.md +0 -234
  602. examples/warming_up_to_rl/old/notes.md +0 -73
  603. examples/warming_up_to_rl/readme.md +0 -179
  604. examples/warming_up_to_rl/run_eval.py +0 -736
  605. examples/warming_up_to_rl/run_fft_and_save.py +0 -380
  606. examples/warming_up_to_rl/run_local_rollout.py +0 -239
  607. examples/warming_up_to_rl/run_local_rollout_modal.py +0 -248
  608. examples/warming_up_to_rl/run_local_rollout_parallel.py +0 -405
  609. examples/warming_up_to_rl/run_local_rollout_traced.py +0 -477
  610. examples/warming_up_to_rl/run_rl_and_save.py +0 -124
  611. examples/warming_up_to_rl/run_rollout_remote.py +0 -156
  612. examples/workflows/__init__.py +0 -0
  613. examples/workflows/math_rl/__init__.py +0 -0
  614. examples/workflows/math_rl/configs/eval_base_qwen.toml +0 -15
  615. examples/workflows/math_rl/configs/eval_rl_qwen.toml +0 -11
  616. examples/workflows/math_rl/configs/rl_from_base_qwen.toml +0 -35
  617. examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +0 -74
  618. examples/workflows/math_rl/configs/rl_from_ft_qwen.toml +0 -35
  619. examples/workflows/math_rl/download_dataset.py +0 -80
  620. examples/workflows/math_rl/run_eval.py +0 -436
  621. examples/workflows/math_rl/run_rl_and_save.py +0 -111
  622. synth_ai/api/models/supported.py +0 -377
  623. synth_ai/api/train/__init__.py +0 -5
  624. synth_ai/api/train/builders.py +0 -351
  625. synth_ai/api/train/cli.py +0 -635
  626. synth_ai/api/train/config_finder.py +0 -228
  627. synth_ai/api/train/configs/__init__.py +0 -44
  628. synth_ai/api/train/configs/rl.py +0 -134
  629. synth_ai/api/train/configs/sft.py +0 -95
  630. synth_ai/api/train/configs/shared.py +0 -24
  631. synth_ai/api/train/env_resolver.py +0 -349
  632. synth_ai/api/train/pollers.py +0 -75
  633. synth_ai/api/train/supported_algos.py +0 -147
  634. synth_ai/api/train/task_app.py +0 -195
  635. synth_ai/api/train/utils.py +0 -225
  636. synth_ai/cli/_modal_wrapper.py +0 -29
  637. synth_ai/cli/_storage.py +0 -20
  638. synth_ai/cli/_typer_patch.py +0 -49
  639. synth_ai/cli/_validate_task_app.py +0 -11
  640. synth_ai/cli/balance.py +0 -216
  641. synth_ai/cli/calc.py +0 -84
  642. synth_ai/cli/demo.py +0 -165
  643. synth_ai/cli/legacy_root_backup.py +0 -468
  644. synth_ai/cli/man.py +0 -106
  645. synth_ai/cli/recent.py +0 -132
  646. synth_ai/cli/rl_demo.py +0 -254
  647. synth_ai/cli/status.py +0 -134
  648. synth_ai/cli/task_apps.py +0 -4523
  649. synth_ai/cli/traces.py +0 -164
  650. synth_ai/cli/tui.py +0 -57
  651. synth_ai/cli/watch.py +0 -506
  652. synth_ai/compound/cais.py +0 -0
  653. synth_ai/config/base_url.py +0 -107
  654. synth_ai/core/experiment.py +0 -13
  655. synth_ai/core/system.py +0 -15
  656. synth_ai/demo_registry.py +0 -295
  657. synth_ai/demos/core/__init__.py +0 -1
  658. synth_ai/demos/core/cli.py +0 -1718
  659. synth_ai/demos/demo_task_apps/core.py +0 -440
  660. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +0 -184
  661. synth_ai/demos/demo_task_apps/math/deploy_task_app.sh +0 -22
  662. synth_ai/demos/demo_task_apps/math/modal_task_app.py +0 -739
  663. synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -37
  664. synth_ai/environments/__init__.py +0 -31
  665. synth_ai/environments/environment/__init__.py +0 -1
  666. synth_ai/environments/environment/artifacts/__init__.py +0 -1
  667. synth_ai/environments/environment/artifacts/base.py +0 -52
  668. synth_ai/environments/environment/core.py +0 -67
  669. synth_ai/environments/environment/db/__init__.py +0 -1
  670. synth_ai/environments/environment/db/sqlite.py +0 -45
  671. synth_ai/environments/environment/registry.py +0 -233
  672. synth_ai/environments/environment/resources/sqlite.py +0 -45
  673. synth_ai/environments/environment/results.py +0 -1
  674. synth_ai/environments/environment/rewards/__init__.py +0 -1
  675. synth_ai/environments/environment/rewards/core.py +0 -29
  676. synth_ai/environments/environment/shared_engine.py +0 -26
  677. synth_ai/environments/environment/tools/__init__.py +0 -200
  678. synth_ai/environments/examples/__init__.py +0 -1
  679. synth_ai/environments/examples/bandit/__init__.py +0 -33
  680. synth_ai/environments/examples/bandit/engine.py +0 -302
  681. synth_ai/environments/examples/bandit/environment.py +0 -194
  682. synth_ai/environments/examples/bandit/taskset.py +0 -200
  683. synth_ai/environments/examples/crafter_classic/__init__.py +0 -8
  684. synth_ai/environments/examples/crafter_classic/agent_demos/analyze_semantic_words_markdown.py +0 -250
  685. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_comprehensive_evaluation.py +0 -59
  686. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_browser.py +0 -152
  687. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_config.toml +0 -24
  688. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_framework.py +0 -1194
  689. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/crafter_synth_config.toml +0 -56
  690. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_config_modal.toml +0 -32
  691. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_turso.py +0 -738
  692. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/kick_off_ft_modal.py +0 -384
  693. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_action_results.py +0 -53
  694. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_agent_actions.py +0 -178
  695. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_latest_run.py +0 -222
  696. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_lm_traces.py +0 -183
  697. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_no_rewards.py +0 -210
  698. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_trace_issue.py +0 -206
  699. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/check_db_schema.py +0 -49
  700. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/check_latest_results.py +0 -64
  701. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/debug_agent_responses.py +0 -88
  702. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/quick_trace_check.py +0 -77
  703. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/compare_experiments.py +0 -324
  704. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_turso.py +0 -580
  705. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/kick_off_ft_oai.py +0 -362
  706. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/multi_model_config.toml +0 -49
  707. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_enhanced_hooks.py +0 -332
  708. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_hook_events.py +0 -97
  709. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_hook_results.py +0 -217
  710. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/check_hook_storage.py +0 -87
  711. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/check_seeds.py +0 -88
  712. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/compare_seed_performance.py +0 -195
  713. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/custom_eval_pipelines.py +0 -400
  714. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/plot_hook_frequency.py +0 -195
  715. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/seed_analysis_summary.py +0 -56
  716. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/run_rollouts_for_models_and_compare_v3.py +0 -858
  717. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_quick_evaluation.py +0 -52
  718. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_react_agent.py +0 -874
  719. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_trace_evaluation.py +0 -1412
  720. synth_ai/environments/examples/crafter_classic/agent_demos/example_v3_usage.py +0 -216
  721. synth_ai/environments/examples/crafter_classic/agent_demos/old/compare_traces.py +0 -296
  722. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_comprehensive_evaluation.py +0 -58
  723. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_env_serialization.py +0 -464
  724. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_evaluation_browser.py +0 -152
  725. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_quick_evaluation.py +0 -51
  726. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_trace_evaluation.py +0 -1412
  727. synth_ai/environments/examples/crafter_classic/agent_demos/old/debug_player_loss.py +0 -112
  728. synth_ai/environments/examples/crafter_classic/agent_demos/old/diagnose_service.py +0 -203
  729. synth_ai/environments/examples/crafter_classic/agent_demos/old/diagnose_slowness.py +0 -305
  730. synth_ai/environments/examples/crafter_classic/agent_demos/old/eval_by_difficulty.py +0 -126
  731. synth_ai/environments/examples/crafter_classic/agent_demos/old/eval_example.py +0 -94
  732. synth_ai/environments/examples/crafter_classic/agent_demos/old/explore_saved_states.py +0 -142
  733. synth_ai/environments/examples/crafter_classic/agent_demos/old/filter_traces_sft.py +0 -26
  734. synth_ai/environments/examples/crafter_classic/agent_demos/old/filter_traces_sft_OLD.py +0 -984
  735. synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_data_gemini.py +0 -724
  736. synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_data_modal.py +0 -386
  737. synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_metadata.py +0 -205
  738. synth_ai/environments/examples/crafter_classic/agent_demos/old/kick_off_ft_gemini.py +0 -150
  739. synth_ai/environments/examples/crafter_classic/agent_demos/old/kick_off_ft_modal.py +0 -283
  740. synth_ai/environments/examples/crafter_classic/agent_demos/old/prepare_vertex_ft.py +0 -280
  741. synth_ai/environments/examples/crafter_classic/agent_demos/old/profile_env_slowness.py +0 -456
  742. synth_ai/environments/examples/crafter_classic/agent_demos/old/replicate_issue.py +0 -166
  743. synth_ai/environments/examples/crafter_classic/agent_demos/old/run_and_eval.py +0 -102
  744. synth_ai/environments/examples/crafter_classic/agent_demos/old/run_comparison.py +0 -128
  745. synth_ai/environments/examples/crafter_classic/agent_demos/old/run_qwen_rollouts.py +0 -655
  746. synth_ai/environments/examples/crafter_classic/agent_demos/old/trace_eval_OLD.py +0 -202
  747. synth_ai/environments/examples/crafter_classic/agent_demos/old/validate_openai_format.py +0 -166
  748. synth_ai/environments/examples/crafter_classic/config_logging.py +0 -111
  749. synth_ai/environments/examples/crafter_classic/debug_translation.py +0 -0
  750. synth_ai/environments/examples/crafter_classic/engine.py +0 -579
  751. synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +0 -64
  752. synth_ai/environments/examples/crafter_classic/engine_helpers/action_map.py +0 -6
  753. synth_ai/environments/examples/crafter_classic/engine_helpers/serialization.py +0 -75
  754. synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +0 -267
  755. synth_ai/environments/examples/crafter_classic/environment.py +0 -495
  756. synth_ai/environments/examples/crafter_classic/taskset.py +0 -233
  757. synth_ai/environments/examples/crafter_classic/trace_hooks_v3.py +0 -228
  758. synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +0 -299
  759. synth_ai/environments/examples/crafter_custom/__init__.py +0 -4
  760. synth_ai/environments/examples/crafter_custom/agent_demos/__init__.py +0 -1
  761. synth_ai/environments/examples/crafter_custom/agent_demos/trace_eval.py +0 -202
  762. synth_ai/environments/examples/crafter_custom/crafter/__init__.py +0 -7
  763. synth_ai/environments/examples/crafter_custom/crafter/config.py +0 -182
  764. synth_ai/environments/examples/crafter_custom/crafter/constants.py +0 -8
  765. synth_ai/environments/examples/crafter_custom/crafter/engine.py +0 -269
  766. synth_ai/environments/examples/crafter_custom/crafter/env.py +0 -262
  767. synth_ai/environments/examples/crafter_custom/crafter/objects.py +0 -417
  768. synth_ai/environments/examples/crafter_custom/crafter/recorder.py +0 -187
  769. synth_ai/environments/examples/crafter_custom/crafter/worldgen.py +0 -118
  770. synth_ai/environments/examples/crafter_custom/dataset_builder.py +0 -373
  771. synth_ai/environments/examples/crafter_custom/environment.py +0 -312
  772. synth_ai/environments/examples/crafter_custom/old/analyze_diamond_issue.py +0 -159
  773. synth_ai/environments/examples/crafter_custom/old/analyze_diamond_spawning.py +0 -158
  774. synth_ai/environments/examples/crafter_custom/old/compare_worlds.py +0 -71
  775. synth_ai/environments/examples/crafter_custom/old/dataset_stats.py +0 -105
  776. synth_ai/environments/examples/crafter_custom/old/diamond_spawning_summary.py +0 -119
  777. synth_ai/environments/examples/crafter_custom/old/example_dataset_usage.py +0 -52
  778. synth_ai/environments/examples/crafter_custom/run_dataset.py +0 -305
  779. synth_ai/environments/examples/enron/art_helpers/email_search_tools.py +0 -156
  780. synth_ai/environments/examples/enron/art_helpers/local_email_db.py +0 -281
  781. synth_ai/environments/examples/enron/art_helpers/types_enron.py +0 -25
  782. synth_ai/environments/examples/enron/engine.py +0 -300
  783. synth_ai/environments/examples/enron/environment.py +0 -234
  784. synth_ai/environments/examples/enron/taskset.py +0 -112
  785. synth_ai/environments/examples/enron/units/keyword_stats.py +0 -112
  786. synth_ai/environments/examples/minigrid/__init__.py +0 -48
  787. synth_ai/environments/examples/minigrid/agent_demos/minigrid_evaluation_framework.py +0 -1188
  788. synth_ai/environments/examples/minigrid/agent_demos/minigrid_quick_evaluation.py +0 -48
  789. synth_ai/environments/examples/minigrid/agent_demos/minigrid_react_agent.py +0 -562
  790. synth_ai/environments/examples/minigrid/agent_demos/minigrid_trace_evaluation.py +0 -221
  791. synth_ai/environments/examples/minigrid/engine.py +0 -589
  792. synth_ai/environments/examples/minigrid/environment.py +0 -274
  793. synth_ai/environments/examples/minigrid/environment_mapping.py +0 -242
  794. synth_ai/environments/examples/minigrid/puzzle_loader.py +0 -417
  795. synth_ai/environments/examples/minigrid/taskset.py +0 -583
  796. synth_ai/environments/examples/nethack/__init__.py +0 -7
  797. synth_ai/environments/examples/nethack/achievements.py +0 -337
  798. synth_ai/environments/examples/nethack/agent_demos/nethack_evaluation_framework.py +0 -981
  799. synth_ai/environments/examples/nethack/agent_demos/nethack_quick_evaluation.py +0 -74
  800. synth_ai/environments/examples/nethack/agent_demos/nethack_react_agent.py +0 -831
  801. synth_ai/environments/examples/nethack/engine.py +0 -739
  802. synth_ai/environments/examples/nethack/environment.py +0 -256
  803. synth_ai/environments/examples/nethack/helpers/__init__.py +0 -41
  804. synth_ai/environments/examples/nethack/helpers/action_mapping.py +0 -301
  805. synth_ai/environments/examples/nethack/helpers/nle_wrapper.py +0 -402
  806. synth_ai/environments/examples/nethack/helpers/observation_utils.py +0 -433
  807. synth_ai/environments/examples/nethack/helpers/recording_wrapper.py +0 -200
  808. synth_ai/environments/examples/nethack/helpers/trajectory_recorder.py +0 -269
  809. synth_ai/environments/examples/nethack/helpers/visualization/replay_viewer.py +0 -308
  810. synth_ai/environments/examples/nethack/helpers/visualization/visualizer.py +0 -431
  811. synth_ai/environments/examples/nethack/taskset.py +0 -323
  812. synth_ai/environments/examples/red/__init__.py +0 -7
  813. synth_ai/environments/examples/red/agent_demos/__init__.py +0 -1
  814. synth_ai/environments/examples/red/config_logging.py +0 -110
  815. synth_ai/environments/examples/red/engine.py +0 -721
  816. synth_ai/environments/examples/red/engine_helpers/__init__.py +0 -1
  817. synth_ai/environments/examples/red/engine_helpers/memory_map.py +0 -35
  818. synth_ai/environments/examples/red/engine_helpers/reward_components.py +0 -276
  819. synth_ai/environments/examples/red/engine_helpers/reward_library/__init__.py +0 -142
  820. synth_ai/environments/examples/red/engine_helpers/reward_library/adaptive_rewards.py +0 -57
  821. synth_ai/environments/examples/red/engine_helpers/reward_library/battle_rewards.py +0 -284
  822. synth_ai/environments/examples/red/engine_helpers/reward_library/composite_rewards.py +0 -150
  823. synth_ai/environments/examples/red/engine_helpers/reward_library/economy_rewards.py +0 -138
  824. synth_ai/environments/examples/red/engine_helpers/reward_library/efficiency_rewards.py +0 -57
  825. synth_ai/environments/examples/red/engine_helpers/reward_library/exploration_rewards.py +0 -331
  826. synth_ai/environments/examples/red/engine_helpers/reward_library/novelty_rewards.py +0 -121
  827. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_progression.py +0 -477
  828. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_rewards.py +0 -559
  829. synth_ai/environments/examples/red/engine_helpers/reward_library/pokemon_rewards.py +0 -313
  830. synth_ai/environments/examples/red/engine_helpers/reward_library/social_rewards.py +0 -148
  831. synth_ai/environments/examples/red/engine_helpers/reward_library/story_rewards.py +0 -247
  832. synth_ai/environments/examples/red/engine_helpers/screen_analysis.py +0 -368
  833. synth_ai/environments/examples/red/engine_helpers/state_extraction.py +0 -172
  834. synth_ai/environments/examples/red/environment.py +0 -298
  835. synth_ai/environments/examples/red/taskset.py +0 -79
  836. synth_ai/environments/examples/red/units/__init__.py +0 -1
  837. synth_ai/environments/examples/sokoban/__init__.py +0 -1
  838. synth_ai/environments/examples/sokoban/agent_demos/sokoban_full_eval.py +0 -899
  839. synth_ai/environments/examples/sokoban/engine.py +0 -678
  840. synth_ai/environments/examples/sokoban/engine_helpers/__init__.py +0 -1
  841. synth_ai/environments/examples/sokoban/engine_helpers/room_utils.py +0 -657
  842. synth_ai/environments/examples/sokoban/engine_helpers/vendored/__init__.py +0 -18
  843. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/__init__.py +0 -3
  844. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/boxoban_env.py +0 -131
  845. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/render_utils.py +0 -370
  846. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/room_utils.py +0 -332
  847. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env.py +0 -306
  848. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_fixed_targets.py +0 -67
  849. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_pull.py +0 -115
  850. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_two_player.py +0 -123
  851. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_variations.py +0 -394
  852. synth_ai/environments/examples/sokoban/environment.py +0 -229
  853. synth_ai/environments/examples/sokoban/generate_verified_puzzles.py +0 -440
  854. synth_ai/environments/examples/sokoban/puzzle_loader.py +0 -312
  855. synth_ai/environments/examples/sokoban/taskset.py +0 -544
  856. synth_ai/environments/examples/tictactoe/__init__.py +0 -1
  857. synth_ai/environments/examples/tictactoe/engine.py +0 -368
  858. synth_ai/environments/examples/tictactoe/environment.py +0 -240
  859. synth_ai/environments/examples/tictactoe/taskset.py +0 -215
  860. synth_ai/environments/examples/verilog/__init__.py +0 -10
  861. synth_ai/environments/examples/verilog/engine.py +0 -421
  862. synth_ai/environments/examples/verilog/environment.py +0 -350
  863. synth_ai/environments/examples/verilog/taskset.py +0 -420
  864. synth_ai/environments/examples/wordle/__init__.py +0 -29
  865. synth_ai/environments/examples/wordle/engine.py +0 -398
  866. synth_ai/environments/examples/wordle/environment.py +0 -159
  867. synth_ai/environments/examples/wordle/helpers/generate_instances_wordfreq.py +0 -75
  868. synth_ai/environments/examples/wordle/taskset.py +0 -230
  869. synth_ai/environments/reproducibility/core.py +0 -42
  870. synth_ai/environments/reproducibility/helpers.py +0 -0
  871. synth_ai/environments/reproducibility/tree.py +0 -363
  872. synth_ai/environments/service/app.py +0 -97
  873. synth_ai/environments/service/core_routes.py +0 -1021
  874. synth_ai/environments/service/external_registry.py +0 -56
  875. synth_ai/environments/service/registry.py +0 -9
  876. synth_ai/environments/stateful/__init__.py +0 -1
  877. synth_ai/environments/stateful/core.py +0 -163
  878. synth_ai/environments/stateful/engine.py +0 -21
  879. synth_ai/environments/stateful/state.py +0 -7
  880. synth_ai/environments/tasks/api.py +0 -19
  881. synth_ai/environments/tasks/core.py +0 -81
  882. synth_ai/environments/tasks/filters.py +0 -40
  883. synth_ai/environments/tasks/utils.py +0 -90
  884. synth_ai/environments/v0_observability/history.py +0 -3
  885. synth_ai/environments/v0_observability/log.py +0 -2
  886. synth_ai/evals/__init__.py +0 -15
  887. synth_ai/evals/base.py +0 -13
  888. synth_ai/evals/client.py +0 -82
  889. synth_ai/handshake.py +0 -109
  890. synth_ai/http.py +0 -26
  891. synth_ai/http_client.py +0 -136
  892. synth_ai/inference/__init__.py +0 -5
  893. synth_ai/inference/client.py +0 -34
  894. synth_ai/jobs/client.py +0 -295
  895. synth_ai/judge_schemas.py +0 -127
  896. synth_ai/learning/__init__.py +0 -59
  897. synth_ai/learning/client.py +0 -241
  898. synth_ai/learning/ft_client.py +0 -7
  899. synth_ai/learning/health.py +0 -49
  900. synth_ai/learning/jobs.py +0 -201
  901. synth_ai/learning/rl/client.py +0 -267
  902. synth_ai/learning/rl/contracts.py +0 -27
  903. synth_ai/learning/rl/env_keys.py +0 -166
  904. synth_ai/learning/rl/secrets.py +0 -13
  905. synth_ai/learning/sft/client.py +0 -68
  906. synth_ai/learning/sft/config.py +0 -270
  907. synth_ai/learning/sft/data.py +0 -295
  908. synth_ai/learning/validators.py +0 -49
  909. synth_ai/lm/__init__.py +0 -25
  910. synth_ai/task/__init__.py +0 -121
  911. synth_ai/task/apps/__init__.py +0 -129
  912. synth_ai/task/config.py +0 -257
  913. synth_ai/task/contracts.py +0 -236
  914. synth_ai/task/datasets.py +0 -108
  915. synth_ai/task/proxy.py +0 -251
  916. synth_ai/task/rubrics/__init__.py +0 -56
  917. synth_ai/task/rubrics/loaders.py +0 -152
  918. synth_ai/task/server.py +0 -432
  919. synth_ai/task/trace_correlation_helpers.py +0 -315
  920. synth_ai/task/tracing_utils.py +0 -84
  921. synth_ai/task/validators.py +0 -418
  922. synth_ai/tracing_v3/__init__.py +0 -97
  923. synth_ai/tracing_v3/abstractions.py +0 -302
  924. synth_ai/tracing_v3/config.py +0 -84
  925. synth_ai/tracing_v3/db_config.py +0 -194
  926. synth_ai/tracing_v3/decorators.py +0 -398
  927. synth_ai/tracing_v3/llm_call_record_helpers.py +0 -391
  928. synth_ai/tracing_v3/migration_helper.py +0 -120
  929. synth_ai/tracing_v3/session_tracer.py +0 -540
  930. synth_ai/tracing_v3/storage/base.py +0 -210
  931. synth_ai/tracing_v3/storage/config.py +0 -75
  932. synth_ai/tracing_v3/storage/factory.py +0 -39
  933. synth_ai/tracing_v3/trace_utils.py +0 -317
  934. synth_ai/tracing_v3/turso/daemon.py +0 -151
  935. synth_ai/tracing_v3/turso/models.py +0 -469
  936. synth_ai/tracing_v3/turso/native_manager.py +0 -1209
  937. synth_ai/tracing_v3/utils.py +0 -108
  938. synth_ai/tui/__init__.py +0 -5
  939. synth_ai/tui/__main__.py +0 -13
  940. synth_ai/tui/cli/__init__.py +0 -1
  941. synth_ai/tui/cli/query_experiments.py +0 -164
  942. synth_ai/tui/cli/query_experiments_v3.py +0 -164
  943. synth_ai/tui/dashboard.py +0 -906
  944. synth_ai/v0/api/__init__.py +0 -8
  945. synth_ai/v0/api/models/__init__.py +0 -8
  946. synth_ai/v0/api/models/supported.py +0 -8
  947. synth_ai/v0/config/__init__.py +0 -15
  948. synth_ai/v0/config/base_url.py +0 -12
  949. synth_ai/v0/lm/__init__.py +0 -51
  950. synth_ai/v0/lm/caching/__init__.py +0 -0
  951. synth_ai/v0/lm/caching/constants.py +0 -6
  952. synth_ai/v0/lm/caching/dbs.py +0 -0
  953. synth_ai/v0/lm/caching/ephemeral.py +0 -100
  954. synth_ai/v0/lm/caching/handler.py +0 -137
  955. synth_ai/v0/lm/caching/initialize.py +0 -11
  956. synth_ai/v0/lm/caching/persistent.py +0 -114
  957. synth_ai/v0/lm/config.py +0 -115
  958. synth_ai/v0/lm/constants.py +0 -32
  959. synth_ai/v0/lm/core/__init__.py +0 -8
  960. synth_ai/v0/lm/core/all.py +0 -73
  961. synth_ai/v0/lm/core/exceptions.py +0 -5
  962. synth_ai/v0/lm/core/main.py +0 -331
  963. synth_ai/v0/lm/core/main_v3.py +0 -594
  964. synth_ai/v0/lm/core/synth_models.py +0 -35
  965. synth_ai/v0/lm/core/vendor_clients.py +0 -190
  966. synth_ai/v0/lm/cost/__init__.py +0 -0
  967. synth_ai/v0/lm/cost/monitor.py +0 -1
  968. synth_ai/v0/lm/cost/statefulness.py +0 -1
  969. synth_ai/v0/lm/injection.py +0 -80
  970. synth_ai/v0/lm/overrides.py +0 -206
  971. synth_ai/v0/lm/provider_support/__init__.py +0 -8
  972. synth_ai/v0/lm/provider_support/anthropic.py +0 -972
  973. synth_ai/v0/lm/provider_support/openai.py +0 -1139
  974. synth_ai/v0/lm/provider_support/suppress_logging.py +0 -31
  975. synth_ai/v0/lm/structured_outputs/__init__.py +0 -0
  976. synth_ai/v0/lm/structured_outputs/handler.py +0 -440
  977. synth_ai/v0/lm/structured_outputs/inject.py +0 -297
  978. synth_ai/v0/lm/structured_outputs/rehabilitate.py +0 -185
  979. synth_ai/v0/lm/tools/__init__.py +0 -3
  980. synth_ai/v0/lm/tools/base.py +0 -172
  981. synth_ai/v0/lm/unified_interface.py +0 -202
  982. synth_ai/v0/lm/vendors/__init__.py +0 -0
  983. synth_ai/v0/lm/vendors/base.py +0 -81
  984. synth_ai/v0/lm/vendors/core/__init__.py +0 -0
  985. synth_ai/v0/lm/vendors/core/anthropic_api.py +0 -387
  986. synth_ai/v0/lm/vendors/core/gemini_api.py +0 -292
  987. synth_ai/v0/lm/vendors/core/mistral_api.py +0 -322
  988. synth_ai/v0/lm/vendors/core/openai_api.py +0 -227
  989. synth_ai/v0/lm/vendors/core/synth_dev_api.py +0 -0
  990. synth_ai/v0/lm/vendors/local/__init__.py +0 -0
  991. synth_ai/v0/lm/vendors/local/ollama.py +0 -0
  992. synth_ai/v0/lm/vendors/openai_standard.py +0 -782
  993. synth_ai/v0/lm/vendors/openai_standard_responses.py +0 -259
  994. synth_ai/v0/lm/vendors/retries.py +0 -22
  995. synth_ai/v0/lm/vendors/supported/__init__.py +0 -0
  996. synth_ai/v0/lm/vendors/supported/custom_endpoint.py +0 -415
  997. synth_ai/v0/lm/vendors/supported/deepseek.py +0 -69
  998. synth_ai/v0/lm/vendors/supported/grok.py +0 -75
  999. synth_ai/v0/lm/vendors/supported/groq.py +0 -16
  1000. synth_ai/v0/lm/vendors/supported/ollama.py +0 -15
  1001. synth_ai/v0/lm/vendors/supported/openrouter.py +0 -74
  1002. synth_ai/v0/lm/vendors/supported/together.py +0 -11
  1003. synth_ai/v0/lm/vendors/synth_client.py +0 -835
  1004. synth_ai/v0/lm/warmup.py +0 -186
  1005. synth_ai/v0/tracing/__init__.py +0 -0
  1006. synth_ai/v0/tracing/abstractions.py +0 -224
  1007. synth_ai/v0/tracing/base_client.py +0 -91
  1008. synth_ai/v0/tracing/client_manager.py +0 -131
  1009. synth_ai/v0/tracing/config.py +0 -142
  1010. synth_ai/v0/tracing/context.py +0 -146
  1011. synth_ai/v0/tracing/decorators.py +0 -682
  1012. synth_ai/v0/tracing/events/__init__.py +0 -0
  1013. synth_ai/v0/tracing/events/manage.py +0 -147
  1014. synth_ai/v0/tracing/events/scope.py +0 -86
  1015. synth_ai/v0/tracing/events/store.py +0 -228
  1016. synth_ai/v0/tracing/immediate_client.py +0 -151
  1017. synth_ai/v0/tracing/local.py +0 -18
  1018. synth_ai/v0/tracing/log_client_base.py +0 -73
  1019. synth_ai/v0/tracing/retry_queue.py +0 -186
  1020. synth_ai/v0/tracing/trackers.py +0 -515
  1021. synth_ai/v0/tracing/upload.py +0 -409
  1022. synth_ai/v0/tracing/utils.py +0 -9
  1023. synth_ai/v0/tracing_v1/__init__.py +0 -16
  1024. synth_ai/v0/tracing_v1/abstractions.py +0 -224
  1025. synth_ai/v0/tracing_v1/base_client.py +0 -91
  1026. synth_ai/v0/tracing_v1/client_manager.py +0 -131
  1027. synth_ai/v0/tracing_v1/config.py +0 -142
  1028. synth_ai/v0/tracing_v1/context.py +0 -146
  1029. synth_ai/v0/tracing_v1/decorators.py +0 -703
  1030. synth_ai/v0/tracing_v1/events/__init__.py +0 -0
  1031. synth_ai/v0/tracing_v1/events/manage.py +0 -147
  1032. synth_ai/v0/tracing_v1/events/scope.py +0 -86
  1033. synth_ai/v0/tracing_v1/events/store.py +0 -228
  1034. synth_ai/v0/tracing_v1/immediate_client.py +0 -151
  1035. synth_ai/v0/tracing_v1/local.py +0 -18
  1036. synth_ai/v0/tracing_v1/log_client_base.py +0 -73
  1037. synth_ai/v0/tracing_v1/retry_queue.py +0 -186
  1038. synth_ai/v0/tracing_v1/trackers.py +0 -515
  1039. synth_ai/v0/tracing_v1/upload.py +0 -527
  1040. synth_ai/v0/tracing_v1/utils.py +0 -9
  1041. synth_ai/v0/tracing_v3/__init__.py +0 -10
  1042. synth_ai/v0/tracing_v3/abstractions.py +0 -3
  1043. synth_ai/v0/tracing_v3/decorators.py +0 -3
  1044. synth_ai/v0/tracing_v3/llm_call_record_helpers.py +0 -3
  1045. synth_ai/v0/tracing_v3/session_tracer.py +0 -3
  1046. synth_ai-0.2.14.dist-info/METADATA +0 -139
  1047. synth_ai-0.2.14.dist-info/RECORD +0 -762
  1048. synth_ai-0.2.14.dist-info/top_level.txt +0 -2
  1049. /synth_ai/{demos/demo_task_apps → cli/demo_apps}/crafter/__init__.py +0 -0
  1050. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/__init__.py +0 -0
  1051. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/crafter/configs/crafter_fft_4b.toml +0 -0
  1052. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/crafter/configs/rl_from_base_qwen4b.toml +0 -0
  1053. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/__init__.py +0 -0
  1054. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/_common.py +0 -0
  1055. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/app.py +0 -0
  1056. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/config.toml +0 -0
  1057. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/deploy_modal.py +0 -0
  1058. {examples/task_apps → synth_ai/core/apps}/__init__.py +0 -0
  1059. /synth_ai/{tracing_v3 → core/tracing_v3}/examples/basic_usage.py +0 -0
  1060. /synth_ai/{tracing_v3 → core/tracing_v3}/hooks.py +0 -0
  1061. /synth_ai/{tracing_v3 → core/tracing_v3}/lm_call_record_abstractions.py +0 -0
  1062. /synth_ai/{tracing_v3 → core/tracing_v3}/replica_sync.py +0 -0
  1063. /synth_ai/{tracing_v3 → core/tracing_v3}/serialization.py +0 -0
  1064. /synth_ai/{tracing_v3 → core/tracing_v3}/storage/__init__.py +0 -0
  1065. /synth_ai/{tracing_v3 → core/tracing_v3}/storage/exceptions.py +0 -0
  1066. /synth_ai/{tracing_v3 → core/tracing_v3}/storage/types.py +0 -0
  1067. /synth_ai/{tracing_v3 → core/tracing_v3}/storage/utils.py +0 -0
  1068. /synth_ai/{tracing_v3 → core/tracing_v3}/turso/__init__.py +0 -0
  1069. /synth_ai/{evals → sdk/judging}/types.py +0 -0
  1070. /synth_ai/{learning → sdk/learning}/algorithms.py +0 -0
  1071. /synth_ai/{learning → sdk/learning}/config.py +0 -0
  1072. /synth_ai/{learning → sdk/learning}/constants.py +0 -0
  1073. /synth_ai/{learning → sdk/learning}/core.py +0 -0
  1074. /synth_ai/{learning → sdk/learning}/gateway.py +0 -0
  1075. /synth_ai/{learning → sdk/learning}/rl/__init__.py +0 -0
  1076. /synth_ai/{learning → sdk/learning}/rl/config.py +0 -0
  1077. /synth_ai/{learning → sdk/learning}/rl_client.py +0 -0
  1078. /synth_ai/{learning → sdk/learning}/sft/__init__.py +0 -0
  1079. /synth_ai/{learning → sdk/learning}/sse.py +0 -0
  1080. /synth_ai/{task → sdk/task}/auth.py +0 -0
  1081. /synth_ai/{task → sdk/task}/client.py +0 -0
  1082. /synth_ai/{task → sdk/task}/errors.py +0 -0
  1083. /synth_ai/{task → sdk/task}/health.py +0 -0
  1084. /synth_ai/{task → sdk/task}/json.py +0 -0
  1085. /synth_ai/{task → sdk/task}/rubrics/models.py +0 -0
  1086. /synth_ai/{task → sdk/task}/rubrics/scoring.py +0 -0
  1087. /synth_ai/{task → sdk/task}/rubrics/strict.py +0 -0
  1088. /synth_ai/{task → sdk/task}/vendors.py +0 -0
  1089. {synth_ai-0.2.14.dist-info → synth_ai-0.4.1.dist-info}/WHEEL +0 -0
  1090. {synth_ai-0.2.14.dist-info → synth_ai-0.4.1.dist-info}/entry_points.txt +0 -0
  1091. {synth_ai-0.2.14.dist-info → synth_ai-0.4.1.dist-info}/licenses/LICENSE +0 -0
@@ -1,1502 +0,0 @@
1
- """
2
- Simple Agent Module
3
-
4
- Provides a streamlined approach for direct frame + state -> action processing,
5
- with enhanced history tracking to prevent getting stuck in loops.
6
-
7
- Key improvements over the original simple mode:
8
- - Location-based stuck detection (tracks repeated actions at same coordinates)
9
- - Context-aware history (overworld/battle/menu/dialogue awareness)
10
- - Memory management to fit within LLM context limits
11
- - Detailed history tracking with timestamps and game state summaries
12
- - Smart context switching that helps agent avoid infinite loops
13
- - Configurable history window sizes for different use cases
14
- - Chain of thought reasoning with structured LLM responses
15
- - Objectives system with automatic and manual completion tracking
16
- - Dynamic goal setting and progress monitoring
17
-
18
- The agent maintains objectives (go to location, battle trainer, etc.) that are
19
- automatically tracked and marked complete when achieved. The LLM can also
20
- manually complete objectives and create new ones dynamically through structured
21
- commands. It uses chain of thought reasoning to make better decisions while
22
- considering current objectives. All state including objectives is forwarded
23
- to support external monitoring and debugging.
24
-
25
- Configuration defaults (can be customized):
26
- - 100 previous state/location entries (with context and reasoning)
27
- - 50 recent button presses tracked
28
- - 15 history entries shown to LLM in prompts
29
- - 20 recent actions shown to LLM in prompts
30
- - Automatic memory management to stay within LLM context limits
31
- """
32
-
33
- import logging
34
- import os
35
- import sys
36
- from collections import deque
37
- from dataclasses import dataclass, field
38
- from datetime import datetime
39
- from typing import List, Dict, Any, Optional, Tuple
40
- import numpy as np
41
- from PIL import Image
42
-
43
- from utils.state_formatter import format_state_for_llm
44
-
45
- logger = logging.getLogger(__name__)
46
-
47
- # Configurable parameters for history tracking
48
- DEFAULT_MAX_HISTORY_ENTRIES = 100 # Previous states/locations with context
49
- DEFAULT_MAX_RECENT_ACTIONS = 50 # Recent button presses
50
- DEFAULT_HISTORY_DISPLAY_COUNT = 30 # Number of history entries shown to LLM
51
- DEFAULT_ACTIONS_DISPLAY_COUNT = 40 # Number of recent actions shown to LLM
52
-
53
- def configure_simple_agent_defaults(max_history_entries: int = None, max_recent_actions: int = None,
54
- history_display_count: int = None, actions_display_count: int = None):
55
- """Configure default parameters for all new SimpleAgent instances"""
56
- global DEFAULT_MAX_HISTORY_ENTRIES, DEFAULT_MAX_RECENT_ACTIONS
57
- global DEFAULT_HISTORY_DISPLAY_COUNT, DEFAULT_ACTIONS_DISPLAY_COUNT
58
-
59
- if max_history_entries is not None:
60
- DEFAULT_MAX_HISTORY_ENTRIES = max_history_entries
61
- if max_recent_actions is not None:
62
- DEFAULT_MAX_RECENT_ACTIONS = max_recent_actions
63
- if history_display_count is not None:
64
- DEFAULT_HISTORY_DISPLAY_COUNT = history_display_count
65
- if actions_display_count is not None:
66
- DEFAULT_ACTIONS_DISPLAY_COUNT = actions_display_count
67
-
68
- logger.info(f"Updated SimpleAgent defaults: {DEFAULT_MAX_HISTORY_ENTRIES} history, {DEFAULT_MAX_RECENT_ACTIONS} actions, "
69
- f"display {DEFAULT_HISTORY_DISPLAY_COUNT}/{DEFAULT_ACTIONS_DISPLAY_COUNT}")
70
-
71
- @dataclass
72
- class Objective:
73
- """Single objective/goal for the agent"""
74
- id: str
75
- description: str
76
- objective_type: str # "location", "battle", "item", "dialogue", "custom"
77
- target_value: Optional[Any] = None # Specific target (coords, trainer name, item name, etc.)
78
- completed: bool = False
79
- created_at: datetime = field(default_factory=datetime.now)
80
- completed_at: Optional[datetime] = None
81
- progress_notes: str = ""
82
- storyline: bool = False # True for main storyline objectives (auto-verified), False for agent sub-objectives
83
- milestone_id: Optional[str] = None # Emulator milestone ID for storyline objectives
84
-
85
- @dataclass
86
- class HistoryEntry:
87
- """Single entry in the agent's history"""
88
- timestamp: datetime
89
- player_coords: Optional[Tuple[int, int]]
90
- map_id: Optional[int]
91
- context: str # "overworld", "battle", "menu", "dialogue"
92
- action_taken: str
93
- game_state_summary: str
94
-
95
- @dataclass
96
- class SimpleAgentState:
97
- """Maintains history and state for the simple agent"""
98
- # Note: We don't use defaults here because they're captured at class definition time
99
- history: deque = None
100
- recent_actions: deque = None
101
- stuck_detection: Dict[str, int] = field(default_factory=dict)
102
- step_counter: int = 0
103
- objectives: List[Objective] = field(default_factory=list)
104
- objectives_updated: bool = False
105
- failed_movements: Dict[str, List[str]] = field(default_factory=dict) # coord_key -> [failed_directions]
106
- npc_interactions: Dict[str, str] = field(default_factory=dict) # coord_key -> interaction_notes
107
-
108
- def __post_init__(self):
109
- """Initialize deques with current default values"""
110
- if self.history is None:
111
- self.history = deque(maxlen=DEFAULT_MAX_HISTORY_ENTRIES)
112
- if self.recent_actions is None:
113
- self.recent_actions = deque(maxlen=DEFAULT_MAX_RECENT_ACTIONS)
114
-
115
- class SimpleAgent:
116
- """
117
- Simple agent that processes frame + state -> action directly with history tracking
118
- """
119
-
120
- def __init__(self, vlm, max_history_entries: int = None, max_recent_actions: int = None,
121
- history_display_count: int = None, actions_display_count: int = None):
122
- self.vlm = vlm
123
-
124
- # Use current global defaults if not specified
125
- max_history_entries = max_history_entries or DEFAULT_MAX_HISTORY_ENTRIES
126
- max_recent_actions = max_recent_actions or DEFAULT_MAX_RECENT_ACTIONS
127
- history_display_count = history_display_count or DEFAULT_HISTORY_DISPLAY_COUNT
128
- actions_display_count = actions_display_count or DEFAULT_ACTIONS_DISPLAY_COUNT
129
-
130
- self.state = SimpleAgentState()
131
- self.state.history = deque(maxlen=max_history_entries)
132
- self.state.recent_actions = deque(maxlen=max_recent_actions)
133
-
134
- # Display parameters for LLM prompts
135
- self.history_display_count = history_display_count
136
- self.actions_display_count = actions_display_count
137
-
138
- # Initialize storyline objectives for Emerald progression
139
- self._initialize_storyline_objectives()
140
-
141
- def _initialize_storyline_objectives(self):
142
- """Initialize the main storyline objectives for Pokémon Emerald progression"""
143
- storyline_objectives = [
144
- {
145
- "id": "story_game_start",
146
- "description": "Complete title sequence and begin the game",
147
- "objective_type": "system",
148
- "target_value": "Game Running",
149
- "milestone_id": "GAME_RUNNING"
150
- },
151
- {
152
- "id": "story_littleroot_town",
153
- "description": "Arrive in Littleroot Town and explore the area",
154
- "objective_type": "location",
155
- "target_value": "Littleroot Town",
156
- "milestone_id": "LITTLEROOT_TOWN"
157
- },
158
- {
159
- "id": "story_route_101",
160
- "description": "Travel north to Route 101 and encounter Prof. Birch",
161
- "objective_type": "location",
162
- "target_value": "Route 101",
163
- "milestone_id": "ROUTE_101"
164
- },
165
- {
166
- "id": "story_starter_chosen",
167
- "description": "Choose starter Pokémon and receive first party member",
168
- "objective_type": "pokemon",
169
- "target_value": "Starter Pokémon",
170
- "milestone_id": "STARTER_CHOSEN"
171
- },
172
- {
173
- "id": "story_oldale_town",
174
- "description": "Continue journey to Oldale Town",
175
- "objective_type": "location",
176
- "target_value": "Oldale Town",
177
- "milestone_id": "OLDALE_TOWN"
178
- },
179
- {
180
- "id": "story_route_103",
181
- "description": "Travel to Route 103 to meet rival",
182
- "objective_type": "location",
183
- "target_value": "Route 103",
184
- "milestone_id": "ROUTE_103"
185
- },
186
- {
187
- "id": "story_route_102",
188
- "description": "Return through Route 102 toward Petalburg City",
189
- "objective_type": "location",
190
- "target_value": "Route 102",
191
- "milestone_id": "ROUTE_102"
192
- },
193
- {
194
- "id": "story_petalburg_city",
195
- "description": "Navigate to Petalburg City and visit Dad's gym",
196
- "objective_type": "location",
197
- "target_value": "Petalburg City",
198
- "milestone_id": "PETALBURG_CITY"
199
- },
200
- {
201
- "id": "story_route_104",
202
- "description": "Travel north through Route 104 toward Petalburg Woods",
203
- "objective_type": "location",
204
- "target_value": "Route 104",
205
- "milestone_id": "ROUTE_104"
206
- },
207
- {
208
- "id": "story_petalburg_woods",
209
- "description": "Navigate through Petalburg Woods to help Devon researcher",
210
- "objective_type": "location",
211
- "target_value": "Petalburg Woods",
212
- "milestone_id": "PETALBURG_WOODS"
213
- },
214
- {
215
- "id": "story_rustboro_city",
216
- "description": "Arrive in Rustboro City and deliver Devon Goods",
217
- "objective_type": "location",
218
- "target_value": "Rustboro City",
219
- "milestone_id": "RUSTBORO_CITY"
220
- },
221
- {
222
- "id": "story_rustboro_gym",
223
- "description": "Enter the Rustboro Gym and prepare for Roxanne battle",
224
- "objective_type": "location",
225
- "target_value": "Rustboro Gym",
226
- "milestone_id": None # Gym entry doesn't have separate milestone
227
- },
228
- {
229
- "id": "story_stone_badge",
230
- "description": "Defeat Roxanne and earn the Stone Badge",
231
- "objective_type": "battle",
232
- "target_value": "Stone Badge",
233
- "milestone_id": "STONE_BADGE"
234
- }
235
- ]
236
-
237
- # Add storyline objectives to the state
238
- for obj_data in storyline_objectives:
239
- objective = Objective(
240
- id=obj_data["id"],
241
- description=obj_data["description"],
242
- objective_type=obj_data["objective_type"],
243
- target_value=obj_data["target_value"],
244
- completed=False,
245
- progress_notes="Storyline objective - verified by emulator milestones",
246
- storyline=True,
247
- milestone_id=obj_data["milestone_id"]
248
- )
249
- self.state.objectives.append(objective)
250
-
251
- logger.info(f"Initialized {len(storyline_objectives)} storyline objectives for Emerald progression")
252
-
253
- def get_game_context(self, game_state: Dict[str, Any]) -> str:
254
- """Determine current game context (overworld, battle, menu, dialogue)"""
255
- try:
256
- # Check if in title sequence first
257
- player_location = game_state.get("player", {}).get("location", "")
258
- if player_location == "TITLE_SEQUENCE":
259
- return "title"
260
-
261
- # Check game state for title/intro
262
- game_state_value = game_state.get("game", {}).get("game_state", "").lower()
263
- if "title" in game_state_value or "intro" in game_state_value:
264
- return "title"
265
-
266
- # Check if player name is not set (indicates title sequence)
267
- player_name = game_state.get("player", {}).get("name", "").strip()
268
- if not player_name or player_name == "????????":
269
- return "title"
270
-
271
- # Check if in battle
272
- is_in_battle = game_state.get("game", {}).get("is_in_battle", False)
273
- if is_in_battle:
274
- logger.debug(f"Detected battle context")
275
- return "battle"
276
-
277
- # Check if dialogue is active
278
- dialogue_state = game_state.get("game", {}).get("dialogue", {})
279
- if dialogue_state.get("active", False) or dialogue_state.get("text", "").strip():
280
- return "dialogue"
281
-
282
- # Check if in menu (simplified detection)
283
- # Could be enhanced with more sophisticated menu detection
284
- player_state = game_state.get("player", {})
285
- if player_state.get("in_menu", False):
286
- return "menu"
287
-
288
- # Default to overworld
289
- return "overworld"
290
-
291
- except Exception as e:
292
- logger.warning(f"Error determining game context: {e}")
293
- return "unknown"
294
-
295
- def get_player_coords(self, game_state: Dict[str, Any]) -> Optional[Tuple[int, int]]:
296
- """Extract player coordinates from game state"""
297
- try:
298
- player = game_state.get("player", {})
299
- # Try position.x/y first (standard format)
300
- position = player.get("position", {})
301
- if position:
302
- x = position.get("x")
303
- y = position.get("y")
304
- if x is not None and y is not None:
305
- return (x, y)
306
-
307
- # Fallback: try direct x/y on player
308
- x = player.get("x")
309
- y = player.get("y")
310
- if x is not None and y is not None:
311
- return (x, y)
312
- except Exception as e:
313
- logger.warning(f"Error getting player coords: {e}")
314
- return None
315
-
316
- def get_map_id(self, game_state: Dict[str, Any]) -> Optional[int]:
317
- """Extract map ID from game state"""
318
- try:
319
- return game_state.get("map", {}).get("id")
320
- except Exception as e:
321
- logger.warning(f"Error getting map ID: {e}")
322
- return None
323
-
324
- def add_objective(self, description: str, objective_type: str, target_value: Any = None) -> str:
325
- """Add a new objective and return its ID"""
326
- obj_id = f"obj_{len(self.state.objectives)}_{int(datetime.now().timestamp())}"
327
- objective = Objective(
328
- id=obj_id,
329
- description=description,
330
- objective_type=objective_type,
331
- target_value=target_value
332
- )
333
- self.state.objectives.append(objective)
334
- self.state.objectives_updated = True
335
- logger.info(f"Added objective: {description}")
336
- return obj_id
337
-
338
- def complete_objective(self, obj_id: str, progress_notes: str = ""):
339
- """Mark an objective as completed (storyline objectives cannot be manually completed)"""
340
- for obj in self.state.objectives:
341
- if obj.id == obj_id and not obj.completed:
342
- # Prevent manual completion of storyline objectives
343
- if obj.storyline:
344
- logger.warning(f"Cannot manually complete storyline objective: {obj.description}. These are verified by emulator milestones.")
345
- return False
346
-
347
- obj.completed = True
348
- obj.completed_at = datetime.now()
349
- obj.progress_notes = progress_notes
350
- self.state.objectives_updated = True
351
- logger.info(f"Completed objective: {obj.description}")
352
- return True
353
- return False
354
-
355
- def get_active_objectives(self) -> List[Objective]:
356
- """Get list of uncompleted objectives"""
357
- return [obj for obj in self.state.objectives if not obj.completed]
358
-
359
- def get_completed_objectives(self) -> List[Objective]:
360
- """Get list of completed objectives"""
361
- return [obj for obj in self.state.objectives if obj.completed]
362
-
363
- def check_objective_completion(self, game_state: Dict[str, Any]) -> List[str]:
364
- """Check if any objectives should be marked as completed based on game state"""
365
- completed_ids = []
366
- coords = self.get_player_coords(game_state)
367
- context = self.get_game_context(game_state)
368
- map_id = self.get_map_id(game_state)
369
-
370
- for obj in self.get_active_objectives():
371
- should_complete = False
372
- notes = ""
373
-
374
- if obj.objective_type == "location" and coords and obj.target_value:
375
- # Check if player reached target location
376
- # Note: target_value is a string (location name) for storyline objectives
377
- # Location objectives are completed via milestone verification, not coordinate checking
378
- # This section is for dynamically added coordinate-based objectives
379
- if isinstance(obj.target_value, (tuple, list)) and len(obj.target_value) == 2:
380
- target_x, target_y = obj.target_value
381
- if abs(coords[0] - target_x) <= 2 and abs(coords[1] - target_y) <= 2:
382
- should_complete = True
383
- notes = f"Reached location ({coords[0]}, {coords[1]})"
384
-
385
- elif obj.objective_type == "battle" and context == "battle":
386
- # Objective completed when battle starts
387
- should_complete = True
388
- notes = "Entered battle"
389
-
390
- elif obj.objective_type == "dialogue" and context == "dialogue":
391
- # Objective completed when dialogue starts
392
- should_complete = True
393
- notes = "Started dialogue"
394
-
395
- elif obj.objective_type == "map" and map_id and obj.target_value:
396
- # Check if player reached target map
397
- if map_id == obj.target_value:
398
- should_complete = True
399
- notes = f"Reached map {map_id}"
400
-
401
- if should_complete:
402
- self.complete_objective(obj.id, notes)
403
- completed_ids.append(obj.id)
404
-
405
- return completed_ids
406
-
407
- def check_storyline_milestones(self, game_state: Dict[str, Any]) -> List[str]:
408
- """Check emulator milestones and auto-complete corresponding storyline objectives"""
409
- completed_ids = []
410
-
411
- # Get milestones from the game state (if available)
412
- milestones = game_state.get("milestones", {})
413
- if not milestones:
414
- # No milestone data available, skip checking
415
- return completed_ids
416
-
417
- for obj in self.get_active_objectives():
418
- # Only check storyline objectives with milestone IDs
419
- if obj.storyline and obj.milestone_id and not obj.completed:
420
- # Check if the corresponding emulator milestone is completed
421
- milestone_completed = milestones.get(obj.milestone_id, {}).get("completed", False)
422
-
423
- if milestone_completed:
424
- # Auto-complete the storyline objective
425
- obj.completed = True
426
- obj.completed_at = datetime.now()
427
- obj.progress_notes = f"Auto-completed by emulator milestone: {obj.milestone_id}"
428
- self.state.objectives_updated = True
429
- completed_ids.append(obj.id)
430
- logger.info(f"Auto-completed storyline objective via milestone {obj.milestone_id}: {obj.description}")
431
-
432
- return completed_ids
433
-
434
- def detect_stuck_pattern(self, coords: Optional[Tuple[int, int]], context: str, game_state: Dict[str, Any] = None) -> bool:
435
- """Detect if the agent appears to be stuck in a location/context"""
436
- # Don't trigger stuck detection during contexts where staying in place is expected
437
- if context in ["battle", "dialogue", "menu", "title"]:
438
- logger.debug(f"Skipping stuck detection - context: {context}")
439
- return False
440
-
441
- # Need valid coordinates for stuck detection
442
- if not coords or coords[0] is None or coords[1] is None:
443
- return False
444
-
445
- # Check for title sequence if game state is available
446
- if game_state:
447
- # Check if in title sequence (no player name or invalid coordinates)
448
- player_name = game_state.get("player", {}).get("name", "").strip()
449
- if not player_name or player_name == "????????":
450
- return False
451
-
452
- # Check if game state indicates title/intro
453
- game_state_value = game_state.get("game", {}).get("game_state", "").lower()
454
- if "title" in game_state_value or "intro" in game_state_value:
455
- return False
456
-
457
- # Check location for title sequence
458
- player_location = game_state.get("player", {}).get("location", "")
459
- if player_location == "TITLE_SEQUENCE":
460
- return False
461
-
462
- key = f"{coords[0]}_{coords[1]}_{context}"
463
- self.state.stuck_detection[key] = self.state.stuck_detection.get(key, 0) + 1
464
-
465
- # Consider stuck if we've been in the same location/context for 8+ consecutive steps
466
- return self.state.stuck_detection[key] >= 8
467
-
468
- def is_black_frame(self, frame) -> bool:
469
- """
470
- Check if the frame is mostly black (transition/loading screen).
471
-
472
- Args:
473
- frame: PIL Image or numpy array
474
-
475
- Returns:
476
- bool: True if frame is mostly black, False otherwise
477
- """
478
- try:
479
-
480
- # Convert to PIL Image if needed
481
- if hasattr(frame, 'convert'): # It's already a PIL Image
482
- img = frame
483
- elif hasattr(frame, 'shape'): # It's a numpy array
484
- img = Image.fromarray(frame)
485
- else:
486
- return False # Unknown type, assume not black
487
-
488
- # Convert to numpy array for analysis
489
- img_array = np.array(img)
490
-
491
- # Calculate the mean brightness
492
- # For RGB images, average across all channels
493
- if len(img_array.shape) == 3:
494
- mean_brightness = np.mean(img_array)
495
- else:
496
- mean_brightness = np.mean(img_array)
497
-
498
- # Also check the standard deviation to catch completely uniform frames
499
- std_dev = np.std(img_array)
500
-
501
- # A frame is considered "black" if:
502
- # 1. Mean brightness is very low (< 10 out of 255)
503
- # 2. OR standard deviation is very low (< 5) indicating uniform color
504
- is_black = mean_brightness < 10 or (mean_brightness < 30 and std_dev < 5)
505
-
506
- if is_black:
507
- logger.debug(f"Black frame detected: mean_brightness={mean_brightness:.2f}, std_dev={std_dev:.2f}")
508
-
509
- return is_black
510
-
511
- except Exception as e:
512
- logger.warning(f"Error checking for black frame: {e}")
513
- return False # On error, assume not black to continue processing
514
-
515
- def get_relevant_history_summary(self, current_context: str, coords: Optional[Tuple[int, int]]) -> str:
516
- """Get a concise summary of relevant recent history"""
517
- # current_context and coords could be used for more sophisticated filtering in the future
518
- _ = current_context, coords # Acknowledge unused parameters for now
519
- if not self.state.history:
520
- return "No previous history."
521
-
522
- # Get last N entries based on display count
523
- recent_entries = list(self.state.history)[-self.history_display_count:]
524
-
525
- # Format for LLM consumption
526
- summary_lines = []
527
- for i, entry in enumerate(recent_entries, 1):
528
- coord_str = f"({entry.player_coords[0]},{entry.player_coords[1]})" if entry.player_coords else "(?)"
529
- summary_lines.append(f"{i}. {entry.context} at {coord_str}: {entry.action_taken}")
530
-
531
- return "\n".join(summary_lines)
532
-
533
- def get_stuck_warning(self, coords: Optional[Tuple[int, int]], context: str, game_state: Dict[str, Any] = None) -> str:
534
- """Generate warning text if stuck pattern detected"""
535
- # Never show stuck warning in title sequence
536
- if context == "title":
537
- return ""
538
-
539
- if self.detect_stuck_pattern(coords, context, game_state):
540
- return "\n⚠️ WARNING: You appear to be stuck at this location/context. Try a different approach!\n" \
541
- "💡 TIP: If you try an action like RIGHT but coordinates don't change from (X,Y) to (X+1,Y), there's likely an obstacle. Check the map around player P for walls (#) or other barriers blocking your path."
542
- return ""
543
-
544
- def create_game_state_summary(self, game_state: Dict[str, Any]) -> str:
545
- """Create a concise summary of the current game state"""
546
- try:
547
- game_info = game_state.get("game", {})
548
-
549
- summary_parts = []
550
-
551
- # Player location
552
- coords = self.get_player_coords(game_state)
553
- if coords:
554
- summary_parts.append(f"Player at ({coords[0]}, {coords[1]})")
555
-
556
- # Map info
557
- map_id = self.get_map_id(game_state)
558
- if map_id:
559
- summary_parts.append(f"Map {map_id}")
560
-
561
- # Context-specific info
562
- context = self.get_game_context(game_state)
563
- if context == "battle":
564
- summary_parts.append("In battle")
565
- elif context == "dialogue":
566
- dialogue_text = game_info.get("dialogue", {}).get("text", "")
567
- if dialogue_text:
568
- summary_parts.append(f"Dialogue: {dialogue_text}")
569
-
570
- return " | ".join(summary_parts) if summary_parts else "Unknown state"
571
-
572
- except Exception as e:
573
- logger.warning(f"Error creating game state summary: {e}")
574
- return "Error reading state"
575
-
576
- def step(self, game_state: Dict[str, Any]) -> Dict[str, Any]:
577
- """
578
- Compatibility method for client that expects agent.step(game_state)
579
-
580
- Args:
581
- game_state: Complete game state dictionary (should include 'frame')
582
-
583
- Returns:
584
- Dictionary with 'action' and optional 'reasoning'
585
- """
586
- frame = game_state.get('frame')
587
- if frame is None:
588
- logger.error("🚫 No frame in game_state for SimpleAgent.step")
589
- return {"action": "WAIT", "reasoning": "No frame available"}
590
-
591
- action = self.process_step(frame, game_state)
592
- return {"action": action, "reasoning": "Simple agent decision"}
593
-
594
- def process_step(self, frame, game_state: Dict[str, Any]) -> str:
595
- """
596
- Main processing step for simple mode with history tracking
597
-
598
- Args:
599
- frame: Current game frame (PIL Image or similar)
600
- game_state: Complete game state dictionary
601
-
602
- Returns:
603
- Action string or list of actions
604
- """
605
- # CRITICAL: Validate frame before any VLM processing
606
- if frame is None:
607
- logger.error("🚫 CRITICAL: SimpleAgent.process_step called with None frame - cannot proceed")
608
- return "WAIT"
609
-
610
- # Validate frame is a proper image
611
- if not (hasattr(frame, 'save') or hasattr(frame, 'shape')):
612
- logger.error(f"🚫 CRITICAL: SimpleAgent.process_step called with invalid frame type {type(frame)} - cannot proceed")
613
- return "WAIT"
614
-
615
- # Additional PIL Image validation
616
- if hasattr(frame, 'size'):
617
- width, height = frame.size
618
- if width <= 0 or height <= 0:
619
- logger.error(f"🚫 CRITICAL: SimpleAgent.process_step called with invalid frame size {width}x{height} - cannot proceed")
620
- return "WAIT"
621
-
622
- # Check for black frame (transition screen)
623
- if self.is_black_frame(frame):
624
- logger.info("⏳ Black frame detected (likely a transition), waiting for next frame...")
625
- return "WAIT" # Return WAIT to skip this frame and wait for the next one
626
-
627
- try:
628
- # Increment step counter
629
- self.state.step_counter += 1
630
-
631
- # Get current state info
632
- coords = self.get_player_coords(game_state)
633
- context = self.get_game_context(game_state)
634
- map_id = self.get_map_id(game_state)
635
-
636
- # Format the current state for LLM (includes movement preview)
637
- formatted_state = format_state_for_llm(game_state)
638
-
639
- # Get movement memory for the current area
640
- movement_memory = ""
641
- if coords:
642
- movement_memory = self.get_area_movement_memory(coords)
643
-
644
- # Check for objective completion first
645
- self.check_objective_completion(game_state)
646
-
647
- # Check storyline milestones and auto-complete objectives
648
- self.check_storyline_milestones(game_state)
649
-
650
- # Get relevant history and stuck detection
651
- history_summary = self.get_relevant_history_summary(context, coords)
652
- stuck_warning = self.get_stuck_warning(coords, context, game_state)
653
- recent_actions_str = ', '.join(list(self.state.recent_actions)[-self.actions_display_count:]) if self.state.recent_actions else 'None'
654
-
655
- # Format objectives for LLM
656
- active_objectives = self.get_active_objectives()
657
- completed_objectives_list = self.get_completed_objectives()
658
- objectives_summary = self._format_objectives_for_llm(active_objectives, completed_objectives_list)
659
-
660
- # Build pathfinding rules section (only if not in title sequence)
661
- pathfinding_rules = ""
662
- if context != "title":
663
- pathfinding_rules = """
664
- 🚨 PATHFINDING RULES:
665
- 1. **SINGLE STEP FIRST**: Always prefer single actions (UP, DOWN, LEFT, RIGHT, A, B) unless you're 100% certain about multi-step paths
666
- 2. **CHECK EVERY STEP**: Before chaining movements, verify EACH step in your sequence using the MOVEMENT PREVIEW and map
667
- 3. **BLOCKED = STOP**: If ANY step shows BLOCKED in the movement preview, the entire sequence will fail
668
- 4. **NO BLIND CHAINS**: Never chain movements through areas you can't see or verify as walkable
669
- 5. **PERFORM PATHFINDING**: Find a path to a target location (X',Y') from the player position (X,Y) on the map. DO NOT TRAVERSE THROUGH OBSTACLES (#) -- it will not work.
670
-
671
- 💡 SMART MOVEMENT STRATEGY:
672
- - Use MOVEMENT PREVIEW to see exactly what happens with each direction
673
- - If your target requires multiple steps, plan ONE step at a time
674
- - Only chain 2-3 moves if ALL intermediate tiles are confirmed WALKABLE
675
- - When stuck, try a different direction rather than repeating the same blocked move
676
-
677
- EXAMPLE - DON'T DO THIS:
678
- ❌ "I want to go right 5 tiles" → "RIGHT, RIGHT, RIGHT, RIGHT, RIGHT" (may hit wall on step 2!)
679
-
680
- EXAMPLE - DO THIS INSTEAD:
681
- ✅ Check movement preview → "RIGHT shows (X+1,Y) WALKABLE" → "RIGHT" (single safe step)
682
- ✅ Next turn, check again → "RIGHT shows (X+2,Y) WALKABLE" → "RIGHT" (another safe step)
683
-
684
- 💡 SMART NAVIGATION:
685
- - Check the VISUAL FRAME for NPCs (people/trainers) before moving - they're not always on the map!
686
- - Review MOVEMENT MEMORY for locations where you've failed to move before
687
- - Only explore areas marked with ? (these are confirmed explorable edges)
688
- - Avoid areas surrounded by # (walls) - they're fully blocked
689
- - Use doors (D), stairs (S), or walk around obstacles when pathfinding suggests it
690
-
691
- 💡 NPC & OBSTACLE HANDLING:
692
- - If you see NPCs in the image, avoid walking into them or interact with A/B if needed
693
- - If a movement fails (coordinates don't change), that location likely has an NPC or obstacle
694
- - Use your MOVEMENT MEMORY to remember problem areas and plan around them
695
- - NPCs can trigger battles or dialogue, which may be useful for objectives
696
- """
697
-
698
- # Create enhanced prompt with objectives, history context and chain of thought request
699
- prompt = f"""You are playing Pokemon Emerald. Progress quickly to the milestones by balancing exploration and exploitation of things you know.
700
- Based on the current game frame and state information, think through your next move and choose the best button action.
701
-
702
- RECENT ACTION HISTORY (last {self.actions_display_count} actions):
703
- {recent_actions_str}
704
-
705
- LOCATION/CONTEXT HISTORY (last {self.history_display_count} steps):
706
- {history_summary}
707
-
708
- CURRENT OBJECTIVES:
709
- {objectives_summary}
710
-
711
- CURRENT GAME STATE:
712
- {formatted_state}
713
-
714
- {movement_memory}
715
-
716
- {stuck_warning}
717
-
718
- Available actions: A, B, START, SELECT, UP, DOWN, LEFT, RIGHT
719
-
720
- IMPORTANT: Please think step by step before choosing your action. Structure your response like this:
721
-
722
- ANALYSIS:
723
- [Analyze what you see in the frame and current game state - what's happening? where are you? what should you be doing?
724
- IMPORTANT: Look carefully at the game image for NPCs (people, trainers) that might not be shown on the map. NPCs appear as sprite characters and can block movement or trigger battles/dialogue.]
725
-
726
- OBJECTIVES:
727
- [Review your current objectives. You have main storyline objectives (story_*) that track overall Emerald progression - these are automatically verified and you CANNOT manually complete them. You can create your own sub-objectives to help achieve the main goals. Do any need to be updated, added, or marked as complete?
728
- - Add sub-objectives: ADD_OBJECTIVE: type:description:target_value (e.g., "ADD_OBJECTIVE: location:Find Pokemon Center in town:(15,20)" or "ADD_OBJECTIVE: item:Buy Pokeballs:5")
729
- - Complete sub-objectives only: COMPLETE_OBJECTIVE: objective_id:notes (e.g., "COMPLETE_OBJECTIVE: my_sub_obj_123:Successfully bought Pokeballs")
730
- - NOTE: Do NOT try to complete storyline objectives (story_*) - they auto-complete when milestones are reached]
731
-
732
- PLAN:
733
- [Think about your immediate goal - what do you want to accomplish in the next few actions? Consider your current objectives and recent history.
734
- Check MOVEMENT MEMORY for areas you've had trouble with before and plan your route accordingly.]
735
-
736
- REASONING:
737
- [Explain why you're choosing this specific action. Reference the MOVEMENT PREVIEW and MOVEMENT MEMORY sections. Check the visual frame for NPCs before moving. If you see NPCs in the image, avoid walking into them. Consider any failed movements or known obstacles from your memory.]
738
-
739
- ACTION:
740
- [Your final action choice - PREFER SINGLE ACTIONS like 'RIGHT' or 'A'. Only use multiple actions like 'UP, UP, RIGHT' if you've verified each step is WALKABLE in the movement preview and map.]
741
-
742
- {pathfinding_rules}
743
-
744
- Context: {context} | Coords: {coords} """
745
-
746
- # Print complete prompt to terminal for debugging
747
- print("\n" + "="*120)
748
- print("🤖 SIMPLE AGENT PROMPT SENT TO VLM:")
749
- print("="*120)
750
-
751
- # Print prompt in chunks to avoid terminal truncation
752
- sys.stdout.write(prompt)
753
- sys.stdout.write("\n")
754
- sys.stdout.flush()
755
-
756
- print("="*120)
757
- print("🤖 END OF SIMPLE AGENT PROMPT")
758
- print("="*120 + "\n")
759
- sys.stdout.flush()
760
-
761
- # Make VLM call - double-check frame validation before VLM
762
- if frame and (hasattr(frame, 'save') or hasattr(frame, 'shape')):
763
- print("🔍 Making VLM call...")
764
- try:
765
- response = self.vlm.get_query(frame, prompt, "simple_mode")
766
- print(f"🔍 VLM response received: {response[:100]}..." if len(response) > 100 else f"🔍 VLM response: {response}")
767
- except Exception as e:
768
- print(f"❌ VLM call failed: {e}")
769
- return "WAIT"
770
- else:
771
- logger.error("🚫 CRITICAL: About to call VLM but frame validation failed - this should never happen!")
772
- return "WAIT"
773
-
774
- # Extract action(s) from structured response
775
- actions, reasoning = self._parse_structured_response(response, game_state)
776
-
777
- # Check for failed movement by comparing previous coordinates
778
- if len(self.state.history) > 0:
779
- prev_coords = self.state.history[-1].player_coords
780
- if prev_coords and coords:
781
- # If coordinates didn't change and we attempted a movement, record it as failed
782
- if (prev_coords == coords and
783
- isinstance(actions, list) and len(actions) > 0 and
784
- actions[0] in ['UP', 'DOWN', 'LEFT', 'RIGHT']):
785
- self.record_failed_movement(coords, actions[0], "movement_blocked")
786
- elif (prev_coords == coords and
787
- isinstance(actions, str) and
788
- actions in ['UP', 'DOWN', 'LEFT', 'RIGHT']):
789
- self.record_failed_movement(coords, actions, "movement_blocked")
790
-
791
- # Record this step in history with reasoning
792
- game_state_summary = self.create_game_state_summary(game_state)
793
- action_with_reasoning = f"{actions} | Reasoning: {reasoning}" if reasoning else str(actions)
794
- history_entry = HistoryEntry(
795
- timestamp=datetime.now(),
796
- player_coords=coords,
797
- map_id=map_id,
798
- context=context,
799
- action_taken=action_with_reasoning,
800
- game_state_summary=game_state_summary
801
- )
802
- self.state.history.append(history_entry)
803
-
804
- # Update recent actions
805
- if isinstance(actions, list):
806
- self.state.recent_actions.extend(actions)
807
- else:
808
- self.state.recent_actions.append(actions)
809
-
810
- # Reset stuck detection for other locations when we move
811
- if coords:
812
- keys_to_reset = [k for k in self.state.stuck_detection.keys()
813
- if not k.startswith(f"{coords[0]}_{coords[1]}")]
814
- for key in keys_to_reset:
815
- if self.state.stuck_detection[key] > 0:
816
- self.state.stuck_detection[key] = max(0, self.state.stuck_detection[key] - 1)
817
-
818
- # Update server with agent step and metrics (for agent thinking display)
819
- self._update_server_metrics()
820
-
821
- return actions
822
-
823
- except Exception as e:
824
- logger.error(f"Error in simple agent processing: {e}")
825
- return ["A"] # Default safe action as list
826
-
827
- def _update_server_metrics(self):
828
- """Update server with current agent step count and LLM metrics"""
829
- try:
830
- import requests
831
- from utils.llm_logger import get_llm_logger
832
-
833
- # Get current LLM metrics
834
- llm_logger = get_llm_logger()
835
- metrics = llm_logger.get_cumulative_metrics()
836
-
837
- # Send metrics to server
838
- try:
839
- response = requests.post(
840
- "http://localhost:8000/agent_step",
841
- json={"metrics": metrics},
842
- timeout=1
843
- )
844
- if response.status_code != 200:
845
- logger.warning(f"Failed to update server metrics: {response.status_code}")
846
- except requests.exceptions.RequestException:
847
- # Silent fail - server might not be running or in different mode
848
- pass
849
-
850
- except Exception as e:
851
- logger.warning(f"Error updating server metrics: {e}")
852
-
853
- def _parse_actions(self, response: str, game_state: Dict[str, Any] = None) -> List[str]:
854
- """Parse action response from LLM into list of valid actions"""
855
- response_upper = response.upper().strip()
856
- valid_actions = ['A', 'B', 'START', 'SELECT', 'UP', 'DOWN', 'LEFT', 'RIGHT', 'WAIT']
857
-
858
- # Parse multiple actions (could be comma or space separated)
859
- actions_found = []
860
- # Replace commas with spaces for consistent parsing
861
- response_clean = response_upper.replace(',', ' ').replace('.', ' ')
862
- tokens = response_clean.split()
863
-
864
- for token in tokens:
865
- if token in valid_actions:
866
- actions_found.append(token)
867
- if len(actions_found) >= 10: # Max 10 actions
868
- break
869
-
870
- # Validate movement sequences if we have game state
871
- if game_state and len(actions_found) > 1:
872
- # Check if this is a movement sequence
873
- movement_actions = [a for a in actions_found if a in ['UP', 'DOWN', 'LEFT', 'RIGHT']]
874
- if movement_actions:
875
- # Validate the movement sequence
876
- is_valid, reason = self.validate_movement_sequence(movement_actions, game_state)
877
- if not is_valid:
878
- logger.warning(f"Movement sequence validation failed: {reason}")
879
- # Only take the first movement if sequence is invalid
880
- if movement_actions:
881
- actions_found = [movement_actions[0]]
882
- logger.info(f"Reduced to single movement: {actions_found[0]}")
883
-
884
- # If no valid actions found, use default
885
- if not actions_found:
886
- actions_found = ['A']
887
-
888
- return actions_found
889
-
890
- def _format_objectives_for_llm(self, active_objectives: List[Objective], completed_objectives: List[Objective]) -> str:
891
- """Format objectives for LLM consumption"""
892
- lines = []
893
-
894
- if active_objectives:
895
- lines.append("🎯 ACTIVE OBJECTIVES:")
896
- for i, obj in enumerate(active_objectives[:5], 1): # Show top 5 active
897
- target_str = f" (Target: {obj.target_value})" if obj.target_value else ""
898
- lines.append(f" {i}. [{obj.objective_type}] {obj.description}{target_str} [ID: {obj.id}]")
899
- else:
900
- lines.append("🎯 ACTIVE OBJECTIVES: None - Consider setting some goals!")
901
-
902
- if completed_objectives:
903
- recent_completed = completed_objectives[-3:] # Show last 3 completed
904
- lines.append("✅ RECENTLY COMPLETED:")
905
- for obj in recent_completed:
906
- lines.append(f" ✓ [{obj.objective_type}] {obj.description}")
907
-
908
- return "\n".join(lines)
909
-
910
- def _parse_structured_response(self, response: str, game_state: Dict[str, Any] = None) -> Tuple[List[str], str]:
911
- """Parse structured chain-of-thought response and extract actions and reasoning"""
912
- try:
913
- # Extract sections from structured response
914
- analysis = ""
915
- objectives_section = ""
916
- plan = ""
917
- reasoning = ""
918
- actions = []
919
-
920
- # Split response into lines for processing
921
- lines = response.split('\n')
922
- current_section = None
923
-
924
- for line in lines:
925
- line = line.strip()
926
-
927
- # Identify section headers
928
- if line.upper().startswith('ANALYSIS:'):
929
- current_section = 'analysis'
930
- analysis = line[9:].strip() # Remove "ANALYSIS:" prefix
931
- elif line.upper().startswith('OBJECTIVES:'):
932
- current_section = 'objectives'
933
- objectives_section = line[11:].strip() # Remove "OBJECTIVES:" prefix
934
- elif line.upper().startswith('PLAN:'):
935
- current_section = 'plan'
936
- plan = line[5:].strip() # Remove "PLAN:" prefix
937
- elif line.upper().startswith('REASONING:'):
938
- current_section = 'reasoning'
939
- reasoning = line[10:].strip() # Remove "REASONING:" prefix
940
- elif line.upper().startswith('ACTION:'):
941
- current_section = 'action'
942
- # Extract actions from this line
943
- action_text = line[7:].strip() # Remove "ACTION:" prefix
944
- if action_text: # Only parse if there's content
945
- actions = self._parse_actions(action_text, game_state)
946
- elif line and current_section:
947
- # Continue content of current section
948
- if current_section == 'analysis':
949
- analysis += " " + line
950
- elif current_section == 'objectives':
951
- objectives_section += " " + line
952
- elif current_section == 'plan':
953
- plan += " " + line
954
- elif current_section == 'reasoning':
955
- reasoning += " " + line
956
- elif current_section == 'action':
957
- # Additional action parsing from action section content
958
- if line.strip(): # Only process non-empty lines
959
- additional_actions = self._parse_actions(line, game_state)
960
- actions.extend(additional_actions)
961
- if len(actions) >= 10: # Max 10 actions
962
- actions = actions[:10]
963
- break
964
-
965
- # Process objectives if mentioned
966
- if objectives_section:
967
- self._process_objectives_from_response(objectives_section)
968
-
969
- # If no actions found in structured format, fall back to parsing entire response
970
- if not actions:
971
- actions = self._parse_actions(response, game_state)
972
-
973
- # Create concise reasoning summary
974
- reasoning_parts = []
975
- if analysis:
976
- reasoning_parts.append(f"Analysis: {analysis}")
977
- if objectives_section:
978
- reasoning_parts.append(f"Objectives: {objectives_section}")
979
- if plan:
980
- reasoning_parts.append(f"Plan: {plan}")
981
- if reasoning:
982
- reasoning_parts.append(f"Reasoning: {reasoning}")
983
-
984
- full_reasoning = " | ".join(reasoning_parts) if reasoning_parts else "No reasoning provided"
985
-
986
- return actions, full_reasoning
987
-
988
- except Exception as e:
989
- logger.warning(f"Error parsing structured response: {e}")
990
- # Fall back to basic action parsing
991
- return self._parse_actions(response, game_state), "Error parsing reasoning"
992
-
993
- def _process_objectives_from_response(self, objectives_text: str):
994
- """Process objective management commands from LLM response"""
995
- try:
996
- # Look for ADD_OBJECTIVE and COMPLETE_OBJECTIVE commands
997
- for line in objectives_text.split('\n'):
998
- line = line.strip()
999
- if line.upper().startswith('ADD_OBJECTIVE:'):
1000
- # Parse format: ADD_OBJECTIVE: type:description:target_value
1001
- content = line[14:].strip() # Remove "ADD_OBJECTIVE:" prefix
1002
- parts = content.split(':', 2) # Split into max 3 parts
1003
-
1004
- if len(parts) >= 2:
1005
- obj_type = parts[0].strip()
1006
- description = parts[1].strip()
1007
- target_value = parts[2].strip() if len(parts) > 2 else None
1008
-
1009
- # Parse target_value based on type
1010
- parsed_target = self._parse_target_value(obj_type, target_value)
1011
-
1012
- # Add the objective
1013
- self.add_objective(description, obj_type, parsed_target)
1014
-
1015
- elif line.upper().startswith('COMPLETE_OBJECTIVE:'):
1016
- # Parse format: COMPLETE_OBJECTIVE: objective_id:notes
1017
- content = line[19:].strip() # Remove "COMPLETE_OBJECTIVE:" prefix
1018
- parts = content.split(':', 1) # Split into max 2 parts
1019
-
1020
- if len(parts) >= 1:
1021
- obj_id = parts[0].strip()
1022
- notes = parts[1].strip() if len(parts) > 1 else "Manually completed by LLM"
1023
-
1024
- # Complete the objective
1025
- success = self.complete_objective(obj_id, notes)
1026
- if success:
1027
- logger.info(f"LLM manually completed objective: {obj_id}")
1028
- else:
1029
- logger.warning(f"LLM tried to complete non-existent or already completed objective: {obj_id}")
1030
-
1031
- except Exception as e:
1032
- logger.warning(f"Error processing objectives from response: {e}")
1033
-
1034
- def _parse_target_value(self, obj_type: str, target_str: Optional[str]) -> Any:
1035
- """Parse target value based on objective type"""
1036
- if not target_str:
1037
- return None
1038
-
1039
- try:
1040
- if obj_type == "location":
1041
- # Try to parse coordinates like "(15,20)" or "15,20"
1042
- target_str = target_str.strip('()')
1043
- if ',' in target_str:
1044
- x, y = map(int, target_str.split(','))
1045
- return (x, y)
1046
- elif obj_type == "map":
1047
- # Try to parse map ID as integer
1048
- return int(target_str)
1049
- else:
1050
- # For other types, return as string
1051
- return target_str
1052
- except (ValueError, TypeError):
1053
- # If parsing fails, return as string
1054
- return target_str
1055
-
1056
- def get_memory_usage_estimate(self) -> Dict[str, int]:
1057
- """Estimate current memory usage for context management"""
1058
- history_chars = sum(len(str(entry)) for entry in self.state.history)
1059
- recent_actions_chars = sum(len(action) for action in self.state.recent_actions)
1060
- objectives_chars = sum(len(f"{obj.description} {obj.target_value}") for obj in self.state.objectives)
1061
-
1062
- return {
1063
- "history_entries": len(self.state.history),
1064
- "history_chars": history_chars,
1065
- "recent_actions": len(self.state.recent_actions),
1066
- "recent_actions_chars": recent_actions_chars,
1067
- "objectives_count": len(self.state.objectives),
1068
- "objectives_chars": objectives_chars,
1069
- "estimated_total_chars": history_chars + recent_actions_chars + objectives_chars
1070
- }
1071
-
1072
- def get_objectives_state(self) -> Dict[str, Any]:
1073
- """Get objectives formatted for forwarding in game state"""
1074
- return {
1075
- "active": [
1076
- {
1077
- "id": obj.id,
1078
- "description": obj.description,
1079
- "type": obj.objective_type,
1080
- "target": obj.target_value,
1081
- "created_at": obj.created_at.isoformat()
1082
- }
1083
- for obj in self.get_active_objectives()
1084
- ],
1085
- "completed": [
1086
- {
1087
- "id": obj.id,
1088
- "description": obj.description,
1089
- "type": obj.objective_type,
1090
- "target": obj.target_value,
1091
- "completed_at": obj.completed_at.isoformat() if obj.completed_at else None,
1092
- "notes": obj.progress_notes
1093
- }
1094
- for obj in self.get_completed_objectives()[-5:] # Last 5 completed
1095
- ],
1096
- "updated": self.state.objectives_updated
1097
- }
1098
-
1099
- def trim_history_for_context(self, max_chars: int = 4000):
1100
- """Trim history to fit within context limits"""
1101
- # Preserve minimum history for context
1102
- min_history = max(5, self.history_display_count // 2)
1103
- min_actions = max(10, self.actions_display_count // 2)
1104
-
1105
- while self.get_memory_usage_estimate()["estimated_total_chars"] > max_chars and len(self.state.history) > min_history:
1106
- self.state.history.popleft()
1107
-
1108
- while len(self.state.recent_actions) > min_actions and self.get_memory_usage_estimate()["estimated_total_chars"] > max_chars:
1109
- self.state.recent_actions.popleft()
1110
-
1111
- def reset_objectives_updated_flag(self):
1112
- """Reset the objectives updated flag (call after forwarding state)"""
1113
- self.state.objectives_updated = False
1114
-
1115
- def configure_history_limits(self, max_history_entries: int = None, max_recent_actions: int = None,
1116
- history_display_count: int = None, actions_display_count: int = None):
1117
- """Configure history tracking parameters at runtime"""
1118
- if max_history_entries is not None:
1119
- # Create new deque with updated max length, preserving existing data
1120
- existing_history = list(self.state.history)
1121
- self.state.history = deque(existing_history, maxlen=max_history_entries)
1122
-
1123
- if max_recent_actions is not None:
1124
- # Create new deque with updated max length, preserving existing data
1125
- existing_actions = list(self.state.recent_actions)
1126
- self.state.recent_actions = deque(existing_actions, maxlen=max_recent_actions)
1127
-
1128
- if history_display_count is not None:
1129
- self.history_display_count = history_display_count
1130
-
1131
- if actions_display_count is not None:
1132
- self.actions_display_count = actions_display_count
1133
-
1134
- logger.info(f"Updated history configuration: {len(self.state.history)}/{self.state.history.maxlen} history, "
1135
- f"{len(self.state.recent_actions)}/{self.state.recent_actions.maxlen} actions, "
1136
- f"display {self.history_display_count}/{self.actions_display_count}")
1137
-
1138
- def load_history_from_llm_checkpoint(self, checkpoint_file: str):
1139
- """Load SimpleAgent history from LLM checkpoint file"""
1140
- try:
1141
- from utils.llm_logger import get_llm_logger
1142
- import json
1143
- import re
1144
- from datetime import datetime
1145
-
1146
- if not os.path.exists(checkpoint_file):
1147
- logger.info(f"No checkpoint file found: {checkpoint_file}")
1148
- return False
1149
-
1150
- # Use LLM logger to restore cumulative metrics first
1151
- llm_logger = get_llm_logger()
1152
- if llm_logger:
1153
- restored_step_count = llm_logger.load_checkpoint(checkpoint_file)
1154
- if restored_step_count is not None:
1155
- logger.info(f"✅ LLM logger restored checkpoint with {restored_step_count} steps")
1156
- # Update SimpleAgent step counter to match LLM logger
1157
- self.state.step_counter = restored_step_count
1158
-
1159
- with open(checkpoint_file, 'r') as f:
1160
- checkpoint_data = json.load(f)
1161
-
1162
- log_entries = checkpoint_data.get("log_entries", [])
1163
- restored_count = 0
1164
-
1165
- for entry in log_entries:
1166
- if entry.get("type") == "interaction" and "simple_mode" in entry.get("interaction_type", ""):
1167
- try:
1168
- # Extract state info from prompt
1169
- prompt = entry.get("prompt", "")
1170
- response = entry.get("response", "")
1171
- timestamp_str = entry.get("timestamp", "")
1172
-
1173
- # Parse coordinates from prompt
1174
- coords_match = re.search(r"Position: X=(\d+), Y=(\d+)", prompt)
1175
- coords = None
1176
- if coords_match:
1177
- coords = (int(coords_match.group(1)), int(coords_match.group(2)))
1178
-
1179
- # Parse context from prompt
1180
- context = "overworld" # default
1181
- if "Game State: battle" in prompt:
1182
- context = "battle"
1183
- elif "DIALOGUE:" in prompt or "dialogue" in prompt.lower():
1184
- context = "dialogue"
1185
- elif "menu" in prompt.lower():
1186
- context = "menu"
1187
-
1188
- # Extract action from response
1189
- action_taken = "UNKNOWN"
1190
- if "ACTION:" in response:
1191
- action_section = response.split("ACTION:")[-1].strip()
1192
- action_line = action_section.split('\n')[0].strip()
1193
- action_taken = action_line
1194
-
1195
- # Parse timestamp
1196
- timestamp = datetime.now()
1197
- if timestamp_str:
1198
- try:
1199
- timestamp = datetime.fromisoformat(timestamp_str)
1200
- except:
1201
- pass
1202
-
1203
- # Create simplified game state summary
1204
- game_state_summary = f"Position: {coords}" if coords else "Position unknown"
1205
- if coords:
1206
- game_state_summary += f" | Context: {context}"
1207
-
1208
- # Add reasoning summary
1209
- reasoning = ""
1210
- if "REASONING:" in response:
1211
- reasoning_section = response.split("REASONING:")[-1].split("ACTION:")[0].strip()
1212
- reasoning = reasoning_section
1213
-
1214
- action_with_reasoning = f"{action_taken} | Reasoning: {reasoning}" if reasoning else action_taken
1215
-
1216
- # Create history entry
1217
- history_entry = HistoryEntry(
1218
- timestamp=timestamp,
1219
- player_coords=coords,
1220
- map_id=None, # Not available in checkpoint
1221
- context=context,
1222
- action_taken=action_with_reasoning,
1223
- game_state_summary=game_state_summary
1224
- )
1225
-
1226
- self.state.history.append(history_entry)
1227
-
1228
- # Also add to recent actions if it's a valid action
1229
- if action_taken and action_taken not in ["UNKNOWN", "WAIT"]:
1230
- # Parse multiple actions if comma-separated
1231
- actions = [a.strip() for a in action_taken.replace(',', ' ').split()]
1232
- for action in actions:
1233
- if action in ['UP', 'DOWN', 'LEFT', 'RIGHT', 'A', 'B', 'START', 'SELECT']:
1234
- self.state.recent_actions.append(action)
1235
-
1236
- restored_count += 1
1237
-
1238
- except Exception as e:
1239
- logger.warning(f"Error parsing checkpoint entry: {e}")
1240
- continue
1241
-
1242
- # Update step counter to match checkpoint
1243
- self.state.step_counter = restored_count
1244
-
1245
- logger.info(f"✅ Restored {restored_count} history entries from {checkpoint_file}")
1246
- logger.info(f" History: {len(self.state.history)} entries")
1247
- logger.info(f" Recent actions: {len(self.state.recent_actions)} actions")
1248
- logger.info(f" Step counter: {self.state.step_counter}")
1249
-
1250
- return True
1251
-
1252
- except Exception as e:
1253
- logger.error(f"❌ Failed to load history from checkpoint: {e}")
1254
- import traceback
1255
- traceback.print_exc()
1256
- return False
1257
-
1258
- def save_history_to_llm_checkpoint(self, checkpoint_file: str = None):
1259
- """Save SimpleAgent history using LLM logger checkpoint system"""
1260
- try:
1261
- from utils.llm_logger import get_llm_logger
1262
-
1263
- # Get the global LLM logger instance
1264
- llm_logger = get_llm_logger()
1265
- if llm_logger is None:
1266
- logger.warning("No LLM logger available for checkpoint saving")
1267
- return False
1268
-
1269
- # Save checkpoint using LLM logger which includes cumulative metrics
1270
- # The LLM logger will handle saving log_entries AND cumulative_metrics
1271
- # If checkpoint_file is None, it will use the cache folder
1272
- llm_logger.save_checkpoint(checkpoint_file, agent_step_count=self.state.step_counter)
1273
-
1274
- logger.info(f"💾 Saved LLM checkpoint to {checkpoint_file}")
1275
- logger.info(f" Step counter: {self.state.step_counter}")
1276
- logger.info(f" History: {len(self.state.history)} entries")
1277
- logger.info(f" Recent actions: {len(self.state.recent_actions)} actions")
1278
- return True
1279
-
1280
- except Exception as e:
1281
- logger.error(f"❌ Failed to save LLM checkpoint: {e}")
1282
- import traceback
1283
- traceback.print_exc()
1284
- return False
1285
-
1286
- def record_failed_movement(self, coords: Tuple[int, int], direction: str, reason: str = "blocked"):
1287
- """Record a failed movement attempt for future reference"""
1288
- coord_key = f"{coords[0]},{coords[1]}"
1289
- if coord_key not in self.state.failed_movements:
1290
- self.state.failed_movements[coord_key] = []
1291
-
1292
- failed_entry = f"{direction}:{reason}"
1293
- if failed_entry not in self.state.failed_movements[coord_key]:
1294
- self.state.failed_movements[coord_key].append(failed_entry)
1295
- logger.info(f"Recorded failed movement: {coord_key} -> {direction} ({reason})")
1296
-
1297
- def record_npc_interaction(self, coords: Tuple[int, int], interaction_type: str, notes: str = ""):
1298
- """Record an NPC interaction for future reference"""
1299
- coord_key = f"{coords[0]},{coords[1]}"
1300
- interaction_info = f"{interaction_type}: {notes}" if notes else interaction_type
1301
- self.state.npc_interactions[coord_key] = interaction_info
1302
- logger.info(f"Recorded NPC interaction: {coord_key} -> {interaction_info}")
1303
-
1304
- def get_movement_memory(self, coords: Tuple[int, int]) -> str:
1305
- """Get memory about failed movements and interactions at specific coordinates"""
1306
- coord_key = f"{coords[0]},{coords[1]}"
1307
- memory_parts = []
1308
-
1309
- # Check for failed movements
1310
- if coord_key in self.state.failed_movements:
1311
- failed_list = self.state.failed_movements[coord_key]
1312
- memory_parts.append(f"Failed moves: {', '.join(failed_list)}")
1313
-
1314
- # Check for NPC interactions
1315
- if coord_key in self.state.npc_interactions:
1316
- interaction = self.state.npc_interactions[coord_key]
1317
- memory_parts.append(f"NPC: {interaction}")
1318
-
1319
- return " | ".join(memory_parts) if memory_parts else ""
1320
-
1321
- def get_area_movement_memory(self, center_coords: Tuple[int, int], radius: int = 7) -> str:
1322
- """Get movement memory for the area around the player"""
1323
- cx, cy = center_coords
1324
- memory_lines = []
1325
-
1326
- # Check nearby coordinates for failed movements or NPC interactions
1327
- nearby_memories = []
1328
- for dx in range(-radius, radius + 1):
1329
- for dy in range(-radius, radius + 1):
1330
- if dx == 0 and dy == 0:
1331
- continue # Skip current position
1332
-
1333
- check_coords = (cx + dx, cy + dy)
1334
- memory = self.get_movement_memory(check_coords)
1335
- if memory:
1336
- nearby_memories.append(f"({check_coords[0]},{check_coords[1]}): {memory}")
1337
-
1338
- if nearby_memories:
1339
- memory_lines.append("🧠 MOVEMENT MEMORY (nearby area):")
1340
- for memory in nearby_memories[:5]: # Limit to 5 most relevant
1341
- memory_lines.append(f" {memory}")
1342
-
1343
- return "\n".join(memory_lines)
1344
-
1345
def analyze_movement_preview(self, game_state: Dict[str, Any]) -> Dict[str, Any]:
    """
    Analyze the movement preview data from game state to find valid moves.

    The state is rendered with ``format_state_for_llm`` and the lines that
    follow a ``MOVEMENT PREVIEW:`` header are parsed until the next blank
    line. Each preview line is expected to look like::

        UP : ( 15, 10) [.] WALKABLE - Optional description

    Args:
        game_state: Current game state, passed to ``format_state_for_llm``.

    Returns:
        Dict with 'walkable_directions', 'blocked_directions', and 'special_tiles'
    """
    directions = ('UP', 'DOWN', 'LEFT', 'RIGHT')
    # (markers that must all be present, label stored in special_tiles);
    # checked in order, first match wins.
    special_markers = (
        (('Door/Entrance',), 'door'),
        (('Stairs/Warp',), 'stairs'),
        (('Tall grass',), 'grass'),
        (('Jump ledge', 'can jump'), 'ledge'),
    )

    walkable_directions = []
    blocked_directions = []
    special_tiles = {}

    in_movement_preview = False
    for line in format_state_for_llm(game_state).split('\n'):
        if 'MOVEMENT PREVIEW:' in line:
            in_movement_preview = True
            continue
        if not in_movement_preview:
            continue

        stripped = line.strip()
        if not stripped:
            # Empty line typically ends the movement preview section
            in_movement_preview = False
            continue

        # Split on the FIRST colon only, so a description that itself
        # contains ':' is kept whole instead of being truncated.
        direction, separator, rest = stripped.partition(':')
        if not separator:
            continue
        direction = direction.strip()
        if direction not in directions:
            continue

        if 'WALKABLE' in rest:
            walkable_directions.append(direction)
            for markers, label in special_markers:
                if all(marker in rest for marker in markers):
                    special_tiles[direction] = label
                    break
        elif 'BLOCKED' in rest:
            blocked_directions.append(direction)

    return {
        'walkable_directions': walkable_directions,
        'blocked_directions': blocked_directions,
        'special_tiles': special_tiles
    }
1398
-
1399
def validate_movement_sequence(self, movements: List[str], game_state: Dict[str, Any]) -> Tuple[bool, str]:
    """
    Validate if a sequence of movements is valid based on current state.

    Only the first movement can be checked against the movement preview
    returned by ``self.analyze_movement_preview``; sequences longer than one
    step are rejected because later steps cannot be predicted.

    Args:
        movements: List of movement directions
        game_state: Current game state

    Returns:
        Tuple of (is_valid, reason)
    """
    if not movements:
        return True, "No movements to validate"

    # Analyze current movement options
    movement_info = self.analyze_movement_preview(game_state)
    walkable = movement_info['walkable_directions']
    blocked = movement_info['blocked_directions']

    # Normalise case AND surrounding whitespace: without the strip, an input
    # such as "up " matches no direction and slips past both checks below.
    first_move = movements[0].strip().upper()
    if first_move in blocked:
        return False, f"First movement {first_move} is BLOCKED"

    # Non-directional inputs are not covered by the preview, so only
    # directions need to be confirmed walkable.
    if first_move in ('UP', 'DOWN', 'LEFT', 'RIGHT') and first_move not in walkable:
        return False, f"First movement {first_move} is not confirmed WALKABLE"

    # We can't predict beyond the first move accurately, so chained
    # movements are discouraged.
    if len(movements) > 1:
        return False, "Cannot validate multi-step movements - use single steps instead"

    return True, "Movement validated"
1433
-
1434
def get_history_stats(self) -> Dict[str, int]:
    """Report sizes and limits of the agent's history tracking.

    Returns:
        A dict with the current and maximum lengths of ``self.state.history``
        and ``self.state.recent_actions``, the configured display counts, the
        number of objectives, the step counter, and the number of recorded
        failed movements and NPC interactions.
    """
    state = self.state
    history = state.history
    recent_actions = state.recent_actions

    stats: Dict[str, int] = {}
    stats["history_entries"] = len(history)
    stats["max_history_entries"] = history.maxlen
    stats["recent_actions"] = len(recent_actions)
    stats["max_recent_actions"] = recent_actions.maxlen
    stats["history_display_count"] = self.history_display_count
    stats["actions_display_count"] = self.actions_display_count
    stats["objectives_count"] = len(state.objectives)
    stats["step_counter"] = state.step_counter
    stats["failed_movements"] = len(state.failed_movements)
    stats["npc_interactions"] = len(state.npc_interactions)
    return stats
1448
-
1449
# Global simple agent instance for backward compatibility with existing multiprocess code
_global_simple_agent = None


def _load_checkpoint_if_requested(agent) -> None:
    """Load the agent's history from an LLM checkpoint when LOAD_CHECKPOINT_MODE is "true"."""
    import os

    if os.environ.get("LOAD_CHECKPOINT_MODE") != "true":
        return

    # Check cache folder first, then fall back to old location
    cache_dir = ".pokeagent_cache"
    legacy_file = "checkpoint_llm.txt"
    preferred_file = os.path.join(cache_dir, legacy_file) if os.path.exists(cache_dir) else legacy_file

    checkpoint_file = next(
        (path for path in (preferred_file, legacy_file) if os.path.exists(path)),
        None,
    )
    if checkpoint_file is None:
        logger.info(f"⚠️ No checkpoint file found: {preferred_file}")
        return

    logger.info(f"🔄 Loading SimpleAgent history from {checkpoint_file}")
    agent.load_history_from_llm_checkpoint(checkpoint_file)


def get_simple_agent(vlm) -> SimpleAgent:
    """Get or create the global simple agent instance.

    A new ``SimpleAgent`` is built on first use and again whenever the given
    ``vlm`` differs from the one held by the cached agent. Every newly built
    agent goes through the same checkpoint-loading step, so both paths behave
    (and log) identically.

    Args:
        vlm: Vision-language model handle passed to ``SimpleAgent``.

    Returns:
        The module-wide ``SimpleAgent`` instance bound to ``vlm``.
    """
    global _global_simple_agent
    if _global_simple_agent is None or _global_simple_agent.vlm != vlm:
        _global_simple_agent = SimpleAgent(vlm)
        _load_checkpoint_if_requested(_global_simple_agent)
    return _global_simple_agent
1489
-
1490
def simple_mode_processing_multiprocess(vlm, game_state, args=None):
    """Simple mode processing function for multiprocess mode (backward compatibility).

    Args:
        vlm: Vision-language model handle used to obtain the shared agent.
        game_state: Game state mapping; the frame is read from
            ``game_state["visual"]["screenshot"]``.
        args: Kept for backward compatibility; not used.

    Returns:
        The result of ``agent.process_step(frame, game_state)``, or the string
        ``"WAIT"`` when no frame is available.
    """
    _ = args  # Acknowledge unused parameter
    agent = get_simple_agent(vlm)

    # A missing "visual" section or "screenshot" entry is treated like a None
    # frame, so it reaches the validation below instead of raising KeyError.
    visual = (game_state or {}).get("visual") or {}
    frame = visual.get("screenshot")

    # CRITICAL: Validate frame before processing
    if frame is None:
        logger.error("🚫 CRITICAL: simple_step called with None frame")
        return "WAIT"

    return agent.process_step(frame, game_state)