synth-ai 0.2.13.dev1__py3-none-any.whl → 0.2.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (291)
  1. examples/multi_step/configs/README_verilog_rl.md +77 -0
  2. examples/multi_step/configs/VERILOG_REWARDS.md +90 -0
  3. examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +183 -0
  4. examples/multi_step/configs/crafter_eval_synth_qwen4b.toml +35 -0
  5. examples/multi_step/configs/crafter_eval_text_only_groq_qwen32b.toml +36 -0
  6. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +17 -5
  7. examples/multi_step/configs/crafter_synth_backend.md +40 -0
  8. examples/multi_step/configs/verilog_eval_groq_qwen32b.toml +31 -0
  9. examples/multi_step/configs/verilog_eval_synth_qwen8b.toml +33 -0
  10. examples/multi_step/configs/verilog_rl_lora.toml +190 -0
  11. examples/multi_step/judges/crafter_backend_judge.py +220 -0
  12. examples/multi_step/judges/verilog_backend_judge.py +234 -0
  13. examples/multi_step/readme.md +48 -0
  14. examples/multi_step/verilog_rl_lora.md +218 -0
  15. examples/qwen_coder/configs/coder_lora_30b.toml +1 -1
  16. examples/sft/evaluate.py +2 -0
  17. examples/sft/generate_traces.py +2 -0
  18. examples/swe/task_app/grpo_swe_mini.py +56 -26
  19. examples/swe/task_app/hosted/rollout.py +42 -0
  20. examples/swe/task_app/hosted/test_service.py +5 -6
  21. examples/task_apps/IMAGE_ONLY_EVAL_QUICKSTART.md +258 -0
  22. examples/task_apps/TESTING.md +275 -0
  23. examples/task_apps/__init__.py +0 -0
  24. examples/task_apps/crafter/CREATE_SFT_DATASET.md +273 -0
  25. examples/task_apps/crafter/EVAL_IMAGE_ONLY_RESULTS.md +152 -0
  26. examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +174 -0
  27. examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +268 -0
  28. examples/task_apps/crafter/QUERY_EXAMPLES.md +203 -0
  29. examples/task_apps/crafter/README_IMAGE_ONLY_EVAL.md +316 -0
  30. examples/task_apps/crafter/__init__.py +0 -0
  31. examples/task_apps/crafter/eval_image_only_gpt4o.toml +28 -0
  32. examples/task_apps/crafter/eval_text_only_groq_llama.toml +36 -0
  33. examples/task_apps/crafter/filter_sft_dataset.toml +16 -0
  34. examples/task_apps/crafter/task_app/__init__.py +5 -0
  35. examples/{warming_up_to_rl → task_apps/crafter}/task_app/grpo_crafter.py +324 -21
  36. examples/{warming_up_to_rl → task_apps/crafter}/task_app/grpo_crafter_task_app.py +1 -1
  37. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/environment.py +10 -0
  38. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/policy.py +76 -7
  39. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/react_agent.py +17 -2
  40. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/inference/openai_client.py +25 -3
  41. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/policy_routes.py +77 -4
  42. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/rollout.py +117 -9
  43. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/test_service.py +5 -6
  44. examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +218 -0
  45. examples/task_apps/dev/pokemon_emerald/__init__.py +2 -0
  46. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/README.md +811 -0
  47. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/__init__.py +120 -0
  48. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/action.py +160 -0
  49. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/memory.py +155 -0
  50. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/perception.py +69 -0
  51. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/planning.py +96 -0
  52. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/simple.py +1502 -0
  53. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/system_prompt.py +4 -0
  54. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/grab_map.py +68 -0
  55. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/manual.py +216 -0
  56. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/__init__.py +35 -0
  57. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emerald_utils.py +631 -0
  58. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emulator.py +1544 -0
  59. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/enums.py +1428 -0
  60. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/memory_reader.py +4848 -0
  61. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/types.py +41 -0
  62. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/utils.py +298 -0
  63. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pyproject.toml +95 -0
  64. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/run.py +204 -0
  65. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/__init__.py +0 -0
  66. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/app.py +2152 -0
  67. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/client.py +429 -0
  68. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/frame_server.py +155 -0
  69. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/README.md +78 -0
  70. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/__init__.py +0 -0
  71. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/run_tests.py +122 -0
  72. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_direct.py +76 -0
  73. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_prompts.py +413 -0
  74. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_battle_state_formatting.py +204 -0
  75. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection.py +133 -0
  76. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection_comprehensive.py +229 -0
  77. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_direct_agent_emulator.py +300 -0
  78. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_fps_adjustment_pytest.py +205 -0
  79. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_direct.py +200 -0
  80. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_transition.py +284 -0
  81. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_map_ground_truth_comparison.py +468 -0
  82. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_memory_map.py +575 -0
  83. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_server_map_validation.py +311 -0
  84. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_torchic_state.py +259 -0
  85. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/__init__.py +0 -0
  86. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/anticheat.py +372 -0
  87. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/checkpoint.py +296 -0
  88. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/error_handler.py +275 -0
  89. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/get_local_ip.py +22 -0
  90. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/helpers.py +44 -0
  91. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/llm_logger.py +514 -0
  92. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_formatter.py +415 -0
  93. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher.py +1763 -0
  94. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher_singleton.py +33 -0
  95. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_trimmer.py +106 -0
  96. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_visualizer.py +334 -0
  97. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/ocr_dialogue.py +1020 -0
  98. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/recording.py +188 -0
  99. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/state_formatter.py +1481 -0
  100. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/vlm.py +862 -0
  101. examples/task_apps/dev/pokemon_emerald/modal_app.py +114 -0
  102. examples/task_apps/dev/pokemon_emerald/task_app/README.md +81 -0
  103. examples/task_apps/dev/pokemon_emerald/task_app/__init__.py +6 -0
  104. examples/task_apps/dev/pokemon_emerald/task_app/pokemon_emerald.py +685 -0
  105. examples/task_apps/enron/__init__.py +1 -0
  106. examples/task_apps/enron/eval_groq_qwen32.toml +16 -0
  107. examples/task_apps/enron/filter_sft.toml +5 -0
  108. examples/task_apps/enron/task_app/README.md +14 -0
  109. examples/task_apps/enron/task_app/__init__.py +1 -0
  110. examples/task_apps/enron/task_app/grpo_enron.py +906 -0
  111. examples/task_apps/enron/task_app/grpo_enron_task_app.py +146 -0
  112. examples/task_apps/enron/tests/__init__.py +4 -0
  113. examples/task_apps/enron/tests/conftest.py +115 -0
  114. examples/task_apps/enron/tests/integration/__init__.py +4 -0
  115. examples/task_apps/enron/tests/integration/test_enron_eval.py +179 -0
  116. examples/task_apps/enron/tests/integration/test_enron_rollout.py +135 -0
  117. examples/task_apps/enron/tests/unit/__init__.py +4 -0
  118. examples/task_apps/enron/tests/unit/test_enron_environment.py +126 -0
  119. examples/task_apps/math/__init__.py +0 -0
  120. examples/{rl/task_app → task_apps/math}/math_single_step.py +19 -10
  121. examples/task_apps/pokemon_battle/__init__.py +2 -0
  122. examples/task_apps/pokemon_battle/modal_app.py +104 -0
  123. examples/task_apps/pokemon_battle/task_app/README.md +68 -0
  124. examples/task_apps/pokemon_battle/task_app/__init__.py +6 -0
  125. examples/task_apps/pokemon_battle/task_app/pokemon_showdown.py +932 -0
  126. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_COMPLETE.md +283 -0
  127. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_STATUS.md +155 -0
  128. examples/task_apps/pokemon_red/README.md +357 -0
  129. examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +415 -0
  130. examples/task_apps/pokemon_red/__init__.py +3 -0
  131. examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +29 -0
  132. examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +225 -0
  133. examples/task_apps/pokemon_red/pallet_town_rl_config.toml +75 -0
  134. examples/task_apps/pokemon_red/task_app.py +799 -0
  135. examples/task_apps/pokemon_red/test_pallet_town_rewards.py +193 -0
  136. examples/task_apps/sokoban/README.md +307 -0
  137. examples/task_apps/sokoban/__init__.py +3 -0
  138. examples/task_apps/sokoban/eval_groq_qwen32.toml +16 -0
  139. examples/task_apps/sokoban/eval_openai_gpt5.toml +16 -0
  140. examples/task_apps/sokoban/filter_sft.toml +5 -0
  141. examples/task_apps/sokoban/task_app.py +1058 -0
  142. examples/task_apps/sokoban/tests/__init__.py +4 -0
  143. examples/task_apps/sokoban/tests/conftest.py +113 -0
  144. examples/task_apps/sokoban/tests/integration/__init__.py +4 -0
  145. examples/task_apps/sokoban/tests/integration/test_sokoban_eval.py +57 -0
  146. examples/task_apps/sokoban/tests/integration/test_sokoban_rollout.py +198 -0
  147. examples/task_apps/sokoban/tests/unit/__init__.py +4 -0
  148. examples/task_apps/sokoban/tests/unit/test_sokoban_environment.py +114 -0
  149. examples/task_apps/verilog/__init__.py +1 -0
  150. examples/task_apps/verilog/eval_groq_qwen32b.toml +24 -0
  151. examples/task_apps/verilog/filter_sft.toml +5 -0
  152. examples/task_apps/verilog/task_app/README.md +12 -0
  153. examples/task_apps/verilog/task_app/__init__.py +1 -0
  154. examples/task_apps/verilog/task_app/grpo_verilog.py +1166 -0
  155. examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +145 -0
  156. examples/task_apps/verilog/tests/__init__.py +4 -0
  157. examples/task_apps/verilog/tests/conftest.py +115 -0
  158. examples/task_apps/verilog/tests/integration/__init__.py +4 -0
  159. examples/task_apps/verilog/tests/integration/test_verilog_eval.py +181 -0
  160. examples/task_apps/verilog/tests/integration/test_verilog_rollout.py +55 -0
  161. examples/task_apps/verilog/tests/unit/__init__.py +4 -0
  162. examples/task_apps/verilog/tests/unit/test_verilog_scoring.py +118 -0
  163. examples/vlm/crafter_openai_vlm_agent.py +4 -4
  164. examples/vlm/run_crafter_vlm_benchmark.py +4 -4
  165. examples/warming_up_to_rl/groq_test.py +2 -0
  166. examples/warming_up_to_rl/run_local_rollout.py +2 -0
  167. examples/warming_up_to_rl/run_local_rollout_modal.py +2 -0
  168. examples/warming_up_to_rl/run_local_rollout_parallel.py +2 -0
  169. examples/warming_up_to_rl/run_local_rollout_traced.py +2 -0
  170. examples/warming_up_to_rl/run_rollout_remote.py +2 -0
  171. examples/workflows/__init__.py +0 -0
  172. examples/workflows/math_rl/__init__.py +0 -0
  173. examples/workflows/math_rl/download_dataset.py +80 -0
  174. synth_ai/__init__.py +2 -2
  175. synth_ai/api/models/supported.py +1 -0
  176. synth_ai/api/train/builders.py +25 -11
  177. synth_ai/api/train/cli.py +12 -6
  178. synth_ai/api/train/configs/__init__.py +10 -10
  179. synth_ai/api/train/configs/rl.py +5 -4
  180. synth_ai/api/train/configs/sft.py +4 -3
  181. synth_ai/api/train/env_resolver.py +5 -2
  182. synth_ai/api/train/supported_algos.py +10 -5
  183. synth_ai/api/train/utils.py +7 -4
  184. synth_ai/cli/__init__.py +48 -59
  185. synth_ai/cli/_modal_wrapper.py +3 -2
  186. synth_ai/cli/_storage.py +4 -3
  187. synth_ai/cli/_validate_task_app.py +11 -0
  188. synth_ai/cli/balance.py +4 -3
  189. synth_ai/cli/calc.py +2 -2
  190. synth_ai/cli/demo.py +14 -7
  191. synth_ai/cli/legacy_root_backup.py +1 -1
  192. synth_ai/cli/recent.py +1 -1
  193. synth_ai/cli/rl_demo.py +8 -7
  194. synth_ai/cli/root.py +0 -97
  195. synth_ai/cli/status.py +1 -1
  196. synth_ai/cli/task_apps.py +1922 -190
  197. synth_ai/cli/traces.py +1 -1
  198. synth_ai/cli/tui.py +57 -0
  199. synth_ai/cli/turso.py +1 -1
  200. synth_ai/cli/watch.py +1 -1
  201. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +29 -17
  202. synth_ai/environments/examples/crafter_classic/environment.py +1 -1
  203. synth_ai/environments/examples/enron/engine.py +7 -2
  204. synth_ai/environments/examples/enron/environment.py +68 -0
  205. synth_ai/environments/examples/red/engine.py +27 -0
  206. synth_ai/environments/examples/red/engine_helpers/memory_map.py +7 -0
  207. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_progression.py +477 -0
  208. synth_ai/environments/examples/red/engine_helpers/state_extraction.py +32 -0
  209. synth_ai/environments/examples/red/environment.py +60 -0
  210. synth_ai/environments/examples/sokoban/taskset.py +116 -0
  211. synth_ai/environments/examples/verilog/engine.py +104 -12
  212. synth_ai/evals/client.py +58 -61
  213. synth_ai/jobs/client.py +16 -4
  214. synth_ai/judge_schemas.py +9 -9
  215. synth_ai/py.typed +0 -0
  216. synth_ai/task/__init__.py +24 -5
  217. synth_ai/task/apps/__init__.py +1 -0
  218. synth_ai/task/config.py +257 -0
  219. synth_ai/task/contracts.py +138 -39
  220. synth_ai/task/proxy.py +48 -56
  221. synth_ai/task/rubrics/__init__.py +56 -0
  222. synth_ai/task/rubrics/loaders.py +152 -0
  223. synth_ai/task/rubrics/models.py +57 -0
  224. synth_ai/task/rubrics/scoring.py +116 -0
  225. synth_ai/{rubrics/validators.py → task/rubrics/strict.py} +53 -30
  226. synth_ai/task/server.py +8 -7
  227. synth_ai/task/trace_correlation_helpers.py +315 -0
  228. synth_ai/task/validators.py +413 -6
  229. synth_ai/tracing_v3/abstractions.py +3 -3
  230. synth_ai/tracing_v3/decorators.py +7 -3
  231. synth_ai/tracing_v3/llm_call_record_helpers.py +5 -5
  232. synth_ai/tracing_v3/replica_sync.py +4 -4
  233. synth_ai/tracing_v3/serialization.py +5 -5
  234. synth_ai/tracing_v3/session_tracer.py +16 -6
  235. synth_ai/tracing_v3/storage/base.py +29 -29
  236. synth_ai/tracing_v3/storage/config.py +3 -3
  237. synth_ai/tracing_v3/trace_utils.py +317 -0
  238. synth_ai/tracing_v3/turso/daemon.py +8 -7
  239. synth_ai/tracing_v3/turso/native_manager.py +66 -43
  240. synth_ai/tracing_v3/utils.py +3 -3
  241. synth_ai/tui/__init__.py +5 -0
  242. synth_ai/tui/__main__.py +13 -0
  243. synth_ai/tui/cli/__init__.py +1 -0
  244. synth_ai/tui/cli/query_experiments.py +164 -0
  245. synth_ai/tui/cli/query_experiments_v3.py +164 -0
  246. synth_ai/tui/dashboard.py +906 -0
  247. {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/METADATA +4 -1
  248. {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/RECORD +278 -126
  249. examples/agora_ex/README_MoE.md +0 -224
  250. examples/agora_ex/__init__.py +0 -7
  251. examples/agora_ex/agora_ex.py +0 -65
  252. examples/agora_ex/agora_ex_task_app.py +0 -590
  253. examples/agora_ex/configs/rl_lora_qwen3_moe_2xh200.toml +0 -121
  254. examples/agora_ex/reward_fn_grpo-human.py +0 -129
  255. examples/agora_ex/system_prompt_CURRENT.md +0 -63
  256. examples/agora_ex/task_app/agora_ex_task_app.py +0 -590
  257. examples/agora_ex/task_app/reward_fn_grpo-human.py +0 -129
  258. examples/agora_ex/task_app/system_prompt_CURRENT.md +0 -63
  259. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +0 -62
  260. synth_ai/rubrics/__init__.py +0 -22
  261. synth_ai/task/rubrics.py +0 -219
  262. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/README.md +0 -0
  263. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/README.md +0 -0
  264. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/__init__.py +0 -0
  265. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/branching.py +0 -0
  266. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/environment_routes.py +0 -0
  267. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/__init__.py +0 -0
  268. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/__init__.py +0 -0
  269. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/app.py +0 -0
  270. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/shared.py +0 -0
  271. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/tools.py +0 -0
  272. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/hosted_app.py +0 -0
  273. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/inference/__init__.py +0 -0
  274. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/main.py +0 -0
  275. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/registry.py +0 -0
  276. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/storage/__init__.py +0 -0
  277. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/storage/volume.py +0 -0
  278. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/test_agents.py +0 -0
  279. /examples/{rl/task_app → task_apps/math}/README.md +0 -0
  280. /examples/{rl/task_app → task_apps/math}/math_task_app.py +0 -0
  281. /examples/{rl → workflows/math_rl}/configs/eval_base_qwen.toml +0 -0
  282. /examples/{rl → workflows/math_rl}/configs/eval_rl_qwen.toml +0 -0
  283. /examples/{rl → workflows/math_rl}/configs/rl_from_base_qwen.toml +0 -0
  284. /examples/{rl → workflows/math_rl}/configs/rl_from_base_qwen17.toml +0 -0
  285. /examples/{rl → workflows/math_rl}/configs/rl_from_ft_qwen.toml +0 -0
  286. /examples/{rl → workflows/math_rl}/run_eval.py +0 -0
  287. /examples/{rl → workflows/math_rl}/run_rl_and_save.py +0 -0
  288. {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/WHEEL +0 -0
  289. {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/entry_points.txt +0 -0
  290. {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/licenses/LICENSE +0 -0
  291. {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/top_level.txt +0 -0
@@ -1,11 +1,418 @@
1
+ """Task app validation utilities."""
2
+
1
3
  from __future__ import annotations
2
4
 
3
- from urllib.parse import urlparse
5
+ import re
6
+ from typing import Any
7
+ from urllib.parse import urlparse, urlunparse
8
+
9
+ import click
10
+ import httpx
11
+
12
+ from synth_ai.task.contracts import TaskAppEndpoints # type: ignore[attr-defined]
13
+
14
+
15
def validate_rollout_response_for_rl(response_data: dict[str, Any], *, warn_only: bool = False) -> list[str]:
    """Validate that a task app rollout response has the fields RL training needs.

    Two things are checked:

    1. ``response_data["pipeline_metadata"]["inference_url"]`` exists, is a
       string, and carries a ``cid`` query parameter (used for trace
       correlation).
    2. Every step of every trajectory exposes a string
       ``info["meta"]["inference_url"]`` (the nested structure is required).

    Trajectories and steps that are not dicts are skipped without comment.

    Args:
        response_data: The rollout response dict returned by the task app.
        warn_only: If True, return the collected issues instead of raising.

    Returns:
        The list of human-readable validation issues (empty when valid).

    Raises:
        ValueError: If any issue was found and ``warn_only`` is False.
    """
    issues: list[str] = []

    # --- Top-level pipeline metadata ---
    pipeline_metadata = response_data.get("pipeline_metadata")
    if not isinstance(pipeline_metadata, dict):
        issues.append("Missing or invalid 'pipeline_metadata' (required for RL training)")
    else:
        inference_url = pipeline_metadata.get("inference_url")
        if not inference_url:
            issues.append(
                "pipeline_metadata['inference_url'] is missing. "
                "RL trainer requires this field to extract traces."
            )
        elif not isinstance(inference_url, str):
            issues.append(
                f"pipeline_metadata['inference_url'] must be a string, got: {type(inference_url).__name__}"
            )
        elif "?cid=" not in inference_url and "&cid=" not in inference_url:
            # Accept cid both as the first query parameter ("?cid=") and as a
            # later one ("&cid="); a bare "?cid=" check wrongly rejects the latter.
            issues.append(
                f"pipeline_metadata['inference_url'] should contain '?cid=' for trace correlation. "
                f"Got: {inference_url[:80]}..."
            )

    # --- Trajectories and their steps ---
    # `or []` guards against an explicit null value, which would otherwise
    # crash the loop below with a TypeError instead of being reported.
    trajectories = response_data.get("trajectories") or []
    if not trajectories:
        issues.append("No trajectories found in response")

    for traj_idx, trajectory in enumerate(trajectories):
        if not isinstance(trajectory, dict):
            continue

        # Same null-guard as above: {"steps": None} means "no steps".
        steps = trajectory.get("steps") or []
        for step_idx, step in enumerate(steps):
            if not isinstance(step, dict):
                continue

            location = f"trajectory[{traj_idx}].steps[{step_idx}]"

            step_info = step.get("info", {})
            if not isinstance(step_info, dict):
                issues.append(f"{location}.info is not a dict")
                continue

            # The backend expects the URL nested under info.meta, not on info itself.
            step_meta = step_info.get("meta", {})
            if not isinstance(step_meta, dict):
                issues.append(
                    f"{location}.info.meta is missing or not a dict. "
                    f"RL trainer expects nested structure: info.meta.inference_url"
                )
                continue

            step_inference_url = step_meta.get("inference_url")
            if not step_inference_url:
                issues.append(
                    f"{location}.info.meta['inference_url'] is missing. "
                    f"RL trainer needs this for trace extraction (nested structure required!)"
                )
            elif not isinstance(step_inference_url, str):
                issues.append(
                    f"{location}.info.meta['inference_url'] must be a string, "
                    f"got: {type(step_inference_url).__name__}"
                )

    if issues and not warn_only:
        error_msg = "Task app response validation failed for RL training:\n" + "\n".join(
            f"  - {issue}" for issue in issues
        )
        raise ValueError(error_msg)

    return issues
104
+
105
+
106
+ def normalize_inference_url(url: str | None, *, default: str = "https://api.openai.com/v1/chat/completions") -> str:
107
+ """Normalize an inference URL to include the /v1/chat/completions path.
108
+
109
+ This utility ensures inference URLs have the correct path structure for OpenAI-compatible
110
+ chat completions endpoints, while preserving query parameters (e.g., ?cid=trace_123)
111
+ that may be added for tracing.
112
+
113
+ Args:
114
+ url: The inference URL to normalize (may be None or incomplete)
115
+ default: Default URL to use if url is None/empty
116
+
117
+ Returns:
118
+ Normalized URL with proper path and preserved query parameters
119
+
120
+ Examples:
121
+ >>> normalize_inference_url("https://api.groq.com")
122
+ 'https://api.groq.com/v1/chat/completions'
123
+
124
+ >>> normalize_inference_url("https://modal.host?cid=trace_123")
125
+ 'https://modal.host/v1/chat/completions?cid=trace_123'
126
+
127
+ >>> normalize_inference_url("https://api.openai.com/v1")
128
+ 'https://api.openai.com/v1/chat/completions'
129
+
130
+ >>> normalize_inference_url("https://api.groq.com/openai/v1/chat/completions")
131
+ 'https://api.groq.com/openai/v1/chat/completions'
132
+ """
133
+ candidate = (url or default).strip()
134
+ if not candidate:
135
+ candidate = default
136
+
137
+ # Parse the URL to separate path and query components
138
+ parsed = urlparse(candidate)
139
+
140
+ # Check if path already ends with a completions endpoint
141
+ path = parsed.path.rstrip('/')
142
+ if path.endswith("/v1/chat/completions") or path.endswith("/chat/completions"):
143
+ return candidate
144
+
145
+ # Determine what to append based on existing path
146
+ if path.endswith("/v1"):
147
+ new_path = f"{path}/chat/completions"
148
+ elif path.endswith("/chat"):
149
+ new_path = f"{path}/completions"
150
+ else:
151
+ # Default: append full path
152
+ new_path = f"{path}/v1/chat/completions" if path else "/v1/chat/completions"
153
+
154
+ # Reconstruct URL with new path and original query/fragment
155
+ return urlunparse(parsed._replace(path=new_path))
156
+
157
+
158
+ def validate_task_app_url(url: str | None) -> str:
159
+ """Validate and normalize a task app URL.
160
+
161
+ Args:
162
+ url: URL to validate
163
+
164
+ Returns:
165
+ Normalized URL
166
+
167
+ Raises:
168
+ ValueError: If URL is invalid
169
+ """
170
+ if not url:
171
+ raise ValueError("Task app URL is required")
172
+
173
+ url = url.strip().rstrip("/")
174
+
175
+ # Basic URL validation
176
+ url_pattern = re.compile(
177
+ r"^https?://" # http:// or https://
178
+ r"(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+[A-Z]{2,6}\.?|" # domain...
179
+ r"localhost|" # localhost...
180
+ r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})" # ...or ip
181
+ r"(?::\d+)?" # optional port
182
+ r"(?:/?|[/?]\S+)$",
183
+ re.IGNORECASE,
184
+ )
185
+
186
+ if not url_pattern.match(url):
187
+ raise ValueError(f"Invalid task app URL: {url}")
188
+
189
+ return url
190
+
191
+
192
def _print_success(msg: str) -> None:
    """Echo *msg* behind a check mark, styled green."""
    styled = click.style(f"✓ {msg}", fg="green")
    click.echo(styled)
195
+
196
+
197
def _print_error(msg: str) -> None:
    """Echo *msg* behind a cross mark, styled red, on the error stream."""
    styled = click.style(f"✗ {msg}", fg="red")
    click.echo(styled, err=True)
200
+
201
+
202
def _print_warning(msg: str) -> None:
    """Echo *msg* behind a warning sign, styled yellow."""
    styled = click.style(f"⚠ {msg}", fg="yellow")
    click.echo(styled)
205
+
4
206
 
207
def _print_info(msg: str) -> None:
    """Echo *msg* unstyled, offset by a leading space."""
    line = f" {msg}"
    click.echo(line)
5
210
 
6
- def validate_task_app_url(url: str, *, name: str = "TASK_APP_BASE_URL") -> None:
7
- """Validate a Task App base URL (scheme + host present)."""
8
211
 
9
- p = urlparse(url)
10
- if p.scheme not in ("http", "https") or not p.netloc:
11
- raise ValueError(f"Invalid {name}: malformed: {url}")
212
+ async def validate_task_app_endpoint(
213
+ url: str,
214
+ api_key: str | None = None,
215
+ min_instances: int = 10,
216
+ verbose: bool = False,
217
+ ) -> tuple[bool, dict[str, Any]]:
218
+ """Validate a task app deployment.
219
+
220
+ Returns:
221
+ (success: bool, results: dict)
222
+ """
223
+ results: dict[str, Any] = {
224
+ "url": url,
225
+ "endpoints": {},
226
+ "auth": {},
227
+ "task_instances": {},
228
+ "overall": False,
229
+ }
230
+
231
+ all_passed = True
232
+ endpoints = TaskAppEndpoints()
233
+
234
+ # Set up headers
235
+ headers = {}
236
+ if api_key:
237
+ headers["X-API-Key"] = api_key
238
+
239
+ click.echo(f"\n{'='*60}")
240
+ click.echo(f"Validating Task App: {url}")
241
+ click.echo(f"{'='*60}\n")
242
+
243
+ async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
244
+ # 1. Check root endpoint
245
+ click.echo("1. Checking root endpoint...")
246
+ try:
247
+ resp = await client.get(f"{url}{endpoints.root}")
248
+ if resp.status_code == 200:
249
+ data = resp.json()
250
+ _print_success(f"Root endpoint responds (status: {data.get('status')})")
251
+ results["endpoints"]["root"] = {"passed": True, "data": data}
252
+ if verbose:
253
+ _print_info(f"Service: {data.get('service', 'N/A')}")
254
+ else:
255
+ _print_error(f"Root endpoint returned {resp.status_code}")
256
+ results["endpoints"]["root"] = {"passed": False, "status": resp.status_code}
257
+ all_passed = False
258
+ except Exception as e:
259
+ _print_error(f"Root endpoint failed: {e}")
260
+ results["endpoints"]["root"] = {"passed": False, "error": str(e)}
261
+ all_passed = False
262
+
263
+ # 2. Check health endpoint
264
+ click.echo("\n2. Checking health endpoint...")
265
+ try:
266
+ resp = await client.get(f"{url}{endpoints.health}", headers=headers)
267
+ if resp.status_code == 200:
268
+ data = resp.json()
269
+ _print_success(f"Health endpoint responds (healthy: {data.get('healthy')})")
270
+ results["endpoints"]["health"] = {"passed": True, "data": data}
271
+
272
+ # Check auth configuration
273
+ auth_info = data.get("auth", {})
274
+ if auth_info.get("required"):
275
+ _print_info(f"Auth required: {auth_info.get('required')}")
276
+ _print_info(f"Expected key prefix: {auth_info.get('expected_prefix', 'N/A')}")
277
+
278
+ if api_key:
279
+ _print_success("API key provided and accepted")
280
+ results["auth"]["provided"] = True
281
+ results["auth"]["accepted"] = True
282
+ else:
283
+ _print_warning("No API key provided but may be required")
284
+ results["auth"]["provided"] = False
285
+ results["auth"]["required"] = True
286
+ else:
287
+ _print_error(f"Health endpoint returned {resp.status_code}")
288
+ results["endpoints"]["health"] = {"passed": False, "status": resp.status_code}
289
+ all_passed = False
290
+
291
+ if resp.status_code == 403:
292
+ _print_error("Authentication failed - provide API key with --api-key")
293
+ results["auth"]["error"] = "Authentication failed"
294
+
295
+ except Exception as e:
296
+ _print_error(f"Health endpoint failed: {e}")
297
+ results["endpoints"]["health"] = {"passed": False, "error": str(e)}
298
+ all_passed = False
299
+
300
+ # 3. Check info endpoint
301
+ click.echo("\n3. Checking info endpoint...")
302
+ try:
303
+ resp = await client.get(f"{url}{endpoints.info}", headers=headers)
304
+ if resp.status_code == 200:
305
+ data = resp.json()
306
+ _print_success("Info endpoint responds")
307
+ results["endpoints"]["info"] = {"passed": True, "data": data}
308
+
309
+ if verbose:
310
+ service = data.get("service", {})
311
+ task_info = service.get("task", {})
312
+ if isinstance(task_info, dict):
313
+ _print_info(f"Task: {task_info.get('name', 'N/A')}")
314
+ _print_info(f"Version: {service.get('version', 'N/A')}")
315
+
316
+ dataset = data.get("dataset", {})
317
+ if isinstance(dataset, dict):
318
+ _print_info(f"Dataset: {dataset.get('id', 'N/A')}")
319
+ else:
320
+ _print_error(f"Info endpoint returned {resp.status_code}")
321
+ results["endpoints"]["info"] = {"passed": False, "status": resp.status_code}
322
+ all_passed = False
323
+ except Exception as e:
324
+ _print_error(f"Info endpoint failed: {e}")
325
+ results["endpoints"]["info"] = {"passed": False, "error": str(e)}
326
+ all_passed = False
327
+
328
+ # 4. Check task_info endpoint and instance count
329
+ click.echo("\n4. Checking task_info endpoint and instance availability...")
330
+ try:
331
+ # Get taskset descriptor first
332
+ resp = await client.get(f"{url}{endpoints.task_info}", headers=headers)
333
+ if resp.status_code == 200:
334
+ data = resp.json()
335
+ _print_success("Task info endpoint responds")
336
+ results["endpoints"]["task_info"] = {"passed": True}
337
+
338
+ taskset = data.get("taskset", {})
339
+ if verbose and taskset:
340
+ if isinstance(taskset, dict):
341
+ _print_info(f"Taskset: {taskset.get('id', 'N/A')}")
342
+ else:
343
+ _print_info(f"Taskset: {taskset}")
344
+
345
+ # Try to get specific task instances (seeds 0 through min_instances + 4)
346
+ # Fetch instances one by one to verify we can get at least min_instances
347
+ instances = []
348
+ for seed in range(min_instances + 5): # Try a few extra
349
+ try:
350
+ resp_seed = await client.get(
351
+ f"{url}{endpoints.task_info}",
352
+ params={"seed": seed},
353
+ headers=headers,
354
+ )
355
+ if resp_seed.status_code == 200:
356
+ instance = resp_seed.json()
357
+ instances.append(instance)
358
+ else:
359
+ break # Stop if we hit an invalid seed
360
+ except Exception:
361
+ break
362
+
363
+ instance_count = len(instances)
364
+ results["task_instances"]["count"] = instance_count
365
+ results["task_instances"]["requested"] = min_instances
366
+
367
+ if instance_count >= min_instances:
368
+ _print_success(f"Found {instance_count} task instances (≥ {min_instances} required)")
369
+ results["task_instances"]["passed"] = True
370
+
371
+ if verbose and instances:
372
+ sample = instances[0]
373
+ task_info_sample = sample.get('task', {})
374
+ if isinstance(task_info_sample, dict):
375
+ _print_info(f"Sample task: {task_info_sample.get('name', 'N/A')}")
376
+ _print_info(f"Environment: {sample.get('environment', 'N/A')}")
377
+ else:
378
+ _print_error(f"Only {instance_count} task instances available (need ≥ {min_instances})")
379
+ results["task_instances"]["passed"] = False
380
+ all_passed = False
381
+ else:
382
+ _print_error(f"Task info endpoint returned {resp.status_code}")
383
+ results["endpoints"]["task_info"] = {"passed": False, "status": resp.status_code}
384
+ all_passed = False
385
+ except Exception as e:
386
+ _print_error(f"Task info endpoint failed: {e}")
387
+ results["endpoints"]["task_info"] = {"passed": False, "error": str(e)}
388
+ results["task_instances"]["passed"] = False
389
+ all_passed = False
390
+
391
+ # 5. Check rollout endpoint structure (don't actually run a rollout)
392
+ click.echo("\n5. Checking rollout endpoint availability...")
393
+ try:
394
+ # Just check if it's registered (OPTIONS or a lightweight probe)
395
+ resp = await client.options(f"{url}{endpoints.rollout}", headers=headers)
396
+ # Many servers return 200 for OPTIONS, some return 405
397
+ if resp.status_code in (200, 204, 405):
398
+ _print_success("Rollout endpoint is registered")
399
+ results["endpoints"]["rollout"] = {"passed": True}
400
+ else:
401
+ _print_warning(f"Rollout endpoint returned unexpected status: {resp.status_code}")
402
+ results["endpoints"]["rollout"] = {"passed": True, "note": "endpoint exists"}
403
+ except Exception as e:
404
+ # OPTIONS might not be supported, that's okay
405
+ _print_info(f"Rollout endpoint check skipped (OPTIONS not supported): {e}")
406
+ results["endpoints"]["rollout"] = {"passed": True, "note": "assumed present"}
407
+
408
+ # Summary
409
+ click.echo(f"\n{'='*60}")
410
+ if all_passed:
411
+ _print_success("All validations passed!")
412
+ click.echo(f"{'='*60}\n")
413
+ else:
414
+ _print_error("Some validations failed. See errors above.")
415
+ click.echo(f"{'='*60}\n")
416
+
417
+ results["overall"] = all_passed
418
+ return all_passed, results
@@ -37,7 +37,7 @@ Concepts:
37
37
  from __future__ import annotations
38
38
 
39
39
  from dataclasses import asdict, dataclass, field
40
- from datetime import UTC, datetime
40
+ from datetime import datetime, timezone
41
41
  from typing import Any
42
42
 
43
43
  from .lm_call_record_abstractions import LLMCallRecord
@@ -249,7 +249,7 @@ class SessionTimeStep:
249
249
 
250
250
  step_id: str = ""
251
251
  step_index: int = 0
252
- timestamp: datetime = field(default_factory=lambda: datetime.now(UTC))
252
+ timestamp: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
253
253
  turn_number: int | None = None
254
254
  events: list[BaseEvent] = field(default_factory=list)
255
255
  markov_blanket_messages: list[SessionEventMarkovBlanketMessage] = field(default_factory=list)
@@ -283,7 +283,7 @@ class SessionTrace:
283
283
  """
284
284
 
285
285
  session_id: str = ""
286
- created_at: datetime = field(default_factory=lambda: datetime.now(UTC))
286
+ created_at: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
287
287
  session_time_steps: list[SessionTimeStep] = field(default_factory=list)
288
288
  event_history: list[BaseEvent] = field(default_factory=list)
289
289
  markov_blanket_message_history: list[SessionEventMarkovBlanketMessage] = field(
@@ -37,10 +37,14 @@ from .utils import calculate_cost, detect_provider
37
37
  # Context variables for session and turn tracking
38
38
  # These variables automatically propagate across async call boundaries,
39
39
  # allowing deeply nested code to access tracing context without explicit passing
40
- _session_id_ctx: contextvars.ContextVar[str | None] = contextvars.ContextVar("session_id")
41
- _turn_number_ctx: contextvars.ContextVar[int | None] = contextvars.ContextVar("turn_number")
40
+ _session_id_ctx: contextvars.ContextVar[str | None] = contextvars.ContextVar(
41
+ "session_id"
42
+ )
43
+ _turn_number_ctx: contextvars.ContextVar[int | None] = contextvars.ContextVar(
44
+ "turn_number"
45
+ )
42
46
  _session_tracer_ctx: contextvars.ContextVar[Any | None] = contextvars.ContextVar(
43
- "session_tracer", default=None
47
+ "session_tracer"
44
48
  )
45
49
 
46
50
 
@@ -8,7 +8,7 @@ from __future__ import annotations
8
8
 
9
9
  import uuid
10
10
  from dataclasses import dataclass, field
11
- from datetime import UTC, datetime
11
+ from datetime import datetime, timezone
12
12
  from typing import Any, TypedDict, cast
13
13
 
14
14
  from .lm_call_record_abstractions import (
@@ -180,8 +180,8 @@ def create_llm_call_record_from_response(
180
180
  api_type=api_type,
181
181
  provider=provider,
182
182
  model_name=model_name,
183
- started_at=started_at or datetime.now(UTC),
184
- completed_at=completed_at or datetime.now(UTC),
183
+ started_at=started_at or datetime.now(timezone.utc),
184
+ completed_at=completed_at or datetime.now(timezone.utc),
185
185
  latency_ms=latency_ms,
186
186
  request_params=params,
187
187
  input_messages=input_messages,
@@ -376,8 +376,8 @@ def create_llm_call_record_from_streaming(
376
376
  api_type="responses", # Streaming typically from Responses API
377
377
  provider=provider,
378
378
  model_name=model_name,
379
- started_at=started_at or datetime.now(UTC),
380
- completed_at=completed_at or datetime.now(UTC),
379
+ started_at=started_at or datetime.now(timezone.utc),
380
+ completed_at=completed_at or datetime.now(timezone.utc),
381
381
  latency_ms=latency_ms,
382
382
  request_params=params,
383
383
  input_messages=input_messages,
@@ -25,15 +25,15 @@ application to continue without blocking on sync operations.
25
25
  """
26
26
 
27
27
  import asyncio
28
+ import importlib
28
29
  import logging
29
- from typing import Any
30
-
31
- import libsql
30
+ from typing import Any, cast
32
31
 
33
32
  from .config import CONFIG
34
33
 
35
34
  logger = logging.getLogger(__name__)
36
35
 
36
+ libsql = cast(Any, importlib.import_module("libsql"))
37
37
 
38
38
  class ReplicaSync:
39
39
  """Manages synchronization of embedded SQLite replica with remote Turso database.
@@ -53,7 +53,7 @@ class ReplicaSync:
53
53
  db_path: str = "embedded.db",
54
54
  sync_url: str | None = None,
55
55
  auth_token: str | None = None,
56
- sync_interval: int | None = None,
56
+ sync_interval: float | None = None,
57
57
  ):
58
58
  """Initialize replica sync manager.
59
59
 
@@ -55,11 +55,11 @@ def normalize_for_json(value: Any) -> Any:
55
55
  return {str(k): normalize_for_json(v) for k, v in value.items()}
56
56
 
57
57
  # Sequences
58
- if isinstance(value, (list, tuple, set)):
58
+ if isinstance(value, list | tuple | set):
59
59
  return [normalize_for_json(v) for v in value]
60
60
 
61
61
  # Datetime / Date
62
- if isinstance(value, (datetime, date)):
62
+ if isinstance(value, datetime | date):
63
63
  return value.isoformat()
64
64
 
65
65
  # Decimal
@@ -73,7 +73,7 @@ def normalize_for_json(value: Any) -> Any:
73
73
  return str(value)
74
74
 
75
75
  # Bytes-like
76
- if isinstance(value, (bytes, bytearray)):
76
+ if isinstance(value, bytes | bytearray):
77
77
  return base64.b64encode(bytes(value)).decode("ascii")
78
78
 
79
79
  # Enum
@@ -82,9 +82,9 @@ def normalize_for_json(value: Any) -> Any:
82
82
 
83
83
  # Numpy scalars / arrays
84
84
  if _np is not None:
85
- if isinstance(value, (_np.generic,)): # type: ignore[attr-defined]
85
+ if isinstance(value, _np.generic): # type: ignore[attr-defined]
86
86
  return normalize_for_json(value.item())
87
- if isinstance(value, (_np.ndarray,)):
87
+ if isinstance(value, _np.ndarray):
88
88
  return normalize_for_json(value.tolist())
89
89
 
90
90
  # Floats: sanitize NaN / Infinity to None
@@ -5,7 +5,7 @@ from __future__ import annotations
5
5
  import asyncio
6
6
  import json
7
7
  from contextlib import asynccontextmanager
8
- from datetime import UTC, datetime
8
+ from datetime import datetime, timezone
9
9
  from typing import Any
10
10
 
11
11
  from .abstractions import (
@@ -106,7 +106,7 @@ class SessionTracer:
106
106
 
107
107
  self._current_trace = SessionTrace(
108
108
  session_id=session_id,
109
- created_at=datetime.now(UTC),
109
+ created_at=datetime.now(timezone.utc),
110
110
  session_time_steps=[],
111
111
  event_history=[],
112
112
  markov_blanket_message_history=[],
@@ -152,7 +152,7 @@ class SessionTracer:
152
152
  step = SessionTimeStep(
153
153
  step_id=step_id,
154
154
  step_index=len(self._current_trace.session_time_steps),
155
- timestamp=datetime.now(UTC),
155
+ timestamp=datetime.now(timezone.utc),
156
156
  turn_number=turn_number,
157
157
  step_metadata=metadata or {},
158
158
  )
@@ -197,7 +197,7 @@ class SessionTracer:
197
197
  step = self._current_step
198
198
 
199
199
  if step and step.completed_at is None:
200
- step.completed_at = datetime.now(UTC)
200
+ step.completed_at = datetime.now(timezone.utc)
201
201
 
202
202
  # Trigger hooks
203
203
  await self.hooks.trigger(
@@ -294,7 +294,7 @@ class SessionTracer:
294
294
  content=normalised_content,
295
295
  message_type=message_type,
296
296
  time_record=TimeRecord(
297
- event_time=event_time or datetime.now(UTC).timestamp(), message_time=message_time
297
+ event_time=event_time or datetime.now(timezone.utc).timestamp(), message_time=message_time
298
298
  ),
299
299
  metadata=metadata or {},
300
300
  )
@@ -368,18 +368,28 @@ class SessionTracer:
368
368
  # End any open timesteps
369
369
  for step in self._current_trace.session_time_steps:
370
370
  if step.completed_at is None:
371
- step.completed_at = datetime.now(UTC)
371
+ step.completed_at = datetime.now(timezone.utc)
372
372
 
373
373
  # Trigger pre-save hooks
374
374
  await self.hooks.trigger("before_save", session=self._current_trace)
375
375
 
376
376
  # Save if requested
377
377
  should_save = save if save is not None else self.auto_save
378
+
379
+ # Debug logging
380
+ import logging
381
+ _logger = logging.getLogger(__name__)
382
+ _logger.info(f"[TRACE_DEBUG] end_session: should_save={should_save}, self.db={self.db is not None}, auto_save={self.auto_save}")
383
+
378
384
  if should_save and self.db:
385
+ _logger.info(f"[TRACE_DEBUG] Calling insert_session_trace with {len(self._current_trace.markov_blanket_message_history)} messages")
379
386
  await self.db.insert_session_trace(self._current_trace)
387
+ _logger.info(f"[TRACE_DEBUG] insert_session_trace completed")
380
388
 
381
389
  # Trigger post-save hooks
382
390
  await self.hooks.trigger("after_save", session=self._current_trace)
391
+ else:
392
+ _logger.warning(f"[TRACE_DEBUG] Skipping save: should_save={should_save}, self.db={self.db is not None}")
383
393
 
384
394
  # Trigger session end hooks
385
395
  await self.hooks.trigger("session_end", session=self._current_trace)