synth-ai 0.2.13.dev1__py3-none-any.whl → 0.2.13.dev2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (226)
  1. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +12 -1
  2. examples/swe/task_app/grpo_swe_mini.py +55 -26
  3. examples/swe/task_app/hosted/rollout.py +40 -0
  4. examples/swe/task_app/hosted/test_service.py +5 -6
  5. examples/task_apps/TESTING.md +275 -0
  6. examples/task_apps/__init__.py +0 -0
  7. examples/task_apps/crafter/__init__.py +0 -0
  8. examples/task_apps/crafter/task_app/__init__.py +2 -0
  9. examples/{warming_up_to_rl → task_apps/crafter}/task_app/grpo_crafter.py +18 -13
  10. examples/{warming_up_to_rl → task_apps/crafter}/task_app/grpo_crafter_task_app.py +1 -1
  11. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/policy.py +60 -4
  12. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/policy_routes.py +25 -3
  13. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/rollout.py +10 -0
  14. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/test_service.py +5 -6
  15. examples/task_apps/dev/pokemon_emerald/__init__.py +2 -0
  16. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/README.md +811 -0
  17. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/__init__.py +120 -0
  18. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/action.py +160 -0
  19. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/memory.py +155 -0
  20. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/perception.py +69 -0
  21. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/planning.py +96 -0
  22. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/simple.py +1502 -0
  23. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/system_prompt.py +4 -0
  24. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/grab_map.py +68 -0
  25. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/manual.py +216 -0
  26. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/__init__.py +35 -0
  27. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emerald_utils.py +631 -0
  28. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emulator.py +1544 -0
  29. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/enums.py +1428 -0
  30. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/memory_reader.py +4848 -0
  31. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/types.py +41 -0
  32. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/utils.py +298 -0
  33. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pyproject.toml +95 -0
  34. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/run.py +204 -0
  35. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/__init__.py +0 -0
  36. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/app.py +2152 -0
  37. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/client.py +429 -0
  38. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/frame_server.py +155 -0
  39. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/README.md +78 -0
  40. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/__init__.py +0 -0
  41. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/run_tests.py +122 -0
  42. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_direct.py +76 -0
  43. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_prompts.py +413 -0
  44. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_battle_state_formatting.py +204 -0
  45. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection.py +133 -0
  46. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection_comprehensive.py +229 -0
  47. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_direct_agent_emulator.py +300 -0
  48. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_fps_adjustment_pytest.py +205 -0
  49. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_direct.py +200 -0
  50. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_transition.py +284 -0
  51. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_map_ground_truth_comparison.py +468 -0
  52. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_memory_map.py +575 -0
  53. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_server_map_validation.py +311 -0
  54. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_torchic_state.py +259 -0
  55. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/__init__.py +0 -0
  56. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/anticheat.py +372 -0
  57. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/checkpoint.py +296 -0
  58. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/error_handler.py +275 -0
  59. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/get_local_ip.py +22 -0
  60. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/helpers.py +44 -0
  61. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/llm_logger.py +514 -0
  62. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_formatter.py +415 -0
  63. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher.py +1763 -0
  64. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher_singleton.py +33 -0
  65. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_trimmer.py +106 -0
  66. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_visualizer.py +334 -0
  67. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/ocr_dialogue.py +1020 -0
  68. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/recording.py +188 -0
  69. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/state_formatter.py +1481 -0
  70. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/vlm.py +862 -0
  71. examples/task_apps/dev/pokemon_emerald/modal_app.py +114 -0
  72. examples/task_apps/dev/pokemon_emerald/task_app/README.md +81 -0
  73. examples/task_apps/dev/pokemon_emerald/task_app/__init__.py +6 -0
  74. examples/task_apps/dev/pokemon_emerald/task_app/pokemon_emerald.py +685 -0
  75. examples/task_apps/enron/__init__.py +1 -0
  76. examples/task_apps/enron/eval_groq_qwen32.toml +16 -0
  77. examples/task_apps/enron/task_app/README.md +14 -0
  78. examples/task_apps/enron/task_app/__init__.py +1 -0
  79. examples/task_apps/enron/task_app/grpo_enron.py +906 -0
  80. examples/task_apps/enron/task_app/grpo_enron_task_app.py +146 -0
  81. examples/task_apps/enron/tests/__init__.py +2 -0
  82. examples/task_apps/enron/tests/conftest.py +115 -0
  83. examples/task_apps/enron/tests/integration/__init__.py +2 -0
  84. examples/task_apps/enron/tests/integration/test_enron_eval.py +177 -0
  85. examples/task_apps/enron/tests/integration/test_enron_rollout.py +135 -0
  86. examples/task_apps/enron/tests/unit/__init__.py +2 -0
  87. examples/task_apps/enron/tests/unit/test_enron_environment.py +126 -0
  88. examples/task_apps/math/__init__.py +0 -0
  89. examples/{rl/task_app → task_apps/math}/math_single_step.py +19 -10
  90. examples/task_apps/pokemon_battle/__init__.py +2 -0
  91. examples/task_apps/pokemon_battle/modal_app.py +104 -0
  92. examples/task_apps/pokemon_battle/task_app/README.md +68 -0
  93. examples/task_apps/pokemon_battle/task_app/__init__.py +6 -0
  94. examples/task_apps/pokemon_battle/task_app/pokemon_showdown.py +932 -0
  95. examples/task_apps/pokemon_red/README.md +357 -0
  96. examples/task_apps/pokemon_red/__init__.py +3 -0
  97. examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +225 -0
  98. examples/task_apps/pokemon_red/pallet_town_rl_config.toml +73 -0
  99. examples/task_apps/pokemon_red/task_app.py +606 -0
  100. examples/task_apps/pokemon_red/test_pallet_town_rewards.py +191 -0
  101. examples/task_apps/sokoban/README.md +307 -0
  102. examples/task_apps/sokoban/__init__.py +3 -0
  103. examples/task_apps/sokoban/eval_groq_qwen32.toml +16 -0
  104. examples/task_apps/sokoban/eval_openai_gpt5.toml +16 -0
  105. examples/task_apps/sokoban/task_app.py +1058 -0
  106. examples/task_apps/sokoban/tests/__init__.py +2 -0
  107. examples/task_apps/sokoban/tests/conftest.py +113 -0
  108. examples/task_apps/sokoban/tests/integration/__init__.py +2 -0
  109. examples/task_apps/sokoban/tests/integration/test_sokoban_eval.py +57 -0
  110. examples/task_apps/sokoban/tests/integration/test_sokoban_rollout.py +198 -0
  111. examples/task_apps/sokoban/tests/unit/__init__.py +2 -0
  112. examples/task_apps/sokoban/tests/unit/test_sokoban_environment.py +114 -0
  113. examples/task_apps/verilog/__init__.py +1 -0
  114. examples/task_apps/verilog/eval_groq_qwen32b.toml +20 -0
  115. examples/task_apps/verilog/task_app/README.md +12 -0
  116. examples/task_apps/verilog/task_app/__init__.py +1 -0
  117. examples/task_apps/verilog/task_app/grpo_verilog.py +931 -0
  118. examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +145 -0
  119. examples/task_apps/verilog/tests/__init__.py +2 -0
  120. examples/task_apps/verilog/tests/conftest.py +115 -0
  121. examples/task_apps/verilog/tests/integration/__init__.py +2 -0
  122. examples/task_apps/verilog/tests/integration/test_verilog_eval.py +179 -0
  123. examples/task_apps/verilog/tests/integration/test_verilog_rollout.py +55 -0
  124. examples/task_apps/verilog/tests/unit/__init__.py +2 -0
  125. examples/task_apps/verilog/tests/unit/test_verilog_scoring.py +118 -0
  126. examples/vlm/crafter_openai_vlm_agent.py +4 -4
  127. examples/vlm/run_crafter_vlm_benchmark.py +4 -4
  128. examples/workflows/__init__.py +0 -0
  129. examples/workflows/math_rl/__init__.py +0 -0
  130. examples/workflows/math_rl/download_dataset.py +80 -0
  131. synth_ai/__init__.py +2 -2
  132. synth_ai/api/train/builders.py +25 -11
  133. synth_ai/api/train/cli.py +12 -6
  134. synth_ai/api/train/configs/__init__.py +10 -10
  135. synth_ai/api/train/configs/rl.py +5 -4
  136. synth_ai/api/train/configs/sft.py +4 -3
  137. synth_ai/api/train/env_resolver.py +5 -2
  138. synth_ai/api/train/supported_algos.py +10 -5
  139. synth_ai/api/train/utils.py +7 -4
  140. synth_ai/cli/__init__.py +7 -51
  141. synth_ai/cli/_storage.py +4 -3
  142. synth_ai/cli/_validate_task_app.py +11 -0
  143. synth_ai/cli/balance.py +4 -3
  144. synth_ai/cli/calc.py +2 -2
  145. synth_ai/cli/demo.py +14 -7
  146. synth_ai/cli/legacy_root_backup.py +1 -1
  147. synth_ai/cli/rl_demo.py +8 -7
  148. synth_ai/cli/root.py +0 -97
  149. synth_ai/cli/task_apps.py +1707 -186
  150. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +28 -16
  151. synth_ai/environments/examples/enron/engine.py +7 -2
  152. synth_ai/environments/examples/enron/environment.py +68 -0
  153. synth_ai/environments/examples/red/engine.py +27 -0
  154. synth_ai/environments/examples/red/engine_helpers/memory_map.py +7 -0
  155. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_progression.py +477 -0
  156. synth_ai/environments/examples/red/engine_helpers/state_extraction.py +32 -0
  157. synth_ai/environments/examples/red/environment.py +60 -0
  158. synth_ai/environments/examples/sokoban/taskset.py +116 -0
  159. synth_ai/environments/examples/verilog/engine.py +30 -4
  160. synth_ai/evals/client.py +58 -61
  161. synth_ai/jobs/client.py +16 -4
  162. synth_ai/judge_schemas.py +16 -16
  163. synth_ai/py.typed +0 -0
  164. synth_ai/task/__init__.py +14 -5
  165. synth_ai/task/contracts.py +124 -38
  166. synth_ai/task/proxy.py +48 -56
  167. synth_ai/task/rubrics/__init__.py +53 -0
  168. synth_ai/task/rubrics/loaders.py +133 -0
  169. synth_ai/task/rubrics/models.py +57 -0
  170. synth_ai/task/rubrics/scoring.py +113 -0
  171. synth_ai/{rubrics/validators.py → task/rubrics/strict.py} +53 -30
  172. synth_ai/task/server.py +8 -7
  173. synth_ai/task/validators.py +269 -6
  174. synth_ai/tracing_v3/decorators.py +7 -3
  175. synth_ai/tracing_v3/replica_sync.py +4 -4
  176. synth_ai/tracing_v3/serialization.py +5 -5
  177. synth_ai/tracing_v3/trace_utils.py +317 -0
  178. synth_ai/tracing_v3/turso/native_manager.py +3 -3
  179. {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.13.dev2.dist-info}/METADATA +4 -1
  180. {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.13.dev2.dist-info}/RECORD +214 -101
  181. examples/agora_ex/README_MoE.md +0 -224
  182. examples/agora_ex/__init__.py +0 -7
  183. examples/agora_ex/agora_ex.py +0 -65
  184. examples/agora_ex/agora_ex_task_app.py +0 -590
  185. examples/agora_ex/configs/rl_lora_qwen3_moe_2xh200.toml +0 -121
  186. examples/agora_ex/reward_fn_grpo-human.py +0 -129
  187. examples/agora_ex/system_prompt_CURRENT.md +0 -63
  188. examples/agora_ex/task_app/agora_ex_task_app.py +0 -590
  189. examples/agora_ex/task_app/reward_fn_grpo-human.py +0 -129
  190. examples/agora_ex/task_app/system_prompt_CURRENT.md +0 -63
  191. synth_ai/rubrics/__init__.py +0 -22
  192. synth_ai/task/rubrics.py +0 -219
  193. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/README.md +0 -0
  194. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/README.md +0 -0
  195. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/__init__.py +0 -0
  196. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/branching.py +0 -0
  197. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/environment_routes.py +0 -0
  198. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/__init__.py +0 -0
  199. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/__init__.py +0 -0
  200. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/app.py +0 -0
  201. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/environment.py +0 -0
  202. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/react_agent.py +0 -0
  203. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/shared.py +0 -0
  204. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/tools.py +0 -0
  205. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/hosted_app.py +0 -0
  206. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/inference/__init__.py +0 -0
  207. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/inference/openai_client.py +0 -0
  208. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/main.py +0 -0
  209. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/registry.py +0 -0
  210. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/storage/__init__.py +0 -0
  211. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/storage/volume.py +0 -0
  212. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/test_agents.py +0 -0
  213. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/utils.py +0 -0
  214. /examples/{rl/task_app → task_apps/math}/README.md +0 -0
  215. /examples/{rl/task_app → task_apps/math}/math_task_app.py +0 -0
  216. /examples/{rl → workflows/math_rl}/configs/eval_base_qwen.toml +0 -0
  217. /examples/{rl → workflows/math_rl}/configs/eval_rl_qwen.toml +0 -0
  218. /examples/{rl → workflows/math_rl}/configs/rl_from_base_qwen.toml +0 -0
  219. /examples/{rl → workflows/math_rl}/configs/rl_from_base_qwen17.toml +0 -0
  220. /examples/{rl → workflows/math_rl}/configs/rl_from_ft_qwen.toml +0 -0
  221. /examples/{rl → workflows/math_rl}/run_eval.py +0 -0
  222. /examples/{rl → workflows/math_rl}/run_rl_and_save.py +0 -0
  223. {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.13.dev2.dist-info}/WHEEL +0 -0
  224. {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.13.dev2.dist-info}/entry_points.txt +0 -0
  225. {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.13.dev2.dist-info}/licenses/LICENSE +0 -0
  226. {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.13.dev2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,317 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import sqlite3
5
+ from collections.abc import Sequence
6
+ from dataclasses import dataclass
7
+ from typing import Any
8
+
9
# Short alias for sqlite3.Row, the row type that connect() installs as the
# connection's row_factory.
Row = sqlite3.Row
10
+
11
+
12
def connect(db_path: str | bytes | int) -> sqlite3.Connection:
    """Open a SQLite connection whose rows can be indexed by column name.

    Args:
        db_path: Database location, passed unchanged to ``sqlite3.connect``.

    Returns:
        The new connection with ``row_factory`` set to ``sqlite3.Row``.
    """
    connection = sqlite3.connect(db_path)
    # The query helpers in this module read columns by name (row["session_id"]),
    # which requires sqlite3.Row instead of the default tuple rows.
    connection.row_factory = sqlite3.Row
    return connection
16
+
17
+
18
+ def _json_load(value: Any) -> Any:
19
+ if value is None:
20
+ return None
21
+ if isinstance(value, dict | list):
22
+ return value
23
+ if isinstance(value, bytes | bytearray):
24
+ value = value.decode("utf-8", errors="ignore")
25
+ try:
26
+ return json.loads(value)
27
+ except Exception:
28
+ return value
29
+
30
+
31
def fetch_crafter_sessions(
    conn: sqlite3.Connection,
    *,
    limit: int,
    metadata_filter: str | None = None,
    session_ids: Sequence[str] | None = None,
    min_event_count: int = 0,
) -> list[str]:
    """Return session ids from ``session_traces``, newest ``created_at`` first.

    Args:
        conn: Connection whose rows support access by column name.
        limit: Maximum number of ids returned by the filtered query. It is
            not applied when ``session_ids`` is given.
        metadata_filter: When set, keep only sessions whose ``metadata``
            column contains this text (SQL ``LIKE '%text%'``).
        session_ids: When non-empty, look up exactly these ids and ignore
            every other filter.
        min_event_count: When positive, keep only sessions that have at least
            this many joined ``events`` rows.

    Returns:
        The matching ``session_id`` values ordered by ``created_at`` descending.
    """
    # Explicit id list: short-circuit, no metadata/event/limit filtering.
    if session_ids:
        wanted = tuple(session_ids)
        marks = ",".join("?" * len(wanted))
        found = conn.execute(
            f"""
            SELECT session_id
            FROM session_traces
            WHERE session_id IN ({marks})
            ORDER BY created_at DESC
            """,
            wanted,
        ).fetchall()
        return [record["session_id"] for record in found]

    # Bind values are appended in the order their placeholders appear in
    # the statement: WHERE, then HAVING, then LIMIT.
    conditions: list[str] = []
    bindings: list[Any] = []
    if metadata_filter:
        conditions.append("session_traces.metadata LIKE ?")
        bindings.append(f"%{metadata_filter}%")
    where_sql = ("WHERE " + " AND ".join(conditions)) if conditions else ""

    if min_event_count > 0:
        having_sql = "HAVING COUNT(events.id) >= ?"
        bindings.append(min_event_count)
    else:
        having_sql = ""

    bindings.append(limit)
    query = f"""
        SELECT session_traces.session_id
        FROM session_traces
        LEFT JOIN events ON session_traces.session_id = events.session_id
        {where_sql}
        GROUP BY session_traces.session_id
        {having_sql}
        ORDER BY session_traces.created_at DESC
        LIMIT ?
    """
    found = conn.execute(query, tuple(bindings)).fetchall()
    return [record["session_id"] for record in found]
78
+
79
+
80
def load_session_trace(conn: sqlite3.Connection, session_id: str) -> dict[str, Any]:
    """Assemble one session's stored trace into a plain dictionary.

    Reads the session row plus its timesteps, events, messages, event rewards
    and outcome rewards, decoding JSON-bearing columns with ``_json_load``.

    Args:
        conn: Connection whose rows support access by column name.
        session_id: Identifier of the session to load.

    Returns:
        A dict with the session header (``session_id``, ``created_at``,
        ``metadata``), ``session_time_steps``, the event list (exposed under
        both ``event_history`` and ``events``), the message list (exposed
        under both ``markov_blanket_message_history`` and ``messages``),
        ``event_rewards`` and ``outcome_rewards``.

    Raises:
        ValueError: If no ``session_traces`` row exists for ``session_id``.
    """
    key = (session_id,)

    header = conn.execute(
        """
        SELECT session_id, created_at, metadata
        FROM session_traces
        WHERE session_id = ?
        """,
        key,
    ).fetchone()
    if header is None:
        raise ValueError(f"Session {session_id} not found")

    def select_all(sql: str) -> list[Any]:
        """Run a single-parameter query scoped to this session."""
        return conn.execute(sql, key).fetchall()

    step_rows = select_all(
        """
        SELECT step_id,
               step_index,
               turn_number,
               started_at,
               completed_at,
               step_metadata
        FROM session_timesteps
        WHERE session_id = ?
        ORDER BY step_index ASC
        """
    )
    event_rows = select_all(
        """
        SELECT *
        FROM events
        WHERE session_id = ?
        ORDER BY event_time ASC, id ASC
        """
    )
    message_rows = select_all(
        """
        SELECT *
        FROM messages
        WHERE session_id = ?
        ORDER BY event_time ASC, id ASC
        """
    )
    event_reward_rows = select_all(
        """
        SELECT *
        FROM event_rewards
        WHERE session_id = ?
        ORDER BY turn_number ASC, id ASC
        """
    )
    outcome_reward_rows = select_all(
        """
        SELECT *
        FROM outcome_rewards
        WHERE session_id = ?
        ORDER BY created_at ASC
        """
    )

    session_meta = _json_load(header["metadata"]) or {}
    if isinstance(session_meta, dict):
        # Normalise a non-string episode id (e.g. an integer) to text.
        episode_id = session_meta.get("episode_id")
        if not (episode_id is None or isinstance(episode_id, str)):
            session_meta["episode_id"] = str(episode_id)

    events_payload: list[dict[str, Any]] = []
    for ev in event_rows:
        events_payload.append(
            {
                "id": ev["id"],
                "event_type": ev["event_type"],
                "system_instance_id": ev["system_instance_id"],
                "time_record": {
                    "event_time": ev["event_time"],
                    "message_time": ev["message_time"],
                    "created_at": ev["created_at"],
                },
                "model_name": ev["model_name"],
                "provider": ev["provider"],
                "input_tokens": ev["input_tokens"],
                "output_tokens": ev["output_tokens"],
                "total_tokens": ev["total_tokens"],
                "cost_usd": ev["cost_usd"],
                "latency_ms": ev["latency_ms"],
                "span_id": ev["span_id"],
                "trace_id": ev["trace_id"],
                "call_records": _json_load(ev["call_records"]) or [],
                "reward": ev["reward"],
                "terminated": ev["terminated"],
                "truncated": ev["truncated"],
                "system_state_before": _json_load(ev["system_state_before"]),
                "system_state_after": _json_load(ev["system_state_after"]),
                "metadata": _json_load(ev["metadata"]) or {},
                "event_metadata": _json_load(ev["event_metadata"]),
            }
        )

    messages_payload: list[dict[str, Any]] = []
    for msg in message_rows:
        messages_payload.append(
            {
                "id": msg["id"],
                "message_type": msg["message_type"],
                "content": msg["content"],
                "time_record": {
                    "event_time": msg["event_time"],
                    "message_time": msg["message_time"],
                    "timestamp": msg["timestamp"],
                },
                "metadata": _json_load(msg["metadata"]) or {},
            }
        )

    steps_payload = [
        {
            "step_id": step["step_id"],
            "step_index": step["step_index"],
            "turn_number": step["turn_number"],
            "started_at": step["started_at"],
            "completed_at": step["completed_at"],
            "metadata": _json_load(step["step_metadata"]) or {},
        }
        for step in step_rows
    ]

    event_rewards_payload = [
        {
            "id": rw["id"],
            "event_id": rw["event_id"],
            "turn_number": rw["turn_number"],
            "reward_value": rw["reward_value"],
            "reward_type": rw["reward_type"],
            "key": rw["key"],
            "annotation": _json_load(rw["annotation"]) or {},
            "source": rw["source"],
            "created_at": rw["created_at"],
        }
        for rw in event_reward_rows
    ]

    outcome_rewards_payload = [
        {
            "id": rw["id"],
            "total_reward": rw["total_reward"],
            "reward_metadata": _json_load(rw["reward_metadata"]) or {},
            "created_at": rw["created_at"],
        }
        for rw in outcome_reward_rows
    ]

    # The event and message lists are each published under two keys; both
    # keys refer to the same list object.
    return {
        "session_id": header["session_id"],
        "created_at": header["created_at"],
        "metadata": session_meta,
        "session_time_steps": steps_payload,
        "event_history": events_payload,
        "events": events_payload,
        "markov_blanket_message_history": messages_payload,
        "messages": messages_payload,
        "event_rewards": event_rewards_payload,
        "outcome_rewards": outcome_rewards_payload,
    }
243
+
244
+
245
@dataclass
class DeterministicMetrics:
    """Reward totals and achievement counts computed for one session."""

    # Session the figures belong to.
    session_id: str
    # Sum of reward values from "unique_achievement_delta" event rewards.
    unique_achievement_reward: float
    # Sum of reward values from "achievement_delta" event rewards.
    achievement_reward: float
    # Sum of total_reward over the session's outcome reward rows.
    outcome_total_reward: float
    # Number of distinct names listed under "new_unique" in event annotations.
    unique_achievement_count: int
    # Number of distinct names listed under "achievements" in outcome metadata.
    final_achievement_count: int
253
+
254
+
255
def _collect_names(payload: Any, key: str) -> set[str]:
    """Return the string entries of ``payload[key]``, or an empty set.

    ``_json_load`` hands back whatever the column held when it is not a JSON
    object (a list, a bare string, a number, ...). Only a dict whose ``key``
    maps to a list is treated as carrying names; anything else yields nothing
    instead of raising or iterating a string character by character.
    """
    if not isinstance(payload, dict):
        return set()
    entries = payload.get(key)
    if not isinstance(entries, (list, tuple)):
        return set()
    return {entry for entry in entries if isinstance(entry, str)}


def compute_deterministic_metrics(conn: sqlite3.Connection, session_id: str) -> DeterministicMetrics:
    """Aggregate stored rewards and achievement names for one session.

    Args:
        conn: Connection whose rows support access by column name.
        session_id: Session whose ``event_rewards`` and ``outcome_rewards``
            rows are summarised.

    Returns:
        A ``DeterministicMetrics`` holding the summed
        ``unique_achievement_delta`` and ``achievement_delta`` reward values,
        the summed outcome ``total_reward``, and the number of distinct names
        found under ``new_unique`` (event annotations) and ``achievements``
        (outcome reward metadata). A session with no rows yields zeros.
    """
    reward_rows = conn.execute(
        """
        SELECT reward_type, reward_value, annotation
        FROM event_rewards
        WHERE session_id = ?
        """,
        (session_id,),
    ).fetchall()

    unique_total = 0.0
    all_total = 0.0
    unique_achievements: set[str] = set()

    for row in reward_rows:
        reward_type = row["reward_type"]
        # NULL reward values count as zero.
        value = float(row["reward_value"] or 0.0)
        if reward_type == "unique_achievement_delta":
            unique_total += value
            unique_achievements |= _collect_names(_json_load(row["annotation"]), "new_unique")
        elif reward_type == "achievement_delta":
            all_total += value

    outcome_rows = conn.execute(
        """
        SELECT total_reward, reward_metadata
        FROM outcome_rewards
        WHERE session_id = ?
        """,
        (session_id,),
    ).fetchall()

    outcome_total = 0.0
    final_achievements: set[str] = set()
    for row in outcome_rows:
        outcome_total += float(row["total_reward"] or 0.0)
        final_achievements |= _collect_names(_json_load(row["reward_metadata"]), "achievements")

    return DeterministicMetrics(
        session_id=session_id,
        unique_achievement_reward=unique_total,
        achievement_reward=all_total,
        outcome_total_reward=outcome_total,
        unique_achievement_count=len(unique_achievements),
        final_achievement_count=len(final_achievements),
    )
307
+
308
+
309
# Public names of this module; _json_load and the Row alias are not listed.
__all__ = [
    "DeterministicMetrics",
    "compute_deterministic_metrics",
    "connect",
    "fetch_crafter_sessions",
    "load_session_trace",
]
316
+
317
+
@@ -117,7 +117,7 @@ def _maybe_datetime(value: Any) -> Any:
117
117
 
118
118
 
119
119
  def _load_json(value: Any) -> Any:
120
- if value is None or isinstance(value, (dict, list)):
120
+ if value is None or isinstance(value, dict | list):
121
121
  return value or {}
122
122
  if isinstance(value, str):
123
123
  try:
@@ -584,7 +584,7 @@ class NativeLibsqlTraceManager(TraceStorage):
584
584
  raise ValueError("No named parameters found in query for provided mapping")
585
585
  values = tuple(params[key] for key in keys)
586
586
  return new_query, values
587
- if isinstance(params, (list, tuple)):
587
+ if isinstance(params, list | tuple):
588
588
  return query, tuple(params)
589
589
  raise TypeError("Unsupported parameter type for query execution")
590
590
 
@@ -881,7 +881,7 @@ class NativeLibsqlTraceManager(TraceStorage):
881
881
  ) -> int:
882
882
  await self.initialize()
883
883
 
884
- if not isinstance(event, (EnvironmentEvent, LMCAISEvent, RuntimeEvent)):
884
+ if not isinstance(event, EnvironmentEvent | LMCAISEvent | RuntimeEvent):
885
885
  raise TypeError(f"Unsupported event type for native manager: {type(event)!r}")
886
886
 
887
887
  metadata_json = metadata_override or event.metadata or {}
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: synth-ai
3
- Version: 0.2.13.dev1
3
+ Version: 0.2.13.dev2
4
4
  Summary: RL as a service SDK - Core AI functionality and tracing
5
5
  Author-email: Synth AI <josh@usesynth.ai>
6
6
  License-Expression: MIT
@@ -50,9 +50,12 @@ Requires-Dist: textual>=1.1.0
50
50
  Requires-Dist: openai-harmony>=0.0.1
51
51
  Requires-Dist: asyncpg>=0.30.0
52
52
  Requires-Dist: aiohttp>=3.8.0
53
+ Requires-Dist: httpx>=0.28.1
53
54
  Requires-Dist: datasets>=4.0.0
54
55
  Requires-Dist: transformers>=4.56.1
55
56
  Requires-Dist: modal==1.1.4
57
+ Requires-Dist: pyboy>=2.6.0
58
+ Requires-Dist: setuptools>=80.9.0
56
59
  Provides-Extra: dev
57
60
  Requires-Dist: build>=1.2.2.post1; extra == "dev"
58
61
  Requires-Dist: twine>=4.0.0; extra == "dev"