synth-ai 0.2.13.dev2__py3-none-any.whl → 0.2.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. (The original page links to more details.)

Files changed (110)
  1. examples/multi_step/configs/README_verilog_rl.md +77 -0
  2. examples/multi_step/configs/VERILOG_REWARDS.md +90 -0
  3. examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +183 -0
  4. examples/multi_step/configs/crafter_eval_synth_qwen4b.toml +35 -0
  5. examples/multi_step/configs/crafter_eval_text_only_groq_qwen32b.toml +36 -0
  6. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +5 -4
  7. examples/multi_step/configs/crafter_synth_backend.md +40 -0
  8. examples/multi_step/configs/verilog_eval_groq_qwen32b.toml +31 -0
  9. examples/multi_step/configs/verilog_eval_synth_qwen8b.toml +33 -0
  10. examples/multi_step/configs/verilog_rl_lora.toml +190 -0
  11. examples/multi_step/judges/crafter_backend_judge.py +220 -0
  12. examples/multi_step/judges/verilog_backend_judge.py +234 -0
  13. examples/multi_step/readme.md +48 -0
  14. examples/multi_step/verilog_rl_lora.md +218 -0
  15. examples/qwen_coder/configs/coder_lora_30b.toml +1 -1
  16. examples/sft/evaluate.py +2 -0
  17. examples/sft/generate_traces.py +2 -0
  18. examples/swe/task_app/grpo_swe_mini.py +1 -0
  19. examples/swe/task_app/hosted/rollout.py +2 -0
  20. examples/task_apps/IMAGE_ONLY_EVAL_QUICKSTART.md +258 -0
  21. examples/task_apps/crafter/CREATE_SFT_DATASET.md +273 -0
  22. examples/task_apps/crafter/EVAL_IMAGE_ONLY_RESULTS.md +152 -0
  23. examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +174 -0
  24. examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +268 -0
  25. examples/task_apps/crafter/QUERY_EXAMPLES.md +203 -0
  26. examples/task_apps/crafter/README_IMAGE_ONLY_EVAL.md +316 -0
  27. examples/task_apps/crafter/eval_image_only_gpt4o.toml +28 -0
  28. examples/task_apps/crafter/eval_text_only_groq_llama.toml +36 -0
  29. examples/task_apps/crafter/filter_sft_dataset.toml +16 -0
  30. examples/task_apps/crafter/task_app/__init__.py +3 -0
  31. examples/task_apps/crafter/task_app/grpo_crafter.py +306 -8
  32. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/environment.py +10 -0
  33. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +16 -3
  34. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +17 -2
  35. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +25 -3
  36. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +52 -1
  37. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +111 -13
  38. examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +156 -0
  39. examples/task_apps/enron/filter_sft.toml +5 -0
  40. examples/task_apps/enron/tests/__init__.py +2 -0
  41. examples/task_apps/enron/tests/integration/__init__.py +2 -0
  42. examples/task_apps/enron/tests/integration/test_enron_eval.py +2 -0
  43. examples/task_apps/enron/tests/unit/__init__.py +2 -0
  44. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_COMPLETE.md +283 -0
  45. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_STATUS.md +155 -0
  46. examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +415 -0
  47. examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +29 -0
  48. examples/task_apps/pokemon_red/pallet_town_rl_config.toml +2 -0
  49. examples/task_apps/pokemon_red/task_app.py +199 -6
  50. examples/task_apps/pokemon_red/test_pallet_town_rewards.py +2 -0
  51. examples/task_apps/sokoban/filter_sft.toml +5 -0
  52. examples/task_apps/sokoban/tests/__init__.py +2 -0
  53. examples/task_apps/sokoban/tests/integration/__init__.py +2 -0
  54. examples/task_apps/sokoban/tests/unit/__init__.py +2 -0
  55. examples/task_apps/verilog/eval_groq_qwen32b.toml +8 -4
  56. examples/task_apps/verilog/filter_sft.toml +5 -0
  57. examples/task_apps/verilog/task_app/grpo_verilog.py +258 -23
  58. examples/task_apps/verilog/tests/__init__.py +2 -0
  59. examples/task_apps/verilog/tests/integration/__init__.py +2 -0
  60. examples/task_apps/verilog/tests/integration/test_verilog_eval.py +2 -0
  61. examples/task_apps/verilog/tests/unit/__init__.py +2 -0
  62. examples/warming_up_to_rl/groq_test.py +2 -0
  63. examples/warming_up_to_rl/run_local_rollout.py +2 -0
  64. examples/warming_up_to_rl/run_local_rollout_modal.py +2 -0
  65. examples/warming_up_to_rl/run_local_rollout_parallel.py +2 -0
  66. examples/warming_up_to_rl/run_local_rollout_traced.py +2 -0
  67. examples/warming_up_to_rl/run_rollout_remote.py +2 -0
  68. synth_ai/api/models/supported.py +1 -0
  69. synth_ai/cli/__init__.py +46 -13
  70. synth_ai/cli/_modal_wrapper.py +3 -2
  71. synth_ai/cli/recent.py +1 -1
  72. synth_ai/cli/status.py +1 -1
  73. synth_ai/cli/task_apps.py +354 -143
  74. synth_ai/cli/traces.py +1 -1
  75. synth_ai/cli/tui.py +57 -0
  76. synth_ai/cli/turso.py +1 -1
  77. synth_ai/cli/watch.py +1 -1
  78. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
  79. synth_ai/environments/examples/crafter_classic/environment.py +1 -1
  80. synth_ai/environments/examples/verilog/engine.py +76 -10
  81. synth_ai/judge_schemas.py +8 -8
  82. synth_ai/task/__init__.py +11 -1
  83. synth_ai/task/apps/__init__.py +1 -0
  84. synth_ai/task/config.py +257 -0
  85. synth_ai/task/contracts.py +15 -2
  86. synth_ai/task/rubrics/__init__.py +3 -0
  87. synth_ai/task/rubrics/loaders.py +22 -3
  88. synth_ai/task/rubrics/scoring.py +3 -0
  89. synth_ai/task/trace_correlation_helpers.py +315 -0
  90. synth_ai/task/validators.py +144 -0
  91. synth_ai/tracing_v3/abstractions.py +3 -3
  92. synth_ai/tracing_v3/llm_call_record_helpers.py +5 -5
  93. synth_ai/tracing_v3/session_tracer.py +16 -6
  94. synth_ai/tracing_v3/storage/base.py +29 -29
  95. synth_ai/tracing_v3/storage/config.py +3 -3
  96. synth_ai/tracing_v3/turso/daemon.py +8 -7
  97. synth_ai/tracing_v3/turso/native_manager.py +63 -40
  98. synth_ai/tracing_v3/utils.py +3 -3
  99. synth_ai/tui/__init__.py +5 -0
  100. synth_ai/tui/__main__.py +13 -0
  101. synth_ai/tui/cli/__init__.py +1 -0
  102. synth_ai/tui/cli/query_experiments.py +164 -0
  103. synth_ai/tui/cli/query_experiments_v3.py +164 -0
  104. synth_ai/tui/dashboard.py +906 -0
  105. {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.14.dist-info}/METADATA +1 -1
  106. {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.14.dist-info}/RECORD +110 -71
  107. {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.14.dist-info}/WHEEL +0 -0
  108. {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.14.dist-info}/entry_points.txt +0 -0
  109. {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.14.dist-info}/licenses/LICENSE +0 -0
  110. {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.14.dist-info}/top_level.txt +0 -0
@@ -5,7 +5,7 @@ from __future__ import annotations
5
5
  import asyncio
6
6
  import json
7
7
  from contextlib import asynccontextmanager
8
- from datetime import UTC, datetime
8
+ from datetime import datetime, timezone
9
9
  from typing import Any
10
10
 
11
11
  from .abstractions import (
@@ -106,7 +106,7 @@ class SessionTracer:
106
106
 
107
107
  self._current_trace = SessionTrace(
108
108
  session_id=session_id,
109
- created_at=datetime.now(UTC),
109
+ created_at=datetime.now(timezone.utc),
110
110
  session_time_steps=[],
111
111
  event_history=[],
112
112
  markov_blanket_message_history=[],
@@ -152,7 +152,7 @@ class SessionTracer:
152
152
  step = SessionTimeStep(
153
153
  step_id=step_id,
154
154
  step_index=len(self._current_trace.session_time_steps),
155
- timestamp=datetime.now(UTC),
155
+ timestamp=datetime.now(timezone.utc),
156
156
  turn_number=turn_number,
157
157
  step_metadata=metadata or {},
158
158
  )
@@ -197,7 +197,7 @@ class SessionTracer:
197
197
  step = self._current_step
198
198
 
199
199
  if step and step.completed_at is None:
200
- step.completed_at = datetime.now(UTC)
200
+ step.completed_at = datetime.now(timezone.utc)
201
201
 
202
202
  # Trigger hooks
203
203
  await self.hooks.trigger(
@@ -294,7 +294,7 @@ class SessionTracer:
294
294
  content=normalised_content,
295
295
  message_type=message_type,
296
296
  time_record=TimeRecord(
297
- event_time=event_time or datetime.now(UTC).timestamp(), message_time=message_time
297
+ event_time=event_time or datetime.now(timezone.utc).timestamp(), message_time=message_time
298
298
  ),
299
299
  metadata=metadata or {},
300
300
  )
@@ -368,18 +368,28 @@ class SessionTracer:
368
368
  # End any open timesteps
369
369
  for step in self._current_trace.session_time_steps:
370
370
  if step.completed_at is None:
371
- step.completed_at = datetime.now(UTC)
371
+ step.completed_at = datetime.now(timezone.utc)
372
372
 
373
373
  # Trigger pre-save hooks
374
374
  await self.hooks.trigger("before_save", session=self._current_trace)
375
375
 
376
376
  # Save if requested
377
377
  should_save = save if save is not None else self.auto_save
378
+
379
+ # Debug logging
380
+ import logging
381
+ _logger = logging.getLogger(__name__)
382
+ _logger.info(f"[TRACE_DEBUG] end_session: should_save={should_save}, self.db={self.db is not None}, auto_save={self.auto_save}")
383
+
378
384
  if should_save and self.db:
385
+ _logger.info(f"[TRACE_DEBUG] Calling insert_session_trace with {len(self._current_trace.markov_blanket_message_history)} messages")
379
386
  await self.db.insert_session_trace(self._current_trace)
387
+ _logger.info(f"[TRACE_DEBUG] insert_session_trace completed")
380
388
 
381
389
  # Trigger post-save hooks
382
390
  await self.hooks.trigger("after_save", session=self._current_trace)
391
+ else:
392
+ _logger.warning(f"[TRACE_DEBUG] Skipping save: should_save={should_save}, self.db={self.db is not None}")
383
393
 
384
394
  # Trigger session end hooks
385
395
  await self.hooks.trigger("session_end", session=self._current_trace)
@@ -2,7 +2,7 @@
2
2
 
3
3
  from abc import ABC, abstractmethod
4
4
  from datetime import datetime
5
- from typing import Any
5
+ from typing import Any, Optional
6
6
 
7
7
  from ..abstractions import SessionTrace
8
8
 
@@ -28,7 +28,7 @@ class TraceStorage(ABC):
28
28
  pass
29
29
 
30
30
  @abstractmethod
31
- async def get_session_trace(self, session_id: str) -> dict[str, Any] | None:
31
+ async def get_session_trace(self, session_id: str) -> Optional[dict[str, Any]]:
32
32
  """Retrieve a session trace by ID.
33
33
 
34
34
  Args:
@@ -40,7 +40,7 @@ class TraceStorage(ABC):
40
40
  pass
41
41
 
42
42
  @abstractmethod
43
- async def query_traces(self, query: str, params: dict[str, Any] | None = None) -> Any:
43
+ async def query_traces(self, query: str, params: Optional[dict[str, Any]] = None) -> Any:
44
44
  """Execute a query and return results.
45
45
 
46
46
  Args:
@@ -55,9 +55,9 @@ class TraceStorage(ABC):
55
55
  @abstractmethod
56
56
  async def get_model_usage(
57
57
  self,
58
- start_date: datetime | None = None,
59
- end_date: datetime | None = None,
60
- model_name: str | None = None,
58
+ start_date: Optional[datetime] = None,
59
+ end_date: Optional[datetime] = None,
60
+ model_name: Optional[str] = None,
61
61
  ) -> Any:
62
62
  """Get model usage statistics.
63
63
 
@@ -95,8 +95,8 @@ class TraceStorage(ABC):
95
95
  self,
96
96
  session_id: str,
97
97
  *,
98
- created_at: datetime | None = None,
99
- metadata: dict[str, Any] | None = None,
98
+ created_at: Optional[datetime] = None,
99
+ metadata: Optional[dict[str, Any]] = None,
100
100
  ) -> None:
101
101
  """Ensure a session row exists for the given session id."""
102
102
  pass
@@ -108,10 +108,10 @@ class TraceStorage(ABC):
108
108
  *,
109
109
  step_id: str,
110
110
  step_index: int,
111
- turn_number: int | None = None,
112
- started_at: datetime | None = None,
113
- completed_at: datetime | None = None,
114
- metadata: dict[str, Any] | None = None,
111
+ turn_number: Optional[int] = None,
112
+ started_at: Optional[datetime] = None,
113
+ completed_at: Optional[datetime] = None,
114
+ metadata: Optional[dict[str, Any]] = None,
115
115
  ) -> int:
116
116
  """Ensure a timestep row exists and return its database id."""
117
117
  pass
@@ -121,9 +121,9 @@ class TraceStorage(ABC):
121
121
  self,
122
122
  session_id: str,
123
123
  *,
124
- timestep_db_id: int | None,
124
+ timestep_db_id: Optional[int],
125
125
  event: Any,
126
- metadata_override: dict[str, Any] | None = None,
126
+ metadata_override: Optional[dict[str, Any]] = None,
127
127
  ) -> int:
128
128
  """Insert an event and return its database id."""
129
129
  pass
@@ -133,12 +133,12 @@ class TraceStorage(ABC):
133
133
  self,
134
134
  session_id: str,
135
135
  *,
136
- timestep_db_id: int | None,
136
+ timestep_db_id: Optional[int],
137
137
  message_type: str,
138
138
  content: Any,
139
- event_time: float | None = None,
140
- message_time: int | None = None,
141
- metadata: dict[str, Any] | None = None,
139
+ event_time: Optional[float] = None,
140
+ message_time: Optional[int] = None,
141
+ metadata: Optional[dict[str, Any]] = None,
142
142
  ) -> int:
143
143
  """Insert a message row linked to a session/timestep."""
144
144
  pass
@@ -151,7 +151,7 @@ class TraceStorage(ABC):
151
151
  total_reward: int,
152
152
  achievements_count: int,
153
153
  total_steps: int,
154
- reward_metadata: dict | None = None,
154
+ reward_metadata: Optional[dict] = None,
155
155
  ) -> int:
156
156
  """Record an outcome reward for a session."""
157
157
  pass
@@ -162,13 +162,13 @@ class TraceStorage(ABC):
162
162
  session_id: str,
163
163
  *,
164
164
  event_id: int,
165
- message_id: int | None = None,
166
- turn_number: int | None = None,
165
+ message_id: Optional[int] = None,
166
+ turn_number: Optional[int] = None,
167
167
  reward_value: float = 0.0,
168
- reward_type: str | None = None,
169
- key: str | None = None,
170
- annotation: dict[str, Any] | None = None,
171
- source: str | None = None,
168
+ reward_type: Optional[str] = None,
169
+ key: Optional[str] = None,
170
+ annotation: Optional[dict[str, Any]] = None,
171
+ source: Optional[str] = None,
172
172
  ) -> int:
173
173
  """Record a reward tied to a specific event."""
174
174
  pass
@@ -178,8 +178,8 @@ class TraceStorage(ABC):
178
178
  self,
179
179
  experiment_id: str,
180
180
  name: str,
181
- description: str | None = None,
182
- configuration: dict[str, Any] | None = None,
181
+ description: Optional[str] = None,
182
+ configuration: Optional[dict[str, Any]] = None,
183
183
  ) -> str:
184
184
  """Create a new experiment."""
185
185
  raise NotImplementedError("Experiment management not supported by this backend")
@@ -189,14 +189,14 @@ class TraceStorage(ABC):
189
189
  raise NotImplementedError("Experiment management not supported by this backend")
190
190
 
191
191
  async def get_sessions_by_experiment(
192
- self, experiment_id: str, limit: int | None = None
192
+ self, experiment_id: str, limit: Optional[int] = None
193
193
  ) -> list[dict[str, Any]]:
194
194
  """Get all sessions for an experiment."""
195
195
  raise NotImplementedError("Experiment management not supported by this backend")
196
196
 
197
197
  # Batch operations
198
198
  async def batch_insert_sessions(
199
- self, traces: list[SessionTrace], batch_size: int | None = 1000
199
+ self, traces: list[SessionTrace], batch_size: Optional[int] = 1000
200
200
  ) -> list[str]:
201
201
  """Batch insert multiple session traces.
202
202
 
@@ -3,7 +3,7 @@
3
3
  import os
4
4
  from dataclasses import dataclass
5
5
  from enum import Enum
6
- from typing import Any
6
+ from typing import Any, Optional
7
7
 
8
8
 
9
9
  class StorageBackend(str, Enum):
@@ -14,7 +14,7 @@ class StorageBackend(str, Enum):
14
14
  POSTGRES = "postgres" # Future support
15
15
 
16
16
 
17
- def _is_enabled(value: str | None) -> bool:
17
+ def _is_enabled(value: Optional[str]) -> bool:
18
18
  if value is None:
19
19
  return False
20
20
  return value.lower() in {"1", "true", "yes", "on"}
@@ -25,7 +25,7 @@ class StorageConfig:
25
25
  """Configuration for storage backend."""
26
26
 
27
27
  backend: StorageBackend = StorageBackend.TURSO_NATIVE
28
- connection_string: str | None = None
28
+ connection_string: Optional[str] = None
29
29
 
30
30
  # Turso-specific settings
31
31
  turso_url: str = os.getenv("TURSO_DATABASE_URL", "sqlite+libsql://http://127.0.0.1:8080")
@@ -7,6 +7,7 @@ import time
7
7
 
8
8
  import requests
9
9
  from requests import RequestException
10
+ from typing import Any, Optional
10
11
 
11
12
  from ..config import CONFIG
12
13
 
@@ -16,9 +17,9 @@ class SqldDaemon:
16
17
 
17
18
  def __init__(
18
19
  self,
19
- db_path: str | None = None,
20
- http_port: int | None = None,
21
- binary_path: str | None = None,
20
+ db_path: Optional[str] = None,
21
+ http_port: Optional[int] = None,
22
+ binary_path: Optional[str] = None,
22
23
  ):
23
24
  """Initialize sqld daemon manager.
24
25
 
@@ -30,7 +31,7 @@ class SqldDaemon:
30
31
  self.db_path = db_path or CONFIG.sqld_db_path
31
32
  self.http_port = http_port or CONFIG.sqld_http_port
32
33
  self.binary_path = binary_path or self._find_binary()
33
- self.process: subprocess.Popen | None = None
34
+ self.process: Optional[Any] = None
34
35
 
35
36
  def _find_binary(self) -> str:
36
37
  """Find sqld binary in PATH."""
@@ -123,10 +124,10 @@ class SqldDaemon:
123
124
 
124
125
 
125
126
  # Convenience functions
126
- _daemon: SqldDaemon | None = None
127
+ _daemon: Optional[SqldDaemon] = None
127
128
 
128
129
 
129
- def start_sqld(db_path: str | None = None, port: int | None = None) -> SqldDaemon:
130
+ def start_sqld(db_path: Optional[str] = None, port: Optional[int] = None) -> SqldDaemon:
130
131
  """Start a global sqld daemon instance."""
131
132
  global _daemon
132
133
  if _daemon and _daemon.is_running():
@@ -145,6 +146,6 @@ def stop_sqld():
145
146
  _daemon = None
146
147
 
147
148
 
148
- def get_daemon() -> SqldDaemon | None:
149
+ def get_daemon() -> Optional[SqldDaemon]:
149
150
  """Get the global daemon instance."""
150
151
  return _daemon
@@ -13,7 +13,7 @@ import logging
13
13
  import re
14
14
  from collections.abc import Callable
15
15
  from dataclasses import asdict, dataclass
16
- from datetime import UTC, datetime
16
+ from datetime import datetime, timezone
17
17
  from typing import TYPE_CHECKING, Any, cast
18
18
 
19
19
  import libsql
@@ -370,8 +370,18 @@ class NativeLibsqlTraceManager(TraceStorage):
370
370
 
371
371
  async def insert_session_trace(self, trace: SessionTrace) -> str:
372
372
  await self.initialize()
373
-
374
- if await self._session_exists(trace.session_id):
373
+
374
+ import logging as _logging
375
+ _logger = _logging.getLogger(__name__)
376
+ _logger.info(f"[TRACE_DEBUG] insert_session_trace START: session_id={trace.session_id}, {len(trace.markov_blanket_message_history)} messages")
377
+
378
+ session_exists = await self._session_exists(trace.session_id)
379
+ _logger.info(f"[TRACE_DEBUG] Session exists: {session_exists}")
380
+
381
+ if session_exists:
382
+ _logger.warning(f"[TRACE_DEBUG] Session {trace.session_id} already exists, need to save messages anyway!")
383
+ # Don't return early - we need to save messages!
384
+ # Just update metadata
375
385
  async with self._op_lock:
376
386
  conn = self._conn
377
387
  assert conn is not None
@@ -380,32 +390,34 @@ class NativeLibsqlTraceManager(TraceStorage):
380
390
  (_json_dumps(trace.metadata or {}), trace.session_id),
381
391
  )
382
392
  conn.commit()
383
- return trace.session_id
393
+ # Continue to save messages instead of returning
384
394
 
385
- created_at = trace.created_at or datetime.now(UTC)
395
+ if not session_exists:
396
+ created_at = trace.created_at or datetime.now(timezone.utc)
386
397
 
387
- async with self._op_lock:
388
- conn = self._conn
389
- assert conn is not None
390
- conn.execute(
391
- """
392
- INSERT INTO session_traces (
393
- session_id,
394
- created_at,
395
- num_timesteps,
396
- num_events,
397
- num_messages,
398
- metadata
398
+ async with self._op_lock:
399
+ conn = self._conn
400
+ assert conn is not None
401
+ conn.execute(
402
+ """
403
+ INSERT INTO session_traces (
404
+ session_id,
405
+ created_at,
406
+ num_timesteps,
407
+ num_events,
408
+ num_messages,
409
+ metadata
410
+ )
411
+ VALUES (?, ?, 0, 0, 0, ?)
412
+ """,
413
+ (
414
+ trace.session_id,
415
+ created_at.isoformat(),
416
+ _json_dumps(trace.metadata or {}),
417
+ ),
399
418
  )
400
- VALUES (?, ?, 0, 0, 0, ?)
401
- """,
402
- (
403
- trace.session_id,
404
- created_at.isoformat(),
405
- _json_dumps(trace.metadata or {}),
406
- ),
407
- )
408
- conn.commit()
419
+ conn.commit()
420
+ _logger.info(f"[TRACE_DEBUG] Session row inserted")
409
421
 
410
422
  step_id_map: dict[str, int] = {}
411
423
 
@@ -434,7 +446,11 @@ class NativeLibsqlTraceManager(TraceStorage):
434
446
  metadata_override=event.metadata or {},
435
447
  )
436
448
 
437
- for msg in trace.markov_blanket_message_history:
449
+ import logging as _logging
450
+ _logger = _logging.getLogger(__name__)
451
+ _logger.info(f"[TRACE_DEBUG] insert_session_trace: saving {len(trace.markov_blanket_message_history)} messages")
452
+
453
+ for idx, msg in enumerate(trace.markov_blanket_message_history):
438
454
  metadata = dict(getattr(msg, "metadata", {}) or {})
439
455
  step_ref = metadata.get("step_id")
440
456
  content_value = msg.content
@@ -452,15 +468,22 @@ class NativeLibsqlTraceManager(TraceStorage):
452
468
  except (TypeError, ValueError):
453
469
  content_value = str(content_value)
454
470
 
455
- await self.insert_message_row(
456
- trace.session_id,
457
- timestep_db_id=step_id_map.get(step_ref) if step_ref else None,
458
- message_type=msg.message_type,
459
- content=content_value,
460
- event_time=msg.time_record.event_time,
461
- message_time=msg.time_record.message_time,
462
- metadata=metadata,
463
- )
471
+ _logger.info(f"[TRACE_DEBUG] Message {idx+1}: type={msg.message_type}, content_len={len(str(content_value))}")
472
+
473
+ try:
474
+ await self.insert_message_row(
475
+ trace.session_id,
476
+ timestep_db_id=step_id_map.get(step_ref) if step_ref else None,
477
+ message_type=msg.message_type,
478
+ content=content_value,
479
+ event_time=msg.time_record.event_time,
480
+ message_time=msg.time_record.message_time,
481
+ metadata=metadata,
482
+ )
483
+ _logger.info(f"[TRACE_DEBUG] Message {idx+1}: saved successfully")
484
+ except Exception as exc:
485
+ _logger.error(f"[TRACE_DEBUG] Message {idx+1}: FAILED TO SAVE: {exc}", exc_info=True)
486
+ raise
464
487
 
465
488
  async with self._op_lock:
466
489
  conn = self._conn
@@ -783,7 +806,7 @@ class NativeLibsqlTraceManager(TraceStorage):
783
806
  ) -> None:
784
807
  await self.initialize()
785
808
 
786
- created_at_val = (created_at or datetime.now(UTC)).isoformat()
809
+ created_at_val = (created_at or datetime.now(timezone.utc)).isoformat()
787
810
  metadata_json = _json_dumps(metadata or {})
788
811
 
789
812
  async with self._op_lock:
@@ -815,7 +838,7 @@ class NativeLibsqlTraceManager(TraceStorage):
815
838
  ) -> int:
816
839
  await self.initialize()
817
840
 
818
- started_at_val = (started_at or datetime.now(UTC)).isoformat()
841
+ started_at_val = (started_at or datetime.now(timezone.utc)).isoformat()
819
842
  completed_at_val = completed_at.isoformat() if completed_at else None
820
843
  metadata_json = _json_dumps(metadata or {})
821
844
 
@@ -1127,7 +1150,7 @@ class NativeLibsqlTraceManager(TraceStorage):
1127
1150
  total_reward,
1128
1151
  achievements_count,
1129
1152
  total_steps,
1130
- datetime.now(UTC).isoformat(),
1153
+ datetime.now(timezone.utc).isoformat(),
1131
1154
  _json_dumps(reward_metadata),
1132
1155
  ),
1133
1156
  )
@@ -1179,7 +1202,7 @@ class NativeLibsqlTraceManager(TraceStorage):
1179
1202
  key,
1180
1203
  _json_dumps(annotation),
1181
1204
  source,
1182
- datetime.now(UTC).isoformat(),
1205
+ datetime.now(timezone.utc).isoformat(),
1183
1206
  ),
1184
1207
  )
1185
1208
  conn.commit()
@@ -5,13 +5,13 @@ from __future__ import annotations
5
5
  import hashlib
6
6
  import json
7
7
  import uuid
8
- from datetime import UTC, datetime
8
+ from datetime import datetime, timezone
9
9
  from typing import Any
10
10
 
11
11
 
12
12
  def iso_now() -> str:
13
- """Get current UTC time as ISO format string."""
14
- return datetime.now(UTC).isoformat()
13
+ """Get current timezone.utc time as ISO format string."""
14
+ return datetime.now(timezone.utc).isoformat()
15
15
 
16
16
 
17
17
  def json_dumps(obj: Any) -> str:
@@ -0,0 +1,5 @@
1
+ """Text User Interface utilities for synth-ai."""
2
+
3
+ from .dashboard import main
4
+
5
+ __all__ = ["main"]
@@ -0,0 +1,13 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Entry point for Synth AI TUI dashboard.
4
+
5
+ Usage:
6
+ python -m synth_ai.tui
7
+ python -m synth_ai.tui --url sqlite+aiosqlite:///path/to/db
8
+ """
9
+
10
+ from .dashboard import main
11
+
12
+ if __name__ == "__main__":
13
+ main()
@@ -0,0 +1 @@
1
+ """Command Line Interface tools for synth-ai."""
@@ -0,0 +1,164 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Query experiments and sessions from Turso/sqld using v3 tracing.
4
+ """
5
+
6
+ import argparse
7
+ import asyncio
8
+
9
+ from synth_ai.tracing_v3.turso.manager import AsyncSQLTraceManager
10
+
11
+
12
+ async def list_experiments(db_url: str):
13
+ """List all experiments in the database."""
14
+ db = AsyncSQLTraceManager(db_url)
15
+ await db.initialize()
16
+
17
+ try:
18
+ df = await db.query_traces("""
19
+ SELECT
20
+ e.experiment_id,
21
+ e.name,
22
+ e.description,
23
+ e.created_at,
24
+ COUNT(DISTINCT st.session_id) as num_sessions,
25
+ COUNT(DISTINCT ev.id) as num_events,
26
+ SUM(CASE WHEN ev.event_type = 'cais' THEN ev.cost_usd ELSE 0 END) / 100.0 as total_cost,
27
+ SUM(CASE WHEN ev.event_type = 'cais' THEN ev.total_tokens ELSE 0 END) as total_tokens
28
+ FROM experiments e
29
+ LEFT JOIN session_traces st ON e.experiment_id = st.experiment_id
30
+ LEFT JOIN events ev ON st.session_id = ev.session_id
31
+ GROUP BY e.experiment_id, e.name, e.description, e.created_at
32
+ ORDER BY e.created_at DESC
33
+ """)
34
+
35
+ if df.empty:
36
+ print("No experiments found in database.")
37
+ return
38
+
39
+ print(f"\n{'=' * 100}")
40
+ print(f"{'Experiments in ' + db_url:^100}")
41
+ print(f"{'=' * 100}\n")
42
+
43
+ for _, row in df.iterrows():
44
+ print(f"🧪 {row['name']} (id: {row['experiment_id'][:8]}...)")
45
+ print(f" Created: {row['created_at']}")
46
+ print(f" Description: {row['description']}")
47
+ print(f" Sessions: {row['num_sessions']}")
48
+ print(f" Events: {row['num_events']:,}")
49
+ if row["total_cost"] and row["total_cost"] > 0:
50
+ print(f" Cost: ${row['total_cost']:.4f}")
51
+ if row["total_tokens"] and row["total_tokens"] > 0:
52
+ print(f" Tokens: {int(row['total_tokens']):,}")
53
+ print()
54
+ finally:
55
+ await db.close()
56
+
57
+
58
+ async def show_experiment_details(db_url: str, experiment_id: str):
59
+ """Show detailed information about a specific experiment."""
60
+ db = AsyncSQLTraceManager(db_url)
61
+ await db.initialize()
62
+
63
+ try:
64
+ # Get experiment info
65
+ exp_df = await db.query_traces(
66
+ """
67
+ SELECT * FROM experiments WHERE experiment_id LIKE :exp_id
68
+ """,
69
+ {"exp_id": f"{experiment_id}%"},
70
+ )
71
+
72
+ if exp_df.empty:
73
+ print(f"No experiment found matching ID: {experiment_id}")
74
+ return
75
+
76
+ exp = exp_df.iloc[0]
77
+ print(f"\n{'=' * 100}")
78
+ print(f"Experiment: {exp['name']} ({exp['experiment_id']})")
79
+ print(f"{'=' * 100}\n")
80
+
81
+ # Get session statistics
82
+ sessions_df = await db.get_sessions_by_experiment(exp["experiment_id"])
83
+
84
+ if sessions_df:
85
+ print(f"Sessions: {len(sessions_df)}")
86
+
87
+ # Get aggregated stats
88
+ stats_df = await db.query_traces(
89
+ """
90
+ SELECT
91
+ COUNT(DISTINCT ev.id) as total_events,
92
+ COUNT(DISTINCT m.id) as total_messages,
93
+ SUM(CASE WHEN ev.event_type = 'cais' THEN ev.cost_usd ELSE 0 END) / 100.0 as total_cost,
94
+ SUM(CASE WHEN ev.event_type = 'cais' THEN ev.total_tokens ELSE 0 END) as total_tokens
95
+ FROM session_traces st
96
+ LEFT JOIN events ev ON st.session_id = ev.session_id
97
+ LEFT JOIN messages m ON st.session_id = m.session_id
98
+ WHERE st.experiment_id = :exp_id
99
+ """,
100
+ {"exp_id": exp["experiment_id"]},
101
+ )
102
+
103
+ if not stats_df.empty:
104
+ stats = stats_df.iloc[0]
105
+ print(f"Total events: {int(stats['total_events']):,}")
106
+ print(f"Total messages: {int(stats['total_messages']):,}")
107
+ print(f"Total cost: ${stats['total_cost']:.4f}")
108
+ print(f"Total tokens: {int(stats['total_tokens']):,}")
109
+
110
+ # Show session list
111
+ print("\nSession list:")
112
+ for sess in sessions_df:
113
+ print(f" - {sess['session_id']} ({sess['created_at']})")
114
+ print(
115
+ f" Timesteps: {sess['num_timesteps']}, Events: {sess['num_events']}, Messages: {sess['num_messages']}"
116
+ )
117
+ finally:
118
+ await db.close()
119
+
120
+
121
+ async def show_model_usage(db_url: str, model_name: str | None = None):
122
+ """Show model usage statistics."""
123
+ db = AsyncSQLTraceManager(db_url)
124
+ await db.initialize()
125
+
126
+ try:
127
+ df = await db.get_model_usage(model_name=model_name)
128
+
129
+ if df.empty:
130
+ print("No model usage data found.")
131
+ return
132
+
133
+ print(f"\n{'=' * 100}")
134
+ print(f"{'Model Usage Statistics':^100}")
135
+ print(f"{'=' * 100}\n")
136
+
137
+ print(df.to_string(index=False))
138
+ finally:
139
+ await db.close()
140
+
141
+
142
+ async def main():
143
+ parser = argparse.ArgumentParser(description="Query experiments from Turso/sqld (v3)")
144
+ parser.add_argument(
145
+ "-u", "--url", default="sqlite+libsql://http://127.0.0.1:8080", help="Turso database URL"
146
+ )
147
+ parser.add_argument(
148
+ "-e", "--experiment", help="Show details for specific experiment ID (can be partial)"
149
+ )
150
+ parser.add_argument("-m", "--model", help="Show usage for specific model")
151
+ parser.add_argument("--usage", action="store_true", help="Show model usage statistics")
152
+
153
+ args = parser.parse_args()
154
+
155
+ if args.usage or args.model:
156
+ await show_model_usage(args.url, args.model)
157
+ elif args.experiment:
158
+ await show_experiment_details(args.url, args.experiment)
159
+ else:
160
+ await list_experiments(args.url)
161
+
162
+
163
+ if __name__ == "__main__":
164
+ asyncio.run(main())