synth-ai 0.2.13.dev2__py3-none-any.whl → 0.2.14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synth-ai might be problematic. Click here for more details.
- examples/multi_step/configs/README_verilog_rl.md +77 -0
- examples/multi_step/configs/VERILOG_REWARDS.md +90 -0
- examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +183 -0
- examples/multi_step/configs/crafter_eval_synth_qwen4b.toml +35 -0
- examples/multi_step/configs/crafter_eval_text_only_groq_qwen32b.toml +36 -0
- examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +5 -4
- examples/multi_step/configs/crafter_synth_backend.md +40 -0
- examples/multi_step/configs/verilog_eval_groq_qwen32b.toml +31 -0
- examples/multi_step/configs/verilog_eval_synth_qwen8b.toml +33 -0
- examples/multi_step/configs/verilog_rl_lora.toml +190 -0
- examples/multi_step/judges/crafter_backend_judge.py +220 -0
- examples/multi_step/judges/verilog_backend_judge.py +234 -0
- examples/multi_step/readme.md +48 -0
- examples/multi_step/verilog_rl_lora.md +218 -0
- examples/qwen_coder/configs/coder_lora_30b.toml +1 -1
- examples/sft/evaluate.py +2 -0
- examples/sft/generate_traces.py +2 -0
- examples/swe/task_app/grpo_swe_mini.py +1 -0
- examples/swe/task_app/hosted/rollout.py +2 -0
- examples/task_apps/IMAGE_ONLY_EVAL_QUICKSTART.md +258 -0
- examples/task_apps/crafter/CREATE_SFT_DATASET.md +273 -0
- examples/task_apps/crafter/EVAL_IMAGE_ONLY_RESULTS.md +152 -0
- examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +174 -0
- examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +268 -0
- examples/task_apps/crafter/QUERY_EXAMPLES.md +203 -0
- examples/task_apps/crafter/README_IMAGE_ONLY_EVAL.md +316 -0
- examples/task_apps/crafter/eval_image_only_gpt4o.toml +28 -0
- examples/task_apps/crafter/eval_text_only_groq_llama.toml +36 -0
- examples/task_apps/crafter/filter_sft_dataset.toml +16 -0
- examples/task_apps/crafter/task_app/__init__.py +3 -0
- examples/task_apps/crafter/task_app/grpo_crafter.py +306 -8
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/environment.py +10 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +16 -3
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +17 -2
- examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +25 -3
- examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +52 -1
- examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +111 -13
- examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +156 -0
- examples/task_apps/enron/filter_sft.toml +5 -0
- examples/task_apps/enron/tests/__init__.py +2 -0
- examples/task_apps/enron/tests/integration/__init__.py +2 -0
- examples/task_apps/enron/tests/integration/test_enron_eval.py +2 -0
- examples/task_apps/enron/tests/unit/__init__.py +2 -0
- examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_COMPLETE.md +283 -0
- examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_STATUS.md +155 -0
- examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +415 -0
- examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +29 -0
- examples/task_apps/pokemon_red/pallet_town_rl_config.toml +2 -0
- examples/task_apps/pokemon_red/task_app.py +199 -6
- examples/task_apps/pokemon_red/test_pallet_town_rewards.py +2 -0
- examples/task_apps/sokoban/filter_sft.toml +5 -0
- examples/task_apps/sokoban/tests/__init__.py +2 -0
- examples/task_apps/sokoban/tests/integration/__init__.py +2 -0
- examples/task_apps/sokoban/tests/unit/__init__.py +2 -0
- examples/task_apps/verilog/eval_groq_qwen32b.toml +8 -4
- examples/task_apps/verilog/filter_sft.toml +5 -0
- examples/task_apps/verilog/task_app/grpo_verilog.py +258 -23
- examples/task_apps/verilog/tests/__init__.py +2 -0
- examples/task_apps/verilog/tests/integration/__init__.py +2 -0
- examples/task_apps/verilog/tests/integration/test_verilog_eval.py +2 -0
- examples/task_apps/verilog/tests/unit/__init__.py +2 -0
- examples/warming_up_to_rl/groq_test.py +2 -0
- examples/warming_up_to_rl/run_local_rollout.py +2 -0
- examples/warming_up_to_rl/run_local_rollout_modal.py +2 -0
- examples/warming_up_to_rl/run_local_rollout_parallel.py +2 -0
- examples/warming_up_to_rl/run_local_rollout_traced.py +2 -0
- examples/warming_up_to_rl/run_rollout_remote.py +2 -0
- synth_ai/api/models/supported.py +1 -0
- synth_ai/cli/__init__.py +46 -13
- synth_ai/cli/_modal_wrapper.py +3 -2
- synth_ai/cli/recent.py +1 -1
- synth_ai/cli/status.py +1 -1
- synth_ai/cli/task_apps.py +354 -143
- synth_ai/cli/traces.py +1 -1
- synth_ai/cli/tui.py +57 -0
- synth_ai/cli/turso.py +1 -1
- synth_ai/cli/watch.py +1 -1
- synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
- synth_ai/environments/examples/crafter_classic/environment.py +1 -1
- synth_ai/environments/examples/verilog/engine.py +76 -10
- synth_ai/judge_schemas.py +8 -8
- synth_ai/task/__init__.py +11 -1
- synth_ai/task/apps/__init__.py +1 -0
- synth_ai/task/config.py +257 -0
- synth_ai/task/contracts.py +15 -2
- synth_ai/task/rubrics/__init__.py +3 -0
- synth_ai/task/rubrics/loaders.py +22 -3
- synth_ai/task/rubrics/scoring.py +3 -0
- synth_ai/task/trace_correlation_helpers.py +315 -0
- synth_ai/task/validators.py +144 -0
- synth_ai/tracing_v3/abstractions.py +3 -3
- synth_ai/tracing_v3/llm_call_record_helpers.py +5 -5
- synth_ai/tracing_v3/session_tracer.py +16 -6
- synth_ai/tracing_v3/storage/base.py +29 -29
- synth_ai/tracing_v3/storage/config.py +3 -3
- synth_ai/tracing_v3/turso/daemon.py +8 -7
- synth_ai/tracing_v3/turso/native_manager.py +63 -40
- synth_ai/tracing_v3/utils.py +3 -3
- synth_ai/tui/__init__.py +5 -0
- synth_ai/tui/__main__.py +13 -0
- synth_ai/tui/cli/__init__.py +1 -0
- synth_ai/tui/cli/query_experiments.py +164 -0
- synth_ai/tui/cli/query_experiments_v3.py +164 -0
- synth_ai/tui/dashboard.py +906 -0
- {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.14.dist-info}/METADATA +1 -1
- {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.14.dist-info}/RECORD +110 -71
- {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.14.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.14.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.14.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.14.dist-info}/top_level.txt +0 -0
|
@@ -5,7 +5,7 @@ from __future__ import annotations
|
|
|
5
5
|
import asyncio
|
|
6
6
|
import json
|
|
7
7
|
from contextlib import asynccontextmanager
|
|
8
|
-
from datetime import
|
|
8
|
+
from datetime import datetime, timezone
|
|
9
9
|
from typing import Any
|
|
10
10
|
|
|
11
11
|
from .abstractions import (
|
|
@@ -106,7 +106,7 @@ class SessionTracer:
|
|
|
106
106
|
|
|
107
107
|
self._current_trace = SessionTrace(
|
|
108
108
|
session_id=session_id,
|
|
109
|
-
created_at=datetime.now(
|
|
109
|
+
created_at=datetime.now(timezone.utc),
|
|
110
110
|
session_time_steps=[],
|
|
111
111
|
event_history=[],
|
|
112
112
|
markov_blanket_message_history=[],
|
|
@@ -152,7 +152,7 @@ class SessionTracer:
|
|
|
152
152
|
step = SessionTimeStep(
|
|
153
153
|
step_id=step_id,
|
|
154
154
|
step_index=len(self._current_trace.session_time_steps),
|
|
155
|
-
timestamp=datetime.now(
|
|
155
|
+
timestamp=datetime.now(timezone.utc),
|
|
156
156
|
turn_number=turn_number,
|
|
157
157
|
step_metadata=metadata or {},
|
|
158
158
|
)
|
|
@@ -197,7 +197,7 @@ class SessionTracer:
|
|
|
197
197
|
step = self._current_step
|
|
198
198
|
|
|
199
199
|
if step and step.completed_at is None:
|
|
200
|
-
step.completed_at = datetime.now(
|
|
200
|
+
step.completed_at = datetime.now(timezone.utc)
|
|
201
201
|
|
|
202
202
|
# Trigger hooks
|
|
203
203
|
await self.hooks.trigger(
|
|
@@ -294,7 +294,7 @@ class SessionTracer:
|
|
|
294
294
|
content=normalised_content,
|
|
295
295
|
message_type=message_type,
|
|
296
296
|
time_record=TimeRecord(
|
|
297
|
-
event_time=event_time or datetime.now(
|
|
297
|
+
event_time=event_time or datetime.now(timezone.utc).timestamp(), message_time=message_time
|
|
298
298
|
),
|
|
299
299
|
metadata=metadata or {},
|
|
300
300
|
)
|
|
@@ -368,18 +368,28 @@ class SessionTracer:
|
|
|
368
368
|
# End any open timesteps
|
|
369
369
|
for step in self._current_trace.session_time_steps:
|
|
370
370
|
if step.completed_at is None:
|
|
371
|
-
step.completed_at = datetime.now(
|
|
371
|
+
step.completed_at = datetime.now(timezone.utc)
|
|
372
372
|
|
|
373
373
|
# Trigger pre-save hooks
|
|
374
374
|
await self.hooks.trigger("before_save", session=self._current_trace)
|
|
375
375
|
|
|
376
376
|
# Save if requested
|
|
377
377
|
should_save = save if save is not None else self.auto_save
|
|
378
|
+
|
|
379
|
+
# Debug logging
|
|
380
|
+
import logging
|
|
381
|
+
_logger = logging.getLogger(__name__)
|
|
382
|
+
_logger.info(f"[TRACE_DEBUG] end_session: should_save={should_save}, self.db={self.db is not None}, auto_save={self.auto_save}")
|
|
383
|
+
|
|
378
384
|
if should_save and self.db:
|
|
385
|
+
_logger.info(f"[TRACE_DEBUG] Calling insert_session_trace with {len(self._current_trace.markov_blanket_message_history)} messages")
|
|
379
386
|
await self.db.insert_session_trace(self._current_trace)
|
|
387
|
+
_logger.info(f"[TRACE_DEBUG] insert_session_trace completed")
|
|
380
388
|
|
|
381
389
|
# Trigger post-save hooks
|
|
382
390
|
await self.hooks.trigger("after_save", session=self._current_trace)
|
|
391
|
+
else:
|
|
392
|
+
_logger.warning(f"[TRACE_DEBUG] Skipping save: should_save={should_save}, self.db={self.db is not None}")
|
|
383
393
|
|
|
384
394
|
# Trigger session end hooks
|
|
385
395
|
await self.hooks.trigger("session_end", session=self._current_trace)
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
from abc import ABC, abstractmethod
|
|
4
4
|
from datetime import datetime
|
|
5
|
-
from typing import Any
|
|
5
|
+
from typing import Any, Optional
|
|
6
6
|
|
|
7
7
|
from ..abstractions import SessionTrace
|
|
8
8
|
|
|
@@ -28,7 +28,7 @@ class TraceStorage(ABC):
|
|
|
28
28
|
pass
|
|
29
29
|
|
|
30
30
|
@abstractmethod
|
|
31
|
-
async def get_session_trace(self, session_id: str) -> dict[str, Any]
|
|
31
|
+
async def get_session_trace(self, session_id: str) -> Optional[dict[str, Any]]:
|
|
32
32
|
"""Retrieve a session trace by ID.
|
|
33
33
|
|
|
34
34
|
Args:
|
|
@@ -40,7 +40,7 @@ class TraceStorage(ABC):
|
|
|
40
40
|
pass
|
|
41
41
|
|
|
42
42
|
@abstractmethod
|
|
43
|
-
async def query_traces(self, query: str, params: dict[str, Any]
|
|
43
|
+
async def query_traces(self, query: str, params: Optional[dict[str, Any]] = None) -> Any:
|
|
44
44
|
"""Execute a query and return results.
|
|
45
45
|
|
|
46
46
|
Args:
|
|
@@ -55,9 +55,9 @@ class TraceStorage(ABC):
|
|
|
55
55
|
@abstractmethod
|
|
56
56
|
async def get_model_usage(
|
|
57
57
|
self,
|
|
58
|
-
start_date: datetime
|
|
59
|
-
end_date: datetime
|
|
60
|
-
model_name: str
|
|
58
|
+
start_date: Optional[datetime] = None,
|
|
59
|
+
end_date: Optional[datetime] = None,
|
|
60
|
+
model_name: Optional[str] = None,
|
|
61
61
|
) -> Any:
|
|
62
62
|
"""Get model usage statistics.
|
|
63
63
|
|
|
@@ -95,8 +95,8 @@ class TraceStorage(ABC):
|
|
|
95
95
|
self,
|
|
96
96
|
session_id: str,
|
|
97
97
|
*,
|
|
98
|
-
created_at: datetime
|
|
99
|
-
metadata: dict[str, Any]
|
|
98
|
+
created_at: Optional[datetime] = None,
|
|
99
|
+
metadata: Optional[dict[str, Any]] = None,
|
|
100
100
|
) -> None:
|
|
101
101
|
"""Ensure a session row exists for the given session id."""
|
|
102
102
|
pass
|
|
@@ -108,10 +108,10 @@ class TraceStorage(ABC):
|
|
|
108
108
|
*,
|
|
109
109
|
step_id: str,
|
|
110
110
|
step_index: int,
|
|
111
|
-
turn_number: int
|
|
112
|
-
started_at: datetime
|
|
113
|
-
completed_at: datetime
|
|
114
|
-
metadata: dict[str, Any]
|
|
111
|
+
turn_number: Optional[int] = None,
|
|
112
|
+
started_at: Optional[datetime] = None,
|
|
113
|
+
completed_at: Optional[datetime] = None,
|
|
114
|
+
metadata: Optional[dict[str, Any]] = None,
|
|
115
115
|
) -> int:
|
|
116
116
|
"""Ensure a timestep row exists and return its database id."""
|
|
117
117
|
pass
|
|
@@ -121,9 +121,9 @@ class TraceStorage(ABC):
|
|
|
121
121
|
self,
|
|
122
122
|
session_id: str,
|
|
123
123
|
*,
|
|
124
|
-
timestep_db_id: int
|
|
124
|
+
timestep_db_id: Optional[int],
|
|
125
125
|
event: Any,
|
|
126
|
-
metadata_override: dict[str, Any]
|
|
126
|
+
metadata_override: Optional[dict[str, Any]] = None,
|
|
127
127
|
) -> int:
|
|
128
128
|
"""Insert an event and return its database id."""
|
|
129
129
|
pass
|
|
@@ -133,12 +133,12 @@ class TraceStorage(ABC):
|
|
|
133
133
|
self,
|
|
134
134
|
session_id: str,
|
|
135
135
|
*,
|
|
136
|
-
timestep_db_id: int
|
|
136
|
+
timestep_db_id: Optional[int],
|
|
137
137
|
message_type: str,
|
|
138
138
|
content: Any,
|
|
139
|
-
event_time: float
|
|
140
|
-
message_time: int
|
|
141
|
-
metadata: dict[str, Any]
|
|
139
|
+
event_time: Optional[float] = None,
|
|
140
|
+
message_time: Optional[int] = None,
|
|
141
|
+
metadata: Optional[dict[str, Any]] = None,
|
|
142
142
|
) -> int:
|
|
143
143
|
"""Insert a message row linked to a session/timestep."""
|
|
144
144
|
pass
|
|
@@ -151,7 +151,7 @@ class TraceStorage(ABC):
|
|
|
151
151
|
total_reward: int,
|
|
152
152
|
achievements_count: int,
|
|
153
153
|
total_steps: int,
|
|
154
|
-
reward_metadata: dict
|
|
154
|
+
reward_metadata: Optional[dict] = None,
|
|
155
155
|
) -> int:
|
|
156
156
|
"""Record an outcome reward for a session."""
|
|
157
157
|
pass
|
|
@@ -162,13 +162,13 @@ class TraceStorage(ABC):
|
|
|
162
162
|
session_id: str,
|
|
163
163
|
*,
|
|
164
164
|
event_id: int,
|
|
165
|
-
message_id: int
|
|
166
|
-
turn_number: int
|
|
165
|
+
message_id: Optional[int] = None,
|
|
166
|
+
turn_number: Optional[int] = None,
|
|
167
167
|
reward_value: float = 0.0,
|
|
168
|
-
reward_type: str
|
|
169
|
-
key: str
|
|
170
|
-
annotation: dict[str, Any]
|
|
171
|
-
source: str
|
|
168
|
+
reward_type: Optional[str] = None,
|
|
169
|
+
key: Optional[str] = None,
|
|
170
|
+
annotation: Optional[dict[str, Any]] = None,
|
|
171
|
+
source: Optional[str] = None,
|
|
172
172
|
) -> int:
|
|
173
173
|
"""Record a reward tied to a specific event."""
|
|
174
174
|
pass
|
|
@@ -178,8 +178,8 @@ class TraceStorage(ABC):
|
|
|
178
178
|
self,
|
|
179
179
|
experiment_id: str,
|
|
180
180
|
name: str,
|
|
181
|
-
description: str
|
|
182
|
-
configuration: dict[str, Any]
|
|
181
|
+
description: Optional[str] = None,
|
|
182
|
+
configuration: Optional[dict[str, Any]] = None,
|
|
183
183
|
) -> str:
|
|
184
184
|
"""Create a new experiment."""
|
|
185
185
|
raise NotImplementedError("Experiment management not supported by this backend")
|
|
@@ -189,14 +189,14 @@ class TraceStorage(ABC):
|
|
|
189
189
|
raise NotImplementedError("Experiment management not supported by this backend")
|
|
190
190
|
|
|
191
191
|
async def get_sessions_by_experiment(
|
|
192
|
-
self, experiment_id: str, limit: int
|
|
192
|
+
self, experiment_id: str, limit: Optional[int] = None
|
|
193
193
|
) -> list[dict[str, Any]]:
|
|
194
194
|
"""Get all sessions for an experiment."""
|
|
195
195
|
raise NotImplementedError("Experiment management not supported by this backend")
|
|
196
196
|
|
|
197
197
|
# Batch operations
|
|
198
198
|
async def batch_insert_sessions(
|
|
199
|
-
self, traces: list[SessionTrace], batch_size: int
|
|
199
|
+
self, traces: list[SessionTrace], batch_size: Optional[int] = 1000
|
|
200
200
|
) -> list[str]:
|
|
201
201
|
"""Batch insert multiple session traces.
|
|
202
202
|
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
import os
|
|
4
4
|
from dataclasses import dataclass
|
|
5
5
|
from enum import Enum
|
|
6
|
-
from typing import Any
|
|
6
|
+
from typing import Any, Optional
|
|
7
7
|
|
|
8
8
|
|
|
9
9
|
class StorageBackend(str, Enum):
|
|
@@ -14,7 +14,7 @@ class StorageBackend(str, Enum):
|
|
|
14
14
|
POSTGRES = "postgres" # Future support
|
|
15
15
|
|
|
16
16
|
|
|
17
|
-
def _is_enabled(value: str
|
|
17
|
+
def _is_enabled(value: Optional[str]) -> bool:
|
|
18
18
|
if value is None:
|
|
19
19
|
return False
|
|
20
20
|
return value.lower() in {"1", "true", "yes", "on"}
|
|
@@ -25,7 +25,7 @@ class StorageConfig:
|
|
|
25
25
|
"""Configuration for storage backend."""
|
|
26
26
|
|
|
27
27
|
backend: StorageBackend = StorageBackend.TURSO_NATIVE
|
|
28
|
-
connection_string: str
|
|
28
|
+
connection_string: Optional[str] = None
|
|
29
29
|
|
|
30
30
|
# Turso-specific settings
|
|
31
31
|
turso_url: str = os.getenv("TURSO_DATABASE_URL", "sqlite+libsql://http://127.0.0.1:8080")
|
|
@@ -7,6 +7,7 @@ import time
|
|
|
7
7
|
|
|
8
8
|
import requests
|
|
9
9
|
from requests import RequestException
|
|
10
|
+
from typing import Any, Optional
|
|
10
11
|
|
|
11
12
|
from ..config import CONFIG
|
|
12
13
|
|
|
@@ -16,9 +17,9 @@ class SqldDaemon:
|
|
|
16
17
|
|
|
17
18
|
def __init__(
|
|
18
19
|
self,
|
|
19
|
-
db_path: str
|
|
20
|
-
http_port: int
|
|
21
|
-
binary_path: str
|
|
20
|
+
db_path: Optional[str] = None,
|
|
21
|
+
http_port: Optional[int] = None,
|
|
22
|
+
binary_path: Optional[str] = None,
|
|
22
23
|
):
|
|
23
24
|
"""Initialize sqld daemon manager.
|
|
24
25
|
|
|
@@ -30,7 +31,7 @@ class SqldDaemon:
|
|
|
30
31
|
self.db_path = db_path or CONFIG.sqld_db_path
|
|
31
32
|
self.http_port = http_port or CONFIG.sqld_http_port
|
|
32
33
|
self.binary_path = binary_path or self._find_binary()
|
|
33
|
-
self.process:
|
|
34
|
+
self.process: Optional[Any] = None
|
|
34
35
|
|
|
35
36
|
def _find_binary(self) -> str:
|
|
36
37
|
"""Find sqld binary in PATH."""
|
|
@@ -123,10 +124,10 @@ class SqldDaemon:
|
|
|
123
124
|
|
|
124
125
|
|
|
125
126
|
# Convenience functions
|
|
126
|
-
_daemon: SqldDaemon
|
|
127
|
+
_daemon: Optional[SqldDaemon] = None
|
|
127
128
|
|
|
128
129
|
|
|
129
|
-
def start_sqld(db_path: str
|
|
130
|
+
def start_sqld(db_path: Optional[str] = None, port: Optional[int] = None) -> SqldDaemon:
|
|
130
131
|
"""Start a global sqld daemon instance."""
|
|
131
132
|
global _daemon
|
|
132
133
|
if _daemon and _daemon.is_running():
|
|
@@ -145,6 +146,6 @@ def stop_sqld():
|
|
|
145
146
|
_daemon = None
|
|
146
147
|
|
|
147
148
|
|
|
148
|
-
def get_daemon() -> SqldDaemon
|
|
149
|
+
def get_daemon() -> Optional[SqldDaemon]:
|
|
149
150
|
"""Get the global daemon instance."""
|
|
150
151
|
return _daemon
|
|
@@ -13,7 +13,7 @@ import logging
|
|
|
13
13
|
import re
|
|
14
14
|
from collections.abc import Callable
|
|
15
15
|
from dataclasses import asdict, dataclass
|
|
16
|
-
from datetime import
|
|
16
|
+
from datetime import datetime, timezone
|
|
17
17
|
from typing import TYPE_CHECKING, Any, cast
|
|
18
18
|
|
|
19
19
|
import libsql
|
|
@@ -370,8 +370,18 @@ class NativeLibsqlTraceManager(TraceStorage):
|
|
|
370
370
|
|
|
371
371
|
async def insert_session_trace(self, trace: SessionTrace) -> str:
|
|
372
372
|
await self.initialize()
|
|
373
|
-
|
|
374
|
-
|
|
373
|
+
|
|
374
|
+
import logging as _logging
|
|
375
|
+
_logger = _logging.getLogger(__name__)
|
|
376
|
+
_logger.info(f"[TRACE_DEBUG] insert_session_trace START: session_id={trace.session_id}, {len(trace.markov_blanket_message_history)} messages")
|
|
377
|
+
|
|
378
|
+
session_exists = await self._session_exists(trace.session_id)
|
|
379
|
+
_logger.info(f"[TRACE_DEBUG] Session exists: {session_exists}")
|
|
380
|
+
|
|
381
|
+
if session_exists:
|
|
382
|
+
_logger.warning(f"[TRACE_DEBUG] Session {trace.session_id} already exists, need to save messages anyway!")
|
|
383
|
+
# Don't return early - we need to save messages!
|
|
384
|
+
# Just update metadata
|
|
375
385
|
async with self._op_lock:
|
|
376
386
|
conn = self._conn
|
|
377
387
|
assert conn is not None
|
|
@@ -380,32 +390,34 @@ class NativeLibsqlTraceManager(TraceStorage):
|
|
|
380
390
|
(_json_dumps(trace.metadata or {}), trace.session_id),
|
|
381
391
|
)
|
|
382
392
|
conn.commit()
|
|
383
|
-
|
|
393
|
+
# Continue to save messages instead of returning
|
|
384
394
|
|
|
385
|
-
|
|
395
|
+
if not session_exists:
|
|
396
|
+
created_at = trace.created_at or datetime.now(timezone.utc)
|
|
386
397
|
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
398
|
+
async with self._op_lock:
|
|
399
|
+
conn = self._conn
|
|
400
|
+
assert conn is not None
|
|
401
|
+
conn.execute(
|
|
402
|
+
"""
|
|
403
|
+
INSERT INTO session_traces (
|
|
404
|
+
session_id,
|
|
405
|
+
created_at,
|
|
406
|
+
num_timesteps,
|
|
407
|
+
num_events,
|
|
408
|
+
num_messages,
|
|
409
|
+
metadata
|
|
410
|
+
)
|
|
411
|
+
VALUES (?, ?, 0, 0, 0, ?)
|
|
412
|
+
""",
|
|
413
|
+
(
|
|
414
|
+
trace.session_id,
|
|
415
|
+
created_at.isoformat(),
|
|
416
|
+
_json_dumps(trace.metadata or {}),
|
|
417
|
+
),
|
|
399
418
|
)
|
|
400
|
-
|
|
401
|
-
""
|
|
402
|
-
(
|
|
403
|
-
trace.session_id,
|
|
404
|
-
created_at.isoformat(),
|
|
405
|
-
_json_dumps(trace.metadata or {}),
|
|
406
|
-
),
|
|
407
|
-
)
|
|
408
|
-
conn.commit()
|
|
419
|
+
conn.commit()
|
|
420
|
+
_logger.info(f"[TRACE_DEBUG] Session row inserted")
|
|
409
421
|
|
|
410
422
|
step_id_map: dict[str, int] = {}
|
|
411
423
|
|
|
@@ -434,7 +446,11 @@ class NativeLibsqlTraceManager(TraceStorage):
|
|
|
434
446
|
metadata_override=event.metadata or {},
|
|
435
447
|
)
|
|
436
448
|
|
|
437
|
-
|
|
449
|
+
import logging as _logging
|
|
450
|
+
_logger = _logging.getLogger(__name__)
|
|
451
|
+
_logger.info(f"[TRACE_DEBUG] insert_session_trace: saving {len(trace.markov_blanket_message_history)} messages")
|
|
452
|
+
|
|
453
|
+
for idx, msg in enumerate(trace.markov_blanket_message_history):
|
|
438
454
|
metadata = dict(getattr(msg, "metadata", {}) or {})
|
|
439
455
|
step_ref = metadata.get("step_id")
|
|
440
456
|
content_value = msg.content
|
|
@@ -452,15 +468,22 @@ class NativeLibsqlTraceManager(TraceStorage):
|
|
|
452
468
|
except (TypeError, ValueError):
|
|
453
469
|
content_value = str(content_value)
|
|
454
470
|
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
471
|
+
_logger.info(f"[TRACE_DEBUG] Message {idx+1}: type={msg.message_type}, content_len={len(str(content_value))}")
|
|
472
|
+
|
|
473
|
+
try:
|
|
474
|
+
await self.insert_message_row(
|
|
475
|
+
trace.session_id,
|
|
476
|
+
timestep_db_id=step_id_map.get(step_ref) if step_ref else None,
|
|
477
|
+
message_type=msg.message_type,
|
|
478
|
+
content=content_value,
|
|
479
|
+
event_time=msg.time_record.event_time,
|
|
480
|
+
message_time=msg.time_record.message_time,
|
|
481
|
+
metadata=metadata,
|
|
482
|
+
)
|
|
483
|
+
_logger.info(f"[TRACE_DEBUG] Message {idx+1}: saved successfully")
|
|
484
|
+
except Exception as exc:
|
|
485
|
+
_logger.error(f"[TRACE_DEBUG] Message {idx+1}: FAILED TO SAVE: {exc}", exc_info=True)
|
|
486
|
+
raise
|
|
464
487
|
|
|
465
488
|
async with self._op_lock:
|
|
466
489
|
conn = self._conn
|
|
@@ -783,7 +806,7 @@ class NativeLibsqlTraceManager(TraceStorage):
|
|
|
783
806
|
) -> None:
|
|
784
807
|
await self.initialize()
|
|
785
808
|
|
|
786
|
-
created_at_val = (created_at or datetime.now(
|
|
809
|
+
created_at_val = (created_at or datetime.now(timezone.utc)).isoformat()
|
|
787
810
|
metadata_json = _json_dumps(metadata or {})
|
|
788
811
|
|
|
789
812
|
async with self._op_lock:
|
|
@@ -815,7 +838,7 @@ class NativeLibsqlTraceManager(TraceStorage):
|
|
|
815
838
|
) -> int:
|
|
816
839
|
await self.initialize()
|
|
817
840
|
|
|
818
|
-
started_at_val = (started_at or datetime.now(
|
|
841
|
+
started_at_val = (started_at or datetime.now(timezone.utc)).isoformat()
|
|
819
842
|
completed_at_val = completed_at.isoformat() if completed_at else None
|
|
820
843
|
metadata_json = _json_dumps(metadata or {})
|
|
821
844
|
|
|
@@ -1127,7 +1150,7 @@ class NativeLibsqlTraceManager(TraceStorage):
|
|
|
1127
1150
|
total_reward,
|
|
1128
1151
|
achievements_count,
|
|
1129
1152
|
total_steps,
|
|
1130
|
-
datetime.now(
|
|
1153
|
+
datetime.now(timezone.utc).isoformat(),
|
|
1131
1154
|
_json_dumps(reward_metadata),
|
|
1132
1155
|
),
|
|
1133
1156
|
)
|
|
@@ -1179,7 +1202,7 @@ class NativeLibsqlTraceManager(TraceStorage):
|
|
|
1179
1202
|
key,
|
|
1180
1203
|
_json_dumps(annotation),
|
|
1181
1204
|
source,
|
|
1182
|
-
datetime.now(
|
|
1205
|
+
datetime.now(timezone.utc).isoformat(),
|
|
1183
1206
|
),
|
|
1184
1207
|
)
|
|
1185
1208
|
conn.commit()
|
synth_ai/tracing_v3/utils.py
CHANGED
|
@@ -5,13 +5,13 @@ from __future__ import annotations
|
|
|
5
5
|
import hashlib
|
|
6
6
|
import json
|
|
7
7
|
import uuid
|
|
8
|
-
from datetime import
|
|
8
|
+
from datetime import datetime, timezone
|
|
9
9
|
from typing import Any
|
|
10
10
|
|
|
11
11
|
|
|
12
12
|
def iso_now() -> str:
|
|
13
|
-
"""Get current
|
|
14
|
-
return datetime.now(
|
|
13
|
+
"""Get current timezone.utc time as ISO format string."""
|
|
14
|
+
return datetime.now(timezone.utc).isoformat()
|
|
15
15
|
|
|
16
16
|
|
|
17
17
|
def json_dumps(obj: Any) -> str:
|
synth_ai/tui/__init__.py
ADDED
synth_ai/tui/__main__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Command Line Interface tools for synth-ai."""
|
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Query experiments and sessions from Turso/sqld using v3 tracing.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import argparse
|
|
7
|
+
import asyncio
|
|
8
|
+
|
|
9
|
+
from synth_ai.tracing_v3.turso.manager import AsyncSQLTraceManager
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
async def list_experiments(db_url: str):
    """Print a summary of every experiment stored in the database.

    For each experiment the summary shows its name, a shortened id, creation
    time, description, session count, event count and - when positive - the
    aggregated cost and token totals of its ``'cais'`` events.

    Args:
        db_url: Database URL passed to ``AsyncSQLTraceManager``.

    NOTE(review): the query result is used like a pandas DataFrame
    (``.empty``, ``.iterrows()``); confirm that is what ``query_traces``
    returns for this manager.
    """
    db = AsyncSQLTraceManager(db_url)
    await db.initialize()

    try:
        # Only ``events`` is joined one-to-many below, so the SUMs see each
        # event row exactly once.
        # NOTE(review): cost_usd is divided by 100 for display - presumably it
        # is stored in cents; confirm against the schema.
        df = await db.query_traces("""
            SELECT
                e.experiment_id,
                e.name,
                e.description,
                e.created_at,
                COUNT(DISTINCT st.session_id) as num_sessions,
                COUNT(DISTINCT ev.id) as num_events,
                SUM(CASE WHEN ev.event_type = 'cais' THEN ev.cost_usd ELSE 0 END) / 100.0 as total_cost,
                SUM(CASE WHEN ev.event_type = 'cais' THEN ev.total_tokens ELSE 0 END) as total_tokens
            FROM experiments e
            LEFT JOIN session_traces st ON e.experiment_id = st.experiment_id
            LEFT JOIN events ev ON st.session_id = ev.session_id
            GROUP BY e.experiment_id, e.name, e.description, e.created_at
            ORDER BY e.created_at DESC
        """)

        if df.empty:
            print("No experiments found in database.")
            return

        print(f"\n{'=' * 100}")
        print(f"{'Experiments in ' + db_url:^100}")
        print(f"{'=' * 100}\n")

        for _, row in df.iterrows():
            print(f"🧪 {row['name']} (id: {row['experiment_id'][:8]}...)")
            print(f" Created: {row['created_at']}")
            print(f" Description: {row['description']}")
            print(f" Sessions: {row['num_sessions']}")
            print(f" Events: {row['num_events']:,}")
            # Cost/token lines are skipped when the aggregate is missing or zero.
            if row["total_cost"] and row["total_cost"] > 0:
                print(f" Cost: ${row['total_cost']:.4f}")
            if row["total_tokens"] and row["total_tokens"] > 0:
                print(f" Tokens: {int(row['total_tokens']):,}")
            print()
    finally:
        # Always release the connection, including on the early return above.
        await db.close()
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
async def show_experiment_details(db_url: str, experiment_id: str):
    """Print detailed statistics for one experiment.

    The experiment is looked up by id prefix (``LIKE '<experiment_id>%'``);
    the first match is used. When the experiment has sessions, aggregated
    event/message/cost/token totals and a per-session listing are printed.

    Args:
        db_url: Database URL passed to ``AsyncSQLTraceManager``.
        experiment_id: Full or partial (prefix) experiment id.

    NOTE(review): ``query_traces`` results are used like pandas DataFrames
    (``.empty``, ``.iloc``) while ``get_sessions_by_experiment`` is used like
    a sequence of mappings; confirm against the manager implementation.
    """
    db = AsyncSQLTraceManager(db_url)
    await db.initialize()

    try:
        # Get experiment info
        exp_df = await db.query_traces(
            """
            SELECT * FROM experiments WHERE experiment_id LIKE :exp_id
            """,
            {"exp_id": f"{experiment_id}%"},
        )

        if exp_df.empty:
            print(f"No experiment found matching ID: {experiment_id}")
            return

        exp = exp_df.iloc[0]
        print(f"\n{'=' * 100}")
        print(f"Experiment: {exp['name']} ({exp['experiment_id']})")
        print(f"{'=' * 100}\n")

        # Get session statistics
        sessions = await db.get_sessions_by_experiment(exp["experiment_id"])

        if sessions:
            print(f"Sessions: {len(sessions)}")

            # Get aggregated stats.
            # Messages are counted in a scalar subquery rather than joined:
            # joining both events and messages on session_id repeats every
            # event row once per message of that session, which inflates the
            # SUM()-based cost and token totals.
            stats_df = await db.query_traces(
                """
                SELECT
                    COUNT(DISTINCT ev.id) as total_events,
                    (
                        SELECT COUNT(DISTINCT m.id)
                        FROM messages m
                        JOIN session_traces ms ON ms.session_id = m.session_id
                        WHERE ms.experiment_id = :exp_id
                    ) as total_messages,
                    SUM(CASE WHEN ev.event_type = 'cais' THEN ev.cost_usd ELSE 0 END) / 100.0 as total_cost,
                    SUM(CASE WHEN ev.event_type = 'cais' THEN ev.total_tokens ELSE 0 END) as total_tokens
                FROM session_traces st
                LEFT JOIN events ev ON st.session_id = ev.session_id
                WHERE st.experiment_id = :exp_id
                """,
                {"exp_id": exp["experiment_id"]},
            )

            if not stats_df.empty:
                stats = stats_df.iloc[0]
                print(f"Total events: {int(stats['total_events']):,}")
                print(f"Total messages: {int(stats['total_messages']):,}")
                print(f"Total cost: ${stats['total_cost']:.4f}")
                print(f"Total tokens: {int(stats['total_tokens']):,}")

            # Show session list
            print("\nSession list:")
            for sess in sessions:
                print(f" - {sess['session_id']} ({sess['created_at']})")
                print(
                    f" Timesteps: {sess['num_timesteps']}, Events: {sess['num_events']}, Messages: {sess['num_messages']}"
                )
    finally:
        # Always release the connection, including on the early return above.
        await db.close()
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
async def show_model_usage(db_url: str, model_name: str | None = None):
    """Print model usage statistics as a plain-text table.

    Args:
        db_url: Database URL passed to ``AsyncSQLTraceManager``.
        model_name: Forwarded to ``get_model_usage``; ``None`` is passed
            through unchanged when no model is given.

    NOTE(review): the result is used like a pandas DataFrame (``.empty``,
    ``.to_string``); confirm that is what ``get_model_usage`` returns.
    """
    db = AsyncSQLTraceManager(db_url)
    await db.initialize()

    try:
        df = await db.get_model_usage(model_name=model_name)

        if df.empty:
            print("No model usage data found.")
            return

        print(f"\n{'=' * 100}")
        print(f"{'Model Usage Statistics':^100}")
        print(f"{'=' * 100}\n")

        print(df.to_string(index=False))
    finally:
        # Always release the connection, including on the early return above.
        await db.close()
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
async def main():
    """Parse command-line arguments and run the matching query.

    Dispatch order:
      1. ``--usage`` or ``--model`` -> model usage statistics.
      2. ``--experiment``          -> details for one experiment.
      3. otherwise                 -> list all experiments.
    """
    parser = argparse.ArgumentParser(description="Query experiments from Turso/sqld (v3)")
    parser.add_argument(
        "-u", "--url", default="sqlite+libsql://http://127.0.0.1:8080", help="Turso database URL"
    )
    parser.add_argument(
        "-e", "--experiment", help="Show details for specific experiment ID (can be partial)"
    )
    parser.add_argument("-m", "--model", help="Show usage for specific model")
    parser.add_argument("--usage", action="store_true", help="Show model usage statistics")

    args = parser.parse_args()

    # Usage mode wins over --experiment when both are supplied.
    if args.usage or args.model:
        await show_model_usage(args.url, args.model)
    elif args.experiment:
        await show_experiment_details(args.url, args.experiment)
    else:
        await list_experiments(args.url)


if __name__ == "__main__":
    asyncio.run(main())
|