synth-ai 0.2.17__py3-none-any.whl → 0.2.19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synth-ai might be problematic. Click here for more details.
- examples/baseline/banking77_baseline.py +204 -0
- examples/baseline/crafter_baseline.py +407 -0
- examples/baseline/pokemon_red_baseline.py +326 -0
- examples/baseline/simple_baseline.py +56 -0
- examples/baseline/warming_up_to_rl_baseline.py +239 -0
- examples/blog_posts/gepa/README.md +355 -0
- examples/blog_posts/gepa/configs/banking77_gepa_local.toml +95 -0
- examples/blog_posts/gepa/configs/banking77_gepa_test.toml +82 -0
- examples/blog_posts/gepa/configs/banking77_mipro_local.toml +52 -0
- examples/blog_posts/gepa/configs/hotpotqa_gepa_local.toml +59 -0
- examples/blog_posts/gepa/configs/hotpotqa_gepa_qwen.toml +36 -0
- examples/blog_posts/gepa/configs/hotpotqa_mipro_local.toml +53 -0
- examples/blog_posts/gepa/configs/hover_gepa_local.toml +59 -0
- examples/blog_posts/gepa/configs/hover_gepa_qwen.toml +36 -0
- examples/blog_posts/gepa/configs/hover_mipro_local.toml +53 -0
- examples/blog_posts/gepa/configs/ifbench_gepa_local.toml +59 -0
- examples/blog_posts/gepa/configs/ifbench_gepa_qwen.toml +36 -0
- examples/blog_posts/gepa/configs/ifbench_mipro_local.toml +53 -0
- examples/blog_posts/gepa/configs/pupa_gepa_local.toml +60 -0
- examples/blog_posts/gepa/configs/pupa_mipro_local.toml +54 -0
- examples/blog_posts/gepa/deploy_banking77_task_app.sh +41 -0
- examples/blog_posts/gepa/gepa_baseline.py +204 -0
- examples/blog_posts/gepa/query_prompts_example.py +97 -0
- examples/blog_posts/gepa/run_gepa_banking77.sh +87 -0
- examples/blog_posts/gepa/task_apps.py +105 -0
- examples/blog_posts/gepa/test_gepa_local.sh +67 -0
- examples/blog_posts/gepa/verify_banking77_setup.sh +123 -0
- examples/blog_posts/pokemon_vl/configs/eval_gpt5nano.toml +26 -0
- examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +12 -10
- examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +1 -0
- examples/blog_posts/pokemon_vl/extract_images.py +239 -0
- examples/blog_posts/pokemon_vl/pokemon_vl_baseline.py +326 -0
- examples/blog_posts/pokemon_vl/run_eval_extract_images.py +209 -0
- examples/blog_posts/pokemon_vl/run_qwen_eval_extract_images.py +212 -0
- examples/blog_posts/pokemon_vl/text_box_analysis.md +106 -0
- examples/blog_posts/warming_up_to_rl/ARCHITECTURE.md +195 -0
- examples/blog_posts/warming_up_to_rl/FINAL_TEST_RESULTS.md +127 -0
- examples/blog_posts/warming_up_to_rl/INFERENCE_SUCCESS.md +132 -0
- examples/blog_posts/warming_up_to_rl/SMOKE_TESTING.md +164 -0
- examples/blog_posts/warming_up_to_rl/SMOKE_TEST_COMPLETE.md +253 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_baseline_qwen32b_10x20.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b_10x20.toml +26 -0
- examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +1 -1
- examples/blog_posts/warming_up_to_rl/configs/smoke_test.toml +75 -0
- examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +60 -10
- examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +1 -1
- examples/blog_posts/warming_up_to_rl/warming_up_to_rl_baseline.py +187 -0
- examples/multi_step/configs/VERILOG_REWARDS.md +4 -0
- examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +4 -0
- examples/multi_step/configs/crafter_rl_outcome.toml +1 -0
- examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +1 -0
- examples/multi_step/configs/crafter_rl_stepwise_simple.toml +1 -0
- examples/rl/configs/rl_from_base_qwen17.toml +1 -0
- examples/swe/task_app/hosted/inference/openai_client.py +0 -34
- examples/swe/task_app/hosted/policy_routes.py +17 -0
- examples/swe/task_app/hosted/rollout.py +4 -2
- examples/task_apps/banking77/__init__.py +6 -0
- examples/task_apps/banking77/banking77_task_app.py +841 -0
- examples/task_apps/banking77/deploy_wrapper.py +46 -0
- examples/task_apps/crafter/CREATE_SFT_DATASET.md +4 -0
- examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +4 -0
- examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +4 -0
- examples/task_apps/crafter/task_app/grpo_crafter.py +24 -2
- examples/task_apps/crafter/task_app/synth_envs_hosted/hosted_app.py +49 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +355 -58
- examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +68 -7
- examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +78 -21
- examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +194 -1
- examples/task_apps/gepa_benchmarks/__init__.py +7 -0
- examples/task_apps/gepa_benchmarks/common.py +260 -0
- examples/task_apps/gepa_benchmarks/hotpotqa_task_app.py +507 -0
- examples/task_apps/gepa_benchmarks/hover_task_app.py +436 -0
- examples/task_apps/gepa_benchmarks/ifbench_task_app.py +563 -0
- examples/task_apps/gepa_benchmarks/pupa_task_app.py +460 -0
- examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +4 -0
- examples/task_apps/pokemon_red/task_app.py +254 -36
- examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +1 -0
- examples/warming_up_to_rl/task_app/grpo_crafter.py +53 -4
- examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +49 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +152 -41
- examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +31 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +33 -3
- examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +67 -0
- examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +1 -0
- synth_ai/api/train/builders.py +90 -1
- synth_ai/api/train/cli.py +396 -21
- synth_ai/api/train/config_finder.py +13 -2
- synth_ai/api/train/configs/__init__.py +15 -1
- synth_ai/api/train/configs/prompt_learning.py +442 -0
- synth_ai/api/train/configs/rl.py +29 -0
- synth_ai/api/train/task_app.py +1 -1
- synth_ai/api/train/validators.py +277 -0
- synth_ai/baseline/__init__.py +25 -0
- synth_ai/baseline/config.py +209 -0
- synth_ai/baseline/discovery.py +214 -0
- synth_ai/baseline/execution.py +146 -0
- synth_ai/cli/__init__.py +85 -17
- synth_ai/cli/__main__.py +0 -0
- synth_ai/cli/claude.py +70 -0
- synth_ai/cli/codex.py +84 -0
- synth_ai/cli/commands/__init__.py +1 -0
- synth_ai/cli/commands/baseline/__init__.py +12 -0
- synth_ai/cli/commands/baseline/core.py +637 -0
- synth_ai/cli/commands/baseline/list.py +93 -0
- synth_ai/cli/commands/eval/core.py +13 -10
- synth_ai/cli/commands/filter/core.py +53 -17
- synth_ai/cli/commands/help/core.py +0 -1
- synth_ai/cli/commands/smoke/__init__.py +7 -0
- synth_ai/cli/commands/smoke/core.py +1436 -0
- synth_ai/cli/commands/status/subcommands/pricing.py +22 -0
- synth_ai/cli/commands/status/subcommands/usage.py +203 -0
- synth_ai/cli/commands/train/judge_schemas.py +1 -0
- synth_ai/cli/commands/train/judge_validation.py +1 -0
- synth_ai/cli/commands/train/validation.py +0 -57
- synth_ai/cli/demo.py +35 -3
- synth_ai/cli/deploy/__init__.py +40 -25
- synth_ai/cli/deploy.py +162 -0
- synth_ai/cli/legacy_root_backup.py +14 -8
- synth_ai/cli/opencode.py +107 -0
- synth_ai/cli/root.py +9 -5
- synth_ai/cli/task_app_deploy.py +1 -1
- synth_ai/cli/task_apps.py +53 -53
- synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +7 -4
- synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +9 -5
- synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +4 -3
- synth_ai/judge_schemas.py +1 -0
- synth_ai/learning/__init__.py +10 -0
- synth_ai/learning/prompt_learning_client.py +276 -0
- synth_ai/learning/prompt_learning_types.py +184 -0
- synth_ai/pricing/__init__.py +2 -0
- synth_ai/pricing/model_pricing.py +57 -0
- synth_ai/streaming/handlers.py +53 -4
- synth_ai/streaming/streamer.py +19 -0
- synth_ai/task/apps/__init__.py +1 -0
- synth_ai/task/config.py +2 -0
- synth_ai/task/tracing_utils.py +25 -25
- synth_ai/task/validators.py +44 -8
- synth_ai/task_app_cfgs.py +21 -0
- synth_ai/tracing_v3/config.py +162 -19
- synth_ai/tracing_v3/constants.py +1 -1
- synth_ai/tracing_v3/db_config.py +24 -38
- synth_ai/tracing_v3/storage/config.py +47 -13
- synth_ai/tracing_v3/storage/factory.py +3 -3
- synth_ai/tracing_v3/turso/daemon.py +113 -11
- synth_ai/tracing_v3/turso/native_manager.py +92 -16
- synth_ai/types.py +8 -0
- synth_ai/urls.py +11 -0
- synth_ai/utils/__init__.py +30 -1
- synth_ai/utils/agents.py +74 -0
- synth_ai/utils/bin.py +39 -0
- synth_ai/utils/cli.py +149 -5
- synth_ai/utils/env.py +17 -17
- synth_ai/utils/json.py +72 -0
- synth_ai/utils/modal.py +283 -1
- synth_ai/utils/paths.py +48 -0
- synth_ai/utils/uvicorn.py +113 -0
- {synth_ai-0.2.17.dist-info → synth_ai-0.2.19.dist-info}/METADATA +102 -4
- {synth_ai-0.2.17.dist-info → synth_ai-0.2.19.dist-info}/RECORD +162 -88
- synth_ai/cli/commands/deploy/__init__.py +0 -23
- synth_ai/cli/commands/deploy/core.py +0 -614
- synth_ai/cli/commands/deploy/errors.py +0 -72
- synth_ai/cli/commands/deploy/validation.py +0 -11
- synth_ai/cli/deploy/core.py +0 -5
- synth_ai/cli/deploy/errors.py +0 -23
- synth_ai/cli/deploy/validation.py +0 -5
- {synth_ai-0.2.17.dist-info → synth_ai-0.2.19.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.17.dist-info → synth_ai-0.2.19.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.17.dist-info → synth_ai-0.2.19.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.17.dist-info → synth_ai-0.2.19.dist-info}/top_level.txt +0 -0
synth_ai/task/tracing_utils.py
CHANGED
|
@@ -26,34 +26,34 @@ def tracing_env_enabled(default: bool = False) -> bool:
|
|
|
26
26
|
|
|
27
27
|
|
|
28
28
|
def resolve_tracing_db_url() -> str | None:
|
|
29
|
-
"""Resolve tracing database URL
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
29
|
+
"""Resolve tracing database URL using centralized tracing_v3 config logic.
|
|
30
|
+
|
|
31
|
+
This delegates to synth_ai.tracing_v3.config.resolve_trace_db_settings() which
|
|
32
|
+
handles Modal detection, remote Turso, local sqld, and SQLite fallbacks.
|
|
33
|
+
"""
|
|
34
|
+
try:
|
|
35
|
+
from synth_ai.tracing_v3.config import resolve_trace_db_settings
|
|
36
|
+
db_url, _ = resolve_trace_db_settings(ensure_dir=True)
|
|
33
37
|
return db_url
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
existing = os.getenv("TASKAPP_TRACE_DB_PATH")
|
|
47
|
-
if existing:
|
|
48
|
-
path = Path(existing).expanduser()
|
|
49
|
-
else:
|
|
38
|
+
except ImportError:
|
|
39
|
+
# Fallback if tracing_v3 is not available (shouldn't happen in normal usage)
|
|
40
|
+
db_url = (
|
|
41
|
+
os.getenv("TURSO_LOCAL_DB_URL")
|
|
42
|
+
or os.getenv("LIBSQL_URL")
|
|
43
|
+
or os.getenv("SYNTH_TRACES_DB")
|
|
44
|
+
)
|
|
45
|
+
if db_url:
|
|
46
|
+
return db_url
|
|
47
|
+
|
|
48
|
+
# Auto-provision local sqld location for callers that rely on trace directories.
|
|
50
49
|
base_dir = TRACE_DB_DIR.expanduser()
|
|
51
50
|
base_dir.mkdir(parents=True, exist_ok=True)
|
|
52
|
-
|
|
53
|
-
os.environ["TASKAPP_TRACE_DB_PATH"] = str(
|
|
54
|
-
os.environ.setdefault("SQLD_DB_PATH", str(
|
|
55
|
-
|
|
56
|
-
|
|
51
|
+
candidate = base_dir / canonical_trace_db_name(timestamp=datetime.now())
|
|
52
|
+
os.environ["TASKAPP_TRACE_DB_PATH"] = str(candidate)
|
|
53
|
+
os.environ.setdefault("SQLD_DB_PATH", str(candidate))
|
|
54
|
+
|
|
55
|
+
default_url = os.getenv("LIBSQL_DEFAULT_URL", "http://127.0.0.1:8081")
|
|
56
|
+
return default_url
|
|
57
57
|
|
|
58
58
|
|
|
59
59
|
def build_tracer_factory(
|
synth_ai/task/validators.py
CHANGED
|
@@ -133,13 +133,46 @@ def normalize_inference_url(url: str | None, *, default: str = "https://api.open
|
|
|
133
133
|
if not candidate:
|
|
134
134
|
candidate = default
|
|
135
135
|
|
|
136
|
-
# Parse the URL to separate path and query components
|
|
137
136
|
parsed = urlparse(candidate)
|
|
138
|
-
|
|
137
|
+
path = (parsed.path or "").rstrip("/")
|
|
138
|
+
query = parsed.query or ""
|
|
139
|
+
|
|
140
|
+
# Repair malformed URLs where the completions path ended up in the query string.
|
|
141
|
+
# Example: https://host?cid=trace/v1/chat/completions -> https://host/v1/chat/completions?cid=trace
|
|
142
|
+
if query and "/" in query:
|
|
143
|
+
base_query, remainder = query.split("/", 1)
|
|
144
|
+
remainder_path = remainder
|
|
145
|
+
extra_query = ""
|
|
146
|
+
for separator in ("&", "?"):
|
|
147
|
+
idx = remainder_path.find(separator)
|
|
148
|
+
if idx != -1:
|
|
149
|
+
extra_query = remainder_path[idx + 1 :]
|
|
150
|
+
remainder_path = remainder_path[:idx]
|
|
151
|
+
break
|
|
152
|
+
|
|
153
|
+
query_path = "/" + remainder_path.lstrip("/")
|
|
154
|
+
merged_query_parts: list[str] = []
|
|
155
|
+
if base_query:
|
|
156
|
+
merged_query_parts.append(base_query)
|
|
157
|
+
if extra_query:
|
|
158
|
+
merged_query_parts.append(extra_query)
|
|
159
|
+
merged_query = "&".join(part for part in merged_query_parts if part)
|
|
160
|
+
|
|
161
|
+
if query_path and query_path != "/":
|
|
162
|
+
combined_path = f"{path.rstrip('/')}{query_path}" if path else query_path
|
|
163
|
+
else:
|
|
164
|
+
combined_path = path
|
|
165
|
+
|
|
166
|
+
parsed = parsed._replace(path=combined_path or "", query=merged_query)
|
|
167
|
+
path = (parsed.path or "").rstrip("/")
|
|
168
|
+
query = parsed.query or ""
|
|
169
|
+
|
|
139
170
|
# Check if path already ends with a completions endpoint
|
|
140
|
-
path = parsed.path.rstrip('/')
|
|
141
171
|
if path.endswith("/v1/chat/completions") or path.endswith("/chat/completions"):
|
|
142
|
-
|
|
172
|
+
final_query = parsed.query or ""
|
|
173
|
+
if final_query and "/" in final_query:
|
|
174
|
+
parsed = parsed._replace(query=final_query.split("/", 1)[0])
|
|
175
|
+
return urlunparse(parsed)
|
|
143
176
|
|
|
144
177
|
# Determine what to append based on existing path
|
|
145
178
|
if path.endswith("/v1"):
|
|
@@ -147,11 +180,14 @@ def normalize_inference_url(url: str | None, *, default: str = "https://api.open
|
|
|
147
180
|
elif path.endswith("/chat"):
|
|
148
181
|
new_path = f"{path}/completions"
|
|
149
182
|
else:
|
|
150
|
-
# Default: append full path
|
|
151
183
|
new_path = f"{path}/v1/chat/completions" if path else "/v1/chat/completions"
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
184
|
+
|
|
185
|
+
parsed = parsed._replace(path=new_path)
|
|
186
|
+
final_query = parsed.query or ""
|
|
187
|
+
if final_query and "/" in final_query:
|
|
188
|
+
parsed = parsed._replace(query=final_query.split("/", 1)[0])
|
|
189
|
+
|
|
190
|
+
return urlunparse(parsed)
|
|
155
191
|
|
|
156
192
|
|
|
157
193
|
def validate_task_app_url(url: str | None) -> str:
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
from typing import Literal, Optional
|
|
3
|
+
|
|
4
|
+
from pydantic import BaseModel
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class LocalTaskAppConfig(BaseModel):
|
|
8
|
+
task_app_path: Path
|
|
9
|
+
trace: bool = True
|
|
10
|
+
host: str = "127.0.0.1"
|
|
11
|
+
port: int = 8000
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class ModalTaskAppConfig(BaseModel):
|
|
16
|
+
task_app_path: Path
|
|
17
|
+
modal_app_path: Path
|
|
18
|
+
modal_bin_path: Path
|
|
19
|
+
cmd_arg: Literal["deploy", "serve"] = "deploy"
|
|
20
|
+
task_app_name: Optional[str] = None
|
|
21
|
+
dry_run: bool = False
|
synth_ai/tracing_v3/config.py
CHANGED
|
@@ -1,19 +1,162 @@
|
|
|
1
|
-
"""Configuration for tracing v3
|
|
1
|
+
"""Configuration helpers for tracing v3.
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
|
|
3
|
+
This module centralises the logic for discovering which datastore the tracer
|
|
4
|
+
should use. Historically the project defaulted to a local SQLite file which
|
|
5
|
+
breaks under parallel load. The new resolver inspects environment variables
|
|
6
|
+
and defaults to Turso/libSQL whenever credentials are supplied, while keeping a
|
|
7
|
+
SQLite fallback for contributors without remote access.
|
|
8
|
+
"""
|
|
5
9
|
|
|
6
|
-
from
|
|
10
|
+
from __future__ import annotations
|
|
7
11
|
|
|
8
|
-
|
|
12
|
+
import os
|
|
13
|
+
from dataclasses import dataclass, field
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
from typing import Any
|
|
16
|
+
from urllib.parse import parse_qsl, urlencode, urlparse, urlunparse
|
|
9
17
|
|
|
18
|
+
from synth_ai.tracing_v3.constants import canonical_trace_db_path
|
|
10
19
|
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
20
|
+
# STARTUP DIAGNOSTIC - Commented out to reduce noise
|
|
21
|
+
# print(f"[TRACING_V3_CONFIG_LOADED] Python={sys.version_info.major}.{sys.version_info.minor} MODAL_IS_REMOTE={os.getenv('MODAL_IS_REMOTE')}", flush=True)
|
|
22
|
+
|
|
23
|
+
# ---------------------------------------------------------------------------
|
|
24
|
+
# DSN resolution helpers
|
|
25
|
+
# ---------------------------------------------------------------------------
|
|
26
|
+
|
|
27
|
+
_CANONICAL_DB_PATH = canonical_trace_db_path()
|
|
28
|
+
_DEFAULT_TRACE_DIR = Path(os.getenv("SYNTH_TRACES_DIR", _CANONICAL_DB_PATH.parent))
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _normalise_path(path: Path) -> Path:
|
|
32
|
+
"""Resolve relative paths and expand user/home markers."""
|
|
33
|
+
path = path.expanduser()
|
|
34
|
+
if not path.is_absolute():
|
|
35
|
+
path = (Path.cwd() / path).resolve()
|
|
36
|
+
return path
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _is_modal_environment() -> bool:
|
|
40
|
+
"""Detect if running in Modal container.
|
|
41
|
+
|
|
42
|
+
Modal automatically sets MODAL_IS_REMOTE=1 in all deployed containers.
|
|
43
|
+
We check this first, then fall back to other Modal env vars.
|
|
44
|
+
"""
|
|
45
|
+
# Modal sets this in all deployed containers
|
|
46
|
+
if os.getenv("MODAL_IS_REMOTE") == "1":
|
|
47
|
+
return True
|
|
48
|
+
|
|
49
|
+
# Additional Modal env vars as fallback
|
|
50
|
+
return bool(
|
|
51
|
+
os.getenv("MODAL_TASK_ID")
|
|
52
|
+
or os.getenv("MODAL_ENVIRONMENT")
|
|
53
|
+
or os.getenv("SERVICE", "").upper() == "MODAL"
|
|
54
|
+
)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def _split_auth_from_url(url: str) -> tuple[str, str | None]:
|
|
58
|
+
"""Strip any auth_token query parameter from a DSN."""
|
|
59
|
+
parsed = urlparse(url)
|
|
60
|
+
if not parsed.query:
|
|
61
|
+
return url, None
|
|
62
|
+
|
|
63
|
+
params = dict(parse_qsl(parsed.query, keep_blank_values=True))
|
|
64
|
+
token = params.pop("auth_token", None)
|
|
65
|
+
query = urlencode(params, doseq=True)
|
|
66
|
+
# urlunparse will omit the '?' automatically when query is empty
|
|
67
|
+
sanitised = urlunparse(parsed._replace(query=query))
|
|
68
|
+
return sanitised, token
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def _default_sqlite_url(*, ensure_dir: bool = True) -> tuple[str, str | None]:
|
|
72
|
+
"""Generate a SQLite URL from SYNTH_TRACES_DIR if set, otherwise raise."""
|
|
73
|
+
traces_dir = os.getenv("SYNTH_TRACES_DIR")
|
|
74
|
+
if traces_dir:
|
|
75
|
+
dir_path = _normalise_path(Path(traces_dir))
|
|
76
|
+
if ensure_dir:
|
|
77
|
+
dir_path.mkdir(parents=True, exist_ok=True)
|
|
78
|
+
db_path = dir_path / "synth_traces.db"
|
|
79
|
+
sqlite_url = f"sqlite+aiosqlite:///{db_path}"
|
|
80
|
+
return sqlite_url, None
|
|
81
|
+
raise RuntimeError("SQLite fallback is disabled; configure LIBSQL_URL or run sqld locally.")
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def resolve_trace_db_settings(*, ensure_dir: bool = True) -> tuple[str, str | None]:
|
|
85
|
+
"""Resolve the tracing database URL and optional auth token.
|
|
86
|
+
|
|
87
|
+
Resolution order:
|
|
88
|
+
1. `SYNTH_TRACES_DB` (explicit DSN override)
|
|
89
|
+
2. `LIBSQL_URL` / `TURSO_DATABASE_URL` (remote libSQL endpoints)
|
|
90
|
+
3. `TURSO_LOCAL_DB_URL` (legacy env for local sqld)
|
|
91
|
+
4. Modal environment: plain SQLite file (no sqld, no auth)
|
|
92
|
+
5. Local dev: sqld default
|
|
93
|
+
"""
|
|
94
|
+
import logging
|
|
95
|
+
logger = logging.getLogger(__name__)
|
|
96
|
+
|
|
97
|
+
explicit = os.getenv("SYNTH_TRACES_DB")
|
|
98
|
+
if explicit:
|
|
99
|
+
logger.info(f"[TRACE_CONFIG] Using explicit SYNTH_TRACES_DB: {explicit}")
|
|
100
|
+
return _split_auth_from_url(explicit)
|
|
101
|
+
|
|
102
|
+
remote = os.getenv("LIBSQL_URL") or os.getenv("TURSO_DATABASE_URL")
|
|
103
|
+
if remote:
|
|
104
|
+
logger.info(f"[TRACE_CONFIG] Using remote Turso: {remote}")
|
|
105
|
+
url, token = _split_auth_from_url(remote)
|
|
106
|
+
if token:
|
|
107
|
+
return url, token
|
|
108
|
+
env_token = os.getenv("LIBSQL_AUTH_TOKEN") or os.getenv("TURSO_AUTH_TOKEN")
|
|
109
|
+
return url, env_token
|
|
110
|
+
|
|
111
|
+
local_override = os.getenv("TURSO_LOCAL_DB_URL")
|
|
112
|
+
if local_override:
|
|
113
|
+
logger.info(f"[TRACE_CONFIG] Using TURSO_LOCAL_DB_URL: {local_override}")
|
|
114
|
+
url, token = _split_auth_from_url(local_override)
|
|
115
|
+
if token:
|
|
116
|
+
return url, token
|
|
117
|
+
env_token = os.getenv("LIBSQL_AUTH_TOKEN") or os.getenv("TURSO_AUTH_TOKEN")
|
|
118
|
+
return url, env_token
|
|
119
|
+
|
|
120
|
+
# Check for SYNTH_TRACES_DIR to generate SQLite URL
|
|
121
|
+
traces_dir = os.getenv("SYNTH_TRACES_DIR")
|
|
122
|
+
if traces_dir:
|
|
123
|
+
try:
|
|
124
|
+
sqlite_url, _ = _default_sqlite_url(ensure_dir=ensure_dir)
|
|
125
|
+
logger.info(f"[TRACE_CONFIG] Using SQLite from SYNTH_TRACES_DIR: {sqlite_url}")
|
|
126
|
+
return sqlite_url, None
|
|
127
|
+
except RuntimeError:
|
|
128
|
+
pass # Fall through to other options
|
|
129
|
+
|
|
130
|
+
# Modal environment: use plain SQLite file (no sqld daemon, no auth required)
|
|
131
|
+
is_modal = _is_modal_environment()
|
|
132
|
+
logger.info(f"[TRACE_CONFIG] Modal detection: {is_modal} (MODAL_IS_REMOTE={os.getenv('MODAL_IS_REMOTE')})")
|
|
133
|
+
if is_modal:
|
|
134
|
+
logger.info("[TRACE_CONFIG] Using Modal SQLite: file:/tmp/synth_traces.db")
|
|
135
|
+
return "file:/tmp/synth_traces.db", None
|
|
136
|
+
|
|
137
|
+
# Local dev: default to sqld HTTP API
|
|
138
|
+
default_url = os.getenv("LIBSQL_DEFAULT_URL", "http://127.0.0.1:8081")
|
|
139
|
+
logger.info(f"[TRACE_CONFIG] Using local sqld: {default_url}")
|
|
140
|
+
return default_url, None
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def resolve_trace_db_url(*, ensure_dir: bool = True) -> str:
|
|
144
|
+
"""Return just the DSN, discarding any auth token."""
|
|
145
|
+
url, _ = resolve_trace_db_settings(ensure_dir=ensure_dir)
|
|
146
|
+
return url
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def resolve_trace_db_auth_token() -> str | None:
|
|
150
|
+
"""Return the resolved auth token for the tracing datastore."""
|
|
151
|
+
_, token = resolve_trace_db_settings()
|
|
152
|
+
return token
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
# ---------------------------------------------------------------------------
|
|
156
|
+
# Config dataclasses
|
|
157
|
+
# ---------------------------------------------------------------------------
|
|
158
|
+
|
|
159
|
+
DEFAULT_DB_FILE = str(_normalise_path(_DEFAULT_TRACE_DIR) / _CANONICAL_DB_PATH.name)
|
|
17
160
|
|
|
18
161
|
|
|
19
162
|
@dataclass
|
|
@@ -24,12 +167,12 @@ class TursoConfig:
|
|
|
24
167
|
DEFAULT_DB_FILE = DEFAULT_DB_FILE
|
|
25
168
|
DEFAULT_HTTP_PORT = 8080
|
|
26
169
|
|
|
27
|
-
#
|
|
28
|
-
db_url: str =
|
|
170
|
+
# Resolve DB URL and auth token from environment (libSQL preferred)
|
|
171
|
+
db_url: str = field(default_factory=resolve_trace_db_url)
|
|
29
172
|
|
|
30
173
|
# Remote database sync configuration
|
|
31
|
-
sync_url: str = os.getenv("
|
|
32
|
-
auth_token: str =
|
|
174
|
+
sync_url: str = os.getenv("LIBSQL_SYNC_URL") or os.getenv("TURSO_SYNC_URL", "")
|
|
175
|
+
auth_token: str = resolve_trace_db_auth_token() or ""
|
|
33
176
|
sync_interval: int = int(
|
|
34
177
|
os.getenv("TURSO_SYNC_SECONDS", "2")
|
|
35
178
|
) # 2 seconds for responsive local development
|
|
@@ -54,16 +197,16 @@ class TursoConfig:
|
|
|
54
197
|
sqld_http_port: int = int(os.getenv("SQLD_HTTP_PORT", "8080"))
|
|
55
198
|
sqld_idle_shutdown: int = int(os.getenv("SQLD_IDLE_SHUTDOWN", "0")) # 0 = no idle shutdown
|
|
56
199
|
|
|
57
|
-
def get_connect_args(self) -> dict:
|
|
200
|
+
def get_connect_args(self) -> dict[str, str]:
|
|
58
201
|
"""Get SQLAlchemy connection arguments."""
|
|
59
|
-
args = {}
|
|
202
|
+
args: dict[str, str] = {}
|
|
60
203
|
if self.auth_token:
|
|
61
204
|
args["auth_token"] = self.auth_token
|
|
62
205
|
return args
|
|
63
206
|
|
|
64
|
-
def get_engine_kwargs(self) -> dict:
|
|
207
|
+
def get_engine_kwargs(self) -> dict[str, Any]:
|
|
65
208
|
"""Get SQLAlchemy engine creation kwargs."""
|
|
66
|
-
kwargs = {
|
|
209
|
+
kwargs: dict[str, Any] = {
|
|
67
210
|
"echo": self.echo_sql,
|
|
68
211
|
"future": True,
|
|
69
212
|
}
|
synth_ai/tracing_v3/constants.py
CHANGED
synth_ai/tracing_v3/db_config.py
CHANGED
|
@@ -30,11 +30,12 @@ class DatabaseConfig:
|
|
|
30
30
|
|
|
31
31
|
Args:
|
|
32
32
|
db_path: Path to database file. If None, uses DEFAULT_DB_FILE from serve.sh.
|
|
33
|
-
http_port:
|
|
33
|
+
http_port: Hrana WebSocket port for sqld daemon (env: SQLD_HTTP_PORT). If None, uses DEFAULT_HTTP_PORT.
|
|
34
34
|
use_sqld: Whether to use sqld daemon or direct SQLite.
|
|
35
35
|
"""
|
|
36
36
|
self.use_sqld = use_sqld and self._sqld_binary_available()
|
|
37
|
-
|
|
37
|
+
# Note: SQLD_HTTP_PORT is actually the hrana port (8080), not the HTTP API port
|
|
38
|
+
self.hrana_port = http_port or int(os.getenv("SQLD_HTTP_PORT", self.DEFAULT_HTTP_PORT))
|
|
38
39
|
self._daemon: SqldDaemon | None = None
|
|
39
40
|
|
|
40
41
|
# Set up database path to match serve.sh configuration
|
|
@@ -57,21 +58,16 @@ class DatabaseConfig:
|
|
|
57
58
|
abs_path = os.path.abspath(self.db_file)
|
|
58
59
|
sqld_data_path = os.path.join(abs_path, "dbs", "default", "data")
|
|
59
60
|
|
|
60
|
-
if os.path.exists(sqld_data_path):
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
else:
|
|
65
|
-
# Direct SQLite file
|
|
66
|
-
if not os.path.exists(abs_path):
|
|
67
|
-
logger.debug(f"⚠️ Database file not found at: {abs_path}")
|
|
68
|
-
logger.debug("🔧 Make sure to run './serve.sh' to start the turso/sqld service")
|
|
69
|
-
else:
|
|
70
|
-
logger.debug(f"📁 Using direct SQLite file at: {abs_path}")
|
|
71
|
-
actual_db_path = abs_path
|
|
61
|
+
if not os.path.exists(sqld_data_path) and not os.path.exists(abs_path):
|
|
62
|
+
raise RuntimeError(
|
|
63
|
+
"sqld data directory not found. Run `sqld --db-path <path>` before using the tracing database."
|
|
64
|
+
)
|
|
72
65
|
|
|
73
|
-
#
|
|
74
|
-
|
|
66
|
+
# Use http:// for local sqld HTTP API port
|
|
67
|
+
# sqld has two ports: hrana_port (Hrana WebSocket) and hrana_port+1 (HTTP API)
|
|
68
|
+
# Python libsql client uses HTTP API with http:// URLs
|
|
69
|
+
http_api_port = self.hrana_port + 1
|
|
70
|
+
return f"http://127.0.0.1:{http_api_port}"
|
|
75
71
|
|
|
76
72
|
def _sqld_binary_available(self) -> bool:
|
|
77
73
|
"""Check if the sqld (Turso) binary is available on PATH."""
|
|
@@ -84,18 +80,12 @@ class DatabaseConfig:
|
|
|
84
80
|
return True
|
|
85
81
|
|
|
86
82
|
if binary_override:
|
|
87
|
-
|
|
88
|
-
"Configured SQLD_BINARY='
|
|
89
|
-
"Falling back to direct SQLite.",
|
|
90
|
-
binary_override,
|
|
83
|
+
raise RuntimeError(
|
|
84
|
+
f"Configured SQLD_BINARY='{binary_override}' but the executable was not found on PATH."
|
|
91
85
|
)
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
"Install Turso's sqld or set SQLD_BINARY to enable the Turso daemon."
|
|
96
|
-
)
|
|
97
|
-
|
|
98
|
-
return False
|
|
86
|
+
raise RuntimeError(
|
|
87
|
+
"sqld binary not detected; install Turso's sqld or set SQLD_BINARY so that libSQL can be used."
|
|
88
|
+
)
|
|
99
89
|
|
|
100
90
|
def start_daemon(self, wait_time: float = 2.0):
|
|
101
91
|
"""
|
|
@@ -114,7 +104,7 @@ class DatabaseConfig:
|
|
|
114
104
|
# Import here to avoid circular dependency
|
|
115
105
|
from .turso.daemon import SqldDaemon
|
|
116
106
|
|
|
117
|
-
self._daemon = SqldDaemon(db_path=self.db_base_path,
|
|
107
|
+
self._daemon = SqldDaemon(db_path=self.db_base_path, hrana_port=self.hrana_port)
|
|
118
108
|
|
|
119
109
|
self._daemon.start()
|
|
120
110
|
|
|
@@ -160,11 +150,13 @@ def get_default_db_config() -> DatabaseConfig:
|
|
|
160
150
|
# Check if sqld is already running (started by serve.sh)
|
|
161
151
|
import subprocess
|
|
162
152
|
|
|
163
|
-
|
|
153
|
+
sqld_hrana_port = int(os.getenv("SQLD_HTTP_PORT", DatabaseConfig.DEFAULT_HTTP_PORT))
|
|
154
|
+
sqld_http_port = sqld_hrana_port + 1
|
|
164
155
|
sqld_running = False
|
|
165
156
|
try:
|
|
157
|
+
# Check for either hrana or http port in the process command line
|
|
166
158
|
result = subprocess.run(
|
|
167
|
-
["pgrep", "-f", f"sqld
|
|
159
|
+
["pgrep", "-f", f"sqld.*(--hrana-listen-addr.*:{sqld_hrana_port}|--http-listen-addr.*:{sqld_http_port})"],
|
|
168
160
|
capture_output=True,
|
|
169
161
|
text=True,
|
|
170
162
|
)
|
|
@@ -172,18 +164,12 @@ def get_default_db_config() -> DatabaseConfig:
|
|
|
172
164
|
# sqld is already running, don't start a new one
|
|
173
165
|
sqld_running = True
|
|
174
166
|
use_sqld = False
|
|
175
|
-
logger.debug(f"✅ Detected sqld already running on
|
|
167
|
+
logger.debug(f"✅ Detected sqld already running on ports {sqld_hrana_port} (hrana) and {sqld_http_port} (http)")
|
|
176
168
|
except Exception as e:
|
|
177
169
|
logger.debug(f"Could not check for sqld process: {e}")
|
|
178
170
|
|
|
179
171
|
if not sqld_running and use_sqld:
|
|
180
|
-
logger.warning("
|
|
181
|
-
logger.warning("🔧 Please start the turso/sqld service by running:")
|
|
182
|
-
logger.warning(" ./serve.sh")
|
|
183
|
-
logger.warning("")
|
|
184
|
-
logger.warning("This will start:")
|
|
185
|
-
logger.warning(" - sqld daemon (SQLite server) on port 8080")
|
|
186
|
-
logger.warning(" - Environment service on port 8901")
|
|
172
|
+
logger.warning("sqld service not detected. Start the Turso daemon (./serve.sh) before running tracing workloads.")
|
|
187
173
|
|
|
188
174
|
_default_config = DatabaseConfig(db_path=db_path, use_sqld=use_sqld)
|
|
189
175
|
|
|
@@ -1,10 +1,12 @@
|
|
|
1
1
|
"""Storage configuration for tracing v3."""
|
|
2
2
|
|
|
3
3
|
import os
|
|
4
|
-
from dataclasses import dataclass
|
|
4
|
+
from dataclasses import dataclass, field
|
|
5
5
|
from enum import Enum
|
|
6
6
|
from typing import Any
|
|
7
7
|
|
|
8
|
+
from ..config import resolve_trace_db_auth_token, resolve_trace_db_settings
|
|
9
|
+
|
|
8
10
|
|
|
9
11
|
class StorageBackend(str, Enum):
|
|
10
12
|
"""Supported storage backends."""
|
|
@@ -24,12 +26,9 @@ def _is_enabled(value: str | None) -> bool:
|
|
|
24
26
|
class StorageConfig:
|
|
25
27
|
"""Configuration for storage backend."""
|
|
26
28
|
|
|
27
|
-
backend: StorageBackend = StorageBackend.TURSO_NATIVE
|
|
28
29
|
connection_string: str | None = None
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
turso_url: str = os.getenv("TURSO_DATABASE_URL", "sqlite+libsql://http://127.0.0.1:8080")
|
|
32
|
-
turso_auth_token: str = os.getenv("TURSO_AUTH_TOKEN", "")
|
|
30
|
+
backend: StorageBackend | None = None
|
|
31
|
+
turso_auth_token: str | None = field(default=None)
|
|
33
32
|
|
|
34
33
|
# Common settings
|
|
35
34
|
pool_size: int = int(os.getenv("STORAGE_POOL_SIZE", "8"))
|
|
@@ -44,9 +43,48 @@ class StorageConfig:
|
|
|
44
43
|
# Allow legacy override while keeping compatibility with existing TURSO_NATIVE env flag
|
|
45
44
|
native_env = os.getenv("TURSO_NATIVE")
|
|
46
45
|
native_flag = _is_enabled(native_env) if native_env is not None else None
|
|
46
|
+
resolved_url: str | None = self.connection_string
|
|
47
|
+
resolved_token: str | None = self.turso_auth_token
|
|
48
|
+
|
|
49
|
+
if resolved_url is None:
|
|
50
|
+
resolved_url, inferred_token = resolve_trace_db_settings()
|
|
51
|
+
self.connection_string = resolved_url
|
|
52
|
+
resolved_token = inferred_token
|
|
53
|
+
|
|
54
|
+
if resolved_token is None:
|
|
55
|
+
resolved_token = resolve_trace_db_auth_token()
|
|
56
|
+
|
|
57
|
+
self.turso_auth_token = resolved_token or ""
|
|
58
|
+
|
|
59
|
+
if self.backend is None:
|
|
60
|
+
self.backend = self._infer_backend(self.connection_string or "")
|
|
47
61
|
|
|
48
62
|
if native_flag is False:
|
|
49
|
-
|
|
63
|
+
raise RuntimeError("TURSO_NATIVE=false is no longer supported; only Turso/libSQL backend is available.")
|
|
64
|
+
|
|
65
|
+
# Allow both TURSO_NATIVE and SQLITE backends (both use libsql.connect)
|
|
66
|
+
if self.backend not in (StorageBackend.TURSO_NATIVE, StorageBackend.SQLITE):
|
|
67
|
+
raise RuntimeError(f"Unsupported backend: {self.backend}. Only Turso/libSQL and SQLite are supported.")
|
|
68
|
+
|
|
69
|
+
@staticmethod
|
|
70
|
+
def _infer_backend(connection_string: str) -> StorageBackend:
|
|
71
|
+
"""Infer backend type from the connection string."""
|
|
72
|
+
scheme = connection_string.split(":", 1)[0].lower()
|
|
73
|
+
|
|
74
|
+
# Plain SQLite files: file://, /absolute/path, or no scheme
|
|
75
|
+
if (
|
|
76
|
+
scheme == "file"
|
|
77
|
+
or scheme.startswith("sqlite")
|
|
78
|
+
or connection_string.startswith("/")
|
|
79
|
+
or "://" not in connection_string
|
|
80
|
+
):
|
|
81
|
+
return StorageBackend.SQLITE
|
|
82
|
+
|
|
83
|
+
# Turso/sqld: libsql://, http://, https://
|
|
84
|
+
if scheme.startswith("libsql") or "libsql" in scheme or scheme in ("http", "https"):
|
|
85
|
+
return StorageBackend.TURSO_NATIVE
|
|
86
|
+
|
|
87
|
+
raise RuntimeError(f"Unsupported tracing backend scheme: {scheme}")
|
|
50
88
|
|
|
51
89
|
def get_connection_string(self) -> str:
|
|
52
90
|
"""Get the appropriate connection string for the backend."""
|
|
@@ -54,12 +92,8 @@ class StorageConfig:
|
|
|
54
92
|
return self.connection_string
|
|
55
93
|
|
|
56
94
|
if self.backend == StorageBackend.TURSO_NATIVE:
|
|
57
|
-
return self.
|
|
58
|
-
|
|
59
|
-
return "sqlite+aiosqlite:///traces.db"
|
|
60
|
-
if self.backend == StorageBackend.POSTGRES:
|
|
61
|
-
return os.getenv("POSTGRES_URL", "postgresql+asyncpg://localhost/traces")
|
|
62
|
-
raise ValueError(f"Unknown backend: {self.backend}")
|
|
95
|
+
return self.connection_string or ""
|
|
96
|
+
raise ValueError(f"Unsupported backend: {self.backend}")
|
|
63
97
|
|
|
64
98
|
def get_backend_config(self) -> dict[str, Any]:
|
|
65
99
|
"""Get backend-specific configuration."""
|
|
@@ -24,14 +24,14 @@ def create_storage(config: StorageConfig | None = None) -> TraceStorage:
|
|
|
24
24
|
|
|
25
25
|
connection_string = config.get_connection_string()
|
|
26
26
|
|
|
27
|
-
|
|
27
|
+
# Both TURSO_NATIVE and SQLITE use NativeLibsqlTraceManager
|
|
28
|
+
# because libsql.connect() handles both remote and local file databases
|
|
29
|
+
if config.backend in (StorageBackend.TURSO_NATIVE, StorageBackend.SQLITE):
|
|
28
30
|
backend_config = config.get_backend_config()
|
|
29
31
|
return NativeLibsqlTraceManager(
|
|
30
32
|
db_url=connection_string,
|
|
31
33
|
auth_token=backend_config.get("auth_token"),
|
|
32
34
|
)
|
|
33
|
-
elif config.backend == StorageBackend.SQLITE:
|
|
34
|
-
return NativeLibsqlTraceManager(db_url=connection_string)
|
|
35
35
|
elif config.backend == StorageBackend.POSTGRES:
|
|
36
36
|
# Future: PostgreSQL implementation
|
|
37
37
|
raise NotImplementedError("PostgreSQL backend not yet implemented")
|