synth-ai 0.4.1__py3-none-any.whl → 0.4.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synth-ai might be problematic. Click here for more details.
- synth_ai/__init__.py +13 -13
- synth_ai/cli/__init__.py +6 -15
- synth_ai/cli/commands/eval/__init__.py +6 -15
- synth_ai/cli/commands/eval/config.py +338 -0
- synth_ai/cli/commands/eval/core.py +236 -1091
- synth_ai/cli/commands/eval/runner.py +704 -0
- synth_ai/cli/commands/eval/validation.py +44 -117
- synth_ai/cli/commands/filter/core.py +7 -7
- synth_ai/cli/commands/filter/validation.py +2 -2
- synth_ai/cli/commands/smoke/core.py +7 -17
- synth_ai/cli/commands/status/__init__.py +1 -64
- synth_ai/cli/commands/status/client.py +50 -151
- synth_ai/cli/commands/status/config.py +3 -83
- synth_ai/cli/commands/status/errors.py +4 -13
- synth_ai/cli/commands/status/subcommands/__init__.py +2 -8
- synth_ai/cli/commands/status/subcommands/config.py +13 -0
- synth_ai/cli/commands/status/subcommands/files.py +18 -63
- synth_ai/cli/commands/status/subcommands/jobs.py +28 -311
- synth_ai/cli/commands/status/subcommands/models.py +18 -62
- synth_ai/cli/commands/status/subcommands/runs.py +16 -63
- synth_ai/cli/commands/status/subcommands/session.py +67 -172
- synth_ai/cli/commands/status/subcommands/summary.py +24 -32
- synth_ai/cli/commands/status/subcommands/utils.py +41 -0
- synth_ai/cli/commands/status/utils.py +16 -107
- synth_ai/cli/commands/train/__init__.py +18 -20
- synth_ai/cli/commands/train/errors.py +3 -3
- synth_ai/cli/commands/train/prompt_learning_validation.py +15 -16
- synth_ai/cli/commands/train/validation.py +7 -7
- synth_ai/cli/commands/train/{judge_schemas.py → verifier_schemas.py} +33 -34
- synth_ai/cli/commands/train/verifier_validation.py +235 -0
- synth_ai/cli/demo_apps/demo_task_apps/math/config.toml +0 -1
- synth_ai/cli/demo_apps/demo_task_apps/math/modal_task_app.py +2 -6
- synth_ai/cli/demo_apps/math/config.toml +0 -1
- synth_ai/cli/demo_apps/math/modal_task_app.py +2 -6
- synth_ai/cli/demo_apps/mipro/task_app.py +25 -47
- synth_ai/cli/lib/apps/task_app.py +12 -13
- synth_ai/cli/lib/task_app_discovery.py +6 -6
- synth_ai/cli/lib/train_cfgs.py +10 -10
- synth_ai/cli/task_apps/__init__.py +11 -0
- synth_ai/cli/task_apps/commands.py +7 -15
- synth_ai/core/env.py +12 -1
- synth_ai/core/errors.py +1 -2
- synth_ai/core/integrations/cloudflare.py +209 -33
- synth_ai/core/tracing_v3/abstractions.py +46 -0
- synth_ai/data/__init__.py +3 -30
- synth_ai/data/enums.py +1 -20
- synth_ai/data/rewards.py +100 -3
- synth_ai/products/graph_evolve/__init__.py +1 -2
- synth_ai/products/graph_evolve/config.py +16 -16
- synth_ai/products/graph_evolve/converters/__init__.py +3 -3
- synth_ai/products/graph_evolve/converters/openai_sft.py +7 -7
- synth_ai/products/graph_evolve/examples/hotpotqa/config.toml +1 -1
- synth_ai/products/graph_gepa/__init__.py +23 -0
- synth_ai/products/graph_gepa/converters/__init__.py +19 -0
- synth_ai/products/graph_gepa/converters/openai_sft.py +29 -0
- synth_ai/sdk/__init__.py +45 -35
- synth_ai/sdk/api/eval/__init__.py +33 -0
- synth_ai/sdk/api/eval/job.py +732 -0
- synth_ai/sdk/api/research_agent/__init__.py +276 -66
- synth_ai/sdk/api/train/builders.py +181 -0
- synth_ai/sdk/api/train/cli.py +41 -33
- synth_ai/sdk/api/train/configs/__init__.py +6 -4
- synth_ai/sdk/api/train/configs/prompt_learning.py +127 -33
- synth_ai/sdk/api/train/configs/rl.py +264 -16
- synth_ai/sdk/api/train/configs/sft.py +165 -1
- synth_ai/sdk/api/train/graph_validators.py +12 -12
- synth_ai/sdk/api/train/graphgen.py +169 -51
- synth_ai/sdk/api/train/graphgen_models.py +95 -45
- synth_ai/sdk/api/train/local_api.py +10 -0
- synth_ai/sdk/api/train/pollers.py +36 -0
- synth_ai/sdk/api/train/prompt_learning.py +390 -60
- synth_ai/sdk/api/train/rl.py +41 -5
- synth_ai/sdk/api/train/sft.py +2 -0
- synth_ai/sdk/api/train/task_app.py +20 -0
- synth_ai/sdk/api/train/validators.py +17 -17
- synth_ai/sdk/graphs/completions.py +239 -33
- synth_ai/sdk/{judging/schemas.py → graphs/verifier_schemas.py} +23 -23
- synth_ai/sdk/learning/__init__.py +35 -5
- synth_ai/sdk/learning/context_learning_client.py +531 -0
- synth_ai/sdk/learning/context_learning_types.py +294 -0
- synth_ai/sdk/learning/prompt_learning_client.py +1 -1
- synth_ai/sdk/learning/prompt_learning_types.py +2 -1
- synth_ai/sdk/learning/rl/__init__.py +0 -4
- synth_ai/sdk/learning/rl/contracts.py +0 -4
- synth_ai/sdk/localapi/__init__.py +40 -0
- synth_ai/sdk/localapi/apps/__init__.py +28 -0
- synth_ai/sdk/localapi/client.py +10 -0
- synth_ai/sdk/localapi/contracts.py +10 -0
- synth_ai/sdk/localapi/helpers.py +519 -0
- synth_ai/sdk/localapi/rollouts.py +93 -0
- synth_ai/sdk/localapi/server.py +29 -0
- synth_ai/sdk/localapi/template.py +49 -0
- synth_ai/sdk/streaming/handlers.py +6 -6
- synth_ai/sdk/streaming/streamer.py +10 -6
- synth_ai/sdk/task/__init__.py +18 -5
- synth_ai/sdk/task/apps/__init__.py +37 -1
- synth_ai/sdk/task/client.py +9 -1
- synth_ai/sdk/task/config.py +6 -11
- synth_ai/sdk/task/contracts.py +137 -95
- synth_ai/sdk/task/in_process.py +32 -22
- synth_ai/sdk/task/in_process_runner.py +9 -4
- synth_ai/sdk/task/rubrics/__init__.py +2 -3
- synth_ai/sdk/task/rubrics/loaders.py +4 -4
- synth_ai/sdk/task/rubrics/strict.py +3 -4
- synth_ai/sdk/task/server.py +76 -16
- synth_ai/sdk/task/trace_correlation_helpers.py +190 -139
- synth_ai/sdk/task/validators.py +34 -49
- synth_ai/sdk/training/__init__.py +7 -16
- synth_ai/sdk/tunnels/__init__.py +118 -0
- synth_ai/sdk/tunnels/cleanup.py +83 -0
- synth_ai/sdk/tunnels/ports.py +120 -0
- synth_ai/sdk/tunnels/tunneled_api.py +363 -0
- {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/METADATA +71 -4
- {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/RECORD +118 -128
- synth_ai/cli/commands/baseline/__init__.py +0 -12
- synth_ai/cli/commands/baseline/core.py +0 -636
- synth_ai/cli/commands/baseline/list.py +0 -94
- synth_ai/cli/commands/eval/errors.py +0 -81
- synth_ai/cli/commands/status/formatters.py +0 -164
- synth_ai/cli/commands/status/subcommands/pricing.py +0 -23
- synth_ai/cli/commands/status/subcommands/usage.py +0 -203
- synth_ai/cli/commands/train/judge_validation.py +0 -305
- synth_ai/cli/usage.py +0 -159
- synth_ai/data/specs.py +0 -36
- synth_ai/sdk/api/research_agent/cli.py +0 -428
- synth_ai/sdk/api/research_agent/config.py +0 -357
- synth_ai/sdk/api/research_agent/job.py +0 -717
- synth_ai/sdk/baseline/__init__.py +0 -25
- synth_ai/sdk/baseline/config.py +0 -209
- synth_ai/sdk/baseline/discovery.py +0 -216
- synth_ai/sdk/baseline/execution.py +0 -154
- synth_ai/sdk/judging/__init__.py +0 -15
- synth_ai/sdk/judging/base.py +0 -24
- synth_ai/sdk/judging/client.py +0 -191
- synth_ai/sdk/judging/types.py +0 -42
- synth_ai/sdk/research_agent/__init__.py +0 -34
- synth_ai/sdk/research_agent/container_builder.py +0 -328
- synth_ai/sdk/research_agent/container_spec.py +0 -198
- synth_ai/sdk/research_agent/defaults.py +0 -34
- synth_ai/sdk/research_agent/results_collector.py +0 -69
- synth_ai/sdk/specs/__init__.py +0 -46
- synth_ai/sdk/specs/dataclasses.py +0 -149
- synth_ai/sdk/specs/loader.py +0 -144
- synth_ai/sdk/specs/serializer.py +0 -199
- synth_ai/sdk/specs/validation.py +0 -250
- synth_ai/sdk/tracing/__init__.py +0 -39
- synth_ai/sdk/usage/__init__.py +0 -37
- synth_ai/sdk/usage/client.py +0 -171
- synth_ai/sdk/usage/models.py +0 -261
- {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/WHEEL +0 -0
- {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/top_level.txt +0 -0
synth_ai/core/env.py
CHANGED
|
@@ -14,7 +14,7 @@ from typing import Literal
|
|
|
14
14
|
from .errors import AuthenticationError, ConfigError
|
|
15
15
|
|
|
16
16
|
# Default production URL
|
|
17
|
-
PROD_BASE_URL = "https://
|
|
17
|
+
PROD_BASE_URL = "https://api.usesynth.ai"
|
|
18
18
|
PROD_BASE_URL_DEFAULT = PROD_BASE_URL # Alias for backward compatibility
|
|
19
19
|
|
|
20
20
|
|
|
@@ -180,6 +180,8 @@ def get_backend_from_env() -> tuple[str, str]:
|
|
|
180
180
|
direct_override = (os.environ.get("BACKEND_OVERRIDE") or "").strip()
|
|
181
181
|
if direct_override:
|
|
182
182
|
base = _normalize_url(direct_override)
|
|
183
|
+
if not base:
|
|
184
|
+
raise ConfigError("BACKEND_OVERRIDE is set but empty or invalid")
|
|
183
185
|
api_key = os.environ.get("SYNTH_API_KEY", "").strip()
|
|
184
186
|
return base, api_key
|
|
185
187
|
|
|
@@ -189,16 +191,25 @@ def get_backend_from_env() -> tuple[str, str]:
|
|
|
189
191
|
|
|
190
192
|
if mode == "local":
|
|
191
193
|
base = os.environ.get("LOCAL_BACKEND_URL", "http://localhost:8000")
|
|
194
|
+
# If explicitly set to empty string, use default
|
|
195
|
+
if not base or not base.strip():
|
|
196
|
+
base = "http://localhost:8000"
|
|
192
197
|
key = os.environ.get("TESTING_LOCAL_SYNTH_API_KEY", "")
|
|
193
198
|
return _normalize_url(base), key
|
|
194
199
|
|
|
195
200
|
if mode == "dev":
|
|
196
201
|
base = os.environ.get("DEV_BACKEND_URL", "") or "http://localhost:8000"
|
|
202
|
+
# If explicitly set to empty string, use default
|
|
203
|
+
if not base or not base.strip():
|
|
204
|
+
base = "http://localhost:8000"
|
|
197
205
|
key = os.environ.get("DEV_SYNTH_API_KEY", "")
|
|
198
206
|
return _normalize_url(base), key
|
|
199
207
|
|
|
200
208
|
# prod
|
|
201
209
|
base = os.environ.get("PROD_BACKEND_URL", PROD_BASE_URL)
|
|
210
|
+
# If explicitly set to empty string, use default
|
|
211
|
+
if not base or not base.strip():
|
|
212
|
+
base = PROD_BASE_URL
|
|
202
213
|
key = (
|
|
203
214
|
os.environ.get("PROD_SYNTH_API_KEY", "")
|
|
204
215
|
or os.environ.get("TESTING_PROD_SYNTH_API_KEY", "")
|
synth_ai/core/errors.py
CHANGED
|
@@ -87,7 +87,7 @@ class UsageLimitError(SynthError):
|
|
|
87
87
|
|
|
88
88
|
Attributes:
|
|
89
89
|
limit_type: The type of limit exceeded (e.g., "inference_tokens_per_day")
|
|
90
|
-
api: The API that hit the limit (e.g., "inference", "
|
|
90
|
+
api: The API that hit the limit (e.g., "inference", "verifiers", "prompt_opt")
|
|
91
91
|
current: Current usage value
|
|
92
92
|
limit: The limit value
|
|
93
93
|
tier: The org's tier (e.g., "free", "starter", "growth")
|
|
@@ -123,4 +123,3 @@ __all__ = [
|
|
|
123
123
|
"ModelNotSupportedError",
|
|
124
124
|
"UsageLimitError",
|
|
125
125
|
]
|
|
126
|
-
|
|
@@ -738,13 +738,20 @@ async def resolve_hostname_with_explicit_resolvers(hostname: str) -> str:
|
|
|
738
738
|
timeout = float(os.getenv("SYNTH_TUNNEL_DNS_TIMEOUT_PER_ATTEMPT_SECS", "5"))
|
|
739
739
|
loop = asyncio.get_event_loop()
|
|
740
740
|
|
|
741
|
-
# Try
|
|
742
|
-
|
|
741
|
+
# Try various dig resolvers, then fall back to system resolver
|
|
742
|
+
# Order: 1.1.1.1, 8.8.8.8, then plain dig (uses system's configured DNS but bypasses cache)
|
|
743
|
+
resolvers = [
|
|
744
|
+
("1.1.1.1", ["dig", "@1.1.1.1", "+short", hostname]),
|
|
745
|
+
("8.8.8.8", ["dig", "@8.8.8.8", "+short", hostname]),
|
|
746
|
+
("default", ["dig", "+short", hostname]), # Plain dig bypasses negative cache
|
|
747
|
+
]
|
|
748
|
+
|
|
749
|
+
for resolver_name, cmd in resolvers:
|
|
743
750
|
try:
|
|
744
751
|
result = await loop.run_in_executor(
|
|
745
752
|
None,
|
|
746
|
-
lambda
|
|
747
|
-
|
|
753
|
+
lambda c=cmd: subprocess.run(
|
|
754
|
+
c,
|
|
748
755
|
capture_output=True,
|
|
749
756
|
text=True,
|
|
750
757
|
timeout=timeout,
|
|
@@ -753,17 +760,17 @@ async def resolve_hostname_with_explicit_resolvers(hostname: str) -> str:
|
|
|
753
760
|
if result.returncode == 0 and result.stdout.strip():
|
|
754
761
|
first = result.stdout.strip().splitlines()[0].strip()
|
|
755
762
|
if first:
|
|
756
|
-
logger.debug(f"Resolved via {
|
|
763
|
+
logger.debug(f"Resolved via {resolver_name}: {hostname} -> {first}")
|
|
757
764
|
return first
|
|
758
765
|
except FileNotFoundError:
|
|
759
|
-
logger.debug(f"dig not found, skipping {
|
|
766
|
+
logger.debug(f"dig not found, skipping {resolver_name}")
|
|
760
767
|
continue
|
|
761
768
|
except Exception as e:
|
|
762
|
-
logger.debug(f"Resolver {
|
|
769
|
+
logger.debug(f"Resolver {resolver_name} failed: {e}")
|
|
763
770
|
continue
|
|
764
|
-
|
|
765
|
-
#
|
|
766
|
-
logger.debug(f"Falling back to
|
|
771
|
+
|
|
772
|
+
# Final fallback: system resolver (may hit negative cache)
|
|
773
|
+
logger.debug(f"Falling back to socket.gethostbyname for {hostname}")
|
|
767
774
|
return await loop.run_in_executor(
|
|
768
775
|
None,
|
|
769
776
|
socket.gethostbyname,
|
|
@@ -822,13 +829,24 @@ async def verify_tunnel_dns_resolution(
|
|
|
822
829
|
resolved_ip = await resolve_hostname_with_explicit_resolvers(hostname)
|
|
823
830
|
logger.info(f"DNS resolution successful (attempt {attempt}): {hostname} -> {resolved_ip}")
|
|
824
831
|
|
|
825
|
-
# 2. HTTP connectivity:
|
|
826
|
-
#
|
|
832
|
+
# 2. HTTP connectivity: use curl with --resolve to bypass system DNS cache
|
|
833
|
+
# The system resolver may have negative-cached the hostname, so we use
|
|
834
|
+
# curl with explicit IP resolution to bypass it while maintaining proper SNI.
|
|
827
835
|
try:
|
|
828
836
|
scheme = parsed.scheme or "https"
|
|
829
|
-
test_url = f"{scheme}://{
|
|
830
|
-
|
|
831
|
-
|
|
837
|
+
test_url = f"{scheme}://{hostname}/health"
|
|
838
|
+
port = 443 if scheme == "https" else 80
|
|
839
|
+
|
|
840
|
+
# Build curl command with --resolve to bypass system DNS
|
|
841
|
+
# Format: --resolve hostname:port:ip
|
|
842
|
+
curl_cmd = [
|
|
843
|
+
"curl", "-s", "-o", "/dev/null", "-w", "%{http_code}",
|
|
844
|
+
"--max-time", "5",
|
|
845
|
+
"-k", # Allow self-signed certs
|
|
846
|
+
"--resolve", f"{hostname}:{port}:{resolved_ip}",
|
|
847
|
+
test_url,
|
|
848
|
+
]
|
|
849
|
+
|
|
832
850
|
# Include API key if provided (or from env var)
|
|
833
851
|
if api_key is None:
|
|
834
852
|
# Try to load .env file if available
|
|
@@ -839,25 +857,34 @@ async def verify_tunnel_dns_resolution(
|
|
|
839
857
|
pass
|
|
840
858
|
api_key = os.getenv("ENVIRONMENT_API_KEY")
|
|
841
859
|
if api_key:
|
|
842
|
-
|
|
843
|
-
|
|
844
|
-
|
|
845
|
-
|
|
846
|
-
|
|
847
|
-
|
|
848
|
-
|
|
849
|
-
|
|
850
|
-
|
|
851
|
-
|
|
852
|
-
|
|
860
|
+
curl_cmd.extend(["-H", f"X-API-Key: {api_key}"])
|
|
861
|
+
|
|
862
|
+
result = await loop.run_in_executor(
|
|
863
|
+
None,
|
|
864
|
+
lambda: subprocess.run(curl_cmd, capture_output=True, text=True, timeout=10),
|
|
865
|
+
)
|
|
866
|
+
|
|
867
|
+
status_code = int(result.stdout.strip()) if result.returncode == 0 and result.stdout.strip().isdigit() else 0
|
|
868
|
+
|
|
869
|
+
# Accept various status codes that indicate the tunnel is working:
|
|
870
|
+
# - 200: OK (service is running)
|
|
871
|
+
# - 400/401/403: Auth required (server is reachable)
|
|
872
|
+
# - 404/405: Not found / method not allowed (server is reachable)
|
|
873
|
+
# - 502: Bad gateway (cloudflared connected but local service isn't running)
|
|
874
|
+
if status_code in (200, 400, 401, 403, 404, 405, 502):
|
|
875
|
+
logger.info(f"HTTP connectivity verified: {test_url} -> {status_code}")
|
|
876
|
+
return
|
|
877
|
+
else:
|
|
878
|
+
# 530 errors are common when tunnel is still establishing - retry
|
|
879
|
+
if status_code == 530:
|
|
880
|
+
logger.debug("HTTP 530 (tunnel establishing) - will retry")
|
|
881
|
+
last_exc = RuntimeError("tunnel not ready yet (HTTP 530)")
|
|
882
|
+
elif result.returncode != 0:
|
|
883
|
+
logger.warning(f"curl failed: {result.stderr}")
|
|
884
|
+
last_exc = RuntimeError(f"curl failed: {result.stderr}")
|
|
853
885
|
else:
|
|
854
|
-
|
|
855
|
-
|
|
856
|
-
logger.debug("HTTP 530 (tunnel establishing) - will retry")
|
|
857
|
-
last_exc = RuntimeError("tunnel not ready yet (HTTP 530)")
|
|
858
|
-
else:
|
|
859
|
-
logger.warning(f"HTTP check returned unexpected status: {resp.status_code}")
|
|
860
|
-
last_exc = RuntimeError(f"unexpected HTTP status {resp.status_code}")
|
|
886
|
+
logger.warning(f"HTTP check returned unexpected status: {status_code}")
|
|
887
|
+
last_exc = RuntimeError(f"unexpected HTTP status {status_code}")
|
|
861
888
|
except Exception as http_exc:
|
|
862
889
|
logger.warning(f"HTTP connectivity check failed (attempt {attempt}): {http_exc}")
|
|
863
890
|
last_exc = http_exc
|
|
@@ -1102,6 +1129,155 @@ def open_managed_tunnel(tunnel_token: str) -> subprocess.Popen:
|
|
|
1102
1129
|
)
|
|
1103
1130
|
|
|
1104
1131
|
|
|
1132
|
+
async def wait_for_cloudflared_connection(
|
|
1133
|
+
proc: subprocess.Popen,
|
|
1134
|
+
timeout_seconds: float = 30.0,
|
|
1135
|
+
) -> bool:
|
|
1136
|
+
"""
|
|
1137
|
+
Wait for cloudflared to establish a connection to Cloudflare's edge.
|
|
1138
|
+
|
|
1139
|
+
This monitors cloudflared's stdout/stderr for connection success messages.
|
|
1140
|
+
DNS records only resolve AFTER cloudflared has connected, so this function
|
|
1141
|
+
must be called before attempting DNS verification.
|
|
1142
|
+
|
|
1143
|
+
Args:
|
|
1144
|
+
proc: The cloudflared subprocess from open_managed_tunnel()
|
|
1145
|
+
timeout_seconds: Maximum time to wait for connection
|
|
1146
|
+
|
|
1147
|
+
Returns:
|
|
1148
|
+
True if connection was established, False if timeout or error
|
|
1149
|
+
|
|
1150
|
+
Raises:
|
|
1151
|
+
RuntimeError: If cloudflared exits with an error before connecting
|
|
1152
|
+
"""
|
|
1153
|
+
import select
|
|
1154
|
+
|
|
1155
|
+
# Patterns that indicate successful connection
|
|
1156
|
+
# cloudflared outputs: "INF Registered tunnel connection connIndex=0 connection=..."
|
|
1157
|
+
# We need to be specific - "connIndex=" alone triggers too early on curve preferences log
|
|
1158
|
+
connection_patterns = [
|
|
1159
|
+
"Registered tunnel connection",
|
|
1160
|
+
"Connection registered",
|
|
1161
|
+
# Don't use "connIndex=" alone - it matches curve preferences log before actual connection
|
|
1162
|
+
]
|
|
1163
|
+
|
|
1164
|
+
# Patterns that indicate fatal errors
|
|
1165
|
+
error_patterns = [
|
|
1166
|
+
"failed to connect",
|
|
1167
|
+
"error connecting",
|
|
1168
|
+
"tunnel credentials",
|
|
1169
|
+
"invalid token",
|
|
1170
|
+
"tunnel not found",
|
|
1171
|
+
"unauthorized",
|
|
1172
|
+
]
|
|
1173
|
+
|
|
1174
|
+
loop = asyncio.get_event_loop()
|
|
1175
|
+
start_time = loop.time()
|
|
1176
|
+
output_lines: list[str] = []
|
|
1177
|
+
|
|
1178
|
+
logger.info(f"Waiting for cloudflared to connect (timeout {timeout_seconds}s)...")
|
|
1179
|
+
|
|
1180
|
+
while True:
|
|
1181
|
+
elapsed = loop.time() - start_time
|
|
1182
|
+
if elapsed >= timeout_seconds:
|
|
1183
|
+
logger.warning(
|
|
1184
|
+
f"cloudflared connection timeout after {elapsed:.1f}s. "
|
|
1185
|
+
f"Output: {' | '.join(output_lines[-10:])}"
|
|
1186
|
+
)
|
|
1187
|
+
return False
|
|
1188
|
+
|
|
1189
|
+
# Check if process exited
|
|
1190
|
+
if proc.poll() is not None:
|
|
1191
|
+
# Process exited - read remaining output
|
|
1192
|
+
remaining = proc.stdout.read() if proc.stdout else ""
|
|
1193
|
+
if remaining:
|
|
1194
|
+
output_lines.extend(remaining.splitlines())
|
|
1195
|
+
|
|
1196
|
+
all_output = "\n".join(output_lines)
|
|
1197
|
+
logger.error(
|
|
1198
|
+
f"cloudflared exited with code {proc.returncode} before connecting. "
|
|
1199
|
+
f"Output:\n{all_output[:2000]}"
|
|
1200
|
+
)
|
|
1201
|
+
raise RuntimeError(
|
|
1202
|
+
f"cloudflared exited with code {proc.returncode} before establishing connection. "
|
|
1203
|
+
f"This usually means the tunnel token is invalid or the tunnel was deleted. "
|
|
1204
|
+
f"Output: {all_output[:500]}"
|
|
1205
|
+
)
|
|
1206
|
+
|
|
1207
|
+
# Try to read output (non-blocking)
|
|
1208
|
+
if proc.stdout:
|
|
1209
|
+
try:
|
|
1210
|
+
# Use select for non-blocking read
|
|
1211
|
+
ready, _, _ = select.select([proc.stdout], [], [], 0.1)
|
|
1212
|
+
if ready:
|
|
1213
|
+
line = proc.stdout.readline()
|
|
1214
|
+
if line:
|
|
1215
|
+
line = line.strip()
|
|
1216
|
+
output_lines.append(line)
|
|
1217
|
+
logger.debug(f"cloudflared: {line}")
|
|
1218
|
+
|
|
1219
|
+
# Check for connection success
|
|
1220
|
+
line_lower = line.lower()
|
|
1221
|
+
for pattern in connection_patterns:
|
|
1222
|
+
if pattern.lower() in line_lower:
|
|
1223
|
+
logger.info(
|
|
1224
|
+
f"cloudflared connected after {elapsed:.1f}s: {line}"
|
|
1225
|
+
)
|
|
1226
|
+
return True
|
|
1227
|
+
|
|
1228
|
+
# Check for fatal errors
|
|
1229
|
+
for pattern in error_patterns:
|
|
1230
|
+
if pattern.lower() in line_lower:
|
|
1231
|
+
logger.error(f"cloudflared error detected: {line}")
|
|
1232
|
+
raise RuntimeError(
|
|
1233
|
+
f"cloudflared connection failed: {line}"
|
|
1234
|
+
)
|
|
1235
|
+
except (ValueError, OSError) as e:
|
|
1236
|
+
logger.debug(f"Error reading cloudflared output: {e}")
|
|
1237
|
+
|
|
1238
|
+
# Small sleep to avoid busy loop
|
|
1239
|
+
await asyncio.sleep(0.1)
|
|
1240
|
+
|
|
1241
|
+
|
|
1242
|
+
async def open_managed_tunnel_with_connection_wait(
|
|
1243
|
+
tunnel_token: str,
|
|
1244
|
+
timeout_seconds: float = 30.0,
|
|
1245
|
+
) -> subprocess.Popen:
|
|
1246
|
+
"""
|
|
1247
|
+
Open a managed tunnel and wait for cloudflared to connect.
|
|
1248
|
+
|
|
1249
|
+
This is the preferred method for starting managed tunnels as it ensures
|
|
1250
|
+
cloudflared has actually connected to Cloudflare's edge before returning.
|
|
1251
|
+
DNS records only resolve after this connection is established.
|
|
1252
|
+
|
|
1253
|
+
Args:
|
|
1254
|
+
tunnel_token: Cloudflare tunnel token from backend API
|
|
1255
|
+
timeout_seconds: Maximum time to wait for connection
|
|
1256
|
+
|
|
1257
|
+
Returns:
|
|
1258
|
+
Process handle for the connected tunnel
|
|
1259
|
+
|
|
1260
|
+
Raises:
|
|
1261
|
+
RuntimeError: If cloudflared fails to connect within timeout
|
|
1262
|
+
"""
|
|
1263
|
+
proc = open_managed_tunnel(tunnel_token)
|
|
1264
|
+
|
|
1265
|
+
try:
|
|
1266
|
+
connected = await wait_for_cloudflared_connection(proc, timeout_seconds)
|
|
1267
|
+
if not connected:
|
|
1268
|
+
# Timeout - kill process and raise
|
|
1269
|
+
stop_tunnel(proc)
|
|
1270
|
+
raise RuntimeError(
|
|
1271
|
+
f"cloudflared failed to connect within {timeout_seconds}s. "
|
|
1272
|
+
"The tunnel may be invalid or Cloudflare may be experiencing issues."
|
|
1273
|
+
)
|
|
1274
|
+
return proc
|
|
1275
|
+
except Exception:
|
|
1276
|
+
# Cleanup on any error
|
|
1277
|
+
stop_tunnel(proc)
|
|
1278
|
+
raise
|
|
1279
|
+
|
|
1280
|
+
|
|
1105
1281
|
def stop_tunnel(proc: Optional[subprocess.Popen]) -> None:
|
|
1106
1282
|
"""
|
|
1107
1283
|
Gracefully stop a tunnel process.
|
|
@@ -32,6 +32,52 @@ Concepts:
|
|
|
32
32
|
tool result back, and the agent sending a reply to the user. Do not confuse these with
|
|
33
33
|
provider-specific LLM API "messages" (prompt formatting) — those belong inside an LMCAISEvent
|
|
34
34
|
as part of its input/output content, not as SessionEventMessages.
|
|
35
|
+
|
|
36
|
+
Example usage:
|
|
37
|
+
|
|
38
|
+
```python
|
|
39
|
+
from synth_ai.core.tracing_v3.abstractions import (
|
|
40
|
+
SessionTrace,
|
|
41
|
+
SessionTimeStep,
|
|
42
|
+
LMCAISEvent,
|
|
43
|
+
EnvironmentEvent,
|
|
44
|
+
TimeRecord,
|
|
45
|
+
)
|
|
46
|
+
import time
|
|
47
|
+
|
|
48
|
+
# Create a simple trace with one turn
|
|
49
|
+
trace = SessionTrace(
|
|
50
|
+
session_id="sess_example",
|
|
51
|
+
session_time_steps=[
|
|
52
|
+
SessionTimeStep(
|
|
53
|
+
step_id="turn_1",
|
|
54
|
+
step_index=0,
|
|
55
|
+
events=[
|
|
56
|
+
LMCAISEvent(
|
|
57
|
+
system_instance_id="llm",
|
|
58
|
+
time_record=TimeRecord(event_time=time.time()),
|
|
59
|
+
model_name="gpt-4o",
|
|
60
|
+
input_tokens=150,
|
|
61
|
+
output_tokens=50,
|
|
62
|
+
),
|
|
63
|
+
EnvironmentEvent(
|
|
64
|
+
system_instance_id="tool_executor",
|
|
65
|
+
time_record=TimeRecord(event_time=time.time()),
|
|
66
|
+
reward=1.0,
|
|
67
|
+
terminated=True,
|
|
68
|
+
),
|
|
69
|
+
],
|
|
70
|
+
)
|
|
71
|
+
],
|
|
72
|
+
)
|
|
73
|
+
|
|
74
|
+
# Convert to dict for serialization
|
|
75
|
+
trace_dict = trace.to_dict()
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
See Also:
|
|
79
|
+
- V3 Traces SDK guide: /sdk/tracing/v3-traces
|
|
80
|
+
- Event rewards: /sdk/tracing/rewards/event-rewards
|
|
35
81
|
"""
|
|
36
82
|
|
|
37
83
|
from __future__ import annotations
|
synth_ai/data/__init__.py
CHANGED
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
"""Synth AI Data Layer.
|
|
2
2
|
|
|
3
3
|
This module provides pure data types with no IO dependencies.
|
|
4
|
-
Contains actual data schemas for traces
|
|
4
|
+
Contains actual data schemas for traces and rewards.
|
|
5
5
|
|
|
6
6
|
Data vs SDK distinction:
|
|
7
|
-
- data/: Pure data records (traces, rewards
|
|
8
|
-
- sdk/: API abstractions (jobs, training,
|
|
7
|
+
- data/: Pure data records (traces, rewards) - actual data
|
|
8
|
+
- sdk/: API abstractions (jobs, training, graphs) - SDK interfaces
|
|
9
9
|
|
|
10
10
|
Dependency rule: data/ imports nothing from synth_ai except typing helpers.
|
|
11
11
|
"""
|
|
@@ -16,13 +16,11 @@ from __future__ import annotations
|
|
|
16
16
|
from synth_ai.data.enums import (
|
|
17
17
|
AdaptiveBatchLevel,
|
|
18
18
|
AdaptiveCurriculumLevel,
|
|
19
|
-
ContainerBackend,
|
|
20
19
|
InferenceMode,
|
|
21
20
|
JobStatus,
|
|
22
21
|
JobType,
|
|
23
22
|
PromptLearningMethod,
|
|
24
23
|
ProviderName,
|
|
25
|
-
ResearchAgentAlgorithm,
|
|
26
24
|
RewardSource,
|
|
27
25
|
RLMethod,
|
|
28
26
|
SFTMethod,
|
|
@@ -38,19 +36,6 @@ from synth_ai.data.rewards import (
|
|
|
38
36
|
RewardRecord,
|
|
39
37
|
)
|
|
40
38
|
|
|
41
|
-
# Spec data types (re-exports)
|
|
42
|
-
from synth_ai.data.specs import (
|
|
43
|
-
Constraints,
|
|
44
|
-
Example,
|
|
45
|
-
GlossaryItem,
|
|
46
|
-
Interfaces,
|
|
47
|
-
Metadata,
|
|
48
|
-
Principle,
|
|
49
|
-
Rule,
|
|
50
|
-
Spec,
|
|
51
|
-
TestCase,
|
|
52
|
-
)
|
|
53
|
-
|
|
54
39
|
# Trace data types (re-exports from tracing_v3)
|
|
55
40
|
from synth_ai.data.traces import (
|
|
56
41
|
BaseEvent,
|
|
@@ -71,8 +56,6 @@ __all__ = [
|
|
|
71
56
|
"PromptLearningMethod",
|
|
72
57
|
"RLMethod",
|
|
73
58
|
"SFTMethod",
|
|
74
|
-
"ResearchAgentAlgorithm",
|
|
75
|
-
"ContainerBackend",
|
|
76
59
|
"InferenceMode",
|
|
77
60
|
"ProviderName",
|
|
78
61
|
"RewardSource",
|
|
@@ -85,16 +68,6 @@ __all__ = [
|
|
|
85
68
|
"RewardAggregates",
|
|
86
69
|
"CalibrationExample",
|
|
87
70
|
"GoldExample",
|
|
88
|
-
# Spec data
|
|
89
|
-
"Spec",
|
|
90
|
-
"Metadata",
|
|
91
|
-
"Principle",
|
|
92
|
-
"Rule",
|
|
93
|
-
"Constraints",
|
|
94
|
-
"Example",
|
|
95
|
-
"TestCase",
|
|
96
|
-
"Interfaces",
|
|
97
|
-
"GlossaryItem",
|
|
98
71
|
# Trace data
|
|
99
72
|
"SessionTrace",
|
|
100
73
|
"SessionTimeStep",
|
synth_ai/data/enums.py
CHANGED
|
@@ -54,22 +54,6 @@ class SFTMethod(str, Enum):
|
|
|
54
54
|
QLORA = "qlora"
|
|
55
55
|
|
|
56
56
|
|
|
57
|
-
class ResearchAgentAlgorithm(str, Enum):
|
|
58
|
-
"""Research agent algorithms."""
|
|
59
|
-
|
|
60
|
-
SCAFFOLD_TUNING = "scaffold_tuning"
|
|
61
|
-
EVALUATION = "evaluation"
|
|
62
|
-
TRACE_ANALYSIS = "trace_analysis"
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
class ContainerBackend(str, Enum):
|
|
66
|
-
"""Container backends for research agent."""
|
|
67
|
-
|
|
68
|
-
DAYTONA = "daytona"
|
|
69
|
-
MODAL = "modal"
|
|
70
|
-
DOCKER = "docker"
|
|
71
|
-
|
|
72
|
-
|
|
73
57
|
class InferenceMode(str, Enum):
|
|
74
58
|
"""Inference modes for policy evaluation."""
|
|
75
59
|
|
|
@@ -89,7 +73,7 @@ class RewardSource(str, Enum):
|
|
|
89
73
|
"""Source of reward signal for training."""
|
|
90
74
|
|
|
91
75
|
TASK_APP = "task_app"
|
|
92
|
-
|
|
76
|
+
VERIFIER = "verifier"
|
|
93
77
|
FUSED = "fused"
|
|
94
78
|
|
|
95
79
|
|
|
@@ -128,8 +112,6 @@ __all__ = [
|
|
|
128
112
|
"PromptLearningMethod",
|
|
129
113
|
"RLMethod",
|
|
130
114
|
"SFTMethod",
|
|
131
|
-
"ResearchAgentAlgorithm",
|
|
132
|
-
"ContainerBackend",
|
|
133
115
|
"InferenceMode",
|
|
134
116
|
"ProviderName",
|
|
135
117
|
"RewardSource",
|
|
@@ -138,4 +120,3 @@ __all__ = [
|
|
|
138
120
|
"SynthModelName",
|
|
139
121
|
"SYNTH_MODEL_NAMES",
|
|
140
122
|
]
|
|
141
|
-
|
synth_ai/data/rewards.py
CHANGED
|
@@ -2,6 +2,43 @@
|
|
|
2
2
|
|
|
3
3
|
This module defines pure data types for representing rewards in training
|
|
4
4
|
and evaluation contexts. These are actual data records, not API abstractions.
|
|
5
|
+
|
|
6
|
+
Synth AI uses two primary reward scopes:
|
|
7
|
+
|
|
8
|
+
- **Event Rewards**: Fine-grained rewards attached to individual events within a session
|
|
9
|
+
(e.g., each tool call, each LLM response). Use `EventRewardRecord` to annotate specific
|
|
10
|
+
events with reward values.
|
|
11
|
+
|
|
12
|
+
- **Outcome Rewards**: Episode-level rewards that summarize the overall success of a
|
|
13
|
+
complete session. Use `OutcomeRewardRecord` for aggregate metrics.
|
|
14
|
+
|
|
15
|
+
Example usage:
|
|
16
|
+
|
|
17
|
+
```python
|
|
18
|
+
from synth_ai.data.rewards import EventRewardRecord, OutcomeRewardRecord
|
|
19
|
+
|
|
20
|
+
# Annotate a specific event with a reward
|
|
21
|
+
event_reward = EventRewardRecord(
|
|
22
|
+
event_id="evt_123",
|
|
23
|
+
session_id="sess_abc",
|
|
24
|
+
reward_value=0.8,
|
|
25
|
+
reward_type="evaluator",
|
|
26
|
+
annotation={"reason": "Correct tool selection"}
|
|
27
|
+
)
|
|
28
|
+
|
|
29
|
+
# Record episode-level outcome
|
|
30
|
+
outcome = OutcomeRewardRecord(
|
|
31
|
+
session_id="sess_abc",
|
|
32
|
+
total_reward=0.85,
|
|
33
|
+
achievements_count=3,
|
|
34
|
+
total_steps=10,
|
|
35
|
+
metadata={"task": "code_generation"}
|
|
36
|
+
)
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
See Also:
|
|
40
|
+
- Event rewards SDK guide: /sdk/tracing/rewards/event-rewards
|
|
41
|
+
- Outcome rewards SDK guide: /sdk/tracing/rewards/outcome-rewards
|
|
5
42
|
"""
|
|
6
43
|
|
|
7
44
|
from __future__ import annotations
|
|
@@ -17,6 +54,20 @@ class RewardRecord:
|
|
|
17
54
|
|
|
18
55
|
Represents a reward signal at a specific point in a trajectory,
|
|
19
56
|
with metadata about its source and scope.
|
|
57
|
+
|
|
58
|
+
Attributes:
|
|
59
|
+
value: The numeric reward value (typically in range [0, 1] or unbounded).
|
|
60
|
+
reward_type: Category of reward - "shaped" (dense), "sparse" (terminal only),
|
|
61
|
+
"achievement" (milestone), "penalty" (negative signal), "evaluator"
|
|
62
|
+
(from LLM verifier), or "human" (manual annotation).
|
|
63
|
+
scope: Granularity level - "step" (per action), "event" (per significant event),
|
|
64
|
+
or "outcome" (episode-level).
|
|
65
|
+
source: Origin of the reward - "environment" (task env), "runner" (framework),
|
|
66
|
+
"evaluator" (verifier), or "human" (annotator).
|
|
67
|
+
key: Optional identifier like achievement name or rubric criterion ID.
|
|
68
|
+
turn: Turn number within the session where reward was earned.
|
|
69
|
+
timestamp: When the reward was recorded.
|
|
70
|
+
metadata: Additional context (e.g., rubric scores, evaluation details).
|
|
20
71
|
"""
|
|
21
72
|
|
|
22
73
|
value: float
|
|
@@ -34,7 +85,29 @@ class OutcomeRewardRecord:
|
|
|
34
85
|
"""Episode-level reward summary.
|
|
35
86
|
|
|
36
87
|
Aggregates reward information for a complete episode/session,
|
|
37
|
-
including total reward, achievements, and step counts.
|
|
88
|
+
including total reward, achievements, and step counts. This is the
|
|
89
|
+
primary data structure for outcome rewards used in training.
|
|
90
|
+
|
|
91
|
+
Attributes:
|
|
92
|
+
session_id: Unique identifier linking to the SessionTrace.
|
|
93
|
+
total_reward: Aggregate reward for the entire episode (typically 0.0-1.0).
|
|
94
|
+
achievements_count: Number of achievements/milestones reached.
|
|
95
|
+
total_steps: Total number of steps in the episode.
|
|
96
|
+
metadata: Task-specific metadata (e.g., {"task": "code_gen", "difficulty": "hard"}).
|
|
97
|
+
annotation: Human or evaluator annotations explaining the score.
|
|
98
|
+
created_at: When this record was created.
|
|
99
|
+
|
|
100
|
+
Example:
|
|
101
|
+
```python
|
|
102
|
+
outcome = OutcomeRewardRecord(
|
|
103
|
+
session_id="sess_abc123",
|
|
104
|
+
total_reward=0.75,
|
|
105
|
+
achievements_count=2,
|
|
106
|
+
total_steps=8,
|
|
107
|
+
metadata={"task": "customer_support"},
|
|
108
|
+
annotation={"evaluator": "Resolved issue but could improve tone"}
|
|
109
|
+
)
|
|
110
|
+
```
|
|
38
111
|
"""
|
|
39
112
|
|
|
40
113
|
session_id: str
|
|
@@ -51,7 +124,32 @@ class EventRewardRecord:
|
|
|
51
124
|
"""Event-level reward annotation.
|
|
52
125
|
|
|
53
126
|
Links a reward to a specific event in a trace, with optional
|
|
54
|
-
annotations and source information.
|
|
127
|
+
annotations and source information. Event rewards provide fine-grained
|
|
128
|
+
feedback on individual actions or decisions within a session.
|
|
129
|
+
|
|
130
|
+
Attributes:
|
|
131
|
+
event_id: Unique identifier of the event being rewarded.
|
|
132
|
+
session_id: Session containing this event.
|
|
133
|
+
reward_value: Reward for this specific event (typically 0.0-1.0).
|
|
134
|
+
reward_type: Category of reward (e.g., "tool_success", "reasoning", "progress").
|
|
135
|
+
key: Rubric criterion or achievement key this reward relates to.
|
|
136
|
+
turn_number: Turn/step within the session where event occurred.
|
|
137
|
+
source: Origin of the reward ("environment", "evaluator", "human").
|
|
138
|
+
annotation: Explanation or details about why this reward was given.
|
|
139
|
+
created_at: When this record was created.
|
|
140
|
+
|
|
141
|
+
Example:
|
|
142
|
+
```python
|
|
143
|
+
event_reward = EventRewardRecord(
|
|
144
|
+
event_id="evt_tool_call_5",
|
|
145
|
+
session_id="sess_abc123",
|
|
146
|
+
reward_value=1.0,
|
|
147
|
+
reward_type="tool_success",
|
|
148
|
+
turn_number=3,
|
|
149
|
+
source="environment",
|
|
150
|
+
annotation={"tool": "search", "result": "found_answer"}
|
|
151
|
+
)
|
|
152
|
+
```
|
|
55
153
|
"""
|
|
56
154
|
|
|
57
155
|
event_id: str
|
|
@@ -149,4 +247,3 @@ __all__ = [
|
|
|
149
247
|
"CalibrationExample",
|
|
150
248
|
"GoldExample",
|
|
151
249
|
]
|
|
152
|
-
|
|
@@ -4,7 +4,7 @@ This product provides tools for optimizing LLM-based workflow graphs
|
|
|
4
4
|
using evolutionary algorithms. It can optimize both:
|
|
5
5
|
|
|
6
6
|
- **Policy graphs**: Graphs that solve tasks (e.g., multi-hop QA, reasoning)
|
|
7
|
-
- **Verifier graphs**: Graphs that
|
|
7
|
+
- **Verifier graphs**: Graphs that verify/score existing results
|
|
8
8
|
|
|
9
9
|
Algorithms:
|
|
10
10
|
- `graph_evolve`: Evolutionary optimization for graph structure
|
|
@@ -43,4 +43,3 @@ __all__ = [
|
|
|
43
43
|
"ConversionWarning",
|
|
44
44
|
"ConversionError",
|
|
45
45
|
]
|
|
46
|
-
|