synth-ai 0.4.1__py3-none-any.whl → 0.4.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of synth-ai might be problematic.

Files changed (153)
  1. synth_ai/__init__.py +13 -13
  2. synth_ai/cli/__init__.py +6 -15
  3. synth_ai/cli/commands/eval/__init__.py +6 -15
  4. synth_ai/cli/commands/eval/config.py +338 -0
  5. synth_ai/cli/commands/eval/core.py +236 -1091
  6. synth_ai/cli/commands/eval/runner.py +704 -0
  7. synth_ai/cli/commands/eval/validation.py +44 -117
  8. synth_ai/cli/commands/filter/core.py +7 -7
  9. synth_ai/cli/commands/filter/validation.py +2 -2
  10. synth_ai/cli/commands/smoke/core.py +7 -17
  11. synth_ai/cli/commands/status/__init__.py +1 -64
  12. synth_ai/cli/commands/status/client.py +50 -151
  13. synth_ai/cli/commands/status/config.py +3 -83
  14. synth_ai/cli/commands/status/errors.py +4 -13
  15. synth_ai/cli/commands/status/subcommands/__init__.py +2 -8
  16. synth_ai/cli/commands/status/subcommands/config.py +13 -0
  17. synth_ai/cli/commands/status/subcommands/files.py +18 -63
  18. synth_ai/cli/commands/status/subcommands/jobs.py +28 -311
  19. synth_ai/cli/commands/status/subcommands/models.py +18 -62
  20. synth_ai/cli/commands/status/subcommands/runs.py +16 -63
  21. synth_ai/cli/commands/status/subcommands/session.py +67 -172
  22. synth_ai/cli/commands/status/subcommands/summary.py +24 -32
  23. synth_ai/cli/commands/status/subcommands/utils.py +41 -0
  24. synth_ai/cli/commands/status/utils.py +16 -107
  25. synth_ai/cli/commands/train/__init__.py +18 -20
  26. synth_ai/cli/commands/train/errors.py +3 -3
  27. synth_ai/cli/commands/train/prompt_learning_validation.py +15 -16
  28. synth_ai/cli/commands/train/validation.py +7 -7
  29. synth_ai/cli/commands/train/{judge_schemas.py → verifier_schemas.py} +33 -34
  30. synth_ai/cli/commands/train/verifier_validation.py +235 -0
  31. synth_ai/cli/demo_apps/demo_task_apps/math/config.toml +0 -1
  32. synth_ai/cli/demo_apps/demo_task_apps/math/modal_task_app.py +2 -6
  33. synth_ai/cli/demo_apps/math/config.toml +0 -1
  34. synth_ai/cli/demo_apps/math/modal_task_app.py +2 -6
  35. synth_ai/cli/demo_apps/mipro/task_app.py +25 -47
  36. synth_ai/cli/lib/apps/task_app.py +12 -13
  37. synth_ai/cli/lib/task_app_discovery.py +6 -6
  38. synth_ai/cli/lib/train_cfgs.py +10 -10
  39. synth_ai/cli/task_apps/__init__.py +11 -0
  40. synth_ai/cli/task_apps/commands.py +7 -15
  41. synth_ai/core/env.py +12 -1
  42. synth_ai/core/errors.py +1 -2
  43. synth_ai/core/integrations/cloudflare.py +209 -33
  44. synth_ai/core/tracing_v3/abstractions.py +46 -0
  45. synth_ai/data/__init__.py +3 -30
  46. synth_ai/data/enums.py +1 -20
  47. synth_ai/data/rewards.py +100 -3
  48. synth_ai/products/graph_evolve/__init__.py +1 -2
  49. synth_ai/products/graph_evolve/config.py +16 -16
  50. synth_ai/products/graph_evolve/converters/__init__.py +3 -3
  51. synth_ai/products/graph_evolve/converters/openai_sft.py +7 -7
  52. synth_ai/products/graph_evolve/examples/hotpotqa/config.toml +1 -1
  53. synth_ai/products/graph_gepa/__init__.py +23 -0
  54. synth_ai/products/graph_gepa/converters/__init__.py +19 -0
  55. synth_ai/products/graph_gepa/converters/openai_sft.py +29 -0
  56. synth_ai/sdk/__init__.py +45 -35
  57. synth_ai/sdk/api/eval/__init__.py +33 -0
  58. synth_ai/sdk/api/eval/job.py +732 -0
  59. synth_ai/sdk/api/research_agent/__init__.py +276 -66
  60. synth_ai/sdk/api/train/builders.py +181 -0
  61. synth_ai/sdk/api/train/cli.py +41 -33
  62. synth_ai/sdk/api/train/configs/__init__.py +6 -4
  63. synth_ai/sdk/api/train/configs/prompt_learning.py +127 -33
  64. synth_ai/sdk/api/train/configs/rl.py +264 -16
  65. synth_ai/sdk/api/train/configs/sft.py +165 -1
  66. synth_ai/sdk/api/train/graph_validators.py +12 -12
  67. synth_ai/sdk/api/train/graphgen.py +169 -51
  68. synth_ai/sdk/api/train/graphgen_models.py +95 -45
  69. synth_ai/sdk/api/train/local_api.py +10 -0
  70. synth_ai/sdk/api/train/pollers.py +36 -0
  71. synth_ai/sdk/api/train/prompt_learning.py +390 -60
  72. synth_ai/sdk/api/train/rl.py +41 -5
  73. synth_ai/sdk/api/train/sft.py +2 -0
  74. synth_ai/sdk/api/train/task_app.py +20 -0
  75. synth_ai/sdk/api/train/validators.py +17 -17
  76. synth_ai/sdk/graphs/completions.py +239 -33
  77. synth_ai/sdk/{judging/schemas.py → graphs/verifier_schemas.py} +23 -23
  78. synth_ai/sdk/learning/__init__.py +35 -5
  79. synth_ai/sdk/learning/context_learning_client.py +531 -0
  80. synth_ai/sdk/learning/context_learning_types.py +294 -0
  81. synth_ai/sdk/learning/prompt_learning_client.py +1 -1
  82. synth_ai/sdk/learning/prompt_learning_types.py +2 -1
  83. synth_ai/sdk/learning/rl/__init__.py +0 -4
  84. synth_ai/sdk/learning/rl/contracts.py +0 -4
  85. synth_ai/sdk/localapi/__init__.py +40 -0
  86. synth_ai/sdk/localapi/apps/__init__.py +28 -0
  87. synth_ai/sdk/localapi/client.py +10 -0
  88. synth_ai/sdk/localapi/contracts.py +10 -0
  89. synth_ai/sdk/localapi/helpers.py +519 -0
  90. synth_ai/sdk/localapi/rollouts.py +93 -0
  91. synth_ai/sdk/localapi/server.py +29 -0
  92. synth_ai/sdk/localapi/template.py +49 -0
  93. synth_ai/sdk/streaming/handlers.py +6 -6
  94. synth_ai/sdk/streaming/streamer.py +10 -6
  95. synth_ai/sdk/task/__init__.py +18 -5
  96. synth_ai/sdk/task/apps/__init__.py +37 -1
  97. synth_ai/sdk/task/client.py +9 -1
  98. synth_ai/sdk/task/config.py +6 -11
  99. synth_ai/sdk/task/contracts.py +137 -95
  100. synth_ai/sdk/task/in_process.py +32 -22
  101. synth_ai/sdk/task/in_process_runner.py +9 -4
  102. synth_ai/sdk/task/rubrics/__init__.py +2 -3
  103. synth_ai/sdk/task/rubrics/loaders.py +4 -4
  104. synth_ai/sdk/task/rubrics/strict.py +3 -4
  105. synth_ai/sdk/task/server.py +76 -16
  106. synth_ai/sdk/task/trace_correlation_helpers.py +190 -139
  107. synth_ai/sdk/task/validators.py +34 -49
  108. synth_ai/sdk/training/__init__.py +7 -16
  109. synth_ai/sdk/tunnels/__init__.py +118 -0
  110. synth_ai/sdk/tunnels/cleanup.py +83 -0
  111. synth_ai/sdk/tunnels/ports.py +120 -0
  112. synth_ai/sdk/tunnels/tunneled_api.py +363 -0
  113. {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/METADATA +71 -4
  114. {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/RECORD +118 -128
  115. synth_ai/cli/commands/baseline/__init__.py +0 -12
  116. synth_ai/cli/commands/baseline/core.py +0 -636
  117. synth_ai/cli/commands/baseline/list.py +0 -94
  118. synth_ai/cli/commands/eval/errors.py +0 -81
  119. synth_ai/cli/commands/status/formatters.py +0 -164
  120. synth_ai/cli/commands/status/subcommands/pricing.py +0 -23
  121. synth_ai/cli/commands/status/subcommands/usage.py +0 -203
  122. synth_ai/cli/commands/train/judge_validation.py +0 -305
  123. synth_ai/cli/usage.py +0 -159
  124. synth_ai/data/specs.py +0 -36
  125. synth_ai/sdk/api/research_agent/cli.py +0 -428
  126. synth_ai/sdk/api/research_agent/config.py +0 -357
  127. synth_ai/sdk/api/research_agent/job.py +0 -717
  128. synth_ai/sdk/baseline/__init__.py +0 -25
  129. synth_ai/sdk/baseline/config.py +0 -209
  130. synth_ai/sdk/baseline/discovery.py +0 -216
  131. synth_ai/sdk/baseline/execution.py +0 -154
  132. synth_ai/sdk/judging/__init__.py +0 -15
  133. synth_ai/sdk/judging/base.py +0 -24
  134. synth_ai/sdk/judging/client.py +0 -191
  135. synth_ai/sdk/judging/types.py +0 -42
  136. synth_ai/sdk/research_agent/__init__.py +0 -34
  137. synth_ai/sdk/research_agent/container_builder.py +0 -328
  138. synth_ai/sdk/research_agent/container_spec.py +0 -198
  139. synth_ai/sdk/research_agent/defaults.py +0 -34
  140. synth_ai/sdk/research_agent/results_collector.py +0 -69
  141. synth_ai/sdk/specs/__init__.py +0 -46
  142. synth_ai/sdk/specs/dataclasses.py +0 -149
  143. synth_ai/sdk/specs/loader.py +0 -144
  144. synth_ai/sdk/specs/serializer.py +0 -199
  145. synth_ai/sdk/specs/validation.py +0 -250
  146. synth_ai/sdk/tracing/__init__.py +0 -39
  147. synth_ai/sdk/usage/__init__.py +0 -37
  148. synth_ai/sdk/usage/client.py +0 -171
  149. synth_ai/sdk/usage/models.py +0 -261
  150. {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/WHEEL +0 -0
  151. {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/entry_points.txt +0 -0
  152. {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/licenses/LICENSE +0 -0
  153. {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/top_level.txt +0 -0
synth_ai/core/env.py CHANGED
@@ -14,7 +14,7 @@ from typing import Literal
 from .errors import AuthenticationError, ConfigError
 
 # Default production URL
-PROD_BASE_URL = "https://www.api.usesynth.ai"
+PROD_BASE_URL = "https://api.usesynth.ai"
 PROD_BASE_URL_DEFAULT = PROD_BASE_URL  # Alias for backward compatibility
 
 
@@ -180,6 +180,8 @@ def get_backend_from_env() -> tuple[str, str]:
     direct_override = (os.environ.get("BACKEND_OVERRIDE") or "").strip()
     if direct_override:
         base = _normalize_url(direct_override)
+        if not base:
+            raise ConfigError("BACKEND_OVERRIDE is set but empty or invalid")
         api_key = os.environ.get("SYNTH_API_KEY", "").strip()
         return base, api_key
 
@@ -189,16 +191,25 @@ def get_backend_from_env() -> tuple[str, str]:
 
     if mode == "local":
         base = os.environ.get("LOCAL_BACKEND_URL", "http://localhost:8000")
+        # If explicitly set to empty string, use default
+        if not base or not base.strip():
+            base = "http://localhost:8000"
         key = os.environ.get("TESTING_LOCAL_SYNTH_API_KEY", "")
         return _normalize_url(base), key
 
     if mode == "dev":
         base = os.environ.get("DEV_BACKEND_URL", "") or "http://localhost:8000"
+        # If explicitly set to empty string, use default
+        if not base or not base.strip():
+            base = "http://localhost:8000"
         key = os.environ.get("DEV_SYNTH_API_KEY", "")
         return _normalize_url(base), key
 
     # prod
     base = os.environ.get("PROD_BACKEND_URL", PROD_BASE_URL)
+    # If explicitly set to empty string, use default
+    if not base or not base.strip():
+        base = PROD_BASE_URL
     key = (
         os.environ.get("PROD_SYNTH_API_KEY", "")
         or os.environ.get("TESTING_PROD_SYNTH_API_KEY", "")
synth_ai/core/errors.py CHANGED
@@ -87,7 +87,7 @@ class UsageLimitError(SynthError):
 
     Attributes:
         limit_type: The type of limit exceeded (e.g., "inference_tokens_per_day")
-        api: The API that hit the limit (e.g., "inference", "judges", "prompt_opt")
+        api: The API that hit the limit (e.g., "inference", "verifiers", "prompt_opt")
         current: Current usage value
         limit: The limit value
         tier: The org's tier (e.g., "free", "starter", "growth")
@@ -123,4 +123,3 @@ __all__ = [
    "ModelNotSupportedError",
    "UsageLimitError",
]
-
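Since the documented `api` values change from "judges" to "verifiers", callers that string-match on that attribute need updating. A hedged sketch of consuming the error (attribute names come from the docstring above; the triggering call is illustrative):

```python
from synth_ai.core.errors import UsageLimitError

try:
    ...  # any SDK call that can exceed an org usage limit
except UsageLimitError as e:
    # 0.4.4 reports e.api as "verifiers" where 0.4.1 used "judges"
    print(e.limit_type)  # e.g., "inference_tokens_per_day"
    print(e.api, e.current, e.limit, e.tier)
```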
synth_ai/core/integrations/cloudflare.py CHANGED
@@ -738,13 +738,20 @@ async def resolve_hostname_with_explicit_resolvers(hostname: str) -> str:
     timeout = float(os.getenv("SYNTH_TUNNEL_DNS_TIMEOUT_PER_ATTEMPT_SECS", "5"))
     loop = asyncio.get_event_loop()
 
-    # Try Cloudflare / Google first via `dig`, then fall back to system resolver
-    for resolver_ip in ("1.1.1.1", "8.8.8.8"):
+    # Try various dig resolvers, then fall back to system resolver
+    # Order: 1.1.1.1, 8.8.8.8, then plain dig (uses system's configured DNS but bypasses cache)
+    resolvers = [
+        ("1.1.1.1", ["dig", "@1.1.1.1", "+short", hostname]),
+        ("8.8.8.8", ["dig", "@8.8.8.8", "+short", hostname]),
+        ("default", ["dig", "+short", hostname]),  # Plain dig bypasses negative cache
+    ]
+
+    for resolver_name, cmd in resolvers:
         try:
             result = await loop.run_in_executor(
                 None,
-                lambda ip=resolver_ip: subprocess.run(
-                    ["dig", f"@{ip}", "+short", hostname],
+                lambda c=cmd: subprocess.run(
+                    c,
                     capture_output=True,
                     text=True,
                     timeout=timeout,
@@ -753,17 +760,17 @@ async def resolve_hostname_with_explicit_resolvers(hostname: str) -> str:
             if result.returncode == 0 and result.stdout.strip():
                 first = result.stdout.strip().splitlines()[0].strip()
                 if first:
-                    logger.debug(f"Resolved via {resolver_ip}: {hostname} -> {first}")
+                    logger.debug(f"Resolved via {resolver_name}: {hostname} -> {first}")
                     return first
         except FileNotFoundError:
-            logger.debug(f"dig not found, skipping {resolver_ip}")
+            logger.debug(f"dig not found, skipping {resolver_name}")
            continue
        except Exception as e:
-            logger.debug(f"Resolver {resolver_ip} failed: {e}")
+            logger.debug(f"Resolver {resolver_name} failed: {e}")
            continue
-
-    # Fallback: system resolver
-    logger.debug(f"Falling back to system resolver for {hostname}")
+
+    # Final fallback: system resolver (may hit negative cache)
+    logger.debug(f"Falling back to socket.gethostbyname for {hostname}")
     return await loop.run_in_executor(
         None,
         socket.gethostbyname,
@@ -822,13 +829,24 @@ async def verify_tunnel_dns_resolution(
         resolved_ip = await resolve_hostname_with_explicit_resolvers(hostname)
         logger.info(f"DNS resolution successful (attempt {attempt}): {hostname} -> {resolved_ip}")
 
-        # 2. HTTP connectivity: hit the tunnel via the resolved IP, but keep Host header.
-        # This avoids depending on the system resolver, which is what gave you EAI_NONAME.
+        # 2. HTTP connectivity: use curl with --resolve to bypass system DNS cache
+        # The system resolver may have negative-cached the hostname, so we use
+        # curl with explicit IP resolution to bypass it while maintaining proper SNI.
         try:
             scheme = parsed.scheme or "https"
-            test_url = f"{scheme}://{resolved_ip}/health"
-            headers = {"Host": hostname}
-
+            test_url = f"{scheme}://{hostname}/health"
+            port = 443 if scheme == "https" else 80
+
+            # Build curl command with --resolve to bypass system DNS
+            # Format: --resolve hostname:port:ip
+            curl_cmd = [
+                "curl", "-s", "-o", "/dev/null", "-w", "%{http_code}",
+                "--max-time", "5",
+                "-k",  # Allow self-signed certs
+                "--resolve", f"{hostname}:{port}:{resolved_ip}",
+                test_url,
+            ]
+
             # Include API key if provided (or from env var)
             if api_key is None:
                 # Try to load .env file if available
@@ -839,25 +857,34 @@ async def verify_tunnel_dns_resolution(
                     pass
                 api_key = os.getenv("ENVIRONMENT_API_KEY")
             if api_key:
-                headers["X-API-Key"] = api_key
-
-            # For Quick Tunnels, TLS cert is for *.trycloudflare.com, not the bare IP,
-            # so we disable verification here; this is just a readiness probe.
-            async with httpx.AsyncClient(timeout=5.0, verify=False) as client:
-                resp = await client.get(test_url, headers=headers)
-                # Accept 200 (OK), 400/401 (auth required - server is reachable), 404/405 (not found/method not allowed)
-                # All of these indicate the tunnel is working and the server is responding
-                if resp.status_code in (200, 400, 401, 404, 405):
-                    logger.info(f"HTTP connectivity verified via IP: {test_url} -> {resp.status_code}")
-                    return
+                curl_cmd.extend(["-H", f"X-API-Key: {api_key}"])
+
+            result = await loop.run_in_executor(
+                None,
+                lambda: subprocess.run(curl_cmd, capture_output=True, text=True, timeout=10),
+            )
+
+            status_code = int(result.stdout.strip()) if result.returncode == 0 and result.stdout.strip().isdigit() else 0
+
+            # Accept various status codes that indicate the tunnel is working:
+            # - 200: OK (service is running)
+            # - 400/401/403: Auth required (server is reachable)
+            # - 404/405: Not found / method not allowed (server is reachable)
+            # - 502: Bad gateway (cloudflared connected but local service isn't running)
+            if status_code in (200, 400, 401, 403, 404, 405, 502):
+                logger.info(f"HTTP connectivity verified: {test_url} -> {status_code}")
+                return
+            else:
+                # 530 errors are common when tunnel is still establishing - retry
+                if status_code == 530:
+                    logger.debug("HTTP 530 (tunnel establishing) - will retry")
+                    last_exc = RuntimeError("tunnel not ready yet (HTTP 530)")
+                elif result.returncode != 0:
+                    logger.warning(f"curl failed: {result.stderr}")
+                    last_exc = RuntimeError(f"curl failed: {result.stderr}")
                 else:
-                    # 530 errors are common when tunnel is still establishing - be lenient
-                    if resp.status_code == 530:
-                        logger.debug("HTTP 530 (tunnel establishing) - will retry")
-                        last_exc = RuntimeError("tunnel not ready yet (HTTP 530)")
-                    else:
-                        logger.warning(f"HTTP check returned unexpected status: {resp.status_code}")
-                        last_exc = RuntimeError(f"unexpected HTTP status {resp.status_code}")
+                    logger.warning(f"HTTP check returned unexpected status: {status_code}")
+                    last_exc = RuntimeError(f"unexpected HTTP status {status_code}")
         except Exception as http_exc:
             logger.warning(f"HTTP connectivity check failed (attempt {attempt}): {http_exc}")
             last_exc = http_exc
@@ -1102,6 +1129,155 @@ def open_managed_tunnel(tunnel_token: str) -> subprocess.Popen:
     )
 
 
+async def wait_for_cloudflared_connection(
+    proc: subprocess.Popen,
+    timeout_seconds: float = 30.0,
+) -> bool:
+    """
+    Wait for cloudflared to establish a connection to Cloudflare's edge.
+
+    This monitors cloudflared's stdout/stderr for connection success messages.
+    DNS records only resolve AFTER cloudflared has connected, so this function
+    must be called before attempting DNS verification.
+
+    Args:
+        proc: The cloudflared subprocess from open_managed_tunnel()
+        timeout_seconds: Maximum time to wait for connection
+
+    Returns:
+        True if connection was established, False if timeout or error
+
+    Raises:
+        RuntimeError: If cloudflared exits with an error before connecting
+    """
+    import select
+
+    # Patterns that indicate successful connection
+    # cloudflared outputs: "INF Registered tunnel connection connIndex=0 connection=..."
+    # We need to be specific - "connIndex=" alone triggers too early on curve preferences log
+    connection_patterns = [
+        "Registered tunnel connection",
+        "Connection registered",
+        # Don't use "connIndex=" alone - it matches curve preferences log before actual connection
+    ]
+
+    # Patterns that indicate fatal errors
+    error_patterns = [
+        "failed to connect",
+        "error connecting",
+        "tunnel credentials",
+        "invalid token",
+        "tunnel not found",
+        "unauthorized",
+    ]
+
+    loop = asyncio.get_event_loop()
+    start_time = loop.time()
+    output_lines: list[str] = []
+
+    logger.info(f"Waiting for cloudflared to connect (timeout {timeout_seconds}s)...")
+
+    while True:
+        elapsed = loop.time() - start_time
+        if elapsed >= timeout_seconds:
+            logger.warning(
+                f"cloudflared connection timeout after {elapsed:.1f}s. "
+                f"Output: {' | '.join(output_lines[-10:])}"
+            )
+            return False
+
+        # Check if process exited
+        if proc.poll() is not None:
+            # Process exited - read remaining output
+            remaining = proc.stdout.read() if proc.stdout else ""
+            if remaining:
+                output_lines.extend(remaining.splitlines())
+
+            all_output = "\n".join(output_lines)
+            logger.error(
+                f"cloudflared exited with code {proc.returncode} before connecting. "
+                f"Output:\n{all_output[:2000]}"
+            )
+            raise RuntimeError(
+                f"cloudflared exited with code {proc.returncode} before establishing connection. "
+                f"This usually means the tunnel token is invalid or the tunnel was deleted. "
+                f"Output: {all_output[:500]}"
+            )
+
+        # Try to read output (non-blocking)
+        if proc.stdout:
+            try:
+                # Use select for non-blocking read
+                ready, _, _ = select.select([proc.stdout], [], [], 0.1)
+                if ready:
+                    line = proc.stdout.readline()
+                    if line:
+                        line = line.strip()
+                        output_lines.append(line)
+                        logger.debug(f"cloudflared: {line}")
+
+                        # Check for connection success
+                        line_lower = line.lower()
+                        for pattern in connection_patterns:
+                            if pattern.lower() in line_lower:
+                                logger.info(
+                                    f"cloudflared connected after {elapsed:.1f}s: {line}"
+                                )
+                                return True
+
+                        # Check for fatal errors
+                        for pattern in error_patterns:
+                            if pattern.lower() in line_lower:
+                                logger.error(f"cloudflared error detected: {line}")
+                                raise RuntimeError(
+                                    f"cloudflared connection failed: {line}"
+                                )
+            except (ValueError, OSError) as e:
+                logger.debug(f"Error reading cloudflared output: {e}")
+
+        # Small sleep to avoid busy loop
+        await asyncio.sleep(0.1)
+
+
+async def open_managed_tunnel_with_connection_wait(
+    tunnel_token: str,
+    timeout_seconds: float = 30.0,
+) -> subprocess.Popen:
+    """
+    Open a managed tunnel and wait for cloudflared to connect.
+
+    This is the preferred method for starting managed tunnels as it ensures
+    cloudflared has actually connected to Cloudflare's edge before returning.
+    DNS records only resolve after this connection is established.
+
+    Args:
+        tunnel_token: Cloudflare tunnel token from backend API
+        timeout_seconds: Maximum time to wait for connection
+
+    Returns:
+        Process handle for the connected tunnel
+
+    Raises:
+        RuntimeError: If cloudflared fails to connect within timeout
+    """
+    proc = open_managed_tunnel(tunnel_token)
+
+    try:
+        connected = await wait_for_cloudflared_connection(proc, timeout_seconds)
+        if not connected:
+            # Timeout - kill process and raise
+            stop_tunnel(proc)
+            raise RuntimeError(
+                f"cloudflared failed to connect within {timeout_seconds}s. "
+                "The tunnel may be invalid or Cloudflare may be experiencing issues."
+            )
+        return proc
+    except Exception:
+        # Cleanup on any error
+        stop_tunnel(proc)
+        raise
+
+
 def stop_tunnel(proc: Optional[subprocess.Popen]) -> None:
     """
     Gracefully stop a tunnel process.
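Callers are expected to prefer the new connection-wait entry point over calling open_managed_tunnel directly. A minimal usage sketch, assuming these names are importable from synth_ai/core/integrations/cloudflare.py as listed in the files-changed table (the token value is a placeholder):

```python
import asyncio

from synth_ai.core.integrations.cloudflare import (
    open_managed_tunnel_with_connection_wait,
    stop_tunnel,
)

async def main() -> None:
    # Raises RuntimeError if cloudflared cannot connect within the timeout
    proc = await open_managed_tunnel_with_connection_wait(
        tunnel_token="<tunnel token from backend API>",
        timeout_seconds=30.0,
    )
    try:
        ...  # tunnel is connected; DNS verification can now succeed
    finally:
        stop_tunnel(proc)

asyncio.run(main())
```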
synth_ai/core/tracing_v3/abstractions.py CHANGED
@@ -32,6 +32,52 @@ Concepts:
     tool result back, and the agent sending a reply to the user. Do not confuse these with
     provider-specific LLM API "messages" (prompt formatting) — those belong inside an LMCAISEvent
     as part of its input/output content, not as SessionEventMessages.
+
+Example usage:
+
+```python
+from synth_ai.core.tracing_v3.abstractions import (
+    SessionTrace,
+    SessionTimeStep,
+    LMCAISEvent,
+    EnvironmentEvent,
+    TimeRecord,
+)
+import time
+
+# Create a simple trace with one turn
+trace = SessionTrace(
+    session_id="sess_example",
+    session_time_steps=[
+        SessionTimeStep(
+            step_id="turn_1",
+            step_index=0,
+            events=[
+                LMCAISEvent(
+                    system_instance_id="llm",
+                    time_record=TimeRecord(event_time=time.time()),
+                    model_name="gpt-4o",
+                    input_tokens=150,
+                    output_tokens=50,
+                ),
+                EnvironmentEvent(
+                    system_instance_id="tool_executor",
+                    time_record=TimeRecord(event_time=time.time()),
+                    reward=1.0,
+                    terminated=True,
+                ),
+            ],
+        )
+    ],
+)
+
+# Convert to dict for serialization
+trace_dict = trace.to_dict()
+```
+
+See Also:
+    - V3 Traces SDK guide: /sdk/tracing/v3-traces
+    - Event rewards: /sdk/tracing/rewards/event-rewards
 """
 
 from __future__ import annotations
synth_ai/data/__init__.py CHANGED
@@ -1,11 +1,11 @@
 """Synth AI Data Layer.
 
 This module provides pure data types with no IO dependencies.
-Contains actual data schemas for traces, rewards, and specs.
+Contains actual data schemas for traces and rewards.
 
 Data vs SDK distinction:
-- data/: Pure data records (traces, rewards, specs) - actual data
-- sdk/: API abstractions (jobs, training, judging) - SDK interfaces
+- data/: Pure data records (traces, rewards) - actual data
+- sdk/: API abstractions (jobs, training, graphs) - SDK interfaces
 
 Dependency rule: data/ imports nothing from synth_ai except typing helpers.
 """
@@ -16,13 +16,11 @@ from __future__ import annotations
 from synth_ai.data.enums import (
     AdaptiveBatchLevel,
     AdaptiveCurriculumLevel,
-    ContainerBackend,
     InferenceMode,
     JobStatus,
     JobType,
     PromptLearningMethod,
     ProviderName,
-    ResearchAgentAlgorithm,
     RewardSource,
     RLMethod,
     SFTMethod,
@@ -38,19 +36,6 @@ from synth_ai.data.rewards import (
     RewardRecord,
 )
 
-# Spec data types (re-exports)
-from synth_ai.data.specs import (
-    Constraints,
-    Example,
-    GlossaryItem,
-    Interfaces,
-    Metadata,
-    Principle,
-    Rule,
-    Spec,
-    TestCase,
-)
-
 # Trace data types (re-exports from tracing_v3)
 from synth_ai.data.traces import (
     BaseEvent,
@@ -71,8 +56,6 @@ __all__ = [
     "PromptLearningMethod",
     "RLMethod",
     "SFTMethod",
-    "ResearchAgentAlgorithm",
-    "ContainerBackend",
     "InferenceMode",
     "ProviderName",
     "RewardSource",
@@ -85,16 +68,6 @@ __all__ = [
     "RewardAggregates",
     "CalibrationExample",
     "GoldExample",
-    # Spec data
-    "Spec",
-    "Metadata",
-    "Principle",
-    "Rule",
-    "Constraints",
-    "Example",
-    "TestCase",
-    "Interfaces",
-    "GlossaryItem",
     # Trace data
     "SessionTrace",
     "SessionTimeStep",
synth_ai/data/enums.py CHANGED
@@ -54,22 +54,6 @@ class SFTMethod(str, Enum):
     QLORA = "qlora"
 
 
-class ResearchAgentAlgorithm(str, Enum):
-    """Research agent algorithms."""
-
-    SCAFFOLD_TUNING = "scaffold_tuning"
-    EVALUATION = "evaluation"
-    TRACE_ANALYSIS = "trace_analysis"
-
-
-class ContainerBackend(str, Enum):
-    """Container backends for research agent."""
-
-    DAYTONA = "daytona"
-    MODAL = "modal"
-    DOCKER = "docker"
-
-
 class InferenceMode(str, Enum):
     """Inference modes for policy evaluation."""
 
@@ -89,7 +73,7 @@ class RewardSource(str, Enum):
     """Source of reward signal for training."""
 
     TASK_APP = "task_app"
-    JUDGE = "judge"
+    VERIFIER = "verifier"
     FUSED = "fused"
 
 
@@ -128,8 +112,6 @@ __all__ = [
     "PromptLearningMethod",
     "RLMethod",
     "SFTMethod",
-    "ResearchAgentAlgorithm",
-    "ContainerBackend",
     "InferenceMode",
     "ProviderName",
     "RewardSource",
@@ -138,4 +120,3 @@ __all__ = [
    "SynthModelName",
    "SYNTH_MODEL_NAMES",
]
-
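Code that referenced the removed or renamed members needs a small migration. A minimal sketch (member and value names from the diff above):

```python
from synth_ai.data.enums import RewardSource

source = RewardSource.VERIFIER     # 0.4.4; was RewardSource.JUDGE in 0.4.1
assert source.value == "verifier"  # serialized value changed from "judge"

# ResearchAgentAlgorithm and ContainerBackend no longer exist in this module
```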
synth_ai/data/rewards.py CHANGED
@@ -2,6 +2,43 @@
 
 This module defines pure data types for representing rewards in training
 and evaluation contexts. These are actual data records, not API abstractions.
+
+Synth AI uses two primary reward scopes:
+
+- **Event Rewards**: Fine-grained rewards attached to individual events within a session
+  (e.g., each tool call, each LLM response). Use `EventRewardRecord` to annotate specific
+  events with reward values.
+
+- **Outcome Rewards**: Episode-level rewards that summarize the overall success of a
+  complete session. Use `OutcomeRewardRecord` for aggregate metrics.
+
+Example usage:
+
+```python
+from synth_ai.data.rewards import EventRewardRecord, OutcomeRewardRecord
+
+# Annotate a specific event with a reward
+event_reward = EventRewardRecord(
+    event_id="evt_123",
+    session_id="sess_abc",
+    reward_value=0.8,
+    reward_type="evaluator",
+    annotation={"reason": "Correct tool selection"}
+)
+
+# Record episode-level outcome
+outcome = OutcomeRewardRecord(
+    session_id="sess_abc",
+    total_reward=0.85,
+    achievements_count=3,
+    total_steps=10,
+    metadata={"task": "code_generation"}
+)
+```
+
+See Also:
+    - Event rewards SDK guide: /sdk/tracing/rewards/event-rewards
+    - Outcome rewards SDK guide: /sdk/tracing/rewards/outcome-rewards
 """
 
 from __future__ import annotations
@@ -17,6 +54,20 @@ class RewardRecord:
 
     Represents a reward signal at a specific point in a trajectory,
     with metadata about its source and scope.
+
+    Attributes:
+        value: The numeric reward value (typically in range [0, 1] or unbounded).
+        reward_type: Category of reward - "shaped" (dense), "sparse" (terminal only),
+            "achievement" (milestone), "penalty" (negative signal), "evaluator"
+            (from LLM verifier), or "human" (manual annotation).
+        scope: Granularity level - "step" (per action), "event" (per significant event),
+            or "outcome" (episode-level).
+        source: Origin of the reward - "environment" (task env), "runner" (framework),
+            "evaluator" (verifier), or "human" (annotator).
+        key: Optional identifier like achievement name or rubric criterion ID.
+        turn: Turn number within the session where reward was earned.
+        timestamp: When the reward was recorded.
+        metadata: Additional context (e.g., rubric scores, evaluation details).
     """
 
     value: float
@@ -34,7 +85,29 @@ class OutcomeRewardRecord:
     """Episode-level reward summary.
 
     Aggregates reward information for a complete episode/session,
-    including total reward, achievements, and step counts.
+    including total reward, achievements, and step counts. This is the
+    primary data structure for outcome rewards used in training.
+
+    Attributes:
+        session_id: Unique identifier linking to the SessionTrace.
+        total_reward: Aggregate reward for the entire episode (typically 0.0-1.0).
+        achievements_count: Number of achievements/milestones reached.
+        total_steps: Total number of steps in the episode.
+        metadata: Task-specific metadata (e.g., {"task": "code_gen", "difficulty": "hard"}).
+        annotation: Human or evaluator annotations explaining the score.
+        created_at: When this record was created.
+
+    Example:
+        ```python
+        outcome = OutcomeRewardRecord(
+            session_id="sess_abc123",
+            total_reward=0.75,
+            achievements_count=2,
+            total_steps=8,
+            metadata={"task": "customer_support"},
+            annotation={"evaluator": "Resolved issue but could improve tone"}
+        )
+        ```
     """
 
     session_id: str
@@ -51,7 +124,32 @@ class EventRewardRecord:
     """Event-level reward annotation.
 
     Links a reward to a specific event in a trace, with optional
-    annotations and source information.
+    annotations and source information. Event rewards provide fine-grained
+    feedback on individual actions or decisions within a session.
+
+    Attributes:
+        event_id: Unique identifier of the event being rewarded.
+        session_id: Session containing this event.
+        reward_value: Reward for this specific event (typically 0.0-1.0).
+        reward_type: Category of reward (e.g., "tool_success", "reasoning", "progress").
+        key: Rubric criterion or achievement key this reward relates to.
+        turn_number: Turn/step within the session where event occurred.
+        source: Origin of the reward ("environment", "evaluator", "human").
+        annotation: Explanation or details about why this reward was given.
+        created_at: When this record was created.
+
+    Example:
+        ```python
+        event_reward = EventRewardRecord(
+            event_id="evt_tool_call_5",
+            session_id="sess_abc123",
+            reward_value=1.0,
+            reward_type="tool_success",
+            turn_number=3,
+            source="environment",
+            annotation={"tool": "search", "result": "found_answer"}
+        )
+        ```
     """
 
     event_id: str
@@ -149,4 +247,3 @@ __all__ = [
    "CalibrationExample",
    "GoldExample",
]
-
synth_ai/products/graph_evolve/__init__.py CHANGED
@@ -4,7 +4,7 @@ This product provides tools for optimizing LLM-based workflow graphs
 using evolutionary algorithms. It can optimize both:
 
 - **Policy graphs**: Graphs that solve tasks (e.g., multi-hop QA, reasoning)
-- **Verifier graphs**: Graphs that judge/score existing results
+- **Verifier graphs**: Graphs that verify/score existing results
 
 Algorithms:
 - `graph_evolve`: Evolutionary optimization for graph structure
@@ -43,4 +43,3 @@ __all__ = [
    "ConversionWarning",
    "ConversionError",
]
-