synth-ai 0.2.17__py3-none-any.whl → 0.2.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (169)
  1. examples/baseline/banking77_baseline.py +204 -0
  2. examples/baseline/crafter_baseline.py +407 -0
  3. examples/baseline/pokemon_red_baseline.py +326 -0
  4. examples/baseline/simple_baseline.py +56 -0
  5. examples/baseline/warming_up_to_rl_baseline.py +239 -0
  6. examples/blog_posts/gepa/README.md +355 -0
  7. examples/blog_posts/gepa/configs/banking77_gepa_local.toml +95 -0
  8. examples/blog_posts/gepa/configs/banking77_gepa_test.toml +82 -0
  9. examples/blog_posts/gepa/configs/banking77_mipro_local.toml +52 -0
  10. examples/blog_posts/gepa/configs/hotpotqa_gepa_local.toml +59 -0
  11. examples/blog_posts/gepa/configs/hotpotqa_gepa_qwen.toml +36 -0
  12. examples/blog_posts/gepa/configs/hotpotqa_mipro_local.toml +53 -0
  13. examples/blog_posts/gepa/configs/hover_gepa_local.toml +59 -0
  14. examples/blog_posts/gepa/configs/hover_gepa_qwen.toml +36 -0
  15. examples/blog_posts/gepa/configs/hover_mipro_local.toml +53 -0
  16. examples/blog_posts/gepa/configs/ifbench_gepa_local.toml +59 -0
  17. examples/blog_posts/gepa/configs/ifbench_gepa_qwen.toml +36 -0
  18. examples/blog_posts/gepa/configs/ifbench_mipro_local.toml +53 -0
  19. examples/blog_posts/gepa/configs/pupa_gepa_local.toml +60 -0
  20. examples/blog_posts/gepa/configs/pupa_mipro_local.toml +54 -0
  21. examples/blog_posts/gepa/deploy_banking77_task_app.sh +41 -0
  22. examples/blog_posts/gepa/gepa_baseline.py +204 -0
  23. examples/blog_posts/gepa/query_prompts_example.py +97 -0
  24. examples/blog_posts/gepa/run_gepa_banking77.sh +87 -0
  25. examples/blog_posts/gepa/task_apps.py +105 -0
  26. examples/blog_posts/gepa/test_gepa_local.sh +67 -0
  27. examples/blog_posts/gepa/verify_banking77_setup.sh +123 -0
  28. examples/blog_posts/pokemon_vl/configs/eval_gpt5nano.toml +26 -0
  29. examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +12 -10
  30. examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +1 -0
  31. examples/blog_posts/pokemon_vl/extract_images.py +239 -0
  32. examples/blog_posts/pokemon_vl/pokemon_vl_baseline.py +326 -0
  33. examples/blog_posts/pokemon_vl/run_eval_extract_images.py +209 -0
  34. examples/blog_posts/pokemon_vl/run_qwen_eval_extract_images.py +212 -0
  35. examples/blog_posts/pokemon_vl/text_box_analysis.md +106 -0
  36. examples/blog_posts/warming_up_to_rl/ARCHITECTURE.md +195 -0
  37. examples/blog_posts/warming_up_to_rl/FINAL_TEST_RESULTS.md +127 -0
  38. examples/blog_posts/warming_up_to_rl/INFERENCE_SUCCESS.md +132 -0
  39. examples/blog_posts/warming_up_to_rl/SMOKE_TESTING.md +164 -0
  40. examples/blog_posts/warming_up_to_rl/SMOKE_TEST_COMPLETE.md +253 -0
  41. examples/blog_posts/warming_up_to_rl/configs/eval_baseline_qwen32b_10x20.toml +25 -0
  42. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b_10x20.toml +26 -0
  43. examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +1 -1
  44. examples/blog_posts/warming_up_to_rl/configs/smoke_test.toml +75 -0
  45. examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +60 -10
  46. examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +1 -1
  47. examples/blog_posts/warming_up_to_rl/warming_up_to_rl_baseline.py +187 -0
  48. examples/multi_step/configs/VERILOG_REWARDS.md +4 -0
  49. examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +4 -0
  50. examples/multi_step/configs/crafter_rl_outcome.toml +1 -0
  51. examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +1 -0
  52. examples/multi_step/configs/crafter_rl_stepwise_simple.toml +1 -0
  53. examples/rl/configs/rl_from_base_qwen17.toml +1 -0
  54. examples/swe/task_app/hosted/inference/openai_client.py +0 -34
  55. examples/swe/task_app/hosted/policy_routes.py +17 -0
  56. examples/swe/task_app/hosted/rollout.py +4 -2
  57. examples/task_apps/banking77/__init__.py +6 -0
  58. examples/task_apps/banking77/banking77_task_app.py +841 -0
  59. examples/task_apps/banking77/deploy_wrapper.py +46 -0
  60. examples/task_apps/crafter/CREATE_SFT_DATASET.md +4 -0
  61. examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +4 -0
  62. examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +4 -0
  63. examples/task_apps/crafter/task_app/grpo_crafter.py +24 -2
  64. examples/task_apps/crafter/task_app/synth_envs_hosted/hosted_app.py +49 -0
  65. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +355 -58
  66. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +68 -7
  67. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +78 -21
  68. examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +194 -1
  69. examples/task_apps/gepa_benchmarks/__init__.py +7 -0
  70. examples/task_apps/gepa_benchmarks/common.py +260 -0
  71. examples/task_apps/gepa_benchmarks/hotpotqa_task_app.py +507 -0
  72. examples/task_apps/gepa_benchmarks/hover_task_app.py +436 -0
  73. examples/task_apps/gepa_benchmarks/ifbench_task_app.py +563 -0
  74. examples/task_apps/gepa_benchmarks/pupa_task_app.py +460 -0
  75. examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +4 -0
  76. examples/task_apps/pokemon_red/task_app.py +254 -36
  77. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +1 -0
  78. examples/warming_up_to_rl/task_app/grpo_crafter.py +53 -4
  79. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +49 -0
  80. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +152 -41
  81. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +31 -1
  82. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +33 -3
  83. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +67 -0
  84. examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +1 -0
  85. synth_ai/api/train/builders.py +90 -1
  86. synth_ai/api/train/cli.py +396 -21
  87. synth_ai/api/train/config_finder.py +13 -2
  88. synth_ai/api/train/configs/__init__.py +15 -1
  89. synth_ai/api/train/configs/prompt_learning.py +442 -0
  90. synth_ai/api/train/configs/rl.py +29 -0
  91. synth_ai/api/train/task_app.py +1 -1
  92. synth_ai/api/train/validators.py +277 -0
  93. synth_ai/baseline/__init__.py +25 -0
  94. synth_ai/baseline/config.py +209 -0
  95. synth_ai/baseline/discovery.py +214 -0
  96. synth_ai/baseline/execution.py +146 -0
  97. synth_ai/cli/__init__.py +85 -17
  98. synth_ai/cli/__main__.py +0 -0
  99. synth_ai/cli/claude.py +70 -0
  100. synth_ai/cli/codex.py +84 -0
  101. synth_ai/cli/commands/__init__.py +1 -0
  102. synth_ai/cli/commands/baseline/__init__.py +12 -0
  103. synth_ai/cli/commands/baseline/core.py +637 -0
  104. synth_ai/cli/commands/baseline/list.py +93 -0
  105. synth_ai/cli/commands/eval/core.py +13 -10
  106. synth_ai/cli/commands/filter/core.py +53 -17
  107. synth_ai/cli/commands/help/core.py +0 -1
  108. synth_ai/cli/commands/smoke/__init__.py +7 -0
  109. synth_ai/cli/commands/smoke/core.py +1436 -0
  110. synth_ai/cli/commands/status/subcommands/pricing.py +22 -0
  111. synth_ai/cli/commands/status/subcommands/usage.py +203 -0
  112. synth_ai/cli/commands/train/judge_schemas.py +1 -0
  113. synth_ai/cli/commands/train/judge_validation.py +1 -0
  114. synth_ai/cli/commands/train/validation.py +0 -57
  115. synth_ai/cli/demo.py +35 -3
  116. synth_ai/cli/deploy/__init__.py +40 -25
  117. synth_ai/cli/deploy.py +162 -0
  118. synth_ai/cli/legacy_root_backup.py +14 -8
  119. synth_ai/cli/opencode.py +107 -0
  120. synth_ai/cli/root.py +9 -5
  121. synth_ai/cli/task_app_deploy.py +1 -1
  122. synth_ai/cli/task_apps.py +53 -53
  123. synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +7 -4
  124. synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +9 -5
  125. synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +4 -3
  126. synth_ai/judge_schemas.py +1 -0
  127. synth_ai/learning/__init__.py +10 -0
  128. synth_ai/learning/prompt_learning_client.py +276 -0
  129. synth_ai/learning/prompt_learning_types.py +184 -0
  130. synth_ai/pricing/__init__.py +2 -0
  131. synth_ai/pricing/model_pricing.py +57 -0
  132. synth_ai/streaming/handlers.py +53 -4
  133. synth_ai/streaming/streamer.py +19 -0
  134. synth_ai/task/apps/__init__.py +1 -0
  135. synth_ai/task/config.py +2 -0
  136. synth_ai/task/tracing_utils.py +25 -25
  137. synth_ai/task/validators.py +44 -8
  138. synth_ai/task_app_cfgs.py +21 -0
  139. synth_ai/tracing_v3/config.py +162 -19
  140. synth_ai/tracing_v3/constants.py +1 -1
  141. synth_ai/tracing_v3/db_config.py +24 -38
  142. synth_ai/tracing_v3/storage/config.py +47 -13
  143. synth_ai/tracing_v3/storage/factory.py +3 -3
  144. synth_ai/tracing_v3/turso/daemon.py +113 -11
  145. synth_ai/tracing_v3/turso/native_manager.py +92 -16
  146. synth_ai/types.py +8 -0
  147. synth_ai/urls.py +11 -0
  148. synth_ai/utils/__init__.py +30 -1
  149. synth_ai/utils/agents.py +74 -0
  150. synth_ai/utils/bin.py +39 -0
  151. synth_ai/utils/cli.py +149 -5
  152. synth_ai/utils/env.py +17 -17
  153. synth_ai/utils/json.py +72 -0
  154. synth_ai/utils/modal.py +283 -1
  155. synth_ai/utils/paths.py +48 -0
  156. synth_ai/utils/uvicorn.py +113 -0
  157. {synth_ai-0.2.17.dist-info → synth_ai-0.2.19.dist-info}/METADATA +102 -4
  158. {synth_ai-0.2.17.dist-info → synth_ai-0.2.19.dist-info}/RECORD +162 -88
  159. synth_ai/cli/commands/deploy/__init__.py +0 -23
  160. synth_ai/cli/commands/deploy/core.py +0 -614
  161. synth_ai/cli/commands/deploy/errors.py +0 -72
  162. synth_ai/cli/commands/deploy/validation.py +0 -11
  163. synth_ai/cli/deploy/core.py +0 -5
  164. synth_ai/cli/deploy/errors.py +0 -23
  165. synth_ai/cli/deploy/validation.py +0 -5
  166. {synth_ai-0.2.17.dist-info → synth_ai-0.2.19.dist-info}/WHEEL +0 -0
  167. {synth_ai-0.2.17.dist-info → synth_ai-0.2.19.dist-info}/entry_points.txt +0 -0
  168. {synth_ai-0.2.17.dist-info → synth_ai-0.2.19.dist-info}/licenses/LICENSE +0 -0
  169. {synth_ai-0.2.17.dist-info → synth_ai-0.2.19.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,46 @@
1
+ """Lightweight Modal deploy wrapper for Banking77 task app (web)."""
2
+ from __future__ import annotations
3
+
4
+ import os
5
+ from pathlib import Path
6
+
7
+ try:
8
+ import modal # type: ignore
9
+ except Exception as exc: # pragma: no cover
10
+ raise SystemExit(f"Modal is required to deploy: {exc}")
11
+
12
+ _here = Path(__file__).resolve()
13
+ _parents = list(_here.parents)
14
+ REPO_ROOT = _parents[3] if len(_parents) > 3 else Path.cwd()
15
+
16
+ app = modal.App("synth-banking77-web")
17
+
18
+ _image = (
19
+ modal.Image.debian_slim(python_version="3.11")
20
+ .pip_install(
21
+ "synth-ai",
22
+ "datasets>=2.14.0",
23
+ "fastapi>=0.115.0",
24
+ "pydantic>=2.0.0",
25
+ "httpx>=0.26.0",
26
+ "python-dotenv>=1.0.0",
27
+ )
28
+ .env({"PYTHONPATH": "/opt/synth_ai_repo"})
29
+ .add_local_dir(str(REPO_ROOT / "synth_ai"), "/opt/synth_ai_repo/synth_ai", copy=True)
30
+ .add_local_dir(str(REPO_ROOT / "examples"), "/opt/synth_ai_repo/examples", copy=True)
31
+ )
32
+ _env_file = REPO_ROOT / ".env"
33
+ if _env_file.exists():
34
+ _image = _image.add_local_file(str(_env_file), "/opt/synth_ai_repo/.env")
35
+
36
+
37
+ @app.function(image=_image, timeout=600)
38
+ @modal.asgi_app()
39
+ def web():
40
+ # Lazy import the task app to avoid local heavy deps
41
+ import contextlib
42
+ with contextlib.suppress(Exception):
43
+ from dotenv import load_dotenv # type: ignore
44
+ load_dotenv(str(REPO_ROOT / ".env"), override=False)
45
+ from examples.task_apps.banking77.banking77_task_app import fastapi_app # type: ignore
46
+ return fastapi_app()
@@ -271,3 +271,7 @@ min_official_score = 0.01 # Filter by outcome_rewards
271
271
  - `QUERY_EXAMPLES.md` - SQL queries for trace analysis
272
272
 
273
273
 
274
+
275
+
276
+
277
+
@@ -172,3 +172,7 @@ cat ft_data/crafter_image_only_sft.jsonl | jq .
172
172
  **Action Required**: Debug why messages aren't being saved to the database despite correct code path.
173
173
 
174
174
 
175
+
176
+
177
+
178
+
@@ -266,3 +266,7 @@ sqlite3 traces/v3/crafter_eval.db \
266
266
  **Status**: 🎉 **WORKING END-TO-END!**
267
267
 
268
268
 
269
+
270
+
271
+
272
+
@@ -6,6 +6,7 @@ import json
6
6
  import logging
7
7
  import os
8
8
  import sys
9
+ from urllib.parse import parse_qs, urlparse
9
10
  from collections.abc import Iterable, Sequence
10
11
  from contextlib import suppress
11
12
  from dataclasses import dataclass
@@ -41,7 +42,16 @@ except Exception: # pragma: no cover - utils unavailable if optional deps missi
41
42
  """Fallback to shared utility for URL normalization."""
42
43
  return normalize_inference_url(raw_url) if raw_url else raw_url
43
44
 
44
- def extract_trace_correlation_id(_raw_url):
45
+ def extract_trace_correlation_id(_raw_url, mode=None):
46
+ if not isinstance(_raw_url, str):
47
+ return None
48
+ parsed = urlparse(_raw_url)
49
+ query_params = parse_qs(parsed.query or "")
50
+ for key in ("cid", "trace", "trace_correlation_id"):
51
+ values = query_params.get(key) or []
52
+ for value in values:
53
+ if isinstance(value, str) and value.strip():
54
+ return value.strip()
45
55
  return None
46
56
  logger = logging.getLogger(__name__)
47
57
 
@@ -655,7 +665,7 @@ def _resolve_trace_correlation_id(policy_cfg: dict[str, Any], mode: Any = None)
655
665
  if stripped:
656
666
  return stripped
657
667
 
658
- return extract_trace_correlation_id(policy_cfg.get("inference_url"))
668
+ return extract_trace_correlation_id(policy_cfg.get("inference_url"), mode=mode)
659
669
 
660
670
 
661
671
  async def rollout_executor(request: RolloutRequest, fastapi_request) -> RolloutResponse:
@@ -812,6 +822,17 @@ async def rollout_executor(request: RolloutRequest, fastapi_request) -> RolloutR
812
822
  trace_correlation_id,
813
823
  )
814
824
  data = legacy_response.model_dump()
825
+ legacy_trace = getattr(legacy_response, "trace", None)
826
+ if legacy_trace is not None:
827
+ if isinstance(legacy_trace, dict):
828
+ legacy_trace_preview = list(legacy_trace.keys())[:5]
829
+ else:
830
+ legacy_trace_preview = type(legacy_trace)
831
+ logger.info(
832
+ "ROLLOUT_EXEC: legacy response trace present type=%s preview=%s",
833
+ type(legacy_trace),
834
+ legacy_trace_preview,
835
+ )
815
836
  logger.debug(
816
837
  "ROLLOUT_EXEC: legacy response keys=%s has_trace=%s",
817
838
  sorted(data.keys()),
@@ -1025,6 +1046,7 @@ register_task_app(
1025
1046
  (str(RUBRICS_ROOT), "/opt/synth_ai_repo/examples/multi_step/rubrics"),
1026
1047
  ),
1027
1048
  secret_names=("groq-api-key", "openai-api-key"),
1049
+ env_vars={"SERVICE": "MODAL"},
1028
1050
  memory=16384,
1029
1051
  cpu=4.0,
1030
1052
  max_containers=10,
@@ -1,6 +1,7 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import contextlib
4
+ import logging
4
5
  import os
5
6
 
6
7
  from fastapi import FastAPI
@@ -9,6 +10,52 @@ from fastapi.responses import JSONResponse
9
10
  from pydantic import BaseModel
10
11
  from starlette.requests import Request
11
12
 
13
+ logger = logging.getLogger(__name__)
14
+
15
+ _VERSION_LOGGED = False
16
+
17
+
18
+ def _resolve_task_app_version() -> str:
19
+ env_version = os.getenv("TASK_APP_VERSION")
20
+ if isinstance(env_version, str) and env_version.strip():
21
+ return env_version.strip()
22
+
23
+ try:
24
+ import importlib.metadata as importlib_metadata # python 3.11 stdlib
25
+
26
+ pkg_version = importlib_metadata.version("synth-ai")
27
+ if isinstance(pkg_version, str) and pkg_version.strip():
28
+ return pkg_version.strip()
29
+ except Exception:
30
+ pass
31
+
32
+ try:
33
+ import synth_ai
34
+
35
+ attr_version = getattr(synth_ai, "__version__", None)
36
+ if isinstance(attr_version, str) and attr_version.strip():
37
+ return attr_version.strip()
38
+ except Exception:
39
+ pass
40
+
41
+ return "unknown"
42
+
43
+
44
+ def _log_task_app_version_once() -> None:
45
+ global _VERSION_LOGGED
46
+ if _VERSION_LOGGED:
47
+ return
48
+
49
+ version = _resolve_task_app_version()
50
+ build_id = os.getenv("TASK_APP_BUILD_ID")
51
+
52
+ if build_id:
53
+ logger.info("TASK_APP_VERSION: %s (build=%s)", version, build_id)
54
+ else:
55
+ logger.info("TASK_APP_VERSION: %s", version)
56
+
57
+ _VERSION_LOGGED = True
58
+
12
59
 
13
60
  class TaskApp:
14
61
  """Holds service configuration and shared state."""
@@ -56,6 +103,8 @@ def create_app(allowed_environments: list[str] = None) -> FastAPI:
56
103
  allow_headers=["*"],
57
104
  )
58
105
 
106
+ _log_task_app_version_once()
107
+
59
108
  # Initialize task app configuration
60
109
  task_app = TaskApp()
61
110
  app.state.task_app = task_app