synth-ai 0.2.14__py3-none-any.whl → 0.2.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (236) hide show
  1. examples/README.md +1 -0
  2. examples/multi_step/SFT_README.md +147 -0
  3. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +9 -9
  4. examples/multi_step/configs/crafter_sft_qwen30b_lora.toml +62 -0
  5. examples/multi_step/convert_traces_to_sft.py +84 -0
  6. examples/multi_step/run_sft_qwen30b.sh +45 -0
  7. examples/qwen_coder/configs/coder_lora_30b.toml +2 -1
  8. examples/qwen_coder/configs/coder_lora_4b.toml +2 -1
  9. examples/qwen_coder/configs/coder_lora_small.toml +2 -1
  10. examples/qwen_vl/BUGS_AND_FIXES.md +232 -0
  11. examples/qwen_vl/IMAGE_VALIDATION_COMPLETE.md +271 -0
  12. examples/qwen_vl/IMAGE_VALIDATION_SUMMARY.md +260 -0
  13. examples/qwen_vl/INFERENCE_SFT_TESTS.md +412 -0
  14. examples/qwen_vl/NEXT_STEPS_2B.md +325 -0
  15. examples/qwen_vl/QUICKSTART.md +327 -0
  16. examples/qwen_vl/QUICKSTART_RL_VISION.md +110 -0
  17. examples/qwen_vl/README.md +154 -0
  18. examples/qwen_vl/RL_VISION_COMPLETE.md +475 -0
  19. examples/qwen_vl/RL_VISION_TESTING.md +333 -0
  20. examples/qwen_vl/SDK_VISION_INTEGRATION.md +328 -0
  21. examples/qwen_vl/SETUP_COMPLETE.md +275 -0
  22. examples/qwen_vl/VISION_TESTS_COMPLETE.md +490 -0
  23. examples/qwen_vl/VLM_PIPELINE_COMPLETE.md +242 -0
  24. examples/qwen_vl/__init__.py +2 -0
  25. examples/qwen_vl/collect_data_via_cli.md +423 -0
  26. examples/qwen_vl/collect_vision_traces.py +368 -0
  27. examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +127 -0
  28. examples/qwen_vl/configs/crafter_vlm_sft_example.toml +60 -0
  29. examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +43 -0
  30. examples/qwen_vl/configs/eval_gpt4o_vision_proper.toml +29 -0
  31. examples/qwen_vl/configs/eval_gpt5nano_vision.toml +45 -0
  32. examples/qwen_vl/configs/eval_qwen2vl_vision.toml +44 -0
  33. examples/qwen_vl/configs/filter_qwen2vl_sft.toml +50 -0
  34. examples/qwen_vl/configs/filter_vision_sft.toml +53 -0
  35. examples/qwen_vl/configs/filter_vision_test.toml +8 -0
  36. examples/qwen_vl/configs/sft_qwen3_vl_2b_test.toml +54 -0
  37. examples/qwen_vl/crafter_gpt5nano_agent.py +308 -0
  38. examples/qwen_vl/crafter_qwen_vl_agent.py +300 -0
  39. examples/qwen_vl/run_vision_comparison.sh +62 -0
  40. examples/qwen_vl/run_vision_sft_pipeline.sh +175 -0
  41. examples/qwen_vl/test_image_validation.py +201 -0
  42. examples/qwen_vl/test_sft_vision_data.py +110 -0
  43. examples/rl/README.md +1 -1
  44. examples/rl/configs/eval_base_qwen.toml +17 -0
  45. examples/rl/configs/eval_rl_qwen.toml +13 -0
  46. examples/rl/configs/rl_from_base_qwen.toml +37 -0
  47. examples/rl/configs/rl_from_base_qwen17.toml +76 -0
  48. examples/rl/configs/rl_from_ft_qwen.toml +37 -0
  49. examples/rl/run_eval.py +436 -0
  50. examples/rl/run_rl_and_save.py +111 -0
  51. examples/rl/task_app/README.md +22 -0
  52. examples/rl/task_app/math_single_step.py +990 -0
  53. examples/rl/task_app/math_task_app.py +111 -0
  54. examples/sft/README.md +5 -5
  55. examples/sft/configs/crafter_fft_qwen0p6b.toml +4 -2
  56. examples/sft/configs/crafter_lora_qwen0p6b.toml +4 -3
  57. examples/sft/evaluate.py +2 -4
  58. examples/sft/export_dataset.py +7 -4
  59. examples/swe/task_app/README.md +1 -1
  60. examples/swe/task_app/grpo_swe_mini.py +0 -1
  61. examples/swe/task_app/grpo_swe_mini_task_app.py +0 -12
  62. examples/swe/task_app/hosted/envs/mini_swe/environment.py +13 -13
  63. examples/swe/task_app/hosted/policy_routes.py +0 -2
  64. examples/swe/task_app/hosted/rollout.py +0 -8
  65. examples/task_apps/crafter/task_app/grpo_crafter.py +4 -7
  66. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +59 -1
  67. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +30 -0
  68. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +62 -31
  69. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +16 -14
  70. examples/task_apps/enron/__init__.py +1 -0
  71. examples/vlm/README.md +3 -3
  72. examples/vlm/configs/crafter_vlm_gpt4o.toml +2 -0
  73. examples/vlm/crafter_openai_vlm_agent.py +3 -5
  74. examples/vlm/filter_image_rows.py +1 -1
  75. examples/vlm/run_crafter_vlm_benchmark.py +2 -2
  76. examples/warming_up_to_rl/_utils.py +92 -0
  77. examples/warming_up_to_rl/analyze_trace_db.py +1 -1
  78. examples/warming_up_to_rl/configs/crafter_fft.toml +2 -0
  79. examples/warming_up_to_rl/configs/crafter_fft_4b.toml +2 -0
  80. examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +2 -0
  81. examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +2 -0
  82. examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +2 -1
  83. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +2 -1
  84. examples/warming_up_to_rl/configs/rl_from_ft.toml +2 -0
  85. examples/warming_up_to_rl/export_trace_sft.py +174 -60
  86. examples/warming_up_to_rl/readme.md +63 -132
  87. examples/warming_up_to_rl/run_fft_and_save.py +1 -1
  88. examples/warming_up_to_rl/run_rl_and_save.py +1 -1
  89. examples/warming_up_to_rl/task_app/README.md +42 -0
  90. examples/warming_up_to_rl/task_app/grpo_crafter.py +696 -0
  91. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +135 -0
  92. examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
  93. examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
  94. examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +143 -0
  95. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1226 -0
  96. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
  97. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
  98. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
  99. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +522 -0
  100. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +478 -0
  101. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +108 -0
  102. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
  103. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
  104. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +204 -0
  105. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
  106. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +618 -0
  107. examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +100 -0
  108. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +1081 -0
  109. examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +195 -0
  110. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1861 -0
  111. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
  112. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +211 -0
  113. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +161 -0
  114. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +137 -0
  115. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +62 -0
  116. synth_ai/__init__.py +44 -30
  117. synth_ai/_utils/__init__.py +47 -0
  118. synth_ai/_utils/base_url.py +10 -0
  119. synth_ai/_utils/http.py +10 -0
  120. synth_ai/_utils/prompts.py +10 -0
  121. synth_ai/_utils/task_app_state.py +12 -0
  122. synth_ai/_utils/user_config.py +10 -0
  123. synth_ai/api/models/supported.py +144 -7
  124. synth_ai/api/train/__init__.py +13 -1
  125. synth_ai/api/train/cli.py +30 -7
  126. synth_ai/api/train/config_finder.py +18 -11
  127. synth_ai/api/train/env_resolver.py +13 -10
  128. synth_ai/cli/__init__.py +62 -78
  129. synth_ai/cli/_modal_wrapper.py +7 -5
  130. synth_ai/cli/_typer_patch.py +0 -2
  131. synth_ai/cli/_validate_task_app.py +22 -4
  132. synth_ai/cli/legacy_root_backup.py +3 -1
  133. synth_ai/cli/lib/__init__.py +10 -0
  134. synth_ai/cli/lib/task_app_discovery.py +7 -0
  135. synth_ai/cli/lib/task_app_env.py +518 -0
  136. synth_ai/cli/recent.py +2 -1
  137. synth_ai/cli/setup.py +266 -0
  138. synth_ai/cli/status.py +1 -1
  139. synth_ai/cli/task_app_deploy.py +16 -0
  140. synth_ai/cli/task_app_list.py +25 -0
  141. synth_ai/cli/task_app_modal_serve.py +16 -0
  142. synth_ai/cli/task_app_serve.py +18 -0
  143. synth_ai/cli/task_apps.py +71 -31
  144. synth_ai/cli/traces.py +1 -1
  145. synth_ai/cli/train.py +18 -0
  146. synth_ai/cli/tui.py +7 -2
  147. synth_ai/cli/turso.py +1 -1
  148. synth_ai/cli/watch.py +1 -1
  149. synth_ai/demos/__init__.py +10 -0
  150. synth_ai/demos/core/__init__.py +28 -1
  151. synth_ai/demos/crafter/__init__.py +1 -0
  152. synth_ai/demos/crafter/crafter_fft_4b.toml +55 -0
  153. synth_ai/demos/crafter/grpo_crafter_task_app.py +185 -0
  154. synth_ai/demos/crafter/rl_from_base_qwen4b.toml +74 -0
  155. synth_ai/demos/demo_registry.py +176 -0
  156. synth_ai/demos/math/__init__.py +1 -0
  157. synth_ai/demos/math/_common.py +16 -0
  158. synth_ai/demos/math/app.py +38 -0
  159. synth_ai/demos/math/config.toml +76 -0
  160. synth_ai/demos/math/deploy_modal.py +54 -0
  161. synth_ai/demos/math/modal_task_app.py +702 -0
  162. synth_ai/demos/math/task_app_entry.py +51 -0
  163. synth_ai/environments/environment/core.py +7 -1
  164. synth_ai/environments/examples/bandit/engine.py +0 -1
  165. synth_ai/environments/examples/bandit/environment.py +0 -1
  166. synth_ai/environments/examples/wordle/environment.py +0 -1
  167. synth_ai/evals/base.py +16 -5
  168. synth_ai/evals/client.py +1 -1
  169. synth_ai/inference/client.py +1 -1
  170. synth_ai/judge_schemas.py +8 -8
  171. synth_ai/learning/client.py +1 -1
  172. synth_ai/learning/health.py +1 -1
  173. synth_ai/learning/jobs.py +1 -1
  174. synth_ai/learning/rl/client.py +1 -1
  175. synth_ai/learning/rl/env_keys.py +1 -1
  176. synth_ai/learning/rl/secrets.py +1 -1
  177. synth_ai/learning/sft/client.py +1 -1
  178. synth_ai/learning/sft/data.py +407 -4
  179. synth_ai/learning/validators.py +4 -1
  180. synth_ai/task/apps/__init__.py +4 -2
  181. synth_ai/task/config.py +6 -4
  182. synth_ai/task/rubrics/__init__.py +1 -2
  183. synth_ai/task/rubrics/loaders.py +14 -10
  184. synth_ai/task/rubrics.py +219 -0
  185. synth_ai/task/trace_correlation_helpers.py +24 -11
  186. synth_ai/task/tracing_utils.py +14 -3
  187. synth_ai/task/validators.py +2 -3
  188. synth_ai/tracing_v3/abstractions.py +3 -3
  189. synth_ai/tracing_v3/config.py +15 -13
  190. synth_ai/tracing_v3/constants.py +21 -0
  191. synth_ai/tracing_v3/db_config.py +3 -1
  192. synth_ai/tracing_v3/decorators.py +10 -7
  193. synth_ai/tracing_v3/llm_call_record_helpers.py +5 -5
  194. synth_ai/tracing_v3/session_tracer.py +7 -7
  195. synth_ai/tracing_v3/storage/base.py +29 -29
  196. synth_ai/tracing_v3/storage/config.py +3 -3
  197. synth_ai/tracing_v3/turso/daemon.py +8 -9
  198. synth_ai/tracing_v3/turso/native_manager.py +80 -72
  199. synth_ai/tracing_v3/utils.py +2 -2
  200. synth_ai/tui/cli/query_experiments.py +4 -4
  201. synth_ai/tui/cli/query_experiments_v3.py +4 -4
  202. synth_ai/tui/dashboard.py +14 -9
  203. synth_ai/utils/__init__.py +101 -0
  204. synth_ai/utils/base_url.py +94 -0
  205. synth_ai/utils/cli.py +131 -0
  206. synth_ai/utils/env.py +287 -0
  207. synth_ai/utils/http.py +169 -0
  208. synth_ai/utils/modal.py +308 -0
  209. synth_ai/utils/process.py +212 -0
  210. synth_ai/utils/prompts.py +39 -0
  211. synth_ai/utils/sqld.py +122 -0
  212. synth_ai/utils/task_app_discovery.py +882 -0
  213. synth_ai/utils/task_app_env.py +186 -0
  214. synth_ai/utils/task_app_state.py +318 -0
  215. synth_ai/utils/user_config.py +137 -0
  216. synth_ai/v0/config/__init__.py +1 -5
  217. synth_ai/v0/config/base_url.py +1 -7
  218. synth_ai/v0/tracing/config.py +1 -1
  219. synth_ai/v0/tracing/decorators.py +1 -1
  220. synth_ai/v0/tracing/upload.py +1 -1
  221. synth_ai/v0/tracing_v1/config.py +1 -1
  222. synth_ai/v0/tracing_v1/decorators.py +1 -1
  223. synth_ai/v0/tracing_v1/upload.py +1 -1
  224. {synth_ai-0.2.14.dist-info → synth_ai-0.2.16.dist-info}/METADATA +85 -31
  225. {synth_ai-0.2.14.dist-info → synth_ai-0.2.16.dist-info}/RECORD +229 -117
  226. synth_ai/cli/man.py +0 -106
  227. synth_ai/compound/cais.py +0 -0
  228. synth_ai/core/experiment.py +0 -13
  229. synth_ai/core/system.py +0 -15
  230. synth_ai/demo_registry.py +0 -295
  231. synth_ai/handshake.py +0 -109
  232. synth_ai/http.py +0 -26
  233. {synth_ai-0.2.14.dist-info → synth_ai-0.2.16.dist-info}/WHEEL +0 -0
  234. {synth_ai-0.2.14.dist-info → synth_ai-0.2.16.dist-info}/entry_points.txt +0 -0
  235. {synth_ai-0.2.14.dist-info → synth_ai-0.2.16.dist-info}/licenses/LICENSE +0 -0
  236. {synth_ai-0.2.14.dist-info → synth_ai-0.2.16.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,111 @@
1
+ """Legacy entrypoint for the math single-step task app."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+ from pathlib import Path
7
+
8
+ from fastapi.exceptions import RequestValidationError
9
+ from fastapi.responses import JSONResponse
10
+ from starlette.requests import Request
11
+ from synth_ai.task.auth import is_api_key_header_authorized, normalize_environment_api_key
12
+ from synth_ai.task.server import create_task_app, run_task_app
13
+
14
+ from .math_single_step import build_config
15
+
16
+
17
+ def fastapi_app():
18
+ """Return a FastAPI application for hosting the math task app."""
19
+
20
+ app = create_task_app(build_config())
21
+
22
+ # Replace default health endpoints with auth-tolerant handlers.
23
+ filtered_routes = []
24
+ for route in app.router.routes:
25
+ path = getattr(route, "path", None)
26
+ methods = getattr(route, "methods", set()) or set()
27
+ if path in {"/health", "/health/rollout"} and "GET" in methods:
28
+ continue
29
+ filtered_routes.append(route)
30
+ app.router.routes = filtered_routes
31
+
32
+ def _log_env_key_prefix(source: str, env_key: str | None) -> str | None:
33
+ if not env_key:
34
+ return None
35
+ prefix = env_key[: max(1, len(env_key) // 2)]
36
+ print(f"[{source}] expected ENVIRONMENT_API_KEY prefix: {prefix}")
37
+ return prefix
38
+
39
+ @app.get("/health")
40
+ async def health(request: Request):
41
+ env_key = normalize_environment_api_key()
42
+ if not env_key:
43
+ return JSONResponse(
44
+ status_code=503,
45
+ content={"status": "unhealthy", "detail": "Missing ENVIRONMENT_API_KEY"},
46
+ )
47
+ if not is_api_key_header_authorized(request):
48
+ prefix = _log_env_key_prefix("health", env_key)
49
+ content = {"status": "healthy", "authorized": False}
50
+ if prefix:
51
+ content["expected_api_key_prefix"] = prefix
52
+ return JSONResponse(status_code=200, content=content)
53
+ return {"status": "healthy", "authorized": True}
54
+
55
+ @app.get("/health/rollout")
56
+ async def health_rollout(request: Request):
57
+ env_key = normalize_environment_api_key()
58
+ if not env_key:
59
+ return JSONResponse(
60
+ status_code=503,
61
+ content={"status": "unhealthy", "detail": "Missing ENVIRONMENT_API_KEY"},
62
+ )
63
+ if not is_api_key_header_authorized(request):
64
+ prefix = _log_env_key_prefix("health/rollout", env_key)
65
+ content = {"status": "healthy", "authorized": False}
66
+ if prefix:
67
+ content["expected_api_key_prefix"] = prefix
68
+ return JSONResponse(status_code=200, content=content)
69
+ return {"ok": True, "authorized": True}
70
+
71
+ @app.exception_handler(RequestValidationError)
72
+ async def _on_validation_error(request: Request, exc: RequestValidationError):
73
+ try:
74
+ hdr = request.headers
75
+ snapshot = {
76
+ "path": str(request.url.path),
77
+ "have_x_api_key": bool(hdr.get("x-api-key")),
78
+ "have_x_api_keys": bool(hdr.get("x-api-keys")),
79
+ "have_authorization": bool(hdr.get("authorization")),
80
+ "errors": exc.errors()[:5],
81
+ }
82
+ print("[422] validation", snapshot, flush=True)
83
+ except Exception:
84
+ pass
85
+ return JSONResponse(
86
+ status_code=422, content={"status": "invalid", "detail": exc.errors()[:5]}
87
+ )
88
+
89
+ return app
90
+
91
+
92
+ if __name__ == "__main__":
93
+ parser = argparse.ArgumentParser(description="Run the math single-step task app locally")
94
+ parser.add_argument("--host", default="0.0.0.0")
95
+ parser.add_argument("--port", type=int, default=8101)
96
+ parser.add_argument("--reload", action="store_true", help="Enable uvicorn autoreload")
97
+ parser.add_argument(
98
+ "--env-file",
99
+ action="append",
100
+ default=[],
101
+ help="Path to .env file to load (can be specified multiple times)",
102
+ )
103
+ args = parser.parse_args()
104
+
105
+ run_task_app(
106
+ build_config,
107
+ host=args.host,
108
+ port=args.port,
109
+ reload=args.reload,
110
+ env_files=args.env_file or [],
111
+ )
examples/sft/README.md CHANGED
@@ -27,7 +27,7 @@ You can generate traces with the Crafter task app and then export them to SFT JS
27
27
  # Serve the task app locally with tracing enabled (example)
28
28
  uvx synth-ai serve grpo-crafter \
29
29
  --trace traces/v3 \
30
- --trace-db traces/v3/synth_ai.db \
30
+ --trace-db traces/v3/task_app_traces_<timestamp>.db \
31
31
  --port 8001
32
32
 
33
33
  # Or run traced local rollouts to accumulate data
@@ -36,9 +36,9 @@ uv run python examples/warming_up_to_rl/run_local_rollout_traced.py \
36
36
 
37
37
  # Export SFT dataset from the trace DB
38
38
  uv run python examples/warming_up_to_rl/export_trace_sft.py \
39
- --db traces/v3/synth_ai.db \
39
+ --db traces/v3/task_app_traces_<timestamp>.db \
40
40
  --min-unique 0 \
41
- --output examples/sft/ft_data/crafter_traces.jsonl
41
+ --output examples/sft/ft_data/crafter_sft.jsonl
42
42
  ```
43
43
 
44
44
  Notes:
@@ -56,7 +56,7 @@ Use the standard CLI. Do not use a custom Python finetuning script. Point the CL
56
56
  uvx synth-ai train \
57
57
  --type sft \
58
58
  --config examples/sft/configs/crafter_lora_qwen0p6b.toml \
59
- --dataset examples/sft/ft_data/crafter_traces.jsonl \
59
+ --dataset examples/sft/ft_data/crafter_sft.jsonl \
60
60
  --env-file /Users/joshpurtell/Documents/GitHub/monorepo/backend/.env.dev
61
61
  ```
62
62
 
@@ -76,7 +76,7 @@ Full finetuning updates all weights and uses a near-identical CLI flow with the
76
76
  uvx synth-ai train \
77
77
  --type sft \
78
78
  --config examples/sft/configs/crafter_fft_qwen0p6b.toml \
79
- --dataset examples/sft/ft_data/crafter_traces.jsonl \
79
+ --dataset examples/sft/ft_data/crafter_sft.jsonl \
80
80
  --env-file /Users/joshpurtell/Documents/GitHub/monorepo/backend/.env.dev
81
81
  ```
82
82
 
@@ -1,7 +1,9 @@
1
+ type = "sft"
2
+
1
3
  [job]
2
4
  model = "Qwen/Qwen3-0.6B"
3
5
  # Prefer passing --dataset at runtime for repeatability
4
- # data = "examples/sft/ft_data/crafter_traces.jsonl"
6
+ # data = "examples/sft/ft_data/crafter_sft.jsonl"
5
7
 
6
8
  [compute]
7
9
  gpu_type = "H100"
@@ -11,7 +13,7 @@ nodes = 1
11
13
  [data]
12
14
  topology = {}
13
15
  # Optional validation set if you have one locally
14
- # validation_path = "examples/sft/ft_data/crafter_traces.val.jsonl"
16
+ # validation_path = "examples/sft/ft_data/crafter_sft.val.jsonl"
15
17
 
16
18
  [training]
17
19
  mode = "sft_offline"
@@ -1,7 +1,9 @@
1
+ type = "sft"
2
+
1
3
  [job]
2
4
  model = "Qwen/Qwen3-0.6B"
3
5
  # Optionally set here, but prefer passing --dataset at runtime
4
- # data = "examples/sft/ft_data/crafter_traces.jsonl"
6
+ # data = "examples/sft/ft_data/crafter_sft.jsonl"
5
7
 
6
8
  [compute]
7
9
  gpu_type = "H100"
@@ -12,7 +14,7 @@ nodes = 1
12
14
  # Forwarded into metadata.effective_config
13
15
  topology = {}
14
16
  # Optional validation set if you have one locally
15
- # validation_path = "examples/sft/ft_data/crafter_traces.val.jsonl"
17
+ # validation_path = "examples/sft/ft_data/crafter_sft.val.jsonl"
16
18
 
17
19
  [training]
18
20
  mode = "lora"
@@ -42,4 +44,3 @@ fsdp = false
42
44
  bf16 = true
43
45
  fp16 = false
44
46
  activation_checkpointing = true
45
-
examples/sft/evaluate.py CHANGED
@@ -11,6 +11,7 @@ from __future__ import annotations
11
11
  import argparse
12
12
  import asyncio
13
13
  import os
14
+ from contextlib import suppress
14
15
  from dataclasses import dataclass
15
16
  from typing import Any
16
17
 
@@ -104,10 +105,8 @@ async def main() -> None:
104
105
  for r in results:
105
106
  ers = r.get("episode_returns") or []
106
107
  if isinstance(ers, list) and ers:
107
- try:
108
+ with suppress(Exception):
108
109
  flat_returns.append(float(ers[0]))
109
- except Exception:
110
- pass
111
110
  if flat_returns:
112
111
  mean_ret = sum(flat_returns) / len(flat_returns)
113
112
  print(f"mean_return={mean_ret:.3f} over {len(flat_returns)} episodes")
@@ -116,4 +115,3 @@ async def main() -> None:
116
115
  if __name__ == "__main__":
117
116
  asyncio.run(main())
118
117
 
119
-
@@ -20,12 +20,17 @@ from examples.warming_up_to_rl.export_trace_sft import (
20
20
  parse_event_filters,
21
21
  write_jsonl,
22
22
  )
23
+ from synth_ai.tracing_v3.constants import TRACE_DB_DIR, canonical_trace_db_name
23
24
 
24
25
 
25
26
  def main() -> None:
26
27
  p = argparse.ArgumentParser(description=__doc__)
27
- p.add_argument("--db", type=Path, default=Path("traces/v3/synth_ai.db"))
28
- p.add_argument("--output", type=Path, default=Path("examples/sft/ft_data/crafter_traces.jsonl"))
28
+ p.add_argument(
29
+ "--db",
30
+ type=Path,
31
+ default=TRACE_DB_DIR / canonical_trace_db_name(),
32
+ )
33
+ p.add_argument("--output", type=Path, default=Path("examples/sft/ft_data/crafter_sft.jsonl"))
29
34
  p.add_argument("--model", action="append", dest="models")
30
35
  p.add_argument("--provider", action="append", dest="providers")
31
36
  p.add_argument("--min-unique", type=int, default=0)
@@ -113,5 +118,3 @@ def main() -> None:
113
118
 
114
119
  if __name__ == "__main__":
115
120
  main()
116
-
117
-
@@ -38,7 +38,7 @@ uvx synth-ai serve swe-mini \
38
38
  --port 8020 \
39
39
  --env-file .env \
40
40
  --trace traces/v3 \
41
- --trace-db traces/v3/synth_ai.db
41
+ --trace-db traces/v3/task_app_traces_<timestamp>.db
42
42
  ```
43
43
 
44
44
  This avoids interactive prompts (useful for CI) and loads `ENVIRONMENT_API_KEY`, `OPENAI_API_KEY`, etc. from `.env`.
@@ -556,7 +556,6 @@ register_task_app(
556
556
  description="mini-swe-agent task app with rollout + proxy endpoints",
557
557
  config_factory=build_config,
558
558
  aliases=("mini-swe", "swe-mini-task"),
559
- env_files=(str(REPO_ROOT / "backend" / ".env.dev"),),
560
559
  modal=ModalDeploymentConfig(
561
560
  app_name="swe-mini-task-app",
562
561
  python_version="3.11",
@@ -114,23 +114,11 @@ if __name__ == "__main__":
114
114
  parser.add_argument("--host", default="0.0.0.0")
115
115
  parser.add_argument("--port", type=int, default=8020)
116
116
  parser.add_argument("--reload", action="store_true", help="Enable uvicorn autoreload")
117
- parser.add_argument(
118
- "--env-file",
119
- action="append",
120
- default=[],
121
- help="Additional .env files to load before startup",
122
- )
123
117
  args = parser.parse_args()
124
118
 
125
- default_env = Path(__file__).resolve().parents[4] / "backend" / ".env.dev"
126
- env_files = [str(default_env)] if default_env.exists() else []
127
- env_files.extend(args.env_file or [])
128
-
129
119
  run_task_app(
130
120
  build_task_app_config,
131
121
  host=args.host,
132
122
  port=args.port,
133
123
  reload=args.reload,
134
- env_files=env_files,
135
124
  )
136
-
@@ -776,7 +776,7 @@ class MiniSweEnvironmentWrapper:
776
776
  or os.getenv("SWE_REX_MODAL_SANDBOX_KWARGS")
777
777
  )
778
778
  modal_kwargs: dict[str, Any] = {}
779
- if isinstance(modal_kwargs_raw, (dict, list)):
779
+ if isinstance(modal_kwargs_raw, dict | list):
780
780
  modal_kwargs = dict(modal_kwargs_raw or {})
781
781
  elif isinstance(modal_kwargs_raw, str) and modal_kwargs_raw.strip():
782
782
  try:
@@ -841,9 +841,9 @@ class MiniSweEnvironmentWrapper:
841
841
  instance_image_tag=instance_image_tag,
842
842
  env_image_tag=env_image_tag,
843
843
  model_name=model_name,
844
- Command=Command,
845
- WriteFileRequest=WriteFileRequest,
846
- ReadFileRequest=ReadFileRequest,
844
+ command_cls=Command,
845
+ write_file_request_cls=WriteFileRequest,
846
+ read_file_request_cls=ReadFileRequest,
847
847
  )
848
848
  try:
849
849
  return self._run_coroutine_blocking(coro)
@@ -867,9 +867,9 @@ class MiniSweEnvironmentWrapper:
867
867
  instance_image_tag: str,
868
868
  env_image_tag: str,
869
869
  model_name: str,
870
- Command,
871
- WriteFileRequest,
872
- ReadFileRequest,
870
+ command_cls,
871
+ write_file_request_cls,
872
+ read_file_request_cls,
873
873
  ) -> dict[str, Any]:
874
874
  deployment = deployment_config.get_deployment()
875
875
  await deployment.start()
@@ -880,7 +880,7 @@ class MiniSweEnvironmentWrapper:
880
880
 
881
881
  # Ensure working directory exists.
882
882
  mkdir_resp = await runtime.execute(
883
- Command(command=["mkdir", "-p", remote_root], timeout=60, shell=False)
883
+ command_cls(command=["mkdir", "-p", remote_root], timeout=60, shell=False)
884
884
  )
885
885
  if mkdir_resp.exit_code not in (0, None):
886
886
  logger.warning("Failed to ensure remote directory %s (exit=%s)", remote_root, mkdir_resp.exit_code)
@@ -888,8 +888,8 @@ class MiniSweEnvironmentWrapper:
888
888
  # Upload dataset & predictions.
889
889
  dataset_blob = json.dumps([instance], ensure_ascii=False)
890
890
  predictions_blob = json.dumps({instance_id: prediction}, ensure_ascii=False)
891
- await runtime.write_file(WriteFileRequest(path=dataset_remote_path, content=dataset_blob))
892
- await runtime.write_file(WriteFileRequest(path=predictions_remote_path, content=predictions_blob))
891
+ await runtime.write_file(write_file_request_cls(path=dataset_remote_path, content=dataset_blob))
892
+ await runtime.write_file(write_file_request_cls(path=predictions_remote_path, content=predictions_blob))
893
893
 
894
894
  eval_cmd = [
895
895
  "python",
@@ -921,7 +921,7 @@ class MiniSweEnvironmentWrapper:
921
921
 
922
922
  command_timeout = max(eval_timeout + 900, 1200)
923
923
  response = await runtime.execute(
924
- Command(
924
+ command_cls(
925
925
  command=eval_cmd,
926
926
  timeout=command_timeout,
927
927
  cwd=remote_root,
@@ -945,7 +945,7 @@ class MiniSweEnvironmentWrapper:
945
945
  for filename in ("report.json", "test_output.txt", "run_instance.log", "patch.diff"):
946
946
  remote_path = f"{remote_log_dir}/{filename}"
947
947
  try:
948
- content = await runtime.read_file(ReadFileRequest(path=remote_path))
948
+ content = await runtime.read_file(read_file_request_cls(path=remote_path))
949
949
  except Exception:
950
950
  continue
951
951
  if getattr(content, "content", None):
@@ -1073,7 +1073,7 @@ class MiniSweEnvironmentWrapper:
1073
1073
  return value
1074
1074
  if isinstance(value, str):
1075
1075
  return value.strip().lower() in {"1", "true", "yes", "on"}
1076
- if isinstance(value, (int, float)):
1076
+ if isinstance(value, int | float):
1077
1077
  return bool(value)
1078
1078
  return False # pragma: no cover - defensive default
1079
1079
 
@@ -343,8 +343,6 @@ async def step_policy(
343
343
  inf_req = meta["inference_request"]
344
344
  msgs = inf_req["messages"]
345
345
  model_name = inf_req.get("model") or getattr(policy, "model", None) or ""
346
- system_messages: list[str] = []
347
- user_messages: list[str] = []
348
346
  if msgs and len(msgs) > 0 and msgs[0]["role"] == "system":
349
347
  sys_text = msgs[0]["content"]
350
348
  policy_name = getattr(policy, "name", "") or type(policy).__name__.lower()
@@ -888,14 +888,6 @@ async def execute_rollout(
888
888
  logger.debug(f"TRACER_FACTORY_FAIL: {exc}")
889
889
  tracing_context = RolloutTracingContext(tracer_instance, request, req)
890
890
  await tracing_context.start_session()
891
- # Print whether tracing is active for this rollout
892
- try:
893
- print(
894
- f"[rollout] tracing enabled={bool(tracing_context.enabled)} run_id={request.run_id}",
895
- flush=True,
896
- )
897
- except Exception:
898
- pass
899
891
 
900
892
  # Register run
901
893
  registry.register_run(request.run_id)
@@ -7,6 +7,7 @@ import logging
7
7
  import os
8
8
  import sys
9
9
  from collections.abc import Iterable, Sequence
10
+ from contextlib import suppress
10
11
  from dataclasses import dataclass
11
12
  from pathlib import Path
12
13
  from typing import Any
@@ -614,16 +615,14 @@ def _coerce_math_to_crafter(request: RolloutRequest) -> RolloutRequest:
614
615
 
615
616
  coerced = request.model_copy(update={"env": updated_env, "policy": updated_policy, "ops": ops_override})
616
617
 
617
- try:
618
+ with suppress(Exception):
618
619
  print(
619
620
  "[rollout] remapped math request -> crafter "
620
621
  f"(env={request.env.env_name!r}→{coerced.env.env_name!r}, "
621
622
  f"policy={request.policy.policy_name!r}→{coerced.policy.policy_name!r})",
622
623
  flush=True,
623
624
  )
624
- except Exception:
625
- pass
626
- try:
625
+ with suppress(Exception):
627
626
  logger.info(
628
627
  "ROLLOUT_ALIAS: remapped math env/policy to crafter (env=%s→%s, policy=%s→%s)",
629
628
  request.env.env_name,
@@ -631,8 +630,6 @@ def _coerce_math_to_crafter(request: RolloutRequest) -> RolloutRequest:
631
630
  request.policy.policy_name,
632
631
  coerced.policy.policy_name,
633
632
  )
634
- except Exception:
635
- pass
636
633
 
637
634
  return coerced
638
635
 
@@ -654,7 +651,7 @@ def _resolve_trace_correlation_id(policy_cfg: dict[str, Any], mode: Any = None)
654
651
  if stripped:
655
652
  return stripped
656
653
 
657
- return extract_trace_correlation_id(policy_cfg.get("inference_url"), mode=mode)
654
+ return extract_trace_correlation_id(policy_cfg.get("inference_url"))
658
655
 
659
656
 
660
657
  async def rollout_executor(request: RolloutRequest, fastapi_request) -> RolloutResponse:
@@ -59,6 +59,13 @@ class CrafterPolicy(Policy):
59
59
  self.trajectory_history: list[dict[str, Any]] = [] # env/policy step records
60
60
 
61
61
  async def initialize(self, config: dict[str, Any]) -> None:
62
+ # DEBUG: Log the incoming config
63
+ import logging
64
+ _logger = logging.getLogger(__name__)
65
+ _logger.debug(f"🔊 [POLICY_INIT] Received config keys: {list(config.keys())}")
66
+ _logger.debug(f"🔊 [POLICY_INIT] use_vision in config: {'use_vision' in config}, value: {config.get('use_vision')}")
67
+ _logger.debug(f"🔊 [POLICY_INIT] image_only_mode in config: {'image_only_mode' in config}, value: {config.get('image_only_mode')}")
68
+
62
69
  if "inference_url" in config:
63
70
  self.inference_url = config["inference_url"]
64
71
  if "model" in config:
@@ -67,6 +74,7 @@ class CrafterPolicy(Policy):
67
74
  self.use_tools = bool(config["use_tools"])
68
75
  if "use_vision" in config:
69
76
  self.use_vision = bool(config["use_vision"])
77
+ _logger.debug(f"🔊 [POLICY_INIT] Set use_vision={self.use_vision} from config")
70
78
  if "image_only_mode" in config:
71
79
  self.image_only_mode = bool(config["image_only_mode"])
72
80
  # If image_only_mode is enabled, automatically enable vision
@@ -97,6 +105,9 @@ class CrafterPolicy(Policy):
97
105
  self.history_messages = []
98
106
  self.turn_index = 0
99
107
  self.trajectory_history = []
108
+
109
+ # DEBUG: Log final state
110
+ _logger.debug(f"🔊 [POLICY_INIT] FINAL STATE: use_vision={self.use_vision}, image_only_mode={self.image_only_mode}, model={self.model}")
100
111
 
101
112
  def _append_user_observation(self, observation_text: str) -> None:
102
113
  self.history_messages.append({"role": "user", "content": observation_text})
@@ -131,10 +142,36 @@ class CrafterPolicy(Policy):
131
142
  history=history,
132
143
  turn=turn,
133
144
  image_parts=image_parts,
145
+ image_only_mode=self.image_only_mode,
134
146
  )
147
+
148
+ # DEBUG: Log message structure
149
+ import logging
150
+ _logger = logging.getLogger(__name__)
151
+ _logger.debug(f"🔊 [BUILD_REQUEST] Built {len(messages)} messages")
152
+ for idx, msg in enumerate(messages):
153
+ role = msg.get("role")
154
+ content = msg.get("content")
155
+ if isinstance(content, list):
156
+ _logger.debug(f"🔊 [BUILD_REQUEST] Message[{idx}] role={role}, content=list[{len(content)}]")
157
+ for part_idx, part in enumerate(content):
158
+ if isinstance(part, dict):
159
+ part_type = part.get("type")
160
+ _logger.debug(f"🔊 [BUILD_REQUEST] Part[{part_idx}]: type={part_type}")
161
+ else:
162
+ content_len = len(str(content)) if content else 0
163
+ _logger.debug(f"🔊 [BUILD_REQUEST] Message[{idx}] role={role}, content_len={content_len}")
164
+
135
165
  payload: dict[str, Any] = {
136
166
  "messages": messages,
137
167
  }
168
+
169
+ # DEBUG: Verify messages are in payload correctly
170
+ _logger.debug(f"🔊 [BUILD_REQUEST_PAYLOAD] Created payload with {len(payload['messages'])} messages")
171
+ for idx, msg in enumerate(payload["messages"]):
172
+ content = msg.get("content")
173
+ _logger.debug(f"🔊 [BUILD_REQUEST_PAYLOAD] Payload message[{idx}]: type={type(content).__name__}, is_list={isinstance(content, list)}, len={len(content) if isinstance(content, list) else len(str(content)) if content else 0}")
174
+
138
175
  if self.model is not None:
139
176
  payload["model"] = self.model
140
177
  # Thinking controls
@@ -360,7 +397,18 @@ class CrafterPolicy(Policy):
360
397
  raw_candidate = metadata.get("raw_observation")
361
398
  if isinstance(raw_candidate, dict):
362
399
  raw_observation = raw_candidate
400
+
401
+ # DEBUG: Log image extraction
402
+ import logging
403
+ _logger = logging.getLogger(__name__)
404
+ _logger.debug(f"🔊 [POLICY] use_vision={self.use_vision}, has_raw_obs={raw_observation is not None}")
405
+ if raw_observation:
406
+ obs = raw_observation.get("observation", raw_observation)
407
+ data_url = obs.get("observation_image_data_url") if isinstance(obs, dict) else None
408
+ _logger.debug(f"🔊 [POLICY] has_data_url={data_url is not None}, url_preview={data_url[:50] if data_url else 'NONE'}...")
409
+
363
410
  image_parts = self._extract_image_parts(raw_observation)
411
+ _logger.debug(f"🔊 [POLICY] Extracted {len(image_parts)} image parts")
364
412
 
365
413
  payload = self.build_inference_request(
366
414
  combined_text,
@@ -368,7 +416,17 @@ class CrafterPolicy(Policy):
368
416
  turn=self.turn_index,
369
417
  image_parts=image_parts,
370
418
  )
371
- # print("Debugging only:; ", payload)
419
+
420
+ # DEBUG: Verify payload before returning
421
+ _logger.debug(f"🔊 [POLICY_STEP_RETURN] About to return payload with {len(payload.get('messages', []))} messages")
422
+ for idx, msg in enumerate(payload.get("messages", [])):
423
+ content = msg.get("content")
424
+ _logger.debug(f"🔊 [POLICY_STEP_RETURN] Return message[{idx}]: type={type(content).__name__}, is_list={isinstance(content, list)}")
425
+ if isinstance(content, list):
426
+ _logger.debug(f"🔊 [POLICY_STEP_RETURN] Content list has {len(content)} items")
427
+ # Add assertion to catch corruption early
428
+ assert len(content) > 0, f"Message content list is empty! This should contain images."
429
+
372
430
  meta_out = {
373
431
  "inference_url": self.inference_url,
374
432
  "inference_request": payload,
@@ -218,8 +218,20 @@ class OpenAIClient:
218
218
  # Do NOT fall back silently; surface the error so callers fail fast
219
219
  raise
220
220
 
221
+ # DEBUG: Log request BEFORE _fix_model_parameters
222
+ logger.debug(f"🔊 [OPENAI_CLIENT_PRE_FIX] Request message[1] content type: {type(request.get('messages', [])[1].get('content') if len(request.get('messages', [])) > 1 else None)}")
223
+ if len(request.get("messages", [])) > 1:
224
+ msg1_content = request["messages"][1].get("content")
225
+ logger.debug(f"🔊 [OPENAI_CLIENT_PRE_FIX] Message[1] content value: {msg1_content if not isinstance(msg1_content, list) else f'list[{len(msg1_content)}]'}")
226
+
221
227
  # Fix parameter compatibility for newer models
222
228
  processed_request = self._fix_model_parameters(request, target_url=url)
229
+
230
+ # DEBUG: Log request AFTER _fix_model_parameters
231
+ logger.debug(f"🔊 [OPENAI_CLIENT_POST_FIX] Processed message[1] content type: {type(processed_request.get('messages', [])[1].get('content') if len(processed_request.get('messages', [])) > 1 else None)}")
232
+ if len(processed_request.get("messages", [])) > 1:
233
+ msg1_content_post = processed_request["messages"][1].get("content")
234
+ logger.debug(f"🔊 [OPENAI_CLIENT_POST_FIX] Message[1] content value: {msg1_content_post if not isinstance(msg1_content_post, list) else f'list[{len(msg1_content_post)}]'}")
223
235
 
224
236
  # Log request (redact messages in production)
225
237
  logger.info(f"Inference POST target: {url}")
@@ -228,6 +240,24 @@ class OpenAIClient:
228
240
  with contextlib.suppress(Exception):
229
241
  keys_preview = sorted(processed_request.keys())
230
242
  logger.info(f"Request keys: {keys_preview}")
243
+ # DEBUG: Log message structure for vision debugging
244
+ if "messages" in processed_request:
245
+ msgs = processed_request["messages"]
246
+ if isinstance(msgs, list):
247
+ logger.debug(f"🔊 [OPENAI_CLIENT] Request has {len(msgs)} messages")
248
+ for idx, msg in enumerate(msgs):
249
+ if isinstance(msg, dict):
250
+ role = msg.get("role")
251
+ content = msg.get("content")
252
+ if isinstance(content, list):
253
+ logger.debug(f"🔊 [OPENAI_CLIENT] Message[{idx}] role={role}, content=list[{len(content)}]")
254
+ for part_idx, part in enumerate(content):
255
+ if isinstance(part, dict):
256
+ part_type = part.get("type")
257
+ logger.debug(f"🔊 [OPENAI_CLIENT] Part[{part_idx}]: type={part_type}")
258
+ else:
259
+ content_len = len(str(content)) if content else 0
260
+ logger.debug(f"🔊 [OPENAI_CLIENT] Message[{idx}] role={role}, content_type={type(content).__name__}, len={content_len}")
231
261
 
232
262
  # Final hard-guard for OpenAI: ensure unsupported field is not present
233
263
  try: