synth-ai 0.2.14__py3-none-any.whl → 0.2.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (236) hide show
  1. examples/README.md +1 -0
  2. examples/multi_step/SFT_README.md +147 -0
  3. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +9 -9
  4. examples/multi_step/configs/crafter_sft_qwen30b_lora.toml +62 -0
  5. examples/multi_step/convert_traces_to_sft.py +84 -0
  6. examples/multi_step/run_sft_qwen30b.sh +45 -0
  7. examples/qwen_coder/configs/coder_lora_30b.toml +2 -1
  8. examples/qwen_coder/configs/coder_lora_4b.toml +2 -1
  9. examples/qwen_coder/configs/coder_lora_small.toml +2 -1
  10. examples/qwen_vl/BUGS_AND_FIXES.md +232 -0
  11. examples/qwen_vl/IMAGE_VALIDATION_COMPLETE.md +271 -0
  12. examples/qwen_vl/IMAGE_VALIDATION_SUMMARY.md +260 -0
  13. examples/qwen_vl/INFERENCE_SFT_TESTS.md +412 -0
  14. examples/qwen_vl/NEXT_STEPS_2B.md +325 -0
  15. examples/qwen_vl/QUICKSTART.md +327 -0
  16. examples/qwen_vl/QUICKSTART_RL_VISION.md +110 -0
  17. examples/qwen_vl/README.md +154 -0
  18. examples/qwen_vl/RL_VISION_COMPLETE.md +475 -0
  19. examples/qwen_vl/RL_VISION_TESTING.md +333 -0
  20. examples/qwen_vl/SDK_VISION_INTEGRATION.md +328 -0
  21. examples/qwen_vl/SETUP_COMPLETE.md +275 -0
  22. examples/qwen_vl/VISION_TESTS_COMPLETE.md +490 -0
  23. examples/qwen_vl/VLM_PIPELINE_COMPLETE.md +242 -0
  24. examples/qwen_vl/__init__.py +2 -0
  25. examples/qwen_vl/collect_data_via_cli.md +423 -0
  26. examples/qwen_vl/collect_vision_traces.py +368 -0
  27. examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +127 -0
  28. examples/qwen_vl/configs/crafter_vlm_sft_example.toml +60 -0
  29. examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +43 -0
  30. examples/qwen_vl/configs/eval_gpt4o_vision_proper.toml +29 -0
  31. examples/qwen_vl/configs/eval_gpt5nano_vision.toml +45 -0
  32. examples/qwen_vl/configs/eval_qwen2vl_vision.toml +44 -0
  33. examples/qwen_vl/configs/filter_qwen2vl_sft.toml +50 -0
  34. examples/qwen_vl/configs/filter_vision_sft.toml +53 -0
  35. examples/qwen_vl/configs/filter_vision_test.toml +8 -0
  36. examples/qwen_vl/configs/sft_qwen3_vl_2b_test.toml +54 -0
  37. examples/qwen_vl/crafter_gpt5nano_agent.py +308 -0
  38. examples/qwen_vl/crafter_qwen_vl_agent.py +300 -0
  39. examples/qwen_vl/run_vision_comparison.sh +62 -0
  40. examples/qwen_vl/run_vision_sft_pipeline.sh +175 -0
  41. examples/qwen_vl/test_image_validation.py +201 -0
  42. examples/qwen_vl/test_sft_vision_data.py +110 -0
  43. examples/rl/README.md +1 -1
  44. examples/rl/configs/eval_base_qwen.toml +17 -0
  45. examples/rl/configs/eval_rl_qwen.toml +13 -0
  46. examples/rl/configs/rl_from_base_qwen.toml +37 -0
  47. examples/rl/configs/rl_from_base_qwen17.toml +76 -0
  48. examples/rl/configs/rl_from_ft_qwen.toml +37 -0
  49. examples/rl/run_eval.py +436 -0
  50. examples/rl/run_rl_and_save.py +111 -0
  51. examples/rl/task_app/README.md +22 -0
  52. examples/rl/task_app/math_single_step.py +990 -0
  53. examples/rl/task_app/math_task_app.py +111 -0
  54. examples/sft/README.md +5 -5
  55. examples/sft/configs/crafter_fft_qwen0p6b.toml +4 -2
  56. examples/sft/configs/crafter_lora_qwen0p6b.toml +4 -3
  57. examples/sft/evaluate.py +2 -4
  58. examples/sft/export_dataset.py +7 -4
  59. examples/swe/task_app/README.md +1 -1
  60. examples/swe/task_app/grpo_swe_mini.py +0 -1
  61. examples/swe/task_app/grpo_swe_mini_task_app.py +0 -12
  62. examples/swe/task_app/hosted/envs/mini_swe/environment.py +13 -13
  63. examples/swe/task_app/hosted/policy_routes.py +0 -2
  64. examples/swe/task_app/hosted/rollout.py +0 -8
  65. examples/task_apps/crafter/task_app/grpo_crafter.py +4 -7
  66. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +59 -1
  67. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +30 -0
  68. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +62 -31
  69. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +16 -14
  70. examples/task_apps/enron/__init__.py +1 -0
  71. examples/vlm/README.md +3 -3
  72. examples/vlm/configs/crafter_vlm_gpt4o.toml +2 -0
  73. examples/vlm/crafter_openai_vlm_agent.py +3 -5
  74. examples/vlm/filter_image_rows.py +1 -1
  75. examples/vlm/run_crafter_vlm_benchmark.py +2 -2
  76. examples/warming_up_to_rl/_utils.py +92 -0
  77. examples/warming_up_to_rl/analyze_trace_db.py +1 -1
  78. examples/warming_up_to_rl/configs/crafter_fft.toml +2 -0
  79. examples/warming_up_to_rl/configs/crafter_fft_4b.toml +2 -0
  80. examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +2 -0
  81. examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +2 -0
  82. examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +2 -1
  83. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +2 -1
  84. examples/warming_up_to_rl/configs/rl_from_ft.toml +2 -0
  85. examples/warming_up_to_rl/export_trace_sft.py +174 -60
  86. examples/warming_up_to_rl/readme.md +63 -132
  87. examples/warming_up_to_rl/run_fft_and_save.py +1 -1
  88. examples/warming_up_to_rl/run_rl_and_save.py +1 -1
  89. examples/warming_up_to_rl/task_app/README.md +42 -0
  90. examples/warming_up_to_rl/task_app/grpo_crafter.py +696 -0
  91. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +135 -0
  92. examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
  93. examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
  94. examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +143 -0
  95. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1226 -0
  96. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
  97. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
  98. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
  99. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +522 -0
  100. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +478 -0
  101. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +108 -0
  102. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
  103. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
  104. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +204 -0
  105. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
  106. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +618 -0
  107. examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +100 -0
  108. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +1081 -0
  109. examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +195 -0
  110. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1861 -0
  111. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
  112. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +211 -0
  113. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +161 -0
  114. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +137 -0
  115. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +62 -0
  116. synth_ai/__init__.py +44 -30
  117. synth_ai/_utils/__init__.py +47 -0
  118. synth_ai/_utils/base_url.py +10 -0
  119. synth_ai/_utils/http.py +10 -0
  120. synth_ai/_utils/prompts.py +10 -0
  121. synth_ai/_utils/task_app_state.py +12 -0
  122. synth_ai/_utils/user_config.py +10 -0
  123. synth_ai/api/models/supported.py +144 -7
  124. synth_ai/api/train/__init__.py +13 -1
  125. synth_ai/api/train/cli.py +30 -7
  126. synth_ai/api/train/config_finder.py +18 -11
  127. synth_ai/api/train/env_resolver.py +13 -10
  128. synth_ai/cli/__init__.py +62 -78
  129. synth_ai/cli/_modal_wrapper.py +7 -5
  130. synth_ai/cli/_typer_patch.py +0 -2
  131. synth_ai/cli/_validate_task_app.py +22 -4
  132. synth_ai/cli/legacy_root_backup.py +3 -1
  133. synth_ai/cli/lib/__init__.py +10 -0
  134. synth_ai/cli/lib/task_app_discovery.py +7 -0
  135. synth_ai/cli/lib/task_app_env.py +518 -0
  136. synth_ai/cli/recent.py +2 -1
  137. synth_ai/cli/setup.py +266 -0
  138. synth_ai/cli/status.py +1 -1
  139. synth_ai/cli/task_app_deploy.py +16 -0
  140. synth_ai/cli/task_app_list.py +25 -0
  141. synth_ai/cli/task_app_modal_serve.py +16 -0
  142. synth_ai/cli/task_app_serve.py +18 -0
  143. synth_ai/cli/task_apps.py +71 -31
  144. synth_ai/cli/traces.py +1 -1
  145. synth_ai/cli/train.py +18 -0
  146. synth_ai/cli/tui.py +7 -2
  147. synth_ai/cli/turso.py +1 -1
  148. synth_ai/cli/watch.py +1 -1
  149. synth_ai/demos/__init__.py +10 -0
  150. synth_ai/demos/core/__init__.py +28 -1
  151. synth_ai/demos/crafter/__init__.py +1 -0
  152. synth_ai/demos/crafter/crafter_fft_4b.toml +55 -0
  153. synth_ai/demos/crafter/grpo_crafter_task_app.py +185 -0
  154. synth_ai/demos/crafter/rl_from_base_qwen4b.toml +74 -0
  155. synth_ai/demos/demo_registry.py +176 -0
  156. synth_ai/demos/math/__init__.py +1 -0
  157. synth_ai/demos/math/_common.py +16 -0
  158. synth_ai/demos/math/app.py +38 -0
  159. synth_ai/demos/math/config.toml +76 -0
  160. synth_ai/demos/math/deploy_modal.py +54 -0
  161. synth_ai/demos/math/modal_task_app.py +702 -0
  162. synth_ai/demos/math/task_app_entry.py +51 -0
  163. synth_ai/environments/environment/core.py +7 -1
  164. synth_ai/environments/examples/bandit/engine.py +0 -1
  165. synth_ai/environments/examples/bandit/environment.py +0 -1
  166. synth_ai/environments/examples/wordle/environment.py +0 -1
  167. synth_ai/evals/base.py +16 -5
  168. synth_ai/evals/client.py +1 -1
  169. synth_ai/inference/client.py +1 -1
  170. synth_ai/judge_schemas.py +8 -8
  171. synth_ai/learning/client.py +1 -1
  172. synth_ai/learning/health.py +1 -1
  173. synth_ai/learning/jobs.py +1 -1
  174. synth_ai/learning/rl/client.py +1 -1
  175. synth_ai/learning/rl/env_keys.py +1 -1
  176. synth_ai/learning/rl/secrets.py +1 -1
  177. synth_ai/learning/sft/client.py +1 -1
  178. synth_ai/learning/sft/data.py +407 -4
  179. synth_ai/learning/validators.py +4 -1
  180. synth_ai/task/apps/__init__.py +4 -2
  181. synth_ai/task/config.py +6 -4
  182. synth_ai/task/rubrics/__init__.py +1 -2
  183. synth_ai/task/rubrics/loaders.py +14 -10
  184. synth_ai/task/rubrics.py +219 -0
  185. synth_ai/task/trace_correlation_helpers.py +24 -11
  186. synth_ai/task/tracing_utils.py +14 -3
  187. synth_ai/task/validators.py +2 -3
  188. synth_ai/tracing_v3/abstractions.py +3 -3
  189. synth_ai/tracing_v3/config.py +15 -13
  190. synth_ai/tracing_v3/constants.py +21 -0
  191. synth_ai/tracing_v3/db_config.py +3 -1
  192. synth_ai/tracing_v3/decorators.py +10 -7
  193. synth_ai/tracing_v3/llm_call_record_helpers.py +5 -5
  194. synth_ai/tracing_v3/session_tracer.py +7 -7
  195. synth_ai/tracing_v3/storage/base.py +29 -29
  196. synth_ai/tracing_v3/storage/config.py +3 -3
  197. synth_ai/tracing_v3/turso/daemon.py +8 -9
  198. synth_ai/tracing_v3/turso/native_manager.py +80 -72
  199. synth_ai/tracing_v3/utils.py +2 -2
  200. synth_ai/tui/cli/query_experiments.py +4 -4
  201. synth_ai/tui/cli/query_experiments_v3.py +4 -4
  202. synth_ai/tui/dashboard.py +14 -9
  203. synth_ai/utils/__init__.py +101 -0
  204. synth_ai/utils/base_url.py +94 -0
  205. synth_ai/utils/cli.py +131 -0
  206. synth_ai/utils/env.py +287 -0
  207. synth_ai/utils/http.py +169 -0
  208. synth_ai/utils/modal.py +308 -0
  209. synth_ai/utils/process.py +212 -0
  210. synth_ai/utils/prompts.py +39 -0
  211. synth_ai/utils/sqld.py +122 -0
  212. synth_ai/utils/task_app_discovery.py +882 -0
  213. synth_ai/utils/task_app_env.py +186 -0
  214. synth_ai/utils/task_app_state.py +318 -0
  215. synth_ai/utils/user_config.py +137 -0
  216. synth_ai/v0/config/__init__.py +1 -5
  217. synth_ai/v0/config/base_url.py +1 -7
  218. synth_ai/v0/tracing/config.py +1 -1
  219. synth_ai/v0/tracing/decorators.py +1 -1
  220. synth_ai/v0/tracing/upload.py +1 -1
  221. synth_ai/v0/tracing_v1/config.py +1 -1
  222. synth_ai/v0/tracing_v1/decorators.py +1 -1
  223. synth_ai/v0/tracing_v1/upload.py +1 -1
  224. {synth_ai-0.2.14.dist-info → synth_ai-0.2.16.dist-info}/METADATA +85 -31
  225. {synth_ai-0.2.14.dist-info → synth_ai-0.2.16.dist-info}/RECORD +229 -117
  226. synth_ai/cli/man.py +0 -106
  227. synth_ai/compound/cais.py +0 -0
  228. synth_ai/core/experiment.py +0 -13
  229. synth_ai/core/system.py +0 -15
  230. synth_ai/demo_registry.py +0 -295
  231. synth_ai/handshake.py +0 -109
  232. synth_ai/http.py +0 -26
  233. {synth_ai-0.2.14.dist-info → synth_ai-0.2.16.dist-info}/WHEEL +0 -0
  234. {synth_ai-0.2.14.dist-info → synth_ai-0.2.16.dist-info}/entry_points.txt +0 -0
  235. {synth_ai-0.2.14.dist-info → synth_ai-0.2.16.dist-info}/licenses/LICENSE +0 -0
  236. {synth_ai-0.2.14.dist-info → synth_ai-0.2.16.dist-info}/top_level.txt +0 -0
@@ -1,11 +1,14 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import json
4
+ import logging
4
5
  from collections.abc import Iterable, Iterator, Sequence
5
6
  from dataclasses import dataclass, field
6
7
  from pathlib import Path
7
8
  from typing import Any
8
9
 
10
+ logger = logging.getLogger(__name__)
11
+
9
12
  SFTMessageContent = str | dict[str, Any] | list[Any] | None
10
13
 
11
14
 
@@ -37,6 +40,8 @@ class SFTMessage:
37
40
  tool_calls: list[SFTToolCall] = field(default_factory=list)
38
41
  tool_call_id: str | None = None
39
42
  name: str | None = None
43
+ reasoning: str | None = None # NEW: Explicit reasoning/thinking content
44
+ raw_content: str | None = None # NEW: Original unparsed content (before reasoning extraction)
40
45
  extra: dict[str, Any] = field(default_factory=dict)
41
46
 
42
47
 
@@ -86,9 +91,11 @@ def _coerce_tool_call(raw: Any, *, index: int) -> SFTToolCall:
86
91
  name: str | None = None
87
92
  arguments: Any = None
88
93
 
89
- if isinstance(raw.get("function"), dict):
90
- fn_payload = raw["function"]
91
- name = fn_payload.get("name") if isinstance(fn_payload.get("name"), str) else None
94
+ fn_obj = raw.get("function")
95
+ if isinstance(fn_obj, dict):
96
+ fn_payload = fn_obj
97
+ name_val = fn_payload.get("name")
98
+ name = name_val if isinstance(name_val, str) else None
92
99
  arguments = fn_payload.get("arguments")
93
100
  if name is None:
94
101
  maybe_name = raw.get("name")
@@ -143,11 +150,20 @@ def _coerce_message(raw: Any, *, index: int) -> SFTMessage:
143
150
  name = raw.get("name")
144
151
  if name is not None and not isinstance(name, str):
145
152
  raise SFTDataError(f"message {index} name must be a string if present")
153
+
154
+ # NEW: Extract reasoning and raw_content if present
155
+ reasoning = raw.get("reasoning")
156
+ if reasoning is not None and not isinstance(reasoning, str):
157
+ raise SFTDataError(f"message {index} reasoning must be a string if present")
158
+
159
+ raw_content = raw.get("raw_content")
160
+ if raw_content is not None and not isinstance(raw_content, str):
161
+ raise SFTDataError(f"message {index} raw_content must be a string if present")
146
162
 
147
163
  extra = {
148
164
  key: value
149
165
  for key, value in raw.items()
150
- if key not in {"role", "content", "tool_calls", "tool_call_id", "name"}
166
+ if key not in {"role", "content", "tool_calls", "tool_call_id", "name", "reasoning", "raw_content"}
151
167
  }
152
168
 
153
169
  return SFTMessage(
@@ -156,6 +172,8 @@ def _coerce_message(raw: Any, *, index: int) -> SFTMessage:
156
172
  tool_calls=tool_calls,
157
173
  tool_call_id=tool_call_id,
158
174
  name=name,
175
+ reasoning=reasoning,
176
+ raw_content=raw_content,
159
177
  extra=extra,
160
178
  )
161
179
 
@@ -280,6 +298,378 @@ def load_jsonl(path: Path, *, min_messages: int = 1) -> list[SFTExample]:
280
298
  return list(iter_sft_examples(fh, min_messages=min_messages))
281
299
 
282
300
 
301
+ # Reasoning/Thinking Utilities
302
+ # ============================================================================
303
+
304
+
305
def extract_reasoning(content: str, *, tag: str = "think") -> tuple[str | None, str]:
    """Split ``<tag>...</tag>`` reasoning blocks out of *content*.

    Args:
        content: Raw content string.
        tag: Tag name to extract (default: "think"). The name is matched
            literally: regex metacharacters in it are escaped.

    Returns:
        Tuple of (reasoning, clean_content)
        - reasoning: Stripped text of every block, joined by a blank line,
          or None if no block was found
        - clean_content: Content with all blocks removed and surrounding
          whitespace stripped. When no block is found, the input is
          returned unchanged (not stripped).

    Examples:
        >>> extract_reasoning("<think>Let me analyze...</think>The answer is 42")
        ('Let me analyze...', 'The answer is 42')
        >>> extract_reasoning("Just plain text")
        (None, 'Just plain text')
    """
    import re

    # Escape the tag so names such as "a.b" cannot match unrelated markup.
    escaped = re.escape(tag)
    # DOTALL lets a single reasoning block span several lines.
    block_re = re.compile(rf"<{escaped}>(.*?)</{escaped}>", re.DOTALL)

    blocks = block_re.findall(content)
    if not blocks:
        return None, content

    reasoning = "\n\n".join(block.strip() for block in blocks)
    clean_content = block_re.sub("", content).strip()
    return reasoning, clean_content
338
+
339
+
340
def strip_reasoning(content: str, *, tag: str = "think") -> str:
    """Return *content* without its reasoning blocks.

    Delegates to ``extract_reasoning`` and keeps only the cleaned text,
    discarding the extracted reasoning.

    Args:
        content: Content with potential reasoning tags
        tag: Tag name to strip (default: "think")

    Returns:
        The second element of ``extract_reasoning(content, tag=tag)``
    """
    return extract_reasoning(content, tag=tag)[1]
352
+
353
+
354
def message_has_reasoning(message: SFTMessage) -> bool:
    """Report whether *message* carries reasoning in any supported form.

    Args:
        message: SFTMessage to check

    Returns:
        True when the ``reasoning`` field is non-empty, or when the content
        is a string in which ``extract_reasoning`` finds a <think> block;
        False otherwise (including non-string content).
    """
    # An explicit, non-empty reasoning field settles it immediately.
    if message.reasoning:
        return True

    body = message.content
    if not isinstance(body, str):
        return False

    # Fall back to looking for inline reasoning tags in the text.
    found, _remainder = extract_reasoning(body)
    return found is not None
373
+
374
+
375
def validate_message_content(
    message: SFTMessage, *, require_content: bool = True
) -> tuple[bool, str | None]:
    """Validate that a message carries some usable payload.

    Rules:
        - A message is invalid only when ``require_content`` is True and it
          has none of: reasoning, content, raw_content, tool_calls.
        - Every non-empty combination of those fields is accepted.
        - If reasoning, content and raw_content are all present, the
          reasoning extracted from raw_content is compared with the
          reasoning field. A mismatch is logged as a warning and is never
          an error.

    Args:
        message: SFTMessage to validate
        require_content: If True, require some form of content (default: True)

    Returns:
        Tuple of (is_valid, error_message); error_message is None when valid
    """
    has_reasoning = bool(message.reasoning)
    has_content = message.content is not None and message.content != ""
    has_raw = bool(message.raw_content)
    has_tools = len(message.tool_calls) > 0

    # Completely empty message: the only rejecting condition.
    if require_content and not (has_reasoning or has_content or has_raw or has_tools):
        return False, "Message has no reasoning, content, raw_content, or tool_calls"

    # Consistency check is advisory and runs regardless of tool_calls, so a
    # message with tool calls gets the same diagnostics as one without.
    if has_reasoning and has_content and has_raw:
        reasoning_in_raw, _ = extract_reasoning(message.raw_content)
        if reasoning_in_raw != message.reasoning.strip():
            logger.warning("raw_content reasoning doesn't match reasoning field")

    return True, None
437
+
438
+
439
+ # Vision/Multimodal Utilities
440
+ # ============================================================================
441
+
442
+
443
def has_image_content(content: SFTMessageContent) -> bool:
    """Check if message content contains image data (OpenAI multimodal format).

    Supports:
        - List of content parts: [{"type": "text", ...}, {"type": "image_url", ...}]
        - Single dict with type field: {"type": "image_url", "image_url": {...}}

    Args:
        content: Message content (can be str, list, dict, or None)

    Returns:
        True if any dict part has a ``type`` of "image" or "image_url".
        Strings, None, non-dict parts and parts with any other ``type``
        value (including unhashable ones) yield False.

    Examples:
        >>> has_image_content([{"type": "text", "text": "What's this?"},
        ...                    {"type": "image_url", "image_url": {"url": "..."}}])
        True
        >>> has_image_content("Just text")
        False
    """
    # Tuple membership compares with ==, so a malformed list/dict "type"
    # value is simply not an image instead of raising TypeError.
    image_types = ("image", "image_url")

    if isinstance(content, dict):
        parts = [content]
    elif isinstance(content, list):
        parts = content
    else:
        return False

    return any(
        isinstance(part, dict) and part.get("type") in image_types
        for part in parts
    )
471
+
472
+
473
def message_has_image(message: SFTMessage) -> bool:
    """Tell whether the content of *message* includes an image part.

    Args:
        message: SFTMessage to check

    Returns:
        The result of ``has_image_content`` applied to ``message.content``
    """
    payload = message.content
    return has_image_content(payload)
483
+
484
+
485
def example_has_image(example: SFTExample) -> bool:
    """Tell whether any message of *example* includes an image part.

    Args:
        example: SFTExample to check

    Returns:
        True as soon as ``message_has_image`` accepts one of
        ``example.messages``; False when none does (or there are none)
    """
    for msg in example.messages:
        if message_has_image(msg):
            return True
    return False
495
+
496
+
497
def count_images_in_content(content: SFTMessageContent) -> int:
    """Count the number of images in message content.

    Args:
        content: Message content to analyze (str, list, dict, or None)

    Returns:
        Number of dict parts whose ``type`` is "image" or "image_url".
        A single dict counts as one part; strings and None count as 0.
    """
    # Tuple membership compares with ==, so a malformed list/dict "type"
    # value counts as "not an image" instead of raising TypeError.
    image_types = ("image", "image_url")

    if isinstance(content, dict):
        parts = [content]
    elif isinstance(content, list):
        parts = content
    else:
        return 0

    return sum(
        1
        for part in parts
        if isinstance(part, dict) and part.get("type") in image_types
    )
514
+
515
+
516
def _image_ref_from_part(part: Any) -> str | None:
    """Return the usable image reference carried by one content part, if any."""
    # Tuple membership (==) tolerates unhashable "type" values from bad JSON.
    if not isinstance(part, dict) or part.get("type") not in ("image", "image_url"):
        return None

    # Two accepted shapes:
    #   {"type": "image_url", "image_url": {"url": "..."}}
    #   {"type": "image", "image": "..."}
    # A dict-valued "image_url" takes precedence over "image".
    image_url = part.get("image_url")
    candidate = image_url.get("url") if isinstance(image_url, dict) else part.get("image")

    # Reject non-strings as well as empty / whitespace-only strings.
    if isinstance(candidate, str) and candidate.strip():
        return candidate
    return None


def extract_image_urls(content: SFTMessageContent) -> list[str]:
    """Extract all image URLs from message content.

    Filters out invalid entries:
        - Non-string URLs
        - Empty strings
        - Whitespace-only strings

    Args:
        content: Message content to extract from (str, list, dict, or None)

    Returns:
        List of image reference strings in content order, returned exactly
        as stored (not stripped). No URL-scheme validation is done here.
        Empty for string or None content.
    """
    if isinstance(content, dict):
        parts = [content]
    elif isinstance(content, list):
        parts = content
    else:
        return []

    urls: list[str] = []
    for part in parts:
        ref = _image_ref_from_part(part)
        if ref is not None:
            urls.append(ref)
    return urls
557
+
558
+
559
def validate_vision_example(
    example: SFTExample, *, require_images: bool = True
) -> tuple[bool, str | None]:
    """Validate a vision SFT example.

    Checks:
        - Every image-typed entry ("image" / "image_url") in list or dict
          content must yield a usable URL from ``extract_image_urls``
        - URLs not starting with http://, https:// or data:image/ are
          logged as warnings but still count as valid
        - If require_images is True, at least one valid image URL must exist

    Args:
        example: SFTExample to validate
        require_images: If True, fail if no images are present

    Returns:
        Tuple of (is_valid, error_message)
        If valid, error_message is None
    """
    # Tuple membership (==) tolerates unhashable "type" values from bad JSON.
    image_types = ("image", "image_url")
    total_valid_urls = 0

    for i, msg in enumerate(example.messages):
        content = msg.content
        if isinstance(content, list):
            parts = content
        elif isinstance(content, dict):
            parts = [content]
        else:
            # Plain-text or missing content cannot carry images.
            continue

        image_type_count = sum(
            1
            for item in parts
            if isinstance(item, dict) and item.get("type") in image_types
        )
        if image_type_count == 0:
            continue

        # extract_image_urls only returns non-empty strings, so a shortfall
        # against the declared image entries means some entries are unusable.
        urls = extract_image_urls(content)
        if len(urls) < image_type_count:
            return False, f"Message {i}: Has {image_type_count} image_url entries but only {len(urls)} valid URLs (some are empty, null, or missing)"

        for url in urls:
            # Basic URL format check: advisory only.
            if not url.startswith(("http://", "https://", "data:image/")):
                logger.warning(
                    "Message %s: Image URL doesn't start with http://, https://, or data:image/ - "
                    "this may cause issues during training. URL: %s",
                    i,
                    url[:100],
                )

        total_valid_urls += len(urls)

    if require_images and total_valid_urls == 0:
        return False, "No image content found in any message"

    return True, None
626
+
627
+
628
def iter_vision_examples(
    source: Iterable[str],
    *,
    min_messages: int = 1,
    skip_empty: bool = True,
    require_images: bool = True,
    log_validation_errors: bool = False,
) -> Iterator[SFTExample]:
    """Iterate over vision SFT examples from JSONL source.

    Lines that fail to parse are skipped rather than raised. Vision
    validation is applied only when ``require_images`` is True.

    Args:
        source: Iterable of JSONL lines
        min_messages: Minimum number of messages required
        skip_empty: Skip empty (whitespace-only) lines
        require_images: If True, skip examples that fail
            ``validate_vision_example``
        log_validation_errors: If True, log a warning for every skipped line

    Yields:
        SFTExample objects that parsed and, when required, validated
    """
    for line in source:
        if skip_empty and not line.strip():
            continue

        # Keep the try body to the parse call: the yield stays outside so an
        # exception thrown into the generator is never swallowed here.
        try:
            example = parse_jsonl_line(line, min_messages=min_messages)
        except (json.JSONDecodeError, SFTDataError) as exc:
            if log_validation_errors:
                logger.warning("Failed to parse vision example: %s", exc)
            continue

        if require_images:
            is_valid, error = validate_vision_example(example, require_images=True)
            if not is_valid:
                if log_validation_errors:
                    logger.warning("Skipping invalid vision example: %s", error)
                continue

        yield example
671
+
672
+
283
673
  __all__ = [
284
674
  "SFTDataError",
285
675
  "SFTExample",
@@ -292,4 +682,17 @@ __all__ = [
292
682
  "load_jsonl",
293
683
  "parse_jsonl_line",
294
684
  "validate_jsonl_or_raise",
685
+ # Reasoning utilities
686
+ "extract_reasoning",
687
+ "strip_reasoning",
688
+ "message_has_reasoning",
689
+ "validate_message_content",
690
+ # Vision utilities
691
+ "has_image_content",
692
+ "message_has_image",
693
+ "example_has_image",
694
+ "count_images_in_content",
695
+ "extract_image_urls",
696
+ "validate_vision_example",
697
+ "iter_vision_examples",
295
698
  ]
@@ -37,7 +37,10 @@ def validate_training_jsonl(path: str | Path, *, sample_lines: int = 50) -> None
37
37
  def validate_task_app_url(url: str, *, name: str = "TASK_APP_BASE_URL") -> None:
38
38
  from synth_ai.task.validators import validate_task_app_url as _vt
39
39
 
40
- _vt(url, name=name)
40
+ try:
41
+ _vt(url)
42
+ except ValueError as exc:
43
+ raise ValueError(f"{name}: {exc}") from exc
41
44
 
42
45
 
43
46
  def validate_trainer_cfg_rl(trainer: dict[str, Any]) -> None:
@@ -50,10 +50,12 @@ class TaskAppRegistry:
50
50
 
51
51
  def register(self, entry: TaskAppEntry) -> None:
52
52
  if entry.app_id in self._entries:
53
- raise ValueError(f"Task app already registered: {entry.app_id}")
53
+ # Allow idempotent registration when modules are imported multiple times.
54
+ return
54
55
  self._entries[entry.app_id] = entry
55
56
  for alias in entry.aliases:
56
- if alias in self._alias_to_id:
57
+ existing = self._alias_to_id.get(alias)
58
+ if existing and existing != entry.app_id:
57
59
  raise ValueError(f"Alias already registered: {alias}")
58
60
  self._alias_to_id[alias] = entry.app_id
59
61
 
synth_ai/task/config.py CHANGED
@@ -185,9 +185,12 @@ class FilterConfig:
185
185
  raise ValueError(f"output must be a .jsonl or .json file, got: {self.output}")
186
186
 
187
187
  # Validate score thresholds
188
- if self.min_official_score is not None and self.max_official_score is not None:
189
- if self.min_official_score > self.max_official_score:
190
- raise ValueError("min_official_score cannot be greater than max_official_score")
188
+ if (
189
+ self.min_official_score is not None
190
+ and self.max_official_score is not None
191
+ and self.min_official_score > self.max_official_score
192
+ ):
193
+ raise ValueError("min_official_score cannot be greater than max_official_score")
191
194
 
192
195
  # Validate limit/offset
193
196
  if self.limit is not None and self.limit < 1:
@@ -254,4 +257,3 @@ class FilterConfig:
254
257
  output_path.parent.mkdir(parents=True, exist_ok=True)
255
258
  return output_path
256
259
 
257
-
@@ -9,10 +9,9 @@ This module provides:
9
9
  """
10
10
 
11
11
  # Core models (flexible validation)
12
- from .models import Criterion, Rubric
13
-
14
12
  # Loading and blending
15
13
  from .loaders import blend_rubrics, load_rubric
14
+ from .models import Criterion, Rubric
16
15
 
17
16
  # Scoring
18
17
  from .scoring import score_events_against_rubric, score_outcome_against_rubric
@@ -78,15 +78,20 @@ def load_rubric(source: str | dict[str, Any] | Rubric | None) -> Rubric | None:
78
78
  data = _parse_structured(text, suffix)
79
79
 
80
80
  # Check if this looks like a backend judge rubric (wrong format)
81
- if isinstance(data, dict) and "event" in data and "outcome" in data:
82
- # Missing required task app rubric fields
83
- if "version" not in data and "goal_text" not in data and "criteria" not in data:
84
- source_hint = f" ({source})" if isinstance(source, str) else ""
85
- raise ValueError(
86
- f"Rubric appears to be in backend judge format (has 'event'/'outcome' keys){source_hint}. "
87
- f"Task apps require rubrics with 'version', 'goal_text', and 'criteria' fields. "
88
- f"Backend judge rubrics should be named '*_backend_judge.json' and loaded by judge functions."
89
- )
81
+ if (
82
+ isinstance(data, dict)
83
+ and "event" in data
84
+ and "outcome" in data
85
+ and "version" not in data
86
+ and "goal_text" not in data
87
+ and "criteria" not in data
88
+ ):
89
+ source_hint = f" ({source})" if isinstance(source, str) else ""
90
+ raise ValueError(
91
+ f"Rubric appears to be in backend judge format (has 'event'/'outcome' keys){source_hint}. "
92
+ f"Task apps require rubrics with 'version', 'goal_text', and 'criteria' fields. "
93
+ f"Backend judge rubrics should be named '*_backend_judge.json' and loaded by judge functions."
94
+ )
90
95
 
91
96
  return Rubric.model_validate(data)
92
97
 
@@ -149,4 +154,3 @@ def blend_rubrics(base: Rubric | None, override: Rubric | None) -> Rubric | None
149
154
  criteria=merged,
150
155
  aggregation=aggregation,
151
156
  )
152
-