PyPI - synth-ai - Versions diffs - 0.2.14__py3-none-any.whl → 0.2.16__py3-none-any.whl - Mend

synth-ai 0.2.14__py3-none-any.whl → 0.2.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of synth-ai might be problematic. Click here for more details.

Files changed (236) hide show

examples/README.md +1 -0
examples/multi_step/SFT_README.md +147 -0
examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +9 -9
examples/multi_step/configs/crafter_sft_qwen30b_lora.toml +62 -0
examples/multi_step/convert_traces_to_sft.py +84 -0
examples/multi_step/run_sft_qwen30b.sh +45 -0
examples/qwen_coder/configs/coder_lora_30b.toml +2 -1
examples/qwen_coder/configs/coder_lora_4b.toml +2 -1
examples/qwen_coder/configs/coder_lora_small.toml +2 -1
examples/qwen_vl/BUGS_AND_FIXES.md +232 -0
examples/qwen_vl/IMAGE_VALIDATION_COMPLETE.md +271 -0
examples/qwen_vl/IMAGE_VALIDATION_SUMMARY.md +260 -0
examples/qwen_vl/INFERENCE_SFT_TESTS.md +412 -0
examples/qwen_vl/NEXT_STEPS_2B.md +325 -0
examples/qwen_vl/QUICKSTART.md +327 -0
examples/qwen_vl/QUICKSTART_RL_VISION.md +110 -0
examples/qwen_vl/README.md +154 -0
examples/qwen_vl/RL_VISION_COMPLETE.md +475 -0
examples/qwen_vl/RL_VISION_TESTING.md +333 -0
examples/qwen_vl/SDK_VISION_INTEGRATION.md +328 -0
examples/qwen_vl/SETUP_COMPLETE.md +275 -0
examples/qwen_vl/VISION_TESTS_COMPLETE.md +490 -0
examples/qwen_vl/VLM_PIPELINE_COMPLETE.md +242 -0
examples/qwen_vl/__init__.py +2 -0
examples/qwen_vl/collect_data_via_cli.md +423 -0
examples/qwen_vl/collect_vision_traces.py +368 -0
examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +127 -0
examples/qwen_vl/configs/crafter_vlm_sft_example.toml +60 -0
examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +43 -0
examples/qwen_vl/configs/eval_gpt4o_vision_proper.toml +29 -0
examples/qwen_vl/configs/eval_gpt5nano_vision.toml +45 -0
examples/qwen_vl/configs/eval_qwen2vl_vision.toml +44 -0
examples/qwen_vl/configs/filter_qwen2vl_sft.toml +50 -0
examples/qwen_vl/configs/filter_vision_sft.toml +53 -0
examples/qwen_vl/configs/filter_vision_test.toml +8 -0
examples/qwen_vl/configs/sft_qwen3_vl_2b_test.toml +54 -0
examples/qwen_vl/crafter_gpt5nano_agent.py +308 -0
examples/qwen_vl/crafter_qwen_vl_agent.py +300 -0
examples/qwen_vl/run_vision_comparison.sh +62 -0
examples/qwen_vl/run_vision_sft_pipeline.sh +175 -0
examples/qwen_vl/test_image_validation.py +201 -0
examples/qwen_vl/test_sft_vision_data.py +110 -0
examples/rl/README.md +1 -1
examples/rl/configs/eval_base_qwen.toml +17 -0
examples/rl/configs/eval_rl_qwen.toml +13 -0
examples/rl/configs/rl_from_base_qwen.toml +37 -0
examples/rl/configs/rl_from_base_qwen17.toml +76 -0
examples/rl/configs/rl_from_ft_qwen.toml +37 -0
examples/rl/run_eval.py +436 -0
examples/rl/run_rl_and_save.py +111 -0
examples/rl/task_app/README.md +22 -0
examples/rl/task_app/math_single_step.py +990 -0
examples/rl/task_app/math_task_app.py +111 -0
examples/sft/README.md +5 -5
examples/sft/configs/crafter_fft_qwen0p6b.toml +4 -2
examples/sft/configs/crafter_lora_qwen0p6b.toml +4 -3
examples/sft/evaluate.py +2 -4
examples/sft/export_dataset.py +7 -4
examples/swe/task_app/README.md +1 -1
examples/swe/task_app/grpo_swe_mini.py +0 -1
examples/swe/task_app/grpo_swe_mini_task_app.py +0 -12
examples/swe/task_app/hosted/envs/mini_swe/environment.py +13 -13
examples/swe/task_app/hosted/policy_routes.py +0 -2
examples/swe/task_app/hosted/rollout.py +0 -8
examples/task_apps/crafter/task_app/grpo_crafter.py +4 -7
examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +59 -1
examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +30 -0
examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +62 -31
examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +16 -14
examples/task_apps/enron/__init__.py +1 -0
examples/vlm/README.md +3 -3
examples/vlm/configs/crafter_vlm_gpt4o.toml +2 -0
examples/vlm/crafter_openai_vlm_agent.py +3 -5
examples/vlm/filter_image_rows.py +1 -1
examples/vlm/run_crafter_vlm_benchmark.py +2 -2
examples/warming_up_to_rl/_utils.py +92 -0
examples/warming_up_to_rl/analyze_trace_db.py +1 -1
examples/warming_up_to_rl/configs/crafter_fft.toml +2 -0
examples/warming_up_to_rl/configs/crafter_fft_4b.toml +2 -0
examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +2 -0
examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +2 -0
examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +2 -1
examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +2 -1
examples/warming_up_to_rl/configs/rl_from_ft.toml +2 -0
examples/warming_up_to_rl/export_trace_sft.py +174 -60
examples/warming_up_to_rl/readme.md +63 -132
examples/warming_up_to_rl/run_fft_and_save.py +1 -1
examples/warming_up_to_rl/run_rl_and_save.py +1 -1
examples/warming_up_to_rl/task_app/README.md +42 -0
examples/warming_up_to_rl/task_app/grpo_crafter.py +696 -0
examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +135 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +143 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1226 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +522 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +478 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +108 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +204 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +618 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +100 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +1081 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +195 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1861 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +211 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +161 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +137 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +62 -0
synth_ai/__init__.py +44 -30
synth_ai/_utils/__init__.py +47 -0
synth_ai/_utils/base_url.py +10 -0
synth_ai/_utils/http.py +10 -0
synth_ai/_utils/prompts.py +10 -0
synth_ai/_utils/task_app_state.py +12 -0
synth_ai/_utils/user_config.py +10 -0
synth_ai/api/models/supported.py +144 -7
synth_ai/api/train/__init__.py +13 -1
synth_ai/api/train/cli.py +30 -7
synth_ai/api/train/config_finder.py +18 -11
synth_ai/api/train/env_resolver.py +13 -10
synth_ai/cli/__init__.py +62 -78
synth_ai/cli/_modal_wrapper.py +7 -5
synth_ai/cli/_typer_patch.py +0 -2
synth_ai/cli/_validate_task_app.py +22 -4
synth_ai/cli/legacy_root_backup.py +3 -1
synth_ai/cli/lib/__init__.py +10 -0
synth_ai/cli/lib/task_app_discovery.py +7 -0
synth_ai/cli/lib/task_app_env.py +518 -0
synth_ai/cli/recent.py +2 -1
synth_ai/cli/setup.py +266 -0
synth_ai/cli/status.py +1 -1
synth_ai/cli/task_app_deploy.py +16 -0
synth_ai/cli/task_app_list.py +25 -0
synth_ai/cli/task_app_modal_serve.py +16 -0
synth_ai/cli/task_app_serve.py +18 -0
synth_ai/cli/task_apps.py +71 -31
synth_ai/cli/traces.py +1 -1
synth_ai/cli/train.py +18 -0
synth_ai/cli/tui.py +7 -2
synth_ai/cli/turso.py +1 -1
synth_ai/cli/watch.py +1 -1
synth_ai/demos/__init__.py +10 -0
synth_ai/demos/core/__init__.py +28 -1
synth_ai/demos/crafter/__init__.py +1 -0
synth_ai/demos/crafter/crafter_fft_4b.toml +55 -0
synth_ai/demos/crafter/grpo_crafter_task_app.py +185 -0
synth_ai/demos/crafter/rl_from_base_qwen4b.toml +74 -0
synth_ai/demos/demo_registry.py +176 -0
synth_ai/demos/math/__init__.py +1 -0
synth_ai/demos/math/_common.py +16 -0
synth_ai/demos/math/app.py +38 -0
synth_ai/demos/math/config.toml +76 -0
synth_ai/demos/math/deploy_modal.py +54 -0
synth_ai/demos/math/modal_task_app.py +702 -0
synth_ai/demos/math/task_app_entry.py +51 -0
synth_ai/environments/environment/core.py +7 -1
synth_ai/environments/examples/bandit/engine.py +0 -1
synth_ai/environments/examples/bandit/environment.py +0 -1
synth_ai/environments/examples/wordle/environment.py +0 -1
synth_ai/evals/base.py +16 -5
synth_ai/evals/client.py +1 -1
synth_ai/inference/client.py +1 -1
synth_ai/judge_schemas.py +8 -8
synth_ai/learning/client.py +1 -1
synth_ai/learning/health.py +1 -1
synth_ai/learning/jobs.py +1 -1
synth_ai/learning/rl/client.py +1 -1
synth_ai/learning/rl/env_keys.py +1 -1
synth_ai/learning/rl/secrets.py +1 -1
synth_ai/learning/sft/client.py +1 -1
synth_ai/learning/sft/data.py +407 -4
synth_ai/learning/validators.py +4 -1
synth_ai/task/apps/__init__.py +4 -2
synth_ai/task/config.py +6 -4
synth_ai/task/rubrics/__init__.py +1 -2
synth_ai/task/rubrics/loaders.py +14 -10
synth_ai/task/rubrics.py +219 -0
synth_ai/task/trace_correlation_helpers.py +24 -11
synth_ai/task/tracing_utils.py +14 -3
synth_ai/task/validators.py +2 -3
synth_ai/tracing_v3/abstractions.py +3 -3
synth_ai/tracing_v3/config.py +15 -13
synth_ai/tracing_v3/constants.py +21 -0
synth_ai/tracing_v3/db_config.py +3 -1
synth_ai/tracing_v3/decorators.py +10 -7
synth_ai/tracing_v3/llm_call_record_helpers.py +5 -5
synth_ai/tracing_v3/session_tracer.py +7 -7
synth_ai/tracing_v3/storage/base.py +29 -29
synth_ai/tracing_v3/storage/config.py +3 -3
synth_ai/tracing_v3/turso/daemon.py +8 -9
synth_ai/tracing_v3/turso/native_manager.py +80 -72
synth_ai/tracing_v3/utils.py +2 -2
synth_ai/tui/cli/query_experiments.py +4 -4
synth_ai/tui/cli/query_experiments_v3.py +4 -4
synth_ai/tui/dashboard.py +14 -9
synth_ai/utils/__init__.py +101 -0
synth_ai/utils/base_url.py +94 -0
synth_ai/utils/cli.py +131 -0
synth_ai/utils/env.py +287 -0
synth_ai/utils/http.py +169 -0
synth_ai/utils/modal.py +308 -0
synth_ai/utils/process.py +212 -0
synth_ai/utils/prompts.py +39 -0
synth_ai/utils/sqld.py +122 -0
synth_ai/utils/task_app_discovery.py +882 -0
synth_ai/utils/task_app_env.py +186 -0
synth_ai/utils/task_app_state.py +318 -0
synth_ai/utils/user_config.py +137 -0
synth_ai/v0/config/__init__.py +1 -5
synth_ai/v0/config/base_url.py +1 -7
synth_ai/v0/tracing/config.py +1 -1
synth_ai/v0/tracing/decorators.py +1 -1
synth_ai/v0/tracing/upload.py +1 -1
synth_ai/v0/tracing_v1/config.py +1 -1
synth_ai/v0/tracing_v1/decorators.py +1 -1
synth_ai/v0/tracing_v1/upload.py +1 -1
{synth_ai-0.2.14.dist-info → synth_ai-0.2.16.dist-info}/METADATA +85 -31
{synth_ai-0.2.14.dist-info → synth_ai-0.2.16.dist-info}/RECORD +229 -117
synth_ai/cli/man.py +0 -106
synth_ai/compound/cais.py +0 -0
synth_ai/core/experiment.py +0 -13
synth_ai/core/system.py +0 -15
synth_ai/demo_registry.py +0 -295
synth_ai/handshake.py +0 -109
synth_ai/http.py +0 -26
{synth_ai-0.2.14.dist-info → synth_ai-0.2.16.dist-info}/WHEEL +0 -0
{synth_ai-0.2.14.dist-info → synth_ai-0.2.16.dist-info}/entry_points.txt +0 -0
{synth_ai-0.2.14.dist-info → synth_ai-0.2.16.dist-info}/licenses/LICENSE +0 -0
{synth_ai-0.2.14.dist-info → synth_ai-0.2.16.dist-info}/top_level.txt +0 -0

synth_ai/learning/sft/data.py CHANGED Viewed

@@ -1,11 +1,14 @@
 from __future__ import annotations
 import json
+import logging
 from collections.abc import Iterable, Iterator, Sequence
 from dataclasses import dataclass, field
 from pathlib import Path
 from typing import Any
+logger = logging.getLogger(__name__)
 SFTMessageContent = str | dict[str, Any] | list[Any] | None
@@ -37,6 +40,8 @@ class SFTMessage:
     tool_calls: list[SFTToolCall] = field(default_factory=list)
     tool_call_id: str | None = None
     name: str | None = None
+    reasoning: str | None = None  # NEW: Explicit reasoning/thinking content
+    raw_content: str | None = None  # NEW: Original unparsed content (before reasoning extraction)
     extra: dict[str, Any] = field(default_factory=dict)
@@ -86,9 +91,11 @@ def _coerce_tool_call(raw: Any, *, index: int) -> SFTToolCall:
     name: str | None = None
     arguments: Any = None
-    if isinstance(raw.get("function"), dict):
-        fn_payload = raw["function"]
-        name = fn_payload.get("name") if isinstance(fn_payload.get("name"), str) else None
+    fn_obj = raw.get("function")
+    if isinstance(fn_obj, dict):
+        fn_payload = fn_obj
+        name_val = fn_payload.get("name")
+        name = name_val if isinstance(name_val, str) else None
         arguments = fn_payload.get("arguments")
     if name is None:
         maybe_name = raw.get("name")
@@ -143,11 +150,20 @@ def _coerce_message(raw: Any, *, index: int) -> SFTMessage:
     name = raw.get("name")
     if name is not None and not isinstance(name, str):
         raise SFTDataError(f"message {index} name must be a string if present")
+    # NEW: Extract reasoning and raw_content if present
+    reasoning = raw.get("reasoning")
+    if reasoning is not None and not isinstance(reasoning, str):
+        raise SFTDataError(f"message {index} reasoning must be a string if present")
+    raw_content = raw.get("raw_content")
+    if raw_content is not None and not isinstance(raw_content, str):
+        raise SFTDataError(f"message {index} raw_content must be a string if present")
     extra = {
         key: value
         for key, value in raw.items()
-        if key not in {"role", "content", "tool_calls", "tool_call_id", "name"}
+        if key not in {"role", "content", "tool_calls", "tool_call_id", "name", "reasoning", "raw_content"}
     }
     return SFTMessage(
@@ -156,6 +172,8 @@ def _coerce_message(raw: Any, *, index: int) -> SFTMessage:
         tool_calls=tool_calls,
         tool_call_id=tool_call_id,
         name=name,
+        reasoning=reasoning,
+        raw_content=raw_content,
         extra=extra,
     )
@@ -280,6 +298,378 @@ def load_jsonl(path: Path, *, min_messages: int = 1) -> list[SFTExample]:
         return list(iter_sft_examples(fh, min_messages=min_messages))
+# Reasoning/Thinking Utilities
+# ============================================================================
+def extract_reasoning(content: str, *, tag: str = "think") -> tuple[str | None, str]:
+    """Extract reasoning from content with <think> tags.
+    Args:
+        content: Raw content string
+        tag: Tag name to extract (default: "think")
+    Returns:
+        Tuple of (reasoning, clean_content)
+        - reasoning: Content inside tags, or None if no tags found
+        - clean_content: Content with tags removed
+    Examples:
+        >>> extract_reasoning("<think>Let me analyze...</think>The answer is 42")
+        ('Let me analyze...', 'The answer is 42')
+        >>> extract_reasoning("Just plain text")
+        (None, 'Just plain text')
+    """
+    import re
+    pattern = rf"<{tag}>(.*?)</{tag}>"
+    matches = re.findall(pattern, content, re.DOTALL)
+    if not matches:
+        return None, content
+    # Combine all reasoning blocks
+    reasoning = "\n\n".join(m.strip() for m in matches)
+    # Remove all reasoning blocks from content
+    clean_content = re.sub(pattern, "", content, flags=re.DOTALL).strip()
+    return reasoning, clean_content
+def strip_reasoning(content: str, *, tag: str = "think") -> str:
+    """Remove reasoning tags from content.
+    Args:
+        content: Content with potential reasoning tags
+        tag: Tag name to strip (default: "think")
+    Returns:
+        Content with reasoning tags removed
+    """
+    _, clean = extract_reasoning(content, tag=tag)
+    return clean
+def message_has_reasoning(message: SFTMessage) -> bool:
+    """Check if a message has explicit reasoning.
+    Args:
+        message: SFTMessage to check
+    Returns:
+        True if message has reasoning field or <think> tags in content
+    """
+    # Check explicit reasoning field
+    if message.reasoning:
+        return True
+    # Check for reasoning tags in content
+    if isinstance(message.content, str):
+        reasoning, _ = extract_reasoning(message.content)
+        return reasoning is not None
+    return False
+def validate_message_content(
+    message: SFTMessage, *, require_content: bool = True
+) -> tuple[bool, str | None]:
+    """Validate that message has valid content combinations.
+    Rules:
+    - Must have at least one of: reasoning + tool_calls, reasoning + content,
+      content, raw_content, or tool_calls
+    - If raw_content present with reasoning + content, they should be consistent
+    - Cannot have neither reasoning, content, raw_content, nor tool_calls
+    Args:
+        message: SFTMessage to validate
+        require_content: If True, require some form of content (default: True)
+    Returns:
+        Tuple of (is_valid, error_message)
+    """
+    has_reasoning = bool(message.reasoning)
+    has_content = message.content is not None and message.content != ""
+    has_raw = bool(message.raw_content)
+    has_tools = len(message.tool_calls) > 0
+    # Check for completely empty message
+    if require_content and not (has_reasoning or has_content or has_raw or has_tools):
+        return False, "Message has no reasoning, content, raw_content, or tool_calls"
+    # Valid combinations:
+    # 1. reasoning + tool_calls (reasoning-based action)
+    if has_reasoning and has_tools:
+        return True, None
+    # 2. reasoning + content (reasoning then output)
+    if has_reasoning and has_content:
+        # If raw_content present, validate consistency
+        if has_raw and message.raw_content:
+            # Raw should contain both reasoning and content
+            reasoning_in_raw, content_in_raw = extract_reasoning(message.raw_content)
+            if message.reasoning and reasoning_in_raw != message.reasoning.strip():
+                logger.warning(
+                    "raw_content reasoning doesn't match reasoning field"
+                )
+            # This is okay - just a warning, not an error
+        return True, None
+    # 3. content only (standard message)
+    if has_content and not has_reasoning:
+        return True, None
+    # 4. raw_content only (unparsed content)
+    if has_raw and not (has_reasoning and has_content):
+        return True, None
+    # 5. tool_calls only (action without reasoning/content - like OpenAI format)
+    if has_tools and not has_content:
+        return True, None
+    # 6. reasoning only (pure thinking turn)
+    if has_reasoning and not has_content and not has_tools:
+        return True, None
+    return True, None
+# Vision/Multimodal Utilities
+# ============================================================================
+def has_image_content(content: SFTMessageContent) -> bool:
+    """Check if message content contains image data (OpenAI multimodal format).
+    Supports:
+    - List of content parts: [{"type": "text", ...}, {"type": "image_url", ...}]
+    - Single dict with type field: {"type": "image_url", "image_url": {...}}
+    Args:
+        content: Message content (can be str, list, dict, or None)
+    Returns:
+        True if content contains an image segment
+    Examples:
+        >>> has_image_content([{"type": "text", "text": "What's this?"},
+        ...                    {"type": "image_url", "image_url": {"url": "..."}}])
+        True
+        >>> has_image_content("Just text")
+        False
+    """
+    if isinstance(content, list):
+        return any(
+            isinstance(part, dict) and part.get("type") in {"image", "image_url"}
+            for part in content
+        )
+    elif isinstance(content, dict):
+        return content.get("type") in {"image", "image_url"}
+    return False
+def message_has_image(message: SFTMessage) -> bool:
+    """Check if an SFTMessage contains image content.
+    Args:
+        message: SFTMessage to check
+    Returns:
+        True if the message contains image content
+    """
+    return has_image_content(message.content)
+def example_has_image(example: SFTExample) -> bool:
+    """Check if an SFTExample contains any image content.
+    Args:
+        example: SFTExample to check
+    Returns:
+        True if any message in the example contains image content
+    """
+    return any(message_has_image(msg) for msg in example.messages)
+def count_images_in_content(content: SFTMessageContent) -> int:
+    """Count the number of images in message content.
+    Args:
+        content: Message content to analyze
+    Returns:
+        Number of image segments found
+    """
+    if isinstance(content, list):
+        return sum(
+            1 for part in content
+            if isinstance(part, dict) and part.get("type") in {"image", "image_url"}
+        )
+    elif isinstance(content, dict) and content.get("type") in {"image", "image_url"}:
+        return 1
+    return 0
+def extract_image_urls(content: SFTMessageContent) -> list[str]:
+    """Extract all image URLs from message content.
+    Filters out invalid entries:
+    - Non-string URLs
+    - Empty strings
+    - Whitespace-only strings
+    Args:
+        content: Message content to extract from
+    Returns:
+        List of valid image URL strings (may be http(s):// URLs or data:image/... base64)
+    """
+    urls: list[str] = []
+    if isinstance(content, list):
+        for part in content:
+            if isinstance(part, dict) and part.get("type") in {"image", "image_url"}:
+                # Handle both formats:
+                # {"type": "image_url", "image_url": {"url": "..."}}
+                # {"type": "image", "image": "..."}
+                if "image_url" in part and isinstance(part["image_url"], dict):
+                    url = part["image_url"].get("url")
+                    if isinstance(url, str) and url.strip():  # Filter empty/whitespace
+                        urls.append(url)
+                elif "image" in part and isinstance(part["image"], str):
+                    if part["image"].strip():  # Filter empty/whitespace
+                        urls.append(part["image"])
+    elif isinstance(content, dict) and content.get("type") in {"image", "image_url"}:
+        image_url_data = content.get("image_url")
+        if isinstance(image_url_data, dict):
+            url = image_url_data.get("url")
+            if isinstance(url, str) and url.strip():  # Filter empty/whitespace
+                urls.append(url)
+        else:
+            image_value = content.get("image")
+            if isinstance(image_value, str) and image_value.strip():  # Filter empty/whitespace
+                urls.append(image_value)
+    return urls
+def validate_vision_example(
+    example: SFTExample, *, require_images: bool = True
+) -> tuple[bool, str | None]:
+    """Validate a vision SFT example.
+    Checks:
+    - If require_images is True, at least one message must contain an image
+    - All image URLs must be non-empty, non-whitespace strings
+    - Image entries must have valid URL data
+    - Messages must follow valid structure
+    Args:
+        example: SFTExample to validate
+        require_images: If True, fail if no images are present
+    Returns:
+        Tuple of (is_valid, error_message)
+        If valid, error_message is None
+    """
+    # Count actual valid URLs and detect any invalid entries
+    total_valid_urls = 0
+    # Validate image URLs in each message
+    for i, msg in enumerate(example.messages):
+        # Check if this message has image_url type entries
+        if not isinstance(msg.content, list | dict):
+            continue
+        # Count image_url type entries vs valid URLs
+        content_list = msg.content if isinstance(msg.content, list) else [msg.content]
+        image_type_count = sum(
+            1 for item in content_list
+            if isinstance(item, dict) and item.get("type") in {"image", "image_url"}
+        )
+        if image_type_count > 0:
+            # Extract valid URLs (after filtering)
+            urls = extract_image_urls(msg.content)
+            # If we have image_url type entries but fewer valid URLs, some are invalid
+            if len(urls) < image_type_count:
+                return False, f"Message {i}: Has {image_type_count} image_url entries but only {len(urls)} valid URLs (some are empty, null, or missing)"
+            # Validate each URL (double-check, though extract_image_urls should have filtered)
+            for url in urls:
+                # extract_image_urls already filters for isinstance(url, str) and url.strip()
+                # but let's be defensive
+                if not isinstance(url, str):
+                    return False, f"Message {i}: Image URL is not a string: {type(url)}"
+                if not url.strip():
+                    return False, f"Message {i}: Invalid or empty image URL"
+                # Basic URL format check
+                if not url.startswith(("http://", "https://", "data:image/")):
+                    logger.warning(
+                        f"Message {i}: Image URL doesn't start with http://, https://, or data:image/ - "
+                        f"this may cause issues during training. URL: {url[:100]}"
+                    )
+                total_valid_urls += 1
+    # Final check: if images are required, ensure we found at least one valid URL
+    if require_images and total_valid_urls == 0:
+        return False, "No image content found in any message"
+    return True, None
+def iter_vision_examples(
+    source: Iterable[str],
+    *,
+    min_messages: int = 1,
+    skip_empty: bool = True,
+    require_images: bool = True,
+    log_validation_errors: bool = False,
+) -> Iterator[SFTExample]:
+    """Iterate over vision SFT examples from JSONL source.
+    Similar to iter_sft_examples but with vision-specific validation.
+    Args:
+        source: Iterable of JSONL lines
+        min_messages: Minimum number of messages required
+        skip_empty: Skip empty lines
+        require_images: If True, skip examples without images
+        log_validation_errors: If True, log validation failures
+    Yields:
+        Valid vision SFTExample objects
+    """
+    for line in source:
+        if skip_empty and not line.strip():
+            continue
+        try:
+            example = parse_jsonl_line(line, min_messages=min_messages)
+            # Validate vision content if required
+            if require_images:
+                is_valid, error = validate_vision_example(example, require_images=True)
+                if not is_valid:
+                    if log_validation_errors:
+                        logger.warning(f"Skipping invalid vision example: {error}")
+                    continue
+            yield example
+        except (json.JSONDecodeError, SFTDataError) as exc:
+            if log_validation_errors:
+                logger.warning(f"Failed to parse vision example: {exc}")
+            continue
 __all__ = [
     "SFTDataError",
     "SFTExample",
@@ -292,4 +682,17 @@ __all__ = [
     "load_jsonl",
     "parse_jsonl_line",
     "validate_jsonl_or_raise",
+    # Reasoning utilities
+    "extract_reasoning",
+    "strip_reasoning",
+    "message_has_reasoning",
+    "validate_message_content",
+    # Vision utilities
+    "has_image_content",
+    "message_has_image",
+    "example_has_image",
+    "count_images_in_content",
+    "extract_image_urls",
+    "validate_vision_example",
+    "iter_vision_examples",
 ]

synth_ai/learning/validators.py CHANGED Viewed

@@ -37,7 +37,10 @@ def validate_training_jsonl(path: str | Path, *, sample_lines: int = 50) -> None
 def validate_task_app_url(url: str, *, name: str = "TASK_APP_BASE_URL") -> None:
     from synth_ai.task.validators import validate_task_app_url as _vt
-    _vt(url, name=name)
+    try:
+        _vt(url)
+    except ValueError as exc:
+        raise ValueError(f"{name}: {exc}") from exc
 def validate_trainer_cfg_rl(trainer: dict[str, Any]) -> None:

synth_ai/task/apps/__init__.py CHANGED Viewed

@@ -50,10 +50,12 @@ class TaskAppRegistry:
     def register(self, entry: TaskAppEntry) -> None:
         if entry.app_id in self._entries:
-            raise ValueError(f"Task app already registered: {entry.app_id}")
+            # Allow idempotent registration when modules are imported multiple times.
+            return
         self._entries[entry.app_id] = entry
         for alias in entry.aliases:
-            if alias in self._alias_to_id:
+            existing = self._alias_to_id.get(alias)
+            if existing and existing != entry.app_id:
                 raise ValueError(f"Alias already registered: {alias}")
             self._alias_to_id[alias] = entry.app_id

synth_ai/task/config.py CHANGED Viewed

@@ -185,9 +185,12 @@ class FilterConfig:
             raise ValueError(f"output must be a .jsonl or .json file, got: {self.output}")
         # Validate score thresholds
-        if self.min_official_score is not None and self.max_official_score is not None:
-            if self.min_official_score > self.max_official_score:
-                raise ValueError("min_official_score cannot be greater than max_official_score")
+        if (
+            self.min_official_score is not None
+            and self.max_official_score is not None
+            and self.min_official_score > self.max_official_score
+        ):
+            raise ValueError("min_official_score cannot be greater than max_official_score")
         # Validate limit/offset
         if self.limit is not None and self.limit < 1:
@@ -254,4 +257,3 @@ class FilterConfig:
         output_path.parent.mkdir(parents=True, exist_ok=True)
         return output_path

synth_ai/task/rubrics/__init__.py CHANGED Viewed

@@ -9,10 +9,9 @@ This module provides:
 """
 # Core models (flexible validation)
-from .models import Criterion, Rubric
 # Loading and blending
 from .loaders import blend_rubrics, load_rubric
+from .models import Criterion, Rubric
 # Scoring
 from .scoring import score_events_against_rubric, score_outcome_against_rubric

synth_ai/task/rubrics/loaders.py CHANGED Viewed

@@ -78,15 +78,20 @@ def load_rubric(source: str | dict[str, Any] | Rubric | None) -> Rubric | None:
         data = _parse_structured(text, suffix)
     # Check if this looks like a backend judge rubric (wrong format)
-    if isinstance(data, dict) and "event" in data and "outcome" in data:
-        # Missing required task app rubric fields
-        if "version" not in data and "goal_text" not in data and "criteria" not in data:
-            source_hint = f" ({source})" if isinstance(source, str) else ""
-            raise ValueError(
-                f"Rubric appears to be in backend judge format (has 'event'/'outcome' keys){source_hint}. "
-                f"Task apps require rubrics with 'version', 'goal_text', and 'criteria' fields. "
-                f"Backend judge rubrics should be named '*_backend_judge.json' and loaded by judge functions."
-            )
+    if (
+        isinstance(data, dict)
+        and "event" in data
+        and "outcome" in data
+        and "version" not in data
+        and "goal_text" not in data
+        and "criteria" not in data
+    ):
+        source_hint = f" ({source})" if isinstance(source, str) else ""
+        raise ValueError(
+            f"Rubric appears to be in backend judge format (has 'event'/'outcome' keys){source_hint}. "
+            f"Task apps require rubrics with 'version', 'goal_text', and 'criteria' fields. "
+            f"Backend judge rubrics should be named '*_backend_judge.json' and loaded by judge functions."
+        )
     return Rubric.model_validate(data)
@@ -149,4 +154,3 @@ def blend_rubrics(base: Rubric | None, override: Rubric | None) -> Rubric | None
         criteria=merged,
         aggregation=aggregation,
     )

synth-ai 0.2.14__py3-none-any.whl → 0.2.16__py3-none-any.whl

Potentially problematic release.

synth-ai 0.2.14__py3-none-any.whl → 0.2.16__py3-none-any.whl