PyPI - synth-ai - Versions diffs - 0.2.4.dev7__py3-none-any.whl → 0.2.4.dev8__py3-none-any.whl - Mend

synth-ai 0.2.4.dev7__py3-none-any.whl → 0.2.4.dev8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (50) hide show

synth_ai/__init__.py +1 -1
synth_ai/cli/balance.py +3 -15
synth_ai/config/base_url.py +47 -0
synth_ai/http.py +102 -0
synth_ai/inference/__init__.py +7 -0
synth_ai/inference/client.py +20 -0
synth_ai/jobs/client.py +246 -0
synth_ai/learning/__init__.py +24 -0
synth_ai/learning/client.py +149 -0
synth_ai/learning/config.py +43 -0
synth_ai/learning/constants.py +29 -0
synth_ai/learning/ft_client.py +59 -0
synth_ai/learning/health.py +43 -0
synth_ai/learning/jobs.py +205 -0
synth_ai/learning/rl_client.py +256 -0
synth_ai/learning/sse.py +58 -0
synth_ai/learning/validators.py +48 -0
synth_ai/lm/core/main_v3.py +13 -0
synth_ai/lm/core/synth_models.py +48 -0
synth_ai/lm/core/vendor_clients.py +9 -6
synth_ai/lm/vendors/core/openai_api.py +31 -3
synth_ai/lm/vendors/openai_standard.py +45 -14
synth_ai/lm/vendors/supported/custom_endpoint.py +12 -2
synth_ai/lm/vendors/synth_client.py +372 -28
synth_ai/rl/__init__.py +30 -0
synth_ai/rl/contracts.py +32 -0
synth_ai/rl/env_keys.py +137 -0
synth_ai/rl/secrets.py +19 -0
synth_ai/scripts/verify_rewards.py +100 -0
synth_ai/task/__init__.py +10 -0
synth_ai/task/contracts.py +120 -0
synth_ai/task/health.py +28 -0
synth_ai/task/validators.py +12 -0
synth_ai/tracing_v3/hooks.py +3 -1
synth_ai/tracing_v3/session_tracer.py +123 -2
synth_ai/tracing_v3/turso/manager.py +218 -0
synth_ai/tracing_v3/turso/models.py +53 -0
synth_ai-0.2.4.dev8.dist-info/METADATA +635 -0
{synth_ai-0.2.4.dev7.dist-info → synth_ai-0.2.4.dev8.dist-info}/RECORD +43 -25
synth_ai/tui/__init__.py +0 -1
synth_ai/tui/__main__.py +0 -13
synth_ai/tui/cli/__init__.py +0 -1
synth_ai/tui/cli/query_experiments.py +0 -164
synth_ai/tui/cli/query_experiments_v3.py +0 -164
synth_ai/tui/dashboard.py +0 -340
synth_ai-0.2.4.dev7.dist-info/METADATA +0 -193
{synth_ai-0.2.4.dev7.dist-info → synth_ai-0.2.4.dev8.dist-info}/WHEEL +0 -0
{synth_ai-0.2.4.dev7.dist-info → synth_ai-0.2.4.dev8.dist-info}/entry_points.txt +0 -0
{synth_ai-0.2.4.dev7.dist-info → synth_ai-0.2.4.dev8.dist-info}/licenses/LICENSE +0 -0
{synth_ai-0.2.4.dev7.dist-info → synth_ai-0.2.4.dev8.dist-info}/top_level.txt +0 -0

synth_ai/learning/sse.py ADDED Viewed

@@ -0,0 +1,58 @@
+from __future__ import annotations
+import json
+import time
+from typing import Any, Callable, Optional
+import aiohttp
+def _api_base(b: str) -> str:
+    """Return ``b`` without trailing slashes and ending in ``/api``.
+
+    A falsy ``b`` is treated as the empty string, which yields ``"/api"``.
+    A value that already ends in ``/api`` is returned unchanged (after the
+    trailing slashes are removed).
+    """
+    trimmed = (b or "").rstrip("/")
+    if trimmed.endswith("/api"):
+        return trimmed
+    return f"{trimmed}/api"
+async def stream_events(
+    base_url: str,
+    api_key: str,
+    job_id: str,
+    *,
+    seconds: int = 60,
+    on_event: Optional[Callable[[dict], None]] = None,
+) -> None:
+    """Stream a job's server-sent events for at most ``seconds`` seconds.
+
+    Two endpoints are tried in order (``/rl/jobs/...`` then
+    ``/learning/jobs/...``). A candidate that does not answer 200, or that
+    fails with an exception, is skipped. Each ``data:`` line whose payload
+    parses as JSON is handed to ``on_event``.
+
+    Args:
+        base_url: Backend base URL; ``/api`` is appended when missing.
+        api_key: Sent as a ``Bearer`` token in the ``Authorization`` header.
+        job_id: Identifier interpolated into the events URL.
+        seconds: Time budget per connected stream. Non-positive values make
+            the call a no-op.
+        on_event: Optional callback invoked with each decoded payload.
+            Exceptions raised by the callback are deliberately ignored so a
+            faulty consumer cannot break the stream.
+
+    Returns:
+        None. The function returns as soon as the time budget is spent; if a
+        stream ends earlier, the next candidate URL is tried.
+    """
+    import asyncio  # local import: only needed to bound each read by the deadline
+
+    if seconds <= 0:
+        return
+    headers = {"Accept": "text/event-stream", "Authorization": f"Bearer {api_key}"}
+    candidates = [
+        f"{_api_base(base_url)}/rl/jobs/{job_id}/events?since_seq=0",
+        f"{_api_base(base_url)}/learning/jobs/{job_id}/events?since_seq=0",
+    ]
+    for url in candidates:
+        try:
+            async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=None)) as session:
+                async with session.get(url, headers=headers) as resp:
+                    if resp.status != 200:
+                        continue
+                    # Monotonic clock: immune to wall-clock adjustments.
+                    deadline = time.monotonic() + seconds
+                    while True:
+                        remaining = deadline - time.monotonic()
+                        if remaining <= 0:
+                            return
+                        # Bound every read by the remaining budget so that an idle
+                        # stream, or one that only sends keep-alive comments,
+                        # still stops on time.
+                        try:
+                            raw = await asyncio.wait_for(resp.content.readline(), timeout=remaining)
+                        except asyncio.TimeoutError:
+                            return
+                        if not raw:
+                            # End of stream: fall through to the next candidate.
+                            break
+                        line = raw.decode(errors="ignore").strip()
+                        # Blank lines, ":" comments and non-data fields are skipped.
+                        if not line.startswith("data:"):
+                            continue
+                        try:
+                            obj = json.loads(line[5:].strip())
+                        except (ValueError, RecursionError):
+                            # Payload is not valid JSON; ignore this event.
+                            continue
+                        if on_event:
+                            try:
+                                on_event(obj)
+                            except Exception:
+                                # Best effort: consumer errors must not end the stream.
+                                pass
+        except Exception:
+            # Connection or protocol failure: try the next candidate URL.
+            continue

synth_ai/learning/validators.py ADDED Viewed

@@ -0,0 +1,48 @@
+from __future__ import annotations
+from pathlib import Path
+import json
+from typing import Any, Dict
+from urllib.parse import urlparse
+def validate_training_jsonl(path: str | Path, *, sample_lines: int = 50) -> None:
+    """Sanity-check the first lines of a chat-format training JSONL file.
+
+    Each non-blank sampled line must be a JSON object whose ``messages`` key
+    holds a list of at least two dicts, each with a string ``role`` and a
+    non-blank string ``content``.
+
+    Args:
+        path: Location of the JSONL file.
+        sample_lines: Number of leading lines to inspect (at least one line
+            is always inspected).
+
+    Raises:
+        FileNotFoundError: ``path`` does not exist.
+        ValueError: The file has no lines, or a sampled line is not valid
+            JSON, is not a JSON object, or violates the message rules above.
+    """
+    from itertools import islice
+
+    p = Path(path)
+    if not p.exists():
+        raise FileNotFoundError(str(p))
+    saw_line = False
+    # Iterate the file handle instead of read_text().splitlines(): only real
+    # newlines end a record (U+2028 and similar are legal inside JSON strings),
+    # and only the sampled prefix is read. "utf-8-sig" accepts UTF-8 with or
+    # without a BOM, independent of the platform's locale encoding.
+    with p.open(encoding="utf-8-sig") as fh:
+        for i, raw in enumerate(islice(fh, max(1, sample_lines)), start=1):
+            saw_line = True
+            line = raw.rstrip("\r\n")
+            if not line.strip():
+                continue
+            try:
+                obj = json.loads(line)
+            except ValueError as e:
+                raise ValueError(f"invalid json on line {i}: {e}") from e
+            if not isinstance(obj, dict):
+                # Without this guard a list/number/string line crashed with
+                # AttributeError on ``.get`` instead of a validation error.
+                raise ValueError(f"line {i}: expected a JSON object")
+            msgs = obj.get("messages")
+            if not isinstance(msgs, list) or len(msgs) < 2:
+                raise ValueError(f"line {i}: missing messages[] with at least 2 turns")
+            roles = [m.get("role") for m in msgs if isinstance(m, dict)]
+            if not roles or not isinstance(roles[0], str):
+                raise ValueError(f"line {i}: missing first role")
+            for m in msgs:
+                if not isinstance(m, dict):
+                    raise ValueError(f"line {i}: non-dict message")
+                if not isinstance(m.get("role"), str) or not isinstance(m.get("content"), str) or not m["content"].strip():
+                    raise ValueError(f"line {i}: invalid role/content")
+    if not saw_line:
+        raise ValueError("empty JSONL")
+def validate_task_app_url(url: str, *, name: str = "TASK_APP_BASE_URL") -> None:
+    # Thin wrapper: forwards ``url`` and ``name`` unchanged to
+    # ``synth_ai.task.validators.validate_task_app_url``.
+    # The import is done inside the function, so the task package is only
+    # loaded when this validator is actually called.
+    from synth_ai.task.validators import validate_task_app_url as _vt
+    _vt(url, name=name)
+def validate_trainer_cfg_rl(trainer: Dict[str, Any]) -> None:
+    """Check the RL trainer settings ``batch_size`` and ``group_size``.
+
+    Missing keys default to 1 and 2 respectively. Both values are coerced
+    with ``int()`` before any bound is checked.
+
+    Raises:
+        ValueError: ``batch_size`` is below 1 or ``group_size`` is below 2.
+    """
+    batch_size = int(trainer.get("batch_size", 1))
+    group_size = int(trainer.get("group_size", 2))
+    # (value, minimum, message) triples, checked in order.
+    bounds = (
+        (batch_size, 1, "trainer.batch_size must be >= 1"),
+        (group_size, 2, "trainer.group_size must be >= 2"),
+    )
+    for value, minimum, message in bounds:
+        if value < minimum:
+            raise ValueError(message)

synth_ai/lm/core/main_v3.py CHANGED Viewed

@@ -117,7 +117,11 @@ class LM:
         if enable_v2_tracing is not None:
             enable_v3_tracing = enable_v2_tracing
+        # Debug logging
+        print(f"🔍 LM __init__: provider={provider}, vendor={vendor}, model={model}")
         # If vendor not provided, infer from model name
+        # But only if no explicit provider was given
         if vendor is None and model is not None:
             # Import vendor detection logic
             from synth_ai.lm.core.vendor_clients import (
@@ -156,6 +160,7 @@ class LM:
         self.vendor = vendor
         self.model = model
+        print(f"🔍 LM final: vendor={self.vendor}, model={self.model}")
         self.is_structured = is_structured
         self.structured_outputs_vendor = structured_outputs_vendor
         self.response_format = response_format
@@ -337,6 +342,14 @@ class LM:
             if hasattr(vendor_wrapper, "_hit_api_async"):
                 # OpenAIStandard expects lm_config
                 lm_config = {"temperature": self.temperature, **self.additional_params, **kwargs}
+                # Map convenience enable_thinking => thinking_mode unless explicitly set
+                if "enable_thinking" in lm_config and "thinking_mode" not in lm_config:
+                    try:
+                        et = lm_config.get("enable_thinking")
+                        if isinstance(et, bool):
+                            lm_config["thinking_mode"] = "think" if et else "no_think"
+                    except Exception:
+                        pass
                 if self.json_mode:
                     lm_config["response_format"] = {"type": "json_object"}

synth_ai/lm/core/synth_models.py ADDED Viewed

@@ -0,0 +1,48 @@
+"""
+Synth-supported models registry.
+This module defines the specific models that are supported by Synth's infrastructure.
+Models are listed explicitly, grouped by family, for easy maintenance and extension.
+MAINTENANCE GUIDE:
+1. Add new models to the appropriate list (QWEN3_MODELS, FINE_TUNED_MODELS)
+2. Fine-tuned models (ft:) are matched by a regex in the vendor routing code, not by this module
+3. SYNTH_SUPPORTED_MODELS is built from those lists; include any newly added list in it
+4. Test changes with: pytest tests/lms/test_qwen_chat_completions.py
+WHY THIS EXISTS:
+- The previous regex (^.*\\/.*$) was too broad and caught unintended models
+- This provides explicit control over which models use Synth infrastructure
+- Easier to maintain and debug model routing issues
+"""
+from typing import List, Set
+# Qwen3 model families supported by Synth
+QWEN3_MODELS: List[str] = [
+    # Qwen3 base models
+    "Qwen/Qwen3-0.6B",
+    "Qwen/Qwen3-1.7B",
+    "Qwen/Qwen3-4B",
+    "Qwen/Qwen3-8B",
+    "Qwen/Qwen3-14B",
+    "Qwen/Qwen3-32B",
+    # Qwen3 specialized variants
+    "Qwen/Qwen3-4B-Instruct-2507",
+    "Qwen/Qwen3-4B-Thinking-2507",
+]
+# Explicitly known fine-tuned model names. The list is currently empty;
+# entries added here become members of SYNTH_SUPPORTED_MODELS below.
+FINE_TUNED_MODELS: List[str] = [
+    # Add specific fine-tuned models that are known to work with Synth
+    # Examples:
+    # "ft:Qwen/Qwen3-4B-Instruct-2507:ftjob-22",
+]
+# Union of every list above, as a set for O(1) membership tests
+SYNTH_SUPPORTED_MODELS: Set[str] = set(QWEN3_MODELS + FINE_TUNED_MODELS)
+# Public names exported by this module
+__all__ = ["SYNTH_SUPPORTED_MODELS", "QWEN3_MODELS", "FINE_TUNED_MODELS"]

synth_ai/lm/core/vendor_clients.py CHANGED Viewed

@@ -21,6 +21,7 @@ from synth_ai.lm.core.all import (
     OpenRouterClient,
     TogetherClient,
 )
+from synth_ai.lm.core.synth_models import SYNTH_SUPPORTED_MODELS
 # Regular expressions to match model names to their respective providers
 openai_naming_regexes: list[Pattern] = [
@@ -39,8 +40,10 @@ gemini_naming_regexes: list[Pattern] = [
 deepseek_naming_regexes: list[Pattern] = [
     re.compile(r"^deepseek-.*$"),
 ]
-together_naming_regexes: list[Pattern] = [
-    re.compile(r"^.*\/.*$"),
+# Synth-specific model patterns (Qwen3 and fine-tuned models)
+synth_naming_regexes: list[Pattern] = [
+    re.compile(r"^ft:.*$"),  # Fine-tuned models (ft:model-name)
+    re.compile(r"^Qwen/Qwen3.*$"),  # Qwen3 models specifically (Qwen/Qwen3-*)
 ]
 groq_naming_regexes: list[Pattern] = [
@@ -79,8 +82,6 @@ openrouter_naming_regexes: list[Pattern] = [
 # Custom endpoint patterns - check these before generic patterns
 custom_endpoint_naming_regexes: list[Pattern] = [
-    # Modal endpoints: org--app.modal.run
-    re.compile(r"^[a-zA-Z0-9\-]+--[a-zA-Z0-9\-]+\.modal\.run$"),
     # Generic domain patterns for custom endpoints
     re.compile(r"^[a-zA-Z0-9\-]+\.[a-zA-Z0-9\-]+\.[a-zA-Z]+$"),  # domain.tld
     re.compile(r"^[a-zA-Z0-9\-]+\.[a-zA-Z0-9\-]+\.[a-zA-Z]+\/[a-zA-Z0-9\-\/]+$"),  # domain.tld/path
@@ -179,7 +180,9 @@ def get_client(
     elif any(regex.match(model_name) for regex in custom_endpoint_naming_regexes):
         # Custom endpoints are passed as the endpoint URL
         return CustomEndpointClient(endpoint_url=model_name)
-    elif any(regex.match(model_name) for regex in together_naming_regexes):
-        return TogetherClient()
+    elif (any(regex.match(model_name) for regex in synth_naming_regexes) or
+          model_name in SYNTH_SUPPORTED_MODELS):
+        # Synth models use OpenAI-compatible client with custom endpoint
+        return OpenAIStructuredOutputClient(synth_logging=synth_logging)
     else:
         raise ValueError(f"Invalid model name: {model_name}")

synth_ai/lm/vendors/core/openai_api.py CHANGED Viewed

@@ -6,6 +6,7 @@ supporting both standard and structured output modes.
 """
 import json
+import os
 from typing import Any
 import openai
@@ -42,18 +43,45 @@ class OpenAIStructuredOutputClient(OpenAIStandard):
     """
     def __init__(self, synth_logging: bool = True):
-        if synth_logging:
+        # Check if we should use Synth clients instead of OpenAI
+        openai_base = os.getenv("OPENAI_API_BASE", "")
+        use_synth = (openai_base.startswith("https://synth") or
+                    openai_base.startswith("https://agent-learning") or
+                    os.getenv("SYNTH_BASE_URL") or os.getenv("MODAL_BASE_URL"))
+        if use_synth:
+            # Use Synth clients for Synth endpoints
+            from synth_ai.lm.vendors.synth_client import AsyncSynthClient, SyncSynthClient
+            from synth_ai.lm.config import SynthConfig
+            # Create config from OPENAI_* environment variables if available
+            openai_base = os.getenv("OPENAI_API_BASE")
+            openai_key = os.getenv("OPENAI_API_KEY")
+            if openai_base and openai_key:
+                config = SynthConfig(base_url=openai_base, api_key=openai_key)
+                sync_client = SyncSynthClient(config)
+                async_client = AsyncSynthClient(config)
+            else:
+                # Fall back to default config loading
+                sync_client = SyncSynthClient()
+                async_client = AsyncSynthClient()
+        elif synth_logging:
             # print("Using synth logging - OpenAIStructuredOutputClient")
             from synth_ai.lm.provider_support.openai import AsyncOpenAI, OpenAI
+            sync_client = OpenAI()
+            async_client = AsyncOpenAI()
         else:
             # print("Not using synth logging - OpenAIStructuredOutputClient")
             from openai import AsyncOpenAI, OpenAI
+            sync_client = OpenAI()
+            async_client = AsyncOpenAI()
         super().__init__(
             used_for_structured_outputs=True,
             exceptions_to_retry=OPENAI_EXCEPTIONS_TO_RETRY,
-            sync_client=OpenAI(),
-            async_client=AsyncOpenAI(),
+            sync_client=sync_client,
+            async_client=async_client,
         )
     async def _hit_api_async_structured_output(

synth_ai/lm/vendors/openai_standard.py CHANGED Viewed

@@ -207,7 +207,22 @@ class OpenAIStandard(VendorBase, OpenAIResponsesAPIMixin):
             api_params = apply_tool_overrides(api_params)
             api_params = apply_param_overrides(api_params)
-        # Forward Qwen3 chat template kwargs via extra_body when requested
+        # Thinking controls: route via extra_body.chat_template_kwargs for compatibility
+        thinking_mode_val = lm_config.get("thinking_mode")
+        thinking_budget_val = lm_config.get("thinking_budget")
+        if thinking_mode_val is not None or thinking_budget_val is not None:
+            api_params["extra_body"] = api_params.get("extra_body", {})
+            ctk = api_params["extra_body"].get("chat_template_kwargs", {})
+            if thinking_mode_val is not None:
+                ctk["thinking_mode"] = thinking_mode_val
+            if thinking_budget_val is not None:
+                try:
+                    ctk["thinking_budget"] = int(thinking_budget_val)
+                except Exception:
+                    ctk["thinking_budget"] = thinking_budget_val
+            api_params["extra_body"]["chat_template_kwargs"] = ctk
+        # Backward-compatible: forward legacy enable_thinking only via extra_body for callers still using it
         if lm_config.get("enable_thinking") is not None:
             api_params["extra_body"] = api_params.get("extra_body", {})
             ctk = api_params["extra_body"].get("chat_template_kwargs", {})
@@ -220,7 +235,7 @@ class OpenAIStandard(VendorBase, OpenAIResponsesAPIMixin):
                 **api_params.get("extra_body", {}),
                 **(lm_config.get("extra_body") or {}),
             }
-        # Forward Qwen3 chat template kwargs via extra_body when requested
+        # Ensure legacy extra_body flag remains merged (do not override top-level fields)
         if lm_config.get("enable_thinking") is not None:
             api_params["extra_body"] = api_params.get("extra_body", {})
             ctk = api_params["extra_body"].get("chat_template_kwargs", {})
@@ -387,20 +402,36 @@ class OpenAIStandard(VendorBase, OpenAIResponsesAPIMixin):
         #     raise
         message = output.choices[0].message
-        # Convert tool calls to dict format
+        # Convert tool calls to dict format, preferring dict-shaped entries first
         tool_calls = None
         if message.tool_calls:
-            tool_calls = [
-                {
-                    "id": tc.id,
-                    "type": tc.type,
-                    "function": {
-                        "name": tc.function.name,
-                        "arguments": tc.function.arguments,
-                    },
-                }
-                for tc in message.tool_calls
-            ]
+            converted: list[dict] = []
+            for tc in message.tool_calls:
+                if isinstance(tc, dict):
+                    fn = tc.get("function") or {}
+                    converted.append(
+                        {
+                            "id": tc.get("id"),
+                            "type": tc.get("type", "function"),
+                            "function": {
+                                "name": fn.get("name") or tc.get("name"),
+                                "arguments": fn.get("arguments") or tc.get("arguments"),
+                            },
+                        }
+                    )
+                else:
+                    # SDK object path
+                    converted.append(
+                        {
+                            "id": getattr(tc, "id", None),
+                            "type": getattr(tc, "type", "function"),
+                            "function": {
+                                "name": getattr(getattr(tc, "function", None), "name", None),
+                                "arguments": getattr(getattr(tc, "function", None), "arguments", None),
+                            },
+                        }
+                    )
+            tool_calls = converted or None
         # Attach basic usage if available
         usage_dict = None

synth_ai/lm/vendors/supported/custom_endpoint.py CHANGED Viewed

@@ -38,8 +38,18 @@ class CustomEndpointAPI(VendorBase):
         # Construct full chat completions URL
         if endpoint_url.endswith("/"):
             endpoint_url = endpoint_url[:-1]
-        self.chat_completions_url = f"https://{endpoint_url}/chat/completions"
-        self.health_url = f"https://{endpoint_url}/health"
+        # Handle full URLs that already include protocol
+        if endpoint_url.startswith(("http://", "https://")):
+            # Remove protocol and domain part, keep only the base path if any
+            parsed = endpoint_url.replace("https://", "").replace("http://", "")
+            base_url = parsed.split("/")[0]  # Get domain only
+            self.chat_completions_url = f"https://{base_url}/chat/completions"
+            self.health_url = f"https://{base_url}/health"
+        else:
+            # Original logic for domain-only URLs
+            self.chat_completions_url = f"https://{endpoint_url}/chat/completions"
+            self.health_url = f"https://{endpoint_url}/health"
         # Setup session with connection pooling and retries
         self.session = self._create_session()

synth-ai 0.2.4.dev7__py3-none-any.whl → 0.2.4.dev8__py3-none-any.whl

synth-ai 0.2.4.dev7__py3-none-any.whl → 0.2.4.dev8__py3-none-any.whl