synth-ai 0.2.17__py3-none-any.whl → 0.2.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of synth-ai has been flagged as potentially problematic.

Files changed (169)
  1. examples/baseline/banking77_baseline.py +204 -0
  2. examples/baseline/crafter_baseline.py +407 -0
  3. examples/baseline/pokemon_red_baseline.py +326 -0
  4. examples/baseline/simple_baseline.py +56 -0
  5. examples/baseline/warming_up_to_rl_baseline.py +239 -0
  6. examples/blog_posts/gepa/README.md +355 -0
  7. examples/blog_posts/gepa/configs/banking77_gepa_local.toml +95 -0
  8. examples/blog_posts/gepa/configs/banking77_gepa_test.toml +82 -0
  9. examples/blog_posts/gepa/configs/banking77_mipro_local.toml +52 -0
  10. examples/blog_posts/gepa/configs/hotpotqa_gepa_local.toml +59 -0
  11. examples/blog_posts/gepa/configs/hotpotqa_gepa_qwen.toml +36 -0
  12. examples/blog_posts/gepa/configs/hotpotqa_mipro_local.toml +53 -0
  13. examples/blog_posts/gepa/configs/hover_gepa_local.toml +59 -0
  14. examples/blog_posts/gepa/configs/hover_gepa_qwen.toml +36 -0
  15. examples/blog_posts/gepa/configs/hover_mipro_local.toml +53 -0
  16. examples/blog_posts/gepa/configs/ifbench_gepa_local.toml +59 -0
  17. examples/blog_posts/gepa/configs/ifbench_gepa_qwen.toml +36 -0
  18. examples/blog_posts/gepa/configs/ifbench_mipro_local.toml +53 -0
  19. examples/blog_posts/gepa/configs/pupa_gepa_local.toml +60 -0
  20. examples/blog_posts/gepa/configs/pupa_mipro_local.toml +54 -0
  21. examples/blog_posts/gepa/deploy_banking77_task_app.sh +41 -0
  22. examples/blog_posts/gepa/gepa_baseline.py +204 -0
  23. examples/blog_posts/gepa/query_prompts_example.py +97 -0
  24. examples/blog_posts/gepa/run_gepa_banking77.sh +87 -0
  25. examples/blog_posts/gepa/task_apps.py +105 -0
  26. examples/blog_posts/gepa/test_gepa_local.sh +67 -0
  27. examples/blog_posts/gepa/verify_banking77_setup.sh +123 -0
  28. examples/blog_posts/pokemon_vl/configs/eval_gpt5nano.toml +26 -0
  29. examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +12 -10
  30. examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +1 -0
  31. examples/blog_posts/pokemon_vl/extract_images.py +239 -0
  32. examples/blog_posts/pokemon_vl/pokemon_vl_baseline.py +326 -0
  33. examples/blog_posts/pokemon_vl/run_eval_extract_images.py +209 -0
  34. examples/blog_posts/pokemon_vl/run_qwen_eval_extract_images.py +212 -0
  35. examples/blog_posts/pokemon_vl/text_box_analysis.md +106 -0
  36. examples/blog_posts/warming_up_to_rl/ARCHITECTURE.md +195 -0
  37. examples/blog_posts/warming_up_to_rl/FINAL_TEST_RESULTS.md +127 -0
  38. examples/blog_posts/warming_up_to_rl/INFERENCE_SUCCESS.md +132 -0
  39. examples/blog_posts/warming_up_to_rl/SMOKE_TESTING.md +164 -0
  40. examples/blog_posts/warming_up_to_rl/SMOKE_TEST_COMPLETE.md +253 -0
  41. examples/blog_posts/warming_up_to_rl/configs/eval_baseline_qwen32b_10x20.toml +25 -0
  42. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b_10x20.toml +26 -0
  43. examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +1 -1
  44. examples/blog_posts/warming_up_to_rl/configs/smoke_test.toml +75 -0
  45. examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +60 -10
  46. examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +1 -1
  47. examples/blog_posts/warming_up_to_rl/warming_up_to_rl_baseline.py +187 -0
  48. examples/multi_step/configs/VERILOG_REWARDS.md +4 -0
  49. examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +4 -0
  50. examples/multi_step/configs/crafter_rl_outcome.toml +1 -0
  51. examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +1 -0
  52. examples/multi_step/configs/crafter_rl_stepwise_simple.toml +1 -0
  53. examples/rl/configs/rl_from_base_qwen17.toml +1 -0
  54. examples/swe/task_app/hosted/inference/openai_client.py +0 -34
  55. examples/swe/task_app/hosted/policy_routes.py +17 -0
  56. examples/swe/task_app/hosted/rollout.py +4 -2
  57. examples/task_apps/banking77/__init__.py +6 -0
  58. examples/task_apps/banking77/banking77_task_app.py +841 -0
  59. examples/task_apps/banking77/deploy_wrapper.py +46 -0
  60. examples/task_apps/crafter/CREATE_SFT_DATASET.md +4 -0
  61. examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +4 -0
  62. examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +4 -0
  63. examples/task_apps/crafter/task_app/grpo_crafter.py +24 -2
  64. examples/task_apps/crafter/task_app/synth_envs_hosted/hosted_app.py +49 -0
  65. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +355 -58
  66. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +68 -7
  67. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +78 -21
  68. examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +194 -1
  69. examples/task_apps/gepa_benchmarks/__init__.py +7 -0
  70. examples/task_apps/gepa_benchmarks/common.py +260 -0
  71. examples/task_apps/gepa_benchmarks/hotpotqa_task_app.py +507 -0
  72. examples/task_apps/gepa_benchmarks/hover_task_app.py +436 -0
  73. examples/task_apps/gepa_benchmarks/ifbench_task_app.py +563 -0
  74. examples/task_apps/gepa_benchmarks/pupa_task_app.py +460 -0
  75. examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +4 -0
  76. examples/task_apps/pokemon_red/task_app.py +254 -36
  77. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +1 -0
  78. examples/warming_up_to_rl/task_app/grpo_crafter.py +53 -4
  79. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +49 -0
  80. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +152 -41
  81. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +31 -1
  82. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +33 -3
  83. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +67 -0
  84. examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +1 -0
  85. synth_ai/api/train/builders.py +90 -1
  86. synth_ai/api/train/cli.py +396 -21
  87. synth_ai/api/train/config_finder.py +13 -2
  88. synth_ai/api/train/configs/__init__.py +15 -1
  89. synth_ai/api/train/configs/prompt_learning.py +442 -0
  90. synth_ai/api/train/configs/rl.py +29 -0
  91. synth_ai/api/train/task_app.py +1 -1
  92. synth_ai/api/train/validators.py +277 -0
  93. synth_ai/baseline/__init__.py +25 -0
  94. synth_ai/baseline/config.py +209 -0
  95. synth_ai/baseline/discovery.py +214 -0
  96. synth_ai/baseline/execution.py +146 -0
  97. synth_ai/cli/__init__.py +85 -17
  98. synth_ai/cli/__main__.py +0 -0
  99. synth_ai/cli/claude.py +70 -0
  100. synth_ai/cli/codex.py +84 -0
  101. synth_ai/cli/commands/__init__.py +1 -0
  102. synth_ai/cli/commands/baseline/__init__.py +12 -0
  103. synth_ai/cli/commands/baseline/core.py +637 -0
  104. synth_ai/cli/commands/baseline/list.py +93 -0
  105. synth_ai/cli/commands/eval/core.py +13 -10
  106. synth_ai/cli/commands/filter/core.py +53 -17
  107. synth_ai/cli/commands/help/core.py +0 -1
  108. synth_ai/cli/commands/smoke/__init__.py +7 -0
  109. synth_ai/cli/commands/smoke/core.py +1436 -0
  110. synth_ai/cli/commands/status/subcommands/pricing.py +22 -0
  111. synth_ai/cli/commands/status/subcommands/usage.py +203 -0
  112. synth_ai/cli/commands/train/judge_schemas.py +1 -0
  113. synth_ai/cli/commands/train/judge_validation.py +1 -0
  114. synth_ai/cli/commands/train/validation.py +0 -57
  115. synth_ai/cli/demo.py +35 -3
  116. synth_ai/cli/deploy/__init__.py +40 -25
  117. synth_ai/cli/deploy.py +162 -0
  118. synth_ai/cli/legacy_root_backup.py +14 -8
  119. synth_ai/cli/opencode.py +107 -0
  120. synth_ai/cli/root.py +9 -5
  121. synth_ai/cli/task_app_deploy.py +1 -1
  122. synth_ai/cli/task_apps.py +53 -53
  123. synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +7 -4
  124. synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +9 -5
  125. synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +4 -3
  126. synth_ai/judge_schemas.py +1 -0
  127. synth_ai/learning/__init__.py +10 -0
  128. synth_ai/learning/prompt_learning_client.py +276 -0
  129. synth_ai/learning/prompt_learning_types.py +184 -0
  130. synth_ai/pricing/__init__.py +2 -0
  131. synth_ai/pricing/model_pricing.py +57 -0
  132. synth_ai/streaming/handlers.py +53 -4
  133. synth_ai/streaming/streamer.py +19 -0
  134. synth_ai/task/apps/__init__.py +1 -0
  135. synth_ai/task/config.py +2 -0
  136. synth_ai/task/tracing_utils.py +25 -25
  137. synth_ai/task/validators.py +44 -8
  138. synth_ai/task_app_cfgs.py +21 -0
  139. synth_ai/tracing_v3/config.py +162 -19
  140. synth_ai/tracing_v3/constants.py +1 -1
  141. synth_ai/tracing_v3/db_config.py +24 -38
  142. synth_ai/tracing_v3/storage/config.py +47 -13
  143. synth_ai/tracing_v3/storage/factory.py +3 -3
  144. synth_ai/tracing_v3/turso/daemon.py +113 -11
  145. synth_ai/tracing_v3/turso/native_manager.py +92 -16
  146. synth_ai/types.py +8 -0
  147. synth_ai/urls.py +11 -0
  148. synth_ai/utils/__init__.py +30 -1
  149. synth_ai/utils/agents.py +74 -0
  150. synth_ai/utils/bin.py +39 -0
  151. synth_ai/utils/cli.py +149 -5
  152. synth_ai/utils/env.py +17 -17
  153. synth_ai/utils/json.py +72 -0
  154. synth_ai/utils/modal.py +283 -1
  155. synth_ai/utils/paths.py +48 -0
  156. synth_ai/utils/uvicorn.py +113 -0
  157. {synth_ai-0.2.17.dist-info → synth_ai-0.2.19.dist-info}/METADATA +102 -4
  158. {synth_ai-0.2.17.dist-info → synth_ai-0.2.19.dist-info}/RECORD +162 -88
  159. synth_ai/cli/commands/deploy/__init__.py +0 -23
  160. synth_ai/cli/commands/deploy/core.py +0 -614
  161. synth_ai/cli/commands/deploy/errors.py +0 -72
  162. synth_ai/cli/commands/deploy/validation.py +0 -11
  163. synth_ai/cli/deploy/core.py +0 -5
  164. synth_ai/cli/deploy/errors.py +0 -23
  165. synth_ai/cli/deploy/validation.py +0 -5
  166. {synth_ai-0.2.17.dist-info → synth_ai-0.2.19.dist-info}/WHEEL +0 -0
  167. {synth_ai-0.2.17.dist-info → synth_ai-0.2.19.dist-info}/entry_points.txt +0 -0
  168. {synth_ai-0.2.17.dist-info → synth_ai-0.2.19.dist-info}/licenses/LICENSE +0 -0
  169. {synth_ai-0.2.17.dist-info → synth_ai-0.2.19.dist-info}/top_level.txt +0 -0
@@ -3,6 +3,7 @@ from __future__ import annotations
  import asyncio
  import contextlib
  import logging
+ import os
  from typing import Any

  import httpx
@@ -23,6 +24,15 @@ class OpenAIClient:
  self.api_key = api_key
  self.timeout_s = timeout_s
  self.headers = {}
+ self._env_api_key: str | None = None
+
+ try:
+     env_key = os.getenv("ENVIRONMENT_API_KEY") or ""
+     env_key = env_key.strip()
+     if env_key:
+         self._env_api_key = env_key
+ except Exception:
+     self._env_api_key = None

  if api_key:
      self.headers["Authorization"] = f"Bearer {api_key}"
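In effect, the client now captures ENVIRONMENT_API_KEY once at construction time. A minimal sketch of the intended behavior, assuming the constructor accepts the attributes shown above (base_url, api_key, timeout_s) and with an illustrative key value:

    import os

    os.environ["ENVIRONMENT_API_KEY"] = "env-key-123"  # illustrative, not a real key
    client = OpenAIClient(base_url="https://backend.example/api/proxy/v1", api_key=None)
    # client._env_api_key is now "env-key-123"; requests whose target path contains
    # "/proxy/" later gain an X-API-Key header with this value (see the header merge
    # further down in this diff).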
@@ -137,18 +147,49 @@ class OpenAIClient:
  Returns:
      OpenAI-compatible chat completion response
  """
- url = (base_url or self.base_url).rstrip("/") + "/v1/chat/completions"
+ # Build target URL robustly: if a full endpoint is given (with query or already ending
+ # in /chat/completions), preserve it; otherwise, append the path BEFORE query params.
+ from urllib.parse import urlparse, urlunparse
+
+ candidate = (base_url or self.base_url).strip()
+ try:
+     parsed = urlparse(candidate)
+     # If no scheme, treat as relative base (pass-through)
+     if not parsed.scheme or not parsed.netloc:
+         base_no_slash = candidate.rstrip("/")
+         url = f"{base_no_slash}/v1/chat/completions"
+     else:
+         path = (parsed.path or "").rstrip("/")
+         if path.endswith("/v1/chat/completions") or path.endswith("/chat/completions"):
+             new_path = path
+         elif path.endswith("/v1"):
+             new_path = f"{path}/chat/completions"
+         elif path.endswith("/chat"):
+             new_path = f"{path}/completions"
+         else:
+             new_path = f"{path}/v1/chat/completions" if path else "/v1/chat/completions"
+         url = urlunparse(parsed._replace(path=new_path))
+ except Exception:
+     # Fallback to legacy behavior
+     url = (base_url or self.base_url).rstrip("/") + "/v1/chat/completions"
  timeout = timeout_s or self.timeout_s

  # Merge headers
  headers = self.headers.copy()
+ try:
+     parsed_target = urlparse(url)
+     path_for_auth = (parsed_target.path or "") if parsed_target else ""
+     if self._env_api_key and "/proxy/" in path_for_auth:
+         headers.setdefault("X-API-Key", self._env_api_key)
+ except Exception:
+     pass
  if extra_headers:
      headers.update(extra_headers)

  # Fix parameter compatibility for newer models
  processed_request = self._fix_model_parameters(request, target_url=url)

- # Log request (redact messages in production)
+ # Log request with detailed prompts/tools preview and sampling settings (Authorization is not logged)
  logger.info(f"Inference POST target: {url}")
  if extra_headers:
      logger.info(f"Extra headers: {extra_headers}")
@@ -156,6 +197,62 @@ class OpenAIClient:
  keys_preview = sorted(processed_request.keys())
  logger.info(f"Request keys: {keys_preview}")

+ # Detailed IO log: messages/tools/sampling and final payload fields
+ try:
+     import json as _json
+
+     def _truncate(text: str, limit: int = 2000) -> str:
+         return text if len(text) <= limit else text[:limit] + "…"
+
+     def _messages_preview(msgs: Any) -> str:
+         try:
+             out: list[dict[str, Any]] = []
+             if isinstance(msgs, list):
+                 for m in msgs:
+                     if not isinstance(m, dict):
+                         continue
+                     role = m.get("role")
+                     content = m.get("content")
+                     if isinstance(content, str):
+                         text = content
+                     elif isinstance(content, list):
+                         parts: list[str] = []
+                         for seg in content:
+                             if isinstance(seg, dict) and isinstance(seg.get("text"), str):
+                                 parts.append(seg["text"])
+                         text = "\n".join(parts)
+                     else:
+                         text = ""
+                     out.append({"role": role, "content": _truncate(str(text), 4000)})
+             return _json.dumps(out)
+         except Exception:
+             return "[]"
+
+     def _tools_preview(tools: Any) -> str:
+         try:
+             return _truncate(_json.dumps(tools), 4000)
+         except Exception:
+             return "[]"
+
+     msgs = processed_request.get("messages") if isinstance(processed_request, dict) else None
+     tools = processed_request.get("tools") if isinstance(processed_request, dict) else None
+     io_log: dict[str, Any] = {
+         "llm.call": True,
+         "model": processed_request.get("model") if isinstance(processed_request, dict) else None,
+         "tool_choice": processed_request.get("tool_choice") if isinstance(processed_request, dict) else None,
+         "parallel_tool_calls": processed_request.get("parallel_tool_calls") if isinstance(processed_request, dict) else None,
+         "stop_after_tool_calls": processed_request.get("stop_after_tool_calls") if isinstance(processed_request, dict) else None,
+         "temperature": processed_request.get("temperature") if isinstance(processed_request, dict) else None,
+         "top_p": processed_request.get("top_p") if isinstance(processed_request, dict) else None,
+         "max_tokens": processed_request.get("max_tokens") if isinstance(processed_request, dict) else None,
+         "max_completion_tokens": processed_request.get("max_completion_tokens") if isinstance(processed_request, dict) else None,
+         "messages_preview": _messages_preview(msgs),
+         "tools_preview": _tools_preview(tools),
+     }
+     logger.info(io_log)
+ except Exception:
+     pass
+
  # Final hard-guard for OpenAI/Groq: ensure unsupported field is not present
  try:
      low_url = url.lower()
@@ -228,13 +325,54 @@ class OpenAIClient:
      f"Inference response status=200, content-type={content_type}, bytes={len(body_text)}"
  )
  if body_text:
-     preview_len = min(800, len(body_text))
-     logger.info(
-         f"Inference response preview ({preview_len} bytes): {body_text[:preview_len]}"
-     )
+     # Log raw output with generous preview to debug no-tool-call issues
+     preview_len = min(4000, len(body_text))
+     logger.info({
+         "llm.raw_response": True,
+         "bytes": len(body_text),
+         "preview": body_text[:preview_len],
+     })

  result = response.json()
  logger.info(f"Inference response parsed_type={type(result).__name__}")
+
+ # Normalize tool calls so downstream always sees a function tool call
+ try:
+     if isinstance(result, dict):
+         choices = result.get("choices")
+         if isinstance(choices, list) and choices:
+             msg = choices[0].get("message")
+             if isinstance(msg, dict):
+                 # Prefer tool_calls; if missing but function_call is present, synthesize tool_calls
+                 tc = msg.get("tool_calls")
+                 fc = msg.get("function_call")
+                 if (not isinstance(tc, list) or not tc) and isinstance(fc, dict):
+                     name = fc.get("name") or "interact_many"
+                     args = fc.get("arguments") or "{}"
+                     msg["tool_calls"] = [
+                         {
+                             "id": "call_norm",
+                             "type": "function",
+                             "function": {"name": name, "arguments": args},
+                         }
+                     ]
+                     # Encourage downstream to treat this as a tool call
+                     if isinstance(choices[0], dict):
+                         choices[0]["finish_reason"] = "tool_calls"
+                 # Log tool call count for debugging
+                 try:
+                     tc2 = msg.get("tool_calls")
+                     count = len(tc2) if isinstance(tc2, list) else 0
+                     logger.info({
+                         "llm.tool_calls": True,
+                         "count": count,
+                         "finish_reason": choices[0].get("finish_reason") if isinstance(choices[0], dict) else None,
+                     })
+                 except Exception:
+                     pass
+ except Exception:
+     pass
+
  return result

  except httpx.TimeoutException:
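The normalization above upgrades legacy function_call responses in place. A hand-built sketch of the before/after shape, with illustrative field values:

    # Before: a legacy-style response with only function_call set.
    result = {
        "choices": [{
            "index": 0,
            "message": {
                "role": "assistant",
                "content": None,
                "function_call": {"name": "interact_many", "arguments": '{"actions": ["up"]}'},
            },
            "finish_reason": "function_call",
        }]
    }

    msg = result["choices"][0]["message"]
    if not msg.get("tool_calls") and isinstance(msg.get("function_call"), dict):
        fc = msg["function_call"]
        # Synthesize an OpenAI-style tool_calls entry, as the added code does.
        msg["tool_calls"] = [{
            "id": "call_norm",
            "type": "function",
            "function": {"name": fc.get("name") or "interact_many",
                         "arguments": fc.get("arguments") or "{}"},
        }]
        result["choices"][0]["finish_reason"] = "tool_calls"

    assert result["choices"][0]["finish_reason"] == "tool_calls"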
@@ -340,40 +478,6 @@ class OpenAIClient:
          pass
  except Exception:
      pass
- # Gracefully degrade on 422 so rollouts can still produce a trajectory
- if status == 422:
-     try:
-         # Best-effort parse of error for diagnostics
-         err = None
-         try:
-             err = e.response.json()
-         except Exception:
-             err = {"error": "unprocessable", "detail": (text or "")[:200]}
-         logger.warning(
-             {
-                 "inference_422_recovered": True,
-                 "detail": err,
-             }
-         )
-     except Exception:
-         pass
-     # Return a minimal OpenAI-compatible response with no tool_calls/content
-     import time as _t
-
-     return {
-         "id": f"cmpl-{int(_t.time())}",
-         "object": "chat.completion",
-         "created": int(_t.time()),
-         "model": processed_request.get("model") or "unknown",
-         "choices": [
-             {
-                 "index": 0,
-                 "message": {"role": "assistant", "content": "", "tool_calls": []},
-                 "finish_reason": "stop",
-             }
-         ],
-         "usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0},
-     }
  raise
  except Exception as e:
      logger.error(f"Unexpected error calling {url}: {e}")
@@ -399,7 +503,14 @@

  try:
      async with httpx.AsyncClient(timeout=timeout) as client:
-         response = await client.get(url, headers=self.headers)
+         headers = self.headers.copy()
+         try:
+             parsed = httpx.URL(url)
+             if self._env_api_key and "/proxy/" in (parsed.path or ""):
+                 headers.setdefault("X-API-Key", self._env_api_key)
+         except Exception:
+             pass
+         response = await client.get(url, headers=headers)
          response.raise_for_status()
          return response.json()
  except httpx.HTTPStatusError as e:
@@ -466,11 +466,20 @@ async def step_policy(

  if tracing_context is not None:
      try:
+         print(
+             f"[TRACE_DEBUG] record_policy_prompts sys={len(system_prompt_records)} user={len(user_prompt_records)}",
+             flush=True,
+         )
          await tracing_context.record_policy_prompts(
              system_prompt_records, user_prompt_records
          )
      except Exception as exc:
          logger.debug(f"TRACING_PROMPTS_FAIL: {exc}")
+ else:
+     print(
+         f"[TRACE_DEBUG] Missing tracing context on policy step; policy_id={request.policy_id}",
+         flush=True,
+     )

  # Create inference client (choose API key by target provider)
  # Require inference_url to be set explicitly by the rollout policy config.
@@ -492,7 +501,11 @@ async def step_policy(
  if isinstance(target_url, str):
      low_url = target_url.lower()
      # Proxy endpoints should not receive a bearer; the server-side proxy holds the vendor key
-     if "/proxy/groq" in low_url or "/proxy/openai" in low_url:
+     if (
+         "/proxy/groq" in low_url
+         or "/proxy/openai" in low_url
+         or "/proxy/v1" in low_url
+     ):
          api_key_override = None
      elif "openai.com" in low_url:
          api_key_override = _os.getenv("OPENAI_API_KEY") or getattr(
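Sketched effect of the widened check; the helper name _is_proxied is hypothetical, written for this commentary only:

    def _is_proxied(url: str) -> bool:
        # Restates the condition above: proxy endpoints never receive a bearer token.
        low = url.lower()
        return "/proxy/groq" in low or "/proxy/openai" in low or "/proxy/v1" in low

    assert _is_proxied("https://backend.example/api/proxy/v1/chat/completions")  # bearer suppressed
    assert not _is_proxied("https://api.openai.com/v1/chat/completions")         # vendor key still applied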
@@ -954,6 +967,23 @@ async def step_policy(
  except Exception as exc:
      logger.debug(f"TRACING_LLM_FAIL: {exc}")

+ if not tool_calls:
+     preview = ""
+     try:
+         preview = str(meta.get("raw_response") or "")[:400]
+     except Exception:
+         preview = "<unavailable>"
+     logger.error(
+         {
+             "rollout.policy_step": True,
+             "policy_id": request.policy_id,
+             "error": "no_tool_calls",
+             "inference_url": meta.get("inference_url"),
+             "raw_preview": preview,
+         }
+     )
+     raise RuntimeError("Policy step produced no tool calls; inference response unusable.")
+
  return PolicyStepResponse(
      tool_calls=tool_calls,
      meta=meta,
@@ -223,6 +223,7 @@ class RolloutTracingContext:
  ).lower()
  self.return_trace = bool(getattr(request.record, "return_trace", False))
  self.sft_output_dir = getattr(fastapi_request.app.state, "sft_output_dir", None)
+ print(f"[TRACE_DEBUG] RolloutTracingContext init: trace_format={self.trace_format} return_trace={self.return_trace}", flush=True)
  self.session_trace = None
  self.metadata_updates: dict[str, Any] = {}
  self.policy_name = request.policy.policy_name or ""
@@ -244,19 +245,24 @@ class RolloutTracingContext:

  async def start_session(self) -> None:
      if not self.enabled or self.tracer is None:
+         print("[TRACE_DEBUG] start_session skipped: tracer disabled", flush=True)
          return
      try:
          await self.tracer.initialize()
+         print("[TRACE_DEBUG] tracer initialized", flush=True)
      except Exception as exc:
          logger.debug("TRACING_INIT_FAIL: %s", exc)
+         # Hard fail: tracing requested but cannot initialize
+         raise
      try:
          await self.tracer.start_session(
              session_id=self.run_id, metadata=dict(self.metadata_base)
          )
+         print(f"[TRACE_DEBUG] start_session succeeded for run_id={self.run_id}", flush=True)
      except Exception as exc:
          logger.warning("TRACING_START_FAIL: %s", exc)
-         self.enabled = False
-         self.tracer = None
+         # Hard fail: tracing requested but cannot start session
+         raise

  async def start_decision(self, turn_number: int) -> None:
      self.current_turn = turn_number
@@ -317,6 +323,9 @@ class RolloutTracingContext:
      )
  except Exception as exc:
      logger.debug("TRACING_USER_MSG_FAIL: %s", exc)
+ if self.tracer and self.tracer._current_trace:
+     msg_count = len(self.tracer._current_trace.markov_blanket_message_history)
+     print(f"[TRACE_DEBUG] After record_policy_prompts: {msg_count} messages", flush=True)

  def _content_to_text(self, content: Any) -> str:
      if isinstance(content, str):
@@ -395,6 +404,11 @@ class RolloutTracingContext:
          message_type="policy_tool_call",
          metadata=self._message_metadata(),
      )
+     if self.tracer._current_trace:
+         print(
+             f"[TRACE_DEBUG] After tool invocation: messages={len(self.tracer._current_trace.markov_blanket_message_history)}",
+             flush=True,
+         )
  except Exception as exc:
      logger.debug("TRACING_TOOL_MSG_FAIL: %s", exc)
@@ -664,12 +678,24 @@ class RolloutTracingContext:
  except Exception as exc:
      logger.debug("TRACING_OUTCOME_FAIL: %s", exc)
  try:
+     if self.tracer._current_trace:
+         msg_count = len(self.tracer._current_trace.markov_blanket_message_history)
+         print(f"[TRACE_DEBUG] Before end_session: {msg_count} messages in trace", flush=True)
      self.session_trace = await self.tracer.end_session()
      if self.session_trace is not None:
          self.session_trace.metadata.update(self.metadata_updates)
+         print(
+             f"[TRACE_DEBUG] Session ended successfully, session_id={self.session_trace.session_id}",
+             flush=True,
+         )
+         print(
+             f"[TRACE_DEBUG] session_trace.metadata keys: {list(self.session_trace.metadata.keys())}",
+             flush=True,
+         )
  except Exception as exc:
      logger.debug("TRACING_END_SESSION_FAIL: %s", exc)
      self.session_trace = None
+     print(f"[TRACE_DEBUG] end_session failed for run_id={self.run_id}: {exc}", flush=True)
  with contextlib.suppress(Exception):
      await self.tracer.close()
@@ -700,9 +726,13 @@ class RolloutTracingContext:
  def build_trace_payload(self, session_trace: Any) -> dict[str, Any] | None:
      if not self.return_trace or session_trace is None:
          return None
-     if self.trace_format == "full":
+     if self.trace_format in ("full", "structured"):
          payload = session_trace.to_dict()
          payload.setdefault("metadata", {}).update(self.metadata_updates)
+         print(
+             f"[TRACE_DEBUG] build_trace_payload returning structured trace with messages={len(payload.get('markov_blanket_message_history') or [])}",
+             flush=True,
+         )
          return payload
      metadata = dict(session_trace.metadata)
      metadata.update(self.metadata_updates)
@@ -1,6 +1,7 @@
  """Utility functions for the task service."""

  from typing import Any
+ from urllib.parse import urlparse, urlunparse

  import numpy as np
@@ -60,3 +61,69 @@ def sanitize_observation(observation: dict[str, Any]) -> dict[str, Any]:
          sanitized[key] = convert_numpy_to_python(value)

  return sanitized
+
+
+ _CHAT_COMPLETIONS_SUFFIX = "/v1/chat/completions"
+
+
+ def force_normalize_chat_completions_url(raw_url: Any) -> Any:
+     """
+     Convert ANY malformed inference URL into the correct chat-completions form.
+     Ensures path ends with /v1/chat/completions and that query has no '/' segments.
+     """
+     if not isinstance(raw_url, str):
+         return raw_url
+     url = raw_url.strip()
+     if not url:
+         return raw_url
+
+     parsed = urlparse(url)
+     path = (parsed.path or "").rstrip("/")
+     query = parsed.query or ""
+
+     # If query contains a path, extract and repair
+     if query and "/" in query:
+         before_slash, after_slash = query.split("/", 1)
+         cut_positions = [i for i in [after_slash.find("&"), after_slash.find("?")] if i >= 0]
+         cut = min(cut_positions) if cut_positions else len(after_slash)
+         path_from_query = "/" + after_slash[:cut]
+         extra_query = after_slash[cut + 1 :] if cut < len(after_slash) else ""
+         merged_query = before_slash if before_slash else ""
+         if extra_query:
+             merged_query = f"{merged_query}&{extra_query}" if merged_query else extra_query
+         final_path = (
+             path_from_query
+             if path_from_query.startswith(_CHAT_COMPLETIONS_SUFFIX)
+             else f"{path_from_query.rstrip('/')}{_CHAT_COMPLETIONS_SUFFIX}"
+         )
+         parsed = parsed._replace(path=final_path, query=merged_query)
+         url = urlunparse(parsed)
+         parsed = urlparse(url)
+         path = parsed.path or ""
+         query = parsed.query or ""
+
+     # Ensure path suffix
+     if not path.endswith(_CHAT_COMPLETIONS_SUFFIX):
+         new_path = f"{path}{_CHAT_COMPLETIONS_SUFFIX}" if path else _CHAT_COMPLETIONS_SUFFIX
+         parsed = parsed._replace(path=new_path)
+         url = urlunparse(parsed)
+         parsed = urlparse(url)
+         path = parsed.path or ""
+         query = parsed.query or ""
+
+     # Last-resort: strip any '/' from query
+     if query and "/" in query:
+         safe_query = query.split("/")[0]
+         parsed = parsed._replace(query=safe_query)
+         url = urlunparse(parsed)
+
+     return url
+
+
+ def ensure_chat_completions_url(raw_url: Any, mode: Any = None) -> Any:
+     """
+     Mode-aware normalizer (RL/EVAL) that returns a valid chat completions URL and
+     preserves existing query parameters.
+     """
+     # For now reuse force normalizer in both modes to guarantee correctness
+     return force_normalize_chat_completions_url(raw_url)
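Hand-tracing force_normalize_chat_completions_url on two malformed inputs (expected outputs derived from the code above, not taken from the package's test suite):

    force_normalize_chat_completions_url("https://host/api?cid=123")
    # -> "https://host/api/v1/chat/completions?cid=123"  (suffix inserted before the query)

    force_normalize_chat_completions_url("https://host/api?cid=123/v1/chat/completions")
    # -> "https://host/v1/chat/completions?cid=123"      (path recovered from the query;
    #    note the repair branch replaces the original /api path segment)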
@@ -69,6 +69,7 @@ env_name = "math"
  policy_name = "math-single-step"
  max_turns = 1
  episodes_per_batch = 32  # group_size * batch_size
+ task_app_origin_rewards_only = true

  [evaluation]
  instances = 32
@@ -33,7 +33,7 @@ try:
  except Exception as exc:  # pragma: no cover - critical dependency
      raise RuntimeError("Unable to load SFT payload helpers") from exc

- from .configs import RLConfig, SFTConfig
+ from .configs import PromptLearningConfig, RLConfig, SFTConfig
  from .supported_algos import (
      AlgorithmValidationError,
      ensure_model_supported_for_algorithm,
@@ -56,6 +56,12 @@ class SFTBuildResult:
      validation_file: Path | None


+ @dataclass(slots=True)
+ class PromptLearningBuildResult:
+     payload: dict[str, Any]
+     task_url: str
+
+
  def _format_validation_error(path: Path, exc: ValidationError) -> str:
      lines: list[str] = []
      for error in exc.errors():
@@ -86,6 +92,11 @@ def build_rl_payload(
  raise click.ClickException(_format_validation_error(config_path, exc)) from exc

  data = rl_cfg.to_dict()
+
+ # Remove smoke section - it's CLI-only and should not be sent to the trainer
+ if "smoke" in data:
+     del data["smoke"]
+
  # Ensure required [reference] section for backend validators
  try:
      ref_cfg = data.get("reference") if isinstance(data, dict) else None
@@ -349,9 +360,87 @@
  return SFTBuildResult(payload=payload, train_file=dataset_path, validation_file=validation_file)


+ def build_prompt_learning_payload(
+     *,
+     config_path: Path,
+     task_url: str | None,
+     overrides: dict[str, Any],
+     allow_experimental: bool | None = None,
+ ) -> PromptLearningBuildResult:
+     """Build payload for prompt learning job (MIPRO or GEPA)."""
+     import os
+
+     from pydantic import ValidationError
+
+     from .configs.prompt_learning import load_toml
+
+     # SDK-SIDE VALIDATION: Catch errors BEFORE sending to backend
+     from .validators import validate_prompt_learning_config
+
+     raw_config = load_toml(config_path)
+     validate_prompt_learning_config(raw_config, config_path)
+
+     try:
+         pl_cfg = PromptLearningConfig.from_path(config_path)
+     except ValidationError as exc:
+         raise click.ClickException(_format_validation_error(config_path, exc)) from exc
+
+     # Source of truth: TOML only (ignore shell/env and CLI overrides)
+     final_task_url = (pl_cfg.task_app_url or "").strip()
+
+     if not final_task_url:
+         raise click.ClickException(
+             "Task app URL required (provide --task-url or set prompt_learning.task_app_url in TOML)"
+         )
+
+     # Get task_app_api_key from config or environment
+     task_app_api_key = (
+         pl_cfg.task_app_api_key
+         or os.environ.get("ENVIRONMENT_API_KEY", "")
+     ).strip()
+
+     if not task_app_api_key:
+         raise click.ClickException(
+             "Task app API key required (set prompt_learning.task_app_api_key in TOML or ENVIRONMENT_API_KEY env var)"
+         )
+
+     # Build config dict for backend
+     config_dict = pl_cfg.to_dict()
+
+     # Ensure task_app_url and task_app_api_key are set
+     pl_section = config_dict.get("prompt_learning", {})
+     if isinstance(pl_section, dict):
+         pl_section["task_app_url"] = final_task_url
+         pl_section["task_app_api_key"] = task_app_api_key
+     else:
+         config_dict["prompt_learning"] = {
+             "task_app_url": final_task_url,
+             "task_app_api_key": task_app_api_key,
+         }
+
+     # Build payload matching backend API format
+     payload: dict[str, Any] = {
+         "algorithm": pl_cfg.algorithm,
+         "config_body": config_dict,
+         "overrides": overrides.get("overrides", {}),
+         "metadata": overrides.get("metadata", {}),
+         "auto_start": overrides.get("auto_start", True),
+     }
+
+     backend = overrides.get("backend")
+     if backend:
+         metadata_default: dict[str, Any] = {}
+         metadata = cast(dict[str, Any], payload.setdefault("metadata", metadata_default))
+         metadata["backend_base_url"] = ensure_api_base(str(backend))
+
+     return PromptLearningBuildResult(payload=payload, task_url=final_task_url)


  __all__ = [
+     "PromptLearningBuildResult",
      "RLBuildResult",
      "SFTBuildResult",
+     "build_prompt_learning_payload",
      "build_rl_payload",
      "build_sft_payload",
  ]
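
A minimal usage sketch for the new builder; the TOML path is one of the configs added in this release, and the override keys follow the payload construction above:

    from pathlib import Path

    from synth_ai.api.train.builders import build_prompt_learning_payload

    result = build_prompt_learning_payload(
        config_path=Path("examples/blog_posts/gepa/configs/banking77_gepa_local.toml"),
        task_url=None,                      # ignored: the TOML is the source of truth
        overrides={"auto_start": True},
    )
    print(result.task_url)                  # from prompt_learning.task_app_url in the TOML
    print(result.payload["algorithm"])      # e.g. "gepa" or "mipro"

Note that the TOML must set prompt_learning.task_app_url, and the task app key must come from prompt_learning.task_app_api_key or the ENVIRONMENT_API_KEY environment variable; otherwise the builder raises a ClickException, per the checks above.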