PyPI - synth-ai - Versions diffs - 0.2.13.dev1__py3-none-any.whl → 0.2.13.dev2__py3-none-any.whl - Mend

synth-ai 0.2.13.dev1__py3-none-any.whl → 0.2.13.dev2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of synth-ai might be problematic. Click here for more details.

Files changed (226) hide show

examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/policy.py RENAMED Viewed

@@ -44,6 +44,7 @@ class CrafterPolicy(Policy):
         self.inference_url = inference_url
         self.model = model
         self.use_tools = True
+        self.use_vision = False  # Enable vision for VLMs
         # Sampling parameters (populated via initialize(config))
         self.temperature: float | None = None
         self.top_p: float | None = None
@@ -63,6 +64,11 @@ class CrafterPolicy(Policy):
             self.model = config["model"]
         if "use_tools" in config:
             self.use_tools = bool(config["use_tools"])
+        if "use_vision" in config:
+            self.use_vision = bool(config["use_vision"])
+        # Auto-detect vision capability from model name if not explicitly set
+        if "use_vision" not in config and self.model:
+            self.use_vision = self._is_vision_model(self.model)
         # Adopt sampling params from policy config (trainer passes these through)
         if "temperature" in config:
             self.temperature = float(config["temperature"])  # fail fast on bad types
@@ -384,6 +390,7 @@ class CrafterPolicy(Policy):
                 "inference_url": self.inference_url,
                 "model": self.model,
                 "use_tools": self.use_tools,
+                "use_vision": self.use_vision,
             },
             "state": self.state_dict(),
         }
@@ -396,7 +403,8 @@ class CrafterPolicy(Policy):
             inference_url=config["inference_url"],
             model=config.get("model"),
         )
-        policy.use_tools = bool(config["use_tools"])
+        policy.use_tools = bool(config.get("use_tools", True))
+        policy.use_vision = bool(config.get("use_vision", False))
         policy.load_state_dict(state)
         return policy
@@ -446,12 +454,60 @@ class CrafterPolicy(Policy):
         return format_observation(obs_data, step_count=step_idx, max_steps=max_steps)
+    @staticmethod
+    def _is_vision_model(model_name: str) -> bool:
+        """Check if a model supports vision/image inputs based on its name."""
+        if not model_name:
+            return False
+        model_lower = model_name.lower()
+        # Known vision-capable model patterns
+        vision_patterns = [
+            "gpt-4o",           # GPT-4o series
+            "gpt-4-turbo",      # GPT-4 Turbo with vision
+            "gpt-4-vision",     # Explicit vision variant
+            "gpt-5",            # GPT-5 series (all variants support vision)
+            "claude-3",         # All Claude 3 models support vision
+            "gemini",           # Gemini models
+            "qwen-vl",          # Qwen Vision-Language models
+            "qwen2-vl",         # Qwen2 VL
+            "pixtral",          # Mistral's vision model
+            "llava",            # LLaVA models
+            "phi-3-vision",     # Microsoft Phi-3 Vision
+            "internvl",         # InternVL models
+            "cogvlm",           # CogVLM models
+            "vision",           # Generic vision indicator
+        ]
+        return any(pattern in model_lower for pattern in vision_patterns)
     def _extract_image_parts(
         self, observation: dict[str, Any] | None
     ) -> list[dict[str, Any]]:
-        """Crafter policy uses text-only prompts; do not attach image parts."""
-        return []
+        """Extract image parts from crafter observation for vision-capable models.
+        Returns OpenAI-style image_url format if vision is enabled and image data is available.
+        """
+        # Only extract images if vision is enabled for this policy
+        if not self.use_vision:
+            return []
+        if not observation:
+            return []
+        # Get the observation data (could be nested)
+        obs = observation.get("observation", observation)
+        if not isinstance(obs, dict):
+            return []
+        # Extract the data URL (includes base64-encoded image)
+        data_url = obs.get("observation_image_data_url")
+        if not data_url or not isinstance(data_url, str):
+            return []
+        # Return OpenAI-style image_url format
+        return [{"type": "image_url", "image_url": {"url": data_url}}]
     def parse_model_response(
         self, response: str, observation: dict[str, Any]

examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/policy_routes.py RENAMED Viewed

@@ -97,10 +97,32 @@ async def create_policy(
         # Set defaults from TaskApp / environment if not provided
         config = dict(request.config or {})
+        provider_raw = config.get("provider") or config.get("vendor")
+        provider = str(provider_raw).strip().lower() if provider_raw else None
+        # Resolve base URL for proxy endpoints (strip trailing slash)
+        base_url = str(req.base_url).rstrip("/")
+        if provider == "groq":
+            # Route through in-app Groq proxy by default
+            config.setdefault("inference_url", f"{base_url}/proxy/groq")
+            # Default to a recent Groq-hosted Qwen unless caller overrides
+            preferred_model = "qwen/qwen3-32b"
+            config.setdefault("model", preferred_model)
+            # Groq Qwen defaults tuned for deterministic tool use
+            config.setdefault("temperature", 0.0)
+            config.setdefault("top_p", 0.95)
+            config.setdefault("max_tokens", 256)
+            # Avoid leaking provider in downstream policy if unset
+            config["provider"] = "groq"
+        elif provider == "openai":
+            config.setdefault("inference_url", f"{base_url}/proxy")
+            config["provider"] = "openai"
         if "inference_url" not in config and task_app is not None:
-            base_url = getattr(task_app, "vllm_base_url", None)
-            if base_url:
-                config["inference_url"] = base_url
+            task_base_url = getattr(task_app, "vllm_base_url", None)
+            if task_base_url:
+                config["inference_url"] = task_base_url
         if "model" not in config and task_app is not None:
             default_model = getattr(task_app, "default_model", None)
             if default_model:

examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/rollout.py RENAMED Viewed

@@ -1843,12 +1843,22 @@ async def execute_rollout(
                     timing_final.setdefault("overhead_ms", 0.0)
         # Build trajectory
+        # Extract inference_url from policy meta
+        inference_url = None
+        if policy_handle is not None:
+            try:
+                policy_snapshot = policy_handle.snapshot()
+                inference_url = policy_snapshot.get("config", {}).get("inference_url")
+            except Exception:
+                pass
         trajectory = RolloutTrajectory(
             env_id=env_id,
             policy_id=policy_id,
             steps=trajectory_steps,
             final={"observation": _summarize_observation_for_storage(env_handle, current_obs)},
             length=len(trajectory_steps),
+            inference_url=inference_url,  # NEW: Required for trace correlation
             decision_samples=decision_samples if step_rewards_active else None,
         )

examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/test_service.py RENAMED Viewed

@@ -1,15 +1,14 @@
 #!/usr/bin/env python3
-"""
-Simple test script for the GRPO Synth Envs Hosted Service.
-Run this after starting the service with:
-    python main.py
-"""
+"""Manual smoke script for the GRPO Synth Envs Hosted Service."""
 import asyncio
 import json
 import httpx
+import pytest
+pytestmark = pytest.mark.skip(reason="Requires running hosted service on localhost:8000")
 async def test_service():

examples/task_apps/dev/pokemon_emerald/__init__.py ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ """Pokémon Emerald speedrun task app examples."""
2	+

synth-ai 0.2.13.dev1__py3-none-any.whl → 0.2.13.dev2__py3-none-any.whl

Potentially problematic release.

synth-ai 0.2.13.dev1__py3-none-any.whl → 0.2.13.dev2__py3-none-any.whl