PyPI - synth-ai - Versions diffs - 0.2.9.dev4__py3-none-any.whl → 0.2.9.dev7__py3-none-any.whl - Mend

synth-ai 0.2.9.dev4__py3-none-any.whl → 0.2.9.dev7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of synth-ai might be problematic. Click here for more details.

Files changed (157) hide show

examples/common_old/backend.py +0 -1
examples/crafter_debug_render.py +15 -6
examples/evals_old/compare_models.py +1 -0
examples/finetuning_old/_backup_synth_qwen/filter_traces_achievements.py +6 -2
examples/finetuning_old/_backup_synth_qwen/react_agent_lm.py +4 -4
examples/finetuning_old/_backup_synth_qwen/sft_kickoff.py +4 -3
examples/finetuning_old/synth_qwen_v1/filter_traces_achievements.py +6 -2
examples/finetuning_old/synth_qwen_v1/finetune.py +1 -1
examples/finetuning_old/synth_qwen_v1/hello_ft_model.py +4 -4
examples/finetuning_old/synth_qwen_v1/infer.py +1 -2
examples/finetuning_old/synth_qwen_v1/poll.py +4 -2
examples/finetuning_old/synth_qwen_v1/prepare_data.py +8 -8
examples/finetuning_old/synth_qwen_v1/react_agent_lm.py +5 -4
examples/finetuning_old/synth_qwen_v1/run_crafter_sft_job.py +11 -8
examples/finetuning_old/synth_qwen_v1/run_ft_job.py +17 -12
examples/finetuning_old/synth_qwen_v1/upload_data.py +1 -1
examples/finetuning_old/synth_qwen_v1/util.py +7 -2
examples/rl/configs/eval_base_qwen.toml +1 -1
examples/rl/configs/rl_from_base_qwen17.toml +1 -1
examples/rl/download_dataset.py +26 -10
examples/rl/run_eval.py +17 -15
examples/rl/run_rl_and_save.py +24 -7
examples/rl/task_app/math_single_step.py +128 -11
examples/rl/task_app/math_task_app.py +11 -3
examples/rl_old/task_app.py +222 -53
examples/warming_up_to_rl/analyze_trace_db.py +7 -5
examples/warming_up_to_rl/export_trace_sft.py +141 -16
examples/warming_up_to_rl/groq_test.py +11 -4
examples/warming_up_to_rl/manage_secrets.py +15 -6
examples/warming_up_to_rl/readme.md +9 -2
examples/warming_up_to_rl/run_eval.py +108 -30
examples/warming_up_to_rl/run_fft_and_save.py +128 -52
examples/warming_up_to_rl/run_local_rollout.py +87 -36
examples/warming_up_to_rl/run_local_rollout_modal.py +113 -25
examples/warming_up_to_rl/run_local_rollout_parallel.py +80 -16
examples/warming_up_to_rl/run_local_rollout_traced.py +125 -20
examples/warming_up_to_rl/run_rl_and_save.py +31 -7
examples/warming_up_to_rl/run_rollout_remote.py +37 -10
examples/warming_up_to_rl/task_app/grpo_crafter.py +90 -27
examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +9 -27
examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +46 -108
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -1
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +1 -1
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -1
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +50 -17
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +35 -21
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +8 -4
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +29 -26
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +1 -1
examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +17 -13
examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +1 -1
examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +106 -63
examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +82 -84
examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +76 -59
examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +1 -1
examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +43 -49
examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +5 -15
synth_ai/__init__.py +1 -0
synth_ai/api/train/builders.py +34 -10
synth_ai/api/train/cli.py +172 -32
synth_ai/api/train/config_finder.py +59 -4
synth_ai/api/train/env_resolver.py +32 -14
synth_ai/api/train/pollers.py +11 -3
synth_ai/api/train/task_app.py +4 -1
synth_ai/api/train/utils.py +20 -4
synth_ai/cli/__init__.py +11 -4
synth_ai/cli/balance.py +1 -1
synth_ai/cli/demo.py +19 -5
synth_ai/cli/rl_demo.py +75 -16
synth_ai/cli/root.py +116 -37
synth_ai/cli/task_apps.py +1286 -170
synth_ai/cli/traces.py +1 -0
synth_ai/cli/turso.py +73 -0
synth_ai/core/experiment.py +0 -2
synth_ai/demo_registry.py +67 -30
synth_ai/demos/core/cli.py +493 -164
synth_ai/demos/demo_task_apps/core.py +50 -6
synth_ai/demos/demo_task_apps/crafter/configs/crafter_fft_4b.toml +2 -3
synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +36 -28
synth_ai/demos/demo_task_apps/math/_common.py +1 -2
synth_ai/demos/demo_task_apps/math/deploy_modal.py +0 -2
synth_ai/demos/demo_task_apps/math/modal_task_app.py +168 -65
synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -1
synth_ai/environments/examples/bandit/engine.py +12 -4
synth_ai/environments/examples/bandit/taskset.py +4 -4
synth_ai/environments/reproducibility/tree.py +3 -1
synth_ai/environments/service/core_routes.py +6 -2
synth_ai/evals/base.py +0 -2
synth_ai/experimental/synth_oss.py +11 -12
synth_ai/handshake.py +3 -1
synth_ai/http_client.py +31 -7
synth_ai/inference/__init__.py +0 -2
synth_ai/inference/client.py +8 -4
synth_ai/jobs/client.py +40 -10
synth_ai/learning/client.py +33 -8
synth_ai/learning/config.py +0 -2
synth_ai/learning/constants.py +0 -2
synth_ai/learning/ft_client.py +6 -3
synth_ai/learning/health.py +9 -2
synth_ai/learning/jobs.py +17 -5
synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +1 -3
synth_ai/learning/prompts/random_search.py +4 -1
synth_ai/learning/prompts/run_random_search_banking77.py +6 -1
synth_ai/learning/rl_client.py +42 -14
synth_ai/learning/sse.py +0 -2
synth_ai/learning/validators.py +6 -2
synth_ai/lm/caching/ephemeral.py +1 -3
synth_ai/lm/core/exceptions.py +0 -2
synth_ai/lm/core/main.py +13 -1
synth_ai/lm/core/synth_models.py +0 -1
synth_ai/lm/core/vendor_clients.py +4 -2
synth_ai/lm/overrides.py +2 -2
synth_ai/lm/vendors/core/anthropic_api.py +7 -7
synth_ai/lm/vendors/core/openai_api.py +2 -0
synth_ai/lm/vendors/openai_standard.py +3 -1
synth_ai/lm/vendors/openai_standard_responses.py +6 -3
synth_ai/lm/vendors/supported/custom_endpoint.py +1 -3
synth_ai/lm/vendors/synth_client.py +37 -10
synth_ai/rl/__init__.py +0 -1
synth_ai/rl/contracts.py +0 -2
synth_ai/rl/env_keys.py +6 -1
synth_ai/task/__init__.py +1 -0
synth_ai/task/apps/__init__.py +11 -11
synth_ai/task/auth.py +29 -17
synth_ai/task/client.py +3 -1
synth_ai/task/contracts.py +1 -0
synth_ai/task/datasets.py +3 -1
synth_ai/task/errors.py +3 -2
synth_ai/task/health.py +0 -2
synth_ai/task/json.py +0 -1
synth_ai/task/proxy.py +2 -5
synth_ai/task/rubrics.py +9 -3
synth_ai/task/server.py +31 -5
synth_ai/task/tracing_utils.py +8 -3
synth_ai/task/validators.py +0 -1
synth_ai/task/vendors.py +0 -1
synth_ai/tracing_v3/db_config.py +26 -1
synth_ai/tracing_v3/decorators.py +1 -0
synth_ai/tracing_v3/examples/basic_usage.py +3 -2
synth_ai/tracing_v3/hooks.py +2 -0
synth_ai/tracing_v3/replica_sync.py +1 -0
synth_ai/tracing_v3/session_tracer.py +24 -3
synth_ai/tracing_v3/storage/base.py +4 -1
synth_ai/tracing_v3/storage/factory.py +0 -1
synth_ai/tracing_v3/turso/manager.py +102 -38
synth_ai/tracing_v3/turso/models.py +4 -1
synth_ai/tracing_v3/utils.py +1 -0
synth_ai/v0/tracing/upload.py +32 -135
{synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev7.dist-info}/METADATA +1 -1
{synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev7.dist-info}/RECORD +154 -156
examples/warming_up_to_rl/task_app/synth_envs_hosted/test_stepwise_rewards.py +0 -58
synth_ai/environments/examples/sokoban/units/astar_common.py +0 -95
synth_ai/install_sqld.sh +0 -40
{synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev7.dist-info}/WHEEL +0 -0
{synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev7.dist-info}/entry_points.txt +0 -0
{synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev7.dist-info}/licenses/LICENSE +0 -0
{synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev7.dist-info}/top_level.txt +0 -0

examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py CHANGED Viewed

@@ -90,6 +90,7 @@ class CrafterEnvironmentWrapper:
                 logger.warning("Unknown Crafter action: %s - ignoring", action_str)
                 return None  # Signal to skip this action
             return CRAFTER_ACTIONS[action_str]
         for tc in tool_calls:
             if isinstance(tc, EnvToolCall):
                 # Expand interact_many; otherwise coerce non-interact tools into interact(action=tool)
@@ -103,12 +104,12 @@ class CrafterEnvironmentWrapper:
                             )
                 elif tc.tool != "interact":
                     candidate_action = tc.args.get("action") if isinstance(tc.args, dict) else None
-                    resolved_action = candidate_action if candidate_action in allowed_actions else tc.tool
+                    resolved_action = (
+                        candidate_action if candidate_action in allowed_actions else tc.tool
+                    )
                     action_int = _action_to_int(resolved_action)
                     if action_int is not None:  # Skip invalid actions
-                        normalized.append(
-                            EnvToolCall(tool="interact", args={"action": action_int})
-                        )
+                        normalized.append(EnvToolCall(tool="interact", args={"action": action_int}))
                 else:
                     normalized.append(tc)
             else:
@@ -120,13 +121,14 @@ class CrafterEnvironmentWrapper:
                 args = tc.get("arguments") or tc.get("args") or {}
                 if isinstance(args, str):
                     import json as _json
                     try:
                         args = _json.loads(args)
                     except Exception:
                         args = {}
                 # Expand interact_many into multiple interacts
                 if tool_name == "interact_many":
-                    for action in (args.get("actions") or []):
+                    for action in args.get("actions") or []:
                         action_int = _action_to_int(action)
                         if action_int is not None:  # Skip invalid actions
                             normalized.append(
@@ -135,11 +137,17 @@ class CrafterEnvironmentWrapper:
                 else:
                     # For any non-interact tool, resolve to an interact action.
                     # Support a packed list of actions under 'actions' for convenience.
-                    if isinstance(args, dict) and isinstance(args.get("actions"), list) and args.get("actions"):
+                    if (
+                        isinstance(args, dict)
+                        and isinstance(args.get("actions"), list)
+                        and args.get("actions")
+                    ):
                         for action in args.get("actions"):
                             action_int = _action_to_int(action)
                             if action_int is not None:
-                                normalized.append(EnvToolCall(tool="interact", args={"action": action_int}))
+                                normalized.append(
+                                    EnvToolCall(tool="interact", args={"action": action_int})
+                                )
                     else:
                         candidate_action = None
                         if isinstance(args, dict) and "action" in args:
@@ -148,13 +156,18 @@ class CrafterEnvironmentWrapper:
                         action_int: Optional[int]
                         if isinstance(candidate_action, int):
                             action_int = _action_to_int(candidate_action)
-                        elif isinstance(candidate_action, str) and candidate_action in allowed_actions:
+                        elif (
+                            isinstance(candidate_action, str)
+                            and candidate_action in allowed_actions
+                        ):
                             action_int = _action_to_int(candidate_action)
                         else:
                             # Fallback: interpret the tool name itself as the action label
                             action_int = _action_to_int(tool_name)
                         if action_int is not None:
-                            normalized.append(EnvToolCall(tool="interact", args={"action": action_int}))
+                            normalized.append(
+                                EnvToolCall(tool="interact", args={"action": action_int})
+                            )
         # Ensure we have at least one valid action; default to noop if none provided
         if not normalized:
@@ -173,7 +186,9 @@ class CrafterEnvironmentWrapper:
                 "semantic_map": pub_before.semantic_map,
             }
             actions_printable = [
-                (tc.args.get("action") if isinstance(tc.args, dict) else None) if isinstance(tc, EnvToolCall) else None
+                (tc.args.get("action") if isinstance(tc.args, dict) else None)
+                if isinstance(tc, EnvToolCall)
+                else None
                 for tc in normalized
             ]
             logger.info(
@@ -185,7 +200,11 @@ class CrafterEnvironmentWrapper:
                 [k for k, v in before_state["achievements_status"].items() if v],
                 actions_printable,
             )
-            logger.info("Surroundings BEFORE (seed=%s):\n%s", str(self.seed), _format_semantic_map_view(before_state))
+            logger.info(
+                "Surroundings BEFORE (seed=%s):\n%s",
+                str(self.seed),
+                _format_semantic_map_view(before_state),
+            )
         except Exception as _:
             # Logging should not interfere with stepping; fail-fast elsewhere
             pass
@@ -253,8 +272,14 @@ class CrafterEnvironmentWrapper:
                     inv_changes = ", ".join(changed_items) if changed_items else "none"
                     # Achievements gained/lost
-                    ach_b = {k for k, v in (before_state.get("achievements_status", {}) or {}).items() if v}
-                    ach_a = {k for k, v in (after_dict.get("achievements_status", {}) or {}).items() if v}
+                    ach_b = {
+                        k
+                        for k, v in (before_state.get("achievements_status", {}) or {}).items()
+                        if v
+                    }
+                    ach_a = {
+                        k for k, v in (after_dict.get("achievements_status", {}) or {}).items() if v
+                    }
                     ach_added = sorted(list(ach_a - ach_b))
                     ach_added_latest = ach_added
                     ach_removed = sorted(list(ach_b - ach_a))
@@ -272,12 +297,19 @@ class CrafterEnvironmentWrapper:
                     if reward is None and ach_added_latest:
                         try:
                             reward = float(len(ach_added_latest))
-                            logger.info("Reward shaping applied: +%s (achievements added)", len(ach_added_latest))
+                            logger.info(
+                                "Reward shaping applied: +%s (achievements added)",
+                                len(ach_added_latest),
+                            )
                         except Exception:
                             pass
                 except Exception:
                     pass
-            logger.info("Surroundings AFTER (seed=%s):\n%s", str(self.seed), _format_semantic_map_view(after_dict))
+            logger.info(
+                "Surroundings AFTER (seed=%s):\n%s",
+                str(self.seed),
+                _format_semantic_map_view(after_dict),
+            )
         except Exception as _:
             pass
         result: Dict[str, Any] = {
@@ -340,6 +372,7 @@ class CrafterEnvironmentWrapper:
             # Build reverse action map for readability
             int_to_action = {v: k for k, v in CRAFTER_ACTIONS.items()}
             from collections import Counter
             action_ids = []
             for tc in normalized:
                 if isinstance(tc, EnvToolCall) and isinstance(tc.args, dict):
@@ -380,7 +413,7 @@ class CrafterEnvironmentWrapper:
         return {
             "observation": convert_numpy_to_python(observation),
             "info": convert_numpy_to_python(info) if info else None,
-            "step_idx": self.step_idx
+            "step_idx": self.step_idx,
         }
     async def terminate(self) -> Dict[str, Any]:
@@ -390,7 +423,7 @@ class CrafterEnvironmentWrapper:
         return {
             "observation": convert_numpy_to_python(observation),
             "info": convert_numpy_to_python(info) if info else None,
-            "step_idx": self.step_idx
+            "step_idx": self.step_idx,
         }
     def state_dict(self) -> Dict[str, Any]:

examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py CHANGED Viewed

@@ -5,17 +5,18 @@ from abc import ABC, abstractmethod
 from .react_agent import CrafterReActAgent
 from .tools import TOOLS_SCHEMA
 # Define Policy base class here to avoid circular import
 class Policy(ABC):
     """Base class for environment-specific policies."""
     @abstractmethod
     def prepare_inference_request(
         self, observation: Dict[str, Any], history: List[Dict[str, Any]] = None
     ) -> Tuple[List[Dict[str, Any]], Optional[List[Dict[str, Any]]]]:
         """Prepare an inference request."""
         pass
     @abstractmethod
     def parse_model_response(
         self, response: str, observation: Dict[str, Any]
@@ -23,6 +24,7 @@ class Policy(ABC):
         """Parse model response into tool calls."""
         pass
 # (imports moved to top of file to satisfy linter)
@@ -161,7 +163,7 @@ class CrafterPolicy(Policy):
         # First check if we got actual tool calls
         choices = response.get("choices", [])
         tool_calls: List[Dict[str, Any]] = []
         for choice in choices:
             msg = choice.get("message", {})
             if "tool_calls" in msg and msg["tool_calls"] is not None:
@@ -185,7 +187,7 @@ class CrafterPolicy(Policy):
                                 "arguments": tc["arguments"],
                             }
                         )
         # If we got tool calls, return them
         if tool_calls:
             # Normalize common degenerate pattern ["move_right", "do"] when nothing is nearby.
@@ -197,6 +199,7 @@ class CrafterPolicy(Policy):
                     if isinstance(args, str):
                         try:
                             import json
                             args = json.loads(args)
                         except (json.JSONDecodeError, ValueError):
                             args = {}
@@ -208,11 +211,13 @@ class CrafterPolicy(Policy):
                     # Simple heuristic: avoid repeating same pair; avoid 'do' with no context
                     if len(actions) == 2 and actions[0] == "move_right" and actions[1] == "do":
                         actions = ["move_right"]
-                    normalized.append({"tool_name": "interact_many", "arguments": {"actions": actions or []}})
+                    normalized.append(
+                        {"tool_name": "interact_many", "arguments": {"actions": actions or []}}
+                    )
                 else:
                     normalized.append(tc)
             return normalized
         # Otherwise, parse plain text content for actions
         text = ""
         for choice in choices:
@@ -221,15 +226,16 @@ class CrafterPolicy(Policy):
             if content:
                 text = content
                 break
         if text:
             # Try to parse actions from the text
             from .shared import parse_actions
             actions = parse_actions(text)
             if actions:
                 # Wrap actions in interact_many tool call
                 return [{"tool_name": "interact_many", "arguments": {"actions": actions}}]
         # No actions found
         return []
@@ -264,7 +270,11 @@ class CrafterPolicy(Policy):
                 prev_tool_calls = metadata["prev_tool_calls"]
             if "prev_env_result" in metadata:
                 prev_env_result = metadata["prev_env_result"]
-            if prev_assistant_text is not None or prev_tool_calls is not None or prev_env_result is not None:
+            if (
+                prev_assistant_text is not None
+                or prev_tool_calls is not None
+                or prev_env_result is not None
+            ):
                 self._append_assistant_turn(prev_assistant_text, prev_tool_calls, prev_env_result)
         # Append current observation as the next user message (internal history only)
@@ -274,8 +284,12 @@ class CrafterPolicy(Policy):
         # (formatted surroundings/inventory) with the previous 3 tool calls as context.
         # Most recent first.
         lines: List[str] = []
-        def _format_tool_call_line_for_context(tool_name: str, arguments: Any, max_chars: int = 500) -> str:
+        def _format_tool_call_line_for_context(
+            tool_name: str, arguments: Any, max_chars: int = 500
+        ) -> str:
             import json as _json
             # Render arguments compactly, then clip to max_chars
             if isinstance(arguments, (dict, list)):
                 try:
@@ -289,6 +303,7 @@ class CrafterPolicy(Policy):
             if isinstance(rendered, str) and len(rendered) > max_chars:
                 rendered = rendered[:max_chars]
             return f"- {tool_name}: {rendered}"
         # Prefer pulling from trajectory_history (accumulates over turns)
         for record in reversed(self.trajectory_history):
             if len(lines) >= 3:
@@ -316,7 +331,9 @@ class CrafterPolicy(Policy):
                 args = call.get("arguments")
                 lines.append(_format_tool_call_line_for_context(name, args))
-        context_text = "Previous tool calls (most recent first):\n" + ("\n".join(lines) if lines else "- none")
+        context_text = "Previous tool calls (most recent first):\n" + (
+            "\n".join(lines) if lines else "- none"
+        )
         # Combine observation with context so the model always sees surroundings/inventory
         combined_text = f"{observation_text}\n\n{context_text}"
@@ -326,7 +343,7 @@ class CrafterPolicy(Policy):
             history=[],  # no prior user/assistant history
             turn=self.turn_index,
         )
-        #print("Debugging only:; ", payload)
+        # print("Debugging only:; ", payload)
         meta_out = {
             "inference_url": self.inference_url,
             "inference_request": payload,
@@ -372,7 +389,7 @@ class CrafterPolicy(Policy):
     async def terminate(self) -> None:
         return None
     def prepare_inference_request(
         self, observation: Dict[str, Any], history: List[Dict[str, Any]] = None
     ) -> Tuple[List[Dict[str, Any]], Optional[List[Dict[str, Any]]]]:
@@ -382,9 +399,7 @@ class CrafterPolicy(Policy):
         # Build messages (observation_text already formatted; no raw matrices)
         messages = CrafterReActAgent.build_messages(
-            observation=observation_text,
-            history=history,
-            turn=self.turn_index
+            observation=observation_text, history=history, turn=self.turn_index
         )
         # Return messages and tools schema
@@ -402,7 +417,6 @@ class CrafterPolicy(Policy):
         if not isinstance(obs_data, dict):
             return f"Observation: {str(observation)}"
         # Use the shared format_observation function with step information
         step_idx = observation.get("step_idx", 0)
         max_steps = 100  # Default max steps, could be made configurable
@@ -416,25 +430,25 @@ class CrafterPolicy(Policy):
                 obs_data["health"] = info["health"]
         return format_observation(obs_data, step_count=step_idx, max_steps=max_steps)
     def parse_model_response(
         self, response: str, observation: Dict[str, Any]
     ) -> List[Dict[str, Any]]:
         """Parse model response into tool calls (implementing abstract method).
         Note: Despite the type hint, vLLM actually returns a dict response,
         not a string. We handle both cases.
         """
         # Handle dict response from vLLM (the actual case)
         if isinstance(response, dict):
             return self.parse_response_to_tool_calls(response, self.use_tools)
         # Handle string response (fallback case for raw text)
         if isinstance(response, str):
             actions = CrafterReActAgent.parse_actions_from_response(response)
             if actions:
                 return [{"tool_name": "interact_many", "arguments": {"actions": actions}}]
         # Default empty response
         return []

examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py CHANGED Viewed

@@ -51,7 +51,7 @@ class CrafterReActAgent:
             "place_stone, place_table, place_furnace, place_plant, make_wood_pickaxe, make_stone_pickaxe, "
             "make_iron_pickaxe, make_wood_sword, make_stone_sword, make_iron_sword\n"
         )
     @staticmethod
     def get_system_prompt_with_tools() -> str:
         """System prompt for tool-based interaction (e.g., Qwen3 models)."""
@@ -80,9 +80,13 @@ class CrafterReActAgent:
         )
     @staticmethod
-    def build_messages(observation: str, history: Optional[List[Dict[str, str]]] = None, turn: Optional[int] = None) -> List[Dict[str, str]]:
+    def build_messages(
+        observation: str, history: Optional[List[Dict[str, str]]] = None, turn: Optional[int] = None
+    ) -> List[Dict[str, str]]:
         """Construct OpenAI-style messages list for vLLM generation."""
-        msgs: List[Dict[str, str]] = [{"role": "system", "content": CrafterReActAgent.get_system_prompt()}]
+        msgs: List[Dict[str, str]] = [
+            {"role": "system", "content": CrafterReActAgent.get_system_prompt()}
+        ]
         if history:
             msgs.extend(history)
         msgs.append({"role": "user", "content": observation})
@@ -93,4 +97,4 @@ class CrafterReActAgent:
         return parse_actions(response_text)
-__all__ = ["CrafterReActAgent"]
+__all__ = ["CrafterReActAgent"]

examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py CHANGED Viewed

@@ -71,7 +71,7 @@ def validate_action(action: str) -> bool:
 def parse_actions(action_text: str) -> List[str]:
     """Extract actions from response text.
     Tries multiple parsing strategies:
     1. <action>...</action> tags (original format)
     2. [action]...[/action] or [action]... format
@@ -80,43 +80,43 @@ def parse_actions(action_text: str) -> List[str]:
     5. Newline-separated actions
     """
     import json
     # First try the original <action> tag format
     matches = re.findall(r"<action>(.*?)</action>", action_text, re.IGNORECASE)
     if matches:
         return [m.strip() for m in matches if validate_action(m.strip())]
     # Try [action] format
     matches = re.findall(r"\[action\](.*?)(?:\[/action\]|\n|$)", action_text, re.IGNORECASE)
     if matches:
         return [m.strip() for m in matches if validate_action(m.strip())]
     # If no tags found, try to parse plain text
     text = action_text.strip()
     # Check if the entire text is a valid action
     if validate_action(text):
         return [text]
     # Try splitting by newlines and checking each line
-    lines = text.split('\n')
+    lines = text.split("\n")
     actions = []
     for line in lines:
         line = line.strip()
         # Remove various prefixes
-        for prefix in ['ACTION:', 'Action:', 'action:', 'ACTION', '-', '*', '•', '**ACTION:**']:
+        for prefix in ["ACTION:", "Action:", "action:", "ACTION", "-", "*", "•", "**ACTION:**"]:
             if line.startswith(prefix):
-                line = line[len(prefix):].strip()
+                line = line[len(prefix) :].strip()
                 break
         # Also handle numbered lists
-        if re.match(r'^\d+\.\s*', line):
-            line = re.sub(r'^\d+\.\s*', '', line)
+        if re.match(r"^\d+\.\s*", line):
+            line = re.sub(r"^\d+\.\s*", "", line)
         # Split by common separators to handle multiple actions on one line
-        parts = re.split(r'[,;]|\s+and\s+|\s+then\s+', line)
+        parts = re.split(r"[,;]|\s+and\s+|\s+then\s+", line)
         for part in parts:
             part = part.strip()
             # Remove quotes if present
@@ -124,23 +124,23 @@ def parse_actions(action_text: str) -> List[str]:
                 part = part[1:-1]
             if part.startswith("'") and part.endswith("'"):
                 part = part[1:-1]
             # Check if it's a valid action
             if part and validate_action(part):
                 actions.append(part)
     return actions
 def format_observation(obs_data: Dict[str, Any], step_count: int = 0, max_steps: int = 100) -> str:
     """Format a Crafter observation dictionary into a human-readable string.
     This is critical for preventing massive token counts when observations
     contain large numpy arrays or deeply nested structures.
     """
     if not obs_data:
         return ""
     # Extract key information
     health = obs_data.get("health") or obs_data.get("inventory", {}).get("health", 0)
     inventory_dict = obs_data.get("inventory", {})
@@ -160,18 +160,18 @@ def format_observation(obs_data: Dict[str, Any], step_count: int = 0, max_steps:
     max_steps_from_obs = obs_data.get("max_steps_episode") or obs_data.get("max_steps")
     if isinstance(max_steps_from_obs, (int, float)) and max_steps_from_obs > 0:
         max_steps = int(max_steps_from_obs)
     # Format inventory (skip health as it's shown separately)
     inv_items = [f"{k}:{v}" for k, v in inventory_dict.items() if v > 0 and k != "health"]
     inventory_str = ", ".join(inv_items) if inv_items else "empty"
     # Format achievements
     achieved_list = [k for k, v in achievements.items() if v]
     achievements_str = ", ".join(achieved_list) if achieved_list else "none"
     # Format semantic map view (simplified version)
     map_view = _format_semantic_map_view(obs_data, VIEW_SIZE)
     return (
         f"=== CRAFTER GAME STATE ===\n"
         f"Step: {step_count}/{max_steps}\n"
@@ -184,6 +184,7 @@ def format_observation(obs_data: Dict[str, Any], step_count: int = 0, max_steps:
         f"Choose your next actions.\n"
     )
 def _try_build_dynamic_mapping():
     """Attempt to build id->name mapping from a real Crafter env.
@@ -232,7 +233,7 @@ def _try_build_dynamic_mapping():
 # Build dynamic mapping if possible; otherwise fall back to a basic map
 _ID_TO_NAME = _try_build_dynamic_mapping()
 _FALLBACK_ID_TO_NAME = {
-    0: "none",      # None from materials
+    0: "none",  # None from materials
     1: "water",
     2: "grass",
     3: "stone",
@@ -299,4 +300,6 @@ def _format_semantic_map_view(obs_data: Dict[str, Any], view_size: int = VIEW_SI
     transposed = list(zip(*matrix))
     grid_rows: List[str] = [" ".join(row) for row in transposed]
-    return "\nLocal Map View (" + str(view_size) + "x" + str(view_size) + "):\n" + "\n".join(grid_rows)
+    return (
+        "\nLocal Map View (" + str(view_size) + "x" + str(view_size) + "):\n" + "\n".join(grid_rows)
+    )

examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py CHANGED Viewed

@@ -44,4 +44,4 @@ TOOLS_SCHEMA = [
             },
         },
     }
-]
+]

examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py CHANGED Viewed

@@ -22,9 +22,7 @@ class TaskApp:
         self.service_base_url = service_base_url or os.getenv(
             "SERVICE_BASE_URL", "http://localhost:8000"
         )
-        self.vllm_base_url = vllm_base_url or os.getenv(
-            "VLLM_BASE_URL", "http://localhost:8001"
-        )
+        self.vllm_base_url = vllm_base_url or os.getenv("VLLM_BASE_URL", "http://localhost:8001")
         self.default_model = default_model or os.getenv("DEFAULT_MODEL")
@@ -69,9 +67,7 @@ def create_app(allowed_environments: list[str] = None) -> FastAPI:
         @app.middleware("http")
         async def validate_environment(request, call_next):
             # Check if this is an environment-related request
-            if request.url.path.startswith("/env/") or request.url.path.startswith(
-                "/rollout"
-            ):
+            if request.url.path.startswith("/env/") or request.url.path.startswith("/rollout"):
                 # Extract environment name from request body for POST requests
                 if request.method == "POST":
                     # We need to read the body to check env_name
@@ -83,9 +79,7 @@ def create_app(allowed_environments: list[str] = None) -> FastAPI:
                         env_name = data.get("env_name", "").lower()
                         # Check if environment is allowed
-                        if env_name and env_name not in [
-                            e.lower() for e in allowed_environments
-                        ]:
+                        if env_name and env_name not in [e.lower() for e in allowed_environments]:
                             from fastapi import HTTPException
                             raise HTTPException(
@@ -111,6 +105,7 @@ def create_app(allowed_environments: list[str] = None) -> FastAPI:
     # Policy routes are optional; skip if optional envs are missing in this build
     try:
         from .policy_routes import router as policy_router
         app.include_router(policy_router, prefix="/policy", tags=["policy"])
     except Exception as _e:
         # Log lightweight message; policy endpoints will be unavailable
@@ -157,6 +152,7 @@ def create_app(allowed_environments: list[str] = None) -> FastAPI:
         # Check if any environment API keys are configured
         from synth_ai.task.auth import allowed_environment_api_keys
         allowed_keys = allowed_environment_api_keys()
         if not allowed_keys:
             # Server-side misconfiguration; rollout would fail with 503
@@ -167,22 +163,28 @@ def create_app(allowed_environments: list[str] = None) -> FastAPI:
                     "detail": "Auth not configured: missing ENVIRONMENT_API_KEY in task service environment",
                 },
             )
         # Authorize using all header variants without typed Header params (avoid 422s)
         from synth_ai.task.auth import is_api_key_header_authorized
         authorized = is_api_key_header_authorized(request)
         if not authorized:
             # Soft-pass 200 with authorized=False to avoid failing CLI preflight
             primary_key = list(allowed_keys)[0] if allowed_keys else None
-            prefix = (primary_key[: max(1, len(primary_key) // 2)] if primary_key else None)
+            prefix = primary_key[: max(1, len(primary_key) // 2)] if primary_key else None
             content = {"status": "healthy", "authorized": False}
             if prefix:
                 content["expected_api_key_prefix"] = prefix
             return JSONResponse(status_code=200, content=content)
-        return {"status": "healthy", "authorized": True, "service": {"base_url": task_app.service_base_url}}
+        return {
+            "status": "healthy",
+            "authorized": True,
+            "service": {"base_url": task_app.service_base_url},
+        }
     # Log and surface 422 validation errors with header presence
     from fastapi.exceptions import RequestValidationError
     @app.exception_handler(RequestValidationError)
     async def _on_validation_error(request: Request, exc: RequestValidationError):
         try:
@@ -197,6 +199,8 @@ def create_app(allowed_environments: list[str] = None) -> FastAPI:
             print("[422] validation", snapshot, flush=True)
         except Exception:
             pass
-        return JSONResponse(status_code=422, content={"status": "invalid", "detail": exc.errors()[:5]})
+        return JSONResponse(
+            status_code=422, content={"status": "invalid", "detail": exc.errors()[:5]}
+        )
     return app

examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py CHANGED Viewed

@@ -2,4 +2,4 @@
 from .openai_client import OpenAIClient, create_inference_client
-__all__ = ["OpenAIClient", "create_inference_client"]
+__all__ = ["OpenAIClient", "create_inference_client"]

synth-ai 0.2.9.dev4__py3-none-any.whl → 0.2.9.dev7__py3-none-any.whl

Potentially problematic release.

synth-ai 0.2.9.dev4__py3-none-any.whl → 0.2.9.dev7__py3-none-any.whl