PyPI - synth-ai - Versions diffs - 0.2.9.dev5__py3-none-any.whl → 0.2.9.dev7__py3-none-any.whl - Mend

synth-ai 0.2.9.dev5__py3-none-any.whl → 0.2.9.dev7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of synth-ai might be problematic. Click here for more details.

Files changed (155) hide show

examples/common_old/backend.py +0 -1
examples/crafter_debug_render.py +15 -6
examples/evals_old/compare_models.py +1 -0
examples/finetuning_old/_backup_synth_qwen/filter_traces_achievements.py +6 -2
examples/finetuning_old/_backup_synth_qwen/react_agent_lm.py +4 -4
examples/finetuning_old/_backup_synth_qwen/sft_kickoff.py +4 -3
examples/finetuning_old/synth_qwen_v1/filter_traces_achievements.py +6 -2
examples/finetuning_old/synth_qwen_v1/finetune.py +1 -1
examples/finetuning_old/synth_qwen_v1/hello_ft_model.py +4 -4
examples/finetuning_old/synth_qwen_v1/infer.py +1 -2
examples/finetuning_old/synth_qwen_v1/poll.py +4 -2
examples/finetuning_old/synth_qwen_v1/prepare_data.py +8 -8
examples/finetuning_old/synth_qwen_v1/react_agent_lm.py +5 -4
examples/finetuning_old/synth_qwen_v1/run_crafter_sft_job.py +11 -8
examples/finetuning_old/synth_qwen_v1/run_ft_job.py +17 -12
examples/finetuning_old/synth_qwen_v1/upload_data.py +1 -1
examples/finetuning_old/synth_qwen_v1/util.py +7 -2
examples/rl/configs/eval_base_qwen.toml +1 -1
examples/rl/configs/rl_from_base_qwen17.toml +1 -1
examples/rl/download_dataset.py +26 -10
examples/rl/run_eval.py +17 -15
examples/rl/run_rl_and_save.py +24 -7
examples/rl/task_app/math_single_step.py +128 -11
examples/rl/task_app/math_task_app.py +11 -3
examples/rl_old/task_app.py +222 -53
examples/warming_up_to_rl/analyze_trace_db.py +7 -5
examples/warming_up_to_rl/export_trace_sft.py +141 -16
examples/warming_up_to_rl/groq_test.py +11 -4
examples/warming_up_to_rl/manage_secrets.py +15 -6
examples/warming_up_to_rl/readme.md +9 -2
examples/warming_up_to_rl/run_eval.py +108 -30
examples/warming_up_to_rl/run_fft_and_save.py +128 -52
examples/warming_up_to_rl/run_local_rollout.py +87 -36
examples/warming_up_to_rl/run_local_rollout_modal.py +113 -25
examples/warming_up_to_rl/run_local_rollout_parallel.py +80 -16
examples/warming_up_to_rl/run_local_rollout_traced.py +125 -20
examples/warming_up_to_rl/run_rl_and_save.py +31 -7
examples/warming_up_to_rl/run_rollout_remote.py +37 -10
examples/warming_up_to_rl/task_app/grpo_crafter.py +90 -27
examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +9 -27
examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +46 -108
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -1
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +1 -1
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -1
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +50 -17
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +35 -21
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +8 -4
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +29 -26
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +1 -1
examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +17 -13
examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +1 -1
examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +106 -63
examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +82 -84
examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +76 -59
examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +1 -1
examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +43 -49
examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +5 -15
synth_ai/__init__.py +1 -0
synth_ai/api/train/builders.py +34 -10
synth_ai/api/train/cli.py +172 -32
synth_ai/api/train/config_finder.py +59 -4
synth_ai/api/train/env_resolver.py +32 -14
synth_ai/api/train/pollers.py +11 -3
synth_ai/api/train/task_app.py +4 -1
synth_ai/api/train/utils.py +20 -4
synth_ai/cli/__init__.py +11 -4
synth_ai/cli/balance.py +1 -1
synth_ai/cli/demo.py +19 -5
synth_ai/cli/rl_demo.py +75 -16
synth_ai/cli/root.py +116 -37
synth_ai/cli/task_apps.py +1276 -186
synth_ai/cli/traces.py +1 -0
synth_ai/cli/turso.py +73 -0
synth_ai/core/experiment.py +0 -2
synth_ai/demo_registry.py +67 -30
synth_ai/demos/core/cli.py +493 -164
synth_ai/demos/demo_task_apps/core.py +50 -6
synth_ai/demos/demo_task_apps/crafter/configs/crafter_fft_4b.toml +2 -3
synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +36 -28
synth_ai/demos/demo_task_apps/math/_common.py +1 -2
synth_ai/demos/demo_task_apps/math/deploy_modal.py +0 -2
synth_ai/demos/demo_task_apps/math/modal_task_app.py +168 -65
synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -1
synth_ai/environments/examples/bandit/engine.py +12 -4
synth_ai/environments/examples/bandit/taskset.py +4 -4
synth_ai/environments/reproducibility/tree.py +3 -1
synth_ai/environments/service/core_routes.py +6 -2
synth_ai/evals/base.py +0 -2
synth_ai/experimental/synth_oss.py +11 -12
synth_ai/handshake.py +3 -1
synth_ai/http_client.py +31 -7
synth_ai/inference/__init__.py +0 -2
synth_ai/inference/client.py +8 -4
synth_ai/jobs/client.py +40 -10
synth_ai/learning/client.py +33 -8
synth_ai/learning/config.py +0 -2
synth_ai/learning/constants.py +0 -2
synth_ai/learning/ft_client.py +6 -3
synth_ai/learning/health.py +9 -2
synth_ai/learning/jobs.py +17 -5
synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +1 -3
synth_ai/learning/prompts/random_search.py +4 -1
synth_ai/learning/prompts/run_random_search_banking77.py +6 -1
synth_ai/learning/rl_client.py +42 -14
synth_ai/learning/sse.py +0 -2
synth_ai/learning/validators.py +6 -2
synth_ai/lm/caching/ephemeral.py +1 -3
synth_ai/lm/core/exceptions.py +0 -2
synth_ai/lm/core/main.py +13 -1
synth_ai/lm/core/synth_models.py +0 -1
synth_ai/lm/core/vendor_clients.py +4 -2
synth_ai/lm/overrides.py +2 -2
synth_ai/lm/vendors/core/anthropic_api.py +7 -7
synth_ai/lm/vendors/core/openai_api.py +2 -0
synth_ai/lm/vendors/openai_standard.py +3 -1
synth_ai/lm/vendors/openai_standard_responses.py +6 -3
synth_ai/lm/vendors/supported/custom_endpoint.py +1 -3
synth_ai/lm/vendors/synth_client.py +37 -10
synth_ai/rl/__init__.py +0 -1
synth_ai/rl/contracts.py +0 -2
synth_ai/rl/env_keys.py +6 -1
synth_ai/task/__init__.py +1 -0
synth_ai/task/apps/__init__.py +11 -11
synth_ai/task/auth.py +29 -17
synth_ai/task/client.py +3 -1
synth_ai/task/contracts.py +1 -0
synth_ai/task/datasets.py +3 -1
synth_ai/task/errors.py +3 -2
synth_ai/task/health.py +0 -2
synth_ai/task/json.py +0 -1
synth_ai/task/proxy.py +2 -5
synth_ai/task/rubrics.py +9 -3
synth_ai/task/server.py +31 -5
synth_ai/task/tracing_utils.py +8 -3
synth_ai/task/validators.py +0 -1
synth_ai/task/vendors.py +0 -1
synth_ai/tracing_v3/db_config.py +26 -1
synth_ai/tracing_v3/decorators.py +1 -0
synth_ai/tracing_v3/examples/basic_usage.py +3 -2
synth_ai/tracing_v3/hooks.py +2 -0
synth_ai/tracing_v3/replica_sync.py +1 -0
synth_ai/tracing_v3/session_tracer.py +24 -3
synth_ai/tracing_v3/storage/base.py +4 -1
synth_ai/tracing_v3/storage/factory.py +0 -1
synth_ai/tracing_v3/turso/manager.py +102 -38
synth_ai/tracing_v3/turso/models.py +4 -1
synth_ai/tracing_v3/utils.py +1 -0
synth_ai/v0/tracing/upload.py +32 -135
{synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.9.dev7.dist-info}/METADATA +1 -1
{synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.9.dev7.dist-info}/RECORD +154 -154
synth_ai/install_sqld.sh +0 -40
{synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.9.dev7.dist-info}/WHEEL +0 -0
{synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.9.dev7.dist-info}/entry_points.txt +0 -0
{synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.9.dev7.dist-info}/licenses/LICENSE +0 -0
{synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.9.dev7.dist-info}/top_level.txt +0 -0

examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py CHANGED Viewed

@@ -20,6 +20,7 @@ from .registry import registry
 logger = logging.getLogger(__name__)
 # --- Seeding utilities (robust, optional deps) ---
 def _set_global_seed(seed_value: int) -> Dict[str, Any]:
     """Set global RNG seeds across common libraries; return details for logging/restoration.
@@ -29,18 +30,21 @@ def _set_global_seed(seed_value: int) -> Dict[str, Any]:
     seeded: Dict[str, Any] = {"seed": int(seed_value), "libs": []}
     try:
         import random as _random  # type: ignore
         _random.seed(seed_value)
         seeded["libs"].append("random")
     except Exception:
         pass
     try:
         import numpy as _np  # type: ignore
         _np.random.seed(seed_value)
         seeded["libs"].append("numpy")
     except Exception:
         pass
     try:
         import torch as _torch  # type: ignore
         if hasattr(_torch, "manual_seed"):
             _torch.manual_seed(seed_value)
             seeded["libs"].append("torch")
@@ -62,12 +66,14 @@ def _set_global_seed(seed_value: int) -> Dict[str, Any]:
         pass
     return seeded
 def _clear_seed_side_effects() -> None:
     """Best-effort cleanup to avoid global deterministic side-effects between requests."""
     # We cannot truly restore prior RNG states without capturing them; we just avoid
     # leaving aggressive deterministic flags enabled where it matters.
     try:
         import torch as _torch  # type: ignore
         try:
             if getattr(_torch, "backends", None) and getattr(_torch.backends, "cudnn", None):
                 # Re-enable cudnn.benchmark default True only if it was True; safest is False -> leave as is.
@@ -78,6 +84,7 @@ def _clear_seed_side_effects() -> None:
     except Exception:
         pass
 router = APIRouter()
@@ -161,11 +168,7 @@ def compute_stepwise_reward(
     prev_map = prev_achievements or {}
     next_map = new_achievements or {}
-    unlocked = [
-        name
-        for name, value in next_map.items()
-        if value and not prev_map.get(name, False)
-    ]
+    unlocked = [name for name, value in next_map.items() if value and not prev_map.get(name, False)]
     indicator = 1 if unlocked else 0
     reward_value = float(indicator_lambda) * indicator
@@ -227,7 +230,9 @@ class RolloutTracingContext:
         self.sft_records: list[dict[str, Any]] = []
         self.latest_system_messages: list[str] = []
         self.latest_user_messages: list[str] = []
-        self.trace_format = (getattr(request.record, "trace_format", "compact") or "compact").lower()
+        self.trace_format = (
+            getattr(request.record, "trace_format", "compact") or "compact"
+        ).lower()
         self.return_trace = bool(getattr(request.record, "return_trace", False))
         self.sft_output_dir = getattr(fastapi_request.app.state, "sft_output_dir", None)
         self.session_trace = None
@@ -257,7 +262,9 @@ class RolloutTracingContext:
         except Exception as exc:
             logger.debug("TRACING_INIT_FAIL: %s", exc)
         try:
-            await self.tracer.start_session(session_id=self.run_id, metadata=dict(self.metadata_base))
+            await self.tracer.start_session(
+                session_id=self.run_id, metadata=dict(self.metadata_base)
+            )
         except Exception as exc:
             logger.warning("TRACING_START_FAIL: %s", exc)
             self.enabled = False
@@ -379,17 +386,15 @@ class RolloutTracingContext:
         input_tokens = usage.get("input_tokens") or usage.get("prompt_tokens")
         output_tokens = usage.get("output_tokens") or usage.get("completion_tokens")
         total_tokens = usage.get("total_tokens")
-        cost_usd = (
-            usage.get("cost_usd")
-            or usage.get("cost")
-            or usage.get("total_cost")
-        )
+        cost_usd = usage.get("cost_usd") or usage.get("cost") or usage.get("total_cost")
         assistant_message = None
         choices = inference_response.get("choices") or []
         if choices:
             assistant_message = choices[0].get("message") or {}
-        assistant_content = assistant_message.get("content") if isinstance(assistant_message, dict) else None
+        assistant_content = (
+            assistant_message.get("content") if isinstance(assistant_message, dict) else None
+        )
         raw_response = self._content_to_text(assistant_content)
         if not raw_response:
@@ -397,7 +402,9 @@ class RolloutTracingContext:
         base_response = BaseLMResponse(
             raw_response=raw_response,
-            tool_calls=assistant_message.get("tool_calls") if isinstance(assistant_message, dict) else None,
+            tool_calls=assistant_message.get("tool_calls")
+            if isinstance(assistant_message, dict)
+            else None,
             usage=usage or None,
             api_type="chat_completions",
         )
@@ -469,7 +476,9 @@ class RolloutTracingContext:
                 ),
                 "assistant": {
                     "content": assistant_text,
-                    "tool_calls": assistant_message.get("tool_calls") if isinstance(assistant_message, dict) else [],
+                    "tool_calls": assistant_message.get("tool_calls")
+                    if isinstance(assistant_message, dict)
+                    else [],
                 },
                 "timestamp": datetime.utcnow().isoformat(),
             }
@@ -488,11 +497,19 @@ class RolloutTracingContext:
             return None
         try:
-            prev_summary = _summarize_observation_for_storage(env_handle, prev_obs or {}) if prev_obs is not None else None
+            prev_summary = (
+                _summarize_observation_for_storage(env_handle, prev_obs or {})
+                if prev_obs is not None
+                else None
+            )
         except Exception:
             prev_summary = None
         try:
-            next_summary = _summarize_observation_for_storage(env_handle, next_obs or {}) if next_obs is not None else None
+            next_summary = (
+                _summarize_observation_for_storage(env_handle, next_obs or {})
+                if next_obs is not None
+                else None
+            )
         except Exception:
             next_summary = None
@@ -640,7 +657,11 @@ class RolloutTracingContext:
             "lm_calls": self.lm_calls_summary,
             "decision_rewards": self.decision_rewards,
         }
-def _summarize_observation_for_storage(env_handle: Any, observation: Dict[str, Any]) -> Dict[str, Any]:
+def _summarize_observation_for_storage(
+    env_handle: Any, observation: Dict[str, Any]
+) -> Dict[str, Any]:
     """Return a compact dict for trajectory storage instead of the raw observation.
     - For Crafter, use the same summary used for the policy user prompt
@@ -652,9 +673,12 @@ def _summarize_observation_for_storage(env_handle: Any, observation: Dict[str, A
     except Exception:
         _CrafterWrapper = None  # type: ignore
-    if _CrafterWrapper is not None and isinstance(getattr(env_handle, "env", None), _CrafterWrapper):
+    if _CrafterWrapper is not None and isinstance(
+        getattr(env_handle, "env", None), _CrafterWrapper
+    ):
         try:
             from .envs.crafter.shared import format_observation as _fmt  # type: ignore
             text = _fmt(observation or {})
             return {"text": text}
         except Exception:
@@ -671,8 +695,12 @@ def _summarize_observation_for_storage(env_handle: Any, observation: Dict[str, A
         summary = {
             "position": pos,
             "health": health,
-            "inventory_keys": sorted([k for k, v in (inv or {}).items() if v])[:10] if isinstance(inv, dict) else None,
-            "achievements_unlocked": sorted([k for k, v in (ach or {}).items() if v])[:10] if isinstance(ach, dict) else None,
+            "inventory_keys": sorted([k for k, v in (inv or {}).items() if v])[:10]
+            if isinstance(inv, dict)
+            else None,
+            "achievements_unlocked": sorted([k for k, v in (ach or {}).items() if v])[:10]
+            if isinstance(ach, dict)
+            else None,
         }
         return {"text": json.dumps(summary, ensure_ascii=False)}
     except Exception:
@@ -685,7 +713,6 @@ def _summarize_observation_for_storage(env_handle: Any, observation: Dict[str, A
         return {"text": ""}
 class RunAbortRequest(BaseModel):
     run_id: str
@@ -857,9 +884,7 @@ async def execute_rollout(
             # Propagate training_session_id via env config for downstream usage
             _env_config = dict(request.env.config or {})
             if request.training_session_id is not None:
-                _env_config.setdefault(
-                    "training_session_id", request.training_session_id
-                )
+                _env_config.setdefault("training_session_id", request.training_session_id)
             env_response = await create_environment(
                 EnvCreateRequest(
                     env_name=request.env.env_name,
@@ -893,9 +918,7 @@ async def execute_rollout(
             # Propagate training_session_id and synth_base_url via policy config
             _policy_config = dict(request.policy.config or {})
             if request.training_session_id is not None:
-                _policy_config.setdefault(
-                    "training_session_id", request.training_session_id
-                )
+                _policy_config.setdefault("training_session_id", request.training_session_id)
             if request.synth_base_url is not None:
                 _policy_config.setdefault("synth_base_url", request.synth_base_url)
             policy_response = await create_policy(
@@ -1065,7 +1088,10 @@ async def execute_rollout(
                                 _timing["decision_ms"] = decision_ms
                                 if last_env_step_ms is not None:
                                     _timing.setdefault("env_step_ms", float(last_env_step_ms))
-                                    _timing.setdefault("overhead_ms", max(0.0, decision_ms - float(last_env_step_ms)))
+                                    _timing.setdefault(
+                                        "overhead_ms",
+                                        max(0.0, decision_ms - float(last_env_step_ms)),
+                                    )
                                 else:
                                     _timing.setdefault("overhead_ms", 0.0)
                                 _meta["timing"] = _timing
@@ -1107,9 +1133,7 @@ async def execute_rollout(
                     _first_guess = None
                     if _count > 0 and isinstance(_prev_calls[0], dict):
                         _args = (
-                            _prev_calls[0]["arguments"]
-                            if "arguments" in _prev_calls[0]
-                            else None
+                            _prev_calls[0]["arguments"] if "arguments" in _prev_calls[0] else None
                         )
                         if isinstance(_args, str):
                             import json as _json
@@ -1119,9 +1143,9 @@ async def execute_rollout(
                             except Exception:
                                 _args = {}
                         if isinstance(_args, dict):
-                            _first_guess = (
-                                _args["guess"] if "guess" in _args else None
-                            ) or (_args["word"] if "word" in _args else None)
+                            _first_guess = (_args["guess"] if "guess" in _args else None) or (
+                                _args["word"] if "word" in _args else None
+                            )
                     logger.info(
                         "POLICY_METADATA: prev_tool_calls=%d first_guess=%r has_prev_env_result=%s",
                         _count,
@@ -1377,7 +1401,9 @@ async def execute_rollout(
                                 (env_step_end - float(last_agent_response_ts)) * 1000.0,
                             )
                             timing_last["decision_ms"] = decision_ms
-                            timing_last.setdefault("overhead_ms", max(0.0, decision_ms - env_step_duration_ms))
+                            timing_last.setdefault(
+                                "overhead_ms", max(0.0, decision_ms - env_step_duration_ms)
+                            )
                         except Exception:
                             pass
                     if decision_open:
@@ -1409,9 +1435,7 @@ async def execute_rollout(
                 # Attach policy meta from the immediately preceding agent step
                 try:
                     prev_meta = {}
-                    if "policy_response" in locals() and isinstance(
-                        policy_response.meta, dict
-                    ):  # type: ignore[name-defined]
+                    if "policy_response" in locals() and isinstance(policy_response.meta, dict):  # type: ignore[name-defined]
                         prev_meta = policy_response.meta
                     if prev_meta:
                         _info = dict(_info)
@@ -1452,9 +1476,7 @@ async def execute_rollout(
                     reward_stepwise = float(stats.get("reward", 0.0))
                     stepwise_indicator_sum += float(stats.get("indicator", 0.0))
                     stepwise_reward_sum += reward_stepwise
-                    stepwise_new_achievements_total += int(
-                        stats.get("new_achievements_count", 0.0)
-                    )
+                    stepwise_new_achievements_total += int(stats.get("new_achievements_count", 0.0))
                     if not isinstance(_info, dict):
                         _info = {}
                     else:
@@ -1470,7 +1492,9 @@ async def execute_rollout(
                         # Prepare stable lists for logging/metadata
                         all_list = sorted(list(turned_true))
                         # Ensure nested meta exists
-                        meta_block = _info.get("meta") if isinstance(_info.get("meta"), dict) else {}
+                        meta_block = (
+                            _info.get("meta") if isinstance(_info.get("meta"), dict) else {}
+                        )
                         decision_rewards = {
                             "turn": int(decision_index),
                             "ach_delta": ach_delta,
@@ -1521,9 +1545,7 @@ async def execute_rollout(
                             EnvResetRequest,
                         )
-                        reset_response = await reset_environment(
-                            EnvResetRequest(env_id=env_id)
-                        )
+                        reset_response = await reset_environment(EnvResetRequest(env_id=env_id))
                         current_obs = reset_response.observation
                     elif request.on_done == "terminate":
                         break
@@ -1544,15 +1566,11 @@ async def execute_rollout(
         ):
             try:
                 final_now = last_env_step_completed_ts or _time.perf_counter()
-                final_decision_ms = max(
-                    0.0, (final_now - float(last_agent_response_ts)) * 1000.0
-                )
+                final_decision_ms = max(0.0, (final_now - float(last_agent_response_ts)) * 1000.0)
                 timing_final = last_policy_meta.setdefault("timing", {})
                 timing_final["decision_ms"] = final_decision_ms
                 if last_env_step_ms is not None:
-                    timing_final.setdefault(
-                        "env_step_ms", float(last_env_step_ms)
-                    )
+                    timing_final.setdefault("env_step_ms", float(last_env_step_ms))
                     timing_final.setdefault(
                         "overhead_ms",
                         max(0.0, final_decision_ms - float(last_env_step_ms)),
@@ -1601,10 +1619,11 @@ async def execute_rollout(
                 for step in trajectory_steps:
                     formatted_steps.append({"tool_calls": step.tool_calls or []})
-                if get_wordle_rollout_summary is not None and log_wordle_rollout_summary is not None:
-                    summary = get_wordle_rollout_summary(
-                        formatted_steps, current_obs, env_handle
-                    )
+                if (
+                    get_wordle_rollout_summary is not None
+                    and log_wordle_rollout_summary is not None
+                ):
+                    summary = get_wordle_rollout_summary(formatted_steps, current_obs, env_handle)
                     log_wordle_rollout_summary(request.run_id, summary)
         except ImportError:
             # Wordle helpers not available, skip Wordle-specific logging
@@ -1681,9 +1700,7 @@ async def execute_rollout(
                 except Exception:
                     pass
         except Exception as _te:
-            logger.warning(
-                f"ROLL_OUT: failed to terminate environment {created_env_id}: {_te}"
-            )
+            logger.warning(f"ROLL_OUT: failed to terminate environment {created_env_id}: {_te}")
         # Best-effort policy cleanup if we created one (avoid reuse across rollouts)
         try:

examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py CHANGED Viewed

@@ -2,4 +2,4 @@
 from .volume import VolumeStorage, storage
-__all__ = ["VolumeStorage", "storage"]
+__all__ = ["VolumeStorage", "storage"]

examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py CHANGED Viewed

@@ -13,10 +13,10 @@ from typing import Any, Dict, Optional
 class VolumeStorage:
     """Helpers for Modal Volume storage operations."""
     def __init__(self, base_path: str = "/data/state") -> None:
         self.base_path = Path(base_path)
     def get_snapshot_path(
         self,
         rl_run_id: str,
@@ -27,21 +27,15 @@ class VolumeStorage:
         # Use first 2 chars of snapshot_id for sharding
         shard1 = snapshot_id[:2] if len(snapshot_id) >= 2 else "00"
         shard2 = snapshot_id[2:4] if len(snapshot_id) >= 4 else "00"
         return (
-            self.base_path
-            / "runs"
-            / rl_run_id
-            / kind
-            / shard1
-            / shard2
-            / f"{snapshot_id}.tar.gz"
+            self.base_path / "runs" / rl_run_id / kind / shard1 / shard2 / f"{snapshot_id}.tar.gz"
         )
     def get_index_path(self, rl_run_id: str) -> Path:
         """Get the index file path for a run."""
         return self.base_path / "runs" / rl_run_id / "index" / "meta.jsonl"
     def write_snapshot_atomic(
         self,
         path: Path,
@@ -50,17 +44,17 @@ class VolumeStorage:
         """Atomically write a snapshot archive to disk."""
         # Ensure parent directory exists
         path.parent.mkdir(parents=True, exist_ok=True)
         # Write to temp file first
         tmp_path = path.with_suffix(".tmp")
         with open(tmp_path, "wb") as f:
             f.write(archive_bytes)
             f.flush()
             os.fsync(f.fileno())
         # Atomic rename
         os.replace(tmp_path, path)
     def create_archive(
         self,
         state_dict: Dict[str, Any],
@@ -69,61 +63,61 @@ class VolumeStorage:
         """Create a tar.gz archive with state and metadata."""
         with tempfile.TemporaryDirectory() as tmpdir:
             tmppath = Path(tmpdir)
             # Write state.json
             state_path = tmppath / "state.json"
             with open(state_path, "w") as f:
                 json.dump(state_dict, f, sort_keys=True, indent=2)
             # Write meta.json
             meta_path = tmppath / "meta.json"
             with open(meta_path, "w") as f:
                 json.dump(meta, f, sort_keys=True, indent=2)
             # Create tar archive
             tar_path = tmppath / "archive.tar"
             with tarfile.open(tar_path, "w") as tar:
                 tar.add(state_path, arcname="state.json")
                 tar.add(meta_path, arcname="meta.json")
             # Compress with gzip
             with open(tar_path, "rb") as f:
                 tar_bytes = f.read()
             compressed = gzip.compress(tar_bytes, compresslevel=6)
             return compressed
     def extract_archive(self, archive_bytes: bytes) -> tuple[Dict[str, Any], Dict[str, Any]]:
         """Extract state and metadata from a tar.gz archive."""
         # Decompress
         tar_bytes = gzip.decompress(archive_bytes)
         with tempfile.TemporaryDirectory() as tmpdir:
             tmppath = Path(tmpdir)
             # Write tar bytes to temp file
             tar_path = tmppath / "archive.tar"
             with open(tar_path, "wb") as f:
                 f.write(tar_bytes)
             # Extract tar
             with tarfile.open(tar_path, "r") as tar:
                 tar.extractall(tmppath)
             # Read state and meta
             with open(tmppath / "state.json", "r") as f:
                 state = json.load(f)
             with open(tmppath / "meta.json", "r") as f:
                 meta = json.load(f)
             return state, meta
     def compute_snapshot_id(self, archive_bytes: bytes) -> str:
         """Compute content-addressed snapshot ID."""
         return hashlib.sha256(archive_bytes).hexdigest()
     def save_snapshot(
         self,
         rl_run_id: str,
@@ -140,33 +134,33 @@ class VolumeStorage:
             "schema_version": "1.0",
             "created_at": datetime.utcnow().isoformat(),
         }
         if parent_snapshot_id:
             meta["parent_snapshot_id"] = parent_snapshot_id
         if config:
             config_str = json.dumps(config, sort_keys=True)
             meta["config_hash"] = hashlib.sha256(config_str.encode()).hexdigest()
         # Create archive
         archive_bytes = self.create_archive(state_dict, meta)
         # Compute snapshot ID
         snapshot_id = self.compute_snapshot_id(archive_bytes)
         meta["snapshot_id"] = snapshot_id
         # Recreate archive with snapshot_id in metadata
         archive_bytes = self.create_archive(state_dict, meta)
         # Get path and write
         path = self.get_snapshot_path(rl_run_id, kind, snapshot_id)
         self.write_snapshot_atomic(path, archive_bytes)
         # Append to index
         self.append_to_index(rl_run_id, meta)
         return snapshot_id, str(path), len(archive_bytes)
     def load_snapshot(
         self,
         rl_run_id: str,
@@ -175,16 +169,16 @@ class VolumeStorage:
     ) -> tuple[Dict[str, Any], Dict[str, Any]]:
         """Load a snapshot and return (state_dict, meta)."""
         path = self.get_snapshot_path(rl_run_id, kind, snapshot_id)
         if not path.exists():
             raise FileNotFoundError(f"Snapshot not found: {path}")
         with open(path, "rb") as f:
             archive_bytes = f.read()
         state, meta = self.extract_archive(archive_bytes)
         return state, meta
     def append_to_index(
         self,
         rl_run_id: str,
@@ -193,25 +187,25 @@ class VolumeStorage:
         """Append metadata to the run's index file."""
         index_path = self.get_index_path(rl_run_id)
         index_path.parent.mkdir(parents=True, exist_ok=True)
         with open(index_path, "a") as f:
             f.write(json.dumps(meta) + "\n")
     def read_index(self, rl_run_id: str) -> list[Dict[str, Any]]:
         """Read all entries from a run's index file."""
         index_path = self.get_index_path(rl_run_id)
         if not index_path.exists():
             return []
         entries = []
         with open(index_path, "r") as f:
             for line in f:
                 if line.strip():
                     entries.append(json.loads(line))
         return entries
 # Global storage instance
-storage = VolumeStorage()
+storage = VolumeStorage()

examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py CHANGED Viewed

@@ -82,15 +82,11 @@ async def test_service():
             print(f"   Error: {response.status_code} - {response.text}")
         else:
             step_data = response.json()
-            print(
-                f"   Step result - done: {step_data['done']}, reward: {step_data.get('reward')}"
-            )
+            print(f"   Step result - done: {step_data['done']}, reward: {step_data.get('reward')}")
         # Test 6: Environment snapshot
         print("\n6. Creating environment snapshot...")
-        response = await client.post(
-            f"{base_url}/env/snapshot", json={"env_id": env_id}
-        )
+        response = await client.post(f"{base_url}/env/snapshot", json={"env_id": env_id})
         if response.status_code != 200:
             print(f"   Error: {response.status_code} - {response.text}")
         else:
@@ -100,9 +96,7 @@ async def test_service():
         # Test 7: Policy snapshot
         print("\n7. Creating policy snapshot...")
-        response = await client.post(
-            f"{base_url}/policy/snapshot", json={"policy_id": policy_id}
-        )
+        response = await client.post(f"{base_url}/policy/snapshot", json={"policy_id": policy_id})
         if response.status_code != 200:
             print(f"   Error: {response.status_code} - {response.text}")
         else:
@@ -121,9 +115,7 @@ async def test_service():
         # Test 9: Terminate environment
         print("\n9. Terminating environment...")
-        response = await client.post(
-            f"{base_url}/env/terminate", json={"env_id": env_id}
-        )
+        response = await client.post(f"{base_url}/env/terminate", json={"env_id": env_id})
         if response.status_code != 200:
             print(f"   Error: {response.status_code} - {response.text}")
         else:
@@ -131,9 +123,7 @@ async def test_service():
         # Test 10: Terminate policy
         print("\n10. Terminating policy...")
-        response = await client.post(
-            f"{base_url}/policy/terminate", json={"policy_id": policy_id}
-        )
+        response = await client.post(f"{base_url}/policy/terminate", json={"policy_id": policy_id})
         if response.status_code != 200:
             print(f"   Error: {response.status_code} - {response.text}")
         else:

synth_ai/__init__.py CHANGED Viewed

@@ -5,6 +5,7 @@ Synth AI - Software for aiding the best and multiplying the will.
 # Environment exports - moved from synth-env
 from synth_ai.environments import *  # noqa
 import synth_ai.environments as environments  # expose module name for __all__
 try:
     from synth_ai.lm.core.main import LM  # Moved from zyk to lm for better organization
 except Exception:  # allow minimal imports (e.g., tracing) without LM stack

synth-ai 0.2.9.dev5__py3-none-any.whl → 0.2.9.dev7__py3-none-any.whl

Potentially problematic release.

synth-ai 0.2.9.dev5__py3-none-any.whl → 0.2.9.dev7__py3-none-any.whl