synth-ai 0.2.9.dev5__py3-none-any.whl → 0.2.9.dev7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic.

Files changed (155)
  1. examples/common_old/backend.py +0 -1
  2. examples/crafter_debug_render.py +15 -6
  3. examples/evals_old/compare_models.py +1 -0
  4. examples/finetuning_old/_backup_synth_qwen/filter_traces_achievements.py +6 -2
  5. examples/finetuning_old/_backup_synth_qwen/react_agent_lm.py +4 -4
  6. examples/finetuning_old/_backup_synth_qwen/sft_kickoff.py +4 -3
  7. examples/finetuning_old/synth_qwen_v1/filter_traces_achievements.py +6 -2
  8. examples/finetuning_old/synth_qwen_v1/finetune.py +1 -1
  9. examples/finetuning_old/synth_qwen_v1/hello_ft_model.py +4 -4
  10. examples/finetuning_old/synth_qwen_v1/infer.py +1 -2
  11. examples/finetuning_old/synth_qwen_v1/poll.py +4 -2
  12. examples/finetuning_old/synth_qwen_v1/prepare_data.py +8 -8
  13. examples/finetuning_old/synth_qwen_v1/react_agent_lm.py +5 -4
  14. examples/finetuning_old/synth_qwen_v1/run_crafter_sft_job.py +11 -8
  15. examples/finetuning_old/synth_qwen_v1/run_ft_job.py +17 -12
  16. examples/finetuning_old/synth_qwen_v1/upload_data.py +1 -1
  17. examples/finetuning_old/synth_qwen_v1/util.py +7 -2
  18. examples/rl/configs/eval_base_qwen.toml +1 -1
  19. examples/rl/configs/rl_from_base_qwen17.toml +1 -1
  20. examples/rl/download_dataset.py +26 -10
  21. examples/rl/run_eval.py +17 -15
  22. examples/rl/run_rl_and_save.py +24 -7
  23. examples/rl/task_app/math_single_step.py +128 -11
  24. examples/rl/task_app/math_task_app.py +11 -3
  25. examples/rl_old/task_app.py +222 -53
  26. examples/warming_up_to_rl/analyze_trace_db.py +7 -5
  27. examples/warming_up_to_rl/export_trace_sft.py +141 -16
  28. examples/warming_up_to_rl/groq_test.py +11 -4
  29. examples/warming_up_to_rl/manage_secrets.py +15 -6
  30. examples/warming_up_to_rl/readme.md +9 -2
  31. examples/warming_up_to_rl/run_eval.py +108 -30
  32. examples/warming_up_to_rl/run_fft_and_save.py +128 -52
  33. examples/warming_up_to_rl/run_local_rollout.py +87 -36
  34. examples/warming_up_to_rl/run_local_rollout_modal.py +113 -25
  35. examples/warming_up_to_rl/run_local_rollout_parallel.py +80 -16
  36. examples/warming_up_to_rl/run_local_rollout_traced.py +125 -20
  37. examples/warming_up_to_rl/run_rl_and_save.py +31 -7
  38. examples/warming_up_to_rl/run_rollout_remote.py +37 -10
  39. examples/warming_up_to_rl/task_app/grpo_crafter.py +90 -27
  40. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +9 -27
  41. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +46 -108
  42. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -1
  43. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +1 -1
  44. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -1
  45. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +50 -17
  46. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +35 -21
  47. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +8 -4
  48. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +29 -26
  49. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +1 -1
  50. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +17 -13
  51. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +1 -1
  52. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +106 -63
  53. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +82 -84
  54. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +76 -59
  55. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +1 -1
  56. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +43 -49
  57. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +5 -15
  58. synth_ai/__init__.py +1 -0
  59. synth_ai/api/train/builders.py +34 -10
  60. synth_ai/api/train/cli.py +172 -32
  61. synth_ai/api/train/config_finder.py +59 -4
  62. synth_ai/api/train/env_resolver.py +32 -14
  63. synth_ai/api/train/pollers.py +11 -3
  64. synth_ai/api/train/task_app.py +4 -1
  65. synth_ai/api/train/utils.py +20 -4
  66. synth_ai/cli/__init__.py +11 -4
  67. synth_ai/cli/balance.py +1 -1
  68. synth_ai/cli/demo.py +19 -5
  69. synth_ai/cli/rl_demo.py +75 -16
  70. synth_ai/cli/root.py +116 -37
  71. synth_ai/cli/task_apps.py +1276 -186
  72. synth_ai/cli/traces.py +1 -0
  73. synth_ai/cli/turso.py +73 -0
  74. synth_ai/core/experiment.py +0 -2
  75. synth_ai/demo_registry.py +67 -30
  76. synth_ai/demos/core/cli.py +493 -164
  77. synth_ai/demos/demo_task_apps/core.py +50 -6
  78. synth_ai/demos/demo_task_apps/crafter/configs/crafter_fft_4b.toml +2 -3
  79. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +36 -28
  80. synth_ai/demos/demo_task_apps/math/_common.py +1 -2
  81. synth_ai/demos/demo_task_apps/math/deploy_modal.py +0 -2
  82. synth_ai/demos/demo_task_apps/math/modal_task_app.py +168 -65
  83. synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -1
  84. synth_ai/environments/examples/bandit/engine.py +12 -4
  85. synth_ai/environments/examples/bandit/taskset.py +4 -4
  86. synth_ai/environments/reproducibility/tree.py +3 -1
  87. synth_ai/environments/service/core_routes.py +6 -2
  88. synth_ai/evals/base.py +0 -2
  89. synth_ai/experimental/synth_oss.py +11 -12
  90. synth_ai/handshake.py +3 -1
  91. synth_ai/http_client.py +31 -7
  92. synth_ai/inference/__init__.py +0 -2
  93. synth_ai/inference/client.py +8 -4
  94. synth_ai/jobs/client.py +40 -10
  95. synth_ai/learning/client.py +33 -8
  96. synth_ai/learning/config.py +0 -2
  97. synth_ai/learning/constants.py +0 -2
  98. synth_ai/learning/ft_client.py +6 -3
  99. synth_ai/learning/health.py +9 -2
  100. synth_ai/learning/jobs.py +17 -5
  101. synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +1 -3
  102. synth_ai/learning/prompts/random_search.py +4 -1
  103. synth_ai/learning/prompts/run_random_search_banking77.py +6 -1
  104. synth_ai/learning/rl_client.py +42 -14
  105. synth_ai/learning/sse.py +0 -2
  106. synth_ai/learning/validators.py +6 -2
  107. synth_ai/lm/caching/ephemeral.py +1 -3
  108. synth_ai/lm/core/exceptions.py +0 -2
  109. synth_ai/lm/core/main.py +13 -1
  110. synth_ai/lm/core/synth_models.py +0 -1
  111. synth_ai/lm/core/vendor_clients.py +4 -2
  112. synth_ai/lm/overrides.py +2 -2
  113. synth_ai/lm/vendors/core/anthropic_api.py +7 -7
  114. synth_ai/lm/vendors/core/openai_api.py +2 -0
  115. synth_ai/lm/vendors/openai_standard.py +3 -1
  116. synth_ai/lm/vendors/openai_standard_responses.py +6 -3
  117. synth_ai/lm/vendors/supported/custom_endpoint.py +1 -3
  118. synth_ai/lm/vendors/synth_client.py +37 -10
  119. synth_ai/rl/__init__.py +0 -1
  120. synth_ai/rl/contracts.py +0 -2
  121. synth_ai/rl/env_keys.py +6 -1
  122. synth_ai/task/__init__.py +1 -0
  123. synth_ai/task/apps/__init__.py +11 -11
  124. synth_ai/task/auth.py +29 -17
  125. synth_ai/task/client.py +3 -1
  126. synth_ai/task/contracts.py +1 -0
  127. synth_ai/task/datasets.py +3 -1
  128. synth_ai/task/errors.py +3 -2
  129. synth_ai/task/health.py +0 -2
  130. synth_ai/task/json.py +0 -1
  131. synth_ai/task/proxy.py +2 -5
  132. synth_ai/task/rubrics.py +9 -3
  133. synth_ai/task/server.py +31 -5
  134. synth_ai/task/tracing_utils.py +8 -3
  135. synth_ai/task/validators.py +0 -1
  136. synth_ai/task/vendors.py +0 -1
  137. synth_ai/tracing_v3/db_config.py +26 -1
  138. synth_ai/tracing_v3/decorators.py +1 -0
  139. synth_ai/tracing_v3/examples/basic_usage.py +3 -2
  140. synth_ai/tracing_v3/hooks.py +2 -0
  141. synth_ai/tracing_v3/replica_sync.py +1 -0
  142. synth_ai/tracing_v3/session_tracer.py +24 -3
  143. synth_ai/tracing_v3/storage/base.py +4 -1
  144. synth_ai/tracing_v3/storage/factory.py +0 -1
  145. synth_ai/tracing_v3/turso/manager.py +102 -38
  146. synth_ai/tracing_v3/turso/models.py +4 -1
  147. synth_ai/tracing_v3/utils.py +1 -0
  148. synth_ai/v0/tracing/upload.py +32 -135
  149. {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.9.dev7.dist-info}/METADATA +1 -1
  150. {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.9.dev7.dist-info}/RECORD +154 -154
  151. synth_ai/install_sqld.sh +0 -40
  152. {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.9.dev7.dist-info}/WHEEL +0 -0
  153. {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.9.dev7.dist-info}/entry_points.txt +0 -0
  154. {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.9.dev7.dist-info}/licenses/LICENSE +0 -0
  155. {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.9.dev7.dist-info}/top_level.txt +0 -0
@@ -24,6 +24,7 @@ class DemoEnv:
     task_app_name: str = ""
     task_app_secret_name: str = DEFAULT_TASK_APP_SECRET_NAME
 
+
 def _mask(value: str, keep: int = 4) -> str:
     if not value:
         return ""
@@ -121,6 +122,32 @@ def persist_env_api_key(key: str) -> None:
     _write_state(data)
 
 
+def persist_demo_dir(demo_dir: str) -> None:
+    """Store the demo directory path for subsequent commands."""
+    data = _read_state()
+    data["DEMO_DIR"] = demo_dir
+    _write_state(data)
+
+
+def load_demo_dir() -> str | None:
+    """Load the stored demo directory path, if any."""
+    data = _read_state()
+    return data.get("DEMO_DIR")
+
+
+def persist_env_file_path(env_path: str) -> None:
+    """Store the .env file path for subsequent commands."""
+    data = _read_state()
+    data["ENV_FILE_PATH"] = env_path
+    _write_state(data)
+
+
+def load_env_file_path() -> str | None:
+    """Load the stored .env file path, if any."""
+    data = _read_state()
+    return data.get("ENV_FILE_PATH")
+
+
 def modal_auth_status() -> Tuple[bool, str]:
     """Return (ok, message) describing Modal CLI credential status."""
 
@@ -192,7 +219,9 @@ def load_env() -> DemoEnv:
     # Repo/package .envs (fallbacks)
     repo_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../.."))
     repo_env = load_dotenv_file(os.path.join(repo_root, ".env"))
-    pkg_env = load_dotenv_file(os.path.join(repo_root, "synth_ai", "demos", "demo_task_apps", "math", ".env"))
+    pkg_env = load_dotenv_file(
+        os.path.join(repo_root, "synth_ai", "demos", "demo_task_apps", "math", ".env")
+    )
     examples_env = load_dotenv_file(os.path.join(repo_root, "examples", "rl", ".env"))
 
     state = _read_state()
@@ -241,7 +270,11 @@ def load_env() -> DemoEnv:
         or str(state.get("SYNTH_API_KEY") or "")
     )
     if not synth_api_key:
-        mode = "prod" if default_root in dev_url else ("local" if ("localhost" in dev_url or "127.0.0.1" in dev_url) else "dev")
+        mode = (
+            "prod"
+            if default_root in dev_url
+            else ("local" if ("localhost" in dev_url or "127.0.0.1" in dev_url) else "dev")
+        )
         if mode == "prod":
             synth_api_key = (
                 os_env.get("PROD_SYNTH_API_KEY")
@@ -310,7 +343,9 @@ def load_env() -> DemoEnv:
     return env
 
 
-def assert_http_ok(url: str, method: str = "GET", allow_redirects: bool = True, timeout: float = 10.0) -> bool:
+def assert_http_ok(
+    url: str, method: str = "GET", allow_redirects: bool = True, timeout: float = 10.0
+) -> bool:
     try:
         import ssl
 
@@ -387,7 +422,14 @@ def persist_api_key(key: str) -> None:
     _write_state(data)
 
 
-def run_job(env: DemoEnv, config_toml_path: str, *, batch_size: Optional[int] = None, group_size: Optional[int] = None, model: Optional[str] = None) -> None:
+def run_job(
+    env: DemoEnv,
+    config_toml_path: str,
+    *,
+    batch_size: Optional[int] = None,
+    group_size: Optional[int] = None,
+    model: Optional[str] = None,
+) -> None:
     """Create and stream a short RL job using the backend API (placeholder: prints cURL to execute)."""
     backend = env.dev_backend_url.rstrip("/")
@@ -396,9 +438,11 @@ def run_job(env: DemoEnv, config_toml_path: str, *, batch_size: Optional[int] =
     api_base = backend + "/api"
     print("\nTo create an RL job, run:")
     print(
-        "curl -s -X POST \"" + api_base + "/rl/jobs\" "
+        'curl -s -X POST "' + api_base + '/rl/jobs" '
         "-H 'Content-Type: application/json' "
         f"-H 'Authorization: Bearer {env.synth_api_key}' "
         "-d '{"  # intentionally not fully formed here for brevity in this scaffold
     )
-    print(" NOTE: CLI implementation will build the full JSON body with inline TOML config and stream events.")
+    print(
+        " NOTE: CLI implementation will build the full JSON body with inline TOML config and stream events."
+    )
@@ -8,8 +8,7 @@ variety = "fft"
 
 [job]
 model = "Qwen/Qwen3-4B"
-# Limit training to the first 100 conversations (export a 100-row JSONL and point to it here)
-# data = "../ft_data/qwen3_32b_ach_ge3_raw_filtered.head100.jsonl"
+data = "ft_data/crafter_traces.jsonl"
 
 [compute]
 # Adjust as needed for your quota
@@ -23,7 +22,7 @@ topology = {}
 
 # Optional local validation dataset path (JSONL). If set, the client will upload
 # this file and wire up validation so the frontend can display val.loss.
-validation_path = "../ft_data/qwen3_32b_ach_ge3_raw_filtered.tokens_1000000_seed_123.val_2000.jsonl"
+# validation_path = "../ft_data/crafter_validation.jsonl"
 
 [training]
 mode = "sft_offline"
@@ -1,10 +1,9 @@
-
 """Compatibility wrapper for the GRPO Crafter task app.
 
-This module now delegates to the shared TaskAppConfig defined in
-`synth_ai.task.apps.grpo_crafter`. It is kept for legacy usage (running the
-file directly or targeting `fastapi_app` from external tooling). Prefer using
-`uvx synth-ai serve grpo-crafter` for local development and testing.
+This module now delegates to the TaskAppConfig defined in the local example at
+`examples/warming_up_to_rl/task_app/grpo_crafter.py`. It is kept for legacy usage
+(running the file directly or targeting `fastapi_app` from external tooling).
+Prefer using `uvx synth-ai serve grpo-crafter` for local development and testing.
 """
 
 from __future__ import annotations
@@ -17,35 +16,43 @@ from fastapi.responses import JSONResponse
 from starlette.requests import Request
 
 from synth_ai.task.apps import ModalDeploymentConfig, registry
-from synth_ai.task.apps.grpo_crafter import build_config
 from synth_ai.task.auth import is_api_key_header_authorized, normalize_environment_api_key
 from synth_ai.task.server import TaskAppConfig, create_task_app, run_task_app
+import importlib.util
+
+
+def _load_build_config():
+    # Find synth_ai package location to locate examples/
+    import synth_ai
+
+    synth_ai_path = Path(synth_ai.__file__).resolve().parent.parent
+    module_path = synth_ai_path / "examples" / "warming_up_to_rl" / "task_app" / "grpo_crafter.py"
+
+    if not module_path.exists():
+        raise ImportError(
+            f"Could not find task app module at {module_path}. Make sure you're running from the synth-ai repository."
+        )
+
+    spec = importlib.util.spec_from_file_location(
+        "warming_up_to_rl.task_app.grpo_crafter", module_path
+    )
+    if spec is None or spec.loader is None:
+        raise ImportError(f"Could not load task app module at {module_path}")
+    module = importlib.util.module_from_spec(spec)
+    spec.loader.exec_module(module)
+    return getattr(module, "build_config")
+
+
+build_config = _load_build_config()
 
 
 APP_ID = "grpo-crafter"
 
 
-_BASE_CONFIG = build_config()
-TASK_APP_CONFIG = TaskAppConfig(
-    app_id="grpo-crafter",
-    name=_BASE_CONFIG.name,
-    description=_BASE_CONFIG.description,
-    base_task_info=_BASE_CONFIG.base_task_info,
-    describe_taskset=_BASE_CONFIG.describe_taskset,
-    provide_task_instances=_BASE_CONFIG.provide_task_instances,
-    rollout=_BASE_CONFIG.rollout,
-    dataset_registry=_BASE_CONFIG.dataset_registry,
-    rubrics=_BASE_CONFIG.rubrics,
-    proxy=_BASE_CONFIG.proxy,
-    routers=_BASE_CONFIG.routers,
-    middleware=_BASE_CONFIG.middleware,
-    app_state=_BASE_CONFIG.app_state,
-    require_api_key=_BASE_CONFIG.require_api_key,
-    expose_debug_env=_BASE_CONFIG.expose_debug_env,
-    cors_origins=_BASE_CONFIG.cors_origins,
-    startup_hooks=_BASE_CONFIG.startup_hooks,
-    shutdown_hooks=_BASE_CONFIG.shutdown_hooks,
-)
+def _build_base_config() -> TaskAppConfig:
+    # Lazily construct the base config to avoid heavy work at import time
+    return build_config()
+
 
 try:
     _REGISTERED_ENTRY = registry.get(APP_ID)
@@ -60,7 +67,8 @@ else:
 def build_task_app_config() -> TaskAppConfig:
     """Return a fresh TaskAppConfig for this wrapper."""
 
-    return TASK_APP_CONFIG.clone()
+    base = _build_base_config()
+    return base.clone()
 
 
 def fastapi_app():
@@ -6,6 +6,7 @@ This module provides a local fallback for install_problem_bank_into_shared so
 the modal task app can import it without requiring an external math_rl package.
 """
 
+
 def install_problem_bank_into_shared() -> None:
     """No-op placeholder for installing the Hendrycks MATH problem bank.
 
@@ -13,5 +14,3 @@ def install_problem_bank_into_shared() -> None:
     into a shared directory. For the demo scaffold, it is a no-op.
     """
     return None
-
-
@@ -56,5 +56,3 @@ def deploy(script_path: Optional[str] = None, *, env_api_key: Optional[str] = No
             f"No deploy script provided and Python-based deploy failed: {e}. "
             "Pass --script /path/to/deploy_task_app.sh to demo.deploy."
         )
-
-
@@ -25,7 +25,9 @@ _SYNTH_HOSTED = None
 try:
     probe = _HERE
     for _ in range(8):
-        candidate = (probe / "backend/app/routes/clustered_training/dev/synth_envs_hosted").resolve()
+        candidate = (
+            probe / "backend/app/routes/clustered_training/dev/synth_envs_hosted"
+        ).resolve()
         if candidate.exists():
             _SYNTH_HOSTED = candidate
             break
@@ -101,12 +103,14 @@ def fastapi_app():
     from fastapi import FastAPI
     from fastapi.middleware.cors import CORSMiddleware
     from fastapi.responses import JSONResponse
+
     try:
         from synth_ai.task.auth import (
             is_api_key_header_authorized,
             normalize_environment_api_key,
         )
     except Exception:  # pragma: no cover - fallback for older synth-ai builds
+
         def _normalize_env_key_fallback() -> str | None:
             key = os.getenv("ENVIRONMENT_API_KEY")
             if key:
@@ -130,7 +134,7 @@ def fastapi_app():
         for value in values:
             if not isinstance(value, str):
                 continue
-            for chunk in value.split(','):
+            for chunk in value.split(","):
                 chunk = chunk.strip()
                 if chunk:
                     parts.append(chunk)
@@ -172,19 +176,27 @@ def fastapi_app():
 
     def _normalize_answer_text(s: str) -> str:
         import re as _re
+
         return _re.sub(r"[^0-9A-Za-z.+\-/*=]", "", (s or "").strip()).lower()
 
     def _extract_boxed(s: str) -> str:
         import re as _re
+
         m = list(_re.finditer(r"\\boxed\{([^}]+)\}", s or ""))
         return m[-1].group(1) if m else ""
 
     def _load_hendrycks_problem(seed: int, subject: str | None = None) -> tuple[str, str]:
         subj = subject or os.getenv("HENDRYCKS_MATH_CONFIG", "default")
-        ds = _hf_split(subj, os.getenv("HENDRYCKS_MATH_SPLIT", "test"), os.getenv("HENDRYCKS_MATH_SLICE"))
+        ds = _hf_split(
+            subj, os.getenv("HENDRYCKS_MATH_SPLIT", "test"), os.getenv("HENDRYCKS_MATH_SLICE")
+        )
         n = len(ds) if hasattr(ds, "__len__") else 0
         if n == 0 and subject not in {"", "default"}:
-            ds = _hf_split("default", os.getenv("HENDRYCKS_MATH_SPLIT", "test"), os.getenv("HENDRYCKS_MATH_SLICE"))
+            ds = _hf_split(
+                "default",
+                os.getenv("HENDRYCKS_MATH_SPLIT", "test"),
+                os.getenv("HENDRYCKS_MATH_SLICE"),
+            )
             n = len(ds) if hasattr(ds, "__len__") else 0
         if n == 0:
             raise RuntimeError("Hendrycks MATH dataset loaded empty")
@@ -225,7 +237,11 @@ def fastapi_app():
 
     def _resolve_env_keys() -> set[str]:
         keys: set[str] = set()
-        for alias in ("ENVIRONMENT_API_KEY", "dev_environment_api_key", "DEV_ENVIRONMENT_API_KEY"):
+        for alias in (
+            "ENVIRONMENT_API_KEY",
+            "dev_environment_api_key",
+            "DEV_ENVIRONMENT_API_KEY",
+        ):
             value = os.environ.get(alias)
             if value:
                 os.environ.setdefault("ENVIRONMENT_API_KEY", value)
@@ -250,8 +266,12 @@ def fastapi_app():
             candidates.append(primary.strip())
         secondary = x_api_keys or headers.get("x-api-keys")
         if secondary:
-            candidates.extend([value.strip() for value in secondary.split(",") if value.strip()])
-        auth_header = authorization or headers.get("authorization") or headers.get("Authorization")
+            candidates.extend(
+                [value.strip() for value in secondary.split(",") if value.strip()]
+            )
+        auth_header = (
+            authorization or headers.get("authorization") or headers.get("Authorization")
+        )
         if auth_header and auth_header.lower().startswith("bearer "):
             token = auth_header.split(" ", 1)[1].strip()
             if token:
@@ -274,7 +294,10 @@ def fastapi_app():
     async def info():
         return {
             "service": {"base_url": os.getenv("SERVICE_BASE_URL", "")},
-            "inference": {"base_url": "", "endpoints": {"chat_completions": "/v1/chat/completions"}},
+            "inference": {
+                "base_url": "",
+                "endpoints": {"chat_completions": "/v1/chat/completions"},
+            },
         }
 
     @app.get("/health")
  @app.get("/health")
@@ -282,7 +305,10 @@ def fastapi_app():
282
305
  env_keys = _resolve_env_keys()
283
306
  env_key = next(iter(env_keys), None)
284
307
  if not env_key:
285
- return JSONResponse(status_code=503, content={"status": "unhealthy", "detail": "Missing ENVIRONMENT_API_KEY"})
308
+ return JSONResponse(
309
+ status_code=503,
310
+ content={"status": "unhealthy", "detail": "Missing ENVIRONMENT_API_KEY"},
311
+ )
286
312
  # Authorize using all header variants; avoid typed Header params to prevent 422s
287
313
  authorized = is_api_key_header_authorized(request)
288
314
  if not authorized:
@@ -302,7 +328,10 @@ def fastapi_app():
         env_keys = _resolve_env_keys()
         env_key = next(iter(env_keys), None)
         if not env_key:
-            return JSONResponse(status_code=503, content={"status": "unhealthy", "detail": "Missing ENVIRONMENT_API_KEY"})
+            return JSONResponse(
+                status_code=503,
+                content={"status": "unhealthy", "detail": "Missing ENVIRONMENT_API_KEY"},
+            )
         authorized = is_api_key_header_authorized(request)
         if not authorized:
             prefix = _log_env_key_prefix("health/rollout", env_key)
@@ -321,17 +350,22 @@ def fastapi_app():
     async def task_info(seed: int = 0, subject: str = "default"):
         """Return Hendrycks MATH problem/answer and tool schema for a seed."""
         q, a = _load_hendrycks_problem(int(seed), subject=subject)
-        tools = [{
-            "name": "submit_answer",
-            "description": "Provide the final numerical or algebraic answer for the current math problem.",
-            "parameters": {
-                "type": "object",
-                "properties": {
-                    "answer": {"type": "string", "description": "The proposed final answer"},
+        tools = [
+            {
+                "name": "submit_answer",
+                "description": "Provide the final numerical or algebraic answer for the current math problem.",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "answer": {
+                            "type": "string",
+                            "description": "The proposed final answer",
+                        },
+                    },
+                    "required": ["answer"],
                 },
-            "required": ["answer"],
-            },
-        }]
+            }
+        ]
         return {
             "seed": int(seed),
             "subject": subject,
@@ -363,7 +397,9 @@ def fastapi_app():
             print("[422] validation", snapshot, flush=True)
         except Exception:
             pass
-        return JSONResponse(status_code=422, content={"status": "invalid", "detail": exc.errors()[:5]})
+        return JSONResponse(
+            status_code=422, content={"status": "invalid", "detail": exc.errors()[:5]}
+        )
 
     @api.get("/")
     async def root_probe():
@@ -381,7 +417,12 @@ def fastapi_app():
     if not env_key:
         raise RuntimeError("ENVIRONMENT_API_KEY missing in task app environment")
 
-    OPENAI_REMOVE_FIELDS = ("stop_after_tool_calls", "thinking_mode", "thinking_budget", "reasoning")
+    OPENAI_REMOVE_FIELDS = (
+        "stop_after_tool_calls",
+        "thinking_mode",
+        "thinking_budget",
+        "reasoning",
+    )
     OPENAI_REMOVE_SAMPLING_FIELDS = ("temperature", "top_p")
     TOOL_CHOICE_FORCE = {"type": "function", "function": {"name": "submit_answer"}}
 
@@ -404,12 +445,18 @@ def fastapi_app():
     def proxy_chat_completions(request: dict[str, object] = Body(...)):
         key = os.environ.get("OPENAI_API_KEY")
         if not key:
-            raise HTTPException(status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail="OPENAI_API_KEY missing")
+            raise HTTPException(
+                status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail="OPENAI_API_KEY missing"
+            )
         model = request.get("model") if isinstance(request, dict) else None
-        payload = _prepare_openai_payload(model if isinstance(model, str) else None, request if isinstance(request, dict) else {})
+        payload = _prepare_openai_payload(
+            model if isinstance(model, str) else None, request if isinstance(request, dict) else {}
+        )
         headers = {"Authorization": f"Bearer {key}"}
         with httpx.Client(timeout=httpx.Timeout(180.0), follow_redirects=True) as client:
-            resp = client.post("https://api.openai.com/v1/chat/completions", json=payload, headers=headers)
+            resp = client.post(
+                "https://api.openai.com/v1/chat/completions", json=payload, headers=headers
+            )
             try:
                 data = resp.json()
             except Exception:
442
489
  env_cfg = (env or {}).get("config") or {}
443
490
  # Prefer env.seed; fall back to env.config.seed -> default 0
444
491
  try:
445
- seed_val = int((env or {}).get("seed")) if isinstance(env, dict) and (env or {}).get("seed") is not None else 0
492
+ seed_val = (
493
+ int((env or {}).get("seed"))
494
+ if isinstance(env, dict) and (env or {}).get("seed") is not None
495
+ else 0
496
+ )
446
497
  except Exception:
447
498
  seed_val = 0
448
499
  if seed_val == 0:
449
500
  try:
450
- seed_val = int(env_cfg.get("seed")) if isinstance(env_cfg, dict) and env_cfg.get("seed") is not None else 0
501
+ seed_val = (
502
+ int(env_cfg.get("seed"))
503
+ if isinstance(env_cfg, dict) and env_cfg.get("seed") is not None
504
+ else 0
505
+ )
451
506
  except Exception:
452
507
  seed_val = 0
453
- subject = (env_cfg.get("subject") if isinstance(env_cfg, dict) else None) or os.getenv("HENDRYCKS_MATH_CONFIG", "default")
508
+ subject = (env_cfg.get("subject") if isinstance(env_cfg, dict) else None) or os.getenv(
509
+ "HENDRYCKS_MATH_CONFIG", "default"
510
+ )
454
511
  # Load real Hendrycks problem text/solution (download if necessary). Crash on failure.
455
512
  qh, ah = _load_hendrycks_problem(seed_val, subject=subject)
456
513
  question = qh
@@ -468,7 +525,10 @@ def fastapi_app():
             sanitized.pop("max_tokens", None)
             for field in ("temperature", "top_p"):
                 sanitized.pop(field, None)
-            sanitized["tool_choice"] = {"type": "function", "function": {"name": "submit_answer"}}
+            sanitized["tool_choice"] = {
+                "type": "function",
+                "function": {"name": "submit_answer"},
+            }
             sanitized["parallel_tool_calls"] = False
         return sanitized
 
@@ -509,19 +569,21 @@ def fastapi_app():
         payload = {
             "model": model,
             "messages": [{"role": "user", "content": user_prompt}],
-            "tools": [{
-                "type": "function",
-                "function": {
-                    "name": "submit_answer",
-                    "parameters": {
-                        "type": "object",
-                        "properties": {
-                            "answer": {"type": "string"},
+            "tools": [
+                {
+                    "type": "function",
+                    "function": {
+                        "name": "submit_answer",
+                        "parameters": {
+                            "type": "object",
+                            "properties": {
+                                "answer": {"type": "string"},
+                            },
+                            "required": ["answer"],
                         },
-                    "required": ["answer"],
                     },
-            },
-        }],
+                }
+            ],
             "max_tokens": 256,
             "temperature": 0.2,
         }
@@ -529,7 +591,7 @@ def fastapi_app():
 
         try:
             tool_names = []
-            for t in (payload.get("tools") or []):
+            for t in payload.get("tools") or []:
                 if isinstance(t, dict):
                     fn = (t.get("function") or {}) if isinstance(t.get("function"), dict) else {}
                     name = fn.get("name")
@@ -547,7 +609,9 @@ def fastapi_app():
         if sk:
             headers["Authorization"] = f"Bearer {sk}"
         with httpx.Client(timeout=httpx.Timeout(180.0), follow_redirects=True) as client:
-            resp = client.post(f"{inference_url}/v1/chat/completions", json=to_send, headers=headers)
+            resp = client.post(
+                f"{inference_url}/v1/chat/completions", json=to_send, headers=headers
+            )
             try:
                 data = resp.json()
             except Exception:
@@ -580,14 +644,21 @@ def fastapi_app():
 
         tool_answer = _parse_tool_answer(data)
         history.append({"answer": tool_answer})
-        steps.append({
-            "obs": {},
-            "tool_calls": [{"tool_name": "submit_answer", "arguments": _json.dumps({"answer": tool_answer})}],
-            "reward": None,
-            "done": False,
-            "truncated": False,
-            "info": None,
-        })
+        steps.append(
+            {
+                "obs": {},
+                "tool_calls": [
+                    {
+                        "tool_name": "submit_answer",
+                        "arguments": _json.dumps({"answer": tool_answer}),
+                    }
+                ],
+                "reward": None,
+                "done": False,
+                "truncated": False,
+                "info": None,
+            }
+        )
 
         # Evaluate answer correctness using tool output (or fall back to assistant text)
         reward_val = 0.0
@@ -605,25 +676,57 @@ def fastapi_app():
         except Exception:
             reward_val = 0.0
 
+        # Immediate, concise rollout logging mirroring RL format
+        try:
+            preview = tool_answer[:120] + (
+                "…" if isinstance(tool_answer, str) and len(tool_answer) > 120 else ""
+            )
+            components = {
+                "env": float(reward_val),
+                "rubric_event": 1.0 if bool(tool_answer.strip()) else 0.0,
+                "rubric_outcome": 1.0 if float(reward_val) > 0.0 else 0.0,
+            }
+            print(
+                "[MATH_ROLLOUT] run=",
+                run_id,
+                " seed=",
+                seed_val,
+                " subject=",
+                subject,
+                " tool=submit_answer answer=",
+                preview,
+                " reward=",
+                float(reward_val),
+                " components=",
+                components,
+                flush=True,
+            )
+        except Exception:
+            pass
+
         total_reward += float(reward_val)
-        steps.append({
-            "obs": {},
-            "tool_calls": [],
-            "reward": reward_val,
-            "done": True,
-            "truncated": False,
-            "info": None,
-        })
+        steps.append(
+            {
+                "obs": {},
+                "tool_calls": [],
+                "reward": reward_val,
+                "done": True,
+                "truncated": False,
+                "info": None,
+            }
+        )
 
         return {
             "run_id": run_id,
-            "trajectories": [{
-                "env_id": env_name,
-                "policy_id": (policy or {}).get("policy_name") or "math-react",
-                "steps": steps,
-                "final": {"observation": {}},
-                "length": len(steps),
-            }],
+            "trajectories": [
+                {
+                    "env_id": env_name,
+                    "policy_id": (policy or {}).get("policy_name") or "math-react",
+                    "steps": steps,
+                    "final": {"observation": {}},
+                    "length": len(steps),
+                }
+            ],
             "branches": {},
             "metrics": {
                 "episode_returns": [total_reward],
@@ -36,4 +36,3 @@ register_task_app(
         modal=DEMO_MODAL_CONFIG,
     )
 )
-