synth-ai 0.2.16__py3-none-any.whl → 0.2.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic; consult the registry's security advisory for more details.

Files changed (299)
  1. examples/analyze_semantic_words.sh +2 -2
  2. examples/baseline/banking77_baseline.py +204 -0
  3. examples/baseline/crafter_baseline.py +407 -0
  4. examples/baseline/pokemon_red_baseline.py +326 -0
  5. examples/baseline/simple_baseline.py +56 -0
  6. examples/baseline/warming_up_to_rl_baseline.py +239 -0
  7. examples/blog_posts/gepa/README.md +355 -0
  8. examples/blog_posts/gepa/configs/banking77_gepa_local.toml +95 -0
  9. examples/blog_posts/gepa/configs/banking77_gepa_test.toml +82 -0
  10. examples/blog_posts/gepa/configs/banking77_mipro_local.toml +52 -0
  11. examples/blog_posts/gepa/configs/hotpotqa_gepa_local.toml +59 -0
  12. examples/blog_posts/gepa/configs/hotpotqa_gepa_qwen.toml +36 -0
  13. examples/blog_posts/gepa/configs/hotpotqa_mipro_local.toml +53 -0
  14. examples/blog_posts/gepa/configs/hover_gepa_local.toml +59 -0
  15. examples/blog_posts/gepa/configs/hover_gepa_qwen.toml +36 -0
  16. examples/blog_posts/gepa/configs/hover_mipro_local.toml +53 -0
  17. examples/blog_posts/gepa/configs/ifbench_gepa_local.toml +59 -0
  18. examples/blog_posts/gepa/configs/ifbench_gepa_qwen.toml +36 -0
  19. examples/blog_posts/gepa/configs/ifbench_mipro_local.toml +53 -0
  20. examples/blog_posts/gepa/configs/pupa_gepa_local.toml +60 -0
  21. examples/blog_posts/gepa/configs/pupa_mipro_local.toml +54 -0
  22. examples/blog_posts/gepa/deploy_banking77_task_app.sh +41 -0
  23. examples/blog_posts/gepa/gepa_baseline.py +204 -0
  24. examples/blog_posts/gepa/query_prompts_example.py +97 -0
  25. examples/blog_posts/gepa/run_gepa_banking77.sh +87 -0
  26. examples/blog_posts/gepa/task_apps.py +105 -0
  27. examples/blog_posts/gepa/test_gepa_local.sh +67 -0
  28. examples/blog_posts/gepa/verify_banking77_setup.sh +123 -0
  29. examples/blog_posts/pokemon_vl/README.md +98 -0
  30. examples/blog_posts/pokemon_vl/configs/eval_gpt5nano.toml +26 -0
  31. examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +27 -0
  32. examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
  33. examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
  34. examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +43 -0
  35. examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
  36. examples/blog_posts/pokemon_vl/extract_images.py +239 -0
  37. examples/blog_posts/pokemon_vl/pokemon_vl_baseline.py +326 -0
  38. examples/blog_posts/pokemon_vl/run_eval_extract_images.py +209 -0
  39. examples/blog_posts/pokemon_vl/run_qwen_eval_extract_images.py +212 -0
  40. examples/blog_posts/pokemon_vl/text_box_analysis.md +106 -0
  41. examples/blog_posts/warming_up_to_rl/ARCHITECTURE.md +195 -0
  42. examples/blog_posts/warming_up_to_rl/FINAL_TEST_RESULTS.md +127 -0
  43. examples/blog_posts/warming_up_to_rl/INFERENCE_SUCCESS.md +132 -0
  44. examples/blog_posts/warming_up_to_rl/README.md +158 -0
  45. examples/blog_posts/warming_up_to_rl/SMOKE_TESTING.md +164 -0
  46. examples/blog_posts/warming_up_to_rl/SMOKE_TEST_COMPLETE.md +253 -0
  47. examples/blog_posts/warming_up_to_rl/configs/eval_baseline_qwen32b_10x20.toml +25 -0
  48. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
  49. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b_10x20.toml +26 -0
  50. examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
  51. examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
  52. examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
  53. examples/blog_posts/warming_up_to_rl/configs/smoke_test.toml +75 -0
  54. examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +91 -0
  55. examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
  56. examples/blog_posts/warming_up_to_rl/warming_up_to_rl_baseline.py +187 -0
  57. examples/dev/qwen3_32b_qlora_4xh100.toml +5 -0
  58. examples/multi_step/configs/VERILOG_REWARDS.md +4 -0
  59. examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +4 -0
  60. examples/multi_step/configs/crafter_rl_outcome.toml +2 -1
  61. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +65 -107
  62. examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +2 -1
  63. examples/multi_step/configs/crafter_rl_stepwise_simple.toml +2 -1
  64. examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
  65. examples/multi_step/configs/verilog_rl_lora.toml +80 -123
  66. examples/qwen_coder/configs/coder_lora_30b.toml +1 -3
  67. examples/qwen_coder/configs/coder_lora_4b.toml +4 -1
  68. examples/qwen_coder/configs/coder_lora_small.toml +1 -3
  69. examples/qwen_vl/README.md +10 -12
  70. examples/qwen_vl/SETUP_COMPLETE.md +7 -8
  71. examples/qwen_vl/VISION_TESTS_COMPLETE.md +2 -3
  72. examples/qwen_vl/collect_data_via_cli.md +76 -84
  73. examples/qwen_vl/collect_vision_traces.py +4 -4
  74. examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +40 -57
  75. examples/qwen_vl/configs/crafter_vlm_sft_example.toml +1 -2
  76. examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +20 -37
  77. examples/qwen_vl/configs/eval_gpt5nano_vision.toml +21 -40
  78. examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
  79. examples/qwen_vl/configs/{filter_qwen2vl_sft.toml → filter_qwen3vl_sft.toml} +4 -5
  80. examples/qwen_vl/configs/filter_vision_sft.toml +2 -3
  81. examples/qwen_vl/crafter_qwen_vl_agent.py +5 -5
  82. examples/qwen_vl/run_vision_comparison.sh +6 -7
  83. examples/rl/README.md +5 -5
  84. examples/rl/configs/rl_from_base_qwen.toml +26 -1
  85. examples/rl/configs/rl_from_base_qwen17.toml +6 -2
  86. examples/rl/task_app/README.md +1 -2
  87. examples/rl/task_app/math_single_step.py +2 -2
  88. examples/run_crafter_demo.sh +2 -2
  89. examples/sft/README.md +1 -1
  90. examples/sft/configs/crafter_fft_qwen0p6b.toml +4 -1
  91. examples/sft/configs/crafter_lora_qwen0p6b.toml +4 -1
  92. examples/swe/task_app/README.md +32 -2
  93. examples/swe/task_app/grpo_swe_mini.py +4 -0
  94. examples/swe/task_app/hosted/envs/crafter/react_agent.py +1 -1
  95. examples/swe/task_app/hosted/envs/mini_swe/environment.py +37 -10
  96. examples/swe/task_app/hosted/inference/openai_client.py +4 -38
  97. examples/swe/task_app/hosted/policy_routes.py +17 -0
  98. examples/swe/task_app/hosted/rollout.py +4 -2
  99. examples/swe/task_app/morph_backend.py +178 -0
  100. examples/task_apps/banking77/__init__.py +6 -0
  101. examples/task_apps/banking77/banking77_task_app.py +841 -0
  102. examples/task_apps/banking77/deploy_wrapper.py +46 -0
  103. examples/task_apps/crafter/CREATE_SFT_DATASET.md +4 -0
  104. examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +4 -0
  105. examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +4 -0
  106. examples/task_apps/crafter/task_app/README.md +1 -1
  107. examples/task_apps/crafter/task_app/grpo_crafter.py +90 -5
  108. examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +1 -1
  109. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +4 -26
  110. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -2
  111. examples/task_apps/crafter/task_app/synth_envs_hosted/hosted_app.py +49 -0
  112. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +372 -107
  113. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +81 -12
  114. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +82 -11
  115. examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +194 -1
  116. examples/task_apps/enron/task_app/grpo_enron_task_app.py +1 -1
  117. examples/task_apps/gepa_benchmarks/__init__.py +7 -0
  118. examples/task_apps/gepa_benchmarks/common.py +260 -0
  119. examples/task_apps/gepa_benchmarks/hotpotqa_task_app.py +507 -0
  120. examples/task_apps/gepa_benchmarks/hover_task_app.py +436 -0
  121. examples/task_apps/gepa_benchmarks/ifbench_task_app.py +563 -0
  122. examples/task_apps/gepa_benchmarks/pupa_task_app.py +460 -0
  123. examples/task_apps/math/README.md +1 -2
  124. examples/task_apps/pokemon_red/README.md +3 -4
  125. examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +4 -0
  126. examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +6 -5
  127. examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +1 -2
  128. examples/task_apps/pokemon_red/task_app.py +288 -39
  129. examples/task_apps/sokoban/README.md +2 -3
  130. examples/task_apps/verilog/eval_groq_qwen32b.toml +12 -14
  131. examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +1 -1
  132. examples/vlm/configs/crafter_vlm_gpt4o.toml +4 -1
  133. examples/warming_up_to_rl/configs/crafter_fft.toml +4 -1
  134. examples/warming_up_to_rl/configs/crafter_fft_4b.toml +0 -2
  135. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +3 -2
  136. examples/warming_up_to_rl/run_local_rollout_traced.py +1 -1
  137. examples/warming_up_to_rl/task_app/README.md +1 -1
  138. examples/warming_up_to_rl/task_app/grpo_crafter.py +185 -5
  139. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +1 -1
  140. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +3 -27
  141. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -1
  142. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +49 -0
  143. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +156 -45
  144. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +37 -4
  145. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +33 -3
  146. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +67 -0
  147. examples/workflows/math_rl/configs/rl_from_base_qwen.toml +27 -0
  148. examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +6 -0
  149. synth_ai/api/train/builders.py +99 -4
  150. synth_ai/api/train/cli.py +516 -26
  151. synth_ai/api/train/config_finder.py +13 -2
  152. synth_ai/api/train/configs/__init__.py +23 -2
  153. synth_ai/api/train/configs/prompt_learning.py +442 -0
  154. synth_ai/api/train/configs/rl.py +61 -7
  155. synth_ai/api/train/configs/sft.py +6 -2
  156. synth_ai/api/train/configs/shared.py +59 -2
  157. synth_ai/api/train/task_app.py +1 -1
  158. synth_ai/api/train/validators.py +277 -0
  159. synth_ai/auth/credentials.py +119 -0
  160. synth_ai/baseline/__init__.py +25 -0
  161. synth_ai/baseline/config.py +209 -0
  162. synth_ai/baseline/discovery.py +214 -0
  163. synth_ai/baseline/execution.py +146 -0
  164. synth_ai/cli/__init__.py +94 -18
  165. synth_ai/cli/__main__.py +0 -0
  166. synth_ai/cli/claude.py +70 -0
  167. synth_ai/cli/codex.py +84 -0
  168. synth_ai/cli/commands/__init__.py +18 -0
  169. synth_ai/cli/commands/baseline/__init__.py +12 -0
  170. synth_ai/cli/commands/baseline/core.py +637 -0
  171. synth_ai/cli/commands/baseline/list.py +93 -0
  172. synth_ai/cli/commands/demo/__init__.py +6 -0
  173. synth_ai/cli/commands/demo/core.py +163 -0
  174. synth_ai/cli/commands/eval/__init__.py +19 -0
  175. synth_ai/cli/commands/eval/core.py +1112 -0
  176. synth_ai/cli/commands/eval/errors.py +81 -0
  177. synth_ai/cli/commands/eval/validation.py +133 -0
  178. synth_ai/cli/commands/filter/__init__.py +12 -0
  179. synth_ai/cli/commands/filter/core.py +424 -0
  180. synth_ai/cli/commands/filter/errors.py +55 -0
  181. synth_ai/cli/commands/filter/validation.py +77 -0
  182. synth_ai/cli/commands/help/__init__.py +177 -0
  183. synth_ai/cli/commands/help/core.py +72 -0
  184. synth_ai/cli/commands/smoke/__init__.py +7 -0
  185. synth_ai/cli/commands/smoke/core.py +1436 -0
  186. synth_ai/cli/commands/status/__init__.py +64 -0
  187. synth_ai/cli/commands/status/client.py +192 -0
  188. synth_ai/cli/commands/status/config.py +92 -0
  189. synth_ai/cli/commands/status/errors.py +20 -0
  190. synth_ai/cli/commands/status/formatters.py +164 -0
  191. synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
  192. synth_ai/cli/commands/status/subcommands/files.py +79 -0
  193. synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
  194. synth_ai/cli/commands/status/subcommands/models.py +79 -0
  195. synth_ai/cli/commands/status/subcommands/pricing.py +22 -0
  196. synth_ai/cli/commands/status/subcommands/runs.py +81 -0
  197. synth_ai/cli/commands/status/subcommands/summary.py +47 -0
  198. synth_ai/cli/commands/status/subcommands/usage.py +203 -0
  199. synth_ai/cli/commands/status/utils.py +114 -0
  200. synth_ai/cli/commands/train/__init__.py +53 -0
  201. synth_ai/cli/commands/train/core.py +21 -0
  202. synth_ai/cli/commands/train/errors.py +117 -0
  203. synth_ai/cli/commands/train/judge_schemas.py +200 -0
  204. synth_ai/cli/commands/train/judge_validation.py +305 -0
  205. synth_ai/cli/commands/train/validation.py +386 -0
  206. synth_ai/cli/demo.py +30 -158
  207. synth_ai/cli/deploy/__init__.py +43 -0
  208. synth_ai/cli/deploy.py +162 -0
  209. synth_ai/cli/eval/__init__.py +36 -0
  210. synth_ai/cli/eval/core.py +5 -0
  211. synth_ai/cli/eval/errors.py +31 -0
  212. synth_ai/cli/eval/validation.py +5 -0
  213. synth_ai/cli/filter/__init__.py +28 -0
  214. synth_ai/cli/filter/core.py +5 -0
  215. synth_ai/cli/filter/errors.py +23 -0
  216. synth_ai/cli/filter/validation.py +5 -0
  217. synth_ai/cli/legacy_root_backup.py +14 -8
  218. synth_ai/cli/modal_serve/__init__.py +12 -0
  219. synth_ai/cli/modal_serve/core.py +14 -0
  220. synth_ai/cli/modal_serve/errors.py +8 -0
  221. synth_ai/cli/modal_serve/validation.py +11 -0
  222. synth_ai/cli/opencode.py +107 -0
  223. synth_ai/cli/root.py +9 -5
  224. synth_ai/cli/serve/__init__.py +12 -0
  225. synth_ai/cli/serve/core.py +14 -0
  226. synth_ai/cli/serve/errors.py +8 -0
  227. synth_ai/cli/serve/validation.py +11 -0
  228. synth_ai/cli/setup.py +20 -265
  229. synth_ai/cli/status.py +7 -126
  230. synth_ai/cli/task_app_deploy.py +1 -10
  231. synth_ai/cli/task_app_modal_serve.py +4 -9
  232. synth_ai/cli/task_app_serve.py +4 -11
  233. synth_ai/cli/task_apps.py +51 -1480
  234. synth_ai/cli/train/__init__.py +12 -0
  235. synth_ai/cli/train/core.py +21 -0
  236. synth_ai/cli/train/errors.py +8 -0
  237. synth_ai/cli/train/validation.py +24 -0
  238. synth_ai/cli/train.py +1 -14
  239. synth_ai/demos/crafter/grpo_crafter_task_app.py +1 -1
  240. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
  241. synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +7 -4
  242. synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +9 -5
  243. synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +4 -3
  244. synth_ai/environments/examples/red/engine.py +33 -12
  245. synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
  246. synth_ai/environments/examples/red/environment.py +26 -0
  247. synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
  248. synth_ai/http.py +12 -0
  249. synth_ai/judge_schemas.py +10 -10
  250. synth_ai/learning/__init__.py +10 -0
  251. synth_ai/learning/prompt_learning_client.py +276 -0
  252. synth_ai/learning/prompt_learning_types.py +184 -0
  253. synth_ai/learning/rl/client.py +3 -1
  254. synth_ai/pricing/__init__.py +2 -0
  255. synth_ai/pricing/model_pricing.py +57 -0
  256. synth_ai/streaming/__init__.py +29 -0
  257. synth_ai/streaming/config.py +94 -0
  258. synth_ai/streaming/handlers.py +518 -0
  259. synth_ai/streaming/streamer.py +320 -0
  260. synth_ai/streaming/types.py +95 -0
  261. synth_ai/task/apps/__init__.py +1 -0
  262. synth_ai/task/config.py +2 -0
  263. synth_ai/task/tracing_utils.py +25 -25
  264. synth_ai/task/validators.py +45 -9
  265. synth_ai/task_app_cfgs.py +21 -0
  266. synth_ai/tracing_v3/config.py +162 -19
  267. synth_ai/tracing_v3/constants.py +1 -1
  268. synth_ai/tracing_v3/db_config.py +24 -38
  269. synth_ai/tracing_v3/migration_helper.py +1 -2
  270. synth_ai/tracing_v3/storage/config.py +47 -13
  271. synth_ai/tracing_v3/storage/factory.py +3 -3
  272. synth_ai/tracing_v3/turso/daemon.py +113 -11
  273. synth_ai/tracing_v3/turso/native_manager.py +92 -16
  274. synth_ai/types.py +8 -0
  275. synth_ai/urls.py +11 -0
  276. synth_ai/utils/__init__.py +30 -1
  277. synth_ai/utils/agents.py +74 -0
  278. synth_ai/utils/bin.py +39 -0
  279. synth_ai/utils/cli.py +149 -5
  280. synth_ai/utils/env.py +40 -33
  281. synth_ai/utils/http.py +4 -1
  282. synth_ai/utils/json.py +72 -0
  283. synth_ai/utils/modal.py +285 -3
  284. synth_ai/utils/paths.py +48 -0
  285. synth_ai/utils/uvicorn.py +113 -0
  286. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/METADATA +109 -6
  287. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/RECORD +291 -142
  288. examples/qwen_vl/configs/eval_qwen2vl_vision.toml +0 -44
  289. synth_ai/cli/tui.py +0 -62
  290. synth_ai/tui/__init__.py +0 -5
  291. synth_ai/tui/__main__.py +0 -13
  292. synth_ai/tui/cli/__init__.py +0 -1
  293. synth_ai/tui/cli/query_experiments.py +0 -164
  294. synth_ai/tui/cli/query_experiments_v3.py +0 -164
  295. synth_ai/tui/dashboard.py +0 -911
  296. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/WHEEL +0 -0
  297. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/entry_points.txt +0 -0
  298. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/licenses/LICENSE +0 -0
  299. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/top_level.txt +0 -0
@@ -5,14 +5,21 @@ from __future__ import annotations
5
5
  import logging
6
6
  import os
7
7
  import sys
8
+ from urllib.parse import parse_qs, urlparse
8
9
  from collections.abc import Iterable, Sequence
9
10
  from contextlib import suppress
10
11
  from dataclasses import dataclass
12
+ from datetime import UTC, datetime
11
13
  from pathlib import Path
12
14
  from typing import Any
13
15
 
16
+ from fastapi import HTTPException
17
+ from pydantic import BaseModel
18
+
19
+ from pydantic import BaseModel
20
+
14
21
  from synth_ai.task.apps import ModalDeploymentConfig, TaskAppEntry, register_task_app
15
- from synth_ai.task.contracts import RolloutMetrics, RolloutRequest, RolloutResponse, TaskInfo
22
+ from synth_ai.task.contracts import RolloutMetrics, RolloutMode, RolloutRequest, RolloutResponse, TaskInfo
16
23
  from synth_ai.task.datasets import TaskDatasetRegistry, TaskDatasetSpec
17
24
  from synth_ai.task.json import to_jsonable # noqa: F401 (imported for side-effect compatibility)
18
25
  from synth_ai.task.rubrics import load_rubric
@@ -115,6 +122,27 @@ try:
115
122
  except Exception:
116
123
  pass
117
124
 
125
+ try:
126
+ from .synth_envs_hosted.utils import (
127
+ ensure_chat_completions_url,
128
+ extract_trace_correlation_id,
129
+ )
130
+ except Exception: # pragma: no cover - fallback when optional deps missing
131
+ def ensure_chat_completions_url(raw_url, mode=None):
132
+ return raw_url
133
+
134
+ def extract_trace_correlation_id(_raw_url, mode=None):
135
+ if not isinstance(_raw_url, str):
136
+ return None
137
+ parsed = urlparse(_raw_url)
138
+ query_params = parse_qs(parsed.query or "")
139
+ for key in ("cid", "trace", "trace_correlation_id"):
140
+ values = query_params.get(key) or []
141
+ for value in values:
142
+ if isinstance(value, str) and value.strip():
143
+ return value.strip()
144
+ return None
145
+
118
146
  HAS_HOSTED = True
119
147
  try:
120
148
  import crafter # type: ignore
@@ -306,7 +334,7 @@ def build_dataset() -> tuple[TaskDatasetRegistry, CrafterDataset]:
306
334
  def _base_task_info(dataset: CrafterDataset) -> TaskInfo:
307
335
  return TaskInfo(
308
336
  task={"id": "crafter_classic", "name": "Crafter Classic", "version": "1.0.0"},
309
- environments=["crafter"],
337
+ environment="crafter",
310
338
  action_space={
311
339
  "type": "discrete",
312
340
  "size": len(crafter_constants.actions),
@@ -397,22 +425,29 @@ def provide_task_instances(
397
425
  dataset: CrafterDataset, base_info: TaskInfo, seeds: Sequence[int]
398
426
  ) -> Iterable[TaskInfo]:
399
427
  infos: list[TaskInfo] = []
428
+ base_observation = getattr(base_info, "observation", None)
429
+ if hasattr(base_observation, "model_dump"):
430
+ observation_template = base_observation.model_dump()
431
+ elif isinstance(base_observation, dict):
432
+ observation_template = dict(base_observation)
433
+ else:
434
+ observation_template = {}
400
435
  for seed_value in seeds:
401
436
  summary = dataset.describe_seed(seed_value)
402
437
  infos.append(
403
438
  TaskInfo(
404
439
  task=base_info.task,
405
- environments=base_info.environments,
440
+ environment=base_info.environment,
406
441
  action_space=base_info.action_space,
407
442
  observation={
408
- **base_info.observation,
443
+ **observation_template,
409
444
  "seed": seed_value,
410
445
  "traits": summary["traits"],
411
446
  "inventory": summary["inventory"],
412
447
  "player_position": summary["player_position"],
413
448
  },
414
449
  dataset={
415
- **base_info.dataset,
450
+ **base_info.dataset.model_dump(),
416
451
  "seed": seed_value,
417
452
  "difficulty": summary["difficulty"],
418
453
  "config": summary["config"],
@@ -536,7 +571,47 @@ async def rollout_executor(request: RolloutRequest, fastapi_request) -> RolloutR
536
571
 
537
572
  request = _coerce_math_to_crafter(request)
538
573
 
574
+ record_cfg = request.record.model_copy(
575
+ update={
576
+ "return_trace": True,
577
+ "trace_format": "structured",
578
+ }
579
+ )
580
+ request = request.model_copy(update={"record": record_cfg})
581
+
539
582
  policy_cfg = dict(request.policy.config or {})
583
+ logger.info(
584
+ "ROLLOUT_EXEC: incoming policy config keys=%s inference_url=%s run_id=%s mode=%s",
585
+ sorted(policy_cfg.keys()),
586
+ policy_cfg.get("inference_url"),
587
+ request.run_id,
588
+ request.mode,
589
+ )
590
+ inferred_url = ensure_chat_completions_url(policy_cfg.get("inference_url"), mode=request.mode)
591
+ if isinstance(inferred_url, str) and inferred_url:
592
+ policy_cfg["inference_url"] = inferred_url
593
+ else:
594
+ logger.warning(
595
+ "ROLLOUT_EXEC: inference_url missing or not normalized run_id=%s raw=%s",
596
+ request.run_id,
597
+ policy_cfg.get("inference_url"),
598
+ )
599
+
600
+ trace_correlation_id = extract_trace_correlation_id(policy_cfg.get("inference_url"), mode=request.mode)
601
+ if request.mode == RolloutMode.RL:
602
+ assert trace_correlation_id, (
603
+ f"FATAL: trace_correlation_id extraction failed for run_id={request.run_id}. "
604
+ f"policy_cfg_keys={sorted(policy_cfg.keys())} inference_url={policy_cfg.get('inference_url')}"
605
+ )
606
+ if trace_correlation_id:
607
+ policy_cfg["trace_correlation_id"] = trace_correlation_id
608
+
609
+ pipeline_metadata: dict[str, Any] = {}
610
+ if trace_correlation_id:
611
+ pipeline_metadata["trace_correlation_id"] = trace_correlation_id
612
+ if isinstance(policy_cfg.get("inference_url"), str) and policy_cfg["inference_url"]:
613
+ pipeline_metadata.setdefault("inference_url", policy_cfg["inference_url"])
614
+
540
615
  try:
541
616
  max_llm_calls = int(policy_cfg.get("max_llm_calls") or 10)
542
617
  except Exception:
@@ -585,17 +660,122 @@ async def rollout_executor(request: RolloutRequest, fastapi_request) -> RolloutR
585
660
  safety=LegacyRolloutSafetyConfig(**request.safety.model_dump()),
586
661
  training_session_id=request.training_session_id,
587
662
  synth_base_url=request.synth_base_url,
663
+ mode=request.mode,
588
664
  )
589
665
 
590
666
  legacy_response: LegacyRolloutResponse = await legacy_execute_rollout(
591
667
  legacy_request, fastapi_request
592
668
  )
593
669
  data = legacy_response.model_dump()
670
+ logger.debug(
671
+ "ROLLOUT_EXEC: legacy response keys=%s has_trace=%s",
672
+ sorted(data.keys()),
673
+ bool(data.get("trace")),
674
+ )
594
675
  metrics = data.get("metrics", {}) or {}
595
676
  metrics.setdefault("outcome_score", None)
596
677
  metrics.setdefault("events_score", None)
597
678
  metrics.setdefault("details", {})
598
679
  data["metrics"] = metrics
680
+
681
+ if data.get("trace") is None:
682
+ legacy_trace = getattr(legacy_response, "trace", None)
683
+ if legacy_trace is not None:
684
+ data["trace"] = legacy_trace
685
+ else:
686
+ tracer_factory = getattr(fastapi_request.app.state, "session_tracer_factory", None)
687
+ if callable(tracer_factory):
688
+ tracer = tracer_factory()
689
+ logger.debug(
690
+ "ROLLOUT_EXEC: trace backfill factory=%s", type(tracer)
691
+ )
692
+ if isinstance(tracer, SessionTracer):
693
+ try:
694
+ await tracer.initialize()
695
+ if tracer.db is not None:
696
+ trace_row = await tracer.db.get_session_trace(request.run_id)
697
+ if trace_row is not None:
698
+ data["trace"] = trace_row
699
+ except Exception as exc:
700
+ logger.warning("TRACE_BACKFILL_FAIL: %s", exc)
701
+ finally:
702
+ with suppress(Exception):
703
+ await tracer.close()
704
+
705
+ final_cid = trace_correlation_id or f"trace_{request.run_id}"
706
+ data["trace_correlation_id"] = final_cid
707
+
708
+ existing_meta = data.get("pipeline_metadata")
709
+ if not isinstance(existing_meta, dict):
710
+ existing_meta = {}
711
+ existing_meta.setdefault("trace_correlation_id", final_cid)
712
+ if isinstance(policy_cfg.get("inference_url"), str) and policy_cfg["inference_url"]:
713
+ existing_meta.setdefault("inference_url", policy_cfg["inference_url"])
714
+ data["pipeline_metadata"] = existing_meta
715
+
716
+ # Propagate inference_url into each legacy trajectory entry for downstream tooling.
717
+ inferred_url = policy_cfg.get("inference_url")
718
+ # Normalize the url before propagating into trajectories
719
+ try:
720
+ from .synth_envs_hosted.utils import (
721
+ ensure_chat_completions_url as _ensure_cc,
722
+ force_normalize_chat_completions_url as _force_cc,
723
+ )
724
+ if isinstance(inferred_url, str) and inferred_url:
725
+ inferred_url = _force_cc(inferred_url)
726
+ inferred_url = _ensure_cc(inferred_url, mode=request.mode)
727
+ except Exception:
728
+ pass
729
+
730
+ if "trajectories" in data:
731
+ normalized_trajs: list[dict[str, Any]] = []
732
+ for traj in data.get("trajectories", []):
733
+ if isinstance(traj, BaseModel):
734
+ traj_dict = traj.model_dump()
735
+ elif isinstance(traj, dict):
736
+ traj_dict = dict(traj)
737
+ else:
738
+ continue
739
+ traj_dict.setdefault("trace_correlation_id", final_cid)
740
+ if isinstance(inferred_url, str) and inferred_url and not traj_dict.get("inference_url"):
741
+ traj_dict["inference_url"] = inferred_url
742
+
743
+ # Inject nested info.meta.inference_url for each step (required by RL trainer)
744
+ try:
745
+ steps = traj_dict.get("steps", [])
746
+ if isinstance(steps, list):
747
+ for step in steps:
748
+ if not isinstance(step, dict):
749
+ continue
750
+ info = step.get("info")
751
+ if not isinstance(info, dict):
752
+ info = {}
753
+ meta = info.get("meta")
754
+ if not isinstance(meta, dict):
755
+ meta = {}
756
+ if isinstance(inferred_url, str) and inferred_url and not meta.get("inference_url"):
757
+ meta["inference_url"] = inferred_url
758
+ info["meta"] = meta
759
+ step["info"] = info
760
+ except Exception:
761
+ pass
762
+
763
+ normalized_trajs.append(traj_dict)
764
+ if normalized_trajs:
765
+ data["trajectories"] = normalized_trajs
766
+
767
+ if data.get("trace") is None:
768
+ data["trace"] = {
769
+ "session_id": request.run_id,
770
+ "created_at": datetime.now(UTC).isoformat(),
771
+ "metadata": dict(existing_meta),
772
+ "event_history": [],
773
+ "markov_blanket_message_history": [],
774
+ }
775
+ raise HTTPException(
776
+ status_code=500, detail="trace_payload_missing: task app did not emit a SessionTrace"
777
+ )
778
+
599
779
  return RolloutResponse.model_validate(data)
600
780
 
601
781
 
@@ -3,7 +3,7 @@
3
3
  This module now delegates to the TaskAppConfig defined in the colocated example at
4
4
  `examples/warming_up_to_rl/task_app/grpo_crafter.py`. It is kept for legacy usage
5
5
  (running the file directly or targeting `fastapi_app` from external tooling). Prefer using
6
- `uvx synth-ai serve grpo-crafter` for local development and testing.
6
+ `uvx synth-ai deploy --runtime uvicorn grpo-crafter` for local development and testing.
7
7
  """
8
8
 
9
9
  from __future__ import annotations
@@ -148,8 +148,8 @@ class CrafterPolicy(Policy):
148
148
  if self.use_tools:
149
149
  payload["tools"] = TOOLS_SCHEMA
150
150
  payload["tool_choice"] = "required"
151
- # Ensure the inference server injects family-specific stop sequences
152
- # to terminate immediately after the first tool call for compliance.
151
+ payload["function_call"] = {"name": "interact_many"}
152
+ payload["parallel_tool_calls"] = False
153
153
  payload["stop_after_tool_calls"] = 1
154
154
  return payload
155
155
 
@@ -158,13 +158,7 @@ class CrafterPolicy(Policy):
158
158
  response: dict[str, Any],
159
159
  use_tools: bool = True,
160
160
  ) -> list[dict[str, Any]]:
161
- """Turn an inference response into environment tool calls.
162
-
163
- - If tools were used, expect tool_calls-compatible output and forward as-is
164
- in our simple JSON format: {"tool_name": str, "arguments": {...}}.
165
- - If no tools, parse plain-text actions using CrafterReActAgent parser and
166
- wrap them into a single interact_many tool call.
167
- """
161
+ """Turn an inference response into environment tool calls."""
168
162
  # First check if we got actual tool calls
169
163
  choices = response.get("choices", [])
170
164
  tool_calls: list[dict[str, Any]] = []
@@ -223,24 +217,6 @@ class CrafterPolicy(Policy):
223
217
  normalized.append(tc)
224
218
  return normalized
225
219
 
226
- # Otherwise, parse plain text content for actions
227
- text = ""
228
- for choice in choices:
229
- msg = choice.get("message", {})
230
- content = msg.get("content", "")
231
- if content:
232
- text = content
233
- break
234
-
235
- if text:
236
- # Try to parse actions from the text
237
- from .shared import parse_actions
238
-
239
- actions = parse_actions(text)
240
- if actions:
241
- # Wrap actions in interact_many tool call
242
- return [{"tool_name": "interact_many", "arguments": {"actions": actions}}]
243
-
244
220
  # No actions found
245
221
  return []
246
222
 
@@ -46,7 +46,7 @@ class CrafterReActAgent:
46
46
  "- Always return a single tool call: interact_many({actions: [...]})\n"
47
47
  "- Use 2–5 actions per call; prefer long movement sequences to explore.\n"
48
48
  "- Mix in 'do' only when it makes sense (tree, stone, animal, enemy nearby).\n"
49
- "- Do not spam the same exact sequence twice in a row—explore in varied directions.\n\n"
49
+ "\n"
50
50
  "Available actions: noop, move_up, move_down, move_left, move_right, do (interact), sleep, "
51
51
  "place_stone, place_table, place_furnace, place_plant, make_wood_pickaxe, make_stone_pickaxe, "
52
52
  "make_iron_pickaxe, make_wood_sword, make_stone_sword, make_iron_sword\n"
@@ -1,6 +1,7 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import contextlib
4
+ import logging
4
5
  import os
5
6
 
6
7
  from fastapi import FastAPI
@@ -9,6 +10,52 @@ from fastapi.responses import JSONResponse
9
10
  from pydantic import BaseModel
10
11
  from starlette.requests import Request
11
12
 
13
+ logger = logging.getLogger(__name__)
14
+
15
+ _VERSION_LOGGED = False
16
+
17
+
18
+ def _resolve_task_app_version() -> str:
19
+ env_version = os.getenv("TASK_APP_VERSION")
20
+ if isinstance(env_version, str) and env_version.strip():
21
+ return env_version.strip()
22
+
23
+ try:
24
+ import importlib.metadata as importlib_metadata
25
+
26
+ pkg_version = importlib_metadata.version("synth-ai")
27
+ if isinstance(pkg_version, str) and pkg_version.strip():
28
+ return pkg_version.strip()
29
+ except Exception:
30
+ pass
31
+
32
+ try:
33
+ import synth_ai
34
+
35
+ attr_version = getattr(synth_ai, "__version__", None)
36
+ if isinstance(attr_version, str) and attr_version.strip():
37
+ return attr_version.strip()
38
+ except Exception:
39
+ pass
40
+
41
+ return "unknown"
42
+
43
+
44
+ def _log_task_app_version_once() -> None:
45
+ global _VERSION_LOGGED
46
+ if _VERSION_LOGGED:
47
+ return
48
+
49
+ version = _resolve_task_app_version()
50
+ build_id = os.getenv("TASK_APP_BUILD_ID")
51
+
52
+ if build_id:
53
+ logger.info("TASK_APP_VERSION: %s (build=%s)", version, build_id)
54
+ else:
55
+ logger.info("TASK_APP_VERSION: %s", version)
56
+
57
+ _VERSION_LOGGED = True
58
+
12
59
 
13
60
  class TaskApp:
14
61
  """Holds service configuration and shared state."""
@@ -56,6 +103,8 @@ def create_app(allowed_environments: list[str] = None) -> FastAPI:
56
103
  allow_headers=["*"],
57
104
  )
58
105
 
106
+ _log_task_app_version_once()
107
+
59
108
  # Initialize task app configuration
60
109
  task_app = TaskApp()
61
110
  app.state.task_app = task_app
@@ -3,6 +3,7 @@ from __future__ import annotations
3
3
  import asyncio
4
4
  import contextlib
5
5
  import logging
6
+ import os
6
7
  from typing import Any
7
8
 
8
9
  import httpx
@@ -23,6 +24,15 @@ class OpenAIClient:
23
24
  self.api_key = api_key
24
25
  self.timeout_s = timeout_s
25
26
  self.headers = {}
27
+ self._env_api_key: str | None = None
28
+
29
+ try:
30
+ env_key = os.getenv("ENVIRONMENT_API_KEY") or ""
31
+ env_key = env_key.strip()
32
+ if env_key:
33
+ self._env_api_key = env_key
34
+ except Exception:
35
+ self._env_api_key = None
26
36
 
27
37
  if api_key:
28
38
  self.headers["Authorization"] = f"Bearer {api_key}"
@@ -137,18 +147,49 @@ class OpenAIClient:
137
147
  Returns:
138
148
  OpenAI-compatible chat completion response
139
149
  """
140
- url = (base_url or self.base_url).rstrip("/") + "/v1/chat/completions"
150
+ # Build target URL robustly: if a full endpoint is given (with query or already ending
151
+ # in /chat/completions), preserve it; otherwise, append the path BEFORE query params.
152
+ from urllib.parse import urlparse, urlunparse
153
+
154
+ candidate = (base_url or self.base_url).strip()
155
+ try:
156
+ parsed = urlparse(candidate)
157
+ # If no scheme, treat as relative base (pass-through)
158
+ if not parsed.scheme or not parsed.netloc:
159
+ base_no_slash = candidate.rstrip("/")
160
+ url = f"{base_no_slash}/v1/chat/completions"
161
+ else:
162
+ path = (parsed.path or "").rstrip("/")
163
+ if path.endswith("/v1/chat/completions") or path.endswith("/chat/completions"):
164
+ new_path = path
165
+ elif path.endswith("/v1"):
166
+ new_path = f"{path}/chat/completions"
167
+ elif path.endswith("/chat"):
168
+ new_path = f"{path}/completions"
169
+ else:
170
+ new_path = f"{path}/v1/chat/completions" if path else "/v1/chat/completions"
171
+ url = urlunparse(parsed._replace(path=new_path))
172
+ except Exception:
173
+ # Fallback to legacy behavior
174
+ url = (base_url or self.base_url).rstrip("/") + "/v1/chat/completions"
141
175
  timeout = timeout_s or self.timeout_s
142
176
 
143
177
  # Merge headers
144
178
  headers = self.headers.copy()
179
+ try:
180
+ parsed_target = urlparse(url)
181
+ path_for_auth = (parsed_target.path or "") if parsed_target else ""
182
+ if self._env_api_key and "/proxy/" in path_for_auth:
183
+ headers.setdefault("X-API-Key", self._env_api_key)
184
+ except Exception:
185
+ pass
145
186
  if extra_headers:
146
187
  headers.update(extra_headers)
147
188
 
148
189
  # Fix parameter compatibility for newer models
149
190
  processed_request = self._fix_model_parameters(request, target_url=url)
150
191
 
151
- # Log request (redact messages in production)
192
+ # Log request with detailed prompts/tools preview and sampling settings (Authorization is not logged)
152
193
  logger.info(f"Inference POST target: {url}")
153
194
  if extra_headers:
154
195
  logger.info(f"Extra headers: {extra_headers}")
@@ -156,13 +197,69 @@ class OpenAIClient:
156
197
  keys_preview = sorted(processed_request.keys())
157
198
  logger.info(f"Request keys: {keys_preview}")
158
199
 
159
- # Final hard-guard for OpenAI: ensure unsupported field is not present
200
+ # Detailed IO log: messages/tools/sampling and final payload fields
201
+ try:
202
+ import json as _json
203
+
204
+ def _truncate(text: str, limit: int = 2000) -> str:
205
+ return text if len(text) <= limit else text[:limit] + "…"
206
+
207
+ def _messages_preview(msgs: Any) -> str:
208
+ try:
209
+ out: list[dict[str, Any]] = []
210
+ if isinstance(msgs, list):
211
+ for m in msgs:
212
+ if not isinstance(m, dict):
213
+ continue
214
+ role = m.get("role")
215
+ content = m.get("content")
216
+ if isinstance(content, str):
217
+ text = content
218
+ elif isinstance(content, list):
219
+ parts: list[str] = []
220
+ for seg in content:
221
+ if isinstance(seg, dict) and isinstance(seg.get("text"), str):
222
+ parts.append(seg["text"])
223
+ text = "\n".join(parts)
224
+ else:
225
+ text = ""
226
+ out.append({"role": role, "content": _truncate(str(text), 4000)})
227
+ return _json.dumps(out)
228
+ except Exception:
229
+ return "[]"
230
+
231
+ def _tools_preview(tools: Any) -> str:
232
+ try:
233
+ return _truncate(_json.dumps(tools), 4000)
234
+ except Exception:
235
+ return "[]"
236
+
237
+ msgs = processed_request.get("messages") if isinstance(processed_request, dict) else None
238
+ tools = processed_request.get("tools") if isinstance(processed_request, dict) else None
239
+ io_log: dict[str, Any] = {
240
+ "llm.call": True,
241
+ "model": processed_request.get("model") if isinstance(processed_request, dict) else None,
242
+ "tool_choice": processed_request.get("tool_choice") if isinstance(processed_request, dict) else None,
243
+ "parallel_tool_calls": processed_request.get("parallel_tool_calls") if isinstance(processed_request, dict) else None,
244
+ "stop_after_tool_calls": processed_request.get("stop_after_tool_calls") if isinstance(processed_request, dict) else None,
245
+ "temperature": processed_request.get("temperature") if isinstance(processed_request, dict) else None,
246
+ "top_p": processed_request.get("top_p") if isinstance(processed_request, dict) else None,
247
+ "max_tokens": processed_request.get("max_tokens") if isinstance(processed_request, dict) else None,
248
+ "max_completion_tokens": processed_request.get("max_completion_tokens") if isinstance(processed_request, dict) else None,
249
+ "messages_preview": _messages_preview(msgs),
250
+ "tools_preview": _tools_preview(tools),
251
+ }
252
+ logger.info(io_log)
253
+ except Exception:
254
+ pass
255
+
256
+ # Final hard-guard for OpenAI/Groq: ensure unsupported field is not present
160
257
  try:
161
- if "openai" in url.lower() and "stop_after_tool_calls" in processed_request:
258
+ low_url = url.lower()
259
+ if ("openai" in low_url or "groq.com" in low_url or "/proxy/groq" in low_url) and "stop_after_tool_calls" in processed_request:
162
260
  processed_request.pop("stop_after_tool_calls", None)
163
- logger.info("Removed stop_after_tool_calls for OpenAI request")
261
+ logger.info("Removed stop_after_tool_calls for Groq/OpenAI request")
164
262
  # Groq-specific requirement: when using JSON mode, one of the messages must contain the word 'json'
165
- low_url = url.lower()
166
263
  if ("groq.com" in low_url or "/openai" in low_url) and isinstance(
167
264
  processed_request, dict
168
265
  ):
@@ -228,13 +325,54 @@ class OpenAIClient:
228
325
  f"Inference response status=200, content-type={content_type}, bytes={len(body_text)}"
229
326
  )
230
327
  if body_text:
231
- preview_len = min(800, len(body_text))
232
- logger.info(
233
- f"Inference response preview ({preview_len} bytes): {body_text[:preview_len]}"
234
- )
328
+ # Log raw output with generous preview to debug no-tool-call issues
329
+ preview_len = min(4000, len(body_text))
330
+ logger.info({
331
+ "llm.raw_response": True,
332
+ "bytes": len(body_text),
333
+ "preview": body_text[:preview_len],
334
+ })
235
335
 
236
336
  result = response.json()
237
337
  logger.info(f"Inference response parsed_type={type(result).__name__}")
338
+
339
+ # Normalize tool calls so downstream always sees a function tool call
340
+ try:
341
+ if isinstance(result, dict):
342
+ choices = result.get("choices")
343
+ if isinstance(choices, list) and choices:
344
+ msg = choices[0].get("message")
345
+ if isinstance(msg, dict):
346
+ # Prefer tool_calls; if missing but function_call is present, synthesize tool_calls
347
+ tc = msg.get("tool_calls")
348
+ fc = msg.get("function_call")
349
+ if (not isinstance(tc, list) or not tc) and isinstance(fc, dict):
350
+ name = fc.get("name") or "interact_many"
351
+ args = fc.get("arguments") or "{}"
352
+ msg["tool_calls"] = [
353
+ {
354
+ "id": "call_norm",
355
+ "type": "function",
356
+ "function": {"name": name, "arguments": args},
357
+ }
358
+ ]
359
+ # Encourage downstream to treat this as a tool call
360
+ if isinstance(choices[0], dict):
361
+ choices[0]["finish_reason"] = "tool_calls"
362
+ # Log tool call count for debugging
363
+ try:
364
+ tc2 = msg.get("tool_calls")
365
+ count = len(tc2) if isinstance(tc2, list) else 0
366
+ logger.info({
367
+ "llm.tool_calls": True,
368
+ "count": count,
369
+ "finish_reason": choices[0].get("finish_reason") if isinstance(choices[0], dict) else None,
370
+ })
371
+ except Exception:
372
+ pass
373
+ except Exception:
374
+ pass
375
+
238
376
  return result
239
377
 
240
378
  except httpx.TimeoutException:
@@ -340,40 +478,6 @@ class OpenAIClient:
340
478
  pass
341
479
  except Exception:
342
480
  pass
343
- # Gracefully degrade on 422 so rollouts can still produce a trajectory
344
- if status == 422:
345
- try:
346
- # Best-effort parse of error for diagnostics
347
- err = None
348
- try:
349
- err = e.response.json()
350
- except Exception:
351
- err = {"error": "unprocessable", "detail": (text or "")[:200]}
352
- logger.warning(
353
- {
354
- "inference_422_recovered": True,
355
- "detail": err,
356
- }
357
- )
358
- except Exception:
359
- pass
360
- # Return a minimal OpenAI-compatible response with no tool_calls/content
361
- import time as _t
362
-
363
- return {
364
- "id": f"cmpl-{int(_t.time())}",
365
- "object": "chat.completion",
366
- "created": int(_t.time()),
367
- "model": processed_request.get("model") or "unknown",
368
- "choices": [
369
- {
370
- "index": 0,
371
- "message": {"role": "assistant", "content": "", "tool_calls": []},
372
- "finish_reason": "stop",
373
- }
374
- ],
375
- "usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0},
376
- }
377
481
  raise
378
482
  except Exception as e:
379
483
  logger.error(f"Unexpected error calling {url}: {e}")
@@ -399,7 +503,14 @@ class OpenAIClient:
399
503
 
400
504
  try:
401
505
  async with httpx.AsyncClient(timeout=timeout) as client:
402
- response = await client.get(url, headers=self.headers)
506
+ headers = self.headers.copy()
507
+ try:
508
+ parsed = httpx.URL(url)
509
+ if self._env_api_key and "/proxy/" in (parsed.path or ""):
510
+ headers.setdefault("X-API-Key", self._env_api_key)
511
+ except Exception:
512
+ pass
513
+ response = await client.get(url, headers=headers)
403
514
  response.raise_for_status()
404
515
  return response.json()
405
516
  except httpx.HTTPStatusError as e: