PyPI - synth-ai - Versions diffs - 0.2.13.dev2__py3-none-any.whl → 0.2.14__py3-none-any.whl - Mend

synth-ai 0.2.13.dev2__py3-none-any.whl → 0.2.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of synth-ai might be problematic. Click here for more details.

Files changed (110) hide show

examples/multi_step/configs/README_verilog_rl.md +77 -0
examples/multi_step/configs/VERILOG_REWARDS.md +90 -0
examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +183 -0
examples/multi_step/configs/crafter_eval_synth_qwen4b.toml +35 -0
examples/multi_step/configs/crafter_eval_text_only_groq_qwen32b.toml +36 -0
examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +5 -4
examples/multi_step/configs/crafter_synth_backend.md +40 -0
examples/multi_step/configs/verilog_eval_groq_qwen32b.toml +31 -0
examples/multi_step/configs/verilog_eval_synth_qwen8b.toml +33 -0
examples/multi_step/configs/verilog_rl_lora.toml +190 -0
examples/multi_step/judges/crafter_backend_judge.py +220 -0
examples/multi_step/judges/verilog_backend_judge.py +234 -0
examples/multi_step/readme.md +48 -0
examples/multi_step/verilog_rl_lora.md +218 -0
examples/qwen_coder/configs/coder_lora_30b.toml +1 -1
examples/sft/evaluate.py +2 -0
examples/sft/generate_traces.py +2 -0
examples/swe/task_app/grpo_swe_mini.py +1 -0
examples/swe/task_app/hosted/rollout.py +2 -0
examples/task_apps/IMAGE_ONLY_EVAL_QUICKSTART.md +258 -0
examples/task_apps/crafter/CREATE_SFT_DATASET.md +273 -0
examples/task_apps/crafter/EVAL_IMAGE_ONLY_RESULTS.md +152 -0
examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +174 -0
examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +268 -0
examples/task_apps/crafter/QUERY_EXAMPLES.md +203 -0
examples/task_apps/crafter/README_IMAGE_ONLY_EVAL.md +316 -0
examples/task_apps/crafter/eval_image_only_gpt4o.toml +28 -0
examples/task_apps/crafter/eval_text_only_groq_llama.toml +36 -0
examples/task_apps/crafter/filter_sft_dataset.toml +16 -0
examples/task_apps/crafter/task_app/__init__.py +3 -0
examples/task_apps/crafter/task_app/grpo_crafter.py +306 -8
examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/environment.py +10 -0
examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +16 -3
examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +17 -2
examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +25 -3
examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +52 -1
examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +111 -13
examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +156 -0
examples/task_apps/enron/filter_sft.toml +5 -0
examples/task_apps/enron/tests/__init__.py +2 -0
examples/task_apps/enron/tests/integration/__init__.py +2 -0
examples/task_apps/enron/tests/integration/test_enron_eval.py +2 -0
examples/task_apps/enron/tests/unit/__init__.py +2 -0
examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_COMPLETE.md +283 -0
examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_STATUS.md +155 -0
examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +415 -0
examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +29 -0
examples/task_apps/pokemon_red/pallet_town_rl_config.toml +2 -0
examples/task_apps/pokemon_red/task_app.py +199 -6
examples/task_apps/pokemon_red/test_pallet_town_rewards.py +2 -0
examples/task_apps/sokoban/filter_sft.toml +5 -0
examples/task_apps/sokoban/tests/__init__.py +2 -0
examples/task_apps/sokoban/tests/integration/__init__.py +2 -0
examples/task_apps/sokoban/tests/unit/__init__.py +2 -0
examples/task_apps/verilog/eval_groq_qwen32b.toml +8 -4
examples/task_apps/verilog/filter_sft.toml +5 -0
examples/task_apps/verilog/task_app/grpo_verilog.py +258 -23
examples/task_apps/verilog/tests/__init__.py +2 -0
examples/task_apps/verilog/tests/integration/__init__.py +2 -0
examples/task_apps/verilog/tests/integration/test_verilog_eval.py +2 -0
examples/task_apps/verilog/tests/unit/__init__.py +2 -0
examples/warming_up_to_rl/groq_test.py +2 -0
examples/warming_up_to_rl/run_local_rollout.py +2 -0
examples/warming_up_to_rl/run_local_rollout_modal.py +2 -0
examples/warming_up_to_rl/run_local_rollout_parallel.py +2 -0
examples/warming_up_to_rl/run_local_rollout_traced.py +2 -0
examples/warming_up_to_rl/run_rollout_remote.py +2 -0
synth_ai/api/models/supported.py +1 -0
synth_ai/cli/__init__.py +46 -13
synth_ai/cli/_modal_wrapper.py +3 -2
synth_ai/cli/recent.py +1 -1
synth_ai/cli/status.py +1 -1
synth_ai/cli/task_apps.py +354 -143
synth_ai/cli/traces.py +1 -1
synth_ai/cli/tui.py +57 -0
synth_ai/cli/turso.py +1 -1
synth_ai/cli/watch.py +1 -1
synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
synth_ai/environments/examples/crafter_classic/environment.py +1 -1
synth_ai/environments/examples/verilog/engine.py +76 -10
synth_ai/judge_schemas.py +8 -8
synth_ai/task/__init__.py +11 -1
synth_ai/task/apps/__init__.py +1 -0
synth_ai/task/config.py +257 -0
synth_ai/task/contracts.py +15 -2
synth_ai/task/rubrics/__init__.py +3 -0
synth_ai/task/rubrics/loaders.py +22 -3
synth_ai/task/rubrics/scoring.py +3 -0
synth_ai/task/trace_correlation_helpers.py +315 -0
synth_ai/task/validators.py +144 -0
synth_ai/tracing_v3/abstractions.py +3 -3
synth_ai/tracing_v3/llm_call_record_helpers.py +5 -5
synth_ai/tracing_v3/session_tracer.py +16 -6
synth_ai/tracing_v3/storage/base.py +29 -29
synth_ai/tracing_v3/storage/config.py +3 -3
synth_ai/tracing_v3/turso/daemon.py +8 -7
synth_ai/tracing_v3/turso/native_manager.py +63 -40
synth_ai/tracing_v3/utils.py +3 -3
synth_ai/tui/__init__.py +5 -0
synth_ai/tui/__main__.py +13 -0
synth_ai/tui/cli/__init__.py +1 -0
synth_ai/tui/cli/query_experiments.py +164 -0
synth_ai/tui/cli/query_experiments_v3.py +164 -0
synth_ai/tui/dashboard.py +906 -0
{synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.14.dist-info}/METADATA +1 -1
{synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.14.dist-info}/RECORD +110 -71
{synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.14.dist-info}/WHEEL +0 -0
{synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.14.dist-info}/entry_points.txt +0 -0
{synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.14.dist-info}/licenses/LICENSE +0 -0
{synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.14.dist-info}/top_level.txt +0 -0

examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py CHANGED Viewed

@@ -13,6 +13,7 @@ from pydantic import BaseModel, Field
 from synth_ai.lm.vendors.base import BaseLMResponse
 from synth_ai.task.tracing_utils import unique_sft_path
 from synth_ai.tracing_v3.abstractions import EnvironmentEvent, LMCAISEvent, TimeRecord
+from synth_ai.task.contracts import RolloutMode
 from synth_ai.tracing_v3.llm_call_record_helpers import create_llm_call_record_from_response
 from synth_ai.tracing_v3.session_tracer import SessionTracer
@@ -120,6 +121,8 @@ class RolloutRequest(BaseModel):
     # Optional run/session context
     training_session_id: str | None = None
     synth_base_url: str | None = None
+    # Mode controls URL transformation: REQUIRED to make intent explicit
+    mode: RolloutMode
 class RolloutStep(BaseModel):
@@ -140,6 +143,7 @@ class RolloutTrajectory(BaseModel):
     final: dict[str, Any] | None = None
     length: int
     decision_samples: list[dict[str, Any]] | None = None
+    inference_url: str | None = None
 def _normalize_step_strategy(raw_strategy: Any) -> str:
@@ -452,11 +456,12 @@ class RolloutMetrics(BaseModel):
 class RolloutResponse(BaseModel):
     run_id: str
     trajectories: list[RolloutTrajectory]
-    branches: dict[str, list[str]] = {}
+    branches: dict[str, list[str]] = Field(default_factory=dict)
     metrics: RolloutMetrics
     aborted: bool = False
     ops_executed: int = 0
     trace: dict[str, Any] | None = None
+    pipeline_metadata: dict[str, Any] = Field(default_factory=dict)
 class RolloutTracingContext:
@@ -567,7 +572,7 @@ class RolloutTracingContext:
             try:
                 await self.tracer.record_message(
                     content=self._prompt_payload(entry, role="system"),
-                    message_type="policy_system_prompt",
+                    message_type="system",  # Use standard message type
                     metadata=self._message_metadata(),
                 )
             except Exception as exc:
@@ -576,11 +581,16 @@ class RolloutTracingContext:
             try:
                 await self.tracer.record_message(
                     content=self._prompt_payload(entry, role="user"),
-                    message_type="policy_user_prompt",
+                    message_type="user",  # Use standard message type
                     metadata=self._message_metadata(),
                 )
             except Exception as exc:
                 logger.debug("TRACING_USER_MSG_FAIL: %s", exc)
+        # Debug: Check message count
+        if self.tracer and self.tracer._current_trace:
+            msg_count = len(self.tracer._current_trace.markov_blanket_message_history)
+            logger.info(f"[TRACE_DEBUG] After record_policy_prompts: {msg_count} messages in trace")
     def _content_to_text(self, content: Any) -> str:
         if isinstance(content, str):
@@ -656,8 +666,8 @@ class RolloutTracingContext:
             try:
                 await self.tracer.record_message(
                     content=self._safe_json(tool_calls),
-                    message_type="policy_tool_call",
-                    metadata=self._message_metadata(),
+                    message_type="assistant",  # Map to standard assistant message type
+                    metadata={**self._message_metadata(), "is_tool_call": True},
                 )
             except Exception as exc:
                 logger.debug("TRACING_TOOL_MSG_FAIL: %s", exc)
@@ -928,11 +938,22 @@ class RolloutTracingContext:
             except Exception as exc:
                 logger.debug("TRACING_OUTCOME_FAIL: %s", exc)
             try:
+                # Debug: Check message count before end_session
+                if self.tracer._current_trace:
+                    msg_count = len(self.tracer._current_trace.markov_blanket_message_history)
+                    logger.info(f"[TRACE_DEBUG] Before end_session: {msg_count} messages in trace")
                 self.session_trace = await self.tracer.end_session()
-                if self.session_trace is not None:
+                # Debug: Check if session was saved
+                if self.session_trace:
+                    logger.info(f"[TRACE_DEBUG] Session ended successfully, session_id={self.session_trace.session_id}")
                     self.session_trace.metadata.update(self.metadata_updates)
+                    logger.info(f"[TRACE_DEBUG] session_trace.metadata keys: {list(self.session_trace.metadata.keys())}")
+                else:
+                    logger.warning("[TRACE_DEBUG] end_session returned None!")
             except Exception as exc:
-                logger.debug("TRACING_END_SESSION_FAIL: %s", exc)
+                logger.warning(f"TRACING_END_SESSION_FAIL: {exc}", exc_info=True)
                 self.session_trace = None
             with contextlib.suppress(Exception):
                 await self.tracer.close()
@@ -1056,12 +1077,14 @@ async def execute_rollout(
     req: Request,
 ) -> RolloutResponse:
     """Execute a rollout with coordinated environment and policy steps."""
+    logger.info("ROLLOUT: mode = %s", request.mode)
     # Emit rollout identifier early for correlation
     with contextlib.suppress(Exception):
         _rid = getattr(request, "run_id", None)
         _pol = getattr(request.policy, "policy_name", None) or getattr(request.policy, "policy_id", None)
         _env = getattr(request.env, "env_name", None) or getattr(request.env, "env_id", None)
-        logger.info("ROLLOUT_BEGIN: run_id=%s policy=%s env=%s", _rid, _pol, _env)
+        logger.info("ROLLOUT_BEGIN: run_id=%s policy=%s env=%s mode=%s", _rid, _pol, _env, request.mode)
         print(f"[rollout] begin run_id={_rid} policy={_pol} env={_env}", flush=True)
     # Enforce per-episode step cap via env-specific parameters; default to 20 if omitted
     try:
@@ -1271,6 +1294,7 @@ async def execute_rollout(
                     config=_policy_config,
                     rl_run_id=request.run_id,
                     bound_env_id=env_id,
+                    mode=request.mode,  # Pass through mode for URL transformation control
                 ),
                 req,
             )
@@ -1843,14 +1867,73 @@ async def execute_rollout(
                     timing_final.setdefault("overhead_ms", 0.0)
         # Build trajectory
-        # Extract inference_url from policy meta
+        # Extract inference_url from policy config (REQUIRED for trace correlation)
+        # The trainer sets this in policy config with ?cid=... parameter
         inference_url = None
-        if policy_handle is not None:
+        # Try policy config from request first (most reliable source)
+        try:
+            policy_config_snapshot = (
+                request.policy.config if isinstance(request.policy.config, dict) else {}
+            )
+            inference_url = policy_config_snapshot.get("inference_url")
+            if inference_url:
+                logger.info(
+                    "ROLLOUT_TRAJECTORY: extracted inference_url from request.policy.config run_id=%s url=%s",
+                    request.run_id,
+                    inference_url,
+                )
+        except Exception as exc:
+            logger.warning(
+                "ROLLOUT_TRAJECTORY: failed to get inference_url from request.policy.config run_id=%s: %s",
+                request.run_id,
+                exc,
+            )
+        # Fallback: Try policy handle snapshot (if request.policy.config failed)
+        if not inference_url and policy_handle is not None:
             try:
                 policy_snapshot = policy_handle.snapshot()
                 inference_url = policy_snapshot.get("config", {}).get("inference_url")
-            except Exception:
-                pass
+                if inference_url:
+                    logger.info(
+                        "ROLLOUT_TRAJECTORY: extracted inference_url from policy_handle.snapshot run_id=%s url=%s",
+                        request.run_id,
+                        inference_url,
+                    )
+            except Exception as exc:
+                logger.warning(
+                    "ROLLOUT_TRAJECTORY: failed to snapshot policy for run_id=%s policy_id=%s: %s",
+                    request.run_id,
+                    policy_id,
+                    exc,
+                )
+        # ASSERTION: inference_url MUST be present (required by RolloutTrajectory schema)
+        if not inference_url:
+            raise ValueError(
+                f"FATAL: inference_url is required but not found!\n"
+                f"\n"
+                f"run_id: {request.run_id}\n"
+                f"policy_id: {policy_id}\n"
+                f"policy_config_keys: {list(policy_config_snapshot.keys()) if 'policy_config_snapshot' in locals() else 'N/A'}\n"
+                f"\n"
+                f"The trainer MUST set inference_url in policy config with ?cid=... parameter.\n"
+                f"This is required for trace correlation and hydration.\n"
+            )
+        # policy_config_snapshot already set above in try block (line 1876-1878)
+        # Ensure it exists for logging below
+        if 'policy_config_snapshot' not in locals():
+            policy_config_snapshot = {}
+        logger.info(
+            "ROLLOUT_TRAJECTORY: run_id=%s policy_id=%s inference_url=%s trace_id=%s",
+            request.run_id,
+            policy_id,
+            inference_url,
+            policy_config_snapshot.get("trace_correlation_id"),
+        )
         trajectory = RolloutTrajectory(
             env_id=env_id,
@@ -1948,12 +2031,17 @@ async def execute_rollout(
             )
             finalized = True
         trace_payload = tracing_context.build_trace_payload(session_trace)
+        # Debug: Check trace payload
+        logger.info(f"[TRACE_DEBUG] trace_payload is None: {trace_payload is None}, return_trace={tracing_context.return_trace}")
+        if trace_payload:
+            logger.info(f"[TRACE_DEBUG] trace_payload keys: {list(trace_payload.keys())}")
         # Hard-fail if no steps executed (avg_turns == 0 scenario)
         if metrics.num_steps <= 0:
             raise HTTPException(status_code=500, detail="no_steps_executed: avg_turns == 0")
-        return RolloutResponse(
+        response = RolloutResponse(
             run_id=request.run_id,
             trajectories=[trajectory],
             branches={},
@@ -1962,6 +2050,16 @@ async def execute_rollout(
             ops_executed=ops_executed,
             trace=trace_payload,
         )
+        logger.info(
+            "ROLLOUT_RESPONSE: run_id=%s aborted=%s ops_executed=%s metrics_steps=%s trace_present=%s pipeline_metadata=%s",
+            request.run_id,
+            aborted,
+            ops_executed,
+            metrics.num_steps,
+            bool(trace_payload),
+            response.pipeline_metadata,
+        )
+        return response
     except Exception as e:
         logger.error(f"Rollout failed for run {request.run_id}: {e}")

examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py CHANGED Viewed

@@ -1,9 +1,165 @@
 """Utility functions for the task service."""
+import logging
 from typing import Any
+from urllib.parse import parse_qs, urlparse, urlunparse
 import numpy as np
+logger = logging.getLogger(__name__)
+_CHAT_COMPLETIONS_SUFFIX = "/v1/chat/completions"
+def ensure_chat_completions_url(raw_url: Any, mode: str | None = None) -> Any:
+    """
+    Ensure inference URLs point at the chat completions endpoint.
+    Args:
+        raw_url: The inference URL to process
+        mode: "rl" applies URL transformations, "eval" uses URLs as-is (deprecated - use RolloutMode enum)
+    Returns:
+        Processed URL (transformed in RL mode, unchanged in EVAL mode)
+    """
+    # In EVAL mode, use URLs exactly as provided - no transformations
+    # Accept both string "eval" (legacy) and RolloutMode.EVAL
+    from synth_ai.task.contracts import RolloutMode
+    is_eval_mode = (mode == "eval" or mode == RolloutMode.EVAL or
+                    (hasattr(mode, 'value') and mode.value == "eval"))
+    if is_eval_mode:
+        logger.info("ensure_chat_completions_url: EVAL mode - using URL as-is: %s", raw_url)
+        return raw_url
+    # RL mode: apply transformations for compatibility
+    if not isinstance(raw_url, str):
+        logger.debug("ensure_chat_completions_url: non-string input %r (type=%s)", raw_url, type(raw_url))
+        return raw_url
+    url = raw_url.strip()
+    if not url:
+        logger.debug("ensure_chat_completions_url: blank/whitespace URL input")
+        return raw_url
+    parsed = urlparse(url)
+    path = (parsed.path or "").rstrip("/")
+    if path.endswith("/v1/chat/completions"):
+        logger.debug("ensure_chat_completions_url: URL already normalized %s", url)
+        # Already targeting the desired endpoint; keep original to preserve trailing slash.
+        return url
+    if not path:
+        new_path = _CHAT_COMPLETIONS_SUFFIX
+    else:
+        new_path = f"{path}{_CHAT_COMPLETIONS_SUFFIX}"
+    rebuilt = parsed._replace(path=new_path)
+    normalized = urlunparse(rebuilt)
+    logger.info(
+        "ensure_chat_completions_url: RL mode - normalized inference URL from %s to %s",
+        url,
+        normalized,
+    )
+    return normalized
+def inference_url_to_trace_correlation_id(raw_url: Any, *, required: bool = False, mode: Any = None) -> str | None:
+    """
+    Extract trace_correlation_id from inference URL query params.
+    The inference URL should contain ?cid=trace_xxxxx parameter.
+    This is THE canonical source for trace_correlation_id - it's what the
+    inference server uses to tag traces, so we extract it here.
+    Args:
+        raw_url: Inference URL (should contain ?cid=... query param)
+        required: If True, raises AssertionError if trace_correlation_id not found
+        mode: RolloutMode or string ("rl" or "eval"). Controls warning behavior -
+              warnings only logged for RL mode, not EVAL mode.
+    Returns:
+        trace_correlation_id if found in URL, None otherwise
+    Raises:
+        AssertionError: If required=True and trace_correlation_id not found
+    """
+    if not isinstance(raw_url, str):
+        logger.debug(
+            "inference_url_to_trace_correlation_id: non-string input %r (type=%s)",
+            raw_url,
+            type(raw_url)
+        )
+        if required:
+            raise AssertionError(
+                f"FATAL: inference_url_to_trace_correlation_id requires string URL, got {type(raw_url)}: {raw_url!r}"
+            )
+        return None
+    parsed = urlparse(raw_url)
+    query_params = parse_qs(parsed.query or "")
+    # Check all possible parameter names (cid is primary)
+    candidates = (
+        query_params.get("cid") or
+        query_params.get("trace") or
+        query_params.get("trace_correlation_id") or
+        []
+    )
+    for value in candidates:
+        if isinstance(value, str) and value.strip():
+            correlation_id = value.strip()
+            logger.info(
+                "inference_url_to_trace_correlation_id: ✅ extracted id=%s from url=%s",
+                correlation_id,
+                raw_url,
+            )
+            # ASSERTION: Correlation ID should look like trace_xxxxx
+            assert correlation_id.startswith("trace_"), (
+                f"FATAL: trace_correlation_id has unexpected format: {correlation_id!r}. "
+                f"Expected to start with 'trace_'"
+            )
+            return correlation_id
+    # Not found - check if we're in EVAL mode (trace_correlation_id not required for eval)
+    from synth_ai.task.contracts import RolloutMode
+    is_eval_mode = (mode == "eval" or mode == RolloutMode.EVAL or
+                    (hasattr(mode, 'value') and mode.value == "eval"))
+    if is_eval_mode:
+        # For EVAL mode, missing trace_correlation_id is expected - log as debug, not warning
+        logger.debug(
+            "inference_url_to_trace_correlation_id: No trace_correlation_id in EVAL mode (expected) url=%s query_params=%s",
+            raw_url,
+            list(query_params.keys())
+        )
+    else:
+        # For RL mode, missing trace_correlation_id is concerning
+        logger.warning(
+            "inference_url_to_trace_correlation_id: ❌ NO trace_correlation_id found in url=%s query_params=%s",
+            raw_url,
+            list(query_params.keys())
+        )
+    if required:
+        raise AssertionError(
+            f"FATAL: trace_correlation_id REQUIRED but not found in inference_url!\n"
+            f"\n"
+            f"URL: {raw_url}\n"
+            f"Query params found: {list(query_params.keys())}\n"
+            f"\n"
+            f"The inference_url MUST contain ?cid=trace_xxxxx parameter.\n"
+            f"This is set by the trainer when generating rollout requests.\n"
+        )
+    return None
+# Legacy alias for backward compatibility
+def extract_trace_correlation_id(raw_url: Any, mode: Any = None) -> str | None:
+    """DEPRECATED: Use inference_url_to_trace_correlation_id instead."""
+    return inference_url_to_trace_correlation_id(raw_url, required=False, mode=mode)
 def convert_numpy_to_python(obj: Any) -> Any:
     """

examples/task_apps/enron/filter_sft.toml ADDED Viewed

@@ -0,0 +1,5 @@
+[filter]
+db = "traces/v3/synth_ai.db"
+output = "ft_data/enron_sft.jsonl"
+min_official_score = 0.01

examples/task_apps/enron/tests/__init__.py CHANGED Viewed

	@@ -1,2 +1,4 @@
1 1	# Enron task app tests
2 2
3	+
4	+

examples/task_apps/enron/tests/integration/__init__.py CHANGED Viewed

	@@ -1,2 +1,4 @@
1 1	# Integration tests for Enron task app
2 2
3	+
4	+

examples/task_apps/enron/tests/integration/test_enron_eval.py CHANGED Viewed

@@ -175,3 +175,5 @@ def test_enron_eval_with_groq(enron_server: str) -> None:
     # Check that we got a meaningful score
     assert "official" in result.stdout.lower() or "mean_return" in result.stdout.lower()

examples/task_apps/enron/tests/unit/__init__.py CHANGED Viewed

	@@ -1,2 +1,4 @@
1 1	# Unit tests for Enron task app
2 2
3	+
4	+

synth-ai 0.2.13.dev2__py3-none-any.whl → 0.2.14__py3-none-any.whl

Potentially problematic release.

synth-ai 0.2.13.dev2__py3-none-any.whl → 0.2.14__py3-none-any.whl