synth-ai 0.2.16__py3-none-any.whl → 0.2.19__py3-none-any.whl

This diff compares the contents of publicly released package versions as they appear in their respective public registries; it is provided for informational purposes only.

Potentially problematic release: this version of synth-ai might be problematic.

Files changed (299)
  1. examples/analyze_semantic_words.sh +2 -2
  2. examples/baseline/banking77_baseline.py +204 -0
  3. examples/baseline/crafter_baseline.py +407 -0
  4. examples/baseline/pokemon_red_baseline.py +326 -0
  5. examples/baseline/simple_baseline.py +56 -0
  6. examples/baseline/warming_up_to_rl_baseline.py +239 -0
  7. examples/blog_posts/gepa/README.md +355 -0
  8. examples/blog_posts/gepa/configs/banking77_gepa_local.toml +95 -0
  9. examples/blog_posts/gepa/configs/banking77_gepa_test.toml +82 -0
  10. examples/blog_posts/gepa/configs/banking77_mipro_local.toml +52 -0
  11. examples/blog_posts/gepa/configs/hotpotqa_gepa_local.toml +59 -0
  12. examples/blog_posts/gepa/configs/hotpotqa_gepa_qwen.toml +36 -0
  13. examples/blog_posts/gepa/configs/hotpotqa_mipro_local.toml +53 -0
  14. examples/blog_posts/gepa/configs/hover_gepa_local.toml +59 -0
  15. examples/blog_posts/gepa/configs/hover_gepa_qwen.toml +36 -0
  16. examples/blog_posts/gepa/configs/hover_mipro_local.toml +53 -0
  17. examples/blog_posts/gepa/configs/ifbench_gepa_local.toml +59 -0
  18. examples/blog_posts/gepa/configs/ifbench_gepa_qwen.toml +36 -0
  19. examples/blog_posts/gepa/configs/ifbench_mipro_local.toml +53 -0
  20. examples/blog_posts/gepa/configs/pupa_gepa_local.toml +60 -0
  21. examples/blog_posts/gepa/configs/pupa_mipro_local.toml +54 -0
  22. examples/blog_posts/gepa/deploy_banking77_task_app.sh +41 -0
  23. examples/blog_posts/gepa/gepa_baseline.py +204 -0
  24. examples/blog_posts/gepa/query_prompts_example.py +97 -0
  25. examples/blog_posts/gepa/run_gepa_banking77.sh +87 -0
  26. examples/blog_posts/gepa/task_apps.py +105 -0
  27. examples/blog_posts/gepa/test_gepa_local.sh +67 -0
  28. examples/blog_posts/gepa/verify_banking77_setup.sh +123 -0
  29. examples/blog_posts/pokemon_vl/README.md +98 -0
  30. examples/blog_posts/pokemon_vl/configs/eval_gpt5nano.toml +26 -0
  31. examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +27 -0
  32. examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
  33. examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
  34. examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +43 -0
  35. examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
  36. examples/blog_posts/pokemon_vl/extract_images.py +239 -0
  37. examples/blog_posts/pokemon_vl/pokemon_vl_baseline.py +326 -0
  38. examples/blog_posts/pokemon_vl/run_eval_extract_images.py +209 -0
  39. examples/blog_posts/pokemon_vl/run_qwen_eval_extract_images.py +212 -0
  40. examples/blog_posts/pokemon_vl/text_box_analysis.md +106 -0
  41. examples/blog_posts/warming_up_to_rl/ARCHITECTURE.md +195 -0
  42. examples/blog_posts/warming_up_to_rl/FINAL_TEST_RESULTS.md +127 -0
  43. examples/blog_posts/warming_up_to_rl/INFERENCE_SUCCESS.md +132 -0
  44. examples/blog_posts/warming_up_to_rl/README.md +158 -0
  45. examples/blog_posts/warming_up_to_rl/SMOKE_TESTING.md +164 -0
  46. examples/blog_posts/warming_up_to_rl/SMOKE_TEST_COMPLETE.md +253 -0
  47. examples/blog_posts/warming_up_to_rl/configs/eval_baseline_qwen32b_10x20.toml +25 -0
  48. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
  49. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b_10x20.toml +26 -0
  50. examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
  51. examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
  52. examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
  53. examples/blog_posts/warming_up_to_rl/configs/smoke_test.toml +75 -0
  54. examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +91 -0
  55. examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
  56. examples/blog_posts/warming_up_to_rl/warming_up_to_rl_baseline.py +187 -0
  57. examples/dev/qwen3_32b_qlora_4xh100.toml +5 -0
  58. examples/multi_step/configs/VERILOG_REWARDS.md +4 -0
  59. examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +4 -0
  60. examples/multi_step/configs/crafter_rl_outcome.toml +2 -1
  61. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +65 -107
  62. examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +2 -1
  63. examples/multi_step/configs/crafter_rl_stepwise_simple.toml +2 -1
  64. examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
  65. examples/multi_step/configs/verilog_rl_lora.toml +80 -123
  66. examples/qwen_coder/configs/coder_lora_30b.toml +1 -3
  67. examples/qwen_coder/configs/coder_lora_4b.toml +4 -1
  68. examples/qwen_coder/configs/coder_lora_small.toml +1 -3
  69. examples/qwen_vl/README.md +10 -12
  70. examples/qwen_vl/SETUP_COMPLETE.md +7 -8
  71. examples/qwen_vl/VISION_TESTS_COMPLETE.md +2 -3
  72. examples/qwen_vl/collect_data_via_cli.md +76 -84
  73. examples/qwen_vl/collect_vision_traces.py +4 -4
  74. examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +40 -57
  75. examples/qwen_vl/configs/crafter_vlm_sft_example.toml +1 -2
  76. examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +20 -37
  77. examples/qwen_vl/configs/eval_gpt5nano_vision.toml +21 -40
  78. examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
  79. examples/qwen_vl/configs/{filter_qwen2vl_sft.toml → filter_qwen3vl_sft.toml} +4 -5
  80. examples/qwen_vl/configs/filter_vision_sft.toml +2 -3
  81. examples/qwen_vl/crafter_qwen_vl_agent.py +5 -5
  82. examples/qwen_vl/run_vision_comparison.sh +6 -7
  83. examples/rl/README.md +5 -5
  84. examples/rl/configs/rl_from_base_qwen.toml +26 -1
  85. examples/rl/configs/rl_from_base_qwen17.toml +6 -2
  86. examples/rl/task_app/README.md +1 -2
  87. examples/rl/task_app/math_single_step.py +2 -2
  88. examples/run_crafter_demo.sh +2 -2
  89. examples/sft/README.md +1 -1
  90. examples/sft/configs/crafter_fft_qwen0p6b.toml +4 -1
  91. examples/sft/configs/crafter_lora_qwen0p6b.toml +4 -1
  92. examples/swe/task_app/README.md +32 -2
  93. examples/swe/task_app/grpo_swe_mini.py +4 -0
  94. examples/swe/task_app/hosted/envs/crafter/react_agent.py +1 -1
  95. examples/swe/task_app/hosted/envs/mini_swe/environment.py +37 -10
  96. examples/swe/task_app/hosted/inference/openai_client.py +4 -38
  97. examples/swe/task_app/hosted/policy_routes.py +17 -0
  98. examples/swe/task_app/hosted/rollout.py +4 -2
  99. examples/swe/task_app/morph_backend.py +178 -0
  100. examples/task_apps/banking77/__init__.py +6 -0
  101. examples/task_apps/banking77/banking77_task_app.py +841 -0
  102. examples/task_apps/banking77/deploy_wrapper.py +46 -0
  103. examples/task_apps/crafter/CREATE_SFT_DATASET.md +4 -0
  104. examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +4 -0
  105. examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +4 -0
  106. examples/task_apps/crafter/task_app/README.md +1 -1
  107. examples/task_apps/crafter/task_app/grpo_crafter.py +90 -5
  108. examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +1 -1
  109. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +4 -26
  110. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -2
  111. examples/task_apps/crafter/task_app/synth_envs_hosted/hosted_app.py +49 -0
  112. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +372 -107
  113. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +81 -12
  114. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +82 -11
  115. examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +194 -1
  116. examples/task_apps/enron/task_app/grpo_enron_task_app.py +1 -1
  117. examples/task_apps/gepa_benchmarks/__init__.py +7 -0
  118. examples/task_apps/gepa_benchmarks/common.py +260 -0
  119. examples/task_apps/gepa_benchmarks/hotpotqa_task_app.py +507 -0
  120. examples/task_apps/gepa_benchmarks/hover_task_app.py +436 -0
  121. examples/task_apps/gepa_benchmarks/ifbench_task_app.py +563 -0
  122. examples/task_apps/gepa_benchmarks/pupa_task_app.py +460 -0
  123. examples/task_apps/math/README.md +1 -2
  124. examples/task_apps/pokemon_red/README.md +3 -4
  125. examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +4 -0
  126. examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +6 -5
  127. examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +1 -2
  128. examples/task_apps/pokemon_red/task_app.py +288 -39
  129. examples/task_apps/sokoban/README.md +2 -3
  130. examples/task_apps/verilog/eval_groq_qwen32b.toml +12 -14
  131. examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +1 -1
  132. examples/vlm/configs/crafter_vlm_gpt4o.toml +4 -1
  133. examples/warming_up_to_rl/configs/crafter_fft.toml +4 -1
  134. examples/warming_up_to_rl/configs/crafter_fft_4b.toml +0 -2
  135. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +3 -2
  136. examples/warming_up_to_rl/run_local_rollout_traced.py +1 -1
  137. examples/warming_up_to_rl/task_app/README.md +1 -1
  138. examples/warming_up_to_rl/task_app/grpo_crafter.py +185 -5
  139. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +1 -1
  140. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +3 -27
  141. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -1
  142. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +49 -0
  143. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +156 -45
  144. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +37 -4
  145. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +33 -3
  146. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +67 -0
  147. examples/workflows/math_rl/configs/rl_from_base_qwen.toml +27 -0
  148. examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +6 -0
  149. synth_ai/api/train/builders.py +99 -4
  150. synth_ai/api/train/cli.py +516 -26
  151. synth_ai/api/train/config_finder.py +13 -2
  152. synth_ai/api/train/configs/__init__.py +23 -2
  153. synth_ai/api/train/configs/prompt_learning.py +442 -0
  154. synth_ai/api/train/configs/rl.py +61 -7
  155. synth_ai/api/train/configs/sft.py +6 -2
  156. synth_ai/api/train/configs/shared.py +59 -2
  157. synth_ai/api/train/task_app.py +1 -1
  158. synth_ai/api/train/validators.py +277 -0
  159. synth_ai/auth/credentials.py +119 -0
  160. synth_ai/baseline/__init__.py +25 -0
  161. synth_ai/baseline/config.py +209 -0
  162. synth_ai/baseline/discovery.py +214 -0
  163. synth_ai/baseline/execution.py +146 -0
  164. synth_ai/cli/__init__.py +94 -18
  165. synth_ai/cli/__main__.py +0 -0
  166. synth_ai/cli/claude.py +70 -0
  167. synth_ai/cli/codex.py +84 -0
  168. synth_ai/cli/commands/__init__.py +18 -0
  169. synth_ai/cli/commands/baseline/__init__.py +12 -0
  170. synth_ai/cli/commands/baseline/core.py +637 -0
  171. synth_ai/cli/commands/baseline/list.py +93 -0
  172. synth_ai/cli/commands/demo/__init__.py +6 -0
  173. synth_ai/cli/commands/demo/core.py +163 -0
  174. synth_ai/cli/commands/eval/__init__.py +19 -0
  175. synth_ai/cli/commands/eval/core.py +1112 -0
  176. synth_ai/cli/commands/eval/errors.py +81 -0
  177. synth_ai/cli/commands/eval/validation.py +133 -0
  178. synth_ai/cli/commands/filter/__init__.py +12 -0
  179. synth_ai/cli/commands/filter/core.py +424 -0
  180. synth_ai/cli/commands/filter/errors.py +55 -0
  181. synth_ai/cli/commands/filter/validation.py +77 -0
  182. synth_ai/cli/commands/help/__init__.py +177 -0
  183. synth_ai/cli/commands/help/core.py +72 -0
  184. synth_ai/cli/commands/smoke/__init__.py +7 -0
  185. synth_ai/cli/commands/smoke/core.py +1436 -0
  186. synth_ai/cli/commands/status/__init__.py +64 -0
  187. synth_ai/cli/commands/status/client.py +192 -0
  188. synth_ai/cli/commands/status/config.py +92 -0
  189. synth_ai/cli/commands/status/errors.py +20 -0
  190. synth_ai/cli/commands/status/formatters.py +164 -0
  191. synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
  192. synth_ai/cli/commands/status/subcommands/files.py +79 -0
  193. synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
  194. synth_ai/cli/commands/status/subcommands/models.py +79 -0
  195. synth_ai/cli/commands/status/subcommands/pricing.py +22 -0
  196. synth_ai/cli/commands/status/subcommands/runs.py +81 -0
  197. synth_ai/cli/commands/status/subcommands/summary.py +47 -0
  198. synth_ai/cli/commands/status/subcommands/usage.py +203 -0
  199. synth_ai/cli/commands/status/utils.py +114 -0
  200. synth_ai/cli/commands/train/__init__.py +53 -0
  201. synth_ai/cli/commands/train/core.py +21 -0
  202. synth_ai/cli/commands/train/errors.py +117 -0
  203. synth_ai/cli/commands/train/judge_schemas.py +200 -0
  204. synth_ai/cli/commands/train/judge_validation.py +305 -0
  205. synth_ai/cli/commands/train/validation.py +386 -0
  206. synth_ai/cli/demo.py +30 -158
  207. synth_ai/cli/deploy/__init__.py +43 -0
  208. synth_ai/cli/deploy.py +162 -0
  209. synth_ai/cli/eval/__init__.py +36 -0
  210. synth_ai/cli/eval/core.py +5 -0
  211. synth_ai/cli/eval/errors.py +31 -0
  212. synth_ai/cli/eval/validation.py +5 -0
  213. synth_ai/cli/filter/__init__.py +28 -0
  214. synth_ai/cli/filter/core.py +5 -0
  215. synth_ai/cli/filter/errors.py +23 -0
  216. synth_ai/cli/filter/validation.py +5 -0
  217. synth_ai/cli/legacy_root_backup.py +14 -8
  218. synth_ai/cli/modal_serve/__init__.py +12 -0
  219. synth_ai/cli/modal_serve/core.py +14 -0
  220. synth_ai/cli/modal_serve/errors.py +8 -0
  221. synth_ai/cli/modal_serve/validation.py +11 -0
  222. synth_ai/cli/opencode.py +107 -0
  223. synth_ai/cli/root.py +9 -5
  224. synth_ai/cli/serve/__init__.py +12 -0
  225. synth_ai/cli/serve/core.py +14 -0
  226. synth_ai/cli/serve/errors.py +8 -0
  227. synth_ai/cli/serve/validation.py +11 -0
  228. synth_ai/cli/setup.py +20 -265
  229. synth_ai/cli/status.py +7 -126
  230. synth_ai/cli/task_app_deploy.py +1 -10
  231. synth_ai/cli/task_app_modal_serve.py +4 -9
  232. synth_ai/cli/task_app_serve.py +4 -11
  233. synth_ai/cli/task_apps.py +51 -1480
  234. synth_ai/cli/train/__init__.py +12 -0
  235. synth_ai/cli/train/core.py +21 -0
  236. synth_ai/cli/train/errors.py +8 -0
  237. synth_ai/cli/train/validation.py +24 -0
  238. synth_ai/cli/train.py +1 -14
  239. synth_ai/demos/crafter/grpo_crafter_task_app.py +1 -1
  240. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
  241. synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +7 -4
  242. synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +9 -5
  243. synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +4 -3
  244. synth_ai/environments/examples/red/engine.py +33 -12
  245. synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
  246. synth_ai/environments/examples/red/environment.py +26 -0
  247. synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
  248. synth_ai/http.py +12 -0
  249. synth_ai/judge_schemas.py +10 -10
  250. synth_ai/learning/__init__.py +10 -0
  251. synth_ai/learning/prompt_learning_client.py +276 -0
  252. synth_ai/learning/prompt_learning_types.py +184 -0
  253. synth_ai/learning/rl/client.py +3 -1
  254. synth_ai/pricing/__init__.py +2 -0
  255. synth_ai/pricing/model_pricing.py +57 -0
  256. synth_ai/streaming/__init__.py +29 -0
  257. synth_ai/streaming/config.py +94 -0
  258. synth_ai/streaming/handlers.py +518 -0
  259. synth_ai/streaming/streamer.py +320 -0
  260. synth_ai/streaming/types.py +95 -0
  261. synth_ai/task/apps/__init__.py +1 -0
  262. synth_ai/task/config.py +2 -0
  263. synth_ai/task/tracing_utils.py +25 -25
  264. synth_ai/task/validators.py +45 -9
  265. synth_ai/task_app_cfgs.py +21 -0
  266. synth_ai/tracing_v3/config.py +162 -19
  267. synth_ai/tracing_v3/constants.py +1 -1
  268. synth_ai/tracing_v3/db_config.py +24 -38
  269. synth_ai/tracing_v3/migration_helper.py +1 -2
  270. synth_ai/tracing_v3/storage/config.py +47 -13
  271. synth_ai/tracing_v3/storage/factory.py +3 -3
  272. synth_ai/tracing_v3/turso/daemon.py +113 -11
  273. synth_ai/tracing_v3/turso/native_manager.py +92 -16
  274. synth_ai/types.py +8 -0
  275. synth_ai/urls.py +11 -0
  276. synth_ai/utils/__init__.py +30 -1
  277. synth_ai/utils/agents.py +74 -0
  278. synth_ai/utils/bin.py +39 -0
  279. synth_ai/utils/cli.py +149 -5
  280. synth_ai/utils/env.py +40 -33
  281. synth_ai/utils/http.py +4 -1
  282. synth_ai/utils/json.py +72 -0
  283. synth_ai/utils/modal.py +285 -3
  284. synth_ai/utils/paths.py +48 -0
  285. synth_ai/utils/uvicorn.py +113 -0
  286. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/METADATA +109 -6
  287. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/RECORD +291 -142
  288. examples/qwen_vl/configs/eval_qwen2vl_vision.toml +0 -44
  289. synth_ai/cli/tui.py +0 -62
  290. synth_ai/tui/__init__.py +0 -5
  291. synth_ai/tui/__main__.py +0 -13
  292. synth_ai/tui/cli/__init__.py +0 -1
  293. synth_ai/tui/cli/query_experiments.py +0 -164
  294. synth_ai/tui/cli/query_experiments_v3.py +0 -164
  295. synth_ai/tui/dashboard.py +0 -911
  296. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/WHEEL +0 -0
  297. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/entry_points.txt +0 -0
  298. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/licenses/LICENSE +0 -0
  299. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/top_level.txt +0 -0
--- /dev/null
+++ b/synth_ai/streaming/config.py
@@ -0,0 +1,94 @@
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from typing import Any
+
+from .types import StreamType
+
+
+@dataclass(slots=True)
+class StreamConfig:
+    """Configuration describing which streams to consume and how to filter them."""
+
+    enabled_streams: set[StreamType] = field(default_factory=lambda: set(StreamType))
+    event_types: set[str] | None = None  # Whitelist: only include these event types
+    event_types_exclude: set[str] | None = None  # Blacklist: exclude these event types
+    event_levels: set[str] | None = None
+    metric_names: set[str] | None = None
+    metric_phases: set[str] | None = None
+    timeline_phases: set[str] | None = None
+    sample_rate: float = 1.0
+    max_events_per_poll: int | None = None
+    deduplicate: bool = True
+
+    @classmethod
+    def default(cls) -> StreamConfig:
+        """Return a configuration representing the default (all streams) view."""
+        return cls(
+            event_types_exclude={
+                # Filter out noisy events that just announce what metrics already show
+                "sft.progress",  # Generic "Training progress" with no data
+                "sft.loss",  # Generic "Loss update" with no data
+                "sft.upstream.status",  # Very verbose status echo events
+            }
+        )
+
+    @classmethod
+    def minimal(cls) -> StreamConfig:
+        """Return a configuration streaming status updates only."""
+        return cls(enabled_streams={StreamType.STATUS})
+
+    @classmethod
+    def verbose(cls) -> StreamConfig:
+        """Return a configuration with all streams and events (no filters)."""
+        return cls()
+
+    @classmethod
+    def progress_only(cls) -> StreamConfig:
+        """Return a configuration tailored to show training progress."""
+        return cls(
+            enabled_streams={StreamType.STATUS, StreamType.EVENTS, StreamType.METRICS},
+            event_types={"sft.progress", "rl.train.step", "sft.validation.summary"},
+            metric_names={"train.loss", "eval.reward_mean"},
+        )
+
+    @classmethod
+    def errors_only(cls) -> StreamConfig:
+        """Return a configuration that focuses on heightened severity signals."""
+        return cls(
+            enabled_streams={StreamType.STATUS, StreamType.EVENTS},
+            event_levels={"error", "warning"},
+        )
+
+    def should_include_event(self, event: dict[str, Any]) -> bool:
+        """Determine whether an event message should be included."""
+        event_type = event.get("type")
+
+        # Apply blacklist first (takes precedence)
+        if self.event_types_exclude and event_type in self.event_types_exclude:
+            return False
+
+        # Then apply whitelist
+        if self.event_types and event_type not in self.event_types:
+            return False
+
+        if self.event_levels:
+            return event.get("level") in self.event_levels
+        return True
+
+    def should_include_metric(self, metric: dict[str, Any]) -> bool:
+        """Determine whether a metric point should be included."""
+        if self.metric_names and metric.get("name") not in self.metric_names:
+            return False
+        if self.metric_phases:
+            return metric.get("phase") in self.metric_phases
+        return True
+
+    def should_include_timeline(self, timeline_entry: dict[str, Any]) -> bool:
+        """Determine whether a timeline entry should be included."""
+        if self.timeline_phases:
+            return timeline_entry.get("phase") in self.timeline_phases
+        return True
+
+
+__all__ = ["StreamConfig"]
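
The hunk above is the new synth_ai/streaming/config.py. StreamConfig ships preset constructors (default, minimal, verbose, progress_only, errors_only) plus per-record filter predicates. A minimal usage sketch based only on the API shown in this diff; the payload dicts below are illustrative, not actual backend events:

from synth_ai.streaming.config import StreamConfig

# Follow only training-progress signals (preset defined in the hunk above).
cfg = StreamConfig.progress_only()

# Event filtering: the exclude list is checked first, then the whitelist.
cfg.should_include_event({"type": "sft.progress", "level": "info"})  # True (whitelisted)
cfg.should_include_event({"type": "sft.upstream.status"})            # False (not whitelisted)

# Metric filtering: only configured metric names pass.
cfg.should_include_metric({"name": "train.loss", "value": 0.42})     # True
cfg.should_include_metric({"name": "gpu.util", "value": 0.9})        # False
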
--- /dev/null
+++ b/synth_ai/streaming/handlers.py
@@ -0,0 +1,518 @@
+from __future__ import annotations
+
+import contextlib
+import json
+import re
+import time
+from abc import ABC, abstractmethod
+from collections import deque
+from datetime import datetime
+from pathlib import Path
+from typing import Any, Callable
+
+import click
+
+from .types import StreamMessage, StreamType
+
+
+def _mask_sensitive_urls(text: str) -> str:
+    """Mask S3/Wasabi URLs and sensitive paths in log messages.
+
+    Replaces full S3/Wasabi URLs with masked versions to prevent leaking
+    bucket names, paths, and infrastructure details in public SDK logs.
+
+    Examples:
+        s3://synth-artifacts/models/... -> s3://***/***/[masked]
+        Wasabi s3://bucket/path/file.tar.gz -> Wasabi s3://***/***/[masked]
+    """
+    if not text:
+        return text
+
+    # Pattern matches:
+    # - Optional "Wasabi " prefix
+    # - s3:// or http(s):// scheme
+    # - Any bucket/host
+    # - Any path
+    # - Common model file extensions
+    pattern = r'(Wasabi\s+)?((s3|https?)://[^\s]+\.(tar\.gz|zip|pt|pth|safetensors|ckpt|bin))'
+
+    def replace_url(match: re.Match) -> str:
+        prefix = match.group(1) or ""  # "Wasabi " or empty
+        url = match.group(2)
+        # Extract just the filename
+        filename = url.split("/")[-1] if "/" in url else "file"
+        return f'{prefix}s3://***/***/[{filename}]'
+
+    return re.sub(pattern, replace_url, text, flags=re.IGNORECASE)
+
+
+class StreamHandler(ABC):
+    """Base class for log handlers that consume ``StreamMessage`` objects."""
+
+    @abstractmethod
+    def handle(self, message: StreamMessage) -> None:
+        """Process a message produced by the streamer."""
+
+    def should_handle(self, message: StreamMessage) -> bool:  # pragma: no cover - trivial
+        """Predicate allowing handlers to filter messages before processing."""
+        return True
+
+    def flush(self) -> None:  # pragma: no cover - optional
+        """Flush buffered output."""
+        return None
+
+
+class CLIHandler(StreamHandler):
+    """Simple CLI output mirroring current poller behaviour."""
+
+    def __init__(
+        self,
+        *,
+        hidden_event_types: set[str] | None = None,
+        hidden_event_substrings: set[str] | None = None,
+    ) -> None:
+        self._hidden_event_types = set(hidden_event_types or set())
+        self._hidden_event_substrings = {s.lower() for s in (hidden_event_substrings or set())}
+
+    def handle(self, message: StreamMessage) -> None:
+        if not self.should_handle(message):
+            return
+
+        timestamp = datetime.now().strftime("%H:%M:%S")
+        if message.stream_type is StreamType.STATUS:
+            status = str(message.data.get("status") or message.data.get("state") or "unknown")
+            click.echo(f"[{timestamp}] status={status}")
+            return
+
+        if message.stream_type is StreamType.EVENTS:
+            event_type = message.data.get("type", "event")
+            if event_type in self._hidden_event_types:
+                return
+            level = message.data.get("level")
+            msg = message.data.get("message") or ""
+            # Evaluate substring filters against lower-cased concatenated text
+            if self._hidden_event_substrings:
+                blob = " ".join(
+                    [
+                        event_type or "",
+                        str(msg),
+                        json.dumps(message.data.get("data", "")),
+                    ]
+                ).lower()
+                if any(sub in blob for sub in self._hidden_event_substrings):
+                    return
+            prefix = f"[{timestamp}] [{message.seq}] {event_type}"
+            if level:
+                prefix += f" ({level})"
+            # Mask sensitive URLs before displaying
+            sanitized_msg = _mask_sensitive_urls(msg)
+            click.echo(f"{prefix}: {sanitized_msg}".rstrip(": "))
+            return
+
+        if message.stream_type is StreamType.METRICS:
+            name = message.data.get("name")
+            value = message.data.get("value")
+            step = message.data.get("step")
+            data = message.data.get("data", {})
+
+            # Format metric display
+            metric_str = f"[{timestamp}] [metric] {name}={value:.4f}" if isinstance(value, (int, float)) else f"[{timestamp}] [metric] {name}={value}"
+            if step is not None:
+                metric_str += f" (step={step})"
+
+            # Add any additional context from data field
+            if isinstance(data, dict):
+                n = data.get("n")
+                if n is not None:
+                    metric_str += f" n={n}"
+
+            click.echo(metric_str)
+            return
+
+        if message.stream_type is StreamType.TIMELINE:
+            phase = message.data.get("phase", "phase")
+            click.echo(f"[{timestamp}] timeline={phase}")
+
+
+class JSONHandler(StreamHandler):
+    """Emit messages as JSON lines suitable for machine parsing."""
+
+    def __init__(self, output_file: str | None = None, *, indent: int | None = None) -> None:
+        self.output_file = Path(output_file).expanduser() if output_file else None
+        self._indent = indent
+
+    def handle(self, message: StreamMessage) -> None:
+        if not self.should_handle(message):
+            return
+
+        payload: dict[str, Any] = {
+            "stream_type": message.stream_type.name,
+            "timestamp": message.timestamp,
+            "job_id": message.job_id,
+            "data": message.data,
+        }
+        if message.seq is not None:
+            payload["seq"] = message.seq
+        if message.step is not None:
+            payload["step"] = message.step
+        if message.phase is not None:
+            payload["phase"] = message.phase
+
+        line = json.dumps(payload, indent=self._indent)
+        if self.output_file:
+            with self.output_file.open("a", encoding="utf-8") as fh:
+                fh.write(line)
+                if self._indent is None:
+                    fh.write("\n")
+        else:
+            click.echo(line)
+
+    def flush(self) -> None:
+        return None
+
+
+class CallbackHandler(StreamHandler):
+    """Invoke user-provided callbacks for specific stream types."""
+
+    def __init__(
+        self,
+        *,
+        on_status: Callable[[dict[str, Any]], None] | None = None,
+        on_event: Callable[[dict[str, Any]], None] | None = None,
+        on_metric: Callable[[dict[str, Any]], None] | None = None,
+        on_timeline: Callable[[dict[str, Any]], None] | None = None,
+    ) -> None:
+        self._on_status = on_status
+        self._on_event = on_event
+        self._on_metric = on_metric
+        self._on_timeline = on_timeline
+
+    def handle(self, message: StreamMessage) -> None:
+        if not self.should_handle(message):
+            return
+
+        if message.stream_type is StreamType.STATUS and self._on_status:
+            self._on_status(message.data)
+        elif message.stream_type is StreamType.EVENTS and self._on_event:
+            self._on_event(message.data)
+        elif message.stream_type is StreamType.METRICS and self._on_metric:
+            self._on_metric(message.data)
+        elif message.stream_type is StreamType.TIMELINE and self._on_timeline:
+            self._on_timeline(message.data)
+
+
+class BufferedHandler(StreamHandler):
+    """Collect messages and emit them in batches."""
+
+    def __init__(self, *, flush_interval: float = 5.0, max_buffer_size: int = 100) -> None:
+        self.flush_interval = flush_interval
+        self.max_buffer_size = max_buffer_size
+        self._buffer: list[StreamMessage] = []
+        self._last_flush = time.time()
+
+    def handle(self, message: StreamMessage) -> None:
+        if not self.should_handle(message):
+            return
+
+        self._buffer.append(message)
+        now = time.time()
+        if len(self._buffer) >= self.max_buffer_size or now - self._last_flush >= self.flush_interval:
+            self.flush()
+
+    def flush(self) -> None:
+        if not self._buffer:
+            return
+        self.process_batch(self._buffer)
+        self._buffer.clear()
+        self._last_flush = time.time()
+
+    def process_batch(self, messages: list[StreamMessage]) -> None:  # pragma: no cover - abstract
+        """Override to define how buffered messages should be processed."""
+
+
+class IntegrationTestHandler(StreamHandler):
+    """Collect messages for integration tests or programmatic assertions."""
+
+    def __init__(self) -> None:
+        self.messages: list[StreamMessage] = []
+
+    def handle(self, message: StreamMessage) -> None:
+        self.messages.append(message)
+
+    def clear(self) -> None:
+        self.messages.clear()
+
+
+class LossCurveHandler(StreamHandler):
+    """Render a live-updating loss chart inside a fixed Rich panel."""
+
+    def __init__(
+        self,
+        *,
+        metric_name: str = "train.loss",
+        max_points: int = 200,
+        width: int = 60,
+        console: Any | None = None,
+        live: Any | None = None,
+    ) -> None:
+        try:
+            from rich.console import Console
+            from rich.live import Live
+            from rich.panel import Panel
+            from rich.text import Text
+        except ImportError as exc:  # pragma: no cover - optional dependency guard
+            raise RuntimeError(
+                "LossCurveHandler requires the 'rich' package. Install synth-ai[analytics] or rich>=13."
+            ) from exc
+
+        self.metric_name = metric_name
+        self.max_points = max_points
+        self.width = width
+
+        self._console_class = Console
+        self._panel_class = Panel
+        self._text_class = Text
+
+        self._console = console or Console()
+        self._live = live or Live(console=self._console, transient=False, refresh_per_second=8)
+        self._started = False
+
+        self._steps: list[int] = []
+        self._values: list[float] = []
+        self._status = "waiting"
+        self._last_event: str | None = None
+
+    def handle(self, message: StreamMessage) -> None:
+        updated = False
+
+        if message.stream_type is StreamType.STATUS:
+            status = str(message.data.get("status") or message.data.get("state") or "unknown")
+            if status != self._status:
+                self._status = status
+                updated = True
+
+        elif message.stream_type is StreamType.EVENTS:
+            event_type = message.data.get("type", "")
+            msg = message.data.get("message") or ""
+            level = message.data.get("level")
+            summary = f"{event_type}".strip()
+            if level:
+                summary += f" ({level})"
+            if msg:
+                summary += f": {msg}"
+            if summary != self._last_event:
+                self._last_event = summary
+                updated = True
+
+        elif message.stream_type is StreamType.METRICS:
+            if message.data.get("name") != self.metric_name:
+                return
+            value = message.data.get("value")
+            step = message.data.get("step")
+            if not isinstance(value, (int, float)) or not isinstance(step, int):
+                return
+            self._values.append(float(value))
+            self._steps.append(step)
+            if len(self._values) > self.max_points:
+                self._values = self._values[-self.max_points :]
+                self._steps = self._steps[-self.max_points :]
+            updated = True
+
+        elif message.stream_type is StreamType.TIMELINE:
+            phase = message.data.get("phase")
+            if phase:
+                self._status = str(phase)
+                updated = True
+
+        if updated:
+            self._refresh()
+
+    def flush(self) -> None:
+        if self._started:
+            with contextlib.suppress(Exception):
+                self._live.stop()
+            self._started = False
+
+    def _ensure_live(self) -> None:
+        if not self._started:
+            with contextlib.suppress(Exception):
+                self._live.start()
+            self._started = True
+
+    def _refresh(self) -> None:
+        self._ensure_live()
+        body = self._build_body()
+        title = f"{self.metric_name} | status={self._status}"
+        self._live.update(self._panel_class(body, title=title, border_style="cyan"))
+
+    def _build_body(self) -> Any:
+        if not self._values:
+            return self._text_class("Waiting for metrics…", style="yellow")
+
+        chart = self._render_sparkline()
+        last_value = self._values[-1]
+        lines = [
+            chart,
+            f"latest: {last_value:.4f} (step {self._steps[-1]})",
+        ]
+        if self._last_event:
+            lines.append(f"event: {self._last_event}")
+        return "\n".join(lines)
+
+    def _render_sparkline(self) -> str:
+        blocks = "▁▂▃▄▅▆▇█"
+        tail_len = min(self.width, len(self._values))
+        tail = self._values[-tail_len:]
+        minimum = min(tail)
+        maximum = max(tail)
+        if maximum == minimum:
+            level = blocks[0]
+            return f"{minimum:.2f} {level * tail_len} {maximum:.2f}"
+        scale = (len(blocks) - 1) / (maximum - minimum)
+        chars = "".join(blocks[int((v - minimum) * scale + 0.5)] for v in tail)
+        return f"{minimum:.2f} {chars} {maximum:.2f}"
+
+    def __del__(self) -> None:  # pragma: no cover - defensive cleanup
+        with contextlib.suppress(Exception):
+            self.flush()
+
+class RichHandler(StreamHandler):
+    """Rich powered handler with live progress and metrics table."""
+
+    def __init__(
+        self,
+        *,
+        event_log_size: int = 20,
+        console: Any | None = None,
+    ) -> None:
+        try:
+            from rich.console import Console
+            from rich.progress import BarColumn, Progress, SpinnerColumn, TextColumn
+            from rich.table import Table
+        except ImportError as exc:  # pragma: no cover - requires optional dependency
+            raise RuntimeError(
+                "RichHandler requires the 'rich' package. Install synth-ai[analytics] or rich>=13."
+            ) from exc
+
+        self._console_class = Console
+        self._progress_class = Progress
+        self._spinner_column = SpinnerColumn
+        self._text_column = TextColumn
+        self._bar_column = BarColumn
+        self._table_class = Table
+
+        self._console = console or Console()
+        self._progress = Progress(
+            SpinnerColumn(),
+            TextColumn("[progress.description]{task.description}"),
+            BarColumn(),
+            TextColumn("{task.completed}/{task.total}" if console else ""),
+            TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
+            transient=False,
+            console=self._console,
+        )
+        self._task_id: int | None = None
+        self._current_status = "unknown"
+        self._latest_metrics: dict[str, Any] = {}
+        self._event_log: deque[str] = deque(maxlen=event_log_size)
+        self._progress_started = False
+
+    def handle(self, message: StreamMessage) -> None:
+        if not self.should_handle(message):
+            return
+
+        if message.stream_type is StreamType.STATUS:
+            self._current_status = str(message.data.get("status") or message.data.get("state"))
+            self._ensure_progress_started()
+            if self._task_id is not None:
+                description = f"Status: {self._current_status}"
+                self._progress.update(self._task_id, description=description)
+            self._render_summary()
+            return
+
+        if message.stream_type is StreamType.EVENTS:
+            event_type = message.data.get("type", "event")
+            summary = message.data.get("message") or ""
+            level = message.data.get("level")
+            # Mask sensitive URLs before displaying
+            sanitized_summary = _mask_sensitive_urls(summary)
+            formatted = f"[{event_type}] {sanitized_summary}".strip()
+            if level:
+                formatted = f"{formatted} ({level})"
+            self._event_log.append(formatted)
+            data = message.data.get("data") or {}
+            step = data.get("step") or data.get("current_step")
+            total_steps = data.get("total_steps") or data.get("max_steps")
+            if step and total_steps:
+                self._ensure_progress_started(total_steps)
+                if self._task_id is not None:
+                    self._progress.update(self._task_id, completed=int(step), total=int(total_steps))
+            self._render_summary()
+            return
+
+        if message.stream_type is StreamType.METRICS:
+            name = message.data.get("name", "")
+            value = message.data.get("value")
+            if name:
+                self._latest_metrics[name] = value
+            self._render_summary()
+            return
+
+        if message.stream_type is StreamType.TIMELINE:
+            phase = message.data.get("phase", "")
+            if phase and phase.lower() not in {"training", "running"}:
+                self._event_log.append(f"[timeline] {phase}")
+            self._render_summary()
+
+    def flush(self) -> None:
+        if self._progress_started:
+            self._progress.stop()
+            self._progress_started = False
+        self._render_summary(force=True)
+
+    def _ensure_progress_started(self, total: int | float | None = None) -> None:
+        if not self._progress_started:
+            self._progress.start()
+            self._progress_started = True
+        if self._task_id is None:
+            self._task_id = self._progress.add_task(
+                f"Status: {self._current_status}", total=total or 100
+            )
+        elif total is not None and self._task_id is not None:
+            self._progress.update(self._task_id, total=total)
+
+    def _render_summary(self, force: bool = False) -> None:
+        if force and self._progress_started:
+            self._progress.refresh()
+
+        table = self._table_class(title="Latest Metrics")
+        table.add_column("Metric")
+        table.add_column("Value")
+
+        if not self._latest_metrics:
+            table.add_row("—", "—")
+        else:
+            for name, value in sorted(self._latest_metrics.items()):
+                table.add_row(str(name), str(value))
+
+        if self._progress_started:
+            self._progress.console.print(table)
+        else:
+            self._console.print(table)
+
+        if self._event_log:
+            self._console.print("\nRecent events:")
+            for entry in list(self._event_log):
+                self._console.print(f" • {entry}")
+
+
+__all__ = [
+    "BufferedHandler",
+    "CallbackHandler",
+    "CLIHandler",
+    "JSONHandler",
+    "IntegrationTestHandler",
+    "LossCurveHandler",
+    "RichHandler",
+    "StreamHandler",
+]
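
The hunk above is the new synth_ai/streaming/handlers.py. Every handler implements the same handle(StreamMessage) interface, so several can observe one job at once. A small construction sketch using only the signatures shown in this diff; the output path and callback are illustrative, and wiring handlers to the poller happens in synth_ai/streaming/streamer.py (also added in this release, not shown here):

from synth_ai.streaming.handlers import CallbackHandler, CLIHandler, JSONHandler

# Human-readable console output, hiding an especially chatty event type.
cli = CLIHandler(hidden_event_types={"sft.upstream.status"})

# Machine-readable JSON lines appended to a local file (path is illustrative).
jsonl = JSONHandler(output_file="~/synth_stream.jsonl")

# Programmatic hook: forward loss metrics to your own tracking code.
def on_metric(metric: dict) -> None:
    if metric.get("name") == "train.loss":
        print("loss:", metric.get("value"))

hooks = CallbackHandler(on_metric=on_metric)

handlers = [cli, jsonl, hooks]  # hand these to the job streamer (see streamer.py)
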