PyPI - synth-ai - Versions diffs - 0.2.9.dev4__py3-none-any.whl → 0.2.9.dev7__py3-none-any.whl - Mend

synth-ai 0.2.9.dev4__py3-none-any.whl → 0.2.9.dev7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of synth-ai might be problematic. Click here for more details.

Files changed (157) hide show

examples/common_old/backend.py +0 -1
examples/crafter_debug_render.py +15 -6
examples/evals_old/compare_models.py +1 -0
examples/finetuning_old/_backup_synth_qwen/filter_traces_achievements.py +6 -2
examples/finetuning_old/_backup_synth_qwen/react_agent_lm.py +4 -4
examples/finetuning_old/_backup_synth_qwen/sft_kickoff.py +4 -3
examples/finetuning_old/synth_qwen_v1/filter_traces_achievements.py +6 -2
examples/finetuning_old/synth_qwen_v1/finetune.py +1 -1
examples/finetuning_old/synth_qwen_v1/hello_ft_model.py +4 -4
examples/finetuning_old/synth_qwen_v1/infer.py +1 -2
examples/finetuning_old/synth_qwen_v1/poll.py +4 -2
examples/finetuning_old/synth_qwen_v1/prepare_data.py +8 -8
examples/finetuning_old/synth_qwen_v1/react_agent_lm.py +5 -4
examples/finetuning_old/synth_qwen_v1/run_crafter_sft_job.py +11 -8
examples/finetuning_old/synth_qwen_v1/run_ft_job.py +17 -12
examples/finetuning_old/synth_qwen_v1/upload_data.py +1 -1
examples/finetuning_old/synth_qwen_v1/util.py +7 -2
examples/rl/configs/eval_base_qwen.toml +1 -1
examples/rl/configs/rl_from_base_qwen17.toml +1 -1
examples/rl/download_dataset.py +26 -10
examples/rl/run_eval.py +17 -15
examples/rl/run_rl_and_save.py +24 -7
examples/rl/task_app/math_single_step.py +128 -11
examples/rl/task_app/math_task_app.py +11 -3
examples/rl_old/task_app.py +222 -53
examples/warming_up_to_rl/analyze_trace_db.py +7 -5
examples/warming_up_to_rl/export_trace_sft.py +141 -16
examples/warming_up_to_rl/groq_test.py +11 -4
examples/warming_up_to_rl/manage_secrets.py +15 -6
examples/warming_up_to_rl/readme.md +9 -2
examples/warming_up_to_rl/run_eval.py +108 -30
examples/warming_up_to_rl/run_fft_and_save.py +128 -52
examples/warming_up_to_rl/run_local_rollout.py +87 -36
examples/warming_up_to_rl/run_local_rollout_modal.py +113 -25
examples/warming_up_to_rl/run_local_rollout_parallel.py +80 -16
examples/warming_up_to_rl/run_local_rollout_traced.py +125 -20
examples/warming_up_to_rl/run_rl_and_save.py +31 -7
examples/warming_up_to_rl/run_rollout_remote.py +37 -10
examples/warming_up_to_rl/task_app/grpo_crafter.py +90 -27
examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +9 -27
examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +46 -108
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -1
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +1 -1
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -1
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +50 -17
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +35 -21
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +8 -4
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +29 -26
examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +1 -1
examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +17 -13
examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +1 -1
examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +106 -63
examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +82 -84
examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +76 -59
examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +1 -1
examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +43 -49
examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +5 -15
synth_ai/__init__.py +1 -0
synth_ai/api/train/builders.py +34 -10
synth_ai/api/train/cli.py +172 -32
synth_ai/api/train/config_finder.py +59 -4
synth_ai/api/train/env_resolver.py +32 -14
synth_ai/api/train/pollers.py +11 -3
synth_ai/api/train/task_app.py +4 -1
synth_ai/api/train/utils.py +20 -4
synth_ai/cli/__init__.py +11 -4
synth_ai/cli/balance.py +1 -1
synth_ai/cli/demo.py +19 -5
synth_ai/cli/rl_demo.py +75 -16
synth_ai/cli/root.py +116 -37
synth_ai/cli/task_apps.py +1286 -170
synth_ai/cli/traces.py +1 -0
synth_ai/cli/turso.py +73 -0
synth_ai/core/experiment.py +0 -2
synth_ai/demo_registry.py +67 -30
synth_ai/demos/core/cli.py +493 -164
synth_ai/demos/demo_task_apps/core.py +50 -6
synth_ai/demos/demo_task_apps/crafter/configs/crafter_fft_4b.toml +2 -3
synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +36 -28
synth_ai/demos/demo_task_apps/math/_common.py +1 -2
synth_ai/demos/demo_task_apps/math/deploy_modal.py +0 -2
synth_ai/demos/demo_task_apps/math/modal_task_app.py +168 -65
synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -1
synth_ai/environments/examples/bandit/engine.py +12 -4
synth_ai/environments/examples/bandit/taskset.py +4 -4
synth_ai/environments/reproducibility/tree.py +3 -1
synth_ai/environments/service/core_routes.py +6 -2
synth_ai/evals/base.py +0 -2
synth_ai/experimental/synth_oss.py +11 -12
synth_ai/handshake.py +3 -1
synth_ai/http_client.py +31 -7
synth_ai/inference/__init__.py +0 -2
synth_ai/inference/client.py +8 -4
synth_ai/jobs/client.py +40 -10
synth_ai/learning/client.py +33 -8
synth_ai/learning/config.py +0 -2
synth_ai/learning/constants.py +0 -2
synth_ai/learning/ft_client.py +6 -3
synth_ai/learning/health.py +9 -2
synth_ai/learning/jobs.py +17 -5
synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +1 -3
synth_ai/learning/prompts/random_search.py +4 -1
synth_ai/learning/prompts/run_random_search_banking77.py +6 -1
synth_ai/learning/rl_client.py +42 -14
synth_ai/learning/sse.py +0 -2
synth_ai/learning/validators.py +6 -2
synth_ai/lm/caching/ephemeral.py +1 -3
synth_ai/lm/core/exceptions.py +0 -2
synth_ai/lm/core/main.py +13 -1
synth_ai/lm/core/synth_models.py +0 -1
synth_ai/lm/core/vendor_clients.py +4 -2
synth_ai/lm/overrides.py +2 -2
synth_ai/lm/vendors/core/anthropic_api.py +7 -7
synth_ai/lm/vendors/core/openai_api.py +2 -0
synth_ai/lm/vendors/openai_standard.py +3 -1
synth_ai/lm/vendors/openai_standard_responses.py +6 -3
synth_ai/lm/vendors/supported/custom_endpoint.py +1 -3
synth_ai/lm/vendors/synth_client.py +37 -10
synth_ai/rl/__init__.py +0 -1
synth_ai/rl/contracts.py +0 -2
synth_ai/rl/env_keys.py +6 -1
synth_ai/task/__init__.py +1 -0
synth_ai/task/apps/__init__.py +11 -11
synth_ai/task/auth.py +29 -17
synth_ai/task/client.py +3 -1
synth_ai/task/contracts.py +1 -0
synth_ai/task/datasets.py +3 -1
synth_ai/task/errors.py +3 -2
synth_ai/task/health.py +0 -2
synth_ai/task/json.py +0 -1
synth_ai/task/proxy.py +2 -5
synth_ai/task/rubrics.py +9 -3
synth_ai/task/server.py +31 -5
synth_ai/task/tracing_utils.py +8 -3
synth_ai/task/validators.py +0 -1
synth_ai/task/vendors.py +0 -1
synth_ai/tracing_v3/db_config.py +26 -1
synth_ai/tracing_v3/decorators.py +1 -0
synth_ai/tracing_v3/examples/basic_usage.py +3 -2
synth_ai/tracing_v3/hooks.py +2 -0
synth_ai/tracing_v3/replica_sync.py +1 -0
synth_ai/tracing_v3/session_tracer.py +24 -3
synth_ai/tracing_v3/storage/base.py +4 -1
synth_ai/tracing_v3/storage/factory.py +0 -1
synth_ai/tracing_v3/turso/manager.py +102 -38
synth_ai/tracing_v3/turso/models.py +4 -1
synth_ai/tracing_v3/utils.py +1 -0
synth_ai/v0/tracing/upload.py +32 -135
{synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev7.dist-info}/METADATA +1 -1
{synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev7.dist-info}/RECORD +154 -156
examples/warming_up_to_rl/task_app/synth_envs_hosted/test_stepwise_rewards.py +0 -58
synth_ai/environments/examples/sokoban/units/astar_common.py +0 -95
synth_ai/install_sqld.sh +0 -40
{synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev7.dist-info}/WHEEL +0 -0
{synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev7.dist-info}/entry_points.txt +0 -0
{synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev7.dist-info}/licenses/LICENSE +0 -0
{synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev7.dist-info}/top_level.txt +0 -0

synth_ai/api/train/builders.py CHANGED Viewed

@@ -34,9 +34,13 @@ def build_rl_payload(
     services = data.get("services") if isinstance(data.get("services"), dict) else {}
     model_cfg = data.get("model") if isinstance(data.get("model"), dict) else {}
-    final_task_url = (overrides.get("task_url") or task_url or services.get("task_url") or "").strip()
+    final_task_url = (
+        overrides.get("task_url") or task_url or services.get("task_url") or ""
+    ).strip()
     if not final_task_url:
-        raise click.ClickException("Task app URL required (provide --task-url or set services.task_url in TOML)")
+        raise click.ClickException(
+            "Task app URL required (provide --task-url or set services.task_url in TOML)"
+        )
     model_source = (model_cfg.get("source") or "").strip()
     model_base = (model_cfg.get("base") or "").strip()
@@ -45,7 +49,9 @@ def build_rl_payload(
         model_source = override_model
         model_base = ""
     if bool(model_source) == bool(model_base):
-        raise click.ClickException("Model section must specify exactly one of [model].source or [model].base")
+        raise click.ClickException(
+            "Model section must specify exactly one of [model].source or [model].base"
+        )
     # Force TOML services.task_url to the effective endpoint to avoid split URLs
     try:
@@ -93,15 +99,23 @@ def build_sft_payload(
     if not raw_dataset:
         raise TrainError("Dataset not specified; pass --dataset or set [job].data")
     dataset_path = Path(raw_dataset)
-    dataset_path = (dataset_path if dataset_path.is_absolute() else (config_path.parent / dataset_path)).resolve()
+    # Resolve relative paths from current working directory, not config directory
+    dataset_path = (
+        dataset_path if dataset_path.is_absolute() else (Path.cwd() / dataset_path)
+    ).resolve()
     if not dataset_path.exists():
         raise TrainError(f"Dataset not found: {dataset_path}")
-    validation_path = data_cfg.get("validation_path") if isinstance(data_cfg.get("validation_path"), str) else None
+    validation_path = (
+        data_cfg.get("validation_path")
+        if isinstance(data_cfg.get("validation_path"), str)
+        else None
+    )
     validation_file = None
     if validation_path:
         vpath = Path(validation_path)
-        vpath = (vpath if vpath.is_absolute() else (config_path.parent / vpath)).resolve()
+        # Resolve relative paths from current working directory, not config directory
+        vpath = (vpath if vpath.is_absolute() else (Path.cwd() / vpath)).resolve()
         if not vpath.exists():
             click.echo(f"[WARN] Validation dataset {vpath} missing; continuing without validation")
         else:
@@ -125,15 +139,23 @@ def build_sft_payload(
     if isinstance(hp_cfg.get("parallelism"), dict):
         hp_block["parallelism"] = hp_cfg["parallelism"]
-    compute_block = {k: compute_cfg[k] for k in ("gpu_type", "gpu_count", "nodes") if k in compute_cfg}
+    compute_block = {
+        k: compute_cfg[k] for k in ("gpu_type", "gpu_count", "nodes") if k in compute_cfg
+    }
     effective = {
         "compute": compute_block,
-        "data": {"topology": data_cfg.get("topology", {}) if isinstance(data_cfg.get("topology"), dict) else {}},
+        "data": {
+            "topology": data_cfg.get("topology", {})
+            if isinstance(data_cfg.get("topology"), dict)
+            else {}
+        },
         "training": {k: v for k, v in train_cfg.items() if k in ("mode", "use_qlora")},
     }
-    validation_cfg = train_cfg.get("validation") if isinstance(train_cfg.get("validation"), dict) else None
+    validation_cfg = (
+        train_cfg.get("validation") if isinstance(train_cfg.get("validation"), dict) else None
+    )
     if isinstance(validation_cfg, dict):
         hp_block.update(
             {
@@ -144,7 +166,9 @@ def build_sft_payload(
                 "greater_is_better": bool(validation_cfg.get("greater_is_better", False)),
             }
         )
-        effective.setdefault("training", {})["validation"] = {"enabled": bool(validation_cfg.get("enabled", True))}
+        effective.setdefault("training", {})["validation"] = {
+            "enabled": bool(validation_cfg.get("enabled", True))
+        }
     payload = {
         "model": job_cfg.get("model") or data.get("model"),

synth_ai/api/train/cli.py CHANGED Viewed

@@ -24,6 +24,7 @@ from .utils import (
     sleep,
     validate_sft_jsonl,
 )
+from synth_ai.config.base_url import get_backend_from_env
 def _discover_dataset_candidates(config_path: Path, limit: int = 50) -> list[Path]:
@@ -92,20 +93,57 @@ def _prompt_manual_dataset() -> Path:
     return Path(manual).expanduser()
+def _default_backend() -> str:
+    """Resolve backend URL with proper production default."""
+    # Check explicit override first
+    explicit = os.getenv("BACKEND_BASE_URL", "").strip()
+    if explicit:
+        return explicit
+    # Use standard resolution logic
+    base, _ = get_backend_from_env()
+    return f"{base}/api" if not base.endswith("/api") else base
 @click.command("train")
-@click.option("--config", "config_paths", multiple=True, type=click.Path(), help="Path to training TOML (repeatable)")
+@click.option(
+    "--config",
+    "config_paths",
+    multiple=True,
+    type=click.Path(),
+    help="Path to training TOML (repeatable)",
+)
 @click.option("--type", "train_type", type=click.Choice(["auto", "rl", "sft"]), default="auto")
-@click.option("--env-file", "env_files", multiple=True, type=click.Path(), help=".env file(s) to preload (skips selection prompt)")
+@click.option(
+    "--env-file",
+    "env_files",
+    multiple=True,
+    type=click.Path(),
+    help=".env file(s) to preload (skips selection prompt)",
+)
 @click.option("--task-url", default=None, help="Override task app base URL (RL only)")
-@click.option("--dataset", "dataset_path", type=click.Path(), default=None, help="Override dataset JSONL path (SFT)")
-@click.option("--backend", default=lambda: os.getenv("BACKEND_BASE_URL", "http://localhost:8000/api"), help="Backend base URL")
+@click.option(
+    "--dataset",
+    "dataset_path",
+    type=click.Path(),
+    default=None,
+    help="Override dataset JSONL path (SFT)",
+)
+@click.option("--backend", default=_default_backend, help="Backend base URL")
 @click.option("--model", default=None, help="Override model identifier")
 @click.option("--idempotency", default=None, help="Idempotency-Key header for job creation")
 @click.option("--dry-run", is_flag=True, help="Preview payload without submitting")
 @click.option("--poll/--no-poll", default=True, help="Poll job status until terminal state")
-@click.option("--poll-timeout", default=3600.0, type=float, help="Maximum seconds to poll before timing out")
+@click.option(
+    "--poll-timeout", default=3600.0, type=float, help="Maximum seconds to poll before timing out"
+)
 @click.option("--poll-interval", default=5.0, type=float, help="Seconds between poll attempts")
-@click.option("--examples", "examples_limit", type=int, default=None, help="Limit SFT training to the first N examples")
+@click.option(
+    "--examples",
+    "examples_limit",
+    type=int,
+    default=None,
+    help="Limit SFT training to the first N examples",
+)
 def train_command(
     config_paths: tuple[str, ...],
     train_type: str,
@@ -123,12 +161,18 @@ def train_command(
 ) -> None:
     """Interactive launcher for RL / SFT jobs."""
-    candidates = discover_configs(list(config_paths), requested_type=train_type if train_type != "auto" else None)
-    selection = prompt_for_config(candidates, requested_type=train_type if train_type != "auto" else None)
+    candidates = discover_configs(
+        list(config_paths), requested_type=train_type if train_type != "auto" else None
+    )
+    selection = prompt_for_config(
+        candidates, requested_type=train_type if train_type != "auto" else None
+    )
     effective_type = train_type if train_type != "auto" else selection.train_type
     if effective_type not in {"rl", "sft"}:
-        effective_type = click.prompt("Detected config type is ambiguous. Enter type", type=click.Choice(["rl", "sft"]))
+        effective_type = click.prompt(
+            "Detected config type is ambiguous. Enter type", type=click.Choice(["rl", "sft"])
+        )
     cfg_path = selection.path
     click.echo(f"Using config: {cfg_path} ({effective_type})")
@@ -219,11 +263,14 @@ def train_command(
         )
-def _wait_for_training_file(backend_base: str, api_key: str, file_id: str, *, timeout: float = 120.0) -> None:
+def _wait_for_training_file(
+    backend_base: str, api_key: str, file_id: str, *, timeout: float = 120.0
+) -> None:
     url = f"{backend_base}/learning/files/{file_id}"
     headers = {"Authorization": f"Bearer {api_key}"}
     elapsed = 0.0
     interval = 2.0
+    first_check = True
     while True:
         resp = http_get(url, headers=headers, timeout=30.0)
         if resp.status_code == 200:
@@ -231,17 +278,55 @@ def _wait_for_training_file(backend_base: str, api_key: str, file_id: str, *, ti
                 data = resp.json()
             except Exception:
                 data = {}
-            status = str(data.get("status") or data.get("state") or data.get("storage_state") or "ready").lower()
+            status = str(
+                data.get("status") or data.get("state") or data.get("storage_state") or "ready"
+            ).lower()
+            if first_check:
+                click.echo(f"File uploaded successfully (id={file_id}, status={status})")
+                first_check = False
             if status in {"ready", "uploaded", "stored", "complete"}:
+                click.echo(f"✓ Training file ready (status={status})")
                 return
+            # Show progress for processing states
+            if status in {"processing", "pending", "validating"}:
+                click.echo(
+                    f"  Waiting for file processing... (status={status}, {elapsed:.0f}s elapsed)"
+                )
         elif resp.status_code == 404:
             # Keep polling; object may not be visible yet
-            pass
+            if first_check:
+                click.echo(f"Waiting for file {file_id} to become visible...")
+                first_check = False
+        elif resp.status_code in {401, 403}:
+            # Auth errors won't resolve by polling - fail immediately
+            try:
+                error_body = resp.json()
+            except Exception:
+                error_body = resp.text[:400]
+            click.echo(f"\n[ERROR] Authentication failed when checking training file:")
+            click.echo(f"  URL: {url}")
+            click.echo(f"  Status: {resp.status_code}")
+            click.echo(f"  Response: {error_body}")
+            click.echo(f"  API key: {mask_value(api_key)}")
+            raise click.ClickException(
+                f"Authentication error ({resp.status_code}). "
+                "Check that your SYNTH_API_KEY is valid and has permission to access this organization's files."
+            )
         else:
-            click.echo(f"[WARN] Unexpected response while checking training file {file_id}: {resp.status_code}")
+            # Other errors - show details but keep polling
+            try:
+                error_body = resp.json()
+            except Exception:
+                error_body = resp.text[:400]
+            click.echo(f"[WARN] Unexpected response checking file {file_id}:")
+            click.echo(f"  URL: {url}")
+            click.echo(f"  Status: {resp.status_code}")
+            click.echo(f"  Response: {error_body}")
         if elapsed >= timeout:
-            raise click.ClickException(f"Training file {file_id} not ready after {timeout:.0f}s")
+            raise click.ClickException(
+                f"Training file {file_id} not ready after {timeout:.0f}s (last status: {resp.status_code})"
+            )
         sleep(interval)
         elapsed += interval
@@ -259,7 +344,11 @@ def handle_rl(
     poll_timeout: float,
     poll_interval: float,
 ) -> None:
-    overrides: Dict[str, Any] = {"backend": backend_base, "task_url": task_url_override, "model": model_override}
+    overrides: Dict[str, Any] = {
+        "backend": backend_base,
+        "task_url": task_url_override,
+        "model": model_override,
+    }
     build = build_rl_payload(
         config_path=cfg_path,
         task_url=task_url_override or os.environ.get("TASK_APP_URL", ""),
@@ -271,13 +360,17 @@ def handle_rl(
     verify_url = f"{backend_base}/rl/verify_task_app"
     verify_headers = {"Authorization": f"Bearer {synth_key}", "Content-Type": "application/json"}
     try:
-        vresp = http_post(verify_url, headers=verify_headers, json_body={"endpoint_base_url": build.task_url})
+        vresp = http_post(
+            verify_url, headers=verify_headers, json_body={"endpoint_base_url": build.task_url}
+        )
         try:
             vjs = vresp.json()
         except Exception:
             vjs = {"status": vresp.status_code, "text": (vresp.text or "")[:400]}
     except Exception as _ve:
-        raise click.ClickException(f"Task app verification call failed: {type(_ve).__name__}: {_ve}") from _ve
+        raise click.ClickException(
+            f"Task app verification call failed: {type(_ve).__name__}: {_ve}"
+        ) from _ve
     if vresp.status_code >= 400:
         click.echo("Task app verification error:\n" + preview_json(vjs, limit=800))
         raise click.ClickException(f"Verification failed with status {vresp.status_code}")
@@ -379,55 +472,102 @@ def handle_sft(
             validate_sft_jsonl(build.validation_file)
         upload_url = f"{backend_base}/learning/files"
-        click.echo(f"Uploading dataset {build.train_file}")
+        click.echo(f"\n=== Uploading Training Data ===")
+        click.echo(f"Dataset: {build.train_file}")
+        click.echo(f"Destination: {upload_url}")
         if dry_run:
             click.echo("Dry run: skipping upload")
             train_file_id = "dry-run-train"
             val_file_id = None
         else:
-            resp = post_multipart(upload_url, api_key=synth_key, file_field="file", file_path=build.train_file)
-            js = resp.json() if resp.headers.get("content-type", "").startswith("application/json") else {}
+            resp = post_multipart(
+                upload_url, api_key=synth_key, file_field="file", file_path=build.train_file
+            )
+            js = (
+                resp.json()
+                if resp.headers.get("content-type", "").startswith("application/json")
+                else {}
+            )
             if resp.status_code >= 400 or "id" not in js:
-                raise click.ClickException(f"Training file upload failed ({resp.status_code}): {js or resp.text[:200]}")
+                click.echo(f"\n[ERROR] Training file upload failed:")
+                click.echo(f"  URL: {upload_url}")
+                click.echo(f"  Status: {resp.status_code}")
+                click.echo(f"  Response: {js or resp.text[:400]}")
+                click.echo(f"  File: {build.train_file}")
+                raise click.ClickException(
+                    f"Training file upload failed with status {resp.status_code}"
+                )
             train_file_id = js["id"]
+            click.echo(f"✓ Training file uploaded (id={train_file_id})")
             val_file_id = None
             if build.validation_file:
-                click.echo(f"Uploading validation dataset {build.validation_file}")
-                vresp = post_multipart(upload_url, api_key=synth_key, file_field="file", file_path=build.validation_file)
-                vjs = vresp.json() if vresp.headers.get("content-type", "").startswith("application/json") else {}
+                click.echo(f"Uploading validation dataset: {build.validation_file}")
+                vresp = post_multipart(
+                    upload_url,
+                    api_key=synth_key,
+                    file_field="file",
+                    file_path=build.validation_file,
+                )
+                vjs = (
+                    vresp.json()
+                    if vresp.headers.get("content-type", "").startswith("application/json")
+                    else {}
+                )
                 if vresp.status_code < 400 and "id" in vjs:
                     val_file_id = vjs["id"]
+                    click.echo(f"✓ Validation file uploaded (id={val_file_id})")
                 else:
-                    click.echo(f"[WARN] Validation upload failed: {vresp.status_code} {vjs or vresp.text[:200]}")
+                    click.echo(
+                        f"[WARN] Validation upload failed ({vresp.status_code}): {vjs or vresp.text[:200]}"
+                    )
         payload = dict(build.payload)
         payload["training_file_id"] = train_file_id
         if val_file_id:
-            payload.setdefault("metadata", {}).setdefault("effective_config", {}).setdefault("data", {})["validation_files"] = [val_file_id]
+            payload.setdefault("metadata", {}).setdefault("effective_config", {}).setdefault(
+                "data", {}
+            )["validation_files"] = [val_file_id]
+        click.echo(f"\n=== Checking File Processing Status ===")
         try:
             _wait_for_training_file(backend_base, synth_key, train_file_id)
         except click.ClickException as exc:
             raise click.ClickException(f"Training file {train_file_id} not ready: {exc}") from exc
-        click.echo("FFT job payload:\n" + preview_json(payload, limit=800))
+        click.echo(f"\n=== Creating Training Job ===")
+        click.echo("Job payload preview:")
+        click.echo(preview_json(payload, limit=800))
         if dry_run:
             click.echo("Dry run: skipping job submission")
             return
         create_url = f"{backend_base}/learning/jobs"
         headers = {"Authorization": f"Bearer {synth_key}", "Content-Type": "application/json"}
+        click.echo(f"\nPOST {create_url}")
         resp = http_post(create_url, headers=headers, json_body=payload)
-        js = resp.json() if resp.headers.get("content-type", "").startswith("application/json") else {}
-        click.echo(f"Response {resp.status_code}: {preview_json(js, limit=400)}")
+        js = (
+            resp.json()
+            if resp.headers.get("content-type", "").startswith("application/json")
+            else {}
+        )
         if resp.status_code not in (200, 201):
-            raise click.ClickException("Failed to create learning job")
+            click.echo(f"\n[ERROR] Job creation failed:")
+            click.echo(f"  URL: {create_url}")
+            click.echo(f"  Status: {resp.status_code}")
+            click.echo(f"  Response: {preview_json(js, limit=600)}")
+            raise click.ClickException(f"Job creation failed with status {resp.status_code}")
         job_id = js.get("job_id") or js.get("id")
         if not job_id:
             raise click.ClickException("Response missing job id")
+        click.echo(f"✓ Job created (id={job_id})")
+        click.echo(f"\n=== Starting Training Job ===")
         start_url = f"{backend_base}/learning/jobs/{job_id}/start"
-        click.echo(f"POST {start_url} (start)")
-        _ = http_post(start_url, headers=headers, json_body={})
+        click.echo(f"POST {start_url}")
+        start_resp = http_post(start_url, headers=headers, json_body={})
+        if start_resp.status_code not in (200, 201):
+            click.echo(f"[WARN] Job start returned status {start_resp.status_code}")
+        else:
+            click.echo(f"✓ Job started")
         if not poll:
             click.echo(f"Started job {job_id} (polling disabled)")

synth_ai/api/train/config_finder.py CHANGED Viewed

@@ -1,5 +1,7 @@
 from __future__ import annotations
+import json
+import os
 from dataclasses import dataclass
 from pathlib import Path
 from typing import Iterable
@@ -9,6 +11,7 @@ import click
 from .utils import REPO_ROOT, load_toml, preview_json
 _SKIP_DIRS = {".git", "__pycache__", ".venv", "node_modules", "dist", "build"}
+_STATE_FILE = os.path.expanduser("~/.synth-ai/demo.json")
 @dataclass(slots=True)
@@ -17,9 +20,43 @@ class ConfigCandidate:
     train_type: str  # "rl", "sft", or "unknown"
+def _load_last_config() -> Path | None:
+    """Load the last used training config path from state file."""
+    try:
+        if os.path.isfile(_STATE_FILE):
+            with open(_STATE_FILE) as fh:
+                data = json.load(fh)
+                if isinstance(data, dict):
+                    last_config = data.get("LAST_CONFIG")
+                    if last_config:
+                        path = Path(last_config).resolve()
+                        if path.exists():
+                            return path
+    except Exception:
+        pass
+    return None
+def _save_last_config(config_path: Path) -> None:
+    """Save the last used training config path to state file."""
+    try:
+        data = {}
+        if os.path.isfile(_STATE_FILE):
+            with open(_STATE_FILE) as fh:
+                data = json.load(fh) or {}
+        if not isinstance(data, dict):
+            data = {}
+        data["LAST_CONFIG"] = str(config_path.resolve())
+        os.makedirs(os.path.dirname(_STATE_FILE), exist_ok=True)
+        with open(_STATE_FILE, "w") as fh:
+            json.dump(data, fh)
+    except Exception:
+        pass
 def _iter_candidate_paths() -> Iterable[Path]:
     seen: set[Path] = set()
     # Prioritize current working directory first
     try:
         cwd = Path.cwd().resolve()
@@ -135,23 +172,41 @@ def discover_configs(explicit: list[str], *, requested_type: str | None) -> list
     return candidates
-def prompt_for_config(candidates: list[ConfigCandidate], *, requested_type: str | None) -> ConfigCandidate:
+def prompt_for_config(
+    candidates: list[ConfigCandidate], *, requested_type: str | None
+) -> ConfigCandidate:
     if not candidates:
         raise click.ClickException("No training configs found. Pass --config explicitly.")
+    # Check for last used config and move it to the top if found
+    last_config = _load_last_config()
+    default_idx = 1
+    if last_config:
+        for idx, cand in enumerate(candidates):
+            if cand.path.resolve() == last_config:
+                # Move last used config to the front
+                candidates.insert(0, candidates.pop(idx))
+                break
     click.echo("Select a training config:")
     for idx, cand in enumerate(candidates, start=1):
         label = cand.train_type if cand.train_type != "unknown" else "?"
-        click.echo(f"  {idx}) [{label}] {cand.path}")
+        last_marker = " (last used)" if last_config and cand.path.resolve() == last_config else ""
+        click.echo(f"  {idx}) [{label}] {cand.path}{last_marker}")
     click.echo("  0) Abort")
-    choice = click.prompt("Enter choice", type=int)
+    choice = click.prompt("Enter choice", type=int, default=default_idx)
     if choice == 0:
         raise click.ClickException("Aborted by user")
     if choice < 0 or choice > len(candidates):
         raise click.ClickException("Invalid selection")
     selection = candidates[choice - 1]
+    # Save this config as the last used
+    _save_last_config(selection.path)
     try:
         data = load_toml(selection.path)
         preview = preview_json({k: data.get(k) for k in list(data.keys())[:4]}, limit=320)

synth_ai/api/train/env_resolver.py CHANGED Viewed

@@ -56,12 +56,12 @@ class EnvResolver:
 def _collect_default_candidates(config_path: Path | None) -> list[Path]:
     candidates: list[Path] = []
     cwd = Path.cwd()
     # Prioritize CWD env files
     cwd_env = cwd / ".env"
     if cwd_env.exists():
         candidates.append(cwd_env.resolve())
     # Search for additional .env files in CWD subdirectories
     for sub in cwd.glob("**/.env"):
         try:
@@ -76,13 +76,13 @@ def _collect_default_candidates(config_path: Path | None) -> list[Path]:
         if len(candidates) >= 20:
             break
         candidates.append(resolved)
     # Then config path env file
     if config_path:
         cfg_env = config_path.parent / ".env"
         if cfg_env.exists():
             candidates.append(cfg_env.resolve())
     # Then repo env files
     repo_env = REPO_ROOT / ".env"
     if repo_env.exists():
@@ -90,7 +90,7 @@ def _collect_default_candidates(config_path: Path | None) -> list[Path]:
     examples_env = REPO_ROOT / "examples" / ".env"
     if examples_env.exists():
         candidates.append(examples_env.resolve())
     # Search shallow depth for additional .env files in examples
     for sub in (REPO_ROOT / "examples").glob("**/.env"):
         try:
@@ -105,7 +105,7 @@ def _collect_default_candidates(config_path: Path | None) -> list[Path]:
         if len(candidates) >= 20:
             break
         candidates.append(resolved)
     deduped: list[Path] = []
     for path in candidates:
         if path not in deduped:
@@ -156,8 +156,27 @@ def resolve_env(
                 raise click.ClickException(f"Env file not found: {path}")
         resolver = EnvResolver(provided)
     else:
-        resolver = EnvResolver(_collect_default_candidates(config_path))
-        resolver.select_new_env()  # force user selection even if one candidate
+        # Check for saved .env path from demo command
+        try:
+            from synth_ai.demos.demo_task_apps.core import load_env_file_path
+            saved_env_path = load_env_file_path()
+            if saved_env_path:
+                saved_path = Path(saved_env_path)
+                if saved_path.exists():
+                    click.echo(f"Using .env file: {saved_path}")
+                    resolver = EnvResolver([saved_path])
+                else:
+                    # Saved path no longer exists, fall back to prompt
+                    resolver = EnvResolver(_collect_default_candidates(config_path))
+                    resolver.select_new_env()
+            else:
+                resolver = EnvResolver(_collect_default_candidates(config_path))
+                resolver.select_new_env()
+        except Exception:
+            # If import fails or any error, fall back to original behavior
+            resolver = EnvResolver(_collect_default_candidates(config_path))
+            resolver.select_new_env()
     # Preload selected .env keys into process env so downstream lookups succeed
     try:
@@ -207,10 +226,10 @@ def _resolve_key(resolver: EnvResolver, spec: KeySpec) -> str:
                     break
         if env_val:
             click.echo(f"Found {spec.name} in current sources: {mask_value(env_val)}")
-            if _prompt_yes_no(f"Use this value for {spec.name}?", default=True):
-                _maybe_persist(resolver, spec, env_val)
-                os.environ[spec.name] = env_val
-                return env_val
+            # Automatically use and persist the value (no prompt)
+            _maybe_persist(resolver, spec, env_val)
+            os.environ[spec.name] = env_val
+            return env_val
         options: list[tuple[str, Callable[[], str | None]]] = []
         def _enter_manual() -> str:
@@ -254,8 +273,7 @@ def _resolve_key(resolver: EnvResolver, spec: KeySpec) -> str:
 def _maybe_persist(resolver: EnvResolver, spec: KeySpec, value: str) -> None:
-    if not _prompt_yes_no(f"Save {spec.name} to {resolver.current_path}?", default=True):
-        return
+    # Automatically save (no prompt)
     resolver.set_value(spec.name, value)
     click.echo(f"Saved {spec.name} to {resolver.current_path}")

synth_ai/api/train/pollers.py CHANGED Viewed

@@ -1,6 +1,7 @@
 from __future__ import annotations
 from dataclasses import dataclass
+from datetime import datetime
 from typing import Any, Mapping
 import click
@@ -15,7 +16,9 @@ class PollOutcome:
 class JobPoller:
-    def __init__(self, base_url: str, api_key: str, *, interval: float = 5.0, timeout: float = 3600.0) -> None:
+    def __init__(
+        self, base_url: str, api_key: str, *, interval: float = 5.0, timeout: float = 3600.0
+    ) -> None:
         self.base_url = ensure_api_base(base_url)
         self.api_key = api_key
         self.interval = interval
@@ -35,9 +38,14 @@ class JobPoller:
         while elapsed <= self.timeout:
             try:
                 resp = http_get(f"{self.base_url}{path}", headers=self._headers())
-                info = resp.json() if resp.headers.get("content-type", "").startswith("application/json") else {}
+                info = (
+                    resp.json()
+                    if resp.headers.get("content-type", "").startswith("application/json")
+                    else {}
+                )
                 status = (info.get("status") or info.get("state") or "").lower()
-                click.echo(f"[poll] {elapsed:.0f}s status={status}")
+                timestamp = datetime.now().strftime("%H:%M:%S")
+                click.echo(f"[poll] {timestamp} {elapsed:.0f}s status={status}")
                 if status in {"succeeded", "failed", "cancelled", "canceled", "completed"}:
                     break
             except Exception as exc:  # pragma: no cover - network failures

synth-ai 0.2.9.dev4__py3-none-any.whl → 0.2.9.dev7__py3-none-any.whl

Potentially problematic release.

synth-ai 0.2.9.dev4__py3-none-any.whl → 0.2.9.dev7__py3-none-any.whl