synth-ai 0.2.16__py3-none-any.whl → 0.2.19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synth-ai might be problematic. Click here for more details.
- examples/analyze_semantic_words.sh +2 -2
- examples/baseline/banking77_baseline.py +204 -0
- examples/baseline/crafter_baseline.py +407 -0
- examples/baseline/pokemon_red_baseline.py +326 -0
- examples/baseline/simple_baseline.py +56 -0
- examples/baseline/warming_up_to_rl_baseline.py +239 -0
- examples/blog_posts/gepa/README.md +355 -0
- examples/blog_posts/gepa/configs/banking77_gepa_local.toml +95 -0
- examples/blog_posts/gepa/configs/banking77_gepa_test.toml +82 -0
- examples/blog_posts/gepa/configs/banking77_mipro_local.toml +52 -0
- examples/blog_posts/gepa/configs/hotpotqa_gepa_local.toml +59 -0
- examples/blog_posts/gepa/configs/hotpotqa_gepa_qwen.toml +36 -0
- examples/blog_posts/gepa/configs/hotpotqa_mipro_local.toml +53 -0
- examples/blog_posts/gepa/configs/hover_gepa_local.toml +59 -0
- examples/blog_posts/gepa/configs/hover_gepa_qwen.toml +36 -0
- examples/blog_posts/gepa/configs/hover_mipro_local.toml +53 -0
- examples/blog_posts/gepa/configs/ifbench_gepa_local.toml +59 -0
- examples/blog_posts/gepa/configs/ifbench_gepa_qwen.toml +36 -0
- examples/blog_posts/gepa/configs/ifbench_mipro_local.toml +53 -0
- examples/blog_posts/gepa/configs/pupa_gepa_local.toml +60 -0
- examples/blog_posts/gepa/configs/pupa_mipro_local.toml +54 -0
- examples/blog_posts/gepa/deploy_banking77_task_app.sh +41 -0
- examples/blog_posts/gepa/gepa_baseline.py +204 -0
- examples/blog_posts/gepa/query_prompts_example.py +97 -0
- examples/blog_posts/gepa/run_gepa_banking77.sh +87 -0
- examples/blog_posts/gepa/task_apps.py +105 -0
- examples/blog_posts/gepa/test_gepa_local.sh +67 -0
- examples/blog_posts/gepa/verify_banking77_setup.sh +123 -0
- examples/blog_posts/pokemon_vl/README.md +98 -0
- examples/blog_posts/pokemon_vl/configs/eval_gpt5nano.toml +26 -0
- examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +27 -0
- examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
- examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
- examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +43 -0
- examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
- examples/blog_posts/pokemon_vl/extract_images.py +239 -0
- examples/blog_posts/pokemon_vl/pokemon_vl_baseline.py +326 -0
- examples/blog_posts/pokemon_vl/run_eval_extract_images.py +209 -0
- examples/blog_posts/pokemon_vl/run_qwen_eval_extract_images.py +212 -0
- examples/blog_posts/pokemon_vl/text_box_analysis.md +106 -0
- examples/blog_posts/warming_up_to_rl/ARCHITECTURE.md +195 -0
- examples/blog_posts/warming_up_to_rl/FINAL_TEST_RESULTS.md +127 -0
- examples/blog_posts/warming_up_to_rl/INFERENCE_SUCCESS.md +132 -0
- examples/blog_posts/warming_up_to_rl/README.md +158 -0
- examples/blog_posts/warming_up_to_rl/SMOKE_TESTING.md +164 -0
- examples/blog_posts/warming_up_to_rl/SMOKE_TEST_COMPLETE.md +253 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_baseline_qwen32b_10x20.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b_10x20.toml +26 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
- examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
- examples/blog_posts/warming_up_to_rl/configs/smoke_test.toml +75 -0
- examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +91 -0
- examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
- examples/blog_posts/warming_up_to_rl/warming_up_to_rl_baseline.py +187 -0
- examples/dev/qwen3_32b_qlora_4xh100.toml +5 -0
- examples/multi_step/configs/VERILOG_REWARDS.md +4 -0
- examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +4 -0
- examples/multi_step/configs/crafter_rl_outcome.toml +2 -1
- examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +65 -107
- examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +2 -1
- examples/multi_step/configs/crafter_rl_stepwise_simple.toml +2 -1
- examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
- examples/multi_step/configs/verilog_rl_lora.toml +80 -123
- examples/qwen_coder/configs/coder_lora_30b.toml +1 -3
- examples/qwen_coder/configs/coder_lora_4b.toml +4 -1
- examples/qwen_coder/configs/coder_lora_small.toml +1 -3
- examples/qwen_vl/README.md +10 -12
- examples/qwen_vl/SETUP_COMPLETE.md +7 -8
- examples/qwen_vl/VISION_TESTS_COMPLETE.md +2 -3
- examples/qwen_vl/collect_data_via_cli.md +76 -84
- examples/qwen_vl/collect_vision_traces.py +4 -4
- examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +40 -57
- examples/qwen_vl/configs/crafter_vlm_sft_example.toml +1 -2
- examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +20 -37
- examples/qwen_vl/configs/eval_gpt5nano_vision.toml +21 -40
- examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
- examples/qwen_vl/configs/{filter_qwen2vl_sft.toml → filter_qwen3vl_sft.toml} +4 -5
- examples/qwen_vl/configs/filter_vision_sft.toml +2 -3
- examples/qwen_vl/crafter_qwen_vl_agent.py +5 -5
- examples/qwen_vl/run_vision_comparison.sh +6 -7
- examples/rl/README.md +5 -5
- examples/rl/configs/rl_from_base_qwen.toml +26 -1
- examples/rl/configs/rl_from_base_qwen17.toml +6 -2
- examples/rl/task_app/README.md +1 -2
- examples/rl/task_app/math_single_step.py +2 -2
- examples/run_crafter_demo.sh +2 -2
- examples/sft/README.md +1 -1
- examples/sft/configs/crafter_fft_qwen0p6b.toml +4 -1
- examples/sft/configs/crafter_lora_qwen0p6b.toml +4 -1
- examples/swe/task_app/README.md +32 -2
- examples/swe/task_app/grpo_swe_mini.py +4 -0
- examples/swe/task_app/hosted/envs/crafter/react_agent.py +1 -1
- examples/swe/task_app/hosted/envs/mini_swe/environment.py +37 -10
- examples/swe/task_app/hosted/inference/openai_client.py +4 -38
- examples/swe/task_app/hosted/policy_routes.py +17 -0
- examples/swe/task_app/hosted/rollout.py +4 -2
- examples/swe/task_app/morph_backend.py +178 -0
- examples/task_apps/banking77/__init__.py +6 -0
- examples/task_apps/banking77/banking77_task_app.py +841 -0
- examples/task_apps/banking77/deploy_wrapper.py +46 -0
- examples/task_apps/crafter/CREATE_SFT_DATASET.md +4 -0
- examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +4 -0
- examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +4 -0
- examples/task_apps/crafter/task_app/README.md +1 -1
- examples/task_apps/crafter/task_app/grpo_crafter.py +90 -5
- examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +1 -1
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +4 -26
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -2
- examples/task_apps/crafter/task_app/synth_envs_hosted/hosted_app.py +49 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +372 -107
- examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +81 -12
- examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +82 -11
- examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +194 -1
- examples/task_apps/enron/task_app/grpo_enron_task_app.py +1 -1
- examples/task_apps/gepa_benchmarks/__init__.py +7 -0
- examples/task_apps/gepa_benchmarks/common.py +260 -0
- examples/task_apps/gepa_benchmarks/hotpotqa_task_app.py +507 -0
- examples/task_apps/gepa_benchmarks/hover_task_app.py +436 -0
- examples/task_apps/gepa_benchmarks/ifbench_task_app.py +563 -0
- examples/task_apps/gepa_benchmarks/pupa_task_app.py +460 -0
- examples/task_apps/math/README.md +1 -2
- examples/task_apps/pokemon_red/README.md +3 -4
- examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +4 -0
- examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +6 -5
- examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +1 -2
- examples/task_apps/pokemon_red/task_app.py +288 -39
- examples/task_apps/sokoban/README.md +2 -3
- examples/task_apps/verilog/eval_groq_qwen32b.toml +12 -14
- examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +1 -1
- examples/vlm/configs/crafter_vlm_gpt4o.toml +4 -1
- examples/warming_up_to_rl/configs/crafter_fft.toml +4 -1
- examples/warming_up_to_rl/configs/crafter_fft_4b.toml +0 -2
- examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +3 -2
- examples/warming_up_to_rl/run_local_rollout_traced.py +1 -1
- examples/warming_up_to_rl/task_app/README.md +1 -1
- examples/warming_up_to_rl/task_app/grpo_crafter.py +185 -5
- examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +3 -27
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +49 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +156 -45
- examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +37 -4
- examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +33 -3
- examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +67 -0
- examples/workflows/math_rl/configs/rl_from_base_qwen.toml +27 -0
- examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +6 -0
- synth_ai/api/train/builders.py +99 -4
- synth_ai/api/train/cli.py +516 -26
- synth_ai/api/train/config_finder.py +13 -2
- synth_ai/api/train/configs/__init__.py +23 -2
- synth_ai/api/train/configs/prompt_learning.py +442 -0
- synth_ai/api/train/configs/rl.py +61 -7
- synth_ai/api/train/configs/sft.py +6 -2
- synth_ai/api/train/configs/shared.py +59 -2
- synth_ai/api/train/task_app.py +1 -1
- synth_ai/api/train/validators.py +277 -0
- synth_ai/auth/credentials.py +119 -0
- synth_ai/baseline/__init__.py +25 -0
- synth_ai/baseline/config.py +209 -0
- synth_ai/baseline/discovery.py +214 -0
- synth_ai/baseline/execution.py +146 -0
- synth_ai/cli/__init__.py +94 -18
- synth_ai/cli/__main__.py +0 -0
- synth_ai/cli/claude.py +70 -0
- synth_ai/cli/codex.py +84 -0
- synth_ai/cli/commands/__init__.py +18 -0
- synth_ai/cli/commands/baseline/__init__.py +12 -0
- synth_ai/cli/commands/baseline/core.py +637 -0
- synth_ai/cli/commands/baseline/list.py +93 -0
- synth_ai/cli/commands/demo/__init__.py +6 -0
- synth_ai/cli/commands/demo/core.py +163 -0
- synth_ai/cli/commands/eval/__init__.py +19 -0
- synth_ai/cli/commands/eval/core.py +1112 -0
- synth_ai/cli/commands/eval/errors.py +81 -0
- synth_ai/cli/commands/eval/validation.py +133 -0
- synth_ai/cli/commands/filter/__init__.py +12 -0
- synth_ai/cli/commands/filter/core.py +424 -0
- synth_ai/cli/commands/filter/errors.py +55 -0
- synth_ai/cli/commands/filter/validation.py +77 -0
- synth_ai/cli/commands/help/__init__.py +177 -0
- synth_ai/cli/commands/help/core.py +72 -0
- synth_ai/cli/commands/smoke/__init__.py +7 -0
- synth_ai/cli/commands/smoke/core.py +1436 -0
- synth_ai/cli/commands/status/__init__.py +64 -0
- synth_ai/cli/commands/status/client.py +192 -0
- synth_ai/cli/commands/status/config.py +92 -0
- synth_ai/cli/commands/status/errors.py +20 -0
- synth_ai/cli/commands/status/formatters.py +164 -0
- synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
- synth_ai/cli/commands/status/subcommands/files.py +79 -0
- synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
- synth_ai/cli/commands/status/subcommands/models.py +79 -0
- synth_ai/cli/commands/status/subcommands/pricing.py +22 -0
- synth_ai/cli/commands/status/subcommands/runs.py +81 -0
- synth_ai/cli/commands/status/subcommands/summary.py +47 -0
- synth_ai/cli/commands/status/subcommands/usage.py +203 -0
- synth_ai/cli/commands/status/utils.py +114 -0
- synth_ai/cli/commands/train/__init__.py +53 -0
- synth_ai/cli/commands/train/core.py +21 -0
- synth_ai/cli/commands/train/errors.py +117 -0
- synth_ai/cli/commands/train/judge_schemas.py +200 -0
- synth_ai/cli/commands/train/judge_validation.py +305 -0
- synth_ai/cli/commands/train/validation.py +386 -0
- synth_ai/cli/demo.py +30 -158
- synth_ai/cli/deploy/__init__.py +43 -0
- synth_ai/cli/deploy.py +162 -0
- synth_ai/cli/eval/__init__.py +36 -0
- synth_ai/cli/eval/core.py +5 -0
- synth_ai/cli/eval/errors.py +31 -0
- synth_ai/cli/eval/validation.py +5 -0
- synth_ai/cli/filter/__init__.py +28 -0
- synth_ai/cli/filter/core.py +5 -0
- synth_ai/cli/filter/errors.py +23 -0
- synth_ai/cli/filter/validation.py +5 -0
- synth_ai/cli/legacy_root_backup.py +14 -8
- synth_ai/cli/modal_serve/__init__.py +12 -0
- synth_ai/cli/modal_serve/core.py +14 -0
- synth_ai/cli/modal_serve/errors.py +8 -0
- synth_ai/cli/modal_serve/validation.py +11 -0
- synth_ai/cli/opencode.py +107 -0
- synth_ai/cli/root.py +9 -5
- synth_ai/cli/serve/__init__.py +12 -0
- synth_ai/cli/serve/core.py +14 -0
- synth_ai/cli/serve/errors.py +8 -0
- synth_ai/cli/serve/validation.py +11 -0
- synth_ai/cli/setup.py +20 -265
- synth_ai/cli/status.py +7 -126
- synth_ai/cli/task_app_deploy.py +1 -10
- synth_ai/cli/task_app_modal_serve.py +4 -9
- synth_ai/cli/task_app_serve.py +4 -11
- synth_ai/cli/task_apps.py +51 -1480
- synth_ai/cli/train/__init__.py +12 -0
- synth_ai/cli/train/core.py +21 -0
- synth_ai/cli/train/errors.py +8 -0
- synth_ai/cli/train/validation.py +24 -0
- synth_ai/cli/train.py +1 -14
- synth_ai/demos/crafter/grpo_crafter_task_app.py +1 -1
- synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
- synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +7 -4
- synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +9 -5
- synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +4 -3
- synth_ai/environments/examples/red/engine.py +33 -12
- synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
- synth_ai/environments/examples/red/environment.py +26 -0
- synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
- synth_ai/http.py +12 -0
- synth_ai/judge_schemas.py +10 -10
- synth_ai/learning/__init__.py +10 -0
- synth_ai/learning/prompt_learning_client.py +276 -0
- synth_ai/learning/prompt_learning_types.py +184 -0
- synth_ai/learning/rl/client.py +3 -1
- synth_ai/pricing/__init__.py +2 -0
- synth_ai/pricing/model_pricing.py +57 -0
- synth_ai/streaming/__init__.py +29 -0
- synth_ai/streaming/config.py +94 -0
- synth_ai/streaming/handlers.py +518 -0
- synth_ai/streaming/streamer.py +320 -0
- synth_ai/streaming/types.py +95 -0
- synth_ai/task/apps/__init__.py +1 -0
- synth_ai/task/config.py +2 -0
- synth_ai/task/tracing_utils.py +25 -25
- synth_ai/task/validators.py +45 -9
- synth_ai/task_app_cfgs.py +21 -0
- synth_ai/tracing_v3/config.py +162 -19
- synth_ai/tracing_v3/constants.py +1 -1
- synth_ai/tracing_v3/db_config.py +24 -38
- synth_ai/tracing_v3/migration_helper.py +1 -2
- synth_ai/tracing_v3/storage/config.py +47 -13
- synth_ai/tracing_v3/storage/factory.py +3 -3
- synth_ai/tracing_v3/turso/daemon.py +113 -11
- synth_ai/tracing_v3/turso/native_manager.py +92 -16
- synth_ai/types.py +8 -0
- synth_ai/urls.py +11 -0
- synth_ai/utils/__init__.py +30 -1
- synth_ai/utils/agents.py +74 -0
- synth_ai/utils/bin.py +39 -0
- synth_ai/utils/cli.py +149 -5
- synth_ai/utils/env.py +40 -33
- synth_ai/utils/http.py +4 -1
- synth_ai/utils/json.py +72 -0
- synth_ai/utils/modal.py +285 -3
- synth_ai/utils/paths.py +48 -0
- synth_ai/utils/uvicorn.py +113 -0
- {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/METADATA +109 -6
- {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/RECORD +291 -142
- examples/qwen_vl/configs/eval_qwen2vl_vision.toml +0 -44
- synth_ai/cli/tui.py +0 -62
- synth_ai/tui/__init__.py +0 -5
- synth_ai/tui/__main__.py +0 -13
- synth_ai/tui/cli/__init__.py +0 -1
- synth_ai/tui/cli/query_experiments.py +0 -164
- synth_ai/tui/cli/query_experiments_v3.py +0 -164
- synth_ai/tui/dashboard.py +0 -911
- {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,203 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import contextlib
|
|
4
|
+
from datetime import UTC, datetime, timedelta
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
import click
|
|
8
|
+
|
|
9
|
+
from ..client import StatusAPIClient
|
|
10
|
+
from ..errors import StatusAPIError
|
|
11
|
+
from ..formatters import console
|
|
12
|
+
from ..utils import common_options, resolve_context_config
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _parse_iso(ts: str | None) -> datetime | None:
|
|
16
|
+
if not ts:
|
|
17
|
+
return None
|
|
18
|
+
try:
|
|
19
|
+
# Python 3.11 handles 'YYYY-mm-ddTHH:MM:SS.ssssss+00:00' and '...Z'
|
|
20
|
+
if ts.endswith("Z"):
|
|
21
|
+
ts = ts.replace("Z", "+00:00")
|
|
22
|
+
return datetime.fromisoformat(ts)
|
|
23
|
+
except Exception:
|
|
24
|
+
return None
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _extract_total_usd(events: list[dict[str, Any]]) -> tuple[float, int]:
|
|
28
|
+
"""Return (usd_total, tokens_total) for an arbitrary job's events.
|
|
29
|
+
|
|
30
|
+
Strategy:
|
|
31
|
+
- Prefer a consolidated total from any *.completed event with total_usd
|
|
32
|
+
- Next, prefer any *.billing.end event with total_usd
|
|
33
|
+
- Otherwise, combine usage.recorded's usd_tokens with billing.sandboxes' usd
|
|
34
|
+
and sum token counts if present
|
|
35
|
+
Works for prompt learning and other job types that follow similar conventions.
|
|
36
|
+
"""
|
|
37
|
+
total_usd = 0.0
|
|
38
|
+
token_count = 0
|
|
39
|
+
|
|
40
|
+
# Prefer consolidated totals from completion events (any namespace)
|
|
41
|
+
for e in reversed(events):
|
|
42
|
+
typ = str(e.get("type") or "").lower()
|
|
43
|
+
if typ.endswith(".completed"):
|
|
44
|
+
data = e.get("data") or {}
|
|
45
|
+
try:
|
|
46
|
+
total_usd = float(data.get("total_usd") or 0.0)
|
|
47
|
+
except Exception:
|
|
48
|
+
total_usd = 0.0
|
|
49
|
+
# Try common token fields
|
|
50
|
+
tc = 0
|
|
51
|
+
for k in ("token_count_total", "token_count"):
|
|
52
|
+
try:
|
|
53
|
+
tc = int(data.get(k) or 0)
|
|
54
|
+
if tc:
|
|
55
|
+
break
|
|
56
|
+
except Exception:
|
|
57
|
+
pass
|
|
58
|
+
if not tc:
|
|
59
|
+
try:
|
|
60
|
+
tc = int((data.get("token_count_rollouts") or 0) + (data.get("token_count_mutation") or 0))
|
|
61
|
+
except Exception:
|
|
62
|
+
tc = 0
|
|
63
|
+
token_count = tc
|
|
64
|
+
return total_usd, token_count
|
|
65
|
+
|
|
66
|
+
# Next, billing.end if present with total_usd
|
|
67
|
+
for e in reversed(events):
|
|
68
|
+
typ = str(e.get("type") or "").lower()
|
|
69
|
+
if typ.endswith("billing.end"):
|
|
70
|
+
data = e.get("data") or {}
|
|
71
|
+
try:
|
|
72
|
+
total_usd = float(data.get("total_usd") or 0.0)
|
|
73
|
+
except Exception:
|
|
74
|
+
total_usd = 0.0
|
|
75
|
+
# token_count may not be present here; fall through to usage tokens calc
|
|
76
|
+
break
|
|
77
|
+
|
|
78
|
+
# Fallback: combine usage + sandboxes (prompt learning style); generic scan
|
|
79
|
+
usd_tokens = 0.0
|
|
80
|
+
sandbox_usd = 0.0
|
|
81
|
+
# token fields observed across tasks
|
|
82
|
+
token_fields = ("token_count_total", "token_count", "tokens_in", "tokens_out",
|
|
83
|
+
"token_count_rollouts", "token_count_mutation")
|
|
84
|
+
for e in events:
|
|
85
|
+
typ = str(e.get("type") or "").lower()
|
|
86
|
+
data = e.get("data") or {}
|
|
87
|
+
# generic usage-style aggregation
|
|
88
|
+
if "usage" in typ or typ.endswith("usage.recorded"):
|
|
89
|
+
with contextlib.suppress(Exception):
|
|
90
|
+
usd_tokens = float(data.get("usd_tokens") or data.get("usd_estimate") or 0.0)
|
|
91
|
+
# accumulate tokens if any
|
|
92
|
+
for k in token_fields:
|
|
93
|
+
with contextlib.suppress(Exception):
|
|
94
|
+
token_count += int(data.get(k) or 0)
|
|
95
|
+
# sandbox billing
|
|
96
|
+
if typ.endswith("billing.sandboxes"):
|
|
97
|
+
with contextlib.suppress(Exception):
|
|
98
|
+
sandbox_usd += float(data.get("usd") or 0.0)
|
|
99
|
+
return (total_usd or (usd_tokens + sandbox_usd)), token_count
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
@click.command("usage", help="Show recent usage (daily/weekly/monthly) and remaining budget if provided.")
@common_options()
@click.option("--budget-usd", type=float, default=None, help="Optional credit/budget to compute remaining.")
@click.option("--json", "output_json", is_flag=True, help="Emit machine-readable JSON.")
@click.pass_context
def usage_command(
    ctx: click.Context,
    base_url: str | None,
    api_key: str | None,
    timeout: float,
    budget_usd: float | None,
    output_json: bool,
) -> None:
    """Print USD, token and sandbox-hour usage for the last 1, 7 and 30 days.

    Jobs and their events are fetched once, starting at the 30-day cutoff, and
    each event is then bucketed into the daily, weekly and monthly windows by
    its ``created_at`` timestamp. Events without a parseable timestamp are
    treated as happening "now" and therefore count in every window.

    Args:
        ctx: Click context; may carry a shared backend configuration.
        base_url: Optional backend URL override.
        api_key: Optional API key override.
        timeout: HTTP timeout in seconds.
        budget_usd: Optional budget; the remaining amount is computed against
            the weekly USD total and floored at zero.
        output_json: Emit a JSON document instead of human-readable lines.

    Raises:
        click.ClickException: When listing jobs fails with a ``StatusAPIError``.
    """
    import asyncio
    import json as _json

    cfg = resolve_context_config(ctx, base_url=base_url, api_key=api_key, timeout=timeout)
    now = datetime.now(UTC)
    cutoffs = {
        "daily": now - timedelta(days=1),
        "weekly": now - timedelta(days=7),
        "monthly": now - timedelta(days=30),
    }
    # Fetch from the widest window; querying from the weekly cutoff would make
    # the monthly figures identical to the weekly ones.
    fetch_since = cutoffs["monthly"].isoformat()

    def _event_time(event: dict[str, Any]) -> datetime:
        stamp = _parse_iso(event.get("created_at"))
        if stamp is None:
            return now
        # Naive and aware datetimes cannot be compared; assume UTC when naive.
        return stamp if stamp.tzinfo is not None else stamp.replace(tzinfo=UTC)

    async def _run() -> dict[str, dict[str, float]]:
        totals: dict[str, dict[str, float]] = {
            name: {"usd": 0.0, "tokens": 0, "sandbox_seconds": 0.0} for name in cutoffs
        }
        async with StatusAPIClient(cfg) as client:
            try:
                jobs = await client.list_jobs(created_after=fetch_since)
            except StatusAPIError as exc:
                raise click.ClickException(f"Backend error: {exc}") from exc
            for job in jobs or []:
                job_id = str(job.get("job_id") or job.get("id") or "")
                if not job_id:
                    continue
                try:
                    events = await client.get_job_events(job_id, since=fetch_since)
                except StatusAPIError:
                    # Best effort: a job whose events cannot be read is skipped.
                    events = []
                if not events:
                    continue
                stamped = [(_event_time(event), event) for event in events]
                for name, cutoff in cutoffs.items():
                    window = [event for when, event in stamped if when >= cutoff]
                    usd, tokens = _extract_total_usd(window)
                    bucket = totals[name]
                    bucket["usd"] += usd
                    bucket["tokens"] += tokens
                    for event in window:
                        if not str(event.get("type") or "").lower().endswith("billing.sandboxes"):
                            continue
                        with contextlib.suppress(AttributeError, TypeError, ValueError):
                            bucket["sandbox_seconds"] += float((event.get("data") or {}).get("seconds") or 0.0)
        return totals

    totals = asyncio.run(_run())
    weekly_usd = float(totals["weekly"]["usd"])

    if output_json:
        payload: dict[str, Any] = {
            name: {
                "usd": round(float(bucket["usd"]), 4),
                "tokens": int(bucket["tokens"]),
                "sandbox_hours": round(float(bucket["sandbox_seconds"]) / 3600.0, 4),
            }
            for name, bucket in totals.items()
        }
        if budget_usd is not None:
            payload["remaining_vs_budget"] = round(max(0.0, float(budget_usd) - weekly_usd), 4)
        console.print(_json.dumps(payload))
        return

    for label, name in (("Daily", "daily"), ("Weekly", "weekly"), ("Monthly", "monthly")):
        bucket = totals[name]
        console.print(
            f"{label} usage: ${float(bucket['usd']):.2f} | tokens {int(bucket['tokens'])} "
            f"| sandbox {float(bucket['sandbox_seconds'])/3600.0:.2f}h"
        )
    if budget_usd is not None:
        remaining = max(0.0, float(budget_usd) - weekly_usd)
        console.print(f"Remaining (vs weekly budget ${float(budget_usd):.2f}): ${remaining:.2f}")
|
|
203
|
+
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
"""Shared utilities for status commands."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
from collections.abc import Callable, Coroutine
|
|
7
|
+
from datetime import UTC, datetime, timedelta
|
|
8
|
+
from typing import Any, TypeVar
|
|
9
|
+
|
|
10
|
+
import click
|
|
11
|
+
from rich.console import Console
|
|
12
|
+
|
|
13
|
+
from .config import DEFAULT_TIMEOUT, BackendConfig, resolve_backend_config
|
|
14
|
+
|
|
15
|
+
T = TypeVar("T")
|
|
16
|
+
|
|
17
|
+
console = Console()
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def parse_relative_time(value: str | None) -> str | None:
|
|
21
|
+
"""Convert relative time expressions (e.g., '5m', '2h', '1d') to ISO strings."""
|
|
22
|
+
if not value:
|
|
23
|
+
return None
|
|
24
|
+
token = value.strip().lower()
|
|
25
|
+
if not token:
|
|
26
|
+
return None
|
|
27
|
+
multiplier = 1.0
|
|
28
|
+
if token.endswith("ms"):
|
|
29
|
+
multiplier = 0.001
|
|
30
|
+
token = token[:-2]
|
|
31
|
+
elif token.endswith("s"):
|
|
32
|
+
multiplier = 1.0
|
|
33
|
+
token = token[:-1]
|
|
34
|
+
elif token.endswith("m"):
|
|
35
|
+
multiplier = 60.0
|
|
36
|
+
token = token[:-1]
|
|
37
|
+
elif token.endswith("h"):
|
|
38
|
+
multiplier = 3600.0
|
|
39
|
+
token = token[:-1]
|
|
40
|
+
elif token.endswith("d"):
|
|
41
|
+
multiplier = 86400.0
|
|
42
|
+
token = token[:-1]
|
|
43
|
+
|
|
44
|
+
try:
|
|
45
|
+
seconds = float(token) * multiplier
|
|
46
|
+
except ValueError:
|
|
47
|
+
return value
|
|
48
|
+
|
|
49
|
+
dt = datetime.now(UTC) - timedelta(seconds=seconds)
|
|
50
|
+
return dt.isoformat()
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def ensure_async(fn: Callable[..., Coroutine[Any, Any, T]]) -> Callable[..., T]:
    """Decorator to run an async callable via asyncio.run inside Click commands.

    Args:
        fn: Coroutine function to adapt.

    Returns:
        A synchronous callable that forwards its arguments to ``fn``, runs the
        resulting coroutine with ``asyncio.run`` and returns its result. The
        wrapper keeps ``fn``'s name, docstring and other metadata.
    """
    from functools import wraps

    @wraps(fn)  # preserve __name__/__doc__ so the wrapped callable stays identifiable
    def wrapper(*args: Any, **kwargs: Any) -> T:
        return asyncio.run(fn(*args, **kwargs))

    return wrapper
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def resolve_context_config(
    ctx: click.Context,
    *,
    base_url: str | None,
    api_key: str | None,
    timeout: float | None,
) -> BackendConfig:
    """Pick the backend configuration a status command should use.

    When any of ``base_url``, ``api_key`` or a non-default ``timeout`` is
    given, a configuration is resolved from those values. Otherwise a
    ``BackendConfig`` stored under ``"status_backend_config"`` in the Click
    context's dict object is reused if present; failing that, a configuration
    is resolved from the (default) arguments.
    """
    no_overrides = base_url is None and api_key is None and timeout in (None, DEFAULT_TIMEOUT)
    if no_overrides:
        shared = ctx.find_object(dict)
        cached = shared.get("status_backend_config") if shared else None
        if isinstance(cached, BackendConfig):
            return cached
    return resolve_backend_config(base_url=base_url, api_key=api_key, timeout=timeout)
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def warn(message: str) -> None:
    """Print ``message`` on the module console wrapped in yellow markup tags."""
    styled = f"[yellow]{message}[/yellow]"
    console.print(styled)
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def bail(message: str) -> None:
    """Abort by raising ``click.ClickException`` carrying ``message``.

    Raises:
        click.ClickException: Always.
    """
    failure = click.ClickException(message)
    raise failure
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def common_options() -> Callable[[Callable[..., T]], Callable[..., T]]:
    """Build a decorator that adds the shared backend options to a command.

    The decorated callable gains ``--base-url`` (env ``SYNTH_STATUS_BASE_URL``),
    ``--api-key`` (env ``SYNTH_STATUS_API_KEY``) and ``--timeout`` (float,
    defaulting to ``DEFAULT_TIMEOUT``).
    """

    def decorator(func: Callable[..., T]) -> Callable[..., T]:
        with_base_url = click.option(
            "--base-url",
            envvar="SYNTH_STATUS_BASE_URL",
            default=None,
            help="Override the Synth backend base URL for this command.",
        )
        with_api_key = click.option(
            "--api-key",
            envvar="SYNTH_STATUS_API_KEY",
            default=None,
            help="API key for the Synth backend.",
        )
        with_timeout = click.option(
            "--timeout",
            default=DEFAULT_TIMEOUT,
            show_default=True,
            type=float,
            help="HTTP request timeout in seconds.",
        )
        # Apply innermost-first, exactly as if the three options were stacked
        # as decorators in the order base-url, api-key, timeout.
        return with_base_url(with_api_key(with_timeout(func)))

    return decorator
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
from .core import register, train_command
|
|
2
|
+
from .errors import (
|
|
3
|
+
InvalidJudgeConfigError,
|
|
4
|
+
InvalidRubricConfigError,
|
|
5
|
+
TrainCliError,
|
|
6
|
+
)
|
|
7
|
+
from .judge_schemas import (
|
|
8
|
+
JudgeConfig,
|
|
9
|
+
JudgeOptionsConfig,
|
|
10
|
+
JudgeRequestPayload,
|
|
11
|
+
RubricConfig,
|
|
12
|
+
RubricWeightsConfig,
|
|
13
|
+
build_judge_http_options,
|
|
14
|
+
)
|
|
15
|
+
from .judge_validation import (
|
|
16
|
+
check_for_deprecated_fields,
|
|
17
|
+
extract_and_validate_judge_rubric,
|
|
18
|
+
validate_judge_config,
|
|
19
|
+
validate_rubric_config,
|
|
20
|
+
)
|
|
21
|
+
from .validation import (
|
|
22
|
+
load_and_validate_rl,
|
|
23
|
+
load_and_validate_sft,
|
|
24
|
+
validate_rl_config,
|
|
25
|
+
validate_sft_config,
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
__all__ = [
|
|
29
|
+
# Core
|
|
30
|
+
"register",
|
|
31
|
+
"train_command",
|
|
32
|
+
# Errors
|
|
33
|
+
"TrainCliError",
|
|
34
|
+
"InvalidJudgeConfigError",
|
|
35
|
+
"InvalidRubricConfigError",
|
|
36
|
+
# SFT/RL validation
|
|
37
|
+
"validate_sft_config",
|
|
38
|
+
"validate_rl_config",
|
|
39
|
+
"load_and_validate_sft",
|
|
40
|
+
"load_and_validate_rl",
|
|
41
|
+
# Judge/Rubric schemas
|
|
42
|
+
"RubricWeightsConfig",
|
|
43
|
+
"RubricConfig",
|
|
44
|
+
"JudgeOptionsConfig",
|
|
45
|
+
"JudgeConfig",
|
|
46
|
+
"JudgeRequestPayload",
|
|
47
|
+
"build_judge_http_options",
|
|
48
|
+
# Judge/Rubric validation
|
|
49
|
+
"validate_rubric_config",
|
|
50
|
+
"validate_judge_config",
|
|
51
|
+
"extract_and_validate_judge_rubric",
|
|
52
|
+
"check_for_deprecated_fields",
|
|
53
|
+
]
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import click
|
|
4
|
+
from synth_ai.api.train.cli import (
|
|
5
|
+
register as _register_with_cli,
|
|
6
|
+
)
|
|
7
|
+
from synth_ai.api.train.cli import (
|
|
8
|
+
train_command as _train_command,
|
|
9
|
+
)
|
|
10
|
+
|
|
11
|
+
__all__ = ["register", "train_command"]
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def register(cli: click.Group) -> None:
    """Hook the train command into the given root CLI group.

    This is a thin wrapper: it hands ``cli`` straight to ``register`` from
    ``synth_ai.api.train.cli`` (imported here as ``_register_with_cli``).

    Args:
        cli: The click group the train command should be attached to.
    """
    _register_with_cli(cli)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def train_command(*args, **kwargs):
    """Run the train CLI entrypoint.

    All positional and keyword arguments are forwarded unchanged to
    ``train_command`` from ``synth_ai.api.train.cli`` (imported here as
    ``_train_command``), and whatever it returns is returned to the caller.
    """
    outcome = _train_command(*args, **kwargs)
    return outcome
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class TrainCliError(RuntimeError):
    """Root of the train CLI exception hierarchy.

    The more specific train CLI error types in this module all derive from
    this class, so catching it covers every one of them.
    """
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@dataclass(slots=True)
class TomlParseError(TrainCliError):
    """Raised when TOML file cannot be parsed.

    Attributes:
        path: The TOML file involved, as a string.
        detail: Description of the parse failure.
    """

    path: str
    detail: str

    def __str__(self) -> str:
        """Return ``"<path>: <detail>"`` as the human-readable message."""
        # BaseException.__str__ only reflects the positional constructor
        # arguments, so without this override the message would be empty for
        # keyword construction and a tuple repr for positional construction.
        return f"{self.path}: {self.detail}"
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dataclass(slots=True)
class ConfigNotFoundError(TrainCliError):
    """Raised when config file is not found.

    Attributes:
        path: The config file that could not be found, as a string.
    """

    path: str

    def __str__(self) -> str:
        """Return the missing path as the human-readable message."""
        # BaseException.__str__ only reflects the positional constructor
        # arguments, so keyword construction (``path=...``) would otherwise
        # produce an empty message.
        return self.path
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@dataclass(slots=True)
class InvalidSFTConfigError(TrainCliError):
    """Raised when SFT configuration is invalid.

    Attributes:
        detail: Description of what is wrong with the configuration.
        hint: Optional extra guidance; defaults to ``None``.
    """

    detail: str
    hint: str | None = None

    def __str__(self) -> str:
        """Return ``detail``, followed by the hint when one is set."""
        # BaseException.__str__ only reflects the positional constructor
        # arguments, so without this override the message would be empty for
        # keyword construction and a tuple repr when a hint is passed
        # positionally.
        if not self.hint:
            return self.detail
        return f"{self.detail} (hint: {self.hint})"
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@dataclass(slots=True)
class InvalidRLConfigError(TrainCliError):
    """Raised when RL configuration is invalid.

    Attributes:
        detail: Description of what is wrong with the configuration.
        hint: Optional extra guidance; defaults to ``None``.
    """

    detail: str
    hint: str | None = None

    def __str__(self) -> str:
        """Return ``detail``, followed by the hint when one is set."""
        # BaseException.__str__ only reflects the positional constructor
        # arguments, so without this override the message would be empty for
        # keyword construction and a tuple repr when a hint is passed
        # positionally.
        if not self.hint:
            return self.detail
        return f"{self.detail} (hint: {self.hint})"
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@dataclass(slots=True)
class MissingAlgorithmError(TrainCliError):
    """Raised when [algorithm] section is missing or invalid.

    Attributes:
        detail: Description of the problem with the section.
    """

    detail: str

    def __str__(self) -> str:
        """Return ``detail`` as the human-readable message."""
        # BaseException.__str__ only reflects the positional constructor
        # arguments, so keyword construction (``detail=...``) would otherwise
        # produce an empty message.
        return self.detail
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
@dataclass(slots=True)
class MissingModelError(TrainCliError):
    """Raised when model specification is missing.

    Attributes:
        detail: Description of what is missing.
        hint: Optional extra guidance; defaults to ``None``.
    """

    detail: str
    hint: str | None = None

    def __str__(self) -> str:
        """Return ``detail``, followed by the hint when one is set."""
        # BaseException.__str__ only reflects the positional constructor
        # arguments, so without this override the message would be empty for
        # keyword construction and a tuple repr when a hint is passed
        # positionally.
        if not self.hint:
            return self.detail
        return f"{self.detail} (hint: {self.hint})"
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
@dataclass(slots=True)
class MissingDatasetError(TrainCliError):
    """Raised when dataset path is missing for SFT.

    Attributes:
        detail: Description of what is missing.
        hint: Optional extra guidance; defaults to ``None``.
    """

    detail: str
    hint: str | None = None

    def __str__(self) -> str:
        """Return ``detail``, followed by the hint when one is set."""
        # BaseException.__str__ only reflects the positional constructor
        # arguments, so without this override the message would be empty for
        # keyword construction and a tuple repr when a hint is passed
        # positionally.
        if not self.hint:
            return self.detail
        return f"{self.detail} (hint: {self.hint})"
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
@dataclass(slots=True)
class MissingComputeError(TrainCliError):
    """Raised when compute configuration is missing or incomplete.

    Attributes:
        detail: Description of what is missing or incomplete.
        hint: Optional extra guidance; defaults to ``None``.
    """

    detail: str
    hint: str | None = None

    def __str__(self) -> str:
        """Return ``detail``, followed by the hint when one is set."""
        # BaseException.__str__ only reflects the positional constructor
        # arguments, so without this override the message would be empty for
        # keyword construction and a tuple repr when a hint is passed
        # positionally.
        if not self.hint:
            return self.detail
        return f"{self.detail} (hint: {self.hint})"
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
@dataclass(slots=True)
class UnsupportedAlgorithmError(TrainCliError):
    """Raised when algorithm type is not supported.

    Attributes:
        algorithm_type: The algorithm type that was rejected.
        expected: Description of the algorithm type(s) that would have been
            accepted.
        hint: Optional extra guidance; defaults to ``None``.
    """

    algorithm_type: str
    expected: str
    hint: str | None = None

    def __str__(self) -> str:
        """Return a message naming the rejected and expected algorithm types."""
        # BaseException.__str__ only reflects the positional constructor
        # arguments, so without this override the message would be empty for
        # keyword construction and a tuple repr for positional construction.
        message = (
            f"Unsupported algorithm type {self.algorithm_type!r} "
            f"(expected {self.expected})"
        )
        if self.hint:
            message = f"{message} (hint: {self.hint})"
        return message
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
@dataclass(slots=True)
class InvalidHyperparametersError(TrainCliError):
    """Raised when hyperparameters are invalid.

    Attributes:
        detail: Description of what is wrong.
        parameter: Optional name of the offending hyperparameter; defaults to
            ``None``.
    """

    detail: str
    parameter: str | None = None

    def __str__(self) -> str:
        """Return ``detail``, prefixed with the parameter name when one is set."""
        # BaseException.__str__ only reflects the positional constructor
        # arguments, so without this override the message would be empty for
        # keyword construction and a tuple repr when a parameter is passed
        # positionally.
        if not self.parameter:
            return self.detail
        return f"{self.parameter}: {self.detail}"
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
@dataclass(slots=True)
class InvalidTopologyError(TrainCliError):
    """Raised when topology configuration is invalid.

    Attributes:
        detail: Description of what is wrong with the configuration.
        hint: Optional extra guidance; defaults to ``None``.
    """

    detail: str
    hint: str | None = None

    def __str__(self) -> str:
        """Return ``detail``, followed by the hint when one is set."""
        # BaseException.__str__ only reflects the positional constructor
        # arguments, so without this override the message would be empty for
        # keyword construction and a tuple repr when a hint is passed
        # positionally.
        if not self.hint:
            return self.detail
        return f"{self.detail} (hint: {self.hint})"
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
@dataclass(slots=True)
class InvalidJudgeConfigError(TrainCliError):
    """Signals that a judge configuration did not pass validation.

    The ``detail`` field holds the failure description and doubles as the
    exception's string form.
    """

    detail: str

    def __str__(self) -> str:
        """Return the stored ``detail`` text unchanged."""
        message = self.detail
        return message
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
@dataclass(slots=True)
class InvalidRubricConfigError(TrainCliError):
    """Signals that a rubric configuration did not pass validation.

    The ``detail`` field holds the failure description and doubles as the
    exception's string form.
    """

    detail: str

    def __str__(self) -> str:
        """Return the stored ``detail`` text unchanged."""
        message = self.detail
        return message
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
# Public exception types exported by this module, base class first.
__all__ = [
    "TrainCliError",
    "TomlParseError",
    "ConfigNotFoundError",
    "InvalidSFTConfigError",
    "InvalidRLConfigError",
    "MissingAlgorithmError",
    "MissingModelError",
    "MissingDatasetError",
    "MissingComputeError",
    "UnsupportedAlgorithmError",
    "InvalidHyperparametersError",
    "InvalidTopologyError",
    "InvalidJudgeConfigError",
    "InvalidRubricConfigError",
]
|