soup-cli 0.40.2__tar.gz → 0.40.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {soup_cli-0.40.2 → soup_cli-0.40.3}/CONTRIBUTING.md +1 -1
- {soup_cli-0.40.2 → soup_cli-0.40.3}/PKG-INFO +50 -13
- {soup_cli-0.40.2 → soup_cli-0.40.3}/README.md +50 -13
- {soup_cli-0.40.2 → soup_cli-0.40.3}/SECURITY.md +2 -1
- {soup_cli-0.40.2 → soup_cli-0.40.3}/pyproject.toml +1 -1
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/__init__.py +1 -1
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/commands/data.py +67 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/commands/serve.py +78 -1
- soup_cli-0.40.3/soup_cli/data/traces/quality.py +114 -0
- soup_cli-0.40.3/soup_cli/monitoring/trace_logger.py +162 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/trainer/pretrain.py +7 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/trainer/sft.py +26 -3
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/utils/batch_probe.py +106 -0
- soup_cli-0.40.3/soup_cli/utils/multipack_trainer.py +194 -0
- soup_cli-0.40.3/tests/test_v0403_part_a.py +206 -0
- soup_cli-0.40.3/tests/test_v0403_part_b.py +304 -0
- soup_cli-0.40.3/tests/test_v0403_part_c.py +584 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/.dockerignore +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/.github/FUNDING.yml +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/.github/ISSUE_TEMPLATE/config.yml +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/.github/pull_request_template.md +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/.github/workflows/ci.yml +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/.github/workflows/docker.yml +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/.github/workflows/publish.yml +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/.github/workflows/recipe-validation.yml +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/.gitignore +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/CODEOWNERS +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/CODE_OF_CONDUCT.md +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/Dockerfile +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/LICENSE +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/NOTICE +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/docker-compose.yml +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/examples/README.md +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/examples/configs/dpo_chat.yaml +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/examples/configs/dpo_example.yaml +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/examples/configs/grpo_reasoning.yaml +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/examples/configs/rlhf_step1_sft.yaml +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/examples/configs/rlhf_step2_reward.yaml +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/examples/configs/rlhf_step3_ppo.yaml +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/examples/configs/sft_basic.yaml +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/examples/configs/vision_llama.yaml +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/examples/data/alpaca_tiny.jsonl +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/examples/data/chat_preferences.jsonl +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/examples/data/dpo_sample.jsonl +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/examples/data/reasoning_math.jsonl +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup.png +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/__main__.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/autopilot/__init__.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/autopilot/analyzer.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/autopilot/decisions.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/autopilot/generate_config.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/cans/__init__.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/cans/pack.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/cans/publish.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/cans/run.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/cans/schema.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/cans/unpack.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/cans/verify.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/cli.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/commands/__init__.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/commands/adapters.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/commands/autopilot.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/commands/bench.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/commands/can.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/commands/chat.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/commands/cost.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/commands/deploy.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/commands/diff.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/commands/doctor.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/commands/eval.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/commands/export.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/commands/generate.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/commands/history.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/commands/infer.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/commands/init.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/commands/merge.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/commands/migrate.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/commands/profile.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/commands/push.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/commands/quickstart.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/commands/recipes.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/commands/registry.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/commands/runs.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/commands/sweep.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/commands/train.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/commands/tui.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/commands/ui.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/commands/why.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/config/__init__.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/config/loader.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/config/schema.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/data/__init__.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/data/augment.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/data/chat_templates.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/data/collators.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/data/formats.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/data/loader.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/data/loss_mask.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/data/providers/__init__.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/data/providers/_utils.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/data/providers/anthropic.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/data/providers/ollama.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/data/providers/vllm.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/data/sft_format.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/data/templates/__init__.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/data/templates/code.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/data/templates/conversation.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/data/templates/preference.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/data/templates/qa.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/data/templates/reasoning.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/data/templates/tool_calling.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/data/templates/verifiable.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/data/traces/__init__.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/data/traces/pair_builder.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/data/traces/parsers.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/data/validator.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/eval/__init__.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/eval/checkpoint_intelligence.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/eval/custom.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/eval/forgetting.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/eval/gate.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/eval/human.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/eval/judge.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/eval/leaderboard.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/eval/quant_check.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/experiment/__init__.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/experiment/tracker.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/migrate/__init__.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/migrate/axolotl.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/migrate/common.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/migrate/llamafactory.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/migrate/unsloth.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/monitoring/__init__.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/monitoring/callback.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/monitoring/display.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/monitoring/hf_push.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/recipes/__init__.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/recipes/catalog.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/registry/__init__.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/registry/attach.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/registry/diff.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/registry/hashing.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/registry/store.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/templates/__init__.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/templates/audio.yaml +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/templates/bco.yaml +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/templates/chat.yaml +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/templates/code.yaml +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/templates/embedding.yaml +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/templates/ipo.yaml +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/templates/kto.yaml +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/templates/longcontext.yaml +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/templates/manifest.json +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/templates/medical.yaml +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/templates/moe.yaml +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/templates/orpo.yaml +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/templates/pretrain.yaml +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/templates/reasoning.yaml +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/templates/rlhf.yaml +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/templates/simpo.yaml +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/templates/tool-calling.yaml +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/templates/vision.yaml +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/trainer/__init__.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/trainer/bco.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/trainer/dpo.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/trainer/embedding.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/trainer/grpo.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/trainer/ipo.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/trainer/kto.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/trainer/mlx_dpo.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/trainer/mlx_grpo.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/trainer/mlx_routing.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/trainer/mlx_sft.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/trainer/orpo.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/trainer/ppo.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/trainer/preference.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/trainer/reward_model.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/trainer/rewards.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/trainer/simpo.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/tui_app.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/ui/__init__.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/ui/app.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/ui/static/app.js +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/ui/static/index.html +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/ui/static/logo.png +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/ui/static/logo.svg +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/ui/static/style.css +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/utils/__init__.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/utils/activation_offload.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/utils/auto_quant.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/utils/constants.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/utils/convergence.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/utils/crash.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/utils/cross_doc_attn.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/utils/curriculum.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/utils/cut_ce.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/utils/deepspeed.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/utils/dpo_variants.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/utils/encoding.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/utils/errors.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/utils/flash_attn.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/utils/fp8.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/utils/freeze.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/utils/fsdp.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/utils/galore.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/utils/gpu.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/utils/grad_accum.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/utils/gradient_ckpt.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/utils/hf.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/utils/hf_space.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/utils/jinja_analyzer.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/utils/kernel_picker.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/utils/launcher.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/utils/liger.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/utils/log_level.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/utils/long_context.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/utils/lr_finder.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/utils/metrics.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/utils/mii.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/utils/mixed_precision.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/utils/mlx.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/utils/moe.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/utils/multipack.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/utils/multipack_sampler.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/utils/neat_packing.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/utils/ollama.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/utils/paths.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/utils/peft_builder.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/utils/peft_patches.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/utils/pipeline.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/utils/preference_combine.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/utils/profiler.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/utils/profiling.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/utils/qat.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/utils/quality.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/utils/quant_menu.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/utils/registry.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/utils/relora.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/utils/replay.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/utils/ring_attention.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/utils/run_cost.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/utils/sglang.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/utils/spec_pairing.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/utils/spike_recovery.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/utils/structured_output.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/utils/topology.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/utils/tracing.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/utils/trust_remote.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/utils/unsloth.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/utils/v028_features.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/utils/vllm.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/utils/warmup.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_cli/utils/why.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/soup_logo_svg.svg +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/templates/chat.yaml +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/templates/code.yaml +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/templates/medical.yaml +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/__init__.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/conftest.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_adapters.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_advanced_peft.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_assistant_mask.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_audio.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_auto_tuning.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_autopilot.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_awq_gptq_export.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_batch_probe.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_bco.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_bench.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_bugfixes.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_callback.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_cans.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_chat.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_chat_template.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_cli.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_cli_subprocess.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_config.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_cost.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_crash_reporter.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_curriculum.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_data.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_data_augment.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_data_sample.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_data_split.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_data_tools.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_dataset_hub.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_dataset_registry.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_deepspeed.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_deploy_ollama.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_diff.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_display.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_doctor.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_dpo_example.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_dpo_variants.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_embedding.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_errors.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_eval.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_eval_gate.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_eval_platform.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_export.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_formats.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_fp8_recipe.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_freeze_training.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_generate.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_gpu.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_grpo.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_hf_integration.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_infer.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_inference_advanced.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_init.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_ipo.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_jinja_analyzer.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_kto.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_loader.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_log_level.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_loss_watchdog.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_merge.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_migrate.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_mlx_backend.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_moe.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_multi_adapter.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_multi_gpu.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_multipack_config.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_multipack_invariants.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_multipack_sampler.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_neat_packing.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_neftune_rslora.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_onnx_tensorrt_export.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_orpo.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_packing.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_part_a_wave1.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_part_a_wave2.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_part_b.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_part_c.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_part_d.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_part_e.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_part_f_hardening.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_peft_methods.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_peft_patches.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_performance.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_pissa_init.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_ppo.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_preference_dispatcher.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_preference_multi.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_preference_multi_runtime.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_pretrain.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_profile.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_profiling.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_progress.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_push.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_qat.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_quality_filter.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_quant_check.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_quant_menu.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_quickstart.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_rank_pattern.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_recipes.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_recipes_v031.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_registry.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_relora.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_replay.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_resume.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_rlvr.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_run_cost.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_runs.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_serve.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_server_generate.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_sglang_serve.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_simpo.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_smoke_train.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_speculative_decoding.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_sweep.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_synth_data_pro.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_templates_yaml.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_tensorboard.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_tool_calling.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_trace_to_pref.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_tracker.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_trainer_coverage_v035.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_trainer_init.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_training_intelligence.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_training_speed.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_trust_remote_code.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_tui.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_ui.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_ui_chat.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_ui_config_builder.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_ui_live_monitor.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_ui_metrics.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_unsloth.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_v0401_part_c.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_v0401_part_d.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_v0401_part_e.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_v0402_part_a.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_v0402_part_b.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_validator.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_vision.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_vllm_serve.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_why.py +0 -0
- {soup_cli-0.40.2 → soup_cli-0.40.3}/tests/test_windows_encoding.py +0 -0
|
@@ -111,7 +111,7 @@ soup_cli/
|
|
|
111
111
|
templates/ - 17 built-in soup.yaml templates (YAML + manifest.json) with load_template loader (v0.39.0, +bco v0.40.0)
|
|
112
112
|
ui/ - Web UI (FastAPI + HTML/JS SPA)
|
|
113
113
|
|
|
114
|
-
tests/ - Test suite (
|
|
114
|
+
tests/ - Test suite (146 files, 4855 tests)
|
|
115
115
|
examples/ - Real-world config examples and datasets
|
|
116
116
|
```
|
|
117
117
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: soup-cli
|
|
3
|
-
Version: 0.40.2
|
|
3
|
+
Version: 0.40.3
|
|
4
4
|
Summary: Fine-tune LLMs in one command. No SSH, no config hell.
|
|
5
5
|
Project-URL: Homepage, https://github.com/MakazhanAlpamys/Soup
|
|
6
6
|
Project-URL: Repository, https://github.com/MakazhanAlpamys/Soup
|
|
@@ -134,17 +134,13 @@ soup train
|
|
|
134
134
|
|
|
135
135
|
Latest highlights only. Full history: [GitHub Releases](https://github.com/MakazhanAlpamys/Soup/releases).
|
|
136
136
|
|
|
137
|
-
**v0.40.
|
|
137
|
+
**v0.40.3 — Stub-to-live**: three v0.X.0 deferred-stub features become live runtime — closes #33 (data harvester judge filter + serve trace log), #64 (live CUDA OOM probe), #65 (multipack sampler in HF Trainer).
|
|
138
138
|
|
|
139
|
-
-
|
|
140
|
-
- **
|
|
141
|
-
- **`soup
|
|
142
|
-
- **`soup
|
|
143
|
-
-
|
|
144
|
-
- **`soup infer` / `soup bench` accept HF ids** — when the local model path is missing AND the value isn't path-like (no `./`, `/`, `~`, `C:\`), it falls through to a HuggingFace download via `transformers.from_pretrained`. Path-like-but-missing surfaces a friendly `FileNotFoundError`.
|
|
145
|
-
- **CLI flag aliases** — `data filter --min-coherence` (alias for `--coherence`); `data split --train` accepted (informational, train is the implicit remainder); `data register / unregister` accept positional `<name>` and `<path>` alongside the `--name` / `--path` options, with conflict detection.
|
|
146
|
-
- **`--log-level` plumbing complete** — `apply_logging_level` now sets the root logger so third-party libraries (transformers / peft / trl) actually respect QUIET and DEBUG. The four tiers no longer produce byte-identical output.
|
|
147
|
-
- **+36 net new tests** across the new helpers, security-fix follow-ups (path-containment in `register_data`, `bench.py`, `infer.py --output`, symlink-reject in custom Space templates), and Windows cross-drive edge cases.
|
|
139
|
+
- **Live CUDA batch-size probe** — `auto_batch_size_strategy: probe` now runs ONE forward+backward+step on a synthetic batch per candidate before training. On `torch.cuda.OutOfMemoryError` the probe halves; otherwise it doubles. Result is cached per `(model, max_length, quant, lora_r, gpu)` tuple so the next run short-circuits. CPU sessions skip the probe and fall back to the static estimate. SFT-only this release.
|
|
140
|
+
- **Multipack sampler — helpers landed, live wiring deferred to v0.40.4** — adversarial review surfaced a HF Trainer DataLoader shape mismatch (`Sampler[int]` expected, `list[list[int]]` returned). Helpers (lru-cached subclass factory + state-attach with bounds + arch-detect + length-extract with all-zero warning) ship as a stub; live wiring requires a `get_train_dataloader` override and lands next patch.
|
|
141
|
+
- **`soup data from-traces --judge`** — optional LLM-as-a-judge pass over harvested preference pairs. `--judge-provider openai|server|ollama`, `--judge-model gpt-4o-mini`, `--min-confidence 0.7`. Drops pairs whose normalised `(chosen - rejected)` confidence falls below threshold. Per-pair backend exceptions are counted, not crashed; lazy `itertools.islice` cap avoids buffering pathological generators.
|
|
142
|
+
- **`soup serve --trace-log <path>`** — passive append-only JSONL request log (`{prompt, response, latency_ms, tokens, ts}` per chat completion). Path-containment validated, 100 MB rotation cap (one backup retained, symlink-reject on rotate), and `hf_*` / `sk-*` / `Bearer …` token shapes redacted to `<redacted>` before write (mirrors v0.34.0 `crash.py` policy).
|
|
143
|
+
- **+95 net new tests** across the new closures, the dynamic Trainer subclass, the judge filter (including degenerate-scale + lazy-materialisation cases), and the trace logger (including symlink-backup rejection + multi-thread append safety).
|
|
148
144
|
|
|
149
145
|
## Why Soup?
|
|
150
146
|
|
|
@@ -661,7 +657,7 @@ training:
|
|
|
661
657
|
|
|
662
658
|
**Architecture allowlist** — 18 supported (Llama 3.x, Qwen 2/3, Mistral, Gemma 2/3, Phi 3/4, DeepSeek V2/V3, Mixtral, Falcon, StableLM, SmolLM2). Unknown architectures **fail loudly at config-load** instead of silently no-opping (critical fix vs Axolotl's silent-miss footgun).
|
|
663
659
|
|
|
664
|
-
**v0.
|
|
660
|
+
**Live wiring** — still deferred. v0.40.3 ships the helpers (`make_multipack_trainer_class` lru-cached factory + `attach_multipack_state` + `lengths_from_dataset` + `detect_arch_name`) but neither SFT nor Pretrain wrappers instantiate the subclass — adversarial review caught a `Sampler[int]` vs `list[list[int]]` mismatch with HF Trainer's DataLoader. Setting `multipack: true` prints a yellow advisory and falls back to the standard sampler. Live wiring (via a `get_train_dataloader` override with `batch_sampler=`) lands in v0.40.4. Multipack is **sft / pretrain only** on the `transformers` backend; preference / RLHF trainers and MLX backend still get distinct error messages naming the actual reason.
|
|
665
661
|
|
|
666
662
|
**DoS hardening** — the FFD packer caps at 1M items (algorithm is O(N²) worst-case); the 4D mask builder caps allocations at 2³¹ cells; the chat-template Jinja analyzer caps at 128KB. Every numeric input rejects `bool` explicitly (matches v0.30.0+ project policy).
|
|
667
663
|
|
|
@@ -2579,6 +2575,7 @@ soup serve --model <m> --structured-output json --json-schema s.json Constraine
|
|
|
2579
2575
|
soup serve --model <m> --structured-output regex --regex-pattern '...' Regex-constrained output
|
|
2580
2576
|
soup serve --model <m> --dashboard Live dashboard + /metrics endpoint
|
|
2581
2577
|
soup serve --model <m> --trace --trace-endpoint http://localhost:4317 OpenTelemetry tracing
|
|
2578
|
+
soup serve --model <m> --trace-log ./serve.jsonl Per-request JSONL log + rotation + secret redaction
|
|
2582
2579
|
POST /v1/adapters/activate/<name> Hot-swap active LoRA adapter
|
|
2583
2580
|
soup sweep --config soup.yaml --param lr=... Hyperparameter search
|
|
2584
2581
|
soup diff --model-a ./a --model-b ./b Compare two models
|
|
@@ -2594,7 +2591,9 @@ soup data generate ... --provider anthropic Use Claude API
|
|
|
2594
2591
|
soup data generate ... --provider vllm Use local vLLM server
|
|
2595
2592
|
soup data generate ... --template code Domain templates (code/conversation/qa/preference/reasoning)
|
|
2596
2593
|
soup data generate ... --quality-pipeline Auto validate + filter + dedup
|
|
2597
|
-
soup data augment <path> --strategy rephrase|translate|style LLM-driven augmentationsoup data from-traces --logs l.jsonl --format langchain --signal thumbs_up --output p.jsonl Preference pairs from
|
|
2594
|
+
soup data augment <path> --strategy rephrase|translate|style LLM-driven augmentation
soup data from-traces --logs l.jsonl --format langchain --signal thumbs_up --output p.jsonl Preference pairs from traces
|
|
2595
|
+
soup data from-traces ... --judge --min-confidence 0.7 LLM-judge confidence filter
|
|
2596
|
+
soup data review prefs.jsonl --sample 10 Preview preference pairs
soup data filter <path> --coherence 0.3 Quality filter (perplexity/coherence)
|
|
2598
2597
|
soup data sample <path> --n 1000 Random sample subset
|
|
2599
2598
|
soup data sample <path> --n 1000 --strategy diverse Cluster-based diverse sampling
|
|
2600
2599
|
soup data sample <path> --n 1000 --strategy hard Sample hardest examples
|
|
@@ -2791,6 +2790,44 @@ pytest tests/ -v
|
|
|
2791
2790
|
pytest tests/ -m smoke -v
|
|
2792
2791
|
```
|
|
2793
2792
|
|
|
2793
|
+
## Live CUDA Batch-Size Probe
|
|
2794
|
+
|
|
2795
|
+
Set `auto_batch_size_strategy: probe` in `training:` and Soup will run a real OOM-probe before training:
|
|
2796
|
+
|
|
2797
|
+
```yaml
|
|
2798
|
+
training:
|
|
2799
|
+
batch_size: auto
|
|
2800
|
+
auto_batch_size_strategy: probe
|
|
2801
|
+
```
|
|
2802
|
+
|
|
2803
|
+
For each candidate size `B`, the probe runs ONE forward + backward + step on a synthetic batch of `B` sequences of length `max_length`. On `torch.cuda.OutOfMemoryError` it halves; otherwise it doubles up to `4 × static_estimate`. The picked size is cached per `(model, max_length, quantization, lora_r, gpu)` tuple in `~/.soup/batch_cache.json` so subsequent runs skip the probe.
|
|
2804
|
+
|
|
2805
|
+
CPU sessions and `auto_batch_size_strategy: static` skip the probe. Synthetic batch tensors are freed before the backward pass so peak VRAM reflects the realistic training step. SFT-only this release — non-SFT trainers fall back to the static estimate.
|
|
2806
|
+
|
|
2807
|
+
## Trace-to-Preference: LLM-Judge Filter
|
|
2808
|
+
|
|
2809
|
+
`soup data from-traces --judge` filters harvested preference pairs through an LLM judge:
|
|
2810
|
+
|
|
2811
|
+
```bash
|
|
2812
|
+
soup data from-traces \
|
|
2813
|
+
--logs ./prod-traces.jsonl --format langchain --signal thumbs_up \
|
|
2814
|
+
--output ./prefs.jsonl \
|
|
2815
|
+
--judge --judge-provider ollama --judge-model llama3 \
|
|
2816
|
+
--min-confidence 0.7
|
|
2817
|
+
```
|
|
2818
|
+
|
|
2819
|
+
The judge scores `chosen` and `rejected` independently against its rubric (default helpfulness/accuracy/safety on a 1-5 scale). Pairs whose normalised `(chosen - rejected)` confidence falls below `--min-confidence` are dropped. Per-pair backend exceptions are counted and reported rather than aborting the harvest. The provider allowlist `{openai, server, ollama}` is validated at the CLI boundary; SSRF protection on `--judge-api-base` carries over from `soup eval judge`.
|
|
2820
|
+
|
|
2821
|
+
## Inference Server Trace Log
|
|
2822
|
+
|
|
2823
|
+
`soup serve --trace-log <path>` writes a passive append-only JSONL log per chat completion:
|
|
2824
|
+
|
|
2825
|
+
```bash
|
|
2826
|
+
soup serve --model ./out --trace-log ./serve-trace.jsonl --trace-log-cap-mb 100
|
|
2827
|
+
```
|
|
2828
|
+
|
|
2829
|
+
Each line: `{"ts": ..., "prompt": ..., "response": ..., "latency_ms": ..., "tokens": ...}`. Path-containment validated, hard rotation cap (default 100 MB, one backup retained), symlink-reject on the backup path (TOCTOU defence), and `hf_*` / `sk-*` / `Bearer …` token shapes redacted to `<redacted>` before write. Failures (disk full, serialisation errors) never crash the request handler.
|
|
2830
|
+
|
|
2794
2831
|
## Changelog
|
|
2795
2832
|
|
|
2796
2833
|
See [GitHub Releases](https://github.com/MakazhanAlpamys/Soup/releases) for version history.
|
|
@@ -43,17 +43,13 @@ soup train
|
|
|
43
43
|
|
|
44
44
|
Latest highlights only. Full history: [GitHub Releases](https://github.com/MakazhanAlpamys/Soup/releases).
|
|
45
45
|
|
|
46
|
-
**v0.40.
|
|
47
|
-
|
|
48
|
-
-
|
|
49
|
-
- **
|
|
50
|
-
- **`soup
|
|
51
|
-
- **`soup
|
|
52
|
-
-
|
|
53
|
-
- **`soup infer` / `soup bench` accept HF ids** — when the local model path is missing AND the value isn't path-like (no `./`, `/`, `~`, `C:\`), it falls through to a HuggingFace download via `transformers.from_pretrained`. Path-like-but-missing surfaces a friendly `FileNotFoundError`.
|
|
54
|
-
- **CLI flag aliases** — `data filter --min-coherence` (alias for `--coherence`); `data split --train` accepted (informational, train is the implicit remainder); `data register / unregister` accept positional `<name>` and `<path>` alongside the `--name` / `--path` options, with conflict detection.
|
|
55
|
-
- **`--log-level` plumbing complete** — `apply_logging_level` now sets the root logger so third-party libraries (transformers / peft / trl) actually respect QUIET and DEBUG. The four tiers no longer produce byte-identical output.
|
|
56
|
-
- **+36 net new tests** across the new helpers, security-fix follow-ups (path-containment in `register_data`, `bench.py`, `infer.py --output`, symlink-reject in custom Space templates), and Windows cross-drive edge cases.
|
|
46
|
+
**v0.40.3 — Stub-to-live**: two v0.X.0 deferred-stub features become live runtime and a third lands its helpers — closes #33 (data harvester judge filter + serve trace log) and #64 (live CUDA OOM probe); #65 (multipack sampler in HF Trainer) ships helpers only, with live wiring deferred to v0.40.4.
|
|
47
|
+
|
|
48
|
+
- **Live CUDA batch-size probe** — `auto_batch_size_strategy: probe` now runs ONE forward+backward+step on a synthetic batch per candidate before training. On `torch.cuda.OutOfMemoryError` the probe halves; otherwise it doubles. Result is cached per `(model, max_length, quant, lora_r, gpu)` tuple so the next run short-circuits. CPU sessions skip the probe and fall back to the static estimate. SFT-only this release.
|
|
49
|
+
- **Multipack sampler — helpers landed, live wiring deferred to v0.40.4** — adversarial review surfaced a HF Trainer DataLoader shape mismatch (`Sampler[int]` expected, `list[list[int]]` returned). Helpers (lru-cached subclass factory + state-attach with bounds + arch-detect + length-extract with all-zero warning) ship as a stub; live wiring requires a `get_train_dataloader` override and lands next patch.
|
|
50
|
+
- **`soup data from-traces --judge`** — optional LLM-as-a-judge pass over harvested preference pairs. `--judge-provider openai|server|ollama`, `--judge-model gpt-4o-mini`, `--min-confidence 0.7`. Drops pairs whose normalised `(chosen - rejected)` confidence falls below the threshold. Per-pair backend exceptions are counted and reported instead of aborting the run; a lazy `itertools.islice` cap avoids buffering pathological generators.
|
|
51
|
+
- **`soup serve --trace-log <path>`** — passive append-only JSONL request log (`{prompt, response, latency_ms, tokens, ts}` per chat completion). Path-containment validated, 100 MB rotation cap (one backup retained, symlink-reject on rotate), and `hf_*` / `sk-*` / `Bearer …` token shapes redacted to `<redacted>` before write (mirrors v0.34.0 `crash.py` policy).
|
|
52
|
+
- **+95 net new tests** across the new closures, the dynamic Trainer subclass, the judge filter (including degenerate-scale + lazy-materialisation cases), and the trace logger (including symlink-backup rejection + multi-thread append safety).
|
|
57
53
|
|
|
58
54
|
## Why Soup?
|
|
59
55
|
|
|
@@ -570,7 +566,7 @@ training:
|
|
|
570
566
|
|
|
571
567
|
**Architecture allowlist** — 18 supported (Llama 3.x, Qwen 2/3, Mistral, Gemma 2/3, Phi 3/4, DeepSeek V2/V3, Mixtral, Falcon, StableLM, SmolLM2). Unknown architectures **fail loudly at config-load** instead of silently no-opping (critical fix vs Axolotl's silent-miss footgun).
|
|
572
568
|
|
|
573
|
-
**v0.
|
|
569
|
+
**Live wiring** — still deferred. v0.40.3 ships the helpers (`make_multipack_trainer_class` lru-cached factory + `attach_multipack_state` + `lengths_from_dataset` + `detect_arch_name`) but neither SFT nor Pretrain wrappers instantiate the subclass — adversarial review caught a `Sampler[int]` vs `list[list[int]]` mismatch with HF Trainer's DataLoader. Setting `multipack: true` prints a yellow advisory and falls back to the standard sampler. Live wiring (via a `get_train_dataloader` override with `batch_sampler=`) lands in v0.40.4. Multipack is **sft / pretrain only** on the `transformers` backend; preference / RLHF trainers and MLX backend still get distinct error messages naming the actual reason.
|
|
574
570
|
|
|
575
571
|
**DoS hardening** — the FFD packer caps at 1M items (algorithm is O(N²) worst-case); the 4D mask builder caps allocations at 2³¹ cells; the chat-template Jinja analyzer caps at 128KB. Every numeric input rejects `bool` explicitly (matches v0.30.0+ project policy).
|
|
576
572
|
|
|
@@ -2488,6 +2484,7 @@ soup serve --model <m> --structured-output json --json-schema s.json Constraine
|
|
|
2488
2484
|
soup serve --model <m> --structured-output regex --regex-pattern '...' Regex-constrained output
|
|
2489
2485
|
soup serve --model <m> --dashboard Live dashboard + /metrics endpoint
|
|
2490
2486
|
soup serve --model <m> --trace --trace-endpoint http://localhost:4317 OpenTelemetry tracing
|
|
2487
|
+
soup serve --model <m> --trace-log ./serve.jsonl Per-request JSONL log + rotation + secret redaction
|
|
2491
2488
|
POST /v1/adapters/activate/<name> Hot-swap active LoRA adapter
|
|
2492
2489
|
soup sweep --config soup.yaml --param lr=... Hyperparameter search
|
|
2493
2490
|
soup diff --model-a ./a --model-b ./b Compare two models
|
|
@@ -2503,7 +2500,9 @@ soup data generate ... --provider anthropic Use Claude API
|
|
|
2503
2500
|
soup data generate ... --provider vllm Use local vLLM server
|
|
2504
2501
|
soup data generate ... --template code Domain templates (code/conversation/qa/preference/reasoning)
|
|
2505
2502
|
soup data generate ... --quality-pipeline Auto validate + filter + dedup
|
|
2506
|
-
soup data augment <path> --strategy rephrase|translate|style LLM-driven augmentationsoup data from-traces --logs l.jsonl --format langchain --signal thumbs_up --output p.jsonl Preference pairs from
|
|
2503
|
+
soup data augment <path> --strategy rephrase|translate|style LLM-driven augmentation
soup data from-traces --logs l.jsonl --format langchain --signal thumbs_up --output p.jsonl Preference pairs from traces
|
|
2504
|
+
soup data from-traces ... --judge --min-confidence 0.7 LLM-judge confidence filter
|
|
2505
|
+
soup data review prefs.jsonl --sample 10 Preview preference pairs
soup data filter <path> --coherence 0.3 Quality filter (perplexity/coherence)
|
|
2507
2506
|
soup data sample <path> --n 1000 Random sample subset
|
|
2508
2507
|
soup data sample <path> --n 1000 --strategy diverse Cluster-based diverse sampling
|
|
2509
2508
|
soup data sample <path> --n 1000 --strategy hard Sample hardest examples
|
|
@@ -2700,6 +2699,44 @@ pytest tests/ -v
|
|
|
2700
2699
|
pytest tests/ -m smoke -v
|
|
2701
2700
|
```
|
|
2702
2701
|
|
|
2702
|
+
## Live CUDA Batch-Size Probe
|
|
2703
|
+
|
|
2704
|
+
Set `auto_batch_size_strategy: probe` in `training:` and Soup will run a real OOM-probe before training:
|
|
2705
|
+
|
|
2706
|
+
```yaml
|
|
2707
|
+
training:
|
|
2708
|
+
batch_size: auto
|
|
2709
|
+
auto_batch_size_strategy: probe
|
|
2710
|
+
```
|
|
2711
|
+
|
|
2712
|
+
For each candidate size `B`, the probe runs ONE forward + backward + step on a synthetic batch of `B` sequences of length `max_length`. On `torch.cuda.OutOfMemoryError` it halves; otherwise it doubles up to `4 × static_estimate`. The picked size is cached per `(model, max_length, quantization, lora_r, gpu)` tuple in `~/.soup/batch_cache.json` so subsequent runs skip the probe.
|
|
2713
|
+
|
|
2714
|
+
CPU sessions and `auto_batch_size_strategy: static` skip the probe. Synthetic batch tensors are freed before the backward pass so peak VRAM reflects the realistic training step. SFT-only this release — non-SFT trainers fall back to the static estimate.
|
|
2715
|
+
|
|
2716
|
+
## Trace-to-Preference: LLM-Judge Filter
|
|
2717
|
+
|
|
2718
|
+
`soup data from-traces --judge` filters harvested preference pairs through an LLM judge:
|
|
2719
|
+
|
|
2720
|
+
```bash
|
|
2721
|
+
soup data from-traces \
|
|
2722
|
+
--logs ./prod-traces.jsonl --format langchain --signal thumbs_up \
|
|
2723
|
+
--output ./prefs.jsonl \
|
|
2724
|
+
--judge --judge-provider ollama --judge-model llama3 \
|
|
2725
|
+
--min-confidence 0.7
|
|
2726
|
+
```
|
|
2727
|
+
|
|
2728
|
+
The judge scores `chosen` and `rejected` independently against its rubric (default helpfulness/accuracy/safety on a 1-5 scale). Pairs whose normalised `(chosen - rejected)` confidence falls below `--min-confidence` are dropped. Per-pair backend exceptions are counted and reported rather than aborting the harvest. The provider allowlist `{openai, server, ollama}` is validated at the CLI boundary; SSRF protection on `--judge-api-base` carries over from `soup eval judge`.
|
|
2729
|
+
|
|
2730
|
+
## Inference Server Trace Log
|
|
2731
|
+
|
|
2732
|
+
`soup serve --trace-log <path>` writes a passive append-only JSONL log per chat completion:
|
|
2733
|
+
|
|
2734
|
+
```bash
|
|
2735
|
+
soup serve --model ./out --trace-log ./serve-trace.jsonl --trace-log-cap-mb 100
|
|
2736
|
+
```
|
|
2737
|
+
|
|
2738
|
+
Each line: `{"ts": ..., "prompt": ..., "response": ..., "latency_ms": ..., "tokens": ...}`. Path-containment validated, hard rotation cap (default 100 MB, one backup retained), symlink-reject on the backup path (TOCTOU defence), and `hf_*` / `sk-*` / `Bearer …` token shapes redacted to `<redacted>` before write. Failures (disk full, serialisation errors) never crash the request handler.
|
|
2739
|
+
|
|
2703
2740
|
## Changelog
|
|
2704
2741
|
|
|
2705
2742
|
See [GitHub Releases](https://github.com/MakazhanAlpamys/Soup/releases) for version history.
|
|
@@ -9,7 +9,7 @@ We provide security updates for the following versions:
|
|
|
9
9
|
- **Versions older than 3 minor versions:** No support
|
|
10
10
|
|
|
11
11
|
Example:
|
|
12
|
-
- v0.40.
|
|
12
|
+
- v0.40.3 -- Full support (latest)
|
|
13
13
|
- v0.40.0-0.40.x -- Full support
|
|
14
14
|
- v0.39.0-0.39.x -- Bug-fix support only
|
|
15
15
|
- v0.38.0-0.38.x -- Bug-fix support only
|
|
@@ -144,6 +144,7 @@ No known critical vulnerabilities in current releases.
|
|
|
144
144
|
- **v0.32.0 — Training Stability & Auto-Tuning**: `--find-lr-output` containment via shared `utils/paths.is_under_cwd` (prevents writes outside cwd); `save_lr_finder_report` rejects NaN / Infinity floats in `lrs` / `losses` and serialises with `allow_nan=False` (keeps the report parser-safe); `compute_lr_schedule` rejects non-positive `start_lr`, inverted ranges, and `num_steps` outside `[2, 10_000]`; `pick_mixed_precision` rejects empty / null-byte / >200-char model names and resolves multi-version quirks (`qwen2.5` vs `qwen2`, `phi-3.5` vs `phi-3`) by longest-substring-first iteration so an added family can never accidentally make a more-specific entry dead code; `compute_warmup_steps` clamps to `[10, 1000]` with a `ratio==0.0` short-circuit matching HF Trainer's "no warmup" convention; `SpikeRecoveryStrategy` is `@dataclass(frozen=True)` (post-construction mutation cannot bypass validation), `max_attempts ∈ [1, 10]`, `lr_decay ∈ (0, 1)`, `min_lr > 0`; cross-validator `_validate_spike_recovery_requires_watchdog` rejects `loss_spike_recovery=true, loss_watchdog=false` at config-load (fails fast instead of never triggering); `convergence_window ∈ [5, 10_000]`, `convergence_rel_tol ∈ (0, 1]`, `recommend_action` reuses `detect_plateau` so plateau heuristic stays single-source-of-truth; `GradAccumMonitor.recommend()` caps doubled `accum` at `MAX_ACCUM=1024` so a runaway advisory loop cannot blow up DataLoader prefetch; `generate_config` validates BOTH the YAML output path AND the embedded `decisions["output"]` field via `is_under_cwd` (closes the gap where a crafted `decisions["output"]="../../etc"` would have silently propagated into the rendered YAML)
|
|
145
145
|
- **v0.34.0 — Observability & Dev UX**: `.crash` bundle generator (`utils/crash.py`) recursively redacts `hf_*` / `sk-*` / `Bearer …` token-shaped strings in any captured `config` and metric tail before serialisation, so a `.crash` file shared on a public GitHub issue cannot leak credentials; `output_dir` is reduced to `os.path.basename` so `$HOME` doesn't leak; `write_crash_bundle` uses `os.path.realpath + commonpath` for cwd containment (Windows-safe; raises `ValueError` not `PermissionError` so callers cannot silently swallow with `except OSError`); filename appends `secrets.token_hex(4)` so two crashes in the same UTC second don't collide; bundle truncated to `MAX_BUNDLE_BYTES=1_000_000`. `train.py` crash-write surfaces failures to the user (no silent missing-bundle). `profiling.py` `resolve_trace_path` rejects empty / `.` / `..` / `/` / `\\` / null-byte `run_id` (closes the `output_dir/profiles/../trace.json` escape) and uses `os.path.realpath + is_under_cwd`; profiles dir is created only on successful torch import (no stale empty dirs on torch-less CI). `tracker.get_run` LIKE-prefix match escapes `%` / `_` / `\\` and uses `ESCAPE '\\'` so a crafted `run_id` cannot widen the match (mirrors v0.26.0 registry policy). Lazy schema migration (`_ensure_schema`) tolerates the "duplicate column" race when two CLI processes start simultaneously on a fresh DB (fork-based multi-GPU training, TUI auto-refresh). `runs.py show/replay/clean` switched user `run_id` rendering to `markup_escape` and switched `clean` containment from broken `Path.resolve() + relative_to()` to project-standard `os.path.realpath + is_under_cwd`. `tui_app.py` lazy-imports `ExperimentTracker` and `markup_escape`s every DB-sourced string before passing into Textual widgets so a crafted base_model / experiment_name cannot inject `[bold red]…[/]` markup. 
`run_cost.estimate_run_cost_usd` rejects `bool` in `num_gpus` (bool is a subclass of int — same defence as v0.30.0 `Candidate.__post_init__`); duration clamped to `[0, 1 year]`; unknown GPU returns `None` so callers render `—` instead of fabricating `$0.00`. `log_level.parse_log_level` rejects non-string + null-byte input.
|
|
146
146
|
- **v0.33.0 — Live Wire**: RLVR `code_exec_reward` adds OS-level isolation (Linux best-effort `os.unshare(CLONE_NEWUSER|CLONE_NEWNET|CLONE_NEWPID)`, macOS `sandbox-exec` with default-deny `MACOS_SANDBOX_PROFILE` narrowed to a 3-name `mach-lookup` allowlist to prevent DNS / NSURLSession bypass of `(deny network*)`); `prune_checkpoints` switches to TOCTOU-safe `os.lstat + S_ISLNK` + `shutil.rmtree(onerror=_abort_on_symlink)` so a symlink encountered mid-walk aborts rather than escapes; `run_gate` wraps each task scorer in a typed `try/except` so backend failures produce `score=None, error=str(exc)` (never silent `score=1.0`); `_parse_judge_url` removes the bare `http://` catch-all (defence-in-depth after the Pydantic GateTask validator); `soup can run` requires `--yes` or explicit consent callback and raises `ValueError` (not `PermissionError`, which is an `OSError` subclass that broad `except` blocks would swallow); GGUF `rglob` result for ollama deploy is `realpath+commonpath` checked against extract_dir (prevents symlink escape from a crafted can); `DeployTarget.path` validator normalises mixed `\\`/`/` separators before splitting (closes a Windows `..` bypass); `CAN_FORMAT_VERSION` 1→2 (additive — v1 still loads); `soup can publish` validates `repo_id` via `utils/hf.validate_repo_id`, resolves token via `resolve_token`, sanitises commit messages (first-line, 200-char cap), uses HTTPS-only HfApi; `_write_spike_recovery_hint` adds `is_under_cwd` containment check on `args.output_dir` from raw HF `TrainingArguments`; `lookup_entry_by_output_dir` emits `ResourceWarning` when 1000-row scan limit is hit (no silent miss); `CrossDocCollator` no longer mutates input feature dicts (HF Dataset rows are cached and reused — mutation broke subsequent batches); `Candidate` rejects `bool` in `score`/`latency_ms` (was sneaking past `int` isinstance check); `evaluate_candidate` latency mean now divides by *completed* prompts (excludes crashed) so a broken candidate isn't 
artificially fast; `auto_quant.run_auto_quant_picker` soft-falls-back to highest-scored candidate when no candidate clears `min_score` (server still binds); `build_logits_processors` returns `[]` when neither `outlines` nor `lm-format-enforcer` is installed (server degrades to free-form rather than 500); MII server uses loopback-only CORS, max_tokens cap [1, 16384], stream rejection, generic 500 with no stack-trace leak; `os.execvp` auto-reexec uses list args (no shell), all forwarded flags pre-validated; `cleanup_extract_dir` uses `os.path.commonpath` (Windows-safe) instead of `startswith`; `_run_subprocess` catches `TimeoutExpired` and returns rc=124 (coreutils convention) instead of an unhandled traceback; new `eval_results` and `tensorrt` artifact kinds in `RegistryStore._VALID_KINDS`
|
|
147
|
+
- **v0.40.3 — Stub-to-live**: New `soup_cli/utils/batch_probe.py:make_cuda_probe_fn` builds a CUDA probe closure that runs ONE forward+backward+step on a synthetic batch per candidate; `model.zero_grad(set_to_none=True)` runs BEFORE forward (defends against the synthetic backward accumulating into the live training model's grad buffers — matches v0.35.0 #45 `benchmark_kernel_combos` policy); intermediate `ids/attn/labels/outputs` are `del`-ed before `loss.backward()` so peak VRAM reflects a realistic training step; `bool` rejected on `batch_size` and `max_length`; `max_length < 8` rejected; `torch.cuda.OutOfMemoryError` returns False, other exceptions propagate; returns `None` (no-op) on non-CUDA / no-torch / missing model or tokenizer. New `soup_cli/utils/multipack_trainer.py:make_multipack_trainer_class` is `lru_cache`d so two calls with the same `base_cls` return the same subclass (consistent `isinstance`, pickle-safe); `attach_multipack_state` rejects `bool` on `max_seq_len`/`batch_size`/`seed` and rejects empty `lengths`; `lengths_from_dataset` logs WARNING when every row produces 0 (loud-fail mirrors v0.37.0 multipack arch allowlist — prevents silent NaN-loss footgun); `_get_train_sampler` override accepts `*args, **kwargs` for HF >= 4.41 signature compat. **Live wiring of the sampler into SFT / Pretrain trainer wrappers is deferred to v0.40.4** — adversarial 5th-pass review surfaced a `Sampler[int]` vs `list[list[int]]` shape mismatch with HF Trainer's DataLoader; the wrappers currently print a yellow advisory and fall back to the standard sampler when `multipack: true`. 
New `soup_cli/data/traces/quality.py:judge_filter_pairs` reuses v0.19.0 `JudgeEvaluator` SSRF protections; threshold rejects `bool` / NaN / out-of-`[0,1]`; `_MAX_BATCH=100_000` cap applied via lazy `itertools.islice` (never fully materialises a malicious / pathological generator); per-pair backend exceptions caught and logged at DEBUG (matches v0.33.0 #47 `CrossDocCollator` policy — never silently crash the harvest); `judge_provider` validated against `VALID_PROVIDERS` allowlist at the CLI boundary BEFORE constructor, with a Rich-escape error on mismatch. New `soup_cli/monitoring/trace_logger.py:TraceLogWriter` is thread-safe (single `threading.Lock` — multi-worker `--workers 4` documented as a single-process limitation); path containment via shared `is_under_cwd`; null-byte / empty / non-string path rejected; `cap_mb` bounds `[1, 10000]` with explicit `bool` rejection; rotation: when `current + extra > cap_bytes`, rename to `<path>.1` (one backup retained); symlink at the backup path is rejected via `os.lstat + stat.S_ISLNK` (matches v0.33.0 #22 TOCTOU policy) — defends against pre-placed `<log>.1 -> /etc/cron.d/x` overwrite. Secret redaction: prompt + response strings passed through `_SECRET_RE` matching `hf_*` (≥8), `sk-*` (≥16), and `Bearer …` (≥8) — replaces matches with `<redacted>` before serialisation (mirrors v0.34.0 `crash.py` policy). `--trace-log` constructor error messages in `commands/serve.py` are `rich.markup.escape`d before printing so a crafted path name cannot inject Rich markup. Unserialisable entries dropped silently; disk-full / OSError on write never crashes the request handler (passive log). Known limitations: live CUDA probe is wired in SFT only; multipack is not yet wired live in any trainer (helpers target SFT + Pretrain only; live wiring deferred to v0.40.4); `TraceLogWriter` retains exactly ONE backup file (operators wanting longer retention should use external rotation); custom HF Space templates from v0.40.2 still always create the Space with `space_sdk="gradio"` (tracked for v0.40.4+).
|
|
147
148
|
- **v0.40.2 — Quick polish + carry-overs**: New `soup_cli/utils/hf_space.py:render_custom_template_dir` enforces `is_under_cwd` containment on the template directory; `validate_repo_id` runs BEFORE `{MODEL_REPO}` substitution (matches v0.29.0 Part F policy); per-file 256 KB cap (matches v0.39.0 Part E template-size policy); only `app.py` / `README.md` / `requirements.txt` are read (closed allowlist — no path-from-user-data). Symlinks rejected via `os.lstat + stat.S_ISLNK` and non-regular files (FIFO / device) also rejected (matches v0.33.0 #22 prune_checkpoints TOCTOU policy) — defends against `<template_dir>/app.py -> /etc/passwd`. `_find_highest_local_checkpoint` reads `output_dir` after caller's `is_under_cwd` validation (in `prepare_hf_resume`) and silently drops non-directories + OSError. `prepare_hf_resume` skips the snapshot download when local `checkpoint-N >= remote checkpoint-N` (saves bandwidth and never overwrites a fresher local checkpoint). `commands/data.py:register_data` containment switched from `Path.resolve() + relative_to()` to shared `is_under_cwd` (Windows 8.3 short-name safety per CLAUDE.md project rule); same fix applied to `commands/bench.py` prompts-file containment. `commands/infer.py:--output` now containment-checked via `is_under_cwd` (late-evaluated after model+input validation so pre-existing `tmp_path` test contracts keep working). `commands/quickstart.py:--output` validates target dir via `is_under_cwd` before `mkdir(parents=True)`; rejects out-of-cwd targets with friendly message. `commands/runs.py:_filter_runs_by_cwd` uses `os.path.realpath + commonpath`, catches `(ValueError, OSError)` so cross-drive paths on Windows (`D:\runs` vs `C:\project`) drop silently rather than crash. `monitoring/display.py:format_gate_row` uses explicit `task.get("passed") is True` so a missing `"passed"` field renders neutrally instead of as a false-y red ✗. 
`commands/infer.py:_resolve_model_source` heuristic for HF-id-vs-local-path: only falls through to HF when value is NOT path-like (no `./`, `/`, `\\`, `~`, no Windows drive letter, non-empty); path-like-but-missing raises `FileNotFoundError` so users see actionable errors instead of confusing HF download attempts. Known limitation: custom HF Space templates always create the Space with `space_sdk="gradio"` regardless of the supplied `app.py` (no `--sdk` flag in this release; combine `--template streamlit-chat` with the inline registry for Streamlit Spaces). Tracked for v0.40.3+.
|
|
148
149
|
- **v0.40.1 — QA Hardening**: `soup_cli/utils/encoding.force_utf8_stdio` reconfigures Windows stdout/stderr to UTF-8 before any Rich Console is constructed; `os.environ.setdefault("PYTHONIOENCODING", "utf-8")` preserves user override; `(OSError, ValueError, AttributeError)` swallowed on detached streams; POSIX no-op. `SoupConfig._remap_root_level_misplaced_keys` (model_validator, mode='before') migrates root-level `lora:` into `training.lora` so nested validators (including `lora.init_strategy: Literal["random","pissa","olora"]`) actually fire — closes a footgun where the misplaced key was silently dropped. Caller's dict is never mutated (shallow-copy policy mirroring v0.33.0 #47 / v0.40.0 Part B). `PreferenceTrainerWrapper._build_multi_objective` replaces the v0.40.0 `NotImplementedError` stub with a primary-loss approximation; `validate_weight_compat` rejects BCO mixed with paired losses at runtime (data-format incompatible). `combine_losses` rejects empty weights, propagates NaN loudly (no silent zeroing), and rejects `bool` weight values (matches v0.30.0 `Candidate` policy). `_probe_cache_param_count` rejects empty / null-byte model names before path construction (mirrors v0.26.0 registry / v0.39.0 ReLoRAPolicy policy). `commands/doctor` flags `transformers ≥ 5.0.0` as INCOMPATIBLE via `_MAX_EXCLUSIVE` table; `_version_ge` parses leading-int chunks so `5.0.0.dev0` correctly trips the cap. `_detect_gpu_hw_without_torch_cuda` calls `nvidia-smi` via argv list (no shell), 5s timeout, `OSError` / `TimeoutExpired` caught; GPU label from `nvidia-smi` stdout is `rich.markup.escape`d before embedding in Rich-markup string (a real GPU name like `NVIDIA Quadro [T4]` cannot break or inject markup). `_detect_dual_python_interpreters` uses `os.path.realpath` (not `Path.resolve()`) for Windows 8.3 short-name compat. `_pick_quickstart_model` swaps TinyLlama-1.1B → SmolLM2-135M when `total_memory ≤ 6 GB` (prevents step-0 OOM on RTX 3050 4 GB / similar). 
`_live_lr_sweep_from_config` switched broken `load_local` import to `load_raw_data` (previously always silently fell back to a static placeholder curve). `commands/migrate` rejects `.jsonl` input (with first-line `{` sniff) with exit-2 friendly error; `.jsonl`-only suffix gate prevents false-positives on `.ipynb` notebooks. `commands/eval custom -o` is now honored independently of `--attach-to-registry`; loop-shadow regression where `output = generate_fn(...)` overwrote the CLI option fixed (variable renamed to `response`). `_load_jsonl` switched from `utf-8` to `utf-8-sig` so PowerShell `Out-File -Encoding utf8`-produced JSONL no longer fails first-row parse. Known limitation: `--trust-remote-code` opt-in surface still excludes 10 non-SFT trainers + 5 commands (v0.36.0 #63 carry-over).
|
|
149
150
|
- **v0.40.0 — Preference Variety**: New `task='bco'` (Binary Classifier Optimization) and `task='preference'` (unified dispatcher). New schema fields: `bco_beta` (gt=0), `preference_loss: Literal[dpo,simpo,orpo,ipo,bco]|None`, `preference_loss_weights: Optional[Dict[str,float]]`, `dpo_beta_schedule: Literal[linear,cosine,exponential]|None`, `dpo_beta_end: float, gt=0|None`, `dpo_ref_regen_epochs: int [1,1000]|None`. Cross-validators: `_validate_preference_dispatcher` rejects setting either `preference_loss` or `preference_loss_weights` outside `task='preference'` (closes ordering-dependency between Part B/D validators); `_validate_dpo_variants_supported_tasks` gates β-schedule + ref-regen to DPO-family tasks (`dpo`, `ipo`, or `preference` + `preference_loss in {dpo, ipo}`); rejected on mlx backend with distinct error message (matches v0.34.0 distinct-reason policy); `_validate_preference_loss_weights` enforces 2–5 entries (single-entry rejected with actionable message pointing at scalar `preference_loss`), key allowlist `{dpo, simpo, orpo, ipo, bco}`, explicit null-byte rejection on keys (matches v0.39.0 rank_pattern policy), per-value bounds `(0, 1]`, weights must sum to 1.0 (±1e-6), mutually exclusive with scalar `preference_loss`, rejected on mlx backend. `compute_beta_at_step` rejects `bool` on `step` and `total_steps` (project bool-as-int policy from v0.30.0). `BetaScheduleCallback` resolves `total_steps` lazily in `on_train_begin` so the schedule sees the real `state.max_steps` populated by HF Trainer (closes a first-cut silent-no-op bug where total_steps=0 emitted beta_end for every step). `RefModelRegenCallback._regenerate` uses `strict=True` on `load_state_dict` and logs at WARNING on mismatch (closes a first-cut silent partial-copy hazard where strict=False could produce a hybrid old-base + new-LoRA reference); epoch 0 regen suppressed (avoids copying untrained student); trainer `.beta` assignment swallow narrowed to `AttributeError` only. 
`PreferenceTrainerWrapper._make_inner_cfg` uses `model_copy` (not `model_dump`+`model_validate`) so re-validation never sees an inconsistent intermediate state and the caller's `cfg` is never mutated (mirrors v0.33.0 #47 immutability policy). `_split_dpo_rows_to_bco` skipped-row count emitted at DEBUG so production silent-degradation is inspectable (mirrors v0.33.0 #47 CrossDocCollator policy). Multi-objective live runtime weighted-loss combination is deferred to v0.40.1: `PreferenceTrainerWrapper.setup` raises `NotImplementedError` with a friendly message naming the deferred-version follow-up (mirrors v0.27.0 MII / v0.37.0 multipack / v0.38.0 quant menu / v0.39.0 ReLoRA stub-then-live pattern). Known limitation: `BCOTrainerWrapper._setup_transformers` still hardcodes `trust_remote_code=True` (v0.36.0 #63 known-gap family carry-over across non-SFT trainers).
|
|
@@ -1586,6 +1586,26 @@ def from_traces_cmd(
|
|
|
1586
1586
|
"prefs.jsonl", "--output", "-o",
|
|
1587
1587
|
help="Output path for preference pairs (JSONL)",
|
|
1588
1588
|
),
|
|
1589
|
+
judge: bool = typer.Option(
|
|
1590
|
+
False, "--judge",
|
|
1591
|
+
help="Filter pairs via LLM-as-a-judge confidence (v0.40.3 #33).",
|
|
1592
|
+
),
|
|
1593
|
+
judge_provider: str = typer.Option(
|
|
1594
|
+
"openai", "--judge-provider",
|
|
1595
|
+
help="Judge backend: openai | server | ollama. Used with --judge.",
|
|
1596
|
+
),
|
|
1597
|
+
judge_model: str = typer.Option(
|
|
1598
|
+
"gpt-4o-mini", "--judge-model",
|
|
1599
|
+
help="Judge model id (e.g. 'gpt-4o-mini', 'llama3', 'qwen2.5'). Used with --judge.",
|
|
1600
|
+
),
|
|
1601
|
+
judge_api_base: Optional[str] = typer.Option(
|
|
1602
|
+
None, "--judge-api-base",
|
|
1603
|
+
help="Judge API base URL. SSRF-protected. Used with --judge.",
|
|
1604
|
+
),
|
|
1605
|
+
min_confidence: float = typer.Option(
|
|
1606
|
+
0.7, "--min-confidence",
|
|
1607
|
+
help="Drop pairs with judge-confidence below this threshold (0.0 - 1.0).",
|
|
1608
|
+
),
|
|
1589
1609
|
) -> None:
|
|
1590
1610
|
"""Harvest preference pairs from production traces (v0.26.0 Part C).
|
|
1591
1611
|
|
|
@@ -1666,6 +1686,53 @@ def from_traces_cmd(
|
|
|
1666
1686
|
trace_iter = parse_openai(events)
|
|
1667
1687
|
|
|
1668
1688
|
pairs = list(build_pairs(trace_iter, signal=signal))
|
|
1689
|
+
|
|
1690
|
+
if judge:
|
|
1691
|
+
# v0.40.3 (#33 (a)) — LLM-judge confidence filter.
|
|
1692
|
+
from soup_cli.data.traces.quality import judge_filter_pairs
|
|
1693
|
+
from soup_cli.eval.judge import VALID_PROVIDERS, JudgeEvaluator
|
|
1694
|
+
|
|
1695
|
+
# Friendly early validation matches the existing CLI conventions —
|
|
1696
|
+
# fall through to the constructor only after the obvious typo is caught.
|
|
1697
|
+
if judge_provider not in VALID_PROVIDERS:
|
|
1698
|
+
console.print(
|
|
1699
|
+
f"[red]--judge-provider '{_escape(judge_provider)}' is invalid. "
|
|
1700
|
+
f"Choose: {', '.join(sorted(VALID_PROVIDERS))}[/]"
|
|
1701
|
+
)
|
|
1702
|
+
raise typer.Exit(1)
|
|
1703
|
+
|
|
1704
|
+
try:
|
|
1705
|
+
judge_evaluator = JudgeEvaluator(
|
|
1706
|
+
provider=judge_provider,
|
|
1707
|
+
model=judge_model,
|
|
1708
|
+
api_base=judge_api_base,
|
|
1709
|
+
)
|
|
1710
|
+
except ValueError as exc:
|
|
1711
|
+
console.print(f"[red]--judge config error:[/] {_escape(str(exc))}")
|
|
1712
|
+
raise typer.Exit(1) from exc
|
|
1713
|
+
|
|
1714
|
+
# Cost-shock warning: each pair → TWO judge calls (chosen + rejected).
|
|
1715
|
+
projected = len(pairs) * 2
|
|
1716
|
+
console.print(
|
|
1717
|
+
f"[yellow]Judge filter will issue ~{projected} backend calls "
|
|
1718
|
+
f"({len(pairs)} pairs × 2). Cost depends on provider and model. "
|
|
1719
|
+
f"Use --min-confidence to tune throughput.[/]"
|
|
1720
|
+
)
|
|
1721
|
+
|
|
1722
|
+
try:
|
|
1723
|
+
filtered, report = judge_filter_pairs(
|
|
1724
|
+
pairs, judge=judge_evaluator, min_confidence=min_confidence,
|
|
1725
|
+
)
|
|
1726
|
+
except (TypeError, ValueError) as exc:
|
|
1727
|
+
console.print(f"[red]--judge runtime error:[/] {_escape(str(exc))}")
|
|
1728
|
+
raise typer.Exit(1) from exc
|
|
1729
|
+
|
|
1730
|
+
console.print(
|
|
1731
|
+
f"[green]Judge filter:[/] kept={report.kept} dropped={report.dropped} "
|
|
1732
|
+
f"errors={report.errors} (min_confidence={min_confidence:.2f})"
|
|
1733
|
+
)
|
|
1734
|
+
pairs = filtered
|
|
1735
|
+
|
|
1669
1736
|
with output_path.open("w", encoding="utf-8") as fh:
|
|
1670
1737
|
for pair in pairs:
|
|
1671
1738
|
fh.write(_json.dumps(pair.to_jsonl_dict(), ensure_ascii=False) + "\n")
|
|
@@ -174,6 +174,20 @@ def serve(
|
|
|
174
174
|
"Default deny (v0.36.0). Only enable if you trust the source."
|
|
175
175
|
),
|
|
176
176
|
),
|
|
177
|
+
trace_log: Optional[str] = typer.Option(
|
|
178
|
+
None,
|
|
179
|
+
"--trace-log",
|
|
180
|
+
help=(
|
|
181
|
+
"Append per-request {prompt, response, latency_ms, tokens, ts} "
|
|
182
|
+
"to JSONL at this path. Path must stay under cwd. Rotates at "
|
|
183
|
+
"100 MB (one backup retained). Added in v0.40.3 (#33)."
|
|
184
|
+
),
|
|
185
|
+
),
|
|
186
|
+
trace_log_cap_mb: int = typer.Option(
|
|
187
|
+
100,
|
|
188
|
+
"--trace-log-cap-mb",
|
|
189
|
+
help="Rotation cap in MB for --trace-log (1 - 10000). Default 100.",
|
|
190
|
+
),
|
|
177
191
|
):
|
|
178
192
|
"""Start a local inference server with OpenAI-compatible API."""
|
|
179
193
|
# Lazy imports for fast CLI startup
|
|
@@ -567,6 +581,25 @@ def serve(
|
|
|
567
581
|
|
|
568
582
|
tracer = build_tracer(enabled=trace, endpoint=trace_endpoint)
|
|
569
583
|
|
|
584
|
+
# v0.40.3 (#33 (b)) — passive request log.
|
|
585
|
+
trace_log_writer = None
|
|
586
|
+
if trace_log is not None:
|
|
587
|
+
from soup_cli.monitoring.trace_logger import TraceLogWriter
|
|
588
|
+
|
|
589
|
+
try:
|
|
590
|
+
trace_log_writer = TraceLogWriter(
|
|
591
|
+
trace_log, cap_mb=trace_log_cap_mb,
|
|
592
|
+
)
|
|
593
|
+
except (TypeError, ValueError) as exc:
|
|
594
|
+
from rich.markup import escape as _escape
|
|
595
|
+
|
|
596
|
+
console.print(f"[red]--trace-log:[/] {_escape(str(exc))}")
|
|
597
|
+
raise typer.Exit(1) from exc
|
|
598
|
+
console.print(
|
|
599
|
+
f"[green]Request trace log:[/] {trace_log_writer.path} "
|
|
600
|
+
f"(cap {trace_log_cap_mb} MB)"
|
|
601
|
+
)
|
|
602
|
+
|
|
570
603
|
app = _create_app(
|
|
571
604
|
model_obj=model_obj,
|
|
572
605
|
tokenizer=tokenizer,
|
|
@@ -579,6 +612,7 @@ def serve(
|
|
|
579
612
|
output_constraint=constraint,
|
|
580
613
|
enable_dashboard=dashboard,
|
|
581
614
|
tracer=tracer,
|
|
615
|
+
trace_log_writer=trace_log_writer,
|
|
582
616
|
)
|
|
583
617
|
|
|
584
618
|
console.print(
|
|
@@ -844,6 +878,7 @@ def _create_app(
|
|
|
844
878
|
output_constraint: Optional[Dict] = None,
|
|
845
879
|
enable_dashboard: bool = False,
|
|
846
880
|
tracer=None,
|
|
881
|
+
trace_log_writer=None,
|
|
847
882
|
):
|
|
848
883
|
"""Create the FastAPI application with OpenAI-compatible endpoints."""
|
|
849
884
|
import threading as _threading
|
|
@@ -987,6 +1022,7 @@ def _create_app(
|
|
|
987
1022
|
max_tokens = request.max_tokens or max_tokens_default
|
|
988
1023
|
|
|
989
1024
|
if request.stream:
|
|
1025
|
+
stream_started = time.perf_counter()
|
|
990
1026
|
return StreamingResponse(
|
|
991
1027
|
_stream_response(
|
|
992
1028
|
model_obj, tokenizer, messages,
|
|
@@ -996,6 +1032,8 @@ def _create_app(
|
|
|
996
1032
|
model_name=model_name,
|
|
997
1033
|
assistant_model=draft_model,
|
|
998
1034
|
num_assistant_tokens=num_speculative_tokens,
|
|
1035
|
+
trace_log_writer=trace_log_writer,
|
|
1036
|
+
started=stream_started,
|
|
999
1037
|
),
|
|
1000
1038
|
media_type="text/event-stream",
|
|
1001
1039
|
)
|
|
@@ -1042,6 +1080,21 @@ def _create_app(
|
|
|
1042
1080
|
# returns an empty list and generation runs free-form.
|
|
1043
1081
|
pass
|
|
1044
1082
|
|
|
1083
|
+
# v0.40.3 (#33 (b)) — passive request log; never breaks
|
|
1084
|
+
# the request handler on disk / serialisation issues.
|
|
1085
|
+
if trace_log_writer is not None:
|
|
1086
|
+
last_user = next(
|
|
1087
|
+
(m["content"] for m in reversed(messages)
|
|
1088
|
+
if m.get("role") == "user"),
|
|
1089
|
+
"",
|
|
1090
|
+
)
|
|
1091
|
+
trace_log_writer.record(
|
|
1092
|
+
prompt=str(last_user),
|
|
1093
|
+
response=response_text,
|
|
1094
|
+
latency_ms=(time.perf_counter() - started) * 1000,
|
|
1095
|
+
tokens=completion_tokens,
|
|
1096
|
+
)
|
|
1097
|
+
|
|
1045
1098
|
return {
|
|
1046
1099
|
"id": f"chatcmpl-{uuid.uuid4().hex[:8]}",
|
|
1047
1100
|
"object": "chat.completion",
|
|
@@ -1071,6 +1124,7 @@ def _create_app(
|
|
|
1071
1124
|
# Expose dashboard intent + constraint on the app for tests + introspection
|
|
1072
1125
|
app.state.enable_dashboard = enable_dashboard
|
|
1073
1126
|
app.state.output_constraint = output_constraint
|
|
1127
|
+
app.state.trace_log_writer = trace_log_writer
|
|
1074
1128
|
return app
|
|
1075
1129
|
|
|
1076
1130
|
|
|
@@ -1078,14 +1132,16 @@ def _stream_response(
|
|
|
1078
1132
|
model, tokenizer, messages,
|
|
1079
1133
|
max_tokens, temperature, top_p, model_name,
|
|
1080
1134
|
assistant_model=None, num_assistant_tokens=5,
|
|
1135
|
+
trace_log_writer=None, started=None,
|
|
1081
1136
|
):
|
|
1082
1137
|
"""Generator that yields SSE chunks for streaming responses."""
|
|
1083
1138
|
chat_id = f"chatcmpl-{uuid.uuid4().hex[:8]}"
|
|
1084
1139
|
created = int(time.time())
|
|
1085
1140
|
|
|
1086
1141
|
# Generate full response (true token-by-token streaming requires TextIteratorStreamer)
|
|
1142
|
+
completion_tokens_for_log = 0
|
|
1087
1143
|
try:
|
|
1088
|
-
response_text, _,
|
|
1144
|
+
response_text, _, completion_tokens_for_log = _generate_response(
|
|
1089
1145
|
model, tokenizer, messages,
|
|
1090
1146
|
max_tokens=max_tokens,
|
|
1091
1147
|
temperature=temperature,
|
|
@@ -1133,3 +1189,24 @@ def _stream_response(
|
|
|
1133
1189
|
}
|
|
1134
1190
|
yield f"data: {json.dumps(final_chunk)}\n\n"
|
|
1135
1191
|
yield "data: [DONE]\n\n"
|
|
1192
|
+
|
|
1193
|
+
# v0.40.3 (#33 (b)) — passive request log on the streaming path. Latency
|
|
1194
|
+
# measured from the BEFORE-`_generate_response` mark passed in by the
|
|
1195
|
+
# chat_completions handler. Skipped if writer is None or `started` is
|
|
1196
|
+
# missing. Errors swallowed (passive log).
|
|
1197
|
+
if trace_log_writer is not None and started is not None:
|
|
1198
|
+
try:
|
|
1199
|
+
last_user = next(
|
|
1200
|
+
(m["content"] for m in reversed(messages)
|
|
1201
|
+
if m.get("role") == "user"),
|
|
1202
|
+
"",
|
|
1203
|
+
)
|
|
1204
|
+
trace_log_writer.record(
|
|
1205
|
+
prompt=str(last_user),
|
|
1206
|
+
response=response_text,
|
|
1207
|
+
latency_ms=(time.perf_counter() - started) * 1000,
|
|
1208
|
+
tokens=int(completion_tokens_for_log),
|
|
1209
|
+
extra={"stream": True},
|
|
1210
|
+
)
|
|
1211
|
+
except Exception: # noqa: BLE001 — passive log never blocks SSE
|
|
1212
|
+
logger.debug("trace_log streaming record failed", exc_info=True)
|