veris-cli 2.12.0__tar.gz → 2.13.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {veris_cli-2.12.0 → veris_cli-2.13.0}/PKG-INFO +81 -1
- {veris_cli-2.12.0 → veris_cli-2.13.0}/README.md +80 -0
- {veris_cli-2.12.0 → veris_cli-2.13.0}/pyproject.toml +1 -1
- veris_cli-2.13.0/src/veris_cli/ci_output.py +80 -0
- {veris_cli-2.12.0 → veris_cli-2.13.0}/src/veris_cli/cli.py +316 -0
- {veris_cli-2.12.0 → veris_cli-2.13.0}/src/veris_cli/config.py +10 -0
- {veris_cli-2.12.0 → veris_cli-2.13.0}/.gitignore +0 -0
- {veris_cli-2.12.0 → veris_cli-2.13.0}/src/veris_cli/__init__.py +0 -0
- {veris_cli-2.12.0 → veris_cli-2.13.0}/src/veris_cli/api.py +0 -0
- {veris_cli-2.12.0 → veris_cli-2.13.0}/src/veris_cli/build_context.py +0 -0
- {veris_cli-2.12.0 → veris_cli-2.13.0}/src/veris_cli/output.py +0 -0
- {veris_cli-2.12.0 → veris_cli-2.13.0}/src/veris_cli/prompts.py +0 -0
- {veris_cli-2.12.0 → veris_cli-2.13.0}/src/veris_cli/scripts/__init__.py +0 -0
- {veris_cli-2.12.0 → veris_cli-2.13.0}/src/veris_cli/scripts/docker_build.sh +0 -0
- {veris_cli-2.12.0 → veris_cli-2.13.0}/src/veris_cli/scripts/docker_push.sh +0 -0
- {veris_cli-2.12.0 → veris_cli-2.13.0}/src/veris_cli/templates.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: veris-cli
|
|
3
|
-
Version: 2.12.0
|
|
3
|
+
Version: 2.13.0
|
|
4
4
|
Summary: CLI to connect local agents to the Veris backend
|
|
5
5
|
Project-URL: Homepage, https://github.com/veris-ai/veris-cli
|
|
6
6
|
Project-URL: Bug Tracker, https://github.com/veris-ai/veris-cli/issues
|
|
@@ -309,6 +309,13 @@ veris evaluation-runs list --run-id <id>
|
|
|
309
309
|
veris evaluation-runs status <eval-run-id> --run-id <id> [--watch]
|
|
310
310
|
```
|
|
311
311
|
|
|
312
|
+
### CI
|
|
313
|
+
|
|
314
|
+
```bash
|
|
315
|
+
# Run simulation + evaluation pipeline (config-driven)
|
|
316
|
+
veris ci run [--scenario-set-id <id>] [--env-id <id>] [--concurrency <n>] [--image-tag <tag>] [--simulation-timeout <seconds>]
|
|
317
|
+
```
|
|
318
|
+
|
|
312
319
|
### Runs
|
|
313
320
|
|
|
314
321
|
```bash
|
|
@@ -458,6 +465,79 @@ Each scenario runs in an isolated container with:
|
|
|
458
465
|
- Mounted logs directory at `/sessions` (for output)
|
|
459
466
|
- Environment variables from `.env` plus `SCENARIO_ID`
|
|
460
467
|
|
|
468
|
+
## CI/CD Integration
|
|
469
|
+
|
|
470
|
+
Run simulations automatically on every pull request and post results as a PR comment.
|
|
471
|
+
|
|
472
|
+
### Setup
|
|
473
|
+
|
|
474
|
+
1. Run `veris ci run` interactively once to select a scenario set — this saves the config to `.veris/config.yaml`:
|
|
475
|
+
|
|
476
|
+
```bash
|
|
477
|
+
veris ci run
|
|
478
|
+
```
|
|
479
|
+
|
|
480
|
+
2. Commit `.veris/config.yaml` to your repo (make sure it's not gitignored).
|
|
481
|
+
|
|
482
|
+
3. Add a `VERIS_API_KEY` secret to your repo (Settings → Secrets → Actions).
|
|
483
|
+
|
|
484
|
+
### GitHub Actions Workflow
|
|
485
|
+
|
|
486
|
+
```yaml
|
|
487
|
+
name: Veris Simulation
|
|
488
|
+
on:
|
|
489
|
+
pull_request:
|
|
490
|
+
branches: [main]
|
|
491
|
+
|
|
492
|
+
jobs:
|
|
493
|
+
simulate:
|
|
494
|
+
runs-on: ubuntu-latest
|
|
495
|
+
# Restrict to maintainers via GitHub environment protection rules
|
|
496
|
+
environment: veris-sim-ci
|
|
497
|
+
steps:
|
|
498
|
+
- uses: actions/checkout@v4
|
|
499
|
+
|
|
500
|
+
- uses: actions/setup-python@v5
|
|
501
|
+
with:
|
|
502
|
+
python-version: "3.11"
|
|
503
|
+
|
|
504
|
+
- name: Install veris-cli
|
|
505
|
+
run: pip install veris-cli
|
|
506
|
+
|
|
507
|
+
- name: Build & push agent image
|
|
508
|
+
env:
|
|
509
|
+
VERIS_API_KEY: ${{ secrets.VERIS_API_KEY }}
|
|
510
|
+
run: |
|
|
511
|
+
veris login "$VERIS_API_KEY"
|
|
512
|
+
veris env push --tag ${{ github.sha }} --remote
|
|
513
|
+
|
|
514
|
+
- name: Run simulation & evaluation
|
|
515
|
+
run: |
|
|
516
|
+
veris ci run --image-tag ${{ github.sha }} > veris-summary.md
|
|
517
|
+
|
|
518
|
+
- name: Comment on PR
|
|
519
|
+
uses: marocchino/sticky-pull-request-comment@v2
|
|
520
|
+
with:
|
|
521
|
+
path: veris-summary.md
|
|
522
|
+
```
|
|
523
|
+
|
|
524
|
+
The `environment: veris-sim-ci` line uses [GitHub environment protection rules](https://docs.github.com/en/actions/deployment/targeting-different-environments/using-environments-for-deployment) — once required reviewers are configured for that environment, the job (and the secrets it reads) only runs after a maintainer approves it, even on public repos.
|
|
525
|
+
|
|
526
|
+
### CLI Usage
|
|
527
|
+
|
|
528
|
+
```bash
|
|
529
|
+
# Everything from config — zero flags needed
|
|
530
|
+
veris ci run
|
|
531
|
+
|
|
532
|
+
# Override image tag (common in CI for PR builds)
|
|
533
|
+
veris ci run --image-tag $(git rev-parse --short HEAD)
|
|
534
|
+
|
|
535
|
+
# Override everything
|
|
536
|
+
veris ci run --scenario-set-id X --env-id Y --concurrency 5
|
|
537
|
+
```
|
|
538
|
+
|
|
539
|
+
Progress is printed to stderr, clean markdown to stdout. The command exits non-zero if the run or evaluation fails.
|
|
540
|
+
|
|
461
541
|
## How It Works
|
|
462
542
|
|
|
463
543
|
```
|
|
@@ -287,6 +287,13 @@ veris evaluation-runs list --run-id <id>
|
|
|
287
287
|
veris evaluation-runs status <eval-run-id> --run-id <id> [--watch]
|
|
288
288
|
```
|
|
289
289
|
|
|
290
|
+
### CI
|
|
291
|
+
|
|
292
|
+
```bash
|
|
293
|
+
# Run simulation + evaluation pipeline (config-driven)
|
|
294
|
+
veris ci run [--scenario-set-id <id>] [--env-id <id>] [--concurrency <n>] [--image-tag <tag>] [--simulation-timeout <seconds>]
|
|
295
|
+
```
|
|
296
|
+
|
|
290
297
|
### Runs
|
|
291
298
|
|
|
292
299
|
```bash
|
|
@@ -436,6 +443,79 @@ Each scenario runs in an isolated container with:
|
|
|
436
443
|
- Mounted logs directory at `/sessions` (for output)
|
|
437
444
|
- Environment variables from `.env` plus `SCENARIO_ID`
|
|
438
445
|
|
|
446
|
+
## CI/CD Integration
|
|
447
|
+
|
|
448
|
+
Run simulations automatically on every pull request and post results as a PR comment.
|
|
449
|
+
|
|
450
|
+
### Setup
|
|
451
|
+
|
|
452
|
+
1. Run `veris ci run` interactively once to select a scenario set — this saves the config to `.veris/config.yaml`:
|
|
453
|
+
|
|
454
|
+
```bash
|
|
455
|
+
veris ci run
|
|
456
|
+
```
|
|
457
|
+
|
|
458
|
+
2. Commit `.veris/config.yaml` to your repo (make sure it's not gitignored).
|
|
459
|
+
|
|
460
|
+
3. Add a `VERIS_API_KEY` secret to your repo (Settings → Secrets → Actions).
|
|
461
|
+
|
|
462
|
+
### GitHub Actions Workflow
|
|
463
|
+
|
|
464
|
+
```yaml
|
|
465
|
+
name: Veris Simulation
|
|
466
|
+
on:
|
|
467
|
+
pull_request:
|
|
468
|
+
branches: [main]
|
|
469
|
+
|
|
470
|
+
jobs:
|
|
471
|
+
simulate:
|
|
472
|
+
runs-on: ubuntu-latest
|
|
473
|
+
# Restrict to maintainers via GitHub environment protection rules
|
|
474
|
+
environment: veris-sim-ci
|
|
475
|
+
steps:
|
|
476
|
+
- uses: actions/checkout@v4
|
|
477
|
+
|
|
478
|
+
- uses: actions/setup-python@v5
|
|
479
|
+
with:
|
|
480
|
+
python-version: "3.11"
|
|
481
|
+
|
|
482
|
+
- name: Install veris-cli
|
|
483
|
+
run: pip install veris-cli
|
|
484
|
+
|
|
485
|
+
- name: Build & push agent image
|
|
486
|
+
env:
|
|
487
|
+
VERIS_API_KEY: ${{ secrets.VERIS_API_KEY }}
|
|
488
|
+
run: |
|
|
489
|
+
veris login "$VERIS_API_KEY"
|
|
490
|
+
veris env push --tag ${{ github.sha }} --remote
|
|
491
|
+
|
|
492
|
+
- name: Run simulation & evaluation
|
|
493
|
+
run: |
|
|
494
|
+
veris ci run --image-tag ${{ github.sha }} > veris-summary.md
|
|
495
|
+
|
|
496
|
+
- name: Comment on PR
|
|
497
|
+
uses: marocchino/sticky-pull-request-comment@v2
|
|
498
|
+
with:
|
|
499
|
+
path: veris-summary.md
|
|
500
|
+
```
|
|
501
|
+
|
|
502
|
+
The `environment: veris-sim-ci` line uses [GitHub environment protection rules](https://docs.github.com/en/actions/deployment/targeting-different-environments/using-environments-for-deployment) — once required reviewers are configured for that environment, the job (and the secrets it reads) only runs after a maintainer approves it, even on public repos.
|
|
503
|
+
|
|
504
|
+
### CLI Usage
|
|
505
|
+
|
|
506
|
+
```bash
|
|
507
|
+
# Everything from config — zero flags needed
|
|
508
|
+
veris ci run
|
|
509
|
+
|
|
510
|
+
# Override image tag (common in CI for PR builds)
|
|
511
|
+
veris ci run --image-tag $(git rev-parse --short HEAD)
|
|
512
|
+
|
|
513
|
+
# Override everything
|
|
514
|
+
veris ci run --scenario-set-id X --env-id Y --concurrency 5
|
|
515
|
+
```
|
|
516
|
+
|
|
517
|
+
Progress is printed to stderr, clean markdown to stdout. The command exits non-zero if the run or evaluation fails.
|
|
518
|
+
|
|
439
519
|
## How It Works
|
|
440
520
|
|
|
441
521
|
```
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
"""Markdown summary formatter for CI runs."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def _fmt_duration(seconds: int | None) -> str:
    """Format a duration in seconds as ``"<minutes>m <seconds>s"``.

    Args:
        seconds: Elapsed time in seconds, or ``None`` when unknown.
            Fractional values are truncated to whole seconds.

    Returns:
        ``"—"`` when ``seconds`` is ``None``; otherwise the minutes and
        zero-padded seconds, e.g. ``125`` -> ``"2m 05s"``.
    """
    if seconds is None:
        return "—"
    # The ``02d`` format spec raises ValueError for floats, so coerce to
    # whole seconds before splitting into minutes and seconds.
    m, s = divmod(int(seconds), 60)
    return f"{m}m {s:02d}s"
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def format_markdown_summary(
    run: dict,
    scenario_set: dict,
    simulations: list[dict],
    eval_run: dict,
) -> str:
    """Build a markdown report from run + evaluation data.

    Args:
        run: Run record. ``id`` and ``status`` are required;
            ``duration_seconds`` is optional.
        scenario_set: Scenario set record; ``title``, ``id`` and
            ``scenarios`` (items with ``id`` / ``title``) are read.
        simulations: Simulation records, used to map a simulation ``id``
            to its ``scenario_id``.
        eval_run: Evaluation run record; ``grading_results`` and
            ``assertion_results`` are read when present.

    Returns:
        Markdown text: a metadata table followed by a grading table and an
        assertion table, each emitted only when it has rows.

    Raises:
        KeyError: If ``run`` has no ``id`` or ``status``.
    """

    def _cell(value: object) -> str:
        """Render *value* so it cannot break out of a markdown table cell."""
        if value is None:
            return "—"
        # A raw pipe starts a new column and a newline ends the row.
        return str(value).replace("|", "\\|").replace("\r", " ").replace("\n", " ")

    # Build lookup maps. `.get` is used because records may be partial
    # (the CLI itself treats a scenario set's title as optional).
    scenarios = scenario_set.get("scenarios") or []
    scenario_map = {s.get("id"): s.get("title") for s in scenarios}
    sim_scenario_map = {s.get("id"): s.get("scenario_id") for s in simulations or []}

    def _scenario_label(sim_id: object) -> str:
        """Title of the scenario behind *sim_id*, falling back to its id."""
        scenario_id = sim_scenario_map.get(sim_id)
        return _cell(scenario_map.get(scenario_id) or scenario_id)

    lines: list[str] = ["## Veris Simulation Report", ""]

    # Metadata table
    set_title = _cell(scenario_set.get("title"))
    set_id = scenario_set.get("id") or "—"
    lines.extend(
        [
            "| Field | Value |",
            "|-------|-------|",
            f"| Run | `{run['id']}` |",
            f"| Status | {_cell(run['status'])} |",
            f"| Scenario Set | {set_title} (`{set_id}`) |",
            f"| Scenarios | {len(scenarios)} |",
            f"| Duration | {_fmt_duration(run.get('duration_seconds'))} |",
            "",
        ]
    )

    # Grading results
    grading_results = eval_run.get("grading_results") or []
    if grading_results:
        lines.append("### Grading Results")
        lines.append("")
        lines.append("| Scenario | Simulation | Score | Status |")
        lines.append("|----------|------------|-------|--------|")
        for gr in grading_results:
            sim_id = gr.get("simulation_id")
            score = gr.get("score")
            score_str = f"{score:.2f}" if score is not None else "—"
            lines.append(
                f"| {_scenario_label(sim_id)} | `{_cell(sim_id)}` "
                f"| {score_str} | {_cell(gr.get('status'))} |"
            )
        lines.append("")

    # Assertion results
    assertion_results = eval_run.get("assertion_results") or []
    if assertion_results:
        lines.append("### Assertion Results")
        lines.append("")
        lines.append("| Scenario | Verdict | Criteria |")
        lines.append("|----------|---------|----------|")
        for ar in assertion_results:
            verdict = _cell(ar.get("verdict") or "—")
            # Criteria summary: "<passed>/<total>", or a dash when no criteria were reported.
            criteria_results = (ar.get("result") or {}).get("criteria_results") or []
            if criteria_results:
                passed = sum(1 for c in criteria_results if c.get("result") == "PASS")
                criteria_str = f"{passed}/{len(criteria_results)}"
            else:
                criteria_str = "—"
            lines.append(
                f"| {_scenario_label(ar.get('simulation_id'))} | {verdict} | {criteria_str} |"
            )
        lines.append("")

    return "\n".join(lines)
|
|
@@ -1674,6 +1674,322 @@ def run_local(
|
|
|
1674
1674
|
print()
|
|
1675
1675
|
|
|
1676
1676
|
|
|
1677
|
+
## CI commands
|
|
1678
|
+
# Parent group for the CI subcommands registered below via ``@ci.command``.
@cli.group()
def ci():
    """CI/CD integration commands."""
|
|
1682
|
+
|
|
1683
|
+
|
|
1684
|
+
@ci.command(name="run")
@click.option("--scenario-set-id", default=None, help="Scenario set ID (overrides config)")
@click.option("--env-id", default=None, help="Environment ID (overrides config)")
@click.option("--concurrency", default=None, type=int, help="Parallel jobs (overrides config)")
@click.option("--image-tag", default=None, help="Image tag to run (default: latest)")
@click.option("--simulation-timeout", default=None, type=int, help="Timeout per sim in seconds")
@click.pass_context
def ci_run(ctx, scenario_set_id, env_id, concurrency, image_tag, simulation_timeout):
    """Run simulations and evaluations for CI, output markdown summary.

    Reads scenario_set_id, concurrency, and environment_id from
    .veris/config.yaml so zero flags are needed in CI. All values
    are overridable via CLI flags.

    Progress is printed to stderr; markdown summary to stdout.

    \b
    Examples:
      veris ci run # everything from config
      veris ci run --image-tag $(git rev-parse --short HEAD)
      veris ci run --scenario-set-id X --env-id Y --concurrency 5
    """
    from veris_cli.ci_output import format_markdown_summary

    def note(message):
        # Progress and diagnostics go to stderr so stdout carries only the markdown report.
        click.echo(message, err=True)

    def abort(message):
        # Report on stderr, then terminate the command with exit status 1.
        note(message)
        sys.exit(1)

    profile = _get_profile(ctx)
    project_config = ProjectConfig(profile=profile)
    ci_config = project_config.get_ci_config()
    interactive = sys.stdin.isatty()

    # Environment: the --env-id flag wins, otherwise fall back to the project config.
    if not env_id:
        env_id = project_config.get_environment_id()
    if not env_id:
        output.print_error("No environment_id found. Run `veris init` or pass --env-id.")
        sys.exit(1)

    try:
        api = VerisAPI(profile=profile)
    except ValueError as exc:
        output.print_error(str(exc))
        sys.exit(1)

    # Scenario set: flag, then config, then an interactive prompt (only when stdin is a TTY).
    if not scenario_set_id:
        scenario_set_id = ci_config.get("scenario_set_id")
    chosen_interactively = not scenario_set_id
    if chosen_interactively:
        if not interactive:
            abort(
                "Error: CI not configured. Run `veris ci run` interactively first "
                "or pass --scenario-set-id."
            )
        scenario_set_id = _ci_prompt_scenario_set(api, env_id)

    # Concurrency: flag, then config, then the default of 10.
    if not concurrency:
        concurrency = ci_config.get("concurrency", 10)

    # Persist the choice only when the user picked the scenario set at the prompt.
    if chosen_interactively:
        saved_ci = {"scenario_set_id": scenario_set_id}
        if concurrency != 10:
            saved_ci["concurrency"] = concurrency
        project_config.set_ci_config(saved_ci)
        note("Saved CI config to .veris/config.yaml")

    # Optional per-run overrides; unset (falsy) values are left out entirely.
    overrides = {
        key: value
        for key, value in (
            ("image_tag", image_tag),
            ("simulation_timeout", simulation_timeout),
        )
        if value
    }

    # Step 1: create the run.
    note(f"Creating run (scenario_set={scenario_set_id}, concurrency={concurrency})...")
    try:
        run = api.create_run(
            scenario_set_id=scenario_set_id,
            environment_id=env_id,
            parallel_jobs=concurrency,
            config=overrides or None,
        )
    except Exception as exc:
        abort(f"Error: Failed to create run: {exc}")

    run_id = run["id"]
    note(f"Run {run_id}: created")

    # Step 2: wait for the run to reach a terminal state.
    run = _ci_poll_run(api, run_id)
    run_status = run["status"]
    if run_status not in ("completed", "failed"):
        abort(f"Run {run_id}: {run_status} (unexpected terminal state)")

    run_failed = run_status == "failed"
    if run_failed:
        note(f"Run {run_id}: failed — {run.get('error_message', 'unknown error')}")
        # Nothing to evaluate when no simulation finished.
        if run.get("completed_simulations", 0) == 0:
            sys.exit(1)
        note("Some simulations completed — continuing to evaluation...")

    # Step 3: pick the grader (the first one the API lists).
    note("Resolving grader...")
    try:
        graders_resp = api.list_graders(env_id, scenario_set_id=scenario_set_id)
    except Exception as exc:
        abort(f"Error: Failed to list graders: {exc}")

    graders = graders_resp.get("graders", [])
    if not graders:
        abort("Error: No grader found for this environment/scenario set.")

    grader_id = graders[0]["id"]
    note(f"Using grader {grader_id}")

    # Step 4: trigger the evaluation.
    note("Triggering evaluation...")
    try:
        eval_resp = api.trigger_evaluation(run_id, grader_id)
    except Exception as exc:
        abort(f"Error: Failed to trigger evaluation: {exc}")

    eval_run_id = eval_resp["evaluation_run_id"]
    note(f"Evaluation run {eval_run_id}: started")

    # Step 5: wait for the evaluation; a failure is reported but the summary is still printed.
    eval_run = _ci_poll_eval(api, run_id, eval_run_id)
    if eval_run["status"] == "failed":
        note(f"Evaluation run {eval_run_id}: failed")

    # Step 6: fetch the data the report needs.
    try:
        scenario_set = api.get_scenario_set(scenario_set_id)
        sims_resp = api.list_run_simulations(run_id)
    except Exception as exc:
        abort(f"Error: Failed to fetch data: {exc}")

    simulations = sims_resp.get("simulations", [])

    # Step 7: the markdown summary is the only thing written to stdout.
    click.echo(format_markdown_summary(run, scenario_set, simulations, eval_run))

    # Exit non-zero if the run or the evaluation failed.
    if run_failed or eval_run.get("status") == "failed":
        sys.exit(1)
|
|
1838
|
+
|
|
1839
|
+
|
|
1840
|
+
def _ci_prompt_scenario_set(api: VerisAPI, env_id: str) -> str:
    """Ask the user to pick one of the environment's scenario sets for CI.

    Lists the scenario sets for ``env_id`` and hands them to
    ``prompts.select_from_list``. The process exits with status 1 when the
    listing fails, when there are no scenario sets, or when nothing is selected.

    Returns:
        Whatever ``prompts.select_from_list`` returns for the chosen entry
        (presumably the scenario set id — confirm against ``prompts``).
    """
    click.echo("Fetching scenario sets...", err=True)
    try:
        available = api.list_scenario_sets(environment_id=env_id)
    except Exception as exc:
        output.print_error(f"Failed to list scenario sets: {exc}")
        sys.exit(1)

    if not available:
        output.print_error("No scenario sets found for this environment.")
        sys.exit(1)

    # One menu entry per scenario set, labelled with title, id and scenario count.
    options = []
    for entry in available:
        set_id = entry["id"]
        label = f"{entry.get('title', '')} ({set_id}) — {entry.get('scenario_count', '?')} scenarios"
        options.append({"id": set_id, "title": label})

    picked = prompts.select_from_list(
        "Select a scenario set for CI:",
        options,
        flag_hint="--scenario-set-id",
    )
    if picked:
        return picked

    output.print_error("No scenario set selected")
    sys.exit(1)
|
|
1868
|
+
|
|
1869
|
+
|
|
1870
|
+
def _ci_poll_run(api: VerisAPI, run_id: str) -> dict:
    """Poll a run every 3 seconds until it is completed, failed or cancelled.

    When stderr is a TTY the progress is shown as a transient Rich spinner;
    otherwise a plain line is written to stderr each time the progress text
    changes. If fetching the run raises, the error is printed and the process
    exits with status 1.

    Returns:
        The last run record fetched, i.e. the one in a terminal state.
    """
    finished_states = {"completed", "failed", "cancelled"}
    on_tty = sys.stderr.isatty()
    previous_line = None

    live = None
    if on_tty:
        from rich.console import Console
        from rich.live import Live
        from rich.spinner import Spinner

        stderr_console = Console(stderr=True)
        live = Live(console=stderr_console, refresh_per_second=8, transient=True)
        live.start()

    try:
        while True:
            try:
                run = api.get_run(run_id)
            except Exception as exc:
                if live:
                    live.stop()
                click.echo(f"Error: Failed to poll run: {exc}", err=True)
                sys.exit(1)

            status = run["status"]
            completed = run.get("completed_simulations", 0)
            failed = run.get("failed_simulations", 0)
            total = run.get("total_simulations", 0)
            if total == 0:
                # Fall back to the job count while no simulations are reported.
                total = run.get("total_jobs", 0)
            running = total - completed - failed

            counts = ((running, "running"), (completed, "done"), (failed, "failed"))
            detail = ", ".join(f"{n} {label}" for n, label in counts if n > 0) or "waiting"
            line = f"{status} — {detail} ({total} total)"

            if status in finished_states:
                succeeded = status == "completed"
                if live:
                    # Stop the spinner before printing the final, persistent line.
                    live.stop()
                    mark = "[green]✓[/green]" if succeeded else "[red]✗[/red]"
                    stderr_console.print(f"{mark} Run {status} — {detail}")
                else:
                    mark = "✓" if succeeded else "✗"
                    click.echo(f"{mark} Run {status} — {detail}", err=True)
                return run

            if on_tty:
                live.update(Spinner("dots", text=line))
            elif line != previous_line:
                # Without a TTY, only log when the progress text actually changes.
                click.echo(f" Run: {line}", err=True)
                previous_line = line
            time.sleep(3)
    finally:
        # Also covers the sys.exit path above; the spinner is stopped on every exit.
        if live:
            live.stop()
|
|
1932
|
+
|
|
1933
|
+
|
|
1934
|
+
def _ci_poll_eval(api: VerisAPI, run_id: str, eval_run_id: str) -> dict:
    """Poll an evaluation run every 5 seconds until it is completed or failed.

    When stderr is a TTY the progress is shown as a transient Rich spinner;
    otherwise a plain line is written to stderr each time the progress text
    changes. If fetching the evaluation run raises, the error is printed and
    the process exits with status 1.

    Returns:
        The last evaluation run record fetched, i.e. the one in a terminal state.
    """
    finished_states = {"completed", "failed"}
    on_tty = sys.stderr.isatty()
    previous_line = None

    live = None
    if on_tty:
        from rich.console import Console
        from rich.live import Live
        from rich.spinner import Spinner

        stderr_console = Console(stderr=True)
        live = Live(console=stderr_console, refresh_per_second=8, transient=True)
        live.start()

    try:
        while True:
            try:
                ev = api.get_evaluation_run(run_id, eval_run_id)
            except Exception as exc:
                if live:
                    live.stop()
                click.echo(f"Error: Failed to poll evaluation: {exc}", err=True)
                sys.exit(1)

            status = ev["status"]
            # "Done" counts both completed and failed results.
            grading_done = ev.get("completed_grading_results", 0) + ev.get(
                "failed_grading_results", 0
            )
            grading_total = ev.get("total_grading_results", 0)
            assertions_done = ev.get("completed_assertion_results", 0) + ev.get(
                "failed_assertion_results", 0
            )
            assertions_total = ev.get("total_assertion_results", 0)
            line = (
                f"Evaluation {eval_run_id}: {status} "
                f"(grading {grading_done}/{grading_total}, "
                f"assertions {assertions_done}/{assertions_total})"
            )

            if status in finished_states:
                succeeded = status == "completed"
                if live:
                    # Stop the spinner before printing the final, persistent line.
                    live.stop()
                    mark = "[green]✓[/green]" if succeeded else "[red]✗[/red]"
                    stderr_console.print(f"{mark} {line}")
                else:
                    mark = "✓" if succeeded else "✗"
                    click.echo(f"{mark} {line}", err=True)
                return ev

            if on_tty:
                live.update(Spinner("dots", text=line))
            elif line != previous_line:
                # Without a TTY, only log when the progress text actually changes.
                click.echo(f" {line}", err=True)
                previous_line = line
            time.sleep(5)
    finally:
        # Also covers the sys.exit path above; the spinner is stopped on every exit.
        if live:
            live.stop()
|
|
1991
|
+
|
|
1992
|
+
|
|
1677
1993
|
def main():
|
|
1678
1994
|
"""Main entry point"""
|
|
1679
1995
|
cli()
|
|
@@ -200,3 +200,13 @@ class ProjectConfig:
|
|
|
200
200
|
profile_data["environment_id"] = environment_id
|
|
201
201
|
profile_data["environment_name"] = environment_name
|
|
202
202
|
self._save_profile(profile_data)
|
|
203
|
+
|
|
204
|
+
def get_ci_config(self) -> dict:
    """Get CI config block from project config.

    Returns:
        The mapping stored under the profile's ``ci`` key, or an empty dict
        when the key is missing or holds no value.
    """
    # `.get("ci", {})` alone returns None when the key exists with a null
    # value (presumably what a bare `ci:` line in the config file loads as),
    # and callers immediately call `.get(...)` on the result.
    return self._load_profile().get("ci") or {}
|
|
207
|
+
|
|
208
|
+
def set_ci_config(self, ci_config: dict) -> None:
    """Store ``ci_config`` under the ``ci`` key of the profile and save it.

    The profile is loaded, its ``ci`` entry is replaced wholesale (not
    merged) with ``ci_config``, and the result is written back.
    """
    stored = self._load_profile()
    stored.update(ci=ci_config)
    self._save_profile(stored)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|