wafer-cli 0.2.13__tar.gz → 0.2.14__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {wafer_cli-0.2.13 → wafer_cli-0.2.14}/PKG-INFO +1 -1
- {wafer_cli-0.2.13 → wafer_cli-0.2.14}/pyproject.toml +13 -3
- {wafer_cli-0.2.13 → wafer_cli-0.2.14}/tests/test_analytics.py +8 -6
- {wafer_cli-0.2.13 → wafer_cli-0.2.14}/tests/test_kernel_scope_cli.py +0 -2
- {wafer_cli-0.2.13 → wafer_cli-0.2.14}/tests/test_nsys_analyze.py +6 -10
- {wafer_cli-0.2.13 → wafer_cli-0.2.14}/tests/test_nsys_profile.py +7 -4
- {wafer_cli-0.2.13 → wafer_cli-0.2.14}/tests/test_rocprof_compute_integration.py +5 -4
- {wafer_cli-0.2.13 → wafer_cli-0.2.14}/tests/test_wevin_cli.py +43 -35
- {wafer_cli-0.2.13 → wafer_cli-0.2.14}/wafer/analytics.py +0 -1
- {wafer_cli-0.2.13 → wafer_cli-0.2.14}/wafer/auth.py +1 -1
- {wafer_cli-0.2.13 → wafer_cli-0.2.14}/wafer/autotuner.py +21 -17
- {wafer_cli-0.2.13 → wafer_cli-0.2.14}/wafer/cli.py +41 -3
- {wafer_cli-0.2.13 → wafer_cli-0.2.14}/wafer/evaluate.py +113 -53
- {wafer_cli-0.2.13 → wafer_cli-0.2.14}/wafer/kernel_scope.py +7 -9
- {wafer_cli-0.2.13 → wafer_cli-0.2.14}/wafer/nsys_profile.py +2 -3
- {wafer_cli-0.2.13 → wafer_cli-0.2.14}/wafer/output.py +10 -3
- {wafer_cli-0.2.13 → wafer_cli-0.2.14}/wafer/rocprof_compute.py +50 -42
- {wafer_cli-0.2.13 → wafer_cli-0.2.14}/wafer/rocprof_sdk.py +1 -1
- {wafer_cli-0.2.13 → wafer_cli-0.2.14}/wafer/targets_ops.py +0 -1
- wafer_cli-0.2.14/wafer/templates/optimize_kernelbench.py +137 -0
- {wafer_cli-0.2.13 → wafer_cli-0.2.14}/wafer/workspaces.py +0 -2
- {wafer_cli-0.2.13 → wafer_cli-0.2.14}/wafer_cli.egg-info/PKG-INFO +1 -1
- {wafer_cli-0.2.13 → wafer_cli-0.2.14}/wafer_cli.egg-info/SOURCES.txt +1 -0
- {wafer_cli-0.2.13 → wafer_cli-0.2.14}/README.md +0 -0
- {wafer_cli-0.2.13 → wafer_cli-0.2.14}/setup.cfg +0 -0
- {wafer_cli-0.2.13 → wafer_cli-0.2.14}/tests/test_billing.py +0 -0
- {wafer_cli-0.2.13 → wafer_cli-0.2.14}/tests/test_cli_coverage.py +0 -0
- {wafer_cli-0.2.13 → wafer_cli-0.2.14}/tests/test_cli_parity_integration.py +0 -0
- {wafer_cli-0.2.13 → wafer_cli-0.2.14}/tests/test_config_integration.py +0 -0
- {wafer_cli-0.2.13 → wafer_cli-0.2.14}/tests/test_file_operations_integration.py +0 -0
- {wafer_cli-0.2.13 → wafer_cli-0.2.14}/tests/test_output.py +0 -0
- {wafer_cli-0.2.13 → wafer_cli-0.2.14}/tests/test_skill_commands.py +0 -0
- {wafer_cli-0.2.13 → wafer_cli-0.2.14}/tests/test_ssh_integration.py +1 -1
- {wafer_cli-0.2.13 → wafer_cli-0.2.14}/tests/test_targets_ops.py +0 -0
- {wafer_cli-0.2.13 → wafer_cli-0.2.14}/tests/test_workflow_integration.py +1 -1
- {wafer_cli-0.2.13 → wafer_cli-0.2.14}/wafer/GUIDE.md +0 -0
- {wafer_cli-0.2.13 → wafer_cli-0.2.14}/wafer/__init__.py +0 -0
- {wafer_cli-0.2.13 → wafer_cli-0.2.14}/wafer/api_client.py +0 -0
- {wafer_cli-0.2.13 → wafer_cli-0.2.14}/wafer/billing.py +0 -0
- {wafer_cli-0.2.13 → wafer_cli-0.2.14}/wafer/config.py +0 -0
- {wafer_cli-0.2.13 → wafer_cli-0.2.14}/wafer/corpus.py +0 -0
- {wafer_cli-0.2.13 → wafer_cli-0.2.14}/wafer/global_config.py +0 -0
- {wafer_cli-0.2.13 → wafer_cli-0.2.14}/wafer/gpu_run.py +0 -0
- {wafer_cli-0.2.13 → wafer_cli-0.2.14}/wafer/inference.py +0 -0
- {wafer_cli-0.2.13 → wafer_cli-0.2.14}/wafer/ncu_analyze.py +0 -0
- {wafer_cli-0.2.13 → wafer_cli-0.2.14}/wafer/nsys_analyze.py +0 -0
- {wafer_cli-0.2.13 → wafer_cli-0.2.14}/wafer/problems.py +0 -0
- {wafer_cli-0.2.13 → wafer_cli-0.2.14}/wafer/rocprof_systems.py +0 -0
- {wafer_cli-0.2.13 → wafer_cli-0.2.14}/wafer/skills/wafer-guide/SKILL.md +0 -0
- {wafer_cli-0.2.13 → wafer_cli-0.2.14}/wafer/ssh_keys.py +0 -0
- {wafer_cli-0.2.13 → wafer_cli-0.2.14}/wafer/target_lock.py +0 -0
- {wafer_cli-0.2.13 → wafer_cli-0.2.14}/wafer/targets.py +0 -0
- {wafer_cli-0.2.13 → wafer_cli-0.2.14}/wafer/templates/__init__.py +0 -0
- {wafer_cli-0.2.13 → wafer_cli-0.2.14}/wafer/templates/ask_docs.py +0 -0
- {wafer_cli-0.2.13 → wafer_cli-0.2.14}/wafer/templates/optimize_kernel.py +0 -0
- {wafer_cli-0.2.13 → wafer_cli-0.2.14}/wafer/templates/trace_analyze.py +0 -0
- {wafer_cli-0.2.13 → wafer_cli-0.2.14}/wafer/tracelens.py +0 -0
- {wafer_cli-0.2.13 → wafer_cli-0.2.14}/wafer/wevin_cli.py +1 -1
- {wafer_cli-0.2.13 → wafer_cli-0.2.14}/wafer_cli.egg-info/dependency_links.txt +0 -0
- {wafer_cli-0.2.13 → wafer_cli-0.2.14}/wafer_cli.egg-info/entry_points.txt +0 -0
- {wafer_cli-0.2.13 → wafer_cli-0.2.14}/wafer_cli.egg-info/requires.txt +0 -0
- {wafer_cli-0.2.13 → wafer_cli-0.2.14}/wafer_cli.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "wafer-cli"
|
|
3
|
-
version = "0.2.13"
|
|
3
|
+
version = "0.2.14"
|
|
4
4
|
description = "CLI tool for running commands on remote GPUs and GPU kernel optimization agent"
|
|
5
5
|
requires-python = ">=3.11"
|
|
6
6
|
dependencies = [
|
|
@@ -76,9 +76,19 @@ ignore = [
|
|
|
76
76
|
]
|
|
77
77
|
|
|
78
78
|
[tool.ruff.lint.per-file-ignores]
|
|
79
|
-
"tests/**/*.py" = ["ANN201"] # Don't require
|
|
80
|
-
"wafer/evaluate.py" = ["PLR0915", "PLR1702", "E402"] # complex deployment flows - TODO: refactor
|
|
79
|
+
"tests/**/*.py" = ["ANN001", "ANN201", "ANN202", "ANN204"] # Don't require type annotations in tests
|
|
80
|
+
"wafer/evaluate.py" = ["PLR0915", "PLR1702", "E402", "PLW2901", "ASYNC221"] # complex deployment flows - TODO: refactor
|
|
81
81
|
"wafer/output.py" = ["ANN401"] # Output collector uses **kwargs for flexible event data
|
|
82
|
+
"wafer/autotuner.py" = ["PLR0915", "PLR1702", "B007", "B904"] # complex sweep logic - TODO: refactor
|
|
83
|
+
"wafer/ncu_analyze.py" = ["PLR0915", "PLR1702"] # complex parsing logic - TODO: refactor
|
|
84
|
+
"wafer/cli.py" = ["PLR0915"] # CLI commands can be long - TODO: refactor
|
|
85
|
+
"wafer/targets.py" = ["PLR1702"] # complex target init flow - TODO: refactor
|
|
86
|
+
"wafer/workspaces.py" = ["PLR1702"] # SSE streaming has nested blocks - TODO: refactor
|
|
87
|
+
"wafer/kernel_scope.py" = ["ANN001", "ANN202", "PLR0913"] # complex filtering logic
|
|
88
|
+
"wafer/api_client.py" = ["PLR0913"] # API client needs many params
|
|
89
|
+
"wafer/rocprof_compute.py" = ["PLR0913", "B904"] # profiler commands need many params
|
|
90
|
+
"wafer/rocprof_sdk.py" = ["PLR0913"] # profiler commands need many params
|
|
91
|
+
"wafer/rocprof_systems.py" = ["PLR0913"] # profiler commands need many params
|
|
82
92
|
|
|
83
93
|
[tool.ruff.lint.pylint]
|
|
84
94
|
max-args = 7 # Max function arguments (Tiger Style: few parameters)
|
|
@@ -11,12 +11,9 @@ Tests cover:
|
|
|
11
11
|
Run with: PYTHONPATH=apps/wafer-cli uv run pytest apps/wafer-cli/tests/test_analytics.py -v
|
|
12
12
|
"""
|
|
13
13
|
|
|
14
|
-
import json
|
|
15
14
|
from pathlib import Path
|
|
16
15
|
from unittest.mock import MagicMock, patch
|
|
17
16
|
|
|
18
|
-
import pytest
|
|
19
|
-
|
|
20
17
|
|
|
21
18
|
class TestAnalyticsInit:
|
|
22
19
|
"""Test analytics initialization."""
|
|
@@ -403,16 +400,17 @@ class TestCliCallback:
|
|
|
403
400
|
def test_cli_callback_tracks_command(self) -> None:
|
|
404
401
|
"""CLI callback should track command execution."""
|
|
405
402
|
from typer.testing import CliRunner
|
|
403
|
+
|
|
406
404
|
from wafer.cli import app
|
|
407
405
|
|
|
408
406
|
runner = CliRunner()
|
|
409
407
|
|
|
410
408
|
with patch("wafer.analytics.init_analytics") as mock_init, \
|
|
411
|
-
patch("wafer.analytics.track_command")
|
|
409
|
+
patch("wafer.analytics.track_command"):
|
|
412
410
|
mock_init.return_value = True
|
|
413
411
|
|
|
414
412
|
# Run a simple command that doesn't require auth
|
|
415
|
-
|
|
413
|
+
runner.invoke(app, ["guide"])
|
|
416
414
|
|
|
417
415
|
# Analytics should be initialized
|
|
418
416
|
mock_init.assert_called()
|
|
@@ -423,6 +421,7 @@ class TestCliCallback:
|
|
|
423
421
|
def test_cli_help_does_not_crash_analytics(self) -> None:
|
|
424
422
|
"""CLI --help should not crash due to analytics."""
|
|
425
423
|
from typer.testing import CliRunner
|
|
424
|
+
|
|
426
425
|
from wafer.cli import app
|
|
427
426
|
|
|
428
427
|
runner = CliRunner()
|
|
@@ -435,6 +434,7 @@ class TestCliCallback:
|
|
|
435
434
|
def test_cli_subcommand_help_works(self) -> None:
|
|
436
435
|
"""Subcommand --help should work with analytics."""
|
|
437
436
|
from typer.testing import CliRunner
|
|
437
|
+
|
|
438
438
|
from wafer.cli import app
|
|
439
439
|
|
|
440
440
|
runner = CliRunner()
|
|
@@ -452,6 +452,7 @@ class TestLoginLogoutAnalytics:
|
|
|
452
452
|
from unittest.mock import MagicMock
|
|
453
453
|
|
|
454
454
|
from typer.testing import CliRunner
|
|
455
|
+
|
|
455
456
|
from wafer.cli import app
|
|
456
457
|
|
|
457
458
|
runner = CliRunner()
|
|
@@ -466,7 +467,7 @@ class TestLoginLogoutAnalytics:
|
|
|
466
467
|
patch("wafer.analytics.track_login") as mock_track_login, \
|
|
467
468
|
patch("wafer.analytics.init_analytics", return_value=True):
|
|
468
469
|
|
|
469
|
-
|
|
470
|
+
runner.invoke(app, ["login", "--token", "test-token"])
|
|
470
471
|
|
|
471
472
|
# track_login should be called
|
|
472
473
|
mock_track_login.assert_called_once_with("test-user-id", "test@example.com")
|
|
@@ -474,6 +475,7 @@ class TestLoginLogoutAnalytics:
|
|
|
474
475
|
def test_logout_calls_track_logout(self) -> None:
|
|
475
476
|
"""Logout command should call track_logout."""
|
|
476
477
|
from typer.testing import CliRunner
|
|
478
|
+
|
|
477
479
|
from wafer.cli import app
|
|
478
480
|
|
|
479
481
|
runner = CliRunner()
|
|
@@ -8,7 +8,6 @@ Run with: PYTHONPATH=apps/wafer-cli uv run pytest apps/wafer-cli/tests/test_kern
|
|
|
8
8
|
import json
|
|
9
9
|
import re
|
|
10
10
|
from pathlib import Path
|
|
11
|
-
from unittest.mock import patch, MagicMock
|
|
12
11
|
|
|
13
12
|
import pytest
|
|
14
13
|
from typer.testing import CliRunner
|
|
@@ -20,7 +19,6 @@ from wafer.kernel_scope import (
|
|
|
20
19
|
targets_command,
|
|
21
20
|
)
|
|
22
21
|
|
|
23
|
-
|
|
24
22
|
runner = CliRunner()
|
|
25
23
|
|
|
26
24
|
|
|
@@ -7,16 +7,12 @@ Tests the nsys_analyze module including:
|
|
|
7
7
|
- API fallback logic
|
|
8
8
|
"""
|
|
9
9
|
|
|
10
|
-
import json
|
|
11
|
-
import os
|
|
12
|
-
import platform
|
|
13
10
|
from pathlib import Path
|
|
14
|
-
from unittest.mock import
|
|
11
|
+
from unittest.mock import patch
|
|
15
12
|
|
|
16
13
|
import pytest
|
|
17
14
|
|
|
18
15
|
from wafer.nsys_analyze import (
|
|
19
|
-
NSYSCheckResult,
|
|
20
16
|
_find_nsys,
|
|
21
17
|
_get_install_command,
|
|
22
18
|
_get_platform,
|
|
@@ -247,7 +243,7 @@ class TestAnalyzeNsysProfile:
|
|
|
247
243
|
with patch("wafer.nsys_analyze._find_nsys", return_value=None):
|
|
248
244
|
with patch("wafer.nsys_analyze._analyze_remote_api") as mock_remote:
|
|
249
245
|
mock_remote.return_value = '{"success": true}'
|
|
250
|
-
|
|
246
|
+
analyze_nsys_profile(test_file)
|
|
251
247
|
mock_remote.assert_called_once()
|
|
252
248
|
|
|
253
249
|
def test_uses_local_when_nsys_installed(self, tmp_path: Path) -> None:
|
|
@@ -258,7 +254,7 @@ class TestAnalyzeNsysProfile:
|
|
|
258
254
|
with patch("wafer.nsys_analyze._find_nsys", return_value="/usr/bin/nsys"):
|
|
259
255
|
with patch("wafer.nsys_analyze._analyze_local") as mock_local:
|
|
260
256
|
mock_local.return_value = '{"success": true}'
|
|
261
|
-
|
|
257
|
+
analyze_nsys_profile(test_file)
|
|
262
258
|
mock_local.assert_called_once()
|
|
263
259
|
|
|
264
260
|
def test_respects_remote_flag(self, tmp_path: Path) -> None:
|
|
@@ -269,7 +265,7 @@ class TestAnalyzeNsysProfile:
|
|
|
269
265
|
with patch("wafer.nsys_analyze._find_nsys", return_value="/usr/bin/nsys"):
|
|
270
266
|
with patch("wafer.nsys_analyze._analyze_remote_api") as mock_remote:
|
|
271
267
|
mock_remote.return_value = '{"success": true}'
|
|
272
|
-
|
|
268
|
+
analyze_nsys_profile(test_file, remote=True)
|
|
273
269
|
mock_remote.assert_called_once()
|
|
274
270
|
|
|
275
271
|
def test_uses_workspace_when_specified(self, tmp_path: Path) -> None:
|
|
@@ -279,7 +275,7 @@ class TestAnalyzeNsysProfile:
|
|
|
279
275
|
|
|
280
276
|
with patch("wafer.nsys_analyze._analyze_workspace") as mock_workspace:
|
|
281
277
|
mock_workspace.return_value = '{"success": true}'
|
|
282
|
-
|
|
278
|
+
analyze_nsys_profile(test_file, target="workspace:abc123")
|
|
283
279
|
mock_workspace.assert_called_once_with(test_file, "abc123", False)
|
|
284
280
|
|
|
285
281
|
def test_uses_target_when_specified(self, tmp_path: Path) -> None:
|
|
@@ -289,5 +285,5 @@ class TestAnalyzeNsysProfile:
|
|
|
289
285
|
|
|
290
286
|
with patch("wafer.nsys_analyze._analyze_remote_direct") as mock_direct:
|
|
291
287
|
mock_direct.return_value = '{"success": true}'
|
|
292
|
-
|
|
288
|
+
analyze_nsys_profile(test_file, target="vultr-b200")
|
|
293
289
|
mock_direct.assert_called_once_with(test_file, "vultr-b200", False)
|
|
@@ -1,14 +1,15 @@
|
|
|
1
1
|
"""Tests for NSYS profile command."""
|
|
2
2
|
|
|
3
|
+
from unittest.mock import patch
|
|
4
|
+
|
|
3
5
|
import pytest
|
|
4
|
-
|
|
6
|
+
|
|
5
7
|
from wafer.nsys_profile import (
|
|
6
8
|
NSYSProfileOptions,
|
|
7
9
|
NSYSProfileResult,
|
|
8
|
-
profile_local,
|
|
9
|
-
profile_workspace,
|
|
10
|
-
profile_and_analyze,
|
|
11
10
|
_build_nsys_command,
|
|
11
|
+
profile_and_analyze,
|
|
12
|
+
profile_local,
|
|
12
13
|
)
|
|
13
14
|
|
|
14
15
|
|
|
@@ -136,6 +137,7 @@ class TestCLIIntegration:
|
|
|
136
137
|
def test_profile_command_help(self):
|
|
137
138
|
"""Verify profile command exists and has help text."""
|
|
138
139
|
from typer.testing import CliRunner
|
|
140
|
+
|
|
139
141
|
from wafer.cli import app
|
|
140
142
|
|
|
141
143
|
runner = CliRunner()
|
|
@@ -148,6 +150,7 @@ class TestCLIIntegration:
|
|
|
148
150
|
def test_profile_command_requires_command(self):
|
|
149
151
|
"""Verify profile command requires a command argument."""
|
|
150
152
|
from typer.testing import CliRunner
|
|
153
|
+
|
|
151
154
|
from wafer.cli import app
|
|
152
155
|
|
|
153
156
|
runner = CliRunner()
|
|
@@ -10,9 +10,8 @@ Follows similar testing patterns from the codebase.
|
|
|
10
10
|
"""
|
|
11
11
|
|
|
12
12
|
import json
|
|
13
|
-
import subprocess
|
|
14
13
|
from pathlib import Path
|
|
15
|
-
from unittest.mock import
|
|
14
|
+
from unittest.mock import Mock, patch
|
|
16
15
|
|
|
17
16
|
import pytest
|
|
18
17
|
|
|
@@ -139,9 +138,10 @@ class TestIntegrationWithCore:
|
|
|
139
138
|
tmp_path: Path
|
|
140
139
|
) -> None:
|
|
141
140
|
"""Test CLI uses wafer-core launch_gui."""
|
|
142
|
-
from wafer.rocprof_compute import launch_gui
|
|
143
141
|
from wafer_core.lib.rocprofiler.compute.types import CheckResult
|
|
144
142
|
|
|
143
|
+
from wafer.rocprof_compute import launch_gui
|
|
144
|
+
|
|
145
145
|
test_folder = tmp_path / "results"
|
|
146
146
|
test_folder.mkdir()
|
|
147
147
|
|
|
@@ -173,9 +173,10 @@ class TestCommandFormat:
|
|
|
173
173
|
tmp_path: Path
|
|
174
174
|
) -> None:
|
|
175
175
|
"""Test generated command uses correct CLI flags when using external binary."""
|
|
176
|
-
from wafer.rocprof_compute import launch_gui
|
|
177
176
|
from wafer_core.lib.rocprofiler.compute.types import CheckResult, LaunchResult
|
|
178
177
|
|
|
178
|
+
from wafer.rocprof_compute import launch_gui
|
|
179
|
+
|
|
179
180
|
test_folder = tmp_path / "results"
|
|
180
181
|
test_folder.mkdir()
|
|
181
182
|
|
|
@@ -6,10 +6,8 @@ because they require complex integration tests to execute.
|
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
8
|
import json
|
|
9
|
-
import os
|
|
10
9
|
import tempfile
|
|
11
10
|
from pathlib import Path
|
|
12
|
-
from unittest.mock import patch
|
|
13
11
|
|
|
14
12
|
from wafer_core.rollouts import Endpoint, FileSessionStore
|
|
15
13
|
from wafer_core.rollouts.dtypes import Message
|
|
@@ -125,7 +123,7 @@ def test_list_sessions_returns_metadata_only():
|
|
|
125
123
|
endpoint = Endpoint(provider="anthropic", model="claude-sonnet-4.5")
|
|
126
124
|
env_config = EnvironmentConfig(type="localfs")
|
|
127
125
|
|
|
128
|
-
async def run_test():
|
|
126
|
+
async def run_test() -> None:
|
|
129
127
|
# Create a session with messages
|
|
130
128
|
session = await session_store.create(endpoint=endpoint, environment=env_config)
|
|
131
129
|
|
|
@@ -182,8 +180,9 @@ def test_get_session_json_output():
|
|
|
182
180
|
|
|
183
181
|
Tests the CLI command used by wevin-extension to load session messages.
|
|
184
182
|
"""
|
|
185
|
-
import trio
|
|
186
183
|
from dataclasses import asdict
|
|
184
|
+
|
|
185
|
+
import trio
|
|
187
186
|
from wafer_core.rollouts import EnvironmentConfig
|
|
188
187
|
|
|
189
188
|
with tempfile.TemporaryDirectory() as tmpdir:
|
|
@@ -193,7 +192,7 @@ def test_get_session_json_output():
|
|
|
193
192
|
endpoint = Endpoint(provider="anthropic", model="claude-sonnet-4.5")
|
|
194
193
|
env_config = EnvironmentConfig(type="localfs")
|
|
195
194
|
|
|
196
|
-
async def run_test():
|
|
195
|
+
async def run_test() -> None:
|
|
197
196
|
session = await session_store.create(endpoint=endpoint, environment=env_config)
|
|
198
197
|
|
|
199
198
|
# Add some messages
|
|
@@ -240,7 +239,7 @@ def test_get_session_not_found():
|
|
|
240
239
|
with tempfile.TemporaryDirectory() as tmpdir:
|
|
241
240
|
session_store = FileSessionStore(base_dir=Path(tmpdir))
|
|
242
241
|
|
|
243
|
-
async def run_test():
|
|
242
|
+
async def run_test() -> None:
|
|
244
243
|
session, err = await session_store.get("nonexistent-session-id")
|
|
245
244
|
assert session is None
|
|
246
245
|
assert err is not None
|
|
@@ -289,7 +288,7 @@ def test_session_resume_loads_context():
|
|
|
289
288
|
endpoint = Endpoint(provider="anthropic", model="claude-sonnet-4.5")
|
|
290
289
|
env_config = EnvironmentConfig(type="localfs")
|
|
291
290
|
|
|
292
|
-
async def run_test():
|
|
291
|
+
async def run_test() -> None:
|
|
293
292
|
# Create initial session with messages
|
|
294
293
|
session = await session_store.create(endpoint=endpoint, environment=env_config)
|
|
295
294
|
|
|
@@ -335,8 +334,9 @@ def test_cli_parameters_accepted():
|
|
|
335
334
|
- max_tool_fails: Exit after N failures
|
|
336
335
|
- max_turns: Limit conversation turns
|
|
337
336
|
"""
|
|
338
|
-
from wafer.wevin_cli import main as wevin_main
|
|
339
337
|
import inspect
|
|
338
|
+
|
|
339
|
+
from wafer.wevin_cli import main as wevin_main
|
|
340
340
|
|
|
341
341
|
sig = inspect.signature(wevin_main)
|
|
342
342
|
params = sig.parameters
|
|
@@ -370,15 +370,16 @@ def test_streaming_frontend_session_start_resumed_session():
|
|
|
370
370
|
|
|
371
371
|
Edge case: When session_id is known upfront (from --resume), emit immediately.
|
|
372
372
|
"""
|
|
373
|
+
from io import StringIO
|
|
374
|
+
|
|
373
375
|
import trio
|
|
376
|
+
|
|
374
377
|
from wafer.wevin_cli import StreamingChunkFrontend
|
|
375
|
-
from io import StringIO
|
|
376
|
-
import sys
|
|
377
378
|
|
|
378
379
|
# Capture stdout
|
|
379
380
|
captured_output = StringIO()
|
|
380
381
|
|
|
381
|
-
async def run_test():
|
|
382
|
+
async def run_test() -> None:
|
|
382
383
|
frontend = StreamingChunkFrontend(
|
|
383
384
|
session_id="test-session-123",
|
|
384
385
|
model="claude-sonnet-4.5"
|
|
@@ -386,8 +387,8 @@ def test_streaming_frontend_session_start_resumed_session():
|
|
|
386
387
|
|
|
387
388
|
# Mock _emit to capture output
|
|
388
389
|
emitted_events = []
|
|
389
|
-
|
|
390
|
-
def mock_emit(obj):
|
|
390
|
+
|
|
391
|
+
def mock_emit(obj) -> None:
|
|
391
392
|
emitted_events.append(obj)
|
|
392
393
|
# Also print to verify JSON format
|
|
393
394
|
print(json.dumps(obj), file=captured_output)
|
|
@@ -418,14 +419,15 @@ def test_streaming_frontend_session_start_no_session_id():
|
|
|
418
419
|
Edge case: New session - session_id not known until after run_interactive.
|
|
419
420
|
"""
|
|
420
421
|
import trio
|
|
422
|
+
|
|
421
423
|
from wafer.wevin_cli import StreamingChunkFrontend
|
|
422
424
|
|
|
423
|
-
async def run_test():
|
|
425
|
+
async def run_test() -> None:
|
|
424
426
|
frontend = StreamingChunkFrontend(session_id=None, model=None)
|
|
425
427
|
|
|
426
428
|
emitted_events = []
|
|
427
|
-
|
|
428
|
-
def mock_emit(obj):
|
|
429
|
+
|
|
430
|
+
def mock_emit(obj) -> None:
|
|
429
431
|
emitted_events.append(obj)
|
|
430
432
|
|
|
431
433
|
frontend._emit = mock_emit
|
|
@@ -444,15 +446,16 @@ def test_streaming_frontend_emit_session_start_new_session():
|
|
|
444
446
|
Edge case: Session created during run_interactive, emit after first state.
|
|
445
447
|
"""
|
|
446
448
|
import trio
|
|
449
|
+
|
|
447
450
|
from wafer.wevin_cli import StreamingChunkFrontend
|
|
448
451
|
|
|
449
|
-
async def run_test():
|
|
452
|
+
async def run_test() -> None:
|
|
450
453
|
# Frontend starts without session_id (new session)
|
|
451
454
|
frontend = StreamingChunkFrontend(session_id=None, model="claude-sonnet-4.5")
|
|
452
455
|
|
|
453
456
|
emitted_events = []
|
|
454
|
-
|
|
455
|
-
def mock_emit(obj):
|
|
457
|
+
|
|
458
|
+
def mock_emit(obj) -> None:
|
|
456
459
|
emitted_events.append(obj)
|
|
457
460
|
|
|
458
461
|
frontend._emit = mock_emit
|
|
@@ -480,15 +483,16 @@ def test_streaming_frontend_emit_session_start_model_none():
|
|
|
480
483
|
Edge case: Model might be None, should use frontend's model or None.
|
|
481
484
|
"""
|
|
482
485
|
import trio
|
|
486
|
+
|
|
483
487
|
from wafer.wevin_cli import StreamingChunkFrontend
|
|
484
488
|
|
|
485
|
-
async def run_test():
|
|
489
|
+
async def run_test() -> None:
|
|
486
490
|
# Frontend with model, but emit_session_start called with None
|
|
487
491
|
frontend = StreamingChunkFrontend(session_id=None, model="claude-sonnet-4.5")
|
|
488
492
|
|
|
489
493
|
emitted_events = []
|
|
490
|
-
|
|
491
|
-
def mock_emit(obj):
|
|
494
|
+
|
|
495
|
+
def mock_emit(obj) -> None:
|
|
492
496
|
emitted_events.append(obj)
|
|
493
497
|
|
|
494
498
|
frontend._emit = mock_emit
|
|
@@ -518,14 +522,15 @@ def test_streaming_frontend_emit_session_start_multiple_calls():
|
|
|
518
522
|
Edge case: Multiple calls should work (e.g., if called from different code paths).
|
|
519
523
|
"""
|
|
520
524
|
import trio
|
|
525
|
+
|
|
521
526
|
from wafer.wevin_cli import StreamingChunkFrontend
|
|
522
527
|
|
|
523
|
-
async def run_test():
|
|
528
|
+
async def run_test() -> None:
|
|
524
529
|
frontend = StreamingChunkFrontend(session_id=None, model="claude-sonnet-4.5")
|
|
525
530
|
|
|
526
531
|
emitted_events = []
|
|
527
|
-
|
|
528
|
-
def mock_emit(obj):
|
|
532
|
+
|
|
533
|
+
def mock_emit(obj) -> None:
|
|
529
534
|
emitted_events.append(obj)
|
|
530
535
|
|
|
531
536
|
frontend._emit = mock_emit
|
|
@@ -550,14 +555,15 @@ def test_streaming_frontend_session_start_empty_states():
|
|
|
550
555
|
Edge case: run_interactive might return empty list (shouldn't crash).
|
|
551
556
|
"""
|
|
552
557
|
import trio
|
|
558
|
+
|
|
553
559
|
from wafer.wevin_cli import StreamingChunkFrontend
|
|
554
560
|
|
|
555
|
-
async def run_test():
|
|
561
|
+
async def run_test() -> None:
|
|
556
562
|
frontend = StreamingChunkFrontend(session_id=None, model="claude-sonnet-4.5")
|
|
557
563
|
|
|
558
564
|
emitted_events = []
|
|
559
|
-
|
|
560
|
-
def mock_emit(obj):
|
|
565
|
+
|
|
566
|
+
def mock_emit(obj) -> None:
|
|
561
567
|
emitted_events.append(obj)
|
|
562
568
|
|
|
563
569
|
frontend._emit = mock_emit
|
|
@@ -583,19 +589,20 @@ def test_streaming_frontend_session_start_state_without_session_id():
|
|
|
583
589
|
Edge case: First state might not have session_id set yet.
|
|
584
590
|
"""
|
|
585
591
|
import trio
|
|
592
|
+
|
|
586
593
|
from wafer.wevin_cli import StreamingChunkFrontend
|
|
587
594
|
|
|
588
595
|
# Mock AgentState for testing
|
|
589
596
|
class MockState:
|
|
590
|
-
def __init__(self, session_id=None):
|
|
597
|
+
def __init__(self, session_id=None) -> None:
|
|
591
598
|
self.session_id = session_id
|
|
592
599
|
|
|
593
|
-
async def run_test():
|
|
600
|
+
async def run_test() -> None:
|
|
594
601
|
frontend = StreamingChunkFrontend(session_id=None, model="claude-sonnet-4.5")
|
|
595
602
|
|
|
596
603
|
emitted_events = []
|
|
597
|
-
|
|
598
|
-
def mock_emit(obj):
|
|
604
|
+
|
|
605
|
+
def mock_emit(obj) -> None:
|
|
599
606
|
emitted_events.append(obj)
|
|
600
607
|
|
|
601
608
|
frontend._emit = mock_emit
|
|
@@ -631,9 +638,10 @@ def test_streaming_frontend_session_start_resumed_then_new():
|
|
|
631
638
|
Edge case: --resume used but states return different session_id (should use states one).
|
|
632
639
|
"""
|
|
633
640
|
import trio
|
|
641
|
+
|
|
634
642
|
from wafer.wevin_cli import StreamingChunkFrontend
|
|
635
643
|
|
|
636
|
-
async def run_test():
|
|
644
|
+
async def run_test() -> None:
|
|
637
645
|
# Start with resumed session_id
|
|
638
646
|
frontend = StreamingChunkFrontend(
|
|
639
647
|
session_id="resumed-session-123",
|
|
@@ -641,8 +649,8 @@ def test_streaming_frontend_session_start_resumed_then_new():
|
|
|
641
649
|
)
|
|
642
650
|
|
|
643
651
|
emitted_events = []
|
|
644
|
-
|
|
645
|
-
def mock_emit(obj):
|
|
652
|
+
|
|
653
|
+
def mock_emit(obj) -> None:
|
|
646
654
|
emitted_events.append(obj)
|
|
647
655
|
|
|
648
656
|
frontend._emit = mock_emit
|
|
@@ -419,7 +419,7 @@ def device_code_login(timeout: int = 600) -> tuple[str, str | None]:
|
|
|
419
419
|
print(f" {CROSS}\n")
|
|
420
420
|
raise RuntimeError(f"CLI auth flow failed: {response.status_code} {response.text}")
|
|
421
421
|
|
|
422
|
-
except httpx.RequestError
|
|
422
|
+
except httpx.RequestError:
|
|
423
423
|
# Network error, retry
|
|
424
424
|
print("!", end="", flush=True)
|
|
425
425
|
last_poll = time.time()
|
|
@@ -5,6 +5,7 @@ This module provides the implementation for the `wafer autotuner` commands.
|
|
|
5
5
|
|
|
6
6
|
import asyncio
|
|
7
7
|
import json
|
|
8
|
+
from datetime import UTC
|
|
8
9
|
from pathlib import Path
|
|
9
10
|
from typing import Any
|
|
10
11
|
|
|
@@ -32,13 +33,14 @@ def run_sweep_command(
|
|
|
32
33
|
raise FileNotFoundError(f"Config file not found: {config_file}")
|
|
33
34
|
|
|
34
35
|
# Import autotuner core
|
|
35
|
-
from datetime import datetime
|
|
36
|
+
from datetime import datetime
|
|
36
37
|
from uuid import uuid4
|
|
38
|
+
|
|
37
39
|
import trio
|
|
38
40
|
from wafer_core.tools.autotuner import AutotunerConfig, run_sweep
|
|
39
41
|
from wafer_core.tools.autotuner.dtypes import Sweep, Trial
|
|
40
42
|
from wafer_core.tools.autotuner.search import generate_grid_trials
|
|
41
|
-
from wafer_core.tools.autotuner.storage import
|
|
43
|
+
from wafer_core.tools.autotuner.storage import add_trial, create_sweep, get_sweep, get_trials
|
|
42
44
|
|
|
43
45
|
# Load or reconstruct config
|
|
44
46
|
if resume_sweep_id:
|
|
@@ -189,8 +191,8 @@ def run_sweep_command(
|
|
|
189
191
|
status="running",
|
|
190
192
|
total_trials=total_trials,
|
|
191
193
|
completed_trials=0,
|
|
192
|
-
created_at=datetime.now(
|
|
193
|
-
updated_at=datetime.now(
|
|
194
|
+
created_at=datetime.now(UTC),
|
|
195
|
+
updated_at=datetime.now(UTC),
|
|
194
196
|
)
|
|
195
197
|
|
|
196
198
|
# Create sweep and get the actual ID from the API
|
|
@@ -245,7 +247,7 @@ def run_sweep_command(
|
|
|
245
247
|
# Helper to update sweep status
|
|
246
248
|
async def update_sweep_status(status: str) -> None:
|
|
247
249
|
import httpx
|
|
248
|
-
from wafer_core.tools.autotuner.storage import
|
|
250
|
+
from wafer_core.tools.autotuner.storage import _get_auth_headers, get_api_url
|
|
249
251
|
|
|
250
252
|
api_url = get_api_url()
|
|
251
253
|
headers = _get_auth_headers()
|
|
@@ -260,7 +262,7 @@ def run_sweep_command(
|
|
|
260
262
|
# Note: working_dir already set based on is_resume flag
|
|
261
263
|
|
|
262
264
|
try:
|
|
263
|
-
|
|
265
|
+
await run_sweep(
|
|
264
266
|
config=config,
|
|
265
267
|
sweep_id=actual_sweep_id,
|
|
266
268
|
working_dir=working_dir,
|
|
@@ -273,7 +275,7 @@ def run_sweep_command(
|
|
|
273
275
|
|
|
274
276
|
# Print final summary
|
|
275
277
|
print()
|
|
276
|
-
print(
|
|
278
|
+
print("✅ Sweep completed!")
|
|
277
279
|
print(f" Total: {total_trials} trials")
|
|
278
280
|
print(f" Success: {success_count}")
|
|
279
281
|
print(f" Failed: {failed_count}")
|
|
@@ -297,7 +299,7 @@ def run_sweep_command(
|
|
|
297
299
|
except KeyboardInterrupt:
|
|
298
300
|
# User pressed Ctrl+C
|
|
299
301
|
print()
|
|
300
|
-
print(
|
|
302
|
+
print("❌ Sweep interrupted by user (Ctrl+C)")
|
|
301
303
|
print(f" Completed: {completed_count}/{total_trials} trials")
|
|
302
304
|
await update_sweep_status("failed")
|
|
303
305
|
raise
|
|
@@ -350,8 +352,8 @@ def results_command(
|
|
|
350
352
|
Formatted string with results
|
|
351
353
|
"""
|
|
352
354
|
from wafer_core.tools.autotuner import compute_pareto_frontier
|
|
353
|
-
from wafer_core.tools.autotuner.storage import get_sweep, get_trials
|
|
354
355
|
from wafer_core.tools.autotuner.aggregation import aggregate_trials_by_config
|
|
356
|
+
from wafer_core.tools.autotuner.storage import get_sweep, get_trials
|
|
355
357
|
|
|
356
358
|
try:
|
|
357
359
|
# Get sweep and trials
|
|
@@ -501,7 +503,10 @@ def results_command(
|
|
|
501
503
|
# Use aggregated config scoring
|
|
502
504
|
if len(objectives_data) > 1:
|
|
503
505
|
# Multi-objective: compute Pareto
|
|
504
|
-
from wafer_core.tools.autotuner.scoring import
|
|
506
|
+
from wafer_core.tools.autotuner.scoring import (
|
|
507
|
+
compute_pareto_frontier_configs,
|
|
508
|
+
rank_pareto_configs,
|
|
509
|
+
)
|
|
505
510
|
objectives = [
|
|
506
511
|
Objective(
|
|
507
512
|
metric=obj["metric"],
|
|
@@ -513,7 +518,7 @@ def results_command(
|
|
|
513
518
|
pareto_configs = compute_pareto_frontier_configs(aggregated_configs, objectives)
|
|
514
519
|
ranked_configs = rank_pareto_configs(pareto_configs, objectives)
|
|
515
520
|
|
|
516
|
-
lines.append(
|
|
521
|
+
lines.append("Pareto Frontier (using config objectives):")
|
|
517
522
|
lines.append(f"Found {len(ranked_configs)} non-dominated configurations.")
|
|
518
523
|
lines.append("")
|
|
519
524
|
|
|
@@ -563,7 +568,7 @@ def results_command(
|
|
|
563
568
|
]
|
|
564
569
|
pareto_trials = compute_pareto_frontier(completed_trials, objectives)
|
|
565
570
|
|
|
566
|
-
lines.append(
|
|
571
|
+
lines.append("Pareto Frontier (using config objectives):")
|
|
567
572
|
lines.append(f"Found {len(pareto_trials)} non-dominated configurations.")
|
|
568
573
|
lines.append("")
|
|
569
574
|
|
|
@@ -699,8 +704,8 @@ def best_command(
|
|
|
699
704
|
Returns:
|
|
700
705
|
Formatted string with best config
|
|
701
706
|
"""
|
|
702
|
-
from wafer_core.tools.autotuner.storage import get_sweep, get_trials
|
|
703
707
|
from wafer_core.tools.autotuner.aggregation import aggregate_trials_by_config
|
|
708
|
+
from wafer_core.tools.autotuner.storage import get_sweep, get_trials
|
|
704
709
|
|
|
705
710
|
try:
|
|
706
711
|
# Get sweep and trials
|
|
@@ -991,7 +996,7 @@ def delete_command(sweep_id: str) -> str:
|
|
|
991
996
|
Success message
|
|
992
997
|
"""
|
|
993
998
|
import httpx
|
|
994
|
-
from wafer_core.tools.autotuner.storage import
|
|
999
|
+
from wafer_core.tools.autotuner.storage import _get_auth_headers, get_api_url
|
|
995
1000
|
|
|
996
1001
|
try:
|
|
997
1002
|
api_url = get_api_url()
|
|
@@ -1008,8 +1013,7 @@ def delete_command(sweep_id: str) -> str:
|
|
|
1008
1013
|
except httpx.HTTPStatusError as e:
|
|
1009
1014
|
if e.response.status_code == 404:
|
|
1010
1015
|
raise ValueError(f"Sweep {sweep_id} not found")
|
|
1011
|
-
|
|
1012
|
-
raise ValueError(f"Failed to delete sweep: {e}")
|
|
1016
|
+
raise ValueError(f"Failed to delete sweep: {e}")
|
|
1013
1017
|
except Exception as e:
|
|
1014
1018
|
raise ValueError(f"Failed to delete sweep: {e}") from e
|
|
1015
1019
|
|
|
@@ -1024,7 +1028,7 @@ def delete_all_command(status_filter: str | None = None) -> str:
|
|
|
1024
1028
|
Summary of deletions
|
|
1025
1029
|
"""
|
|
1026
1030
|
import httpx
|
|
1027
|
-
from wafer_core.tools.autotuner.storage import
|
|
1031
|
+
from wafer_core.tools.autotuner.storage import _get_auth_headers, get_api_url, list_sweeps
|
|
1028
1032
|
|
|
1029
1033
|
try:
|
|
1030
1034
|
# Get all sweeps
|