wafer-cli 0.2.52__tar.gz → 0.2.53__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/PKG-INFO +1 -1
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/pyproject.toml +2 -1
- wafer_cli-0.2.53/tests/test_ncu_run.py +337 -0
- wafer_cli-0.2.53/tests/test_ncu_run_e2e.py +225 -0
- wafer_cli-0.2.53/tests/test_ncu_run_local_e2e.py +215 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/cli.py +88 -2
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/kernel_scope.py +33 -0
- wafer_cli-0.2.53/wafer/ncu_run.py +372 -0
- wafer_cli-0.2.53/wafer/skills/packed-ops-guide/SKILL.md +212 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/skills/wafer-guide/SKILL.md +1 -1
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer_cli.egg-info/PKG-INFO +1 -1
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer_cli.egg-info/SOURCES.txt +5 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/README.md +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/setup.cfg +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/tests/test_analytics.py +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/tests/test_auth.py +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/tests/test_billing.py +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/tests/test_cli_coverage.py +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/tests/test_cli_parity_integration.py +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/tests/test_config_integration.py +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/tests/test_distributed_traces_cli.py +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/tests/test_file_operations_integration.py +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/tests/test_kernel_scope_cli.py +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/tests/test_nsys_analyze.py +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/tests/test_nsys_profile.py +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/tests/test_output.py +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/tests/test_rocprof_compute_integration.py +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/tests/test_skill_commands.py +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/tests/test_ssh_integration.py +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/tests/test_targets_ops.py +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/tests/test_wevin_cli.py +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/tests/test_workflow_integration.py +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/GUIDE.md +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/__init__.py +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/agent_defaults.py +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/analytics.py +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/api_client.py +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/auth.py +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/autotuner.py +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/baseline.py +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/billing.py +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/cli_instructions.py +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/config.py +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/corpora/amd/amd_instinct_gpu_specs.md +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/corpora/amd/cdna2/01-architecture-overview.md +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/corpora/amd/cdna2/02-matrix-instructions.md +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/corpora/amd/cdna2/README.md +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/corpora/amd/cdna3-isa/01-introduction.md +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/corpora/amd/cdna3-isa/02-program-organization.md +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/corpora/amd/cdna3-isa/03-kernel-state.md +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/corpora/amd/cdna3-isa/04-program-flow-control.md +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/corpora/amd/cdna3-isa/05-scalar-alu.md +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/corpora/amd/cdna3-isa/06-vector-alu.md +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/corpora/amd/cdna3-isa/07-matrix-instructions.md +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/corpora/amd/cdna3-isa/08-scalar-memory.md +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/corpora/amd/cdna3-isa/09-vector-memory.md +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/corpora/amd/cdna3-isa/10-flat-memory.md +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/corpora/amd/cdna3-isa/11-data-share.md +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/corpora/amd/cdna3-isa/README.md +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/corpora/amd/composable-kernel/01-ck-overview.md +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/corpora/amd/hip/01-hip-programming-model.md +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/corpora/amd/hip/02-hip-memory-management.md +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/corpora/amd/hip/03-hip-synchronization.md +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/corpora/amd/hip/04-hip-intrinsics.md +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/corpora/amd/rocm-profiling/01-rocprofiler-overview.md +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/corpora/common/flash-attention/01-flash-attention-overview.md +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/corpora/common/vllm/01-vllm-overview.md +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/corpora/nvidia/blackwell/01-architecture-overview.md +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/corpora/nvidia/cuda-guide/01-cuda-programming-model.md +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/corpora/nvidia/cuda-guide/02-cuda-memory-management.md +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/corpora/nvidia/cuda-guide/03-cuda-best-practices.md +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/corpora/nvidia/cuda-guide/04-cuda-streams-events.md +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/corpora/nvidia/cutlass/01-cutlass-overview.md +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/corpora/nvidia/hopper/01-overview.md +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/corpora/nvidia/hopper/02-streaming-multiprocessor.md +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/corpora/nvidia/hopper/03-tensor-cores.md +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/corpora/nvidia/hopper/04-memory-hierarchy.md +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/corpora/nvidia/hopper/05-synchronization.md +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/corpora/nvidia/hopper/README.md +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/corpora/nvidia/nsight/01-nsight-compute-overview.md +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/corpora/nvidia/nsight/02-nsight-systems.md +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/corpora/nvidia/ptx-isa/01-ptx-overview.md +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/corpora/nvidia/ptx-isa/02-ptx-tensor-operations.md +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/corpora/nvidia/triton/01-triton-overview.md +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/corpus.py +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/distributed_traces.py +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/evaluate.py +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/global_config.py +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/gpu_run.py +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/inference.py +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/ncu_analyze.py +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/nsys_analyze.py +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/nsys_profile.py +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/output.py +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/problems.py +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/rocprof_compute.py +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/rocprof_sdk.py +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/rocprof_systems.py +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/specs_cli.py +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/ssh_keys.py +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/target_lock.py +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/targets.py +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/targets_cli.py +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/targets_ops.py +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/templates/__init__.py +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/templates/aiter_optimize.py +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/templates/ask_docs.py +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/templates/audit.py +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/templates/optimize_flashinfer.py +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/templates/optimize_kernel.py +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/templates/optimize_kernelbench.py +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/templates/optimize_vllm.py +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/templates/trace_analyze.py +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/tests/test_eval_cli_parity.py +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/trace_compare.py +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/tracelens.py +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/wevin_cli.py +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/workspaces.py +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer_cli.egg-info/dependency_links.txt +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer_cli.egg-info/entry_points.txt +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer_cli.egg-info/requires.txt +0 -0
- {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer_cli.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "wafer-cli"
|
|
3
|
-
version = "0.2.52"
|
|
3
|
+
version = "0.2.53"
|
|
4
4
|
description = "CLI for running GPU workloads, managing remote workspaces, and evaluating/optimizing kernels"
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
requires-python = ">=3.11"
|
|
@@ -78,6 +78,7 @@ ignore = [
|
|
|
78
78
|
|
|
79
79
|
[tool.ruff.lint.per-file-ignores]
|
|
80
80
|
"tests/**/*.py" = ["ANN001", "ANN201", "ANN202", "ANN204"] # Don't require type annotations in tests
|
|
81
|
+
"tests/test_ncu_run_local_e2e.py" = ["PLR0915"] # E2E test has a long sequential flow by design
|
|
81
82
|
"wafer/evaluate.py" = ["PLR0915", "PLR1702", "E402", "PLW2901", "ASYNC221"] # complex deployment flows - TODO: refactor
|
|
82
83
|
"wafer/output.py" = ["ANN401"] # Output collector uses **kwargs for flexible event data
|
|
83
84
|
"wafer/autotuner.py" = ["PLR0915", "PLR1702", "B007", "B904"] # complex sweep logic - TODO: refactor
|
|
@@ -0,0 +1,337 @@
|
|
|
1
|
+
"""Unit tests for NCU remote profiling CLI module.
|
|
2
|
+
|
|
3
|
+
Tests directory packaging, .gitignore filtering, tar.gz creation,
|
|
4
|
+
and SSE stream processing — all locally, no B200 needed.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import os
|
|
8
|
+
import subprocess
|
|
9
|
+
import tarfile
|
|
10
|
+
import tempfile
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
|
|
13
|
+
import pytest
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
# --- Directory Packaging ---
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class TestCollectFilesForUpload:
    """Tests for _collect_files_for_upload — the directory scanning logic.

    Every test builds a throwaway directory tree, runs the collector on it and
    checks which files come back. The collector is imported inside each test,
    so a broken import fails the individual tests instead of aborting
    collection of the whole module.
    """

    def test_collects_all_files_in_flat_dir(self) -> None:
        """All files in a simple directory are collected."""
        from wafer.ncu_run import _collect_files_for_upload

        with tempfile.TemporaryDirectory() as tmpdir:
            d = Path(tmpdir)
            (d / "run.py").write_text("print('hello')")
            (d / "kernel.py").write_text("# kernel")
            (d / "data.txt").write_text("data")

            files = _collect_files_for_upload(d)

            names = {f.name for f in files}
            assert "run.py" in names
            assert "kernel.py" in names
            assert "data.txt" in names

    def test_collects_nested_files(self) -> None:
        """Files in subdirectories are collected."""
        from wafer.ncu_run import _collect_files_for_upload

        with tempfile.TemporaryDirectory() as tmpdir:
            d = Path(tmpdir)
            (d / "run.py").write_text("import utils")
            (d / "utils").mkdir()
            (d / "utils" / "helper.py").write_text("# helper")

            files = _collect_files_for_upload(d)
            # as_posix() keeps the expected "utils/helper.py" spelling valid on
            # platforms whose native path separator is not "/".
            rel_paths = {f.relative_to(d).as_posix() for f in files}

            assert "run.py" in rel_paths
            assert "utils/helper.py" in rel_paths

    def test_excludes_pycache(self) -> None:
        """__pycache__ directories are excluded."""
        from wafer.ncu_run import _collect_files_for_upload

        with tempfile.TemporaryDirectory() as tmpdir:
            d = Path(tmpdir)
            (d / "run.py").write_text("print('hello')")
            (d / "__pycache__").mkdir()
            (d / "__pycache__" / "run.cpython-312.pyc").write_bytes(b"\x00")

            files = _collect_files_for_upload(d)
            names = {f.name for f in files}

            assert "run.py" in names
            assert "run.cpython-312.pyc" not in names

    def test_excludes_git_dir(self) -> None:
        """.git directory is excluded."""
        from wafer.ncu_run import _collect_files_for_upload

        with tempfile.TemporaryDirectory() as tmpdir:
            d = Path(tmpdir)
            (d / "run.py").write_text("print('hello')")
            (d / ".git").mkdir()
            (d / ".git" / "HEAD").write_text("ref: refs/heads/main")

            files = _collect_files_for_upload(d)
            names = {f.name for f in files}

            assert "run.py" in names
            assert "HEAD" not in names

    def test_excludes_ncu_rep_files(self) -> None:
        """Existing .ncu-rep files are excluded from upload."""
        from wafer.ncu_run import _collect_files_for_upload

        with tempfile.TemporaryDirectory() as tmpdir:
            d = Path(tmpdir)
            (d / "run.py").write_text("print('hello')")
            (d / "old_profile.ncu-rep").write_bytes(b"\x00" * 100)

            files = _collect_files_for_upload(d)
            names = {f.name for f in files}

            assert "run.py" in names
            assert "old_profile.ncu-rep" not in names

    def test_excludes_venv(self) -> None:
        """Virtual environment directories are excluded."""
        from wafer.ncu_run import _collect_files_for_upload

        with tempfile.TemporaryDirectory() as tmpdir:
            d = Path(tmpdir)
            (d / "run.py").write_text("print('hello')")
            (d / ".venv").mkdir()
            (d / ".venv" / "bin").mkdir()
            (d / ".venv" / "bin" / "python").write_text("#!/usr/bin/env python")

            files = _collect_files_for_upload(d)
            names = {f.name for f in files}

            assert "run.py" in names
            assert "python" not in names

    def test_custom_excludes(self) -> None:
        """Extra exclude patterns are applied."""
        from wafer.ncu_run import _collect_files_for_upload

        with tempfile.TemporaryDirectory() as tmpdir:
            d = Path(tmpdir)
            (d / "run.py").write_text("print('hello')")
            (d / "big_data.bin").write_bytes(b"\x00" * 100)

            files = _collect_files_for_upload(d, extra_excludes={"*.bin"})
            names = {f.name for f in files}

            assert "run.py" in names
            assert "big_data.bin" not in names

    def test_git_ls_files_used_in_git_repo(self) -> None:
        """In a git repo, git ls-files is used (respects .gitignore)."""
        import shutil

        from wafer.ncu_run import _collect_files_for_upload

        # Without a git executable the setup below would die with
        # FileNotFoundError, which says nothing about the code under test.
        if shutil.which("git") is None:
            pytest.skip("git executable not available")

        with tempfile.TemporaryDirectory() as tmpdir:
            d = Path(tmpdir)

            # Initialize a git repo with a local identity
            subprocess.run(["git", "init"], cwd=d, capture_output=True, check=True)
            subprocess.run(
                ["git", "config", "user.email", "test@test.com"],
                cwd=d, capture_output=True, check=True,
            )
            subprocess.run(
                ["git", "config", "user.name", "Test"],
                cwd=d, capture_output=True, check=True,
            )

            # One file to track, one that .gitignore hides
            (d / "tracked.py").write_text("# tracked")
            (d / "ignored.log").write_text("logs")
            (d / ".gitignore").write_text("*.log\n")

            # Stage only the tracked file and the ignore rules
            subprocess.run(
                ["git", "add", "tracked.py", ".gitignore"],
                cwd=d, capture_output=True, check=True,
            )

            files = _collect_files_for_upload(d)
            names = {f.name for f in files}

            assert "tracked.py" in names
            assert ".gitignore" in names
            assert "ignored.log" not in names
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
# --- Tar.gz Packaging ---
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
class TestPackageDirectory:
    """Tests for package_directory — creating uploadable tar.gz archives.

    package_directory is called with a directory path and, as used here,
    returns a ``(tarball_bytes, file_count)`` pair.
    """

    @staticmethod
    def _tar_names(tarball_bytes: bytes) -> list[str]:
        """Return the member names of an in-memory tar.gz archive.

        Opening with mode "r:gz" also verifies the bytes are a readable
        gzip-compressed tar; tarfile raises otherwise.
        """
        import io

        with tarfile.open(fileobj=io.BytesIO(tarball_bytes), mode="r:gz") as tar:
            return tar.getnames()

    def test_creates_valid_tarball(self) -> None:
        """package_directory produces a valid tar.gz."""
        from wafer.ncu_run import package_directory

        with tempfile.TemporaryDirectory() as tmpdir:
            d = Path(tmpdir)
            (d / "run.py").write_text("print('hello')")
            (d / "kernel.py").write_text("# kernel code")

            tarball_bytes, count = package_directory(d)

            assert count == 2
            assert len(tarball_bytes) > 0

            # Verify it's valid tar.gz
            names = self._tar_names(tarball_bytes)
            assert "run.py" in names
            assert "kernel.py" in names

    def test_preserves_relative_paths(self) -> None:
        """Tarball entries have paths relative to the directory root."""
        from wafer.ncu_run import package_directory

        with tempfile.TemporaryDirectory() as tmpdir:
            d = Path(tmpdir)
            (d / "src").mkdir()
            (d / "src" / "model.py").write_text("# model")
            (d / "run.py").write_text("from src.model import *")

            tarball_bytes, _ = package_directory(d)

            names = self._tar_names(tarball_bytes)
            # Paths should be relative, not absolute
            assert "run.py" in names
            assert "src/model.py" in names
            # No absolute paths
            for name in names:
                assert not name.startswith("/")

    def test_empty_directory_raises(self) -> None:
        """Empty directory raises AssertionError."""
        from wafer.ncu_run import package_directory

        with tempfile.TemporaryDirectory() as tmpdir:
            d = Path(tmpdir)
            with pytest.raises(AssertionError, match="No files found"):
                package_directory(d)

    def test_realistic_gpu_mode_project(self) -> None:
        """Packages a realistic GPU Mode kernel project correctly.

        This mimics what a GPU Mode community member would have:
        - A Python entry script
        - A Triton kernel file
        - A utils module
        - Some files that should be excluded
        """
        from wafer.ncu_run import package_directory

        with tempfile.TemporaryDirectory() as tmpdir:
            d = Path(tmpdir)

            # Realistic GPU Mode project structure
            (d / "run.py").write_text(
                "import torch\n"
                "from kernel import my_kernel\n"
                "x = torch.randn(1024, device='cuda')\n"
                "my_kernel(x)\n"
            )
            (d / "kernel.py").write_text(
                "import triton\n"
                "import triton.language as tl\n"
                "@triton.jit\n"
                "def my_kernel(x_ptr, n: tl.constexpr):\n"
                " pid = tl.program_id(0)\n"
                " offsets = pid * 128 + tl.arange(0, 128)\n"
                " x = tl.load(x_ptr + offsets)\n"
                " tl.store(x_ptr + offsets, x * 2)\n"
            )
            (d / "utils.py").write_text("# utility functions\n")
            (d / "requirements.txt").write_text("torch>=2.0\ntriton>=2.1\n")

            # Files that SHOULD be excluded
            (d / "__pycache__").mkdir()
            (d / "__pycache__" / "kernel.cpython-312.pyc").write_bytes(b"\x00")
            (d / "old_profile.ncu-rep").write_bytes(b"\x00" * 50)

            tarball_bytes, count = package_directory(d)

            # Should include the 4 real files (run.py, kernel.py, utils.py, requirements.txt)
            # Should NOT include __pycache__ or .ncu-rep
            assert count == 4

            names = set(self._tar_names(tarball_bytes))
            assert "run.py" in names
            assert "kernel.py" in names
            assert "utils.py" in names
            assert "requirements.txt" in names
            # Excluded files
            assert not any("__pycache__" in n for n in names)
            assert not any(".ncu-rep" in n for n in names)
|
|
283
|
+
|
|
284
|
+
|
|
285
|
+
# --- SSE Stream Processing ---
|
|
286
|
+
|
|
287
|
+
|
|
288
|
+
class TestHandleSseLine:
    """Checks how _handle_sse_line updates the CLI-side stream state for each SSE marker."""

    @staticmethod
    def _new_state() -> dict:
        """Build the state dict every test starts from: exit code 0, no job id, not done."""
        return {"exit_code": 0, "job_id": None, "done": False}

    def test_done_sets_done_flag(self) -> None:
        """A "[DONE]" line flips the done flag."""
        from wafer.ncu_run import _handle_sse_line

        sse_state = self._new_state()

        _handle_sse_line("[DONE]", sse_state)

        assert sse_state["done"] is True

    def test_error_sets_exit_code(self) -> None:
        """An "[ERROR] ..." line records exit code 1 and marks the stream done."""
        from wafer.ncu_run import _handle_sse_line

        sse_state = self._new_state()

        _handle_sse_line("[ERROR] something broke", sse_state)

        assert sse_state["exit_code"] == 1
        assert sse_state["done"] is True

    def test_exit_code_parsed(self) -> None:
        """An "[EXIT:N]" line stores N as the exit code."""
        from wafer.ncu_run import _handle_sse_line

        sse_state = self._new_state()

        _handle_sse_line("[EXIT:42]", sse_state)

        assert sse_state["exit_code"] == 42

    def test_report_ready_captures_job_id(self) -> None:
        """A "[REPORT_READY:id]" line stores the id as the job id."""
        from wafer.ncu_run import _handle_sse_line

        sse_state = self._new_state()

        _handle_sse_line("[REPORT_READY:abc123def]", sse_state)

        assert sse_state["job_id"] == "abc123def"

    def test_status_events_dont_affect_state(self) -> None:
        """A "[STATUS:...]" line leaves exit code, job id and done flag untouched."""
        from wafer.ncu_run import _handle_sse_line

        sse_state = self._new_state()

        _handle_sse_line("[STATUS:UPLOADING]", sse_state)

        assert sse_state["exit_code"] == 0
        assert sse_state["job_id"] is None
        assert sse_state["done"] is False
|
|
@@ -0,0 +1,225 @@
|
|
|
1
|
+
"""End-to-end tests for NCU remote profiling.
|
|
2
|
+
|
|
3
|
+
These tests require:
|
|
4
|
+
1. Authentication: `wafer login` (or WAFER_API_URL pointed at staging)
|
|
5
|
+
2. B200 GPU access: A running B200 target in the pool
|
|
6
|
+
3. Credits: Sufficient credits for a short NCU run
|
|
7
|
+
|
|
8
|
+
Run manually:
|
|
9
|
+
cd apps/wafer-cli
|
|
10
|
+
uv run pytest tests/test_ncu_run_e2e.py -v -s
|
|
11
|
+
|
|
12
|
+
Or with staging:
|
|
13
|
+
WAFER_API_URL=https://wafer-api-staging.onrender.com uv run pytest tests/test_ncu_run_e2e.py -v -s
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
import os
|
|
17
|
+
import subprocess
|
|
18
|
+
import tempfile
|
|
19
|
+
from pathlib import Path
|
|
20
|
+
|
|
21
|
+
import pytest
|
|
22
|
+
|
|
23
|
+
# Module-wide marker: every test here is E2E and is skipped unless
# RUN_E2E_TESTS is set to "1" (so CI skips them by default).
pytestmark = [
    pytest.mark.skipif(
        os.getenv("RUN_E2E_TESTS") != "1",
        reason="Set RUN_E2E_TESTS=1 to run E2E tests (requires B200 + auth)",
    ),
]

# Realistic GPU Mode test fixture, located relative to this test file
FIXTURE_DIR = Path(__file__).parent.joinpath("fixtures", "gpu_mode_kernel")
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class TestNcuRunE2E:
    """End-to-end tests for the full `wafer ncu run` flow.

    Each test shells out to the real CLI through ``_run_wafer`` and inspects
    the captured stdout/stderr and return code.
    """

    def _run_wafer(
        self, *args: str, cwd: str | None = None
    ) -> subprocess.CompletedProcess[str]:
        """Run a wafer CLI command and return the result.

        Args:
            *args: Arguments appended after ``uv run wafer``.
            cwd: Working directory for the subprocess; defaults to the parent
                of the directory containing this test file.

        Returns:
            The completed process with stdout/stderr captured as text.

        Raises:
            subprocess.TimeoutExpired: If the command runs longer than 300 s.
        """
        cmd = ["uv", "run", "wafer", *args]
        env = {**os.environ}
        # Default to staging for E2E tests unless the caller chose an API URL
        env.setdefault("WAFER_API_URL", "https://wafer-api-staging.onrender.com")

        return subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            cwd=cwd or str(Path(__file__).parent.parent),
            env=env,
            timeout=300,  # 5 min timeout for NCU runs
        )

    def test_ncu_run_help(self) -> None:
        """wafer nvidia ncu run --help shows usage."""
        result = self._run_wafer("nvidia", "ncu", "run", "--help")

        assert result.returncode == 0
        assert "Run NCU profiling remotely" in result.stdout
        assert "--dir" in result.stdout
        assert "--ncu-args" in result.stdout

    def test_ncu_run_no_command_fails(self) -> None:
        """wafer nvidia ncu run (no command) fails with error."""
        result = self._run_wafer("nvidia", "ncu", "run")

        # Should fail because no command provided
        assert result.returncode != 0

    def test_ncu_run_simple_python(self) -> None:
        """Full E2E: profile a simple Python script on B200.

        This is THE critical test — it validates the entire flow:
        1. Directory packaging (tar.gz)
        2. Upload to API
        3. SFTP to B200
        4. Docker execution with NCU
        5. SSE streaming
        6. Report download
        """
        with tempfile.TemporaryDirectory() as tmpdir:
            d = Path(tmpdir)
            # Minimal script that NCU can profile
            (d / "run.py").write_text(
                "import torch\n"
                "x = torch.randn(1024, device='cuda')\n"
                "y = x * 2 # Simple operation for NCU to profile\n"
                "print(f'Result shape: {y.shape}')\n"
            )

            result = self._run_wafer(
                "nvidia", "ncu", "run",
                "--dir", str(d),
                "--timeout", "120",
                "python", "run.py",
            )

            print("STDOUT:", result.stdout)
            print("STDERR:", result.stderr)

            # Should upload and start running
            assert "Packaging" in result.stdout
            assert "Uploading" in result.stdout

            # Should get NCU output (==PROF== lines)
            assert "==PROF==" in result.stdout or "RUNNING" in result.stdout

    def test_ncu_run_gpu_mode_fixture(self) -> None:
        """Full E2E: profile the GPU Mode test fixture (Triton kernel).

        This tests the realistic GPU Mode use case:
        - Multi-file project (run.py + requirements.txt)
        - Triton kernel that needs profiling
        - Requirements installation
        """
        if not FIXTURE_DIR.exists():
            pytest.skip(f"Fixture not found: {FIXTURE_DIR}")

        with tempfile.TemporaryDirectory() as output_dir:
            output_file = Path(output_dir) / "profile.ncu-rep"

            result = self._run_wafer(
                "nvidia", "ncu", "run",
                "--dir", str(FIXTURE_DIR),
                "--output", str(output_file),
                "--timeout", "180",
                "python", "run.py",
            )

            print("STDOUT:", result.stdout)
            print("STDERR:", result.stderr)

            # Packaging and upload must at least have started
            assert "Packaging" in result.stdout
            assert "Uploading" in result.stdout

            # If NCU ran successfully, report should be downloaded. Existence
            # is asserted inside the branch so that a successful run without a
            # report is a failure instead of a silent pass.
            if result.returncode == 0:
                assert output_file.exists(), f"Report not downloaded: {output_file}"
                # Report should be a valid NCU report (starts with NVR magic);
                # only the three header bytes are read, not the whole report.
                with output_file.open("rb") as report:
                    header = report.read(3)
                assert header == b"NVR", f"Invalid report header: {header}"
                print(f"Report downloaded: {output_file.stat().st_size / 1024:.1f} KB")

    def test_ncu_run_with_ncu_args(self) -> None:
        """E2E: custom NCU flags are passed through."""
        with tempfile.TemporaryDirectory() as tmpdir:
            d = Path(tmpdir)
            (d / "run.py").write_text(
                "import torch\n"
                "x = torch.randn(1024, device='cuda')\n"
                "y = x * 2\n"
            )

            result = self._run_wafer(
                "nvidia", "ncu", "run",
                "--dir", str(d),
                "--ncu-args", "--set full",
                "--no-download",
                "--timeout", "120",
                "python", "run.py",
            )

            print("STDOUT:", result.stdout)
            print("STDERR:", result.stderr)

            # Should show the ncu args in the command
            assert "--set full" in result.stdout

    def test_ncu_run_no_download(self) -> None:
        """E2E: --no-download skips report download."""
        with tempfile.TemporaryDirectory() as tmpdir:
            d = Path(tmpdir)
            (d / "run.py").write_text(
                "import torch\n"
                "x = torch.randn(1024, device='cuda')\n"
                "y = x * 2\n"
            )

            result = self._run_wafer(
                "nvidia", "ncu", "run",
                "--dir", str(d),
                "--no-download",
                "--timeout", "120",
                "python", "run.py",
            )

            print("STDOUT:", result.stdout)
            print("STDERR:", result.stderr)

            # Should NOT contain download messages
            assert "Downloading report" not in result.stdout
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
class TestNcuRunDirPackaging:
    """Packaging-focused checks that run package_directory on a cluttered tree."""

    def test_large_directory_with_exclusions(self) -> None:
        """A directory full of excludable clutter still yields a tiny two-file archive."""
        from wafer.ncu_run import package_directory

        with tempfile.TemporaryDirectory() as tmpdir:
            root = Path(tmpdir)

            # The two real source files
            (root / "run.py").write_text("print('hello')")
            (root / "kernel.cu").write_text("__global__ void k() {}")

            # Clutter: 50 bytecode files of 1000 bytes each
            pycache = root / "__pycache__"
            pycache.mkdir()
            for index in range(50):
                (pycache / f"module_{index}.cpython-312.pyc").write_bytes(b"\x00" * 1000)

            # Clutter: a vendored JS dependency
            big_dep = root / "node_modules" / "big_dep"
            big_dep.mkdir(parents=True)
            (big_dep / "index.js").write_text("// big")

            # Clutter: a .git directory holding only an empty objects dir
            (root / ".git" / "objects").mkdir(parents=True)

            tarball, count = package_directory(root)

            # Only run.py and kernel.cu are expected to be packaged ...
            assert count == 2
            # ... so the archive has to stay under 1 KB
            assert len(tarball) < 1024
|