wafer-cli 0.2.52__tar.gz → 0.2.53__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122)
  1. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/PKG-INFO +1 -1
  2. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/pyproject.toml +2 -1
  3. wafer_cli-0.2.53/tests/test_ncu_run.py +337 -0
  4. wafer_cli-0.2.53/tests/test_ncu_run_e2e.py +225 -0
  5. wafer_cli-0.2.53/tests/test_ncu_run_local_e2e.py +215 -0
  6. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/cli.py +88 -2
  7. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/kernel_scope.py +33 -0
  8. wafer_cli-0.2.53/wafer/ncu_run.py +372 -0
  9. wafer_cli-0.2.53/wafer/skills/packed-ops-guide/SKILL.md +212 -0
  10. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/skills/wafer-guide/SKILL.md +1 -1
  11. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer_cli.egg-info/PKG-INFO +1 -1
  12. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer_cli.egg-info/SOURCES.txt +5 -0
  13. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/README.md +0 -0
  14. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/setup.cfg +0 -0
  15. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/tests/test_analytics.py +0 -0
  16. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/tests/test_auth.py +0 -0
  17. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/tests/test_billing.py +0 -0
  18. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/tests/test_cli_coverage.py +0 -0
  19. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/tests/test_cli_parity_integration.py +0 -0
  20. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/tests/test_config_integration.py +0 -0
  21. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/tests/test_distributed_traces_cli.py +0 -0
  22. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/tests/test_file_operations_integration.py +0 -0
  23. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/tests/test_kernel_scope_cli.py +0 -0
  24. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/tests/test_nsys_analyze.py +0 -0
  25. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/tests/test_nsys_profile.py +0 -0
  26. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/tests/test_output.py +0 -0
  27. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/tests/test_rocprof_compute_integration.py +0 -0
  28. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/tests/test_skill_commands.py +0 -0
  29. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/tests/test_ssh_integration.py +0 -0
  30. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/tests/test_targets_ops.py +0 -0
  31. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/tests/test_wevin_cli.py +0 -0
  32. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/tests/test_workflow_integration.py +0 -0
  33. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/GUIDE.md +0 -0
  34. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/__init__.py +0 -0
  35. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/agent_defaults.py +0 -0
  36. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/analytics.py +0 -0
  37. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/api_client.py +0 -0
  38. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/auth.py +0 -0
  39. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/autotuner.py +0 -0
  40. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/baseline.py +0 -0
  41. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/billing.py +0 -0
  42. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/cli_instructions.py +0 -0
  43. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/config.py +0 -0
  44. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/corpora/amd/amd_instinct_gpu_specs.md +0 -0
  45. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/corpora/amd/cdna2/01-architecture-overview.md +0 -0
  46. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/corpora/amd/cdna2/02-matrix-instructions.md +0 -0
  47. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/corpora/amd/cdna2/README.md +0 -0
  48. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/corpora/amd/cdna3-isa/01-introduction.md +0 -0
  49. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/corpora/amd/cdna3-isa/02-program-organization.md +0 -0
  50. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/corpora/amd/cdna3-isa/03-kernel-state.md +0 -0
  51. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/corpora/amd/cdna3-isa/04-program-flow-control.md +0 -0
  52. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/corpora/amd/cdna3-isa/05-scalar-alu.md +0 -0
  53. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/corpora/amd/cdna3-isa/06-vector-alu.md +0 -0
  54. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/corpora/amd/cdna3-isa/07-matrix-instructions.md +0 -0
  55. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/corpora/amd/cdna3-isa/08-scalar-memory.md +0 -0
  56. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/corpora/amd/cdna3-isa/09-vector-memory.md +0 -0
  57. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/corpora/amd/cdna3-isa/10-flat-memory.md +0 -0
  58. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/corpora/amd/cdna3-isa/11-data-share.md +0 -0
  59. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/corpora/amd/cdna3-isa/README.md +0 -0
  60. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/corpora/amd/composable-kernel/01-ck-overview.md +0 -0
  61. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/corpora/amd/hip/01-hip-programming-model.md +0 -0
  62. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/corpora/amd/hip/02-hip-memory-management.md +0 -0
  63. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/corpora/amd/hip/03-hip-synchronization.md +0 -0
  64. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/corpora/amd/hip/04-hip-intrinsics.md +0 -0
  65. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/corpora/amd/rocm-profiling/01-rocprofiler-overview.md +0 -0
  66. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/corpora/common/flash-attention/01-flash-attention-overview.md +0 -0
  67. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/corpora/common/vllm/01-vllm-overview.md +0 -0
  68. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/corpora/nvidia/blackwell/01-architecture-overview.md +0 -0
  69. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/corpora/nvidia/cuda-guide/01-cuda-programming-model.md +0 -0
  70. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/corpora/nvidia/cuda-guide/02-cuda-memory-management.md +0 -0
  71. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/corpora/nvidia/cuda-guide/03-cuda-best-practices.md +0 -0
  72. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/corpora/nvidia/cuda-guide/04-cuda-streams-events.md +0 -0
  73. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/corpora/nvidia/cutlass/01-cutlass-overview.md +0 -0
  74. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/corpora/nvidia/hopper/01-overview.md +0 -0
  75. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/corpora/nvidia/hopper/02-streaming-multiprocessor.md +0 -0
  76. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/corpora/nvidia/hopper/03-tensor-cores.md +0 -0
  77. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/corpora/nvidia/hopper/04-memory-hierarchy.md +0 -0
  78. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/corpora/nvidia/hopper/05-synchronization.md +0 -0
  79. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/corpora/nvidia/hopper/README.md +0 -0
  80. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/corpora/nvidia/nsight/01-nsight-compute-overview.md +0 -0
  81. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/corpora/nvidia/nsight/02-nsight-systems.md +0 -0
  82. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/corpora/nvidia/ptx-isa/01-ptx-overview.md +0 -0
  83. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/corpora/nvidia/ptx-isa/02-ptx-tensor-operations.md +0 -0
  84. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/corpora/nvidia/triton/01-triton-overview.md +0 -0
  85. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/corpus.py +0 -0
  86. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/distributed_traces.py +0 -0
  87. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/evaluate.py +0 -0
  88. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/global_config.py +0 -0
  89. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/gpu_run.py +0 -0
  90. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/inference.py +0 -0
  91. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/ncu_analyze.py +0 -0
  92. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/nsys_analyze.py +0 -0
  93. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/nsys_profile.py +0 -0
  94. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/output.py +0 -0
  95. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/problems.py +0 -0
  96. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/rocprof_compute.py +0 -0
  97. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/rocprof_sdk.py +0 -0
  98. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/rocprof_systems.py +0 -0
  99. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/specs_cli.py +0 -0
  100. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/ssh_keys.py +0 -0
  101. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/target_lock.py +0 -0
  102. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/targets.py +0 -0
  103. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/targets_cli.py +0 -0
  104. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/targets_ops.py +0 -0
  105. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/templates/__init__.py +0 -0
  106. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/templates/aiter_optimize.py +0 -0
  107. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/templates/ask_docs.py +0 -0
  108. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/templates/audit.py +0 -0
  109. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/templates/optimize_flashinfer.py +0 -0
  110. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/templates/optimize_kernel.py +0 -0
  111. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/templates/optimize_kernelbench.py +0 -0
  112. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/templates/optimize_vllm.py +0 -0
  113. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/templates/trace_analyze.py +0 -0
  114. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/tests/test_eval_cli_parity.py +0 -0
  115. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/trace_compare.py +0 -0
  116. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/tracelens.py +0 -0
  117. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/wevin_cli.py +0 -0
  118. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer/workspaces.py +0 -0
  119. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer_cli.egg-info/dependency_links.txt +0 -0
  120. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer_cli.egg-info/entry_points.txt +0 -0
  121. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer_cli.egg-info/requires.txt +0 -0
  122. {wafer_cli-0.2.52 → wafer_cli-0.2.53}/wafer_cli.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: wafer-cli
3
- Version: 0.2.52
3
+ Version: 0.2.53
4
4
  Summary: CLI for running GPU workloads, managing remote workspaces, and evaluating/optimizing kernels
5
5
  Requires-Python: >=3.11
6
6
  Description-Content-Type: text/markdown
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "wafer-cli"
3
- version = "0.2.52"
3
+ version = "0.2.53"
4
4
  description = "CLI for running GPU workloads, managing remote workspaces, and evaluating/optimizing kernels"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.11"
@@ -78,6 +78,7 @@ ignore = [
78
78
 
79
79
  [tool.ruff.lint.per-file-ignores]
80
80
  "tests/**/*.py" = ["ANN001", "ANN201", "ANN202", "ANN204"] # Don't require type annotations in tests
81
+ "tests/test_ncu_run_local_e2e.py" = ["PLR0915"] # E2E test has a long sequential flow by design
81
82
  "wafer/evaluate.py" = ["PLR0915", "PLR1702", "E402", "PLW2901", "ASYNC221"] # complex deployment flows - TODO: refactor
82
83
  "wafer/output.py" = ["ANN401"] # Output collector uses **kwargs for flexible event data
83
84
  "wafer/autotuner.py" = ["PLR0915", "PLR1702", "B007", "B904"] # complex sweep logic - TODO: refactor
@@ -0,0 +1,337 @@
1
+ """Unit tests for NCU remote profiling CLI module.
2
+
3
+ Tests directory packaging, .gitignore filtering, tar.gz creation,
4
+ and SSE stream processing — all locally, no B200 needed.
5
+ """
6
+
7
+ import os
8
+ import subprocess
9
+ import tarfile
10
+ import tempfile
11
+ from pathlib import Path
12
+
13
+ import pytest
14
+
15
+
16
+ # --- Directory Packaging ---
17
+
18
+
19
+ class TestCollectFilesForUpload:
20
+ """Tests for _collect_files_for_upload — the directory scanning logic."""
21
+
22
+ def test_collects_all_files_in_flat_dir(self) -> None:
23
+ """All files in a simple directory are collected."""
24
+ from wafer.ncu_run import _collect_files_for_upload
25
+
26
+ with tempfile.TemporaryDirectory() as tmpdir:
27
+ d = Path(tmpdir)
28
+ (d / "run.py").write_text("print('hello')")
29
+ (d / "kernel.py").write_text("# kernel")
30
+ (d / "data.txt").write_text("data")
31
+
32
+ files = _collect_files_for_upload(d)
33
+
34
+ names = {f.name for f in files}
35
+ assert "run.py" in names
36
+ assert "kernel.py" in names
37
+ assert "data.txt" in names
38
+
39
+ def test_collects_nested_files(self) -> None:
40
+ """Files in subdirectories are collected."""
41
+ from wafer.ncu_run import _collect_files_for_upload
42
+
43
+ with tempfile.TemporaryDirectory() as tmpdir:
44
+ d = Path(tmpdir)
45
+ (d / "run.py").write_text("import utils")
46
+ (d / "utils").mkdir()
47
+ (d / "utils" / "helper.py").write_text("# helper")
48
+
49
+ files = _collect_files_for_upload(d)
50
+ rel_paths = {str(f.relative_to(d)) for f in files}
51
+
52
+ assert "run.py" in rel_paths
53
+ assert "utils/helper.py" in rel_paths
54
+
55
+ def test_excludes_pycache(self) -> None:
56
+ """__pycache__ directories are excluded."""
57
+ from wafer.ncu_run import _collect_files_for_upload
58
+
59
+ with tempfile.TemporaryDirectory() as tmpdir:
60
+ d = Path(tmpdir)
61
+ (d / "run.py").write_text("print('hello')")
62
+ (d / "__pycache__").mkdir()
63
+ (d / "__pycache__" / "run.cpython-312.pyc").write_bytes(b"\x00")
64
+
65
+ files = _collect_files_for_upload(d)
66
+ names = {f.name for f in files}
67
+
68
+ assert "run.py" in names
69
+ assert "run.cpython-312.pyc" not in names
70
+
71
+ def test_excludes_git_dir(self) -> None:
72
+ """.git directory is excluded."""
73
+ from wafer.ncu_run import _collect_files_for_upload
74
+
75
+ with tempfile.TemporaryDirectory() as tmpdir:
76
+ d = Path(tmpdir)
77
+ (d / "run.py").write_text("print('hello')")
78
+ (d / ".git").mkdir()
79
+ (d / ".git" / "HEAD").write_text("ref: refs/heads/main")
80
+
81
+ files = _collect_files_for_upload(d)
82
+ names = {f.name for f in files}
83
+
84
+ assert "run.py" in names
85
+ assert "HEAD" not in names
86
+
87
+ def test_excludes_ncu_rep_files(self) -> None:
88
+ """Existing .ncu-rep files are excluded from upload."""
89
+ from wafer.ncu_run import _collect_files_for_upload
90
+
91
+ with tempfile.TemporaryDirectory() as tmpdir:
92
+ d = Path(tmpdir)
93
+ (d / "run.py").write_text("print('hello')")
94
+ (d / "old_profile.ncu-rep").write_bytes(b"\x00" * 100)
95
+
96
+ files = _collect_files_for_upload(d)
97
+ names = {f.name for f in files}
98
+
99
+ assert "run.py" in names
100
+ assert "old_profile.ncu-rep" not in names
101
+
102
+ def test_excludes_venv(self) -> None:
103
+ """Virtual environment directories are excluded."""
104
+ from wafer.ncu_run import _collect_files_for_upload
105
+
106
+ with tempfile.TemporaryDirectory() as tmpdir:
107
+ d = Path(tmpdir)
108
+ (d / "run.py").write_text("print('hello')")
109
+ (d / ".venv").mkdir()
110
+ (d / ".venv" / "bin").mkdir()
111
+ (d / ".venv" / "bin" / "python").write_text("#!/usr/bin/env python")
112
+
113
+ files = _collect_files_for_upload(d)
114
+ names = {f.name for f in files}
115
+
116
+ assert "run.py" in names
117
+ assert "python" not in names
118
+
119
+ def test_custom_excludes(self) -> None:
120
+ """Extra exclude patterns are applied."""
121
+ from wafer.ncu_run import _collect_files_for_upload
122
+
123
+ with tempfile.TemporaryDirectory() as tmpdir:
124
+ d = Path(tmpdir)
125
+ (d / "run.py").write_text("print('hello')")
126
+ (d / "big_data.bin").write_bytes(b"\x00" * 100)
127
+
128
+ files = _collect_files_for_upload(d, extra_excludes={"*.bin"})
129
+ names = {f.name for f in files}
130
+
131
+ assert "run.py" in names
132
+ assert "big_data.bin" not in names
133
+
134
+ def test_git_ls_files_used_in_git_repo(self) -> None:
135
+ """In a git repo, git ls-files is used (respects .gitignore)."""
136
+ from wafer.ncu_run import _collect_files_for_upload
137
+
138
+ with tempfile.TemporaryDirectory() as tmpdir:
139
+ d = Path(tmpdir)
140
+ # Initialize a git repo
141
+ subprocess.run(["git", "init"], cwd=d, capture_output=True, check=True)
142
+ subprocess.run(
143
+ ["git", "config", "user.email", "test@test.com"],
144
+ cwd=d, capture_output=True, check=True,
145
+ )
146
+ subprocess.run(
147
+ ["git", "config", "user.name", "Test"],
148
+ cwd=d, capture_output=True, check=True,
149
+ )
150
+
151
+ # Create files
152
+ (d / "tracked.py").write_text("# tracked")
153
+ (d / "ignored.log").write_text("logs")
154
+
155
+ # Create .gitignore
156
+ (d / ".gitignore").write_text("*.log\n")
157
+
158
+ # Git add tracked file
159
+ subprocess.run(["git", "add", "tracked.py", ".gitignore"], cwd=d, capture_output=True, check=True)
160
+
161
+ files = _collect_files_for_upload(d)
162
+ names = {f.name for f in files}
163
+
164
+ assert "tracked.py" in names
165
+ assert ".gitignore" in names
166
+ assert "ignored.log" not in names
167
+
168
+
169
+ # --- Tar.gz Packaging ---
170
+
171
+
172
+ class TestPackageDirectory:
173
+ """Tests for package_directory — creating uploadable tar.gz archives."""
174
+
175
+ def test_creates_valid_tarball(self) -> None:
176
+ """package_directory produces a valid tar.gz."""
177
+ from wafer.ncu_run import package_directory
178
+
179
+ with tempfile.TemporaryDirectory() as tmpdir:
180
+ d = Path(tmpdir)
181
+ (d / "run.py").write_text("print('hello')")
182
+ (d / "kernel.py").write_text("# kernel code")
183
+
184
+ tarball_bytes, count = package_directory(d)
185
+
186
+ assert count == 2
187
+ assert len(tarball_bytes) > 0
188
+
189
+ # Verify it's valid tar.gz
190
+ import io
191
+ buf = io.BytesIO(tarball_bytes)
192
+ with tarfile.open(fileobj=buf, mode="r:gz") as tar:
193
+ names = tar.getnames()
194
+ assert "run.py" in names
195
+ assert "kernel.py" in names
196
+
197
+ def test_preserves_relative_paths(self) -> None:
198
+ """Tarball entries have paths relative to the directory root."""
199
+ from wafer.ncu_run import package_directory
200
+
201
+ with tempfile.TemporaryDirectory() as tmpdir:
202
+ d = Path(tmpdir)
203
+ (d / "src").mkdir()
204
+ (d / "src" / "model.py").write_text("# model")
205
+ (d / "run.py").write_text("from src.model import *")
206
+
207
+ tarball_bytes, count = package_directory(d)
208
+
209
+ import io
210
+ buf = io.BytesIO(tarball_bytes)
211
+ with tarfile.open(fileobj=buf, mode="r:gz") as tar:
212
+ names = tar.getnames()
213
+ # Paths should be relative, not absolute
214
+ assert "run.py" in names
215
+ assert "src/model.py" in names
216
+ # No absolute paths
217
+ for name in names:
218
+ assert not name.startswith("/")
219
+
220
+ def test_empty_directory_raises(self) -> None:
221
+ """Empty directory raises AssertionError."""
222
+ from wafer.ncu_run import package_directory
223
+
224
+ with tempfile.TemporaryDirectory() as tmpdir:
225
+ d = Path(tmpdir)
226
+ with pytest.raises(AssertionError, match="No files found"):
227
+ package_directory(d)
228
+
229
+ def test_realistic_gpu_mode_project(self) -> None:
230
+ """Packages a realistic GPU Mode kernel project correctly.
231
+
232
+ This mimics what a GPU Mode community member would have:
233
+ - A Python entry script
234
+ - A Triton kernel file
235
+ - A utils module
236
+ - Some files that should be excluded
237
+ """
238
+ from wafer.ncu_run import package_directory
239
+
240
+ with tempfile.TemporaryDirectory() as tmpdir:
241
+ d = Path(tmpdir)
242
+
243
+ # Realistic GPU Mode project structure
244
+ (d / "run.py").write_text(
245
+ "import torch\n"
246
+ "from kernel import my_kernel\n"
247
+ "x = torch.randn(1024, device='cuda')\n"
248
+ "my_kernel(x)\n"
249
+ )
250
+ (d / "kernel.py").write_text(
251
+ "import triton\n"
252
+ "import triton.language as tl\n"
253
+ "@triton.jit\n"
254
+ "def my_kernel(x_ptr, n: tl.constexpr):\n"
255
+ " pid = tl.program_id(0)\n"
256
+ " offsets = pid * 128 + tl.arange(0, 128)\n"
257
+ " x = tl.load(x_ptr + offsets)\n"
258
+ " tl.store(x_ptr + offsets, x * 2)\n"
259
+ )
260
+ (d / "utils.py").write_text("# utility functions\n")
261
+ (d / "requirements.txt").write_text("torch>=2.0\ntriton>=2.1\n")
262
+
263
+ # Files that SHOULD be excluded
264
+ (d / "__pycache__").mkdir()
265
+ (d / "__pycache__" / "kernel.cpython-312.pyc").write_bytes(b"\x00")
266
+ (d / "old_profile.ncu-rep").write_bytes(b"\x00" * 50)
267
+
268
+ tarball_bytes, count = package_directory(d)
269
+
270
+ # Should include the 4 real files (run.py, kernel.py, utils.py, requirements.txt)
271
+ # Should NOT include __pycache__ or .ncu-rep
272
+ import io
273
+ buf = io.BytesIO(tarball_bytes)
274
+ with tarfile.open(fileobj=buf, mode="r:gz") as tar:
275
+ names = set(tar.getnames())
276
+ assert "run.py" in names
277
+ assert "kernel.py" in names
278
+ assert "utils.py" in names
279
+ assert "requirements.txt" in names
280
+ # Excluded files
281
+ assert not any("__pycache__" in n for n in names)
282
+ assert not any(".ncu-rep" in n for n in names)
283
+
284
+
285
+ # --- SSE Stream Processing ---
286
+
287
+
288
+ class TestHandleSseLine:
289
+ """Tests for _handle_sse_line — CLI-side SSE event processing."""
290
+
291
+ def test_done_sets_done_flag(self) -> None:
292
+ """[DONE] event sets the done flag."""
293
+ from wafer.ncu_run import _handle_sse_line
294
+
295
+ state: dict = {"exit_code": 0, "job_id": None, "done": False}
296
+ _handle_sse_line("[DONE]", state)
297
+
298
+ assert state["done"] is True
299
+
300
+ def test_error_sets_exit_code(self) -> None:
301
+ """[ERROR] event sets exit_code to 1."""
302
+ from wafer.ncu_run import _handle_sse_line
303
+
304
+ state: dict = {"exit_code": 0, "job_id": None, "done": False}
305
+ _handle_sse_line("[ERROR] something broke", state)
306
+
307
+ assert state["exit_code"] == 1
308
+ assert state["done"] is True
309
+
310
+ def test_exit_code_parsed(self) -> None:
311
+ """[EXIT:N] event parses the exit code."""
312
+ from wafer.ncu_run import _handle_sse_line
313
+
314
+ state: dict = {"exit_code": 0, "job_id": None, "done": False}
315
+ _handle_sse_line("[EXIT:42]", state)
316
+
317
+ assert state["exit_code"] == 42
318
+
319
+ def test_report_ready_captures_job_id(self) -> None:
320
+ """[REPORT_READY:id] event captures the job ID."""
321
+ from wafer.ncu_run import _handle_sse_line
322
+
323
+ state: dict = {"exit_code": 0, "job_id": None, "done": False}
324
+ _handle_sse_line("[REPORT_READY:abc123def]", state)
325
+
326
+ assert state["job_id"] == "abc123def"
327
+
328
+ def test_status_events_dont_affect_state(self) -> None:
329
+ """[STATUS:...] events are informational only."""
330
+ from wafer.ncu_run import _handle_sse_line
331
+
332
+ state: dict = {"exit_code": 0, "job_id": None, "done": False}
333
+ _handle_sse_line("[STATUS:UPLOADING]", state)
334
+
335
+ assert state["exit_code"] == 0
336
+ assert state["job_id"] is None
337
+ assert state["done"] is False
@@ -0,0 +1,225 @@
1
+ """End-to-end tests for NCU remote profiling.
2
+
3
+ These tests require:
4
+ 1. Authentication: `wafer login` (or WAFER_API_URL pointed at staging)
5
+ 2. B200 GPU access: A running B200 target in the pool
6
+ 3. Credits: Sufficient credits for a short NCU run
7
+
8
+ Run manually:
9
+ cd apps/wafer-cli
10
+ uv run pytest tests/test_ncu_run_e2e.py -v -s
11
+
12
+ Or with staging:
13
+ WAFER_API_URL=https://wafer-api-staging.onrender.com uv run pytest tests/test_ncu_run_e2e.py -v -s
14
+ """
15
+
16
+ import os
17
+ import subprocess
18
+ import tempfile
19
+ from pathlib import Path
20
+
21
+ import pytest
22
+
23
+ # Mark all tests in this file as E2E (skip in CI by default)
24
+ pytestmark = [
25
+ pytest.mark.skipif(
26
+ os.environ.get("RUN_E2E_TESTS") != "1",
27
+ reason="Set RUN_E2E_TESTS=1 to run E2E tests (requires B200 + auth)",
28
+ ),
29
+ ]
30
+
31
+ # Path to the realistic GPU Mode test fixture
32
+ FIXTURE_DIR = Path(__file__).parent / "fixtures" / "gpu_mode_kernel"
33
+
34
+
35
+ class TestNcuRunE2E:
36
+ """End-to-end tests for the full `wafer ncu run` flow."""
37
+
38
+ def _run_wafer(self, *args: str, cwd: str | None = None) -> subprocess.CompletedProcess:
39
+ """Run a wafer CLI command and return the result."""
40
+ cmd = ["uv", "run", "wafer", *args]
41
+ env = {**os.environ}
42
+ if "WAFER_API_URL" not in env:
43
+ # Default to staging for E2E tests
44
+ env["WAFER_API_URL"] = "https://wafer-api-staging.onrender.com"
45
+
46
+ return subprocess.run(
47
+ cmd,
48
+ capture_output=True,
49
+ text=True,
50
+ cwd=cwd or str(Path(__file__).parent.parent),
51
+ env=env,
52
+ timeout=300, # 5 min timeout for NCU runs
53
+ )
54
+
55
+ def test_ncu_run_help(self) -> None:
56
+ """wafer nvidia ncu run --help shows usage."""
57
+ result = self._run_wafer("nvidia", "ncu", "run", "--help")
58
+
59
+ assert result.returncode == 0
60
+ assert "Run NCU profiling remotely" in result.stdout
61
+ assert "--dir" in result.stdout
62
+ assert "--ncu-args" in result.stdout
63
+
64
+ def test_ncu_run_no_command_fails(self) -> None:
65
+ """wafer nvidia ncu run (no command) fails with error."""
66
+ result = self._run_wafer("nvidia", "ncu", "run")
67
+
68
+ # Should fail because no command provided
69
+ assert result.returncode != 0
70
+
71
+ def test_ncu_run_simple_python(self) -> None:
72
+ """Full E2E: profile a simple Python script on B200.
73
+
74
+ This is THE critical test — it validates the entire flow:
75
+ 1. Directory packaging (tar.gz)
76
+ 2. Upload to API
77
+ 3. SFTP to B200
78
+ 4. Docker execution with NCU
79
+ 5. SSE streaming
80
+ 6. Report download
81
+ """
82
+ with tempfile.TemporaryDirectory() as tmpdir:
83
+ d = Path(tmpdir)
84
+ # Minimal script that NCU can profile
85
+ (d / "run.py").write_text(
86
+ "import torch\n"
87
+ "x = torch.randn(1024, device='cuda')\n"
88
+ "y = x * 2 # Simple operation for NCU to profile\n"
89
+ "print(f'Result shape: {y.shape}')\n"
90
+ )
91
+
92
+ result = self._run_wafer(
93
+ "nvidia", "ncu", "run",
94
+ "--dir", str(d),
95
+ "--timeout", "120",
96
+ "python", "run.py",
97
+ )
98
+
99
+ print("STDOUT:", result.stdout)
100
+ print("STDERR:", result.stderr)
101
+
102
+ # Should upload and start running
103
+ assert "Packaging" in result.stdout
104
+ assert "Uploading" in result.stdout
105
+
106
+ # Should get NCU output (==PROF== lines)
107
+ assert "==PROF==" in result.stdout or "RUNNING" in result.stdout
108
+
109
+ def test_ncu_run_gpu_mode_fixture(self) -> None:
110
+ """Full E2E: profile the GPU Mode test fixture (Triton kernel).
111
+
112
+ This tests the realistic GPU Mode use case:
113
+ - Multi-file project (run.py + requirements.txt)
114
+ - Triton kernel that needs profiling
115
+ - Requirements installation
116
+ """
117
+ if not FIXTURE_DIR.exists():
118
+ pytest.skip(f"Fixture not found: {FIXTURE_DIR}")
119
+
120
+ with tempfile.TemporaryDirectory() as output_dir:
121
+ output_file = Path(output_dir) / "profile.ncu-rep"
122
+
123
+ result = self._run_wafer(
124
+ "nvidia", "ncu", "run",
125
+ "--dir", str(FIXTURE_DIR),
126
+ "--output", str(output_file),
127
+ "--timeout", "180",
128
+ "python", "run.py",
129
+ )
130
+
131
+ print("STDOUT:", result.stdout)
132
+ print("STDERR:", result.stderr)
133
+
134
+ # Should complete successfully
135
+ assert "Packaging" in result.stdout
136
+ assert "Uploading" in result.stdout
137
+
138
+ # If NCU ran successfully, report should be downloaded
139
+ if result.returncode == 0 and output_file.exists():
140
+ # Report should be a valid NCU report (starts with NVR magic)
141
+ header = output_file.read_bytes()[:3]
142
+ assert header == b"NVR", f"Invalid report header: {header}"
143
+ print(f"Report downloaded: {output_file.stat().st_size / 1024:.1f} KB")
144
+
145
+ def test_ncu_run_with_ncu_args(self) -> None:
146
+ """E2E: custom NCU flags are passed through."""
147
+ with tempfile.TemporaryDirectory() as tmpdir:
148
+ d = Path(tmpdir)
149
+ (d / "run.py").write_text(
150
+ "import torch\n"
151
+ "x = torch.randn(1024, device='cuda')\n"
152
+ "y = x * 2\n"
153
+ )
154
+
155
+ result = self._run_wafer(
156
+ "nvidia", "ncu", "run",
157
+ "--dir", str(d),
158
+ "--ncu-args", "--set full",
159
+ "--no-download",
160
+ "--timeout", "120",
161
+ "python", "run.py",
162
+ )
163
+
164
+ print("STDOUT:", result.stdout)
165
+ print("STDERR:", result.stderr)
166
+
167
+ # Should show the ncu args in the command
168
+ assert "--set full" in result.stdout
169
+
170
+ def test_ncu_run_no_download(self) -> None:
171
+ """E2E: --no-download skips report download."""
172
+ with tempfile.TemporaryDirectory() as tmpdir:
173
+ d = Path(tmpdir)
174
+ (d / "run.py").write_text(
175
+ "import torch\n"
176
+ "x = torch.randn(1024, device='cuda')\n"
177
+ "y = x * 2\n"
178
+ )
179
+
180
+ result = self._run_wafer(
181
+ "nvidia", "ncu", "run",
182
+ "--dir", str(d),
183
+ "--no-download",
184
+ "--timeout", "120",
185
+ "python", "run.py",
186
+ )
187
+
188
+ print("STDOUT:", result.stdout)
189
+
190
+ # Should NOT contain download messages
191
+ assert "Downloading report" not in result.stdout
192
+
193
+
194
+ class TestNcuRunDirPackaging:
195
+ """E2E tests focused on directory packaging behavior."""
196
+
197
+ def test_large_directory_with_exclusions(self) -> None:
198
+ """Directories with lots of excludable content are packaged efficiently."""
199
+ from wafer.ncu_run import package_directory
200
+
201
+ with tempfile.TemporaryDirectory() as tmpdir:
202
+ d = Path(tmpdir)
203
+
204
+ # Create a realistic project with lots of junk
205
+ (d / "run.py").write_text("print('hello')")
206
+ (d / "kernel.cu").write_text("__global__ void k() {}")
207
+
208
+ # Lots of excludable stuff
209
+ (d / "__pycache__").mkdir()
210
+ for i in range(50):
211
+ (d / "__pycache__" / f"module_{i}.cpython-312.pyc").write_bytes(b"\x00" * 1000)
212
+
213
+ (d / "node_modules").mkdir()
214
+ (d / "node_modules" / "big_dep").mkdir()
215
+ (d / "node_modules" / "big_dep" / "index.js").write_text("// big")
216
+
217
+ (d / ".git").mkdir()
218
+ (d / ".git" / "objects").mkdir()
219
+
220
+ tarball, count = package_directory(d)
221
+
222
+ # Should only include the 2 real files
223
+ assert count == 2
224
+ # Tarball should be tiny (just run.py + kernel.cu)
225
+ assert len(tarball) < 1024 # < 1KB