wafer-cli 0.2.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
wafer/inference.py ADDED
@@ -0,0 +1,148 @@
1
+ """Pure functions for inferring what files to upload and which environment to use.
2
+
3
+ All functions are pure: same input = same output, no side effects.
4
+ """
5
+
6
+ import shlex
7
+ from pathlib import Path
8
+
9
+ from .config import WaferConfig, WaferEnvironment
10
+
11
+
12
def infer_upload_files(command: str, cwd: Path) -> list[Path]:
    """Infer which files to upload based on command.

    Deterministic for a given command and directory contents. The function
    reads the filesystem (existence checks and a non-recursive glob of
    ``cwd``) but never writes to it.

    Strategy:
        1. Extract file references from command tokens, including the value
           part of ``--option=value`` style tokens
        2. Add common build files (Makefile, pyproject.toml, etc.)
        3. Add C/C++/CUDA source and header files located directly in ``cwd``

    Args:
        command: Command to execute
        cwd: Current working directory

    Returns:
        Sorted list of file paths to upload

    Raises:
        AssertionError: If an argument has the wrong type or ``cwd`` does
            not exist.

    Example (paths shown as rendered on POSIX):
        >>> infer_upload_files("nvcc kernel.cu -o kernel", Path("/home/user/cuda"))
        [PosixPath('/home/user/cuda/Makefile'), PosixPath('/home/user/cuda/kernel.cu'), ...]
    """
    # Type checks run first so that a wrong argument type fails with the
    # intended assertion message rather than an AttributeError from
    # calling .exists() on a non-Path object.
    assert isinstance(command, str), "command must be a string"
    assert isinstance(cwd, Path), "cwd must be a Path"
    assert cwd.exists(), f"cwd does not exist: {cwd}"

    files: set[Path] = set()

    # Extract file references from command
    try:
        tokens = shlex.split(command)
    except ValueError:
        # If command has unmatched quotes, just split on spaces
        tokens = command.split()

    # File extensions we care about when they appear in the command itself
    file_extensions = {
        ".cu",
        ".cuh",
        ".py",
        ".cpp",
        ".c",
        ".h",
        ".hpp",
        ".rs",
        ".go",
    }

    for token in tokens:
        # A token may be a bare path ("kernel.cu") or carry the path after
        # an "=" ("--config=train.py"); try the whole token first, then the
        # value part.
        candidates = [token]
        _, separator, value = token.partition("=")
        if separator and value:
            candidates.append(value)

        for candidate in candidates:
            candidate_path = Path(candidate)
            if candidate_path.suffix not in file_extensions:
                continue
            full_path = cwd / candidate_path
            # is_file() is already False for paths that do not exist.
            if full_path.is_file():
                files.add(full_path)

    # Add common build files if they exist
    common_files = [
        "Makefile",
        "CMakeLists.txt",
        "pyproject.toml",
        "setup.py",
        "Cargo.toml",
        "go.mod",
        "requirements.txt",
    ]
    for filename in common_files:
        path = cwd / filename
        if path.is_file():
            files.add(path)

    # Add all source files in current directory (not recursive)
    source_extensions = [".cu", ".cuh", ".h", ".hpp", ".c", ".cpp"]
    for ext in source_extensions:
        # The is_file() filter skips directories whose name happens to
        # match the pattern.
        files.update(path for path in cwd.glob(f"*{ext}") if path.is_file())

    return sorted(files)
90
+
91
+
92
def resolve_environment(
    config: WaferConfig,
    env_name: str | None,
) -> WaferEnvironment:
    """Pick the environment to run in.

    Lookup order:
        1. The explicit ``env_name`` argument, when non-empty
        2. ``config.default_environment``, when set
        3. The sole entry of ``config.environments`` when there is exactly one

    Args:
        config: Wafer configuration
        env_name: Optional environment name from CLI

    Returns:
        The selected WaferEnvironment

    Raises:
        ValueError: If ``env_name`` is not a configured environment, or if
            no environment can be determined from the config alone.
    """
    assert isinstance(config, WaferConfig), "config must be WaferConfig"
    assert env_name is None or isinstance(env_name, str), "env_name must be None or str"

    known = config.environments

    # 1. A name given by the caller wins, but it must be configured.
    if env_name:
        if env_name in known:
            return known[env_name]
        choices = ", ".join(known.keys())
        raise ValueError(f"Unknown environment: {env_name}. Available: {choices}")

    # 2. Fall back to the configured default.
    default_name = config.default_environment
    if default_name:
        assert default_name in known, "default_environment validated in WaferConfig"
        return known[default_name]

    # 3. With a single environment there is nothing to choose between.
    if len(known) == 1:
        (only_environment,) = known.values()
        return only_environment

    # Nothing matched: report what is available and how to choose.
    choices = ", ".join(known.keys())
    raise ValueError(
        "No environment specified and no default configured. "
        f"Available: {choices}. "
        "Use --env to specify or set default.environment in config."
    )
wafer/kernel_scope.py ADDED
@@ -0,0 +1,552 @@
1
+ """Unified ISA Analyzer - CLI for static ISA analysis of AMD GPU kernels.
2
+
3
+ This module provides the CLI wrapper for the `wafer amd isa` command.
4
+ It supports analysis of:
5
+ - AMD GPU code objects (.co) - Via API server with ROCm tools
6
+ - AMDGCN ISA files (.s, .gcn, .asm) - Local parsing
7
+ - LLVM-IR files (.ll) - Local parsing
8
+ - TTGIR files (.ttgir, .ttir, .mlir) - Local parsing
9
+
10
+ Design: Wafer-436 - AMD Kernel Scope / ISA Analyzer
11
+ """
12
+
13
+ import sys
14
+ from pathlib import Path
15
+
16
+
17
def print_usage() -> None:
    """Write the `wafer amd isa` help text to stderr, one line at a time."""
    help_lines = (
        "Usage: wafer amd isa <subcommand> [options]",
        "",
        "Subcommands:",
        " analyze <file|directory> Analyze ISA files (.co, .s, .ll, .ttgir)",
        " metrics List available metrics",
        " targets List supported GPU targets",
        "",
        "Supported File Types:",
        " .co AMD GPU code objects (requires API authentication)",
        " .s, .gcn, .asm AMDGCN ISA assembly (local parsing)",
        " .ll, .bc LLVM-IR (local parsing)",
        " .ttgir, .ttir, .mlir TTGIR / Triton IR (local parsing)",
        "",
        "Analyze Options:",
        " --json Output as JSON",
        " --csv Output as CSV",
        " --recursive / -r Scan directories recursively",
        " --filter EXPR Filter results (e.g., 'spills > 0')",
        " --output / -o FILE Write output to file",
        " --kernel INDEX Kernel index if multiple in file",
        "",
        "Examples:",
        " wafer amd isa analyze kernel.co # Analyze code object (requires login)",
        " wafer amd isa analyze kernel.s # Analyze ISA assembly",
        " wafer amd isa analyze kernel.s --json # Output as JSON",
        " wafer amd isa analyze ~/.triton/cache/ --filter 'spills > 0'",
        " wafer amd isa analyze . -r --csv -o metrics.csv",
        " wafer amd isa metrics # List available metrics",
        " wafer amd isa targets # List supported GPU targets",
    )
    for help_line in help_lines:
        print(help_line, file=sys.stderr)
48
+
49
+
50
+ def analyze_command(
51
+ path: str,
52
+ json_output: bool = False,
53
+ csv_output: bool = False,
54
+ recursive: bool = True,
55
+ filter_expr: str | None = None,
56
+ output_file: str | None = None,
57
+ kernel_index: int = 0,
58
+ api_url: str | None = None,
59
+ auth_headers: dict[str, str] | None = None,
60
+ ) -> str:
61
+ """Analyze ISA/LLVM-IR/TTGIR/.co file or directory.
62
+
63
+ Args:
64
+ path: Path to file or directory
65
+ json_output: Output as JSON
66
+ csv_output: Output as CSV
67
+ recursive: Scan directories recursively
68
+ filter_expr: Filter expression (e.g., "spills > 0")
69
+ output_file: Write output to file
70
+ kernel_index: Kernel index for multi-kernel files
71
+ api_url: API URL for .co file analysis (required for .co files)
72
+ auth_headers: Auth headers for .co file analysis
73
+
74
+ Returns:
75
+ Analysis output string
76
+ """
77
+ from wafer_core.lib.kernel_scope import (
78
+ analyze_code_object,
79
+ analyze_directory,
80
+ analyze_file,
81
+ analyze_isa_file,
82
+ )
83
+
84
+ target_path = Path(path).expanduser()
85
+
86
+ if not target_path.exists():
87
+ raise FileNotFoundError(f"Path not found: {path}")
88
+
89
+ # Single file analysis
90
+ if target_path.is_file():
91
+ suffix = target_path.suffix.lower()
92
+
93
+ # Code object files (.co) - need API
94
+ if suffix == ".co":
95
+ if not api_url or not auth_headers:
96
+ raise RuntimeError(
97
+ "API authentication required for .co file analysis. "
98
+ "Run 'wafer login' first."
99
+ )
100
+ result = analyze_code_object(target_path, api_url, auth_headers)
101
+ # ISA files - use kernel_index parameter
102
+ elif suffix in (".s", ".gcn", ".asm"):
103
+ result = analyze_isa_file(target_path, kernel_index=kernel_index)
104
+ else:
105
+ result = analyze_file(target_path, api_url=api_url, auth_headers=auth_headers)
106
+
107
+ if not result.success:
108
+ raise RuntimeError(f"Analysis failed: {result.error}")
109
+
110
+ output = _format_single_result(result, json_output, csv_output)
111
+
112
+ # Directory analysis
113
+ else:
114
+ batch_result = analyze_directory(
115
+ target_path,
116
+ recursive=recursive,
117
+ api_url=api_url,
118
+ auth_headers=auth_headers,
119
+ )
120
+
121
+ # Apply filter if specified
122
+ if filter_expr:
123
+ batch_result = _apply_filter(batch_result, filter_expr)
124
+
125
+ output = _format_batch_result(batch_result, json_output, csv_output)
126
+
127
+ # Write to file if specified
128
+ if output_file:
129
+ Path(output_file).write_text(output)
130
+ print(f"Output written to {output_file}", file=sys.stderr)
131
+ return f"Results saved to {output_file}"
132
+
133
+ return output
134
+
135
+
136
def metrics_command() -> str:
    """Build the static listing of metrics and instruction categories.

    Returns:
        Newline-joined listing: a title block, one three-line entry per
        metric (name + description, derivation, blank), then the
        instruction category legend.
    """
    name_width = 25
    blank_name = " " * name_width

    # (metric name, short description, how the value is derived)
    catalog = (
        ("vgpr_count", "Vector GPR allocation", "From .amdhsa_next_free_vgpr directive"),
        ("sgpr_count", "Scalar GPR allocation", "From .amdhsa_next_free_sgpr directive"),
        ("agpr_count", "Accumulator GPR count", "For MFMA operations (MI100+)"),
        ("lds_size", "LDS allocation (bytes)", "From .amdhsa_group_segment_fixed_size"),
        ("scratch_size", "Scratch memory (bytes)", "From .amdhsa_private_segment_fixed_size"),
        ("spill_count", "Register spill operations", "Count of scratch_store/load instructions"),
        ("mfma_count", "MFMA instructions", "Count of v_mfma_* instructions"),
        ("mfma_density_pct", "MFMA density (%)", "MFMA / total VALU * 100"),
        ("packed_ops_count", "Packed instructions", "Count of v_pk_* instructions"),
        ("fma_count", "FMA instructions", "Count of v_fma_* instructions"),
        ("barrier_count", "Barriers", "Count of s_barrier instructions"),
        ("full_stall_count", "Full stalls", "Count of waitcnt 0 instructions"),
        ("global_load_count", "Global loads", "Count of global_load_* instructions"),
        ("global_store_count", "Global stores", "Count of global_store_* instructions"),
        ("lds_ops_count", "LDS operations", "Count of ds_read/write instructions"),
        ("theoretical_occupancy", "Max waves/CU", "Limited by VGPR/SGPR/LDS"),
    )

    report = ["Available Metrics for Kernel Scope Analysis", "=" * 60, ""]

    for metric_name, summary, source in catalog:
        report += [
            f" {metric_name.ljust(name_width)} {summary}",
            f" {blank_name} Derivation: {source}",
            "",
        ]

    report += [
        "Instruction Categories:",
        " VALU - Vector ALU (v_add_*, v_mul_*, v_fma_*)",
        " SALU - Scalar ALU (s_add_*, s_mul_*)",
        " VMEM - Vector memory (global_load_*, global_store_*)",
        " SMEM - Scalar memory (s_load_*, s_buffer_load_*)",
        " LDS - Local Data Share (ds_read_*, ds_write_*)",
        " MFMA - Matrix FMA (v_mfma_f32_*, v_mfma_f16_*)",
        " SYNC - Synchronization (s_barrier, s_waitcnt)",
        " SPILL - Spill operations (scratch_store_*, scratch_load_*)",
    ]

    return "\n".join(report)
185
+
186
+
187
def targets_command() -> str:
    """Render a table of the supported GPU targets.

    One row is produced per entry of ``SUPPORTED_TARGETS``, using the specs
    returned by ``get_target_specs`` for that entry.

    Returns:
        Newline-joined table with a title, column header, one row per
        target and a trailing note.
    """
    from wafer_core.lib.kernel_scope.targets import SUPPORTED_TARGETS, get_target_specs

    def render_row(specs) -> str:
        # Same column widths as the header row below.
        left = f"{specs.name:<12} {specs.series:<10} {specs.vgprs_per_cu:<10} "
        right = f"{specs.sgprs_per_cu:<10} {specs.lds_per_cu:<10} {specs.max_waves_per_cu:<10}"
        return left + right

    column_header = (
        f"{'Architecture':<12} {'Series':<10} {'VGPRs/CU':<10} "
        f"{'SGPRs/CU':<10} {'LDS/CU':<10} {'Max Waves':<10}"
    )

    table = ["Supported GPU Targets", "=" * 60, "", column_header, "-" * 60]
    for target in SUPPORTED_TARGETS:
        table.append(render_row(get_target_specs(target)))
    table.append("")
    table.append("Note: Default values are used for unknown architectures.")

    return "\n".join(table)
216
+
217
+
218
+ def _format_single_result(result, json_output: bool, csv_output: bool) -> str:
219
+ """Format a single analysis result."""
220
+ if json_output:
221
+ return result.to_json()
222
+
223
+ if csv_output:
224
+ return _result_to_csv(result)
225
+
226
+ return _result_to_text(result)
227
+
228
+
229
+ def _format_batch_result(batch_result, json_output: bool, csv_output: bool) -> str:
230
+ """Format batch analysis results."""
231
+ if json_output:
232
+ return batch_result.to_json()
233
+
234
+ if csv_output:
235
+ return _batch_to_csv(batch_result)
236
+
237
+ return _batch_to_text(batch_result)
238
+
239
+
240
+ def _result_to_text(result) -> str:
241
+ """Format single result as human-readable text."""
242
+ lines = []
243
+
244
+ if result.code_object_analysis:
245
+ # .co file analysis (via API)
246
+ a = result.code_object_analysis
247
+ lines.extend([
248
+ f"Kernel: {a.kernel_name}",
249
+ f"Architecture: {a.architecture}",
250
+ "Source: Code Object (.co)",
251
+ "",
252
+ "=== Registers ===",
253
+ f" VGPRs: {a.vgpr_count}",
254
+ f" SGPRs: {a.sgpr_count}",
255
+ f" AGPRs: {a.agpr_count}",
256
+ ])
257
+
258
+ if a.vgpr_spill_count > 0 or a.sgpr_spill_count > 0:
259
+ lines.extend([
260
+ "",
261
+ "!!! SPILLS DETECTED !!!",
262
+ f" VGPR spills: {a.vgpr_spill_count}",
263
+ f" SGPR spills: {a.sgpr_spill_count}",
264
+ ])
265
+ else:
266
+ lines.append(" Spills: None (good)")
267
+
268
+ lines.extend([
269
+ "",
270
+ "=== Memory ===",
271
+ f" LDS: {a.lds_bytes} bytes",
272
+ f" Global loads: {a.global_loads}",
273
+ f" Global stores: {a.global_stores}",
274
+ f" LDS ops: {a.lds_ops}",
275
+ "",
276
+ "=== Instructions ===",
277
+ f" MFMA: {a.mfma_count}",
278
+ f" FMA: {a.fma_count}",
279
+ f" Packed (v_pk_*): {a.packed_ops_count}",
280
+ f" Full stalls (waitcnt 0): {a.waitcnt_full_stalls}",
281
+ f" Barriers: {a.barriers}",
282
+ ])
283
+
284
+ elif result.isa_analysis:
285
+ # .s/.gcn/.asm file analysis (local parsing)
286
+ a = result.isa_analysis
287
+ lines.extend([
288
+ f"Kernel: {a.kernel_name}",
289
+ f"Architecture: {a.architecture}",
290
+ "Source: ISA Assembly (.s)",
291
+ "",
292
+ "=== Registers ===",
293
+ f" VGPRs: {a.vgpr_count}",
294
+ f" SGPRs: {a.sgpr_count}",
295
+ f" AGPRs: {a.agpr_count}",
296
+ ])
297
+
298
+ if a.spill_count > 0:
299
+ lines.extend([
300
+ "",
301
+ "!!! SPILLS DETECTED !!!",
302
+ f" Total spills: {a.spill_count}",
303
+ f" VGPR spills: {a.vgpr_spill_count}",
304
+ f" SGPR spills: {a.sgpr_spill_count}",
305
+ ])
306
+ else:
307
+ lines.append(" Spills: None (good)")
308
+
309
+ lines.extend([
310
+ "",
311
+ "=== Memory ===",
312
+ f" LDS: {a.lds_size} bytes",
313
+ f" Scratch: {a.scratch_size} bytes",
314
+ f" Global loads: {a.global_load_count}",
315
+ f" Global stores: {a.global_store_count}",
316
+ f" LDS ops: {a.lds_ops_count}",
317
+ "",
318
+ "=== Instructions ===",
319
+ f" MFMA: {a.mfma_count} ({a.mfma_density_pct:.1f}% density)",
320
+ f" FMA: {a.fma_count}",
321
+ f" Packed (v_pk_*): {a.packed_ops_count}",
322
+ f" Barriers: {a.barrier_count}",
323
+ f" Full stalls: {a.full_stall_count}",
324
+ "",
325
+ "=== Instruction Mix ===",
326
+ f" VALU: {a.instruction_mix.valu_count}",
327
+ f" SALU: {a.instruction_mix.salu_count}",
328
+ f" VMEM: {a.instruction_mix.vmem_count}",
329
+ f" SMEM: {a.instruction_mix.smem_count}",
330
+ f" LDS: {a.instruction_mix.lds_count}",
331
+ f" MFMA: {a.instruction_mix.mfma_count}",
332
+ f" Sync: {a.instruction_mix.sync_count}",
333
+ f" Total: {a.instruction_mix.total_count}",
334
+ "",
335
+ "=== Occupancy ===",
336
+ f" Max waves (VGPR): {a.max_waves_vgpr}",
337
+ f" Max waves (SGPR): {a.max_waves_sgpr}",
338
+ f" Max waves (LDS): {a.max_waves_lds}",
339
+ f" Theoretical: {a.theoretical_occupancy} waves/CU",
340
+ ])
341
+
342
+ if a.warnings:
343
+ lines.extend([
344
+ "",
345
+ "=== Warnings ===",
346
+ ])
347
+ for warning in a.warnings:
348
+ lines.append(f" {warning}")
349
+
350
+ elif result.ttgir_analysis:
351
+ a = result.ttgir_analysis
352
+ lines.extend([
353
+ "TTGIR Analysis",
354
+ "",
355
+ "=== Operations ===",
356
+ f" tt.dot: {a.dot_count}",
357
+ f" tt.load: {a.load_count}",
358
+ f" tt.store: {a.store_count}",
359
+ f" tt.reduce: {a.reduce_count}",
360
+ f" Barriers: {a.barrier_count}",
361
+ ])
362
+
363
+ if a.tile_info:
364
+ lines.extend([
365
+ "",
366
+ "=== Tiling ===",
367
+ f" BLOCK_M: {a.tile_info.block_m}",
368
+ f" BLOCK_N: {a.tile_info.block_n}",
369
+ f" BLOCK_K: {a.tile_info.block_k}",
370
+ f" num_warps: {a.tile_info.num_warps}",
371
+ f" num_stages: {a.tile_info.num_stages}",
372
+ ])
373
+
374
+ if a.has_software_pipelining:
375
+ lines.append(" Software pipelining: enabled")
376
+
377
+ if a.estimated_compute_intensity:
378
+ lines.append(f" Compute intensity: {a.estimated_compute_intensity:.1f} FLOPs/byte")
379
+
380
+ elif result.llvm_ir_analysis:
381
+ a = result.llvm_ir_analysis
382
+ lines.extend([
383
+ "LLVM-IR Analysis",
384
+ "",
385
+ f" Functions: {a.function_count}",
386
+ f" Total instructions: {a.total_instructions}",
387
+ f" Functions with loops: {a.functions_with_loops}",
388
+ f" Has vector ops: {a.has_vector_ops}",
389
+ ])
390
+
391
+ if a.kernel_functions:
392
+ lines.append(f" Kernel functions: {', '.join(a.kernel_functions)}")
393
+
394
+ return "\n".join(lines)
395
+
396
+
397
+ def _result_to_csv(result) -> str:
398
+ """Format single result as CSV."""
399
+ header = "kernel_name,architecture,source_type,vgpr_count,sgpr_count,vgpr_spills,sgpr_spills,mfma_count,lds_bytes,global_loads,global_stores"
400
+
401
+ if result.code_object_analysis:
402
+ a = result.code_object_analysis
403
+ row = f"{a.kernel_name},{a.architecture},code_object,{a.vgpr_count},{a.sgpr_count},{a.vgpr_spill_count},{a.sgpr_spill_count},{a.mfma_count},{a.lds_bytes},{a.global_loads},{a.global_stores}"
404
+ return f"{header}\n{row}"
405
+
406
+ if result.isa_analysis:
407
+ a = result.isa_analysis
408
+ row = f"{a.kernel_name},{a.architecture},isa_assembly,{a.vgpr_count},{a.sgpr_count},{a.vgpr_spill_count},{a.sgpr_spill_count},{a.mfma_count},{a.lds_size},{a.global_load_count},{a.global_store_count}"
409
+ return f"{header}\n{row}"
410
+
411
+ return "# Unsupported format for CSV"
412
+
413
+
414
+ def _batch_to_text(batch_result) -> str:
415
+ """Format batch results as text."""
416
+ lines = [
417
+ f"Analyzed {batch_result.total_files} files",
418
+ f" Successful: {batch_result.successful}",
419
+ f" Failed: {batch_result.failed}",
420
+ "",
421
+ ]
422
+
423
+ if batch_result.summary:
424
+ lines.extend([
425
+ "=== Summary ===",
426
+ f" Avg VGPRs: {batch_result.summary.get('total_vgpr_avg', 0):.1f}",
427
+ f" Avg SGPRs: {batch_result.summary.get('total_sgpr_avg', 0):.1f}",
428
+ f" Total spills: {batch_result.summary.get('total_spills', 0)}",
429
+ f" Files with spills: {batch_result.summary.get('files_with_spills', 0)}",
430
+ f" Total MFMA: {batch_result.summary.get('total_mfma', 0)}",
431
+ f" Avg MFMA density: {batch_result.summary.get('avg_mfma_density', 0):.1f}%",
432
+ "",
433
+ ])
434
+
435
+ # Show individual results
436
+ for result in batch_result.results:
437
+ if result.success and result.code_object_analysis:
438
+ a = result.code_object_analysis
439
+ spills = a.vgpr_spill_count + a.sgpr_spill_count
440
+ status = "⚠️" if spills > 0 else "✓"
441
+ lines.append(
442
+ f" {status} {result.file_path}: "
443
+ f"VGPRs={a.vgpr_count}, spills={spills}, MFMA={a.mfma_count}"
444
+ )
445
+ elif result.success and result.isa_analysis:
446
+ a = result.isa_analysis
447
+ status = "⚠️" if a.spill_count > 0 else "✓"
448
+ lines.append(
449
+ f" {status} {result.file_path}: "
450
+ f"VGPRs={a.vgpr_count}, spills={a.spill_count}, MFMA={a.mfma_count}"
451
+ )
452
+ elif not result.success:
453
+ lines.append(f" ✗ {result.file_path}: {result.error}")
454
+
455
+ return "\n".join(lines)
456
+
457
+
458
+ def _batch_to_csv(batch_result) -> str:
459
+ """Format batch results as CSV."""
460
+ lines = ["file_path,kernel_name,architecture,source_type,vgpr_count,sgpr_count,vgpr_spills,sgpr_spills,mfma_count,lds_bytes"]
461
+
462
+ for result in batch_result.results:
463
+ if result.success and result.code_object_analysis:
464
+ a = result.code_object_analysis
465
+ lines.append(
466
+ f"{result.file_path},{a.kernel_name},{a.architecture},code_object,"
467
+ f"{a.vgpr_count},{a.sgpr_count},{a.vgpr_spill_count},{a.sgpr_spill_count},"
468
+ f"{a.mfma_count},{a.lds_bytes}"
469
+ )
470
+ elif result.success and result.isa_analysis:
471
+ a = result.isa_analysis
472
+ lines.append(
473
+ f"{result.file_path},{a.kernel_name},{a.architecture},isa_assembly,"
474
+ f"{a.vgpr_count},{a.sgpr_count},{a.vgpr_spill_count},{a.sgpr_spill_count},"
475
+ f"{a.mfma_count},{a.lds_size}"
476
+ )
477
+
478
+ return "\n".join(lines)
479
+
480
+
481
+ def _apply_filter(batch_result, filter_expr: str):
482
+ """Apply filter expression to batch results."""
483
+ # Simple filter parsing: "metric op value"
484
+ # Supported: spills > 0, vgpr_count > 128, mfma_count == 0
485
+ import re
486
+
487
+ match = re.match(r"(\w+)\s*(>|<|>=|<=|==|!=)\s*(\d+)", filter_expr)
488
+ if not match:
489
+ print(f"Warning: Invalid filter expression: {filter_expr}", file=sys.stderr)
490
+ return batch_result
491
+
492
+ metric = match.group(1)
493
+ op = match.group(2)
494
+ value = int(match.group(3))
495
+
496
+ # Map common aliases
497
+ metric_map = {
498
+ "spills": "spill_count",
499
+ "vgpr": "vgpr_count",
500
+ "sgpr": "sgpr_count",
501
+ "mfma": "mfma_count",
502
+ "occupancy": "theoretical_occupancy",
503
+ }
504
+ metric = metric_map.get(metric, metric)
505
+
506
+ # Filter function - supports both isa_analysis and code_object_analysis
507
+ def passes_filter(result):
508
+ if not result.success:
509
+ return False
510
+
511
+ # Try to get metric from either analysis type
512
+ actual = None
513
+ if result.isa_analysis:
514
+ actual = getattr(result.isa_analysis, metric, None)
515
+ elif result.code_object_analysis:
516
+ # Map isa_analysis metric names to code_object_analysis equivalents
517
+ co_metric_map = {
518
+ "spill_count": "vgpr_spill_count", # Use vgpr_spill_count as proxy
519
+ "lds_size": "lds_bytes",
520
+ }
521
+ co_metric = co_metric_map.get(metric, metric)
522
+ actual = getattr(result.code_object_analysis, co_metric, None)
523
+
524
+ if actual is None:
525
+ return False
526
+
527
+ if op == ">":
528
+ return actual > value
529
+ elif op == "<":
530
+ return actual < value
531
+ elif op == ">=":
532
+ return actual >= value
533
+ elif op == "<=":
534
+ return actual <= value
535
+ elif op == "==":
536
+ return actual == value
537
+ elif op == "!=":
538
+ return actual != value
539
+
540
+ return False
541
+
542
+ filtered_results = [r for r in batch_result.results if passes_filter(r)]
543
+
544
+ from wafer_core.lib.kernel_scope.api import BatchAnalysisResult
545
+
546
+ return BatchAnalysisResult(
547
+ total_files=len(filtered_results),
548
+ successful=sum(1 for r in filtered_results if r.success),
549
+ failed=sum(1 for r in filtered_results if not r.success),
550
+ results=tuple(filtered_results),
551
+ summary=batch_result.summary,
552
+ )