wings-quantum 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
wings/benchmarks.py ADDED
@@ -0,0 +1,605 @@
1
+ """Benchmarking utilities for backend comparison."""
2
+
3
+ import time
4
+ from typing import Any, Optional
5
+
6
+ import numpy as np
7
+
8
+ from .compat import HAS_CUSTATEVEC
9
+ from .config import OptimizerConfig
10
+ from .optimizer import GaussianOptimizer
11
+
12
+
13
def _get_gpu_count():
    """Return the number of CUDA devices CuPy can see, or 0 when GPUs are unusable."""
    # Without cuStateVec support there is no GPU path to count devices for.
    if not HAS_CUSTATEVEC:
        return 0
    # CuPy may be missing or the CUDA runtime may fail; treat both as "no GPUs".
    try:
        import cupy as cp
    except Exception:
        return 0
    try:
        return cp.cuda.runtime.getDeviceCount()
    except Exception:
        return 0
23
+
24
+
25
# Public API of the benchmarking module.
# FIX: benchmark_batched_evaluation is a public, documented function defined in
# this module but was missing from __all__ (so `from wings.benchmarks import *`
# did not export it).
__all__ = [
    "benchmark_gpu",
    "benchmark_multi_gpu",
    "benchmark_batched_evaluation",
    "find_gpu_crossover",
    "benchmark_all_backends",
    "BenchmarkResult",
]
32
+
33
+
34
class BenchmarkResult:
    """Accumulates per-backend timing metrics plus a winner/recommendation verdict."""

    def __init__(self):
        # Mapping: backend name -> {metric name -> value}.
        self.results: dict[str, dict[str, float]] = {}
        # Name of the fastest backend, set by the benchmark drivers.
        self.winner: Optional[str] = None
        # Human-readable advice derived from the measurements.
        self.recommendation: str = ""

    def add_result(self, backend: str, metric: str, value: float):
        """Record one metric value under the given backend, creating it on first use."""
        self.results.setdefault(backend, {})[metric] = value

    def __repr__(self) -> str:
        parts = ["BenchmarkResult:"]
        for name, metrics in self.results.items():
            parts.append(f" {name}:")
            parts.extend(f" {key}: {val}" for key, val in metrics.items())
        if self.winner:
            parts.append(f" Winner: {self.winner}")
        return "\n".join(parts)
56
+
57
+
58
def benchmark_gpu(
    n_qubits: int = 8,
    sigma: float = 0.5,
    n_trials: int = 10,
    verbose: bool = True,
) -> BenchmarkResult:
    """
    Benchmark GPU vs CPU performance for a specific configuration.

    Times single statevector evaluations on the CPU backend, the Aer GPU
    backend (when available), and the cuStateVec backend (when available),
    then selects the fastest backend as the winner.

    Parameters
    ----------
    n_qubits : int
        Number of qubits to benchmark
    sigma : float
        Gaussian width parameter
    n_trials : int
        Number of trials for timing (more = more accurate)
    verbose : bool
        Print detailed results

    Returns
    -------
    BenchmarkResult
        Object containing timing results and recommendations

    Examples
    --------
    >>> result = benchmark_gpu(n_qubits=12, sigma=0.5)
    >>> print(f"GPU speedup: {result.results['gpu_aer']['speedup_vs_cpu']:.2f}x")
    """
    result = BenchmarkResult()

    if verbose:
        print(f"\n{'=' * 80}")
        print("GPU BENCHMARK")
        print(f"{'=' * 80}")
        print(f" Qubits: {n_qubits}")
        print(f" Parameters: {n_qubits * n_qubits}")

    # CPU configuration: reference baseline with all acceleration disabled.
    config_cpu = OptimizerConfig(
        n_qubits=n_qubits,
        sigma=sigma,
        box_size=4 * sigma,
        verbose=False,
        use_gpu=False,
        use_custatevec=False,
        parallel_gradients=False,
    )

    if verbose:
        print("\nInitializing CPU optimizer...")
    optimizer_cpu = GaussianOptimizer(config_cpu)
    test_params = optimizer_cpu.get_initial_params("smart")

    # ========================================
    # Benchmark 1: Single evaluation
    # ========================================
    if verbose:
        print("\n1. Single Statevector Evaluation")
        print("-" * 50)

    # CPU timing: average over n_trials * 10 evaluations.
    start = time.perf_counter()
    for _ in range(n_trials * 10):
        optimizer_cpu.get_statevector(test_params)
    cpu_single = (time.perf_counter() - start) / (n_trials * 10)
    result.add_result("cpu", "single_eval_ms", cpu_single * 1000)

    if verbose:
        print(f" CPU: {cpu_single * 1000:.2f} ms/eval")

    # GPU timing (Aer)
    config_gpu = OptimizerConfig(
        n_qubits=n_qubits,
        sigma=sigma,
        box_size=4 * sigma,
        verbose=False,
        use_gpu=True,
        use_custatevec=False,
        gpu_precision="double",
    )

    optimizer_gpu = GaussianOptimizer(config_gpu)

    if optimizer_gpu._gpu_evaluator and optimizer_gpu._gpu_evaluator.gpu_available:
        # Warm-up: keep one-time initialization costs out of the timed loop.
        for _ in range(5):
            _ = optimizer_gpu.get_statevector(test_params)

        start = time.perf_counter()
        for _ in range(n_trials * 10):
            optimizer_gpu.get_statevector(test_params)
        gpu_single = (time.perf_counter() - start) / (n_trials * 10)

        result.add_result("gpu_aer", "single_eval_ms", gpu_single * 1000)
        result.add_result("gpu_aer", "speedup_vs_cpu", cpu_single / gpu_single)

        if verbose:
            print(f" GPU (Aer): {gpu_single * 1000:.2f} ms/eval")
            print(f" Speedup: {cpu_single / gpu_single:.2f}x")
    elif verbose:
        print(" GPU (Aer): Not available")

    # cuStateVec timing
    if HAS_CUSTATEVEC:
        config_cusv = OptimizerConfig(
            n_qubits=n_qubits,
            sigma=sigma,
            box_size=4 * sigma,
            verbose=False,
            use_gpu=False,
            use_custatevec=True,
            gpu_precision="double",
        )

        optimizer_cusv = GaussianOptimizer(config_cusv)

        if optimizer_cusv._custatevec_evaluator is not None:
            # Warm-up
            for _ in range(5):
                _ = optimizer_cusv._custatevec_evaluator.compute_fidelity(test_params)

            start = time.perf_counter()
            for _ in range(n_trials * 10):
                _ = optimizer_cusv._custatevec_evaluator.compute_fidelity(test_params)
            cusv_single = (time.perf_counter() - start) / (n_trials * 10)

            result.add_result("custatevec", "single_eval_ms", cusv_single * 1000)
            result.add_result("custatevec", "speedup_vs_cpu", cpu_single / cusv_single)

            if verbose:
                print(f" cuStateVec: {cusv_single * 1000:.2f} ms/eval")
                print(f" Speedup: {cpu_single / cusv_single:.2f}x")

            # Cleanup: release GPU handles held by the cuStateVec evaluators.
            optimizer_cusv._custatevec_evaluator.cleanup()
            if optimizer_cusv._custatevec_batch_evaluator:
                optimizer_cusv._custatevec_batch_evaluator.cleanup()

    # BUG FIX: the original built `result` but never returned it, so callers
    # received None despite the declared BenchmarkResult return type (and the
    # docstring example). Pick the fastest backend by single-evaluation time,
    # record a recommendation, and return the populated result.
    best_time = float("inf")
    for backend, metrics in result.results.items():
        t = metrics.get("single_eval_ms")
        if t is not None and t < best_time:
            best_time = t
            result.winner = backend

    result.recommendation = f"Use {result.winner} for {n_qubits} qubits"

    return result
197
+
198
+
199
def benchmark_multi_gpu(
    n_qubits: int = 12,
    sigma: float = 0.5,
    batch_sizes: Optional[list[int]] = None,
    verbose: bool = True,
) -> BenchmarkResult:
    """
    Benchmark multi-GPU vs single-GPU performance.

    Evaluates randomly generated populations of parameter vectors at several
    batch sizes on a single-GPU cuStateVec backend and on the multi-GPU
    backend, and recommends multi-GPU only when the average speedup is > 1.2x.

    Parameters
    ----------
    n_qubits : int
        Number of qubits (should be >= 12 for meaningful results)
    sigma : float
        Gaussian width
    batch_sizes : list of int, optional
        Batch sizes to test (default: [64, 128, 256])
    verbose : bool
        Print results

    Returns
    -------
    BenchmarkResult
        Timing comparisons for single vs multi-GPU
    """
    result = BenchmarkResult()

    n_gpus = _get_gpu_count()

    # Without at least two GPUs the comparison is meaningless; bail out early.
    if n_gpus < 2:
        if verbose:
            print("Multi-GPU benchmark requires 2+ GPUs")
        result.recommendation = "Multi-GPU not available"
        return result

    if batch_sizes is None:
        batch_sizes = [64, 128, 256]

    if verbose:
        print(f"\n{'=' * 60}")
        print(f"MULTI-GPU BENCHMARK ({n_gpus} GPUs, {n_qubits} qubits)")
        print(f"{'=' * 60}")

    # Single GPU config
    config_single = OptimizerConfig(
        n_qubits=n_qubits,
        sigma=sigma,
        verbose=False,
        use_custatevec=True,
        use_multi_gpu=False,
    )

    # Multi-GPU config
    config_multi = OptimizerConfig(
        n_qubits=n_qubits,
        sigma=sigma,
        verbose=False,
        use_custatevec=True,
        use_multi_gpu=True,
    )

    opt_single = GaussianOptimizer(config_single)
    opt_multi = GaussianOptimizer(config_multi)

    if opt_multi._multi_gpu_evaluator is None:
        if verbose:
            print("Multi-GPU evaluator not initialized")
        # BUG FIX: the original returned here without releasing either
        # optimizer's GPU resources and with an empty recommendation.
        result.recommendation = "Multi-GPU evaluator not initialized"
        if hasattr(opt_single, "cleanup"):
            opt_single.cleanup()
        if hasattr(opt_multi, "cleanup"):
            opt_multi.cleanup()
        return result

    for batch_size in batch_sizes:
        # Small random perturbations around zero serve as a realistic workload.
        population = np.random.randn(batch_size, config_single.n_params) * 0.1

        # Single GPU
        start = time.perf_counter()
        _ = opt_single.evaluate_population(population, backend="custatevec")
        single_time = time.perf_counter() - start

        # Multi GPU
        start = time.perf_counter()
        _ = opt_multi.evaluate_population(population, backend="multi_gpu")
        multi_time = time.perf_counter() - start

        speedup = single_time / multi_time

        result.add_result("single_gpu", f"batch_{batch_size}_ms", single_time * 1000)
        result.add_result("multi_gpu", f"batch_{batch_size}_ms", multi_time * 1000)
        result.add_result("multi_gpu", f"batch_{batch_size}_speedup", speedup)

        if verbose:
            print(
                f" Batch {batch_size}: Single={single_time * 1000:.0f}ms, "
                f"Multi={multi_time * 1000:.0f}ms, Speedup={speedup:.2f}x"
            )

    # Cleanup
    if hasattr(opt_single, "cleanup"):
        opt_single.cleanup()
    if hasattr(opt_multi, "cleanup"):
        opt_multi.cleanup()

    avg_speedup = np.mean(
        [result.results["multi_gpu"].get(f"batch_{bs}_speedup", 1.0) for bs in batch_sizes]
    )

    # 1.2x threshold: multi-GPU has coordination overhead, so require a clear win.
    result.winner = "multi_gpu" if avg_speedup > 1.2 else "single_gpu"
    result.recommendation = (
        f"Multi-GPU provides {avg_speedup:.1f}x speedup for {n_qubits} qubits"
        if avg_speedup > 1.2
        else f"Single GPU sufficient for {n_qubits} qubits"
    )

    if verbose:
        print(f"\nRecommendation: {result.recommendation}")

    return result
314
+
315
+
316
def benchmark_batched_evaluation(
    n_qubits: int = 10,
    sigma: float = 0.5,
    batch_sizes: Optional[list[int]] = None,
    verbose: bool = True,
) -> BenchmarkResult:
    """
    Benchmark batched evaluation (gradient-like workload) across backends.

    For each batch size, times a sequential CPU evaluation of the population
    against the GPU batched `evaluate_population` path (when available), then
    picks the backend with the lowest total batched-evaluation time.

    Parameters
    ----------
    n_qubits : int
        Number of qubits
    sigma : float
        Gaussian width
    batch_sizes : list of int, optional
        Batch sizes to test (default: [32, 64, 128])
    verbose : bool
        Print results

    Returns
    -------
    BenchmarkResult
        Timing comparisons for CPU vs GPU batched evaluation
    """
    result = BenchmarkResult()

    if batch_sizes is None:
        batch_sizes = [32, 64, 128]

    # CPU config
    config_cpu = OptimizerConfig(
        n_qubits=n_qubits,
        sigma=sigma,
        verbose=False,
        use_gpu=False,
        use_custatevec=False,
    )

    # GPU config
    config_gpu = OptimizerConfig(
        n_qubits=n_qubits,
        sigma=sigma,
        verbose=False,
        use_gpu=True,
        use_custatevec=False,
    )

    optimizer_cpu = GaussianOptimizer(config_cpu)
    optimizer_gpu = GaussianOptimizer(config_gpu)

    if verbose:
        print("\nBatched Evaluation (Gradient-like workload)")
        print("-" * 50)

    for batch_size in batch_sizes:
        # Random parameter population approximating a gradient-style workload.
        population = np.random.randn(batch_size, config_cpu.n_params) * 0.1

        # CPU sequential: evaluate one parameter vector at a time.
        start = time.perf_counter()
        np.array(
            [
                optimizer_cpu._compute_fidelity_fast(optimizer_cpu.get_statevector(p))
                for p in population
            ]
        )
        cpu_batch_time = time.perf_counter() - start

        result.add_result("cpu", f"batch_{batch_size}_ms", cpu_batch_time * 1000)

        if verbose:
            print(f" Batch {batch_size}: CPU={cpu_batch_time * 1000:.0f}ms", end="")

        # GPU batched
        if optimizer_gpu._gpu_evaluator and optimizer_gpu._gpu_evaluator.gpu_available:
            start = time.perf_counter()
            optimizer_gpu.evaluate_population(population)
            gpu_batch_time = time.perf_counter() - start

            result.add_result("gpu_aer", f"batch_{batch_size}_ms", gpu_batch_time * 1000)

            if verbose:
                print(
                    f", GPU={gpu_batch_time * 1000:.0f}ms ({cpu_batch_time / gpu_batch_time:.1f}x)",
                    end="",
                )

        if verbose:
            print()

    # BUG FIX: the original winner-selection looked up "single_eval_ms", a
    # metric this function never records (it only stores batch_*_ms), so
    # `winner` was always None and the recommendation read "Use None for N
    # qubits". Select the backend with the lowest total batch time instead.
    best_time = float("inf")
    for backend, metrics in result.results.items():
        total_ms = sum(v for k, v in metrics.items() if k.endswith("_ms"))
        if total_ms < best_time:
            best_time = total_ms
            result.winner = backend

    result.recommendation = f"Use {result.winner} for {n_qubits} qubits"

    if verbose:
        print(f"\n{'=' * 50}")
        print(f"RECOMMENDATION: {result.recommendation}")

    return result
428
+
429
+
430
def find_gpu_crossover(
    qubit_range: Optional[list[int]] = None,
    sigma: float = 0.5,
    verbose: bool = True,
) -> dict[str, Any]:
    """
    Find where GPU becomes faster than CPU for your hardware.

    This helps you determine the optimal backend for different problem sizes.

    Parameters
    ----------
    qubit_range : list of int, optional
        Qubit counts to test. Default: [6, 8, 10, 12, 14, 16, 18]
    sigma : float
        Gaussian width for test problems
    verbose : bool
        Print results table

    Returns
    -------
    dict
        Contains:
        - 'crossover_qubits': Qubit count where GPU becomes faster
        - 'results': Detailed timing for each qubit count
        - 'recommendation': Human-readable recommendation

    Examples
    --------
    >>> info = find_gpu_crossover()
    >>> print(f"GPU becomes faster at {info['crossover_qubits']} qubits")
    """
    if qubit_range is None:
        qubit_range = [6, 8, 10, 12, 14, 16, 18]

    results = []
    # First qubit count at which cuStateVec beats CPU; None if CPU always wins.
    crossover = None

    if verbose:
        print(f"{'Qubits':<8} {'CPU (ms)':<12} {'cuSV (ms)':<12} {'Speedup':<10} {'Winner'}")
        print("-" * 55)

    for n_qubits in qubit_range:
        # Parameter count grows quadratically with qubit count.
        n_params = n_qubits * n_qubits

        # Skip if too large
        if n_qubits > 20:
            if verbose:
                print(f"{n_qubits:<8} Skipped (memory)")
            continue

        # CPU timing
        config_cpu = OptimizerConfig(
            n_qubits=n_qubits,
            sigma=sigma,
            box_size=4.0,
            verbose=False,
            use_gpu=False,
            use_custatevec=False,
        )

        opt_cpu = GaussianOptimizer(config_cpu)
        # Random perturbation vector used for both CPU and cuStateVec timing.
        test_params = np.random.randn(n_params) * 0.1

        # Fewer trials at larger sizes keeps total benchmark runtime bounded.
        n_trials = 20 if n_qubits <= 14 else 5

        start = time.perf_counter()
        for _ in range(n_trials):
            psi = opt_cpu.get_statevector(test_params)
            _ = opt_cpu._compute_fidelity_fast(psi)
        # Mean per-evaluation time in milliseconds.
        cpu_time = (time.perf_counter() - start) / n_trials * 1000

        # cuStateVec timing; inf marks "not measured / not available".
        cusv_time = float("inf")

        if HAS_CUSTATEVEC:
            config_cusv = OptimizerConfig(
                n_qubits=n_qubits,
                sigma=sigma,
                box_size=4.0,
                verbose=False,
                use_gpu=False,
                use_custatevec=True,
            )

            # Best-effort: any GPU-side failure leaves cusv_time at inf and
            # the row falls back to a CPU-only comparison.
            try:
                opt_cusv = GaussianOptimizer(config_cusv)

                if opt_cusv._custatevec_evaluator is not None:
                    # Warm-up
                    for _ in range(3):
                        _ = opt_cusv._custatevec_evaluator.compute_fidelity(test_params)

                    start = time.perf_counter()
                    for _ in range(n_trials):
                        _ = opt_cusv._custatevec_evaluator.compute_fidelity(test_params)
                    cusv_time = (time.perf_counter() - start) / n_trials * 1000

                    # Cleanup
                    opt_cusv._custatevec_evaluator.cleanup()
                    if opt_cusv._custatevec_batch_evaluator:
                        opt_cusv._custatevec_batch_evaluator.cleanup()
            except Exception:
                pass

        # Speedup of cuStateVec over CPU; 0 when cuStateVec was not measured.
        speedup = cpu_time / cusv_time if cusv_time > 0 and cusv_time != float("inf") else 0
        winner = "cuSV" if speedup > 1 else "CPU"

        # Track crossover point
        if crossover is None and speedup > 1:
            crossover = n_qubits

        results.append(
            {
                "n_qubits": n_qubits,
                "cpu_ms": cpu_time,
                "cusv_ms": cusv_time if cusv_time != float("inf") else None,
                "speedup": speedup,
                "winner": winner,
            }
        )

        if verbose:
            cusv_str = f"{cusv_time:.2f}" if cusv_time != float("inf") else "N/A"
            print(f"{n_qubits:<8} {cpu_time:<12.2f} {cusv_str:<12} {speedup:<10.2f} {winner}")

    if verbose:
        print("\n" + "=" * 55)
        print("RECOMMENDATION:")
        if crossover:
            print(f" - Use CPU for qubits < {crossover}")
            print(f" - Use cuStateVec for qubits >= {crossover}")
        else:
            print(" - CPU is faster for all tested sizes")
        print(" - For gradient computation, crossover is ~2 qubits lower")

    return {
        "crossover_qubits": crossover,
        "results": results,
        "recommendation": f"GPU crossover at {crossover} qubits"
        if crossover
        else "CPU faster for all sizes",
    }
573
+
574
+
575
def benchmark_all_backends(
    n_qubits: int = 10,
    sigma: float = 0.5,
) -> dict[str, BenchmarkResult]:
    """
    Comprehensive benchmark of all available backends.

    Parameters
    ----------
    n_qubits : int
        Number of qubits for benchmark
    sigma : float
        Gaussian width

    Returns
    -------
    dict
        Results for each benchmark type
    """
    banner = "=" * 80
    print(banner)
    print(f"COMPREHENSIVE BACKEND BENCHMARK ({n_qubits} qubits)")
    print(banner)

    # Single evaluation benchmark; gradient benchmark would go here later.
    return {"single": benchmark_gpu(n_qubits, sigma, verbose=True)}