wings-quantum 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- wings/__init__.py +251 -0
- wings/adam.py +132 -0
- wings/ansatz.py +207 -0
- wings/benchmarks.py +605 -0
- wings/campaign.py +661 -0
- wings/cli.py +377 -0
- wings/compat.py +132 -0
- wings/config.py +443 -0
- wings/convenience.py +259 -0
- wings/evaluators/__init__.py +19 -0
- wings/evaluators/cpu.py +72 -0
- wings/evaluators/custatevec.py +783 -0
- wings/evaluators/gpu.py +220 -0
- wings/export.py +243 -0
- wings/optimizer.py +1898 -0
- wings/paths.py +295 -0
- wings/py.typed +2 -0
- wings/results.py +255 -0
- wings/types.py +14 -0
- wings_quantum-0.1.0.dist-info/METADATA +491 -0
- wings_quantum-0.1.0.dist-info/RECORD +25 -0
- wings_quantum-0.1.0.dist-info/WHEEL +5 -0
- wings_quantum-0.1.0.dist-info/entry_points.txt +2 -0
- wings_quantum-0.1.0.dist-info/licenses/LICENSE.txt +21 -0
- wings_quantum-0.1.0.dist-info/top_level.txt +1 -0
wings/benchmarks.py
ADDED
|
@@ -0,0 +1,605 @@
|
|
|
1
|
+
"""Benchmarking utilities for backend comparison."""
|
|
2
|
+
|
|
3
|
+
import time
|
|
4
|
+
from typing import Any, Optional
|
|
5
|
+
|
|
6
|
+
import numpy as np
|
|
7
|
+
|
|
8
|
+
from .compat import HAS_CUSTATEVEC
|
|
9
|
+
from .config import OptimizerConfig
|
|
10
|
+
from .optimizer import GaussianOptimizer
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def _get_gpu_count():
    """Return the number of CUDA devices visible via CuPy (0 when unavailable)."""
    # Without cuStateVec support there is no point probing for devices.
    if not HAS_CUSTATEVEC:
        return 0
    try:
        import cupy as cp
    except Exception:
        # CuPy missing or broken -> treat as no GPUs.
        return 0
    try:
        return cp.cuda.runtime.getDeviceCount()
    except Exception:
        # Runtime query can fail (e.g. no driver) even when CuPy imports.
        return 0
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
# Public API re-exported by the package.
# NOTE(review): benchmark_batched_evaluation is defined in this module but not
# listed here — confirm whether that omission is intentional.
__all__ = [
    "benchmark_gpu",
    "benchmark_multi_gpu",
    "find_gpu_crossover",
    "benchmark_all_backends",
    "BenchmarkResult",
]
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class BenchmarkResult:
    """Accumulates per-backend timing metrics plus an overall verdict.

    Attributes
    ----------
    results : dict
        Mapping ``backend name -> {metric name -> value}``.
    winner : str or None
        Backend judged fastest, if a comparison was made.
    recommendation : str
        Human-readable summary of the benchmark outcome.
    """

    def __init__(self):
        self.results: dict[str, dict[str, float]] = {}
        self.winner: Optional[str] = None
        self.recommendation: str = ""

    def add_result(self, backend: str, metric: str, value: float):
        """Record one metric value under the given backend."""
        bucket = self.results.setdefault(backend, {})
        bucket[metric] = value

    def __repr__(self) -> str:
        parts = ["BenchmarkResult:"]
        for backend, metrics in self.results.items():
            parts.append(f" {backend}:")
            parts.extend(f" {metric}: {value}" for metric, value in metrics.items())
        if self.winner:
            parts.append(f" Winner: {self.winner}")
        return "\n".join(parts)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def benchmark_gpu(
    n_qubits: int = 8,
    sigma: float = 0.5,
    n_trials: int = 10,
    verbose: bool = True,
) -> BenchmarkResult:
    """
    Benchmark GPU vs CPU performance for a specific configuration.

    Times single-statevector evaluation on the CPU backend, the Aer GPU
    backend (when available), and the cuStateVec backend (when available),
    recording per-backend ``single_eval_ms`` and ``speedup_vs_cpu`` metrics.

    Parameters
    ----------
    n_qubits : int
        Number of qubits to benchmark
    sigma : float
        Gaussian width parameter
    n_trials : int
        Number of trials for timing (more = more accurate)
    verbose : bool
        Print detailed results

    Returns
    -------
    BenchmarkResult
        Object containing timing results and recommendations

    Examples
    --------
    >>> result = benchmark_gpu(n_qubits=12, sigma=0.5)
    >>> print(f"GPU speedup: {result.results['gpu_aer']['speedup_vs_cpu']:.2f}x")
    """
    result = BenchmarkResult()

    if verbose:
        print(f"\n{'=' * 80}")
        print("GPU BENCHMARK")
        print(f"{'=' * 80}")
        print(f" Qubits: {n_qubits}")
        print(f" Parameters: {n_qubits * n_qubits}")

    # CPU configuration — the reference baseline all speedups are relative to.
    config_cpu = OptimizerConfig(
        n_qubits=n_qubits,
        sigma=sigma,
        box_size=4 * sigma,
        verbose=False,
        use_gpu=False,
        use_custatevec=False,
        parallel_gradients=False,
    )

    if verbose:
        print("\nInitializing CPU optimizer...")
    optimizer_cpu = GaussianOptimizer(config_cpu)
    # Same parameter vector is reused for every backend so timings are comparable.
    test_params = optimizer_cpu.get_initial_params("smart")

    # ========================================
    # Benchmark 1: Single evaluation
    # ========================================
    if verbose:
        print("\n1. Single Statevector Evaluation")
        print("-" * 50)

    # CPU timing (n_trials * 10 repetitions to reduce timer noise)
    start = time.perf_counter()
    for _ in range(n_trials * 10):
        optimizer_cpu.get_statevector(test_params)
    cpu_single = (time.perf_counter() - start) / (n_trials * 10)
    result.add_result("cpu", "single_eval_ms", cpu_single * 1000)

    if verbose:
        print(f" CPU: {cpu_single * 1000:.2f} ms/eval")

    # GPU timing (Aer)
    config_gpu = OptimizerConfig(
        n_qubits=n_qubits,
        sigma=sigma,
        box_size=4 * sigma,
        verbose=False,
        use_gpu=True,
        use_custatevec=False,
        gpu_precision="double",
    )

    optimizer_gpu = GaussianOptimizer(config_gpu)

    if optimizer_gpu._gpu_evaluator and optimizer_gpu._gpu_evaluator.gpu_available:
        # Warm-up so one-time setup (kernel compilation, transfers) is
        # excluded from the timed region.
        for _ in range(5):
            _ = optimizer_gpu.get_statevector(test_params)

        start = time.perf_counter()
        for _ in range(n_trials * 10):
            optimizer_gpu.get_statevector(test_params)
        gpu_single = (time.perf_counter() - start) / (n_trials * 10)

        result.add_result("gpu_aer", "single_eval_ms", gpu_single * 1000)
        result.add_result("gpu_aer", "speedup_vs_cpu", cpu_single / gpu_single)

        if verbose:
            print(f" GPU (Aer): {gpu_single * 1000:.2f} ms/eval")
            print(f" Speedup: {cpu_single / gpu_single:.2f}x")
    elif verbose:
        print(" GPU (Aer): Not available")

    # cuStateVec timing
    if HAS_CUSTATEVEC:
        config_cusv = OptimizerConfig(
            n_qubits=n_qubits,
            sigma=sigma,
            box_size=4 * sigma,
            verbose=False,
            use_gpu=False,
            use_custatevec=True,
            gpu_precision="double",
        )

        optimizer_cusv = GaussianOptimizer(config_cusv)

        if optimizer_cusv._custatevec_evaluator is not None:
            # Warm-up (same rationale as the Aer warm-up above)
            for _ in range(5):
                _ = optimizer_cusv._custatevec_evaluator.compute_fidelity(test_params)

            start = time.perf_counter()
            for _ in range(n_trials * 10):
                _ = optimizer_cusv._custatevec_evaluator.compute_fidelity(test_params)
            cusv_single = (time.perf_counter() - start) / (n_trials * 10)

            result.add_result("custatevec", "single_eval_ms", cusv_single * 1000)
            result.add_result("custatevec", "speedup_vs_cpu", cpu_single / cusv_single)

            if verbose:
                print(f" cuStateVec: {cusv_single * 1000:.2f} ms/eval")
                print(f" Speedup: {cpu_single / cusv_single:.2f}x")

            # Cleanup GPU handles / workspaces held by the evaluators
            optimizer_cusv._custatevec_evaluator.cleanup()
            if optimizer_cusv._custatevec_batch_evaluator:
                optimizer_cusv._custatevec_batch_evaluator.cleanup()

    # BUG FIX: the original fell off the end and implicitly returned None,
    # despite the documented BenchmarkResult return type (and callers such as
    # benchmark_all_backends storing the return value).
    return result
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
def benchmark_multi_gpu(
    n_qubits: int = 12,
    sigma: float = 0.5,
    batch_sizes: Optional[list[int]] = None,
    verbose: bool = True,
) -> BenchmarkResult:
    """
    Benchmark multi-GPU vs single-GPU performance.

    Times ``evaluate_population`` for each batch size on a single-GPU
    cuStateVec configuration and a multi-GPU configuration, then declares a
    winner based on the average speedup (multi-GPU wins if > 1.2x).

    Parameters
    ----------
    n_qubits : int
        Number of qubits (should be >= 12 for meaningful results)
    sigma : float
        Gaussian width
    batch_sizes : list of int, optional
        Batch sizes to test (default: [64, 128, 256])
    verbose : bool
        Print results

    Returns
    -------
    BenchmarkResult
        Timing comparisons for single vs multi-GPU. Returned with only a
        recommendation (no timings) when fewer than two GPUs are present or
        the multi-GPU evaluator failed to initialize.
    """
    result = BenchmarkResult()

    n_gpus = _get_gpu_count()

    # Bail out early: the comparison is meaningless with fewer than 2 GPUs.
    if n_gpus < 2:
        if verbose:
            print("Multi-GPU benchmark requires 2+ GPUs")
        result.recommendation = "Multi-GPU not available"
        return result

    if batch_sizes is None:
        batch_sizes = [64, 128, 256]

    if verbose:
        print(f"\n{'=' * 60}")
        print(f"MULTI-GPU BENCHMARK ({n_gpus} GPUs, {n_qubits} qubits)")
        print(f"{'=' * 60}")

    # Single GPU config
    config_single = OptimizerConfig(
        n_qubits=n_qubits,
        sigma=sigma,
        verbose=False,
        use_custatevec=True,
        use_multi_gpu=False,
    )

    # Multi-GPU config
    config_multi = OptimizerConfig(
        n_qubits=n_qubits,
        sigma=sigma,
        verbose=False,
        use_custatevec=True,
        use_multi_gpu=True,
    )

    opt_single = GaussianOptimizer(config_single)
    opt_multi = GaussianOptimizer(config_multi)

    # NOTE(review): if this returns early, neither optimizer is cleaned up —
    # confirm whether GaussianOptimizer releases GPU resources on GC.
    if opt_multi._multi_gpu_evaluator is None:
        if verbose:
            print("Multi-GPU evaluator not initialized")
        return result

    for batch_size in batch_sizes:
        # Random candidate population; scaled by 0.1 to keep parameters small.
        population = np.random.randn(batch_size, config_single.n_params) * 0.1

        # Single GPU
        start = time.perf_counter()
        _ = opt_single.evaluate_population(population, backend="custatevec")
        single_time = time.perf_counter() - start

        # Multi GPU (same population, so the two timings are comparable)
        start = time.perf_counter()
        _ = opt_multi.evaluate_population(population, backend="multi_gpu")
        multi_time = time.perf_counter() - start

        speedup = single_time / multi_time

        result.add_result("single_gpu", f"batch_{batch_size}_ms", single_time * 1000)
        result.add_result("multi_gpu", f"batch_{batch_size}_ms", multi_time * 1000)
        result.add_result("multi_gpu", f"batch_{batch_size}_speedup", speedup)

        if verbose:
            print(
                f" Batch {batch_size}: Single={single_time * 1000:.0f}ms, "
                f"Multi={multi_time * 1000:.0f}ms, Speedup={speedup:.2f}x"
            )

    # Cleanup (guarded: cleanup() may not exist on all optimizer variants)
    if hasattr(opt_single, "cleanup"):
        opt_single.cleanup()
    if hasattr(opt_multi, "cleanup"):
        opt_multi.cleanup()

    # Average speedup across all tested batch sizes (1.0 if a size was skipped).
    avg_speedup = np.mean(
        [result.results["multi_gpu"].get(f"batch_{bs}_speedup", 1.0) for bs in batch_sizes]
    )

    # 1.2x threshold: require a clearly-better result before recommending
    # the added complexity of multi-GPU.
    result.winner = "multi_gpu" if avg_speedup > 1.2 else "single_gpu"
    result.recommendation = (
        f"Multi-GPU provides {avg_speedup:.1f}x speedup for {n_qubits} qubits"
        if avg_speedup > 1.2
        else f"Single GPU sufficient for {n_qubits} qubits"
    )

    if verbose:
        print(f"\nRecommendation: {result.recommendation}")

    return result
|
|
314
|
+
|
|
315
|
+
|
|
316
|
+
def benchmark_batched_evaluation(
    n_qubits: int = 10,
    sigma: float = 0.5,
    batch_sizes: Optional[list[int]] = None,
    verbose: bool = True,
) -> BenchmarkResult:
    """
    Benchmark batched evaluation (gradient-like workload) across backends.

    For each batch size, times a sequential CPU evaluation of the whole
    population against the GPU's batched ``evaluate_population`` (when a GPU
    is available), then declares the backend with the lowest total batched
    time the winner.

    Parameters
    ----------
    n_qubits : int
        Number of qubits
    sigma : float
        Gaussian width
    batch_sizes : list of int, optional
        Batch sizes to test (default: [32, 64, 128])
    verbose : bool
        Print results

    Returns
    -------
    BenchmarkResult
        Timing comparisons for CPU vs GPU batched evaluation
    """
    result = BenchmarkResult()

    if batch_sizes is None:
        batch_sizes = [32, 64, 128]

    # CPU config
    config_cpu = OptimizerConfig(
        n_qubits=n_qubits,
        sigma=sigma,
        verbose=False,
        use_gpu=False,
        use_custatevec=False,
    )

    # GPU config
    config_gpu = OptimizerConfig(
        n_qubits=n_qubits,
        sigma=sigma,
        verbose=False,
        use_gpu=True,
        use_custatevec=False,
    )

    optimizer_cpu = GaussianOptimizer(config_cpu)
    optimizer_gpu = GaussianOptimizer(config_gpu)

    if verbose:
        print("\nBatched Evaluation (Gradient-like workload)")
        print("-" * 50)

    for batch_size in batch_sizes:
        # Random candidate population; scaled by 0.1 to keep parameters small.
        population = np.random.randn(batch_size, config_cpu.n_params) * 0.1

        # CPU baseline: evaluate candidates one at a time (timing only; the
        # array itself is discarded).
        start = time.perf_counter()
        np.array(
            [
                optimizer_cpu._compute_fidelity_fast(optimizer_cpu.get_statevector(p))
                for p in population
            ]
        )
        cpu_batch_time = time.perf_counter() - start

        result.add_result("cpu", f"batch_{batch_size}_ms", cpu_batch_time * 1000)

        if verbose:
            print(f" Batch {batch_size}: CPU={cpu_batch_time * 1000:.0f}ms", end="")

        # GPU batched
        if optimizer_gpu._gpu_evaluator and optimizer_gpu._gpu_evaluator.gpu_available:
            start = time.perf_counter()
            optimizer_gpu.evaluate_population(population)
            gpu_batch_time = time.perf_counter() - start

            result.add_result("gpu_aer", f"batch_{batch_size}_ms", gpu_batch_time * 1000)

            if verbose:
                print(
                    f", GPU={gpu_batch_time * 1000:.0f}ms ({cpu_batch_time / gpu_batch_time:.1f}x)",
                    end="",
                )

        if verbose:
            print()

    # Determine winner by total batched-evaluation time per backend.
    # BUG FIX: the original looked up "single_eval_ms" — a metric this
    # function never records — so winner was always None and the
    # recommendation read "Use None for N qubits".
    best_time = float("inf")
    for backend, metrics in result.results.items():
        batch_total = sum(v for k, v in metrics.items() if k.endswith("_ms"))
        if batch_total < best_time:
            best_time = batch_total
            result.winner = backend

    result.recommendation = f"Use {result.winner} for {n_qubits} qubits"

    if verbose:
        print(f"\n{'=' * 50}")
        print(f"RECOMMENDATION: {result.recommendation}")

    return result
|
|
428
|
+
|
|
429
|
+
|
|
430
|
+
def find_gpu_crossover(
    qubit_range: Optional[list[int]] = None,
    sigma: float = 0.5,
    verbose: bool = True,
) -> dict[str, Any]:
    """
    Find where GPU becomes faster than CPU for your hardware.

    This helps you determine the optimal backend for different problem sizes.

    Parameters
    ----------
    qubit_range : list of int, optional
        Qubit counts to test. Default: [6, 8, 10, 12, 14, 16, 18]
    sigma : float
        Gaussian width for test problems
    verbose : bool
        Print results table

    Returns
    -------
    dict
        Contains:
        - 'crossover_qubits': Qubit count where GPU becomes faster
        - 'results': Detailed timing for each qubit count
        - 'recommendation': Human-readable recommendation

    Examples
    --------
    >>> info = find_gpu_crossover()
    >>> print(f"GPU becomes faster at {info['crossover_qubits']} qubits")
    """
    if qubit_range is None:
        qubit_range = [6, 8, 10, 12, 14, 16, 18]

    results = []
    # First qubit count at which cuStateVec beats the CPU (None if never).
    crossover = None

    if verbose:
        print(f"{'Qubits':<8} {'CPU (ms)':<12} {'cuSV (ms)':<12} {'Speedup':<10} {'Winner'}")
        print("-" * 55)

    for n_qubits in qubit_range:
        n_params = n_qubits * n_qubits

        # Skip if too large (statevector memory grows as 2**n_qubits)
        if n_qubits > 20:
            if verbose:
                print(f"{n_qubits:<8} Skipped (memory)")
            continue

        # CPU timing
        config_cpu = OptimizerConfig(
            n_qubits=n_qubits,
            sigma=sigma,
            box_size=4.0,
            verbose=False,
            use_gpu=False,
            use_custatevec=False,
        )

        opt_cpu = GaussianOptimizer(config_cpu)
        # Random but shared test point: both backends time the same workload.
        test_params = np.random.randn(n_params) * 0.1

        # Fewer trials for larger systems to keep total runtime bounded.
        n_trials = 20 if n_qubits <= 14 else 5

        start = time.perf_counter()
        for _ in range(n_trials):
            psi = opt_cpu.get_statevector(test_params)
            _ = opt_cpu._compute_fidelity_fast(psi)
        cpu_time = (time.perf_counter() - start) / n_trials * 1000

        # cuStateVec timing (inf sentinel means "not measured / unavailable")
        cusv_time = float("inf")

        if HAS_CUSTATEVEC:
            config_cusv = OptimizerConfig(
                n_qubits=n_qubits,
                sigma=sigma,
                box_size=4.0,
                verbose=False,
                use_gpu=False,
                use_custatevec=True,
            )

            try:
                opt_cusv = GaussianOptimizer(config_cusv)

                if opt_cusv._custatevec_evaluator is not None:
                    # Warm-up so one-time setup is excluded from timing
                    for _ in range(3):
                        _ = opt_cusv._custatevec_evaluator.compute_fidelity(test_params)

                    start = time.perf_counter()
                    for _ in range(n_trials):
                        _ = opt_cusv._custatevec_evaluator.compute_fidelity(test_params)
                    cusv_time = (time.perf_counter() - start) / n_trials * 1000

                    # Cleanup GPU handles/workspaces
                    opt_cusv._custatevec_evaluator.cleanup()
                    if opt_cusv._custatevec_batch_evaluator:
                        opt_cusv._custatevec_batch_evaluator.cleanup()
            except Exception:
                # NOTE(review): deliberate best-effort — any cuStateVec failure
                # leaves cusv_time at inf so this size is scored "CPU wins".
                # Consider at least logging the exception.
                pass

        # speedup = 0 encodes "cuStateVec not measured" for the results table.
        speedup = cpu_time / cusv_time if cusv_time > 0 and cusv_time != float("inf") else 0
        winner = "cuSV" if speedup > 1 else "CPU"

        # Track crossover point (first size where GPU wins)
        if crossover is None and speedup > 1:
            crossover = n_qubits

        results.append(
            {
                "n_qubits": n_qubits,
                "cpu_ms": cpu_time,
                "cusv_ms": cusv_time if cusv_time != float("inf") else None,
                "speedup": speedup,
                "winner": winner,
            }
        )

        if verbose:
            cusv_str = f"{cusv_time:.2f}" if cusv_time != float("inf") else "N/A"
            print(f"{n_qubits:<8} {cpu_time:<12.2f} {cusv_str:<12} {speedup:<10.2f} {winner}")

    if verbose:
        print("\n" + "=" * 55)
        print("RECOMMENDATION:")
        if crossover:
            print(f" - Use CPU for qubits < {crossover}")
            print(f" - Use cuStateVec for qubits >= {crossover}")
        else:
            print(" - CPU is faster for all tested sizes")
        print(" - For gradient computation, crossover is ~2 qubits lower")

    return {
        "crossover_qubits": crossover,
        "results": results,
        "recommendation": f"GPU crossover at {crossover} qubits"
        if crossover
        else "CPU faster for all sizes",
    }
|
|
573
|
+
|
|
574
|
+
|
|
575
|
+
def benchmark_all_backends(
    n_qubits: int = 10,
    sigma: float = 0.5,
) -> dict[str, BenchmarkResult]:
    """
    Comprehensive benchmark of all available backends.

    Parameters
    ----------
    n_qubits : int
        Number of qubits for benchmark
    sigma : float
        Gaussian width

    Returns
    -------
    dict
        Results for each benchmark type
    """
    banner = "=" * 80
    print(banner)
    print(f"COMPREHENSIVE BACKEND BENCHMARK ({n_qubits} qubits)")
    print(banner)

    results: dict[str, BenchmarkResult] = {}

    # Single evaluation benchmark
    results["single"] = benchmark_gpu(n_qubits, sigma, verbose=True)

    # Gradient benchmark would go here

    return results
|