warp-lang 1.9.0__py3-none-macosx_10_13_universal2.whl → 1.9.1__py3-none-macosx_10_13_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of warp-lang might be problematic. Click here for more details.

Binary file
warp/bin/libwarp.dylib CHANGED
Binary file
warp/build_dll.py CHANGED
@@ -174,6 +174,324 @@ def add_llvm_bin_to_path(args):
174
174
  return True
175
175
 
176
176
 
177
def _get_architectures_cu12(
    ctk_version: tuple[int, int], arch: str, target_platform: str, quick_build: bool = False
) -> tuple[list[str], list[str]]:
    """Get architecture flags for CUDA 12.x.

    Args:
        ctk_version: CUDA Toolkit version as a ``(major, minor)`` tuple.
        arch: Target CPU architecture. Only ``"aarch64"`` changes the result.
        target_platform: Platform name; only ``"linux"`` (combined with
            ``arch == "aarch64"``) changes the result.
        quick_build: If ``True``, emit only PTX for ``compute_52`` and
            ``compute_75`` instead of the full architecture list.

    Returns:
        A ``(gencode_opts, clang_arch_flags)`` tuple holding the NVCC
        ``-gencode=...`` options and the Clang ``--cuda-gpu-arch=...`` flags.

    NOTE(review): an identical second definition of this function appears
    later in this file and overrides this one at import time; one of the two
    copies should be removed.
    """
    if quick_build:
        # Minimum supported architectures, PTX only.
        return (
            ["-gencode=arch=compute_52,code=compute_52", "-gencode=arch=compute_75,code=compute_75"],
            ["--cuda-gpu-arch=sm_52", "--cuda-gpu-arch=sm_75"],
        )

    gencode_opts: list[str] = []
    clang_arch_flags: list[str] = []
    is_linux_arm = arch == "aarch64" and target_platform == "linux"

    if is_linux_arm and ctk_version == (12, 9):
        # Skip certain architectures for aarch64 with CUDA 12.9 due to CCCL bug
        print(
            "[INFO] Skipping sm_52, sm_60, sm_61, and sm_70 targets for ARM due to a CUDA Toolkit bug. "
            "See https://nvidia.github.io/warp/installation.html#cuda-12-9-limitation-on-linux-arm-platforms "
            "for details."
        )
    else:
        gencode_opts += [
            "-gencode=arch=compute_52,code=sm_52",  # Maxwell
            "-gencode=arch=compute_60,code=sm_60",  # Pascal
            "-gencode=arch=compute_61,code=sm_61",
            "-gencode=arch=compute_70,code=sm_70",  # Volta
        ]
        clang_arch_flags += [
            "--cuda-gpu-arch=sm_52",
            "--cuda-gpu-arch=sm_60",
            "--cuda-gpu-arch=sm_61",
            "--cuda-gpu-arch=sm_70",
        ]

    # Desktop architectures
    gencode_opts += [
        "-gencode=arch=compute_75,code=sm_75",  # Turing
        "-gencode=arch=compute_75,code=compute_75",  # Turing (PTX)
        "-gencode=arch=compute_80,code=sm_80",  # Ampere
        "-gencode=arch=compute_86,code=sm_86",
        "-gencode=arch=compute_89,code=sm_89",  # Ada
        "-gencode=arch=compute_90,code=sm_90",  # Hopper
    ]
    clang_arch_flags += [
        "--cuda-gpu-arch=sm_75",  # Turing
        "--cuda-gpu-arch=sm_80",  # Ampere
        "--cuda-gpu-arch=sm_86",
        "--cuda-gpu-arch=sm_89",  # Ada
        "--cuda-gpu-arch=sm_90",  # Hopper
    ]

    if ctk_version >= (12, 8):
        gencode_opts += ["-gencode=arch=compute_100,code=sm_100", "-gencode=arch=compute_120,code=sm_120"]
        clang_arch_flags += ["--cuda-gpu-arch=sm_100", "--cuda-gpu-arch=sm_120"]

    # Mobile architectures for aarch64 Linux
    # NOTE(review): the nesting of the sm_101 / sm_121 targets under this
    # branch was reconstructed from a flattened listing -- confirm upstream.
    if is_linux_arm:
        gencode_opts += [
            "-gencode=arch=compute_87,code=sm_87",  # Orin
            "-gencode=arch=compute_53,code=sm_53",  # X1
            "-gencode=arch=compute_62,code=sm_62",  # X2
            "-gencode=arch=compute_72,code=sm_72",  # Xavier
        ]
        clang_arch_flags += [
            "--cuda-gpu-arch=sm_87",
            "--cuda-gpu-arch=sm_53",
            "--cuda-gpu-arch=sm_62",
            "--cuda-gpu-arch=sm_72",
        ]

        # Thor support in CUDA 12.8+
        if ctk_version >= (12, 8):
            gencode_opts.append("-gencode=arch=compute_101,code=sm_101")  # Thor (CUDA 12 numbering)
            clang_arch_flags.append("--cuda-gpu-arch=sm_101")

        if ctk_version >= (12, 9):
            gencode_opts.append("-gencode=arch=compute_121,code=sm_121")
            clang_arch_flags.append("--cuda-gpu-arch=sm_121")

    # PTX for future hardware (use highest available compute capability)
    if ctk_version >= (12, 9):
        gencode_opts.append("-gencode=arch=compute_121,code=compute_121")
    elif ctk_version >= (12, 8):
        gencode_opts.append("-gencode=arch=compute_120,code=compute_120")
    else:
        gencode_opts.append("-gencode=arch=compute_90,code=compute_90")

    return gencode_opts, clang_arch_flags
275
+
276
+
277
def _get_architectures_cu13(
    ctk_version: tuple[int, int], arch: str, target_platform: str, quick_build: bool = False
) -> tuple[list[str], list[str]]:
    """Get architecture flags for CUDA 13.x.

    Args:
        ctk_version: CUDA Toolkit version as a ``(major, minor)`` tuple.
            Not consulted by this function; kept so the signature matches
            ``_get_architectures_cu12``.
        arch: Target CPU architecture. Only ``"aarch64"`` changes the result.
        target_platform: Platform name; only ``"linux"`` (combined with
            ``arch == "aarch64"``) changes the result.
        quick_build: If ``True``, emit only PTX for ``compute_75`` instead of
            the full architecture list.

    Returns:
        A ``(gencode_opts, clang_arch_flags)`` tuple holding the NVCC
        ``-gencode=...`` options and the Clang ``--cuda-gpu-arch=...`` flags.

    NOTE(review): an identical second definition of this function appears
    later in this file and overrides this one at import time; one of the two
    copies should be removed.
    """
    if quick_build:
        # Minimum supported architecture, PTX only.
        return ["-gencode=arch=compute_75,code=compute_75"], ["--cuda-gpu-arch=sm_75"]

    # Desktop architectures
    gencode_opts: list[str] = [
        "-gencode=arch=compute_75,code=sm_75",  # Turing
        "-gencode=arch=compute_75,code=compute_75",  # Turing (PTX)
        "-gencode=arch=compute_80,code=sm_80",  # Ampere
        "-gencode=arch=compute_86,code=sm_86",
        "-gencode=arch=compute_89,code=sm_89",  # Ada
        "-gencode=arch=compute_90,code=sm_90",  # Hopper
        "-gencode=arch=compute_100,code=sm_100",  # Blackwell
        "-gencode=arch=compute_120,code=sm_120",  # Blackwell
    ]
    clang_arch_flags: list[str] = [
        "--cuda-gpu-arch=sm_75",  # Turing
        "--cuda-gpu-arch=sm_80",  # Ampere
        "--cuda-gpu-arch=sm_86",
        "--cuda-gpu-arch=sm_89",  # Ada
        "--cuda-gpu-arch=sm_90",  # Hopper
        "--cuda-gpu-arch=sm_100",  # Blackwell
        "--cuda-gpu-arch=sm_120",  # Blackwell
    ]

    # Mobile architectures for aarch64 Linux
    if arch == "aarch64" and target_platform == "linux":
        gencode_opts += [
            "-gencode=arch=compute_87,code=sm_87",  # Orin
            "-gencode=arch=compute_110,code=sm_110",  # Thor
            "-gencode=arch=compute_121,code=sm_121",  # Spark
        ]
        clang_arch_flags += [
            "--cuda-gpu-arch=sm_87",
            "--cuda-gpu-arch=sm_110",
            "--cuda-gpu-arch=sm_121",
        ]

    # PTX for future hardware (use highest available compute capability)
    gencode_opts.append("-gencode=arch=compute_121,code=compute_121")

    return gencode_opts, clang_arch_flags
334
+
335
+
336
# NOTE(review): this is a second, identical definition of
# `_get_architectures_cu12`; an earlier copy exists above in this file.
# Because this one is bound last it is the definition actually used.
# One of the two copies should be removed.
def _get_architectures_cu12(
    ctk_version: tuple[int, int], arch: str, target_platform: str, quick_build: bool = False
) -> tuple[list[str], list[str]]:
    """Get architecture flags for CUDA 12.x.

    Args:
        ctk_version: CUDA Toolkit version as a ``(major, minor)`` tuple.
        arch: Target CPU architecture. Only ``"aarch64"`` changes the result.
        target_platform: Platform name; only ``"linux"`` (combined with
            ``arch == "aarch64"``) changes the result.
        quick_build: If ``True``, emit only PTX for ``compute_52`` and
            ``compute_75`` instead of the full architecture list.

    Returns:
        A ``(gencode_opts, clang_arch_flags)`` tuple holding the NVCC
        ``-gencode=...`` options and the Clang ``--cuda-gpu-arch=...`` flags.
    """
    if quick_build:
        # Minimum supported architectures, PTX only.
        return (
            ["-gencode=arch=compute_52,code=compute_52", "-gencode=arch=compute_75,code=compute_75"],
            ["--cuda-gpu-arch=sm_52", "--cuda-gpu-arch=sm_75"],
        )

    gencode_opts: list[str] = []
    clang_arch_flags: list[str] = []
    is_linux_arm = arch == "aarch64" and target_platform == "linux"

    if is_linux_arm and ctk_version == (12, 9):
        # Skip certain architectures for aarch64 with CUDA 12.9 due to CCCL bug
        print(
            "[INFO] Skipping sm_52, sm_60, sm_61, and sm_70 targets for ARM due to a CUDA Toolkit bug. "
            "See https://nvidia.github.io/warp/installation.html#cuda-12-9-limitation-on-linux-arm-platforms "
            "for details."
        )
    else:
        gencode_opts += [
            "-gencode=arch=compute_52,code=sm_52",  # Maxwell
            "-gencode=arch=compute_60,code=sm_60",  # Pascal
            "-gencode=arch=compute_61,code=sm_61",
            "-gencode=arch=compute_70,code=sm_70",  # Volta
        ]
        clang_arch_flags += [
            "--cuda-gpu-arch=sm_52",
            "--cuda-gpu-arch=sm_60",
            "--cuda-gpu-arch=sm_61",
            "--cuda-gpu-arch=sm_70",
        ]

    # Desktop architectures
    gencode_opts += [
        "-gencode=arch=compute_75,code=sm_75",  # Turing
        "-gencode=arch=compute_75,code=compute_75",  # Turing (PTX)
        "-gencode=arch=compute_80,code=sm_80",  # Ampere
        "-gencode=arch=compute_86,code=sm_86",
        "-gencode=arch=compute_89,code=sm_89",  # Ada
        "-gencode=arch=compute_90,code=sm_90",  # Hopper
    ]
    clang_arch_flags += [
        "--cuda-gpu-arch=sm_75",  # Turing
        "--cuda-gpu-arch=sm_80",  # Ampere
        "--cuda-gpu-arch=sm_86",
        "--cuda-gpu-arch=sm_89",  # Ada
        "--cuda-gpu-arch=sm_90",  # Hopper
    ]

    if ctk_version >= (12, 8):
        gencode_opts += ["-gencode=arch=compute_100,code=sm_100", "-gencode=arch=compute_120,code=sm_120"]
        clang_arch_flags += ["--cuda-gpu-arch=sm_100", "--cuda-gpu-arch=sm_120"]

    # Mobile architectures for aarch64 Linux
    # NOTE(review): the nesting of the sm_101 / sm_121 targets under this
    # branch was reconstructed from a flattened listing -- confirm upstream.
    if is_linux_arm:
        gencode_opts += [
            "-gencode=arch=compute_87,code=sm_87",  # Orin
            "-gencode=arch=compute_53,code=sm_53",  # X1
            "-gencode=arch=compute_62,code=sm_62",  # X2
            "-gencode=arch=compute_72,code=sm_72",  # Xavier
        ]
        clang_arch_flags += [
            "--cuda-gpu-arch=sm_87",
            "--cuda-gpu-arch=sm_53",
            "--cuda-gpu-arch=sm_62",
            "--cuda-gpu-arch=sm_72",
        ]

        # Thor support in CUDA 12.8+
        if ctk_version >= (12, 8):
            gencode_opts.append("-gencode=arch=compute_101,code=sm_101")  # Thor (CUDA 12 numbering)
            clang_arch_flags.append("--cuda-gpu-arch=sm_101")

        if ctk_version >= (12, 9):
            gencode_opts.append("-gencode=arch=compute_121,code=sm_121")
            clang_arch_flags.append("--cuda-gpu-arch=sm_121")

    # PTX for future hardware (use highest available compute capability)
    if ctk_version >= (12, 9):
        gencode_opts.append("-gencode=arch=compute_121,code=compute_121")
    elif ctk_version >= (12, 8):
        gencode_opts.append("-gencode=arch=compute_120,code=compute_120")
    else:
        gencode_opts.append("-gencode=arch=compute_90,code=compute_90")

    return gencode_opts, clang_arch_flags
434
+
435
+
436
# NOTE(review): this is a second, identical definition of
# `_get_architectures_cu13`; an earlier copy exists above in this file.
# Because this one is bound last it is the definition actually used.
# One of the two copies should be removed.
def _get_architectures_cu13(
    ctk_version: tuple[int, int], arch: str, target_platform: str, quick_build: bool = False
) -> tuple[list[str], list[str]]:
    """Get architecture flags for CUDA 13.x.

    Args:
        ctk_version: CUDA Toolkit version as a ``(major, minor)`` tuple.
            Not consulted by this function; kept so the signature matches
            ``_get_architectures_cu12``.
        arch: Target CPU architecture. Only ``"aarch64"`` changes the result.
        target_platform: Platform name; only ``"linux"`` (combined with
            ``arch == "aarch64"``) changes the result.
        quick_build: If ``True``, emit only PTX for ``compute_75`` instead of
            the full architecture list.

    Returns:
        A ``(gencode_opts, clang_arch_flags)`` tuple holding the NVCC
        ``-gencode=...`` options and the Clang ``--cuda-gpu-arch=...`` flags.
    """
    if quick_build:
        # Minimum supported architecture, PTX only.
        return ["-gencode=arch=compute_75,code=compute_75"], ["--cuda-gpu-arch=sm_75"]

    # Desktop architectures
    gencode_opts: list[str] = [
        "-gencode=arch=compute_75,code=sm_75",  # Turing
        "-gencode=arch=compute_75,code=compute_75",  # Turing (PTX)
        "-gencode=arch=compute_80,code=sm_80",  # Ampere
        "-gencode=arch=compute_86,code=sm_86",
        "-gencode=arch=compute_89,code=sm_89",  # Ada
        "-gencode=arch=compute_90,code=sm_90",  # Hopper
        "-gencode=arch=compute_100,code=sm_100",  # Blackwell
        "-gencode=arch=compute_120,code=sm_120",  # Blackwell
    ]
    clang_arch_flags: list[str] = [
        "--cuda-gpu-arch=sm_75",  # Turing
        "--cuda-gpu-arch=sm_80",  # Ampere
        "--cuda-gpu-arch=sm_86",
        "--cuda-gpu-arch=sm_89",  # Ada
        "--cuda-gpu-arch=sm_90",  # Hopper
        "--cuda-gpu-arch=sm_100",  # Blackwell
        "--cuda-gpu-arch=sm_120",  # Blackwell
    ]

    # Mobile architectures for aarch64 Linux
    if arch == "aarch64" and target_platform == "linux":
        gencode_opts += [
            "-gencode=arch=compute_87,code=sm_87",  # Orin
            "-gencode=arch=compute_110,code=sm_110",  # Thor
            "-gencode=arch=compute_121,code=sm_121",  # Spark
        ]
        clang_arch_flags += [
            "--cuda-gpu-arch=sm_87",
            "--cuda-gpu-arch=sm_110",
            "--cuda-gpu-arch=sm_121",
        ]

    # PTX for future hardware (use highest available compute capability)
    gencode_opts.append("-gencode=arch=compute_121,code=compute_121")

    return gencode_opts, clang_arch_flags
493
+
494
+
177
495
  def build_dll_for_arch(args, dll_path, cpp_paths, cu_path, arch, libs: list[str] | None = None, mode=None):
178
496
  mode = args.mode if (mode is None) else mode
179
497
  cuda_home = args.cuda_path
@@ -208,79 +526,11 @@ def build_dll_for_arch(args, dll_path, cpp_paths, cu_path, arch, libs: list[str]
208
526
  f"CUDA Toolkit version {MIN_CTK_VERSION[0]}.{MIN_CTK_VERSION[1]}+ is required (found {ctk_version[0]}.{ctk_version[1]} in {cuda_home})"
209
527
  )
210
528
 
211
- # NVCC gencode options
212
- gencode_opts = []
213
-
214
- # Clang architecture flags
215
- clang_arch_flags = []
216
-
217
- if args.quick:
218
- # minimum supported architectures (PTX)
219
- if ctk_version >= (13, 0):
220
- gencode_opts += ["-gencode=arch=compute_75,code=compute_75"]
221
- clang_arch_flags += ["--cuda-gpu-arch=sm_75"]
222
- else:
223
- gencode_opts += ["-gencode=arch=compute_52,code=compute_52", "-gencode=arch=compute_75,code=compute_75"]
224
- clang_arch_flags += ["--cuda-gpu-arch=sm_52", "--cuda-gpu-arch=sm_75"]
529
+ # Get architecture flags based on CUDA version
530
+ if ctk_version >= (13, 0):
531
+ gencode_opts, clang_arch_flags = _get_architectures_cu13(ctk_version, arch, sys.platform, args.quick)
225
532
  else:
226
- # generate code for all supported architectures
227
- gencode_opts += [
228
- # SASS for supported desktop/datacenter architectures
229
- "-gencode=arch=compute_75,code=sm_75", # Turing
230
- "-gencode=arch=compute_75,code=compute_75", # Turing (PTX)
231
- "-gencode=arch=compute_80,code=sm_80", # Ampere
232
- "-gencode=arch=compute_86,code=sm_86",
233
- "-gencode=arch=compute_89,code=sm_89", # Ada
234
- "-gencode=arch=compute_90,code=sm_90", # Hopper
235
- ]
236
-
237
- clang_arch_flags += [
238
- # SASS for supported desktop/datacenter architectures
239
- "--cuda-gpu-arch=sm_75", # Turing
240
- "--cuda-gpu-arch=sm_80", # Ampere
241
- "--cuda-gpu-arch=sm_86",
242
- "--cuda-gpu-arch=sm_89", # Ada
243
- "--cuda-gpu-arch=sm_90", # Hopper
244
- ]
245
-
246
- if arch == "aarch64" and sys.platform == "linux":
247
- # SASS for supported mobile architectures (e.g. Tegra/Jetson)
248
- gencode_opts += ["-gencode=arch=compute_87,code=sm_87"] # Orin
249
- clang_arch_flags += ["--cuda-gpu-arch=sm_87"]
250
-
251
- if ctk_version >= (13, 0):
252
- gencode_opts += ["-gencode=arch=compute_110,code=sm_110"] # Thor
253
- clang_arch_flags += ["--cuda-gpu-arch=sm_110"]
254
- else:
255
- gencode_opts += [
256
- "-gencode=arch=compute_53,code=sm_53", # X1
257
- "-gencode=arch=compute_62,code=sm_62", # X2
258
- "-gencode=arch=compute_72,code=sm_72", # Xavier
259
- ]
260
- clang_arch_flags += [
261
- "--cuda-gpu-arch=sm_53",
262
- "--cuda-gpu-arch=sm_62",
263
- "--cuda-gpu-arch=sm_72",
264
- ]
265
-
266
- if ctk_version >= (12, 8):
267
- gencode_opts += ["-gencode=arch=compute_101,code=sm_101"] # Thor (CUDA 12 numbering)
268
- clang_arch_flags += ["--cuda-gpu-arch=sm_101"]
269
-
270
- if ctk_version >= (12, 8):
271
- # Support for Blackwell is available with CUDA Toolkit 12.8+
272
- gencode_opts += [
273
- "-gencode=arch=compute_100,code=sm_100", # Blackwell
274
- "-gencode=arch=compute_120,code=sm_120", # Blackwell
275
- "-gencode=arch=compute_120,code=compute_120", # PTX for future hardware
276
- ]
277
-
278
- clang_arch_flags += [
279
- "--cuda-gpu-arch=sm_100", # Blackwell
280
- "--cuda-gpu-arch=sm_120", # Blackwell
281
- ]
282
- else:
283
- gencode_opts += ["-gencode=arch=compute_90,code=compute_90"] # PTX for future hardware
533
+ gencode_opts, clang_arch_flags = _get_architectures_cu12(ctk_version, arch, sys.platform, args.quick)
284
534
 
285
535
  nvcc_opts = [
286
536
  *gencode_opts,