warp-lang 1.9.0__py3-none-win_amd64.whl → 1.9.1__py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of warp-lang might be problematic. Click here for more details.
- warp/__init__.pyi +1420 -2
- warp/bin/warp-clang.dll +0 -0
- warp/bin/warp.dll +0 -0
- warp/build_dll.py +322 -72
- warp/builtins.py +289 -23
- warp/codegen.py +5 -0
- warp/config.py +1 -1
- warp/context.py +243 -32
- warp/examples/interop/example_jax_kernel.py +2 -1
- warp/jax_experimental/custom_call.py +24 -1
- warp/jax_experimental/ffi.py +20 -0
- warp/jax_experimental/xla_ffi.py +16 -7
- warp/native/builtin.h +4 -4
- warp/native/sort.cu +22 -13
- warp/native/sort.h +2 -0
- warp/native/tile.h +188 -13
- warp/native/vec.h +0 -53
- warp/native/warp.cpp +3 -3
- warp/native/warp.cu +60 -30
- warp/native/warp.h +3 -3
- warp/render/render_opengl.py +14 -12
- warp/render/render_usd.py +1 -0
- warp/tests/geometry/test_hash_grid.py +38 -0
- warp/tests/interop/test_jax.py +608 -28
- warp/tests/test_array.py +2 -0
- warp/tests/test_codegen.py +1 -1
- warp/tests/test_fem.py +4 -4
- warp/tests/test_map.py +14 -0
- warp/tests/test_tuple.py +96 -0
- warp/tests/test_types.py +61 -0
- warp/tests/tile/test_tile.py +61 -0
- warp/types.py +17 -3
- {warp_lang-1.9.0.dist-info → warp_lang-1.9.1.dist-info}/METADATA +5 -8
- {warp_lang-1.9.0.dist-info → warp_lang-1.9.1.dist-info}/RECORD +37 -37
- {warp_lang-1.9.0.dist-info → warp_lang-1.9.1.dist-info}/WHEEL +0 -0
- {warp_lang-1.9.0.dist-info → warp_lang-1.9.1.dist-info}/licenses/LICENSE.md +0 -0
- {warp_lang-1.9.0.dist-info → warp_lang-1.9.1.dist-info}/top_level.txt +0 -0
warp/bin/warp-clang.dll
CHANGED
|
Binary file
|
warp/bin/warp.dll
CHANGED
|
Binary file
|
warp/build_dll.py
CHANGED
|
@@ -174,6 +174,324 @@ def add_llvm_bin_to_path(args):
|
|
|
174
174
|
return True
|
|
175
175
|
|
|
176
176
|
|
|
177
|
+
def _get_architectures_cu12(
|
|
178
|
+
ctk_version: tuple[int, int], arch: str, target_platform: str, quick_build: bool = False
|
|
179
|
+
) -> tuple[list[str], list[str]]:
|
|
180
|
+
"""Get architecture flags for CUDA 12.x."""
|
|
181
|
+
gencode_opts = []
|
|
182
|
+
clang_arch_flags = []
|
|
183
|
+
|
|
184
|
+
if quick_build:
|
|
185
|
+
gencode_opts = ["-gencode=arch=compute_52,code=compute_52", "-gencode=arch=compute_75,code=compute_75"]
|
|
186
|
+
clang_arch_flags = ["--cuda-gpu-arch=sm_52", "--cuda-gpu-arch=sm_75"]
|
|
187
|
+
else:
|
|
188
|
+
if arch == "aarch64" and target_platform == "linux" and ctk_version == (12, 9):
|
|
189
|
+
# Skip certain architectures for aarch64 with CUDA 12.9 due to CCCL bug
|
|
190
|
+
print(
|
|
191
|
+
"[INFO] Skipping sm_52, sm_60, sm_61, and sm_70 targets for ARM due to a CUDA Toolkit bug. "
|
|
192
|
+
"See https://nvidia.github.io/warp/installation.html#cuda-12-9-limitation-on-linux-arm-platforms "
|
|
193
|
+
"for details."
|
|
194
|
+
)
|
|
195
|
+
else:
|
|
196
|
+
gencode_opts.extend(
|
|
197
|
+
[
|
|
198
|
+
"-gencode=arch=compute_52,code=sm_52", # Maxwell
|
|
199
|
+
"-gencode=arch=compute_60,code=sm_60", # Pascal
|
|
200
|
+
"-gencode=arch=compute_61,code=sm_61",
|
|
201
|
+
"-gencode=arch=compute_70,code=sm_70", # Volta
|
|
202
|
+
]
|
|
203
|
+
)
|
|
204
|
+
clang_arch_flags.extend(
|
|
205
|
+
[
|
|
206
|
+
"--cuda-gpu-arch=sm_52",
|
|
207
|
+
"--cuda-gpu-arch=sm_60",
|
|
208
|
+
"--cuda-gpu-arch=sm_61",
|
|
209
|
+
"--cuda-gpu-arch=sm_70",
|
|
210
|
+
]
|
|
211
|
+
)
|
|
212
|
+
|
|
213
|
+
# Desktop architectures
|
|
214
|
+
gencode_opts.extend(
|
|
215
|
+
[
|
|
216
|
+
"-gencode=arch=compute_75,code=sm_75", # Turing
|
|
217
|
+
"-gencode=arch=compute_75,code=compute_75", # Turing (PTX)
|
|
218
|
+
"-gencode=arch=compute_80,code=sm_80", # Ampere
|
|
219
|
+
"-gencode=arch=compute_86,code=sm_86",
|
|
220
|
+
"-gencode=arch=compute_89,code=sm_89", # Ada
|
|
221
|
+
"-gencode=arch=compute_90,code=sm_90", # Hopper
|
|
222
|
+
]
|
|
223
|
+
)
|
|
224
|
+
clang_arch_flags.extend(
|
|
225
|
+
[
|
|
226
|
+
"--cuda-gpu-arch=sm_75", # Turing
|
|
227
|
+
"--cuda-gpu-arch=sm_80", # Ampere
|
|
228
|
+
"--cuda-gpu-arch=sm_86",
|
|
229
|
+
"--cuda-gpu-arch=sm_89", # Ada
|
|
230
|
+
"--cuda-gpu-arch=sm_90", # Hopper
|
|
231
|
+
]
|
|
232
|
+
)
|
|
233
|
+
|
|
234
|
+
if ctk_version >= (12, 8):
|
|
235
|
+
gencode_opts.extend(["-gencode=arch=compute_100,code=sm_100", "-gencode=arch=compute_120,code=sm_120"])
|
|
236
|
+
clang_arch_flags.extend(["--cuda-gpu-arch=sm_100", "--cuda-gpu-arch=sm_120"])
|
|
237
|
+
|
|
238
|
+
# Mobile architectures for aarch64 Linux
|
|
239
|
+
if arch == "aarch64" and target_platform == "linux":
|
|
240
|
+
gencode_opts.extend(
|
|
241
|
+
[
|
|
242
|
+
"-gencode=arch=compute_87,code=sm_87", # Orin
|
|
243
|
+
"-gencode=arch=compute_53,code=sm_53", # X1
|
|
244
|
+
"-gencode=arch=compute_62,code=sm_62", # X2
|
|
245
|
+
"-gencode=arch=compute_72,code=sm_72", # Xavier
|
|
246
|
+
]
|
|
247
|
+
)
|
|
248
|
+
clang_arch_flags.extend(
|
|
249
|
+
[
|
|
250
|
+
"--cuda-gpu-arch=sm_87",
|
|
251
|
+
"--cuda-gpu-arch=sm_53",
|
|
252
|
+
"--cuda-gpu-arch=sm_62",
|
|
253
|
+
"--cuda-gpu-arch=sm_72",
|
|
254
|
+
]
|
|
255
|
+
)
|
|
256
|
+
|
|
257
|
+
# Thor support in CUDA 12.8+
|
|
258
|
+
if ctk_version >= (12, 8):
|
|
259
|
+
gencode_opts.append("-gencode=arch=compute_101,code=sm_101") # Thor (CUDA 12 numbering)
|
|
260
|
+
clang_arch_flags.append("--cuda-gpu-arch=sm_101")
|
|
261
|
+
|
|
262
|
+
if ctk_version >= (12, 9):
|
|
263
|
+
gencode_opts.append("-gencode=arch=compute_121,code=sm_121")
|
|
264
|
+
clang_arch_flags.append("--cuda-gpu-arch=sm_121")
|
|
265
|
+
|
|
266
|
+
# PTX for future hardware (use highest available compute capability)
|
|
267
|
+
if ctk_version >= (12, 9):
|
|
268
|
+
gencode_opts.extend(["-gencode=arch=compute_121,code=compute_121"])
|
|
269
|
+
elif ctk_version >= (12, 8):
|
|
270
|
+
gencode_opts.extend(["-gencode=arch=compute_120,code=compute_120"])
|
|
271
|
+
else:
|
|
272
|
+
gencode_opts.append("-gencode=arch=compute_90,code=compute_90")
|
|
273
|
+
|
|
274
|
+
return gencode_opts, clang_arch_flags
|
|
275
|
+
|
|
276
|
+
|
|
277
|
+
def _get_architectures_cu13(
|
|
278
|
+
ctk_version: tuple[int, int], arch: str, target_platform: str, quick_build: bool = False
|
|
279
|
+
) -> tuple[list[str], list[str]]:
|
|
280
|
+
"""Get architecture flags for CUDA 13.x."""
|
|
281
|
+
gencode_opts = []
|
|
282
|
+
clang_arch_flags = []
|
|
283
|
+
|
|
284
|
+
if quick_build:
|
|
285
|
+
gencode_opts = ["-gencode=arch=compute_75,code=compute_75"]
|
|
286
|
+
clang_arch_flags = ["--cuda-gpu-arch=sm_75"]
|
|
287
|
+
else:
|
|
288
|
+
# Desktop architectures
|
|
289
|
+
gencode_opts.extend(
|
|
290
|
+
[
|
|
291
|
+
"-gencode=arch=compute_75,code=sm_75", # Turing
|
|
292
|
+
"-gencode=arch=compute_75,code=compute_75", # Turing (PTX)
|
|
293
|
+
"-gencode=arch=compute_80,code=sm_80", # Ampere
|
|
294
|
+
"-gencode=arch=compute_86,code=sm_86",
|
|
295
|
+
"-gencode=arch=compute_89,code=sm_89", # Ada
|
|
296
|
+
"-gencode=arch=compute_90,code=sm_90", # Hopper
|
|
297
|
+
"-gencode=arch=compute_100,code=sm_100", # Blackwell
|
|
298
|
+
"-gencode=arch=compute_120,code=sm_120", # Blackwell
|
|
299
|
+
]
|
|
300
|
+
)
|
|
301
|
+
clang_arch_flags.extend(
|
|
302
|
+
[
|
|
303
|
+
"--cuda-gpu-arch=sm_75", # Turing
|
|
304
|
+
"--cuda-gpu-arch=sm_80", # Ampere
|
|
305
|
+
"--cuda-gpu-arch=sm_86",
|
|
306
|
+
"--cuda-gpu-arch=sm_89", # Ada
|
|
307
|
+
"--cuda-gpu-arch=sm_90", # Hopper
|
|
308
|
+
"--cuda-gpu-arch=sm_100", # Blackwell
|
|
309
|
+
"--cuda-gpu-arch=sm_120", # Blackwell
|
|
310
|
+
]
|
|
311
|
+
)
|
|
312
|
+
|
|
313
|
+
# Mobile architectures for aarch64 Linux
|
|
314
|
+
if arch == "aarch64" and target_platform == "linux":
|
|
315
|
+
gencode_opts.extend(
|
|
316
|
+
[
|
|
317
|
+
"-gencode=arch=compute_87,code=sm_87", # Orin
|
|
318
|
+
"-gencode=arch=compute_110,code=sm_110", # Thor
|
|
319
|
+
"-gencode=arch=compute_121,code=sm_121", # Spark
|
|
320
|
+
]
|
|
321
|
+
)
|
|
322
|
+
clang_arch_flags.extend(
|
|
323
|
+
[
|
|
324
|
+
"--cuda-gpu-arch=sm_87",
|
|
325
|
+
"--cuda-gpu-arch=sm_110",
|
|
326
|
+
"--cuda-gpu-arch=sm_121",
|
|
327
|
+
]
|
|
328
|
+
)
|
|
329
|
+
|
|
330
|
+
# PTX for future hardware (use highest available compute capability)
|
|
331
|
+
gencode_opts.extend(["-gencode=arch=compute_121,code=compute_121"])
|
|
332
|
+
|
|
333
|
+
return gencode_opts, clang_arch_flags
|
|
334
|
+
|
|
335
|
+
|
|
336
|
+
def _get_architectures_cu12(
|
|
337
|
+
ctk_version: tuple[int, int], arch: str, target_platform: str, quick_build: bool = False
|
|
338
|
+
) -> tuple[list[str], list[str]]:
|
|
339
|
+
"""Get architecture flags for CUDA 12.x."""
|
|
340
|
+
gencode_opts = []
|
|
341
|
+
clang_arch_flags = []
|
|
342
|
+
|
|
343
|
+
if quick_build:
|
|
344
|
+
gencode_opts = ["-gencode=arch=compute_52,code=compute_52", "-gencode=arch=compute_75,code=compute_75"]
|
|
345
|
+
clang_arch_flags = ["--cuda-gpu-arch=sm_52", "--cuda-gpu-arch=sm_75"]
|
|
346
|
+
else:
|
|
347
|
+
if arch == "aarch64" and target_platform == "linux" and ctk_version == (12, 9):
|
|
348
|
+
# Skip certain architectures for aarch64 with CUDA 12.9 due to CCCL bug
|
|
349
|
+
print(
|
|
350
|
+
"[INFO] Skipping sm_52, sm_60, sm_61, and sm_70 targets for ARM due to a CUDA Toolkit bug. "
|
|
351
|
+
"See https://nvidia.github.io/warp/installation.html#cuda-12-9-limitation-on-linux-arm-platforms "
|
|
352
|
+
"for details."
|
|
353
|
+
)
|
|
354
|
+
else:
|
|
355
|
+
gencode_opts.extend(
|
|
356
|
+
[
|
|
357
|
+
"-gencode=arch=compute_52,code=sm_52", # Maxwell
|
|
358
|
+
"-gencode=arch=compute_60,code=sm_60", # Pascal
|
|
359
|
+
"-gencode=arch=compute_61,code=sm_61",
|
|
360
|
+
"-gencode=arch=compute_70,code=sm_70", # Volta
|
|
361
|
+
]
|
|
362
|
+
)
|
|
363
|
+
clang_arch_flags.extend(
|
|
364
|
+
[
|
|
365
|
+
"--cuda-gpu-arch=sm_52",
|
|
366
|
+
"--cuda-gpu-arch=sm_60",
|
|
367
|
+
"--cuda-gpu-arch=sm_61",
|
|
368
|
+
"--cuda-gpu-arch=sm_70",
|
|
369
|
+
]
|
|
370
|
+
)
|
|
371
|
+
|
|
372
|
+
# Desktop architectures
|
|
373
|
+
gencode_opts.extend(
|
|
374
|
+
[
|
|
375
|
+
"-gencode=arch=compute_75,code=sm_75", # Turing
|
|
376
|
+
"-gencode=arch=compute_75,code=compute_75", # Turing (PTX)
|
|
377
|
+
"-gencode=arch=compute_80,code=sm_80", # Ampere
|
|
378
|
+
"-gencode=arch=compute_86,code=sm_86",
|
|
379
|
+
"-gencode=arch=compute_89,code=sm_89", # Ada
|
|
380
|
+
"-gencode=arch=compute_90,code=sm_90", # Hopper
|
|
381
|
+
]
|
|
382
|
+
)
|
|
383
|
+
clang_arch_flags.extend(
|
|
384
|
+
[
|
|
385
|
+
"--cuda-gpu-arch=sm_75", # Turing
|
|
386
|
+
"--cuda-gpu-arch=sm_80", # Ampere
|
|
387
|
+
"--cuda-gpu-arch=sm_86",
|
|
388
|
+
"--cuda-gpu-arch=sm_89", # Ada
|
|
389
|
+
"--cuda-gpu-arch=sm_90", # Hopper
|
|
390
|
+
]
|
|
391
|
+
)
|
|
392
|
+
|
|
393
|
+
if ctk_version >= (12, 8):
|
|
394
|
+
gencode_opts.extend(["-gencode=arch=compute_100,code=sm_100", "-gencode=arch=compute_120,code=sm_120"])
|
|
395
|
+
clang_arch_flags.extend(["--cuda-gpu-arch=sm_100", "--cuda-gpu-arch=sm_120"])
|
|
396
|
+
|
|
397
|
+
# Mobile architectures for aarch64 Linux
|
|
398
|
+
if arch == "aarch64" and target_platform == "linux":
|
|
399
|
+
gencode_opts.extend(
|
|
400
|
+
[
|
|
401
|
+
"-gencode=arch=compute_87,code=sm_87", # Orin
|
|
402
|
+
"-gencode=arch=compute_53,code=sm_53", # X1
|
|
403
|
+
"-gencode=arch=compute_62,code=sm_62", # X2
|
|
404
|
+
"-gencode=arch=compute_72,code=sm_72", # Xavier
|
|
405
|
+
]
|
|
406
|
+
)
|
|
407
|
+
clang_arch_flags.extend(
|
|
408
|
+
[
|
|
409
|
+
"--cuda-gpu-arch=sm_87",
|
|
410
|
+
"--cuda-gpu-arch=sm_53",
|
|
411
|
+
"--cuda-gpu-arch=sm_62",
|
|
412
|
+
"--cuda-gpu-arch=sm_72",
|
|
413
|
+
]
|
|
414
|
+
)
|
|
415
|
+
|
|
416
|
+
# Thor support in CUDA 12.8+
|
|
417
|
+
if ctk_version >= (12, 8):
|
|
418
|
+
gencode_opts.append("-gencode=arch=compute_101,code=sm_101") # Thor (CUDA 12 numbering)
|
|
419
|
+
clang_arch_flags.append("--cuda-gpu-arch=sm_101")
|
|
420
|
+
|
|
421
|
+
if ctk_version >= (12, 9):
|
|
422
|
+
gencode_opts.append("-gencode=arch=compute_121,code=sm_121")
|
|
423
|
+
clang_arch_flags.append("--cuda-gpu-arch=sm_121")
|
|
424
|
+
|
|
425
|
+
# PTX for future hardware (use highest available compute capability)
|
|
426
|
+
if ctk_version >= (12, 9):
|
|
427
|
+
gencode_opts.extend(["-gencode=arch=compute_121,code=compute_121"])
|
|
428
|
+
elif ctk_version >= (12, 8):
|
|
429
|
+
gencode_opts.extend(["-gencode=arch=compute_120,code=compute_120"])
|
|
430
|
+
else:
|
|
431
|
+
gencode_opts.append("-gencode=arch=compute_90,code=compute_90")
|
|
432
|
+
|
|
433
|
+
return gencode_opts, clang_arch_flags
|
|
434
|
+
|
|
435
|
+
|
|
436
|
+
def _get_architectures_cu13(
|
|
437
|
+
ctk_version: tuple[int, int], arch: str, target_platform: str, quick_build: bool = False
|
|
438
|
+
) -> tuple[list[str], list[str]]:
|
|
439
|
+
"""Get architecture flags for CUDA 13.x."""
|
|
440
|
+
gencode_opts = []
|
|
441
|
+
clang_arch_flags = []
|
|
442
|
+
|
|
443
|
+
if quick_build:
|
|
444
|
+
gencode_opts = ["-gencode=arch=compute_75,code=compute_75"]
|
|
445
|
+
clang_arch_flags = ["--cuda-gpu-arch=sm_75"]
|
|
446
|
+
else:
|
|
447
|
+
# Desktop architectures
|
|
448
|
+
gencode_opts.extend(
|
|
449
|
+
[
|
|
450
|
+
"-gencode=arch=compute_75,code=sm_75", # Turing
|
|
451
|
+
"-gencode=arch=compute_75,code=compute_75", # Turing (PTX)
|
|
452
|
+
"-gencode=arch=compute_80,code=sm_80", # Ampere
|
|
453
|
+
"-gencode=arch=compute_86,code=sm_86",
|
|
454
|
+
"-gencode=arch=compute_89,code=sm_89", # Ada
|
|
455
|
+
"-gencode=arch=compute_90,code=sm_90", # Hopper
|
|
456
|
+
"-gencode=arch=compute_100,code=sm_100", # Blackwell
|
|
457
|
+
"-gencode=arch=compute_120,code=sm_120", # Blackwell
|
|
458
|
+
]
|
|
459
|
+
)
|
|
460
|
+
clang_arch_flags.extend(
|
|
461
|
+
[
|
|
462
|
+
"--cuda-gpu-arch=sm_75", # Turing
|
|
463
|
+
"--cuda-gpu-arch=sm_80", # Ampere
|
|
464
|
+
"--cuda-gpu-arch=sm_86",
|
|
465
|
+
"--cuda-gpu-arch=sm_89", # Ada
|
|
466
|
+
"--cuda-gpu-arch=sm_90", # Hopper
|
|
467
|
+
"--cuda-gpu-arch=sm_100", # Blackwell
|
|
468
|
+
"--cuda-gpu-arch=sm_120", # Blackwell
|
|
469
|
+
]
|
|
470
|
+
)
|
|
471
|
+
|
|
472
|
+
# Mobile architectures for aarch64 Linux
|
|
473
|
+
if arch == "aarch64" and target_platform == "linux":
|
|
474
|
+
gencode_opts.extend(
|
|
475
|
+
[
|
|
476
|
+
"-gencode=arch=compute_87,code=sm_87", # Orin
|
|
477
|
+
"-gencode=arch=compute_110,code=sm_110", # Thor
|
|
478
|
+
"-gencode=arch=compute_121,code=sm_121", # Spark
|
|
479
|
+
]
|
|
480
|
+
)
|
|
481
|
+
clang_arch_flags.extend(
|
|
482
|
+
[
|
|
483
|
+
"--cuda-gpu-arch=sm_87",
|
|
484
|
+
"--cuda-gpu-arch=sm_110",
|
|
485
|
+
"--cuda-gpu-arch=sm_121",
|
|
486
|
+
]
|
|
487
|
+
)
|
|
488
|
+
|
|
489
|
+
# PTX for future hardware (use highest available compute capability)
|
|
490
|
+
gencode_opts.extend(["-gencode=arch=compute_121,code=compute_121"])
|
|
491
|
+
|
|
492
|
+
return gencode_opts, clang_arch_flags
|
|
493
|
+
|
|
494
|
+
|
|
177
495
|
def build_dll_for_arch(args, dll_path, cpp_paths, cu_path, arch, libs: list[str] | None = None, mode=None):
|
|
178
496
|
mode = args.mode if (mode is None) else mode
|
|
179
497
|
cuda_home = args.cuda_path
|
|
@@ -208,79 +526,11 @@ def build_dll_for_arch(args, dll_path, cpp_paths, cu_path, arch, libs: list[str]
|
|
|
208
526
|
f"CUDA Toolkit version {MIN_CTK_VERSION[0]}.{MIN_CTK_VERSION[1]}+ is required (found {ctk_version[0]}.{ctk_version[1]} in {cuda_home})"
|
|
209
527
|
)
|
|
210
528
|
|
|
211
|
-
#
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
# Clang architecture flags
|
|
215
|
-
clang_arch_flags = []
|
|
216
|
-
|
|
217
|
-
if args.quick:
|
|
218
|
-
# minimum supported architectures (PTX)
|
|
219
|
-
if ctk_version >= (13, 0):
|
|
220
|
-
gencode_opts += ["-gencode=arch=compute_75,code=compute_75"]
|
|
221
|
-
clang_arch_flags += ["--cuda-gpu-arch=sm_75"]
|
|
222
|
-
else:
|
|
223
|
-
gencode_opts += ["-gencode=arch=compute_52,code=compute_52", "-gencode=arch=compute_75,code=compute_75"]
|
|
224
|
-
clang_arch_flags += ["--cuda-gpu-arch=sm_52", "--cuda-gpu-arch=sm_75"]
|
|
529
|
+
# Get architecture flags based on CUDA version
|
|
530
|
+
if ctk_version >= (13, 0):
|
|
531
|
+
gencode_opts, clang_arch_flags = _get_architectures_cu13(ctk_version, arch, sys.platform, args.quick)
|
|
225
532
|
else:
|
|
226
|
-
|
|
227
|
-
gencode_opts += [
|
|
228
|
-
# SASS for supported desktop/datacenter architectures
|
|
229
|
-
"-gencode=arch=compute_75,code=sm_75", # Turing
|
|
230
|
-
"-gencode=arch=compute_75,code=compute_75", # Turing (PTX)
|
|
231
|
-
"-gencode=arch=compute_80,code=sm_80", # Ampere
|
|
232
|
-
"-gencode=arch=compute_86,code=sm_86",
|
|
233
|
-
"-gencode=arch=compute_89,code=sm_89", # Ada
|
|
234
|
-
"-gencode=arch=compute_90,code=sm_90", # Hopper
|
|
235
|
-
]
|
|
236
|
-
|
|
237
|
-
clang_arch_flags += [
|
|
238
|
-
# SASS for supported desktop/datacenter architectures
|
|
239
|
-
"--cuda-gpu-arch=sm_75", # Turing
|
|
240
|
-
"--cuda-gpu-arch=sm_80", # Ampere
|
|
241
|
-
"--cuda-gpu-arch=sm_86",
|
|
242
|
-
"--cuda-gpu-arch=sm_89", # Ada
|
|
243
|
-
"--cuda-gpu-arch=sm_90", # Hopper
|
|
244
|
-
]
|
|
245
|
-
|
|
246
|
-
if arch == "aarch64" and sys.platform == "linux":
|
|
247
|
-
# SASS for supported mobile architectures (e.g. Tegra/Jetson)
|
|
248
|
-
gencode_opts += ["-gencode=arch=compute_87,code=sm_87"] # Orin
|
|
249
|
-
clang_arch_flags += ["--cuda-gpu-arch=sm_87"]
|
|
250
|
-
|
|
251
|
-
if ctk_version >= (13, 0):
|
|
252
|
-
gencode_opts += ["-gencode=arch=compute_110,code=sm_110"] # Thor
|
|
253
|
-
clang_arch_flags += ["--cuda-gpu-arch=sm_110"]
|
|
254
|
-
else:
|
|
255
|
-
gencode_opts += [
|
|
256
|
-
"-gencode=arch=compute_53,code=sm_53", # X1
|
|
257
|
-
"-gencode=arch=compute_62,code=sm_62", # X2
|
|
258
|
-
"-gencode=arch=compute_72,code=sm_72", # Xavier
|
|
259
|
-
]
|
|
260
|
-
clang_arch_flags += [
|
|
261
|
-
"--cuda-gpu-arch=sm_53",
|
|
262
|
-
"--cuda-gpu-arch=sm_62",
|
|
263
|
-
"--cuda-gpu-arch=sm_72",
|
|
264
|
-
]
|
|
265
|
-
|
|
266
|
-
if ctk_version >= (12, 8):
|
|
267
|
-
gencode_opts += ["-gencode=arch=compute_101,code=sm_101"] # Thor (CUDA 12 numbering)
|
|
268
|
-
clang_arch_flags += ["--cuda-gpu-arch=sm_101"]
|
|
269
|
-
|
|
270
|
-
if ctk_version >= (12, 8):
|
|
271
|
-
# Support for Blackwell is available with CUDA Toolkit 12.8+
|
|
272
|
-
gencode_opts += [
|
|
273
|
-
"-gencode=arch=compute_100,code=sm_100", # Blackwell
|
|
274
|
-
"-gencode=arch=compute_120,code=sm_120", # Blackwell
|
|
275
|
-
"-gencode=arch=compute_120,code=compute_120", # PTX for future hardware
|
|
276
|
-
]
|
|
277
|
-
|
|
278
|
-
clang_arch_flags += [
|
|
279
|
-
"--cuda-gpu-arch=sm_100", # Blackwell
|
|
280
|
-
"--cuda-gpu-arch=sm_120", # Blackwell
|
|
281
|
-
]
|
|
282
|
-
else:
|
|
283
|
-
gencode_opts += ["-gencode=arch=compute_90,code=compute_90"] # PTX for future hardware
|
|
533
|
+
gencode_opts, clang_arch_flags = _get_architectures_cu12(ctk_version, arch, sys.platform, args.quick)
|
|
284
534
|
|
|
285
535
|
nvcc_opts = [
|
|
286
536
|
*gencode_opts,
|