warp-lang 1.3.3__py3-none-manylinux2014_x86_64.whl → 1.4.0__py3-none-manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of warp-lang might be problematic. Click here for more details.

Files changed (106) hide show
  1. warp/__init__.py +6 -0
  2. warp/autograd.py +59 -6
  3. warp/bin/warp.so +0 -0
  4. warp/build_dll.py +8 -10
  5. warp/builtins.py +126 -4
  6. warp/codegen.py +435 -53
  7. warp/config.py +1 -1
  8. warp/context.py +678 -403
  9. warp/dlpack.py +2 -0
  10. warp/examples/benchmarks/benchmark_cloth.py +10 -0
  11. warp/examples/core/example_render_opengl.py +12 -10
  12. warp/examples/fem/example_adaptive_grid.py +251 -0
  13. warp/examples/fem/example_apic_fluid.py +1 -1
  14. warp/examples/fem/example_diffusion_3d.py +2 -2
  15. warp/examples/fem/example_magnetostatics.py +1 -1
  16. warp/examples/fem/example_streamlines.py +1 -0
  17. warp/examples/fem/utils.py +23 -4
  18. warp/examples/sim/example_cloth.py +50 -6
  19. warp/fem/__init__.py +2 -0
  20. warp/fem/adaptivity.py +493 -0
  21. warp/fem/field/field.py +2 -1
  22. warp/fem/field/nodal_field.py +18 -26
  23. warp/fem/field/test.py +4 -4
  24. warp/fem/field/trial.py +4 -4
  25. warp/fem/geometry/__init__.py +1 -0
  26. warp/fem/geometry/adaptive_nanogrid.py +843 -0
  27. warp/fem/geometry/nanogrid.py +55 -28
  28. warp/fem/space/__init__.py +1 -1
  29. warp/fem/space/nanogrid_function_space.py +69 -35
  30. warp/fem/utils.py +113 -107
  31. warp/jax_experimental.py +28 -15
  32. warp/native/array.h +0 -1
  33. warp/native/builtin.h +103 -6
  34. warp/native/bvh.cu +2 -0
  35. warp/native/cuda_util.cpp +14 -0
  36. warp/native/cuda_util.h +2 -0
  37. warp/native/error.cpp +4 -2
  38. warp/native/exports.h +99 -17
  39. warp/native/mat.h +97 -0
  40. warp/native/mesh.cpp +36 -0
  41. warp/native/mesh.cu +51 -0
  42. warp/native/mesh.h +1 -0
  43. warp/native/quat.h +43 -0
  44. warp/native/spatial.h +6 -0
  45. warp/native/vec.h +74 -0
  46. warp/native/warp.cpp +2 -1
  47. warp/native/warp.cu +10 -3
  48. warp/native/warp.h +8 -1
  49. warp/paddle.py +382 -0
  50. warp/sim/__init__.py +1 -0
  51. warp/sim/collide.py +519 -0
  52. warp/sim/integrator_euler.py +18 -5
  53. warp/sim/integrator_featherstone.py +5 -5
  54. warp/sim/integrator_vbd.py +1026 -0
  55. warp/sim/model.py +49 -23
  56. warp/stubs.py +459 -0
  57. warp/tape.py +2 -0
  58. warp/tests/aux_test_dependent.py +1 -0
  59. warp/tests/aux_test_name_clash1.py +32 -0
  60. warp/tests/aux_test_name_clash2.py +32 -0
  61. warp/tests/aux_test_square.py +1 -0
  62. warp/tests/test_array.py +188 -0
  63. warp/tests/test_async.py +3 -3
  64. warp/tests/test_atomic.py +6 -0
  65. warp/tests/test_closest_point_edge_edge.py +93 -1
  66. warp/tests/test_codegen.py +62 -15
  67. warp/tests/test_codegen_instancing.py +1457 -0
  68. warp/tests/test_collision.py +486 -0
  69. warp/tests/test_compile_consts.py +3 -28
  70. warp/tests/test_dlpack.py +170 -0
  71. warp/tests/test_examples.py +22 -8
  72. warp/tests/test_fast_math.py +10 -4
  73. warp/tests/test_fem.py +64 -0
  74. warp/tests/test_func.py +46 -0
  75. warp/tests/test_implicit_init.py +49 -0
  76. warp/tests/test_jax.py +58 -0
  77. warp/tests/test_mat.py +84 -0
  78. warp/tests/test_mesh_query_point.py +188 -0
  79. warp/tests/test_module_hashing.py +40 -0
  80. warp/tests/test_multigpu.py +3 -3
  81. warp/tests/test_overwrite.py +8 -0
  82. warp/tests/test_paddle.py +852 -0
  83. warp/tests/test_print.py +89 -0
  84. warp/tests/test_quat.py +111 -0
  85. warp/tests/test_reload.py +31 -1
  86. warp/tests/test_scalar_ops.py +2 -0
  87. warp/tests/test_static.py +412 -0
  88. warp/tests/test_streams.py +64 -3
  89. warp/tests/test_struct.py +4 -4
  90. warp/tests/test_torch.py +24 -0
  91. warp/tests/test_triangle_closest_point.py +137 -0
  92. warp/tests/test_types.py +1 -1
  93. warp/tests/test_vbd.py +386 -0
  94. warp/tests/test_vec.py +143 -0
  95. warp/tests/test_vec_scalar_ops.py +139 -0
  96. warp/tests/unittest_suites.py +12 -0
  97. warp/tests/unittest_utils.py +9 -5
  98. warp/thirdparty/dlpack.py +3 -1
  99. warp/types.py +150 -28
  100. warp/utils.py +37 -14
  101. {warp_lang-1.3.3.dist-info → warp_lang-1.4.0.dist-info}/METADATA +10 -8
  102. {warp_lang-1.3.3.dist-info → warp_lang-1.4.0.dist-info}/RECORD +105 -93
  103. warp/tests/test_point_triangle_closest_point.py +0 -143
  104. {warp_lang-1.3.3.dist-info → warp_lang-1.4.0.dist-info}/LICENSE.md +0 -0
  105. {warp_lang-1.3.3.dist-info → warp_lang-1.4.0.dist-info}/WHEEL +0 -0
  106. {warp_lang-1.3.3.dist-info → warp_lang-1.4.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,412 @@
1
+ # Copyright (c) 2024 NVIDIA CORPORATION. All rights reserved.
2
+ # NVIDIA CORPORATION and its licensors retain all intellectual property
3
+ # and proprietary rights in and to this software, related documentation
4
+ # and any modifications thereto. Any use, reproduction, disclosure or
5
+ # distribution of this software and related documentation without an express
6
+ # license agreement from NVIDIA CORPORATION is strictly prohibited.
7
+
8
+ import unittest
9
+ from typing import Dict, List
10
+
11
+ import numpy as np
12
+
13
+ import warp
14
+ import warp as wp
15
+ from warp.tests.unittest_utils import *
16
+
17
+ global_variable = 3
18
+
19
+
20
+ @wp.func
21
+ def static_global_variable_func():
22
+ static_var = warp.static(global_variable + 2)
23
+ return static_var
24
+
25
+
26
+ @wp.kernel
27
+ def static_global_variable_kernel(results: wp.array(dtype=int)):
28
+ # evaluate a constant expression at codegen time
29
+ static_var = static_global_variable_func()
30
+ const_var = 3
31
+ # call a function at codegen time
32
+ static_func_result = wp.static(static_global_variable_func() + const_var)
33
+ results[0] = static_var
34
+ results[1] = static_func_result
35
+
36
+
37
+ @wp.struct
38
+ class StaticallyConstructableStruct:
39
+ mat: wp.mat33
40
+ vec: wp.vec3
41
+ i: int
42
+
43
+
44
+ @wp.struct
45
+ class StaticallyConstructableNestedStruct:
46
+ s: StaticallyConstructableStruct
47
+ tf: wp.transform
48
+ quat: wp.quat
49
+
50
+
51
+ @wp.func
52
+ def construct_struct(mat: wp.mat33, vec: wp.vec3, i: int):
53
+ s = StaticallyConstructableStruct()
54
+ s.mat = mat
55
+ s.vec = vec
56
+ s.i = i
57
+ return s
58
+
59
+
60
+ @wp.func
61
+ def construct_nested_struct(mat: wp.mat33, vec: wp.vec3, i: int, tf: wp.transform, quat: wp.quat):
62
+ n = StaticallyConstructableNestedStruct()
63
+ n.s = construct_struct(mat, vec, i)
64
+ n.tf = tf
65
+ n.quat = quat
66
+ return n
67
+
68
+
69
+ @wp.kernel
70
+ def construct_static_struct_kernel(results: wp.array(dtype=StaticallyConstructableStruct)):
71
+ static_struct = wp.static(
72
+ construct_struct(
73
+ wp.mat33(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0),
74
+ wp.vec3(1.0, 2.0, 3.0),
75
+ 1,
76
+ )
77
+ )
78
+ results[0] = static_struct
79
+
80
+
81
+ @wp.kernel
82
+ def construct_static_nested_struct_kernel(results: wp.array(dtype=StaticallyConstructableNestedStruct)):
83
+ static_struct = wp.static(
84
+ construct_nested_struct(
85
+ wp.mat33(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0),
86
+ wp.vec3(1.0, 2.0, 3.0),
87
+ 1,
88
+ wp.transform(wp.vec3(1.0, 2.0, 3.0), wp.quat_from_axis_angle(wp.vec3(0.0, 1.0, 0.0), wp.pi / 2.0)),
89
+ wp.quat_from_axis_angle(wp.normalize(wp.vec3(1.0, 2.0, 3.0)), wp.pi / 2.0),
90
+ )
91
+ )
92
+ results[0] = static_struct
93
+
94
+
95
+ def test_static_global_variable(test, device):
96
+ results = wp.zeros(2, dtype=int, device=device)
97
+ wp.launch(static_global_variable_kernel, 1, [results], device=device)
98
+ assert_np_equal(results.numpy(), np.array([5, 8], dtype=int))
99
+
100
+
101
+ def test_construct_static_struct(test, device):
102
+ results = wp.zeros(1, dtype=StaticallyConstructableStruct, device=device)
103
+ wp.launch(construct_static_struct_kernel, 1, [results], device=device)
104
+ results = results.numpy()
105
+ assert_np_equal(results[0][0], np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]]))
106
+ assert_np_equal(results[0][1], np.array([1.0, 2.0, 3.0]))
107
+ assert_np_equal(results[0][2], 1)
108
+
109
+
110
+ def test_construct_static_nested_struct(test, device):
111
+ results = wp.zeros(1, dtype=StaticallyConstructableNestedStruct, device=device)
112
+ wp.launch(construct_static_nested_struct_kernel, 1, [results], device=device)
113
+ results = results.numpy()
114
+
115
+ tf = wp.transform(wp.vec3(1.0, 2.0, 3.0), wp.quat_from_axis_angle(wp.vec3(0.0, 1.0, 0.0), wp.pi / 2.0))
116
+ quat = wp.quat_from_axis_angle(wp.normalize(wp.vec3(1.0, 2.0, 3.0)), wp.pi / 2.0)
117
+
118
+ assert_np_equal(results[0][0][0], np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]]))
119
+ assert_np_equal(results[0][0][1], np.array([1.0, 2.0, 3.0]))
120
+ assert_np_equal(results[0][0][2], 1)
121
+ assert_np_equal(results[0][1], np.array(tf))
122
+ assert_np_equal(results[0][2], np.array(quat))
123
+
124
+
125
+ def test_invalid_static_expression(test, device):
126
+ @wp.kernel
127
+ def invalid_kernel():
128
+ wp.static(1.0 / 0.0)
129
+
130
+ with test.assertRaisesRegex(
131
+ warp.codegen.WarpCodegenError, r"Error evaluating static expression\: float division by zero"
132
+ ):
133
+ wp.launch(invalid_kernel, 1, device=device)
134
+
135
+ @wp.kernel
136
+ def invalid_kernel(i: int):
137
+ wp.static(i * 2)
138
+
139
+ with test.assertRaisesRegex(
140
+ wp.codegen.WarpCodegenError,
141
+ r"Error evaluating static expression\: name 'i' is not defined\. Make sure all variables used in the static expression are constant\.",
142
+ ):
143
+ wp.launch(invalid_kernel, 1, device=device, inputs=[3])
144
+
145
+
146
+ def test_static_expression_return_types(test, device):
147
+ @wp.kernel
148
+ def invalid_kernel():
149
+ wp.static(wp.zeros(3, device=device))
150
+
151
+ with test.assertRaisesRegex(
152
+ warp.codegen.WarpCodegenError,
153
+ r"Static expression returns an unsupported value\: a Warp array cannot be created inside Warp kernels",
154
+ ):
155
+ wp.launch(invalid_kernel, 1, device=device)
156
+
157
+ @wp.struct
158
+ class Baz:
159
+ data: wp.array(dtype=int)
160
+ z: wp.vec3
161
+
162
+ @wp.struct
163
+ class Bar:
164
+ baz: Baz
165
+ y: float
166
+
167
+ @wp.struct
168
+ class Foo:
169
+ bar: Bar
170
+ x: int
171
+
172
+ def create_struct():
173
+ foo = Foo()
174
+ foo.bar = Bar()
175
+ foo.bar.baz = Baz()
176
+ foo.bar.baz.data = wp.zeros(3, dtype=int, device=device)
177
+ foo.bar.baz.z = wp.vec3(1, 2, 3)
178
+ foo.bar.y = 1.23
179
+ foo.x = 123
180
+ return foo
181
+
182
+ @wp.kernel
183
+ def invalid_kernel():
184
+ wp.static(create_struct())
185
+
186
+ with test.assertRaisesRegex(
187
+ warp.codegen.WarpCodegenError,
188
+ r"Static expression returns an unsupported value: the returned Warp struct contains a data type that cannot be constructed inside Warp kernels\: a Warp array cannot be created inside Warp kernels at .*?Foo\.bar\.baz",
189
+ ):
190
+ wp.launch(invalid_kernel, 1, device=device)
191
+
192
+ def function_with_no_return_value():
193
+ pass
194
+
195
+ @wp.kernel
196
+ def invalid_kernel():
197
+ wp.static(function_with_no_return_value())
198
+
199
+ with test.assertRaisesRegex(
200
+ warp.codegen.WarpCodegenError,
201
+ r"Static expression returns an unsupported value\: None is returned",
202
+ ):
203
+ wp.launch(invalid_kernel, 1, device=device)
204
+
205
+ class MyClass:
206
+ pass
207
+
208
+ @wp.kernel
209
+ def invalid_kernel():
210
+ wp.static(MyClass())
211
+
212
+ with test.assertRaisesRegex(
213
+ warp.codegen.WarpCodegenError,
214
+ r"Static expression returns an unsupported value\: value of type .*?MyClass",
215
+ ):
216
+ wp.launch(invalid_kernel, 1, device=device)
217
+
218
+
219
+ def test_function_variable(test, device):
220
+ # create a function and pass it in as a static variable to the kernel
221
+ @wp.func
222
+ def func1(a: int, b: int):
223
+ return a + b
224
+
225
+ @wp.func
226
+ def func2(a: int, b: int):
227
+ return a - b
228
+
229
+ for func in [func1, func2]:
230
+ # note that this example also works without using wp.static()
231
+
232
+ @wp.kernel
233
+ def function_variable_kernel(results: wp.array(dtype=int)):
234
+ results[0] = wp.static(func)(3, 2) # noqa: B023
235
+
236
+ results = wp.zeros(1, dtype=int, device=device)
237
+ # note that the kernel has to be recompiled every time the value of func changes
238
+ wp.launch(function_variable_kernel, 1, [results], device=device)
239
+ assert_np_equal(results.numpy(), np.array([func(3, 2)], dtype=int))
240
+
241
+
242
+ def test_function_lookup(test, device):
243
+ @wp.func
244
+ def do_add(a: float, b: float):
245
+ return a + b
246
+
247
+ @wp.func
248
+ def do_sub(a: float, b: float):
249
+ return a - b
250
+
251
+ @wp.func
252
+ def do_mul(a: float, b: float):
253
+ return a * b
254
+
255
+ op_handlers = {
256
+ "add": do_add,
257
+ "sub": do_sub,
258
+ "mul": do_mul,
259
+ }
260
+
261
+ inputs = wp.array([[1, 2], [3, 0]], dtype=wp.float32)
262
+
263
+ outputs = wp.empty(2, dtype=wp.float32)
264
+
265
+ for op in op_handlers.keys():
266
+
267
+ @wp.kernel
268
+ def operate(input: wp.array(dtype=inputs.dtype, ndim=2), output: wp.array(dtype=wp.float32)):
269
+ tid = wp.tid()
270
+ a, b = input[tid, 0], input[tid, 1]
271
+ # retrieve the right function to use for the captured op variable
272
+ output[tid] = wp.static(op_handlers[op])(a, b) # noqa: B023
273
+
274
+ wp.launch(operate, dim=2, inputs=[inputs], outputs=[outputs])
275
+ outputs_np = outputs.numpy()
276
+ inputs_np = inputs.numpy()
277
+ for i in range(len(outputs_np)):
278
+ test.assertEqual(outputs_np[i], op_handlers[op](float(inputs_np[i][0]), float(inputs_np[i][1])))
279
+
280
+
281
+ def count_ssa_occurrences(kernel: wp.Kernel, ssas: List[str]) -> Dict[str, int]:
282
+ # analyze the generated code
283
+ counts = {ssa: 0 for ssa in ssas}
284
+ for line in kernel.adj.blocks[0].body_forward:
285
+ for ssa in ssas:
286
+ if ssa in line:
287
+ counts[ssa] += 1
288
+ return counts
289
+
290
+
291
+ def test_static_for_loop(test, device):
292
+ @wp.kernel
293
+ def static_loop_variable(results: wp.array(dtype=int)):
294
+ s = 0
295
+ for i in range(wp.static(static_global_variable_func())):
296
+ s += wp.static(i)
297
+ results[0] = s
298
+
299
+ wp.set_module_options(
300
+ options={"max_unroll": static_global_variable_func()},
301
+ )
302
+
303
+ results = wp.zeros(1, dtype=int, device=device)
304
+ wp.launch(static_loop_variable, 1, [results], device=device)
305
+ results = results.numpy()
306
+
307
+ s = 0
308
+ for i in range(wp.static(static_global_variable_func())):
309
+ s += wp.static(i)
310
+
311
+ test.assertEqual(results[0], s, "Static for loop has to compute the correct solution")
312
+
313
+ # analyze the generated code
314
+ if hasattr(static_loop_variable.adj, "blocks"):
315
+ counts = count_ssa_occurrences(static_loop_variable, ["add", "for"])
316
+
317
+ test.assertEqual(counts["add"], static_global_variable_func(), "Static for loop must be unrolled")
318
+ # there is just one occurrence of "for" in the comment referring to the original Python code
319
+ test.assertEqual(counts["for"], 1, "Static for loop must be unrolled")
320
+
321
+
322
+ def test_static_if_else_elif(test, device):
323
+ @wp.kernel
324
+ def static_condition1(results: wp.array(dtype=int)):
325
+ if wp.static(static_global_variable_func() in {2, 3, 5}):
326
+ results[0] = 1
327
+ elif wp.static(static_global_variable_func() in {0, 1}):
328
+ results[0] = 2
329
+ else:
330
+ results[0] = 3
331
+
332
+ results = wp.zeros(1, dtype=int, device=device)
333
+ wp.launch(static_condition1, 1, [results], device=device)
334
+ results = results.numpy()
335
+ assert_np_equal(results[0], 1)
336
+ # TODO this needs fixing to ensure we can run these tests multiple times
337
+ if hasattr(static_condition1.adj, "blocks"):
338
+ counts = count_ssa_occurrences(static_condition1, ["if", "else"])
339
+
340
+ # if, else, elif can appear as comments but the generated code must not contain
341
+ # such keywords since the conditions are resolved at the time of code generation
342
+ assert_np_equal(counts["if"], 1)
343
+ assert_np_equal(counts["else"], 0)
344
+
345
+ captured_var = "hello"
346
+
347
+ @wp.kernel
348
+ def static_condition2(results: wp.array(dtype=int)):
349
+ if wp.static(captured_var == "world"):
350
+ results[0] = 1
351
+ else:
352
+ results[0] = 2
353
+
354
+ results = wp.zeros(1, dtype=int, device=device)
355
+ wp.launch(static_condition2, 1, [results], device=device)
356
+ results = results.numpy()
357
+ assert_np_equal(results[0], 2)
358
+ if hasattr(static_condition2.adj, "blocks"):
359
+ counts = count_ssa_occurrences(static_condition2, ["if", "else"])
360
+ assert_np_equal(counts["if"], 1)
361
+ assert_np_equal(counts["else"], 0)
362
+
363
+ my_list = [1, 2, 3]
364
+
365
+ @wp.kernel
366
+ def static_condition3(results: wp.array(dtype=int)):
367
+ if wp.static(len(my_list) == 0):
368
+ results[0] = 0
369
+ elif wp.static(len(my_list) == 1):
370
+ results[0] = 1
371
+ elif wp.static(len(my_list) == 2):
372
+ results[0] = 2
373
+ elif wp.static(len(my_list) == 3):
374
+ results[0] = 3
375
+
376
+ results = wp.zeros(1, dtype=int, device=device)
377
+ wp.launch(static_condition3, 1, [results], device=device)
378
+ results = results.numpy()
379
+ assert_np_equal(results[0], 3)
380
+ if hasattr(static_condition3.adj, "blocks"):
381
+ counts = count_ssa_occurrences(static_condition3, ["if", "else"])
382
+ assert_np_equal(counts["if"], 4)
383
+ assert_np_equal(counts["else"], 0)
384
+
385
+
386
+ devices = get_test_devices()
387
+
388
+
389
+ class TestStatic(unittest.TestCase):
390
+ def test_static_python_call(self):
391
+ # ensure wp.static() works from a Python context
392
+ self.assertEqual(static_global_variable_func(), 5)
393
+
394
+
395
+ add_function_test(TestStatic, "test_static_global_variable", test_static_global_variable, devices=devices)
396
+ add_function_test(TestStatic, "test_construct_static_struct", test_construct_static_struct, devices=devices)
397
+ add_function_test(
398
+ TestStatic, "test_construct_static_nested_struct", test_construct_static_nested_struct, devices=devices
399
+ )
400
+ add_function_test(TestStatic, "test_function_variable", test_function_variable, devices=devices)
401
+ add_function_test(TestStatic, "test_function_lookup", test_function_lookup, devices=devices)
402
+ add_function_test(TestStatic, "test_invalid_static_expression", test_invalid_static_expression, devices=devices)
403
+ add_function_test(
404
+ TestStatic, "test_static_expression_return_types", test_static_expression_return_types, devices=devices
405
+ )
406
+ add_function_test(TestStatic, "test_static_for_loop", test_static_for_loop, devices=devices)
407
+ add_function_test(TestStatic, "test_static_if_else_elif", test_static_if_else_elif, devices=devices)
408
+
409
+
410
+ if __name__ == "__main__":
411
+ wp.clear_kernel_cache()
412
+ unittest.main(verbosity=2)
@@ -11,7 +11,7 @@ import numpy as np
11
11
 
12
12
  import warp as wp
13
13
  from warp.tests.unittest_utils import *
14
- from warp.utils import check_iommu
14
+ from warp.utils import check_p2p
15
15
 
16
16
 
17
17
  @wp.kernel
@@ -334,6 +334,65 @@ def test_event_elapsed_time(test, device):
334
334
  test.assertGreater(elapsed, 0)
335
335
 
336
336
 
337
+ def test_stream_priority_basics(test, device):
338
+ standard_stream = wp.Stream(device)
339
+ test.assertEqual(standard_stream.priority, 0, "Default priority of streams must be 0.")
340
+
341
+ # Create a high-priority stream with a priority value that is smaller than -1 (clamping expected)
342
+ stream_hi = wp.Stream(device, priority=-100)
343
+
344
+ # Create a low-priority stream with a priority value that is greater than 0 (clamping expected)
345
+ stream_lo = wp.Stream(device, priority=100)
346
+
347
+ if stream_lo.priority == stream_hi.priority:
348
+ test.skipTest("Device must support stream priorities.")
349
+
350
+ test.assertEqual(stream_hi.priority, -1)
351
+
352
+ test.assertEqual(stream_lo.priority, 0)
353
+
354
+ with test.assertRaises(TypeError):
355
+ stream_invalid_priority = wp.Stream(device, priority=0.5)
356
+
357
+
358
+ def test_stream_priority_timings(test, device):
359
+ total_size = 256 * 1024 * 1024
360
+ each_size = 128 * 1024 * 1024
361
+
362
+ array_lo = wp.zeros(total_size, dtype=wp.float32, device=device)
363
+ array_hi = wp.zeros(total_size, dtype=wp.float32, device=device)
364
+
365
+ stream_lo = wp.Stream(device, 0)
366
+ stream_hi = wp.Stream(device, -1)
367
+
368
+ if stream_lo.priority == stream_hi.priority:
369
+ test.skipTest("Device must support stream priorities.")
370
+
371
+ # Create some events
372
+ start_lo_event = wp.Event(device, enable_timing=True)
373
+ start_hi_event = wp.Event(device, enable_timing=True)
374
+ end_lo_event = wp.Event(device, enable_timing=True)
375
+ end_hi_event = wp.Event(device, enable_timing=True)
376
+
377
+ wp.synchronize_device(device)
378
+
379
+ stream_lo.record_event(start_lo_event)
380
+ stream_hi.record_event(start_hi_event)
381
+
382
+ for copy_offset in range(0, total_size, each_size):
383
+ wp.copy(array_lo, array_lo, copy_offset, copy_offset, each_size, stream_lo)
384
+ wp.copy(array_hi, array_hi, copy_offset, copy_offset, each_size, stream_hi)
385
+
386
+ stream_lo.record_event(end_lo_event)
387
+ stream_hi.record_event(end_hi_event)
388
+
389
+ # get elapsed time between the two events
390
+ elapsed_lo = wp.get_event_elapsed_time(start_lo_event, end_lo_event)
391
+ elapsed_hi = wp.get_event_elapsed_time(start_hi_event, end_hi_event)
392
+
393
+ test.assertLess(elapsed_hi, elapsed_lo, "Copies on higher-priority stream should be faster.")
394
+
395
+
337
396
  devices = get_selected_cuda_test_devices()
338
397
 
339
398
 
@@ -359,7 +418,7 @@ class TestStreams(unittest.TestCase):
359
418
  cpu_stream = cpu_device.stream # noqa: F841
360
419
 
361
420
  @unittest.skipUnless(len(wp.get_cuda_devices()) > 1, "Requires at least two CUDA devices")
362
- @unittest.skipUnless(check_iommu(), "IOMMU seems enabled")
421
+ @unittest.skipUnless(check_p2p(), "Peer-to-Peer transfers not supported")
363
422
  def test_stream_arg_graph_mgpu(self):
364
423
  wp.load_module(device="cuda:0")
365
424
  wp.load_module(device="cuda:1")
@@ -409,7 +468,7 @@ class TestStreams(unittest.TestCase):
409
468
  assert_np_equal(c0.numpy(), np.full(N, fill_value=2 * num_iters))
410
469
 
411
470
  @unittest.skipUnless(len(wp.get_cuda_devices()) > 1, "Requires at least two CUDA devices")
412
- @unittest.skipUnless(check_iommu(), "IOMMU seems enabled")
471
+ @unittest.skipUnless(check_p2p(), "Peer-to-Peer transfers not supported")
413
472
  def test_stream_scope_graph_mgpu(self):
414
473
  wp.load_module(device="cuda:0")
415
474
  wp.load_module(device="cuda:1")
@@ -485,6 +544,8 @@ add_function_test(TestStreams, "test_stream_arg_wait_stream", test_stream_arg_wa
485
544
  add_function_test(TestStreams, "test_stream_scope_synchronize", test_stream_scope_synchronize, devices=devices)
486
545
  add_function_test(TestStreams, "test_stream_scope_wait_event", test_stream_scope_wait_event, devices=devices)
487
546
  add_function_test(TestStreams, "test_stream_scope_wait_stream", test_stream_scope_wait_stream, devices=devices)
547
+ add_function_test(TestStreams, "test_stream_priority_basics", test_stream_priority_basics, devices=devices)
548
+ add_function_test(TestStreams, "test_stream_priority_timings", test_stream_priority_timings, devices=devices)
488
549
 
489
550
  add_function_test(TestStreams, "test_event_synchronize", test_event_synchronize, devices=devices)
490
551
  add_function_test(TestStreams, "test_event_elapsed_time", test_event_elapsed_time, devices=devices)
warp/tests/test_struct.py CHANGED
@@ -589,7 +589,7 @@ def test_dependent_module_import(c: DependentModuleImport_C):
589
589
  wp.tid() # nop, we're just testing codegen
590
590
 
591
591
 
592
- def test_struct_array_content_hash(test, device):
592
+ def test_struct_array_hash(test, device):
593
593
  # Ensure that the memory address of the struct does not affect the content hash
594
594
 
595
595
  @wp.struct
@@ -611,7 +611,7 @@ def test_struct_array_content_hash(test, device):
611
611
  def dummy_kernel(a: wp.array(dtype=ContentHashStruct)):
612
612
  i = wp.tid()
613
613
 
614
- module_hash_1 = wp.get_module(dummy_kernel.__module__).hash_module(recompute_content_hash=True)
614
+ module_hash_1 = wp.get_module(dummy_kernel.__module__).hash_module()
615
615
 
616
616
  test.assertEqual(
617
617
  module_hash_1,
@@ -628,7 +628,7 @@ def test_struct_array_content_hash(test, device):
628
628
  def dummy_kernel(a: wp.array(dtype=ContentHashStruct)):
629
629
  i = wp.tid()
630
630
 
631
- module_hash_2 = wp.get_module(dummy_kernel.__module__).hash_module(recompute_content_hash=True)
631
+ module_hash_2 = wp.get_module(dummy_kernel.__module__).hash_module()
632
632
 
633
633
  test.assertNotEqual(
634
634
  module_hash_2, module_hash_0, "Module hash should be different when ContentHashStruct redefined and changed."
@@ -718,7 +718,7 @@ add_kernel_test(
718
718
  devices=devices,
719
719
  )
720
720
 
721
- add_function_test(TestStruct, "test_struct_array_content_hash", test_struct_array_content_hash, devices=None)
721
+ add_function_test(TestStruct, "test_struct_array_hash", test_struct_array_hash, devices=None)
722
722
 
723
723
 
724
724
  if __name__ == "__main__":
warp/tests/test_torch.py CHANGED
@@ -382,6 +382,27 @@ def test_array_ctype_from_torch(test, device):
382
382
  wrap_vec_tensor_with_warp_grad(wp.transform)
383
383
 
384
384
 
385
+ def test_cuda_array_interface(test, device):
386
+ # We should be able to construct Torch tensors from Warp arrays via __cuda_array_interface__ on GPU.
387
+ # Note that Torch does not support __array_interface__ on CPU.
388
+
389
+ torch_device = wp.device_to_torch(device)
390
+ n = 10
391
+
392
+ # test the types supported by both Warp and Torch
393
+ scalar_types = [wp.float16, wp.float32, wp.float64, wp.int8, wp.int16, wp.int32, wp.int64, wp.uint8]
394
+
395
+ for dtype in scalar_types:
396
+ # test round trip
397
+ a1 = wp.zeros(n, dtype=dtype, device=device)
398
+ t = torch.tensor(a1, device=torch_device)
399
+ a2 = wp.array(t, device=device)
400
+
401
+ assert a1.dtype == a2.dtype
402
+ assert a1.shape == a2.shape
403
+ assert a1.strides == a2.strides
404
+
405
+
385
406
  def test_to_torch(test, device):
386
407
  import torch
387
408
 
@@ -918,6 +939,9 @@ try:
918
939
  test_warp_graph_torch_stream,
919
940
  devices=torch_compatible_cuda_devices,
920
941
  )
942
+ add_function_test(
943
+ TestTorch, "test_cuda_array_interface", test_cuda_array_interface, devices=torch_compatible_cuda_devices
944
+ )
921
945
 
922
946
  # multi-GPU tests
923
947
  if len(torch_compatible_cuda_devices) > 1: