warp-lang 1.3.2__py3-none-macosx_10_13_universal2.whl → 1.4.0__py3-none-macosx_10_13_universal2.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of warp-lang might be problematic. Click here for more details.
- warp/__init__.py +6 -0
- warp/autograd.py +59 -6
- warp/bin/libwarp.dylib +0 -0
- warp/build_dll.py +8 -10
- warp/builtins.py +126 -4
- warp/codegen.py +435 -53
- warp/config.py +1 -1
- warp/context.py +678 -403
- warp/dlpack.py +2 -0
- warp/examples/benchmarks/benchmark_cloth.py +10 -0
- warp/examples/core/example_render_opengl.py +12 -10
- warp/examples/fem/example_adaptive_grid.py +251 -0
- warp/examples/fem/example_apic_fluid.py +1 -1
- warp/examples/fem/example_diffusion_3d.py +2 -2
- warp/examples/fem/example_magnetostatics.py +1 -1
- warp/examples/fem/example_streamlines.py +1 -0
- warp/examples/fem/utils.py +23 -4
- warp/examples/sim/example_cloth.py +50 -6
- warp/fem/__init__.py +2 -0
- warp/fem/adaptivity.py +493 -0
- warp/fem/field/field.py +2 -1
- warp/fem/field/nodal_field.py +18 -26
- warp/fem/field/test.py +4 -4
- warp/fem/field/trial.py +4 -4
- warp/fem/geometry/__init__.py +1 -0
- warp/fem/geometry/adaptive_nanogrid.py +843 -0
- warp/fem/geometry/nanogrid.py +55 -28
- warp/fem/space/__init__.py +1 -1
- warp/fem/space/nanogrid_function_space.py +69 -35
- warp/fem/utils.py +113 -107
- warp/jax_experimental.py +28 -15
- warp/native/array.h +0 -1
- warp/native/builtin.h +103 -6
- warp/native/bvh.cu +2 -0
- warp/native/cuda_util.cpp +14 -0
- warp/native/cuda_util.h +2 -0
- warp/native/error.cpp +4 -2
- warp/native/exports.h +99 -17
- warp/native/mat.h +97 -0
- warp/native/mesh.cpp +36 -0
- warp/native/mesh.cu +51 -0
- warp/native/mesh.h +1 -0
- warp/native/quat.h +43 -0
- warp/native/spatial.h +6 -0
- warp/native/vec.h +74 -0
- warp/native/warp.cpp +2 -1
- warp/native/warp.cu +10 -3
- warp/native/warp.h +8 -1
- warp/paddle.py +382 -0
- warp/sim/__init__.py +1 -0
- warp/sim/collide.py +519 -0
- warp/sim/integrator_euler.py +18 -5
- warp/sim/integrator_featherstone.py +5 -5
- warp/sim/integrator_vbd.py +1026 -0
- warp/sim/model.py +49 -23
- warp/stubs.py +459 -0
- warp/tape.py +2 -0
- warp/tests/aux_test_dependent.py +1 -0
- warp/tests/aux_test_name_clash1.py +32 -0
- warp/tests/aux_test_name_clash2.py +32 -0
- warp/tests/aux_test_square.py +1 -0
- warp/tests/test_array.py +222 -0
- warp/tests/test_async.py +3 -3
- warp/tests/test_atomic.py +6 -0
- warp/tests/test_closest_point_edge_edge.py +93 -1
- warp/tests/test_codegen.py +62 -15
- warp/tests/test_codegen_instancing.py +1457 -0
- warp/tests/test_collision.py +486 -0
- warp/tests/test_compile_consts.py +3 -28
- warp/tests/test_dlpack.py +170 -0
- warp/tests/test_examples.py +22 -8
- warp/tests/test_fast_math.py +10 -4
- warp/tests/test_fem.py +64 -0
- warp/tests/test_func.py +46 -0
- warp/tests/test_implicit_init.py +49 -0
- warp/tests/test_jax.py +58 -0
- warp/tests/test_mat.py +84 -0
- warp/tests/test_mesh_query_point.py +188 -0
- warp/tests/test_module_hashing.py +40 -0
- warp/tests/test_multigpu.py +3 -3
- warp/tests/test_overwrite.py +8 -0
- warp/tests/test_paddle.py +852 -0
- warp/tests/test_print.py +89 -0
- warp/tests/test_quat.py +111 -0
- warp/tests/test_reload.py +31 -1
- warp/tests/test_scalar_ops.py +2 -0
- warp/tests/test_static.py +412 -0
- warp/tests/test_streams.py +64 -3
- warp/tests/test_struct.py +4 -4
- warp/tests/test_torch.py +24 -0
- warp/tests/test_triangle_closest_point.py +137 -0
- warp/tests/test_types.py +1 -1
- warp/tests/test_vbd.py +386 -0
- warp/tests/test_vec.py +143 -0
- warp/tests/test_vec_scalar_ops.py +139 -0
- warp/tests/test_volume.py +30 -0
- warp/tests/unittest_suites.py +12 -0
- warp/tests/unittest_utils.py +9 -5
- warp/thirdparty/dlpack.py +3 -1
- warp/types.py +157 -34
- warp/utils.py +37 -14
- {warp_lang-1.3.2.dist-info → warp_lang-1.4.0.dist-info}/METADATA +10 -8
- {warp_lang-1.3.2.dist-info → warp_lang-1.4.0.dist-info}/RECORD +106 -94
- warp/tests/test_point_triangle_closest_point.py +0 -143
- {warp_lang-1.3.2.dist-info → warp_lang-1.4.0.dist-info}/LICENSE.md +0 -0
- {warp_lang-1.3.2.dist-info → warp_lang-1.4.0.dist-info}/WHEEL +0 -0
- {warp_lang-1.3.2.dist-info → warp_lang-1.4.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,412 @@
|
|
|
1
|
+
# Copyright (c) 2024 NVIDIA CORPORATION. All rights reserved.
|
|
2
|
+
# NVIDIA CORPORATION and its licensors retain all intellectual property
|
|
3
|
+
# and proprietary rights in and to this software, related documentation
|
|
4
|
+
# and any modifications thereto. Any use, reproduction, disclosure or
|
|
5
|
+
# distribution of this software and related documentation without an express
|
|
6
|
+
# license agreement from NVIDIA CORPORATION is strictly prohibited.
|
|
7
|
+
|
|
8
|
+
import unittest
|
|
9
|
+
from typing import Dict, List
|
|
10
|
+
|
|
11
|
+
import numpy as np
|
|
12
|
+
|
|
13
|
+
import warp
|
|
14
|
+
import warp as wp
|
|
15
|
+
from warp.tests.unittest_utils import *
|
|
16
|
+
|
|
17
|
+
global_variable = 3
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@wp.func
|
|
21
|
+
def static_global_variable_func():
|
|
22
|
+
static_var = warp.static(global_variable + 2)
|
|
23
|
+
return static_var
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@wp.kernel
|
|
27
|
+
def static_global_variable_kernel(results: wp.array(dtype=int)):
|
|
28
|
+
# evaluate a constant expression at codegen time
|
|
29
|
+
static_var = static_global_variable_func()
|
|
30
|
+
const_var = 3
|
|
31
|
+
# call a function at codegen time
|
|
32
|
+
static_func_result = wp.static(static_global_variable_func() + const_var)
|
|
33
|
+
results[0] = static_var
|
|
34
|
+
results[1] = static_func_result
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@wp.struct
|
|
38
|
+
class StaticallyConstructableStruct:
|
|
39
|
+
mat: wp.mat33
|
|
40
|
+
vec: wp.vec3
|
|
41
|
+
i: int
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
@wp.struct
|
|
45
|
+
class StaticallyConstructableNestedStruct:
|
|
46
|
+
s: StaticallyConstructableStruct
|
|
47
|
+
tf: wp.transform
|
|
48
|
+
quat: wp.quat
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
@wp.func
|
|
52
|
+
def construct_struct(mat: wp.mat33, vec: wp.vec3, i: int):
|
|
53
|
+
s = StaticallyConstructableStruct()
|
|
54
|
+
s.mat = mat
|
|
55
|
+
s.vec = vec
|
|
56
|
+
s.i = i
|
|
57
|
+
return s
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
@wp.func
|
|
61
|
+
def construct_nested_struct(mat: wp.mat33, vec: wp.vec3, i: int, tf: wp.transform, quat: wp.quat):
|
|
62
|
+
n = StaticallyConstructableNestedStruct()
|
|
63
|
+
n.s = construct_struct(mat, vec, i)
|
|
64
|
+
n.tf = tf
|
|
65
|
+
n.quat = quat
|
|
66
|
+
return n
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
@wp.kernel
|
|
70
|
+
def construct_static_struct_kernel(results: wp.array(dtype=StaticallyConstructableStruct)):
|
|
71
|
+
static_struct = wp.static(
|
|
72
|
+
construct_struct(
|
|
73
|
+
wp.mat33(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0),
|
|
74
|
+
wp.vec3(1.0, 2.0, 3.0),
|
|
75
|
+
1,
|
|
76
|
+
)
|
|
77
|
+
)
|
|
78
|
+
results[0] = static_struct
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
@wp.kernel
|
|
82
|
+
def construct_static_nested_struct_kernel(results: wp.array(dtype=StaticallyConstructableNestedStruct)):
|
|
83
|
+
static_struct = wp.static(
|
|
84
|
+
construct_nested_struct(
|
|
85
|
+
wp.mat33(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0),
|
|
86
|
+
wp.vec3(1.0, 2.0, 3.0),
|
|
87
|
+
1,
|
|
88
|
+
wp.transform(wp.vec3(1.0, 2.0, 3.0), wp.quat_from_axis_angle(wp.vec3(0.0, 1.0, 0.0), wp.pi / 2.0)),
|
|
89
|
+
wp.quat_from_axis_angle(wp.normalize(wp.vec3(1.0, 2.0, 3.0)), wp.pi / 2.0),
|
|
90
|
+
)
|
|
91
|
+
)
|
|
92
|
+
results[0] = static_struct
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def test_static_global_variable(test, device):
|
|
96
|
+
results = wp.zeros(2, dtype=int, device=device)
|
|
97
|
+
wp.launch(static_global_variable_kernel, 1, [results], device=device)
|
|
98
|
+
assert_np_equal(results.numpy(), np.array([5, 8], dtype=int))
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def test_construct_static_struct(test, device):
|
|
102
|
+
results = wp.zeros(1, dtype=StaticallyConstructableStruct, device=device)
|
|
103
|
+
wp.launch(construct_static_struct_kernel, 1, [results], device=device)
|
|
104
|
+
results = results.numpy()
|
|
105
|
+
assert_np_equal(results[0][0], np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]]))
|
|
106
|
+
assert_np_equal(results[0][1], np.array([1.0, 2.0, 3.0]))
|
|
107
|
+
assert_np_equal(results[0][2], 1)
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def test_construct_static_nested_struct(test, device):
|
|
111
|
+
results = wp.zeros(1, dtype=StaticallyConstructableNestedStruct, device=device)
|
|
112
|
+
wp.launch(construct_static_nested_struct_kernel, 1, [results], device=device)
|
|
113
|
+
results = results.numpy()
|
|
114
|
+
|
|
115
|
+
tf = wp.transform(wp.vec3(1.0, 2.0, 3.0), wp.quat_from_axis_angle(wp.vec3(0.0, 1.0, 0.0), wp.pi / 2.0))
|
|
116
|
+
quat = wp.quat_from_axis_angle(wp.normalize(wp.vec3(1.0, 2.0, 3.0)), wp.pi / 2.0)
|
|
117
|
+
|
|
118
|
+
assert_np_equal(results[0][0][0], np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]]))
|
|
119
|
+
assert_np_equal(results[0][0][1], np.array([1.0, 2.0, 3.0]))
|
|
120
|
+
assert_np_equal(results[0][0][2], 1)
|
|
121
|
+
assert_np_equal(results[0][1], np.array(tf))
|
|
122
|
+
assert_np_equal(results[0][2], np.array(quat))
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def test_invalid_static_expression(test, device):
|
|
126
|
+
@wp.kernel
|
|
127
|
+
def invalid_kernel():
|
|
128
|
+
wp.static(1.0 / 0.0)
|
|
129
|
+
|
|
130
|
+
with test.assertRaisesRegex(
|
|
131
|
+
warp.codegen.WarpCodegenError, r"Error evaluating static expression\: float division by zero"
|
|
132
|
+
):
|
|
133
|
+
wp.launch(invalid_kernel, 1, device=device)
|
|
134
|
+
|
|
135
|
+
@wp.kernel
|
|
136
|
+
def invalid_kernel(i: int):
|
|
137
|
+
wp.static(i * 2)
|
|
138
|
+
|
|
139
|
+
with test.assertRaisesRegex(
|
|
140
|
+
wp.codegen.WarpCodegenError,
|
|
141
|
+
r"Error evaluating static expression\: name 'i' is not defined\. Make sure all variables used in the static expression are constant\.",
|
|
142
|
+
):
|
|
143
|
+
wp.launch(invalid_kernel, 1, device=device, inputs=[3])
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def test_static_expression_return_types(test, device):
|
|
147
|
+
@wp.kernel
|
|
148
|
+
def invalid_kernel():
|
|
149
|
+
wp.static(wp.zeros(3, device=device))
|
|
150
|
+
|
|
151
|
+
with test.assertRaisesRegex(
|
|
152
|
+
warp.codegen.WarpCodegenError,
|
|
153
|
+
r"Static expression returns an unsupported value\: a Warp array cannot be created inside Warp kernels",
|
|
154
|
+
):
|
|
155
|
+
wp.launch(invalid_kernel, 1, device=device)
|
|
156
|
+
|
|
157
|
+
@wp.struct
|
|
158
|
+
class Baz:
|
|
159
|
+
data: wp.array(dtype=int)
|
|
160
|
+
z: wp.vec3
|
|
161
|
+
|
|
162
|
+
@wp.struct
|
|
163
|
+
class Bar:
|
|
164
|
+
baz: Baz
|
|
165
|
+
y: float
|
|
166
|
+
|
|
167
|
+
@wp.struct
|
|
168
|
+
class Foo:
|
|
169
|
+
bar: Bar
|
|
170
|
+
x: int
|
|
171
|
+
|
|
172
|
+
def create_struct():
|
|
173
|
+
foo = Foo()
|
|
174
|
+
foo.bar = Bar()
|
|
175
|
+
foo.bar.baz = Baz()
|
|
176
|
+
foo.bar.baz.data = wp.zeros(3, dtype=int, device=device)
|
|
177
|
+
foo.bar.baz.z = wp.vec3(1, 2, 3)
|
|
178
|
+
foo.bar.y = 1.23
|
|
179
|
+
foo.x = 123
|
|
180
|
+
return foo
|
|
181
|
+
|
|
182
|
+
@wp.kernel
|
|
183
|
+
def invalid_kernel():
|
|
184
|
+
wp.static(create_struct())
|
|
185
|
+
|
|
186
|
+
with test.assertRaisesRegex(
|
|
187
|
+
warp.codegen.WarpCodegenError,
|
|
188
|
+
r"Static expression returns an unsupported value: the returned Warp struct contains a data type that cannot be constructed inside Warp kernels\: a Warp array cannot be created inside Warp kernels at .*?Foo\.bar\.baz",
|
|
189
|
+
):
|
|
190
|
+
wp.launch(invalid_kernel, 1, device=device)
|
|
191
|
+
|
|
192
|
+
def function_with_no_return_value():
|
|
193
|
+
pass
|
|
194
|
+
|
|
195
|
+
@wp.kernel
|
|
196
|
+
def invalid_kernel():
|
|
197
|
+
wp.static(function_with_no_return_value())
|
|
198
|
+
|
|
199
|
+
with test.assertRaisesRegex(
|
|
200
|
+
warp.codegen.WarpCodegenError,
|
|
201
|
+
r"Static expression returns an unsupported value\: None is returned",
|
|
202
|
+
):
|
|
203
|
+
wp.launch(invalid_kernel, 1, device=device)
|
|
204
|
+
|
|
205
|
+
class MyClass:
|
|
206
|
+
pass
|
|
207
|
+
|
|
208
|
+
@wp.kernel
|
|
209
|
+
def invalid_kernel():
|
|
210
|
+
wp.static(MyClass())
|
|
211
|
+
|
|
212
|
+
with test.assertRaisesRegex(
|
|
213
|
+
warp.codegen.WarpCodegenError,
|
|
214
|
+
r"Static expression returns an unsupported value\: value of type .*?MyClass",
|
|
215
|
+
):
|
|
216
|
+
wp.launch(invalid_kernel, 1, device=device)
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
def test_function_variable(test, device):
|
|
220
|
+
# create a function and pass it in as a static variable to the kernel
|
|
221
|
+
@wp.func
|
|
222
|
+
def func1(a: int, b: int):
|
|
223
|
+
return a + b
|
|
224
|
+
|
|
225
|
+
@wp.func
|
|
226
|
+
def func2(a: int, b: int):
|
|
227
|
+
return a - b
|
|
228
|
+
|
|
229
|
+
for func in [func1, func2]:
|
|
230
|
+
# note that this example also works without using wp.static()
|
|
231
|
+
|
|
232
|
+
@wp.kernel
|
|
233
|
+
def function_variable_kernel(results: wp.array(dtype=int)):
|
|
234
|
+
results[0] = wp.static(func)(3, 2) # noqa: B023
|
|
235
|
+
|
|
236
|
+
results = wp.zeros(1, dtype=int, device=device)
|
|
237
|
+
# note that the kernel has to be recompiled every time the value of func changes
|
|
238
|
+
wp.launch(function_variable_kernel, 1, [results], device=device)
|
|
239
|
+
assert_np_equal(results.numpy(), np.array([func(3, 2)], dtype=int))
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
def test_function_lookup(test, device):
|
|
243
|
+
@wp.func
|
|
244
|
+
def do_add(a: float, b: float):
|
|
245
|
+
return a + b
|
|
246
|
+
|
|
247
|
+
@wp.func
|
|
248
|
+
def do_sub(a: float, b: float):
|
|
249
|
+
return a - b
|
|
250
|
+
|
|
251
|
+
@wp.func
|
|
252
|
+
def do_mul(a: float, b: float):
|
|
253
|
+
return a * b
|
|
254
|
+
|
|
255
|
+
op_handlers = {
|
|
256
|
+
"add": do_add,
|
|
257
|
+
"sub": do_sub,
|
|
258
|
+
"mul": do_mul,
|
|
259
|
+
}
|
|
260
|
+
|
|
261
|
+
inputs = wp.array([[1, 2], [3, 0]], dtype=wp.float32)
|
|
262
|
+
|
|
263
|
+
outputs = wp.empty(2, dtype=wp.float32)
|
|
264
|
+
|
|
265
|
+
for op in op_handlers.keys():
|
|
266
|
+
|
|
267
|
+
@wp.kernel
|
|
268
|
+
def operate(input: wp.array(dtype=inputs.dtype, ndim=2), output: wp.array(dtype=wp.float32)):
|
|
269
|
+
tid = wp.tid()
|
|
270
|
+
a, b = input[tid, 0], input[tid, 1]
|
|
271
|
+
# retrieve the right function to use for the captured op variable
|
|
272
|
+
output[tid] = wp.static(op_handlers[op])(a, b) # noqa: B023
|
|
273
|
+
|
|
274
|
+
wp.launch(operate, dim=2, inputs=[inputs], outputs=[outputs])
|
|
275
|
+
outputs_np = outputs.numpy()
|
|
276
|
+
inputs_np = inputs.numpy()
|
|
277
|
+
for i in range(len(outputs_np)):
|
|
278
|
+
test.assertEqual(outputs_np[i], op_handlers[op](float(inputs_np[i][0]), float(inputs_np[i][1])))
|
|
279
|
+
|
|
280
|
+
|
|
281
|
+
def count_ssa_occurrences(kernel: wp.Kernel, ssas: List[str]) -> Dict[str, int]:
|
|
282
|
+
# analyze the generated code
|
|
283
|
+
counts = {ssa: 0 for ssa in ssas}
|
|
284
|
+
for line in kernel.adj.blocks[0].body_forward:
|
|
285
|
+
for ssa in ssas:
|
|
286
|
+
if ssa in line:
|
|
287
|
+
counts[ssa] += 1
|
|
288
|
+
return counts
|
|
289
|
+
|
|
290
|
+
|
|
291
|
+
def test_static_for_loop(test, device):
|
|
292
|
+
@wp.kernel
|
|
293
|
+
def static_loop_variable(results: wp.array(dtype=int)):
|
|
294
|
+
s = 0
|
|
295
|
+
for i in range(wp.static(static_global_variable_func())):
|
|
296
|
+
s += wp.static(i)
|
|
297
|
+
results[0] = s
|
|
298
|
+
|
|
299
|
+
wp.set_module_options(
|
|
300
|
+
options={"max_unroll": static_global_variable_func()},
|
|
301
|
+
)
|
|
302
|
+
|
|
303
|
+
results = wp.zeros(1, dtype=int, device=device)
|
|
304
|
+
wp.launch(static_loop_variable, 1, [results], device=device)
|
|
305
|
+
results = results.numpy()
|
|
306
|
+
|
|
307
|
+
s = 0
|
|
308
|
+
for i in range(wp.static(static_global_variable_func())):
|
|
309
|
+
s += wp.static(i)
|
|
310
|
+
|
|
311
|
+
test.assertEqual(results[0], s, "Static for loop has to compute the correct solution")
|
|
312
|
+
|
|
313
|
+
# analyze the generated code
|
|
314
|
+
if hasattr(static_loop_variable.adj, "blocks"):
|
|
315
|
+
counts = count_ssa_occurrences(static_loop_variable, ["add", "for"])
|
|
316
|
+
|
|
317
|
+
test.assertEqual(counts["add"], static_global_variable_func(), "Static for loop must be unrolled")
|
|
318
|
+
# there is just one occurrence of "for" in the comment referring to the original Python code
|
|
319
|
+
test.assertEqual(counts["for"], 1, "Static for loop must be unrolled")
|
|
320
|
+
|
|
321
|
+
|
|
322
|
+
def test_static_if_else_elif(test, device):
|
|
323
|
+
@wp.kernel
|
|
324
|
+
def static_condition1(results: wp.array(dtype=int)):
|
|
325
|
+
if wp.static(static_global_variable_func() in {2, 3, 5}):
|
|
326
|
+
results[0] = 1
|
|
327
|
+
elif wp.static(static_global_variable_func() in {0, 1}):
|
|
328
|
+
results[0] = 2
|
|
329
|
+
else:
|
|
330
|
+
results[0] = 3
|
|
331
|
+
|
|
332
|
+
results = wp.zeros(1, dtype=int, device=device)
|
|
333
|
+
wp.launch(static_condition1, 1, [results], device=device)
|
|
334
|
+
results = results.numpy()
|
|
335
|
+
assert_np_equal(results[0], 1)
|
|
336
|
+
# TODO this needs fixing to ensure we can run these tests multiple times
|
|
337
|
+
if hasattr(static_condition1.adj, "blocks"):
|
|
338
|
+
counts = count_ssa_occurrences(static_condition1, ["if", "else"])
|
|
339
|
+
|
|
340
|
+
# if, else, elif can appear as comments but the generated code must not contain
|
|
341
|
+
# such keywords since the conditions are resolved at the time of code generation
|
|
342
|
+
assert_np_equal(counts["if"], 1)
|
|
343
|
+
assert_np_equal(counts["else"], 0)
|
|
344
|
+
|
|
345
|
+
captured_var = "hello"
|
|
346
|
+
|
|
347
|
+
@wp.kernel
|
|
348
|
+
def static_condition2(results: wp.array(dtype=int)):
|
|
349
|
+
if wp.static(captured_var == "world"):
|
|
350
|
+
results[0] = 1
|
|
351
|
+
else:
|
|
352
|
+
results[0] = 2
|
|
353
|
+
|
|
354
|
+
results = wp.zeros(1, dtype=int, device=device)
|
|
355
|
+
wp.launch(static_condition2, 1, [results], device=device)
|
|
356
|
+
results = results.numpy()
|
|
357
|
+
assert_np_equal(results[0], 2)
|
|
358
|
+
if hasattr(static_condition2.adj, "blocks"):
|
|
359
|
+
counts = count_ssa_occurrences(static_condition2, ["if", "else"])
|
|
360
|
+
assert_np_equal(counts["if"], 1)
|
|
361
|
+
assert_np_equal(counts["else"], 0)
|
|
362
|
+
|
|
363
|
+
my_list = [1, 2, 3]
|
|
364
|
+
|
|
365
|
+
@wp.kernel
|
|
366
|
+
def static_condition3(results: wp.array(dtype=int)):
|
|
367
|
+
if wp.static(len(my_list) == 0):
|
|
368
|
+
results[0] = 0
|
|
369
|
+
elif wp.static(len(my_list) == 1):
|
|
370
|
+
results[0] = 1
|
|
371
|
+
elif wp.static(len(my_list) == 2):
|
|
372
|
+
results[0] = 2
|
|
373
|
+
elif wp.static(len(my_list) == 3):
|
|
374
|
+
results[0] = 3
|
|
375
|
+
|
|
376
|
+
results = wp.zeros(1, dtype=int, device=device)
|
|
377
|
+
wp.launch(static_condition3, 1, [results], device=device)
|
|
378
|
+
results = results.numpy()
|
|
379
|
+
assert_np_equal(results[0], 3)
|
|
380
|
+
if hasattr(static_condition3.adj, "blocks"):
|
|
381
|
+
counts = count_ssa_occurrences(static_condition3, ["if", "else"])
|
|
382
|
+
assert_np_equal(counts["if"], 4)
|
|
383
|
+
assert_np_equal(counts["else"], 0)
|
|
384
|
+
|
|
385
|
+
|
|
386
|
+
devices = get_test_devices()
|
|
387
|
+
|
|
388
|
+
|
|
389
|
+
class TestStatic(unittest.TestCase):
|
|
390
|
+
def test_static_python_call(self):
|
|
391
|
+
# ensure wp.static() works from a Python context
|
|
392
|
+
self.assertEqual(static_global_variable_func(), 5)
|
|
393
|
+
|
|
394
|
+
|
|
395
|
+
add_function_test(TestStatic, "test_static_global_variable", test_static_global_variable, devices=devices)
|
|
396
|
+
add_function_test(TestStatic, "test_construct_static_struct", test_construct_static_struct, devices=devices)
|
|
397
|
+
add_function_test(
|
|
398
|
+
TestStatic, "test_construct_static_nested_struct", test_construct_static_nested_struct, devices=devices
|
|
399
|
+
)
|
|
400
|
+
add_function_test(TestStatic, "test_function_variable", test_function_variable, devices=devices)
|
|
401
|
+
add_function_test(TestStatic, "test_function_lookup", test_function_lookup, devices=devices)
|
|
402
|
+
add_function_test(TestStatic, "test_invalid_static_expression", test_invalid_static_expression, devices=devices)
|
|
403
|
+
add_function_test(
|
|
404
|
+
TestStatic, "test_static_expression_return_types", test_static_expression_return_types, devices=devices
|
|
405
|
+
)
|
|
406
|
+
add_function_test(TestStatic, "test_static_for_loop", test_static_for_loop, devices=devices)
|
|
407
|
+
add_function_test(TestStatic, "test_static_if_else_elif", test_static_if_else_elif, devices=devices)
|
|
408
|
+
|
|
409
|
+
|
|
410
|
+
if __name__ == "__main__":
|
|
411
|
+
wp.clear_kernel_cache()
|
|
412
|
+
unittest.main(verbosity=2)
|
warp/tests/test_streams.py
CHANGED
|
@@ -11,7 +11,7 @@ import numpy as np
|
|
|
11
11
|
|
|
12
12
|
import warp as wp
|
|
13
13
|
from warp.tests.unittest_utils import *
|
|
14
|
-
from warp.utils import
|
|
14
|
+
from warp.utils import check_p2p
|
|
15
15
|
|
|
16
16
|
|
|
17
17
|
@wp.kernel
|
|
@@ -334,6 +334,65 @@ def test_event_elapsed_time(test, device):
|
|
|
334
334
|
test.assertGreater(elapsed, 0)
|
|
335
335
|
|
|
336
336
|
|
|
337
|
+
def test_stream_priority_basics(test, device):
|
|
338
|
+
standard_stream = wp.Stream(device)
|
|
339
|
+
test.assertEqual(standard_stream.priority, 0, "Default priority of streams must be 0.")
|
|
340
|
+
|
|
341
|
+
# Create a high-priority stream with a priority value that is smaller than -1 (clamping expected)
|
|
342
|
+
stream_hi = wp.Stream(device, priority=-100)
|
|
343
|
+
|
|
344
|
+
# Create a low-priority stream with a priority value that is greater than 0 (clamping expected)
|
|
345
|
+
stream_lo = wp.Stream(device, priority=100)
|
|
346
|
+
|
|
347
|
+
if stream_lo.priority == stream_hi.priority:
|
|
348
|
+
test.skipTest("Device must support stream priorities.")
|
|
349
|
+
|
|
350
|
+
test.assertEqual(stream_hi.priority, -1)
|
|
351
|
+
|
|
352
|
+
test.assertEqual(stream_lo.priority, 0)
|
|
353
|
+
|
|
354
|
+
with test.assertRaises(TypeError):
|
|
355
|
+
stream_invalid_priority = wp.Stream(device, priority=0.5)
|
|
356
|
+
|
|
357
|
+
|
|
358
|
+
def test_stream_priority_timings(test, device):
|
|
359
|
+
total_size = 256 * 1024 * 1024
|
|
360
|
+
each_size = 128 * 1024 * 1024
|
|
361
|
+
|
|
362
|
+
array_lo = wp.zeros(total_size, dtype=wp.float32, device=device)
|
|
363
|
+
array_hi = wp.zeros(total_size, dtype=wp.float32, device=device)
|
|
364
|
+
|
|
365
|
+
stream_lo = wp.Stream(device, 0)
|
|
366
|
+
stream_hi = wp.Stream(device, -1)
|
|
367
|
+
|
|
368
|
+
if stream_lo.priority == stream_hi.priority:
|
|
369
|
+
test.skipTest("Device must support stream priorities.")
|
|
370
|
+
|
|
371
|
+
# Create some events
|
|
372
|
+
start_lo_event = wp.Event(device, enable_timing=True)
|
|
373
|
+
start_hi_event = wp.Event(device, enable_timing=True)
|
|
374
|
+
end_lo_event = wp.Event(device, enable_timing=True)
|
|
375
|
+
end_hi_event = wp.Event(device, enable_timing=True)
|
|
376
|
+
|
|
377
|
+
wp.synchronize_device(device)
|
|
378
|
+
|
|
379
|
+
stream_lo.record_event(start_lo_event)
|
|
380
|
+
stream_hi.record_event(start_hi_event)
|
|
381
|
+
|
|
382
|
+
for copy_offset in range(0, total_size, each_size):
|
|
383
|
+
wp.copy(array_lo, array_lo, copy_offset, copy_offset, each_size, stream_lo)
|
|
384
|
+
wp.copy(array_hi, array_hi, copy_offset, copy_offset, each_size, stream_hi)
|
|
385
|
+
|
|
386
|
+
stream_lo.record_event(end_lo_event)
|
|
387
|
+
stream_hi.record_event(end_hi_event)
|
|
388
|
+
|
|
389
|
+
# get elapsed time between the two events
|
|
390
|
+
elapsed_lo = wp.get_event_elapsed_time(start_lo_event, end_lo_event)
|
|
391
|
+
elapsed_hi = wp.get_event_elapsed_time(start_hi_event, end_hi_event)
|
|
392
|
+
|
|
393
|
+
test.assertLess(elapsed_hi, elapsed_lo, "Copies on higher-priority stream should be faster.")
|
|
394
|
+
|
|
395
|
+
|
|
337
396
|
devices = get_selected_cuda_test_devices()
|
|
338
397
|
|
|
339
398
|
|
|
@@ -359,7 +418,7 @@ class TestStreams(unittest.TestCase):
|
|
|
359
418
|
cpu_stream = cpu_device.stream # noqa: F841
|
|
360
419
|
|
|
361
420
|
@unittest.skipUnless(len(wp.get_cuda_devices()) > 1, "Requires at least two CUDA devices")
|
|
362
|
-
@unittest.skipUnless(
|
|
421
|
+
@unittest.skipUnless(check_p2p(), "Peer-to-Peer transfers not supported")
|
|
363
422
|
def test_stream_arg_graph_mgpu(self):
|
|
364
423
|
wp.load_module(device="cuda:0")
|
|
365
424
|
wp.load_module(device="cuda:1")
|
|
@@ -409,7 +468,7 @@ class TestStreams(unittest.TestCase):
|
|
|
409
468
|
assert_np_equal(c0.numpy(), np.full(N, fill_value=2 * num_iters))
|
|
410
469
|
|
|
411
470
|
@unittest.skipUnless(len(wp.get_cuda_devices()) > 1, "Requires at least two CUDA devices")
|
|
412
|
-
@unittest.skipUnless(
|
|
471
|
+
@unittest.skipUnless(check_p2p(), "Peer-to-Peer transfers not supported")
|
|
413
472
|
def test_stream_scope_graph_mgpu(self):
|
|
414
473
|
wp.load_module(device="cuda:0")
|
|
415
474
|
wp.load_module(device="cuda:1")
|
|
@@ -485,6 +544,8 @@ add_function_test(TestStreams, "test_stream_arg_wait_stream", test_stream_arg_wa
|
|
|
485
544
|
add_function_test(TestStreams, "test_stream_scope_synchronize", test_stream_scope_synchronize, devices=devices)
|
|
486
545
|
add_function_test(TestStreams, "test_stream_scope_wait_event", test_stream_scope_wait_event, devices=devices)
|
|
487
546
|
add_function_test(TestStreams, "test_stream_scope_wait_stream", test_stream_scope_wait_stream, devices=devices)
|
|
547
|
+
add_function_test(TestStreams, "test_stream_priority_basics", test_stream_priority_basics, devices=devices)
|
|
548
|
+
add_function_test(TestStreams, "test_stream_priority_timings", test_stream_priority_timings, devices=devices)
|
|
488
549
|
|
|
489
550
|
add_function_test(TestStreams, "test_event_synchronize", test_event_synchronize, devices=devices)
|
|
490
551
|
add_function_test(TestStreams, "test_event_elapsed_time", test_event_elapsed_time, devices=devices)
|
warp/tests/test_struct.py
CHANGED
|
@@ -589,7 +589,7 @@ def test_dependent_module_import(c: DependentModuleImport_C):
|
|
|
589
589
|
wp.tid() # nop, we're just testing codegen
|
|
590
590
|
|
|
591
591
|
|
|
592
|
-
def
|
|
592
|
+
def test_struct_array_hash(test, device):
|
|
593
593
|
# Ensure that the memory address of the struct does not affect the content hash
|
|
594
594
|
|
|
595
595
|
@wp.struct
|
|
@@ -611,7 +611,7 @@ def test_struct_array_content_hash(test, device):
|
|
|
611
611
|
def dummy_kernel(a: wp.array(dtype=ContentHashStruct)):
|
|
612
612
|
i = wp.tid()
|
|
613
613
|
|
|
614
|
-
module_hash_1 = wp.get_module(dummy_kernel.__module__).hash_module(
|
|
614
|
+
module_hash_1 = wp.get_module(dummy_kernel.__module__).hash_module()
|
|
615
615
|
|
|
616
616
|
test.assertEqual(
|
|
617
617
|
module_hash_1,
|
|
@@ -628,7 +628,7 @@ def test_struct_array_content_hash(test, device):
|
|
|
628
628
|
def dummy_kernel(a: wp.array(dtype=ContentHashStruct)):
|
|
629
629
|
i = wp.tid()
|
|
630
630
|
|
|
631
|
-
module_hash_2 = wp.get_module(dummy_kernel.__module__).hash_module(
|
|
631
|
+
module_hash_2 = wp.get_module(dummy_kernel.__module__).hash_module()
|
|
632
632
|
|
|
633
633
|
test.assertNotEqual(
|
|
634
634
|
module_hash_2, module_hash_0, "Module hash should be different when ContentHashStruct redefined and changed."
|
|
@@ -718,7 +718,7 @@ add_kernel_test(
|
|
|
718
718
|
devices=devices,
|
|
719
719
|
)
|
|
720
720
|
|
|
721
|
-
add_function_test(TestStruct, "
|
|
721
|
+
add_function_test(TestStruct, "test_struct_array_hash", test_struct_array_hash, devices=None)
|
|
722
722
|
|
|
723
723
|
|
|
724
724
|
if __name__ == "__main__":
|
warp/tests/test_torch.py
CHANGED
|
@@ -382,6 +382,27 @@ def test_array_ctype_from_torch(test, device):
|
|
|
382
382
|
wrap_vec_tensor_with_warp_grad(wp.transform)
|
|
383
383
|
|
|
384
384
|
|
|
385
|
+
def test_cuda_array_interface(test, device):
|
|
386
|
+
# We should be able to construct Torch tensors from Warp arrays via __cuda_array_interface__ on GPU.
|
|
387
|
+
# Note that Torch does not support __array_interface__ on CPU.
|
|
388
|
+
|
|
389
|
+
torch_device = wp.device_to_torch(device)
|
|
390
|
+
n = 10
|
|
391
|
+
|
|
392
|
+
# test the types supported by both Warp and Torch
|
|
393
|
+
scalar_types = [wp.float16, wp.float32, wp.float64, wp.int8, wp.int16, wp.int32, wp.int64, wp.uint8]
|
|
394
|
+
|
|
395
|
+
for dtype in scalar_types:
|
|
396
|
+
# test round trip
|
|
397
|
+
a1 = wp.zeros(n, dtype=dtype, device=device)
|
|
398
|
+
t = torch.tensor(a1, device=torch_device)
|
|
399
|
+
a2 = wp.array(t, device=device)
|
|
400
|
+
|
|
401
|
+
assert a1.dtype == a2.dtype
|
|
402
|
+
assert a1.shape == a2.shape
|
|
403
|
+
assert a1.strides == a2.strides
|
|
404
|
+
|
|
405
|
+
|
|
385
406
|
def test_to_torch(test, device):
|
|
386
407
|
import torch
|
|
387
408
|
|
|
@@ -918,6 +939,9 @@ try:
|
|
|
918
939
|
test_warp_graph_torch_stream,
|
|
919
940
|
devices=torch_compatible_cuda_devices,
|
|
920
941
|
)
|
|
942
|
+
add_function_test(
|
|
943
|
+
TestTorch, "test_cuda_array_interface", test_cuda_array_interface, devices=torch_compatible_cuda_devices
|
|
944
|
+
)
|
|
921
945
|
|
|
922
946
|
# multi-GPU tests
|
|
923
947
|
if len(torch_compatible_cuda_devices) > 1:
|