warp-lang 1.5.1-py3-none-macosx_10_13_universal2.whl → 1.6.1-py3-none-macosx_10_13_universal2.whl
This diff compares the contents of publicly available package versions as they were released to their public registries, and is provided for informational purposes only.
Potentially problematic release: this version of warp-lang has been flagged for review.
- warp/__init__.py +5 -0
- warp/autograd.py +414 -191
- warp/bin/libwarp-clang.dylib +0 -0
- warp/bin/libwarp.dylib +0 -0
- warp/build.py +40 -12
- warp/build_dll.py +13 -6
- warp/builtins.py +1077 -481
- warp/codegen.py +250 -122
- warp/config.py +65 -21
- warp/context.py +500 -149
- warp/examples/assets/square_cloth.usd +0 -0
- warp/examples/benchmarks/benchmark_gemm.py +27 -18
- warp/examples/benchmarks/benchmark_interop_paddle.py +3 -3
- warp/examples/benchmarks/benchmark_interop_torch.py +3 -3
- warp/examples/core/example_marching_cubes.py +1 -1
- warp/examples/core/example_mesh.py +1 -1
- warp/examples/core/example_torch.py +18 -34
- warp/examples/core/example_wave.py +1 -1
- warp/examples/fem/example_apic_fluid.py +1 -0
- warp/examples/fem/example_mixed_elasticity.py +1 -1
- warp/examples/optim/example_bounce.py +1 -1
- warp/examples/optim/example_cloth_throw.py +1 -1
- warp/examples/optim/example_diffray.py +4 -15
- warp/examples/optim/example_drone.py +1 -1
- warp/examples/optim/example_softbody_properties.py +392 -0
- warp/examples/optim/example_trajectory.py +1 -3
- warp/examples/optim/example_walker.py +5 -0
- warp/examples/sim/example_cartpole.py +0 -2
- warp/examples/sim/example_cloth_self_contact.py +314 -0
- warp/examples/sim/example_granular_collision_sdf.py +4 -5
- warp/examples/sim/example_jacobian_ik.py +0 -2
- warp/examples/sim/example_quadruped.py +5 -2
- warp/examples/tile/example_tile_cholesky.py +79 -0
- warp/examples/tile/example_tile_convolution.py +2 -2
- warp/examples/tile/example_tile_fft.py +2 -2
- warp/examples/tile/example_tile_filtering.py +3 -3
- warp/examples/tile/example_tile_matmul.py +4 -4
- warp/examples/tile/example_tile_mlp.py +12 -12
- warp/examples/tile/example_tile_nbody.py +191 -0
- warp/examples/tile/example_tile_walker.py +319 -0
- warp/math.py +147 -0
- warp/native/array.h +12 -0
- warp/native/builtin.h +0 -1
- warp/native/bvh.cpp +149 -70
- warp/native/bvh.cu +287 -68
- warp/native/bvh.h +195 -85
- warp/native/clang/clang.cpp +6 -2
- warp/native/crt.h +1 -0
- warp/native/cuda_util.cpp +35 -0
- warp/native/cuda_util.h +5 -0
- warp/native/exports.h +40 -40
- warp/native/intersect.h +17 -0
- warp/native/mat.h +57 -3
- warp/native/mathdx.cpp +19 -0
- warp/native/mesh.cpp +25 -8
- warp/native/mesh.cu +153 -101
- warp/native/mesh.h +482 -403
- warp/native/quat.h +40 -0
- warp/native/solid_angle.h +7 -0
- warp/native/sort.cpp +85 -0
- warp/native/sort.cu +34 -0
- warp/native/sort.h +3 -1
- warp/native/spatial.h +11 -0
- warp/native/tile.h +1189 -664
- warp/native/tile_reduce.h +8 -6
- warp/native/vec.h +41 -0
- warp/native/warp.cpp +8 -1
- warp/native/warp.cu +263 -40
- warp/native/warp.h +19 -5
- warp/optim/linear.py +22 -4
- warp/render/render_opengl.py +132 -59
- warp/render/render_usd.py +10 -2
- warp/sim/__init__.py +6 -1
- warp/sim/collide.py +289 -32
- warp/sim/import_urdf.py +20 -5
- warp/sim/integrator_euler.py +25 -7
- warp/sim/integrator_featherstone.py +147 -35
- warp/sim/integrator_vbd.py +842 -40
- warp/sim/model.py +173 -112
- warp/sim/render.py +2 -2
- warp/stubs.py +249 -116
- warp/tape.py +28 -30
- warp/tests/aux_test_module_unload.py +15 -0
- warp/tests/{test_sim_grad.py → flaky_test_sim_grad.py} +104 -63
- warp/tests/test_array.py +100 -0
- warp/tests/test_assert.py +242 -0
- warp/tests/test_codegen.py +14 -61
- warp/tests/test_collision.py +8 -8
- warp/tests/test_examples.py +16 -1
- warp/tests/test_grad_debug.py +87 -2
- warp/tests/test_hash_grid.py +1 -1
- warp/tests/test_ipc.py +116 -0
- warp/tests/test_launch.py +77 -26
- warp/tests/test_mat.py +213 -168
- warp/tests/test_math.py +47 -1
- warp/tests/test_matmul.py +11 -7
- warp/tests/test_matmul_lite.py +4 -4
- warp/tests/test_mesh.py +84 -60
- warp/tests/test_mesh_query_aabb.py +165 -0
- warp/tests/test_mesh_query_point.py +328 -286
- warp/tests/test_mesh_query_ray.py +134 -121
- warp/tests/test_mlp.py +2 -2
- warp/tests/test_operators.py +43 -0
- warp/tests/test_overwrite.py +6 -5
- warp/tests/test_quat.py +77 -0
- warp/tests/test_reload.py +29 -0
- warp/tests/test_sim_grad_bounce_linear.py +204 -0
- warp/tests/test_static.py +16 -0
- warp/tests/test_tape.py +25 -0
- warp/tests/test_tile.py +134 -191
- warp/tests/test_tile_load.py +399 -0
- warp/tests/test_tile_mathdx.py +61 -8
- warp/tests/test_tile_mlp.py +17 -17
- warp/tests/test_tile_reduce.py +24 -18
- warp/tests/test_tile_shared_memory.py +66 -17
- warp/tests/test_tile_view.py +165 -0
- warp/tests/test_torch.py +35 -0
- warp/tests/test_utils.py +36 -24
- warp/tests/test_vec.py +110 -0
- warp/tests/unittest_suites.py +29 -4
- warp/tests/unittest_utils.py +30 -11
- warp/thirdparty/unittest_parallel.py +5 -2
- warp/types.py +419 -111
- warp/utils.py +9 -5
- {warp_lang-1.5.1.dist-info → warp_lang-1.6.1.dist-info}/METADATA +86 -45
- {warp_lang-1.5.1.dist-info → warp_lang-1.6.1.dist-info}/RECORD +129 -118
- {warp_lang-1.5.1.dist-info → warp_lang-1.6.1.dist-info}/WHEEL +1 -1
- warp/examples/benchmarks/benchmark_tile.py +0 -179
- warp/native/tile_gemm.h +0 -341
- {warp_lang-1.5.1.dist-info → warp_lang-1.6.1.dist-info}/LICENSE.md +0 -0
- {warp_lang-1.5.1.dist-info → warp_lang-1.6.1.dist-info}/top_level.txt +0 -0
warp/tests/test_mat.py
CHANGED
@@ -6,20 +6,14 @@
 # license agreement from NVIDIA CORPORATION is strictly prohibited.
 
 import unittest
+from typing import Any
 
 import numpy as np
 
 import warp as wp
 from warp.tests.unittest_utils import *
 
-np_signed_int_types = [
-    np.int8,
-    np.int16,
-    np.int32,
-    np.int64,
-    np.byte,
-]
-
+np_signed_int_types = [np.int8, np.int16, np.int32, np.int64, np.byte]
 np_float_types = [np.float16, np.float32, np.float64]
 
 
@@ -42,11 +36,7 @@ def getkernel(func, suffix=""):
 
 
 def get_select_kernel(dtype):
-    def output_select_kernel_fn(
-        input: wp.array(dtype=dtype),
-        index: int,
-        out: wp.array(dtype=dtype),
-    ):
+    def output_select_kernel_fn(input: wp.array(dtype=dtype), index: int, out: wp.array(dtype=dtype)):
         out[0] = input[index]
 
     return getkernel(output_select_kernel_fn, suffix=dtype.__name__)
@@ -61,33 +51,19 @@ def test_anon_constructor_error_shape_arg_missing(test, device):
         RuntimeError,
         r"the `shape` argument must be specified when initializing a matrix by value$",
     ):
-        wp.launch(
-            kernel,
-            dim=1,
-            inputs=[],
-            device=device,
-        )
+        wp.launch(kernel, dim=1, inputs=[], device=device)
 
 
 def test_anon_constructor_error_shape_mismatch(test, device):
     @wp.kernel
     def kernel():
-        wp.matrix(
-            wp.matrix(shape=(1, 2), dtype=float),
-            shape=(3, 4),
-            dtype=float,
-        )
+        wp.matrix(wp.matrix(shape=(1, 2), dtype=float), shape=(3, 4), dtype=float)
 
     with test.assertRaisesRegex(
         RuntimeError,
         r"incompatible matrix of shape \(3, 4\) given when copy constructing a matrix of shape \(1, 2\)$",
     ):
-        wp.launch(
-            kernel,
-            dim=1,
-            inputs=[],
-            device=device,
-        )
+        wp.launch(kernel, dim=1, inputs=[], device=device)
 
 
 def test_anon_constructor_error_type_mismatch(test, device):
@@ -99,12 +75,7 @@ def test_anon_constructor_error_type_mismatch(test, device):
         RuntimeError,
         r"the value used to fill this matrix is expected to be of the type `float16`$",
     ):
-        wp.launch(
-            kernel,
-            dim=1,
-            inputs=[],
-            device=device,
-        )
+        wp.launch(kernel, dim=1, inputs=[], device=device)
 
 
 def test_anon_constructor_error_invalid_arg_count(test, device):
@@ -116,12 +87,7 @@ def test_anon_constructor_error_invalid_arg_count(test, device):
         RuntimeError,
         r"incompatible number of values given \(3\) when constructing a matrix of shape \(2, 2\)$",
     ):
-        wp.launch(
-            kernel,
-            dim=1,
-            inputs=[],
-            device=device,
-        )
+        wp.launch(kernel, dim=1, inputs=[], device=device)
 
 
 def test_anon_xform_constructor_error_type_mismatch(test, device):
@@ -150,12 +116,7 @@ def test_tpl_constructor_error_incompatible_sizes(test, device):
         RuntimeError,
         r"incompatible matrix of shape \(3, 3\) given when copy constructing a matrix of shape \(2, 2\)$",
     ):
-        wp.launch(
-            kernel,
-            dim=1,
-            inputs=[],
-            device=device,
-        )
+        wp.launch(kernel, dim=1, inputs=[], device=device)
 
 
 def test_tpl_constructor_error_invalid_vector_count(test, device):
@@ -167,12 +128,7 @@ def test_tpl_constructor_error_invalid_vector_count(test, device):
         RuntimeError,
         r"incompatible number of column vectors given \(2\) when constructing a matrix of shape \(3, 3\)$",
    ):
-        wp.launch(
-            kernel,
-            dim=1,
-            inputs=[],
-            device=device,
-        )
+        wp.launch(kernel, dim=1, inputs=[], device=device)
 
 
 def test_tpl_constructor_error_invalid_vector_shape(test, device):
@@ -184,12 +140,7 @@ def test_tpl_constructor_error_invalid_vector_shape(test, device):
         RuntimeError,
         r"incompatible column vector lengths given when constructing a matrix of shape \(2, 2\)$",
     ):
-        wp.launch(
-            kernel,
-            dim=1,
-            inputs=[],
-            device=device,
-        )
+        wp.launch(kernel, dim=1, inputs=[], device=device)
 
 
 def test_tpl_constructor_error_invalid_arg_count(test, device):
@@ -201,12 +152,7 @@ def test_tpl_constructor_error_invalid_arg_count(test, device):
         RuntimeError,
         r"incompatible number of values given \(3\) when constructing a matrix of shape \(2, 2\)$",
     ):
-        wp.launch(
-            kernel,
-            dim=1,
-            inputs=[],
-            device=device,
-        )
+        wp.launch(kernel, dim=1, inputs=[], device=device)
 
 
 def test_py_arithmetic_ops(test, device, dtype):
@@ -438,6 +384,77 @@ def test_negation(test, device, dtype, register_kernels=False):
         idx = idx + 1
 
 
+def test_matmul(test, device, dtype, register_kernels=False):
+    rng = np.random.default_rng(123)
+
+    tol = {
+        np.float16: 5.0e-3,
+        np.float32: 1.0e-6,
+        np.float64: 1.0e-12,
+    }.get(dtype, 0)
+
+    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
+    mat22 = wp.types.matrix(shape=(2, 2), dtype=wptype)
+    mat33 = wp.types.matrix(shape=(3, 3), dtype=wptype)
+    mat23 = wp.types.matrix(shape=(2, 3), dtype=wptype)
+    mat32 = wp.types.matrix(shape=(3, 2), dtype=wptype)
+    mat44 = wp.types.matrix(shape=(4, 4), dtype=wptype)
+
+    output_select_kernel = get_select_kernel(wptype)
+
+    def check_mat_mul(
+        i23: wp.array(dtype=mat23),
+        i32: wp.array(dtype=mat32),
+        i44: wp.array(dtype=mat44),
+        o22: wp.array(dtype=mat22),
+        o33: wp.array(dtype=mat33),
+        o44: wp.array(dtype=mat44),
+    ):
+        i = wp.tid()
+        o22[i] = i23[i] @ i32[i]
+        o33[i] = i32[i] @ i23[i]
+        o44[i] = i44[i] @ i44[i]
+
+    kernel = getkernel(check_mat_mul, suffix=dtype.__name__)
+
+    if register_kernels:
+        return
+
+    test_adj = dtype in np_float_types
+
+    i23 = wp.array(randvals(rng, [1, 2, 3], dtype), dtype=mat23, requires_grad=test_adj, device=device)
+    i32 = wp.array(randvals(rng, [1, 3, 2], dtype), dtype=mat32, requires_grad=test_adj, device=device)
+    i44 = wp.array(randvals(rng, [1, 4, 4], dtype), dtype=mat44, requires_grad=test_adj, device=device)
+    o22 = wp.array(randvals(rng, [1, 2, 2], dtype), dtype=mat22, requires_grad=test_adj, device=device)
+    o33 = wp.array(randvals(rng, [1, 3, 3], dtype), dtype=mat33, requires_grad=test_adj, device=device)
+    o44 = wp.array(randvals(rng, [1, 4, 4], dtype), dtype=mat44, requires_grad=test_adj, device=device)
+
+    tape = wp.Tape()
+    with tape:
+        wp.launch(
+            kernel,
+            dim=1,
+            inputs=[i23, i32, i44],
+            outputs=[o22, o33, o44],
+            device=device,
+        )
+
+    assert_np_equal(o22.numpy(), i23.numpy() @ i32.numpy(), tol=tol)
+    assert_np_equal(o33.numpy(), i32.numpy() @ i23.numpy(), tol=tol)
+    assert_np_equal(o44.numpy(), i44.numpy() @ i44.numpy(), tol=tol)
+
+    if test_adj:
+        o22.grad.assign([np.eye(2)])
+        o33.grad.assign([np.eye(3)])
+        o44.grad.assign([np.eye(4)])
+
+        tape.backward()
+
+        assert_np_equal(i23.grad.numpy(), 2.0 * i32.numpy().T, tol=tol)
+        assert_np_equal(i32.grad.numpy(), 2.0 * i23.numpy().T, tol=tol)
+        assert_np_equal(i44.grad.numpy(), 2.0 * i44.numpy().T, tol=tol)
+
+
 def test_subtraction(test, device, dtype, register_kernels=False):
     rng = np.random.default_rng(123)
 
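The new test_matmul above exercises Python's `@` operator for matrix-matrix products inside Warp kernels, including the adjoints checked through wp.Tape. A minimal standalone sketch of the same feature — the kernel and array names below are illustrative, not taken from the package:

import numpy as np
import warp as wp

@wp.kernel
def mul_mats(a: wp.array(dtype=wp.mat33), b: wp.array(dtype=wp.mat33), out: wp.array(dtype=wp.mat33)):
    i = wp.tid()
    out[i] = a[i] @ b[i]  # matrix-matrix product via the @ operator, as in check_mat_mul

a_np = np.eye(3, dtype=np.float32)[None]        # shape (1, 3, 3): one mat33
b_np = 2.0 * np.eye(3, dtype=np.float32)[None]
a = wp.array(a_np, dtype=wp.mat33)
b = wp.array(b_np, dtype=wp.mat33)
out = wp.zeros(1, dtype=wp.mat33)
wp.launch(mul_mats, dim=1, inputs=[a, b], outputs=[out])
print(out.numpy())  # a single 3x3 block equal to 2 * identity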
@@ -541,16 +558,7 @@ def test_subtraction(test, device, dtype, register_kernels=False):
         wp.launch(
             kernel,
             dim=1,
-            inputs=[
-                s2,
-                s3,
-                s4,
-                s5,
-                v2,
-                v3,
-                v4,
-                v5,
-            ],
+            inputs=[s2, s3, s4, s5, v2, v3, v4, v5],
             outputs=[outcomponents],
             device=device,
         )
@@ -558,11 +566,11 @@ def test_subtraction(test, device, dtype, register_kernels=False):
                     output_select_kernel, dim=1, inputs=[outcomponents, idx], outputs=[out], device=device
                 )
                 tape.backward(loss=out)
-
-
-                assert_np_equal(tape.gradients[in2].numpy()[0],
-
-                assert_np_equal(tape.gradients[in1].numpy()[0],
+                expected_result = np.zeros((dim, dim), dtype=dtype)
+                expected_result[i, j] = 2
+                assert_np_equal(tape.gradients[in2].numpy()[0], expected_result, tol=10 * tol)
+                expected_result[i, j] = -2
+                assert_np_equal(tape.gradients[in1].numpy()[0], expected_result, tol=10 * tol)
                 tape.zero()
 
                 idx = idx + 1
@@ -608,21 +616,7 @@ def test_determinant(test, device, dtype, register_kernels=False):
 
     tape = wp.Tape()
     with tape:
-        wp.launch(
-            kernel,
-            dim=1,
-            inputs=[
-                v2,
-                v3,
-                v4,
-            ],
-            outputs=[
-                det2,
-                det3,
-                det4,
-            ],
-            device=device,
-        )
+        wp.launch(kernel, dim=1, inputs=[v2, v3, v4], outputs=[det2, det3, det4], device=device)
 
     if dtype in np_float_types:
         assert_np_equal(det2.numpy()[0], 2 * np.linalg.det(v2.numpy()[0].astype(np.float64)), tol=100 * tol)
@@ -658,16 +652,8 @@ def test_determinant(test, device, dtype, register_kernels=False):
     wp.launch(
         kernel,
         dim=1,
-        inputs=[
-            wp.array(v2test, dtype=v2.dtype, requires_grad=True, device=device),
-            v3,
-            v4,
-        ],
-        outputs=[
-            det2,
-            det3,
-            det4,
-        ],
+        inputs=[wp.array(v2test, dtype=v2.dtype, requires_grad=True, device=device), v3, v4],
+        outputs=[det2, det3, det4],
         device=device,
     )
     dplus = det2.numpy()[0]
@@ -675,16 +661,8 @@ def test_determinant(test, device, dtype, register_kernels=False):
     wp.launch(
         kernel,
         dim=1,
-        inputs=[
-            wp.array(v2test, dtype=v2.dtype, requires_grad=True, device=device),
-            v3,
-            v4,
-        ],
-        outputs=[
-            det2,
-            det3,
-            det4,
-        ],
+        inputs=[wp.array(v2test, dtype=v2.dtype, requires_grad=True, device=device), v3, v4],
+        outputs=[det2, det3, det4],
         device=device,
     )
     dminus = det2.numpy()[0]
@@ -697,16 +675,8 @@ def test_determinant(test, device, dtype, register_kernels=False):
     wp.launch(
         kernel,
         dim=1,
-        inputs=[
-            v2,
-            wp.array(v3test, dtype=v3.dtype, requires_grad=True, device=device),
-            v4,
-        ],
-        outputs=[
-            det2,
-            det3,
-            det4,
-        ],
+        inputs=[v2, wp.array(v3test, dtype=v3.dtype, requires_grad=True, device=device), v4],
+        outputs=[det2, det3, det4],
         device=device,
     )
     dplus = det3.numpy()[0]
@@ -714,16 +684,8 @@ def test_determinant(test, device, dtype, register_kernels=False):
     wp.launch(
         kernel,
         dim=1,
-        inputs=[
-            v2,
-            wp.array(v3test, dtype=v3.dtype, requires_grad=True, device=device),
-            v4,
-        ],
-        outputs=[
-            det2,
-            det3,
-            det4,
-        ],
+        inputs=[v2, wp.array(v3test, dtype=v3.dtype, requires_grad=True, device=device), v4],
+        outputs=[det2, det3, det4],
        device=device,
     )
     dminus = det3.numpy()[0]
@@ -736,16 +698,8 @@ def test_determinant(test, device, dtype, register_kernels=False):
     wp.launch(
         kernel,
         dim=1,
-        inputs=[
-            v2,
-            v3,
-            wp.array(v4test, dtype=v4.dtype, requires_grad=True, device=device),
-        ],
-        outputs=[
-            det2,
-            det3,
-            det4,
-        ],
+        inputs=[v2, v3, wp.array(v4test, dtype=v4.dtype, requires_grad=True, device=device)],
+        outputs=[det2, det3, det4],
         device=device,
     )
     dplus = det4.numpy()[0]
@@ -753,16 +707,8 @@ def test_determinant(test, device, dtype, register_kernels=False):
     wp.launch(
         kernel,
         dim=1,
-        inputs=[
-            v2,
-            v3,
-            wp.array(v4test, dtype=v4.dtype, requires_grad=True, device=device),
-        ],
-        outputs=[
-            det2,
-            det3,
-            det4,
-        ],
+        inputs=[v2, v3, wp.array(v4test, dtype=v4.dtype, requires_grad=True, device=device)],
+        outputs=[det2, det3, det4],
         device=device,
     )
     dminus = det4.numpy()[0]
@@ -999,7 +945,7 @@ def test_svd(test, device, dtype, register_kernels=False):
     tol = {
         np.float16: 1.0e-3,
         np.float32: 1.0e-6,
-        np.float64: 1.0e-
+        np.float64: 1.0e-12,
     }.get(dtype, 0)
 
     wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
@@ -1722,8 +1668,9 @@ def test_matrix_mutation(expected: wp.types.matrix(shape=(10, 3), dtype=float)):
     wp.expect_eq(m, expected)
 
 
-
-
+# NOTE: Compile tile is highly sensitive to shape so we use small values now
+CONSTANT_SHAPE_ROWS = wp.constant(2)
+CONSTANT_SHAPE_COLS = wp.constant(2)
 
 
 # tests that we can use global constants in shape keyword argument
@@ -1737,6 +1684,106 @@ def test_constructors_constant_shape():
             m[i, j] = float(i * j)
 
 
+Mat23 = wp.mat((2, 3), dtype=wp.float16)
+
+
+@wp.kernel
+def matrix_len_kernel(
+    m1: wp.mat22, m2: wp.mat((3, 3), float), m3: wp.mat((Any, Any), float), m4: Mat23, out: wp.array(dtype=int)
+):
+    length = wp.static(len(m1))
+    wp.expect_eq(len(m1), 2)
+    out[0] = len(m1)
+
+    length = len(m2)
+    wp.expect_eq(wp.static(len(m2)), 3)
+    out[1] = len(m2)
+
+    length = len(m3)
+    wp.expect_eq(len(m3), 4)
+    out[2] = wp.static(len(m3))
+
+    length = wp.static(len(m4))
+    wp.expect_eq(wp.static(len(m4)), 2)
+    out[3] = wp.static(len(m4))
+
+    foo = wp.mat22()
+    length = len(foo)
+    wp.expect_eq(len(foo), 2)
+    out[4] = len(foo)
+
+
+def test_matrix_len(test, device):
+    m1 = wp.mat22()
+    m2 = wp.mat33()
+    m3 = wp.mat44()
+    m4 = Mat23()
+    out = wp.empty(5, dtype=int, device=device)
+    wp.launch(matrix_len_kernel, dim=(1,), inputs=(m1, m2, m3, m4), outputs=(out,), device=device)
+
+    test.assertEqual(out.numpy()[0], 2)
+    test.assertEqual(out.numpy()[1], 3)
+    test.assertEqual(out.numpy()[2], 4)
+    test.assertEqual(out.numpy()[3], 2)
+    test.assertEqual(out.numpy()[4], 2)
+
+    test.assertEqual(len(m1), 2)
+    test.assertEqual(len(m2), 3)
+    test.assertEqual(len(m3), 4)
+    test.assertEqual(len(m4), 2)
+
+
+@wp.kernel
+def matrix_augassign_kernel(
+    a: wp.array(dtype=wp.mat22), b: wp.array(dtype=wp.mat22), c: wp.array(dtype=wp.mat22), d: wp.array(dtype=wp.mat22)
+):
+    i = wp.tid()
+
+    m1 = wp.mat22()
+    m2 = b[i]
+
+    m1[0, 0] += m2[0, 0]
+    m1[0, 1] += m2[0, 1]
+    m1[1, 0] += m2[1, 0]
+    m1[1, 1] += m2[1, 1]
+
+    a[i] = m1
+
+    m3 = wp.mat22()
+    m4 = d[i]
+
+    m3[0, 0] -= m4[0, 0]
+    m3[0, 1] -= m4[0, 1]
+    m3[1, 0] -= m4[1, 0]
+    m3[1, 1] -= m4[1, 1]
+
+    c[i] = m3
+
+
+def test_matrix_augassign(test, device):
+    N = 3
+
+    a = wp.zeros(N, dtype=wp.mat22, requires_grad=True)
+    b = wp.ones(N, dtype=wp.mat22, requires_grad=True)
+
+    c = wp.zeros(N, dtype=wp.mat22, requires_grad=True)
+    d = wp.ones(N, dtype=wp.mat22, requires_grad=True)
+
+    tape = wp.Tape()
+    with tape:
+        wp.launch(matrix_augassign_kernel, N, inputs=[a, b, c, d])
+
+    tape.backward(grads={a: wp.ones_like(a), c: wp.ones_like(c)})
+
+    assert_np_equal(a.numpy(), wp.ones_like(a).numpy())
+    assert_np_equal(a.grad.numpy(), wp.ones_like(a).numpy())
+    assert_np_equal(b.grad.numpy(), wp.ones_like(a).numpy())
+
+    assert_np_equal(c.numpy(), -wp.ones_like(c).numpy())
+    assert_np_equal(c.grad.numpy(), wp.ones_like(c).numpy())
+    assert_np_equal(d.grad.numpy(), -wp.ones_like(d).numpy())
+
+
 devices = get_test_devices()
 
 
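test_matrix_len and test_matrix_augassign above pin down two kernel-language additions: len() on matrix values (the row count, constant-foldable with wp.static) and augmented assignment on individual matrix components, with adjoints propagated through the tape. A small illustrative sketch combining both — the kernel below is ours, not part of the package:

import numpy as np
import warp as wp

@wp.kernel
def add_to_diag(mats: wp.array(dtype=wp.mat33), s: float):
    i = wp.tid()
    m = mats[i]
    # len() of a matrix is its row count; wp.static folds it at compile time,
    # so the loop below has a constant bound
    for j in range(wp.static(len(m))):
        m[j, j] += s  # component-wise augmented assignment
    mats[i] = m

mats = wp.array(np.zeros((4, 3, 3), dtype=np.float32), dtype=wp.mat33)
wp.launch(add_to_diag, dim=4, inputs=[mats, 1.0])
print(mats.numpy()[0])  # identity: 1.0 was added along each diagonal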
@@ -1789,6 +1836,9 @@ for dtype in np_signed_int_types + np_float_types:
     add_function_test_register_kernel(
         TestMat, f"test_subtraction_{dtype.__name__}", test_subtraction, devices=devices, dtype=dtype
     )
+    add_function_test_register_kernel(
+        TestMat, f"test_matmul_{dtype.__name__}", test_matmul, devices=devices, dtype=dtype
+    )
 
 add_function_test(
     TestMat,
@@ -1797,16 +1847,10 @@ add_function_test(
     devices=devices,
 )
 add_function_test(
-    TestMat,
-    "test_anon_constructor_error_shape_mismatch",
-    test_anon_constructor_error_shape_mismatch,
-    devices=devices,
+    TestMat, "test_anon_constructor_error_shape_mismatch", test_anon_constructor_error_shape_mismatch, devices=devices
 )
 add_function_test(
-    TestMat,
-    "test_anon_constructor_error_type_mismatch",
-    test_anon_constructor_error_type_mismatch,
-    devices=devices,
+    TestMat, "test_anon_constructor_error_type_mismatch", test_anon_constructor_error_type_mismatch, devices=devices
 )
 add_function_test(
     TestMat,
@@ -1875,7 +1919,8 @@ for dtype in np_float_types:
         devices=devices,
         dtype=dtype,
     )
-
+add_function_test(TestMat, "test_matrix_len", test_matrix_len, devices=devices)
+add_function_test(TestMat, "test_matrix_augassign", test_matrix_augassign, devices=devices)
 
 if __name__ == "__main__":
     wp.clear_kernel_cache()
warp/tests/test_math.py
CHANGED
@@ -6,7 +6,7 @@
 # license agreement from NVIDIA CORPORATION is strictly prohibited.
 
 import unittest
-from typing import NamedTuple
+from typing import Any, NamedTuple
 
 import numpy as np
 
@@ -50,6 +50,51 @@ def test_scalar_math(test, device):
     assert_np_equal(tape.gradients[x].numpy(), np.array([adj_float_results_expected[i]]), tol=1e-6)
 
 
+@wp.kernel
+def test_vec_norm_kernel(vs: wp.array(dtype=Any), out: wp.array(dtype=float, ndim=2)):
+    tid = wp.tid()
+    out[tid, 0] = wp.norm_l1(vs[tid])
+    out[tid, 1] = wp.norm_l2(vs[tid])
+    out[tid, 2] = wp.norm_huber(vs[tid])
+    out[tid, 3] = wp.norm_pseudo_huber(vs[tid])
+
+
+def test_vec_norm(test, device):
+    # ground-truth implementations from SciPy
+    def huber(delta, x):
+        if x <= delta:
+            return 0.5 * x**2
+        else:
+            return delta * (x - 0.5 * delta)
+
+    def pseudo_huber(delta, x):
+        return delta**2 * (np.sqrt(1 + (x / delta) ** 2) - 1)
+
+    v0 = wp.vec3(-2.0, -1.0, -3.0)
+    v1 = wp.vec3(2.0, 1.0, 3.0)
+    v2 = wp.vec3(0.0, 0.0, 0.0)
+
+    xs = wp.array([v0, v1, v2], dtype=wp.vec3, requires_grad=True, device=device)
+    out = wp.empty((len(xs), 4), dtype=wp.float32, requires_grad=True, device=device)
+
+    wp.launch(test_vec_norm_kernel, dim=len(xs), inputs=[xs], outputs=[out], device=device)
+
+    for i, x in enumerate([v0, v1, v2]):
+        assert_np_equal(
+            out.numpy()[i],
+            np.array(
+                [
+                    np.linalg.norm(x, ord=1),
+                    np.linalg.norm(x, ord=2),
+                    huber(1.0, wp.length(x)),
+                    # note SciPy defines the Pseudo-Huber loss slightly differently
+                    pseudo_huber(1.0, wp.length(x)) + 1.0,
+                ]
+            ),
+            tol=1e-6,
+        )
+
+
 devices = get_test_devices()
 
 
@@ -117,6 +162,7 @@ class TestMath(unittest.TestCase):
 
 
 add_function_test(TestMath, "test_scalar_math", test_scalar_math, devices=devices)
+add_function_test(TestMath, "test_vec_norm", test_vec_norm, devices=devices)
 
 
 if __name__ == "__main__":
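test_vec_norm covers the vector-norm helpers that arrived with the new warp/math.py: wp.norm_l1, wp.norm_l2, wp.norm_huber, and wp.norm_pseudo_huber (the latter two defaulting to delta = 1.0, judging by the reference checks above). Note the convention the test encodes: at delta = 1, Warp's pseudo-Huber norm equals SciPy's pseudo_huber plus one, i.e. sqrt(1 + |v|^2) with no trailing -1. A quick illustrative sketch, with approximate values in the final comment:

import warp as wp

@wp.kernel
def norms(vs: wp.array(dtype=wp.vec3), out: wp.array(dtype=float, ndim=2)):
    i = wp.tid()
    out[i, 0] = wp.norm_l1(vs[i])            # sum of absolute components
    out[i, 1] = wp.norm_l2(vs[i])            # Euclidean length
    out[i, 2] = wp.norm_huber(vs[i])         # quadratic near zero, linear in the tails
    out[i, 3] = wp.norm_pseudo_huber(vs[i])  # smooth approximation of the Huber norm

vs = wp.array([wp.vec3(3.0, 4.0, 0.0)], dtype=wp.vec3)
out = wp.zeros((1, 4), dtype=float)
wp.launch(norms, dim=1, inputs=[vs], outputs=[out])
print(out.numpy())  # approx [7.0, 5.0, 4.5, 5.1]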
warp/tests/test_matmul.py
CHANGED
@@ -485,13 +485,17 @@ class TestMatmul(unittest.TestCase):
 
 
 # add_function_test(TestMatmul, "test_f16", test_f16, devices=devices)
-add_function_test(TestMatmul, "test_f32", test_f32, devices=devices)
-add_function_test(TestMatmul, "test_f64", test_f64, devices=devices)
-add_function_test(TestMatmul, "test_tape", test_tape, devices=devices)
-add_function_test(TestMatmul, "test_operator", test_operator, devices=devices)
-add_function_test(TestMatmul, "test_large_batch_count", test_large_batch_count, devices=devices)
-add_function_test(
-
+add_function_test(TestMatmul, "test_f32", test_f32, devices=devices, check_output=False)
+add_function_test(TestMatmul, "test_f64", test_f64, devices=devices, check_output=False)
+add_function_test(TestMatmul, "test_tape", test_tape, devices=devices, check_output=False)
+add_function_test(TestMatmul, "test_operator", test_operator, devices=devices, check_output=False)
+add_function_test(TestMatmul, "test_large_batch_count", test_large_batch_count, devices=devices, check_output=False)
+add_function_test(
+    TestMatmul, "test_adjoint_accumulation", test_adjoint_accumulation, devices=devices, check_output=False
+)
+add_function_test(
+    TestMatmul, "test_cuda_graph_capture", test_cuda_graph_capture, devices=cuda_devices, check_output=False
+)
 
 
 if __name__ == "__main__":
warp/tests/test_matmul_lite.py
CHANGED
@@ -392,10 +392,10 @@ class TestMatmulLite(unittest.TestCase):
     pass
 
 
-add_function_test(TestMatmulLite, "test_f32", test_f32, devices=devices)
-add_function_test(TestMatmulLite, "test_tape", test_tape, devices=devices)
-add_function_test(TestMatmulLite, "test_operator", test_operator, devices=devices)
-add_function_test(TestMatmulLite, "test_large_batch_count", test_large_batch_count, devices=devices)
+add_function_test(TestMatmulLite, "test_f32", test_f32, devices=devices, check_output=False)
+add_function_test(TestMatmulLite, "test_tape", test_tape, devices=devices, check_output=False)
+add_function_test(TestMatmulLite, "test_operator", test_operator, devices=devices, check_output=False)
+add_function_test(TestMatmulLite, "test_large_batch_count", test_large_batch_count, devices=devices, check_output=False)
 
 
 if __name__ == "__main__":