warp-lang 1.1.0-py3-none-win_amd64.whl → 1.2.1-py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- warp/bin/warp-clang.dll +0 -0
- warp/bin/warp.dll +0 -0
- warp/build.py +10 -37
- warp/build_dll.py +2 -2
- warp/builtins.py +274 -6
- warp/codegen.py +51 -4
- warp/config.py +2 -2
- warp/constants.py +4 -0
- warp/context.py +422 -203
- warp/examples/benchmarks/benchmark_api.py +0 -2
- warp/examples/benchmarks/benchmark_cloth_warp.py +0 -1
- warp/examples/benchmarks/benchmark_launches.py +0 -2
- warp/examples/core/example_dem.py +0 -2
- warp/examples/core/example_fluid.py +0 -2
- warp/examples/core/example_graph_capture.py +0 -2
- warp/examples/core/example_marching_cubes.py +0 -2
- warp/examples/core/example_mesh.py +0 -2
- warp/examples/core/example_mesh_intersect.py +0 -2
- warp/examples/core/example_nvdb.py +0 -2
- warp/examples/core/example_raycast.py +0 -2
- warp/examples/core/example_raymarch.py +0 -2
- warp/examples/core/example_render_opengl.py +0 -2
- warp/examples/core/example_sph.py +0 -2
- warp/examples/core/example_torch.py +0 -3
- warp/examples/core/example_wave.py +0 -2
- warp/examples/fem/example_apic_fluid.py +140 -115
- warp/examples/fem/example_burgers.py +262 -0
- warp/examples/fem/example_convection_diffusion.py +0 -2
- warp/examples/fem/example_convection_diffusion_dg.py +0 -2
- warp/examples/fem/example_deformed_geometry.py +0 -2
- warp/examples/fem/example_diffusion.py +0 -2
- warp/examples/fem/example_diffusion_3d.py +5 -4
- warp/examples/fem/example_diffusion_mgpu.py +0 -2
- warp/examples/fem/example_mixed_elasticity.py +0 -2
- warp/examples/fem/example_navier_stokes.py +0 -2
- warp/examples/fem/example_stokes.py +0 -2
- warp/examples/fem/example_stokes_transfer.py +0 -2
- warp/examples/optim/example_bounce.py +0 -2
- warp/examples/optim/example_cloth_throw.py +0 -2
- warp/examples/optim/example_diffray.py +0 -2
- warp/examples/optim/example_drone.py +0 -2
- warp/examples/optim/example_inverse_kinematics.py +0 -2
- warp/examples/optim/example_inverse_kinematics_torch.py +0 -2
- warp/examples/optim/example_spring_cage.py +0 -2
- warp/examples/optim/example_trajectory.py +0 -2
- warp/examples/optim/example_walker.py +0 -2
- warp/examples/sim/example_cartpole.py +0 -2
- warp/examples/sim/example_cloth.py +0 -2
- warp/examples/sim/example_granular.py +0 -2
- warp/examples/sim/example_granular_collision_sdf.py +0 -2
- warp/examples/sim/example_jacobian_ik.py +0 -2
- warp/examples/sim/example_particle_chain.py +0 -2
- warp/examples/sim/example_quadruped.py +0 -2
- warp/examples/sim/example_rigid_chain.py +0 -2
- warp/examples/sim/example_rigid_contact.py +0 -2
- warp/examples/sim/example_rigid_force.py +0 -2
- warp/examples/sim/example_rigid_gyroscopic.py +0 -2
- warp/examples/sim/example_rigid_soft_contact.py +0 -2
- warp/examples/sim/example_soft_body.py +0 -2
- warp/fem/__init__.py +1 -0
- warp/fem/cache.py +3 -1
- warp/fem/geometry/__init__.py +1 -0
- warp/fem/geometry/element.py +4 -0
- warp/fem/geometry/grid_3d.py +0 -4
- warp/fem/geometry/nanogrid.py +455 -0
- warp/fem/integrate.py +63 -9
- warp/fem/space/__init__.py +43 -158
- warp/fem/space/basis_space.py +34 -0
- warp/fem/space/collocated_function_space.py +1 -1
- warp/fem/space/grid_2d_function_space.py +13 -132
- warp/fem/space/grid_3d_function_space.py +16 -154
- warp/fem/space/hexmesh_function_space.py +37 -134
- warp/fem/space/nanogrid_function_space.py +202 -0
- warp/fem/space/quadmesh_2d_function_space.py +12 -119
- warp/fem/space/restriction.py +4 -1
- warp/fem/space/shape/__init__.py +77 -0
- warp/fem/space/shape/cube_shape_function.py +5 -15
- warp/fem/space/tetmesh_function_space.py +6 -76
- warp/fem/space/trimesh_2d_function_space.py +6 -76
- warp/native/array.h +12 -3
- warp/native/builtin.h +48 -5
- warp/native/bvh.cpp +14 -10
- warp/native/bvh.cu +23 -15
- warp/native/bvh.h +1 -0
- warp/native/clang/clang.cpp +2 -1
- warp/native/crt.cpp +11 -1
- warp/native/crt.h +18 -1
- warp/native/exports.h +187 -0
- warp/native/mat.h +47 -0
- warp/native/mesh.cpp +1 -1
- warp/native/mesh.cu +1 -2
- warp/native/nanovdb/GridHandle.h +366 -0
- warp/native/nanovdb/HostBuffer.h +590 -0
- warp/native/nanovdb/NanoVDB.h +3999 -2157
- warp/native/nanovdb/PNanoVDB.h +936 -99
- warp/native/quat.h +28 -1
- warp/native/rand.h +5 -1
- warp/native/vec.h +45 -1
- warp/native/volume.cpp +335 -103
- warp/native/volume.cu +39 -13
- warp/native/volume.h +725 -303
- warp/native/volume_builder.cu +381 -360
- warp/native/volume_builder.h +16 -1
- warp/native/volume_impl.h +61 -0
- warp/native/warp.cu +8 -2
- warp/native/warp.h +15 -7
- warp/render/render_opengl.py +191 -52
- warp/sim/integrator_featherstone.py +10 -3
- warp/sim/integrator_xpbd.py +16 -22
- warp/sparse.py +89 -27
- warp/stubs.py +83 -0
- warp/tests/assets/test_index_grid.nvdb +0 -0
- warp/tests/aux_test_dependent.py +0 -2
- warp/tests/aux_test_grad_customs.py +0 -2
- warp/tests/aux_test_reference.py +0 -2
- warp/tests/aux_test_reference_reference.py +0 -2
- warp/tests/aux_test_square.py +0 -2
- warp/tests/disabled_kinematics.py +0 -2
- warp/tests/test_adam.py +0 -2
- warp/tests/test_arithmetic.py +0 -36
- warp/tests/test_array.py +9 -11
- warp/tests/test_array_reduce.py +0 -2
- warp/tests/test_async.py +0 -2
- warp/tests/test_atomic.py +0 -2
- warp/tests/test_bool.py +58 -50
- warp/tests/test_builtins_resolution.py +0 -2
- warp/tests/test_bvh.py +0 -2
- warp/tests/test_closest_point_edge_edge.py +0 -1
- warp/tests/test_codegen.py +0 -4
- warp/tests/test_compile_consts.py +130 -10
- warp/tests/test_conditional.py +0 -2
- warp/tests/test_copy.py +0 -2
- warp/tests/test_ctypes.py +6 -8
- warp/tests/test_dense.py +0 -2
- warp/tests/test_devices.py +0 -2
- warp/tests/test_dlpack.py +9 -11
- warp/tests/test_examples.py +42 -39
- warp/tests/test_fabricarray.py +0 -3
- warp/tests/test_fast_math.py +0 -2
- warp/tests/test_fem.py +75 -54
- warp/tests/test_fp16.py +0 -2
- warp/tests/test_func.py +0 -2
- warp/tests/test_generics.py +27 -2
- warp/tests/test_grad.py +147 -8
- warp/tests/test_grad_customs.py +0 -2
- warp/tests/test_hash_grid.py +1 -3
- warp/tests/test_import.py +0 -2
- warp/tests/test_indexedarray.py +0 -2
- warp/tests/test_intersect.py +0 -2
- warp/tests/test_jax.py +0 -2
- warp/tests/test_large.py +11 -9
- warp/tests/test_launch.py +0 -2
- warp/tests/test_lerp.py +10 -54
- warp/tests/test_linear_solvers.py +3 -5
- warp/tests/test_lvalue.py +0 -2
- warp/tests/test_marching_cubes.py +0 -2
- warp/tests/test_mat.py +0 -2
- warp/tests/test_mat_lite.py +0 -2
- warp/tests/test_mat_scalar_ops.py +0 -2
- warp/tests/test_math.py +0 -2
- warp/tests/test_matmul.py +35 -37
- warp/tests/test_matmul_lite.py +29 -31
- warp/tests/test_mempool.py +0 -2
- warp/tests/test_mesh.py +0 -3
- warp/tests/test_mesh_query_aabb.py +0 -2
- warp/tests/test_mesh_query_point.py +0 -2
- warp/tests/test_mesh_query_ray.py +0 -2
- warp/tests/test_mlp.py +0 -2
- warp/tests/test_model.py +0 -2
- warp/tests/test_module_hashing.py +111 -0
- warp/tests/test_modules_lite.py +0 -3
- warp/tests/test_multigpu.py +0 -2
- warp/tests/test_noise.py +0 -4
- warp/tests/test_operators.py +0 -2
- warp/tests/test_options.py +0 -2
- warp/tests/test_peer.py +0 -2
- warp/tests/test_pinned.py +0 -2
- warp/tests/test_print.py +0 -2
- warp/tests/test_quat.py +0 -2
- warp/tests/test_rand.py +41 -5
- warp/tests/test_reload.py +0 -10
- warp/tests/test_rounding.py +0 -2
- warp/tests/test_runlength_encode.py +0 -2
- warp/tests/test_sim_grad.py +0 -2
- warp/tests/test_sim_kinematics.py +0 -2
- warp/tests/test_smoothstep.py +0 -2
- warp/tests/test_snippet.py +0 -2
- warp/tests/test_sparse.py +0 -2
- warp/tests/test_spatial.py +0 -2
- warp/tests/test_special_values.py +362 -0
- warp/tests/test_streams.py +0 -2
- warp/tests/test_struct.py +0 -2
- warp/tests/test_tape.py +0 -2
- warp/tests/test_torch.py +0 -2
- warp/tests/test_transient_module.py +0 -2
- warp/tests/test_types.py +0 -2
- warp/tests/test_utils.py +0 -2
- warp/tests/test_vec.py +0 -2
- warp/tests/test_vec_lite.py +0 -2
- warp/tests/test_vec_scalar_ops.py +0 -2
- warp/tests/test_verify_fp.py +0 -2
- warp/tests/test_volume.py +237 -13
- warp/tests/test_volume_write.py +86 -3
- warp/tests/unittest_serial.py +10 -9
- warp/tests/unittest_suites.py +6 -2
- warp/tests/unittest_utils.py +2 -171
- warp/tests/unused_test_misc.py +0 -2
- warp/tests/walkthrough_debug.py +1 -1
- warp/thirdparty/unittest_parallel.py +37 -40
- warp/types.py +526 -85
- {warp_lang-1.1.0.dist-info → warp_lang-1.2.1.dist-info}/METADATA +61 -31
- warp_lang-1.2.1.dist-info/RECORD +359 -0
- warp/examples/fem/example_convection_diffusion_dg0.py +0 -204
- warp/native/nanovdb/PNanoVDBWrite.h +0 -295
- warp_lang-1.1.0.dist-info/RECORD +0 -352
- {warp_lang-1.1.0.dist-info → warp_lang-1.2.1.dist-info}/LICENSE.md +0 -0
- {warp_lang-1.1.0.dist-info → warp_lang-1.2.1.dist-info}/WHEEL +0 -0
- {warp_lang-1.1.0.dist-info → warp_lang-1.2.1.dist-info}/top_level.txt +0 -0
warp/tests/test_import.py
CHANGED
warp/tests/test_indexedarray.py
CHANGED
warp/tests/test_intersect.py
CHANGED
warp/tests/test_jax.py
CHANGED
warp/tests/test_large.py
CHANGED
@@ -7,12 +7,11 @@
 
 import math
 import unittest
+from typing import Any
 
 import warp as wp
 from warp.tests.unittest_utils import *
 
-wp.init()
-
 
 @wp.kernel
 def conditional_sum(result: wp.array(dtype=wp.uint64)):
@@ -86,7 +85,7 @@ def test_large_arrays_slow(test, device):
         dim_x = math.ceil(total_elements ** (1 / total_dims))
         shape_tuple = tuple([dim_x] * total_dims)
 
-        for
+        for wptype in wp.types.scalar_types:
             a1 = wp.zeros(shape_tuple, dtype=wptype, device=device)
             assert_np_equal(a1.numpy(), np.zeros_like(a1.numpy()))
 
@@ -97,22 +96,25 @@ def test_large_arrays_slow(test, device):
             assert_np_equal(a1.numpy(), np.zeros_like(a1.numpy()))
 
 
+@wp.kernel
+def check_array_equal_value(data: wp.array2d(dtype=Any), expect: Any):
+    i, j = wp.tid()
+    wp.expect_eq(data[i, j], expect)
+
+
 def test_large_arrays_fast(test, device):
     # A truncated version of test_large_arrays_slow meant to catch basic errors
 
     # Make is so that a (dim_x, dim_x) array has more than 2**31 elements
     dim_x = math.ceil(math.sqrt(2**31))
 
-
-    wptype = wp.types.np_dtype_to_warp_type[nptype]
-
-    a1 = wp.zeros((dim_x, dim_x), dtype=wptype, device=device)
+    a1 = wp.zeros((dim_x, dim_x), dtype=wp.int8, device=device)
     a1.fill_(127)
 
-
+    wp.launch(check_array_equal_value, a1.shape, inputs=[a1, wp.int8(127)], device=device)
 
     a1.zero_()
-
+    wp.launch(check_array_equal_value, a1.shape, inputs=[a1, wp.int8(0)], device=device)
 
 
 def test_large_array_excessive_zeros(test, device):
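Note on the hunk above: the fast-path test now validates values on the device through a generic kernel (wp.array2d(dtype=Any)) and wp.expect_eq instead of reading back to NumPy, and the explicit wp.init() call is dropped, consistent with the rest of this release's test diffs. A minimal standalone sketch of the same pattern follows; the array shape and fill values are illustrative, not taken from the test (which uses more than 2**31 elements).

from typing import Any

import warp as wp


@wp.kernel
def check_array_equal_value(data: wp.array2d(dtype=Any), expect: Any):
    # One thread per element; expect_eq reports a mismatch from device code.
    i, j = wp.tid()
    wp.expect_eq(data[i, j], expect)


a = wp.zeros((1024, 1024), dtype=wp.int8)  # illustrative shape
a.fill_(127)
wp.launch(check_array_equal_value, dim=a.shape, inputs=[a, wp.int8(127)])
a.zero_()
wp.launch(check_array_equal_value, dim=a.shape, inputs=[a, wp.int8(0)])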
warp/tests/test_launch.py
CHANGED
warp/tests/test_lerp.py
CHANGED
@@ -14,8 +14,6 @@ import numpy as np
 import warp as wp
 from warp.tests.unittest_utils import *
 
-wp.init()
-
 
 @dataclass
 class TestData:
@@ -179,71 +177,29 @@ def test_lerp(test, device):
 
     for data_type in TEST_DATA:
         kernel_fn = make_kernel_fn(data_type)
-        kernel = wp.Kernel(
-            func=kernel_fn,
-            key=f"test_lerp_{data_type.__name__}_kernel",
-        )
+        kernel = wp.Kernel(func=kernel_fn, key=f"test_lerp_{data_type.__name__}_kernel")
 
         with test.subTest(data_type=data_type):
             for test_data in TEST_DATA[data_type]:
-                a = wp.array(
-                    [test_data.a],
-                    dtype=data_type,
-                    device=device,
-                    requires_grad=True,
-                )
-                b = wp.array(
-                    [test_data.b],
-                    dtype=data_type,
-                    device=device,
-                    requires_grad=True,
-                )
-                t = wp.array(
-                    [test_data.t],
-                    dtype=float,
-                    device=device,
-                    requires_grad=True,
-                )
+                a = wp.array([test_data.a], dtype=data_type, device=device, requires_grad=True)
+                b = wp.array([test_data.b], dtype=data_type, device=device, requires_grad=True)
+                t = wp.array([test_data.t], dtype=float, device=device, requires_grad=True)
                 out = wp.array(
-                    [0] * wp.types.type_length(data_type),
-                    dtype=data_type,
-                    device=device,
-                    requires_grad=True,
+                    [0] * wp.types.type_length(data_type), dtype=data_type, device=device, requires_grad=True
                 )
 
                 tape = wp.Tape()
                 with tape:
-                    wp.launch(
-                        kernel,
-                        dim=1,
-                        inputs=[a, b, t, out],
-                        device=device,
-                    )
+                    wp.launch(kernel, dim=1, inputs=[a, b, t, out], device=device)
 
-                assert_np_equal(
-                    out.numpy(),
-                    np.array([test_data.expected]),
-                    tol=1e-6,
-                )
+                assert_np_equal(out.numpy(), np.array([test_data.expected]), tol=1e-6)
 
                 if test_data.check_backwards():
                     tape.backward(out)
 
-                    assert_np_equal(
-                        tape.gradients[a].numpy(),
-                        np.array([test_data.expected_adj_a]),
-                        tol=1e-6,
-                    )
-                    assert_np_equal(
-                        tape.gradients[b].numpy(),
-                        np.array([test_data.expected_adj_b]),
-                        tol=1e-6,
-                    )
-                    assert_np_equal(
-                        tape.gradients[t].numpy(),
-                        np.array([test_data.expected_adj_t]),
-                        tol=1e-6,
-                    )
+                    assert_np_equal(tape.gradients[a].numpy(), np.array([test_data.expected_adj_a]), tol=1e-6)
+                    assert_np_equal(tape.gradients[b].numpy(), np.array([test_data.expected_adj_b]), tol=1e-6)
+                    assert_np_equal(tape.gradients[t].numpy(), np.array([test_data.expected_adj_t]), tol=1e-6)
 
 
 devices = get_test_devices()
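The rewrite above is cosmetic: kernels are still created programmatically with wp.Kernel(func=..., key=...), launched under a wp.Tape, and checked through tape.gradients. A self-contained sketch of that flow, assuming a simple lerp kernel body; the key string and numeric values below are illustrative, not the module's TEST_DATA.

import warp as wp


def lerp_fn(a: wp.array(dtype=float), b: wp.array(dtype=float), t: wp.array(dtype=float), out: wp.array(dtype=float)):
    i = wp.tid()
    out[i] = wp.lerp(a[i], b[i], t[i])


# Build a kernel object from a plain function, as test_lerp does per data type.
kernel = wp.Kernel(func=lerp_fn, key="example_lerp_kernel")

a = wp.array([1.0], dtype=float, requires_grad=True)
b = wp.array([3.0], dtype=float, requires_grad=True)
t = wp.array([0.25], dtype=float, requires_grad=True)
out = wp.zeros(1, dtype=float, requires_grad=True)

tape = wp.Tape()
with tape:
    wp.launch(kernel, dim=1, inputs=[a, b, t, out])
tape.backward(out)

print(out.numpy())                # lerp(1, 3, 0.25) = 1.5
print(tape.gradients[a].numpy())  # adjoint of lerp w.r.t. a is 1 - t = 0.75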
warp/tests/test_linear_solvers.py
CHANGED
@@ -6,9 +6,7 @@ import warp as wp
 from warp.optim.linear import bicgstab, cg, cr, gmres, preconditioner
 from warp.tests.unittest_utils import *
 
-wp.init()
-
-from warp.context import runtime  # noqa: E402
+wp.init()  # For runtime.core.is_cutlass_enabled()
 
 
 def _check_linear_solve(test, A, b, func, *args, **kwargs):
@@ -172,11 +170,11 @@ class TestLinearSolvers(unittest.TestCase):
 
 devices = get_test_devices()
 
-if not runtime.core.is_cutlass_enabled():
+if not wp.context.runtime.core.is_cutlass_enabled():
     devices = [d for d in devices if not d.is_cuda]
     print("Skipping CUDA linear solver tests because CUTLASS is not supported in this build")
 
-if runtime.core.is_debug_enabled():
+if wp.context.runtime.core.is_debug_enabled():
     # cutlass-based matmul is *very* slow in debug mode -- skip
     devices = [d for d in devices if not d.is_cuda]
     print("Skipping CUDA linear solver tests in debug mode")
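The edit above drops the separate `from warp.context import runtime` import and reaches the runtime through wp.context.runtime instead; wp.init() stays only so the runtime object exists before the native build flags are queried. A sketch of that guard pattern, with wp.get_devices() standing in for the test helper get_test_devices():

import warp as wp

wp.init()  # populate wp.context.runtime before querying native build flags

devices = wp.get_devices()
if not wp.context.runtime.core.is_cutlass_enabled():
    # CUTLASS-backed GEMM is unavailable in this build; keep only CPU devices.
    devices = [d for d in devices if not d.is_cuda]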
warp/tests/test_lvalue.py
CHANGED
warp/tests/test_mat.py
CHANGED
warp/tests/test_mat_lite.py
CHANGED
warp/tests/test_math.py
CHANGED
warp/tests/test_matmul.py
CHANGED
@@ -13,9 +13,7 @@ import numpy as np
 import warp as wp
 from warp.tests.unittest_utils import *
 
-wp.init()
-
-from warp.context import runtime  # noqa: E402
+wp.init()  # For wp.context.runtime.core.is_cutlass_enabled()
 
 
 class gemm_test_bed_runner:
@@ -81,7 +79,7 @@ class gemm_test_bed_runner:
         tape.backward(grads={D: ones})
 
         D_np = alpha * (A.numpy() @ B.numpy()) + beta * C.numpy()
-
+        assert_np_equal(D.numpy(), D_np)
 
         adj_A_np = alpha * np.matmul(ones.numpy(), B.numpy().transpose())
         adj_B_np = alpha * (A.numpy().transpose() @ ones.numpy())
@@ -94,15 +92,15 @@ class gemm_test_bed_runner:
         tape.backward(grads={D: ones})
 
         D_np = alpha * np.matmul(A.numpy(), B.numpy()) + beta * C.numpy()
-
+        assert_np_equal(D.numpy(), D_np)
 
         adj_A_np = alpha * np.matmul(ones.numpy(), B.numpy().transpose((0, 2, 1)))
         adj_B_np = alpha * np.matmul(A.numpy().transpose((0, 2, 1)), ones.numpy())
         adj_C_np = beta * ones.numpy()
 
-
-
-
+        assert_np_equal(A.grad.numpy(), adj_A_np)
+        assert_np_equal(B.grad.numpy(), adj_B_np)
+        assert_np_equal(C.grad.numpy(), adj_C_np)
 
     def run(self):
         Ms = [64, 128, 512]
@@ -203,9 +201,9 @@ class gemm_test_bed_runner_transpose:
         tape.backward(grads={D1: ones1, D2: ones2, D3: ones3})
 
         D_np = alpha * (A.numpy() @ B.numpy()) + beta * C1.numpy()
-
-
-
+        assert_np_equal(D1.numpy(), D_np)
+        assert_np_equal(D2.numpy(), D_np)
+        assert_np_equal(D3.numpy(), D_np)
 
         adj_A_np = alpha * (ones1.numpy() @ B.numpy().transpose())
         adj_B_np = alpha * (A.numpy().transpose() @ ones1.numpy())
@@ -224,23 +222,23 @@ class gemm_test_bed_runner_transpose:
         tape.backward(grads={D1: ones1, D2: ones2, D3: ones3})
 
         D_np = alpha * np.matmul(A.numpy(), B.numpy()) + beta * C1.numpy()
-
-
-
+        assert_np_equal(D1.numpy(), D_np)
+        assert_np_equal(D2.numpy(), D_np)
+        assert_np_equal(D3.numpy(), D_np)
 
         adj_A_np = alpha * np.matmul(ones1.numpy(), B.numpy().transpose((0, 2, 1)))
         adj_B_np = alpha * np.matmul(A.numpy().transpose((0, 2, 1)), ones1.numpy())
         adj_C_np = beta * ones1.numpy()
 
-
-
-
-
-
-
-
-
-
+        assert_np_equal(A.grad.numpy(), adj_A_np)
+        assert_np_equal(ATT1.grad.numpy(), adj_A_np)
+        assert_np_equal(ATT2.grad.numpy(), adj_A_np)
+        assert_np_equal(B.grad.numpy(), adj_B_np)
+        assert_np_equal(BTT1.grad.numpy(), adj_B_np)
+        assert_np_equal(BTT2.grad.numpy(), adj_B_np)
+        assert_np_equal(C1.grad.numpy(), adj_C_np)
+        assert_np_equal(C2.grad.numpy(), adj_C_np)
+        assert_np_equal(C3.grad.numpy(), adj_C_np)
 
     def run(self):
         m = 16
@@ -260,13 +258,13 @@ def test_f16(test, device):
     gemm_test_bed_runner_transpose(wp.float16, device).run()
 
 
-@unittest.skipUnless(runtime.core.is_cutlass_enabled(), "Warp was not built with CUTLASS support")
+@unittest.skipUnless(wp.context.runtime.core.is_cutlass_enabled(), "Warp was not built with CUTLASS support")
 def test_f32(test, device):
     gemm_test_bed_runner(wp.float32, device).run()
     gemm_test_bed_runner_transpose(wp.float32, device).run()
 
 
-@unittest.skipUnless(runtime.core.is_cutlass_enabled(), "Warp was not built with CUTLASS support")
+@unittest.skipUnless(wp.context.runtime.core.is_cutlass_enabled(), "Warp was not built with CUTLASS support")
 def test_f64(test, device):
     gemm_test_bed_runner(wp.float64, device).run()
     gemm_test_bed_runner_transpose(wp.float64, device).run()
@@ -278,7 +276,7 @@ def matrix_sum_kernel(arr: wp.array2d(dtype=float), loss: wp.array(dtype=float)):
     wp.atomic_add(loss, 0, arr[i, j])
 
 
-@unittest.skipUnless(runtime.core.is_cutlass_enabled(), "Warp was not built with CUTLASS support")
+@unittest.skipUnless(wp.context.runtime.core.is_cutlass_enabled(), "Warp was not built with CUTLASS support")
 def test_tape(test, device):
     rng = np.random.default_rng(42)
     low = -4.5
@@ -318,7 +316,7 @@ def test_tape(test, device):
     assert_array_equal(A.grad, wp.zeros_like(A))
 
 
-@unittest.skipUnless(runtime.core.is_cutlass_enabled(), "Warp was not built with CUTLASS support")
+@unittest.skipUnless(wp.context.runtime.core.is_cutlass_enabled(), "Warp was not built with CUTLASS support")
 def test_operator(test, device):
     rng = np.random.default_rng(42)
     low = -4.5
@@ -354,7 +352,7 @@ def test_operator(test, device):
     assert_array_equal(A.grad, wp.zeros_like(A))
 
 
-@unittest.skipUnless(runtime.core.is_cutlass_enabled(), "Warp was not built with CUTLASS support")
+@unittest.skipUnless(wp.context.runtime.core.is_cutlass_enabled(), "Warp was not built with CUTLASS support")
 def test_large_batch_count(test, device):
     rng = np.random.default_rng(42)
     low = -4.5
@@ -394,18 +392,18 @@ def test_large_batch_count(test, device):
     tape.backward(grads={D: ones})
 
     D_np = alpha * np.matmul(A.numpy(), B.numpy()) + beta * C.numpy()
-
+    assert_np_equal(D.numpy(), D_np)
 
     adj_A_np = alpha * np.matmul(ones.numpy(), B.numpy().transpose((0, 2, 1)))
     adj_B_np = alpha * np.matmul(A.numpy().transpose((0, 2, 1)), ones.numpy())
     adj_C_np = beta * ones.numpy()
 
-
-
-
+    assert_np_equal(A.grad.numpy(), adj_A_np)
+    assert_np_equal(B.grad.numpy(), adj_B_np)
+    assert_np_equal(C.grad.numpy(), adj_C_np)
 
 
-@unittest.skipUnless(runtime.core.is_cutlass_enabled(), "Warp was not built with CUTLASS support")
+@unittest.skipUnless(wp.context.runtime.core.is_cutlass_enabled(), "Warp was not built with CUTLASS support")
 def test_adjoint_accumulation(test, device):
     a_np = np.ones(shape=(2, 3))
     b_np = np.ones(shape=(3, 2))
@@ -429,12 +427,12 @@ def test_adjoint_accumulation(test, device):
     grads = {d2_wp: d_grad}
     tape.backward(grads=grads)
 
-
-
-
+    assert_np_equal(a_wp.grad.numpy(), 4.0 * np.ones(shape=(2, 3)))
+    assert_np_equal(b_wp.grad.numpy(), 4.0 * np.ones(shape=(3, 2)))
+    assert_np_equal(c_wp.grad.numpy(), np.ones(shape=(2, 2)))
 
 
-@unittest.skipUnless(runtime.core.is_cutlass_enabled(), "Warp was not built with CUTLASS support")
+@unittest.skipUnless(wp.context.runtime.core.is_cutlass_enabled(), "Warp was not built with CUTLASS support")
 def test_cuda_graph_capture(test, device):
     @wp.kernel
     def mat_sum(mat: wp.array2d(dtype=Any), loss: wp.array(dtype=Any)):
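The restored assertions above check the GEMM forward result and the tape-propagated gradients against NumPy references, with D = alpha * (A @ B) + beta * C and the adjoint seeded by tape.backward(grads={D: ones}). A hedged sketch of that check outside the test harness: shapes, alpha and beta are illustrative, the wp.matmul signature is assumed from Warp's documentation rather than shown in this diff, and the CUDA path additionally needs a CUTLASS-enabled build, per the skip decorators.

import numpy as np
import warp as wp

m, n, k = 8, 8, 8
alpha, beta = 2.0, 0.5
rng = np.random.default_rng(42)

A = wp.array(rng.random((m, k), dtype=np.float32), requires_grad=True)
B = wp.array(rng.random((k, n), dtype=np.float32), requires_grad=True)
C = wp.array(rng.random((m, n), dtype=np.float32), requires_grad=True)
D = wp.zeros((m, n), dtype=wp.float32, requires_grad=True)
ones = wp.array(np.ones((m, n), dtype=np.float32))

tape = wp.Tape()
with tape:
    wp.matmul(A, B, C, D, alpha=alpha, beta=beta)  # D = alpha * A @ B + beta * C
tape.backward(grads={D: ones})  # seed dL/dD with ones, as the tests do

D_np = alpha * (A.numpy() @ B.numpy()) + beta * C.numpy()
assert np.allclose(D.numpy(), D_np, atol=1e-4)
assert np.allclose(A.grad.numpy(), alpha * (ones.numpy() @ B.numpy().T), atol=1e-4)
assert np.allclose(C.grad.numpy(), beta * ones.numpy(), atol=1e-4)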
warp/tests/test_matmul_lite.py
CHANGED
@@ -12,9 +12,7 @@ import numpy as np
 import warp as wp
 from warp.tests.unittest_utils import *
 
-wp.init()
-
-from warp.context import runtime  # noqa: E402
+wp.init()  # For wp.context.runtime.core.is_cutlass_enabled()
 
 
 class gemm_test_bed_runner:
@@ -80,7 +78,7 @@ class gemm_test_bed_runner:
         tape.backward(grads={D: ones})
 
         D_np = alpha * (A.numpy() @ B.numpy()) + beta * C.numpy()
-
+        assert_np_equal(D.numpy(), D_np)
 
         adj_A_np = alpha * np.matmul(ones.numpy(), B.numpy().transpose())
         adj_B_np = alpha * (A.numpy().transpose() @ ones.numpy())
@@ -93,15 +91,15 @@ class gemm_test_bed_runner:
         tape.backward(grads={D: ones})
 
         D_np = alpha * np.matmul(A.numpy(), B.numpy()) + beta * C.numpy()
-
+        assert_np_equal(D.numpy(), D_np)
 
         adj_A_np = alpha * np.matmul(ones.numpy(), B.numpy().transpose((0, 2, 1)))
         adj_B_np = alpha * np.matmul(A.numpy().transpose((0, 2, 1)), ones.numpy())
         adj_C_np = beta * ones.numpy()
 
-
-
-
+        assert_np_equal(A.grad.numpy(), adj_A_np)
+        assert_np_equal(B.grad.numpy(), adj_B_np)
+        assert_np_equal(C.grad.numpy(), adj_C_np)
 
     def run(self):
         Ms = [8]
@@ -202,9 +200,9 @@ class gemm_test_bed_runner_transpose:
         tape.backward(grads={D1: ones1, D2: ones2, D3: ones3})
 
         D_np = alpha * (A.numpy() @ B.numpy()) + beta * C1.numpy()
-
-
-
+        assert_np_equal(D1.numpy(), D_np)
+        assert_np_equal(D2.numpy(), D_np)
+        assert_np_equal(D3.numpy(), D_np)
 
         adj_A_np = alpha * (ones1.numpy() @ B.numpy().transpose())
         adj_B_np = alpha * (A.numpy().transpose() @ ones1.numpy())
@@ -223,23 +221,23 @@ class gemm_test_bed_runner_transpose:
         tape.backward(grads={D1: ones1, D2: ones2, D3: ones3})
 
         D_np = alpha * np.matmul(A.numpy(), B.numpy()) + beta * C1.numpy()
-
-
-
+        assert_np_equal(D1.numpy(), D_np)
+        assert_np_equal(D2.numpy(), D_np)
+        assert_np_equal(D3.numpy(), D_np)
 
         adj_A_np = alpha * np.matmul(ones1.numpy(), B.numpy().transpose((0, 2, 1)))
         adj_B_np = alpha * np.matmul(A.numpy().transpose((0, 2, 1)), ones1.numpy())
         adj_C_np = beta * ones1.numpy()
 
-
-
-
-
-
-
-
-
-
+        assert_np_equal(A.grad.numpy(), adj_A_np)
+        assert_np_equal(ATT1.grad.numpy(), adj_A_np)
+        assert_np_equal(ATT2.grad.numpy(), adj_A_np)
+        assert_np_equal(B.grad.numpy(), adj_B_np)
+        assert_np_equal(BTT1.grad.numpy(), adj_B_np)
+        assert_np_equal(BTT2.grad.numpy(), adj_B_np)
+        assert_np_equal(C1.grad.numpy(), adj_C_np)
+        assert_np_equal(C2.grad.numpy(), adj_C_np)
+        assert_np_equal(C3.grad.numpy(), adj_C_np)
 
     def run(self):
         m = 8
@@ -253,7 +251,7 @@ class gemm_test_bed_runner_transpose:
         self.run_and_verify(m, n, k, batch_count, alpha, beta)
 
 
-@unittest.skipUnless(runtime.core.is_cutlass_enabled(), "Warp was not built with CUTLASS support")
+@unittest.skipUnless(wp.context.runtime.core.is_cutlass_enabled(), "Warp was not built with CUTLASS support")
 def test_f32(test, device):
     gemm_test_bed_runner(wp.float32, device).run()
     gemm_test_bed_runner_transpose(wp.float32, device).run()
@@ -265,7 +263,7 @@ def matrix_sum_kernel(arr: wp.array2d(dtype=float), loss: wp.array(dtype=float)):
     wp.atomic_add(loss, 0, arr[i, j])
 
 
-@unittest.skipUnless(runtime.core.is_cutlass_enabled(), "Warp was not built with CUTLASS support")
+@unittest.skipUnless(wp.context.runtime.core.is_cutlass_enabled(), "Warp was not built with CUTLASS support")
 def test_tape(test, device):
     rng = np.random.default_rng(42)
     low = -4.5
@@ -305,7 +303,7 @@ def test_tape(test, device):
     assert_array_equal(A.grad, wp.zeros_like(A))
 
 
-@unittest.skipUnless(runtime.core.is_cutlass_enabled(), "Warp was not built with CUTLASS support")
+@unittest.skipUnless(wp.context.runtime.core.is_cutlass_enabled(), "Warp was not built with CUTLASS support")
 def test_operator(test, device):
     rng = np.random.default_rng(42)
     low = -4.5
@@ -341,7 +339,7 @@ def test_operator(test, device):
     assert_array_equal(A.grad, wp.zeros_like(A))
 
 
-@unittest.skipUnless(runtime.core.is_cutlass_enabled(), "Warp was not built with CUTLASS support")
+@unittest.skipUnless(wp.context.runtime.core.is_cutlass_enabled(), "Warp was not built with CUTLASS support")
 def test_large_batch_count(test, device):
     rng = np.random.default_rng(42)
     low = -4.5
@@ -381,15 +379,15 @@ def test_large_batch_count(test, device):
     tape.backward(grads={D: ones})
 
     D_np = alpha * np.matmul(A.numpy(), B.numpy()) + beta * C.numpy()
-
+    assert_np_equal(D.numpy(), D_np)
 
     adj_A_np = alpha * np.matmul(ones.numpy(), B.numpy().transpose((0, 2, 1)))
     adj_B_np = alpha * np.matmul(A.numpy().transpose((0, 2, 1)), ones.numpy())
     adj_C_np = beta * ones.numpy()
 
-
-
-
+    assert_np_equal(A.grad.numpy(), adj_A_np)
+    assert_np_equal(B.grad.numpy(), adj_B_np)
+    assert_np_equal(C.grad.numpy(), adj_C_np)
 
 
 devices = get_test_devices()
warp/tests/test_mempool.py
CHANGED
warp/tests/test_mesh.py
CHANGED
warp/tests/test_mlp.py
CHANGED