warp-lang 1.8.1__py3-none-win_amd64.whl → 1.9.1__py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of warp-lang might be problematic. Click here for more details.
- warp/__init__.py +282 -103
- warp/__init__.pyi +1904 -114
- warp/bin/warp-clang.dll +0 -0
- warp/bin/warp.dll +0 -0
- warp/build.py +93 -30
- warp/build_dll.py +331 -101
- warp/builtins.py +1244 -160
- warp/codegen.py +317 -206
- warp/config.py +1 -1
- warp/context.py +1465 -789
- warp/examples/core/example_marching_cubes.py +1 -0
- warp/examples/core/example_render_opengl.py +100 -3
- warp/examples/fem/example_apic_fluid.py +98 -52
- warp/examples/fem/example_convection_diffusion_dg.py +25 -4
- warp/examples/fem/example_diffusion_mgpu.py +8 -3
- warp/examples/fem/utils.py +68 -22
- warp/examples/interop/example_jax_kernel.py +2 -1
- warp/fabric.py +1 -1
- warp/fem/cache.py +27 -19
- warp/fem/domain.py +2 -2
- warp/fem/field/nodal_field.py +2 -2
- warp/fem/field/virtual.py +264 -166
- warp/fem/geometry/geometry.py +5 -5
- warp/fem/integrate.py +129 -51
- warp/fem/space/restriction.py +4 -0
- warp/fem/space/shape/tet_shape_function.py +3 -10
- warp/jax_experimental/custom_call.py +25 -2
- warp/jax_experimental/ffi.py +22 -1
- warp/jax_experimental/xla_ffi.py +16 -7
- warp/marching_cubes.py +708 -0
- warp/native/array.h +99 -4
- warp/native/builtin.h +86 -9
- warp/native/bvh.cpp +64 -28
- warp/native/bvh.cu +58 -58
- warp/native/bvh.h +2 -2
- warp/native/clang/clang.cpp +7 -7
- warp/native/coloring.cpp +8 -2
- warp/native/crt.cpp +2 -2
- warp/native/crt.h +3 -5
- warp/native/cuda_util.cpp +41 -10
- warp/native/cuda_util.h +10 -4
- warp/native/exports.h +1842 -1908
- warp/native/fabric.h +2 -1
- warp/native/hashgrid.cpp +37 -37
- warp/native/hashgrid.cu +2 -2
- warp/native/initializer_array.h +1 -1
- warp/native/intersect.h +2 -2
- warp/native/mat.h +1910 -116
- warp/native/mathdx.cpp +43 -43
- warp/native/mesh.cpp +24 -24
- warp/native/mesh.cu +26 -26
- warp/native/mesh.h +4 -2
- warp/native/nanovdb/GridHandle.h +179 -12
- warp/native/nanovdb/HostBuffer.h +8 -7
- warp/native/nanovdb/NanoVDB.h +517 -895
- warp/native/nanovdb/NodeManager.h +323 -0
- warp/native/nanovdb/PNanoVDB.h +2 -2
- warp/native/quat.h +331 -14
- warp/native/range.h +7 -1
- warp/native/reduce.cpp +10 -10
- warp/native/reduce.cu +13 -14
- warp/native/runlength_encode.cpp +2 -2
- warp/native/runlength_encode.cu +5 -5
- warp/native/scan.cpp +3 -3
- warp/native/scan.cu +4 -4
- warp/native/sort.cpp +10 -10
- warp/native/sort.cu +40 -31
- warp/native/sort.h +2 -0
- warp/native/sparse.cpp +8 -8
- warp/native/sparse.cu +13 -13
- warp/native/spatial.h +366 -17
- warp/native/temp_buffer.h +2 -2
- warp/native/tile.h +471 -82
- warp/native/vec.h +328 -14
- warp/native/volume.cpp +54 -54
- warp/native/volume.cu +1 -1
- warp/native/volume.h +2 -1
- warp/native/volume_builder.cu +30 -37
- warp/native/warp.cpp +150 -149
- warp/native/warp.cu +377 -216
- warp/native/warp.h +227 -226
- warp/optim/linear.py +736 -271
- warp/render/imgui_manager.py +289 -0
- warp/render/render_opengl.py +99 -18
- warp/render/render_usd.py +1 -0
- warp/sim/graph_coloring.py +2 -2
- warp/sparse.py +558 -175
- warp/tests/aux_test_module_aot.py +7 -0
- warp/tests/cuda/test_async.py +3 -3
- warp/tests/cuda/test_conditional_captures.py +101 -0
- warp/tests/geometry/test_hash_grid.py +38 -0
- warp/tests/geometry/test_marching_cubes.py +233 -12
- warp/tests/interop/test_jax.py +608 -28
- warp/tests/sim/test_coloring.py +6 -6
- warp/tests/test_array.py +58 -5
- warp/tests/test_codegen.py +4 -3
- warp/tests/test_context.py +8 -15
- warp/tests/test_enum.py +136 -0
- warp/tests/test_examples.py +2 -2
- warp/tests/test_fem.py +49 -6
- warp/tests/test_fixedarray.py +229 -0
- warp/tests/test_func.py +18 -15
- warp/tests/test_future_annotations.py +7 -5
- warp/tests/test_linear_solvers.py +30 -0
- warp/tests/test_map.py +15 -1
- warp/tests/test_mat.py +1518 -378
- warp/tests/test_mat_assign_copy.py +178 -0
- warp/tests/test_mat_constructors.py +574 -0
- warp/tests/test_module_aot.py +287 -0
- warp/tests/test_print.py +69 -0
- warp/tests/test_quat.py +140 -34
- warp/tests/test_quat_assign_copy.py +145 -0
- warp/tests/test_reload.py +2 -1
- warp/tests/test_sparse.py +71 -0
- warp/tests/test_spatial.py +140 -34
- warp/tests/test_spatial_assign_copy.py +160 -0
- warp/tests/test_struct.py +43 -3
- warp/tests/test_tuple.py +96 -0
- warp/tests/test_types.py +61 -20
- warp/tests/test_vec.py +179 -34
- warp/tests/test_vec_assign_copy.py +143 -0
- warp/tests/tile/test_tile.py +245 -18
- warp/tests/tile/test_tile_cholesky.py +605 -0
- warp/tests/tile/test_tile_load.py +169 -0
- warp/tests/tile/test_tile_mathdx.py +2 -558
- warp/tests/tile/test_tile_matmul.py +1 -1
- warp/tests/tile/test_tile_mlp.py +1 -1
- warp/tests/tile/test_tile_shared_memory.py +5 -5
- warp/tests/unittest_suites.py +6 -0
- warp/tests/walkthrough_debug.py +1 -1
- warp/thirdparty/unittest_parallel.py +108 -9
- warp/types.py +571 -267
- warp/utils.py +68 -86
- {warp_lang-1.8.1.dist-info → warp_lang-1.9.1.dist-info}/METADATA +29 -69
- {warp_lang-1.8.1.dist-info → warp_lang-1.9.1.dist-info}/RECORD +138 -128
- warp/native/marching.cpp +0 -19
- warp/native/marching.cu +0 -514
- warp/native/marching.h +0 -19
- {warp_lang-1.8.1.dist-info → warp_lang-1.9.1.dist-info}/WHEEL +0 -0
- {warp_lang-1.8.1.dist-info → warp_lang-1.9.1.dist-info}/licenses/LICENSE.md +0 -0
- {warp_lang-1.8.1.dist-info → warp_lang-1.9.1.dist-info}/top_level.txt +0 -0
|
@@ -21,7 +21,7 @@ import numpy as np
|
|
|
21
21
|
import warp as wp
|
|
22
22
|
from warp.tests.unittest_utils import *
|
|
23
23
|
|
|
24
|
-
wp.init() # For wp.context.runtime.core.is_mathdx_enabled()
|
|
24
|
+
wp.init() # For wp.context.runtime.core.wp_is_mathdx_enabled()
|
|
25
25
|
|
|
26
26
|
TILE_M = wp.constant(8)
|
|
27
27
|
TILE_N = wp.constant(4)
|
|
@@ -45,7 +45,6 @@ def tile_math_matmul_kernel(
|
|
|
45
45
|
wp.tile_store(gc, c, offset=(i * TILE_M, j * TILE_N))
|
|
46
46
|
|
|
47
47
|
|
|
48
|
-
@unittest.skipUnless(wp.context.runtime.core.cuda_toolkit_version() >= 12060, "CUDA toolkit version is less than 12.6")
|
|
49
48
|
def test_tile_math_matmul(test, device):
|
|
50
49
|
rng = np.random.default_rng(42)
|
|
51
50
|
|
|
@@ -93,7 +92,7 @@ def tile_math_fft_kernel_vec2d(gx: wp.array2d(dtype=wp.vec2d), gy: wp.array2d(dt
|
|
|
93
92
|
wp.tile_store(gy, xy)
|
|
94
93
|
|
|
95
94
|
|
|
96
|
-
@unittest.skipUnless(wp.context.runtime.core.is_mathdx_enabled(), "Warp was not built with MathDx support")
|
|
95
|
+
@unittest.skipUnless(wp.context.runtime.core.wp_is_mathdx_enabled(), "Warp was not built with MathDx support")
|
|
97
96
|
def test_tile_math_fft(test, device, wp_dtype):
|
|
98
97
|
np_real_dtype = {wp.vec2f: np.float32, wp.vec2d: np.float64}[wp_dtype]
|
|
99
98
|
np_cplx_dtype = {wp.vec2f: np.complex64, wp.vec2d: np.complex128}[wp_dtype]
|
|
@@ -124,503 +123,6 @@ def test_tile_math_fft(test, device, wp_dtype):
|
|
|
124
123
|
# TODO: implement and test backward pass
|
|
125
124
|
|
|
126
125
|
|
|
127
|
-
@wp.kernel()
|
|
128
|
-
def tile_math_cholesky(
|
|
129
|
-
gA: wp.array2d(dtype=wp.float64),
|
|
130
|
-
gD: wp.array1d(dtype=wp.float64),
|
|
131
|
-
gL: wp.array2d(dtype=wp.float64),
|
|
132
|
-
gy: wp.array1d(dtype=wp.float64),
|
|
133
|
-
gx: wp.array1d(dtype=wp.float64),
|
|
134
|
-
):
|
|
135
|
-
i, j = wp.tid()
|
|
136
|
-
# Load A, D & y
|
|
137
|
-
a = wp.tile_load(gA, shape=(TILE_M, TILE_M), storage="shared")
|
|
138
|
-
d = wp.tile_load(gD, shape=TILE_M, storage="shared")
|
|
139
|
-
y = wp.tile_load(gy, shape=TILE_M, storage="shared")
|
|
140
|
-
# Ensure tile_diag_add() and tile_cholesky_solve() work with transposed matrices
|
|
141
|
-
a_t = wp.tile_transpose(a)
|
|
142
|
-
# Compute L st LL^T = A^T + diag(D)
|
|
143
|
-
b = wp.tile_diag_add(a_t, d)
|
|
144
|
-
l = wp.tile_cholesky(b)
|
|
145
|
-
# Solve for y in LL^T x = y
|
|
146
|
-
x = wp.tile_cholesky_solve(l, y)
|
|
147
|
-
# Store L & y
|
|
148
|
-
wp.tile_store(gL, l)
|
|
149
|
-
wp.tile_store(gx, x)
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
@unittest.skipUnless(wp.context.runtime.core.cuda_toolkit_version() >= 12060, "CUDA toolkit version is less than 12.6")
|
|
153
|
-
def test_tile_math_cholesky(test, device):
|
|
154
|
-
A_h = np.ones((TILE_M, TILE_M), dtype=np.float64)
|
|
155
|
-
D_h = 8.0 * np.ones(TILE_M, dtype=np.float64)
|
|
156
|
-
L_h = np.zeros_like(A_h)
|
|
157
|
-
Y_h = np.arange(TILE_M, dtype=np.float64)
|
|
158
|
-
X_h = np.zeros_like(Y_h)
|
|
159
|
-
|
|
160
|
-
A_np = A_h.T + np.diag(D_h)
|
|
161
|
-
L_np = np.linalg.cholesky(A_np)
|
|
162
|
-
X_np = np.linalg.solve(A_np, Y_h)
|
|
163
|
-
|
|
164
|
-
A_wp = wp.array2d(A_h, requires_grad=True, dtype=wp.float64, device=device)
|
|
165
|
-
D_wp = wp.array2d(D_h, requires_grad=True, dtype=wp.float64, device=device)
|
|
166
|
-
L_wp = wp.array2d(L_h, requires_grad=True, dtype=wp.float64, device=device)
|
|
167
|
-
Y_wp = wp.array2d(Y_h, requires_grad=True, dtype=wp.float64, device=device)
|
|
168
|
-
X_wp = wp.array2d(X_h, requires_grad=True, dtype=wp.float64, device=device)
|
|
169
|
-
|
|
170
|
-
wp.launch_tiled(
|
|
171
|
-
tile_math_cholesky, dim=[1, 1], inputs=[A_wp, D_wp, L_wp, Y_wp, X_wp], block_dim=TILE_DIM, device=device
|
|
172
|
-
)
|
|
173
|
-
wp.synchronize_device(device)
|
|
174
|
-
|
|
175
|
-
np.testing.assert_allclose(X_wp.numpy(), X_np)
|
|
176
|
-
np.testing.assert_allclose(L_wp.numpy(), L_np)
|
|
177
|
-
|
|
178
|
-
# TODO: implement and test backward pass
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
@wp.kernel()
|
|
182
|
-
def tile_math_cholesky_multiple_rhs(
|
|
183
|
-
gA: wp.array2d(dtype=wp.float64),
|
|
184
|
-
gD: wp.array1d(dtype=wp.float64),
|
|
185
|
-
gL: wp.array2d(dtype=wp.float64),
|
|
186
|
-
gy: wp.array2d(dtype=wp.float64),
|
|
187
|
-
gx: wp.array2d(dtype=wp.float64),
|
|
188
|
-
gz: wp.array2d(dtype=wp.float64),
|
|
189
|
-
):
|
|
190
|
-
i, j = wp.tid()
|
|
191
|
-
# Load A, D & y
|
|
192
|
-
a = wp.tile_load(gA, shape=(TILE_M, TILE_M), storage="shared")
|
|
193
|
-
d = wp.tile_load(gD, shape=TILE_M, storage="shared")
|
|
194
|
-
y = wp.tile_load(gy, shape=(TILE_M, TILE_M), storage="shared")
|
|
195
|
-
# Ensure tile_diag_add() and tile_cholesky_solve() work with transposed matrices
|
|
196
|
-
a_t = wp.tile_transpose(a)
|
|
197
|
-
# Compute L st LL^T = A.T + diag(D)
|
|
198
|
-
b = wp.tile_diag_add(a_t, d)
|
|
199
|
-
l = wp.tile_cholesky(b)
|
|
200
|
-
# Solve for y in LL^T x = y.T
|
|
201
|
-
y_t = wp.tile_transpose(y)
|
|
202
|
-
x = wp.tile_cholesky_solve(l, y_t)
|
|
203
|
-
# Ensure matmul receives correct layout information
|
|
204
|
-
z = wp.tile_matmul(x, x)
|
|
205
|
-
# Store L & y
|
|
206
|
-
wp.tile_store(gL, l)
|
|
207
|
-
wp.tile_store(gx, x)
|
|
208
|
-
wp.tile_store(gz, z)
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
@unittest.skipUnless(wp.context.runtime.core.cuda_toolkit_version() >= 12060, "CUDA toolkit version is less than 12.6")
|
|
212
|
-
def test_tile_math_cholesky_multiple_rhs(test, device):
|
|
213
|
-
A_h = np.ones((TILE_M, TILE_M), dtype=np.float64)
|
|
214
|
-
D_h = 8.0 * np.ones(TILE_M, dtype=np.float64)
|
|
215
|
-
L_h = np.zeros_like(A_h)
|
|
216
|
-
Y_h = np.arange((TILE_M, TILE_M), dtype=np.float64)
|
|
217
|
-
X_h = np.zeros_like(Y_h)
|
|
218
|
-
Z_h = np.zeros_like(Y_h)
|
|
219
|
-
|
|
220
|
-
A_np = A_h.T + np.diag(D_h)
|
|
221
|
-
L_np = np.linalg.cholesky(A_np)
|
|
222
|
-
X_np = np.linalg.solve(A_np, Y_h.T)
|
|
223
|
-
Z_np = X_np @ X_np
|
|
224
|
-
|
|
225
|
-
A_wp = wp.array2d(A_h, requires_grad=True, dtype=wp.float64, device=device)
|
|
226
|
-
D_wp = wp.array2d(D_h, requires_grad=True, dtype=wp.float64, device=device)
|
|
227
|
-
L_wp = wp.array2d(L_h, requires_grad=True, dtype=wp.float64, device=device)
|
|
228
|
-
Y_wp = wp.array2d(Y_h, requires_grad=True, dtype=wp.float64, device=device)
|
|
229
|
-
X_wp = wp.array2d(X_h, requires_grad=True, dtype=wp.float64, device=device)
|
|
230
|
-
Z_wp = wp.array2d(Z_h, requires_grad=True, dtype=wp.float64, device=device)
|
|
231
|
-
|
|
232
|
-
wp.launch_tiled(
|
|
233
|
-
tile_math_cholesky_multiple_rhs,
|
|
234
|
-
dim=[1, 1],
|
|
235
|
-
inputs=[A_wp, D_wp, L_wp, Y_wp, X_wp, Z_wp],
|
|
236
|
-
block_dim=TILE_DIM,
|
|
237
|
-
device=device,
|
|
238
|
-
)
|
|
239
|
-
wp.synchronize_device(device)
|
|
240
|
-
|
|
241
|
-
np.testing.assert_allclose(L_wp.numpy(), L_np)
|
|
242
|
-
np.testing.assert_allclose(X_wp.numpy(), X_np)
|
|
243
|
-
np.testing.assert_allclose(Z_wp.numpy(), Z_np)
|
|
244
|
-
|
|
245
|
-
# TODO: implement and test backward pass
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
@wp.kernel
|
|
249
|
-
def tile_math_forward_substitution(
|
|
250
|
-
gL: wp.array2d(dtype=wp.float64), gx: wp.array1d(dtype=wp.float64), gz: wp.array1d(dtype=wp.float64)
|
|
251
|
-
):
|
|
252
|
-
i, j = wp.tid()
|
|
253
|
-
# Load L & x
|
|
254
|
-
L = wp.tile_load(gL, shape=(TILE_M, TILE_M), storage="shared")
|
|
255
|
-
x = wp.tile_load(gx, shape=TILE_M, storage="shared")
|
|
256
|
-
# Solve for z in Lz = x
|
|
257
|
-
# Transpose because we loaded an upper triangular matrix
|
|
258
|
-
z = wp.tile_lower_solve(wp.tile_transpose(L), x)
|
|
259
|
-
# Store z
|
|
260
|
-
wp.tile_store(gz, z)
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
@unittest.skipUnless(wp.context.runtime.core.cuda_toolkit_version() >= 12060, "CUDA toolkit version is less than 12.6")
|
|
264
|
-
def test_tile_math_forward_substitution(test, device):
|
|
265
|
-
# Create test data
|
|
266
|
-
rng = np.random.default_rng(42)
|
|
267
|
-
L_h = np.triu(rng.random((TILE_M, TILE_M))) # Upper triangular matrix
|
|
268
|
-
x_h = rng.random(TILE_M)
|
|
269
|
-
z_h = np.zeros_like(x_h)
|
|
270
|
-
|
|
271
|
-
# Compute reference solution using numpy
|
|
272
|
-
z_np = np.linalg.solve(L_h.T, x_h)
|
|
273
|
-
|
|
274
|
-
# Create Warp arrays
|
|
275
|
-
L_wp = wp.array2d(L_h, requires_grad=True, dtype=wp.float64, device=device)
|
|
276
|
-
x_wp = wp.array1d(x_h, requires_grad=True, dtype=wp.float64, device=device)
|
|
277
|
-
z_wp = wp.array1d(z_h, requires_grad=True, dtype=wp.float64, device=device)
|
|
278
|
-
|
|
279
|
-
# Run kernel
|
|
280
|
-
wp.launch_tiled(
|
|
281
|
-
tile_math_forward_substitution, dim=[1, 1], inputs=[L_wp, x_wp, z_wp], block_dim=TILE_DIM, device=device
|
|
282
|
-
)
|
|
283
|
-
wp.synchronize_device(device)
|
|
284
|
-
|
|
285
|
-
# Verify results
|
|
286
|
-
np.testing.assert_allclose(z_wp.numpy(), z_np)
|
|
287
|
-
|
|
288
|
-
# TODO: implement and test backward pass
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
@wp.kernel
|
|
292
|
-
def tile_math_back_substitution(
|
|
293
|
-
gL: wp.array2d(dtype=wp.float64), gx: wp.array1d(dtype=wp.float64), gz: wp.array1d(dtype=wp.float64)
|
|
294
|
-
):
|
|
295
|
-
i, j = wp.tid()
|
|
296
|
-
# Load L & x
|
|
297
|
-
L = wp.tile_load(gL, shape=(TILE_M, TILE_M), storage="shared")
|
|
298
|
-
x = wp.tile_load(gx, shape=TILE_M, storage="shared")
|
|
299
|
-
# Solve for z in L^T z = x
|
|
300
|
-
# Transpose because we loaded a lower triangular matrix
|
|
301
|
-
z = wp.tile_upper_solve(wp.tile_transpose(L), x)
|
|
302
|
-
# Store z
|
|
303
|
-
wp.tile_store(gz, z)
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
@unittest.skipUnless(wp.context.runtime.core.cuda_toolkit_version() >= 12060, "CUDA toolkit version is less than 12.6")
|
|
307
|
-
def test_tile_math_back_substitution(test, device):
|
|
308
|
-
# Create test data
|
|
309
|
-
rng = np.random.default_rng(42)
|
|
310
|
-
L_h = np.tril(rng.random((TILE_M, TILE_M))) # Lower triangular matrix
|
|
311
|
-
x_h = rng.random(TILE_M)
|
|
312
|
-
z_h = np.zeros_like(x_h)
|
|
313
|
-
|
|
314
|
-
# Compute reference solution using numpy
|
|
315
|
-
z_np = np.linalg.solve(L_h.T, x_h)
|
|
316
|
-
|
|
317
|
-
# Create Warp arrays
|
|
318
|
-
L_wp = wp.array2d(L_h, requires_grad=True, dtype=wp.float64, device=device)
|
|
319
|
-
x_wp = wp.array1d(x_h, requires_grad=True, dtype=wp.float64, device=device)
|
|
320
|
-
z_wp = wp.array1d(z_h, requires_grad=True, dtype=wp.float64, device=device)
|
|
321
|
-
|
|
322
|
-
# Run kernel
|
|
323
|
-
wp.launch_tiled(
|
|
324
|
-
tile_math_back_substitution, dim=[1, 1], inputs=[L_wp, x_wp, z_wp], block_dim=TILE_DIM, device=device
|
|
325
|
-
)
|
|
326
|
-
wp.synchronize_device(device)
|
|
327
|
-
|
|
328
|
-
# Verify results
|
|
329
|
-
np.testing.assert_allclose(z_wp.numpy(), z_np)
|
|
330
|
-
|
|
331
|
-
# TODO: implement and test backward pass
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
@wp.kernel
|
|
335
|
-
def tile_math_forward_substitution_multiple_rhs(
|
|
336
|
-
gL: wp.array2d(dtype=wp.float64),
|
|
337
|
-
gx: wp.array2d(dtype=wp.float64),
|
|
338
|
-
gz: wp.array2d(dtype=wp.float64),
|
|
339
|
-
gc: wp.array2d(dtype=wp.float64),
|
|
340
|
-
):
|
|
341
|
-
i, j = wp.tid()
|
|
342
|
-
# Load L & x
|
|
343
|
-
L = wp.tile_load(gL, shape=(TILE_M, TILE_M), storage="shared")
|
|
344
|
-
x = wp.tile_load(gx, shape=(TILE_M, TILE_M), storage="shared")
|
|
345
|
-
# Solve for z in Lz = x.T
|
|
346
|
-
x_t = wp.tile_transpose(x)
|
|
347
|
-
z = wp.tile_lower_solve(L, x_t)
|
|
348
|
-
# Ensure matmul receives correct layout information
|
|
349
|
-
c = wp.tile_matmul(z, z)
|
|
350
|
-
# Store z and c
|
|
351
|
-
wp.tile_store(gz, z)
|
|
352
|
-
wp.tile_store(gc, c)
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
@unittest.skipUnless(wp.context.runtime.core.cuda_toolkit_version() >= 12060, "CUDA toolkit version is less than 12.6")
|
|
356
|
-
def test_tile_math_forward_substitution_multiple_rhs(test, device):
|
|
357
|
-
# Create test data
|
|
358
|
-
rng = np.random.default_rng(42)
|
|
359
|
-
L_h = np.tril(rng.random((TILE_M, TILE_M))) # Lower triangular matrix
|
|
360
|
-
x_h = rng.random((TILE_M, TILE_M)) # Multiple right-hand sides
|
|
361
|
-
z_h = np.zeros_like(x_h)
|
|
362
|
-
c_h = np.zeros_like(x_h)
|
|
363
|
-
|
|
364
|
-
# Compute reference solution using numpy
|
|
365
|
-
z_np = np.linalg.solve(L_h, x_h.T)
|
|
366
|
-
c_np = z_np @ z_np
|
|
367
|
-
|
|
368
|
-
# Create Warp arrays
|
|
369
|
-
L_wp = wp.array2d(L_h, requires_grad=True, dtype=wp.float64, device=device)
|
|
370
|
-
x_wp = wp.array2d(x_h, requires_grad=True, dtype=wp.float64, device=device)
|
|
371
|
-
z_wp = wp.array2d(z_h, requires_grad=True, dtype=wp.float64, device=device)
|
|
372
|
-
c_wp = wp.array2d(c_h, requires_grad=True, dtype=wp.float64, device=device)
|
|
373
|
-
|
|
374
|
-
# Run kernel
|
|
375
|
-
wp.launch_tiled(
|
|
376
|
-
tile_math_forward_substitution_multiple_rhs,
|
|
377
|
-
dim=[1, 1],
|
|
378
|
-
inputs=[L_wp, x_wp, z_wp, c_wp],
|
|
379
|
-
block_dim=TILE_DIM,
|
|
380
|
-
device=device,
|
|
381
|
-
)
|
|
382
|
-
wp.synchronize_device()
|
|
383
|
-
|
|
384
|
-
# Verify results
|
|
385
|
-
assert np.allclose(z_wp.numpy(), z_np)
|
|
386
|
-
assert np.allclose(c_wp.numpy(), c_np)
|
|
387
|
-
|
|
388
|
-
# TODO: implement and test backward pass
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
@wp.kernel
|
|
392
|
-
def tile_math_back_substitution_multiple_rhs(
|
|
393
|
-
gL: wp.array2d(dtype=wp.float64),
|
|
394
|
-
gx: wp.array2d(dtype=wp.float64),
|
|
395
|
-
gz: wp.array2d(dtype=wp.float64),
|
|
396
|
-
gc: wp.array2d(dtype=wp.float64),
|
|
397
|
-
):
|
|
398
|
-
i, j = wp.tid()
|
|
399
|
-
# Load L & x
|
|
400
|
-
L = wp.tile_load(gL, shape=(TILE_M, TILE_M), storage="shared")
|
|
401
|
-
x = wp.tile_load(gx, shape=(TILE_M, TILE_M), storage="shared")
|
|
402
|
-
# Solve for z in L^T z = x.T
|
|
403
|
-
x_t = wp.tile_transpose(x)
|
|
404
|
-
z = wp.tile_upper_solve(wp.tile_transpose(L), x_t)
|
|
405
|
-
# Ensure matmul receives correct layout information
|
|
406
|
-
c = wp.tile_matmul(z, z)
|
|
407
|
-
# Store z and c
|
|
408
|
-
wp.tile_store(gz, z)
|
|
409
|
-
wp.tile_store(gc, c)
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
@unittest.skipUnless(wp.context.runtime.core.cuda_toolkit_version() >= 12060, "CUDA toolkit version is less than 12.6")
|
|
413
|
-
def test_tile_math_back_substitution_multiple_rhs(test, device):
|
|
414
|
-
# Create test data
|
|
415
|
-
rng = np.random.default_rng(42)
|
|
416
|
-
L_h = np.tril(rng.random((TILE_M, TILE_M))) # Lower triangular matrix
|
|
417
|
-
x_h = rng.random((TILE_M, TILE_M)) # Multiple right-hand sides
|
|
418
|
-
z_h = np.zeros_like(x_h)
|
|
419
|
-
c_h = np.zeros_like(x_h)
|
|
420
|
-
|
|
421
|
-
# Compute reference solution using numpy
|
|
422
|
-
z_np = np.linalg.solve(L_h.T, x_h.T)
|
|
423
|
-
c_np = z_np @ z_np
|
|
424
|
-
|
|
425
|
-
# Create Warp arrays
|
|
426
|
-
L_wp = wp.array2d(L_h, requires_grad=True, dtype=wp.float64, device=device)
|
|
427
|
-
x_wp = wp.array2d(x_h, requires_grad=True, dtype=wp.float64, device=device)
|
|
428
|
-
z_wp = wp.array2d(z_h, requires_grad=True, dtype=wp.float64, device=device)
|
|
429
|
-
c_wp = wp.array2d(c_h, requires_grad=True, dtype=wp.float64, device=device)
|
|
430
|
-
|
|
431
|
-
# Run kernel
|
|
432
|
-
wp.launch_tiled(
|
|
433
|
-
tile_math_back_substitution_multiple_rhs,
|
|
434
|
-
dim=[1, 1],
|
|
435
|
-
inputs=[L_wp, x_wp, z_wp, c_wp],
|
|
436
|
-
block_dim=TILE_DIM,
|
|
437
|
-
device=device,
|
|
438
|
-
)
|
|
439
|
-
wp.synchronize_device()
|
|
440
|
-
|
|
441
|
-
# Verify results
|
|
442
|
-
assert np.allclose(z_wp.numpy(), z_np)
|
|
443
|
-
assert np.allclose(c_wp.numpy(), c_np)
|
|
444
|
-
|
|
445
|
-
# TODO: implement and test backward pass
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
# tests a complex composition of most libmathdx calls
|
|
449
|
-
@unittest.skipUnless(wp.context.runtime.core.cuda_toolkit_version() >= 12060, "CUDA toolkit version is less than 12.6")
|
|
450
|
-
def test_tile_math_block_cholesky(test, device):
|
|
451
|
-
BLOCK_SIZE = wp.constant(TILE_M // 2)
|
|
452
|
-
|
|
453
|
-
@wp.kernel(module="unique")
|
|
454
|
-
def block_cholesky_kernel(
|
|
455
|
-
A: wp.array2d(dtype=float),
|
|
456
|
-
L: wp.array2d(dtype=float),
|
|
457
|
-
):
|
|
458
|
-
"""
|
|
459
|
-
Computes the Cholesky factorization of a symmetric positive definite matrix A in blocks.
|
|
460
|
-
It returns a lower-triangular matrix L such that A = L L^T.
|
|
461
|
-
"""
|
|
462
|
-
|
|
463
|
-
# Process the matrix in blocks along its leading dimension.
|
|
464
|
-
for k in range(0, TILE_M, BLOCK_SIZE):
|
|
465
|
-
end = k + BLOCK_SIZE
|
|
466
|
-
|
|
467
|
-
# Load current diagonal block A[k:end, k:end]
|
|
468
|
-
# and update with contributions from previously computed blocks.
|
|
469
|
-
A_kk_tile = wp.tile_load(A, shape=(BLOCK_SIZE, BLOCK_SIZE), offset=(k, k), storage="shared")
|
|
470
|
-
|
|
471
|
-
for j in range(0, k, BLOCK_SIZE):
|
|
472
|
-
L_block = wp.tile_load(L, shape=(BLOCK_SIZE, BLOCK_SIZE), offset=(k, j))
|
|
473
|
-
L_block_T = wp.tile_transpose(L_block)
|
|
474
|
-
L_L_T_block = wp.tile_matmul(L_block, L_block_T)
|
|
475
|
-
A_kk_tile -= L_L_T_block
|
|
476
|
-
|
|
477
|
-
# Compute the Cholesky factorization for the block
|
|
478
|
-
# print(A_kk_tile)
|
|
479
|
-
L_kk_tile = wp.tile_cholesky(A_kk_tile)
|
|
480
|
-
wp.tile_store(L, L_kk_tile, offset=(k, k))
|
|
481
|
-
|
|
482
|
-
# Process the blocks below the current block
|
|
483
|
-
for i in range(end, TILE_M, BLOCK_SIZE):
|
|
484
|
-
A_ik_tile = wp.tile_load(A, shape=(BLOCK_SIZE, BLOCK_SIZE), offset=(i, k), storage="shared")
|
|
485
|
-
|
|
486
|
-
for j in range(0, k, BLOCK_SIZE):
|
|
487
|
-
L_tile = wp.tile_load(L, shape=(BLOCK_SIZE, BLOCK_SIZE), offset=(i, j))
|
|
488
|
-
L_2_tile = wp.tile_load(L, shape=(BLOCK_SIZE, BLOCK_SIZE), offset=(k, j))
|
|
489
|
-
L_T_tile = wp.tile_transpose(L_2_tile)
|
|
490
|
-
L_L_T_tile = wp.tile_matmul(L_tile, L_T_tile)
|
|
491
|
-
A_ik_tile -= L_L_T_tile
|
|
492
|
-
|
|
493
|
-
A_ik_T_tile = wp.tile_transpose(A_ik_tile)
|
|
494
|
-
sol_T_tile = wp.tile_lower_solve(L_kk_tile, A_ik_T_tile)
|
|
495
|
-
sol_tile = wp.tile_transpose(sol_T_tile)
|
|
496
|
-
|
|
497
|
-
wp.tile_store(L, sol_tile, offset=(i, k))
|
|
498
|
-
|
|
499
|
-
@wp.kernel(module="unique")
|
|
500
|
-
def block_cholesky_solve_kernel(
|
|
501
|
-
L: wp.array2d(dtype=float),
|
|
502
|
-
b: wp.array2d(dtype=float),
|
|
503
|
-
scratch: wp.array2d(dtype=float),
|
|
504
|
-
x: wp.array2d(dtype=float),
|
|
505
|
-
):
|
|
506
|
-
"""
|
|
507
|
-
Solves A x = b given the Cholesky factor L (A = L L^T) using
|
|
508
|
-
blocked forward and backward substitution.
|
|
509
|
-
"""
|
|
510
|
-
|
|
511
|
-
# Forward substitution: solve L y = b
|
|
512
|
-
for i in range(0, TILE_M, BLOCK_SIZE):
|
|
513
|
-
i_end = i + BLOCK_SIZE
|
|
514
|
-
rhs_tile = wp.tile_load(b, shape=(BLOCK_SIZE, 1), offset=(i, 0))
|
|
515
|
-
for j in range(0, i, BLOCK_SIZE):
|
|
516
|
-
L_block = wp.tile_load(L, shape=(BLOCK_SIZE, BLOCK_SIZE), offset=(i, j))
|
|
517
|
-
y_block = wp.tile_load(scratch, shape=(BLOCK_SIZE, 1), offset=(j, 0))
|
|
518
|
-
Ly_block = wp.tile_matmul(L_block, y_block)
|
|
519
|
-
rhs_tile -= Ly_block
|
|
520
|
-
L_tile = wp.tile_load(L, shape=(BLOCK_SIZE, BLOCK_SIZE), offset=(i, i))
|
|
521
|
-
y_tile = wp.tile_lower_solve(L_tile, rhs_tile)
|
|
522
|
-
wp.tile_store(scratch, y_tile, offset=(i, 0))
|
|
523
|
-
|
|
524
|
-
# Backward substitution: solve L^T x = y
|
|
525
|
-
for i in range(TILE_M - BLOCK_SIZE, -1, -BLOCK_SIZE):
|
|
526
|
-
i_start = i
|
|
527
|
-
i_end = i_start + BLOCK_SIZE
|
|
528
|
-
rhs_tile = wp.tile_load(scratch, shape=(BLOCK_SIZE, 1), offset=(i_start, 0))
|
|
529
|
-
for j in range(i_end, TILE_M, BLOCK_SIZE):
|
|
530
|
-
L_tile = wp.tile_load(L, shape=(BLOCK_SIZE, BLOCK_SIZE), offset=(j, i_start))
|
|
531
|
-
L_T_tile = wp.tile_transpose(L_tile)
|
|
532
|
-
x_tile = wp.tile_load(x, shape=(BLOCK_SIZE, 1), offset=(j, 0))
|
|
533
|
-
L_T_x_tile = wp.tile_matmul(L_T_tile, x_tile)
|
|
534
|
-
rhs_tile -= L_T_x_tile
|
|
535
|
-
L_tile = wp.tile_load(L, shape=(BLOCK_SIZE, BLOCK_SIZE), offset=(i_start, i_start))
|
|
536
|
-
x_tile = wp.tile_upper_solve(wp.tile_transpose(L_tile), rhs_tile)
|
|
537
|
-
wp.tile_store(x, x_tile, offset=(i_start, 0))
|
|
538
|
-
|
|
539
|
-
# check block cholesky decomposition
|
|
540
|
-
|
|
541
|
-
rng = np.random.default_rng(42)
|
|
542
|
-
|
|
543
|
-
M = np.array(rng.random((TILE_M, TILE_M)), dtype=float)
|
|
544
|
-
|
|
545
|
-
A_np = M.T @ M + np.eye(TILE_M, TILE_M)
|
|
546
|
-
L_np = np.linalg.cholesky(A_np)
|
|
547
|
-
|
|
548
|
-
A_wp = wp.array2d(A_np, dtype=float, device=device)
|
|
549
|
-
L_wp = wp.zeros_like(A_wp)
|
|
550
|
-
|
|
551
|
-
wp.launch_tiled(block_cholesky_kernel, dim=1, inputs=[A_wp], outputs=[L_wp], block_dim=TILE_DIM, device=device)
|
|
552
|
-
|
|
553
|
-
# check block cholesky solve
|
|
554
|
-
|
|
555
|
-
assert_np_equal(L_wp.numpy(), L_np, tol=1e-6)
|
|
556
|
-
|
|
557
|
-
b_np = np.array(rng.random((TILE_M, 1)), dtype=float)
|
|
558
|
-
b_wp = wp.array(b_np, dtype=float, device=device)
|
|
559
|
-
|
|
560
|
-
scratch = wp.zeros_like(b_wp)
|
|
561
|
-
|
|
562
|
-
x_np = np.linalg.solve(L_np.T, np.linalg.solve(L_np, b_np))
|
|
563
|
-
x_wp = wp.zeros_like(b_wp)
|
|
564
|
-
|
|
565
|
-
wp.launch_tiled(
|
|
566
|
-
block_cholesky_solve_kernel,
|
|
567
|
-
dim=1,
|
|
568
|
-
inputs=[L_wp, b_wp, scratch],
|
|
569
|
-
outputs=[x_wp],
|
|
570
|
-
block_dim=TILE_DIM,
|
|
571
|
-
device=device,
|
|
572
|
-
)
|
|
573
|
-
|
|
574
|
-
assert_np_equal(x_wp.numpy(), x_np, tol=1e-6)
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
@wp.kernel
|
|
578
|
-
def test_tile_lower_solve(L: wp.array2d(dtype=float), y: wp.array(dtype=float), x: wp.array(dtype=float)):
|
|
579
|
-
L_tile = wp.tile_load(L, shape=(TILE_M, TILE_M))
|
|
580
|
-
y_tile = wp.tile_load(x, shape=(TILE_M,))
|
|
581
|
-
sol = wp.tile_lower_solve(L_tile, y_tile)
|
|
582
|
-
wp.tile_store(x, sol)
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
@wp.kernel
|
|
586
|
-
def test_tile_upper_solve(L: wp.array2d(dtype=float), y: wp.array(dtype=float), x: wp.array(dtype=float)):
|
|
587
|
-
L_tile = wp.tile_load(L, shape=(TILE_M, TILE_M))
|
|
588
|
-
y_tile = wp.tile_load(x, shape=(TILE_M,))
|
|
589
|
-
sol = wp.tile_upper_solve(L_tile, y_tile)
|
|
590
|
-
wp.tile_store(x, sol)
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
@unittest.skipUnless(wp.context.runtime.core.cuda_toolkit_version() >= 12060, "CUDA toolkit version is less than 12.6")
|
|
594
|
-
def test_tile_math_singular_matrices(test, device):
|
|
595
|
-
rng = np.random.default_rng(42)
|
|
596
|
-
L_np = np.tril(rng.random((TILE_M, TILE_M))) # Lower triangular matrix
|
|
597
|
-
L_np[-1, -1] = 0.0 # Make it singular
|
|
598
|
-
y_np = rng.random(TILE_M)
|
|
599
|
-
|
|
600
|
-
L_wp = wp.array2d(L_np, dtype=float, device=device)
|
|
601
|
-
y_wp = wp.array(y_np, dtype=float, device=device)
|
|
602
|
-
x_wp = wp.zeros_like(y_wp)
|
|
603
|
-
|
|
604
|
-
wp.launch_tiled(
|
|
605
|
-
test_tile_lower_solve, dim=1, inputs=[L_wp, y_wp], outputs=[x_wp], block_dim=TILE_DIM, device=device
|
|
606
|
-
)
|
|
607
|
-
|
|
608
|
-
assert np.isnan(x_wp.numpy()).any()
|
|
609
|
-
|
|
610
|
-
L_np = np.triu(rng.random((TILE_M, TILE_M))) # Upper triangular matrix
|
|
611
|
-
L_np[-1, -1] = 0.0 # Make it singular
|
|
612
|
-
|
|
613
|
-
L_wp = wp.array2d(L_np, dtype=float, device=device)
|
|
614
|
-
y_wp = wp.array(y_np, dtype=float, device=device)
|
|
615
|
-
x_wp = wp.zeros_like(y_wp)
|
|
616
|
-
|
|
617
|
-
wp.launch_tiled(
|
|
618
|
-
test_tile_upper_solve, dim=1, inputs=[L_wp, y_wp], outputs=[x_wp], block_dim=TILE_DIM, device=device
|
|
619
|
-
)
|
|
620
|
-
|
|
621
|
-
assert np.isnan(x_wp.numpy()).any()
|
|
622
|
-
|
|
623
|
-
|
|
624
126
|
all_devices = get_test_devices()
|
|
625
127
|
cuda_devices = get_cuda_test_devices()
|
|
626
128
|
|
|
@@ -633,16 +135,6 @@ class TestTileMathDx(unittest.TestCase):
|
|
|
633
135
|
add_function_test(
|
|
634
136
|
TestTileMathDx, "test_tile_math_matmul", test_tile_math_matmul, devices=all_devices, check_output=False
|
|
635
137
|
)
|
|
636
|
-
add_function_test(
|
|
637
|
-
TestTileMathDx, "test_tile_math_cholesky", test_tile_math_cholesky, devices=all_devices, check_output=False
|
|
638
|
-
)
|
|
639
|
-
add_function_test(
|
|
640
|
-
TestTileMathDx,
|
|
641
|
-
"tile_math_cholesky_multiple_rhs",
|
|
642
|
-
tile_math_cholesky_multiple_rhs,
|
|
643
|
-
devices=all_devices,
|
|
644
|
-
check_output=False,
|
|
645
|
-
)
|
|
646
138
|
add_function_test(
|
|
647
139
|
TestTileMathDx,
|
|
648
140
|
"test_tile_math_fft_vec2f",
|
|
@@ -658,54 +150,6 @@ add_function_test(
|
|
|
658
150
|
check_output=False,
|
|
659
151
|
)
|
|
660
152
|
|
|
661
|
-
add_function_test(
|
|
662
|
-
TestTileMathDx,
|
|
663
|
-
"test_tile_math_forward_substitution",
|
|
664
|
-
test_tile_math_forward_substitution,
|
|
665
|
-
devices=cuda_devices,
|
|
666
|
-
check_output=False,
|
|
667
|
-
)
|
|
668
|
-
|
|
669
|
-
add_function_test(
|
|
670
|
-
TestTileMathDx,
|
|
671
|
-
"test_tile_math_back_substitution",
|
|
672
|
-
test_tile_math_back_substitution,
|
|
673
|
-
devices=cuda_devices,
|
|
674
|
-
check_output=False,
|
|
675
|
-
)
|
|
676
|
-
|
|
677
|
-
add_function_test(
|
|
678
|
-
TestTileMathDx,
|
|
679
|
-
"test_tile_math_forward_substitution_multiple_rhs",
|
|
680
|
-
test_tile_math_forward_substitution_multiple_rhs,
|
|
681
|
-
devices=cuda_devices,
|
|
682
|
-
check_output=False,
|
|
683
|
-
)
|
|
684
|
-
|
|
685
|
-
add_function_test(
|
|
686
|
-
TestTileMathDx,
|
|
687
|
-
"test_tile_math_back_substitution_multiple_rhs",
|
|
688
|
-
test_tile_math_back_substitution_multiple_rhs,
|
|
689
|
-
devices=cuda_devices,
|
|
690
|
-
check_output=False,
|
|
691
|
-
)
|
|
692
|
-
|
|
693
|
-
add_function_test(
|
|
694
|
-
TestTileMathDx,
|
|
695
|
-
"test_tile_math_block_cholesky",
|
|
696
|
-
test_tile_math_block_cholesky,
|
|
697
|
-
devices=cuda_devices,
|
|
698
|
-
check_output=False,
|
|
699
|
-
)
|
|
700
|
-
|
|
701
|
-
add_function_test(
|
|
702
|
-
TestTileMathDx,
|
|
703
|
-
"test_tile_math_singular_matrices",
|
|
704
|
-
test_tile_math_singular_matrices,
|
|
705
|
-
devices=cuda_devices,
|
|
706
|
-
check_output=False,
|
|
707
|
-
)
|
|
708
|
-
|
|
709
153
|
|
|
710
154
|
if __name__ == "__main__":
|
|
711
155
|
wp.clear_kernel_cache()
|
|
@@ -159,7 +159,7 @@ def test_tile_transpose_matmul(test, device):
|
|
|
159
159
|
test_tile_transpose_matmul_kernel, dim=[1], inputs=[input, output], block_dim=TILE_DIM, device=device
|
|
160
160
|
)
|
|
161
161
|
|
|
162
|
-
assert_np_equal(output.numpy(), input.numpy().T @ input.numpy())
|
|
162
|
+
assert_np_equal(output.numpy(), input.numpy().T @ input.numpy(), 1e-6)
|
|
163
163
|
|
|
164
164
|
|
|
165
165
|
class TestTileMatmul(unittest.TestCase):
|
warp/tests/tile/test_tile_mlp.py
CHANGED
|
@@ -43,7 +43,7 @@ def create_array(rng, dim_in, dim_hid, dtype=float):
|
|
|
43
43
|
def test_multi_layer_nn(test, device):
|
|
44
44
|
import torch as tc
|
|
45
45
|
|
|
46
|
-
if device.is_cuda and not wp.context.runtime.core.
|
|
46
|
+
if device.is_cuda and not wp.context.runtime.core.wp_is_mathdx_enabled():
|
|
47
47
|
test.skipTest("Skipping test on CUDA device without MathDx (tolerance)")
|
|
48
48
|
|
|
49
49
|
NUM_FREQ = wp.constant(8)
|
|
@@ -110,13 +110,13 @@ def test_tile_shared_mem_graph(test, device):
|
|
|
110
110
|
|
|
111
111
|
out = wp.empty((DIM_M, DIM_N), dtype=float, device=device)
|
|
112
112
|
|
|
113
|
-
|
|
113
|
+
# preload the unique module
|
|
114
|
+
wp.load_module(compute.module, device=device, block_dim=BLOCK_DIM)
|
|
114
115
|
|
|
115
|
-
wp.
|
|
116
|
-
|
|
117
|
-
graph = wp.capture_end(device)
|
|
116
|
+
with wp.ScopedCapture(device, force_module_load=False) as capture:
|
|
117
|
+
wp.launch_tiled(compute, dim=[1], inputs=[out], block_dim=BLOCK_DIM, device=device)
|
|
118
118
|
|
|
119
|
-
wp.capture_launch(graph)
|
|
119
|
+
wp.capture_launch(capture.graph)
|
|
120
120
|
|
|
121
121
|
# check output
|
|
122
122
|
assert_np_equal(out.numpy(), np.ones((DIM_M, DIM_N)) * 3.0)
|
warp/tests/unittest_suites.py
CHANGED
|
@@ -164,6 +164,7 @@ def default_suite(test_loader: unittest.TestLoader = unittest.defaultTestLoader)
|
|
|
164
164
|
from warp.tests.test_linear_solvers import TestLinearSolvers
|
|
165
165
|
from warp.tests.test_lvalue import TestLValue
|
|
166
166
|
from warp.tests.test_mat import TestMat
|
|
167
|
+
from warp.tests.test_mat_constructors import TestMatConstructors
|
|
167
168
|
from warp.tests.test_mat_lite import TestMatLite
|
|
168
169
|
from warp.tests.test_mat_scalar_ops import TestMatScalarOps
|
|
169
170
|
from warp.tests.test_math import TestMath
|
|
@@ -198,6 +199,7 @@ def default_suite(test_loader: unittest.TestLoader = unittest.defaultTestLoader)
|
|
|
198
199
|
from warp.tests.test_vec_scalar_ops import TestVecScalarOps
|
|
199
200
|
from warp.tests.test_verify_fp import TestVerifyFP
|
|
200
201
|
from warp.tests.tile.test_tile import TestTile
|
|
202
|
+
from warp.tests.tile.test_tile_cholesky import TestTileCholesky
|
|
201
203
|
from warp.tests.tile.test_tile_load import TestTileLoad
|
|
202
204
|
from warp.tests.tile.test_tile_mathdx import TestTileMathDx
|
|
203
205
|
from warp.tests.tile.test_tile_matmul import TestTileMatmul
|
|
@@ -261,6 +263,7 @@ def default_suite(test_loader: unittest.TestLoader = unittest.defaultTestLoader)
|
|
|
261
263
|
TestLValue,
|
|
262
264
|
TestMarchingCubes,
|
|
263
265
|
TestMat,
|
|
266
|
+
TestMatConstructors,
|
|
264
267
|
TestMatLite,
|
|
265
268
|
TestMatScalarOps,
|
|
266
269
|
TestMath,
|
|
@@ -298,6 +301,7 @@ def default_suite(test_loader: unittest.TestLoader = unittest.defaultTestLoader)
|
|
|
298
301
|
TestStruct,
|
|
299
302
|
TestTape,
|
|
300
303
|
TestTile,
|
|
304
|
+
TestTileCholesky,
|
|
301
305
|
TestTileLoad,
|
|
302
306
|
TestTileMathDx,
|
|
303
307
|
TestTileMatmul,
|
|
@@ -360,6 +364,7 @@ def kit_suite(test_loader: unittest.TestLoader = unittest.defaultTestLoader):
|
|
|
360
364
|
from warp.tests.test_lvalue import TestLValue
|
|
361
365
|
from warp.tests.test_mat_lite import TestMatLite
|
|
362
366
|
from warp.tests.test_math import TestMath
|
|
367
|
+
from warp.tests.test_module_aot import TestModuleAOT
|
|
363
368
|
from warp.tests.test_module_hashing import TestModuleHashing
|
|
364
369
|
from warp.tests.test_modules_lite import TestModuleLite
|
|
365
370
|
from warp.tests.test_noise import TestNoise
|
|
@@ -406,6 +411,7 @@ def kit_suite(test_loader: unittest.TestLoader = unittest.defaultTestLoader):
|
|
|
406
411
|
TestMeshQueryAABBMethods,
|
|
407
412
|
TestMeshQueryPoint,
|
|
408
413
|
TestMeshQueryRay,
|
|
414
|
+
TestModuleAOT,
|
|
409
415
|
TestModuleHashing,
|
|
410
416
|
TestModuleLite,
|
|
411
417
|
TestNoise,
|
warp/tests/walkthrough_debug.py
CHANGED
|
@@ -68,7 +68,7 @@ wp.init()
|
|
|
68
68
|
wp.config.mode = "debug"
|
|
69
69
|
|
|
70
70
|
# Make sure Warp was built with `build_lib.py --mode=debug`
|
|
71
|
-
assert wp.context.runtime.core.
|
|
71
|
+
assert wp.context.runtime.core.wp_is_debug_enabled(), "Warp must be built in debug mode to enable debugging kernels"
|
|
72
72
|
|
|
73
73
|
|
|
74
74
|
@wp.kernel
|