warp-lang 1.3.2-py3-none-win_amd64.whl → 1.4.0-py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (108)
  1. warp/__init__.py +6 -0
  2. warp/autograd.py +59 -6
  3. warp/bin/warp-clang.dll +0 -0
  4. warp/bin/warp.dll +0 -0
  5. warp/build_dll.py +8 -10
  6. warp/builtins.py +126 -4
  7. warp/codegen.py +435 -53
  8. warp/config.py +1 -1
  9. warp/context.py +678 -403
  10. warp/dlpack.py +2 -0
  11. warp/examples/benchmarks/benchmark_cloth.py +10 -0
  12. warp/examples/core/example_render_opengl.py +12 -10
  13. warp/examples/fem/example_adaptive_grid.py +251 -0
  14. warp/examples/fem/example_apic_fluid.py +1 -1
  15. warp/examples/fem/example_diffusion_3d.py +2 -2
  16. warp/examples/fem/example_magnetostatics.py +1 -1
  17. warp/examples/fem/example_streamlines.py +1 -0
  18. warp/examples/fem/utils.py +23 -4
  19. warp/examples/sim/example_cloth.py +50 -6
  20. warp/fem/__init__.py +2 -0
  21. warp/fem/adaptivity.py +493 -0
  22. warp/fem/field/field.py +2 -1
  23. warp/fem/field/nodal_field.py +18 -26
  24. warp/fem/field/test.py +4 -4
  25. warp/fem/field/trial.py +4 -4
  26. warp/fem/geometry/__init__.py +1 -0
  27. warp/fem/geometry/adaptive_nanogrid.py +843 -0
  28. warp/fem/geometry/nanogrid.py +55 -28
  29. warp/fem/space/__init__.py +1 -1
  30. warp/fem/space/nanogrid_function_space.py +69 -35
  31. warp/fem/utils.py +113 -107
  32. warp/jax_experimental.py +28 -15
  33. warp/native/array.h +0 -1
  34. warp/native/builtin.h +103 -6
  35. warp/native/bvh.cu +2 -0
  36. warp/native/cuda_util.cpp +14 -0
  37. warp/native/cuda_util.h +2 -0
  38. warp/native/error.cpp +4 -2
  39. warp/native/exports.h +99 -17
  40. warp/native/mat.h +97 -0
  41. warp/native/mesh.cpp +36 -0
  42. warp/native/mesh.cu +51 -0
  43. warp/native/mesh.h +1 -0
  44. warp/native/quat.h +43 -0
  45. warp/native/spatial.h +6 -0
  46. warp/native/vec.h +74 -0
  47. warp/native/warp.cpp +2 -1
  48. warp/native/warp.cu +10 -3
  49. warp/native/warp.h +8 -1
  50. warp/paddle.py +382 -0
  51. warp/sim/__init__.py +1 -0
  52. warp/sim/collide.py +519 -0
  53. warp/sim/integrator_euler.py +18 -5
  54. warp/sim/integrator_featherstone.py +5 -5
  55. warp/sim/integrator_vbd.py +1026 -0
  56. warp/sim/model.py +49 -23
  57. warp/stubs.py +459 -0
  58. warp/tape.py +2 -0
  59. warp/tests/aux_test_dependent.py +1 -0
  60. warp/tests/aux_test_name_clash1.py +32 -0
  61. warp/tests/aux_test_name_clash2.py +32 -0
  62. warp/tests/aux_test_square.py +1 -0
  63. warp/tests/test_array.py +222 -0
  64. warp/tests/test_async.py +3 -3
  65. warp/tests/test_atomic.py +6 -0
  66. warp/tests/test_closest_point_edge_edge.py +93 -1
  67. warp/tests/test_codegen.py +62 -15
  68. warp/tests/test_codegen_instancing.py +1457 -0
  69. warp/tests/test_collision.py +486 -0
  70. warp/tests/test_compile_consts.py +3 -28
  71. warp/tests/test_dlpack.py +170 -0
  72. warp/tests/test_examples.py +22 -8
  73. warp/tests/test_fast_math.py +10 -4
  74. warp/tests/test_fem.py +64 -0
  75. warp/tests/test_func.py +46 -0
  76. warp/tests/test_implicit_init.py +49 -0
  77. warp/tests/test_jax.py +58 -0
  78. warp/tests/test_mat.py +84 -0
  79. warp/tests/test_mesh_query_point.py +188 -0
  80. warp/tests/test_module_hashing.py +40 -0
  81. warp/tests/test_multigpu.py +3 -3
  82. warp/tests/test_overwrite.py +8 -0
  83. warp/tests/test_paddle.py +852 -0
  84. warp/tests/test_print.py +89 -0
  85. warp/tests/test_quat.py +111 -0
  86. warp/tests/test_reload.py +31 -1
  87. warp/tests/test_scalar_ops.py +2 -0
  88. warp/tests/test_static.py +412 -0
  89. warp/tests/test_streams.py +64 -3
  90. warp/tests/test_struct.py +4 -4
  91. warp/tests/test_torch.py +24 -0
  92. warp/tests/test_triangle_closest_point.py +137 -0
  93. warp/tests/test_types.py +1 -1
  94. warp/tests/test_vbd.py +386 -0
  95. warp/tests/test_vec.py +143 -0
  96. warp/tests/test_vec_scalar_ops.py +139 -0
  97. warp/tests/test_volume.py +30 -0
  98. warp/tests/unittest_suites.py +12 -0
  99. warp/tests/unittest_utils.py +9 -5
  100. warp/thirdparty/dlpack.py +3 -1
  101. warp/types.py +157 -34
  102. warp/utils.py +37 -14
  103. {warp_lang-1.3.2.dist-info → warp_lang-1.4.0.dist-info}/METADATA +10 -8
  104. {warp_lang-1.3.2.dist-info → warp_lang-1.4.0.dist-info}/RECORD +107 -95
  105. warp/tests/test_point_triangle_closest_point.py +0 -143
  106. {warp_lang-1.3.2.dist-info → warp_lang-1.4.0.dist-info}/LICENSE.md +0 -0
  107. {warp_lang-1.3.2.dist-info → warp_lang-1.4.0.dist-info}/WHEEL +0 -0
  108. {warp_lang-1.3.2.dist-info → warp_lang-1.4.0.dist-info}/top_level.txt +0 -0
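
Among the files listed above, warp/paddle.py (+382 lines) and warp/tests/test_paddle.py (+852 lines, shown in full below) introduce Paddle interoperability. As a rough orientation only (this snippet is not code from the package; the kernel name `scale` and the array sizes are illustrative), the round-trip that these tests exercise looks like this:

    import paddle
    import warp as wp

    @wp.kernel
    def scale(a: wp.array(dtype=float)):
        tid = wp.tid()
        a[tid] = a[tid] * 2.0

    device = wp.get_device()                     # current Warp device
    paddle_device = wp.device_to_paddle(device)  # e.g. "gpu:0" or "cpu"

    # zero-copy wrap of a Paddle tensor as a Warp array
    t = paddle.arange(10, dtype=paddle.float32).to(device=paddle_device)
    a = wp.from_paddle(t)
    wp.launch(scale, dim=a.size, inputs=[a], device=device)

    # and back: expose a Warp array as a Paddle tensor
    b = wp.zeros(10, dtype=wp.float32, device=device)
    tb = wp.to_paddle(b)

Both directions share the underlying buffer (the tests below assert a.ptr == t.data_ptr()), so no copies are made.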
@@ -0,0 +1,852 @@
+# Copyright (c) 2022 NVIDIA CORPORATION. All rights reserved.
+# NVIDIA CORPORATION and its licensors retain all intellectual property
+# and proprietary rights in and to this software, related documentation
+# and any modifications thereto. Any use, reproduction, disclosure or
+# distribution of this software and related documentation without an express
+# license agreement from NVIDIA CORPORATION is strictly prohibited.
+
+import unittest
+
+import numpy as np
+
+import warp as wp
+from warp.tests.unittest_utils import *
+
+
+@wp.kernel
+def op_kernel(x: wp.array(dtype=float), y: wp.array(dtype=float)):
+    tid = wp.tid()
+    y[tid] = 0.5 - x[tid] * 2.0
+
+
+@wp.kernel
+def inc(a: wp.array(dtype=float)):
+    tid = wp.tid()
+    a[tid] = a[tid] + 1.0
+
+
+@wp.kernel
+def inc_vector(a: wp.array(dtype=wp.vec3f)):
+    tid = wp.tid()
+    a[tid] = a[tid] + wp.vec3f(1.0)
+
+
+@wp.kernel
+def inc_matrix(a: wp.array(dtype=wp.mat22f)):
+    tid = wp.tid()
+    a[tid] = a[tid] + wp.mat22f(1.0)
+
+
+@wp.kernel
+def arange(start: int, step: int, a: wp.array(dtype=int)):
+    tid = wp.tid()
+    a[tid] = start + step * tid
+
+
+# copy elements between non-contiguous 1d arrays of float
+@wp.kernel
+def copy1d_float_kernel(dst: wp.array(dtype=float), src: wp.array(dtype=float)):
+    i = wp.tid()
+    dst[i] = src[i]
+
+
+# copy elements between non-contiguous 2d arrays of float
+@wp.kernel
+def copy2d_float_kernel(dst: wp.array2d(dtype=float), src: wp.array2d(dtype=float)):
+    i, j = wp.tid()
+    dst[i, j] = src[i, j]
+
+
+# copy elements between non-contiguous 3d arrays of float
+@wp.kernel
+def copy3d_float_kernel(dst: wp.array3d(dtype=float), src: wp.array3d(dtype=float)):
+    i, j, k = wp.tid()
+    dst[i, j, k] = src[i, j, k]
+
+
+# copy elements between non-contiguous 2d arrays of vec3
+@wp.kernel
+def copy2d_vec3_kernel(dst: wp.array2d(dtype=wp.vec3), src: wp.array2d(dtype=wp.vec3)):
+    i, j = wp.tid()
+    dst[i, j] = src[i, j]
+
+
+# copy elements between non-contiguous 2d arrays of mat22
+@wp.kernel
+def copy2d_mat22_kernel(dst: wp.array2d(dtype=wp.mat22), src: wp.array2d(dtype=wp.mat22)):
+    i, j = wp.tid()
+    dst[i, j] = src[i, j]
+
+
+def test_dtype_from_paddle(test, device):
+    import paddle
+
+    def test_conversions(paddle_type, warp_type):
+        test.assertEqual(wp.dtype_from_paddle(paddle_type), warp_type)
+
+    test_conversions(paddle.float16, wp.float16)
+    test_conversions(paddle.float32, wp.float32)
+    test_conversions(paddle.float64, wp.float64)
+    test_conversions(paddle.int8, wp.int8)
+    test_conversions(paddle.int16, wp.int16)
+    test_conversions(paddle.int32, wp.int32)
+    test_conversions(paddle.int64, wp.int64)
+    test_conversions(paddle.uint8, wp.uint8)
+    test_conversions(paddle.bool, wp.bool)
+
+
+def test_dtype_to_paddle(test, device):
+    import paddle
+
+    def test_conversions(warp_type, paddle_type):
+        test.assertEqual(wp.dtype_to_paddle(warp_type), paddle_type)
+
+    test_conversions(wp.float16, paddle.float16)
+    test_conversions(wp.float32, paddle.float32)
+    test_conversions(wp.float64, paddle.float64)
+    test_conversions(wp.int8, paddle.int8)
+    test_conversions(wp.int16, paddle.int16)
+    test_conversions(wp.int32, paddle.int32)
+    test_conversions(wp.int64, paddle.int64)
+    test_conversions(wp.uint8, paddle.uint8)
+    test_conversions(wp.uint16, paddle.int16)
+    test_conversions(wp.uint32, paddle.int32)
+    test_conversions(wp.uint64, paddle.int64)
+    test_conversions(wp.bool, paddle.bool)
+
+
+def test_device_conversion(test, device):
+    paddle_device = wp.device_to_paddle(device)
+    warp_device = wp.device_from_paddle(paddle_device)
+    test.assertEqual(warp_device, device)
+
+
+def test_paddle_zerocopy(test, device):
+    import paddle
+
+    a = wp.zeros(10, dtype=wp.float32, device=device)
+    t = wp.to_paddle(a)
+    assert a.ptr == t.data_ptr()
+
+    paddle_device = wp.device_to_paddle(device)
+
+    t = paddle.zeros([10], dtype=paddle.float32).to(device=paddle_device)
+    a = wp.from_paddle(t)
+    assert a.ptr == t.data_ptr()
+
+
+def test_from_paddle(test, device):
+    import paddle
+
+    paddle_device = wp.device_to_paddle(device)
+
+    # automatically determine warp dtype
+    def wrap_scalar_tensor_implicit(paddle_dtype, expected_warp_dtype):
+        t = paddle.zeros([10], dtype=paddle_dtype).to(device=paddle_device)
+        a = wp.from_paddle(t)
+        assert a.dtype == expected_warp_dtype
+        assert a.shape == tuple(t.shape)
+
+    wrap_scalar_tensor_implicit(paddle.float64, wp.float64)
+    wrap_scalar_tensor_implicit(paddle.float32, wp.float32)
+    wrap_scalar_tensor_implicit(paddle.float16, wp.float16)
+    wrap_scalar_tensor_implicit(paddle.int64, wp.int64)
+    wrap_scalar_tensor_implicit(paddle.int32, wp.int32)
+    wrap_scalar_tensor_implicit(paddle.int16, wp.int16)
+    wrap_scalar_tensor_implicit(paddle.int8, wp.int8)
+    wrap_scalar_tensor_implicit(paddle.uint8, wp.uint8)
+    wrap_scalar_tensor_implicit(paddle.bool, wp.bool)
+
+    # explicitly specify warp dtype
+    def wrap_scalar_tensor_explicit(paddle_dtype, expected_warp_dtype):
+        t = paddle.zeros([10], dtype=paddle_dtype).to(device=paddle_device)
+        a = wp.from_paddle(t, expected_warp_dtype)
+        assert a.dtype == expected_warp_dtype
+        assert a.shape == tuple(t.shape)
+
+    wrap_scalar_tensor_explicit(paddle.float64, wp.float64)
+    wrap_scalar_tensor_explicit(paddle.float32, wp.float32)
+    wrap_scalar_tensor_explicit(paddle.float16, wp.float16)
+    wrap_scalar_tensor_explicit(paddle.int64, wp.int64)
+    wrap_scalar_tensor_explicit(paddle.int64, wp.uint64)
+    wrap_scalar_tensor_explicit(paddle.int32, wp.int32)
+    wrap_scalar_tensor_explicit(paddle.int32, wp.uint32)
+    wrap_scalar_tensor_explicit(paddle.int16, wp.int16)
+    wrap_scalar_tensor_explicit(paddle.int16, wp.uint16)
+    wrap_scalar_tensor_explicit(paddle.int8, wp.int8)
+    wrap_scalar_tensor_explicit(paddle.int8, wp.uint8)
+    wrap_scalar_tensor_explicit(paddle.uint8, wp.uint8)
+    wrap_scalar_tensor_explicit(paddle.uint8, wp.int8)
+    wrap_scalar_tensor_explicit(paddle.bool, wp.uint8)
+    wrap_scalar_tensor_explicit(paddle.bool, wp.int8)
+    wrap_scalar_tensor_explicit(paddle.bool, wp.bool)
+
+    def wrap_vec_tensor(n, desired_warp_dtype):
+        t = paddle.zeros((10, n), dtype=paddle.float32).to(device=paddle_device)
+        a = wp.from_paddle(t, desired_warp_dtype)
+        assert a.dtype == desired_warp_dtype
+        assert a.shape == (10,)
+
+    wrap_vec_tensor(2, wp.vec2)
+    wrap_vec_tensor(3, wp.vec3)
+    wrap_vec_tensor(4, wp.vec4)
+    wrap_vec_tensor(6, wp.spatial_vector)
+    wrap_vec_tensor(7, wp.transform)
+
+    def wrap_mat_tensor(n, m, desired_warp_dtype):
+        t = paddle.zeros((10, n, m), dtype=paddle.float32).to(device=paddle_device)
+        a = wp.from_paddle(t, desired_warp_dtype)
+        assert a.dtype == desired_warp_dtype
+        assert a.shape == (10,)
+
+    wrap_mat_tensor(2, 2, wp.mat22)
+    wrap_mat_tensor(3, 3, wp.mat33)
+    wrap_mat_tensor(4, 4, wp.mat44)
+    wrap_mat_tensor(6, 6, wp.spatial_matrix)
+
+    def wrap_vec_tensor_with_grad(n, desired_warp_dtype):
+        t = paddle.zeros((10, n), dtype=paddle.float32).to(device=paddle_device)
+        a = wp.from_paddle(t, desired_warp_dtype)
+        a.requires_grad = True
+        assert a.dtype == desired_warp_dtype
+        assert a.shape == (10,)
+
+    wrap_vec_tensor_with_grad(2, wp.vec2)
+    wrap_vec_tensor_with_grad(3, wp.vec3)
+    wrap_vec_tensor_with_grad(4, wp.vec4)
+    wrap_vec_tensor_with_grad(6, wp.spatial_vector)
+    wrap_vec_tensor_with_grad(7, wp.transform)
+
+    def wrap_mat_tensor_with_grad(n, m, desired_warp_dtype):
+        t = paddle.zeros((10, n, m), dtype=paddle.float32).to(device=paddle_device)
+        a = wp.from_paddle(t, desired_warp_dtype, requires_grad=True)
+        assert a.dtype == desired_warp_dtype
+        assert a.shape == (10,)
+
+    wrap_mat_tensor_with_grad(2, 2, wp.mat22)
+    wrap_mat_tensor_with_grad(3, 3, wp.mat33)
+    wrap_mat_tensor_with_grad(4, 4, wp.mat44)
+    wrap_mat_tensor_with_grad(6, 6, wp.spatial_matrix)
+
+
+def test_array_ctype_from_paddle(test, device):
+    import paddle
+
+    paddle_device = wp.device_to_paddle(device)
+
+    # automatically determine warp dtype
+    def wrap_scalar_tensor_implicit(paddle_dtype):
+        t = paddle.zeros([10], dtype=paddle_dtype).to(device=paddle_device)
+        a = wp.from_paddle(t, return_ctype=True)
+        warp_dtype = wp.dtype_from_paddle(paddle_dtype)
+        ctype_size = ctypes.sizeof(warp_dtype._type_)
+        assert a.data == t.data_ptr()
+        assert a.grad == 0
+        assert a.ndim == 1
+        assert a.shape[0] == t.shape[0]
+        assert a.strides[0] == t.strides[0] * ctype_size
+
+    wrap_scalar_tensor_implicit(paddle.float64)
+    wrap_scalar_tensor_implicit(paddle.float32)
+    wrap_scalar_tensor_implicit(paddle.float16)
+    wrap_scalar_tensor_implicit(paddle.int64)
+    wrap_scalar_tensor_implicit(paddle.int32)
+    wrap_scalar_tensor_implicit(paddle.int16)
+    wrap_scalar_tensor_implicit(paddle.int8)
+    wrap_scalar_tensor_implicit(paddle.uint8)
+    wrap_scalar_tensor_implicit(paddle.bool)
+
+    # explicitly specify warp dtype
+    def wrap_scalar_tensor_explicit(paddle_dtype, warp_dtype):
+        t = paddle.zeros([10], dtype=paddle_dtype).to(device=paddle_device)
+        a = wp.from_paddle(t, dtype=warp_dtype, return_ctype=True)
+        ctype_size = ctypes.sizeof(warp_dtype._type_)
+        assert a.data == t.data_ptr()
+        assert a.grad == 0
+        assert a.ndim == 1
+        assert a.shape[0] == t.shape[0]
+        assert a.strides[0] == t.strides[0] * ctype_size
+
+    wrap_scalar_tensor_explicit(paddle.float64, wp.float64)
+    wrap_scalar_tensor_explicit(paddle.float32, wp.float32)
+    wrap_scalar_tensor_explicit(paddle.float16, wp.float16)
+    wrap_scalar_tensor_explicit(paddle.int64, wp.int64)
+    wrap_scalar_tensor_explicit(paddle.int64, wp.uint64)
+    wrap_scalar_tensor_explicit(paddle.int32, wp.int32)
+    wrap_scalar_tensor_explicit(paddle.int32, wp.uint32)
+    wrap_scalar_tensor_explicit(paddle.int16, wp.int16)
+    wrap_scalar_tensor_explicit(paddle.int16, wp.uint16)
+    wrap_scalar_tensor_explicit(paddle.int8, wp.int8)
+    wrap_scalar_tensor_explicit(paddle.int8, wp.uint8)
+    wrap_scalar_tensor_explicit(paddle.uint8, wp.uint8)
+    wrap_scalar_tensor_explicit(paddle.uint8, wp.int8)
+    wrap_scalar_tensor_explicit(paddle.bool, wp.uint8)
+    wrap_scalar_tensor_explicit(paddle.bool, wp.int8)
+    wrap_scalar_tensor_explicit(paddle.bool, wp.bool)
+
+    def wrap_vec_tensor(vec_dtype):
+        t = paddle.zeros((10, vec_dtype._length_), dtype=paddle.float32).to(device=paddle_device)
+        a = wp.from_paddle(t, dtype=vec_dtype, return_ctype=True)
+        ctype_size = ctypes.sizeof(vec_dtype._type_)
+        assert a.data == t.data_ptr()
+        assert a.grad == 0
+        assert a.ndim == 1
+        assert a.shape[0] == t.shape[0]
+        assert a.strides[0] == t.strides[0] * ctype_size
+
+    wrap_vec_tensor(wp.vec2)
+    wrap_vec_tensor(wp.vec3)
+    wrap_vec_tensor(wp.vec4)
+    wrap_vec_tensor(wp.spatial_vector)
+    wrap_vec_tensor(wp.transform)
+
+    def wrap_mat_tensor(mat_dtype):
+        t = paddle.zeros((10, *mat_dtype._shape_), dtype=paddle.float32).to(device=paddle_device)
+        a = wp.from_paddle(t, dtype=mat_dtype, return_ctype=True)
+        ctype_size = ctypes.sizeof(mat_dtype._type_)
+        assert a.data == t.data_ptr()
+        assert a.grad == 0
+        assert a.ndim == 1
+        assert a.shape[0] == t.shape[0]
+        assert a.strides[0] == t.strides[0] * ctype_size
+
+    wrap_mat_tensor(wp.mat22)
+    wrap_mat_tensor(wp.mat33)
+    wrap_mat_tensor(wp.mat44)
+    wrap_mat_tensor(wp.spatial_matrix)
+
+    def wrap_vec_tensor_with_existing_grad(vec_dtype):
+        t = paddle.zeros((10, vec_dtype._length_), dtype=paddle.float32).to(device=paddle_device)
+        t.stop_gradient = False
+        t.grad_ = paddle.zeros((10, vec_dtype._length_), dtype=paddle.float32).to(device=paddle_device)
+        a = wp.from_paddle(t, dtype=vec_dtype, return_ctype=True)
+        ctype_size = ctypes.sizeof(vec_dtype._type_)
+        assert a.data == t.data_ptr()
+        assert a.grad == t.grad.data_ptr()
+        assert a.ndim == 1
+        assert a.shape[0] == t.shape[0]
+        assert a.strides[0] == t.strides[0] * ctype_size
+
+    wrap_vec_tensor_with_existing_grad(wp.vec2)
+    wrap_vec_tensor_with_existing_grad(wp.vec3)
+    wrap_vec_tensor_with_existing_grad(wp.vec4)
+    wrap_vec_tensor_with_existing_grad(wp.spatial_vector)
+    wrap_vec_tensor_with_existing_grad(wp.transform)
+
+    def wrap_vec_tensor_with_new_grad(vec_dtype):
+        t = paddle.zeros((10, vec_dtype._length_), dtype=paddle.float32).to(device=paddle_device)
+        a = wp.from_paddle(t, dtype=vec_dtype, requires_grad=True, return_ctype=True)
+        ctype_size = ctypes.sizeof(vec_dtype._type_)
+        assert a.data == t.data_ptr()
+        assert a.grad == t.grad.data_ptr()
+        assert a.ndim == 1
+        assert a.shape[0] == t.shape[0]
+        assert a.strides[0] == t.strides[0] * ctype_size
+
+    wrap_vec_tensor_with_new_grad(wp.vec2)
+    wrap_vec_tensor_with_new_grad(wp.vec3)
+    wrap_vec_tensor_with_new_grad(wp.vec4)
+    wrap_vec_tensor_with_new_grad(wp.spatial_vector)
+    wrap_vec_tensor_with_new_grad(wp.transform)
+
+    def wrap_vec_tensor_with_paddle_grad(vec_dtype):
+        t = paddle.zeros((10, vec_dtype._length_), dtype=paddle.float32).to(device=paddle_device)
+        grad = paddle.zeros((10, vec_dtype._length_), dtype=paddle.float32).to(device=paddle_device)
+        a = wp.from_paddle(t, dtype=vec_dtype, grad=grad, return_ctype=True)
+        ctype_size = ctypes.sizeof(vec_dtype._type_)
+        assert a.data == t.data_ptr()
+        assert a.grad == grad.data_ptr()
+        assert a.ndim == 1
+        assert a.shape[0] == t.shape[0]
+        assert a.strides[0] == t.strides[0] * ctype_size
+
+    wrap_vec_tensor_with_paddle_grad(wp.vec2)
+    wrap_vec_tensor_with_paddle_grad(wp.vec3)
+    wrap_vec_tensor_with_paddle_grad(wp.vec4)
+    wrap_vec_tensor_with_paddle_grad(wp.spatial_vector)
+    wrap_vec_tensor_with_paddle_grad(wp.transform)
+
+    def wrap_vec_tensor_with_warp_grad(vec_dtype):
+        t = paddle.zeros((10, vec_dtype._length_), dtype=paddle.float32).to(device=paddle_device)
+        grad = wp.zeros(10, dtype=vec_dtype, device=device)
+        a = wp.from_paddle(t, dtype=vec_dtype, grad=grad, return_ctype=True)
+        ctype_size = ctypes.sizeof(vec_dtype._type_)
+        assert a.data == t.data_ptr()
+        assert a.grad == grad.ptr
+        assert a.ndim == 1
+        assert a.shape[0] == t.shape[0]
+        assert a.strides[0] == t.strides[0] * ctype_size
+
+    wrap_vec_tensor_with_warp_grad(wp.vec2)
+    wrap_vec_tensor_with_warp_grad(wp.vec3)
+    wrap_vec_tensor_with_warp_grad(wp.vec4)
+    wrap_vec_tensor_with_warp_grad(wp.spatial_vector)
+    wrap_vec_tensor_with_warp_grad(wp.transform)
+
+
+def test_to_paddle(test, device):
+    import paddle
+
+    def wrap_scalar_array(warp_dtype, expected_paddle_dtype):
+        a = wp.zeros(10, dtype=warp_dtype, device=device)
+        t = wp.to_paddle(a)
+        assert t.dtype == expected_paddle_dtype
+        assert tuple(t.shape) == a.shape
+
+    wrap_scalar_array(wp.float64, paddle.float64)
+    wrap_scalar_array(wp.float32, paddle.float32)
+    wrap_scalar_array(wp.float16, paddle.float16)
+    wrap_scalar_array(wp.int64, paddle.int64)
+    wrap_scalar_array(wp.int32, paddle.int32)
+    wrap_scalar_array(wp.int16, paddle.int16)
+    wrap_scalar_array(wp.int8, paddle.int8)
+    wrap_scalar_array(wp.uint8, paddle.uint8)
+    wrap_scalar_array(wp.bool, paddle.bool)
+
+    # not supported by paddle
+    # wrap_scalar_array(wp.uint64, paddle.int64)
+    # wrap_scalar_array(wp.uint32, paddle.int32)
+    # wrap_scalar_array(wp.uint16, paddle.int16)
+
+    def wrap_vec_array(n, warp_dtype):
+        a = wp.zeros(10, dtype=warp_dtype, device=device)
+        t = wp.to_paddle(a)
+        assert t.dtype == paddle.float32
+        assert tuple(t.shape) == (10, n)
+
+    wrap_vec_array(2, wp.vec2)
+    wrap_vec_array(3, wp.vec3)
+    wrap_vec_array(4, wp.vec4)
+    wrap_vec_array(6, wp.spatial_vector)
+    wrap_vec_array(7, wp.transform)
+
+    def wrap_mat_array(n, m, warp_dtype):
+        a = wp.zeros(10, dtype=warp_dtype, device=device)
+        t = wp.to_paddle(a)
+        assert t.dtype == paddle.float32
+        assert tuple(t.shape) == (10, n, m)
+
+    wrap_mat_array(2, 2, wp.mat22)
+    wrap_mat_array(3, 3, wp.mat33)
+    wrap_mat_array(4, 4, wp.mat44)
+    wrap_mat_array(6, 6, wp.spatial_matrix)
+
+
+def test_from_paddle_slices(test, device):
+    import paddle
+
+    paddle_device = wp.device_to_paddle(device)
+
+    # 1D slice, contiguous
+    t_base = paddle.arange(10, dtype=paddle.float32).to(device=paddle_device)
+    t = t_base[2:9]
+    a = wp.from_paddle(t)
+    assert a.ptr == t.data_ptr()
+    assert a.is_contiguous
+    assert a.shape == tuple(t.shape)
+    assert_np_equal(a.numpy(), t.cpu().numpy())
+
+    # 1D slice with non-contiguous stride
+    t_base = paddle.arange(10, dtype=paddle.float32).to(device=paddle_device)
+    t = t_base[2:9:2]
+    a = wp.from_paddle(t)
+    assert a.ptr == t.data_ptr()
+    assert not a.is_contiguous
+    assert a.shape == tuple(t.shape)
+    # copy contents to contiguous array
+    a_contiguous = wp.empty_like(a)
+    wp.launch(copy1d_float_kernel, dim=a.shape, inputs=[a_contiguous, a], device=device)
+    assert_np_equal(a_contiguous.numpy(), t.cpu().numpy())
+
+    # 2D slices (non-contiguous)
+    t_base = paddle.arange(24, dtype=paddle.float32).to(device=paddle_device).reshape((4, 6))
+    t = t_base[1:3, 2:5]
+    a = wp.from_paddle(t)
+    assert a.ptr == t.data_ptr()
+    assert not a.is_contiguous
+    assert a.shape == tuple(t.shape)
+    # copy contents to contiguous array
+    a_contiguous = wp.empty_like(a)
+    wp.launch(copy2d_float_kernel, dim=a.shape, inputs=[a_contiguous, a], device=device)
+    assert_np_equal(a_contiguous.numpy(), t.cpu().numpy())
+
+    # 3D slices (non-contiguous)
+    t_base = paddle.arange(36, dtype=paddle.float32).to(device=paddle_device).reshape((4, 3, 3))
+    t = t_base[::2, 0:1, 1:2]
+    a = wp.from_paddle(t)
+    assert a.ptr == t.data_ptr()
+    assert not a.is_contiguous
+    assert a.shape == tuple(t.shape)
+    # copy contents to contiguous array
+    a_contiguous = wp.empty_like(a)
+    wp.launch(copy3d_float_kernel, dim=a.shape, inputs=[a_contiguous, a], device=device)
+    assert_np_equal(a_contiguous.numpy(), t.cpu().numpy())
+
+    # 2D slices of vec3 (inner contiguous, outer non-contiguous)
+    t_base = paddle.arange(150, dtype=paddle.float32).to(device=paddle_device).reshape((10, 5, 3))
+    t = t_base[1:7:2, 2:5]
+    a = wp.from_paddle(t, dtype=wp.vec3)
+    assert a.ptr == t.data_ptr()
+    assert not a.is_contiguous
+    assert a.shape == tuple(t.shape[:-1])
+    # copy contents to contiguous array
+    a_contiguous = wp.empty_like(a)
+    wp.launch(copy2d_vec3_kernel, dim=a.shape, inputs=[a_contiguous, a], device=device)
+    assert_np_equal(a_contiguous.numpy(), t.cpu().numpy())
+
+    # 2D slices of mat22 (inner contiguous, outer non-contiguous)
+    t_base = paddle.arange(200, dtype=paddle.float32).to(device=paddle_device).reshape((10, 5, 2, 2))
+    t = t_base[1:7:2, 2:5]
+    a = wp.from_paddle(t, dtype=wp.mat22)
+    assert a.ptr == t.data_ptr()
+    assert not a.is_contiguous
+    assert a.shape == tuple(t.shape[:-2])
+    # copy contents to contiguous array
+    a_contiguous = wp.empty_like(a)
+    wp.launch(copy2d_mat22_kernel, dim=a.shape, inputs=[a_contiguous, a], device=device)
+    assert_np_equal(a_contiguous.numpy(), t.cpu().numpy())
+
+
+def test_from_paddle_zero_strides(test, device):
+    import paddle
+
+    paddle_device = wp.device_to_paddle(device)
+
+    t_base = paddle.arange(9, dtype=paddle.float32).to(device=paddle_device).reshape((3, 3))
+
+    # expand outermost dimension
+    t = t_base.unsqueeze(0).expand([3, -1, -1])
+    a = wp.from_paddle(t)
+    assert a.ptr == t.data_ptr()
+    assert a.is_contiguous
+    assert a.shape == tuple(t.shape)
+    a_contiguous = wp.empty_like(a)
+    wp.launch(copy3d_float_kernel, dim=a.shape, inputs=[a_contiguous, a], device=device)
+    assert_np_equal(a_contiguous.numpy(), t.cpu().numpy())
+
+    # expand middle dimension
+    t = t_base.unsqueeze(1).expand([-1, 3, -1])
+    a = wp.from_paddle(t)
+    assert a.ptr == t.data_ptr()
+    assert a.is_contiguous
+    assert a.shape == tuple(t.shape)
+    a_contiguous = wp.empty_like(a)
+    wp.launch(copy3d_float_kernel, dim=a.shape, inputs=[a_contiguous, a], device=device)
+    assert_np_equal(a_contiguous.numpy(), t.cpu().numpy())
+
+    # expand innermost dimension
+    t = t_base.unsqueeze(2).expand([-1, -1, 3])
+    a = wp.from_paddle(t)
+    assert a.ptr == t.data_ptr()
+    assert a.is_contiguous
+    assert a.shape == tuple(t.shape)
+    a_contiguous = wp.empty_like(a)
+    wp.launch(copy3d_float_kernel, dim=a.shape, inputs=[a_contiguous, a], device=device)
+    assert_np_equal(a_contiguous.numpy(), t.cpu().numpy())
+
+
+def test_paddle_mgpu_from_paddle(test, device):
+    import paddle
+
+    n = 32
+
+    t0 = paddle.arange(0, n, 1, dtype=paddle.int32).to(device="gpu:0")
+    t1 = paddle.arange(0, n * 2, 2, dtype=paddle.int32).to(device="gpu:1")
+
+    a0 = wp.from_paddle(t0, dtype=wp.int32)
+    a1 = wp.from_paddle(t1, dtype=wp.int32)
+
+    assert a0.device == "gpu:0"
+    assert a1.device == "gpu:1"
+
+    expected0 = np.arange(0, n, 1)
+    expected1 = np.arange(0, n * 2, 2)
+
+    assert_np_equal(a0.numpy(), expected0)
+    assert_np_equal(a1.numpy(), expected1)
+
+
+def test_paddle_mgpu_to_paddle(test, device):
+    n = 32
+
+    with wp.ScopedDevice("gpu:0"):
+        a0 = wp.empty(n, dtype=wp.int32)
+        wp.launch(arange, dim=a0.size, inputs=[0, 1, a0])
+
+    with wp.ScopedDevice("gpu:1"):
+        a1 = wp.empty(n, dtype=wp.int32)
+        wp.launch(arange, dim=a1.size, inputs=[0, 2, a1])
+
+    t0 = wp.to_paddle(a0)
+    t1 = wp.to_paddle(a1)
+
+    assert str(t0.device) == "gpu:0"
+    assert str(t1.device) == "gpu:1"
+
+    expected0 = np.arange(0, n, 1, dtype=np.int32)
+    expected1 = np.arange(0, n * 2, 2, dtype=np.int32)
+
+    assert_np_equal(t0.cpu().numpy(), expected0)
+    assert_np_equal(t1.cpu().numpy(), expected1)
+
+
+def test_paddle_mgpu_interop(test, device):
+    import paddle
+
+    n = 1024 * 1024
+
+    with paddle.cuda.device(0):
+        t0 = paddle.arange(n, dtype=paddle.float32).to(device="gpu")
+        a0 = wp.from_paddle(t0)
+        wp.launch(inc, dim=a0.size, inputs=[a0], stream=wp.stream_from_paddle())
+
+    with paddle.cuda.device(1):
+        t1 = paddle.arange(n, dtype=paddle.float32).to(device="gpu")
+        a1 = wp.from_paddle(t1)
+        wp.launch(inc, dim=a1.size, inputs=[a1], stream=wp.stream_from_paddle())
+
+    assert a0.device == "gpu:0"
+    assert a1.device == "gpu:1"
+
+    expected = np.arange(n, dtype=int) + 1
+
+    # ensure the paddle tensors were modified by warp
+    assert_np_equal(t0.cpu().numpy(), expected)
+    assert_np_equal(t1.cpu().numpy(), expected)
+
+
+def test_paddle_autograd(test, device):
+    """Test paddle autograd with a custom Warp op"""
+
+    import paddle
+
+    # custom autograd op
+    class TestFunc(paddle.autograd.PyLayer):
+        @staticmethod
+        def forward(ctx, x):
+            # allocate output array
+            y = paddle.empty_like(x)
+
+            ctx.x = x
+            ctx.y = y
+
+            wp.launch(kernel=op_kernel, dim=len(x), inputs=[wp.from_paddle(x)], outputs=[wp.from_paddle(y)])
+
+            return y
+
+        @staticmethod
+        def backward(ctx, adj_y):
+            # adjoints should be allocated as zero initialized
+            adj_x = paddle.zeros_like(ctx.x).contiguous()
+            adj_y = adj_y.contiguous()
+
+            wp_x = wp.from_paddle(ctx.x, grad=adj_x)
+            wp_y = wp.from_paddle(ctx.y, grad=adj_y)
+
+            wp.launch(
+                kernel=op_kernel,
+                dim=len(ctx.x),
+                # fwd inputs
+                inputs=[wp_x],
+                outputs=[wp_y],
+                # adj inputs (already stored in input/output arrays, passing null pointers)
+                adj_inputs=[None],
+                adj_outputs=[None],
+                adjoint=True,
+            )
+
+            return adj_x
+
+    # run autograd on given device
+    with wp.ScopedDevice(device):
+        paddle_device = wp.device_to_paddle(device)
+
+        # input data
+        x = paddle.ones(16, dtype=paddle.float32).to(device=paddle_device)
+        x.stop_gradient = False
+
+        # execute op
+        y = TestFunc.apply(x)
+
+        # compute grads
+        l = y.sum()
+        l.backward()
+
+        passed = (x.grad == -2.0).all()
+        assert passed.item()
+
+
+def test_warp_graph_warp_stream(test, device):
+    """Capture Warp graph on Warp stream"""
+
+    import paddle
+
+    paddle_device = wp.device_to_paddle(device)
+
+    n = 1024 * 1024
+    t = paddle.zeros(n, dtype=paddle.float32).to(device=paddle_device)
+    a = wp.from_paddle(t)
+
+    # make paddle use the warp stream from the given device
+    paddle_stream = wp.stream_to_paddle(device)
+
+    # capture graph
+    with wp.ScopedDevice(device), paddle.device.stream(paddle_stream):
+        wp.capture_begin(force_module_load=False)
+        try:
+            t += 1.0
+            wp.launch(inc, dim=n, inputs=[a])
+            t += 1.0
+            wp.launch(inc, dim=n, inputs=[a])
+        finally:
+            g = wp.capture_end()
+
+    # replay graph
+    num_iters = 10
+    for _i in range(num_iters):
+        wp.capture_launch(g)
+
+    passed = (t == num_iters * 4.0).all()
+    assert passed.item()
+
+
+def test_warp_graph_paddle_stream(test, device):
+    """Capture Warp graph on Paddle stream"""
+
+    wp.load_module(device=device)
+
+    import paddle
+
+    paddle_device = wp.device_to_paddle(device)
+
+    n = 1024 * 1024
+    t = paddle.zeros(n, dtype=paddle.float32).to(device=paddle_device)
+    a = wp.from_paddle(t)
+
+    # create a device-specific paddle stream to use for capture
+    # (the default paddle stream is not suitable for graph capture)
+    paddle_stream = paddle.device.Stream(device=paddle_device)
+
+    # make warp use the same stream
+    warp_stream = wp.stream_from_paddle(paddle_stream)
+
+    # capture graph
+    with wp.ScopedStream(warp_stream):
+        wp.capture_begin(force_module_load=False)
+        try:
+            t += 1.0
+            wp.launch(inc, dim=n, inputs=[a])
+            t += 1.0
+            wp.launch(inc, dim=n, inputs=[a])
+        finally:
+            g = wp.capture_end()
+
+    # replay graph
+    num_iters = 10
+    for _i in range(num_iters):
+        wp.capture_launch(g)
+
+    passed = (t == num_iters * 4.0).all()
+    assert passed.item()
+
+
+def test_direct(test, device):
+    """Pass Paddle tensors to Warp kernels directly"""
+
+    import paddle
+
+    paddle_device = wp.device_to_paddle(device)
+    n = 12
+
+    s = paddle.arange(n, dtype=paddle.float32).to(device=paddle_device)
+    v = paddle.arange(n, dtype=paddle.float32).to(device=paddle_device).reshape((n // 3, 3))
+    m = paddle.arange(n, dtype=paddle.float32).to(device=paddle_device).reshape((n // 4, 2, 2))
+
+    wp.launch(inc, dim=n, inputs=[s], device=device)
+    wp.launch(inc_vector, dim=n // 3, inputs=[v], device=device)
+    wp.launch(inc_matrix, dim=n // 4, inputs=[m], device=device)
+
+    expected = paddle.arange(1, n + 1, dtype=paddle.float32).to(device=paddle_device)
+
+    assert paddle.equal_all(s, expected).item()
+    assert paddle.equal_all(v.reshape([n]), expected).item()
+    assert paddle.equal_all(m.reshape([n]), expected).item()
+
+
+class TestPaddle(unittest.TestCase):
+    pass
+
+
+test_devices = get_test_devices()
+
+try:
+    import paddle
+
+    # check which Warp devices work with Paddle
+    # CUDA devices may fail if Paddle was not compiled with CUDA support
+    paddle_compatible_devices = []
+    paddle_compatible_cuda_devices = []
+
+    for d in test_devices:
+        try:
+            t = paddle.arange(10).to(device=wp.device_to_paddle(d))
+            t += 1
+            paddle_compatible_devices.append(d)
+            if d.is_cuda:
+                paddle_compatible_cuda_devices.append(d)
+        except Exception as e:
+            print(f"Skipping Paddle tests on device '{d}' due to exception: {e}")
+
+    add_function_test(TestPaddle, "test_dtype_from_paddle", test_dtype_from_paddle, devices=None)
+    add_function_test(TestPaddle, "test_dtype_to_paddle", test_dtype_to_paddle, devices=None)
+
+    if paddle_compatible_devices:
+        add_function_test(
+            TestPaddle, "test_device_conversion", test_device_conversion, devices=paddle_compatible_devices
+        )
+        add_function_test(TestPaddle, "test_from_paddle", test_from_paddle, devices=paddle_compatible_devices)
+        add_function_test(
+            TestPaddle, "test_from_paddle_slices", test_from_paddle_slices, devices=paddle_compatible_devices
+        )
+        add_function_test(
+            TestPaddle, "test_array_ctype_from_paddle", test_array_ctype_from_paddle, devices=paddle_compatible_devices
+        )
+        add_function_test(
+            TestPaddle,
+            "test_from_paddle_zero_strides",
+            test_from_paddle_zero_strides,
+            devices=paddle_compatible_devices,
+        )
+        add_function_test(TestPaddle, "test_to_paddle", test_to_paddle, devices=paddle_compatible_devices)
+        add_function_test(TestPaddle, "test_paddle_zerocopy", test_paddle_zerocopy, devices=paddle_compatible_devices)
+        add_function_test(TestPaddle, "test_paddle_autograd", test_paddle_autograd, devices=paddle_compatible_devices)
+        add_function_test(TestPaddle, "test_direct", test_direct, devices=paddle_compatible_devices)
+
+    # NOTE: Graph not supported now
+    # if paddle_compatible_cuda_devices:
+    #     add_function_test(
+    #         TestPaddle,
+    #         "test_warp_graph_warp_stream",
+    #         test_warp_graph_warp_stream,
+    #         devices=paddle_compatible_cuda_devices,
+    #     )
+    #     add_function_test(
+    #         TestPaddle,
+    #         "test_warp_graph_paddle_stream",
+    #         test_warp_graph_paddle_stream,
+    #         devices=paddle_compatible_cuda_devices,
+    #     )
+
+    # multi-GPU tests
+    if len(paddle_compatible_cuda_devices) > 1:
+        add_function_test(TestPaddle, "test_paddle_mgpu_from_paddle", test_paddle_mgpu_from_paddle)
+        add_function_test(TestPaddle, "test_paddle_mgpu_to_paddle", test_paddle_mgpu_to_paddle)
+        add_function_test(TestPaddle, "test_paddle_mgpu_interop", test_paddle_mgpu_interop)
+
+except Exception as e:
+    print(f"Skipping Paddle tests due to exception: {e}")
+
+
+if __name__ == "__main__":
+    wp.clear_kernel_cache()
+    unittest.main(verbosity=2)
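
The autograd test above wires Paddle adjoint buffers into Warp through the grad argument of wp.from_paddle and then relaunches the kernel with adjoint=True. A condensed sketch of that pattern outside any PyLayer follows; it is illustrative only (the kernel `double` and the buffer names are not part of the package), and it assumes the same launch arguments the test uses:

    import paddle
    import warp as wp

    @wp.kernel
    def double(x: wp.array(dtype=float), y: wp.array(dtype=float)):
        tid = wp.tid()
        y[tid] = 2.0 * x[tid]

    device = wp.get_device()
    paddle_device = wp.device_to_paddle(device)

    x = paddle.ones([16], dtype=paddle.float32).to(device=paddle_device)
    y = paddle.zeros([16], dtype=paddle.float32).to(device=paddle_device)

    # adjoint buffers: zero-initialized input adjoint, seed dL/dy = 1
    adj_x = paddle.zeros_like(x).contiguous()
    adj_y = paddle.ones_like(y).contiguous()

    wp_x = wp.from_paddle(x, grad=adj_x)
    wp_y = wp.from_paddle(y, grad=adj_y)

    # forward launch, then adjoint launch accumulating into adj_x
    wp.launch(double, dim=16, inputs=[wp_x], outputs=[wp_y])
    wp.launch(double, dim=16, inputs=[wp_x], outputs=[wp_y],
              adj_inputs=[None], adj_outputs=[None], adjoint=True)
    # adj_x should now hold dL/dx = 2.0 per element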