warp-lang 1.8.1__py3-none-win_amd64.whl → 1.9.0__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of warp-lang might be problematic. See the package's registry page for more details.

Files changed (134)
  1. warp/__init__.py +282 -103
  2. warp/__init__.pyi +482 -110
  3. warp/bin/warp-clang.dll +0 -0
  4. warp/bin/warp.dll +0 -0
  5. warp/build.py +93 -30
  6. warp/build_dll.py +47 -67
  7. warp/builtins.py +955 -137
  8. warp/codegen.py +312 -206
  9. warp/config.py +1 -1
  10. warp/context.py +1249 -784
  11. warp/examples/core/example_marching_cubes.py +1 -0
  12. warp/examples/core/example_render_opengl.py +100 -3
  13. warp/examples/fem/example_apic_fluid.py +98 -52
  14. warp/examples/fem/example_convection_diffusion_dg.py +25 -4
  15. warp/examples/fem/example_diffusion_mgpu.py +8 -3
  16. warp/examples/fem/utils.py +68 -22
  17. warp/fabric.py +1 -1
  18. warp/fem/cache.py +27 -19
  19. warp/fem/domain.py +2 -2
  20. warp/fem/field/nodal_field.py +2 -2
  21. warp/fem/field/virtual.py +264 -166
  22. warp/fem/geometry/geometry.py +5 -5
  23. warp/fem/integrate.py +129 -51
  24. warp/fem/space/restriction.py +4 -0
  25. warp/fem/space/shape/tet_shape_function.py +3 -10
  26. warp/jax_experimental/custom_call.py +1 -1
  27. warp/jax_experimental/ffi.py +2 -1
  28. warp/marching_cubes.py +708 -0
  29. warp/native/array.h +99 -4
  30. warp/native/builtin.h +82 -5
  31. warp/native/bvh.cpp +64 -28
  32. warp/native/bvh.cu +58 -58
  33. warp/native/bvh.h +2 -2
  34. warp/native/clang/clang.cpp +7 -7
  35. warp/native/coloring.cpp +8 -2
  36. warp/native/crt.cpp +2 -2
  37. warp/native/crt.h +3 -5
  38. warp/native/cuda_util.cpp +41 -10
  39. warp/native/cuda_util.h +10 -4
  40. warp/native/exports.h +1842 -1908
  41. warp/native/fabric.h +2 -1
  42. warp/native/hashgrid.cpp +37 -37
  43. warp/native/hashgrid.cu +2 -2
  44. warp/native/initializer_array.h +1 -1
  45. warp/native/intersect.h +2 -2
  46. warp/native/mat.h +1910 -116
  47. warp/native/mathdx.cpp +43 -43
  48. warp/native/mesh.cpp +24 -24
  49. warp/native/mesh.cu +26 -26
  50. warp/native/mesh.h +4 -2
  51. warp/native/nanovdb/GridHandle.h +179 -12
  52. warp/native/nanovdb/HostBuffer.h +8 -7
  53. warp/native/nanovdb/NanoVDB.h +517 -895
  54. warp/native/nanovdb/NodeManager.h +323 -0
  55. warp/native/nanovdb/PNanoVDB.h +2 -2
  56. warp/native/quat.h +331 -14
  57. warp/native/range.h +7 -1
  58. warp/native/reduce.cpp +10 -10
  59. warp/native/reduce.cu +13 -14
  60. warp/native/runlength_encode.cpp +2 -2
  61. warp/native/runlength_encode.cu +5 -5
  62. warp/native/scan.cpp +3 -3
  63. warp/native/scan.cu +4 -4
  64. warp/native/sort.cpp +10 -10
  65. warp/native/sort.cu +22 -22
  66. warp/native/sparse.cpp +8 -8
  67. warp/native/sparse.cu +13 -13
  68. warp/native/spatial.h +366 -17
  69. warp/native/temp_buffer.h +2 -2
  70. warp/native/tile.h +283 -69
  71. warp/native/vec.h +381 -14
  72. warp/native/volume.cpp +54 -54
  73. warp/native/volume.cu +1 -1
  74. warp/native/volume.h +2 -1
  75. warp/native/volume_builder.cu +30 -37
  76. warp/native/warp.cpp +150 -149
  77. warp/native/warp.cu +323 -192
  78. warp/native/warp.h +227 -226
  79. warp/optim/linear.py +736 -271
  80. warp/render/imgui_manager.py +289 -0
  81. warp/render/render_opengl.py +85 -6
  82. warp/sim/graph_coloring.py +2 -2
  83. warp/sparse.py +558 -175
  84. warp/tests/aux_test_module_aot.py +7 -0
  85. warp/tests/cuda/test_async.py +3 -3
  86. warp/tests/cuda/test_conditional_captures.py +101 -0
  87. warp/tests/geometry/test_marching_cubes.py +233 -12
  88. warp/tests/sim/test_coloring.py +6 -6
  89. warp/tests/test_array.py +56 -5
  90. warp/tests/test_codegen.py +3 -2
  91. warp/tests/test_context.py +8 -15
  92. warp/tests/test_enum.py +136 -0
  93. warp/tests/test_examples.py +2 -2
  94. warp/tests/test_fem.py +45 -2
  95. warp/tests/test_fixedarray.py +229 -0
  96. warp/tests/test_func.py +18 -15
  97. warp/tests/test_future_annotations.py +7 -5
  98. warp/tests/test_linear_solvers.py +30 -0
  99. warp/tests/test_map.py +1 -1
  100. warp/tests/test_mat.py +1518 -378
  101. warp/tests/test_mat_assign_copy.py +178 -0
  102. warp/tests/test_mat_constructors.py +574 -0
  103. warp/tests/test_module_aot.py +287 -0
  104. warp/tests/test_print.py +69 -0
  105. warp/tests/test_quat.py +140 -34
  106. warp/tests/test_quat_assign_copy.py +145 -0
  107. warp/tests/test_reload.py +2 -1
  108. warp/tests/test_sparse.py +71 -0
  109. warp/tests/test_spatial.py +140 -34
  110. warp/tests/test_spatial_assign_copy.py +160 -0
  111. warp/tests/test_struct.py +43 -3
  112. warp/tests/test_types.py +0 -20
  113. warp/tests/test_vec.py +179 -34
  114. warp/tests/test_vec_assign_copy.py +143 -0
  115. warp/tests/tile/test_tile.py +184 -18
  116. warp/tests/tile/test_tile_cholesky.py +605 -0
  117. warp/tests/tile/test_tile_load.py +169 -0
  118. warp/tests/tile/test_tile_mathdx.py +2 -558
  119. warp/tests/tile/test_tile_matmul.py +1 -1
  120. warp/tests/tile/test_tile_mlp.py +1 -1
  121. warp/tests/tile/test_tile_shared_memory.py +5 -5
  122. warp/tests/unittest_suites.py +6 -0
  123. warp/tests/walkthrough_debug.py +1 -1
  124. warp/thirdparty/unittest_parallel.py +108 -9
  125. warp/types.py +554 -264
  126. warp/utils.py +68 -86
  127. {warp_lang-1.8.1.dist-info → warp_lang-1.9.0.dist-info}/METADATA +28 -65
  128. {warp_lang-1.8.1.dist-info → warp_lang-1.9.0.dist-info}/RECORD +131 -121
  129. warp/native/marching.cpp +0 -19
  130. warp/native/marching.cu +0 -514
  131. warp/native/marching.h +0 -19
  132. {warp_lang-1.8.1.dist-info → warp_lang-1.9.0.dist-info}/WHEEL +0 -0
  133. {warp_lang-1.8.1.dist-info → warp_lang-1.9.0.dist-info}/licenses/LICENSE.md +0 -0
  134. {warp_lang-1.8.1.dist-info → warp_lang-1.9.0.dist-info}/top_level.txt +0 -0
warp/tests/test_types.py CHANGED
@@ -267,11 +267,6 @@ class TestTypes(unittest.TestCase):
267
267
  with self.assertRaisesRegex(TypeError, r"Expected to assign a `int32` value but got `str` instead"):
268
268
  v1[0] = "123.0"
269
269
 
270
- with self.assertRaisesRegex(
271
- TypeError, r"Expected to assign a slice from a sequence of values but got `int` instead"
272
- ):
273
- v1[:] = 123
274
-
275
270
  with self.assertRaisesRegex(
276
271
  TypeError, r"Expected to assign a slice from a sequence of `int32` values but got `vec3i` instead"
277
272
  ):
@@ -483,26 +478,11 @@ class TestTypes(unittest.TestCase):
483
478
  with self.assertRaisesRegex(TypeError, r"Expected to assign a `float16` value but got `str` instead"):
484
479
  m[0][0] = "123.0"
485
480
 
486
- with self.assertRaisesRegex(
487
- TypeError, r"Expected to assign a slice from a sequence of values but got `int` instead"
488
- ):
489
- m[0] = 123
490
-
491
481
  with self.assertRaisesRegex(
492
482
  TypeError, r"Expected to assign a slice from a sequence of `float16` values but got `mat22h` instead"
493
483
  ):
494
484
  m[0] = (m,)
495
485
 
496
- with self.assertRaisesRegex(
497
- KeyError, r"Slices are not supported when indexing matrices using the `m\[start:end\]` notation"
498
- ):
499
- m[:] = 123
500
-
501
- with self.assertRaisesRegex(
502
- KeyError, r"Slices are not supported when indexing matrices using the `m\[i, j\]` notation"
503
- ):
504
- m[0, :1] = (123,)
505
-
506
486
  with self.assertRaisesRegex(ValueError, r"Can only assign sequence of same size"):
507
487
  m[0][:1] = (1, 2)
508
488
 
warp/tests/test_vec.py CHANGED
@@ -922,39 +922,6 @@ def test_vec_assign(test, device):
922
922
  run(vec_assign_attribute)
923
923
 
924
924
 
925
- def test_vec_assign_copy(test, device):
926
- saved_enable_vector_component_overwrites_setting = wp.config.enable_vector_component_overwrites
927
- try:
928
- wp.config.enable_vector_component_overwrites = True
929
-
930
- @wp.kernel(module="unique")
931
- def vec_assign_overwrite(x: wp.array(dtype=wp.vec3), y: wp.array(dtype=wp.vec3)):
932
- tid = wp.tid()
933
-
934
- a = wp.vec3()
935
- b = x[tid]
936
- a = b
937
- a[1] = 3.0
938
-
939
- y[tid] = a
940
-
941
- x = wp.ones(1, dtype=wp.vec3, device=device, requires_grad=True)
942
- y = wp.zeros(1, dtype=wp.vec3, device=device, requires_grad=True)
943
-
944
- tape = wp.Tape()
945
- with tape:
946
- wp.launch(vec_assign_overwrite, dim=1, inputs=[x, y], device=device)
947
-
948
- y.grad = wp.ones_like(y, requires_grad=False)
949
- tape.backward()
950
-
951
- assert_np_equal(y.numpy(), np.array([[1.0, 3.0, 1.0]], dtype=float))
952
- assert_np_equal(x.grad.numpy(), np.array([[1.0, 0.0, 1.0]], dtype=float))
953
-
954
- finally:
955
- wp.config.enable_vector_component_overwrites = saved_enable_vector_component_overwrites_setting
956
-
957
-
958
925
  @wp.kernel
959
926
  def vec_array_extract_subscript(x: wp.array2d(dtype=wp.vec3), y: wp.array2d(dtype=float)):
960
927
  i, j = wp.tid()
@@ -1189,6 +1156,181 @@ def test_scalar_vec_div(test, device):
1189
1156
  assert_np_equal(x.grad.numpy(), np.array(((-1.0, -0.25, -0.0625),), dtype=float))
1190
1157
 
1191
1158
 
1159
+ def test_vec_indexing_assign(test, device):
1160
+ @wp.func
1161
+ def fn():
1162
+ v = wp.vec4(1.0, 2.0, 3.0, 4.0)
1163
+
1164
+ v[0] = 123.0
1165
+ v[1] *= 2.0
1166
+
1167
+ wp.expect_eq(v[0], 123.0)
1168
+ wp.expect_eq(v[1], 4.0)
1169
+ wp.expect_eq(v[2], 3.0)
1170
+ wp.expect_eq(v[3], 4.0)
1171
+
1172
+ v[-1] = 123.0
1173
+ v[-2] *= 2.0
1174
+
1175
+ wp.expect_eq(v[-1], 123.0)
1176
+ wp.expect_eq(v[-2], 6.0)
1177
+ wp.expect_eq(v[-3], 4.0)
1178
+ wp.expect_eq(v[-4], 123.0)
1179
+
1180
+ @wp.kernel(module="unique")
1181
+ def kernel():
1182
+ fn()
1183
+
1184
+ wp.launch(kernel, 1, device=device)
1185
+ wp.synchronize()
1186
+ fn()
1187
+
1188
+
1189
+ def test_vec_slicing_assign(test, device):
1190
+ vec0 = wp.vec(0, float)
1191
+ vec1 = wp.vec(1, float)
1192
+ vec2 = wp.vec(2, float)
1193
+ vec3 = wp.vec(3, float)
1194
+ vec4 = wp.vec(4, float)
1195
+
1196
+ @wp.func
1197
+ def fn():
1198
+ v = wp.vec4(1.0, 2.0, 3.0, 4.0)
1199
+
1200
+ wp.expect_eq(v[:] == vec4(1.0, 2.0, 3.0, 4.0), True)
1201
+ wp.expect_eq(v[-123:123] == vec4(1.0, 2.0, 3.0, 4.0), True)
1202
+ wp.expect_eq(v[123:] == vec0(), True)
1203
+ wp.expect_eq(v[:-123] == vec0(), True)
1204
+ wp.expect_eq(v[::123] == vec1(1.0), True)
1205
+
1206
+ wp.expect_eq(v[1:] == vec3(2.0, 3.0, 4.0), True)
1207
+ wp.expect_eq(v[-2:] == vec2(3.0, 4.0), True)
1208
+ wp.expect_eq(v[:2] == vec2(1.0, 2.0), True)
1209
+ wp.expect_eq(v[:-1] == vec3(1.0, 2.0, 3.0), True)
1210
+ wp.expect_eq(v[::2] == vec2(1.0, 3.0), True)
1211
+ wp.expect_eq(v[1::2] == vec2(2.0, 4.0), True)
1212
+ wp.expect_eq(v[::-1] == vec4(4.0, 3.0, 2.0, 1.0), True)
1213
+ wp.expect_eq(v[::-2] == vec2(4.0, 2.0), True)
1214
+ wp.expect_eq(v[1::-2] == vec1(2.0), True)
1215
+
1216
+ v[1:] = vec3(5.0, 6.0, 7.0)
1217
+ wp.expect_eq(v == wp.vec4(1.0, 5.0, 6.0, 7.0), True)
1218
+
1219
+ v[-2:] = vec2(8.0, 9.0)
1220
+ wp.expect_eq(v == wp.vec4(1.0, 5.0, 8.0, 9.0), True)
1221
+
1222
+ v[:2] = vec2(10.0, 11.0)
1223
+ wp.expect_eq(v == wp.vec4(10.0, 11.0, 8.0, 9.0), True)
1224
+
1225
+ v[:-1] = vec3(12.0, 13.0, 14.0)
1226
+ wp.expect_eq(v == wp.vec4(12.0, 13.0, 14.0, 9.0), True)
1227
+
1228
+ v[::2] = vec2(15.0, 16.0)
1229
+ wp.expect_eq(v == wp.vec4(15.0, 13.0, 16.0, 9.0), True)
1230
+
1231
+ v[1::2] = vec2(17.0, 18.0)
1232
+ wp.expect_eq(v == wp.vec4(15.0, 17.0, 16.0, 18.0), True)
1233
+
1234
+ v[::-1] = vec4(19.0, 20.0, 21.0, 22.0)
1235
+ wp.expect_eq(v == wp.vec4(22.0, 21.0, 20.0, 19.0), True)
1236
+
1237
+ v[::-2] = vec2(23.0, 24.0)
1238
+ wp.expect_eq(v == wp.vec4(22.0, 24.0, 20.0, 23.0), True)
1239
+
1240
+ v[1::-2] = vec1(25.0)
1241
+ wp.expect_eq(v == wp.vec4(22.0, 25.0, 20.0, 23.0), True)
1242
+
1243
+ v[1:] += vec3(26.0, 27.0, 28.0)
1244
+ wp.expect_eq(v == wp.vec4(22.0, 51.0, 47.0, 51.0), True)
1245
+
1246
+ v[:-1] -= vec3(29.0, 30.0, 31.0)
1247
+ wp.expect_eq(v == wp.vec4(-7.0, 21.0, 16.0, 51.0), True)
1248
+
1249
+ v[:] %= vec4(32.0, 33.0, 34.0, 35.0)
1250
+ wp.expect_eq(v == wp.vec4(-7.0, 21.0, 16.0, 16.0), True)
1251
+
1252
+ @wp.kernel(module="unique")
1253
+ def kernel():
1254
+ fn()
1255
+
1256
+ wp.launch(kernel, 1, device=device)
1257
+ wp.synchronize()
1258
+ fn()
1259
+
1260
+
1261
+ def test_vec_assign_inplace_errors(test, device):
1262
+ @wp.kernel
1263
+ def kernel_1():
1264
+ v = wp.vec4(1.0, 2.0, 3.0, 4.0)
1265
+ v[1:] = wp.vec3d(wp.float64(5.0), wp.float64(6.0), wp.float64(7.0))
1266
+
1267
+ with test.assertRaisesRegex(
1268
+ ValueError,
1269
+ r"The provided vector is expected to be of length 3 with dtype float32.$",
1270
+ ):
1271
+ wp.launch(kernel_1, dim=1, device=device)
1272
+
1273
+ @wp.kernel
1274
+ def kernel_2():
1275
+ v = wp.vec4(1.0, 2.0, 3.0, 4.0)
1276
+ v[1:] = wp.float64(5.0)
1277
+
1278
+ with test.assertRaisesRegex(
1279
+ ValueError,
1280
+ r"The provided value is expected to be a vector of length 3, with dtype float32.$",
1281
+ ):
1282
+ wp.launch(kernel_2, dim=1, device=device)
1283
+
1284
+ @wp.kernel
1285
+ def kernel_3():
1286
+ v = wp.vec4(1.0, 2.0, 3.0, 4.0)
1287
+ v[1:] = wp.mat22(5.0, 6.0, 7.0, 8.0)
1288
+
1289
+ with test.assertRaisesRegex(
1290
+ ValueError,
1291
+ r"The provided value is expected to be a vector of length 3, with dtype float32.$",
1292
+ ):
1293
+ wp.launch(kernel_3, dim=1, device=device)
1294
+
1295
+ @wp.kernel
1296
+ def kernel_4():
1297
+ v = wp.vec4(1.0, 2.0, 3.0, 4.0)
1298
+ v[1:] = wp.vec2(5.0, 6.0)
1299
+
1300
+ with test.assertRaisesRegex(
1301
+ ValueError,
1302
+ r"The length of the provided vector \(2\) isn't compatible with the given slice \(expected 3\).$",
1303
+ ):
1304
+ wp.launch(kernel_4, dim=1, device=device)
1305
+
1306
+
1307
+ def test_vec_slicing_assign_backward(test, device):
1308
+ @wp.kernel(module="unique")
1309
+ def kernel(arr_x: wp.array(dtype=wp.vec2), arr_y: wp.array(dtype=wp.vec4)):
1310
+ i = wp.tid()
1311
+
1312
+ y = arr_y[i]
1313
+
1314
+ y[:2] = arr_x[i]
1315
+ y[1:-1] += arr_x[i][:2]
1316
+ y[3:1:-1] -= arr_x[i][0:]
1317
+
1318
+ arr_y[i] = y
1319
+
1320
+ x = wp.ones(1, dtype=wp.vec2, requires_grad=True, device=device)
1321
+ y = wp.zeros(1, dtype=wp.vec4, requires_grad=True, device=device)
1322
+
1323
+ tape = wp.Tape()
1324
+ with tape:
1325
+ wp.launch(kernel, 1, inputs=(x,), outputs=(y,), device=device)
1326
+
1327
+ y.grad = wp.ones_like(y)
1328
+ tape.backward()
1329
+
1330
+ assert_np_equal(y.numpy(), np.array(((1.0, 2.0, 0.0, -1.0),), dtype=float))
1331
+ assert_np_equal(x.grad.numpy(), np.array(((1.0, 1.0),), dtype=float))
1332
+
1333
+
1192
1334
  devices = get_test_devices()
1193
1335
 
1194
1336
 
@@ -1248,7 +1390,6 @@ add_function_test(TestVec, "test_length_mismatch", test_length_mismatch, devices
1248
1390
  add_function_test(TestVec, "test_vector_len", test_vector_len, devices=devices)
1249
1391
  add_function_test(TestVec, "test_vec_extract", test_vec_extract, devices=devices)
1250
1392
  add_function_test(TestVec, "test_vec_assign", test_vec_assign, devices=devices)
1251
- add_function_test(TestVec, "test_vec_assign_copy", test_vec_assign_copy, devices=devices)
1252
1393
  add_function_test(TestVec, "test_vec_array_extract", test_vec_array_extract, devices=devices)
1253
1394
  add_function_test(TestVec, "test_vec_array_assign", test_vec_array_assign, devices=devices)
1254
1395
  add_function_test(TestVec, "test_vec_add_inplace", test_vec_add_inplace, devices=devices)
@@ -1256,6 +1397,10 @@ add_function_test(TestVec, "test_vec_sub_inplace", test_vec_sub_inplace, devices
1256
1397
  add_function_test(TestVec, "test_vec_array_add_inplace", test_vec_array_add_inplace, devices=devices)
1257
1398
  add_function_test(TestVec, "test_vec_array_sub_inplace", test_vec_array_sub_inplace, devices=devices)
1258
1399
  add_function_test(TestVec, "test_scalar_vec_div", test_scalar_vec_div, devices=devices)
1400
+ add_function_test(TestVec, "test_vec_indexing_assign", test_vec_indexing_assign, devices=devices)
1401
+ add_function_test(TestVec, "test_vec_slicing_assign", test_vec_slicing_assign, devices=devices)
1402
+ add_function_test(TestVec, "test_vec_assign_inplace_errors", test_vec_assign_inplace_errors, devices=devices)
1403
+ add_function_test(TestVec, "test_vec_slicing_assign_backward", test_vec_slicing_assign_backward, devices=devices)
1259
1404
 
1260
1405
 
1261
1406
  if __name__ == "__main__":
warp/tests/test_vec_assign_copy.py ADDED
@@ -0,0 +1,143 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ import unittest
17
+
18
+ import numpy as np
19
+
20
+ import warp as wp
21
+ from warp.tests.unittest_utils import *
22
+
23
+
24
+ def setUpModule():
25
+ wp.config.enable_vector_component_overwrites = True
26
+
27
+
28
+ def tearDownModule():
29
+ wp.config.enable_vector_component_overwrites = False
30
+
31
+
32
+ @wp.kernel
33
+ def vec_assign_subscript(x: wp.array(dtype=float), y: wp.array(dtype=wp.vec3)):
34
+ i = wp.tid()
35
+
36
+ a = wp.vec3()
37
+ a[0] = 1.0 * x[i]
38
+ a[1] = 2.0 * x[i]
39
+ a[2] = 3.0 * x[i]
40
+ y[i] = a
41
+
42
+
43
+ @wp.kernel
44
+ def vec_assign_attribute(x: wp.array(dtype=float), y: wp.array(dtype=wp.vec3)):
45
+ i = wp.tid()
46
+
47
+ a = wp.vec3()
48
+ a.x = 1.0 * x[i]
49
+ a.y = 2.0 * x[i]
50
+ a.z = 3.0 * x[i]
51
+ y[i] = a
52
+
53
+
54
+ def test_vec_assign(test, device):
55
+ def run(kernel):
56
+ x = wp.ones(1, dtype=float, requires_grad=True, device=device)
57
+ y = wp.zeros(1, dtype=wp.vec3, requires_grad=True, device=device)
58
+
59
+ tape = wp.Tape()
60
+ with tape:
61
+ wp.launch(kernel, 1, inputs=[x], outputs=[y], device=device)
62
+
63
+ y.grad = wp.ones_like(y)
64
+ tape.backward()
65
+
66
+ assert_np_equal(y.numpy(), np.array([[1.0, 2.0, 3.0]], dtype=float))
67
+ assert_np_equal(x.grad.numpy(), np.array([6.0], dtype=float))
68
+
69
+ run(vec_assign_subscript)
70
+ run(vec_assign_attribute)
71
+
72
+
73
+ def test_vec_assign_copy(test, device):
74
+ @wp.kernel(module="unique")
75
+ def vec_assign_overwrite(x: wp.array(dtype=wp.vec3), y: wp.array(dtype=wp.vec3)):
76
+ tid = wp.tid()
77
+
78
+ a = wp.vec3()
79
+ b = x[tid]
80
+ a = b
81
+ a[1] = 3.0
82
+
83
+ y[tid] = a
84
+
85
+ x = wp.ones(1, dtype=wp.vec3, device=device, requires_grad=True)
86
+ y = wp.zeros(1, dtype=wp.vec3, device=device, requires_grad=True)
87
+
88
+ tape = wp.Tape()
89
+ with tape:
90
+ wp.launch(vec_assign_overwrite, dim=1, inputs=[x, y], device=device)
91
+
92
+ y.grad = wp.ones_like(y, requires_grad=False)
93
+ tape.backward()
94
+
95
+ assert_np_equal(y.numpy(), np.array([[1.0, 3.0, 1.0]], dtype=float))
96
+ assert_np_equal(x.grad.numpy(), np.array([[1.0, 0.0, 1.0]], dtype=float))
97
+
98
+
99
+ def test_vec_slicing_assign_backward(test, device):
100
+ @wp.kernel(module="unique")
101
+ def kernel(arr_x: wp.array(dtype=wp.vec2), arr_y: wp.array(dtype=wp.vec4)):
102
+ i = wp.tid()
103
+
104
+ x = arr_x[i]
105
+ y = arr_y[i]
106
+
107
+ y[:2] = x
108
+ y[1:-1] += x[:2]
109
+ y[3:1:-1] -= x[0:]
110
+
111
+ arr_y[i] = y
112
+
113
+ x = wp.ones(1, dtype=wp.vec2, requires_grad=True, device=device)
114
+ y = wp.zeros(1, dtype=wp.vec4, requires_grad=True, device=device)
115
+
116
+ tape = wp.Tape()
117
+ with tape:
118
+ wp.launch(kernel, 1, inputs=(x,), outputs=(y,), device=device)
119
+
120
+ y.grad = wp.ones_like(y)
121
+ tape.backward()
122
+
123
+ assert_np_equal(y.numpy(), np.array(((1.0, 2.0, 0.0, -1.0),), dtype=float))
124
+ assert_np_equal(x.grad.numpy(), np.array(((1.0, 1.0),), dtype=float))
125
+
126
+
127
+ devices = get_test_devices()
128
+
129
+
130
+ class TestVecAssignCopy(unittest.TestCase):
131
+ pass
132
+
133
+
134
+ add_function_test(TestVecAssignCopy, "test_vec_assign", test_vec_assign, devices=devices)
135
+ add_function_test(TestVecAssignCopy, "test_vec_assign_copy", test_vec_assign_copy, devices=devices)
136
+ add_function_test(
137
+ TestVecAssignCopy, "test_vec_slicing_assign_backward", test_vec_slicing_assign_backward, devices=devices
138
+ )
139
+
140
+
141
+ if __name__ == "__main__":
142
+ wp.clear_kernel_cache()
143
+ unittest.main(verbosity=2, failfast=True)
warp/tests/tile/test_tile.py CHANGED
@@ -109,12 +109,29 @@ def test_tile_copy_2d(test, device):
109
109
 
110
110
 
111
111
  @wp.func
112
- def unary_func(x: float):
112
+ def unary_func(x: wp.float32):
113
+ return wp.sin(x)
114
+
115
+
116
+ @wp.func
117
+ def unary_func(x: wp.float64):
113
118
  return wp.sin(x)
114
119
 
115
120
 
116
121
  @wp.kernel
117
- def tile_unary_map(input: wp.array2d(dtype=float), output: wp.array2d(dtype=float)):
122
+ def tile_unary_map_user_func(input: wp.array2d(dtype=Any), output: wp.array2d(dtype=Any)):
123
+ # tile index
124
+ i, j = wp.tid()
125
+
126
+ a = wp.tile_load(input, shape=(TILE_M, TILE_N), offset=(i * TILE_M, j * TILE_N))
127
+
128
+ sa = wp.tile_map(unary_func, a)
129
+
130
+ wp.tile_store(output, sa, offset=(i * TILE_M, j * TILE_N))
131
+
132
+
133
+ @wp.kernel
134
+ def tile_unary_map_builtin_func(input: wp.array2d(dtype=Any), output: wp.array2d(dtype=Any)):
118
135
  # tile index
119
136
  i, j = wp.tid()
120
137
 
@@ -131,17 +148,76 @@ def test_tile_unary_map(test, device):
131
148
  M = TILE_M * 7
132
149
  N = TILE_N * 5
133
150
 
134
- A = rng.random((M, N), dtype=np.float32)
135
- B = np.sin(A)
151
+ def run(kernel, dtype):
152
+ A = rng.random((M, N), dtype=dtype)
153
+ B = np.sin(A)
154
+
155
+ A_grad = np.cos(A)
156
+
157
+ A_wp = wp.array(A, requires_grad=True, device=device)
158
+ B_wp = wp.zeros_like(A_wp, requires_grad=True, device=device)
159
+
160
+ with wp.Tape() as tape:
161
+ wp.launch_tiled(
162
+ kernel,
163
+ dim=[int(M / TILE_M), int(N / TILE_N)],
164
+ inputs=[A_wp, B_wp],
165
+ block_dim=TILE_DIM,
166
+ device=device,
167
+ )
168
+
169
+ tol = 1.0e-6 if dtype == np.float64 else 1.0e-4
170
+
171
+ # verify forward pass
172
+ assert_np_equal(B_wp.numpy(), B, tol=tol)
173
+
174
+ # verify backward pass
175
+ B_wp.grad = wp.ones_like(B_wp, device=device)
176
+ tape.backward()
136
177
 
137
- A_grad = np.cos(A)
178
+ assert_np_equal(A_wp.grad.numpy(), A_grad, tol=tol)
179
+
180
+ dtypes = [np.float32, np.float64]
181
+
182
+ for dtype in dtypes:
183
+ run(tile_unary_map_user_func, dtype)
184
+ run(tile_unary_map_builtin_func, dtype)
185
+
186
+
187
+ @wp.func
188
+ def unary_func_mixed_types(x: int) -> float:
189
+ return wp.sin(float(x))
190
+
191
+
192
+ @wp.kernel
193
+ def tile_unary_map_mixed_types(input: wp.array2d(dtype=int), output: wp.array2d(dtype=float)):
194
+ # tile index
195
+ i, j = wp.tid()
196
+
197
+ a = wp.tile_load(input, shape=(TILE_M, TILE_N), offset=(i * TILE_M, j * TILE_N))
198
+
199
+ sa = wp.tile_map(unary_func_mixed_types, a)
200
+
201
+ wp.tile_store(output, sa, offset=(i * TILE_M, j * TILE_N))
202
+
203
+
204
+ def test_tile_unary_map_mixed_types(test, device):
205
+ rng = np.random.default_rng(42)
206
+
207
+ M = TILE_M * 7
208
+ N = TILE_N * 5
209
+
210
+ A = rng.integers(0, 100, size=(M, N), dtype=np.int32)
211
+ B = np.sin(A.astype(np.float32))
212
+
213
+ A_grad = np.cos(A.astype(np.float32))
138
214
 
139
215
  A_wp = wp.array(A, requires_grad=True, device=device)
140
- B_wp = wp.zeros_like(A_wp, requires_grad=True, device=device)
216
+ B_wp = wp.zeros((M, N), dtype=float, requires_grad=True, device=device)
141
217
 
142
218
  with wp.Tape() as tape:
143
219
  wp.launch_tiled(
144
- tile_unary_map,
220
+ tile_unary_map_mixed_types,
145
221
  dim=[int(M / TILE_M), int(N / TILE_N)],
146
222
  inputs=[A_wp, B_wp],
147
223
  block_dim=TILE_DIM,
@@ -155,17 +231,23 @@ def test_tile_unary_map(test, device):
155
231
  B_wp.grad = wp.ones_like(B_wp, device=device)
156
232
  tape.backward()
157
233
 
158
- assert_np_equal(A_wp.grad.numpy(), A_grad, tol=1.0e-6)
234
+ # The a gradients are now stored as ints and can't capture the correct values
235
+ # assert_np_equal(A_wp.grad.numpy(), A_grad, tol=1.0e-6)
159
236
 
160
237
 
161
238
  @wp.func
162
- def binary_func(x: float, y: float):
163
- return wp.sin(x) + y
239
+ def binary_func(x: wp.float32, y: wp.float32):
240
+ return x + y
241
+
242
+
243
+ @wp.func
244
+ def binary_func(x: wp.float64, y: wp.float64):
245
+ return x + y
164
246
 
165
247
 
166
248
  @wp.kernel
167
- def tile_binary_map(
168
- input_a: wp.array2d(dtype=float), input_b: wp.array2d(dtype=float), output: wp.array2d(dtype=float)
249
+ def tile_binary_map_user_func(
250
+ input_a: wp.array2d(dtype=Any), input_b: wp.array2d(dtype=Any), output: wp.array2d(dtype=Any)
169
251
  ):
170
252
  # tile index
171
253
  i, j = wp.tid()
@@ -178,26 +260,107 @@ def tile_binary_map(
178
260
  wp.tile_store(output, sa, offset=(i * TILE_M, j * TILE_N))
179
261
 
180
262
 
263
+ @wp.kernel
264
+ def tile_binary_map_builtin_func(
265
+ input_a: wp.array2d(dtype=Any), input_b: wp.array2d(dtype=Any), output: wp.array2d(dtype=Any)
266
+ ):
267
+ # tile index
268
+ i, j = wp.tid()
269
+
270
+ a = wp.tile_load(input_a, shape=(TILE_M, TILE_N), offset=(i * TILE_M, j * TILE_N))
271
+ b = wp.tile_load(input_b, shape=(TILE_M, TILE_N), offset=(i * TILE_M, j * TILE_N))
272
+
273
+ sa = wp.tile_map(wp.add, a, b)
274
+
275
+ wp.tile_store(output, sa, offset=(i * TILE_M, j * TILE_N))
276
+
277
+
181
278
  def test_tile_binary_map(test, device):
182
279
  rng = np.random.default_rng(42)
183
280
 
184
281
  M = TILE_M * 7
185
282
  N = TILE_N * 5
186
283
 
187
- A = rng.random((M, N), dtype=np.float32)
284
+ def run(kernel, dtype):
285
+ A = rng.random((M, N), dtype=dtype)
286
+ B = rng.random((M, N), dtype=dtype)
287
+ C = A + B
288
+
289
+ A_grad = np.ones_like(A)
290
+ B_grad = np.ones_like(B)
291
+
292
+ A_wp = wp.array(A, requires_grad=True, device=device)
293
+ B_wp = wp.array(B, requires_grad=True, device=device)
294
+ C_wp = wp.zeros_like(A_wp, requires_grad=True, device=device)
295
+
296
+ with wp.Tape() as tape:
297
+ wp.launch_tiled(
298
+ kernel,
299
+ dim=[int(M / TILE_M), int(N / TILE_N)],
300
+ inputs=[A_wp, B_wp, C_wp],
301
+ block_dim=TILE_DIM,
302
+ device=device,
303
+ )
304
+
305
+ tol = 1.0e-6 if dtype == np.float64 else 1.0e-4
306
+
307
+ # verify forward pass
308
+ assert_np_equal(C_wp.numpy(), C, tol=tol)
309
+
310
+ # verify backward pass
311
+ C_wp.grad = wp.ones_like(C_wp, device=device)
312
+ tape.backward()
313
+
314
+ assert_np_equal(A_wp.grad.numpy(), A_grad, tol=tol)
315
+ assert_np_equal(B_wp.grad.numpy(), B_grad, tol=tol)
316
+
317
+ dtypes = [np.float32, np.float64]
318
+
319
+ for dtype in dtypes:
320
+ run(tile_binary_map_builtin_func, dtype)
321
+ run(tile_binary_map_user_func, dtype)
322
+
323
+
324
+ @wp.func
325
+ def binary_func_mixed_types(x: int, y: float) -> float:
326
+ return wp.sin(float(x)) + y
327
+
328
+
329
+ @wp.kernel
330
+ def tile_binary_map_mixed_types(
331
+ input_a: wp.array2d(dtype=int), input_b: wp.array2d(dtype=float), output: wp.array2d(dtype=float)
332
+ ):
333
+ # tile index
334
+ i, j = wp.tid()
335
+
336
+ a = wp.tile_load(input_a, shape=(TILE_M, TILE_N), offset=(i * TILE_M, j * TILE_N))
337
+ b = wp.tile_load(input_b, shape=(TILE_M, TILE_N), offset=(i * TILE_M, j * TILE_N))
338
+
339
+ sa = wp.tile_map(binary_func_mixed_types, a, b)
340
+
341
+ wp.tile_store(output, sa, offset=(i * TILE_M, j * TILE_N))
342
+
343
+
344
+ def test_tile_binary_map_mixed_types(test, device):
345
+ rng = np.random.default_rng(42)
346
+
347
+ M = TILE_M * 7
348
+ N = TILE_N * 5
349
+
350
+ A = rng.integers(0, 100, size=(M, N), dtype=np.int32)
188
351
  B = rng.random((M, N), dtype=np.float32)
189
- C = np.sin(A) + B
352
+ C = np.sin(A.astype(np.float32)) + B
190
353
 
191
- A_grad = np.cos(A)
354
+ A_grad = np.cos(A.astype(np.float32))
192
355
  B_grad = np.ones_like(B)
193
356
 
194
357
  A_wp = wp.array(A, requires_grad=True, device=device)
195
358
  B_wp = wp.array(B, requires_grad=True, device=device)
196
- C_wp = wp.zeros_like(A_wp, requires_grad=True, device=device)
359
+ C_wp = wp.zeros_like(B_wp, requires_grad=True, device=device)
197
360
 
198
361
  with wp.Tape() as tape:
199
362
  wp.launch_tiled(
200
- tile_binary_map,
363
+ tile_binary_map_mixed_types,
201
364
  dim=[int(M / TILE_M), int(N / TILE_N)],
202
365
  inputs=[A_wp, B_wp, C_wp],
203
366
  block_dim=TILE_DIM,
@@ -211,7 +374,8 @@ def test_tile_binary_map(test, device):
211
374
  C_wp.grad = wp.ones_like(C_wp, device=device)
212
375
  tape.backward()
213
376
 
214
- assert_np_equal(A_wp.grad.numpy(), A_grad, tol=1.0e-6)
377
+ # The a gradiens are now stored as ints and can't capture the correct values
378
+ # assert_np_equal(A_wp.grad.numpy(), A_grad, tol=1.0e-6)
215
379
  assert_np_equal(B_wp.grad.numpy(), B_grad)
216
380
 
217
381
 
@@ -1085,7 +1249,9 @@ class TestTile(unittest.TestCase):
1085
1249
  add_function_test(TestTile, "test_tile_copy_1d", test_tile_copy_1d, devices=devices)
1086
1250
  add_function_test(TestTile, "test_tile_copy_2d", test_tile_copy_2d, devices=devices)
1087
1251
  add_function_test(TestTile, "test_tile_unary_map", test_tile_unary_map, devices=devices)
1252
+ add_function_test(TestTile, "test_tile_unary_map_mixed_types", test_tile_unary_map_mixed_types, devices=devices)
1088
1253
  add_function_test(TestTile, "test_tile_binary_map", test_tile_binary_map, devices=devices)
1254
+ add_function_test(TestTile, "test_tile_binary_map_mixed_types", test_tile_binary_map_mixed_types, devices=devices)
1089
1255
  add_function_test(TestTile, "test_tile_transpose", test_tile_transpose, devices=devices)
1090
1256
  add_function_test(TestTile, "test_tile_operators", test_tile_operators, devices=devices)
1091
1257
  add_function_test(TestTile, "test_tile_tile", test_tile_tile, devices=get_cuda_test_devices())