warp-lang 1.0.2__py3-none-macosx_10_13_universal2.whl → 1.1.0__py3-none-macosx_10_13_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of warp-lang might be problematic. Click here for more details.

Files changed (346) hide show
  1. warp/__init__.py +108 -97
  2. warp/__init__.pyi +1 -1
  3. warp/bin/libwarp-clang.dylib +0 -0
  4. warp/bin/libwarp.dylib +0 -0
  5. warp/build.py +115 -113
  6. warp/build_dll.py +383 -375
  7. warp/builtins.py +3425 -3354
  8. warp/codegen.py +2878 -2792
  9. warp/config.py +40 -36
  10. warp/constants.py +45 -45
  11. warp/context.py +5194 -5102
  12. warp/dlpack.py +442 -442
  13. warp/examples/__init__.py +16 -16
  14. warp/examples/assets/bear.usd +0 -0
  15. warp/examples/assets/bunny.usd +0 -0
  16. warp/examples/assets/cartpole.urdf +110 -110
  17. warp/examples/assets/crazyflie.usd +0 -0
  18. warp/examples/assets/cube.usd +0 -0
  19. warp/examples/assets/nv_ant.xml +92 -92
  20. warp/examples/assets/nv_humanoid.xml +183 -183
  21. warp/examples/assets/quadruped.urdf +267 -267
  22. warp/examples/assets/rocks.nvdb +0 -0
  23. warp/examples/assets/rocks.usd +0 -0
  24. warp/examples/assets/sphere.usd +0 -0
  25. warp/examples/benchmarks/benchmark_api.py +383 -383
  26. warp/examples/benchmarks/benchmark_cloth.py +278 -277
  27. warp/examples/benchmarks/benchmark_cloth_cupy.py +88 -88
  28. warp/examples/benchmarks/benchmark_cloth_jax.py +97 -100
  29. warp/examples/benchmarks/benchmark_cloth_numba.py +146 -142
  30. warp/examples/benchmarks/benchmark_cloth_numpy.py +77 -77
  31. warp/examples/benchmarks/benchmark_cloth_pytorch.py +86 -86
  32. warp/examples/benchmarks/benchmark_cloth_taichi.py +112 -112
  33. warp/examples/benchmarks/benchmark_cloth_warp.py +146 -146
  34. warp/examples/benchmarks/benchmark_launches.py +295 -295
  35. warp/examples/browse.py +29 -29
  36. warp/examples/core/example_dem.py +234 -219
  37. warp/examples/core/example_fluid.py +293 -267
  38. warp/examples/core/example_graph_capture.py +144 -126
  39. warp/examples/core/example_marching_cubes.py +188 -174
  40. warp/examples/core/example_mesh.py +174 -155
  41. warp/examples/core/example_mesh_intersect.py +205 -193
  42. warp/examples/core/example_nvdb.py +176 -170
  43. warp/examples/core/example_raycast.py +105 -90
  44. warp/examples/core/example_raymarch.py +199 -178
  45. warp/examples/core/example_render_opengl.py +185 -141
  46. warp/examples/core/example_sph.py +405 -387
  47. warp/examples/core/example_torch.py +222 -181
  48. warp/examples/core/example_wave.py +263 -248
  49. warp/examples/fem/bsr_utils.py +378 -380
  50. warp/examples/fem/example_apic_fluid.py +407 -389
  51. warp/examples/fem/example_convection_diffusion.py +182 -168
  52. warp/examples/fem/example_convection_diffusion_dg.py +219 -209
  53. warp/examples/fem/example_convection_diffusion_dg0.py +204 -194
  54. warp/examples/fem/example_deformed_geometry.py +177 -159
  55. warp/examples/fem/example_diffusion.py +201 -173
  56. warp/examples/fem/example_diffusion_3d.py +177 -152
  57. warp/examples/fem/example_diffusion_mgpu.py +221 -214
  58. warp/examples/fem/example_mixed_elasticity.py +244 -222
  59. warp/examples/fem/example_navier_stokes.py +259 -243
  60. warp/examples/fem/example_stokes.py +220 -192
  61. warp/examples/fem/example_stokes_transfer.py +265 -249
  62. warp/examples/fem/mesh_utils.py +133 -109
  63. warp/examples/fem/plot_utils.py +292 -287
  64. warp/examples/optim/example_bounce.py +260 -246
  65. warp/examples/optim/example_cloth_throw.py +222 -209
  66. warp/examples/optim/example_diffray.py +566 -536
  67. warp/examples/optim/example_drone.py +864 -835
  68. warp/examples/optim/example_inverse_kinematics.py +176 -168
  69. warp/examples/optim/example_inverse_kinematics_torch.py +185 -169
  70. warp/examples/optim/example_spring_cage.py +239 -231
  71. warp/examples/optim/example_trajectory.py +223 -199
  72. warp/examples/optim/example_walker.py +306 -293
  73. warp/examples/sim/example_cartpole.py +139 -129
  74. warp/examples/sim/example_cloth.py +196 -186
  75. warp/examples/sim/example_granular.py +124 -111
  76. warp/examples/sim/example_granular_collision_sdf.py +197 -186
  77. warp/examples/sim/example_jacobian_ik.py +236 -214
  78. warp/examples/sim/example_particle_chain.py +118 -105
  79. warp/examples/sim/example_quadruped.py +193 -180
  80. warp/examples/sim/example_rigid_chain.py +197 -187
  81. warp/examples/sim/example_rigid_contact.py +189 -177
  82. warp/examples/sim/example_rigid_force.py +127 -125
  83. warp/examples/sim/example_rigid_gyroscopic.py +109 -95
  84. warp/examples/sim/example_rigid_soft_contact.py +134 -122
  85. warp/examples/sim/example_soft_body.py +190 -177
  86. warp/fabric.py +337 -335
  87. warp/fem/__init__.py +60 -27
  88. warp/fem/cache.py +401 -388
  89. warp/fem/dirichlet.py +178 -179
  90. warp/fem/domain.py +262 -263
  91. warp/fem/field/__init__.py +100 -101
  92. warp/fem/field/field.py +148 -149
  93. warp/fem/field/nodal_field.py +298 -299
  94. warp/fem/field/restriction.py +22 -21
  95. warp/fem/field/test.py +180 -181
  96. warp/fem/field/trial.py +183 -183
  97. warp/fem/geometry/__init__.py +15 -19
  98. warp/fem/geometry/closest_point.py +69 -70
  99. warp/fem/geometry/deformed_geometry.py +270 -271
  100. warp/fem/geometry/element.py +744 -744
  101. warp/fem/geometry/geometry.py +184 -186
  102. warp/fem/geometry/grid_2d.py +380 -373
  103. warp/fem/geometry/grid_3d.py +441 -435
  104. warp/fem/geometry/hexmesh.py +953 -953
  105. warp/fem/geometry/partition.py +374 -376
  106. warp/fem/geometry/quadmesh_2d.py +532 -532
  107. warp/fem/geometry/tetmesh.py +840 -840
  108. warp/fem/geometry/trimesh_2d.py +577 -577
  109. warp/fem/integrate.py +1630 -1615
  110. warp/fem/operator.py +190 -191
  111. warp/fem/polynomial.py +214 -213
  112. warp/fem/quadrature/__init__.py +2 -2
  113. warp/fem/quadrature/pic_quadrature.py +243 -245
  114. warp/fem/quadrature/quadrature.py +295 -294
  115. warp/fem/space/__init__.py +294 -292
  116. warp/fem/space/basis_space.py +488 -489
  117. warp/fem/space/collocated_function_space.py +100 -105
  118. warp/fem/space/dof_mapper.py +236 -236
  119. warp/fem/space/function_space.py +148 -145
  120. warp/fem/space/grid_2d_function_space.py +267 -267
  121. warp/fem/space/grid_3d_function_space.py +305 -306
  122. warp/fem/space/hexmesh_function_space.py +350 -352
  123. warp/fem/space/partition.py +350 -350
  124. warp/fem/space/quadmesh_2d_function_space.py +368 -369
  125. warp/fem/space/restriction.py +158 -160
  126. warp/fem/space/shape/__init__.py +13 -15
  127. warp/fem/space/shape/cube_shape_function.py +738 -738
  128. warp/fem/space/shape/shape_function.py +102 -103
  129. warp/fem/space/shape/square_shape_function.py +611 -611
  130. warp/fem/space/shape/tet_shape_function.py +565 -567
  131. warp/fem/space/shape/triangle_shape_function.py +429 -429
  132. warp/fem/space/tetmesh_function_space.py +294 -292
  133. warp/fem/space/topology.py +297 -295
  134. warp/fem/space/trimesh_2d_function_space.py +223 -221
  135. warp/fem/types.py +77 -77
  136. warp/fem/utils.py +495 -495
  137. warp/jax.py +166 -141
  138. warp/jax_experimental.py +341 -339
  139. warp/native/array.h +1072 -1025
  140. warp/native/builtin.h +1560 -1560
  141. warp/native/bvh.cpp +398 -398
  142. warp/native/bvh.cu +525 -525
  143. warp/native/bvh.h +429 -429
  144. warp/native/clang/clang.cpp +495 -464
  145. warp/native/crt.cpp +31 -31
  146. warp/native/crt.h +334 -334
  147. warp/native/cuda_crt.h +1049 -1049
  148. warp/native/cuda_util.cpp +549 -540
  149. warp/native/cuda_util.h +288 -203
  150. warp/native/cutlass_gemm.cpp +34 -34
  151. warp/native/cutlass_gemm.cu +372 -372
  152. warp/native/error.cpp +66 -66
  153. warp/native/error.h +27 -27
  154. warp/native/fabric.h +228 -228
  155. warp/native/hashgrid.cpp +301 -278
  156. warp/native/hashgrid.cu +78 -77
  157. warp/native/hashgrid.h +227 -227
  158. warp/native/initializer_array.h +32 -32
  159. warp/native/intersect.h +1204 -1204
  160. warp/native/intersect_adj.h +365 -365
  161. warp/native/intersect_tri.h +322 -322
  162. warp/native/marching.cpp +2 -2
  163. warp/native/marching.cu +497 -497
  164. warp/native/marching.h +2 -2
  165. warp/native/mat.h +1498 -1498
  166. warp/native/matnn.h +333 -333
  167. warp/native/mesh.cpp +203 -203
  168. warp/native/mesh.cu +293 -293
  169. warp/native/mesh.h +1887 -1887
  170. warp/native/nanovdb/NanoVDB.h +4782 -4782
  171. warp/native/nanovdb/PNanoVDB.h +2553 -2553
  172. warp/native/nanovdb/PNanoVDBWrite.h +294 -294
  173. warp/native/noise.h +850 -850
  174. warp/native/quat.h +1084 -1084
  175. warp/native/rand.h +299 -299
  176. warp/native/range.h +108 -108
  177. warp/native/reduce.cpp +156 -156
  178. warp/native/reduce.cu +348 -348
  179. warp/native/runlength_encode.cpp +61 -61
  180. warp/native/runlength_encode.cu +46 -46
  181. warp/native/scan.cpp +30 -30
  182. warp/native/scan.cu +36 -36
  183. warp/native/scan.h +7 -7
  184. warp/native/solid_angle.h +442 -442
  185. warp/native/sort.cpp +94 -94
  186. warp/native/sort.cu +97 -97
  187. warp/native/sort.h +14 -14
  188. warp/native/sparse.cpp +337 -337
  189. warp/native/sparse.cu +544 -544
  190. warp/native/spatial.h +630 -630
  191. warp/native/svd.h +562 -562
  192. warp/native/temp_buffer.h +30 -30
  193. warp/native/vec.h +1132 -1132
  194. warp/native/volume.cpp +297 -297
  195. warp/native/volume.cu +32 -32
  196. warp/native/volume.h +538 -538
  197. warp/native/volume_builder.cu +425 -425
  198. warp/native/volume_builder.h +19 -19
  199. warp/native/warp.cpp +1057 -1052
  200. warp/native/warp.cu +2943 -2828
  201. warp/native/warp.h +313 -305
  202. warp/optim/__init__.py +9 -9
  203. warp/optim/adam.py +120 -120
  204. warp/optim/linear.py +1104 -939
  205. warp/optim/sgd.py +104 -92
  206. warp/render/__init__.py +10 -10
  207. warp/render/render_opengl.py +3217 -3204
  208. warp/render/render_usd.py +768 -749
  209. warp/render/utils.py +152 -150
  210. warp/sim/__init__.py +52 -59
  211. warp/sim/articulation.py +685 -685
  212. warp/sim/collide.py +1594 -1590
  213. warp/sim/import_mjcf.py +489 -481
  214. warp/sim/import_snu.py +220 -221
  215. warp/sim/import_urdf.py +536 -516
  216. warp/sim/import_usd.py +887 -881
  217. warp/sim/inertia.py +316 -317
  218. warp/sim/integrator.py +234 -233
  219. warp/sim/integrator_euler.py +1956 -1956
  220. warp/sim/integrator_featherstone.py +1910 -1991
  221. warp/sim/integrator_xpbd.py +3294 -3312
  222. warp/sim/model.py +4473 -4314
  223. warp/sim/particles.py +113 -112
  224. warp/sim/render.py +417 -403
  225. warp/sim/utils.py +413 -410
  226. warp/sparse.py +1227 -1227
  227. warp/stubs.py +2109 -2469
  228. warp/tape.py +1162 -225
  229. warp/tests/__init__.py +1 -1
  230. warp/tests/__main__.py +4 -4
  231. warp/tests/assets/torus.usda +105 -105
  232. warp/tests/aux_test_class_kernel.py +26 -26
  233. warp/tests/aux_test_compile_consts_dummy.py +10 -10
  234. warp/tests/aux_test_conditional_unequal_types_kernels.py +21 -21
  235. warp/tests/aux_test_dependent.py +22 -22
  236. warp/tests/aux_test_grad_customs.py +23 -23
  237. warp/tests/aux_test_reference.py +11 -11
  238. warp/tests/aux_test_reference_reference.py +10 -10
  239. warp/tests/aux_test_square.py +17 -17
  240. warp/tests/aux_test_unresolved_func.py +14 -14
  241. warp/tests/aux_test_unresolved_symbol.py +14 -14
  242. warp/tests/disabled_kinematics.py +239 -239
  243. warp/tests/run_coverage_serial.py +31 -31
  244. warp/tests/test_adam.py +157 -157
  245. warp/tests/test_arithmetic.py +1124 -1124
  246. warp/tests/test_array.py +2417 -2326
  247. warp/tests/test_array_reduce.py +150 -150
  248. warp/tests/test_async.py +668 -656
  249. warp/tests/test_atomic.py +141 -141
  250. warp/tests/test_bool.py +204 -149
  251. warp/tests/test_builtins_resolution.py +1292 -1292
  252. warp/tests/test_bvh.py +164 -171
  253. warp/tests/test_closest_point_edge_edge.py +228 -228
  254. warp/tests/test_codegen.py +566 -553
  255. warp/tests/test_compile_consts.py +97 -101
  256. warp/tests/test_conditional.py +246 -246
  257. warp/tests/test_copy.py +232 -215
  258. warp/tests/test_ctypes.py +632 -632
  259. warp/tests/test_dense.py +67 -67
  260. warp/tests/test_devices.py +91 -98
  261. warp/tests/test_dlpack.py +530 -529
  262. warp/tests/test_examples.py +400 -378
  263. warp/tests/test_fabricarray.py +955 -955
  264. warp/tests/test_fast_math.py +62 -54
  265. warp/tests/test_fem.py +1277 -1278
  266. warp/tests/test_fp16.py +130 -130
  267. warp/tests/test_func.py +338 -337
  268. warp/tests/test_generics.py +571 -571
  269. warp/tests/test_grad.py +746 -640
  270. warp/tests/test_grad_customs.py +333 -336
  271. warp/tests/test_hash_grid.py +210 -164
  272. warp/tests/test_import.py +39 -39
  273. warp/tests/test_indexedarray.py +1134 -1134
  274. warp/tests/test_intersect.py +67 -67
  275. warp/tests/test_jax.py +307 -307
  276. warp/tests/test_large.py +167 -164
  277. warp/tests/test_launch.py +354 -354
  278. warp/tests/test_lerp.py +261 -261
  279. warp/tests/test_linear_solvers.py +191 -171
  280. warp/tests/test_lvalue.py +421 -493
  281. warp/tests/test_marching_cubes.py +65 -65
  282. warp/tests/test_mat.py +1801 -1827
  283. warp/tests/test_mat_lite.py +115 -115
  284. warp/tests/test_mat_scalar_ops.py +2907 -2889
  285. warp/tests/test_math.py +126 -193
  286. warp/tests/test_matmul.py +500 -499
  287. warp/tests/test_matmul_lite.py +410 -410
  288. warp/tests/test_mempool.py +188 -190
  289. warp/tests/test_mesh.py +284 -324
  290. warp/tests/test_mesh_query_aabb.py +228 -241
  291. warp/tests/test_mesh_query_point.py +692 -702
  292. warp/tests/test_mesh_query_ray.py +292 -303
  293. warp/tests/test_mlp.py +276 -276
  294. warp/tests/test_model.py +110 -110
  295. warp/tests/test_modules_lite.py +39 -39
  296. warp/tests/test_multigpu.py +163 -163
  297. warp/tests/test_noise.py +248 -248
  298. warp/tests/test_operators.py +250 -250
  299. warp/tests/test_options.py +123 -125
  300. warp/tests/test_peer.py +133 -137
  301. warp/tests/test_pinned.py +78 -78
  302. warp/tests/test_print.py +54 -54
  303. warp/tests/test_quat.py +2086 -2086
  304. warp/tests/test_rand.py +288 -288
  305. warp/tests/test_reload.py +217 -217
  306. warp/tests/test_rounding.py +179 -179
  307. warp/tests/test_runlength_encode.py +190 -190
  308. warp/tests/test_sim_grad.py +243 -0
  309. warp/tests/test_sim_kinematics.py +91 -97
  310. warp/tests/test_smoothstep.py +168 -168
  311. warp/tests/test_snippet.py +305 -266
  312. warp/tests/test_sparse.py +468 -460
  313. warp/tests/test_spatial.py +2148 -2148
  314. warp/tests/test_streams.py +486 -473
  315. warp/tests/test_struct.py +710 -675
  316. warp/tests/test_tape.py +173 -148
  317. warp/tests/test_torch.py +743 -743
  318. warp/tests/test_transient_module.py +87 -87
  319. warp/tests/test_types.py +556 -659
  320. warp/tests/test_utils.py +490 -499
  321. warp/tests/test_vec.py +1264 -1268
  322. warp/tests/test_vec_lite.py +73 -73
  323. warp/tests/test_vec_scalar_ops.py +2099 -2099
  324. warp/tests/test_verify_fp.py +94 -94
  325. warp/tests/test_volume.py +737 -736
  326. warp/tests/test_volume_write.py +255 -265
  327. warp/tests/unittest_serial.py +37 -37
  328. warp/tests/unittest_suites.py +363 -359
  329. warp/tests/unittest_utils.py +603 -578
  330. warp/tests/unused_test_misc.py +71 -71
  331. warp/tests/walkthrough_debug.py +85 -85
  332. warp/thirdparty/appdirs.py +598 -598
  333. warp/thirdparty/dlpack.py +143 -143
  334. warp/thirdparty/unittest_parallel.py +566 -561
  335. warp/torch.py +321 -295
  336. warp/types.py +4504 -4450
  337. warp/utils.py +1008 -821
  338. {warp_lang-1.0.2.dist-info → warp_lang-1.1.0.dist-info}/LICENSE.md +126 -126
  339. {warp_lang-1.0.2.dist-info → warp_lang-1.1.0.dist-info}/METADATA +338 -400
  340. warp_lang-1.1.0.dist-info/RECORD +352 -0
  341. warp/examples/assets/cube.usda +0 -42
  342. warp/examples/assets/sphere.usda +0 -56
  343. warp/examples/assets/torus.usda +0 -105
  344. warp_lang-1.0.2.dist-info/RECORD +0 -352
  345. {warp_lang-1.0.2.dist-info → warp_lang-1.1.0.dist-info}/WHEEL +0 -0
  346. {warp_lang-1.0.2.dist-info → warp_lang-1.1.0.dist-info}/top_level.txt +0 -0
@@ -1,2148 +1,2148 @@
1
- # Copyright (c) 2023 NVIDIA CORPORATION. All rights reserved.
2
- # NVIDIA CORPORATION and its licensors retain all intellectual property
3
- # and proprietary rights in and to this software, related documentation
4
- # and any modifications thereto. Any use, reproduction, disclosure or
5
- # distribution of this software and related documentation without an express
6
- # license agreement from NVIDIA CORPORATION is strictly prohibited.
7
-
8
- import unittest
9
-
10
- import numpy as np
11
-
12
- import warp as wp
13
- from warp.tests.unittest_utils import *
14
-
15
- wp.init()
16
-
17
- np_float_types = [np.float32, np.float64, np.float16]
18
-
19
- kernel_cache = dict()
20
-
21
-
22
- def getkernel(func, suffix=""):
23
- key = func.__name__ + "_" + suffix
24
- if key not in kernel_cache:
25
- kernel_cache[key] = wp.Kernel(func=func, key=key)
26
- return kernel_cache[key]
27
-
28
-
29
- def get_select_kernel(dtype):
30
- def output_select_kernel_fn(
31
- input: wp.array(dtype=dtype),
32
- index: int,
33
- out: wp.array(dtype=dtype),
34
- ):
35
- out[0] = input[index]
36
-
37
- return getkernel(output_select_kernel_fn, suffix=dtype.__name__)
38
-
39
-
40
- ############################################################
41
-
42
-
43
- def test_spatial_vector_constructors(test, device, dtype, register_kernels=False):
44
- rng = np.random.default_rng(123)
45
-
46
- tol = {
47
- np.float16: 5.0e-3,
48
- np.float32: 1.0e-6,
49
- np.float64: 1.0e-8,
50
- }.get(dtype, 0)
51
-
52
- wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
53
- vec3 = wp.types.vector(length=3, dtype=wptype)
54
- spatial_vector = wp.types.vector(length=6, dtype=wptype)
55
-
56
- def check_spatial_vector_component_constructor(
57
- input: wp.array(dtype=wptype),
58
- out: wp.array(dtype=wptype),
59
- ):
60
- result = spatial_vector(input[0], input[1], input[2], input[3], input[4], input[5])
61
-
62
- # multiply the output by 2 so we've got something to backpropagate:
63
- out[0] = wptype(2) * result[0]
64
- out[1] = wptype(2) * result[1]
65
- out[2] = wptype(2) * result[2]
66
- out[3] = wptype(2) * result[3]
67
- out[4] = wptype(2) * result[4]
68
- out[5] = wptype(2) * result[5]
69
-
70
- def check_spatial_vector_vector_constructor(
71
- input: wp.array(dtype=wptype),
72
- out: wp.array(dtype=wptype),
73
- ):
74
- result = spatial_vector(vec3(input[0], input[1], input[2]), vec3(input[3], input[4], input[5]))
75
-
76
- # multiply the output by 2 so we've got something to backpropagate:
77
- out[0] = wptype(2) * result[0]
78
- out[1] = wptype(2) * result[1]
79
- out[2] = wptype(2) * result[2]
80
- out[3] = wptype(2) * result[3]
81
- out[4] = wptype(2) * result[4]
82
- out[5] = wptype(2) * result[5]
83
-
84
- kernel = getkernel(check_spatial_vector_component_constructor, suffix=dtype.__name__)
85
- output_select_kernel = get_select_kernel(wptype)
86
- vec_kernel = getkernel(check_spatial_vector_vector_constructor, suffix=dtype.__name__)
87
-
88
- if register_kernels:
89
- return
90
-
91
- input = wp.array(rng.standard_normal(size=6).astype(dtype), requires_grad=True, device=device)
92
- output = wp.zeros_like(input)
93
- wp.launch(kernel, dim=1, inputs=[input], outputs=[output], device=device)
94
-
95
- assert_np_equal(output.numpy(), 2 * input.numpy(), tol=tol)
96
-
97
- for i in range(len(input)):
98
- cmp = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
99
- tape = wp.Tape()
100
- with tape:
101
- wp.launch(kernel, dim=1, inputs=[input], outputs=[output], device=device)
102
- wp.launch(output_select_kernel, dim=1, inputs=[output, i], outputs=[cmp], device=device)
103
- tape.backward(loss=cmp)
104
- expectedgrads = np.zeros(len(input))
105
- expectedgrads[i] = 2
106
- assert_np_equal(tape.gradients[input].numpy(), expectedgrads)
107
- tape.zero()
108
-
109
- input = wp.array(rng.standard_normal(size=6).astype(dtype), requires_grad=True, device=device)
110
- output = wp.zeros_like(input)
111
- wp.launch(vec_kernel, dim=1, inputs=[input], outputs=[output], device=device)
112
-
113
- assert_np_equal(output.numpy(), 2 * input.numpy(), tol=tol)
114
-
115
- for i in range(len(input)):
116
- cmp = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
117
- tape = wp.Tape()
118
- with tape:
119
- wp.launch(kernel, dim=1, inputs=[input], outputs=[output], device=device)
120
- wp.launch(output_select_kernel, dim=1, inputs=[output, i], outputs=[cmp], device=device)
121
- tape.backward(loss=cmp)
122
- expectedgrads = np.zeros(len(input))
123
- expectedgrads[i] = 2
124
- assert_np_equal(tape.gradients[input].numpy(), expectedgrads)
125
- tape.zero()
126
-
127
-
128
- def test_spatial_vector_indexing(test, device, dtype, register_kernels=False):
129
- rng = np.random.default_rng(123)
130
-
131
- tol = {
132
- np.float16: 5.0e-3,
133
- np.float32: 1.0e-6,
134
- np.float64: 1.0e-8,
135
- }.get(dtype, 0)
136
-
137
- wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
138
- spatial_vector = wp.types.vector(length=6, dtype=wptype)
139
-
140
- def check_spatial_vector_indexing(
141
- input: wp.array(dtype=spatial_vector),
142
- out: wp.array(dtype=wptype),
143
- ):
144
- inpt = input[0]
145
-
146
- # multiply outputs by 2 so we've got something to backpropagate:
147
- idx = 0
148
- for i in range(6):
149
- out[idx] = wptype(2) * inpt[i]
150
- idx = idx + 1
151
-
152
- kernel = getkernel(check_spatial_vector_indexing, suffix=dtype.__name__)
153
- output_select_kernel = get_select_kernel(wptype)
154
-
155
- if register_kernels:
156
- return
157
-
158
- input = wp.array(
159
- rng.standard_normal(size=(1, 6)).astype(dtype), dtype=spatial_vector, requires_grad=True, device=device
160
- )
161
- outcmps = wp.zeros(6, dtype=wptype, requires_grad=True, device=device)
162
-
163
- wp.launch(kernel, dim=1, inputs=[input], outputs=[outcmps], device=device)
164
-
165
- assert_np_equal(outcmps.numpy(), 2 * input.numpy().ravel(), tol=tol)
166
-
167
- out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
168
- for i in range(6):
169
- tape = wp.Tape()
170
- with tape:
171
- wp.launch(kernel, dim=1, inputs=[input], outputs=[outcmps], device=device)
172
- wp.launch(output_select_kernel, dim=1, inputs=[outcmps, i], outputs=[out], device=device)
173
- tape.backward(loss=out)
174
- expectedresult = np.zeros(6, dtype=dtype)
175
- expectedresult[i] = 2
176
- assert_np_equal(tape.gradients[input].numpy()[0], expectedresult)
177
- tape.zero()
178
-
179
-
180
- def test_spatial_vector_scalar_multiplication(test, device, dtype, register_kernels=False):
181
- rng = np.random.default_rng(123)
182
-
183
- tol = {
184
- np.float16: 5.0e-3,
185
- np.float32: 1.0e-6,
186
- np.float64: 1.0e-8,
187
- }.get(dtype, 0)
188
-
189
- wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
190
- spatial_vector = wp.types.vector(length=6, dtype=wptype)
191
-
192
- def check_spatial_vector_scalar_mul(
193
- s: wp.array(dtype=wptype),
194
- q: wp.array(dtype=spatial_vector),
195
- outcmps_l: wp.array(dtype=wptype),
196
- outcmps_r: wp.array(dtype=wptype),
197
- ):
198
- lresult = s[0] * q[0]
199
- rresult = q[0] * s[0]
200
-
201
- # multiply outputs by 2 so we've got something to backpropagate:
202
- for i in range(6):
203
- outcmps_l[i] = wptype(2) * lresult[i]
204
- outcmps_r[i] = wptype(2) * rresult[i]
205
-
206
- kernel = getkernel(check_spatial_vector_scalar_mul, suffix=dtype.__name__)
207
- output_select_kernel = get_select_kernel(wptype)
208
-
209
- if register_kernels:
210
- return
211
-
212
- s = wp.array(rng.standard_normal(size=1).astype(dtype), requires_grad=True, device=device)
213
- q = wp.array(
214
- rng.standard_normal(size=(1, 6)).astype(dtype), dtype=spatial_vector, requires_grad=True, device=device
215
- )
216
-
217
- outcmps_l = wp.zeros(6, dtype=wptype, requires_grad=True, device=device)
218
- outcmps_r = wp.zeros(6, dtype=wptype, requires_grad=True, device=device)
219
-
220
- wp.launch(
221
- kernel,
222
- dim=1,
223
- inputs=[s, q],
224
- outputs=[
225
- outcmps_l,
226
- outcmps_r,
227
- ],
228
- device=device,
229
- )
230
-
231
- assert_np_equal(outcmps_l.numpy(), 2 * s.numpy()[0] * q.numpy(), tol=tol)
232
- assert_np_equal(outcmps_r.numpy(), 2 * s.numpy()[0] * q.numpy(), tol=tol)
233
-
234
- out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
235
- for i in range(6):
236
- # test left/right mul gradients:
237
- for wrt in [outcmps_l, outcmps_r]:
238
- tape = wp.Tape()
239
- with tape:
240
- wp.launch(kernel, dim=1, inputs=[s, q], outputs=[outcmps_l, outcmps_r], device=device)
241
- wp.launch(output_select_kernel, dim=1, inputs=[wrt, i], outputs=[out], device=device)
242
- tape.backward(loss=out)
243
- expectedresult = np.zeros(6, dtype=dtype)
244
- expectedresult[i] = 2 * s.numpy()[0]
245
- assert_np_equal(tape.gradients[q].numpy()[0], expectedresult, tol=tol)
246
- assert_np_equal(tape.gradients[s].numpy()[0], 2 * q.numpy()[0, i], tol=tol)
247
- tape.zero()
248
-
249
-
250
- def test_spatial_vector_add_sub(test, device, dtype, register_kernels=False):
251
- rng = np.random.default_rng(123)
252
-
253
- tol = {
254
- np.float16: 5.0e-3,
255
- np.float32: 1.0e-6,
256
- np.float64: 1.0e-8,
257
- }.get(dtype, 0)
258
-
259
- wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
260
- spatial_vector = wp.types.vector(length=6, dtype=wptype)
261
-
262
- def check_spatial_vector_add_sub(
263
- q: wp.array(dtype=spatial_vector),
264
- v: wp.array(dtype=spatial_vector),
265
- outputs_add: wp.array(dtype=wptype),
266
- outputs_sub: wp.array(dtype=wptype),
267
- ):
268
- addresult = q[0] + v[0]
269
- subresult = q[0] - v[0]
270
- for i in range(6):
271
- outputs_add[i] = wptype(2) * addresult[i]
272
- outputs_sub[i] = wptype(2) * subresult[i]
273
-
274
- kernel = getkernel(check_spatial_vector_add_sub, suffix=dtype.__name__)
275
- output_select_kernel = get_select_kernel(wptype)
276
- if register_kernels:
277
- return
278
-
279
- q = wp.array(rng.standard_normal(size=6).astype(dtype), dtype=spatial_vector, requires_grad=True, device=device)
280
- v = wp.array(rng.standard_normal(size=6).astype(dtype), dtype=spatial_vector, requires_grad=True, device=device)
281
-
282
- outputs_add = wp.zeros(6, dtype=wptype, requires_grad=True, device=device)
283
- outputs_sub = wp.zeros(6, dtype=wptype, requires_grad=True, device=device)
284
-
285
- wp.launch(
286
- kernel,
287
- dim=1,
288
- inputs=[
289
- q,
290
- v,
291
- ],
292
- outputs=[outputs_add, outputs_sub],
293
- device=device,
294
- )
295
-
296
- assert_np_equal(outputs_add.numpy(), 2 * (q.numpy() + v.numpy()), tol=tol)
297
- assert_np_equal(outputs_sub.numpy(), 2 * (q.numpy() - v.numpy()), tol=tol)
298
-
299
- out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
300
- for i in range(6):
301
- # test add gradients:
302
- tape = wp.Tape()
303
- with tape:
304
- wp.launch(kernel, dim=1, inputs=[q, v], outputs=[outputs_add, outputs_sub], device=device)
305
- wp.launch(output_select_kernel, dim=1, inputs=[outputs_add, i], outputs=[out], device=device)
306
- tape.backward(loss=out)
307
- expectedresult = np.zeros(6, dtype=dtype)
308
- expectedresult[i] = 2
309
- assert_np_equal(tape.gradients[q].numpy()[0], expectedresult, tol=tol)
310
- assert_np_equal(tape.gradients[v].numpy()[0], expectedresult, tol=tol)
311
- tape.zero()
312
-
313
- # test subtraction gradients:
314
- tape = wp.Tape()
315
- with tape:
316
- wp.launch(kernel, dim=1, inputs=[q, v], outputs=[outputs_add, outputs_sub], device=device)
317
- wp.launch(output_select_kernel, dim=1, inputs=[outputs_sub, i], outputs=[out], device=device)
318
- tape.backward(loss=out)
319
- expectedresult = np.zeros(6, dtype=dtype)
320
- expectedresult[i] = 2
321
- assert_np_equal(tape.gradients[q].numpy()[0], expectedresult, tol=tol)
322
- assert_np_equal(tape.gradients[v].numpy()[0], -expectedresult, tol=tol)
323
- tape.zero()
324
-
325
-
326
- def test_spatial_dot(test, device, dtype, register_kernels=False):
327
- rng = np.random.default_rng(123)
328
-
329
- tol = {
330
- np.float16: 1.0e-2,
331
- np.float32: 1.0e-6,
332
- np.float64: 1.0e-8,
333
- }.get(dtype, 0)
334
-
335
- wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
336
- spatial_vector = wp.types.vector(length=6, dtype=wptype)
337
-
338
- def check_spatial_dot(
339
- s: wp.array(dtype=spatial_vector),
340
- v: wp.array(dtype=spatial_vector),
341
- dot: wp.array(dtype=wptype),
342
- ):
343
- dot[0] = wptype(2) * wp.spatial_dot(v[0], s[0])
344
-
345
- kernel = getkernel(check_spatial_dot, suffix=dtype.__name__)
346
- if register_kernels:
347
- return
348
-
349
- s = wp.array(rng.standard_normal(size=6).astype(dtype), dtype=spatial_vector, requires_grad=True, device=device)
350
- v = wp.array(rng.standard_normal(size=6).astype(dtype), dtype=spatial_vector, requires_grad=True, device=device)
351
- dot = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
352
-
353
- tape = wp.Tape()
354
- with tape:
355
- wp.launch(
356
- kernel,
357
- dim=1,
358
- inputs=[
359
- s,
360
- v,
361
- ],
362
- outputs=[dot],
363
- device=device,
364
- )
365
-
366
- assert_np_equal(dot.numpy()[0], 2.0 * (v.numpy() * s.numpy()).sum(), tol=tol)
367
-
368
- tape.backward(loss=dot)
369
- sgrads = tape.gradients[s].numpy()[0]
370
- expected_grads = 2.0 * v.numpy()[0]
371
- assert_np_equal(sgrads, expected_grads, tol=10 * tol)
372
-
373
- vgrads = tape.gradients[v].numpy()[0]
374
- expected_grads = 2.0 * s.numpy()[0]
375
- assert_np_equal(vgrads, expected_grads, tol=tol)
376
-
377
-
378
- def test_spatial_cross(test, device, dtype, register_kernels=False):
379
- rng = np.random.default_rng(123)
380
-
381
- tol = {
382
- np.float16: 5.0e-3,
383
- np.float32: 1.0e-6,
384
- np.float64: 1.0e-8,
385
- }.get(dtype, 0)
386
-
387
- wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
388
- spatial_vector = wp.types.vector(length=6, dtype=wptype)
389
-
390
- def check_spatial_cross(
391
- s: wp.array(dtype=spatial_vector),
392
- v: wp.array(dtype=spatial_vector),
393
- outputs: wp.array(dtype=wptype),
394
- outputs_dual: wp.array(dtype=wptype),
395
- outputs_wcrossw: wp.array(dtype=wptype),
396
- outputs_vcrossw: wp.array(dtype=wptype),
397
- outputs_wcrossv: wp.array(dtype=wptype),
398
- outputs_vcrossv: wp.array(dtype=wptype),
399
- ):
400
- c = wp.spatial_cross(s[0], v[0])
401
- d = wp.spatial_cross_dual(s[0], v[0])
402
-
403
- # multiply outputs by 2 so we've got something to backpropagate:
404
- for i in range(6):
405
- outputs[i] = wptype(2) * c[i]
406
- outputs_dual[i] = wptype(2) * d[i]
407
-
408
- sw = wp.spatial_top(s[0])
409
- sv = wp.spatial_bottom(s[0])
410
- vw = wp.spatial_top(v[0])
411
- vv = wp.spatial_bottom(v[0])
412
-
413
- wcrossw = wp.cross(sw, vw)
414
- vcrossw = wp.cross(sv, vw)
415
- wcrossv = wp.cross(sw, vv)
416
- vcrossv = wp.cross(sv, vv)
417
-
418
- for i in range(3):
419
- outputs_wcrossw[i] = wcrossw[i]
420
- outputs_vcrossw[i] = vcrossw[i]
421
- outputs_wcrossv[i] = wcrossv[i]
422
- outputs_vcrossv[i] = vcrossv[i]
423
-
424
- kernel = getkernel(check_spatial_cross, suffix=dtype.__name__)
425
- output_select_kernel = get_select_kernel(wptype)
426
-
427
- if register_kernels:
428
- return
429
-
430
- s = wp.array(rng.standard_normal(size=6).astype(dtype), dtype=spatial_vector, requires_grad=True, device=device)
431
- v = wp.array(rng.standard_normal(size=6).astype(dtype), dtype=spatial_vector, requires_grad=True, device=device)
432
- outputs = wp.zeros(6, dtype=wptype, requires_grad=True, device=device)
433
- outputs_dual = wp.zeros(6, dtype=wptype, requires_grad=True, device=device)
434
- outputs_wcrossw = wp.zeros(3, dtype=wptype, requires_grad=True, device=device)
435
- outputs_vcrossw = wp.zeros(3, dtype=wptype, requires_grad=True, device=device)
436
- outputs_wcrossv = wp.zeros(3, dtype=wptype, requires_grad=True, device=device)
437
- outputs_vcrossv = wp.zeros(3, dtype=wptype, requires_grad=True, device=device)
438
-
439
- wp.launch(
440
- kernel,
441
- dim=1,
442
- inputs=[
443
- s,
444
- v,
445
- ],
446
- outputs=[outputs, outputs_dual, outputs_wcrossw, outputs_vcrossw, outputs_wcrossv, outputs_vcrossv],
447
- device=device,
448
- )
449
-
450
- sw = s.numpy()[0, :3]
451
- sv = s.numpy()[0, 3:]
452
- vw = v.numpy()[0, :3]
453
- vv = v.numpy()[0, 3:]
454
-
455
- wcrossw = np.cross(sw, vw)
456
- vcrossw = np.cross(sv, vw)
457
- wcrossv = np.cross(sw, vv)
458
- vcrossv = np.cross(sv, vv)
459
-
460
- assert_np_equal(outputs.numpy()[:3], 2 * wcrossw, tol=tol)
461
- assert_np_equal(outputs.numpy()[3:], 2 * (vcrossw + wcrossv), tol=tol)
462
-
463
- assert_np_equal(outputs_dual.numpy()[:3], 2 * (wcrossw + vcrossv), tol=tol)
464
- assert_np_equal(outputs_dual.numpy()[3:], 2 * wcrossv, tol=tol)
465
-
466
- for i in range(3):
467
- cmp_w = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
468
- cmp_v = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
469
- cmp_w_dual = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
470
- cmp_v_dual = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
471
- cmp_wcrossw = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
472
- cmp_vcrossw = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
473
- cmp_wcrossv = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
474
- cmp_vcrossv = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
475
- tape = wp.Tape()
476
- with tape:
477
- wp.launch(
478
- kernel,
479
- dim=1,
480
- inputs=[
481
- s,
482
- v,
483
- ],
484
- outputs=[outputs, outputs_dual, outputs_wcrossw, outputs_vcrossw, outputs_wcrossv, outputs_vcrossv],
485
- device=device,
486
- )
487
-
488
- # ith w and v vector components of spatial_cross:
489
- wp.launch(output_select_kernel, dim=1, inputs=[outputs, i], outputs=[cmp_w], device=device)
490
- wp.launch(output_select_kernel, dim=1, inputs=[outputs, i + 3], outputs=[cmp_v], device=device)
491
-
492
- # ith w and v vector components of spatial_cross_dual:
493
- wp.launch(output_select_kernel, dim=1, inputs=[outputs_dual, i], outputs=[cmp_w_dual], device=device)
494
- wp.launch(output_select_kernel, dim=1, inputs=[outputs_dual, i + 3], outputs=[cmp_v_dual], device=device)
495
-
496
- # ith vector components of some cross products:
497
- wp.launch(output_select_kernel, dim=1, inputs=[outputs_wcrossw, i], outputs=[cmp_wcrossw], device=device)
498
- wp.launch(output_select_kernel, dim=1, inputs=[outputs_vcrossw, i], outputs=[cmp_vcrossw], device=device)
499
- wp.launch(output_select_kernel, dim=1, inputs=[outputs_wcrossv, i], outputs=[cmp_wcrossv], device=device)
500
- wp.launch(output_select_kernel, dim=1, inputs=[outputs_vcrossv, i], outputs=[cmp_vcrossv], device=device)
501
-
502
- def getgrads(cmp):
503
- tape.backward(loss=cmp)
504
- sgrads = 1.0 * tape.gradients[s].numpy()
505
- vgrads = 1.0 * tape.gradients[v].numpy()
506
- tape.zero()
507
- return sgrads, vgrads
508
-
509
- dcmp_w_ds, dcmp_w_dv = getgrads(cmp_w)
510
- dcmp_v_ds, dcmp_v_dv = getgrads(cmp_v)
511
- dcmp_w_dual_ds, dcmp_w_dual_dv = getgrads(cmp_w_dual)
512
- dcmp_v_dual_ds, dcmp_v_dual_dv = getgrads(cmp_v_dual)
513
-
514
- dcmp_wcrossw_ds, dcmp_wcrossw_dv = getgrads(cmp_wcrossw)
515
- dcmp_vcrossw_ds, dcmp_vcrossw_dv = getgrads(cmp_vcrossw)
516
- dcmp_wcrossv_ds, dcmp_wcrossv_dv = getgrads(cmp_wcrossv)
517
- dcmp_vcrossv_ds, dcmp_vcrossv_dv = getgrads(cmp_vcrossv)
518
-
519
- assert_np_equal(dcmp_w_ds, 2 * dcmp_wcrossw_ds, tol=tol)
520
- assert_np_equal(dcmp_w_dv, 2 * dcmp_wcrossw_dv, tol=tol)
521
-
522
- assert_np_equal(dcmp_v_ds, 2 * (dcmp_vcrossw_ds + dcmp_wcrossv_ds), tol=tol)
523
- assert_np_equal(dcmp_v_dv, 2 * (dcmp_vcrossw_dv + dcmp_wcrossv_dv), tol=tol)
524
-
525
- assert_np_equal(dcmp_w_dual_ds, 2 * (dcmp_wcrossw_ds + dcmp_vcrossv_ds), tol=tol)
526
- assert_np_equal(dcmp_w_dual_dv, 2 * (dcmp_wcrossw_dv + dcmp_vcrossv_dv), tol=tol)
527
-
528
- assert_np_equal(dcmp_v_dual_ds, 2 * dcmp_wcrossv_ds, tol=tol)
529
- assert_np_equal(dcmp_v_dual_dv, 2 * dcmp_wcrossv_dv, tol=tol)
530
-
531
-
532
- def test_spatial_top_bottom(test, device, dtype, register_kernels=False):
533
- rng = np.random.default_rng(123)
534
-
535
- tol = {
536
- np.float16: 1.0e-2,
537
- np.float32: 1.0e-6,
538
- np.float64: 1.0e-8,
539
- }.get(dtype, 0)
540
-
541
- wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
542
- spatial_vector = wp.types.vector(length=6, dtype=wptype)
543
-
544
- def check_spatial_top_bottom(
545
- s: wp.array(dtype=spatial_vector),
546
- outputs: wp.array(dtype=wptype),
547
- ):
548
- top = wp.spatial_top(s[0])
549
- bottom = wp.spatial_bottom(s[0])
550
-
551
- outputs[0] = wptype(2) * top[0]
552
- outputs[1] = wptype(2) * top[1]
553
- outputs[2] = wptype(2) * top[2]
554
-
555
- outputs[3] = wptype(2) * bottom[0]
556
- outputs[4] = wptype(2) * bottom[1]
557
- outputs[5] = wptype(2) * bottom[2]
558
-
559
- kernel = getkernel(check_spatial_top_bottom, suffix=dtype.__name__)
560
- output_select_kernel = get_select_kernel(wptype)
561
-
562
- if register_kernels:
563
- return
564
-
565
- s = wp.array(rng.standard_normal(size=6).astype(dtype), dtype=spatial_vector, requires_grad=True, device=device)
566
- outputs = wp.zeros(6, dtype=wptype, requires_grad=True, device=device)
567
-
568
- wp.launch(
569
- kernel,
570
- dim=1,
571
- inputs=[
572
- s,
573
- ],
574
- outputs=[outputs],
575
- device=device,
576
- )
577
-
578
- assert_np_equal(outputs.numpy(), 2.0 * s.numpy(), tol=tol)
579
-
580
- for i in range(6):
581
- cmp = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
582
- tape = wp.Tape()
583
- with tape:
584
- wp.launch(
585
- kernel,
586
- dim=1,
587
- inputs=[
588
- s,
589
- ],
590
- outputs=[outputs],
591
- device=device,
592
- )
593
- wp.launch(output_select_kernel, dim=1, inputs=[outputs, i], outputs=[cmp], device=device)
594
- tape.backward(loss=cmp)
595
- expectedgrads = np.zeros(6)
596
- expectedgrads[i] = 2
597
- assert_np_equal(tape.gradients[s].numpy(), expectedgrads)
598
- tape.zero()
599
-
600
-
601
- def test_transform_constructors(test, device, dtype, register_kernels=False):
602
- rng = np.random.default_rng(123)
603
-
604
- tol = {
605
- np.float16: 5.0e-3,
606
- np.float32: 1.0e-6,
607
- np.float64: 1.0e-8,
608
- }.get(dtype, 0)
609
-
610
- wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
611
- vec3 = wp.types.vector(length=3, dtype=wptype)
612
- transform = wp.types.transformation(dtype=wptype)
613
- quat = wp.types.quaternion(dtype=wptype)
614
-
615
- def check_transform_constructor(
616
- input: wp.array(dtype=wptype),
617
- out: wp.array(dtype=wptype),
618
- ):
619
- result = transform(vec3(input[0], input[1], input[2]), quat(input[3], input[4], input[5], input[6]))
620
-
621
- # multiply the output by 2 so we've got something to backpropagate:
622
- out[0] = wptype(2) * result[0]
623
- out[1] = wptype(2) * result[1]
624
- out[2] = wptype(2) * result[2]
625
- out[3] = wptype(2) * result[3]
626
- out[4] = wptype(2) * result[4]
627
- out[5] = wptype(2) * result[5]
628
- out[6] = wptype(2) * result[6]
629
-
630
- kernel = getkernel(check_transform_constructor, suffix=dtype.__name__)
631
- output_select_kernel = get_select_kernel(wptype)
632
-
633
- if register_kernels:
634
- return
635
-
636
- p = rng.standard_normal(size=3).astype(dtype)
637
- q = rng.standard_normal(size=4).astype(dtype)
638
- q /= np.linalg.norm(q)
639
-
640
- input = wp.array(np.concatenate((p, q)), requires_grad=True, device=device)
641
- output = wp.zeros_like(input)
642
-
643
- wp.launch(kernel, dim=1, inputs=[input], outputs=[output], device=device)
644
-
645
- assert_np_equal(output.numpy(), 2 * input.numpy(), tol=tol)
646
-
647
- for i in range(len(input)):
648
- cmp = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
649
- tape = wp.Tape()
650
- with tape:
651
- wp.launch(kernel, dim=1, inputs=[input], outputs=[output], device=device)
652
- wp.launch(output_select_kernel, dim=1, inputs=[output, i], outputs=[cmp], device=device)
653
- tape.backward(loss=cmp)
654
- expectedgrads = np.zeros(len(input))
655
- expectedgrads[i] = 2
656
- assert_np_equal(tape.gradients[input].numpy(), expectedgrads)
657
- tape.zero()
658
-
659
-
660
- def test_transform_indexing(test, device, dtype, register_kernels=False):
661
- rng = np.random.default_rng(123)
662
-
663
- tol = {
664
- np.float16: 5.0e-3,
665
- np.float32: 1.0e-6,
666
- np.float64: 1.0e-8,
667
- }.get(dtype, 0)
668
-
669
- wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
670
- transform = wp.types.transformation(dtype=wptype)
671
-
672
- def check_transform_indexing(
673
- input: wp.array(dtype=transform),
674
- out: wp.array(dtype=wptype),
675
- ):
676
- inpt = input[0]
677
-
678
- # multiply outputs by 2 so we've got something to backpropagate:
679
- idx = 0
680
- for i in range(7):
681
- out[idx] = wptype(2) * inpt[i]
682
- idx = idx + 1
683
-
684
- kernel = getkernel(check_transform_indexing, suffix=dtype.__name__)
685
- output_select_kernel = get_select_kernel(wptype)
686
-
687
- if register_kernels:
688
- return
689
-
690
- input = wp.array(rng.standard_normal(size=(1, 7)).astype(dtype), dtype=transform, requires_grad=True, device=device)
691
- outcmps = wp.zeros(7, dtype=wptype, requires_grad=True, device=device)
692
-
693
- wp.launch(kernel, dim=1, inputs=[input], outputs=[outcmps], device=device)
694
-
695
- assert_np_equal(outcmps.numpy(), 2 * input.numpy().ravel(), tol=tol)
696
- out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
697
- for i in range(7):
698
- tape = wp.Tape()
699
- with tape:
700
- wp.launch(kernel, dim=1, inputs=[input], outputs=[outcmps], device=device)
701
- wp.launch(output_select_kernel, dim=1, inputs=[outcmps, i], outputs=[out], device=device)
702
- tape.backward(loss=out)
703
- expectedresult = np.zeros(7, dtype=dtype)
704
- expectedresult[i] = 2
705
- assert_np_equal(tape.gradients[input].numpy()[0], expectedresult)
706
- tape.zero()
707
-
708
-
709
- def test_transform_scalar_multiplication(test, device, dtype, register_kernels=False):
710
- rng = np.random.default_rng(123)
711
-
712
- tol = {
713
- np.float16: 5.0e-3,
714
- np.float32: 1.0e-6,
715
- np.float64: 1.0e-8,
716
- }.get(dtype, 0)
717
-
718
- wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
719
- transform = wp.types.transformation(dtype=wptype)
720
-
721
- def check_transform_scalar_mul(
722
- s: wp.array(dtype=wptype),
723
- q: wp.array(dtype=transform),
724
- outcmps_l: wp.array(dtype=wptype),
725
- outcmps_r: wp.array(dtype=wptype),
726
- ):
727
- lresult = s[0] * q[0]
728
- rresult = q[0] * s[0]
729
-
730
- # multiply outputs by 2 so we've got something to backpropagate:
731
- for i in range(7):
732
- outcmps_l[i] = wptype(2) * lresult[i]
733
- outcmps_r[i] = wptype(2) * rresult[i]
734
-
735
- kernel = getkernel(check_transform_scalar_mul, suffix=dtype.__name__)
736
- output_select_kernel = get_select_kernel(wptype)
737
-
738
- if register_kernels:
739
- return
740
-
741
- s = wp.array(rng.standard_normal(size=1).astype(dtype), requires_grad=True, device=device)
742
- q = wp.array(rng.standard_normal(size=(1, 7)).astype(dtype), dtype=transform, requires_grad=True, device=device)
743
-
744
- outcmps_l = wp.zeros(7, dtype=wptype, requires_grad=True, device=device)
745
- outcmps_r = wp.zeros(7, dtype=wptype, requires_grad=True, device=device)
746
-
747
- wp.launch(
748
- kernel,
749
- dim=1,
750
- inputs=[s, q],
751
- outputs=[
752
- outcmps_l,
753
- outcmps_r,
754
- ],
755
- device=device,
756
- )
757
-
758
- assert_np_equal(outcmps_l.numpy(), 2 * s.numpy()[0] * q.numpy(), tol=tol)
759
- assert_np_equal(outcmps_r.numpy(), 2 * s.numpy()[0] * q.numpy(), tol=tol)
760
-
761
- out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
762
- for i in range(7):
763
- # test left/right mul gradients:
764
- for wrt in [outcmps_l, outcmps_r]:
765
- tape = wp.Tape()
766
- with tape:
767
- wp.launch(kernel, dim=1, inputs=[s, q], outputs=[outcmps_l, outcmps_r], device=device)
768
- wp.launch(output_select_kernel, dim=1, inputs=[wrt, i], outputs=[out], device=device)
769
- tape.backward(loss=out)
770
- expectedresult = np.zeros(7, dtype=dtype)
771
- expectedresult[i] = 2 * s.numpy()[0]
772
- assert_np_equal(tape.gradients[q].numpy()[0], expectedresult, tol=tol)
773
- assert_np_equal(tape.gradients[s].numpy()[0], 2 * q.numpy()[0, i], tol=tol)
774
- tape.zero()
775
-
776
-
777
- def test_transform_add_sub(test, device, dtype, register_kernels=False):
778
- rng = np.random.default_rng(123)
779
-
780
- tol = {
781
- np.float16: 5.0e-3,
782
- np.float32: 1.0e-6,
783
- np.float64: 1.0e-8,
784
- }.get(dtype, 0)
785
-
786
- wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
787
- transform = wp.types.transformation(dtype=wptype)
788
-
789
- def check_transform_add_sub(
790
- q: wp.array(dtype=transform),
791
- v: wp.array(dtype=transform),
792
- outputs_add: wp.array(dtype=wptype),
793
- outputs_sub: wp.array(dtype=wptype),
794
- ):
795
- addresult = q[0] + v[0]
796
- subresult = q[0] - v[0]
797
- for i in range(7):
798
- outputs_add[i] = wptype(2) * addresult[i]
799
- outputs_sub[i] = wptype(2) * subresult[i]
800
-
801
- kernel = getkernel(check_transform_add_sub, suffix=dtype.__name__)
802
- output_select_kernel = get_select_kernel(wptype)
803
-
804
- if register_kernels:
805
- return
806
-
807
- q = wp.array(rng.standard_normal(size=7).astype(dtype), dtype=transform, requires_grad=True, device=device)
808
- v = wp.array(rng.standard_normal(size=7).astype(dtype), dtype=transform, requires_grad=True, device=device)
809
-
810
- outputs_add = wp.zeros(7, dtype=wptype, requires_grad=True, device=device)
811
- outputs_sub = wp.zeros(7, dtype=wptype, requires_grad=True, device=device)
812
-
813
- wp.launch(
814
- kernel,
815
- dim=1,
816
- inputs=[
817
- q,
818
- v,
819
- ],
820
- outputs=[outputs_add, outputs_sub],
821
- device=device,
822
- )
823
-
824
- assert_np_equal(outputs_add.numpy(), 2 * (q.numpy() + v.numpy()), tol=tol)
825
- assert_np_equal(outputs_sub.numpy(), 2 * (q.numpy() - v.numpy()), tol=tol)
826
-
827
- out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
828
- for i in range(7):
829
- # test add gradients:
830
- tape = wp.Tape()
831
- with tape:
832
- wp.launch(kernel, dim=1, inputs=[q, v], outputs=[outputs_add, outputs_sub], device=device)
833
- wp.launch(output_select_kernel, dim=1, inputs=[outputs_add, i], outputs=[out], device=device)
834
- tape.backward(loss=out)
835
- expectedresult = np.zeros(7, dtype=dtype)
836
- expectedresult[i] = 2
837
- assert_np_equal(tape.gradients[q].numpy()[0], expectedresult, tol=tol)
838
- assert_np_equal(tape.gradients[v].numpy()[0], expectedresult, tol=tol)
839
- tape.zero()
840
-
841
- # test subtraction gradients:
842
- tape = wp.Tape()
843
- with tape:
844
- wp.launch(kernel, dim=1, inputs=[q, v], outputs=[outputs_add, outputs_sub], device=device)
845
- wp.launch(output_select_kernel, dim=1, inputs=[outputs_sub, i], outputs=[out], device=device)
846
- tape.backward(loss=out)
847
- expectedresult = np.zeros(7, dtype=dtype)
848
- expectedresult[i] = 2
849
- assert_np_equal(tape.gradients[q].numpy()[0], expectedresult, tol=tol)
850
- assert_np_equal(tape.gradients[v].numpy()[0], -expectedresult, tol=tol)
851
- tape.zero()
852
-
853
-
854
- def test_transform_get_trans_rot(test, device, dtype, register_kernels=False):
855
- rng = np.random.default_rng(123)
856
-
857
- tol = {
858
- np.float16: 1.0e-2,
859
- np.float32: 1.0e-6,
860
- np.float64: 1.0e-8,
861
- }.get(dtype, 0)
862
-
863
- wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
864
- transform = wp.types.transformation(dtype=wptype)
865
-
866
- def check_transform_get_trans_rot(
867
- s: wp.array(dtype=transform),
868
- outputs: wp.array(dtype=wptype),
869
- ):
870
- trans = wp.transform_get_translation(s[0])
871
- q = wp.transform_get_rotation(s[0])
872
-
873
- outputs[0] = wptype(2) * trans[0]
874
- outputs[1] = wptype(2) * trans[1]
875
- outputs[2] = wptype(2) * trans[2]
876
-
877
- outputs[3] = wptype(2) * q[0]
878
- outputs[4] = wptype(2) * q[1]
879
- outputs[5] = wptype(2) * q[2]
880
- outputs[6] = wptype(2) * q[3]
881
-
882
- kernel = getkernel(check_transform_get_trans_rot, suffix=dtype.__name__)
883
- output_select_kernel = get_select_kernel(wptype)
884
-
885
- if register_kernels:
886
- return
887
-
888
- s = wp.array(rng.standard_normal(size=7).astype(dtype), dtype=transform, requires_grad=True, device=device)
889
- outputs = wp.zeros(7, dtype=wptype, requires_grad=True, device=device)
890
-
891
- wp.launch(
892
- kernel,
893
- dim=1,
894
- inputs=[
895
- s,
896
- ],
897
- outputs=[outputs],
898
- device=device,
899
- )
900
-
901
- assert_np_equal(outputs.numpy(), 2.0 * s.numpy(), tol=tol)
902
-
903
- for i in range(7):
904
- cmp = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
905
- tape = wp.Tape()
906
- with tape:
907
- wp.launch(
908
- kernel,
909
- dim=1,
910
- inputs=[
911
- s,
912
- ],
913
- outputs=[outputs],
914
- device=device,
915
- )
916
- wp.launch(output_select_kernel, dim=1, inputs=[outputs, i], outputs=[cmp], device=device)
917
- tape.backward(loss=cmp)
918
- expectedgrads = np.zeros(7)
919
- expectedgrads[i] = 2
920
- assert_np_equal(tape.gradients[s].numpy(), expectedgrads)
921
- tape.zero()
922
-
923
-
924
- def test_transform_multiply(test, device, dtype, register_kernels=False):
925
- rng = np.random.default_rng(123)
926
-
927
- tol = {
928
- np.float16: 1.0e-2,
929
- np.float32: 1.0e-6,
930
- np.float64: 1.0e-8,
931
- }.get(dtype, 0)
932
-
933
- wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
934
- transform = wp.types.transformation(dtype=wptype)
935
-
936
- def check_transform_multiply(
937
- a: wp.array(dtype=transform),
938
- b: wp.array(dtype=transform),
939
- outputs: wp.array(dtype=wptype),
940
- outputs_fn: wp.array(dtype=wptype),
941
- outputs_manual: wp.array(dtype=wptype),
942
- ):
943
- result = a[0] * b[0]
944
- result_fn = wp.transform_multiply(a[0], b[0])
945
-
946
- # let's just work out the transform multiplication manually
947
- # and compare value/gradients with that:
948
- atrans = wp.transform_get_translation(a[0])
949
- arot = wp.transform_get_rotation(a[0])
950
-
951
- btrans = wp.transform_get_translation(b[0])
952
- brot = wp.transform_get_rotation(b[0])
953
-
954
- trans = wp.quat_rotate(arot, btrans) + atrans
955
- rot = arot * brot
956
- result_manual = transform(trans, rot)
957
-
958
- for i in range(7):
959
- outputs[i] = wptype(2) * result[i]
960
- outputs_fn[i] = wptype(2) * result_fn[i]
961
- outputs_manual[i] = wptype(2) * result_manual[i]
962
-
963
- kernel = getkernel(check_transform_multiply, suffix=dtype.__name__)
964
- output_select_kernel = get_select_kernel(wptype)
965
-
966
- if register_kernels:
967
- return
968
-
969
- q = rng.standard_normal(size=7)
970
- s = rng.standard_normal(size=7)
971
- q[3:] /= np.linalg.norm(q[3:])
972
- s[3:] /= np.linalg.norm(s[3:])
973
-
974
- q = wp.array(q.astype(dtype), dtype=transform, requires_grad=True, device=device)
975
- s = wp.array(s.astype(dtype), dtype=transform, requires_grad=True, device=device)
976
- outputs = wp.zeros(7, dtype=wptype, requires_grad=True, device=device)
977
- outputs_fn = wp.zeros(7, dtype=wptype, requires_grad=True, device=device)
978
- outputs_manual = wp.zeros(7, dtype=wptype, requires_grad=True, device=device)
979
-
980
- wp.launch(
981
- kernel,
982
- dim=1,
983
- inputs=[
984
- q,
985
- s,
986
- ],
987
- outputs=[outputs, outputs_fn, outputs_manual],
988
- device=device,
989
- )
990
-
991
- assert_np_equal(outputs.numpy(), outputs_fn.numpy(), tol=tol)
992
- assert_np_equal(outputs.numpy(), outputs_manual.numpy(), tol=tol)
993
-
994
- for i in range(7):
995
- cmp = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
996
- cmp_fn = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
997
- cmp_manual = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
998
- tape = wp.Tape()
999
- with tape:
1000
- wp.launch(
1001
- kernel,
1002
- dim=1,
1003
- inputs=[
1004
- q,
1005
- s,
1006
- ],
1007
- outputs=[outputs, outputs_fn, outputs_manual],
1008
- device=device,
1009
- )
1010
- wp.launch(output_select_kernel, dim=1, inputs=[outputs, i], outputs=[cmp], device=device)
1011
- wp.launch(output_select_kernel, dim=1, inputs=[outputs_fn, i], outputs=[cmp_fn], device=device)
1012
- wp.launch(output_select_kernel, dim=1, inputs=[outputs_manual, i], outputs=[cmp_manual], device=device)
1013
- tape.backward(loss=cmp)
1014
- qgrads = 1.0 * tape.gradients[q].numpy()
1015
- sgrads = 1.0 * tape.gradients[s].numpy()
1016
- tape.zero()
1017
- tape.backward(loss=cmp_fn)
1018
- qgrads_fn = 1.0 * tape.gradients[q].numpy()
1019
- sgrads_fn = 1.0 * tape.gradients[s].numpy()
1020
- tape.zero()
1021
- tape.backward(loss=cmp_manual)
1022
- qgrads_manual = 1.0 * tape.gradients[q].numpy()
1023
- sgrads_manual = 1.0 * tape.gradients[s].numpy()
1024
- tape.zero()
1025
-
1026
- assert_np_equal(qgrads, qgrads_fn, tol=tol)
1027
- assert_np_equal(sgrads, sgrads_fn, tol=tol)
1028
-
1029
- assert_np_equal(qgrads, qgrads_manual, tol=tol)
1030
- assert_np_equal(sgrads, sgrads_manual, tol=tol)
1031
-
1032
-
1033
- def test_transform_inverse(test, device, dtype, register_kernels=False):
1034
- rng = np.random.default_rng(123)
1035
-
1036
- tol = {
1037
- np.float16: 1.0e-2,
1038
- np.float32: 1.0e-6,
1039
- np.float64: 1.0e-8,
1040
- }.get(dtype, 0)
1041
-
1042
- wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
1043
- transform = wp.types.transformation(dtype=wptype)
1044
-
1045
- def check_transform_inverse(
1046
- a: wp.array(dtype=transform),
1047
- outputs: wp.array(dtype=wptype),
1048
- outputs_shouldbeidentity: wp.array(dtype=wptype),
1049
- outputs_manual: wp.array(dtype=wptype),
1050
- ):
1051
- result = wp.transform_inverse(a[0])
1052
- idt = result * a[0]
1053
-
1054
- # let's just work out the transform inverse manually
1055
- # and compare value/gradients with that:
1056
- atrans = wp.transform_get_translation(a[0])
1057
- arot = wp.transform_get_rotation(a[0])
1058
-
1059
- rotinv = wp.quat_inverse(arot)
1060
- result_manual = transform(-wp.quat_rotate(rotinv, atrans), rotinv)
1061
-
1062
- for i in range(7):
1063
- outputs[i] = wptype(2) * result[i]
1064
- outputs_shouldbeidentity[i] = wptype(2) * idt[i]
1065
- outputs_manual[i] = wptype(2) * result_manual[i]
1066
-
1067
- kernel = getkernel(check_transform_inverse, suffix=dtype.__name__)
1068
- output_select_kernel = get_select_kernel(wptype)
1069
-
1070
- if register_kernels:
1071
- return
1072
-
1073
- q = rng.standard_normal(size=7)
1074
- s = rng.standard_normal(size=7)
1075
- q[3:] /= np.linalg.norm(q[3:])
1076
- s[3:] /= np.linalg.norm(s[3:])
1077
-
1078
- q = wp.array(q.astype(dtype), dtype=transform, requires_grad=True, device=device)
1079
- outputs = wp.zeros(7, dtype=wptype, requires_grad=True, device=device)
1080
- outputs_shouldbeidentity = wp.zeros(7, dtype=wptype, requires_grad=True, device=device)
1081
- outputs_manual = wp.zeros(7, dtype=wptype, requires_grad=True, device=device)
1082
-
1083
- wp.launch(
1084
- kernel,
1085
- dim=1,
1086
- inputs=[
1087
- q,
1088
- ],
1089
- outputs=[outputs, outputs_shouldbeidentity, outputs_manual],
1090
- device=device,
1091
- )
1092
-
1093
- # check inverse:
1094
- assert_np_equal(outputs_shouldbeidentity.numpy(), np.array([0, 0, 0, 0, 0, 0, 2]), tol=tol)
1095
-
1096
- # same as manual result:
1097
- assert_np_equal(outputs.numpy(), outputs_manual.numpy(), tol=tol)
1098
-
1099
- for i in range(7):
1100
- cmp = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1101
- cmp_manual = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1102
- tape = wp.Tape()
1103
- with tape:
1104
- wp.launch(
1105
- kernel,
1106
- dim=1,
1107
- inputs=[
1108
- q,
1109
- ],
1110
- outputs=[outputs, outputs_shouldbeidentity, outputs_manual],
1111
- device=device,
1112
- )
1113
- wp.launch(output_select_kernel, dim=1, inputs=[outputs, i], outputs=[cmp], device=device)
1114
- wp.launch(output_select_kernel, dim=1, inputs=[outputs_manual, i], outputs=[cmp_manual], device=device)
1115
- tape.backward(loss=cmp)
1116
- qgrads = 1.0 * tape.gradients[q].numpy()
1117
- tape.zero()
1118
- tape.backward(loss=cmp_manual)
1119
- qgrads_manual = 1.0 * tape.gradients[q].numpy()
1120
- tape.zero()
1121
-
1122
- # check gradients against manual result:
1123
- assert_np_equal(qgrads, qgrads_manual, tol=tol)
1124
-
1125
-
1126
- def test_transform_point_vector(test, device, dtype, register_kernels=False):
1127
- rng = np.random.default_rng(123)
1128
-
1129
- tol = {
1130
- np.float16: 1.0e-2,
1131
- np.float32: 1.0e-6,
1132
- np.float64: 1.0e-8,
1133
- }.get(dtype, 0)
1134
-
1135
- wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
1136
- transform = wp.types.transformation(dtype=wptype)
1137
- vec3 = wp.types.vector(length=3, dtype=wptype)
1138
-
1139
- def check_transform_point_vector(
1140
- t: wp.array(dtype=transform),
1141
- v: wp.array(dtype=vec3),
1142
- outputs_pt: wp.array(dtype=wptype),
1143
- outputs_pt_manual: wp.array(dtype=wptype),
1144
- outputs_vec: wp.array(dtype=wptype),
1145
- outputs_vec_manual: wp.array(dtype=wptype),
1146
- ):
1147
- result_pt = wp.transform_point(t[0], v[0])
1148
- result_pt_manual = wp.transform_get_translation(t[0]) + wp.quat_rotate(wp.transform_get_rotation(t[0]), v[0])
1149
-
1150
- result_vec = wp.transform_vector(t[0], v[0])
1151
- result_vec_manual = wp.quat_rotate(wp.transform_get_rotation(t[0]), v[0])
1152
-
1153
- for i in range(3):
1154
- outputs_pt[i] = wptype(2) * result_pt[i]
1155
- outputs_pt_manual[i] = wptype(2) * result_pt_manual[i]
1156
- outputs_vec[i] = wptype(2) * result_vec[i]
1157
- outputs_vec_manual[i] = wptype(2) * result_vec_manual[i]
1158
-
1159
- kernel = getkernel(check_transform_point_vector, suffix=dtype.__name__)
1160
- output_select_kernel = get_select_kernel(wptype)
1161
-
1162
- if register_kernels:
1163
- return
1164
-
1165
- q = rng.standard_normal(size=7)
1166
- q[3:] /= np.linalg.norm(q[3:])
1167
-
1168
- t = wp.array(q.astype(dtype), dtype=transform, requires_grad=True, device=device)
1169
- v = wp.array(rng.standard_normal(size=3), dtype=vec3, requires_grad=True, device=device)
1170
- outputs_pt = wp.zeros(3, dtype=wptype, requires_grad=True, device=device)
1171
- outputs_pt_manual = wp.zeros(3, dtype=wptype, requires_grad=True, device=device)
1172
- outputs_vec = wp.zeros(3, dtype=wptype, requires_grad=True, device=device)
1173
- outputs_vec_manual = wp.zeros(3, dtype=wptype, requires_grad=True, device=device)
1174
-
1175
- wp.launch(
1176
- kernel,
1177
- dim=1,
1178
- inputs=[t, v],
1179
- outputs=[outputs_pt, outputs_pt_manual, outputs_vec, outputs_vec_manual],
1180
- device=device,
1181
- )
1182
-
1183
- # same as manual results:
1184
- assert_np_equal(outputs_pt.numpy(), outputs_pt_manual.numpy(), tol=tol)
1185
- assert_np_equal(outputs_vec.numpy(), outputs_vec_manual.numpy(), tol=tol)
1186
-
1187
- for i in range(3):
1188
- cmp_pt = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1189
- cmp_pt_manual = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1190
- cmp_vec = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1191
- cmp_vec_manual = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1192
-
1193
- tape = wp.Tape()
1194
- with tape:
1195
- wp.launch(
1196
- kernel,
1197
- dim=1,
1198
- inputs=[t, v],
1199
- outputs=[outputs_pt, outputs_pt_manual, outputs_vec, outputs_vec_manual],
1200
- device=device,
1201
- )
1202
- wp.launch(output_select_kernel, dim=1, inputs=[outputs_pt, i], outputs=[cmp_pt], device=device)
1203
- wp.launch(
1204
- output_select_kernel, dim=1, inputs=[outputs_pt_manual, i], outputs=[cmp_pt_manual], device=device
1205
- )
1206
- wp.launch(output_select_kernel, dim=1, inputs=[outputs_vec, i], outputs=[cmp_vec], device=device)
1207
- wp.launch(
1208
- output_select_kernel, dim=1, inputs=[outputs_vec_manual, i], outputs=[cmp_vec_manual], device=device
1209
- )
1210
- tape.backward(loss=cmp_pt)
1211
- tgrads_pt = 1.0 * tape.gradients[t].numpy()
1212
- vgrads_pt = 1.0 * tape.gradients[v].numpy()
1213
- tape.zero()
1214
- tape.backward(loss=cmp_pt_manual)
1215
- tgrads_pt_manual = 1.0 * tape.gradients[t].numpy()
1216
- vgrads_pt_manual = 1.0 * tape.gradients[v].numpy()
1217
- tape.zero()
1218
- tape.backward(loss=cmp_vec)
1219
- tgrads_vec = 1.0 * tape.gradients[t].numpy()
1220
- vgrads_vec = 1.0 * tape.gradients[v].numpy()
1221
- tape.zero()
1222
- tape.backward(loss=cmp_vec_manual)
1223
- tgrads_vec_manual = 1.0 * tape.gradients[t].numpy()
1224
- vgrads_vec_manual = 1.0 * tape.gradients[v].numpy()
1225
- tape.zero()
1226
-
1227
- # check gradients against manual result:
1228
- assert_np_equal(tgrads_pt, tgrads_pt_manual, tol=tol)
1229
- assert_np_equal(vgrads_pt, vgrads_pt_manual, tol=tol)
1230
- assert_np_equal(tgrads_vec, tgrads_vec_manual, tol=tol)
1231
- assert_np_equal(vgrads_vec, vgrads_vec_manual, tol=tol)
1232
-
1233
-
1234
- def test_spatial_matrix_constructors(test, device, dtype, register_kernels=False):
1235
- rng = np.random.default_rng(123)
1236
-
1237
- tol = {
1238
- np.float16: 5.0e-3,
1239
- np.float32: 1.0e-6,
1240
- np.float64: 1.0e-8,
1241
- }.get(dtype, 0)
1242
-
1243
- wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
1244
- spatial_matrix = wp.types.matrix(shape=(6, 6), dtype=wptype)
1245
-
1246
- def check_spatial_matrix_constructor(
1247
- input: wp.array(dtype=wptype),
1248
- out: wp.array(dtype=wptype),
1249
- ):
1250
- # multiply the output by 2 so we've got something to backpropagate:
1251
- result0 = spatial_matrix(
1252
- input[0],
1253
- input[1],
1254
- input[2],
1255
- input[3],
1256
- input[4],
1257
- input[5],
1258
- input[6],
1259
- input[7],
1260
- input[8],
1261
- input[9],
1262
- input[10],
1263
- input[11],
1264
- input[12],
1265
- input[13],
1266
- input[14],
1267
- input[15],
1268
- input[16],
1269
- input[17],
1270
- input[18],
1271
- input[19],
1272
- input[20],
1273
- input[21],
1274
- input[22],
1275
- input[23],
1276
- input[24],
1277
- input[25],
1278
- input[26],
1279
- input[27],
1280
- input[28],
1281
- input[29],
1282
- input[30],
1283
- input[31],
1284
- input[32],
1285
- input[33],
1286
- input[34],
1287
- input[35],
1288
- )
1289
- result1 = spatial_matrix()
1290
-
1291
- idx = 0
1292
- for i in range(6):
1293
- for j in range(6):
1294
- out[idx] = wptype(2) * result0[i, j]
1295
- idx = idx + 1
1296
-
1297
- for i in range(6):
1298
- for j in range(6):
1299
- out[idx] = result1[i, j]
1300
- idx = idx + 1
1301
-
1302
- kernel = getkernel(check_spatial_matrix_constructor, suffix=dtype.__name__)
1303
- output_select_kernel = get_select_kernel(wptype)
1304
-
1305
- if register_kernels:
1306
- return
1307
-
1308
- input = wp.array(rng.standard_normal(size=6 * 6).astype(dtype), requires_grad=True, device=device)
1309
- output = wp.zeros(2 * 6 * 6, dtype=wptype, requires_grad=True, device=device)
1310
-
1311
- wp.launch(kernel, dim=1, inputs=[input], outputs=[output], device=device)
1312
-
1313
- assert_np_equal(output.numpy()[: 6 * 6], 2 * input.numpy(), tol=tol)
1314
- assert_np_equal(output.numpy()[6 * 6 :], np.zeros_like(input.numpy()), tol=tol)
1315
-
1316
- for i in range(len(input)):
1317
- cmp = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1318
- tape = wp.Tape()
1319
- with tape:
1320
- wp.launch(kernel, dim=1, inputs=[input], outputs=[output], device=device)
1321
- wp.launch(output_select_kernel, dim=1, inputs=[output, i], outputs=[cmp], device=device)
1322
- tape.backward(loss=cmp)
1323
- expectedgrads = np.zeros(len(input))
1324
- expectedgrads[i] = 2
1325
- assert_np_equal(tape.gradients[input].numpy(), expectedgrads)
1326
- tape.zero()
1327
- break
1328
-
1329
-
1330
def test_spatial_matrix_indexing(test, device, dtype, register_kernels=False):
    """Check element reads of a 6x6 matrix inside a kernel, and their gradients.

    The kernel writes every entry of the first matrix in its input array to a
    flat output array, scaled by 2 so the backward pass has a factor to
    recover.  When ``register_kernels`` is true the kernels are only created
    and the function returns before launching anything.
    """
    rng = np.random.default_rng(123)

    tolerance_by_dtype = {np.float16: 5.0e-3, np.float32: 1.0e-6, np.float64: 1.0e-8}
    tol = tolerance_by_dtype.get(dtype, 0)

    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
    spatial_matrix = wp.types.matrix(shape=(6, 6), dtype=wptype)

    def check_spatial_matrix_indexing(
        input: wp.array(dtype=spatial_matrix),
        out: wp.array(dtype=wptype),
    ):
        inpt = input[0]

        # multiply outputs by 2 so we've got something to backpropagate:
        idx = 0
        for i in range(6):
            for j in range(6):
                out[idx] = wptype(2) * inpt[i, j]
                idx = idx + 1

    kernel = getkernel(check_spatial_matrix_indexing, suffix=dtype.__name__)
    output_select_kernel = get_select_kernel(wptype)

    if register_kernels:
        return

    host_values = rng.standard_normal(size=(1, 6, 6)).astype(dtype)
    mat_in = wp.array(host_values, dtype=spatial_matrix, requires_grad=True, device=device)
    outcmps = wp.zeros(6 * 6, dtype=wptype, requires_grad=True, device=device)

    # forward pass: flat output must equal twice the row-major matrix entries
    wp.launch(kernel, dim=1, inputs=[mat_in], outputs=[outcmps], device=device)
    assert_np_equal(outcmps.numpy(), 2 * mat_in.numpy().ravel(), tol=tol)

    # backward pass: select one output component at a time as the loss
    out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
    for idx in range(6 * 6):
        i, j = divmod(idx, 6)  # flat component idx came from entry (i, j)

        tape = wp.Tape()
        with tape:
            wp.launch(kernel, dim=1, inputs=[mat_in], outputs=[outcmps], device=device)
            wp.launch(output_select_kernel, dim=1, inputs=[outcmps, idx], outputs=[out], device=device)
        tape.backward(loss=out)

        expectedresult = np.zeros((6, 6), dtype=dtype)
        expectedresult[i, j] = 2
        assert_np_equal(tape.gradients[mat_in].numpy()[0], expectedresult)
        tape.zero()
1383
-
1384
-
1385
def test_spatial_matrix_scalar_multiplication(test, device, dtype, register_kernels=False):
    """Check scalar * matrix and matrix * scalar for 6x6 matrices, with gradients.

    The kernel computes both products and writes each, scaled by 2, to its own
    flat output array.  When ``register_kernels`` is true the kernels are only
    created and the function returns before launching anything.
    """
    rng = np.random.default_rng(123)

    tolerance_by_dtype = {np.float16: 5.0e-3, np.float32: 1.0e-6, np.float64: 1.0e-8}
    tol = tolerance_by_dtype.get(dtype, 0)

    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
    spatial_matrix = wp.types.matrix(shape=(6, 6), dtype=wptype)

    def check_spatial_matrix_scalar_mul(
        s: wp.array(dtype=wptype),
        q: wp.array(dtype=spatial_matrix),
        outcmps_l: wp.array(dtype=wptype),
        outcmps_r: wp.array(dtype=wptype),
    ):
        lresult = s[0] * q[0]
        rresult = q[0] * s[0]

        # multiply outputs by 2 so we've got something to backpropagate:
        idx = 0
        for i in range(6):
            for j in range(6):
                outcmps_l[idx] = wptype(2) * lresult[i, j]
                outcmps_r[idx] = wptype(2) * rresult[i, j]
                idx = idx + 1

    kernel = getkernel(check_spatial_matrix_scalar_mul, suffix=dtype.__name__)
    output_select_kernel = get_select_kernel(wptype)

    if register_kernels:
        return

    # the scalar is drawn before the matrix; keep that order so the random data is unchanged
    scalar_values = rng.standard_normal(size=1).astype(dtype)
    matrix_values = rng.standard_normal(size=(1, 6, 6)).astype(dtype)
    s = wp.array(scalar_values, requires_grad=True, device=device)
    q = wp.array(matrix_values, dtype=spatial_matrix, requires_grad=True, device=device)

    outcmps_l = wp.zeros(6 * 6, dtype=wptype, requires_grad=True, device=device)
    outcmps_r = wp.zeros(6 * 6, dtype=wptype, requires_grad=True, device=device)

    wp.launch(kernel, dim=1, inputs=[s, q], outputs=[outcmps_l, outcmps_r], device=device)

    # both multiplication orders must give 2 * s * q
    for product in (outcmps_l, outcmps_r):
        assert_np_equal(product.numpy(), 2 * s.numpy()[0] * q.numpy(), tol=tol)

    out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
    for idx in range(6 * 6):
        i, j = divmod(idx, 6)  # flat component idx came from entry (i, j)

        # test left/right mul gradients:
        for wrt in (outcmps_l, outcmps_r):
            tape = wp.Tape()
            with tape:
                wp.launch(kernel, dim=1, inputs=[s, q], outputs=[outcmps_l, outcmps_r], device=device)
                wp.launch(output_select_kernel, dim=1, inputs=[wrt, idx], outputs=[out], device=device)
            tape.backward(loss=out)

            # d(2 * s * q_ij)/dq is 2 * s at (i, j); d/ds is 2 * q_ij
            expectedresult = np.zeros((6, 6), dtype=dtype)
            expectedresult[i, j] = 2 * s.numpy()[0]
            assert_np_equal(tape.gradients[q].numpy()[0], expectedresult, tol=tol)
            assert_np_equal(tape.gradients[s].numpy()[0], 2 * q.numpy()[0, i, j], tol=tol)
            tape.zero()
1459
-
1460
-
1461
def test_spatial_matrix_add_sub(test, device, dtype, register_kernels=False):
    """Check 6x6 matrix addition and subtraction inside a kernel, with gradients.

    The kernel writes ``2 * (q + v)`` and ``2 * (q - v)`` component-wise to two
    flat output arrays.  When ``register_kernels`` is true the kernels are only
    created and the function returns before launching anything.
    """
    rng = np.random.default_rng(123)

    tolerance_by_dtype = {np.float16: 5.0e-3, np.float32: 1.0e-6, np.float64: 1.0e-8}
    tol = tolerance_by_dtype.get(dtype, 0)

    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
    spatial_matrix = wp.types.matrix(shape=(6, 6), dtype=wptype)

    def check_spatial_matrix_add_sub(
        q: wp.array(dtype=spatial_matrix),
        v: wp.array(dtype=spatial_matrix),
        outputs_add: wp.array(dtype=wptype),
        outputs_sub: wp.array(dtype=wptype),
    ):
        addresult = q[0] + v[0]
        subresult = q[0] - v[0]
        idx = 0
        for i in range(6):
            for j in range(6):
                outputs_add[idx] = wptype(2) * addresult[i, j]
                outputs_sub[idx] = wptype(2) * subresult[i, j]
                idx = idx + 1

    kernel = getkernel(check_spatial_matrix_add_sub, suffix=dtype.__name__)
    output_select_kernel = get_select_kernel(wptype)

    if register_kernels:
        return

    q_values = rng.standard_normal(size=(1, 6, 6)).astype(dtype)
    v_values = rng.standard_normal(size=(1, 6, 6)).astype(dtype)
    q = wp.array(q_values, dtype=spatial_matrix, requires_grad=True, device=device)
    v = wp.array(v_values, dtype=spatial_matrix, requires_grad=True, device=device)

    outputs_add = wp.zeros(6 * 6, dtype=wptype, requires_grad=True, device=device)
    outputs_sub = wp.zeros(6 * 6, dtype=wptype, requires_grad=True, device=device)

    wp.launch(kernel, dim=1, inputs=[q, v], outputs=[outputs_add, outputs_sub], device=device)

    assert_np_equal(outputs_add.numpy(), 2 * (q.numpy() + v.numpy()), tol=tol)
    assert_np_equal(outputs_sub.numpy(), 2 * (q.numpy() - v.numpy()), tol=tol)

    out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
    for idx in range(6 * 6):
        i, j = divmod(idx, 6)  # flat component idx came from entry (i, j)

        # addition first, then subtraction; only the sign of the v gradient differs
        for selected, v_sign in ((outputs_add, 1), (outputs_sub, -1)):
            tape = wp.Tape()
            with tape:
                wp.launch(kernel, dim=1, inputs=[q, v], outputs=[outputs_add, outputs_sub], device=device)
                wp.launch(output_select_kernel, dim=1, inputs=[selected, idx], outputs=[out], device=device)
            tape.backward(loss=out)

            expectedresult = np.zeros((6, 6), dtype=dtype)
            expectedresult[i, j] = 2
            assert_np_equal(tape.gradients[q].numpy()[0], expectedresult, tol=tol)
            assert_np_equal(tape.gradients[v].numpy()[0], v_sign * expectedresult, tol=tol)
            tape.zero()
1547
-
1548
-
1549
def test_spatial_matvec_multiplication(test, device, dtype, register_kernels=False):
    """Check 6x6 matrix times length-6 vector inside a kernel, with gradients.

    The kernel writes ``2 * (m * v)`` component-wise to a flat output array.
    When ``register_kernels`` is true the kernels are only created and the
    function returns before launching anything.
    """
    rng = np.random.default_rng(123)

    tolerance_by_dtype = {np.float16: 2.0e-2, np.float32: 5.0e-6, np.float64: 1.0e-8}
    tol = tolerance_by_dtype.get(dtype, 0)

    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
    spatial_matrix = wp.types.matrix(shape=(6, 6), dtype=wptype)
    spatial_vector = wp.types.vector(length=6, dtype=wptype)

    output_select_kernel = get_select_kernel(wptype)

    def check_spatial_mat_vec_mul(
        v: wp.array(dtype=spatial_vector),
        m: wp.array(dtype=spatial_matrix),
        outcomponents: wp.array(dtype=wptype),
    ):
        result = m[0] * v[0]

        # multiply outputs by 2 so we've got something to backpropagate:
        idx = 0
        for i in range(6):
            outcomponents[idx] = wptype(2) * result[i]
            idx = idx + 1

    kernel = getkernel(check_spatial_mat_vec_mul, suffix=dtype.__name__)

    if register_kernels:
        return

    # the vector is drawn before the matrix; keep that order so the random data is unchanged
    vector_values = rng.standard_normal(size=(1, 6)).astype(dtype)
    matrix_values = rng.standard_normal(size=(1, 6, 6)).astype(dtype)
    v = wp.array(vector_values, dtype=spatial_vector, requires_grad=True, device=device)
    m = wp.array(matrix_values, dtype=spatial_matrix, requires_grad=True, device=device)
    outcomponents = wp.zeros(6, dtype=wptype, requires_grad=True, device=device)

    wp.launch(kernel, dim=1, inputs=[v, m], outputs=[outcomponents], device=device)

    assert_np_equal(outcomponents.numpy(), 2 * np.matmul(m.numpy()[0], v.numpy()[0]), tol=tol)

    out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
    for row in range(6):
        tape = wp.Tape()
        with tape:
            wp.launch(kernel, dim=1, inputs=[v, m], outputs=[outcomponents], device=device)
            wp.launch(output_select_kernel, dim=1, inputs=[outcomponents, row], outputs=[out], device=device)
        tape.backward(loss=out)

        # output component `row` is 2 * dot(m[row, :], v)
        assert_np_equal(tape.gradients[v].numpy()[0], 2 * m.numpy()[0, row, :], tol=tol)

        expectedresult = np.zeros((6, 6), dtype=dtype)
        expectedresult[row, :] = 2 * v.numpy()[0]
        assert_np_equal(tape.gradients[m].numpy()[0], expectedresult, tol=tol)

        tape.zero()
1608
-
1609
-
1610
def test_spatial_matmat_multiplication(test, device, dtype, register_kernels=False):
    """Check the product of two 6x6 matrices inside a kernel, with gradients.

    The kernel writes ``2 * (m * v)`` component-wise to a flat output array.
    When ``register_kernels`` is true the kernels are only created and the
    function returns before launching anything.
    """
    rng = np.random.default_rng(123)

    tolerance_by_dtype = {np.float16: 2.0e-2, np.float32: 5.0e-6, np.float64: 1.0e-8}
    tol = tolerance_by_dtype.get(dtype, 0)

    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
    spatial_matrix = wp.types.matrix(shape=(6, 6), dtype=wptype)

    output_select_kernel = get_select_kernel(wptype)

    def check_mat_mat_mul(
        v: wp.array(dtype=spatial_matrix),
        m: wp.array(dtype=spatial_matrix),
        outcomponents: wp.array(dtype=wptype),
    ):
        result = m[0] * v[0]

        # multiply outputs by 2 so we've got something to backpropagate:
        idx = 0
        for i in range(6):
            for j in range(6):
                outcomponents[idx] = wptype(2) * result[i, j]
                idx = idx + 1

    kernel = getkernel(check_mat_mat_mul, suffix=dtype.__name__)

    if register_kernels:
        return

    # v is drawn before m; keep that order so the random data is unchanged
    v_values = rng.standard_normal(size=(1, 6, 6)).astype(dtype)
    m_values = rng.standard_normal(size=(1, 6, 6)).astype(dtype)
    v = wp.array(v_values, dtype=spatial_matrix, requires_grad=True, device=device)
    m = wp.array(m_values, dtype=spatial_matrix, requires_grad=True, device=device)
    outcomponents = wp.zeros(6 * 6, dtype=wptype, requires_grad=True, device=device)

    wp.launch(kernel, dim=1, inputs=[v, m], outputs=[outcomponents], device=device)

    assert_np_equal(outcomponents.numpy(), 2 * np.matmul(m.numpy()[0], v.numpy()[0]), tol=tol)

    out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
    grad_tol = 10 * tol  # gradient checks use a looser tolerance than the forward check
    for idx in range(6 * 6):
        i, j = divmod(idx, 6)  # flat component idx came from entry (i, j)

        tape = wp.Tape()
        with tape:
            wp.launch(kernel, dim=1, inputs=[v, m], outputs=[outcomponents], device=device)
            wp.launch(output_select_kernel, dim=1, inputs=[outcomponents, idx], outputs=[out], device=device)
        tape.backward(loss=out)

        # entry (i, j) of m * v only involves column j of v and row i of m
        expected_v_grad = np.zeros((6, 6), dtype=dtype)
        expected_v_grad[:, j] = 2 * m.numpy()[0, i, :]
        assert_np_equal(tape.gradients[v].numpy()[0], expected_v_grad, tol=grad_tol)

        expected_m_grad = np.zeros((6, 6), dtype=dtype)
        expected_m_grad[i, :] = 2 * v.numpy()[0, :, j]
        assert_np_equal(tape.gradients[m].numpy()[0], expected_m_grad, tol=grad_tol)

        tape.zero()
1675
-
1676
-
1677
def test_spatial_mat_transpose(test, device, dtype, register_kernels=False):
    """Check ``wp.transpose`` on a 6x6 matrix inside a kernel, with gradients.

    The kernel writes ``2 * transpose(m)`` component-wise to a flat output
    array.  When ``register_kernels`` is true the kernels are only created and
    the function returns before launching anything.
    """
    rng = np.random.default_rng(123)

    tolerance_by_dtype = {np.float16: 1.0e-2, np.float32: 1.0e-6, np.float64: 1.0e-8}
    tol = tolerance_by_dtype.get(dtype, 0)

    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
    spatial_matrix = wp.types.matrix(shape=(6, 6), dtype=wptype)

    output_select_kernel = get_select_kernel(wptype)

    def check_spatial_mat_transpose(
        m: wp.array(dtype=spatial_matrix),
        outcomponents: wp.array(dtype=wptype),
    ):
        # multiply outputs by 2 so we've got something to backpropagate:
        mat = wptype(2) * wp.transpose(m[0])

        idx = 0
        for i in range(6):
            for j in range(6):
                outcomponents[idx] = mat[i, j]
                idx = idx + 1

    kernel = getkernel(check_spatial_mat_transpose, suffix=dtype.__name__)

    if register_kernels:
        return

    matrix_values = rng.standard_normal(size=(1, 6, 6)).astype(dtype)
    m = wp.array(matrix_values, dtype=spatial_matrix, requires_grad=True, device=device)
    outcomponents = wp.zeros(6 * 6, dtype=wptype, requires_grad=True, device=device)

    wp.launch(kernel, dim=1, inputs=[m], outputs=[outcomponents], device=device)

    assert_np_equal(outcomponents.numpy(), 2 * m.numpy()[0].T, tol=tol)

    out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
    for idx in range(6 * 6):
        i, j = divmod(idx, 6)  # flat component idx is entry (i, j) of the transpose

        tape = wp.Tape()
        with tape:
            wp.launch(kernel, dim=1, inputs=[m], outputs=[outcomponents], device=device)
            wp.launch(output_select_kernel, dim=1, inputs=[outcomponents, idx], outputs=[out], device=device)
        tape.backward(loss=out)

        # entry (i, j) of the transpose is entry (j, i) of the input
        expectedresult = np.zeros((6, 6), dtype=dtype)
        expectedresult[j, i] = 2
        assert_np_equal(tape.gradients[m].numpy()[0], expectedresult)
        tape.zero()
1732
-
1733
-
1734
def test_spatial_outer_product(test, device, dtype, register_kernels=False):
    """Check ``wp.outer`` on two length-6 vectors inside a kernel, with gradients.

    The kernel writes ``2 * outer(s, v)`` component-wise to a flat output
    array.  When ``register_kernels`` is true the kernels are only created and
    the function returns before launching anything.
    """
    rng = np.random.default_rng(123)

    tolerance_by_dtype = {np.float16: 5.0e-3, np.float32: 1.0e-6, np.float64: 1.0e-8}
    tol = tolerance_by_dtype.get(dtype, 0)

    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
    spatial_vector = wp.types.vector(length=6, dtype=wptype)

    output_select_kernel = get_select_kernel(wptype)

    def check_spatial_outer_product(
        s: wp.array(dtype=spatial_vector),
        v: wp.array(dtype=spatial_vector),
        outcomponents: wp.array(dtype=wptype),
    ):
        # the factor of 2 gives the backward pass something to recover:
        mresult = wptype(2) * wp.outer(s[0], v[0])

        idx = 0
        for i in range(6):
            for j in range(6):
                outcomponents[idx] = mresult[i, j]
                idx = idx + 1

    kernel = getkernel(check_spatial_outer_product, suffix=dtype.__name__)

    if register_kernels:
        return

    # s is drawn before v; keep that order so the random data is unchanged
    s_values = rng.standard_normal(size=(1, 6)).astype(dtype)
    v_values = rng.standard_normal(size=(1, 6)).astype(dtype)
    s = wp.array(s_values, dtype=spatial_vector, requires_grad=True, device=device)
    v = wp.array(v_values, dtype=spatial_vector, requires_grad=True, device=device)
    outcomponents = wp.zeros(6 * 6, dtype=wptype, requires_grad=True, device=device)

    wp.launch(kernel, dim=1, inputs=[s, v], outputs=[outcomponents], device=device)

    assert_np_equal(outcomponents.numpy(), 2 * s.numpy()[0, :, None] * v.numpy()[0, None, :], tol=tol)

    out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
    grad_tol = 10 * tol  # gradient checks use a looser tolerance than the forward check
    for idx in range(6 * 6):
        i, j = divmod(idx, 6)  # flat component idx came from entry (i, j)

        tape = wp.Tape()
        with tape:
            wp.launch(kernel, dim=1, inputs=[s, v], outputs=[outcomponents], device=device)
            wp.launch(output_select_kernel, dim=1, inputs=[outcomponents, idx], outputs=[out], device=device)
        tape.backward(loss=out)

        # this component is 2 * s_i * v_j, so its s gradient is nonzero only at
        # the ith component and its v gradient only at the jth component:
        expected_s_grad = np.zeros(6, dtype=dtype)
        expected_s_grad[i] = 2 * v.numpy()[0, j]
        assert_np_equal(tape.gradients[s].numpy()[0], expected_s_grad, tol=grad_tol)

        expected_v_grad = np.zeros(6, dtype=dtype)
        expected_v_grad[j] = 2 * s.numpy()[0, i]
        assert_np_equal(tape.gradients[v].numpy()[0], expected_v_grad, tol=grad_tol)
        tape.zero()
1812
-
1813
-
1814
def test_spatial_adjoint(test, device, dtype, register_kernels=False):
    """Check ``wp.spatial_adjoint`` on two 3x3 matrices inside a kernel, with gradients.

    The forward check expects the 6x6 result to hold ``R`` in both diagonal
    3x3 blocks, ``S`` in the lower-left block and zeros in the upper-right
    block, all scaled by 2.  When ``register_kernels`` is true the kernels are
    only created and the function returns before launching anything.
    """
    rng = np.random.default_rng(123)

    tolerance_by_dtype = {np.float16: 5.0e-3, np.float32: 1.0e-6, np.float64: 1.0e-8}
    tol = tolerance_by_dtype.get(dtype, 0)

    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
    mat3 = wp.types.matrix(shape=(3, 3), dtype=wptype)

    output_select_kernel = get_select_kernel(wptype)

    def check_spatial_adjoint(
        R: wp.array(dtype=mat3),
        S: wp.array(dtype=mat3),
        outcomponents: wp.array(dtype=wptype),
    ):
        # the factor of 2 gives the backward pass something to recover:
        mresult = wptype(2) * wp.spatial_adjoint(R[0], S[0])

        idx = 0
        for i in range(6):
            for j in range(6):
                outcomponents[idx] = mresult[i, j]
                idx = idx + 1

    kernel = getkernel(check_spatial_adjoint, suffix=dtype.__name__)

    if register_kernels:
        return

    # R is drawn before S; keep that order so the random data is unchanged
    r_values = rng.standard_normal(size=(1, 3, 3)).astype(dtype)
    s_values = rng.standard_normal(size=(1, 3, 3)).astype(dtype)
    R = wp.array(r_values, dtype=mat3, requires_grad=True, device=device)
    S = wp.array(s_values, dtype=mat3, requires_grad=True, device=device)
    outcomponents = wp.zeros(6 * 6, dtype=wptype, requires_grad=True, device=device)

    wp.launch(kernel, dim=1, inputs=[R, S], outputs=[outcomponents], device=device)

    result = outcomponents.numpy().reshape(6, 6)
    expected = np.zeros_like(result)
    expected[:3, :3] = R.numpy()
    expected[3:, 3:] = R.numpy()
    expected[3:, :3] = S.numpy()

    assert_np_equal(result, 2 * expected, tol=tol)

    out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
    grad_tol = 10 * tol  # gradient checks use a looser tolerance than the forward check
    for idx in range(6 * 6):
        i, j = divmod(idx, 6)  # flat component idx came from entry (i, j)
        block_row, block_col = i // 3, j // 3

        tape = wp.Tape()
        with tape:
            wp.launch(kernel, dim=1, inputs=[R, S], outputs=[outcomponents], device=device)
            wp.launch(output_select_kernel, dim=1, inputs=[outcomponents, idx], outputs=[out], device=device)
        tape.backward(loss=out)

        # entries in the two diagonal blocks are copies of R, so only there
        # does the selected component depend on R:
        expected_r_grad = np.zeros((3, 3), dtype=dtype)
        if block_row == block_col:
            expected_r_grad[i % 3, j % 3] = 2
        assert_np_equal(tape.gradients[R].numpy()[0], expected_r_grad, tol=grad_tol)

        # entries in the lower-left block are copies of S:
        expected_s_grad = np.zeros((3, 3), dtype=dtype)
        if block_row == 1 and block_col == 0:
            expected_s_grad[i % 3, j % 3] = 2
        assert_np_equal(tape.gradients[S].numpy()[0], expected_s_grad, tol=grad_tol)
        tape.zero()
1895
-
1896
-
1897
def test_transform_identity(test, device, dtype, register_kernels=False):
    """Check ``wp.transform_identity`` with an explicit dtype and with its default.

    Each kernel copies the seven components of the identity transform to an
    output array; the expected values are all zeros except a 1 in the last
    slot.  When ``register_kernels`` is true the kernels are only created and
    the function returns before launching anything.
    """
    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]

    def transform_identity_test(output: wp.array(dtype=wptype)):
        t = wp.transform_identity(dtype=wptype)
        for i in range(7):
            output[i] = t[i]

    def transform_identity_test_default(output: wp.array(dtype=wp.float32)):
        t = wp.transform_identity()
        for i in range(7):
            output[i] = t[i]

    typed_kernel = getkernel(transform_identity_test, suffix=dtype.__name__)
    default_kernel = getkernel(transform_identity_test_default, suffix=np.float32.__name__)

    if register_kernels:
        return

    # first the explicitly typed variant, then the one that should default to float32
    for identity_kernel, out_dtype in ((typed_kernel, wptype), (default_kernel, wp.float32)):
        output = wp.zeros(7, dtype=out_dtype, device=device)
        wp.launch(identity_kernel, dim=1, inputs=[], outputs=[output], device=device)

        expected = np.zeros_like(output.numpy())
        expected[-1] = 1
        assert_np_equal(output.numpy(), expected)
1928
-
1929
-
1930
def test_transform_anon_type_instance(test, device, dtype, register_kernels=False):
    """Check building a transform from ``wp.vector`` and ``wp.quaternion`` values, with gradients.

    The kernel builds the transform from seven scalar inputs and writes each of
    its seven components, scaled by 2, to the output array.  When
    ``register_kernels`` is true the kernels are only created and the function
    returns before launching anything.
    """
    rng = np.random.default_rng(123)

    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]

    def transform_create_test(input: wp.array(dtype=wptype), output: wp.array(dtype=wptype)):
        t = wp.transformation(
            wp.vector(input[0], input[1], input[2]), wp.quaternion(input[3], input[4], input[5], input[6])
        )
        for i in range(7):
            output[i] = wptype(2) * t[i]

    transform_create_kernel = getkernel(transform_create_test, suffix=dtype.__name__)
    output_select_kernel = get_select_kernel(wptype)

    if register_kernels:
        return

    host_values = rng.standard_normal(size=7).astype(dtype)
    values = wp.array(host_values, requires_grad=True, device=device)
    output = wp.zeros(7, dtype=wptype, requires_grad=True, device=device)

    wp.launch(transform_create_kernel, dim=1, inputs=[values], outputs=[output], device=device)
    assert_np_equal(output.numpy(), 2 * values.numpy())

    component_count = len(values)
    for component in range(component_count):
        cmp = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)

        tape = wp.Tape()
        with tape:
            wp.launch(transform_create_kernel, dim=1, inputs=[values], outputs=[output], device=device)
            wp.launch(output_select_kernel, dim=1, inputs=[output, component], outputs=[cmp], device=device)
        tape.backward(loss=cmp)

        # output[k] is 2 * input[k], so only the selected slot has a gradient
        expectedgrads = np.zeros(component_count)
        expectedgrads[component] = 2
        assert_np_equal(tape.gradients[values].numpy(), expectedgrads)
        tape.zero()
1964
-
1965
-
1966
devices = get_test_devices()


class TestSpatial(unittest.TestCase):
    # Intentionally empty: test methods are attached by the registration loop below.
    pass


# Test functions in registration order.  Each one is registered once per float
# type under the name "<function name>_<dtype name>".
# NOTE(review): test_spatial_mat_transpose is defined in this file but is not
# in this list, so it never runs -- confirm whether that is intentional.
_registered_tests = (
    test_spatial_vector_constructors,
    test_spatial_vector_indexing,
    test_spatial_vector_scalar_multiplication,
    test_spatial_vector_add_sub,
    test_spatial_dot,
    test_spatial_cross,
    test_spatial_top_bottom,
    test_transform_constructors,
    test_transform_anon_type_instance,
    test_transform_identity,
    test_transform_indexing,
    test_transform_get_trans_rot,
    test_transform_multiply,
    test_transform_inverse,
    test_transform_point_vector,
    # are these two valid? They don't seem to be doing things you'd want to do,
    # maybe they should be removed
    test_transform_scalar_multiplication,
    test_transform_add_sub,
    test_spatial_matrix_constructors,
    test_spatial_matrix_indexing,
    test_spatial_matrix_scalar_multiplication,
    test_spatial_matrix_add_sub,
    test_spatial_matvec_multiplication,
    test_spatial_matmat_multiplication,
    test_spatial_outer_product,
    test_spatial_adjoint,
)

for dtype in np_float_types:
    for _test_fn in _registered_tests:
        add_function_test_register_kernel(
            TestSpatial,
            f"{_test_fn.__name__}_{dtype.__name__}",
            _test_fn,
            devices=devices,
            dtype=dtype,
        )

# \TODO: test spatial_mass and spatial_jacobian


if __name__ == "__main__":
    wp.build.clear_kernel_cache()
    unittest.main(verbosity=2)
1
+ # Copyright (c) 2023 NVIDIA CORPORATION. All rights reserved.
2
+ # NVIDIA CORPORATION and its licensors retain all intellectual property
3
+ # and proprietary rights in and to this software, related documentation
4
+ # and any modifications thereto. Any use, reproduction, disclosure or
5
+ # distribution of this software and related documentation without an express
6
+ # license agreement from NVIDIA CORPORATION is strictly prohibited.
7
+
8
+ import unittest
9
+
10
+ import numpy as np
11
+
12
+ import warp as wp
13
+ from warp.tests.unittest_utils import *
14
+
15
+ wp.init()
16
+
17
+ np_float_types = [np.float32, np.float64, np.float16]
18
+
19
+ kernel_cache = {}
20
+
21
+
22
def getkernel(func, suffix=""):
    """Return the kernel cached for ``func`` and ``suffix``, creating it on first use.

    The cache key is the function name and the suffix joined by an underscore;
    the same string is passed to ``wp.Kernel`` as the kernel key.
    """
    cache_key = "_".join((func.__name__, suffix))
    try:
        return kernel_cache[cache_key]
    except KeyError:
        created = wp.Kernel(func=func, key=cache_key)
        kernel_cache[cache_key] = created
        return created
27
+
28
+
29
def get_select_kernel(dtype):
    """Return the cached kernel that copies ``input[index]`` into ``out[0]`` for ``dtype`` arrays."""

    def output_select_kernel_fn(
        input: wp.array(dtype=dtype),
        index: int,
        out: wp.array(dtype=dtype),
    ):
        out[0] = input[index]

    # one kernel per element type; the dtype name keeps the cache keys distinct
    select_kernel = getkernel(output_select_kernel_fn, suffix=dtype.__name__)
    return select_kernel
38
+
39
+
40
+ ############################################################
41
+
42
+
43
def test_spatial_vector_constructors(test, device, dtype, register_kernels=False):
    """Check both ways of constructing a length-6 vector inside a kernel, with gradients.

    One kernel builds the vector from six scalars, the other from two
    length-3 vectors.  Each writes the six components, scaled by 2, to the
    output array.  Forward values and per-component gradients are checked for
    both kernels.  When ``register_kernels`` is true the kernels are only
    created and the function returns before launching anything.
    """
    rng = np.random.default_rng(123)

    tol = {
        np.float16: 5.0e-3,
        np.float32: 1.0e-6,
        np.float64: 1.0e-8,
    }.get(dtype, 0)

    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
    vec3 = wp.types.vector(length=3, dtype=wptype)
    spatial_vector = wp.types.vector(length=6, dtype=wptype)

    def check_spatial_vector_component_constructor(
        input: wp.array(dtype=wptype),
        out: wp.array(dtype=wptype),
    ):
        result = spatial_vector(input[0], input[1], input[2], input[3], input[4], input[5])

        # multiply the output by 2 so we've got something to backpropagate:
        out[0] = wptype(2) * result[0]
        out[1] = wptype(2) * result[1]
        out[2] = wptype(2) * result[2]
        out[3] = wptype(2) * result[3]
        out[4] = wptype(2) * result[4]
        out[5] = wptype(2) * result[5]

    def check_spatial_vector_vector_constructor(
        input: wp.array(dtype=wptype),
        out: wp.array(dtype=wptype),
    ):
        result = spatial_vector(vec3(input[0], input[1], input[2]), vec3(input[3], input[4], input[5]))

        # multiply the output by 2 so we've got something to backpropagate:
        out[0] = wptype(2) * result[0]
        out[1] = wptype(2) * result[1]
        out[2] = wptype(2) * result[2]
        out[3] = wptype(2) * result[3]
        out[4] = wptype(2) * result[4]
        out[5] = wptype(2) * result[5]

    kernel = getkernel(check_spatial_vector_component_constructor, suffix=dtype.__name__)
    output_select_kernel = get_select_kernel(wptype)
    vec_kernel = getkernel(check_spatial_vector_vector_constructor, suffix=dtype.__name__)

    if register_kernels:
        return

    # Run the same forward and backward checks for each constructor kernel.
    # The gradient pass for the second set of inputs used to launch the
    # component kernel again, so the vec3-based constructor's gradient was
    # never exercised; each pass now launches the kernel it is checking.
    for constructor_kernel in (kernel, vec_kernel):
        values = wp.array(rng.standard_normal(size=6).astype(dtype), requires_grad=True, device=device)
        output = wp.zeros_like(values)
        wp.launch(constructor_kernel, dim=1, inputs=[values], outputs=[output], device=device)

        assert_np_equal(output.numpy(), 2 * values.numpy(), tol=tol)

        for i in range(len(values)):
            cmp = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
            tape = wp.Tape()
            with tape:
                wp.launch(constructor_kernel, dim=1, inputs=[values], outputs=[output], device=device)
                wp.launch(output_select_kernel, dim=1, inputs=[output, i], outputs=[cmp], device=device)
            tape.backward(loss=cmp)

            # out[k] is 2 * input[k], so only the selected slot has a gradient
            expectedgrads = np.zeros(len(values))
            expectedgrads[i] = 2
            assert_np_equal(tape.gradients[values].numpy(), expectedgrads)
            tape.zero()
126
+
127
+
128
def test_spatial_vector_indexing(test, device, dtype, register_kernels=False):
    """Check reading the components of a ``spatial_vector`` by index.

    The kernel writes twice each of the six components to a scalar array.
    The forward values must equal ``2 * input`` and the gradient of output
    ``i`` with respect to the input vector must be ``2`` at ``i`` and zero
    elsewhere.

    Args:
        test: Test-case instance supplied by the test harness (unused here).
        device: Device on which arrays are allocated and kernels launched.
        dtype: NumPy scalar type selecting the precision under test.
        register_kernels: When true, only create the kernels and return.
    """
    rng = np.random.default_rng(123)

    tolerances = {
        np.float16: 5.0e-3,
        np.float32: 1.0e-6,
        np.float64: 1.0e-8,
    }
    tol = tolerances.get(dtype, 0)

    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
    spatial_vector = wp.types.vector(length=6, dtype=wptype)

    def check_spatial_vector_indexing(
        input: wp.array(dtype=spatial_vector),
        out: wp.array(dtype=wptype),
    ):
        inpt = input[0]

        # multiply outputs by 2 so we've got something to backpropagate:
        idx = 0
        for i in range(6):
            out[idx] = wptype(2) * inpt[i]
            idx = idx + 1

    kernel = getkernel(check_spatial_vector_indexing, suffix=dtype.__name__)
    output_select_kernel = get_select_kernel(wptype)

    if register_kernels:
        return

    host_values = rng.standard_normal(size=(1, 6)).astype(dtype)
    vec_in = wp.array(host_values, dtype=spatial_vector, requires_grad=True, device=device)
    doubled = wp.zeros(6, dtype=wptype, requires_grad=True, device=device)

    # forward values
    wp.launch(kernel, dim=1, inputs=[vec_in], outputs=[doubled], device=device)
    assert_np_equal(doubled.numpy(), 2 * vec_in.numpy().ravel(), tol=tol)

    # gradients, one output component at a time
    picked = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
    for component in range(6):
        tape = wp.Tape()
        with tape:
            wp.launch(kernel, dim=1, inputs=[vec_in], outputs=[doubled], device=device)
            wp.launch(output_select_kernel, dim=1, inputs=[doubled, component], outputs=[picked], device=device)
        tape.backward(loss=picked)

        expected_grad = np.zeros(6, dtype=dtype)
        expected_grad[component] = 2
        assert_np_equal(tape.gradients[vec_in].numpy()[0], expected_grad)
        tape.zero()
178
+
179
+
180
def test_spatial_vector_scalar_multiplication(test, device, dtype, register_kernels=False):
    """Check left and right scalar multiplication of a ``spatial_vector``.

    The kernel computes ``s * q`` and ``q * s`` and stores twice each result.
    Both must equal ``2 * s * q``; the gradient of component ``i`` must be
    ``2 * s`` at index ``i`` with respect to ``q`` and ``2 * q[i]`` with
    respect to ``s``.

    Args:
        test: Test-case instance supplied by the test harness (unused here).
        device: Device on which arrays are allocated and kernels launched.
        dtype: NumPy scalar type selecting the precision under test.
        register_kernels: When true, only create the kernels and return.
    """
    rng = np.random.default_rng(123)

    tolerances = {
        np.float16: 5.0e-3,
        np.float32: 1.0e-6,
        np.float64: 1.0e-8,
    }
    tol = tolerances.get(dtype, 0)

    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
    spatial_vector = wp.types.vector(length=6, dtype=wptype)

    def check_spatial_vector_scalar_mul(
        s: wp.array(dtype=wptype),
        q: wp.array(dtype=spatial_vector),
        outcmps_l: wp.array(dtype=wptype),
        outcmps_r: wp.array(dtype=wptype),
    ):
        lresult = s[0] * q[0]
        rresult = q[0] * s[0]

        # multiply outputs by 2 so we've got something to backpropagate:
        for i in range(6):
            outcmps_l[i] = wptype(2) * lresult[i]
            outcmps_r[i] = wptype(2) * rresult[i]

    kernel = getkernel(check_spatial_vector_scalar_mul, suffix=dtype.__name__)
    output_select_kernel = get_select_kernel(wptype)

    if register_kernels:
        return

    scalar = wp.array(rng.standard_normal(size=1).astype(dtype), requires_grad=True, device=device)
    vec_host = rng.standard_normal(size=(1, 6)).astype(dtype)
    vec = wp.array(vec_host, dtype=spatial_vector, requires_grad=True, device=device)

    left_out = wp.zeros(6, dtype=wptype, requires_grad=True, device=device)
    right_out = wp.zeros(6, dtype=wptype, requires_grad=True, device=device)

    wp.launch(kernel, dim=1, inputs=[scalar, vec], outputs=[left_out, right_out], device=device)

    # both operand orders must give the same forward result
    for product in (left_out, right_out):
        assert_np_equal(product.numpy(), 2 * scalar.numpy()[0] * vec.numpy(), tol=tol)

    picked = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
    for component in range(6):
        # test left/right mul gradients:
        for product in (left_out, right_out):
            tape = wp.Tape()
            with tape:
                wp.launch(kernel, dim=1, inputs=[scalar, vec], outputs=[left_out, right_out], device=device)
                wp.launch(output_select_kernel, dim=1, inputs=[product, component], outputs=[picked], device=device)
            tape.backward(loss=picked)

            expected_vec_grad = np.zeros(6, dtype=dtype)
            expected_vec_grad[component] = 2 * scalar.numpy()[0]
            expected_scalar_grad = 2 * vec.numpy()[0, component]

            assert_np_equal(tape.gradients[vec].numpy()[0], expected_vec_grad, tol=tol)
            assert_np_equal(tape.gradients[scalar].numpy()[0], expected_scalar_grad, tol=tol)
            tape.zero()
248
+
249
+
250
def test_spatial_vector_add_sub(test, device, dtype, register_kernels=False):
    """Check addition and subtraction of two ``spatial_vector`` values.

    The kernel stores twice ``q + v`` and twice ``q - v``. Forward values are
    compared against NumPy; the gradient of component ``i`` must be ``2`` at
    index ``i`` with respect to ``q``, and ``+2`` (add) or ``-2`` (subtract)
    with respect to ``v``.

    Args:
        test: Test-case instance supplied by the test harness (unused here).
        device: Device on which arrays are allocated and kernels launched.
        dtype: NumPy scalar type selecting the precision under test.
        register_kernels: When true, only create the kernels and return.
    """
    rng = np.random.default_rng(123)

    tolerances = {
        np.float16: 5.0e-3,
        np.float32: 1.0e-6,
        np.float64: 1.0e-8,
    }
    tol = tolerances.get(dtype, 0)

    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
    spatial_vector = wp.types.vector(length=6, dtype=wptype)

    def check_spatial_vector_add_sub(
        q: wp.array(dtype=spatial_vector),
        v: wp.array(dtype=spatial_vector),
        outputs_add: wp.array(dtype=wptype),
        outputs_sub: wp.array(dtype=wptype),
    ):
        addresult = q[0] + v[0]
        subresult = q[0] - v[0]
        for i in range(6):
            outputs_add[i] = wptype(2) * addresult[i]
            outputs_sub[i] = wptype(2) * subresult[i]

    kernel = getkernel(check_spatial_vector_add_sub, suffix=dtype.__name__)
    output_select_kernel = get_select_kernel(wptype)
    if register_kernels:
        return

    lhs = wp.array(rng.standard_normal(size=6).astype(dtype), dtype=spatial_vector, requires_grad=True, device=device)
    rhs = wp.array(rng.standard_normal(size=6).astype(dtype), dtype=spatial_vector, requires_grad=True, device=device)

    sum_out = wp.zeros(6, dtype=wptype, requires_grad=True, device=device)
    diff_out = wp.zeros(6, dtype=wptype, requires_grad=True, device=device)

    wp.launch(kernel, dim=1, inputs=[lhs, rhs], outputs=[sum_out, diff_out], device=device)

    assert_np_equal(sum_out.numpy(), 2 * (lhs.numpy() + rhs.numpy()), tol=tol)
    assert_np_equal(diff_out.numpy(), 2 * (lhs.numpy() - rhs.numpy()), tol=tol)

    picked = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
    for component in range(6):
        one_hot = np.zeros(6, dtype=dtype)
        one_hot[component] = 2

        # addition first, then subtraction; only the sign of d/dv differs
        for selected, negate_rhs_grad in ((sum_out, False), (diff_out, True)):
            tape = wp.Tape()
            with tape:
                wp.launch(kernel, dim=1, inputs=[lhs, rhs], outputs=[sum_out, diff_out], device=device)
                wp.launch(output_select_kernel, dim=1, inputs=[selected, component], outputs=[picked], device=device)
            tape.backward(loss=picked)

            expected_rhs_grad = -one_hot if negate_rhs_grad else one_hot
            assert_np_equal(tape.gradients[lhs].numpy()[0], one_hot, tol=tol)
            assert_np_equal(tape.gradients[rhs].numpy()[0], expected_rhs_grad, tol=tol)
            tape.zero()
324
+
325
+
326
def test_spatial_dot(test, device, dtype, register_kernels=False):
    """Check ``wp.spatial_dot`` and its gradients.

    The kernel stores twice the dot product of two spatial vectors. The
    forward value is compared with the NumPy sum of element-wise products;
    the gradient with respect to each operand must be twice the other
    operand.

    Args:
        test: Test-case instance supplied by the test harness (unused here).
        device: Device on which arrays are allocated and kernels launched.
        dtype: NumPy scalar type selecting the precision under test.
        register_kernels: When true, only create the kernel and return.
    """
    rng = np.random.default_rng(123)

    tolerances = {
        np.float16: 1.0e-2,
        np.float32: 1.0e-6,
        np.float64: 1.0e-8,
    }
    tol = tolerances.get(dtype, 0)

    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
    spatial_vector = wp.types.vector(length=6, dtype=wptype)

    def check_spatial_dot(
        s: wp.array(dtype=spatial_vector),
        v: wp.array(dtype=spatial_vector),
        dot: wp.array(dtype=wptype),
    ):
        dot[0] = wptype(2) * wp.spatial_dot(v[0], s[0])

    kernel = getkernel(check_spatial_dot, suffix=dtype.__name__)
    if register_kernels:
        return

    first = wp.array(rng.standard_normal(size=6).astype(dtype), dtype=spatial_vector, requires_grad=True, device=device)
    second = wp.array(rng.standard_normal(size=6).astype(dtype), dtype=spatial_vector, requires_grad=True, device=device)
    result = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)

    tape = wp.Tape()
    with tape:
        wp.launch(kernel, dim=1, inputs=[first, second], outputs=[result], device=device)

    expected_value = 2.0 * (second.numpy() * first.numpy()).sum()
    assert_np_equal(result.numpy()[0], expected_value, tol=tol)

    tape.backward(loss=result)

    # (operand differentiated, other operand, tolerance); the first operand's
    # gradient is checked with a ten-times looser tolerance, as in the
    # original test
    for wrt, other, grad_tol in ((first, second, 10 * tol), (second, first, tol)):
        actual_grad = tape.gradients[wrt].numpy()[0]
        expected_grad = 2.0 * other.numpy()[0]
        assert_np_equal(actual_grad, expected_grad, tol=grad_tol)
376
+
377
+
378
def test_spatial_cross(test, device, dtype, register_kernels=False):
    """Check ``wp.spatial_cross`` / ``wp.spatial_cross_dual`` values and gradients.

    The kernel stores twice the result of each spatial cross product, plus the
    four plain 3D cross products between the top (``w``) and bottom (``v``)
    halves of the two inputs. Forward values are compared with NumPy cross
    products, and the gradients of the spatial products are compared with the
    matching combinations of the 3D cross-product gradients.

    Args:
        test: Test-case instance supplied by the test harness (unused here).
        device: Device on which arrays are allocated and kernels launched.
        dtype: NumPy scalar type selecting the precision under test.
        register_kernels: When true, only create the kernels and return.
    """
    rng = np.random.default_rng(123)

    # absolute tolerance per precision; 0 for any other dtype
    tol = {
        np.float16: 5.0e-3,
        np.float32: 1.0e-6,
        np.float64: 1.0e-8,
    }.get(dtype, 0)

    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
    spatial_vector = wp.types.vector(length=6, dtype=wptype)

    def check_spatial_cross(
        s: wp.array(dtype=spatial_vector),
        v: wp.array(dtype=spatial_vector),
        outputs: wp.array(dtype=wptype),
        outputs_dual: wp.array(dtype=wptype),
        outputs_wcrossw: wp.array(dtype=wptype),
        outputs_vcrossw: wp.array(dtype=wptype),
        outputs_wcrossv: wp.array(dtype=wptype),
        outputs_vcrossv: wp.array(dtype=wptype),
    ):
        c = wp.spatial_cross(s[0], v[0])
        d = wp.spatial_cross_dual(s[0], v[0])

        # multiply outputs by 2 so we've got something to backpropagate:
        for i in range(6):
            outputs[i] = wptype(2) * c[i]
            outputs_dual[i] = wptype(2) * d[i]

        sw = wp.spatial_top(s[0])
        sv = wp.spatial_bottom(s[0])
        vw = wp.spatial_top(v[0])
        vv = wp.spatial_bottom(v[0])

        wcrossw = wp.cross(sw, vw)
        vcrossw = wp.cross(sv, vw)
        wcrossv = wp.cross(sw, vv)
        vcrossv = wp.cross(sv, vv)

        for i in range(3):
            outputs_wcrossw[i] = wcrossw[i]
            outputs_vcrossw[i] = vcrossw[i]
            outputs_wcrossv[i] = wcrossv[i]
            outputs_vcrossv[i] = vcrossv[i]

    kernel = getkernel(check_spatial_cross, suffix=dtype.__name__)
    output_select_kernel = get_select_kernel(wptype)

    if register_kernels:
        return

    s = wp.array(rng.standard_normal(size=6).astype(dtype), dtype=spatial_vector, requires_grad=True, device=device)
    v = wp.array(rng.standard_normal(size=6).astype(dtype), dtype=spatial_vector, requires_grad=True, device=device)
    outputs = wp.zeros(6, dtype=wptype, requires_grad=True, device=device)
    outputs_dual = wp.zeros(6, dtype=wptype, requires_grad=True, device=device)
    outputs_wcrossw = wp.zeros(3, dtype=wptype, requires_grad=True, device=device)
    outputs_vcrossw = wp.zeros(3, dtype=wptype, requires_grad=True, device=device)
    outputs_wcrossv = wp.zeros(3, dtype=wptype, requires_grad=True, device=device)
    outputs_vcrossv = wp.zeros(3, dtype=wptype, requires_grad=True, device=device)

    wp.launch(
        kernel,
        dim=1,
        inputs=[
            s,
            v,
        ],
        outputs=[outputs, outputs_dual, outputs_wcrossw, outputs_vcrossw, outputs_wcrossv, outputs_vcrossv],
        device=device,
    )

    # host-side reference: split each input into its first and last three components
    sw = s.numpy()[0, :3]
    sv = s.numpy()[0, 3:]
    vw = v.numpy()[0, :3]
    vv = v.numpy()[0, 3:]

    wcrossw = np.cross(sw, vw)
    vcrossw = np.cross(sv, vw)
    wcrossv = np.cross(sw, vv)
    vcrossv = np.cross(sv, vv)

    # forward values of spatial_cross (scaled by 2 in the kernel)
    assert_np_equal(outputs.numpy()[:3], 2 * wcrossw, tol=tol)
    assert_np_equal(outputs.numpy()[3:], 2 * (vcrossw + wcrossv), tol=tol)

    # forward values of spatial_cross_dual (scaled by 2 in the kernel)
    assert_np_equal(outputs_dual.numpy()[:3], 2 * (wcrossw + vcrossv), tol=tol)
    assert_np_equal(outputs_dual.numpy()[3:], 2 * wcrossv, tol=tol)

    for i in range(3):
        # one scalar loss per quantity whose gradient is inspected
        cmp_w = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
        cmp_v = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
        cmp_w_dual = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
        cmp_v_dual = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
        cmp_wcrossw = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
        cmp_vcrossw = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
        cmp_wcrossv = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
        cmp_vcrossv = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
        tape = wp.Tape()
        with tape:
            wp.launch(
                kernel,
                dim=1,
                inputs=[
                    s,
                    v,
                ],
                outputs=[outputs, outputs_dual, outputs_wcrossw, outputs_vcrossw, outputs_wcrossv, outputs_vcrossv],
                device=device,
            )

            # ith w and v vector components of spatial_cross:
            wp.launch(output_select_kernel, dim=1, inputs=[outputs, i], outputs=[cmp_w], device=device)
            wp.launch(output_select_kernel, dim=1, inputs=[outputs, i + 3], outputs=[cmp_v], device=device)

            # ith w and v vector components of spatial_cross_dual:
            wp.launch(output_select_kernel, dim=1, inputs=[outputs_dual, i], outputs=[cmp_w_dual], device=device)
            wp.launch(output_select_kernel, dim=1, inputs=[outputs_dual, i + 3], outputs=[cmp_v_dual], device=device)

            # ith vector components of some cross products:
            wp.launch(output_select_kernel, dim=1, inputs=[outputs_wcrossw, i], outputs=[cmp_wcrossw], device=device)
            wp.launch(output_select_kernel, dim=1, inputs=[outputs_vcrossw, i], outputs=[cmp_vcrossw], device=device)
            wp.launch(output_select_kernel, dim=1, inputs=[outputs_wcrossv, i], outputs=[cmp_wcrossv], device=device)
            wp.launch(output_select_kernel, dim=1, inputs=[outputs_vcrossv, i], outputs=[cmp_vcrossv], device=device)

        # Backpropagate from one loss, capture the gradients of s and v, then
        # reset the tape so the next loss starts from zeroed gradients.
        # NOTE(review): the `1.0 *` presumably forces a fresh array so the
        # values survive tape.zero() - confirm.
        def getgrads(cmp, tape=tape):
            tape.backward(loss=cmp)
            sgrads = 1.0 * tape.gradients[s].numpy()
            vgrads = 1.0 * tape.gradients[v].numpy()
            tape.zero()
            return sgrads, vgrads

        dcmp_w_ds, dcmp_w_dv = getgrads(cmp_w)
        dcmp_v_ds, dcmp_v_dv = getgrads(cmp_v)
        dcmp_w_dual_ds, dcmp_w_dual_dv = getgrads(cmp_w_dual)
        dcmp_v_dual_ds, dcmp_v_dual_dv = getgrads(cmp_v_dual)

        dcmp_wcrossw_ds, dcmp_wcrossw_dv = getgrads(cmp_wcrossw)
        dcmp_vcrossw_ds, dcmp_vcrossw_dv = getgrads(cmp_vcrossw)
        dcmp_wcrossv_ds, dcmp_wcrossv_dv = getgrads(cmp_wcrossv)
        dcmp_vcrossv_ds, dcmp_vcrossv_dv = getgrads(cmp_vcrossv)

        # gradients must combine the same way the forward values do (see the
        # forward assertions above); the factor 2 comes from the kernel scaling
        assert_np_equal(dcmp_w_ds, 2 * dcmp_wcrossw_ds, tol=tol)
        assert_np_equal(dcmp_w_dv, 2 * dcmp_wcrossw_dv, tol=tol)

        assert_np_equal(dcmp_v_ds, 2 * (dcmp_vcrossw_ds + dcmp_wcrossv_ds), tol=tol)
        assert_np_equal(dcmp_v_dv, 2 * (dcmp_vcrossw_dv + dcmp_wcrossv_dv), tol=tol)

        assert_np_equal(dcmp_w_dual_ds, 2 * (dcmp_wcrossw_ds + dcmp_vcrossv_ds), tol=tol)
        assert_np_equal(dcmp_w_dual_dv, 2 * (dcmp_wcrossw_dv + dcmp_vcrossv_dv), tol=tol)

        assert_np_equal(dcmp_v_dual_ds, 2 * dcmp_wcrossv_ds, tol=tol)
        assert_np_equal(dcmp_v_dual_dv, 2 * dcmp_wcrossv_dv, tol=tol)
530
+
531
+
532
def test_spatial_top_bottom(test, device, dtype, register_kernels=False):
    """Check ``wp.spatial_top`` / ``wp.spatial_bottom`` values and gradients.

    The kernel writes twice the three components returned by ``spatial_top``
    followed by twice the three returned by ``spatial_bottom``. The result
    must equal ``2 * s`` and the gradient of output ``i`` with respect to
    ``s`` must be ``2`` at index ``i`` and zero elsewhere.

    Args:
        test: Test-case instance supplied by the test harness (unused here).
        device: Device on which arrays are allocated and kernels launched.
        dtype: NumPy scalar type selecting the precision under test.
        register_kernels: When true, only create the kernels and return.
    """
    rng = np.random.default_rng(123)

    tolerances = {
        np.float16: 1.0e-2,
        np.float32: 1.0e-6,
        np.float64: 1.0e-8,
    }
    tol = tolerances.get(dtype, 0)

    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
    spatial_vector = wp.types.vector(length=6, dtype=wptype)

    def check_spatial_top_bottom(
        s: wp.array(dtype=spatial_vector),
        outputs: wp.array(dtype=wptype),
    ):
        top = wp.spatial_top(s[0])
        bottom = wp.spatial_bottom(s[0])

        outputs[0] = wptype(2) * top[0]
        outputs[1] = wptype(2) * top[1]
        outputs[2] = wptype(2) * top[2]

        outputs[3] = wptype(2) * bottom[0]
        outputs[4] = wptype(2) * bottom[1]
        outputs[5] = wptype(2) * bottom[2]

    kernel = getkernel(check_spatial_top_bottom, suffix=dtype.__name__)
    output_select_kernel = get_select_kernel(wptype)

    if register_kernels:
        return

    vec = wp.array(rng.standard_normal(size=6).astype(dtype), dtype=spatial_vector, requires_grad=True, device=device)
    halves = wp.zeros(6, dtype=wptype, requires_grad=True, device=device)

    wp.launch(kernel, dim=1, inputs=[vec], outputs=[halves], device=device)

    assert_np_equal(halves.numpy(), 2.0 * vec.numpy(), tol=tol)

    for component in range(6):
        picked = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
        tape = wp.Tape()
        with tape:
            wp.launch(kernel, dim=1, inputs=[vec], outputs=[halves], device=device)
            wp.launch(output_select_kernel, dim=1, inputs=[halves, component], outputs=[picked], device=device)
        tape.backward(loss=picked)

        # the gradient array holds a single 6-vector, hence the (1, 6) shape
        expected_grad = np.zeros((1, 6))
        expected_grad[0, component] = 2
        assert_np_equal(tape.gradients[vec].numpy(), expected_grad)
        tape.zero()
599
+
600
+
601
def test_transform_constructors(test, device, dtype, register_kernels=False):
    """Check building a transform from a ``vec3`` and a quaternion.

    The kernel constructs a transform from seven scalars (three for the
    position, four for the rotation) and writes twice each of its seven
    components. The result must equal ``2 * input`` and the gradient of
    output ``i`` must be ``2`` at index ``i`` and zero elsewhere.

    Args:
        test: Test-case instance supplied by the test harness (unused here).
        device: Device on which arrays are allocated and kernels launched.
        dtype: NumPy scalar type selecting the precision under test.
        register_kernels: When true, only create the kernels and return.
    """
    rng = np.random.default_rng(123)

    tolerances = {
        np.float16: 5.0e-3,
        np.float32: 1.0e-6,
        np.float64: 1.0e-8,
    }
    tol = tolerances.get(dtype, 0)

    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
    vec3 = wp.types.vector(length=3, dtype=wptype)
    transform = wp.types.transformation(dtype=wptype)
    quat = wp.types.quaternion(dtype=wptype)

    def check_transform_constructor(
        input: wp.array(dtype=wptype),
        out: wp.array(dtype=wptype),
    ):
        result = transform(vec3(input[0], input[1], input[2]), quat(input[3], input[4], input[5], input[6]))

        # multiply the output by 2 so we've got something to backpropagate:
        out[0] = wptype(2) * result[0]
        out[1] = wptype(2) * result[1]
        out[2] = wptype(2) * result[2]
        out[3] = wptype(2) * result[3]
        out[4] = wptype(2) * result[4]
        out[5] = wptype(2) * result[5]
        out[6] = wptype(2) * result[6]

    kernel = getkernel(check_transform_constructor, suffix=dtype.__name__)
    output_select_kernel = get_select_kernel(wptype)

    if register_kernels:
        return

    position = rng.standard_normal(size=3).astype(dtype)
    rotation = rng.standard_normal(size=4).astype(dtype)
    # scale the rotation part to unit length
    rotation /= np.linalg.norm(rotation)

    packed = wp.array(np.concatenate((position, rotation)), requires_grad=True, device=device)
    doubled = wp.zeros_like(packed)

    wp.launch(kernel, dim=1, inputs=[packed], outputs=[doubled], device=device)

    assert_np_equal(doubled.numpy(), 2 * packed.numpy(), tol=tol)

    count = len(packed)
    for component in range(count):
        picked = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
        tape = wp.Tape()
        with tape:
            wp.launch(kernel, dim=1, inputs=[packed], outputs=[doubled], device=device)
            wp.launch(output_select_kernel, dim=1, inputs=[doubled, component], outputs=[picked], device=device)
        tape.backward(loss=picked)

        expected_grad = np.zeros(count)
        expected_grad[component] = 2
        assert_np_equal(tape.gradients[packed].numpy(), expected_grad)
        tape.zero()
658
+
659
+
660
def test_transform_indexing(test, device, dtype, register_kernels=False):
    """Check reading the seven components of a transform by index.

    The kernel writes twice each component to a scalar array. Forward values
    must equal ``2 * input`` and the gradient of output ``i`` with respect to
    the transform must be ``2`` at index ``i`` and zero elsewhere.

    Args:
        test: Test-case instance supplied by the test harness (unused here).
        device: Device on which arrays are allocated and kernels launched.
        dtype: NumPy scalar type selecting the precision under test.
        register_kernels: When true, only create the kernels and return.
    """
    rng = np.random.default_rng(123)

    tolerances = {
        np.float16: 5.0e-3,
        np.float32: 1.0e-6,
        np.float64: 1.0e-8,
    }
    tol = tolerances.get(dtype, 0)

    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
    transform = wp.types.transformation(dtype=wptype)

    def check_transform_indexing(
        input: wp.array(dtype=transform),
        out: wp.array(dtype=wptype),
    ):
        inpt = input[0]

        # multiply outputs by 2 so we've got something to backpropagate:
        idx = 0
        for i in range(7):
            out[idx] = wptype(2) * inpt[i]
            idx = idx + 1

    kernel = getkernel(check_transform_indexing, suffix=dtype.__name__)
    output_select_kernel = get_select_kernel(wptype)

    if register_kernels:
        return

    host_values = rng.standard_normal(size=(1, 7)).astype(dtype)
    xform_in = wp.array(host_values, dtype=transform, requires_grad=True, device=device)
    doubled = wp.zeros(7, dtype=wptype, requires_grad=True, device=device)

    # forward values
    wp.launch(kernel, dim=1, inputs=[xform_in], outputs=[doubled], device=device)
    assert_np_equal(doubled.numpy(), 2 * xform_in.numpy().ravel(), tol=tol)

    # gradients, one output component at a time
    picked = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
    for component in range(7):
        tape = wp.Tape()
        with tape:
            wp.launch(kernel, dim=1, inputs=[xform_in], outputs=[doubled], device=device)
            wp.launch(output_select_kernel, dim=1, inputs=[doubled, component], outputs=[picked], device=device)
        tape.backward(loss=picked)

        expected_grad = np.zeros(7, dtype=dtype)
        expected_grad[component] = 2
        assert_np_equal(tape.gradients[xform_in].numpy()[0], expected_grad)
        tape.zero()
707
+
708
+
709
def test_transform_scalar_multiplication(test, device, dtype, register_kernels=False):
    """Check left and right scalar multiplication of a transform.

    The kernel computes ``s * q`` and ``q * s`` and stores twice each result.
    Both must equal ``2 * s * q``; the gradient of component ``i`` must be
    ``2 * s`` at index ``i`` with respect to ``q`` and ``2 * q[i]`` with
    respect to ``s``.

    Args:
        test: Test-case instance supplied by the test harness (unused here).
        device: Device on which arrays are allocated and kernels launched.
        dtype: NumPy scalar type selecting the precision under test.
        register_kernels: When true, only create the kernels and return.
    """
    rng = np.random.default_rng(123)

    tolerances = {
        np.float16: 5.0e-3,
        np.float32: 1.0e-6,
        np.float64: 1.0e-8,
    }
    tol = tolerances.get(dtype, 0)

    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
    transform = wp.types.transformation(dtype=wptype)

    def check_transform_scalar_mul(
        s: wp.array(dtype=wptype),
        q: wp.array(dtype=transform),
        outcmps_l: wp.array(dtype=wptype),
        outcmps_r: wp.array(dtype=wptype),
    ):
        lresult = s[0] * q[0]
        rresult = q[0] * s[0]

        # multiply outputs by 2 so we've got something to backpropagate:
        for i in range(7):
            outcmps_l[i] = wptype(2) * lresult[i]
            outcmps_r[i] = wptype(2) * rresult[i]

    kernel = getkernel(check_transform_scalar_mul, suffix=dtype.__name__)
    output_select_kernel = get_select_kernel(wptype)

    if register_kernels:
        return

    scalar = wp.array(rng.standard_normal(size=1).astype(dtype), requires_grad=True, device=device)
    xform_host = rng.standard_normal(size=(1, 7)).astype(dtype)
    xform = wp.array(xform_host, dtype=transform, requires_grad=True, device=device)

    left_out = wp.zeros(7, dtype=wptype, requires_grad=True, device=device)
    right_out = wp.zeros(7, dtype=wptype, requires_grad=True, device=device)

    wp.launch(kernel, dim=1, inputs=[scalar, xform], outputs=[left_out, right_out], device=device)

    # both operand orders must give the same forward result
    for product in (left_out, right_out):
        assert_np_equal(product.numpy(), 2 * scalar.numpy()[0] * xform.numpy(), tol=tol)

    picked = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
    for component in range(7):
        # test left/right mul gradients:
        for product in (left_out, right_out):
            tape = wp.Tape()
            with tape:
                wp.launch(kernel, dim=1, inputs=[scalar, xform], outputs=[left_out, right_out], device=device)
                wp.launch(output_select_kernel, dim=1, inputs=[product, component], outputs=[picked], device=device)
            tape.backward(loss=picked)

            expected_xform_grad = np.zeros(7, dtype=dtype)
            expected_xform_grad[component] = 2 * scalar.numpy()[0]
            expected_scalar_grad = 2 * xform.numpy()[0, component]

            assert_np_equal(tape.gradients[xform].numpy()[0], expected_xform_grad, tol=tol)
            assert_np_equal(tape.gradients[scalar].numpy()[0], expected_scalar_grad, tol=tol)
            tape.zero()
775
+
776
+
777
def test_transform_add_sub(test, device, dtype, register_kernels=False):
    """Check component-wise addition and subtraction of two transforms.

    The kernel stores twice ``q + v`` and twice ``q - v``. Forward values are
    compared against NumPy; the gradient of component ``i`` must be ``2`` at
    index ``i`` with respect to ``q``, and ``+2`` (add) or ``-2`` (subtract)
    with respect to ``v``.

    Args:
        test: Test-case instance supplied by the test harness (unused here).
        device: Device on which arrays are allocated and kernels launched.
        dtype: NumPy scalar type selecting the precision under test.
        register_kernels: When true, only create the kernels and return.
    """
    rng = np.random.default_rng(123)

    tolerances = {
        np.float16: 5.0e-3,
        np.float32: 1.0e-6,
        np.float64: 1.0e-8,
    }
    tol = tolerances.get(dtype, 0)

    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
    transform = wp.types.transformation(dtype=wptype)

    def check_transform_add_sub(
        q: wp.array(dtype=transform),
        v: wp.array(dtype=transform),
        outputs_add: wp.array(dtype=wptype),
        outputs_sub: wp.array(dtype=wptype),
    ):
        addresult = q[0] + v[0]
        subresult = q[0] - v[0]
        for i in range(7):
            outputs_add[i] = wptype(2) * addresult[i]
            outputs_sub[i] = wptype(2) * subresult[i]

    kernel = getkernel(check_transform_add_sub, suffix=dtype.__name__)
    output_select_kernel = get_select_kernel(wptype)

    if register_kernels:
        return

    lhs = wp.array(rng.standard_normal(size=7).astype(dtype), dtype=transform, requires_grad=True, device=device)
    rhs = wp.array(rng.standard_normal(size=7).astype(dtype), dtype=transform, requires_grad=True, device=device)

    sum_out = wp.zeros(7, dtype=wptype, requires_grad=True, device=device)
    diff_out = wp.zeros(7, dtype=wptype, requires_grad=True, device=device)

    wp.launch(kernel, dim=1, inputs=[lhs, rhs], outputs=[sum_out, diff_out], device=device)

    assert_np_equal(sum_out.numpy(), 2 * (lhs.numpy() + rhs.numpy()), tol=tol)
    assert_np_equal(diff_out.numpy(), 2 * (lhs.numpy() - rhs.numpy()), tol=tol)

    picked = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
    for component in range(7):
        one_hot = np.zeros(7, dtype=dtype)
        one_hot[component] = 2

        # addition first, then subtraction; only the sign of d/dv differs
        for selected, negate_rhs_grad in ((sum_out, False), (diff_out, True)):
            tape = wp.Tape()
            with tape:
                wp.launch(kernel, dim=1, inputs=[lhs, rhs], outputs=[sum_out, diff_out], device=device)
                wp.launch(output_select_kernel, dim=1, inputs=[selected, component], outputs=[picked], device=device)
            tape.backward(loss=picked)

            expected_rhs_grad = -one_hot if negate_rhs_grad else one_hot
            assert_np_equal(tape.gradients[lhs].numpy()[0], one_hot, tol=tol)
            assert_np_equal(tape.gradients[rhs].numpy()[0], expected_rhs_grad, tol=tol)
            tape.zero()
852
+
853
+
854
def test_transform_get_trans_rot(test, device, dtype, register_kernels=False):
    """Check ``wp.transform_get_translation`` / ``wp.transform_get_rotation``.

    The kernel writes twice the three translation components followed by
    twice the four rotation components. The result must equal ``2 * s`` and
    the gradient of output ``i`` with respect to ``s`` must be ``2`` at index
    ``i`` and zero elsewhere.

    Args:
        test: Test-case instance supplied by the test harness (unused here).
        device: Device on which arrays are allocated and kernels launched.
        dtype: NumPy scalar type selecting the precision under test.
        register_kernels: When true, only create the kernels and return.
    """
    rng = np.random.default_rng(123)

    tolerances = {
        np.float16: 1.0e-2,
        np.float32: 1.0e-6,
        np.float64: 1.0e-8,
    }
    tol = tolerances.get(dtype, 0)

    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
    transform = wp.types.transformation(dtype=wptype)

    def check_transform_get_trans_rot(
        s: wp.array(dtype=transform),
        outputs: wp.array(dtype=wptype),
    ):
        trans = wp.transform_get_translation(s[0])
        q = wp.transform_get_rotation(s[0])

        outputs[0] = wptype(2) * trans[0]
        outputs[1] = wptype(2) * trans[1]
        outputs[2] = wptype(2) * trans[2]

        outputs[3] = wptype(2) * q[0]
        outputs[4] = wptype(2) * q[1]
        outputs[5] = wptype(2) * q[2]
        outputs[6] = wptype(2) * q[3]

    kernel = getkernel(check_transform_get_trans_rot, suffix=dtype.__name__)
    output_select_kernel = get_select_kernel(wptype)

    if register_kernels:
        return

    xform = wp.array(rng.standard_normal(size=7).astype(dtype), dtype=transform, requires_grad=True, device=device)
    parts = wp.zeros(7, dtype=wptype, requires_grad=True, device=device)

    wp.launch(kernel, dim=1, inputs=[xform], outputs=[parts], device=device)

    assert_np_equal(parts.numpy(), 2.0 * xform.numpy(), tol=tol)

    for component in range(7):
        picked = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
        tape = wp.Tape()
        with tape:
            wp.launch(kernel, dim=1, inputs=[xform], outputs=[parts], device=device)
            wp.launch(output_select_kernel, dim=1, inputs=[parts, component], outputs=[picked], device=device)
        tape.backward(loss=picked)

        # the gradient array holds a single 7-component transform, hence (1, 7)
        expected_grad = np.zeros((1, 7))
        expected_grad[0, component] = 2
        assert_np_equal(tape.gradients[xform].numpy(), expected_grad)
        tape.zero()
922
+
923
+
924
def test_transform_multiply(test, device, dtype, register_kernels=False):
    """Check transform composition against a hand-derived reference.

    The kernel evaluates ``a * b``, ``wp.transform_multiply(a, b)`` and a manual
    composition built from ``wp.quat_rotate`` and quaternion multiplication.
    Forward values and the gradients of every output component must agree
    within a dtype-dependent tolerance.

    Args:
        test: Test-case object passed in by the harness (not used here).
        device: Device on which arrays are allocated and kernels are launched.
        dtype: NumPy floating-point scalar type under test.
        register_kernels: When true, return right after the kernels have been
            created, without launching anything.
    """
    rng = np.random.default_rng(123)

    tolerances = {
        np.float16: 1.0e-2,
        np.float32: 1.0e-6,
        np.float64: 1.0e-8,
    }
    tol = tolerances.get(dtype, 0)

    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
    transform = wp.types.transformation(dtype=wptype)

    def check_transform_multiply(
        a: wp.array(dtype=transform),
        b: wp.array(dtype=transform),
        outputs: wp.array(dtype=wptype),
        outputs_fn: wp.array(dtype=wptype),
        outputs_manual: wp.array(dtype=wptype),
    ):
        # operator form and builtin form of the same product:
        result = a[0] * b[0]
        result_fn = wp.transform_multiply(a[0], b[0])

        # reference: compose translation and rotation by hand so that both
        # values and gradients can be compared against it
        atrans = wp.transform_get_translation(a[0])
        arot = wp.transform_get_rotation(a[0])

        btrans = wp.transform_get_translation(b[0])
        brot = wp.transform_get_rotation(b[0])

        trans = wp.quat_rotate(arot, btrans) + atrans
        rot = arot * brot
        result_manual = transform(trans, rot)

        # scale by 2 so the backward pass has a non-trivial factor
        for i in range(7):
            outputs[i] = wptype(2) * result[i]
            outputs_fn[i] = wptype(2) * result_fn[i]
            outputs_manual[i] = wptype(2) * result_manual[i]

    kernel = getkernel(check_transform_multiply, suffix=dtype.__name__)
    output_select_kernel = get_select_kernel(wptype)

    if register_kernels:
        return

    # two random transforms; the last four entries are normalized to unit length
    pose_q = rng.standard_normal(size=7)
    pose_s = rng.standard_normal(size=7)
    pose_q[3:] /= np.linalg.norm(pose_q[3:])
    pose_s[3:] /= np.linalg.norm(pose_s[3:])

    q = wp.array(pose_q.astype(dtype), dtype=transform, requires_grad=True, device=device)
    s = wp.array(pose_s.astype(dtype), dtype=transform, requires_grad=True, device=device)
    outputs = wp.zeros(7, dtype=wptype, requires_grad=True, device=device)
    outputs_fn = wp.zeros(7, dtype=wptype, requires_grad=True, device=device)
    outputs_manual = wp.zeros(7, dtype=wptype, requires_grad=True, device=device)

    def run_kernel():
        wp.launch(
            kernel,
            dim=1,
            inputs=[q, s],
            outputs=[outputs, outputs_fn, outputs_manual],
            device=device,
        )

    def pick(source, index, target):
        wp.launch(output_select_kernel, dim=1, inputs=[source, index], outputs=[target], device=device)

    def new_scalar():
        return wp.zeros(1, dtype=wptype, requires_grad=True, device=device)

    def input_grads(tape, loss):
        # back-propagate one loss, copy both input gradients, then reset the tape
        tape.backward(loss=loss)
        grads = (1.0 * tape.gradients[q].numpy(), 1.0 * tape.gradients[s].numpy())
        tape.zero()
        return grads

    run_kernel()

    assert_np_equal(outputs.numpy(), outputs_fn.numpy(), tol=tol)
    assert_np_equal(outputs.numpy(), outputs_manual.numpy(), tol=tol)

    for i in range(7):
        cmp = new_scalar()
        cmp_fn = new_scalar()
        cmp_manual = new_scalar()

        tape = wp.Tape()
        with tape:
            run_kernel()
            pick(outputs, i, cmp)
            pick(outputs_fn, i, cmp_fn)
            pick(outputs_manual, i, cmp_manual)

        qgrads, sgrads = input_grads(tape, cmp)
        qgrads_fn, sgrads_fn = input_grads(tape, cmp_fn)
        qgrads_manual, sgrads_manual = input_grads(tape, cmp_manual)

        assert_np_equal(qgrads, qgrads_fn, tol=tol)
        assert_np_equal(sgrads, sgrads_fn, tol=tol)

        assert_np_equal(qgrads, qgrads_manual, tol=tol)
        assert_np_equal(sgrads, sgrads_manual, tol=tol)
1031
+
1032
+
1033
def test_transform_inverse(test, device, dtype, register_kernels=False):
    """Check ``wp.transform_inverse`` values and gradients.

    The kernel computes the builtin inverse, the product ``inverse * a`` (which
    should be the identity transform) and a manual inverse built from
    ``wp.quat_inverse`` and ``wp.quat_rotate``. All outputs are scaled by 2.

    Args:
        test: Test-case object passed in by the harness (not used here).
        device: Device on which arrays are allocated and kernels are launched.
        dtype: NumPy floating-point scalar type under test.
        register_kernels: When true, return right after the kernels have been
            created, without launching anything.
    """
    rng = np.random.default_rng(123)

    tol = {
        np.float16: 1.0e-2,
        np.float32: 1.0e-6,
        np.float64: 1.0e-8,
    }.get(dtype, 0)

    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
    transform = wp.types.transformation(dtype=wptype)

    def check_transform_inverse(
        a: wp.array(dtype=transform),
        outputs: wp.array(dtype=wptype),
        outputs_shouldbeidentity: wp.array(dtype=wptype),
        outputs_manual: wp.array(dtype=wptype),
    ):
        result = wp.transform_inverse(a[0])
        idt = result * a[0]

        # let's just work out the transform inverse manually
        # and compare value/gradients with that:
        atrans = wp.transform_get_translation(a[0])
        arot = wp.transform_get_rotation(a[0])

        rotinv = wp.quat_inverse(arot)
        result_manual = transform(-wp.quat_rotate(rotinv, atrans), rotinv)

        # scale by 2 so the backward pass has a non-trivial factor
        for i in range(7):
            outputs[i] = wptype(2) * result[i]
            outputs_shouldbeidentity[i] = wptype(2) * idt[i]
            outputs_manual[i] = wptype(2) * result_manual[i]

    kernel = getkernel(check_transform_inverse, suffix=dtype.__name__)
    output_select_kernel = get_select_kernel(wptype)

    if register_kernels:
        return

    # A single random transform is all this test needs. The original also drew
    # and normalized a second sample that was never used; it came after this
    # draw, so dropping it leaves the values of q unchanged.
    q = rng.standard_normal(size=7)
    q[3:] /= np.linalg.norm(q[3:])

    q = wp.array(q.astype(dtype), dtype=transform, requires_grad=True, device=device)
    outputs = wp.zeros(7, dtype=wptype, requires_grad=True, device=device)
    outputs_shouldbeidentity = wp.zeros(7, dtype=wptype, requires_grad=True, device=device)
    outputs_manual = wp.zeros(7, dtype=wptype, requires_grad=True, device=device)

    wp.launch(
        kernel,
        dim=1,
        inputs=[
            q,
        ],
        outputs=[outputs, outputs_shouldbeidentity, outputs_manual],
        device=device,
    )

    # check inverse: twice the identity transform is (0, 0, 0, 0, 0, 0, 2)
    assert_np_equal(outputs_shouldbeidentity.numpy(), np.array([0, 0, 0, 0, 0, 0, 2]), tol=tol)

    # same as manual result:
    assert_np_equal(outputs.numpy(), outputs_manual.numpy(), tol=tol)

    for i in range(7):
        cmp = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
        cmp_manual = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
        tape = wp.Tape()
        with tape:
            wp.launch(
                kernel,
                dim=1,
                inputs=[
                    q,
                ],
                outputs=[outputs, outputs_shouldbeidentity, outputs_manual],
                device=device,
            )
            wp.launch(output_select_kernel, dim=1, inputs=[outputs, i], outputs=[cmp], device=device)
            wp.launch(output_select_kernel, dim=1, inputs=[outputs_manual, i], outputs=[cmp_manual], device=device)

        # `1.0 *` copies the gradient before the tape is zeroed
        tape.backward(loss=cmp)
        qgrads = 1.0 * tape.gradients[q].numpy()
        tape.zero()
        tape.backward(loss=cmp_manual)
        qgrads_manual = 1.0 * tape.gradients[q].numpy()
        tape.zero()

        # check gradients against manual result:
        assert_np_equal(qgrads, qgrads_manual, tol=tol)
1124
+
1125
+
1126
def test_transform_point_vector(test, device, dtype, register_kernels=False):
    """Check ``wp.transform_point`` and ``wp.transform_vector``.

    Each builtin is compared with a manual formula: translation plus rotated
    vector for points, rotated vector alone for vectors. Forward values and the
    gradients with respect to the transform and the vector are checked.

    Args:
        test: Test-case object passed in by the harness (not used here).
        device: Device on which arrays are allocated and kernels are launched.
        dtype: NumPy floating-point scalar type under test.
        register_kernels: When true, return right after the kernels have been
            created, without launching anything.
    """
    rng = np.random.default_rng(123)

    tolerances = {
        np.float16: 1.0e-2,
        np.float32: 1.0e-6,
        np.float64: 1.0e-8,
    }
    tol = tolerances.get(dtype, 0)

    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
    transform = wp.types.transformation(dtype=wptype)
    vec3 = wp.types.vector(length=3, dtype=wptype)

    def check_transform_point_vector(
        t: wp.array(dtype=transform),
        v: wp.array(dtype=vec3),
        outputs_pt: wp.array(dtype=wptype),
        outputs_pt_manual: wp.array(dtype=wptype),
        outputs_vec: wp.array(dtype=wptype),
        outputs_vec_manual: wp.array(dtype=wptype),
    ):
        # points pick up the translation, vectors are only rotated
        result_pt = wp.transform_point(t[0], v[0])
        result_pt_manual = wp.transform_get_translation(t[0]) + wp.quat_rotate(wp.transform_get_rotation(t[0]), v[0])

        result_vec = wp.transform_vector(t[0], v[0])
        result_vec_manual = wp.quat_rotate(wp.transform_get_rotation(t[0]), v[0])

        # scale by 2 so the backward pass has a non-trivial factor
        for i in range(3):
            outputs_pt[i] = wptype(2) * result_pt[i]
            outputs_pt_manual[i] = wptype(2) * result_pt_manual[i]
            outputs_vec[i] = wptype(2) * result_vec[i]
            outputs_vec_manual[i] = wptype(2) * result_vec_manual[i]

    kernel = getkernel(check_transform_point_vector, suffix=dtype.__name__)
    output_select_kernel = get_select_kernel(wptype)

    if register_kernels:
        return

    # random transform whose last four entries are normalized to unit length
    pose = rng.standard_normal(size=7)
    pose[3:] /= np.linalg.norm(pose[3:])

    t = wp.array(pose.astype(dtype), dtype=transform, requires_grad=True, device=device)
    v = wp.array(rng.standard_normal(size=3), dtype=vec3, requires_grad=True, device=device)

    def new_array(count):
        return wp.zeros(count, dtype=wptype, requires_grad=True, device=device)

    outputs_pt = new_array(3)
    outputs_pt_manual = new_array(3)
    outputs_vec = new_array(3)
    outputs_vec_manual = new_array(3)

    def run_kernel():
        wp.launch(
            kernel,
            dim=1,
            inputs=[t, v],
            outputs=[outputs_pt, outputs_pt_manual, outputs_vec, outputs_vec_manual],
            device=device,
        )

    def pick(source, index, target):
        wp.launch(output_select_kernel, dim=1, inputs=[source, index], outputs=[target], device=device)

    def input_grads(tape, loss):
        # back-propagate one loss, copy both input gradients, then reset the tape
        tape.backward(loss=loss)
        grads = (1.0 * tape.gradients[t].numpy(), 1.0 * tape.gradients[v].numpy())
        tape.zero()
        return grads

    run_kernel()

    # forward values must match the manual formulas
    assert_np_equal(outputs_pt.numpy(), outputs_pt_manual.numpy(), tol=tol)
    assert_np_equal(outputs_vec.numpy(), outputs_vec_manual.numpy(), tol=tol)

    for i in range(3):
        cmp_pt = new_array(1)
        cmp_pt_manual = new_array(1)
        cmp_vec = new_array(1)
        cmp_vec_manual = new_array(1)

        tape = wp.Tape()
        with tape:
            run_kernel()
            pick(outputs_pt, i, cmp_pt)
            pick(outputs_pt_manual, i, cmp_pt_manual)
            pick(outputs_vec, i, cmp_vec)
            pick(outputs_vec_manual, i, cmp_vec_manual)

        tgrads_pt, vgrads_pt = input_grads(tape, cmp_pt)
        tgrads_pt_manual, vgrads_pt_manual = input_grads(tape, cmp_pt_manual)
        tgrads_vec, vgrads_vec = input_grads(tape, cmp_vec)
        tgrads_vec_manual, vgrads_vec_manual = input_grads(tape, cmp_vec_manual)

        # gradients must match the manual formulas as well
        assert_np_equal(tgrads_pt, tgrads_pt_manual, tol=tol)
        assert_np_equal(vgrads_pt, vgrads_pt_manual, tol=tol)
        assert_np_equal(tgrads_vec, tgrads_vec_manual, tol=tol)
        assert_np_equal(vgrads_vec, vgrads_vec_manual, tol=tol)
1232
+
1233
+
1234
def test_spatial_matrix_constructors(test, device, dtype, register_kernels=False):
    """Check the 36-scalar and the zero-argument 6x6 matrix constructors.

    The kernel writes twice the component-wise constructed matrix into the
    first 36 output slots and the default-constructed matrix into the next 36.

    Args:
        test: Test-case object passed in by the harness (not used here).
        device: Device on which arrays are allocated and kernels are launched.
        dtype: NumPy floating-point scalar type under test.
        register_kernels: When true, return right after the kernels have been
            created, without launching anything.
    """
    rng = np.random.default_rng(123)

    tolerances = {
        np.float16: 5.0e-3,
        np.float32: 1.0e-6,
        np.float64: 1.0e-8,
    }
    tol = tolerances.get(dtype, 0)

    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
    spatial_matrix = wp.types.matrix(shape=(6, 6), dtype=wptype)

    def check_spatial_matrix_constructor(
        input: wp.array(dtype=wptype),
        out: wp.array(dtype=wptype),
    ):
        # build the matrix from 36 scalars, laid out here one matrix row per line
        result0 = spatial_matrix(
            input[0], input[1], input[2], input[3], input[4], input[5],
            input[6], input[7], input[8], input[9], input[10], input[11],
            input[12], input[13], input[14], input[15], input[16], input[17],
            input[18], input[19], input[20], input[21], input[22], input[23],
            input[24], input[25], input[26], input[27], input[28], input[29],
            input[30], input[31], input[32], input[33], input[34], input[35],
        )
        result1 = spatial_matrix()

        # multiply the output by 2 so we've got something to backpropagate:
        idx = 0
        for i in range(6):
            for j in range(6):
                out[idx] = wptype(2) * result0[i, j]
                idx = idx + 1

        # the default-constructed matrix is written out unscaled
        for i in range(6):
            for j in range(6):
                out[idx] = result1[i, j]
                idx = idx + 1

    kernel = getkernel(check_spatial_matrix_constructor, suffix=dtype.__name__)
    output_select_kernel = get_select_kernel(wptype)

    if register_kernels:
        return

    values = wp.array(rng.standard_normal(size=6 * 6).astype(dtype), requires_grad=True, device=device)
    output = wp.zeros(2 * 6 * 6, dtype=wptype, requires_grad=True, device=device)

    def run_kernel():
        wp.launch(kernel, dim=1, inputs=[values], outputs=[output], device=device)

    run_kernel()

    flat = output.numpy()
    assert_np_equal(flat[: 6 * 6], 2 * values.numpy(), tol=tol)
    assert_np_equal(flat[6 * 6 :], np.zeros_like(values.numpy()), tol=tol)

    count = len(values)
    for i in range(count):
        cmp = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
        tape = wp.Tape()
        with tape:
            run_kernel()
            wp.launch(output_select_kernel, dim=1, inputs=[output, i], outputs=[cmp], device=device)
        tape.backward(loss=cmp)

        # only input component i contributes to the selected output
        expectedgrads = np.zeros(count)
        expectedgrads[i] = 2
        assert_np_equal(tape.gradients[values].numpy(), expectedgrads)
        tape.zero()

        # NOTE(review): the unconditional break means only component 0 is
        # gradient-checked; kept as in the original - confirm whether intended.
        break
1328
+
1329
+
1330
def test_spatial_matrix_indexing(test, device, dtype, register_kernels=False):
    """Check element indexing ``m[i, j]`` of a 6x6 matrix and its gradient.

    Args:
        test: Test-case object passed in by the harness (not used here).
        device: Device on which arrays are allocated and kernels are launched.
        dtype: NumPy floating-point scalar type under test.
        register_kernels: When true, return right after the kernels have been
            created, without launching anything.
    """
    rng = np.random.default_rng(123)

    tolerances = {
        np.float16: 5.0e-3,
        np.float32: 1.0e-6,
        np.float64: 1.0e-8,
    }
    tol = tolerances.get(dtype, 0)

    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
    spatial_matrix = wp.types.matrix(shape=(6, 6), dtype=wptype)

    def check_spatial_matrix_indexing(
        input: wp.array(dtype=spatial_matrix),
        out: wp.array(dtype=wptype),
    ):
        inpt = input[0]

        # multiply outputs by 2 so we've got something to backpropagate:
        idx = 0
        for i in range(6):
            for j in range(6):
                out[idx] = wptype(2) * inpt[i, j]
                idx = idx + 1

    kernel = getkernel(check_spatial_matrix_indexing, suffix=dtype.__name__)
    output_select_kernel = get_select_kernel(wptype)

    if register_kernels:
        return

    matrix = wp.array(
        rng.standard_normal(size=(1, 6, 6)).astype(dtype), dtype=spatial_matrix, requires_grad=True, device=device
    )
    outcmps = wp.zeros(6 * 6, dtype=wptype, requires_grad=True, device=device)

    def run_kernel():
        wp.launch(kernel, dim=1, inputs=[matrix], outputs=[outcmps], device=device)

    run_kernel()

    assert_np_equal(outcmps.numpy(), 2 * matrix.numpy().ravel(), tol=tol)

    out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
    # flat index into the row-major output; (i, j) is the matrix element it holds
    for idx in range(6 * 6):
        i, j = divmod(idx, 6)

        tape = wp.Tape()
        with tape:
            run_kernel()
            wp.launch(output_select_kernel, dim=1, inputs=[outcmps, idx], outputs=[out], device=device)
        tape.backward(loss=out)

        # only element (i, j) feeds the selected output, with factor 2
        expectedresult = np.zeros((6, 6), dtype=dtype)
        expectedresult[i, j] = 2
        assert_np_equal(tape.gradients[matrix].numpy()[0], expectedresult)
        tape.zero()
1383
+
1384
+
1385
def test_spatial_matrix_scalar_multiplication(test, device, dtype, register_kernels=False):
    """Check scalar * matrix and matrix * scalar for 6x6 matrices.

    Both operand orders are evaluated in one kernel; values and gradients with
    respect to the scalar and the matrix are verified for each order.

    Args:
        test: Test-case object passed in by the harness (not used here).
        device: Device on which arrays are allocated and kernels are launched.
        dtype: NumPy floating-point scalar type under test.
        register_kernels: When true, return right after the kernels have been
            created, without launching anything.
    """
    rng = np.random.default_rng(123)

    tolerances = {
        np.float16: 5.0e-3,
        np.float32: 1.0e-6,
        np.float64: 1.0e-8,
    }
    tol = tolerances.get(dtype, 0)

    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
    spatial_matrix = wp.types.matrix(shape=(6, 6), dtype=wptype)

    def check_spatial_matrix_scalar_mul(
        s: wp.array(dtype=wptype),
        q: wp.array(dtype=spatial_matrix),
        outcmps_l: wp.array(dtype=wptype),
        outcmps_r: wp.array(dtype=wptype),
    ):
        lresult = s[0] * q[0]
        rresult = q[0] * s[0]

        # multiply outputs by 2 so we've got something to backpropagate:
        idx = 0
        for i in range(6):
            for j in range(6):
                outcmps_l[idx] = wptype(2) * lresult[i, j]
                outcmps_r[idx] = wptype(2) * rresult[i, j]
                idx = idx + 1

    kernel = getkernel(check_spatial_matrix_scalar_mul, suffix=dtype.__name__)
    output_select_kernel = get_select_kernel(wptype)

    if register_kernels:
        return

    s = wp.array(rng.standard_normal(size=1).astype(dtype), requires_grad=True, device=device)
    q = wp.array(
        rng.standard_normal(size=(1, 6, 6)).astype(dtype), dtype=spatial_matrix, requires_grad=True, device=device
    )

    outcmps_l = wp.zeros(6 * 6, dtype=wptype, requires_grad=True, device=device)
    outcmps_r = wp.zeros(6 * 6, dtype=wptype, requires_grad=True, device=device)

    def run_kernel():
        wp.launch(kernel, dim=1, inputs=[s, q], outputs=[outcmps_l, outcmps_r], device=device)

    run_kernel()

    # the inputs never change, so take their host values once
    scale = s.numpy()[0]
    q_host = q.numpy()

    assert_np_equal(outcmps_l.numpy(), 2 * scale * q_host, tol=tol)
    assert_np_equal(outcmps_r.numpy(), 2 * scale * q_host, tol=tol)

    out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
    for idx in range(6 * 6):
        i, j = divmod(idx, 6)

        # left and right multiplication must give the same gradients
        for wrt in (outcmps_l, outcmps_r):
            tape = wp.Tape()
            with tape:
                run_kernel()
                wp.launch(output_select_kernel, dim=1, inputs=[wrt, idx], outputs=[out], device=device)
            tape.backward(loss=out)

            # d(2 * s * q[i, j]) / dq is 2 * s at (i, j); d/ds is 2 * q[i, j]
            expectedresult = np.zeros((6, 6), dtype=dtype)
            expectedresult[i, j] = 2 * scale
            assert_np_equal(tape.gradients[q].numpy()[0], expectedresult, tol=tol)
            assert_np_equal(tape.gradients[s].numpy()[0], 2 * q_host[0, i, j], tol=tol)
            tape.zero()
1459
+
1460
+
1461
def test_spatial_matrix_add_sub(test, device, dtype, register_kernels=False):
    """Check addition and subtraction of 6x6 matrices and their gradients.

    Args:
        test: Test-case object passed in by the harness (not used here).
        device: Device on which arrays are allocated and kernels are launched.
        dtype: NumPy floating-point scalar type under test.
        register_kernels: When true, return right after the kernels have been
            created, without launching anything.
    """
    rng = np.random.default_rng(123)

    tolerances = {
        np.float16: 5.0e-3,
        np.float32: 1.0e-6,
        np.float64: 1.0e-8,
    }
    tol = tolerances.get(dtype, 0)

    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
    spatial_matrix = wp.types.matrix(shape=(6, 6), dtype=wptype)

    def check_spatial_matrix_add_sub(
        q: wp.array(dtype=spatial_matrix),
        v: wp.array(dtype=spatial_matrix),
        outputs_add: wp.array(dtype=wptype),
        outputs_sub: wp.array(dtype=wptype),
    ):
        addresult = q[0] + v[0]
        subresult = q[0] - v[0]

        # scale by 2 so the backward pass has a non-trivial factor
        idx = 0
        for i in range(6):
            for j in range(6):
                outputs_add[idx] = wptype(2) * addresult[i, j]
                outputs_sub[idx] = wptype(2) * subresult[i, j]
                idx = idx + 1

    kernel = getkernel(check_spatial_matrix_add_sub, suffix=dtype.__name__)
    output_select_kernel = get_select_kernel(wptype)

    if register_kernels:
        return

    q = wp.array(
        rng.standard_normal(size=(1, 6, 6)).astype(dtype), dtype=spatial_matrix, requires_grad=True, device=device
    )
    v = wp.array(
        rng.standard_normal(size=(1, 6, 6)).astype(dtype), dtype=spatial_matrix, requires_grad=True, device=device
    )

    outputs_add = wp.zeros(6 * 6, dtype=wptype, requires_grad=True, device=device)
    outputs_sub = wp.zeros(6 * 6, dtype=wptype, requires_grad=True, device=device)

    def run_kernel():
        wp.launch(kernel, dim=1, inputs=[q, v], outputs=[outputs_add, outputs_sub], device=device)

    run_kernel()

    assert_np_equal(outputs_add.numpy(), 2 * (q.numpy() + v.numpy()), tol=tol)
    assert_np_equal(outputs_sub.numpy(), 2 * (q.numpy() - v.numpy()), tol=tol)

    out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
    for idx in range(6 * 6):
        i, j = divmod(idx, 6)

        # gradient with respect to q is +2 at (i, j) for both operations
        expected_q = np.zeros((6, 6), dtype=dtype)
        expected_q[i, j] = 2

        # addition first, then subtraction; only the sign of the gradient
        # with respect to v differs between the two
        cases = ((outputs_add, expected_q), (outputs_sub, -expected_q))
        for selected, expected_v in cases:
            tape = wp.Tape()
            with tape:
                run_kernel()
                wp.launch(output_select_kernel, dim=1, inputs=[selected, idx], outputs=[out], device=device)
            tape.backward(loss=out)
            assert_np_equal(tape.gradients[q].numpy()[0], expected_q, tol=tol)
            assert_np_equal(tape.gradients[v].numpy()[0], expected_v, tol=tol)
            tape.zero()
1547
+
1548
+
1549
def test_spatial_matvec_multiplication(test, device, dtype, register_kernels=False):
    """Check the product of a 6x6 matrix with a 6-vector and its gradients.

    Args:
        test: Test-case object passed in by the harness (not used here).
        device: Device on which arrays are allocated and kernels are launched.
        dtype: NumPy floating-point scalar type under test.
        register_kernels: When true, return right after the kernels have been
            created, without launching anything.
    """
    rng = np.random.default_rng(123)

    tolerances = {
        np.float16: 2.0e-2,
        np.float32: 5.0e-6,
        np.float64: 1.0e-8,
    }
    tol = tolerances.get(dtype, 0)

    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
    spatial_matrix = wp.types.matrix(shape=(6, 6), dtype=wptype)
    spatial_vector = wp.types.vector(length=6, dtype=wptype)

    output_select_kernel = get_select_kernel(wptype)

    def check_spatial_mat_vec_mul(
        v: wp.array(dtype=spatial_vector),
        m: wp.array(dtype=spatial_matrix),
        outcomponents: wp.array(dtype=wptype),
    ):
        result = m[0] * v[0]

        # multiply outputs by 2 so we've got something to backpropagate:
        idx = 0
        for i in range(6):
            outcomponents[idx] = wptype(2) * result[i]
            idx = idx + 1

    kernel = getkernel(check_spatial_mat_vec_mul, suffix=dtype.__name__)

    if register_kernels:
        return

    v = wp.array(
        rng.standard_normal(size=(1, 6)).astype(dtype), dtype=spatial_vector, requires_grad=True, device=device
    )
    m = wp.array(
        rng.standard_normal(size=(1, 6, 6)).astype(dtype), dtype=spatial_matrix, requires_grad=True, device=device
    )
    outcomponents = wp.zeros(6, dtype=wptype, requires_grad=True, device=device)

    def run_kernel():
        wp.launch(kernel, dim=1, inputs=[v, m], outputs=[outcomponents], device=device)

    run_kernel()

    # the inputs never change, so take their host values once
    v_host = v.numpy()
    m_host = m.numpy()

    assert_np_equal(outcomponents.numpy(), 2 * np.matmul(m_host[0], v_host[0]), tol=tol)

    out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
    for row in range(6):
        tape = wp.Tape()
        with tape:
            run_kernel()
            wp.launch(output_select_kernel, dim=1, inputs=[outcomponents, row], outputs=[out], device=device)
        tape.backward(loss=out)

        # output `row` is 2 * dot(m[row, :], v): the gradient wrt v is twice
        # that matrix row, and the gradient wrt m is 2 * v placed in that row
        assert_np_equal(tape.gradients[v].numpy()[0], 2 * m_host[0, row, :], tol=tol)
        expectedresult = np.zeros((6, 6), dtype=dtype)
        expectedresult[row, :] = 2 * v_host[0]
        assert_np_equal(tape.gradients[m].numpy()[0], expectedresult, tol=tol)

        tape.zero()
1608
+
1609
+
1610
def test_spatial_matmat_multiplication(test, device, dtype, register_kernels=False):
    """Check the product of two 6x6 matrices and its gradients.

    Gradient comparisons use ten times the forward tolerance.

    Args:
        test: Test-case object passed in by the harness (not used here).
        device: Device on which arrays are allocated and kernels are launched.
        dtype: NumPy floating-point scalar type under test.
        register_kernels: When true, return right after the kernels have been
            created, without launching anything.
    """
    rng = np.random.default_rng(123)

    tolerances = {
        np.float16: 2.0e-2,
        np.float32: 5.0e-6,
        np.float64: 1.0e-8,
    }
    tol = tolerances.get(dtype, 0)

    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
    spatial_matrix = wp.types.matrix(shape=(6, 6), dtype=wptype)

    output_select_kernel = get_select_kernel(wptype)

    def check_mat_mat_mul(
        v: wp.array(dtype=spatial_matrix),
        m: wp.array(dtype=spatial_matrix),
        outcomponents: wp.array(dtype=wptype),
    ):
        result = m[0] * v[0]

        # multiply outputs by 2 so we've got something to backpropagate:
        idx = 0
        for i in range(6):
            for j in range(6):
                outcomponents[idx] = wptype(2) * result[i, j]
                idx = idx + 1

    kernel = getkernel(check_mat_mat_mul, suffix=dtype.__name__)

    if register_kernels:
        return

    v = wp.array(
        rng.standard_normal(size=(1, 6, 6)).astype(dtype), dtype=spatial_matrix, requires_grad=True, device=device
    )
    m = wp.array(
        rng.standard_normal(size=(1, 6, 6)).astype(dtype), dtype=spatial_matrix, requires_grad=True, device=device
    )
    outcomponents = wp.zeros(6 * 6, dtype=wptype, requires_grad=True, device=device)

    def run_kernel():
        wp.launch(kernel, dim=1, inputs=[v, m], outputs=[outcomponents], device=device)

    run_kernel()

    # the inputs never change, so take their host values once
    v_host = v.numpy()
    m_host = m.numpy()

    assert_np_equal(outcomponents.numpy(), 2 * np.matmul(m_host[0], v_host[0]), tol=tol)

    grad_tol = 10 * tol
    out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
    for idx in range(6 * 6):
        i, j = divmod(idx, 6)

        tape = wp.Tape()
        with tape:
            run_kernel()
            wp.launch(output_select_kernel, dim=1, inputs=[outcomponents, idx], outputs=[out], device=device)
        tape.backward(loss=out)

        # output (i, j) is 2 * dot(m[i, :], v[:, j]): only column j of v and
        # row i of m receive a gradient
        expected = np.zeros((6, 6), dtype=dtype)
        expected[:, j] = 2 * m_host[0, i, :]
        assert_np_equal(tape.gradients[v].numpy()[0], expected, tol=grad_tol)

        expected = np.zeros((6, 6), dtype=dtype)
        expected[i, :] = 2 * v_host[0, :, j]
        assert_np_equal(tape.gradients[m].numpy()[0], expected, tol=grad_tol)

        tape.zero()
1675
+
1676
+
1677
def test_spatial_mat_transpose(test, device, dtype, register_kernels=False):
    """Check ``wp.transpose`` on a 6x6 matrix and its gradient.

    Args:
        test: Test-case object passed in by the harness (not used here).
        device: Device on which arrays are allocated and kernels are launched.
        dtype: NumPy floating-point scalar type under test.
        register_kernels: When true, return right after the kernels have been
            created, without launching anything.
    """
    rng = np.random.default_rng(123)

    tolerances = {
        np.float16: 1.0e-2,
        np.float32: 1.0e-6,
        np.float64: 1.0e-8,
    }
    tol = tolerances.get(dtype, 0)

    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
    spatial_matrix = wp.types.matrix(shape=(6, 6), dtype=wptype)

    output_select_kernel = get_select_kernel(wptype)

    def check_spatial_mat_transpose(
        m: wp.array(dtype=spatial_matrix),
        outcomponents: wp.array(dtype=wptype),
    ):
        # multiply outputs by 2 so we've got something to backpropagate:
        mat = wptype(2) * wp.transpose(m[0])

        idx = 0
        for i in range(6):
            for j in range(6):
                outcomponents[idx] = mat[i, j]
                idx = idx + 1

    kernel = getkernel(check_spatial_mat_transpose, suffix=dtype.__name__)

    if register_kernels:
        return

    m = wp.array(
        rng.standard_normal(size=(1, 6, 6)).astype(dtype), dtype=spatial_matrix, requires_grad=True, device=device
    )
    outcomponents = wp.zeros(6 * 6, dtype=wptype, requires_grad=True, device=device)

    def run_kernel():
        wp.launch(kernel, dim=1, inputs=[m], outputs=[outcomponents], device=device)

    run_kernel()

    assert_np_equal(outcomponents.numpy(), 2 * m.numpy()[0].T, tol=tol)

    out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
    for idx in range(6 * 6):
        i, j = divmod(idx, 6)

        tape = wp.Tape()
        with tape:
            run_kernel()
            wp.launch(output_select_kernel, dim=1, inputs=[outcomponents, idx], outputs=[out], device=device)
        tape.backward(loss=out)

        # output (i, j) of the transpose reads input element (j, i)
        expectedresult = np.zeros((6, 6), dtype=dtype)
        expectedresult[j, i] = 2
        assert_np_equal(tape.gradients[m].numpy()[0], expectedresult)
        tape.zero()
1732
+
1733
+
1734
def test_spatial_outer_product(test, device, dtype, register_kernels=False):
    """Check ``wp.outer`` on two 6-vectors and its gradients.

    Gradient comparisons use ten times the forward tolerance.

    Args:
        test: Test-case object passed in by the harness (not used here).
        device: Device on which arrays are allocated and kernels are launched.
        dtype: NumPy floating-point scalar type under test.
        register_kernels: When true, return right after the kernels have been
            created, without launching anything.
    """
    rng = np.random.default_rng(123)

    tolerances = {
        np.float16: 5.0e-3,
        np.float32: 1.0e-6,
        np.float64: 1.0e-8,
    }
    tol = tolerances.get(dtype, 0)

    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
    spatial_vector = wp.types.vector(length=6, dtype=wptype)

    output_select_kernel = get_select_kernel(wptype)

    def check_spatial_outer_product(
        s: wp.array(dtype=spatial_vector),
        v: wp.array(dtype=spatial_vector),
        outcomponents: wp.array(dtype=wptype),
    ):
        # multiply the result by 2 so we've got something to backpropagate:
        mresult = wptype(2) * wp.outer(s[0], v[0])

        idx = 0
        for i in range(6):
            for j in range(6):
                outcomponents[idx] = mresult[i, j]
                idx = idx + 1

    kernel = getkernel(check_spatial_outer_product, suffix=dtype.__name__)

    if register_kernels:
        return

    s = wp.array(
        rng.standard_normal(size=(1, 6)).astype(dtype), dtype=spatial_vector, requires_grad=True, device=device
    )
    v = wp.array(
        rng.standard_normal(size=(1, 6)).astype(dtype), dtype=spatial_vector, requires_grad=True, device=device
    )
    outcomponents = wp.zeros(6 * 6, dtype=wptype, requires_grad=True, device=device)

    def run_kernel():
        wp.launch(kernel, dim=1, inputs=[s, v], outputs=[outcomponents], device=device)

    run_kernel()

    # the inputs never change, so take their host values once
    s_host = s.numpy()
    v_host = v.numpy()

    assert_np_equal(outcomponents.numpy(), 2 * s_host[0, :, None] * v_host[0, None, :], tol=tol)

    grad_tol = 10 * tol
    out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)

    for idx in range(6 * 6):
        i, j = divmod(idx, 6)

        tape = wp.Tape()
        with tape:
            run_kernel()
            wp.launch(output_select_kernel, dim=1, inputs=[outcomponents, idx], outputs=[out], device=device)
        tape.backward(loss=out)

        # this component is 2 * s_i * v_j, so its s gradient is nonzero only at
        # the ith component and its v gradient only at the jth component:
        expectedresult = np.zeros(6, dtype=dtype)
        expectedresult[i] = 2 * v_host[0, j]
        assert_np_equal(tape.gradients[s].numpy()[0], expectedresult, tol=grad_tol)

        expectedresult = np.zeros(6, dtype=dtype)
        expectedresult[j] = 2 * s_host[0, i]
        assert_np_equal(tape.gradients[v].numpy()[0], expectedresult, tol=grad_tol)
        tape.zero()
1812
+
1813
+
1814
def test_spatial_adjoint(test, device, dtype, register_kernels=False):
    """Check ``wp.spatial_adjoint`` values and gradients.

    The expected 6x6 result used by this test holds ``R`` in the upper-left
    and lower-right 3x3 blocks, ``S`` in the lower-left block and zeros in the
    upper-right block. Gradient comparisons use ten times the forward
    tolerance.

    Args:
        test: Test-case object passed in by the harness (not used here).
        device: Device on which arrays are allocated and kernels are launched.
        dtype: NumPy floating-point scalar type under test.
        register_kernels: When true, return right after the kernels have been
            created, without launching anything.
    """
    rng = np.random.default_rng(123)

    tolerances = {
        np.float16: 5.0e-3,
        np.float32: 1.0e-6,
        np.float64: 1.0e-8,
    }
    tol = tolerances.get(dtype, 0)

    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
    mat3 = wp.types.matrix(shape=(3, 3), dtype=wptype)

    output_select_kernel = get_select_kernel(wptype)

    def check_spatial_adjoint(
        R: wp.array(dtype=mat3),
        S: wp.array(dtype=mat3),
        outcomponents: wp.array(dtype=wptype),
    ):
        # multiply the result by 2 so we've got something to backpropagate:
        mresult = wptype(2) * wp.spatial_adjoint(R[0], S[0])

        idx = 0
        for i in range(6):
            for j in range(6):
                outcomponents[idx] = mresult[i, j]
                idx = idx + 1

    kernel = getkernel(check_spatial_adjoint, suffix=dtype.__name__)

    if register_kernels:
        return

    R = wp.array(rng.standard_normal(size=(1, 3, 3)).astype(dtype), dtype=mat3, requires_grad=True, device=device)
    S = wp.array(rng.standard_normal(size=(1, 3, 3)).astype(dtype), dtype=mat3, requires_grad=True, device=device)
    outcomponents = wp.zeros(6 * 6, dtype=wptype, requires_grad=True, device=device)

    def run_kernel():
        wp.launch(kernel, dim=1, inputs=[R, S], outputs=[outcomponents], device=device)

    run_kernel()

    # assemble the expected block matrix on the host
    result = outcomponents.numpy().reshape(6, 6)
    r_host = R.numpy()
    expected = np.zeros_like(result)
    expected[:3, :3] = r_host
    expected[3:, 3:] = r_host
    expected[3:, :3] = S.numpy()

    assert_np_equal(result, 2 * expected, tol=tol)

    grad_tol = 10 * tol
    out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
    for idx in range(6 * 6):
        i, j = divmod(idx, 6)
        block_row, local_row = divmod(i, 3)
        block_col, local_col = divmod(j, 3)

        tape = wp.Tape()
        with tape:
            run_kernel()
            wp.launch(output_select_kernel, dim=1, inputs=[outcomponents, idx], outputs=[out], device=device)
        tape.backward(loss=out)

        # R fills both diagonal blocks, so it receives a gradient of 2 at the
        # block-local position whenever (i, j) lies in a diagonal block
        expectedresult = np.zeros((3, 3), dtype=dtype)
        if block_row == block_col:
            expectedresult[local_row, local_col] = 2
        assert_np_equal(tape.gradients[R].numpy()[0], expectedresult, tol=grad_tol)

        # S fills only the lower-left block
        expectedresult = np.zeros((3, 3), dtype=dtype)
        if block_row == 1 and block_col == 0:
            expectedresult[local_row, local_col] = 2
        assert_np_equal(tape.gradients[S].numpy()[0], expectedresult, tol=grad_tol)
        tape.zero()
1895
+
1896
+
1897
def test_transform_identity(test, device, dtype, register_kernels=False):
    """Check that ``wp.transform_identity`` yields zeros followed by a trailing one.

    Two kernels are exercised: one requesting the identity transform with an
    explicit ``dtype`` and one relying on the default, whose components are
    written to a ``wp.float32`` array.

    Args:
        test: Unused by this function.
        device: Device on which arrays are allocated and kernels are launched.
        dtype: NumPy floating-point type under test.
        register_kernels: When true, only create the kernels and return before
            launching anything.
    """
    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]

    def transform_identity_test(output: wp.array(dtype=wptype)):
        t = wp.transform_identity(dtype=wptype)
        for i in range(7):
            output[i] = t[i]

    def transform_identity_test_default(output: wp.array(dtype=wp.float32)):
        t = wp.transform_identity()
        for i in range(7):
            output[i] = t[i]

    typed_kernel = getkernel(transform_identity_test, suffix=dtype.__name__)
    default_kernel = getkernel(transform_identity_test_default, suffix=np.float32.__name__)

    if register_kernels:
        return

    # the second pair checks that the dtype-less call defaults to float32
    cases = (
        (typed_kernel, wptype),
        (default_kernel, wp.float32),
    )
    for kernel, output_dtype in cases:
        output = wp.zeros(7, dtype=output_dtype, device=device)
        wp.launch(kernel, dim=1, inputs=[], outputs=[output], device=device)

        actual = output.numpy()
        expected = np.zeros_like(actual)
        expected[-1] = 1
        assert_np_equal(actual, expected)
1928
+
1929
+
1930
def test_transform_anon_type_instance(test, device, dtype, register_kernels=False):
    """Check building a transform from generic ``wp.vector`` / ``wp.quaternion`` values.

    The kernel assembles a transformation from seven scalars and writes each of
    its components multiplied by 2. The forward result must equal twice the
    input, and back-propagating a single output component must give a gradient
    of 2 at the same input index and 0 elsewhere.

    Args:
        test: Unused by this function.
        device: Device on which arrays are allocated and kernels are launched.
        dtype: NumPy floating-point type under test.
        register_kernels: When true, only create the kernels and return before
            launching anything.
    """
    rng = np.random.default_rng(123)

    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]

    def transform_create_test(input: wp.array(dtype=wptype), output: wp.array(dtype=wptype)):
        t = wp.transformation(
            wp.vector(input[0], input[1], input[2]), wp.quaternion(input[3], input[4], input[5], input[6])
        )
        # scale by 2 so the gradients checked below are non-trivial
        for i in range(7):
            output[i] = wptype(2) * t[i]

    transform_create_kernel = getkernel(transform_create_test, suffix=dtype.__name__)
    output_select_kernel = get_select_kernel(wptype)

    if register_kernels:
        return

    values = wp.array(rng.standard_normal(size=7).astype(dtype), requires_grad=True, device=device)
    doubled = wp.zeros(7, dtype=wptype, requires_grad=True, device=device)
    wp.launch(transform_create_kernel, dim=1, inputs=[values], outputs=[doubled], device=device)
    assert_np_equal(doubled.numpy(), 2 * values.numpy())

    component_count = len(values)
    for component in range(component_count):
        # loss is the single selected output component
        selected = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
        tape = wp.Tape()
        with tape:
            wp.launch(transform_create_kernel, dim=1, inputs=[values], outputs=[doubled], device=device)
            wp.launch(output_select_kernel, dim=1, inputs=[doubled, component], outputs=[selected], device=device)
        tape.backward(loss=selected)

        expected_grads = np.zeros(component_count)
        expected_grads[component] = 2
        assert_np_equal(tape.gradients[values].numpy(), expected_grads)
        tape.zero()
1964
+
1965
+
1966
# passed as ``devices=`` to every test registration for TestSpatial
devices = get_test_devices()


class TestSpatial(unittest.TestCase):
    """Initially empty test case handed to ``add_function_test_register_kernel``."""
1971
+
1972
+
1973
# Test functions registered once per floating-point type, in registration order.
_SPATIAL_TESTS = (
    # spatial vectors
    test_spatial_vector_constructors,
    test_spatial_vector_indexing,
    test_spatial_vector_scalar_multiplication,
    test_spatial_vector_add_sub,
    test_spatial_dot,
    test_spatial_cross,
    test_spatial_top_bottom,
    # transforms
    test_transform_constructors,
    test_transform_anon_type_instance,
    test_transform_identity,
    test_transform_indexing,
    test_transform_get_trans_rot,
    test_transform_multiply,
    test_transform_inverse,
    test_transform_point_vector,
    # are these two valid? They don't seem to be doing things you'd want to do,
    # maybe they should be removed
    test_transform_scalar_multiplication,
    test_transform_add_sub,
    # spatial matrices
    test_spatial_matrix_constructors,
    test_spatial_matrix_indexing,
    test_spatial_matrix_scalar_multiplication,
    test_spatial_matrix_add_sub,
    test_spatial_matvec_multiplication,
    test_spatial_matmat_multiplication,
    test_spatial_outer_product,
    test_spatial_adjoint,
)

for dtype in np_float_types:
    for func in _SPATIAL_TESTS:
        # each test is registered as "<function name>_<dtype name>"
        add_function_test_register_kernel(
            TestSpatial,
            f"{func.__name__}_{dtype.__name__}",
            func,
            devices=devices,
            dtype=dtype,
        )

# TODO: test spatial_mass and spatial_jacobian


if __name__ == "__main__":
    # clear the kernel cache before the tests run
    wp.build.clear_kernel_cache()
    unittest.main(verbosity=2)