warp-lang 1.0.2__py3-none-win_amd64.whl → 1.2.0__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of warp-lang might be problematic. Click here for more details.

Files changed (356) hide show
  1. warp/__init__.py +108 -97
  2. warp/__init__.pyi +1 -1
  3. warp/bin/warp-clang.dll +0 -0
  4. warp/bin/warp.dll +0 -0
  5. warp/build.py +88 -113
  6. warp/build_dll.py +383 -375
  7. warp/builtins.py +3693 -3354
  8. warp/codegen.py +2925 -2792
  9. warp/config.py +40 -36
  10. warp/constants.py +49 -45
  11. warp/context.py +5409 -5102
  12. warp/dlpack.py +442 -442
  13. warp/examples/__init__.py +16 -16
  14. warp/examples/assets/bear.usd +0 -0
  15. warp/examples/assets/bunny.usd +0 -0
  16. warp/examples/assets/cartpole.urdf +110 -110
  17. warp/examples/assets/crazyflie.usd +0 -0
  18. warp/examples/assets/cube.usd +0 -0
  19. warp/examples/assets/nv_ant.xml +92 -92
  20. warp/examples/assets/nv_humanoid.xml +183 -183
  21. warp/examples/assets/quadruped.urdf +267 -267
  22. warp/examples/assets/rocks.nvdb +0 -0
  23. warp/examples/assets/rocks.usd +0 -0
  24. warp/examples/assets/sphere.usd +0 -0
  25. warp/examples/benchmarks/benchmark_api.py +381 -383
  26. warp/examples/benchmarks/benchmark_cloth.py +278 -277
  27. warp/examples/benchmarks/benchmark_cloth_cupy.py +88 -88
  28. warp/examples/benchmarks/benchmark_cloth_jax.py +97 -100
  29. warp/examples/benchmarks/benchmark_cloth_numba.py +146 -142
  30. warp/examples/benchmarks/benchmark_cloth_numpy.py +77 -77
  31. warp/examples/benchmarks/benchmark_cloth_pytorch.py +86 -86
  32. warp/examples/benchmarks/benchmark_cloth_taichi.py +112 -112
  33. warp/examples/benchmarks/benchmark_cloth_warp.py +145 -146
  34. warp/examples/benchmarks/benchmark_launches.py +293 -295
  35. warp/examples/browse.py +29 -29
  36. warp/examples/core/example_dem.py +232 -219
  37. warp/examples/core/example_fluid.py +291 -267
  38. warp/examples/core/example_graph_capture.py +142 -126
  39. warp/examples/core/example_marching_cubes.py +186 -174
  40. warp/examples/core/example_mesh.py +172 -155
  41. warp/examples/core/example_mesh_intersect.py +203 -193
  42. warp/examples/core/example_nvdb.py +174 -170
  43. warp/examples/core/example_raycast.py +103 -90
  44. warp/examples/core/example_raymarch.py +197 -178
  45. warp/examples/core/example_render_opengl.py +183 -141
  46. warp/examples/core/example_sph.py +403 -387
  47. warp/examples/core/example_torch.py +219 -181
  48. warp/examples/core/example_wave.py +261 -248
  49. warp/examples/fem/bsr_utils.py +378 -380
  50. warp/examples/fem/example_apic_fluid.py +432 -389
  51. warp/examples/fem/example_burgers.py +262 -0
  52. warp/examples/fem/example_convection_diffusion.py +180 -168
  53. warp/examples/fem/example_convection_diffusion_dg.py +217 -209
  54. warp/examples/fem/example_deformed_geometry.py +175 -159
  55. warp/examples/fem/example_diffusion.py +199 -173
  56. warp/examples/fem/example_diffusion_3d.py +178 -152
  57. warp/examples/fem/example_diffusion_mgpu.py +219 -214
  58. warp/examples/fem/example_mixed_elasticity.py +242 -222
  59. warp/examples/fem/example_navier_stokes.py +257 -243
  60. warp/examples/fem/example_stokes.py +218 -192
  61. warp/examples/fem/example_stokes_transfer.py +263 -249
  62. warp/examples/fem/mesh_utils.py +133 -109
  63. warp/examples/fem/plot_utils.py +292 -287
  64. warp/examples/optim/example_bounce.py +258 -246
  65. warp/examples/optim/example_cloth_throw.py +220 -209
  66. warp/examples/optim/example_diffray.py +564 -536
  67. warp/examples/optim/example_drone.py +862 -835
  68. warp/examples/optim/example_inverse_kinematics.py +174 -168
  69. warp/examples/optim/example_inverse_kinematics_torch.py +183 -169
  70. warp/examples/optim/example_spring_cage.py +237 -231
  71. warp/examples/optim/example_trajectory.py +221 -199
  72. warp/examples/optim/example_walker.py +304 -293
  73. warp/examples/sim/example_cartpole.py +137 -129
  74. warp/examples/sim/example_cloth.py +194 -186
  75. warp/examples/sim/example_granular.py +122 -111
  76. warp/examples/sim/example_granular_collision_sdf.py +195 -186
  77. warp/examples/sim/example_jacobian_ik.py +234 -214
  78. warp/examples/sim/example_particle_chain.py +116 -105
  79. warp/examples/sim/example_quadruped.py +191 -180
  80. warp/examples/sim/example_rigid_chain.py +195 -187
  81. warp/examples/sim/example_rigid_contact.py +187 -177
  82. warp/examples/sim/example_rigid_force.py +125 -125
  83. warp/examples/sim/example_rigid_gyroscopic.py +107 -95
  84. warp/examples/sim/example_rigid_soft_contact.py +132 -122
  85. warp/examples/sim/example_soft_body.py +188 -177
  86. warp/fabric.py +337 -335
  87. warp/fem/__init__.py +61 -27
  88. warp/fem/cache.py +403 -388
  89. warp/fem/dirichlet.py +178 -179
  90. warp/fem/domain.py +262 -263
  91. warp/fem/field/__init__.py +100 -101
  92. warp/fem/field/field.py +148 -149
  93. warp/fem/field/nodal_field.py +298 -299
  94. warp/fem/field/restriction.py +22 -21
  95. warp/fem/field/test.py +180 -181
  96. warp/fem/field/trial.py +183 -183
  97. warp/fem/geometry/__init__.py +16 -19
  98. warp/fem/geometry/closest_point.py +69 -70
  99. warp/fem/geometry/deformed_geometry.py +270 -271
  100. warp/fem/geometry/element.py +748 -744
  101. warp/fem/geometry/geometry.py +184 -186
  102. warp/fem/geometry/grid_2d.py +380 -373
  103. warp/fem/geometry/grid_3d.py +437 -435
  104. warp/fem/geometry/hexmesh.py +953 -953
  105. warp/fem/geometry/nanogrid.py +455 -0
  106. warp/fem/geometry/partition.py +374 -376
  107. warp/fem/geometry/quadmesh_2d.py +532 -532
  108. warp/fem/geometry/tetmesh.py +840 -840
  109. warp/fem/geometry/trimesh_2d.py +577 -577
  110. warp/fem/integrate.py +1684 -1615
  111. warp/fem/operator.py +190 -191
  112. warp/fem/polynomial.py +214 -213
  113. warp/fem/quadrature/__init__.py +2 -2
  114. warp/fem/quadrature/pic_quadrature.py +243 -245
  115. warp/fem/quadrature/quadrature.py +295 -294
  116. warp/fem/space/__init__.py +179 -292
  117. warp/fem/space/basis_space.py +522 -489
  118. warp/fem/space/collocated_function_space.py +100 -105
  119. warp/fem/space/dof_mapper.py +236 -236
  120. warp/fem/space/function_space.py +148 -145
  121. warp/fem/space/grid_2d_function_space.py +148 -267
  122. warp/fem/space/grid_3d_function_space.py +167 -306
  123. warp/fem/space/hexmesh_function_space.py +253 -352
  124. warp/fem/space/nanogrid_function_space.py +202 -0
  125. warp/fem/space/partition.py +350 -350
  126. warp/fem/space/quadmesh_2d_function_space.py +261 -369
  127. warp/fem/space/restriction.py +161 -160
  128. warp/fem/space/shape/__init__.py +90 -15
  129. warp/fem/space/shape/cube_shape_function.py +728 -738
  130. warp/fem/space/shape/shape_function.py +102 -103
  131. warp/fem/space/shape/square_shape_function.py +611 -611
  132. warp/fem/space/shape/tet_shape_function.py +565 -567
  133. warp/fem/space/shape/triangle_shape_function.py +429 -429
  134. warp/fem/space/tetmesh_function_space.py +224 -292
  135. warp/fem/space/topology.py +297 -295
  136. warp/fem/space/trimesh_2d_function_space.py +153 -221
  137. warp/fem/types.py +77 -77
  138. warp/fem/utils.py +495 -495
  139. warp/jax.py +166 -141
  140. warp/jax_experimental.py +341 -339
  141. warp/native/array.h +1081 -1025
  142. warp/native/builtin.h +1603 -1560
  143. warp/native/bvh.cpp +402 -398
  144. warp/native/bvh.cu +533 -525
  145. warp/native/bvh.h +430 -429
  146. warp/native/clang/clang.cpp +496 -464
  147. warp/native/crt.cpp +42 -32
  148. warp/native/crt.h +352 -335
  149. warp/native/cuda_crt.h +1049 -1049
  150. warp/native/cuda_util.cpp +549 -540
  151. warp/native/cuda_util.h +288 -203
  152. warp/native/cutlass_gemm.cpp +34 -34
  153. warp/native/cutlass_gemm.cu +372 -372
  154. warp/native/error.cpp +66 -66
  155. warp/native/error.h +27 -27
  156. warp/native/exports.h +187 -0
  157. warp/native/fabric.h +228 -228
  158. warp/native/hashgrid.cpp +301 -278
  159. warp/native/hashgrid.cu +78 -77
  160. warp/native/hashgrid.h +227 -227
  161. warp/native/initializer_array.h +32 -32
  162. warp/native/intersect.h +1204 -1204
  163. warp/native/intersect_adj.h +365 -365
  164. warp/native/intersect_tri.h +322 -322
  165. warp/native/marching.cpp +2 -2
  166. warp/native/marching.cu +497 -497
  167. warp/native/marching.h +2 -2
  168. warp/native/mat.h +1545 -1498
  169. warp/native/matnn.h +333 -333
  170. warp/native/mesh.cpp +203 -203
  171. warp/native/mesh.cu +292 -293
  172. warp/native/mesh.h +1887 -1887
  173. warp/native/nanovdb/GridHandle.h +366 -0
  174. warp/native/nanovdb/HostBuffer.h +590 -0
  175. warp/native/nanovdb/NanoVDB.h +6624 -4782
  176. warp/native/nanovdb/PNanoVDB.h +3390 -2553
  177. warp/native/noise.h +850 -850
  178. warp/native/quat.h +1112 -1085
  179. warp/native/rand.h +303 -299
  180. warp/native/range.h +108 -108
  181. warp/native/reduce.cpp +156 -156
  182. warp/native/reduce.cu +348 -348
  183. warp/native/runlength_encode.cpp +61 -61
  184. warp/native/runlength_encode.cu +46 -46
  185. warp/native/scan.cpp +30 -30
  186. warp/native/scan.cu +36 -36
  187. warp/native/scan.h +7 -7
  188. warp/native/solid_angle.h +442 -442
  189. warp/native/sort.cpp +94 -94
  190. warp/native/sort.cu +97 -97
  191. warp/native/sort.h +14 -14
  192. warp/native/sparse.cpp +337 -337
  193. warp/native/sparse.cu +544 -544
  194. warp/native/spatial.h +630 -630
  195. warp/native/svd.h +562 -562
  196. warp/native/temp_buffer.h +30 -30
  197. warp/native/vec.h +1177 -1133
  198. warp/native/volume.cpp +529 -297
  199. warp/native/volume.cu +58 -32
  200. warp/native/volume.h +960 -538
  201. warp/native/volume_builder.cu +446 -425
  202. warp/native/volume_builder.h +34 -19
  203. warp/native/volume_impl.h +61 -0
  204. warp/native/warp.cpp +1057 -1052
  205. warp/native/warp.cu +2949 -2828
  206. warp/native/warp.h +321 -305
  207. warp/optim/__init__.py +9 -9
  208. warp/optim/adam.py +120 -120
  209. warp/optim/linear.py +1104 -939
  210. warp/optim/sgd.py +104 -92
  211. warp/render/__init__.py +10 -10
  212. warp/render/render_opengl.py +3356 -3204
  213. warp/render/render_usd.py +768 -749
  214. warp/render/utils.py +152 -150
  215. warp/sim/__init__.py +52 -59
  216. warp/sim/articulation.py +685 -685
  217. warp/sim/collide.py +1594 -1590
  218. warp/sim/import_mjcf.py +489 -481
  219. warp/sim/import_snu.py +220 -221
  220. warp/sim/import_urdf.py +536 -516
  221. warp/sim/import_usd.py +887 -881
  222. warp/sim/inertia.py +316 -317
  223. warp/sim/integrator.py +234 -233
  224. warp/sim/integrator_euler.py +1956 -1956
  225. warp/sim/integrator_featherstone.py +1917 -1991
  226. warp/sim/integrator_xpbd.py +3288 -3312
  227. warp/sim/model.py +4473 -4314
  228. warp/sim/particles.py +113 -112
  229. warp/sim/render.py +417 -403
  230. warp/sim/utils.py +413 -410
  231. warp/sparse.py +1289 -1227
  232. warp/stubs.py +2192 -2469
  233. warp/tape.py +1162 -225
  234. warp/tests/__init__.py +1 -1
  235. warp/tests/__main__.py +4 -4
  236. warp/tests/assets/test_index_grid.nvdb +0 -0
  237. warp/tests/assets/torus.usda +105 -105
  238. warp/tests/aux_test_class_kernel.py +26 -26
  239. warp/tests/aux_test_compile_consts_dummy.py +10 -10
  240. warp/tests/aux_test_conditional_unequal_types_kernels.py +21 -21
  241. warp/tests/aux_test_dependent.py +20 -22
  242. warp/tests/aux_test_grad_customs.py +21 -23
  243. warp/tests/aux_test_reference.py +9 -11
  244. warp/tests/aux_test_reference_reference.py +8 -10
  245. warp/tests/aux_test_square.py +15 -17
  246. warp/tests/aux_test_unresolved_func.py +14 -14
  247. warp/tests/aux_test_unresolved_symbol.py +14 -14
  248. warp/tests/disabled_kinematics.py +237 -239
  249. warp/tests/run_coverage_serial.py +31 -31
  250. warp/tests/test_adam.py +155 -157
  251. warp/tests/test_arithmetic.py +1088 -1124
  252. warp/tests/test_array.py +2415 -2326
  253. warp/tests/test_array_reduce.py +148 -150
  254. warp/tests/test_async.py +666 -656
  255. warp/tests/test_atomic.py +139 -141
  256. warp/tests/test_bool.py +212 -149
  257. warp/tests/test_builtins_resolution.py +1290 -1292
  258. warp/tests/test_bvh.py +162 -171
  259. warp/tests/test_closest_point_edge_edge.py +227 -228
  260. warp/tests/test_codegen.py +562 -553
  261. warp/tests/test_compile_consts.py +217 -101
  262. warp/tests/test_conditional.py +244 -246
  263. warp/tests/test_copy.py +230 -215
  264. warp/tests/test_ctypes.py +630 -632
  265. warp/tests/test_dense.py +65 -67
  266. warp/tests/test_devices.py +89 -98
  267. warp/tests/test_dlpack.py +528 -529
  268. warp/tests/test_examples.py +403 -378
  269. warp/tests/test_fabricarray.py +952 -955
  270. warp/tests/test_fast_math.py +60 -54
  271. warp/tests/test_fem.py +1298 -1278
  272. warp/tests/test_fp16.py +128 -130
  273. warp/tests/test_func.py +336 -337
  274. warp/tests/test_generics.py +596 -571
  275. warp/tests/test_grad.py +885 -640
  276. warp/tests/test_grad_customs.py +331 -336
  277. warp/tests/test_hash_grid.py +208 -164
  278. warp/tests/test_import.py +37 -39
  279. warp/tests/test_indexedarray.py +1132 -1134
  280. warp/tests/test_intersect.py +65 -67
  281. warp/tests/test_jax.py +305 -307
  282. warp/tests/test_large.py +169 -164
  283. warp/tests/test_launch.py +352 -354
  284. warp/tests/test_lerp.py +217 -261
  285. warp/tests/test_linear_solvers.py +189 -171
  286. warp/tests/test_lvalue.py +419 -493
  287. warp/tests/test_marching_cubes.py +63 -65
  288. warp/tests/test_mat.py +1799 -1827
  289. warp/tests/test_mat_lite.py +113 -115
  290. warp/tests/test_mat_scalar_ops.py +2905 -2889
  291. warp/tests/test_math.py +124 -193
  292. warp/tests/test_matmul.py +498 -499
  293. warp/tests/test_matmul_lite.py +408 -410
  294. warp/tests/test_mempool.py +186 -190
  295. warp/tests/test_mesh.py +281 -324
  296. warp/tests/test_mesh_query_aabb.py +226 -241
  297. warp/tests/test_mesh_query_point.py +690 -702
  298. warp/tests/test_mesh_query_ray.py +290 -303
  299. warp/tests/test_mlp.py +274 -276
  300. warp/tests/test_model.py +108 -110
  301. warp/tests/test_module_hashing.py +111 -0
  302. warp/tests/test_modules_lite.py +36 -39
  303. warp/tests/test_multigpu.py +161 -163
  304. warp/tests/test_noise.py +244 -248
  305. warp/tests/test_operators.py +248 -250
  306. warp/tests/test_options.py +121 -125
  307. warp/tests/test_peer.py +131 -137
  308. warp/tests/test_pinned.py +76 -78
  309. warp/tests/test_print.py +52 -54
  310. warp/tests/test_quat.py +2084 -2086
  311. warp/tests/test_rand.py +324 -288
  312. warp/tests/test_reload.py +207 -217
  313. warp/tests/test_rounding.py +177 -179
  314. warp/tests/test_runlength_encode.py +188 -190
  315. warp/tests/test_sim_grad.py +241 -0
  316. warp/tests/test_sim_kinematics.py +89 -97
  317. warp/tests/test_smoothstep.py +166 -168
  318. warp/tests/test_snippet.py +303 -266
  319. warp/tests/test_sparse.py +466 -460
  320. warp/tests/test_spatial.py +2146 -2148
  321. warp/tests/test_special_values.py +362 -0
  322. warp/tests/test_streams.py +484 -473
  323. warp/tests/test_struct.py +708 -675
  324. warp/tests/test_tape.py +171 -148
  325. warp/tests/test_torch.py +741 -743
  326. warp/tests/test_transient_module.py +85 -87
  327. warp/tests/test_types.py +554 -659
  328. warp/tests/test_utils.py +488 -499
  329. warp/tests/test_vec.py +1262 -1268
  330. warp/tests/test_vec_lite.py +71 -73
  331. warp/tests/test_vec_scalar_ops.py +2097 -2099
  332. warp/tests/test_verify_fp.py +92 -94
  333. warp/tests/test_volume.py +961 -736
  334. warp/tests/test_volume_write.py +338 -265
  335. warp/tests/unittest_serial.py +38 -37
  336. warp/tests/unittest_suites.py +367 -359
  337. warp/tests/unittest_utils.py +434 -578
  338. warp/tests/unused_test_misc.py +69 -71
  339. warp/tests/walkthrough_debug.py +85 -85
  340. warp/thirdparty/appdirs.py +598 -598
  341. warp/thirdparty/dlpack.py +143 -143
  342. warp/thirdparty/unittest_parallel.py +563 -561
  343. warp/torch.py +321 -295
  344. warp/types.py +4941 -4450
  345. warp/utils.py +1008 -821
  346. {warp_lang-1.0.2.dist-info → warp_lang-1.2.0.dist-info}/LICENSE.md +126 -126
  347. {warp_lang-1.0.2.dist-info → warp_lang-1.2.0.dist-info}/METADATA +365 -400
  348. warp_lang-1.2.0.dist-info/RECORD +359 -0
  349. warp/examples/assets/cube.usda +0 -42
  350. warp/examples/assets/sphere.usda +0 -56
  351. warp/examples/assets/torus.usda +0 -105
  352. warp/examples/fem/example_convection_diffusion_dg0.py +0 -194
  353. warp/native/nanovdb/PNanoVDBWrite.h +0 -295
  354. warp_lang-1.0.2.dist-info/RECORD +0 -352
  355. {warp_lang-1.0.2.dist-info → warp_lang-1.2.0.dist-info}/WHEEL +0 -0
  356. {warp_lang-1.0.2.dist-info → warp_lang-1.2.0.dist-info}/top_level.txt +0 -0
@@ -1,2148 +1,2146 @@
1
- # Copyright (c) 2023 NVIDIA CORPORATION. All rights reserved.
2
- # NVIDIA CORPORATION and its licensors retain all intellectual property
3
- # and proprietary rights in and to this software, related documentation
4
- # and any modifications thereto. Any use, reproduction, disclosure or
5
- # distribution of this software and related documentation without an express
6
- # license agreement from NVIDIA CORPORATION is strictly prohibited.
7
-
8
- import unittest
9
-
10
- import numpy as np
11
-
12
- import warp as wp
13
- from warp.tests.unittest_utils import *
14
-
15
- wp.init()
16
-
17
- np_float_types = [np.float32, np.float64, np.float16]
18
-
19
- kernel_cache = dict()
20
-
21
-
22
- def getkernel(func, suffix=""):
23
- key = func.__name__ + "_" + suffix
24
- if key not in kernel_cache:
25
- kernel_cache[key] = wp.Kernel(func=func, key=key)
26
- return kernel_cache[key]
27
-
28
-
29
- def get_select_kernel(dtype):
30
- def output_select_kernel_fn(
31
- input: wp.array(dtype=dtype),
32
- index: int,
33
- out: wp.array(dtype=dtype),
34
- ):
35
- out[0] = input[index]
36
-
37
- return getkernel(output_select_kernel_fn, suffix=dtype.__name__)
38
-
39
-
40
- ############################################################
41
-
42
-
43
- def test_spatial_vector_constructors(test, device, dtype, register_kernels=False):
44
- rng = np.random.default_rng(123)
45
-
46
- tol = {
47
- np.float16: 5.0e-3,
48
- np.float32: 1.0e-6,
49
- np.float64: 1.0e-8,
50
- }.get(dtype, 0)
51
-
52
- wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
53
- vec3 = wp.types.vector(length=3, dtype=wptype)
54
- spatial_vector = wp.types.vector(length=6, dtype=wptype)
55
-
56
- def check_spatial_vector_component_constructor(
57
- input: wp.array(dtype=wptype),
58
- out: wp.array(dtype=wptype),
59
- ):
60
- result = spatial_vector(input[0], input[1], input[2], input[3], input[4], input[5])
61
-
62
- # multiply the output by 2 so we've got something to backpropagate:
63
- out[0] = wptype(2) * result[0]
64
- out[1] = wptype(2) * result[1]
65
- out[2] = wptype(2) * result[2]
66
- out[3] = wptype(2) * result[3]
67
- out[4] = wptype(2) * result[4]
68
- out[5] = wptype(2) * result[5]
69
-
70
- def check_spatial_vector_vector_constructor(
71
- input: wp.array(dtype=wptype),
72
- out: wp.array(dtype=wptype),
73
- ):
74
- result = spatial_vector(vec3(input[0], input[1], input[2]), vec3(input[3], input[4], input[5]))
75
-
76
- # multiply the output by 2 so we've got something to backpropagate:
77
- out[0] = wptype(2) * result[0]
78
- out[1] = wptype(2) * result[1]
79
- out[2] = wptype(2) * result[2]
80
- out[3] = wptype(2) * result[3]
81
- out[4] = wptype(2) * result[4]
82
- out[5] = wptype(2) * result[5]
83
-
84
- kernel = getkernel(check_spatial_vector_component_constructor, suffix=dtype.__name__)
85
- output_select_kernel = get_select_kernel(wptype)
86
- vec_kernel = getkernel(check_spatial_vector_vector_constructor, suffix=dtype.__name__)
87
-
88
- if register_kernels:
89
- return
90
-
91
- input = wp.array(rng.standard_normal(size=6).astype(dtype), requires_grad=True, device=device)
92
- output = wp.zeros_like(input)
93
- wp.launch(kernel, dim=1, inputs=[input], outputs=[output], device=device)
94
-
95
- assert_np_equal(output.numpy(), 2 * input.numpy(), tol=tol)
96
-
97
- for i in range(len(input)):
98
- cmp = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
99
- tape = wp.Tape()
100
- with tape:
101
- wp.launch(kernel, dim=1, inputs=[input], outputs=[output], device=device)
102
- wp.launch(output_select_kernel, dim=1, inputs=[output, i], outputs=[cmp], device=device)
103
- tape.backward(loss=cmp)
104
- expectedgrads = np.zeros(len(input))
105
- expectedgrads[i] = 2
106
- assert_np_equal(tape.gradients[input].numpy(), expectedgrads)
107
- tape.zero()
108
-
109
- input = wp.array(rng.standard_normal(size=6).astype(dtype), requires_grad=True, device=device)
110
- output = wp.zeros_like(input)
111
- wp.launch(vec_kernel, dim=1, inputs=[input], outputs=[output], device=device)
112
-
113
- assert_np_equal(output.numpy(), 2 * input.numpy(), tol=tol)
114
-
115
- for i in range(len(input)):
116
- cmp = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
117
- tape = wp.Tape()
118
- with tape:
119
- wp.launch(kernel, dim=1, inputs=[input], outputs=[output], device=device)
120
- wp.launch(output_select_kernel, dim=1, inputs=[output, i], outputs=[cmp], device=device)
121
- tape.backward(loss=cmp)
122
- expectedgrads = np.zeros(len(input))
123
- expectedgrads[i] = 2
124
- assert_np_equal(tape.gradients[input].numpy(), expectedgrads)
125
- tape.zero()
126
-
127
-
128
- def test_spatial_vector_indexing(test, device, dtype, register_kernels=False):
129
- rng = np.random.default_rng(123)
130
-
131
- tol = {
132
- np.float16: 5.0e-3,
133
- np.float32: 1.0e-6,
134
- np.float64: 1.0e-8,
135
- }.get(dtype, 0)
136
-
137
- wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
138
- spatial_vector = wp.types.vector(length=6, dtype=wptype)
139
-
140
- def check_spatial_vector_indexing(
141
- input: wp.array(dtype=spatial_vector),
142
- out: wp.array(dtype=wptype),
143
- ):
144
- inpt = input[0]
145
-
146
- # multiply outputs by 2 so we've got something to backpropagate:
147
- idx = 0
148
- for i in range(6):
149
- out[idx] = wptype(2) * inpt[i]
150
- idx = idx + 1
151
-
152
- kernel = getkernel(check_spatial_vector_indexing, suffix=dtype.__name__)
153
- output_select_kernel = get_select_kernel(wptype)
154
-
155
- if register_kernels:
156
- return
157
-
158
- input = wp.array(
159
- rng.standard_normal(size=(1, 6)).astype(dtype), dtype=spatial_vector, requires_grad=True, device=device
160
- )
161
- outcmps = wp.zeros(6, dtype=wptype, requires_grad=True, device=device)
162
-
163
- wp.launch(kernel, dim=1, inputs=[input], outputs=[outcmps], device=device)
164
-
165
- assert_np_equal(outcmps.numpy(), 2 * input.numpy().ravel(), tol=tol)
166
-
167
- out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
168
- for i in range(6):
169
- tape = wp.Tape()
170
- with tape:
171
- wp.launch(kernel, dim=1, inputs=[input], outputs=[outcmps], device=device)
172
- wp.launch(output_select_kernel, dim=1, inputs=[outcmps, i], outputs=[out], device=device)
173
- tape.backward(loss=out)
174
- expectedresult = np.zeros(6, dtype=dtype)
175
- expectedresult[i] = 2
176
- assert_np_equal(tape.gradients[input].numpy()[0], expectedresult)
177
- tape.zero()
178
-
179
-
180
- def test_spatial_vector_scalar_multiplication(test, device, dtype, register_kernels=False):
181
- rng = np.random.default_rng(123)
182
-
183
- tol = {
184
- np.float16: 5.0e-3,
185
- np.float32: 1.0e-6,
186
- np.float64: 1.0e-8,
187
- }.get(dtype, 0)
188
-
189
- wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
190
- spatial_vector = wp.types.vector(length=6, dtype=wptype)
191
-
192
- def check_spatial_vector_scalar_mul(
193
- s: wp.array(dtype=wptype),
194
- q: wp.array(dtype=spatial_vector),
195
- outcmps_l: wp.array(dtype=wptype),
196
- outcmps_r: wp.array(dtype=wptype),
197
- ):
198
- lresult = s[0] * q[0]
199
- rresult = q[0] * s[0]
200
-
201
- # multiply outputs by 2 so we've got something to backpropagate:
202
- for i in range(6):
203
- outcmps_l[i] = wptype(2) * lresult[i]
204
- outcmps_r[i] = wptype(2) * rresult[i]
205
-
206
- kernel = getkernel(check_spatial_vector_scalar_mul, suffix=dtype.__name__)
207
- output_select_kernel = get_select_kernel(wptype)
208
-
209
- if register_kernels:
210
- return
211
-
212
- s = wp.array(rng.standard_normal(size=1).astype(dtype), requires_grad=True, device=device)
213
- q = wp.array(
214
- rng.standard_normal(size=(1, 6)).astype(dtype), dtype=spatial_vector, requires_grad=True, device=device
215
- )
216
-
217
- outcmps_l = wp.zeros(6, dtype=wptype, requires_grad=True, device=device)
218
- outcmps_r = wp.zeros(6, dtype=wptype, requires_grad=True, device=device)
219
-
220
- wp.launch(
221
- kernel,
222
- dim=1,
223
- inputs=[s, q],
224
- outputs=[
225
- outcmps_l,
226
- outcmps_r,
227
- ],
228
- device=device,
229
- )
230
-
231
- assert_np_equal(outcmps_l.numpy(), 2 * s.numpy()[0] * q.numpy(), tol=tol)
232
- assert_np_equal(outcmps_r.numpy(), 2 * s.numpy()[0] * q.numpy(), tol=tol)
233
-
234
- out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
235
- for i in range(6):
236
- # test left/right mul gradients:
237
- for wrt in [outcmps_l, outcmps_r]:
238
- tape = wp.Tape()
239
- with tape:
240
- wp.launch(kernel, dim=1, inputs=[s, q], outputs=[outcmps_l, outcmps_r], device=device)
241
- wp.launch(output_select_kernel, dim=1, inputs=[wrt, i], outputs=[out], device=device)
242
- tape.backward(loss=out)
243
- expectedresult = np.zeros(6, dtype=dtype)
244
- expectedresult[i] = 2 * s.numpy()[0]
245
- assert_np_equal(tape.gradients[q].numpy()[0], expectedresult, tol=tol)
246
- assert_np_equal(tape.gradients[s].numpy()[0], 2 * q.numpy()[0, i], tol=tol)
247
- tape.zero()
248
-
249
-
250
- def test_spatial_vector_add_sub(test, device, dtype, register_kernels=False):
251
- rng = np.random.default_rng(123)
252
-
253
- tol = {
254
- np.float16: 5.0e-3,
255
- np.float32: 1.0e-6,
256
- np.float64: 1.0e-8,
257
- }.get(dtype, 0)
258
-
259
- wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
260
- spatial_vector = wp.types.vector(length=6, dtype=wptype)
261
-
262
- def check_spatial_vector_add_sub(
263
- q: wp.array(dtype=spatial_vector),
264
- v: wp.array(dtype=spatial_vector),
265
- outputs_add: wp.array(dtype=wptype),
266
- outputs_sub: wp.array(dtype=wptype),
267
- ):
268
- addresult = q[0] + v[0]
269
- subresult = q[0] - v[0]
270
- for i in range(6):
271
- outputs_add[i] = wptype(2) * addresult[i]
272
- outputs_sub[i] = wptype(2) * subresult[i]
273
-
274
- kernel = getkernel(check_spatial_vector_add_sub, suffix=dtype.__name__)
275
- output_select_kernel = get_select_kernel(wptype)
276
- if register_kernels:
277
- return
278
-
279
- q = wp.array(rng.standard_normal(size=6).astype(dtype), dtype=spatial_vector, requires_grad=True, device=device)
280
- v = wp.array(rng.standard_normal(size=6).astype(dtype), dtype=spatial_vector, requires_grad=True, device=device)
281
-
282
- outputs_add = wp.zeros(6, dtype=wptype, requires_grad=True, device=device)
283
- outputs_sub = wp.zeros(6, dtype=wptype, requires_grad=True, device=device)
284
-
285
- wp.launch(
286
- kernel,
287
- dim=1,
288
- inputs=[
289
- q,
290
- v,
291
- ],
292
- outputs=[outputs_add, outputs_sub],
293
- device=device,
294
- )
295
-
296
- assert_np_equal(outputs_add.numpy(), 2 * (q.numpy() + v.numpy()), tol=tol)
297
- assert_np_equal(outputs_sub.numpy(), 2 * (q.numpy() - v.numpy()), tol=tol)
298
-
299
- out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
300
- for i in range(6):
301
- # test add gradients:
302
- tape = wp.Tape()
303
- with tape:
304
- wp.launch(kernel, dim=1, inputs=[q, v], outputs=[outputs_add, outputs_sub], device=device)
305
- wp.launch(output_select_kernel, dim=1, inputs=[outputs_add, i], outputs=[out], device=device)
306
- tape.backward(loss=out)
307
- expectedresult = np.zeros(6, dtype=dtype)
308
- expectedresult[i] = 2
309
- assert_np_equal(tape.gradients[q].numpy()[0], expectedresult, tol=tol)
310
- assert_np_equal(tape.gradients[v].numpy()[0], expectedresult, tol=tol)
311
- tape.zero()
312
-
313
- # test subtraction gradients:
314
- tape = wp.Tape()
315
- with tape:
316
- wp.launch(kernel, dim=1, inputs=[q, v], outputs=[outputs_add, outputs_sub], device=device)
317
- wp.launch(output_select_kernel, dim=1, inputs=[outputs_sub, i], outputs=[out], device=device)
318
- tape.backward(loss=out)
319
- expectedresult = np.zeros(6, dtype=dtype)
320
- expectedresult[i] = 2
321
- assert_np_equal(tape.gradients[q].numpy()[0], expectedresult, tol=tol)
322
- assert_np_equal(tape.gradients[v].numpy()[0], -expectedresult, tol=tol)
323
- tape.zero()
324
-
325
-
326
- def test_spatial_dot(test, device, dtype, register_kernels=False):
327
- rng = np.random.default_rng(123)
328
-
329
- tol = {
330
- np.float16: 1.0e-2,
331
- np.float32: 1.0e-6,
332
- np.float64: 1.0e-8,
333
- }.get(dtype, 0)
334
-
335
- wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
336
- spatial_vector = wp.types.vector(length=6, dtype=wptype)
337
-
338
- def check_spatial_dot(
339
- s: wp.array(dtype=spatial_vector),
340
- v: wp.array(dtype=spatial_vector),
341
- dot: wp.array(dtype=wptype),
342
- ):
343
- dot[0] = wptype(2) * wp.spatial_dot(v[0], s[0])
344
-
345
- kernel = getkernel(check_spatial_dot, suffix=dtype.__name__)
346
- if register_kernels:
347
- return
348
-
349
- s = wp.array(rng.standard_normal(size=6).astype(dtype), dtype=spatial_vector, requires_grad=True, device=device)
350
- v = wp.array(rng.standard_normal(size=6).astype(dtype), dtype=spatial_vector, requires_grad=True, device=device)
351
- dot = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
352
-
353
- tape = wp.Tape()
354
- with tape:
355
- wp.launch(
356
- kernel,
357
- dim=1,
358
- inputs=[
359
- s,
360
- v,
361
- ],
362
- outputs=[dot],
363
- device=device,
364
- )
365
-
366
- assert_np_equal(dot.numpy()[0], 2.0 * (v.numpy() * s.numpy()).sum(), tol=tol)
367
-
368
- tape.backward(loss=dot)
369
- sgrads = tape.gradients[s].numpy()[0]
370
- expected_grads = 2.0 * v.numpy()[0]
371
- assert_np_equal(sgrads, expected_grads, tol=10 * tol)
372
-
373
- vgrads = tape.gradients[v].numpy()[0]
374
- expected_grads = 2.0 * s.numpy()[0]
375
- assert_np_equal(vgrads, expected_grads, tol=tol)
376
-
377
-
378
def test_spatial_cross(test, device, dtype, register_kernels=False):
    """Validate ``wp.spatial_cross`` and ``wp.spatial_cross_dual``.

    Forward values are compared with NumPy cross products of the first and
    last three components of each 6-vector.  Gradients with respect to both
    inputs are compared with the gradients of ``wp.cross`` terms that the
    same kernel evaluates on the top/bottom parts of the inputs.

    Args:
        test: Not used by this function.
        device: Passed to every array allocation and kernel launch.
        dtype: NumPy scalar type selecting the Warp scalar type and tolerance.
        register_kernels: If true, return right after the kernels are created.
    """
    rng = np.random.default_rng(123)

    # tolerance per scalar type; any other dtype falls back to 0
    tol = {np.float16: 5.0e-3, np.float32: 1.0e-6, np.float64: 1.0e-8}.get(dtype, 0)

    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
    spatial_vector = wp.types.vector(length=6, dtype=wptype)

    def check_spatial_cross(
        s: wp.array(dtype=spatial_vector),
        v: wp.array(dtype=spatial_vector),
        outputs: wp.array(dtype=wptype),
        outputs_dual: wp.array(dtype=wptype),
        outputs_wcrossw: wp.array(dtype=wptype),
        outputs_vcrossw: wp.array(dtype=wptype),
        outputs_wcrossv: wp.array(dtype=wptype),
        outputs_vcrossv: wp.array(dtype=wptype),
    ):
        c = wp.spatial_cross(s[0], v[0])
        d = wp.spatial_cross_dual(s[0], v[0])

        # the factor of 2 gives the backward pass something to propagate
        for i in range(6):
            outputs[i] = wptype(2) * c[i]
            outputs_dual[i] = wptype(2) * d[i]

        sw = wp.spatial_top(s[0])
        sv = wp.spatial_bottom(s[0])
        vw = wp.spatial_top(v[0])
        vv = wp.spatial_bottom(v[0])

        wcrossw = wp.cross(sw, vw)
        vcrossw = wp.cross(sv, vw)
        wcrossv = wp.cross(sw, vv)
        vcrossv = wp.cross(sv, vv)

        # unscaled 3D cross products, used as the reference for gradients
        for i in range(3):
            outputs_wcrossw[i] = wcrossw[i]
            outputs_vcrossw[i] = vcrossw[i]
            outputs_wcrossv[i] = wcrossv[i]
            outputs_vcrossv[i] = vcrossv[i]

    kernel = getkernel(check_spatial_cross, suffix=dtype.__name__)
    output_select_kernel = get_select_kernel(wptype)

    if register_kernels:
        return

    def new_output(size):
        # zero-filled scalar array with gradients enabled
        return wp.zeros(size, dtype=wptype, requires_grad=True, device=device)

    s = wp.array(rng.standard_normal(size=6).astype(dtype), dtype=spatial_vector, requires_grad=True, device=device)
    v = wp.array(rng.standard_normal(size=6).astype(dtype), dtype=spatial_vector, requires_grad=True, device=device)
    outputs = new_output(6)
    outputs_dual = new_output(6)
    outputs_wcrossw = new_output(3)
    outputs_vcrossw = new_output(3)
    outputs_wcrossv = new_output(3)
    outputs_vcrossv = new_output(3)

    def run_cross_kernel():
        wp.launch(
            kernel,
            dim=1,
            inputs=[s, v],
            outputs=[outputs, outputs_dual, outputs_wcrossw, outputs_vcrossw, outputs_wcrossv, outputs_vcrossv],
            device=device,
        )

    def pick(source, index, target):
        # launch the select kernel on (source, index) with target as its output
        wp.launch(output_select_kernel, dim=1, inputs=[source, index], outputs=[target], device=device)

    run_cross_kernel()

    # NumPy reference built from the two halves of each input vector
    s_np = s.numpy()
    v_np = v.numpy()
    sw, sv = s_np[0, :3], s_np[0, 3:]
    vw, vv = v_np[0, :3], v_np[0, 3:]

    wcrossw = np.cross(sw, vw)
    vcrossw = np.cross(sv, vw)
    wcrossv = np.cross(sw, vv)
    vcrossv = np.cross(sv, vv)

    cross_np = outputs.numpy()
    assert_np_equal(cross_np[:3], 2 * wcrossw, tol=tol)
    assert_np_equal(cross_np[3:], 2 * (vcrossw + wcrossv), tol=tol)

    dual_np = outputs_dual.numpy()
    assert_np_equal(dual_np[:3], 2 * (wcrossw + vcrossv), tol=tol)
    assert_np_equal(dual_np[3:], 2 * wcrossv, tol=tol)

    for i in range(3):
        cmp_w = new_output(1)
        cmp_v = new_output(1)
        cmp_w_dual = new_output(1)
        cmp_v_dual = new_output(1)
        cmp_wcrossw = new_output(1)
        cmp_vcrossw = new_output(1)
        cmp_wcrossv = new_output(1)
        cmp_vcrossv = new_output(1)

        tape = wp.Tape()
        with tape:
            run_cross_kernel()

            # component i of the first and second half of spatial_cross
            pick(outputs, i, cmp_w)
            pick(outputs, i + 3, cmp_v)

            # component i of the first and second half of spatial_cross_dual
            pick(outputs_dual, i, cmp_w_dual)
            pick(outputs_dual, i + 3, cmp_v_dual)

            # component i of each reference 3D cross product
            pick(outputs_wcrossw, i, cmp_wcrossw)
            pick(outputs_vcrossw, i, cmp_vcrossw)
            pick(outputs_wcrossv, i, cmp_wcrossv)
            pick(outputs_vcrossv, i, cmp_vcrossv)

        def input_grads(loss, tape=tape):
            tape.backward(loss=loss)
            # "1.0 *" produces fresh arrays before the tape is zeroed
            grad_s = 1.0 * tape.gradients[s].numpy()
            grad_v = 1.0 * tape.gradients[v].numpy()
            tape.zero()
            return grad_s, grad_v

        w_ds, w_dv = input_grads(cmp_w)
        v_ds, v_dv = input_grads(cmp_v)
        w_dual_ds, w_dual_dv = input_grads(cmp_w_dual)
        v_dual_ds, v_dual_dv = input_grads(cmp_v_dual)

        wcrossw_ds, wcrossw_dv = input_grads(cmp_wcrossw)
        vcrossw_ds, vcrossw_dv = input_grads(cmp_vcrossw)
        wcrossv_ds, wcrossv_dv = input_grads(cmp_wcrossv)
        vcrossv_ds, vcrossv_dv = input_grads(cmp_vcrossv)

        assert_np_equal(w_ds, 2 * wcrossw_ds, tol=tol)
        assert_np_equal(w_dv, 2 * wcrossw_dv, tol=tol)

        assert_np_equal(v_ds, 2 * (vcrossw_ds + wcrossv_ds), tol=tol)
        assert_np_equal(v_dv, 2 * (vcrossw_dv + wcrossv_dv), tol=tol)

        assert_np_equal(w_dual_ds, 2 * (wcrossw_ds + vcrossv_ds), tol=tol)
        assert_np_equal(w_dual_dv, 2 * (wcrossw_dv + vcrossv_dv), tol=tol)

        assert_np_equal(v_dual_ds, 2 * wcrossv_ds, tol=tol)
        assert_np_equal(v_dual_dv, 2 * wcrossv_dv, tol=tol)
530
-
531
-
532
def test_spatial_top_bottom(test, device, dtype, register_kernels=False):
    """Validate ``wp.spatial_top`` / ``wp.spatial_bottom`` values and gradients.

    The kernel writes twice the three top components followed by twice the
    three bottom components, so the output is expected to equal ``2 * s`` and
    the gradient of output component ``i`` to be 2 at position ``i`` only.

    Args:
        test: Not used by this function.
        device: Passed to every array allocation and kernel launch.
        dtype: NumPy scalar type selecting the Warp scalar type and tolerance.
        register_kernels: If true, return right after the kernels are created.
    """
    rng = np.random.default_rng(123)

    # tolerance per scalar type; any other dtype falls back to 0
    tol = {np.float16: 1.0e-2, np.float32: 1.0e-6, np.float64: 1.0e-8}.get(dtype, 0)

    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
    spatial_vector = wp.types.vector(length=6, dtype=wptype)

    def check_spatial_top_bottom(
        s: wp.array(dtype=spatial_vector),
        outputs: wp.array(dtype=wptype),
    ):
        top = wp.spatial_top(s[0])
        bottom = wp.spatial_bottom(s[0])

        # slots 0..2 hold the doubled top part
        outputs[0] = wptype(2) * top[0]
        outputs[1] = wptype(2) * top[1]
        outputs[2] = wptype(2) * top[2]

        # slots 3..5 hold the doubled bottom part
        outputs[3] = wptype(2) * bottom[0]
        outputs[4] = wptype(2) * bottom[1]
        outputs[5] = wptype(2) * bottom[2]

    kernel = getkernel(check_spatial_top_bottom, suffix=dtype.__name__)
    output_select_kernel = get_select_kernel(wptype)

    if register_kernels:
        return

    s = wp.array(rng.standard_normal(size=6).astype(dtype), dtype=spatial_vector, requires_grad=True, device=device)
    outputs = wp.zeros(6, dtype=wptype, requires_grad=True, device=device)

    def run_kernel():
        wp.launch(kernel, dim=1, inputs=[s], outputs=[outputs], device=device)

    run_kernel()
    assert_np_equal(outputs.numpy(), 2.0 * s.numpy(), tol=tol)

    for component in range(6):
        cmp = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
        tape = wp.Tape()
        with tape:
            run_kernel()
            wp.launch(output_select_kernel, dim=1, inputs=[outputs, component], outputs=[cmp], device=device)
        tape.backward(loss=cmp)

        # only the selected component should receive the factor of 2
        expected = np.zeros(6)
        expected[component] = 2
        assert_np_equal(tape.gradients[s].numpy(), expected)
        tape.zero()
599
-
600
-
601
def test_transform_constructors(test, device, dtype, register_kernels=False):
    """Validate building a transform from a ``vec3`` and a quaternion.

    Seven scalars are packed into a transform inside the kernel and each
    component is written back multiplied by 2; values and per-component
    gradients are then checked against the input.

    Args:
        test: Not used by this function.
        device: Passed to every array allocation and kernel launch.
        dtype: NumPy scalar type selecting the Warp scalar type and tolerance.
        register_kernels: If true, return right after the kernels are created.
    """
    rng = np.random.default_rng(123)

    # tolerance per scalar type; any other dtype falls back to 0
    tol = {np.float16: 5.0e-3, np.float32: 1.0e-6, np.float64: 1.0e-8}.get(dtype, 0)

    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
    vec3 = wp.types.vector(length=3, dtype=wptype)
    transform = wp.types.transformation(dtype=wptype)
    quat = wp.types.quaternion(dtype=wptype)

    def check_transform_constructor(
        input: wp.array(dtype=wptype),
        out: wp.array(dtype=wptype),
    ):
        result = transform(vec3(input[0], input[1], input[2]), quat(input[3], input[4], input[5], input[6]))

        # the factor of 2 gives the backward pass something to propagate
        out[0] = wptype(2) * result[0]
        out[1] = wptype(2) * result[1]
        out[2] = wptype(2) * result[2]
        out[3] = wptype(2) * result[3]
        out[4] = wptype(2) * result[4]
        out[5] = wptype(2) * result[5]
        out[6] = wptype(2) * result[6]

    kernel = getkernel(check_transform_constructor, suffix=dtype.__name__)
    output_select_kernel = get_select_kernel(wptype)

    if register_kernels:
        return

    # three position values followed by a unit-length quaternion
    p = rng.standard_normal(size=3).astype(dtype)
    q = rng.standard_normal(size=4).astype(dtype)
    q /= np.linalg.norm(q)

    packed = wp.array(np.concatenate((p, q)), requires_grad=True, device=device)
    doubled = wp.zeros_like(packed)

    def run_kernel():
        wp.launch(kernel, dim=1, inputs=[packed], outputs=[doubled], device=device)

    run_kernel()
    assert_np_equal(doubled.numpy(), 2 * packed.numpy(), tol=tol)

    count = len(packed)
    for component in range(count):
        cmp = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
        tape = wp.Tape()
        with tape:
            run_kernel()
            wp.launch(output_select_kernel, dim=1, inputs=[doubled, component], outputs=[cmp], device=device)
        tape.backward(loss=cmp)

        # only the selected component should receive the factor of 2
        expected = np.zeros(count)
        expected[component] = 2
        assert_np_equal(tape.gradients[packed].numpy(), expected)
        tape.zero()
658
-
659
-
660
def test_transform_indexing(test, device, dtype, register_kernels=False):
    """Validate scalar indexing of a transform inside a kernel.

    The kernel reads the seven components of one transform by index and
    stores each multiplied by 2; values and per-component gradients are
    checked against the random input.

    Args:
        test: Not used by this function.
        device: Passed to every array allocation and kernel launch.
        dtype: NumPy scalar type selecting the Warp scalar type and tolerance.
        register_kernels: If true, return right after the kernels are created.
    """
    rng = np.random.default_rng(123)

    # tolerance per scalar type; any other dtype falls back to 0
    tol = {np.float16: 5.0e-3, np.float32: 1.0e-6, np.float64: 1.0e-8}.get(dtype, 0)

    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
    transform = wp.types.transformation(dtype=wptype)

    def check_transform_indexing(
        input: wp.array(dtype=transform),
        out: wp.array(dtype=wptype),
    ):
        inpt = input[0]

        # the factor of 2 gives the backward pass something to propagate
        idx = 0
        for i in range(7):
            out[idx] = wptype(2) * inpt[i]
            idx = idx + 1

    kernel = getkernel(check_transform_indexing, suffix=dtype.__name__)
    output_select_kernel = get_select_kernel(wptype)

    if register_kernels:
        return

    xform = wp.array(rng.standard_normal(size=(1, 7)).astype(dtype), dtype=transform, requires_grad=True, device=device)
    outcmps = wp.zeros(7, dtype=wptype, requires_grad=True, device=device)

    def run_kernel():
        wp.launch(kernel, dim=1, inputs=[xform], outputs=[outcmps], device=device)

    run_kernel()
    assert_np_equal(outcmps.numpy(), 2 * xform.numpy().ravel(), tol=tol)

    # a single loss buffer is shared by all seven backward passes
    selected = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
    for component in range(7):
        tape = wp.Tape()
        with tape:
            run_kernel()
            wp.launch(output_select_kernel, dim=1, inputs=[outcmps, component], outputs=[selected], device=device)
        tape.backward(loss=selected)

        # only the selected component should receive the factor of 2
        expected = np.zeros(7, dtype=dtype)
        expected[component] = 2
        assert_np_equal(tape.gradients[xform].numpy()[0], expected)
        tape.zero()
707
-
708
-
709
def test_transform_scalar_multiplication(test, device, dtype, register_kernels=False):
    """Validate ``scalar * transform`` and ``transform * scalar``.

    Both products are written out multiplied by 2.  Values are compared with
    ``2 * s * q`` in NumPy, and gradients with respect to the transform and
    the scalar are checked for every component of both products.

    Args:
        test: Not used by this function.
        device: Passed to every array allocation and kernel launch.
        dtype: NumPy scalar type selecting the Warp scalar type and tolerance.
        register_kernels: If true, return right after the kernels are created.
    """
    rng = np.random.default_rng(123)

    # tolerance per scalar type; any other dtype falls back to 0
    tol = {np.float16: 5.0e-3, np.float32: 1.0e-6, np.float64: 1.0e-8}.get(dtype, 0)

    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
    transform = wp.types.transformation(dtype=wptype)

    def check_transform_scalar_mul(
        s: wp.array(dtype=wptype),
        q: wp.array(dtype=transform),
        outcmps_l: wp.array(dtype=wptype),
        outcmps_r: wp.array(dtype=wptype),
    ):
        lresult = s[0] * q[0]
        rresult = q[0] * s[0]

        # the factor of 2 gives the backward pass something to propagate
        for i in range(7):
            outcmps_l[i] = wptype(2) * lresult[i]
            outcmps_r[i] = wptype(2) * rresult[i]

    kernel = getkernel(check_transform_scalar_mul, suffix=dtype.__name__)
    output_select_kernel = get_select_kernel(wptype)

    if register_kernels:
        return

    s = wp.array(rng.standard_normal(size=1).astype(dtype), requires_grad=True, device=device)
    q = wp.array(rng.standard_normal(size=(1, 7)).astype(dtype), dtype=transform, requires_grad=True, device=device)

    outcmps_l = wp.zeros(7, dtype=wptype, requires_grad=True, device=device)
    outcmps_r = wp.zeros(7, dtype=wptype, requires_grad=True, device=device)

    def run_kernel():
        wp.launch(kernel, dim=1, inputs=[s, q], outputs=[outcmps_l, outcmps_r], device=device)

    run_kernel()

    # left and right multiplication must produce the same doubled product
    for product in (outcmps_l, outcmps_r):
        assert_np_equal(product.numpy(), 2 * s.numpy()[0] * q.numpy(), tol=tol)

    # a single loss buffer is shared by all backward passes
    selected = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
    for component in range(7):
        # gradients of the left product first, then of the right product
        for product in (outcmps_l, outcmps_r):
            tape = wp.Tape()
            with tape:
                run_kernel()
                wp.launch(output_select_kernel, dim=1, inputs=[product, component], outputs=[selected], device=device)
            tape.backward(loss=selected)

            expected_q_grad = np.zeros(7, dtype=dtype)
            expected_q_grad[component] = 2 * s.numpy()[0]
            assert_np_equal(tape.gradients[q].numpy()[0], expected_q_grad, tol=tol)
            assert_np_equal(tape.gradients[s].numpy()[0], 2 * q.numpy()[0, component], tol=tol)
            tape.zero()
775
-
776
-
777
def test_transform_add_sub(test, device, dtype, register_kernels=False):
    """Validate component-wise ``+`` and ``-`` on transforms.

    The kernel stores twice the sum and twice the difference of two
    transforms.  Values are compared with NumPy; gradients of each selected
    component are expected to be 2 for ``q`` and +2 (sum) or -2 (difference)
    for ``v``.

    Args:
        test: Not used by this function.
        device: Passed to every array allocation and kernel launch.
        dtype: NumPy scalar type selecting the Warp scalar type and tolerance.
        register_kernels: If true, return right after the kernels are created.
    """
    rng = np.random.default_rng(123)

    # tolerance per scalar type; any other dtype falls back to 0
    tol = {np.float16: 5.0e-3, np.float32: 1.0e-6, np.float64: 1.0e-8}.get(dtype, 0)

    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
    transform = wp.types.transformation(dtype=wptype)

    def check_transform_add_sub(
        q: wp.array(dtype=transform),
        v: wp.array(dtype=transform),
        outputs_add: wp.array(dtype=wptype),
        outputs_sub: wp.array(dtype=wptype),
    ):
        addresult = q[0] + v[0]
        subresult = q[0] - v[0]
        # the factor of 2 gives the backward pass something to propagate
        for i in range(7):
            outputs_add[i] = wptype(2) * addresult[i]
            outputs_sub[i] = wptype(2) * subresult[i]

    kernel = getkernel(check_transform_add_sub, suffix=dtype.__name__)
    output_select_kernel = get_select_kernel(wptype)

    if register_kernels:
        return

    q = wp.array(rng.standard_normal(size=7).astype(dtype), dtype=transform, requires_grad=True, device=device)
    v = wp.array(rng.standard_normal(size=7).astype(dtype), dtype=transform, requires_grad=True, device=device)

    outputs_add = wp.zeros(7, dtype=wptype, requires_grad=True, device=device)
    outputs_sub = wp.zeros(7, dtype=wptype, requires_grad=True, device=device)

    def run_kernel():
        wp.launch(kernel, dim=1, inputs=[q, v], outputs=[outputs_add, outputs_sub], device=device)

    run_kernel()

    assert_np_equal(outputs_add.numpy(), 2 * (q.numpy() + v.numpy()), tol=tol)
    assert_np_equal(outputs_sub.numpy(), 2 * (q.numpy() - v.numpy()), tol=tol)

    # a single loss buffer is shared by all backward passes
    selected = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
    for component in range(7):
        # addition first, then subtraction; only the sign of d/dv differs
        for result, negate_v_grad in ((outputs_add, False), (outputs_sub, True)):
            tape = wp.Tape()
            with tape:
                run_kernel()
                wp.launch(output_select_kernel, dim=1, inputs=[result, component], outputs=[selected], device=device)
            tape.backward(loss=selected)

            expected = np.zeros(7, dtype=dtype)
            expected[component] = 2
            expected_v = -expected if negate_v_grad else expected
            assert_np_equal(tape.gradients[q].numpy()[0], expected, tol=tol)
            assert_np_equal(tape.gradients[v].numpy()[0], expected_v, tol=tol)
            tape.zero()
852
-
853
-
854
def test_transform_get_trans_rot(test, device, dtype, register_kernels=False):
    """Validate ``wp.transform_get_translation`` / ``wp.transform_get_rotation``.

    The kernel writes twice the three translation components followed by
    twice the four rotation components, so the output is expected to equal
    ``2 * s`` and the gradient of output component ``i`` to be 2 at position
    ``i`` only.

    Args:
        test: Not used by this function.
        device: Passed to every array allocation and kernel launch.
        dtype: NumPy scalar type selecting the Warp scalar type and tolerance.
        register_kernels: If true, return right after the kernels are created.
    """
    rng = np.random.default_rng(123)

    # tolerance per scalar type; any other dtype falls back to 0
    tol = {np.float16: 1.0e-2, np.float32: 1.0e-6, np.float64: 1.0e-8}.get(dtype, 0)

    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
    transform = wp.types.transformation(dtype=wptype)

    def check_transform_get_trans_rot(
        s: wp.array(dtype=transform),
        outputs: wp.array(dtype=wptype),
    ):
        trans = wp.transform_get_translation(s[0])
        q = wp.transform_get_rotation(s[0])

        # slots 0..2 hold the doubled translation
        outputs[0] = wptype(2) * trans[0]
        outputs[1] = wptype(2) * trans[1]
        outputs[2] = wptype(2) * trans[2]

        # slots 3..6 hold the doubled rotation
        outputs[3] = wptype(2) * q[0]
        outputs[4] = wptype(2) * q[1]
        outputs[5] = wptype(2) * q[2]
        outputs[6] = wptype(2) * q[3]

    kernel = getkernel(check_transform_get_trans_rot, suffix=dtype.__name__)
    output_select_kernel = get_select_kernel(wptype)

    if register_kernels:
        return

    s = wp.array(rng.standard_normal(size=7).astype(dtype), dtype=transform, requires_grad=True, device=device)
    outputs = wp.zeros(7, dtype=wptype, requires_grad=True, device=device)

    def run_kernel():
        wp.launch(kernel, dim=1, inputs=[s], outputs=[outputs], device=device)

    run_kernel()
    assert_np_equal(outputs.numpy(), 2.0 * s.numpy(), tol=tol)

    for component in range(7):
        cmp = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
        tape = wp.Tape()
        with tape:
            run_kernel()
            wp.launch(output_select_kernel, dim=1, inputs=[outputs, component], outputs=[cmp], device=device)
        tape.backward(loss=cmp)

        # only the selected component should receive the factor of 2
        expected = np.zeros(7)
        expected[component] = 2
        assert_np_equal(tape.gradients[s].numpy(), expected)
        tape.zero()
922
-
923
-
924
def test_transform_multiply(test, device, dtype, register_kernels=False):
    """Validate transform composition via ``*`` and ``wp.transform_multiply``.

    The kernel evaluates the product three ways: with the operator, with the
    built-in function, and by hand from the translation/rotation parts.  The
    operator result is compared with the other two, for both values and
    gradients with respect to each input transform.

    Args:
        test: Not used by this function.
        device: Passed to every array allocation and kernel launch.
        dtype: NumPy scalar type selecting the Warp scalar type and tolerance.
        register_kernels: If true, return right after the kernels are created.
    """
    rng = np.random.default_rng(123)

    # tolerance per scalar type; any other dtype falls back to 0
    tol = {np.float16: 1.0e-2, np.float32: 1.0e-6, np.float64: 1.0e-8}.get(dtype, 0)

    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
    transform = wp.types.transformation(dtype=wptype)

    def check_transform_multiply(
        a: wp.array(dtype=transform),
        b: wp.array(dtype=transform),
        outputs: wp.array(dtype=wptype),
        outputs_fn: wp.array(dtype=wptype),
        outputs_manual: wp.array(dtype=wptype),
    ):
        result = a[0] * b[0]
        result_fn = wp.transform_multiply(a[0], b[0])

        # reference: compose the translation and rotation parts explicitly
        atrans = wp.transform_get_translation(a[0])
        arot = wp.transform_get_rotation(a[0])

        btrans = wp.transform_get_translation(b[0])
        brot = wp.transform_get_rotation(b[0])

        trans = wp.quat_rotate(arot, btrans) + atrans
        rot = arot * brot
        result_manual = transform(trans, rot)

        for i in range(7):
            outputs[i] = wptype(2) * result[i]
            outputs_fn[i] = wptype(2) * result_fn[i]
            outputs_manual[i] = wptype(2) * result_manual[i]

    kernel = getkernel(check_transform_multiply, suffix=dtype.__name__)
    output_select_kernel = get_select_kernel(wptype)

    if register_kernels:
        return

    # draw both transforms, then give each rotation part unit length
    q_np = rng.standard_normal(size=7)
    s_np = rng.standard_normal(size=7)
    q_np[3:] /= np.linalg.norm(q_np[3:])
    s_np[3:] /= np.linalg.norm(s_np[3:])

    q = wp.array(q_np.astype(dtype), dtype=transform, requires_grad=True, device=device)
    s = wp.array(s_np.astype(dtype), dtype=transform, requires_grad=True, device=device)
    outputs = wp.zeros(7, dtype=wptype, requires_grad=True, device=device)
    outputs_fn = wp.zeros(7, dtype=wptype, requires_grad=True, device=device)
    outputs_manual = wp.zeros(7, dtype=wptype, requires_grad=True, device=device)

    def run_kernel():
        wp.launch(
            kernel,
            dim=1,
            inputs=[q, s],
            outputs=[outputs, outputs_fn, outputs_manual],
            device=device,
        )

    def pick(source, index, target):
        # launch the select kernel on (source, index) with target as its output
        wp.launch(output_select_kernel, dim=1, inputs=[source, index], outputs=[target], device=device)

    run_kernel()

    assert_np_equal(outputs.numpy(), outputs_fn.numpy(), tol=tol)
    assert_np_equal(outputs.numpy(), outputs_manual.numpy(), tol=tol)

    for component in range(7):
        cmp = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
        cmp_fn = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
        cmp_manual = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)

        tape = wp.Tape()
        with tape:
            run_kernel()
            pick(outputs, component, cmp)
            pick(outputs_fn, component, cmp_fn)
            pick(outputs_manual, component, cmp_manual)

        def input_grads(loss, tape=tape):
            tape.backward(loss=loss)
            # "1.0 *" produces fresh arrays before the tape is zeroed
            grad_q = 1.0 * tape.gradients[q].numpy()
            grad_s = 1.0 * tape.gradients[s].numpy()
            tape.zero()
            return grad_q, grad_s

        qgrads, sgrads = input_grads(cmp)
        qgrads_fn, sgrads_fn = input_grads(cmp_fn)
        qgrads_manual, sgrads_manual = input_grads(cmp_manual)

        assert_np_equal(qgrads, qgrads_fn, tol=tol)
        assert_np_equal(sgrads, sgrads_fn, tol=tol)

        assert_np_equal(qgrads, qgrads_manual, tol=tol)
        assert_np_equal(sgrads, sgrads_manual, tol=tol)
1031
-
1032
-
1033
def test_transform_inverse(test, device, dtype, register_kernels=False):
    """Validate ``wp.transform_inverse`` values and gradients.

    The kernel computes the inverse of one transform, the product of that
    inverse with the original, and a hand-built inverse from the
    translation/rotation parts.  The product is compared with the doubled
    components ``[0, 0, 0, 0, 0, 0, 2]``, and the built-in inverse is
    compared with the hand-built one for both values and gradients.

    Args:
        test: Not used by this function.
        device: Passed to every array allocation and kernel launch.
        dtype: NumPy scalar type selecting the Warp scalar type and tolerance.
        register_kernels: If true, return right after the kernels are created.
    """
    rng = np.random.default_rng(123)

    # tolerance per scalar type; any other dtype falls back to 0
    tol = {
        np.float16: 1.0e-2,
        np.float32: 1.0e-6,
        np.float64: 1.0e-8,
    }.get(dtype, 0)

    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
    transform = wp.types.transformation(dtype=wptype)

    def check_transform_inverse(
        a: wp.array(dtype=transform),
        outputs: wp.array(dtype=wptype),
        outputs_shouldbeidentity: wp.array(dtype=wptype),
        outputs_manual: wp.array(dtype=wptype),
    ):
        result = wp.transform_inverse(a[0])
        idt = result * a[0]

        # let's just work out the transform inverse manually
        # and compare value/gradients with that:
        atrans = wp.transform_get_translation(a[0])
        arot = wp.transform_get_rotation(a[0])

        rotinv = wp.quat_inverse(arot)
        result_manual = transform(-wp.quat_rotate(rotinv, atrans), rotinv)

        for i in range(7):
            outputs[i] = wptype(2) * result[i]
            outputs_shouldbeidentity[i] = wptype(2) * idt[i]
            outputs_manual[i] = wptype(2) * result_manual[i]

    kernel = getkernel(check_transform_inverse, suffix=dtype.__name__)
    output_select_kernel = get_select_kernel(wptype)

    if register_kernels:
        return

    # Only one transform is needed here.  The previously generated second
    # sample was never used; it was drawn after this one, so dropping it
    # leaves the values of ``q`` unchanged.
    q = rng.standard_normal(size=7)
    q[3:] /= np.linalg.norm(q[3:])  # unit-length rotation part

    q = wp.array(q.astype(dtype), dtype=transform, requires_grad=True, device=device)
    outputs = wp.zeros(7, dtype=wptype, requires_grad=True, device=device)
    outputs_shouldbeidentity = wp.zeros(7, dtype=wptype, requires_grad=True, device=device)
    outputs_manual = wp.zeros(7, dtype=wptype, requires_grad=True, device=device)

    wp.launch(
        kernel,
        dim=1,
        inputs=[
            q,
        ],
        outputs=[outputs, outputs_shouldbeidentity, outputs_manual],
        device=device,
    )

    # check inverse:
    assert_np_equal(outputs_shouldbeidentity.numpy(), np.array([0, 0, 0, 0, 0, 0, 2]), tol=tol)

    # same as manual result:
    assert_np_equal(outputs.numpy(), outputs_manual.numpy(), tol=tol)

    for i in range(7):
        cmp = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
        cmp_manual = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
        tape = wp.Tape()
        with tape:
            wp.launch(
                kernel,
                dim=1,
                inputs=[
                    q,
                ],
                outputs=[outputs, outputs_shouldbeidentity, outputs_manual],
                device=device,
            )
            wp.launch(output_select_kernel, dim=1, inputs=[outputs, i], outputs=[cmp], device=device)
            wp.launch(output_select_kernel, dim=1, inputs=[outputs_manual, i], outputs=[cmp_manual], device=device)
        tape.backward(loss=cmp)
        # "1.0 *" produces a fresh array before the tape is zeroed
        qgrads = 1.0 * tape.gradients[q].numpy()
        tape.zero()
        tape.backward(loss=cmp_manual)
        qgrads_manual = 1.0 * tape.gradients[q].numpy()
        tape.zero()

        # check gradients against manual result:
        assert_np_equal(qgrads, qgrads_manual, tol=tol)
1124
-
1125
-
1126
def test_transform_point_vector(test, device, dtype, register_kernels=False):
    """Validate ``wp.transform_point`` and ``wp.transform_vector``.

    Each built-in is compared with a hand-written expression evaluated in the
    same kernel: rotation plus translation for the point, rotation only for
    the vector.  Both values and gradients with respect to the transform and
    the 3-vector are checked.

    Args:
        test: Not used by this function.
        device: Passed to every array allocation and kernel launch.
        dtype: NumPy scalar type selecting the Warp scalar type and tolerance.
        register_kernels: If true, return right after the kernels are created.
    """
    rng = np.random.default_rng(123)

    # tolerance per scalar type; any other dtype falls back to 0
    tol = {np.float16: 1.0e-2, np.float32: 1.0e-6, np.float64: 1.0e-8}.get(dtype, 0)

    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
    transform = wp.types.transformation(dtype=wptype)
    vec3 = wp.types.vector(length=3, dtype=wptype)

    def check_transform_point_vector(
        t: wp.array(dtype=transform),
        v: wp.array(dtype=vec3),
        outputs_pt: wp.array(dtype=wptype),
        outputs_pt_manual: wp.array(dtype=wptype),
        outputs_vec: wp.array(dtype=wptype),
        outputs_vec_manual: wp.array(dtype=wptype),
    ):
        result_pt = wp.transform_point(t[0], v[0])
        result_pt_manual = wp.transform_get_translation(t[0]) + wp.quat_rotate(wp.transform_get_rotation(t[0]), v[0])

        result_vec = wp.transform_vector(t[0], v[0])
        result_vec_manual = wp.quat_rotate(wp.transform_get_rotation(t[0]), v[0])

        # the factor of 2 gives the backward pass something to propagate
        for i in range(3):
            outputs_pt[i] = wptype(2) * result_pt[i]
            outputs_pt_manual[i] = wptype(2) * result_pt_manual[i]
            outputs_vec[i] = wptype(2) * result_vec[i]
            outputs_vec_manual[i] = wptype(2) * result_vec_manual[i]

    kernel = getkernel(check_transform_point_vector, suffix=dtype.__name__)
    output_select_kernel = get_select_kernel(wptype)

    if register_kernels:
        return

    # random transform whose rotation part has unit length
    t_np = rng.standard_normal(size=7)
    t_np[3:] /= np.linalg.norm(t_np[3:])

    def new_output(size):
        # zero-filled scalar array with gradients enabled
        return wp.zeros(size, dtype=wptype, requires_grad=True, device=device)

    t = wp.array(t_np.astype(dtype), dtype=transform, requires_grad=True, device=device)
    v = wp.array(rng.standard_normal(size=3), dtype=vec3, requires_grad=True, device=device)
    outputs_pt = new_output(3)
    outputs_pt_manual = new_output(3)
    outputs_vec = new_output(3)
    outputs_vec_manual = new_output(3)

    def run_kernel():
        wp.launch(
            kernel,
            dim=1,
            inputs=[t, v],
            outputs=[outputs_pt, outputs_pt_manual, outputs_vec, outputs_vec_manual],
            device=device,
        )

    def pick(source, index, target):
        # launch the select kernel on (source, index) with target as its output
        wp.launch(output_select_kernel, dim=1, inputs=[source, index], outputs=[target], device=device)

    run_kernel()

    # built-ins must agree with the hand-written expressions
    assert_np_equal(outputs_pt.numpy(), outputs_pt_manual.numpy(), tol=tol)
    assert_np_equal(outputs_vec.numpy(), outputs_vec_manual.numpy(), tol=tol)

    for component in range(3):
        cmp_pt = new_output(1)
        cmp_pt_manual = new_output(1)
        cmp_vec = new_output(1)
        cmp_vec_manual = new_output(1)

        tape = wp.Tape()
        with tape:
            run_kernel()
            pick(outputs_pt, component, cmp_pt)
            pick(outputs_pt_manual, component, cmp_pt_manual)
            pick(outputs_vec, component, cmp_vec)
            pick(outputs_vec_manual, component, cmp_vec_manual)

        def input_grads(loss, tape=tape):
            tape.backward(loss=loss)
            # "1.0 *" produces fresh arrays before the tape is zeroed
            grad_t = 1.0 * tape.gradients[t].numpy()
            grad_v = 1.0 * tape.gradients[v].numpy()
            tape.zero()
            return grad_t, grad_v

        tgrads_pt, vgrads_pt = input_grads(cmp_pt)
        tgrads_pt_manual, vgrads_pt_manual = input_grads(cmp_pt_manual)
        tgrads_vec, vgrads_vec = input_grads(cmp_vec)
        tgrads_vec_manual, vgrads_vec_manual = input_grads(cmp_vec_manual)

        # gradients must agree with those of the hand-written expressions
        assert_np_equal(tgrads_pt, tgrads_pt_manual, tol=tol)
        assert_np_equal(vgrads_pt, vgrads_pt_manual, tol=tol)
        assert_np_equal(tgrads_vec, tgrads_vec_manual, tol=tol)
        assert_np_equal(vgrads_vec, vgrads_vec_manual, tol=tol)
1232
-
1233
-
1234
def test_spatial_matrix_constructors(test, device, dtype, register_kernels=False):
    """Validate the 36-scalar and zero-argument 6x6 matrix constructors.

    The kernel builds one matrix from 36 input scalars and one with no
    arguments.  The first is written out multiplied by 2 in row-major order,
    the second unscaled after it; the second half of the output is expected
    to be all zeros.  The gradient is checked for output component 0 only
    (see the note on the loop).

    Args:
        test: Not used by this function.
        device: Passed to every array allocation and kernel launch.
        dtype: NumPy scalar type selecting the Warp scalar type and tolerance.
        register_kernels: If true, return right after the kernels are created.
    """
    rng = np.random.default_rng(123)

    # tolerance per scalar type; any other dtype falls back to 0
    tol = {np.float16: 5.0e-3, np.float32: 1.0e-6, np.float64: 1.0e-8}.get(dtype, 0)

    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
    spatial_matrix = wp.types.matrix(shape=(6, 6), dtype=wptype)

    def check_spatial_matrix_constructor(
        input: wp.array(dtype=wptype),
        out: wp.array(dtype=wptype),
    ):
        # the factor of 2 applied below gives the backward pass something to propagate
        result0 = spatial_matrix(
            input[0],
            input[1],
            input[2],
            input[3],
            input[4],
            input[5],
            input[6],
            input[7],
            input[8],
            input[9],
            input[10],
            input[11],
            input[12],
            input[13],
            input[14],
            input[15],
            input[16],
            input[17],
            input[18],
            input[19],
            input[20],
            input[21],
            input[22],
            input[23],
            input[24],
            input[25],
            input[26],
            input[27],
            input[28],
            input[29],
            input[30],
            input[31],
            input[32],
            input[33],
            input[34],
            input[35],
        )
        result1 = spatial_matrix()

        # first 36 slots: doubled entries of the explicitly built matrix
        idx = 0
        for i in range(6):
            for j in range(6):
                out[idx] = wptype(2) * result0[i, j]
                idx = idx + 1

        # next 36 slots: entries of the argument-less matrix, unscaled
        for i in range(6):
            for j in range(6):
                out[idx] = result1[i, j]
                idx = idx + 1

    kernel = getkernel(check_spatial_matrix_constructor, suffix=dtype.__name__)
    output_select_kernel = get_select_kernel(wptype)

    if register_kernels:
        return

    entries = 6 * 6
    scalars = wp.array(rng.standard_normal(size=entries).astype(dtype), requires_grad=True, device=device)
    flat_out = wp.zeros(2 * entries, dtype=wptype, requires_grad=True, device=device)

    def run_kernel():
        wp.launch(kernel, dim=1, inputs=[scalars], outputs=[flat_out], device=device)

    run_kernel()

    assert_np_equal(flat_out.numpy()[:entries], 2 * scalars.numpy(), tol=tol)
    assert_np_equal(flat_out.numpy()[entries:], np.zeros_like(scalars.numpy()), tol=tol)

    count = len(scalars)
    for component in range(count):
        cmp = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
        tape = wp.Tape()
        with tape:
            run_kernel()
            wp.launch(output_select_kernel, dim=1, inputs=[flat_out, component], outputs=[cmp], device=device)
        tape.backward(loss=cmp)

        expected = np.zeros(count)
        expected[component] = 2
        assert_np_equal(tape.gradients[scalars].numpy(), expected)
        tape.zero()
        # NOTE(review): this unconditional break stops after component 0, so
        # the remaining components are never checked - confirm whether that
        # is intentional (e.g. to limit run time) or a leftover.
        break
1328
-
1329
-
1330
def test_spatial_matrix_indexing(test, device, dtype, register_kernels=False):
    """Check element-wise indexing of a 6x6 matrix and its gradients.

    The kernel reads every ``[i, j]`` entry of the first input matrix and
    writes twice its value to a flat output; each output entry is then
    differentiated separately through a tape.

    Args:
        test: Test-case instance supplied by the test harness (unused here).
        device: Device the arrays are allocated on and kernels launched on.
        dtype: NumPy floating-point scalar type under test.
        register_kernels: When true, only build the kernels and return.
    """
    rng = np.random.default_rng(123)

    tolerances = {np.float16: 5.0e-3, np.float32: 1.0e-6, np.float64: 1.0e-8}
    tol = tolerances.get(dtype, 0)

    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
    spatial_matrix = wp.types.matrix(shape=(6, 6), dtype=wptype)

    def check_spatial_matrix_indexing(
        input: wp.array(dtype=spatial_matrix),
        out: wp.array(dtype=wptype),
    ):
        inpt = input[0]

        # multiply outputs by 2 so we've got something to backpropagate:
        idx = 0
        for i in range(6):
            for j in range(6):
                out[idx] = wptype(2) * inpt[i, j]
                idx = idx + 1

    kernel = getkernel(check_spatial_matrix_indexing, suffix=dtype.__name__)
    output_select_kernel = get_select_kernel(wptype)

    if register_kernels:
        return

    mats = wp.array(
        rng.standard_normal(size=(1, 6, 6)).astype(dtype), dtype=spatial_matrix, requires_grad=True, device=device
    )
    outcmps = wp.zeros(6 * 6, dtype=wptype, requires_grad=True, device=device)

    wp.launch(kernel, dim=1, inputs=[mats], outputs=[outcmps], device=device)
    assert_np_equal(outcmps.numpy(), 2 * mats.numpy().ravel(), tol=tol)

    out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
    for idx in range(6 * 6):
        # flat output index -> (row, column) of the matrix entry it came from
        i, j = divmod(idx, 6)

        tape = wp.Tape()
        with tape:
            wp.launch(kernel, dim=1, inputs=[mats], outputs=[outcmps], device=device)
            wp.launch(output_select_kernel, dim=1, inputs=[outcmps, idx], outputs=[out], device=device)
        tape.backward(loss=out)

        expectedresult = np.zeros((6, 6), dtype=dtype)
        expectedresult[i, j] = 2
        assert_np_equal(tape.gradients[mats].numpy()[0], expectedresult)
        tape.zero()
1383
-
1384
-
1385
def test_spatial_matrix_scalar_multiplication(test, device, dtype, register_kernels=False):
    """Check scalar * matrix and matrix * scalar for 6x6 matrices.

    Both products are written (scaled by 2) to separate flat outputs, compared
    against NumPy, and differentiated entry by entry with respect to the
    scalar and the matrix.

    Args:
        test: Test-case instance supplied by the test harness (unused here).
        device: Device the arrays are allocated on and kernels launched on.
        dtype: NumPy floating-point scalar type under test.
        register_kernels: When true, only build the kernels and return.
    """
    rng = np.random.default_rng(123)

    tolerances = {np.float16: 5.0e-3, np.float32: 1.0e-6, np.float64: 1.0e-8}
    tol = tolerances.get(dtype, 0)

    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
    spatial_matrix = wp.types.matrix(shape=(6, 6), dtype=wptype)

    def check_spatial_matrix_scalar_mul(
        s: wp.array(dtype=wptype),
        q: wp.array(dtype=spatial_matrix),
        outcmps_l: wp.array(dtype=wptype),
        outcmps_r: wp.array(dtype=wptype),
    ):
        lresult = s[0] * q[0]
        rresult = q[0] * s[0]

        # multiply outputs by 2 so we've got something to backpropagate:
        idx = 0
        for i in range(6):
            for j in range(6):
                outcmps_l[idx] = wptype(2) * lresult[i, j]
                outcmps_r[idx] = wptype(2) * rresult[i, j]
                idx = idx + 1

    kernel = getkernel(check_spatial_matrix_scalar_mul, suffix=dtype.__name__)
    output_select_kernel = get_select_kernel(wptype)

    if register_kernels:
        return

    # the scalar is drawn before the matrix; keep this order so the random
    # stream is consumed the same way
    s = wp.array(rng.standard_normal(size=1).astype(dtype), requires_grad=True, device=device)
    q = wp.array(
        rng.standard_normal(size=(1, 6, 6)).astype(dtype), dtype=spatial_matrix, requires_grad=True, device=device
    )

    outcmps_l = wp.zeros(6 * 6, dtype=wptype, requires_grad=True, device=device)
    outcmps_r = wp.zeros(6 * 6, dtype=wptype, requires_grad=True, device=device)

    wp.launch(kernel, dim=1, inputs=[s, q], outputs=[outcmps_l, outcmps_r], device=device)

    assert_np_equal(outcmps_l.numpy(), 2 * s.numpy()[0] * q.numpy(), tol=tol)
    assert_np_equal(outcmps_r.numpy(), 2 * s.numpy()[0] * q.numpy(), tol=tol)

    out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
    for idx in range(6 * 6):
        i, j = divmod(idx, 6)

        # test left/right mul gradients:
        for wrt in (outcmps_l, outcmps_r):
            tape = wp.Tape()
            with tape:
                wp.launch(kernel, dim=1, inputs=[s, q], outputs=[outcmps_l, outcmps_r], device=device)
                wp.launch(output_select_kernel, dim=1, inputs=[wrt, idx], outputs=[out], device=device)
            tape.backward(loss=out)

            # d(2 * s * q[i, j]) / dq is 2 * s at [i, j]; d/ds is 2 * q[i, j]
            expectedresult = np.zeros((6, 6), dtype=dtype)
            expectedresult[i, j] = 2 * s.numpy()[0]
            assert_np_equal(tape.gradients[q].numpy()[0], expectedresult, tol=tol)
            assert_np_equal(tape.gradients[s].numpy()[0], 2 * q.numpy()[0, i, j], tol=tol)
            tape.zero()
1459
-
1460
-
1461
def test_spatial_matrix_add_sub(test, device, dtype, register_kernels=False):
    """Check addition and subtraction of 6x6 matrices and their gradients.

    The kernel writes ``2 * (q + v)`` and ``2 * (q - v)`` entry by entry into
    two flat outputs. Forward values are compared against NumPy and every
    entry of both outputs is differentiated with respect to ``q`` and ``v``.

    Args:
        test: Test-case instance supplied by the test harness (unused here).
        device: Device the arrays are allocated on and kernels launched on.
        dtype: NumPy floating-point scalar type under test.
        register_kernels: When true, only build the kernels and return.
    """
    rng = np.random.default_rng(123)

    tolerances = {np.float16: 5.0e-3, np.float32: 1.0e-6, np.float64: 1.0e-8}
    tol = tolerances.get(dtype, 0)

    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
    spatial_matrix = wp.types.matrix(shape=(6, 6), dtype=wptype)

    def check_spatial_matrix_add_sub(
        q: wp.array(dtype=spatial_matrix),
        v: wp.array(dtype=spatial_matrix),
        outputs_add: wp.array(dtype=wptype),
        outputs_sub: wp.array(dtype=wptype),
    ):
        addresult = q[0] + v[0]
        subresult = q[0] - v[0]

        # multiply outputs by 2 so we've got something to backpropagate:
        idx = 0
        for i in range(6):
            for j in range(6):
                outputs_add[idx] = wptype(2) * addresult[i, j]
                outputs_sub[idx] = wptype(2) * subresult[i, j]
                idx = idx + 1

    kernel = getkernel(check_spatial_matrix_add_sub, suffix=dtype.__name__)
    output_select_kernel = get_select_kernel(wptype)

    if register_kernels:
        return

    q = wp.array(
        rng.standard_normal(size=(1, 6, 6)).astype(dtype), dtype=spatial_matrix, requires_grad=True, device=device
    )
    v = wp.array(
        rng.standard_normal(size=(1, 6, 6)).astype(dtype), dtype=spatial_matrix, requires_grad=True, device=device
    )

    outputs_add = wp.zeros(6 * 6, dtype=wptype, requires_grad=True, device=device)
    outputs_sub = wp.zeros(6 * 6, dtype=wptype, requires_grad=True, device=device)

    wp.launch(kernel, dim=1, inputs=[q, v], outputs=[outputs_add, outputs_sub], device=device)

    assert_np_equal(outputs_add.numpy(), 2 * (q.numpy() + v.numpy()), tol=tol)
    assert_np_equal(outputs_sub.numpy(), 2 * (q.numpy() - v.numpy()), tol=tol)

    out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
    for idx in range(6 * 6):
        i, j = divmod(idx, 6)

        # addition first, then subtraction; the gradient w.r.t. v flips sign
        # for the subtraction output while the gradient w.r.t. q does not
        for selected, v_sign in ((outputs_add, 1), (outputs_sub, -1)):
            tape = wp.Tape()
            with tape:
                wp.launch(kernel, dim=1, inputs=[q, v], outputs=[outputs_add, outputs_sub], device=device)
                wp.launch(output_select_kernel, dim=1, inputs=[selected, idx], outputs=[out], device=device)
            tape.backward(loss=out)

            expectedresult = np.zeros((6, 6), dtype=dtype)
            expectedresult[i, j] = 2
            assert_np_equal(tape.gradients[q].numpy()[0], expectedresult, tol=tol)
            assert_np_equal(tape.gradients[v].numpy()[0], v_sign * expectedresult, tol=tol)
            tape.zero()
1547
-
1548
-
1549
def test_spatial_matvec_multiplication(test, device, dtype, register_kernels=False):
    """Check 6x6 matrix times length-6 vector products and their gradients.

    The kernel writes ``2 * (m * v)`` component by component; the result is
    compared with ``np.matmul`` and each component is differentiated with
    respect to both operands.

    Args:
        test: Test-case instance supplied by the test harness (unused here).
        device: Device the arrays are allocated on and kernels launched on.
        dtype: NumPy floating-point scalar type under test.
        register_kernels: When true, only build the kernels and return.
    """
    rng = np.random.default_rng(123)

    tolerances = {np.float16: 2.0e-2, np.float32: 5.0e-6, np.float64: 1.0e-8}
    tol = tolerances.get(dtype, 0)

    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
    spatial_matrix = wp.types.matrix(shape=(6, 6), dtype=wptype)
    spatial_vector = wp.types.vector(length=6, dtype=wptype)

    output_select_kernel = get_select_kernel(wptype)

    def check_spatial_mat_vec_mul(
        v: wp.array(dtype=spatial_vector),
        m: wp.array(dtype=spatial_matrix),
        outcomponents: wp.array(dtype=wptype),
    ):
        result = m[0] * v[0]

        # multiply outputs by 2 so we've got something to backpropagate:
        idx = 0
        for i in range(6):
            outcomponents[idx] = wptype(2) * result[i]
            idx = idx + 1

    kernel = getkernel(check_spatial_mat_vec_mul, suffix=dtype.__name__)

    if register_kernels:
        return

    # the vector is drawn before the matrix; keep this order so the random
    # stream is consumed the same way
    v = wp.array(
        rng.standard_normal(size=(1, 6)).astype(dtype), dtype=spatial_vector, requires_grad=True, device=device
    )
    m = wp.array(
        rng.standard_normal(size=(1, 6, 6)).astype(dtype), dtype=spatial_matrix, requires_grad=True, device=device
    )
    outcomponents = wp.zeros(6, dtype=wptype, requires_grad=True, device=device)

    wp.launch(kernel, dim=1, inputs=[v, m], outputs=[outcomponents], device=device)

    expected_forward = 2 * np.matmul(m.numpy()[0], v.numpy()[0])
    assert_np_equal(outcomponents.numpy(), expected_forward, tol=tol)

    out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
    for i in range(6):
        tape = wp.Tape()
        with tape:
            wp.launch(kernel, dim=1, inputs=[v, m], outputs=[outcomponents], device=device)
            wp.launch(output_select_kernel, dim=1, inputs=[outcomponents, i], outputs=[out], device=device)
        tape.backward(loss=out)

        # component i is 2 * dot(m[i, :], v): its v-gradient is row i of 2 * m
        # and its m-gradient is 2 * v placed in row i
        assert_np_equal(tape.gradients[v].numpy()[0], 2 * m.numpy()[0, i, :], tol=tol)

        expectedresult = np.zeros((6, 6), dtype=dtype)
        expectedresult[i, :] = 2 * v.numpy()[0]
        assert_np_equal(tape.gradients[m].numpy()[0], expectedresult, tol=tol)

        tape.zero()
1608
-
1609
-
1610
def test_spatial_matmat_multiplication(test, device, dtype, register_kernels=False):
    """Check products of two 6x6 matrices and their gradients.

    The kernel writes ``2 * (m * v)`` entry by entry; the result is compared
    with ``np.matmul`` and each entry is differentiated with respect to both
    factors. Gradient checks use ten times the forward tolerance.

    Args:
        test: Test-case instance supplied by the test harness (unused here).
        device: Device the arrays are allocated on and kernels launched on.
        dtype: NumPy floating-point scalar type under test.
        register_kernels: When true, only build the kernels and return.
    """
    rng = np.random.default_rng(123)

    tolerances = {np.float16: 2.0e-2, np.float32: 5.0e-6, np.float64: 1.0e-8}
    tol = tolerances.get(dtype, 0)

    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
    spatial_matrix = wp.types.matrix(shape=(6, 6), dtype=wptype)

    output_select_kernel = get_select_kernel(wptype)

    def check_mat_mat_mul(
        v: wp.array(dtype=spatial_matrix),
        m: wp.array(dtype=spatial_matrix),
        outcomponents: wp.array(dtype=wptype),
    ):
        result = m[0] * v[0]

        # multiply outputs by 2 so we've got something to backpropagate:
        idx = 0
        for i in range(6):
            for j in range(6):
                outcomponents[idx] = wptype(2) * result[i, j]
                idx = idx + 1

    kernel = getkernel(check_mat_mat_mul, suffix=dtype.__name__)

    if register_kernels:
        return

    # v is drawn before m; keep this order so the random stream is consumed
    # the same way
    v = wp.array(
        rng.standard_normal(size=(1, 6, 6)).astype(dtype), dtype=spatial_matrix, requires_grad=True, device=device
    )
    m = wp.array(
        rng.standard_normal(size=(1, 6, 6)).astype(dtype), dtype=spatial_matrix, requires_grad=True, device=device
    )
    outcomponents = wp.zeros(6 * 6, dtype=wptype, requires_grad=True, device=device)

    wp.launch(kernel, dim=1, inputs=[v, m], outputs=[outcomponents], device=device)

    expected_forward = 2 * np.matmul(m.numpy()[0], v.numpy()[0])
    assert_np_equal(outcomponents.numpy(), expected_forward, tol=tol)

    grad_tol = 10 * tol
    out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
    for idx in range(6 * 6):
        i, j = divmod(idx, 6)

        tape = wp.Tape()
        with tape:
            wp.launch(kernel, dim=1, inputs=[v, m], outputs=[outcomponents], device=device)
            wp.launch(output_select_kernel, dim=1, inputs=[outcomponents, idx], outputs=[out], device=device)
        tape.backward(loss=out)

        # entry (i, j) is 2 * dot(m[i, :], v[:, j]): its v-gradient fills
        # column j with row i of 2 * m ...
        expected = np.zeros((6, 6), dtype=dtype)
        expected[:, j] = 2 * m.numpy()[0, i, :]
        assert_np_equal(tape.gradients[v].numpy()[0], expected, tol=grad_tol)

        # ... and its m-gradient fills row i with column j of 2 * v
        expected = np.zeros((6, 6), dtype=dtype)
        expected[i, :] = 2 * v.numpy()[0, :, j]
        assert_np_equal(tape.gradients[m].numpy()[0], expected, tol=grad_tol)

        tape.zero()
1675
-
1676
-
1677
def test_spatial_mat_transpose(test, device, dtype, register_kernels=False):
    """Check ``wp.transpose`` on a 6x6 matrix and its gradients.

    The kernel writes ``2 * transpose(m)`` entry by entry; the result is
    compared against the NumPy transpose and each entry is differentiated
    with respect to ``m``.

    Args:
        test: Test-case instance supplied by the test harness (unused here).
        device: Device the arrays are allocated on and kernels launched on.
        dtype: NumPy floating-point scalar type under test.
        register_kernels: When true, only build the kernels and return.
    """
    rng = np.random.default_rng(123)

    tolerances = {np.float16: 1.0e-2, np.float32: 1.0e-6, np.float64: 1.0e-8}
    tol = tolerances.get(dtype, 0)

    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
    spatial_matrix = wp.types.matrix(shape=(6, 6), dtype=wptype)

    output_select_kernel = get_select_kernel(wptype)

    def check_spatial_mat_transpose(
        m: wp.array(dtype=spatial_matrix),
        outcomponents: wp.array(dtype=wptype),
    ):
        # multiply outputs by 2 so we've got something to backpropagate:
        mat = wptype(2) * wp.transpose(m[0])

        idx = 0
        for i in range(6):
            for j in range(6):
                outcomponents[idx] = mat[i, j]
                idx = idx + 1

    kernel = getkernel(check_spatial_mat_transpose, suffix=dtype.__name__)

    if register_kernels:
        return

    m = wp.array(
        rng.standard_normal(size=(1, 6, 6)).astype(dtype), dtype=spatial_matrix, requires_grad=True, device=device
    )
    outcomponents = wp.zeros(6 * 6, dtype=wptype, requires_grad=True, device=device)

    wp.launch(kernel, dim=1, inputs=[m], outputs=[outcomponents], device=device)
    assert_np_equal(outcomponents.numpy(), 2 * m.numpy()[0].T, tol=tol)

    out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
    for idx in range(6 * 6):
        i, j = divmod(idx, 6)

        tape = wp.Tape()
        with tape:
            wp.launch(kernel, dim=1, inputs=[m], outputs=[outcomponents], device=device)
            wp.launch(output_select_kernel, dim=1, inputs=[outcomponents, idx], outputs=[out], device=device)
        tape.backward(loss=out)

        # output entry (i, j) is 2 * m[j, i], hence the swapped indices
        expectedresult = np.zeros((6, 6), dtype=dtype)
        expectedresult[j, i] = 2
        assert_np_equal(tape.gradients[m].numpy()[0], expectedresult)
        tape.zero()
1732
-
1733
-
1734
def test_spatial_outer_product(test, device, dtype, register_kernels=False):
    """Check ``wp.outer`` on two length-6 vectors and its gradients.

    The kernel writes ``2 * outer(s, v)`` entry by entry; the result is
    compared against a NumPy broadcasted product and each entry is
    differentiated with respect to both vectors. Gradient checks use ten
    times the forward tolerance.

    Args:
        test: Test-case instance supplied by the test harness (unused here).
        device: Device the arrays are allocated on and kernels launched on.
        dtype: NumPy floating-point scalar type under test.
        register_kernels: When true, only build the kernels and return.
    """
    rng = np.random.default_rng(123)

    tolerances = {np.float16: 5.0e-3, np.float32: 1.0e-6, np.float64: 1.0e-8}
    tol = tolerances.get(dtype, 0)

    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
    spatial_vector = wp.types.vector(length=6, dtype=wptype)

    output_select_kernel = get_select_kernel(wptype)

    def check_spatial_outer_product(
        s: wp.array(dtype=spatial_vector),
        v: wp.array(dtype=spatial_vector),
        outcomponents: wp.array(dtype=wptype),
    ):
        # multiply outputs by 2 so we've got something to backpropagate:
        mresult = wptype(2) * wp.outer(s[0], v[0])

        idx = 0
        for i in range(6):
            for j in range(6):
                outcomponents[idx] = mresult[i, j]
                idx = idx + 1

    kernel = getkernel(check_spatial_outer_product, suffix=dtype.__name__)

    if register_kernels:
        return

    s = wp.array(
        rng.standard_normal(size=(1, 6)).astype(dtype), dtype=spatial_vector, requires_grad=True, device=device
    )
    v = wp.array(
        rng.standard_normal(size=(1, 6)).astype(dtype), dtype=spatial_vector, requires_grad=True, device=device
    )
    outcomponents = wp.zeros(6 * 6, dtype=wptype, requires_grad=True, device=device)

    wp.launch(kernel, dim=1, inputs=[s, v], outputs=[outcomponents], device=device)

    assert_np_equal(outcomponents.numpy(), 2 * s.numpy()[0, :, None] * v.numpy()[0, None, :], tol=tol)

    grad_tol = 10 * tol
    out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
    for idx in range(6 * 6):
        i, j = divmod(idx, 6)

        tape = wp.Tape()
        with tape:
            wp.launch(kernel, dim=1, inputs=[s, v], outputs=[outcomponents], device=device)
            wp.launch(output_select_kernel, dim=1, inputs=[outcomponents, idx], outputs=[out], device=device)
        tape.backward(loss=out)

        # this component is 2 * s_i * v_j, so its s gradient is nonzero only
        # at the ith component and its v gradient only at the jth component:
        expectedresult = np.zeros((6), dtype=dtype)
        expectedresult[i] = 2 * v.numpy()[0, j]
        assert_np_equal(tape.gradients[s].numpy()[0], expectedresult, tol=grad_tol)

        expectedresult = np.zeros((6), dtype=dtype)
        expectedresult[j] = 2 * s.numpy()[0, i]
        assert_np_equal(tape.gradients[v].numpy()[0], expectedresult, tol=grad_tol)
        tape.zero()
1812
-
1813
-
1814
def test_spatial_adjoint(test, device, dtype, register_kernels=False):
    """Check ``wp.spatial_adjoint`` on two 3x3 matrices and its gradients.

    The test expects the 6x6 result to hold ``R`` in both diagonal 3x3 blocks,
    ``S`` in the lower-left block and zeros in the upper-right block. The
    kernel writes twice that matrix entry by entry, and each entry is
    differentiated with respect to ``R`` and ``S``.

    Args:
        test: Test-case instance supplied by the test harness (unused here).
        device: Device the arrays are allocated on and kernels launched on.
        dtype: NumPy floating-point scalar type under test.
        register_kernels: When true, only build the kernels and return.
    """
    rng = np.random.default_rng(123)

    tolerances = {np.float16: 5.0e-3, np.float32: 1.0e-6, np.float64: 1.0e-8}
    tol = tolerances.get(dtype, 0)

    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
    mat3 = wp.types.matrix(shape=(3, 3), dtype=wptype)

    output_select_kernel = get_select_kernel(wptype)

    def check_spatial_adjoint(
        R: wp.array(dtype=mat3),
        S: wp.array(dtype=mat3),
        outcomponents: wp.array(dtype=wptype),
    ):
        # multiply outputs by 2 so we've got something to backpropagate:
        mresult = wptype(2) * wp.spatial_adjoint(R[0], S[0])

        idx = 0
        for i in range(6):
            for j in range(6):
                outcomponents[idx] = mresult[i, j]
                idx = idx + 1

    kernel = getkernel(check_spatial_adjoint, suffix=dtype.__name__)

    if register_kernels:
        return

    R = wp.array(rng.standard_normal(size=(1, 3, 3)).astype(dtype), dtype=mat3, requires_grad=True, device=device)
    S = wp.array(rng.standard_normal(size=(1, 3, 3)).astype(dtype), dtype=mat3, requires_grad=True, device=device)
    outcomponents = wp.zeros(6 * 6, dtype=wptype, requires_grad=True, device=device)

    wp.launch(kernel, dim=1, inputs=[R, S], outputs=[outcomponents], device=device)

    result = outcomponents.numpy().reshape(6, 6)
    rotation = R.numpy()
    expected = np.zeros_like(result)
    expected[:3, :3] = rotation
    expected[3:, 3:] = rotation
    expected[3:, :3] = S.numpy()

    assert_np_equal(result, 2 * expected, tol=tol)

    grad_tol = 10 * tol
    out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
    for idx in range(6 * 6):
        i, j = divmod(idx, 6)
        # which 3x3 block the entry lies in, and where inside that block
        block_row, row = divmod(i, 3)
        block_col, col = divmod(j, 3)

        tape = wp.Tape()
        with tape:
            wp.launch(kernel, dim=1, inputs=[R, S], outputs=[outcomponents], device=device)
            wp.launch(output_select_kernel, dim=1, inputs=[outcomponents, idx], outputs=[out], device=device)
        tape.backward(loss=out)

        # entries in either diagonal block come from R, so only they
        # contribute an R gradient:
        expectedresult = np.zeros((3, 3), dtype=dtype)
        if block_row == block_col:
            expectedresult[row, col] = 2
        assert_np_equal(tape.gradients[R].numpy()[0], expectedresult, tol=grad_tol)

        # entries in the lower-left block come from S:
        expectedresult = np.zeros((3, 3), dtype=dtype)
        if block_row == 1 and block_col == 0:
            expectedresult[row, col] = 2
        assert_np_equal(tape.gradients[S].numpy()[0], expectedresult, tol=grad_tol)
        tape.zero()
1895
-
1896
-
1897
def test_transform_identity(test, device, dtype, register_kernels=False):
    """Check that ``wp.transform_identity`` yields six zeros followed by a one.

    Two kernels are exercised: one passing ``dtype`` explicitly and one
    relying on the default, whose result is written to a float32 array.

    Args:
        test: Test-case instance supplied by the test harness (unused here).
        device: Device the arrays are allocated on and kernels launched on.
        dtype: NumPy floating-point scalar type under test.
        register_kernels: When true, only build the kernels and return.
    """
    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]

    def transform_identity_test(output: wp.array(dtype=wptype)):
        t = wp.transform_identity(dtype=wptype)
        for i in range(7):
            output[i] = t[i]

    def transform_identity_test_default(output: wp.array(dtype=wp.float32)):
        t = wp.transform_identity()
        for i in range(7):
            output[i] = t[i]

    typed_kernel = getkernel(transform_identity_test, suffix=dtype.__name__)
    default_kernel = getkernel(transform_identity_test_default, suffix=np.float32.__name__)

    if register_kernels:
        return

    # explicit dtype first, then check that the default is float32
    for identity_kernel, out_type in ((typed_kernel, wptype), (default_kernel, wp.float32)):
        output = wp.zeros(7, dtype=out_type, device=device)
        wp.launch(identity_kernel, dim=1, inputs=[], outputs=[output], device=device)

        expected = np.zeros_like(output.numpy())
        expected[-1] = 1
        assert_np_equal(output.numpy(), expected)
1928
-
1929
-
1930
def test_transform_anon_type_instance(test, device, dtype, register_kernels=False):
    """Check building a transform from ``wp.vector`` and ``wp.quaternion`` values.

    The kernel assembles a transformation from seven input scalars and writes
    twice each of its seven components; forward values and per-component
    gradients are compared against the inputs.

    Args:
        test: Test-case instance supplied by the test harness (unused here).
        device: Device the arrays are allocated on and kernels launched on.
        dtype: NumPy floating-point scalar type under test.
        register_kernels: When true, only build the kernels and return.
    """
    rng = np.random.default_rng(123)

    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]

    def transform_create_test(input: wp.array(dtype=wptype), output: wp.array(dtype=wptype)):
        t = wp.transformation(
            wp.vector(input[0], input[1], input[2]), wp.quaternion(input[3], input[4], input[5], input[6])
        )
        # multiply outputs by 2 so we've got something to backpropagate:
        for i in range(7):
            output[i] = wptype(2) * t[i]

    transform_create_kernel = getkernel(transform_create_test, suffix=dtype.__name__)
    output_select_kernel = get_select_kernel(wptype)

    if register_kernels:
        return

    values = wp.array(rng.standard_normal(size=7).astype(dtype), requires_grad=True, device=device)
    output = wp.zeros(7, dtype=wptype, requires_grad=True, device=device)
    wp.launch(transform_create_kernel, dim=1, inputs=[values], outputs=[output], device=device)
    assert_np_equal(output.numpy(), 2 * values.numpy())

    n_values = len(values)
    for i in range(n_values):
        cmp = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
        tape = wp.Tape()
        with tape:
            wp.launch(transform_create_kernel, dim=1, inputs=[values], outputs=[output], device=device)
            wp.launch(output_select_kernel, dim=1, inputs=[output, i], outputs=[cmp], device=device)
        tape.backward(loss=cmp)

        # output[i] == 2 * values[i], so only entry i carries a gradient
        expectedgrads = np.zeros(n_values)
        expectedgrads[i] = 2
        assert_np_equal(tape.gradients[values].numpy(), expectedgrads)
        tape.zero()
1964
-
1965
-
1966
# devices every registered test below is run on
devices = get_test_devices()


class TestSpatial(unittest.TestCase):
    """Empty test case; test methods are attached by the registration calls below."""
1971
-
1972
-
1973
# Test functions attached to TestSpatial, once per floating-point dtype and in
# this order. Each is registered under "<function name>_<dtype name>".
# NOTE(review): test_spatial_mat_transpose is not in this list, so it is never
# registered from here -- confirm whether that omission is intentional.
spatial_test_functions = (
    test_spatial_vector_constructors,
    test_spatial_vector_indexing,
    test_spatial_vector_scalar_multiplication,
    test_spatial_vector_add_sub,
    test_spatial_dot,
    test_spatial_cross,
    test_spatial_top_bottom,
    test_transform_constructors,
    test_transform_anon_type_instance,
    test_transform_identity,
    test_transform_indexing,
    test_transform_get_trans_rot,
    test_transform_multiply,
    test_transform_inverse,
    test_transform_point_vector,
    # are these two valid? They don't seem to be doing things you'd want to do,
    # maybe they should be removed
    test_transform_scalar_multiplication,
    test_transform_add_sub,
    test_spatial_matrix_constructors,
    test_spatial_matrix_indexing,
    test_spatial_matrix_scalar_multiplication,
    test_spatial_matrix_add_sub,
    test_spatial_matvec_multiplication,
    test_spatial_matmat_multiplication,
    test_spatial_outer_product,
    test_spatial_adjoint,
)

for dtype in np_float_types:
    for func in spatial_test_functions:
        add_function_test_register_kernel(
            TestSpatial,
            f"{func.__name__}_{dtype.__name__}",
            func,
            devices=devices,
            dtype=dtype,
        )

    # \TODO: test spatial_mass and spatial_jacobian


if __name__ == "__main__":
    # clear the kernel cache before running the suite
    wp.build.clear_kernel_cache()
    unittest.main(verbosity=2)
1
+ # Copyright (c) 2023 NVIDIA CORPORATION. All rights reserved.
2
+ # NVIDIA CORPORATION and its licensors retain all intellectual property
3
+ # and proprietary rights in and to this software, related documentation
4
+ # and any modifications thereto. Any use, reproduction, disclosure or
5
+ # distribution of this software and related documentation without an express
6
+ # license agreement from NVIDIA CORPORATION is strictly prohibited.
7
+
8
+ import unittest
9
+
10
+ import numpy as np
11
+
12
+ import warp as wp
13
+ from warp.tests.unittest_utils import *
14
+
15
# floating-point NumPy scalar types the tests are instantiated for
np_float_types = [np.float32, np.float64, np.float16]

# kernels created so far, keyed by "<function name>_<suffix>"
kernel_cache = {}


def getkernel(func, suffix=""):
    """Return the kernel for ``func``, creating and caching it on first use.

    Args:
        func: Python function to wrap in a ``wp.Kernel``.
        suffix: String appended to the function name to form the cache key.

    Returns:
        The cached kernel stored under ``"<func.__name__>_<suffix>"``.
    """
    key = "_".join((func.__name__, suffix))
    try:
        return kernel_cache[key]
    except KeyError:
        pass

    # first request for this key: create the kernel and remember it
    created = wp.Kernel(func=func, key=key)
    kernel_cache[key] = created
    return kernel_cache[key]
25
+
26
+
27
def get_select_kernel(dtype):
    """Return a cached kernel that copies ``input[index]`` into ``out[0]``.

    Args:
        dtype: Element type of the input and output arrays; its ``__name__``
            is used as the cache-key suffix.
    """

    def output_select_kernel_fn(
        input: wp.array(dtype=dtype),
        index: int,
        out: wp.array(dtype=dtype),
    ):
        # pick out a single component so it can be used as a scalar loss
        out[0] = input[index]

    select_kernel = getkernel(output_select_kernel_fn, suffix=dtype.__name__)
    return select_kernel
36
+
37
+
38
+ ############################################################
39
+
40
+
41
def test_spatial_vector_constructors(test, device, dtype, register_kernels=False):
    """Check both 6-component vector constructors and their gradients.

    One kernel builds the vector from six scalars, the other from two
    3-component vectors. Each kernel doubles every component, so the forward
    result must equal twice the input, and the gradient of output component
    ``i`` with respect to the input must be 2 at index ``i`` and 0 elsewhere.

    Args:
        test: Test-case object supplied by the caller; not used in this function.
        device: Device on which arrays are allocated and kernels are launched.
        dtype: NumPy floating-point scalar type under test.
        register_kernels: If true, only create the kernels and return without
            launching anything.
    """
    rng = np.random.default_rng(123)

    tol = {
        np.float16: 5.0e-3,
        np.float32: 1.0e-6,
        np.float64: 1.0e-8,
    }.get(dtype, 0)

    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
    vec3 = wp.types.vector(length=3, dtype=wptype)
    spatial_vector = wp.types.vector(length=6, dtype=wptype)

    def check_spatial_vector_component_constructor(
        input: wp.array(dtype=wptype),
        out: wp.array(dtype=wptype),
    ):
        result = spatial_vector(input[0], input[1], input[2], input[3], input[4], input[5])

        # multiply the output by 2 so we've got something to backpropagate:
        out[0] = wptype(2) * result[0]
        out[1] = wptype(2) * result[1]
        out[2] = wptype(2) * result[2]
        out[3] = wptype(2) * result[3]
        out[4] = wptype(2) * result[4]
        out[5] = wptype(2) * result[5]

    def check_spatial_vector_vector_constructor(
        input: wp.array(dtype=wptype),
        out: wp.array(dtype=wptype),
    ):
        result = spatial_vector(vec3(input[0], input[1], input[2]), vec3(input[3], input[4], input[5]))

        # multiply the output by 2 so we've got something to backpropagate:
        out[0] = wptype(2) * result[0]
        out[1] = wptype(2) * result[1]
        out[2] = wptype(2) * result[2]
        out[3] = wptype(2) * result[3]
        out[4] = wptype(2) * result[4]
        out[5] = wptype(2) * result[5]

    kernel = getkernel(check_spatial_vector_component_constructor, suffix=dtype.__name__)
    output_select_kernel = get_select_kernel(wptype)
    vec_kernel = getkernel(check_spatial_vector_vector_constructor, suffix=dtype.__name__)

    if register_kernels:
        return

    # Run identical forward and backward checks for each constructor kernel.
    # The backward pass launches the kernel under test (rather than always the
    # component-constructor kernel), so the adjoint of the two-vec3 constructor
    # is exercised as well.
    for constructor_kernel in (kernel, vec_kernel):
        # fresh random input per kernel, drawn in the same order as before
        values = wp.array(rng.standard_normal(size=6).astype(dtype), requires_grad=True, device=device)
        doubled = wp.zeros_like(values)
        wp.launch(constructor_kernel, dim=1, inputs=[values], outputs=[doubled], device=device)

        assert_np_equal(doubled.numpy(), 2 * values.numpy(), tol=tol)

        for i in range(len(values)):
            cmp = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
            tape = wp.Tape()
            with tape:
                wp.launch(constructor_kernel, dim=1, inputs=[values], outputs=[doubled], device=device)
                # pick output component i as the scalar loss
                wp.launch(output_select_kernel, dim=1, inputs=[doubled, i], outputs=[cmp], device=device)
            tape.backward(loss=cmp)

            # d(2 * values[i]) / d(values) is 2 at index i and 0 elsewhere
            expectedgrads = np.zeros(len(values))
            expectedgrads[i] = 2
            assert_np_equal(tape.gradients[values].numpy(), expectedgrads)
            tape.zero()
124
+
125
+
126
def test_spatial_vector_indexing(test, device, dtype, register_kernels=False):
    """Check component indexing of a 6-component vector and its gradients.

    The kernel reads each of the six components of the first array element and
    writes twice its value to the output, so the output must equal twice the
    flattened input and each output component's gradient must be 2 at the
    matching input component and 0 elsewhere.

    Args:
        test: Test-case object supplied by the caller; not used in this function.
        device: Device on which arrays are allocated and kernels are launched.
        dtype: NumPy floating-point scalar type under test.
        register_kernels: If true, only create the kernels and return without
            launching anything.
    """
    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
    spatial_vector = wp.types.vector(length=6, dtype=wptype)

    def check_spatial_vector_indexing(
        input: wp.array(dtype=spatial_vector),
        out: wp.array(dtype=wptype),
    ):
        inpt = input[0]

        # multiply outputs by 2 so we've got something to backpropagate:
        idx = 0
        for i in range(6):
            out[idx] = wptype(2) * inpt[i]
            idx = idx + 1

    kernel = getkernel(check_spatial_vector_indexing, suffix=dtype.__name__)
    output_select_kernel = get_select_kernel(wptype)

    if register_kernels:
        return

    tolerances = {np.float16: 5.0e-3, np.float32: 1.0e-6, np.float64: 1.0e-8}
    tol = tolerances.get(dtype, 0)

    rng = np.random.default_rng(123)
    samples = rng.standard_normal(size=(1, 6)).astype(dtype)
    vec_in = wp.array(samples, dtype=spatial_vector, requires_grad=True, device=device)
    doubled = wp.zeros(6, dtype=wptype, requires_grad=True, device=device)

    def run_forward():
        # single-thread launch of the kernel under test
        wp.launch(kernel, dim=1, inputs=[vec_in], outputs=[doubled], device=device)

    # forward values
    run_forward()
    assert_np_equal(doubled.numpy(), 2 * vec_in.numpy().ravel(), tol=tol)

    # gradients, one output component at a time
    picked = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
    for component in range(6):
        tape = wp.Tape()
        with tape:
            run_forward()
            wp.launch(output_select_kernel, dim=1, inputs=[doubled, component], outputs=[picked], device=device)
        tape.backward(loss=picked)

        want = np.zeros(6, dtype=dtype)
        want[component] = 2
        assert_np_equal(tape.gradients[vec_in].numpy()[0], want)
        tape.zero()
176
+
177
+
178
def test_spatial_vector_scalar_multiplication(test, device, dtype, register_kernels=False):
    """Check scalar * vector and vector * scalar for 6-component vectors.

    The kernel computes both products, doubles every component and writes them
    to two output arrays. Forward values are compared with ``2 * s * q`` and the
    gradients with respect to the vector and the scalar are checked for every
    output component of both products.

    Args:
        test: Test-case object supplied by the caller; not used in this function.
        device: Device on which arrays are allocated and kernels are launched.
        dtype: NumPy floating-point scalar type under test.
        register_kernels: If true, only create the kernels and return without
            launching anything.
    """
    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
    spatial_vector = wp.types.vector(length=6, dtype=wptype)

    def check_spatial_vector_scalar_mul(
        s: wp.array(dtype=wptype),
        q: wp.array(dtype=spatial_vector),
        outcmps_l: wp.array(dtype=wptype),
        outcmps_r: wp.array(dtype=wptype),
    ):
        lresult = s[0] * q[0]
        rresult = q[0] * s[0]

        # multiply outputs by 2 so we've got something to backpropagate:
        for i in range(6):
            outcmps_l[i] = wptype(2) * lresult[i]
            outcmps_r[i] = wptype(2) * rresult[i]

    kernel = getkernel(check_spatial_vector_scalar_mul, suffix=dtype.__name__)
    output_select_kernel = get_select_kernel(wptype)

    if register_kernels:
        return

    tolerances = {np.float16: 5.0e-3, np.float32: 1.0e-6, np.float64: 1.0e-8}
    tol = tolerances.get(dtype, 0)

    # the scalar is drawn before the vector
    rng = np.random.default_rng(123)
    scalar = wp.array(rng.standard_normal(size=1).astype(dtype), requires_grad=True, device=device)
    vector = wp.array(
        rng.standard_normal(size=(1, 6)).astype(dtype),
        dtype=spatial_vector,
        requires_grad=True,
        device=device,
    )

    left_out = wp.zeros(6, dtype=wptype, requires_grad=True, device=device)
    right_out = wp.zeros(6, dtype=wptype, requires_grad=True, device=device)

    def run_forward():
        wp.launch(kernel, dim=1, inputs=[scalar, vector], outputs=[left_out, right_out], device=device)

    run_forward()

    # both products must match 2 * s * q
    assert_np_equal(left_out.numpy(), 2 * scalar.numpy()[0] * vector.numpy(), tol=tol)
    assert_np_equal(right_out.numpy(), 2 * scalar.numpy()[0] * vector.numpy(), tol=tol)

    picked = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
    for component in range(6):
        # gradients of the left product first, then the right product
        for product_out in (left_out, right_out):
            tape = wp.Tape()
            with tape:
                run_forward()
                wp.launch(
                    output_select_kernel, dim=1, inputs=[product_out, component], outputs=[picked], device=device
                )
            tape.backward(loss=picked)

            want_vector_grad = np.zeros(6, dtype=dtype)
            want_vector_grad[component] = 2 * scalar.numpy()[0]
            assert_np_equal(tape.gradients[vector].numpy()[0], want_vector_grad, tol=tol)
            assert_np_equal(tape.gradients[scalar].numpy()[0], 2 * vector.numpy()[0, component], tol=tol)
            tape.zero()
246
+
247
+
248
def test_spatial_vector_add_sub(test, device, dtype, register_kernels=False):
    """Check addition and subtraction of 6-component vectors and their gradients.

    The kernel writes twice the sum and twice the difference of the two input
    vectors. Gradients of each output component are 2 at the matching component
    of the first operand, and +2 (sum) or -2 (difference) at the matching
    component of the second operand.

    Args:
        test: Test-case object supplied by the caller; not used in this function.
        device: Device on which arrays are allocated and kernels are launched.
        dtype: NumPy floating-point scalar type under test.
        register_kernels: If true, only create the kernels and return without
            launching anything.
    """
    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
    spatial_vector = wp.types.vector(length=6, dtype=wptype)

    def check_spatial_vector_add_sub(
        q: wp.array(dtype=spatial_vector),
        v: wp.array(dtype=spatial_vector),
        outputs_add: wp.array(dtype=wptype),
        outputs_sub: wp.array(dtype=wptype),
    ):
        addresult = q[0] + v[0]
        subresult = q[0] - v[0]
        for i in range(6):
            outputs_add[i] = wptype(2) * addresult[i]
            outputs_sub[i] = wptype(2) * subresult[i]

    kernel = getkernel(check_spatial_vector_add_sub, suffix=dtype.__name__)
    output_select_kernel = get_select_kernel(wptype)
    if register_kernels:
        return

    tolerances = {np.float16: 5.0e-3, np.float32: 1.0e-6, np.float64: 1.0e-8}
    tol = tolerances.get(dtype, 0)

    rng = np.random.default_rng(123)
    lhs = wp.array(rng.standard_normal(size=6).astype(dtype), dtype=spatial_vector, requires_grad=True, device=device)
    rhs = wp.array(rng.standard_normal(size=6).astype(dtype), dtype=spatial_vector, requires_grad=True, device=device)

    sum_out = wp.zeros(6, dtype=wptype, requires_grad=True, device=device)
    diff_out = wp.zeros(6, dtype=wptype, requires_grad=True, device=device)

    def run_forward():
        wp.launch(kernel, dim=1, inputs=[lhs, rhs], outputs=[sum_out, diff_out], device=device)

    run_forward()

    assert_np_equal(sum_out.numpy(), 2 * (lhs.numpy() + rhs.numpy()), tol=tol)
    assert_np_equal(diff_out.numpy(), 2 * (lhs.numpy() - rhs.numpy()), tol=tol)

    picked = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
    for component in range(6):
        # addition first (rhs gradient +2), then subtraction (rhs gradient -2)
        for result_out, rhs_sign in ((sum_out, 1), (diff_out, -1)):
            tape = wp.Tape()
            with tape:
                run_forward()
                wp.launch(output_select_kernel, dim=1, inputs=[result_out, component], outputs=[picked], device=device)
            tape.backward(loss=picked)

            want = np.zeros(6, dtype=dtype)
            want[component] = 2
            assert_np_equal(tape.gradients[lhs].numpy()[0], want, tol=tol)
            assert_np_equal(tape.gradients[rhs].numpy()[0], rhs_sign * want, tol=tol)
            tape.zero()
322
+
323
+
324
def test_spatial_dot(test, device, dtype, register_kernels=False):
    """Check ``wp.spatial_dot`` and its gradients for 6-component vectors.

    The kernel writes twice the dot product of the two inputs. The forward value
    is compared with the NumPy element-wise product summed, and the gradient with
    respect to each operand is compared with twice the other operand.

    Args:
        test: Test-case object supplied by the caller; not used in this function.
        device: Device on which arrays are allocated and kernels are launched.
        dtype: NumPy floating-point scalar type under test.
        register_kernels: If true, only create the kernel and return without
            launching anything.
    """
    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
    spatial_vector = wp.types.vector(length=6, dtype=wptype)

    def check_spatial_dot(
        s: wp.array(dtype=spatial_vector),
        v: wp.array(dtype=spatial_vector),
        dot: wp.array(dtype=wptype),
    ):
        dot[0] = wptype(2) * wp.spatial_dot(v[0], s[0])

    kernel = getkernel(check_spatial_dot, suffix=dtype.__name__)
    if register_kernels:
        return

    tolerances = {np.float16: 1.0e-2, np.float32: 1.0e-6, np.float64: 1.0e-8}
    tol = tolerances.get(dtype, 0)

    rng = np.random.default_rng(123)
    first = wp.array(rng.standard_normal(size=6).astype(dtype), dtype=spatial_vector, requires_grad=True, device=device)
    second = wp.array(
        rng.standard_normal(size=6).astype(dtype), dtype=spatial_vector, requires_grad=True, device=device
    )
    result = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)

    tape = wp.Tape()
    with tape:
        wp.launch(kernel, dim=1, inputs=[first, second], outputs=[result], device=device)

    # forward value
    assert_np_equal(result.numpy()[0], 2.0 * (second.numpy() * first.numpy()).sum(), tol=tol)

    tape.backward(loss=result)

    # gradient w.r.t. the first operand is checked with a 10x looser tolerance
    grad_first = tape.gradients[first].numpy()[0]
    assert_np_equal(grad_first, 2.0 * second.numpy()[0], tol=10 * tol)

    grad_second = tape.gradients[second].numpy()[0]
    assert_np_equal(grad_second, 2.0 * first.numpy()[0], tol=tol)
374
+
375
+
376
def test_spatial_cross(test, device, dtype, register_kernels=False):
    """Check ``wp.spatial_cross`` / ``wp.spatial_cross_dual`` values and gradients.

    The kernel writes twice both spatial cross products, plus the four plain
    3D cross products between the top/bottom halves of the two inputs. Forward
    results are compared with NumPy cross products; gradients of the spatial
    products are compared with combinations of the gradients of the plain ones.

    Args:
        test: Test-case object supplied by the caller; not used in this function.
        device: Device on which arrays are allocated and kernels are launched.
        dtype: NumPy floating-point scalar type under test.
        register_kernels: If true, only create the kernels and return without
            launching anything.
    """
    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
    spatial_vector = wp.types.vector(length=6, dtype=wptype)

    def check_spatial_cross(
        s: wp.array(dtype=spatial_vector),
        v: wp.array(dtype=spatial_vector),
        outputs: wp.array(dtype=wptype),
        outputs_dual: wp.array(dtype=wptype),
        outputs_wcrossw: wp.array(dtype=wptype),
        outputs_vcrossw: wp.array(dtype=wptype),
        outputs_wcrossv: wp.array(dtype=wptype),
        outputs_vcrossv: wp.array(dtype=wptype),
    ):
        c = wp.spatial_cross(s[0], v[0])
        d = wp.spatial_cross_dual(s[0], v[0])

        # multiply outputs by 2 so we've got something to backpropagate:
        for i in range(6):
            outputs[i] = wptype(2) * c[i]
            outputs_dual[i] = wptype(2) * d[i]

        sw = wp.spatial_top(s[0])
        sv = wp.spatial_bottom(s[0])
        vw = wp.spatial_top(v[0])
        vv = wp.spatial_bottom(v[0])

        wcrossw = wp.cross(sw, vw)
        vcrossw = wp.cross(sv, vw)
        wcrossv = wp.cross(sw, vv)
        vcrossv = wp.cross(sv, vv)

        for i in range(3):
            outputs_wcrossw[i] = wcrossw[i]
            outputs_vcrossw[i] = vcrossw[i]
            outputs_wcrossv[i] = wcrossv[i]
            outputs_vcrossv[i] = vcrossv[i]

    kernel = getkernel(check_spatial_cross, suffix=dtype.__name__)
    output_select_kernel = get_select_kernel(wptype)

    if register_kernels:
        return

    tolerances = {np.float16: 5.0e-3, np.float32: 1.0e-6, np.float64: 1.0e-8}
    tol = tolerances.get(dtype, 0)

    rng = np.random.default_rng(123)
    s = wp.array(rng.standard_normal(size=6).astype(dtype), dtype=spatial_vector, requires_grad=True, device=device)
    v = wp.array(rng.standard_normal(size=6).astype(dtype), dtype=spatial_vector, requires_grad=True, device=device)

    def scalar_buffer(count):
        # zero-initialised, differentiable scalar array on the test device
        return wp.zeros(count, dtype=wptype, requires_grad=True, device=device)

    cross_out = scalar_buffer(6)
    dual_out = scalar_buffer(6)
    ww_out = scalar_buffer(3)
    vw_out = scalar_buffer(3)
    wv_out = scalar_buffer(3)
    vv_out = scalar_buffer(3)

    def run_forward():
        wp.launch(
            kernel,
            dim=1,
            inputs=[s, v],
            outputs=[cross_out, dual_out, ww_out, vw_out, wv_out, vv_out],
            device=device,
        )

    run_forward()

    # NumPy reference: split each input into its first and last three components
    s_host = s.numpy()[0]
    v_host = v.numpy()[0]
    s_top, s_bottom = s_host[:3], s_host[3:]
    v_top, v_bottom = v_host[:3], v_host[3:]

    ref_ww = np.cross(s_top, v_top)
    ref_vw = np.cross(s_bottom, v_top)
    ref_wv = np.cross(s_top, v_bottom)
    ref_vv = np.cross(s_bottom, v_bottom)

    assert_np_equal(cross_out.numpy()[:3], 2 * ref_ww, tol=tol)
    assert_np_equal(cross_out.numpy()[3:], 2 * (ref_vw + ref_wv), tol=tol)

    assert_np_equal(dual_out.numpy()[:3], 2 * (ref_ww + ref_vv), tol=tol)
    assert_np_equal(dual_out.numpy()[3:], 2 * ref_wv, tol=tol)

    for i in range(3):
        # (array, element) pairs that are differentiated, in evaluation order:
        # spatial_cross top/bottom, spatial_cross_dual top/bottom, then the
        # four plain cross products.
        selections = [
            (cross_out, i),
            (cross_out, i + 3),
            (dual_out, i),
            (dual_out, i + 3),
            (ww_out, i),
            (vw_out, i),
            (wv_out, i),
            (vv_out, i),
        ]
        losses = [scalar_buffer(1) for _ in selections]

        tape = wp.Tape()
        with tape:
            run_forward()
            for (source, element), loss in zip(selections, losses):
                wp.launch(output_select_kernel, dim=1, inputs=[source, element], outputs=[loss], device=device)

        # backpropagate each selected scalar in turn; multiplying by 1.0 yields
        # a new array before the tape's gradients are zeroed
        grads = []
        for loss in losses:
            tape.backward(loss=loss)
            grad_s = 1.0 * tape.gradients[s].numpy()
            grad_v = 1.0 * tape.gradients[v].numpy()
            tape.zero()
            grads.append((grad_s, grad_v))

        (
            (top_ds, top_dv),
            (bottom_ds, bottom_dv),
            (dual_top_ds, dual_top_dv),
            (dual_bottom_ds, dual_bottom_dv),
            (ww_ds, ww_dv),
            (vw_ds, vw_dv),
            (wv_ds, wv_dv),
            (vv_ds, vv_dv),
        ) = grads

        assert_np_equal(top_ds, 2 * ww_ds, tol=tol)
        assert_np_equal(top_dv, 2 * ww_dv, tol=tol)

        assert_np_equal(bottom_ds, 2 * (vw_ds + wv_ds), tol=tol)
        assert_np_equal(bottom_dv, 2 * (vw_dv + wv_dv), tol=tol)

        assert_np_equal(dual_top_ds, 2 * (ww_ds + vv_ds), tol=tol)
        assert_np_equal(dual_top_dv, 2 * (ww_dv + vv_dv), tol=tol)

        assert_np_equal(dual_bottom_ds, 2 * wv_ds, tol=tol)
        assert_np_equal(dual_bottom_dv, 2 * wv_dv, tol=tol)
528
+
529
+
530
def test_spatial_top_bottom(test, device, dtype, register_kernels=False):
    """Check ``wp.spatial_top`` / ``wp.spatial_bottom`` values and gradients.

    The kernel writes twice the three components returned by each function into
    a six-element output, which must equal twice the input vector. The gradient
    of output component ``i`` must be 2 at input component ``i`` and 0 elsewhere.

    Args:
        test: Test-case object supplied by the caller; not used in this function.
        device: Device on which arrays are allocated and kernels are launched.
        dtype: NumPy floating-point scalar type under test.
        register_kernels: If true, only create the kernels and return without
            launching anything.
    """
    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
    spatial_vector = wp.types.vector(length=6, dtype=wptype)

    def check_spatial_top_bottom(
        s: wp.array(dtype=spatial_vector),
        outputs: wp.array(dtype=wptype),
    ):
        top = wp.spatial_top(s[0])
        bottom = wp.spatial_bottom(s[0])

        outputs[0] = wptype(2) * top[0]
        outputs[1] = wptype(2) * top[1]
        outputs[2] = wptype(2) * top[2]

        outputs[3] = wptype(2) * bottom[0]
        outputs[4] = wptype(2) * bottom[1]
        outputs[5] = wptype(2) * bottom[2]

    kernel = getkernel(check_spatial_top_bottom, suffix=dtype.__name__)
    output_select_kernel = get_select_kernel(wptype)

    if register_kernels:
        return

    tolerances = {np.float16: 1.0e-2, np.float32: 1.0e-6, np.float64: 1.0e-8}
    tol = tolerances.get(dtype, 0)

    rng = np.random.default_rng(123)
    vec_in = wp.array(
        rng.standard_normal(size=6).astype(dtype), dtype=spatial_vector, requires_grad=True, device=device
    )
    doubled = wp.zeros(6, dtype=wptype, requires_grad=True, device=device)

    def run_forward():
        wp.launch(kernel, dim=1, inputs=[vec_in], outputs=[doubled], device=device)

    run_forward()
    assert_np_equal(doubled.numpy(), 2.0 * vec_in.numpy(), tol=tol)

    for component in range(6):
        picked = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
        tape = wp.Tape()
        with tape:
            run_forward()
            wp.launch(output_select_kernel, dim=1, inputs=[doubled, component], outputs=[picked], device=device)
        tape.backward(loss=picked)

        # one row of six gradients, matching the (1, 6) layout of the input's gradient
        want = np.zeros((1, 6))
        want[0, component] = 2
        assert_np_equal(tape.gradients[vec_in].numpy(), want)
        tape.zero()
597
+
598
+
599
def test_transform_constructors(test, device, dtype, register_kernels=False):
    """Check construction of a transform from a 3-vector and a quaternion.

    The kernel builds the transform from seven scalars (three for the vector,
    four for the quaternion) and writes twice each of its seven components.
    The output must equal twice the input, and the gradient of output component
    ``i`` must be 2 at input index ``i`` and 0 elsewhere.

    Args:
        test: Test-case object supplied by the caller; not used in this function.
        device: Device on which arrays are allocated and kernels are launched.
        dtype: NumPy floating-point scalar type under test.
        register_kernels: If true, only create the kernels and return without
            launching anything.
    """
    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
    vec3 = wp.types.vector(length=3, dtype=wptype)
    transform = wp.types.transformation(dtype=wptype)
    quat = wp.types.quaternion(dtype=wptype)

    def check_transform_constructor(
        input: wp.array(dtype=wptype),
        out: wp.array(dtype=wptype),
    ):
        result = transform(vec3(input[0], input[1], input[2]), quat(input[3], input[4], input[5], input[6]))

        # multiply the output by 2 so we've got something to backpropagate:
        out[0] = wptype(2) * result[0]
        out[1] = wptype(2) * result[1]
        out[2] = wptype(2) * result[2]
        out[3] = wptype(2) * result[3]
        out[4] = wptype(2) * result[4]
        out[5] = wptype(2) * result[5]
        out[6] = wptype(2) * result[6]

    kernel = getkernel(check_transform_constructor, suffix=dtype.__name__)
    output_select_kernel = get_select_kernel(wptype)

    if register_kernels:
        return

    tolerances = {np.float16: 5.0e-3, np.float32: 1.0e-6, np.float64: 1.0e-8}
    tol = tolerances.get(dtype, 0)

    # random translation followed by a quaternion normalised to unit length
    rng = np.random.default_rng(123)
    translation = rng.standard_normal(size=3).astype(dtype)
    rotation = rng.standard_normal(size=4).astype(dtype)
    rotation /= np.linalg.norm(rotation)

    packed = wp.array(np.concatenate((translation, rotation)), requires_grad=True, device=device)
    doubled = wp.zeros_like(packed)

    def run_forward():
        wp.launch(kernel, dim=1, inputs=[packed], outputs=[doubled], device=device)

    run_forward()
    assert_np_equal(doubled.numpy(), 2 * packed.numpy(), tol=tol)

    count = len(packed)
    for component in range(count):
        picked = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
        tape = wp.Tape()
        with tape:
            run_forward()
            wp.launch(output_select_kernel, dim=1, inputs=[doubled, component], outputs=[picked], device=device)
        tape.backward(loss=picked)

        want = np.zeros(count)
        want[component] = 2
        assert_np_equal(tape.gradients[packed].numpy(), want)
        tape.zero()
656
+
657
+
658
def test_transform_indexing(test, device, dtype, register_kernels=False):
    """Check component indexing of a transform and its gradients.

    The kernel reads each of the seven components of the first array element and
    writes twice its value, so the output must equal twice the flattened input
    and each output component's gradient must be 2 at the matching input
    component and 0 elsewhere.

    Args:
        test: Test-case object supplied by the caller; not used in this function.
        device: Device on which arrays are allocated and kernels are launched.
        dtype: NumPy floating-point scalar type under test.
        register_kernels: If true, only create the kernels and return without
            launching anything.
    """
    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
    transform = wp.types.transformation(dtype=wptype)

    def check_transform_indexing(
        input: wp.array(dtype=transform),
        out: wp.array(dtype=wptype),
    ):
        inpt = input[0]

        # multiply outputs by 2 so we've got something to backpropagate:
        idx = 0
        for i in range(7):
            out[idx] = wptype(2) * inpt[i]
            idx = idx + 1

    kernel = getkernel(check_transform_indexing, suffix=dtype.__name__)
    output_select_kernel = get_select_kernel(wptype)

    if register_kernels:
        return

    tolerances = {np.float16: 5.0e-3, np.float32: 1.0e-6, np.float64: 1.0e-8}
    tol = tolerances.get(dtype, 0)

    rng = np.random.default_rng(123)
    samples = rng.standard_normal(size=(1, 7)).astype(dtype)
    xform_in = wp.array(samples, dtype=transform, requires_grad=True, device=device)
    doubled = wp.zeros(7, dtype=wptype, requires_grad=True, device=device)

    def run_forward():
        wp.launch(kernel, dim=1, inputs=[xform_in], outputs=[doubled], device=device)

    run_forward()
    assert_np_equal(doubled.numpy(), 2 * xform_in.numpy().ravel(), tol=tol)

    picked = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
    for component in range(7):
        tape = wp.Tape()
        with tape:
            run_forward()
            wp.launch(output_select_kernel, dim=1, inputs=[doubled, component], outputs=[picked], device=device)
        tape.backward(loss=picked)

        want = np.zeros(7, dtype=dtype)
        want[component] = 2
        assert_np_equal(tape.gradients[xform_in].numpy()[0], want)
        tape.zero()
705
+
706
+
707
def test_transform_scalar_multiplication(test, device, dtype, register_kernels=False):
    """Check scalar * transform and transform * scalar, with gradients.

    The kernel computes both products, doubles each of the seven components and
    writes them to two output arrays. Forward values are compared with
    ``2 * s * q``; gradients with respect to the transform and the scalar are
    checked for every output component of both products.

    Args:
        test: Test-case object supplied by the caller; not used in this function.
        device: Device on which arrays are allocated and kernels are launched.
        dtype: NumPy floating-point scalar type under test.
        register_kernels: If true, only create the kernels and return without
            launching anything.
    """
    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
    transform = wp.types.transformation(dtype=wptype)

    def check_transform_scalar_mul(
        s: wp.array(dtype=wptype),
        q: wp.array(dtype=transform),
        outcmps_l: wp.array(dtype=wptype),
        outcmps_r: wp.array(dtype=wptype),
    ):
        lresult = s[0] * q[0]
        rresult = q[0] * s[0]

        # multiply outputs by 2 so we've got something to backpropagate:
        for i in range(7):
            outcmps_l[i] = wptype(2) * lresult[i]
            outcmps_r[i] = wptype(2) * rresult[i]

    kernel = getkernel(check_transform_scalar_mul, suffix=dtype.__name__)
    output_select_kernel = get_select_kernel(wptype)

    if register_kernels:
        return

    tolerances = {np.float16: 5.0e-3, np.float32: 1.0e-6, np.float64: 1.0e-8}
    tol = tolerances.get(dtype, 0)

    # the scalar is drawn before the transform
    rng = np.random.default_rng(123)
    scalar = wp.array(rng.standard_normal(size=1).astype(dtype), requires_grad=True, device=device)
    xform = wp.array(
        rng.standard_normal(size=(1, 7)).astype(dtype), dtype=transform, requires_grad=True, device=device
    )

    left_out = wp.zeros(7, dtype=wptype, requires_grad=True, device=device)
    right_out = wp.zeros(7, dtype=wptype, requires_grad=True, device=device)

    def run_forward():
        wp.launch(kernel, dim=1, inputs=[scalar, xform], outputs=[left_out, right_out], device=device)

    run_forward()

    # both products must match 2 * s * q
    assert_np_equal(left_out.numpy(), 2 * scalar.numpy()[0] * xform.numpy(), tol=tol)
    assert_np_equal(right_out.numpy(), 2 * scalar.numpy()[0] * xform.numpy(), tol=tol)

    picked = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
    for component in range(7):
        # gradients of the left product first, then the right product
        for product_out in (left_out, right_out):
            tape = wp.Tape()
            with tape:
                run_forward()
                wp.launch(
                    output_select_kernel, dim=1, inputs=[product_out, component], outputs=[picked], device=device
                )
            tape.backward(loss=picked)

            want_xform_grad = np.zeros(7, dtype=dtype)
            want_xform_grad[component] = 2 * scalar.numpy()[0]
            assert_np_equal(tape.gradients[xform].numpy()[0], want_xform_grad, tol=tol)
            assert_np_equal(tape.gradients[scalar].numpy()[0], 2 * xform.numpy()[0, component], tol=tol)
            tape.zero()
773
+
774
+
775
def test_transform_add_sub(test, device, dtype, register_kernels=False):
    """Check component-wise addition and subtraction of transforms, with gradients.

    The kernel writes twice the sum and twice the difference of the two inputs.
    Gradients of each output component are 2 at the matching component of the
    first operand, and +2 (sum) or -2 (difference) at the matching component of
    the second operand.

    Args:
        test: Test-case object supplied by the caller; not used in this function.
        device: Device on which arrays are allocated and kernels are launched.
        dtype: NumPy floating-point scalar type under test.
        register_kernels: If true, only create the kernels and return without
            launching anything.
    """
    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
    transform = wp.types.transformation(dtype=wptype)

    def check_transform_add_sub(
        q: wp.array(dtype=transform),
        v: wp.array(dtype=transform),
        outputs_add: wp.array(dtype=wptype),
        outputs_sub: wp.array(dtype=wptype),
    ):
        addresult = q[0] + v[0]
        subresult = q[0] - v[0]
        for i in range(7):
            outputs_add[i] = wptype(2) * addresult[i]
            outputs_sub[i] = wptype(2) * subresult[i]

    kernel = getkernel(check_transform_add_sub, suffix=dtype.__name__)
    output_select_kernel = get_select_kernel(wptype)

    if register_kernels:
        return

    tolerances = {np.float16: 5.0e-3, np.float32: 1.0e-6, np.float64: 1.0e-8}
    tol = tolerances.get(dtype, 0)

    rng = np.random.default_rng(123)
    lhs = wp.array(rng.standard_normal(size=7).astype(dtype), dtype=transform, requires_grad=True, device=device)
    rhs = wp.array(rng.standard_normal(size=7).astype(dtype), dtype=transform, requires_grad=True, device=device)

    sum_out = wp.zeros(7, dtype=wptype, requires_grad=True, device=device)
    diff_out = wp.zeros(7, dtype=wptype, requires_grad=True, device=device)

    def run_forward():
        wp.launch(kernel, dim=1, inputs=[lhs, rhs], outputs=[sum_out, diff_out], device=device)

    run_forward()

    assert_np_equal(sum_out.numpy(), 2 * (lhs.numpy() + rhs.numpy()), tol=tol)
    assert_np_equal(diff_out.numpy(), 2 * (lhs.numpy() - rhs.numpy()), tol=tol)

    picked = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
    for component in range(7):
        # addition first (rhs gradient +2), then subtraction (rhs gradient -2)
        for result_out, rhs_sign in ((sum_out, 1), (diff_out, -1)):
            tape = wp.Tape()
            with tape:
                run_forward()
                wp.launch(output_select_kernel, dim=1, inputs=[result_out, component], outputs=[picked], device=device)
            tape.backward(loss=picked)

            want = np.zeros(7, dtype=dtype)
            want[component] = 2
            assert_np_equal(tape.gradients[lhs].numpy()[0], want, tol=tol)
            assert_np_equal(tape.gradients[rhs].numpy()[0], rhs_sign * want, tol=tol)
            tape.zero()
850
+
851
+
852
def test_transform_get_trans_rot(test, device, dtype, register_kernels=False):
    """Check ``wp.transform_get_translation`` / ``wp.transform_get_rotation``.

    The kernel writes twice the three translation components followed by twice
    the four rotation components, which must equal twice the input transform.
    The gradient of output component ``i`` must be 2 at input component ``i``
    and 0 elsewhere.

    Args:
        test: Test-case object supplied by the caller; not used in this function.
        device: Device on which arrays are allocated and kernels are launched.
        dtype: NumPy floating-point scalar type under test.
        register_kernels: If true, only create the kernels and return without
            launching anything.
    """
    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
    transform = wp.types.transformation(dtype=wptype)

    def check_transform_get_trans_rot(
        s: wp.array(dtype=transform),
        outputs: wp.array(dtype=wptype),
    ):
        trans = wp.transform_get_translation(s[0])
        q = wp.transform_get_rotation(s[0])

        outputs[0] = wptype(2) * trans[0]
        outputs[1] = wptype(2) * trans[1]
        outputs[2] = wptype(2) * trans[2]

        outputs[3] = wptype(2) * q[0]
        outputs[4] = wptype(2) * q[1]
        outputs[5] = wptype(2) * q[2]
        outputs[6] = wptype(2) * q[3]

    kernel = getkernel(check_transform_get_trans_rot, suffix=dtype.__name__)
    output_select_kernel = get_select_kernel(wptype)

    if register_kernels:
        return

    tolerances = {np.float16: 1.0e-2, np.float32: 1.0e-6, np.float64: 1.0e-8}
    tol = tolerances.get(dtype, 0)

    rng = np.random.default_rng(123)
    xform_in = wp.array(rng.standard_normal(size=7).astype(dtype), dtype=transform, requires_grad=True, device=device)
    doubled = wp.zeros(7, dtype=wptype, requires_grad=True, device=device)

    def run_forward():
        wp.launch(kernel, dim=1, inputs=[xform_in], outputs=[doubled], device=device)

    run_forward()
    assert_np_equal(doubled.numpy(), 2.0 * xform_in.numpy(), tol=tol)

    for component in range(7):
        picked = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
        tape = wp.Tape()
        with tape:
            run_forward()
            wp.launch(output_select_kernel, dim=1, inputs=[doubled, component], outputs=[picked], device=device)
        tape.backward(loss=picked)

        # one row of seven gradients, matching the (1, 7) layout of the input's gradient
        want = np.zeros((1, 7))
        want[0, component] = 2
        assert_np_equal(tape.gradients[xform_in].numpy(), want)
        tape.zero()
920
+
921
+
922
def test_transform_multiply(test, device, dtype, register_kernels=False):
    """Check transform * transform against wp.transform_multiply and a manual product.

    The kernel evaluates ``a[0] * b[0]``, ``wp.transform_multiply(a[0], b[0])``
    and a hand-composed transform (rotated translation plus quaternion
    product), each scaled by 2. Both the forward values and the gradients
    with respect to the two inputs must agree between the three variants.

    Args:
        test: test-case object supplied by the harness (not used in the body).
        device: device the arrays are created on and the kernels launched on.
        dtype: numpy scalar type selecting the precision under test.
        register_kernels: when true, return right after the kernels have been
            created, without launching anything.
    """
    rng = np.random.default_rng(123)

    tolerances = {np.float16: 1.0e-2, np.float32: 1.0e-6, np.float64: 1.0e-8}
    tol = tolerances.get(dtype, 0)

    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
    transform = wp.types.transformation(dtype=wptype)

    def check_transform_multiply(
        a: wp.array(dtype=transform),
        b: wp.array(dtype=transform),
        outputs: wp.array(dtype=wptype),
        outputs_fn: wp.array(dtype=wptype),
        outputs_manual: wp.array(dtype=wptype),
    ):
        result = a[0] * b[0]
        result_fn = wp.transform_multiply(a[0], b[0])

        # reference implementation: compose the translation and rotation by hand
        atrans = wp.transform_get_translation(a[0])
        arot = wp.transform_get_rotation(a[0])

        btrans = wp.transform_get_translation(b[0])
        brot = wp.transform_get_rotation(b[0])

        trans = wp.quat_rotate(arot, btrans) + atrans
        rot = arot * brot
        result_manual = transform(trans, rot)

        # scale by 2 so the backward pass has a non-trivial factor to propagate
        for i in range(7):
            outputs[i] = wptype(2) * result[i]
            outputs_fn[i] = wptype(2) * result_fn[i]
            outputs_manual[i] = wptype(2) * result_manual[i]

    kernel = getkernel(check_transform_multiply, suffix=dtype.__name__)
    output_select_kernel = get_select_kernel(wptype)

    if register_kernels:
        return

    q_np = rng.standard_normal(size=7)
    s_np = rng.standard_normal(size=7)
    # components 3..6 hold the quaternion; scale it to unit length
    q_np[3:] /= np.linalg.norm(q_np[3:])
    s_np[3:] /= np.linalg.norm(s_np[3:])

    q = wp.array(q_np.astype(dtype), dtype=transform, requires_grad=True, device=device)
    s = wp.array(s_np.astype(dtype), dtype=transform, requires_grad=True, device=device)
    outputs = wp.zeros(7, dtype=wptype, requires_grad=True, device=device)
    outputs_fn = wp.zeros(7, dtype=wptype, requires_grad=True, device=device)
    outputs_manual = wp.zeros(7, dtype=wptype, requires_grad=True, device=device)

    def run_forward():
        # single launch producing all three output variants
        wp.launch(
            kernel,
            dim=1,
            inputs=[q, s],
            outputs=[outputs, outputs_fn, outputs_manual],
            device=device,
        )

    def select_component(src, index, dst):
        # copy src[index] into the one-element loss array dst
        wp.launch(output_select_kernel, dim=1, inputs=[src, index], outputs=[dst], device=device)

    def backward_grads(tape, loss):
        # `1.0 *` produces fresh ndarrays, presumably so the values are not
        # affected by the tape.zero() that follows
        tape.backward(loss=loss)
        grads = (1.0 * tape.gradients[q].numpy(), 1.0 * tape.gradients[s].numpy())
        tape.zero()
        return grads

    run_forward()

    assert_np_equal(outputs.numpy(), outputs_fn.numpy(), tol=tol)
    assert_np_equal(outputs.numpy(), outputs_manual.numpy(), tol=tol)

    for i in range(7):
        cmp = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
        cmp_fn = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
        cmp_manual = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
        tape = wp.Tape()
        with tape:
            run_forward()
            select_component(outputs, i, cmp)
            select_component(outputs_fn, i, cmp_fn)
            select_component(outputs_manual, i, cmp_manual)

        qgrads, sgrads = backward_grads(tape, cmp)
        qgrads_fn, sgrads_fn = backward_grads(tape, cmp_fn)
        qgrads_manual, sgrads_manual = backward_grads(tape, cmp_manual)

        assert_np_equal(qgrads, qgrads_fn, tol=tol)
        assert_np_equal(sgrads, sgrads_fn, tol=tol)

        assert_np_equal(qgrads, qgrads_manual, tol=tol)
        assert_np_equal(sgrads, sgrads_manual, tol=tol)
1029
+
1030
+
1031
def test_transform_inverse(test, device, dtype, register_kernels=False):
    """Check wp.transform_inverse against a manually built inverse.

    The kernel computes the inverse of ``a[0]``, the product of that inverse
    with ``a[0]`` (expected to be the identity transform) and a hand-built
    inverse from the inverted rotation and the negated, rotated translation.
    All outputs are scaled by 2. Forward values and gradients with respect to
    the input are compared between the builtin and the manual variant.

    Args:
        test: test-case object supplied by the harness (not used in the body).
        device: device the arrays are created on and the kernels launched on.
        dtype: numpy scalar type selecting the precision under test.
        register_kernels: when true, return right after the kernels have been
            created, without launching anything.
    """
    rng = np.random.default_rng(123)

    tol = {
        np.float16: 1.0e-2,
        np.float32: 1.0e-6,
        np.float64: 1.0e-8,
    }.get(dtype, 0)

    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
    transform = wp.types.transformation(dtype=wptype)

    def check_transform_inverse(
        a: wp.array(dtype=transform),
        outputs: wp.array(dtype=wptype),
        outputs_shouldbeidentity: wp.array(dtype=wptype),
        outputs_manual: wp.array(dtype=wptype),
    ):
        result = wp.transform_inverse(a[0])
        idt = result * a[0]

        # let's just work out the transform inverse manually
        # and compare value/gradients with that:
        atrans = wp.transform_get_translation(a[0])
        arot = wp.transform_get_rotation(a[0])

        rotinv = wp.quat_inverse(arot)
        result_manual = transform(-wp.quat_rotate(rotinv, atrans), rotinv)

        # scale by 2 so the backward pass has a non-trivial factor to propagate
        for i in range(7):
            outputs[i] = wptype(2) * result[i]
            outputs_shouldbeidentity[i] = wptype(2) * idt[i]
            outputs_manual[i] = wptype(2) * result_manual[i]

    kernel = getkernel(check_transform_inverse, suffix=dtype.__name__)
    output_select_kernel = get_select_kernel(wptype)

    if register_kernels:
        return

    # Only one random transform is needed here; it is the first draw from the
    # generator, so its values are the same as before the unused second draw
    # was dropped.
    q = rng.standard_normal(size=7)
    # components 3..6 hold the quaternion; scale it to unit length
    q[3:] /= np.linalg.norm(q[3:])

    q = wp.array(q.astype(dtype), dtype=transform, requires_grad=True, device=device)
    outputs = wp.zeros(7, dtype=wptype, requires_grad=True, device=device)
    outputs_shouldbeidentity = wp.zeros(7, dtype=wptype, requires_grad=True, device=device)
    outputs_manual = wp.zeros(7, dtype=wptype, requires_grad=True, device=device)

    wp.launch(
        kernel,
        dim=1,
        inputs=[
            q,
        ],
        outputs=[outputs, outputs_shouldbeidentity, outputs_manual],
        device=device,
    )

    # check inverse: 2 * identity transform is zero everywhere except the
    # last component
    assert_np_equal(outputs_shouldbeidentity.numpy(), np.array([0, 0, 0, 0, 0, 0, 2]), tol=tol)

    # same as manual result:
    assert_np_equal(outputs.numpy(), outputs_manual.numpy(), tol=tol)

    for i in range(7):
        cmp = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
        cmp_manual = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
        tape = wp.Tape()
        with tape:
            wp.launch(
                kernel,
                dim=1,
                inputs=[
                    q,
                ],
                outputs=[outputs, outputs_shouldbeidentity, outputs_manual],
                device=device,
            )
            wp.launch(output_select_kernel, dim=1, inputs=[outputs, i], outputs=[cmp], device=device)
            wp.launch(output_select_kernel, dim=1, inputs=[outputs_manual, i], outputs=[cmp_manual], device=device)

        # `1.0 *` produces a fresh ndarray, presumably so the values are not
        # affected by the tape.zero() that follows
        tape.backward(loss=cmp)
        qgrads = 1.0 * tape.gradients[q].numpy()
        tape.zero()
        tape.backward(loss=cmp_manual)
        qgrads_manual = 1.0 * tape.gradients[q].numpy()
        tape.zero()

        # check gradients against manual result:
        assert_np_equal(qgrads, qgrads_manual, tol=tol)
1122
+
1123
+
1124
def test_transform_point_vector(test, device, dtype, register_kernels=False):
    """Check wp.transform_point / wp.transform_vector against manual formulas.

    The manual point transform is translation plus rotated vector; the manual
    vector transform is the rotated vector only. All outputs are scaled by 2.
    Forward values and the gradients with respect to both the transform and
    the vector are compared between builtin and manual variants.

    Args:
        test: test-case object supplied by the harness (not used in the body).
        device: device the arrays are created on and the kernels launched on.
        dtype: numpy scalar type selecting the precision under test.
        register_kernels: when true, return right after the kernels have been
            created, without launching anything.
    """
    rng = np.random.default_rng(123)

    tolerances = {np.float16: 1.0e-2, np.float32: 1.0e-6, np.float64: 1.0e-8}
    tol = tolerances.get(dtype, 0)

    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
    transform = wp.types.transformation(dtype=wptype)
    vec3 = wp.types.vector(length=3, dtype=wptype)

    def check_transform_point_vector(
        t: wp.array(dtype=transform),
        v: wp.array(dtype=vec3),
        outputs_pt: wp.array(dtype=wptype),
        outputs_pt_manual: wp.array(dtype=wptype),
        outputs_vec: wp.array(dtype=wptype),
        outputs_vec_manual: wp.array(dtype=wptype),
    ):
        result_pt = wp.transform_point(t[0], v[0])
        result_pt_manual = wp.transform_get_translation(t[0]) + wp.quat_rotate(wp.transform_get_rotation(t[0]), v[0])

        result_vec = wp.transform_vector(t[0], v[0])
        result_vec_manual = wp.quat_rotate(wp.transform_get_rotation(t[0]), v[0])

        # scale by 2 so the backward pass has a non-trivial factor to propagate
        for i in range(3):
            outputs_pt[i] = wptype(2) * result_pt[i]
            outputs_pt_manual[i] = wptype(2) * result_pt_manual[i]
            outputs_vec[i] = wptype(2) * result_vec[i]
            outputs_vec_manual[i] = wptype(2) * result_vec_manual[i]

    kernel = getkernel(check_transform_point_vector, suffix=dtype.__name__)
    output_select_kernel = get_select_kernel(wptype)

    if register_kernels:
        return

    t_np = rng.standard_normal(size=7)
    # components 3..6 hold the quaternion; scale it to unit length
    t_np[3:] /= np.linalg.norm(t_np[3:])

    t = wp.array(t_np.astype(dtype), dtype=transform, requires_grad=True, device=device)
    v = wp.array(rng.standard_normal(size=3), dtype=vec3, requires_grad=True, device=device)
    outputs_pt = wp.zeros(3, dtype=wptype, requires_grad=True, device=device)
    outputs_pt_manual = wp.zeros(3, dtype=wptype, requires_grad=True, device=device)
    outputs_vec = wp.zeros(3, dtype=wptype, requires_grad=True, device=device)
    outputs_vec_manual = wp.zeros(3, dtype=wptype, requires_grad=True, device=device)

    def run_forward():
        # single launch producing all four output variants
        wp.launch(
            kernel,
            dim=1,
            inputs=[t, v],
            outputs=[outputs_pt, outputs_pt_manual, outputs_vec, outputs_vec_manual],
            device=device,
        )

    def select_component(src, index, dst):
        # copy src[index] into the one-element loss array dst
        wp.launch(output_select_kernel, dim=1, inputs=[src, index], outputs=[dst], device=device)

    def backward_grads(tape, loss):
        # `1.0 *` produces fresh ndarrays, presumably so the values are not
        # affected by the tape.zero() that follows
        tape.backward(loss=loss)
        grads = (1.0 * tape.gradients[t].numpy(), 1.0 * tape.gradients[v].numpy())
        tape.zero()
        return grads

    run_forward()

    # same as manual results:
    assert_np_equal(outputs_pt.numpy(), outputs_pt_manual.numpy(), tol=tol)
    assert_np_equal(outputs_vec.numpy(), outputs_vec_manual.numpy(), tol=tol)

    for i in range(3):
        cmp_pt = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
        cmp_pt_manual = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
        cmp_vec = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
        cmp_vec_manual = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)

        tape = wp.Tape()
        with tape:
            run_forward()
            select_component(outputs_pt, i, cmp_pt)
            select_component(outputs_pt_manual, i, cmp_pt_manual)
            select_component(outputs_vec, i, cmp_vec)
            select_component(outputs_vec_manual, i, cmp_vec_manual)

        tgrads_pt, vgrads_pt = backward_grads(tape, cmp_pt)
        tgrads_pt_manual, vgrads_pt_manual = backward_grads(tape, cmp_pt_manual)
        tgrads_vec, vgrads_vec = backward_grads(tape, cmp_vec)
        tgrads_vec_manual, vgrads_vec_manual = backward_grads(tape, cmp_vec_manual)

        # check gradients against manual result:
        assert_np_equal(tgrads_pt, tgrads_pt_manual, tol=tol)
        assert_np_equal(vgrads_pt, vgrads_pt_manual, tol=tol)
        assert_np_equal(tgrads_vec, tgrads_vec_manual, tol=tol)
        assert_np_equal(vgrads_vec, vgrads_vec_manual, tol=tol)
1230
+
1231
+
1232
def test_spatial_matrix_constructors(test, device, dtype, register_kernels=False):
    """Check the 36-scalar and the zero-argument 6x6 matrix constructors.

    The kernel builds one matrix from 36 scalar inputs and one with no
    arguments, then writes 2x the first matrix followed by the second matrix
    into a flat output of 72 entries. The forward values are checked for all
    entries; the gradient is checked for the first output component only.

    Args:
        test: test-case object supplied by the harness (not used in the body).
        device: device the arrays are created on and the kernels launched on.
        dtype: numpy scalar type selecting the precision under test.
        register_kernels: when true, return right after the kernels have been
            created, without launching anything.
    """
    rng = np.random.default_rng(123)

    tolerances = {np.float16: 5.0e-3, np.float32: 1.0e-6, np.float64: 1.0e-8}
    tol = tolerances.get(dtype, 0)

    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
    spatial_matrix = wp.types.matrix(shape=(6, 6), dtype=wptype)

    def check_spatial_matrix_constructor(
        input: wp.array(dtype=wptype),
        out: wp.array(dtype=wptype),
    ):
        # one scalar argument per matrix entry, six per row
        result0 = spatial_matrix(
            input[0], input[1], input[2], input[3], input[4], input[5],
            input[6], input[7], input[8], input[9], input[10], input[11],
            input[12], input[13], input[14], input[15], input[16], input[17],
            input[18], input[19], input[20], input[21], input[22], input[23],
            input[24], input[25], input[26], input[27], input[28], input[29],
            input[30], input[31], input[32], input[33], input[34], input[35],
        )
        result1 = spatial_matrix()

        # first 36 outputs: the explicit matrix, multiplied by 2 so we've got
        # something to backpropagate
        idx = 0
        for i in range(6):
            for j in range(6):
                out[idx] = wptype(2) * result0[i, j]
                idx = idx + 1

        # last 36 outputs: the default-constructed matrix, unscaled
        for i in range(6):
            for j in range(6):
                out[idx] = result1[i, j]
                idx = idx + 1

    kernel = getkernel(check_spatial_matrix_constructor, suffix=dtype.__name__)
    output_select_kernel = get_select_kernel(wptype)

    if register_kernels:
        return

    n_entries = 6 * 6
    input_arr = wp.array(rng.standard_normal(size=n_entries).astype(dtype), requires_grad=True, device=device)
    output = wp.zeros(2 * n_entries, dtype=wptype, requires_grad=True, device=device)

    wp.launch(kernel, dim=1, inputs=[input_arr], outputs=[output], device=device)

    assert_np_equal(output.numpy()[:n_entries], 2 * input_arr.numpy(), tol=tol)
    assert_np_equal(output.numpy()[n_entries:], np.zeros_like(input_arr.numpy()), tol=tol)

    for i in range(len(input_arr)):
        cmp = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
        tape = wp.Tape()
        with tape:
            wp.launch(kernel, dim=1, inputs=[input_arr], outputs=[output], device=device)
            wp.launch(output_select_kernel, dim=1, inputs=[output, i], outputs=[cmp], device=device)
        tape.backward(loss=cmp)

        expectedgrads = np.zeros(len(input_arr))
        expectedgrads[i] = 2
        assert_np_equal(tape.gradients[input_arr].numpy(), expectedgrads)
        tape.zero()
        # NOTE(review): the loop stops after the first component, so only
        # i == 0 is gradient-checked -- confirm this is intentional.
        break
1326
+
1327
+
1328
def test_spatial_matrix_indexing(test, device, dtype, register_kernels=False):
    """Check element access ``m[i, j]`` on a 6x6 matrix, value and gradient.

    The kernel writes 2x every entry of the input matrix into a flat output.
    For each entry, the gradient with respect to the input must be 2 at that
    entry and zero elsewhere.

    Args:
        test: test-case object supplied by the harness (not used in the body).
        device: device the arrays are created on and the kernels launched on.
        dtype: numpy scalar type selecting the precision under test.
        register_kernels: when true, return right after the kernels have been
            created, without launching anything.
    """
    rng = np.random.default_rng(123)

    tolerances = {np.float16: 5.0e-3, np.float32: 1.0e-6, np.float64: 1.0e-8}
    tol = tolerances.get(dtype, 0)

    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
    spatial_matrix = wp.types.matrix(shape=(6, 6), dtype=wptype)

    def check_spatial_matrix_indexing(
        input: wp.array(dtype=spatial_matrix),
        out: wp.array(dtype=wptype),
    ):
        inpt = input[0]

        # multiply outputs by 2 so we've got something to backpropagate:
        idx = 0
        for i in range(6):
            for j in range(6):
                out[idx] = wptype(2) * inpt[i, j]
                idx = idx + 1

    kernel = getkernel(check_spatial_matrix_indexing, suffix=dtype.__name__)
    output_select_kernel = get_select_kernel(wptype)

    if register_kernels:
        return

    mat_np = rng.standard_normal(size=(1, 6, 6)).astype(dtype)
    mat = wp.array(mat_np, dtype=spatial_matrix, requires_grad=True, device=device)
    outcmps = wp.zeros(6 * 6, dtype=wptype, requires_grad=True, device=device)

    wp.launch(kernel, dim=1, inputs=[mat], outputs=[outcmps], device=device)

    assert_np_equal(outcmps.numpy(), 2 * mat.numpy().ravel(), tol=tol)

    out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
    # idx is the flat row-major position of entry (i, j)
    for idx in range(6 * 6):
        i, j = divmod(idx, 6)
        tape = wp.Tape()
        with tape:
            wp.launch(kernel, dim=1, inputs=[mat], outputs=[outcmps], device=device)
            wp.launch(output_select_kernel, dim=1, inputs=[outcmps, idx], outputs=[out], device=device)
        tape.backward(loss=out)

        expectedresult = np.zeros((6, 6), dtype=dtype)
        expectedresult[i, j] = 2
        assert_np_equal(tape.gradients[mat].numpy()[0], expectedresult)
        tape.zero()
1381
+
1382
+
1383
def test_spatial_matrix_scalar_multiplication(test, device, dtype, register_kernels=False):
    """Check scalar * matrix and matrix * scalar for 6x6 matrices.

    The kernel writes 2x both products into flat outputs. Forward values are
    compared with ``2 * s * q``; for every entry and for both operand orders
    the gradients with respect to the matrix and the scalar are checked.

    Args:
        test: test-case object supplied by the harness (not used in the body).
        device: device the arrays are created on and the kernels launched on.
        dtype: numpy scalar type selecting the precision under test.
        register_kernels: when true, return right after the kernels have been
            created, without launching anything.
    """
    rng = np.random.default_rng(123)

    tolerances = {np.float16: 5.0e-3, np.float32: 1.0e-6, np.float64: 1.0e-8}
    tol = tolerances.get(dtype, 0)

    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
    spatial_matrix = wp.types.matrix(shape=(6, 6), dtype=wptype)

    def check_spatial_matrix_scalar_mul(
        s: wp.array(dtype=wptype),
        q: wp.array(dtype=spatial_matrix),
        outcmps_l: wp.array(dtype=wptype),
        outcmps_r: wp.array(dtype=wptype),
    ):
        lresult = s[0] * q[0]
        rresult = q[0] * s[0]

        # multiply outputs by 2 so we've got something to backpropagate:
        idx = 0
        for i in range(6):
            for j in range(6):
                outcmps_l[idx] = wptype(2) * lresult[i, j]
                outcmps_r[idx] = wptype(2) * rresult[i, j]
                idx = idx + 1

    kernel = getkernel(check_spatial_matrix_scalar_mul, suffix=dtype.__name__)
    output_select_kernel = get_select_kernel(wptype)

    if register_kernels:
        return

    s = wp.array(rng.standard_normal(size=1).astype(dtype), requires_grad=True, device=device)
    q_np = rng.standard_normal(size=(1, 6, 6)).astype(dtype)
    q = wp.array(q_np, dtype=spatial_matrix, requires_grad=True, device=device)

    outcmps_l = wp.zeros(6 * 6, dtype=wptype, requires_grad=True, device=device)
    outcmps_r = wp.zeros(6 * 6, dtype=wptype, requires_grad=True, device=device)

    def run_forward():
        wp.launch(kernel, dim=1, inputs=[s, q], outputs=[outcmps_l, outcmps_r], device=device)

    run_forward()

    assert_np_equal(outcmps_l.numpy(), 2 * s.numpy()[0] * q.numpy(), tol=tol)
    assert_np_equal(outcmps_r.numpy(), 2 * s.numpy()[0] * q.numpy(), tol=tol)

    out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
    # idx is the flat row-major position of entry (i, j)
    for idx in range(6 * 6):
        i, j = divmod(idx, 6)
        # test left/right mul gradients:
        for wrt in (outcmps_l, outcmps_r):
            tape = wp.Tape()
            with tape:
                run_forward()
                wp.launch(output_select_kernel, dim=1, inputs=[wrt, idx], outputs=[out], device=device)
            tape.backward(loss=out)

            expectedresult = np.zeros((6, 6), dtype=dtype)
            expectedresult[i, j] = 2 * s.numpy()[0]
            assert_np_equal(tape.gradients[q].numpy()[0], expectedresult, tol=tol)
            assert_np_equal(tape.gradients[s].numpy()[0], 2 * q.numpy()[0, i, j], tol=tol)
            tape.zero()
1457
+
1458
+
1459
def test_spatial_matrix_add_sub(test, device, dtype, register_kernels=False):
    """Check 6x6 matrix addition and subtraction, values and gradients.

    The kernel writes 2x the sum and 2x the difference into flat outputs.
    For every entry, the gradient with respect to the first operand must be
    +2 at that entry; with respect to the second operand it must be +2 for
    the sum and -2 for the difference.

    Args:
        test: test-case object supplied by the harness (not used in the body).
        device: device the arrays are created on and the kernels launched on.
        dtype: numpy scalar type selecting the precision under test.
        register_kernels: when true, return right after the kernels have been
            created, without launching anything.
    """
    rng = np.random.default_rng(123)

    tolerances = {np.float16: 5.0e-3, np.float32: 1.0e-6, np.float64: 1.0e-8}
    tol = tolerances.get(dtype, 0)

    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
    spatial_matrix = wp.types.matrix(shape=(6, 6), dtype=wptype)

    def check_spatial_matrix_add_sub(
        q: wp.array(dtype=spatial_matrix),
        v: wp.array(dtype=spatial_matrix),
        outputs_add: wp.array(dtype=wptype),
        outputs_sub: wp.array(dtype=wptype),
    ):
        addresult = q[0] + v[0]
        subresult = q[0] - v[0]
        # scale by 2 so the backward pass has a non-trivial factor to propagate
        idx = 0
        for i in range(6):
            for j in range(6):
                outputs_add[idx] = wptype(2) * addresult[i, j]
                outputs_sub[idx] = wptype(2) * subresult[i, j]
                idx = idx + 1

    kernel = getkernel(check_spatial_matrix_add_sub, suffix=dtype.__name__)
    output_select_kernel = get_select_kernel(wptype)

    if register_kernels:
        return

    q_np = rng.standard_normal(size=(1, 6, 6)).astype(dtype)
    q = wp.array(q_np, dtype=spatial_matrix, requires_grad=True, device=device)
    v_np = rng.standard_normal(size=(1, 6, 6)).astype(dtype)
    v = wp.array(v_np, dtype=spatial_matrix, requires_grad=True, device=device)

    outputs_add = wp.zeros(6 * 6, dtype=wptype, requires_grad=True, device=device)
    outputs_sub = wp.zeros(6 * 6, dtype=wptype, requires_grad=True, device=device)

    def run_forward():
        wp.launch(kernel, dim=1, inputs=[q, v], outputs=[outputs_add, outputs_sub], device=device)

    run_forward()

    assert_np_equal(outputs_add.numpy(), 2 * (q.numpy() + v.numpy()), tol=tol)
    assert_np_equal(outputs_sub.numpy(), 2 * (q.numpy() - v.numpy()), tol=tol)

    out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
    # (output array, whether the gradient w.r.t. v is negated): add first, then sub
    cases = ((outputs_add, False), (outputs_sub, True))
    # idx is the flat row-major position of entry (i, j)
    for idx in range(6 * 6):
        i, j = divmod(idx, 6)
        for selected, negate_v in cases:
            tape = wp.Tape()
            with tape:
                run_forward()
                wp.launch(output_select_kernel, dim=1, inputs=[selected, idx], outputs=[out], device=device)
            tape.backward(loss=out)

            expectedresult = np.zeros((6, 6), dtype=dtype)
            expectedresult[i, j] = 2
            expected_v = -expectedresult if negate_v else expectedresult
            assert_np_equal(tape.gradients[q].numpy()[0], expectedresult, tol=tol)
            assert_np_equal(tape.gradients[v].numpy()[0], expected_v, tol=tol)
            tape.zero()
1545
+
1546
+
1547
def test_spatial_matvec_multiplication(test, device, dtype, register_kernels=False):
    """Check 6x6 matrix times 6-vector, value and gradients.

    The kernel writes 2x ``m[0] * v[0]`` into a flat output. Forward values
    are compared with ``2 * np.matmul``; for each output component ``i`` the
    vector gradient must be 2x row ``i`` of the matrix, and the matrix
    gradient must be 2x the vector in row ``i`` and zero elsewhere.

    Args:
        test: test-case object supplied by the harness (not used in the body).
        device: device the arrays are created on and the kernels launched on.
        dtype: numpy scalar type selecting the precision under test.
        register_kernels: when true, return right after the kernels have been
            created, without launching anything.
    """
    rng = np.random.default_rng(123)

    tolerances = {np.float16: 2.0e-2, np.float32: 5.0e-6, np.float64: 1.0e-8}
    tol = tolerances.get(dtype, 0)

    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
    spatial_matrix = wp.types.matrix(shape=(6, 6), dtype=wptype)
    spatial_vector = wp.types.vector(length=6, dtype=wptype)

    output_select_kernel = get_select_kernel(wptype)

    def check_spatial_mat_vec_mul(
        v: wp.array(dtype=spatial_vector),
        m: wp.array(dtype=spatial_matrix),
        outcomponents: wp.array(dtype=wptype),
    ):
        result = m[0] * v[0]

        # multiply outputs by 2 so we've got something to backpropagate:
        idx = 0
        for i in range(6):
            outcomponents[idx] = wptype(2) * result[i]
            idx = idx + 1

    kernel = getkernel(check_spatial_mat_vec_mul, suffix=dtype.__name__)

    if register_kernels:
        return

    v_np = rng.standard_normal(size=(1, 6)).astype(dtype)
    v = wp.array(v_np, dtype=spatial_vector, requires_grad=True, device=device)
    m_np = rng.standard_normal(size=(1, 6, 6)).astype(dtype)
    m = wp.array(m_np, dtype=spatial_matrix, requires_grad=True, device=device)
    outcomponents = wp.zeros(6, dtype=wptype, requires_grad=True, device=device)

    def run_forward():
        wp.launch(kernel, dim=1, inputs=[v, m], outputs=[outcomponents], device=device)

    run_forward()

    assert_np_equal(outcomponents.numpy(), 2 * np.matmul(m.numpy()[0], v.numpy()[0]), tol=tol)

    out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
    for i in range(6):
        tape = wp.Tape()
        with tape:
            run_forward()
            wp.launch(output_select_kernel, dim=1, inputs=[outcomponents, i], outputs=[out], device=device)
        tape.backward(loss=out)

        assert_np_equal(tape.gradients[v].numpy()[0], 2 * m.numpy()[0, i, :], tol=tol)

        expectedresult = np.zeros((6, 6), dtype=dtype)
        expectedresult[i, :] = 2 * v.numpy()[0]
        assert_np_equal(tape.gradients[m].numpy()[0], expectedresult, tol=tol)

        tape.zero()
1606
+
1607
+
1608
def test_spatial_matmat_multiplication(test, device, dtype, register_kernels=False):
    """Check 6x6 matrix times 6x6 matrix, value and gradients.

    The kernel writes 2x ``m[0] * v[0]`` into a flat output. Forward values
    are compared with ``2 * np.matmul``; for each output entry ``(i, j)`` the
    gradient with respect to ``v`` must be 2x row ``i`` of ``m`` placed in
    column ``j``, and the gradient with respect to ``m`` must be 2x column
    ``j`` of ``v`` placed in row ``i``. Gradient checks use ``10 * tol``.

    Args:
        test: test-case object supplied by the harness (not used in the body).
        device: device the arrays are created on and the kernels launched on.
        dtype: numpy scalar type selecting the precision under test.
        register_kernels: when true, return right after the kernels have been
            created, without launching anything.
    """
    rng = np.random.default_rng(123)

    tolerances = {np.float16: 2.0e-2, np.float32: 5.0e-6, np.float64: 1.0e-8}
    tol = tolerances.get(dtype, 0)

    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
    spatial_matrix = wp.types.matrix(shape=(6, 6), dtype=wptype)

    output_select_kernel = get_select_kernel(wptype)

    def check_mat_mat_mul(
        v: wp.array(dtype=spatial_matrix),
        m: wp.array(dtype=spatial_matrix),
        outcomponents: wp.array(dtype=wptype),
    ):
        result = m[0] * v[0]

        # multiply outputs by 2 so we've got something to backpropagate:
        idx = 0
        for i in range(6):
            for j in range(6):
                outcomponents[idx] = wptype(2) * result[i, j]
                idx = idx + 1

    kernel = getkernel(check_mat_mat_mul, suffix=dtype.__name__)

    if register_kernels:
        return

    v_np = rng.standard_normal(size=(1, 6, 6)).astype(dtype)
    v = wp.array(v_np, dtype=spatial_matrix, requires_grad=True, device=device)
    m_np = rng.standard_normal(size=(1, 6, 6)).astype(dtype)
    m = wp.array(m_np, dtype=spatial_matrix, requires_grad=True, device=device)
    outcomponents = wp.zeros(6 * 6, dtype=wptype, requires_grad=True, device=device)

    def run_forward():
        wp.launch(kernel, dim=1, inputs=[v, m], outputs=[outcomponents], device=device)

    run_forward()

    assert_np_equal(outcomponents.numpy(), 2 * np.matmul(m.numpy()[0], v.numpy()[0]), tol=tol)

    out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
    grad_tol = 10 * tol
    # idx is the flat row-major position of entry (i, j)
    for idx in range(6 * 6):
        i, j = divmod(idx, 6)
        tape = wp.Tape()
        with tape:
            run_forward()
            wp.launch(output_select_kernel, dim=1, inputs=[outcomponents, idx], outputs=[out], device=device)
        tape.backward(loss=out)

        expected_v = np.zeros((6, 6), dtype=dtype)
        expected_v[:, j] = 2 * m.numpy()[0, i, :]
        assert_np_equal(tape.gradients[v].numpy()[0], expected_v, tol=grad_tol)

        expected_m = np.zeros((6, 6), dtype=dtype)
        expected_m[i, :] = 2 * v.numpy()[0, :, j]
        assert_np_equal(tape.gradients[m].numpy()[0], expected_m, tol=grad_tol)

        tape.zero()
1673
+
1674
+
1675
def test_spatial_mat_transpose(test, device, dtype, register_kernels=False):
    """Check wp.transpose on a 6x6 matrix, value and gradient.

    The kernel writes 2x the transposed input into a flat output. For output
    entry ``(i, j)`` the gradient with respect to the input must be 2 at
    ``(j, i)`` and zero elsewhere.

    Args:
        test: test-case object supplied by the harness (not used in the body).
        device: device the arrays are created on and the kernels launched on.
        dtype: numpy scalar type selecting the precision under test.
        register_kernels: when true, return right after the kernels have been
            created, without launching anything.
    """
    rng = np.random.default_rng(123)

    tolerances = {np.float16: 1.0e-2, np.float32: 1.0e-6, np.float64: 1.0e-8}
    tol = tolerances.get(dtype, 0)

    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
    spatial_matrix = wp.types.matrix(shape=(6, 6), dtype=wptype)

    output_select_kernel = get_select_kernel(wptype)

    def check_spatial_mat_transpose(
        m: wp.array(dtype=spatial_matrix),
        outcomponents: wp.array(dtype=wptype),
    ):
        # multiply outputs by 2 so we've got something to backpropagate:
        mat = wptype(2) * wp.transpose(m[0])

        idx = 0
        for i in range(6):
            for j in range(6):
                outcomponents[idx] = mat[i, j]
                idx = idx + 1

    kernel = getkernel(check_spatial_mat_transpose, suffix=dtype.__name__)

    if register_kernels:
        return

    m_np = rng.standard_normal(size=(1, 6, 6)).astype(dtype)
    m = wp.array(m_np, dtype=spatial_matrix, requires_grad=True, device=device)
    outcomponents = wp.zeros(6 * 6, dtype=wptype, requires_grad=True, device=device)

    wp.launch(kernel, dim=1, inputs=[m], outputs=[outcomponents], device=device)

    assert_np_equal(outcomponents.numpy(), 2 * m.numpy()[0].T, tol=tol)

    out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
    # idx is the flat row-major position of output entry (i, j)
    for idx in range(6 * 6):
        i, j = divmod(idx, 6)
        tape = wp.Tape()
        with tape:
            wp.launch(kernel, dim=1, inputs=[m], outputs=[outcomponents], device=device)
            wp.launch(output_select_kernel, dim=1, inputs=[outcomponents, idx], outputs=[out], device=device)
        tape.backward(loss=out)

        # output (i, j) reads input (j, i)
        expectedresult = np.zeros((6, 6), dtype=dtype)
        expectedresult[j, i] = 2
        assert_np_equal(tape.gradients[m].numpy()[0], expectedresult)
        tape.zero()
1730
+
1731
+
1732
def test_spatial_outer_product(test, device, dtype, register_kernels=False):
    """Check wp.outer on two 6-vectors, value and gradients.

    The kernel writes 2x the outer product of ``s[0]`` and ``v[0]`` into a
    flat output. For output entry ``(i, j)`` the gradient with respect to
    ``s`` must be ``2 * v[j]`` at component ``i``, and the gradient with
    respect to ``v`` must be ``2 * s[i]`` at component ``j``. Gradient checks
    use ``10 * tol``.

    Args:
        test: test-case object supplied by the harness (not used in the body).
        device: device the arrays are created on and the kernels launched on.
        dtype: numpy scalar type selecting the precision under test.
        register_kernels: when true, return right after the kernels have been
            created, without launching anything.
    """
    rng = np.random.default_rng(123)

    tolerances = {np.float16: 5.0e-3, np.float32: 1.0e-6, np.float64: 1.0e-8}
    tol = tolerances.get(dtype, 0)

    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
    spatial_vector = wp.types.vector(length=6, dtype=wptype)

    output_select_kernel = get_select_kernel(wptype)

    def check_spatial_outer_product(
        s: wp.array(dtype=spatial_vector),
        v: wp.array(dtype=spatial_vector),
        outcomponents: wp.array(dtype=wptype),
    ):
        # multiply by 2 so we've got something to backpropagate:
        mresult = wptype(2) * wp.outer(s[0], v[0])

        idx = 0
        for i in range(6):
            for j in range(6):
                outcomponents[idx] = mresult[i, j]
                idx = idx + 1

    kernel = getkernel(check_spatial_outer_product, suffix=dtype.__name__)

    if register_kernels:
        return

    s_np = rng.standard_normal(size=(1, 6)).astype(dtype)
    s = wp.array(s_np, dtype=spatial_vector, requires_grad=True, device=device)
    v_np = rng.standard_normal(size=(1, 6)).astype(dtype)
    v = wp.array(v_np, dtype=spatial_vector, requires_grad=True, device=device)
    outcomponents = wp.zeros(6 * 6, dtype=wptype, requires_grad=True, device=device)

    def run_forward():
        wp.launch(kernel, dim=1, inputs=[s, v], outputs=[outcomponents], device=device)

    run_forward()

    assert_np_equal(outcomponents.numpy(), 2 * s.numpy()[0, :, None] * v.numpy()[0, None, :], tol=tol)

    out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
    grad_tol = 10 * tol

    # idx is the flat row-major position of output entry (i, j)
    for idx in range(6 * 6):
        i, j = divmod(idx, 6)
        tape = wp.Tape()
        with tape:
            run_forward()
            wp.launch(output_select_kernel, dim=1, inputs=[outcomponents, idx], outputs=[out], device=device)
        tape.backward(loss=out)

        # this component is 2 * s_i * v_j, so its s gradient is nonzero only
        # at the ith component and its v gradient only at the jth component:
        expected_s = np.zeros((6), dtype=dtype)
        expected_s[i] = 2 * v.numpy()[0, j]
        assert_np_equal(tape.gradients[s].numpy()[0], expected_s, tol=grad_tol)

        expected_v = np.zeros((6), dtype=dtype)
        expected_v[j] = 2 * s.numpy()[0, i]
        assert_np_equal(tape.gradients[v].numpy()[0], expected_v, tol=grad_tol)
        tape.zero()
1810
+
1811
+
1812
def test_spatial_adjoint(test, device, dtype, register_kernels=False):
    """Check wp.spatial_adjoint built from two 3x3 matrices, value and gradients.

    The kernel writes 2x ``wp.spatial_adjoint(R[0], S[0])`` into a flat
    output. The expected 6x6 result has ``R`` in the upper-left and
    lower-right 3x3 blocks, ``S`` in the lower-left block and zeros in the
    upper-right block. Gradient checks use ``10 * tol``.

    Args:
        test: test-case object supplied by the harness (not used in the body).
        device: device the arrays are created on and the kernels launched on.
        dtype: numpy scalar type selecting the precision under test.
        register_kernels: when true, return right after the kernels have been
            created, without launching anything.
    """
    rng = np.random.default_rng(123)

    tolerances = {np.float16: 5.0e-3, np.float32: 1.0e-6, np.float64: 1.0e-8}
    tol = tolerances.get(dtype, 0)

    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
    mat3 = wp.types.matrix(shape=(3, 3), dtype=wptype)

    output_select_kernel = get_select_kernel(wptype)

    def check_spatial_adjoint(
        R: wp.array(dtype=mat3),
        S: wp.array(dtype=mat3),
        outcomponents: wp.array(dtype=wptype),
    ):
        # multiply by 2 so we've got something to backpropagate:
        mresult = wptype(2) * wp.spatial_adjoint(R[0], S[0])

        idx = 0
        for i in range(6):
            for j in range(6):
                outcomponents[idx] = mresult[i, j]
                idx = idx + 1

    kernel = getkernel(check_spatial_adjoint, suffix=dtype.__name__)

    if register_kernels:
        return

    R = wp.array(rng.standard_normal(size=(1, 3, 3)).astype(dtype), dtype=mat3, requires_grad=True, device=device)
    S = wp.array(rng.standard_normal(size=(1, 3, 3)).astype(dtype), dtype=mat3, requires_grad=True, device=device)
    outcomponents = wp.zeros(6 * 6, dtype=wptype, requires_grad=True, device=device)

    def run_forward():
        wp.launch(kernel, dim=1, inputs=[R, S], outputs=[outcomponents], device=device)

    run_forward()

    result = outcomponents.numpy().reshape(6, 6)
    expected = np.zeros_like(result)
    # block layout: [[R, 0], [S, R]]
    expected[:3, :3] = R.numpy()
    expected[3:, 3:] = R.numpy()
    expected[3:, :3] = S.numpy()

    assert_np_equal(result, 2 * expected, tol=tol)

    out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
    grad_tol = 10 * tol
    # idx is the flat row-major position of output entry (i, j)
    for idx in range(6 * 6):
        i, j = divmod(idx, 6)
        tape = wp.Tape()
        with tape:
            run_forward()
            wp.launch(output_select_kernel, dim=1, inputs=[outcomponents, idx], outputs=[out], device=device)
        tape.backward(loss=out)

        # which 3x3 block the entry lies in, and its position inside that block
        block_row, local_row = divmod(i, 3)
        block_col, local_col = divmod(j, 3)

        # entries in the two diagonal blocks come from R
        expected_R = np.zeros((3, 3), dtype=dtype)
        if block_row == block_col:
            expected_R[local_row, local_col] = 2
        assert_np_equal(tape.gradients[R].numpy()[0], expected_R, tol=grad_tol)

        # entries in the lower-left block come from S
        expected_S = np.zeros((3, 3), dtype=dtype)
        if block_row == 1 and block_col == 0:
            expected_S[local_row, local_col] = 2
        assert_np_equal(tape.gradients[S].numpy()[0], expected_S, tol=grad_tol)
        tape.zero()
1893
+
1894
+
1895
def test_transform_identity(test, device, dtype, register_kernels=False):
    """Check that ``wp.transform_identity`` yields ``(0, 0, 0, 0, 0, 0, 1)``.

    Two kernels are exercised: one passing ``dtype=wptype`` explicitly and one
    calling ``wp.transform_identity()`` with no arguments and writing into a
    ``wp.float32`` array.  When ``register_kernels`` is true only the kernels
    are created and the function returns before anything is launched.
    """
    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]

    def transform_identity_test(output: wp.array(dtype=wptype)):
        t = wp.transform_identity(dtype=wptype)
        for i in range(7):
            output[i] = t[i]

    def transform_identity_test_default(output: wp.array(dtype=wp.float32)):
        t = wp.transform_identity()
        for i in range(7):
            output[i] = t[i]

    typed_kernel = getkernel(transform_identity_test, suffix=dtype.__name__)
    default_kernel = getkernel(transform_identity_test_default, suffix=np.float32.__name__)

    if register_kernels:
        return

    # the explicitly typed kernel first, then the one that relies on the
    # float32 default:
    for identity_kernel, scalar_type in ((typed_kernel, wptype), (default_kernel, wp.float32)):
        output = wp.zeros(7, dtype=scalar_type, device=device)
        wp.launch(identity_kernel, dim=1, inputs=[], outputs=[output], device=device)

        actual = output.numpy()
        reference = np.zeros_like(actual)
        reference[-1] = 1  # only the last of the seven components is one
        assert_np_equal(actual, reference)
1926
+
1927
+
1928
def test_transform_anon_type_instance(test, device, dtype, register_kernels=False):
    """Build a transform from ``wp.vector`` / ``wp.quaternion`` values in a kernel.

    The kernel assembles a ``wp.transformation`` from seven input scalars and
    writes each component multiplied by two.  The forward output is compared
    with twice the input, then every component is back-propagated separately
    and the input gradient is expected to be 2 at that component and 0
    elsewhere.

    When ``register_kernels`` is true only the kernels are created and the
    function returns before anything is launched.
    """
    rng = np.random.default_rng(123)

    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]

    def transform_create_test(input: wp.array(dtype=wptype), output: wp.array(dtype=wptype)):
        t = wp.transformation(
            wp.vector(input[0], input[1], input[2]), wp.quaternion(input[3], input[4], input[5], input[6])
        )
        for i in range(7):
            output[i] = wptype(2) * t[i]

    transform_create_kernel = getkernel(transform_create_test, suffix=dtype.__name__)
    output_select_kernel = get_select_kernel(wptype)

    if register_kernels:
        return

    components = wp.array(rng.standard_normal(size=7).astype(dtype), requires_grad=True, device=device)
    doubled = wp.zeros(7, dtype=wptype, requires_grad=True, device=device)

    # forward pass: every component should come back scaled by two
    wp.launch(transform_create_kernel, dim=1, inputs=[components], outputs=[doubled], device=device)
    assert_np_equal(doubled.numpy(), 2 * components.numpy())

    # backward pass: select one output component at a time as the loss
    component_count = len(components)
    for selected in range(component_count):
        picked = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
        tape = wp.Tape()
        with tape:
            wp.launch(transform_create_kernel, dim=1, inputs=[components], outputs=[doubled], device=device)
            wp.launch(output_select_kernel, dim=1, inputs=[doubled, selected], outputs=[picked], device=device)
        tape.backward(loss=picked)

        expected_grad = np.zeros(component_count)
        expected_grad[selected] = 2
        assert_np_equal(tape.gradients[components].numpy(), expected_grad)
        tape.zero()
1962
+
1963
+
1964
# Devices passed to every test registration below.
devices = get_test_devices()


class TestSpatial(unittest.TestCase):
    """Empty test case; the test methods are attached by the registration calls below."""
1969
+
1970
+
1971
# (test name prefix, test function) pairs, in registration order.
_spatial_test_cases = (
    ("test_spatial_vector_constructors", test_spatial_vector_constructors),
    ("test_spatial_vector_indexing", test_spatial_vector_indexing),
    ("test_spatial_vector_scalar_multiplication", test_spatial_vector_scalar_multiplication),
    ("test_spatial_vector_add_sub", test_spatial_vector_add_sub),
    ("test_spatial_dot", test_spatial_dot),
    ("test_spatial_cross", test_spatial_cross),
    ("test_spatial_top_bottom", test_spatial_top_bottom),
    ("test_transform_constructors", test_transform_constructors),
    ("test_transform_anon_type_instance", test_transform_anon_type_instance),
    ("test_transform_identity", test_transform_identity),
    ("test_transform_indexing", test_transform_indexing),
    ("test_transform_get_trans_rot", test_transform_get_trans_rot),
    ("test_transform_multiply", test_transform_multiply),
    ("test_transform_inverse", test_transform_inverse),
    ("test_transform_point_vector", test_transform_point_vector),
    # are these two valid? They don't seem to be doing things you'd want to do,
    # maybe they should be removed
    ("test_transform_scalar_multiplication", test_transform_scalar_multiplication),
    ("test_transform_add_sub", test_transform_add_sub),
    ("test_spatial_matrix_constructors", test_spatial_matrix_constructors),
    ("test_spatial_matrix_indexing", test_spatial_matrix_indexing),
    ("test_spatial_matrix_scalar_multiplication", test_spatial_matrix_scalar_multiplication),
    ("test_spatial_matrix_add_sub", test_spatial_matrix_add_sub),
    ("test_spatial_matvec_multiplication", test_spatial_matvec_multiplication),
    ("test_spatial_matmat_multiplication", test_spatial_matmat_multiplication),
    ("test_spatial_outer_product", test_spatial_outer_product),
    ("test_spatial_adjoint", test_spatial_adjoint),
)

# Register every test once per floating-point dtype; the dtype name is
# appended to the prefix to form the registered test name.
for dtype in np_float_types:
    for _name_prefix, _test_func in _spatial_test_cases:
        add_function_test_register_kernel(
            TestSpatial,
            f"{_name_prefix}_{dtype.__name__}",
            _test_func,
            devices=devices,
            dtype=dtype,
        )

    # \TODO: test spatial_mass and spatial_jacobian
2142
+
2143
+
2144
if __name__ == "__main__":
    # Clear Warp's kernel cache before the suite runs.
    wp.build.clear_kernel_cache()
    # Run every registered test with verbose per-test output.
    unittest.main(verbosity=2)