warp-lang 1.10.0__py3-none-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release. This version of warp-lang might be problematic; see the registry listing for details.

Files changed (468)
  1. warp/__init__.py +334 -0
  2. warp/__init__.pyi +5856 -0
  3. warp/_src/__init__.py +14 -0
  4. warp/_src/autograd.py +1077 -0
  5. warp/_src/build.py +620 -0
  6. warp/_src/build_dll.py +642 -0
  7. warp/_src/builtins.py +10555 -0
  8. warp/_src/codegen.py +4361 -0
  9. warp/_src/config.py +178 -0
  10. warp/_src/constants.py +59 -0
  11. warp/_src/context.py +8352 -0
  12. warp/_src/dlpack.py +464 -0
  13. warp/_src/fabric.py +362 -0
  14. warp/_src/fem/__init__.py +14 -0
  15. warp/_src/fem/adaptivity.py +510 -0
  16. warp/_src/fem/cache.py +689 -0
  17. warp/_src/fem/dirichlet.py +190 -0
  18. warp/_src/fem/domain.py +553 -0
  19. warp/_src/fem/field/__init__.py +131 -0
  20. warp/_src/fem/field/field.py +703 -0
  21. warp/_src/fem/field/nodal_field.py +403 -0
  22. warp/_src/fem/field/restriction.py +39 -0
  23. warp/_src/fem/field/virtual.py +1021 -0
  24. warp/_src/fem/geometry/__init__.py +32 -0
  25. warp/_src/fem/geometry/adaptive_nanogrid.py +782 -0
  26. warp/_src/fem/geometry/closest_point.py +99 -0
  27. warp/_src/fem/geometry/deformed_geometry.py +277 -0
  28. warp/_src/fem/geometry/element.py +854 -0
  29. warp/_src/fem/geometry/geometry.py +693 -0
  30. warp/_src/fem/geometry/grid_2d.py +478 -0
  31. warp/_src/fem/geometry/grid_3d.py +539 -0
  32. warp/_src/fem/geometry/hexmesh.py +956 -0
  33. warp/_src/fem/geometry/nanogrid.py +660 -0
  34. warp/_src/fem/geometry/partition.py +483 -0
  35. warp/_src/fem/geometry/quadmesh.py +597 -0
  36. warp/_src/fem/geometry/tetmesh.py +762 -0
  37. warp/_src/fem/geometry/trimesh.py +588 -0
  38. warp/_src/fem/integrate.py +2507 -0
  39. warp/_src/fem/linalg.py +385 -0
  40. warp/_src/fem/operator.py +398 -0
  41. warp/_src/fem/polynomial.py +231 -0
  42. warp/_src/fem/quadrature/__init__.py +17 -0
  43. warp/_src/fem/quadrature/pic_quadrature.py +318 -0
  44. warp/_src/fem/quadrature/quadrature.py +665 -0
  45. warp/_src/fem/space/__init__.py +248 -0
  46. warp/_src/fem/space/basis_function_space.py +499 -0
  47. warp/_src/fem/space/basis_space.py +681 -0
  48. warp/_src/fem/space/dof_mapper.py +253 -0
  49. warp/_src/fem/space/function_space.py +312 -0
  50. warp/_src/fem/space/grid_2d_function_space.py +179 -0
  51. warp/_src/fem/space/grid_3d_function_space.py +229 -0
  52. warp/_src/fem/space/hexmesh_function_space.py +255 -0
  53. warp/_src/fem/space/nanogrid_function_space.py +199 -0
  54. warp/_src/fem/space/partition.py +435 -0
  55. warp/_src/fem/space/quadmesh_function_space.py +222 -0
  56. warp/_src/fem/space/restriction.py +221 -0
  57. warp/_src/fem/space/shape/__init__.py +152 -0
  58. warp/_src/fem/space/shape/cube_shape_function.py +1107 -0
  59. warp/_src/fem/space/shape/shape_function.py +134 -0
  60. warp/_src/fem/space/shape/square_shape_function.py +928 -0
  61. warp/_src/fem/space/shape/tet_shape_function.py +829 -0
  62. warp/_src/fem/space/shape/triangle_shape_function.py +674 -0
  63. warp/_src/fem/space/tetmesh_function_space.py +270 -0
  64. warp/_src/fem/space/topology.py +461 -0
  65. warp/_src/fem/space/trimesh_function_space.py +193 -0
  66. warp/_src/fem/types.py +114 -0
  67. warp/_src/fem/utils.py +488 -0
  68. warp/_src/jax.py +188 -0
  69. warp/_src/jax_experimental/__init__.py +14 -0
  70. warp/_src/jax_experimental/custom_call.py +389 -0
  71. warp/_src/jax_experimental/ffi.py +1286 -0
  72. warp/_src/jax_experimental/xla_ffi.py +658 -0
  73. warp/_src/marching_cubes.py +710 -0
  74. warp/_src/math.py +416 -0
  75. warp/_src/optim/__init__.py +14 -0
  76. warp/_src/optim/adam.py +165 -0
  77. warp/_src/optim/linear.py +1608 -0
  78. warp/_src/optim/sgd.py +114 -0
  79. warp/_src/paddle.py +408 -0
  80. warp/_src/render/__init__.py +14 -0
  81. warp/_src/render/imgui_manager.py +291 -0
  82. warp/_src/render/render_opengl.py +3638 -0
  83. warp/_src/render/render_usd.py +939 -0
  84. warp/_src/render/utils.py +162 -0
  85. warp/_src/sparse.py +2718 -0
  86. warp/_src/tape.py +1208 -0
  87. warp/_src/thirdparty/__init__.py +0 -0
  88. warp/_src/thirdparty/appdirs.py +598 -0
  89. warp/_src/thirdparty/dlpack.py +145 -0
  90. warp/_src/thirdparty/unittest_parallel.py +676 -0
  91. warp/_src/torch.py +393 -0
  92. warp/_src/types.py +5888 -0
  93. warp/_src/utils.py +1695 -0
  94. warp/autograd.py +33 -0
  95. warp/bin/libwarp-clang.dylib +0 -0
  96. warp/bin/libwarp.dylib +0 -0
  97. warp/build.py +29 -0
  98. warp/build_dll.py +24 -0
  99. warp/codegen.py +24 -0
  100. warp/constants.py +24 -0
  101. warp/context.py +33 -0
  102. warp/dlpack.py +24 -0
  103. warp/examples/__init__.py +24 -0
  104. warp/examples/assets/bear.usd +0 -0
  105. warp/examples/assets/bunny.usd +0 -0
  106. warp/examples/assets/cube.usd +0 -0
  107. warp/examples/assets/nonuniform.usd +0 -0
  108. warp/examples/assets/nvidia_logo.png +0 -0
  109. warp/examples/assets/pixel.jpg +0 -0
  110. warp/examples/assets/rocks.nvdb +0 -0
  111. warp/examples/assets/rocks.usd +0 -0
  112. warp/examples/assets/sphere.usd +0 -0
  113. warp/examples/assets/square_cloth.usd +0 -0
  114. warp/examples/benchmarks/benchmark_api.py +389 -0
  115. warp/examples/benchmarks/benchmark_cloth.py +296 -0
  116. warp/examples/benchmarks/benchmark_cloth_cupy.py +96 -0
  117. warp/examples/benchmarks/benchmark_cloth_jax.py +105 -0
  118. warp/examples/benchmarks/benchmark_cloth_numba.py +161 -0
  119. warp/examples/benchmarks/benchmark_cloth_numpy.py +85 -0
  120. warp/examples/benchmarks/benchmark_cloth_paddle.py +94 -0
  121. warp/examples/benchmarks/benchmark_cloth_pytorch.py +94 -0
  122. warp/examples/benchmarks/benchmark_cloth_taichi.py +120 -0
  123. warp/examples/benchmarks/benchmark_cloth_warp.py +153 -0
  124. warp/examples/benchmarks/benchmark_gemm.py +164 -0
  125. warp/examples/benchmarks/benchmark_interop_paddle.py +166 -0
  126. warp/examples/benchmarks/benchmark_interop_torch.py +166 -0
  127. warp/examples/benchmarks/benchmark_launches.py +301 -0
  128. warp/examples/benchmarks/benchmark_tile_load_store.py +103 -0
  129. warp/examples/benchmarks/benchmark_tile_sort.py +155 -0
  130. warp/examples/browse.py +37 -0
  131. warp/examples/core/example_cupy.py +86 -0
  132. warp/examples/core/example_dem.py +241 -0
  133. warp/examples/core/example_fluid.py +299 -0
  134. warp/examples/core/example_graph_capture.py +150 -0
  135. warp/examples/core/example_marching_cubes.py +195 -0
  136. warp/examples/core/example_mesh.py +180 -0
  137. warp/examples/core/example_mesh_intersect.py +211 -0
  138. warp/examples/core/example_nvdb.py +182 -0
  139. warp/examples/core/example_raycast.py +111 -0
  140. warp/examples/core/example_raymarch.py +205 -0
  141. warp/examples/core/example_render_opengl.py +290 -0
  142. warp/examples/core/example_sample_mesh.py +300 -0
  143. warp/examples/core/example_sph.py +411 -0
  144. warp/examples/core/example_spin_lock.py +93 -0
  145. warp/examples/core/example_torch.py +211 -0
  146. warp/examples/core/example_wave.py +269 -0
  147. warp/examples/core/example_work_queue.py +118 -0
  148. warp/examples/distributed/example_jacobi_mpi.py +506 -0
  149. warp/examples/fem/example_adaptive_grid.py +286 -0
  150. warp/examples/fem/example_apic_fluid.py +469 -0
  151. warp/examples/fem/example_burgers.py +261 -0
  152. warp/examples/fem/example_convection_diffusion.py +181 -0
  153. warp/examples/fem/example_convection_diffusion_dg.py +225 -0
  154. warp/examples/fem/example_darcy_ls_optimization.py +489 -0
  155. warp/examples/fem/example_deformed_geometry.py +172 -0
  156. warp/examples/fem/example_diffusion.py +196 -0
  157. warp/examples/fem/example_diffusion_3d.py +225 -0
  158. warp/examples/fem/example_diffusion_mgpu.py +225 -0
  159. warp/examples/fem/example_distortion_energy.py +228 -0
  160. warp/examples/fem/example_elastic_shape_optimization.py +387 -0
  161. warp/examples/fem/example_magnetostatics.py +242 -0
  162. warp/examples/fem/example_mixed_elasticity.py +293 -0
  163. warp/examples/fem/example_navier_stokes.py +263 -0
  164. warp/examples/fem/example_nonconforming_contact.py +300 -0
  165. warp/examples/fem/example_stokes.py +213 -0
  166. warp/examples/fem/example_stokes_transfer.py +262 -0
  167. warp/examples/fem/example_streamlines.py +357 -0
  168. warp/examples/fem/utils.py +1047 -0
  169. warp/examples/interop/example_jax_callable.py +146 -0
  170. warp/examples/interop/example_jax_ffi_callback.py +132 -0
  171. warp/examples/interop/example_jax_kernel.py +232 -0
  172. warp/examples/optim/example_diffray.py +561 -0
  173. warp/examples/optim/example_fluid_checkpoint.py +497 -0
  174. warp/examples/tile/example_tile_block_cholesky.py +502 -0
  175. warp/examples/tile/example_tile_cholesky.py +88 -0
  176. warp/examples/tile/example_tile_convolution.py +66 -0
  177. warp/examples/tile/example_tile_fft.py +55 -0
  178. warp/examples/tile/example_tile_filtering.py +113 -0
  179. warp/examples/tile/example_tile_matmul.py +85 -0
  180. warp/examples/tile/example_tile_mcgp.py +191 -0
  181. warp/examples/tile/example_tile_mlp.py +385 -0
  182. warp/examples/tile/example_tile_nbody.py +199 -0
  183. warp/fabric.py +24 -0
  184. warp/fem/__init__.py +173 -0
  185. warp/fem/adaptivity.py +26 -0
  186. warp/fem/cache.py +30 -0
  187. warp/fem/dirichlet.py +24 -0
  188. warp/fem/field/__init__.py +24 -0
  189. warp/fem/field/field.py +26 -0
  190. warp/fem/geometry/__init__.py +21 -0
  191. warp/fem/geometry/closest_point.py +31 -0
  192. warp/fem/linalg.py +38 -0
  193. warp/fem/operator.py +32 -0
  194. warp/fem/polynomial.py +29 -0
  195. warp/fem/space/__init__.py +22 -0
  196. warp/fem/space/basis_space.py +24 -0
  197. warp/fem/space/shape/__init__.py +68 -0
  198. warp/fem/space/topology.py +24 -0
  199. warp/fem/types.py +24 -0
  200. warp/fem/utils.py +32 -0
  201. warp/jax.py +29 -0
  202. warp/jax_experimental/__init__.py +29 -0
  203. warp/jax_experimental/custom_call.py +29 -0
  204. warp/jax_experimental/ffi.py +39 -0
  205. warp/jax_experimental/xla_ffi.py +24 -0
  206. warp/marching_cubes.py +24 -0
  207. warp/math.py +37 -0
  208. warp/native/array.h +1687 -0
  209. warp/native/builtin.h +2327 -0
  210. warp/native/bvh.cpp +562 -0
  211. warp/native/bvh.cu +826 -0
  212. warp/native/bvh.h +555 -0
  213. warp/native/clang/clang.cpp +541 -0
  214. warp/native/coloring.cpp +622 -0
  215. warp/native/crt.cpp +51 -0
  216. warp/native/crt.h +568 -0
  217. warp/native/cuda_crt.h +1058 -0
  218. warp/native/cuda_util.cpp +677 -0
  219. warp/native/cuda_util.h +313 -0
  220. warp/native/error.cpp +77 -0
  221. warp/native/error.h +36 -0
  222. warp/native/exports.h +2023 -0
  223. warp/native/fabric.h +246 -0
  224. warp/native/hashgrid.cpp +311 -0
  225. warp/native/hashgrid.cu +89 -0
  226. warp/native/hashgrid.h +240 -0
  227. warp/native/initializer_array.h +41 -0
  228. warp/native/intersect.h +1253 -0
  229. warp/native/intersect_adj.h +375 -0
  230. warp/native/intersect_tri.h +348 -0
  231. warp/native/mat.h +5189 -0
  232. warp/native/mathdx.cpp +93 -0
  233. warp/native/matnn.h +221 -0
  234. warp/native/mesh.cpp +266 -0
  235. warp/native/mesh.cu +406 -0
  236. warp/native/mesh.h +2097 -0
  237. warp/native/nanovdb/GridHandle.h +533 -0
  238. warp/native/nanovdb/HostBuffer.h +591 -0
  239. warp/native/nanovdb/NanoVDB.h +6246 -0
  240. warp/native/nanovdb/NodeManager.h +323 -0
  241. warp/native/nanovdb/PNanoVDB.h +3390 -0
  242. warp/native/noise.h +859 -0
  243. warp/native/quat.h +1664 -0
  244. warp/native/rand.h +342 -0
  245. warp/native/range.h +145 -0
  246. warp/native/reduce.cpp +174 -0
  247. warp/native/reduce.cu +363 -0
  248. warp/native/runlength_encode.cpp +79 -0
  249. warp/native/runlength_encode.cu +61 -0
  250. warp/native/scan.cpp +47 -0
  251. warp/native/scan.cu +55 -0
  252. warp/native/scan.h +23 -0
  253. warp/native/solid_angle.h +466 -0
  254. warp/native/sort.cpp +251 -0
  255. warp/native/sort.cu +286 -0
  256. warp/native/sort.h +35 -0
  257. warp/native/sparse.cpp +241 -0
  258. warp/native/sparse.cu +435 -0
  259. warp/native/spatial.h +1306 -0
  260. warp/native/svd.h +727 -0
  261. warp/native/temp_buffer.h +46 -0
  262. warp/native/tile.h +4124 -0
  263. warp/native/tile_radix_sort.h +1112 -0
  264. warp/native/tile_reduce.h +838 -0
  265. warp/native/tile_scan.h +240 -0
  266. warp/native/tuple.h +189 -0
  267. warp/native/vec.h +2199 -0
  268. warp/native/version.h +23 -0
  269. warp/native/volume.cpp +501 -0
  270. warp/native/volume.cu +68 -0
  271. warp/native/volume.h +970 -0
  272. warp/native/volume_builder.cu +483 -0
  273. warp/native/volume_builder.h +52 -0
  274. warp/native/volume_impl.h +70 -0
  275. warp/native/warp.cpp +1143 -0
  276. warp/native/warp.cu +4604 -0
  277. warp/native/warp.h +358 -0
  278. warp/optim/__init__.py +20 -0
  279. warp/optim/adam.py +24 -0
  280. warp/optim/linear.py +35 -0
  281. warp/optim/sgd.py +24 -0
  282. warp/paddle.py +24 -0
  283. warp/py.typed +0 -0
  284. warp/render/__init__.py +22 -0
  285. warp/render/imgui_manager.py +29 -0
  286. warp/render/render_opengl.py +24 -0
  287. warp/render/render_usd.py +24 -0
  288. warp/render/utils.py +24 -0
  289. warp/sparse.py +51 -0
  290. warp/tape.py +24 -0
  291. warp/tests/__init__.py +1 -0
  292. warp/tests/__main__.py +4 -0
  293. warp/tests/assets/curlnoise_golden.npy +0 -0
  294. warp/tests/assets/mlp_golden.npy +0 -0
  295. warp/tests/assets/pixel.npy +0 -0
  296. warp/tests/assets/pnoise_golden.npy +0 -0
  297. warp/tests/assets/spiky.usd +0 -0
  298. warp/tests/assets/test_grid.nvdb +0 -0
  299. warp/tests/assets/test_index_grid.nvdb +0 -0
  300. warp/tests/assets/test_int32_grid.nvdb +0 -0
  301. warp/tests/assets/test_vec_grid.nvdb +0 -0
  302. warp/tests/assets/torus.nvdb +0 -0
  303. warp/tests/assets/torus.usda +105 -0
  304. warp/tests/aux_test_class_kernel.py +34 -0
  305. warp/tests/aux_test_compile_consts_dummy.py +18 -0
  306. warp/tests/aux_test_conditional_unequal_types_kernels.py +29 -0
  307. warp/tests/aux_test_dependent.py +29 -0
  308. warp/tests/aux_test_grad_customs.py +29 -0
  309. warp/tests/aux_test_instancing_gc.py +26 -0
  310. warp/tests/aux_test_module_aot.py +7 -0
  311. warp/tests/aux_test_module_unload.py +23 -0
  312. warp/tests/aux_test_name_clash1.py +40 -0
  313. warp/tests/aux_test_name_clash2.py +40 -0
  314. warp/tests/aux_test_reference.py +9 -0
  315. warp/tests/aux_test_reference_reference.py +8 -0
  316. warp/tests/aux_test_square.py +16 -0
  317. warp/tests/aux_test_unresolved_func.py +22 -0
  318. warp/tests/aux_test_unresolved_symbol.py +22 -0
  319. warp/tests/cuda/__init__.py +0 -0
  320. warp/tests/cuda/test_async.py +676 -0
  321. warp/tests/cuda/test_conditional_captures.py +1147 -0
  322. warp/tests/cuda/test_ipc.py +124 -0
  323. warp/tests/cuda/test_mempool.py +233 -0
  324. warp/tests/cuda/test_multigpu.py +169 -0
  325. warp/tests/cuda/test_peer.py +139 -0
  326. warp/tests/cuda/test_pinned.py +84 -0
  327. warp/tests/cuda/test_streams.py +691 -0
  328. warp/tests/geometry/__init__.py +0 -0
  329. warp/tests/geometry/test_bvh.py +335 -0
  330. warp/tests/geometry/test_hash_grid.py +259 -0
  331. warp/tests/geometry/test_marching_cubes.py +294 -0
  332. warp/tests/geometry/test_mesh.py +318 -0
  333. warp/tests/geometry/test_mesh_query_aabb.py +392 -0
  334. warp/tests/geometry/test_mesh_query_point.py +935 -0
  335. warp/tests/geometry/test_mesh_query_ray.py +323 -0
  336. warp/tests/geometry/test_volume.py +1103 -0
  337. warp/tests/geometry/test_volume_write.py +346 -0
  338. warp/tests/interop/__init__.py +0 -0
  339. warp/tests/interop/test_dlpack.py +730 -0
  340. warp/tests/interop/test_jax.py +1673 -0
  341. warp/tests/interop/test_paddle.py +800 -0
  342. warp/tests/interop/test_torch.py +1001 -0
  343. warp/tests/run_coverage_serial.py +39 -0
  344. warp/tests/test_adam.py +162 -0
  345. warp/tests/test_arithmetic.py +1096 -0
  346. warp/tests/test_array.py +3756 -0
  347. warp/tests/test_array_reduce.py +156 -0
  348. warp/tests/test_assert.py +303 -0
  349. warp/tests/test_atomic.py +336 -0
  350. warp/tests/test_atomic_bitwise.py +209 -0
  351. warp/tests/test_atomic_cas.py +312 -0
  352. warp/tests/test_bool.py +220 -0
  353. warp/tests/test_builtins_resolution.py +732 -0
  354. warp/tests/test_closest_point_edge_edge.py +327 -0
  355. warp/tests/test_codegen.py +974 -0
  356. warp/tests/test_codegen_instancing.py +1495 -0
  357. warp/tests/test_compile_consts.py +215 -0
  358. warp/tests/test_conditional.py +298 -0
  359. warp/tests/test_context.py +35 -0
  360. warp/tests/test_copy.py +319 -0
  361. warp/tests/test_ctypes.py +618 -0
  362. warp/tests/test_dense.py +73 -0
  363. warp/tests/test_devices.py +127 -0
  364. warp/tests/test_enum.py +136 -0
  365. warp/tests/test_examples.py +424 -0
  366. warp/tests/test_fabricarray.py +998 -0
  367. warp/tests/test_fast_math.py +72 -0
  368. warp/tests/test_fem.py +2204 -0
  369. warp/tests/test_fixedarray.py +229 -0
  370. warp/tests/test_fp16.py +136 -0
  371. warp/tests/test_func.py +501 -0
  372. warp/tests/test_future_annotations.py +100 -0
  373. warp/tests/test_generics.py +656 -0
  374. warp/tests/test_grad.py +893 -0
  375. warp/tests/test_grad_customs.py +339 -0
  376. warp/tests/test_grad_debug.py +341 -0
  377. warp/tests/test_implicit_init.py +411 -0
  378. warp/tests/test_import.py +45 -0
  379. warp/tests/test_indexedarray.py +1140 -0
  380. warp/tests/test_intersect.py +103 -0
  381. warp/tests/test_iter.py +76 -0
  382. warp/tests/test_large.py +177 -0
  383. warp/tests/test_launch.py +411 -0
  384. warp/tests/test_lerp.py +151 -0
  385. warp/tests/test_linear_solvers.py +223 -0
  386. warp/tests/test_lvalue.py +427 -0
  387. warp/tests/test_map.py +526 -0
  388. warp/tests/test_mat.py +3515 -0
  389. warp/tests/test_mat_assign_copy.py +178 -0
  390. warp/tests/test_mat_constructors.py +573 -0
  391. warp/tests/test_mat_lite.py +122 -0
  392. warp/tests/test_mat_scalar_ops.py +2913 -0
  393. warp/tests/test_math.py +212 -0
  394. warp/tests/test_module_aot.py +287 -0
  395. warp/tests/test_module_hashing.py +258 -0
  396. warp/tests/test_modules_lite.py +70 -0
  397. warp/tests/test_noise.py +252 -0
  398. warp/tests/test_operators.py +299 -0
  399. warp/tests/test_options.py +129 -0
  400. warp/tests/test_overwrite.py +551 -0
  401. warp/tests/test_print.py +408 -0
  402. warp/tests/test_quat.py +2653 -0
  403. warp/tests/test_quat_assign_copy.py +145 -0
  404. warp/tests/test_rand.py +339 -0
  405. warp/tests/test_reload.py +303 -0
  406. warp/tests/test_rounding.py +157 -0
  407. warp/tests/test_runlength_encode.py +196 -0
  408. warp/tests/test_scalar_ops.py +133 -0
  409. warp/tests/test_smoothstep.py +108 -0
  410. warp/tests/test_snippet.py +318 -0
  411. warp/tests/test_sparse.py +845 -0
  412. warp/tests/test_spatial.py +2859 -0
  413. warp/tests/test_spatial_assign_copy.py +160 -0
  414. warp/tests/test_special_values.py +361 -0
  415. warp/tests/test_static.py +640 -0
  416. warp/tests/test_struct.py +901 -0
  417. warp/tests/test_tape.py +242 -0
  418. warp/tests/test_transient_module.py +93 -0
  419. warp/tests/test_triangle_closest_point.py +192 -0
  420. warp/tests/test_tuple.py +361 -0
  421. warp/tests/test_types.py +615 -0
  422. warp/tests/test_utils.py +594 -0
  423. warp/tests/test_vec.py +1408 -0
  424. warp/tests/test_vec_assign_copy.py +143 -0
  425. warp/tests/test_vec_constructors.py +325 -0
  426. warp/tests/test_vec_lite.py +80 -0
  427. warp/tests/test_vec_scalar_ops.py +2327 -0
  428. warp/tests/test_verify_fp.py +100 -0
  429. warp/tests/test_version.py +75 -0
  430. warp/tests/tile/__init__.py +0 -0
  431. warp/tests/tile/test_tile.py +1519 -0
  432. warp/tests/tile/test_tile_atomic_bitwise.py +403 -0
  433. warp/tests/tile/test_tile_cholesky.py +608 -0
  434. warp/tests/tile/test_tile_load.py +724 -0
  435. warp/tests/tile/test_tile_mathdx.py +156 -0
  436. warp/tests/tile/test_tile_matmul.py +179 -0
  437. warp/tests/tile/test_tile_mlp.py +400 -0
  438. warp/tests/tile/test_tile_reduce.py +950 -0
  439. warp/tests/tile/test_tile_shared_memory.py +376 -0
  440. warp/tests/tile/test_tile_sort.py +121 -0
  441. warp/tests/tile/test_tile_view.py +173 -0
  442. warp/tests/unittest_serial.py +47 -0
  443. warp/tests/unittest_suites.py +430 -0
  444. warp/tests/unittest_utils.py +469 -0
  445. warp/tests/walkthrough_debug.py +95 -0
  446. warp/torch.py +24 -0
  447. warp/types.py +51 -0
  448. warp/utils.py +31 -0
  449. warp_lang-1.10.0.dist-info/METADATA +459 -0
  450. warp_lang-1.10.0.dist-info/RECORD +468 -0
  451. warp_lang-1.10.0.dist-info/WHEEL +5 -0
  452. warp_lang-1.10.0.dist-info/licenses/LICENSE.md +176 -0
  453. warp_lang-1.10.0.dist-info/licenses/licenses/Gaia-LICENSE.txt +6 -0
  454. warp_lang-1.10.0.dist-info/licenses/licenses/appdirs-LICENSE.txt +22 -0
  455. warp_lang-1.10.0.dist-info/licenses/licenses/asset_pixel_jpg-LICENSE.txt +3 -0
  456. warp_lang-1.10.0.dist-info/licenses/licenses/cuda-LICENSE.txt +1582 -0
  457. warp_lang-1.10.0.dist-info/licenses/licenses/dlpack-LICENSE.txt +201 -0
  458. warp_lang-1.10.0.dist-info/licenses/licenses/fp16-LICENSE.txt +28 -0
  459. warp_lang-1.10.0.dist-info/licenses/licenses/libmathdx-LICENSE.txt +220 -0
  460. warp_lang-1.10.0.dist-info/licenses/licenses/llvm-LICENSE.txt +279 -0
  461. warp_lang-1.10.0.dist-info/licenses/licenses/moller-LICENSE.txt +16 -0
  462. warp_lang-1.10.0.dist-info/licenses/licenses/nanovdb-LICENSE.txt +2 -0
  463. warp_lang-1.10.0.dist-info/licenses/licenses/nvrtc-LICENSE.txt +1592 -0
  464. warp_lang-1.10.0.dist-info/licenses/licenses/svd-LICENSE.txt +23 -0
  465. warp_lang-1.10.0.dist-info/licenses/licenses/unittest_parallel-LICENSE.txt +21 -0
  466. warp_lang-1.10.0.dist-info/licenses/licenses/usd-LICENSE.txt +213 -0
  467. warp_lang-1.10.0.dist-info/licenses/licenses/windingnumber-LICENSE.txt +21 -0
  468. warp_lang-1.10.0.dist-info/top_level.txt +1 -0
warp/native/spatial.h ADDED
@@ -0,0 +1,1306 @@
+ /*
+  * SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+  * SPDX-License-Identifier: Apache-2.0
+  *
+  * Licensed under the Apache License, Version 2.0 (the "License");
+  * you may not use this file except in compliance with the License.
+  * You may obtain a copy of the License at
+  *
+  *     http://www.apache.org/licenses/LICENSE-2.0
+  *
+  * Unless required by applicable law or agreed to in writing, software
+  * distributed under the License is distributed on an "AS IS" BASIS,
+  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  * See the License for the specific language governing permissions and
+  * limitations under the License.
+  */
+
+ #pragma once
+
+ namespace wp
+ {
+
+ //---------------------------------------------------------------------------------
+ // Represents a twist in se(3)
+ template <typename Type>
+ using spatial_vector_t = vec_t<6,Type>;
+
+ template<typename Type>
+ CUDA_CALLABLE inline Type spatial_dot(const spatial_vector_t<Type>& a, const spatial_vector_t<Type>& b)
+ {
+     return dot(a, b);
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline vec_t<3,Type> &w_vec( spatial_vector_t<Type>& a )
+ {
+     return *reinterpret_cast<vec_t<3,Type>*>(&a);
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline vec_t<3,Type> &v_vec( spatial_vector_t<Type>& a )
+ {
+     return *(vec_t<3,Type>*)(&a.c[3]);
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline const vec_t<3,Type> &w_vec( const spatial_vector_t<Type>& a )
+ {
+     spatial_vector_t<Type> &non_const_vec = *reinterpret_cast<spatial_vector_t<Type>*>(const_cast<Type*>(&a.c[0]));
+     return w_vec(non_const_vec);
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline const vec_t<3,Type> &v_vec( const spatial_vector_t<Type>& a )
+ {
+     spatial_vector_t<Type> &non_const_vec = *reinterpret_cast<spatial_vector_t<Type>*>(const_cast<Type*>(&a.c[0]));
+     return v_vec(non_const_vec);
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline spatial_vector_t<Type> spatial_cross(const spatial_vector_t<Type>& a, const spatial_vector_t<Type>& b)
+ {
+     vec_t<3,Type> w = cross(w_vec(a), w_vec(b));
+     vec_t<3,Type> v = cross(v_vec(a), w_vec(b)) + cross(w_vec(a), v_vec(b));
+
+     return spatial_vector_t<Type>({w[0], w[1], w[2], v[0], v[1], v[2]});
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline spatial_vector_t<Type> spatial_cross_dual(const spatial_vector_t<Type>& a, const spatial_vector_t<Type>& b)
+ {
+     vec_t<3,Type> w = cross(w_vec(a), w_vec(b)) + cross(v_vec(a), v_vec(b));
+     vec_t<3,Type> v = cross(w_vec(a), v_vec(b));
+
+     return spatial_vector_t<Type>({w[0], w[1], w[2], v[0], v[1], v[2]});
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline vec_t<3,Type> spatial_top(const spatial_vector_t<Type>& a)
+ {
+     return w_vec(a);
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline vec_t<3,Type> spatial_bottom(const spatial_vector_t<Type>& a)
+ {
+     return v_vec(a);
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline void adj_spatial_dot(const spatial_vector_t<Type>& a, const spatial_vector_t<Type>& b, spatial_vector_t<Type>& adj_a, spatial_vector_t<Type>& adj_b, const Type& adj_ret)
+ {
+     adj_dot(a, b, adj_a, adj_b, adj_ret);
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline void adj_spatial_cross(const spatial_vector_t<Type>& a, const spatial_vector_t<Type>& b, spatial_vector_t<Type>& adj_a, spatial_vector_t<Type>& adj_b, const spatial_vector_t<Type>& adj_ret)
+ {
+     adj_cross(w_vec(a), w_vec(b), w_vec(adj_a), w_vec(adj_b), w_vec(adj_ret));
+
+     adj_cross(v_vec(a), w_vec(b), v_vec(adj_a), w_vec(adj_b), v_vec(adj_ret));
+     adj_cross(w_vec(a), v_vec(b), w_vec(adj_a), v_vec(adj_b), v_vec(adj_ret));
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline void adj_spatial_cross_dual(const spatial_vector_t<Type>& a, const spatial_vector_t<Type>& b, spatial_vector_t<Type>& adj_a, spatial_vector_t<Type>& adj_b, const spatial_vector_t<Type>& adj_ret)
+ {
+     adj_cross(w_vec(a), w_vec(b), w_vec(adj_a), w_vec(adj_b), w_vec(adj_ret));
+     adj_cross(v_vec(a), v_vec(b), v_vec(adj_a), v_vec(adj_b), w_vec(adj_ret));
+
+     adj_cross(w_vec(a), v_vec(b), w_vec(adj_a), v_vec(adj_b), v_vec(adj_ret));
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline void adj_spatial_top(const spatial_vector_t<Type>& a, spatial_vector_t<Type>& adj_a, const vec_t<3,Type>& adj_ret)
+ {
+     w_vec(adj_a) += adj_ret;
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline void adj_spatial_bottom(const spatial_vector_t<Type>& a, spatial_vector_t<Type>& adj_a, const vec_t<3,Type>& adj_ret)
+ {
+     v_vec(adj_a) += adj_ret;
+ }
+
+
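Reviewer note on the twist helpers above: `spatial_cross` is the motion-space cross product of two twists and `spatial_cross_dual` its force-space counterpart, while `w_vec`/`v_vec` expose in-place 3-vector views of the angular and linear halves. A minimal sketch of the intended call pattern, illustrative only and not part of the shipped header (it assumes the surrounding `wp` types from builtin.h):

    // Illustrative usage sketch (not part of spatial.h).
    // Twists are stored as (w, v): angular part first, then linear.
    wp::spatial_vector a({0.0f, 0.0f, 1.0f, 0.1f, 0.0f, 0.0f});
    wp::spatial_vector b({1.0f, 0.0f, 0.0f, 0.0f, 0.2f, 0.0f});

    wp::spatial_vector m = wp::spatial_cross(a, b);      // motion x motion product
    wp::spatial_vector f = wp::spatial_cross_dual(a, b); // dual (force-space) product

    float d = wp::spatial_dot(a, b);               // plain 6-D dot product
    wp::vec_t<3, float> w = wp::spatial_top(a);    // angular half, == w_vec(a)
    wp::vec_t<3, float> v = wp::spatial_bottom(a); // linear half,  == v_vec(a)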
+ //---------------------------------------------------------------------------------
+ // Represents a rigid body transformation
+
+ template<typename Type>
+ struct transform_t
+ {
+     vec_t<3,Type> p;
+     quat_t<Type> q;
+
+     CUDA_CALLABLE inline transform_t(vec_t<3,Type> p=vec_t<3,Type>(), quat_t<Type> q=quat_t<Type>()) : p(p), q(q) {}
+     CUDA_CALLABLE inline transform_t(Type) {} // helps uniform initialization
+
+     template<typename OtherType>
+     inline explicit CUDA_CALLABLE transform_t(const transform_t<OtherType>& other)
+     {
+         p = other.p;
+         q = other.q;
+     }
+
+     CUDA_CALLABLE inline transform_t(const initializer_array<7, Type> &l)
+     {
+         p = vec_t<3,Type>(l[0], l[1], l[2]);
+         q = quat_t<Type>(l[3], l[4], l[5], l[6]);
+     }
+
+     CUDA_CALLABLE inline Type operator[](int index) const
+     {
+         assert(index < 7);
+
+         return p.c[index];
+     }
+
+     CUDA_CALLABLE inline Type& operator[](int index)
+     {
+         assert(index < 7);
+
+         return p.c[index];
+     }
+ };
+
+ template<typename Type=float32>
+ CUDA_CALLABLE inline transform_t<Type> transform_identity()
+ {
+     return transform_t<Type>(vec_t<3,Type>(), quat_identity<Type>());
+ }
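Note that `operator[]` relies on `p` and `q` being laid out contiguously: `p.c[index]` is read for all seven indices, so 0-2 address the translation and 3-6 deliberately run past `p` into the quaternion, flattening a transform to `[p.x, p.y, p.z, q.x, q.y, q.z, q.w]`. A quick illustrative sketch of what that means in practice:

    // Illustrative usage sketch (not part of spatial.h).
    wp::transform t = wp::transform_identity();  // p = (0,0,0), q = (0,0,0,1)
    t[2] = 1.0f;        // same storage as t.p[2]
    float qw = t[6];    // same storage as t.q.w; 1.0f for the identity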
+
+ template<typename Type>
+ inline CUDA_CALLABLE transform_t<Type> operator - (const transform_t<Type>& x)
+ {
+     transform_t<Type> ret;
+
+     ret.p = -x.p;
+     ret.q = -x.q;
+
+     return ret;
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline transform_t<Type> pos(const transform_t<Type>& x)
+ {
+     return x;
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline transform_t<Type> neg(const transform_t<Type>& x)
+ {
+     return -x;
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline void adj_neg(const transform_t<Type>& x, transform_t<Type>& adj_x, const transform_t<Type>& adj_ret)
+ {
+     adj_x -= adj_ret;
+ }
+
+ template<typename Type>
+ inline CUDA_CALLABLE bool operator==(const transform_t<Type>& a, const transform_t<Type>& b)
+ {
+     return a.p == b.p && a.q == b.q;
+ }
+
+
+ template<typename Type>
+ inline bool CUDA_CALLABLE isfinite(const transform_t<Type>& t)
+ {
+     return isfinite(t.p) && isfinite(t.q);
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline vec_t<3,Type> transform_get_translation(const transform_t<Type>& t)
+ {
+     return t.p;
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline quat_t<Type> transform_get_rotation(const transform_t<Type>& t)
+ {
+     return t.q;
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline void adj_transform_get_translation(const transform_t<Type>& t, transform_t<Type>& adj_t, const vec_t<3,Type>& adj_ret)
+ {
+     adj_t.p += adj_ret;
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline void adj_transform_get_rotation(const transform_t<Type>& t, transform_t<Type>& adj_t, const quat_t<Type>& adj_ret)
+ {
+     adj_t.q += adj_ret;
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline void transform_set_translation(transform_t<Type>& t, const vec_t<3, Type>& p)
+ {
+     t.p = p;
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline void transform_set_rotation(transform_t<Type>& t, const quat_t<Type>& q)
+ {
+     t.q = q;
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline transform_t<Type> transform_set_translation_copy(transform_t<Type>& t, const vec_t<3, Type>& p)
+ {
+     transform_t<Type> ret(t);
+     ret.p = p;
+     return ret;
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline transform_t<Type> transform_set_rotation_copy(transform_t<Type>& t, const quat_t<Type>& q)
+ {
+     transform_t<Type> ret(t);
+     ret.q = q;
+     return ret;
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline void adj_transform_set_translation(transform_t<Type>& t, const vec_t<3, Type>& p, const transform_t<Type>& adj_t, vec_t<3, Type>& adj_p)
+ {
+     adj_p += adj_t.p;
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline void adj_transform_set_rotation(transform_t<Type>& t, const quat_t<Type>& q, const transform_t<Type>& adj_t, quat_t<Type>& adj_q)
+ {
+     adj_q += adj_t.q;
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline void adj_transform_set_translation_copy(transform_t<Type>& t, const vec_t<3, Type>& p, transform_t<Type>& adj_t, vec_t<3, Type>& adj_p, const transform_t<Type>& adj_ret)
+ {
+     adj_p += adj_ret.p;
+     adj_t.q += adj_ret.q;
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline void adj_transform_set_rotation_copy(transform_t<Type>& t, const quat_t<Type>& q, transform_t<Type>& adj_t, quat_t<Type>& adj_q, const transform_t<Type>& adj_ret)
+ {
+     adj_q += adj_ret.q;
+     adj_t.p += adj_ret.p;
+ }
+
+ template<typename Type>
+ inline CUDA_CALLABLE void transform_add_inplace(transform_t<Type>& t, const vec_t<3, Type>& p)
+ {
+     t.p += p;
+ }
+
+ template<typename Type>
+ inline CUDA_CALLABLE void transform_sub_inplace(transform_t<Type>& t, const vec_t<3, Type>& p)
+ {
+     t.p -= p;
+ }
+
+ template<typename Type>
+ inline CUDA_CALLABLE void adj_transform_add_inplace(transform_t<Type>& t, const vec_t<3, Type>& p, transform_t<Type>& adj_t, vec_t<3, Type>& adj_p)
+ {
+     adj_p += adj_t.p;
+ }
+
+ template<typename Type>
+ inline CUDA_CALLABLE void adj_transform_sub_inplace(transform_t<Type>& t, const vec_t<3, Type>& p, transform_t<Type>& adj_t, vec_t<3, Type>& adj_p)
+ {
+     adj_p -= adj_t.p;
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline transform_t<Type> transform_multiply(const transform_t<Type>& a, const transform_t<Type>& b)
+ {
+     return { quat_rotate(a.q, b.p) + a.p, mul(a.q, b.q) };
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline void adj_transform_multiply(const transform_t<Type>& a, const transform_t<Type>& b, transform_t<Type>& adj_a, transform_t<Type>& adj_b, const transform_t<Type>& adj_ret)
+ {
+     // translational part
+     adj_quat_rotate(a.q, b.p, adj_a.q, adj_b.p, adj_ret.p);
+     adj_a.p += adj_ret.p;
+
+     // rotational part
+     adj_mul(a.q, b.q, adj_a.q, adj_b.q, adj_ret.q);
+ }
+
+
+ template<typename Type>
+ CUDA_CALLABLE inline transform_t<Type> transform_inverse(const transform_t<Type>& t)
+ {
+     quat_t<Type> q_inv = quat_inverse(t.q);
+     return transform_t<Type>(-quat_rotate(q_inv, t.p), q_inv);
+ }
+
+
+ template<typename Type>
+ CUDA_CALLABLE inline vec_t<3,Type> transform_vector(const transform_t<Type>& t, const vec_t<3,Type>& x)
+ {
+     return quat_rotate(t.q, x);
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline vec_t<3,Type> transform_point(const transform_t<Type>& t, const vec_t<3,Type>& x)
+ {
+     return t.p + quat_rotate(t.q, x);
+ }
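`transform_multiply` composes rigid transforms as `(a.q b.p + a.p, a.q b.q)`, `transform_inverse` is an exact inverse provided `q` has unit length, and `transform_point`/`transform_vector` differ only in whether the translation is applied. A round-trip sketch (illustrative; `quat_from_axis_angle` and the `vec3` alias come from the neighbouring quat.h/vec.h in this wheel):

    // Illustrative usage sketch (not part of spatial.h).
    wp::transform body(wp::vec3(0.0f, 1.0f, 0.0f),
                       wp::quat_from_axis_angle(wp::vec3(0.0f, 0.0f, 1.0f), 0.5f));

    wp::vec3 p_local(1.0f, 0.0f, 0.0f);
    wp::vec3 p_world = wp::transform_point(body, p_local);   // rotate, then translate
    wp::vec3 d_world = wp::transform_vector(body, p_local);  // rotate only

    // transform_inverse undoes transform_point exactly (unit quaternion assumed):
    wp::vec3 p_back = wp::transform_point(wp::transform_inverse(body), p_world);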
+
+ // not totally sure why you'd want to do this seeing as adding/subtracting two rotation
+ // quats doesn't seem to do anything meaningful
+ template<typename Type>
+ CUDA_CALLABLE inline transform_t<Type> add(const transform_t<Type>& a, const transform_t<Type>& b)
+ {
+     return { a.p + b.p, a.q + b.q };
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline transform_t<Type> sub(const transform_t<Type>& a, const transform_t<Type>& b)
+ {
+     return { a.p - b.p, a.q - b.q };
+ }
+
+ // also not sure why you'd want to do this seeing as the quat would end up unnormalized
+ template<typename Type>
+ CUDA_CALLABLE inline transform_t<Type> mul(const transform_t<Type>& a, Type s)
+ {
+     return { a.p*s, a.q*s };
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline transform_t<Type> mul(Type s, const transform_t<Type>& a)
+ {
+     return mul(a, s);
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline transform_t<Type> mul(const transform_t<Type>& a, const transform_t<Type>& b)
+ {
+     return transform_multiply(a, b);
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline transform_t<Type> operator*(const transform_t<Type>& a, Type s)
+ {
+     return mul(a, s);
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline transform_t<Type> operator*(Type s, const transform_t<Type>& a)
+ {
+     return mul(a, s);
+ }
+
+ template<typename Type>
+ inline CUDA_CALLABLE Type tensordot(const transform_t<Type>& a, const transform_t<Type>& b)
+ {
+     // corresponds to `np.tensordot()` with all axes being contracted
+     return tensordot(a.p, b.p) + tensordot(a.q, b.q);
+ }
+
+ template<typename Type>
+ inline CUDA_CALLABLE Type extract(const transform_t<Type>& t, int idx)
+ {
+ #ifndef NDEBUG
+     if (idx < -7 || idx >= 7)
+     {
+         printf("transformation index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
+         assert(0);
+     }
+ #endif
+
+     if (idx < 0)
+     {
+         idx += 7;
+     }
+
+     return t[idx];
+ }
+
+ template<unsigned SliceLength, typename Type>
+ inline CUDA_CALLABLE vec_t<SliceLength, Type> extract(const transform_t<Type> & t, slice_t slice)
+ {
+     vec_t<SliceLength, Type> ret;
+
+     assert(slice.start >= 0 && slice.start <= 7);
+     assert(slice.stop >= -1 && slice.stop <= 7);
+     assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
+     assert(slice_get_length(slice) == SliceLength);
+
+     bool is_reversed = slice.step < 0;
+
+     int ii = 0;
+     for (
+         int i = slice.start;
+         is_reversed ? (i > slice.stop) : (i < slice.stop);
+         i += slice.step
+     )
+     {
+         ret[ii] = t[i];
+         ++ii;
+     }
+
+     assert(ii == SliceLength);
+     return ret;
+ }
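The `extract` overloads mirror Python indexing on the 7-element transform: a negative integer index wraps once, and `slice_t` carries `start`/`stop`/`step` with the element count `SliceLength` fixed at compile time. A sketch of the intended equivalences (illustrative; constructing `slice_t` as an aggregate of `{start, stop, step}` is an assumption based on the fields used above):

    // Illustrative usage sketch (not part of spatial.h).
    // Assumes slice_t is an aggregate of {start, stop, step}.
    wp::transform t = wp::transform_identity();
    wp::slice_t s{0, 3, 1};                 // the Python-side t[0:3]
    wp::vec3 p = wp::extract<3>(t, s);      // same data as transform_get_translation(t)
    float pz = wp::extract(t, -5);          // negative index wraps: reads t[2]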
+
+ template<typename Type>
+ inline CUDA_CALLABLE Type* index(transform_t<Type>& t, int idx)
+ {
+ #ifndef NDEBUG
+     if (idx < -7 || idx >= 7)
+     {
+         printf("transformation index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
+         assert(0);
+     }
+ #endif
+
+     if (idx < 0)
+     {
+         idx += 7;
+     }
+
+     return &t[idx];
+ }
+
+ template<typename Type>
+ inline CUDA_CALLABLE Type* indexref(transform_t<Type>* t, int idx)
+ {
+ #ifndef NDEBUG
+     if (idx < -7 || idx >= 7)
+     {
+         printf("transformation index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
+         assert(0);
+     }
+ #endif
+
+     if (idx < 0)
+     {
+         idx += 7;
+     }
+
+     return &((*t)[idx]);
+ }
+
+ template<typename Type>
+ inline void CUDA_CALLABLE adj_extract(const transform_t<Type>& t, int idx, transform_t<Type>& adj_t, int& adj_idx, Type adj_ret)
+ {
+     adj_t[idx] += adj_ret;
+ }
+
+ template<unsigned SliceLength, typename Type>
+ inline CUDA_CALLABLE void adj_extract(
+     const transform_t<Type>& t, slice_t slice,
+     transform_t<Type>& adj_t, slice_t& adj_slice,
+     const vec_t<SliceLength, Type>& adj_ret
+ )
+ {
+     assert(slice.start >= 0 && slice.start <= 7);
+     assert(slice.stop >= -1 && slice.stop <= 7);
+     assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
+     assert(slice_get_length(slice) == SliceLength);
+
+     bool is_reversed = slice.step < 0;
+
+     int ii = 0;
+     for (
+         int i = slice.start;
+         is_reversed ? (i > slice.stop) : (i < slice.stop);
+         i += slice.step
+     )
+     {
+         adj_t[i] += adj_ret[ii];
+         ++ii;
+     }
+
+     assert(ii == SliceLength);
+ }
+
+ template<typename Type>
+ inline CUDA_CALLABLE void adj_index(transform_t<Type>& t, int idx,
+     transform_t<Type>& adj_t, int adj_idx, const Type& adj_value)
+ {
+     // nop
+ }
+
+ template<typename Type>
+ inline CUDA_CALLABLE void adj_indexref(transform_t<Type>* t, int idx,
+     transform_t<Type>& adj_t, int adj_idx, const Type& adj_value)
+ {
+     // nop
+ }
+
+ template<typename Type>
+ inline CUDA_CALLABLE void add_inplace(transform_t<Type>& t, int idx, Type value)
+ {
+ #ifndef NDEBUG
+     if (idx < -7 || idx >= 7)
+     {
+         printf("transformation index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
+         assert(0);
+     }
+ #endif
+
+     if (idx < 0)
+     {
+         idx += 7;
+     }
+
+     t[idx] += value;
+ }
+
+
+ template<unsigned SliceLength, typename Type>
+ inline CUDA_CALLABLE void add_inplace(transform_t<Type>& t, slice_t slice, const vec_t<SliceLength, Type> &a)
+ {
+     assert(slice.start >= 0 && slice.start <= 7);
+     assert(slice.stop >= -1 && slice.stop <= 7);
+     assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
+     assert(slice_get_length(slice) == SliceLength);
+
+     bool is_reversed = slice.step < 0;
+
+     int ii = 0;
+     for (
+         int i = slice.start;
+         is_reversed ? (i > slice.stop) : (i < slice.stop);
+         i += slice.step
+     )
+     {
+         t[i] += a[ii];
+         ++ii;
+     }
+
+     assert(ii == SliceLength);
+ }
+
+
+ template<typename Type>
+ inline CUDA_CALLABLE void adj_add_inplace(transform_t<Type>& t, int idx, Type value,
+     transform_t<Type>& adj_t, int adj_idx, Type& adj_value)
+ {
+ #ifndef NDEBUG
+     if (idx < -7 || idx >= 7)
+     {
+         printf("transformation index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
+         assert(0);
+     }
+ #endif
+
+     if (idx < 0)
+     {
+         idx += 7;
+     }
+
+     adj_value += adj_t[idx];
+ }
+
+
+ template<unsigned SliceLength, typename Type>
+ inline CUDA_CALLABLE void adj_add_inplace(
+     const transform_t<Type>& t, slice_t slice, const vec_t<SliceLength, Type> &a,
+     transform_t<Type>& adj_t, slice_t& adj_slice, vec_t<SliceLength, Type>& adj_a
+ )
+ {
+     assert(slice.start >= 0 && slice.start <= 7);
+     assert(slice.stop >= -1 && slice.stop <= 7);
+     assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
+     assert(slice_get_length(slice) == SliceLength);
+
+     bool is_reversed = slice.step < 0;
+
+     int ii = 0;
+     for (
+         int i = slice.start;
+         is_reversed ? (i > slice.stop) : (i < slice.stop);
+         i += slice.step
+     )
+     {
+         adj_a[ii] += adj_t[i];
+         ++ii;
+     }
+
+     assert(ii == SliceLength);
+ }
+
+
+ template<typename Type>
+ inline CUDA_CALLABLE void sub_inplace(transform_t<Type>& t, int idx, Type value)
+ {
+ #ifndef NDEBUG
+     if (idx < -7 || idx >= 7)
+     {
+         printf("transformation index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
+         assert(0);
+     }
+ #endif
+
+     if (idx < 0)
+     {
+         idx += 7;
+     }
+
+     t[idx] -= value;
+ }
+
+
+ template<unsigned SliceLength, typename Type>
+ inline CUDA_CALLABLE void sub_inplace(transform_t<Type>& t, slice_t slice, const vec_t<SliceLength, Type> &a)
+ {
+     assert(slice.start >= 0 && slice.start <= 7);
+     assert(slice.stop >= -1 && slice.stop <= 7);
+     assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
+     assert(slice_get_length(slice) == SliceLength);
+
+     bool is_reversed = slice.step < 0;
+
+     int ii = 0;
+     for (
+         int i = slice.start;
+         is_reversed ? (i > slice.stop) : (i < slice.stop);
+         i += slice.step
+     )
+     {
+         t[i] -= a[ii];
+         ++ii;
+     }
+
+     assert(ii == SliceLength);
+ }
+
+
+ template<typename Type>
+ inline CUDA_CALLABLE void adj_sub_inplace(transform_t<Type>& t, int idx, Type value,
+     transform_t<Type>& adj_t, int adj_idx, Type& adj_value)
+ {
+ #ifndef NDEBUG
+     if (idx < -7 || idx >= 7)
+     {
+         printf("transformation index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
+         assert(0);
+     }
+ #endif
+
+     if (idx < 0)
+     {
+         idx += 7;
+     }
+
+     adj_value -= adj_t[idx];
+ }
+
+
+ template<unsigned SliceLength, typename Type>
+ inline CUDA_CALLABLE void adj_sub_inplace(
+     const transform_t<Type>& t, slice_t slice, const vec_t<SliceLength, Type> &a,
+     transform_t<Type>& adj_t, slice_t& adj_slice, vec_t<SliceLength, Type>& adj_a
+ )
+ {
+     assert(slice.start >= 0 && slice.start <= 7);
+     assert(slice.stop >= -1 && slice.stop <= 7);
+     assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
+     assert(slice_get_length(slice) == SliceLength);
+
+     bool is_reversed = slice.step < 0;
+
+     int ii = 0;
+     for (
+         int i = slice.start;
+         is_reversed ? (i > slice.stop) : (i < slice.stop);
+         i += slice.step
+     )
+     {
+         adj_a[ii] -= adj_t[i];
+         ++ii;
+     }
+
+     assert(ii == SliceLength);
+ }
+
+
+ template<typename Type>
+ inline CUDA_CALLABLE void assign_inplace(transform_t<Type>& t, int idx, Type value)
+ {
+ #ifndef NDEBUG
+     if (idx < -7 || idx >= 7)
+     {
+         printf("transformation index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
+         assert(0);
+     }
+ #endif
+
+     if (idx < 0)
+     {
+         idx += 7;
+     }
+
+     t[idx] = value;
+ }
+
+ template<unsigned SliceLength, typename Type>
+ inline CUDA_CALLABLE void assign_inplace(transform_t<Type>& t, slice_t slice, const vec_t<SliceLength, Type> &a)
+ {
+     assert(slice.start >= 0 && slice.start <= 7);
+     assert(slice.stop >= -1 && slice.stop <= 7);
+     assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
+     assert(slice_get_length(slice) == SliceLength);
+
+     bool is_reversed = slice.step < 0;
+
+     int ii = 0;
+     for (
+         int i = slice.start;
+         is_reversed ? (i > slice.stop) : (i < slice.stop);
+         i += slice.step
+     )
+     {
+         t[i] = a[ii];
+         ++ii;
+     }
+
+     assert(ii == SliceLength);
+ }
+
+ template<typename Type>
+ inline CUDA_CALLABLE void adj_assign_inplace(transform_t<Type>& t, int idx, Type value, transform_t<Type>& adj_t, int& adj_idx, Type& adj_value)
+ {
+ #ifndef NDEBUG
+     if (idx < -7 || idx >= 7)
+     {
+         printf("transformation index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
+         assert(0);
+     }
+ #endif
+
+     if (idx < 0)
+     {
+         idx += 7;
+     }
+
+     adj_value += adj_t[idx];
+ }
+
+ template<unsigned SliceLength, typename Type>
+ inline CUDA_CALLABLE void adj_assign_inplace(
+     const transform_t<Type>& t, slice_t slice, const vec_t<SliceLength, Type> &a,
+     transform_t<Type>& adj_t, slice_t& adj_slice, vec_t<SliceLength, Type>& adj_a
+ )
+ {
+     assert(slice.start >= 0 && slice.start <= 7);
+     assert(slice.stop >= -1 && slice.stop <= 7);
+     assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
+     assert(slice_get_length(slice) == SliceLength);
+
+     bool is_reversed = slice.step < 0;
+
+     int ii = 0;
+     for (
+         int i = slice.start;
+         is_reversed ? (i > slice.stop) : (i < slice.stop);
+         i += slice.step
+     )
+     {
+         adj_a[ii] += adj_t[i];
+         ++ii;
+     }
+
+     assert(ii == SliceLength);
+ }
+
+
+ template<typename Type>
+ inline CUDA_CALLABLE transform_t<Type> assign_copy(transform_t<Type>& t, int idx, Type value)
+ {
+ #ifndef NDEBUG
+     if (idx < -7 || idx >= 7)
+     {
+         printf("transformation index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
+         assert(0);
+     }
+ #endif
+
+     if (idx < 0)
+     {
+         idx += 7;
+     }
+
+     transform_t<Type> ret(t);
+     ret[idx] = value;
+     return ret;
+ }
+
+ template<unsigned SliceLength, typename Type>
+ inline CUDA_CALLABLE transform_t<Type> assign_copy(transform_t<Type>& t, slice_t slice, const vec_t<SliceLength, Type> &a)
+ {
+     transform_t<Type> ret(t);
+     assign_inplace<SliceLength>(ret, slice, a);
+     return ret;
+ }
+
+ template<typename Type>
+ inline CUDA_CALLABLE void adj_assign_copy(transform_t<Type>& t, int idx, Type value, transform_t<Type>& adj_t, int& adj_idx, Type& adj_value, const transform_t<Type>& adj_ret)
+ {
+ #ifndef NDEBUG
+     if (idx < -7 || idx >= 7)
+     {
+         printf("transformation index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
+         assert(0);
+     }
+ #endif
+
+     if (idx < 0)
+     {
+         idx += 7;
+     }
+
+     adj_value += adj_ret[idx];
+     for(unsigned i=0; i < 7; ++i)
+     {
+         if (i != idx)
+             adj_t[i] += adj_ret[i];
+     }
+ }
+
+ template<unsigned SliceLength, typename Type>
+ inline CUDA_CALLABLE void adj_assign_copy(
+     transform_t<Type>& t, slice_t slice, const vec_t<SliceLength, Type> &a,
+     transform_t<Type>& adj_t, slice_t& adj_slice, vec_t<SliceLength, Type>& adj_a,
+     const transform_t<Type>& adj_ret
+ )
+ {
+     assert(slice.start >= 0 && slice.start <= 7);
+     assert(slice.stop >= -1 && slice.stop <= 7);
+     assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
+     assert(slice_get_length(slice) == SliceLength);
+
+     bool is_reversed = slice.step < 0;
+
+     int ii = 0;
+     for (int i = 0; i < 7; ++i)
+     {
+         bool in_slice = is_reversed
+             ? (i <= slice.start && i > slice.stop && (slice.start - i) % (-slice.step) == 0)
+             : (i >= slice.start && i < slice.stop && (i - slice.start) % slice.step == 0);
+
+         if (!in_slice)
+         {
+             adj_t[i] += adj_ret[i];
+         }
+         else
+         {
+             adj_a[ii] += adj_ret[i];
+             ++ii;
+         }
+     }
+
+     assert(ii == SliceLength);
+ }
+
+
+ // adjoint methods
+ template<typename Type>
+ CUDA_CALLABLE inline void adj_add(const transform_t<Type>& a, const transform_t<Type>& b, transform_t<Type>& adj_a, transform_t<Type>& adj_b, const transform_t<Type>& adj_ret)
+ {
+     adj_add(a.p, b.p, adj_a.p, adj_b.p, adj_ret.p);
+     adj_add(a.q, b.q, adj_a.q, adj_b.q, adj_ret.q);
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline void adj_add(
+     const transform_t<Type>& a, Type b,
+     transform_t<Type>& adj_a, Type& adj_b,
+     const transform_t<Type>& adj_ret
+ )
+ {
+     adj_a += adj_ret;
+
+     adj_b += adj_ret.p[0];
+     adj_b += adj_ret.p[1];
+     adj_b += adj_ret.p[2];
+
+     adj_b += adj_ret.q[0];
+     adj_b += adj_ret.q[1];
+     adj_b += adj_ret.q[2];
+     adj_b += adj_ret.q[3];
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline void adj_sub(const transform_t<Type>& a, const transform_t<Type>& b, transform_t<Type>& adj_a, transform_t<Type>& adj_b, const transform_t<Type>& adj_ret)
+ {
+     adj_sub(a.p, b.p, adj_a.p, adj_b.p, adj_ret.p);
+     adj_sub(a.q, b.q, adj_a.q, adj_b.q, adj_ret.q);
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline void adj_sub(
+     const transform_t<Type>& a, Type b,
+     transform_t<Type>& adj_a, Type& adj_b,
+     const transform_t<Type>& adj_ret
+ )
+ {
+     adj_a -= adj_ret;
+
+     adj_b -= adj_ret.p[0];
+     adj_b -= adj_ret.p[1];
+     adj_b -= adj_ret.p[2];
+
+     adj_b -= adj_ret.q[0];
+     adj_b -= adj_ret.q[1];
+     adj_b -= adj_ret.q[2];
+     adj_b -= adj_ret.q[3];
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline void adj_mul(const transform_t<Type>& a, Type s, transform_t<Type>& adj_a, Type& adj_s, const transform_t<Type>& adj_ret)
+ {
+     adj_mul(a.p, s, adj_a.p, adj_s, adj_ret.p);
+     adj_mul(a.q, s, adj_a.q, adj_s, adj_ret.q);
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline void adj_mul(Type s, const transform_t<Type>& a, Type& adj_s, transform_t<Type>& adj_a, const transform_t<Type>& adj_ret)
+ {
+     adj_mul(a, s, adj_a, adj_s, adj_ret);
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline void adj_mul(const transform_t<Type>& a, const transform_t<Type>& b, transform_t<Type>& adj_a, transform_t<Type>& adj_b, const transform_t<Type>& adj_ret)
+ {
+     adj_transform_multiply(a, b, adj_a, adj_b, adj_ret);
+ }
+
+
+ template<typename Type>
+ inline CUDA_CALLABLE transform_t<Type> atomic_add(transform_t<Type>* addr, const transform_t<Type>& value)
+ {
+     vec_t<3,Type> p = atomic_add(&addr->p, value.p);
+     quat_t<Type> q = atomic_add(&addr->q, value.q);
+
+     return transform_t<Type>(p, q);
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline void adj_transform_t(const vec_t<3,Type>& p, const quat_t<Type>& q, vec_t<3,Type>& adj_p, quat_t<Type>& adj_q, const transform_t<Type>& adj_ret)
+ {
+     adj_p += adj_ret.p;
+     adj_q += adj_ret.q;
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline void adj_transform_t(const initializer_array<7, Type> &l, const initializer_array<7, Type*>& adj_l, const transform_t<Type>& adj_ret)
+ {
+     *adj_l[0] += adj_ret.p[0];
+     *adj_l[1] += adj_ret.p[1];
+     *adj_l[2] += adj_ret.p[2];
+     *adj_l[3] += adj_ret.q[0];
+     *adj_l[4] += adj_ret.q[1];
+     *adj_l[5] += adj_ret.q[2];
+     *adj_l[6] += adj_ret.q[3];
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline void adj_transform_inverse(const transform_t<Type>& t, transform_t<Type>& adj_t, const transform_t<Type>& adj_ret)
+ {
+
+     // forward
+     quat_t<Type> q_inv = quat_inverse(t.q);
+     vec_t<3,Type> p = quat_rotate(q_inv, t.p);
+     vec_t<3,Type> np = -p;
+     // transform<Type> t = transform<Type>(np, q_inv)
+
+     // backward
+     quat_t<Type> adj_q_inv(0.0f);
+     quat_t<Type> adj_q(0.0f);
+     vec_t<3,Type> adj_p(0.0f);
+     vec_t<3,Type> adj_np(0.0f);
+
+     adj_transform_t(np, q_inv, adj_np, adj_q_inv, adj_ret);
+     adj_p = -adj_np;
+     adj_quat_rotate(q_inv, t.p, adj_q_inv, adj_t.p, adj_p);
+     adj_quat_inverse(t.q, adj_t.q, adj_q_inv);
+
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline void adj_transform_vector(const transform_t<Type>& t, const vec_t<3,Type>& x, transform_t<Type>& adj_t, vec_t<3,Type>& adj_x, const vec_t<3,Type>& adj_ret)
+ {
+     adj_quat_rotate(t.q, x, adj_t.q, adj_x, adj_ret);
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline void adj_transform_point(const transform_t<Type>& t, const vec_t<3,Type>& x, transform_t<Type>& adj_t, vec_t<3,Type>& adj_x, const vec_t<3,Type>& adj_ret)
+ {
+     adj_quat_rotate(t.q, x, adj_t.q, adj_x, adj_ret);
+     adj_t.p += adj_ret;
+ }
+
+
+ template<typename Type>
+ CUDA_CALLABLE void print(transform_t<Type> t);
+
+ template<typename Type>
+ CUDA_CALLABLE inline transform_t<Type> lerp(const transform_t<Type>& a, const transform_t<Type>& b, Type t)
+ {
+     return a*(Type(1)-t) + b*t;
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline void adj_lerp(const transform_t<Type>& a, const transform_t<Type>& b, Type t, transform_t<Type>& adj_a, transform_t<Type>& adj_b, Type& adj_t, const transform_t<Type>& adj_ret)
+ {
+     adj_a += adj_ret*(Type(1)-t);
+     adj_b += adj_ret*t;
+     adj_t += tensordot(b, adj_ret) - tensordot(a, adj_ret);
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline int len(const transform_t<Type>& t)
+ {
+     return 7;
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline void adj_len(const transform_t<Type>& t, transform_t<Type>& adj_t, const int& adj_ret)
+ {
+ }
+
+ template<typename Type>
+ using spatial_matrix_t = mat_t<6,6,Type>;
+
+ template<typename Type>
+ inline CUDA_CALLABLE spatial_matrix_t<Type> spatial_adjoint(const mat_t<3,3,Type>& R, const mat_t<3,3,Type>& S)
+ {
+     spatial_matrix_t<Type> adT;
+
+     // T = [R 0]
+     //     [S R]
+
+     // diagonal blocks
+     for (int i=0; i < 3; ++i)
+     {
+         for (int j=0; j < 3; ++j)
+         {
+             adT.data[i][j] = R.data[i][j];
+             adT.data[i+3][j+3] = R.data[i][j];
+         }
+     }
+
+     // lower off diagonal
+     for (int i=0; i < 3; ++i)
+     {
+         for (int j=0; j < 3; ++j)
+         {
+             adT.data[i+3][j] = S.data[i][j];
+         }
+     }
+
+     return adT;
+ }
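`spatial_adjoint` packs two 3x3 blocks into the 6x6 layout `[[R, 0], [S, R]]` shown in the comment; the choice of `S` is left to the caller. A sketch of one common construction, the motion-space adjoint of a frame change `(R, p)` with `S = skew(p) R` (illustrative; that choice of `S`, plus `quat_to_matrix` and the `mat33` alias from the neighbouring headers, are assumptions):

    // Illustrative usage sketch (not part of spatial.h).
    wp::quat q = wp::quat_from_axis_angle(wp::vec3(0.0f, 0.0f, 1.0f), 0.5f);
    wp::vec3 p(0.0f, 1.0f, 0.0f);

    wp::mat33 R = wp::quat_to_matrix(q);
    wp::mat33 skew_p( 0.0f, -p[2],  p[1],    // skew(p), so skew(p)*x == cross(p, x)
                      p[2],  0.0f, -p[0],
                     -p[1],  p[0],  0.0f);
    wp::spatial_matrix X = wp::spatial_adjoint(R, wp::mul(skew_p, R));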
+
+ template<typename Type>
+ inline CUDA_CALLABLE void adj_spatial_adjoint(const mat_t<3,3,Type>& R, const mat_t<3,3,Type>& S, mat_t<3,3,Type>& adj_R, mat_t<3,3,Type>& adj_S, const spatial_matrix_t<Type>& adj_ret)
+ {
+     // diagonal blocks
+     for (int i=0; i < 3; ++i)
+     {
+         for (int j=0; j < 3; ++j)
+         {
+             adj_R.data[i][j] += adj_ret.data[i][j];
+             adj_R.data[i][j] += adj_ret.data[i+3][j+3];
+         }
+     }
+
+     // lower off diagonal
+     for (int i=0; i < 3; ++i)
+     {
+         for (int j=0; j < 3; ++j)
+         {
+             adj_S.data[i][j] += adj_ret.data[i+3][j];
+         }
+     }
+ }
+
+
+ CUDA_CALLABLE inline int row_index(int stride, int i, int j)
+ {
+     return i*stride + j;
+ }
+
+ // builds the spatial Jacobian J, a (joint_count*6) x (dof_count) matrix
+ template<typename Type>
+ CUDA_CALLABLE inline void spatial_jacobian(
+     const spatial_vector_t<Type>* S,
+     const int* joint_parents,
+     const int* joint_qd_start,
+     int joint_start, // offset of the first joint for the articulation
+     int joint_count,
+     int J_start,
+     Type* J)
+ {
+     const int articulation_dof_start = joint_qd_start[joint_start];
+     const int articulation_dof_end = joint_qd_start[joint_start + joint_count];
+     const int articulation_dof_count = articulation_dof_end-articulation_dof_start;
+
+     // shift output pointers
+     const int S_start = articulation_dof_start;
+
+     S += S_start;
+     J += J_start;
+
+     for (int i=0; i < joint_count; ++i)
+     {
+         const int row_start = i * 6;
+
+         int j = joint_start + i;
+         while (j != -1)
+         {
+             const int joint_dof_start = joint_qd_start[j];
+             const int joint_dof_end = joint_qd_start[j+1];
+             const int joint_dof_count = joint_dof_end-joint_dof_start;
+
+             // fill out each row of the Jacobian walking up the tree
+             //for (int col=dof_start; col < dof_end; ++col)
+             for (int dof=0; dof < joint_dof_count; ++dof)
+             {
+                 const int col = (joint_dof_start-articulation_dof_start) + dof;
+
+                 J[row_index(articulation_dof_count, row_start+0, col)] = S[col].w[0];
+                 J[row_index(articulation_dof_count, row_start+1, col)] = S[col].w[1];
+                 J[row_index(articulation_dof_count, row_start+2, col)] = S[col].w[2];
+                 J[row_index(articulation_dof_count, row_start+3, col)] = S[col].v[0];
+                 J[row_index(articulation_dof_count, row_start+4, col)] = S[col].v[1];
+                 J[row_index(articulation_dof_count, row_start+5, col)] = S[col].v[2];
+             }
+
+             j = joint_parents[j];
+         }
+     }
+ }
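`spatial_jacobian` fills a dense row-major block with six rows per joint and `articulation_dof_count` columns: for each joint it walks `joint_parents` toward the root and scatters every ancestor dof's motion subspace `S[col]` into that joint's six rows. A host-side driver sketch for a two-joint chain (illustrative; the buffer sizes are the point, and the motion subspaces are left unfilled):

    // Illustrative driver sketch (not part of spatial.h).
    // Two-joint serial chain: joint 0 is the root (parent -1), joint 1 hangs off it.
    const int joint_parents[]  = { -1, 0 };
    const int joint_qd_start[] = { 0, 1, 2 };   // one dof per joint, 2 dofs total
    wp::spatial_vector S[2];                    // per-dof motion subspaces

    // J is (joint_count*6) x dof_count = 12 x 2, row-major, written at offset 0.
    float J[12 * 2] = {};
    wp::spatial_jacobian(S, joint_parents, joint_qd_start,
                         /*joint_start=*/0, /*joint_count=*/2, /*J_start=*/0, J);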
+
+ template<typename Type>
+ CUDA_CALLABLE inline void adj_spatial_jacobian(
+     const spatial_vector_t<Type>* S,
+     const int* joint_parents,
+     const int* joint_qd_start,
+     const int joint_start,
+     const int joint_count,
+     const int J_start,
+     const Type* J,
+     // adjs
+     spatial_vector_t<Type>* adj_S,
+     int* adj_joint_parents,
+     int* adj_joint_qd_start,
+     int& adj_joint_start,
+     int& adj_joint_count,
+     int& adj_J_start,
+     const Type* adj_J)
+ {
+     const int articulation_dof_start = joint_qd_start[joint_start];
+     const int articulation_dof_end = joint_qd_start[joint_start + joint_count];
+     const int articulation_dof_count = articulation_dof_end-articulation_dof_start;
+
+     // shift output pointers
+     const int S_start = articulation_dof_start;
+
+     S += S_start;
+     J += J_start;
+
+     adj_S += S_start;
+     adj_J += J_start;
+
+     for (int i=0; i < joint_count; ++i)
+     {
+         const int row_start = i * 6;
+
+         int j = joint_start + i;
+         while (j != -1)
+         {
+             const int joint_dof_start = joint_qd_start[j];
+             const int joint_dof_end = joint_qd_start[j+1];
+             const int joint_dof_count = joint_dof_end-joint_dof_start;
+
+             // fill out each row of the Jacobian walking up the tree
+             //for (int col=dof_start; col < dof_end; ++col)
+             for (int dof=0; dof < joint_dof_count; ++dof)
+             {
+                 const int col = (joint_dof_start-articulation_dof_start) + dof;
+
+                 adj_S[col].w[0] += adj_J[row_index(articulation_dof_count, row_start+0, col)];
+                 adj_S[col].w[1] += adj_J[row_index(articulation_dof_count, row_start+1, col)];
+                 adj_S[col].w[2] += adj_J[row_index(articulation_dof_count, row_start+2, col)];
+                 adj_S[col].v[0] += adj_J[row_index(articulation_dof_count, row_start+3, col)];
+                 adj_S[col].v[1] += adj_J[row_index(articulation_dof_count, row_start+4, col)];
+                 adj_S[col].v[2] += adj_J[row_index(articulation_dof_count, row_start+5, col)];
+             }
+
+             j = joint_parents[j];
+         }
+     }
+ }
+
+
+ template<typename Type>
+ CUDA_CALLABLE inline void spatial_mass(const spatial_matrix_t<Type>* I_s, int joint_start, int joint_count, int M_start, Type* M)
+ {
+     const int stride = joint_count*6;
+
+     for (int l=0; l < joint_count; ++l)
+     {
+         for (int i=0; i < 6; ++i)
+         {
+             for (int j=0; j < 6; ++j)
+             {
+                 M[M_start + row_index(stride, l*6 + i, l*6 + j)] = I_s[joint_start + l].data[i][j];
+             }
+         }
+     }
+ }
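`spatial_mass` is the companion scatter for the joint-space mass matrix: it copies each joint's 6x6 spatial inertia onto the diagonal blocks of a dense row-major `M` with stride `joint_count*6`. Continuing the two-joint driver above (illustrative):

    // Illustrative sketch (not part of spatial.h), continuing the driver above.
    wp::spatial_matrix I_s[2];     // per-joint spatial inertias
    float M[12 * 12] = {};         // (2*6) x (2*6), row-major
    wp::spatial_mass(I_s, /*joint_start=*/0, /*joint_count=*/2, /*M_start=*/0, M);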
+
+ template<typename Type>
+ CUDA_CALLABLE inline void adj_spatial_mass(
+     const spatial_matrix_t<Type>* I_s,
+     const int joint_start,
+     const int joint_count,
+     const int M_start,
+     const Type* M,
+     spatial_matrix_t<Type>* adj_I_s,
+     int& adj_joint_start,
+     int& adj_joint_count,
+     int& adj_M_start,
+     const Type* adj_M)
+ {
+     const int stride = joint_count*6;
+
+     for (int l=0; l < joint_count; ++l)
+     {
+         for (int i=0; i < 6; ++i)
+         {
+             for (int j=0; j < 6; ++j)
+             {
+                 adj_I_s[joint_start + l].data[i][j] += adj_M[M_start + row_index(stride, l*6 + i, l*6 + j)];
+             }
+         }
+     }
+ }
+
+ using transform = transform_t<float>;
+ using transformh = transform_t<half>;
+ using transformf = transform_t<float>;
+ using transformd = transform_t<double>;
+
+ using spatial_vector = spatial_vector_t<float>;
+ using spatial_vectorh = spatial_vector_t<half>;
+ using spatial_vectorf = spatial_vector_t<float>;
+ using spatial_vectord = spatial_vector_t<double>;
+
+ using spatial_matrix = spatial_matrix_t<float>;
+ using spatial_matrixh = spatial_matrix_t<half>;
+ using spatial_matrixf = spatial_matrix_t<float>;
+ using spatial_matrixd = spatial_matrix_t<double>;
+
+ } // namespace wp