warp-lang 1.6.1__py3-none-macosx_10_13_universal2.whl → 1.7.0__py3-none-macosx_10_13_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (401)
  1. warp/__init__.py +21 -7
  2. warp/autograd.py +14 -6
  3. warp/bin/libwarp-clang.dylib +0 -0
  4. warp/bin/libwarp.dylib +0 -0
  5. warp/build.py +424 -6
  6. warp/build_dll.py +20 -20
  7. warp/builtins.py +467 -368
  8. warp/codegen.py +193 -125
  9. warp/config.py +56 -12
  10. warp/constants.py +14 -6
  11. warp/context.py +524 -277
  12. warp/dlpack.py +22 -12
  13. warp/examples/__init__.py +14 -6
  14. warp/examples/assets/nonuniform.usd +0 -0
  15. warp/examples/assets/nvidia_logo.png +0 -0
  16. warp/examples/benchmarks/benchmark_api.py +14 -6
  17. warp/examples/benchmarks/benchmark_cloth.py +14 -6
  18. warp/examples/benchmarks/benchmark_cloth_cupy.py +14 -6
  19. warp/examples/benchmarks/benchmark_cloth_jax.py +14 -6
  20. warp/examples/benchmarks/benchmark_cloth_numba.py +15 -0
  21. warp/examples/benchmarks/benchmark_cloth_numpy.py +14 -6
  22. warp/examples/benchmarks/benchmark_cloth_paddle.py +14 -6
  23. warp/examples/benchmarks/benchmark_cloth_pytorch.py +14 -6
  24. warp/examples/benchmarks/benchmark_cloth_taichi.py +14 -6
  25. warp/examples/benchmarks/benchmark_cloth_warp.py +14 -6
  26. warp/examples/benchmarks/benchmark_gemm.py +82 -48
  27. warp/examples/benchmarks/benchmark_interop_paddle.py +14 -6
  28. warp/examples/benchmarks/benchmark_interop_torch.py +14 -6
  29. warp/examples/benchmarks/benchmark_launches.py +14 -6
  30. warp/examples/benchmarks/benchmark_tile_load_store.py +103 -0
  31. warp/examples/browse.py +14 -6
  32. warp/examples/core/example_cupy.py +14 -6
  33. warp/examples/core/example_dem.py +14 -6
  34. warp/examples/core/example_fluid.py +14 -6
  35. warp/examples/core/example_graph_capture.py +14 -6
  36. warp/examples/core/example_marching_cubes.py +14 -6
  37. warp/examples/core/example_mesh.py +14 -6
  38. warp/examples/core/example_mesh_intersect.py +14 -6
  39. warp/examples/core/example_nvdb.py +14 -6
  40. warp/examples/core/example_raycast.py +14 -6
  41. warp/examples/core/example_raymarch.py +14 -6
  42. warp/examples/core/example_render_opengl.py +14 -6
  43. warp/examples/core/example_sample_mesh.py +300 -0
  44. warp/examples/core/example_sph.py +14 -6
  45. warp/examples/core/example_torch.py +14 -6
  46. warp/examples/core/example_wave.py +14 -6
  47. warp/examples/fem/example_adaptive_grid.py +14 -6
  48. warp/examples/fem/example_apic_fluid.py +15 -7
  49. warp/examples/fem/example_burgers.py +16 -8
  50. warp/examples/fem/example_convection_diffusion.py +14 -6
  51. warp/examples/fem/example_convection_diffusion_dg.py +14 -6
  52. warp/examples/fem/example_deformed_geometry.py +15 -7
  53. warp/examples/fem/example_diffusion.py +14 -6
  54. warp/examples/fem/example_diffusion_3d.py +14 -6
  55. warp/examples/fem/example_diffusion_mgpu.py +14 -6
  56. warp/examples/fem/example_distortion_energy.py +15 -7
  57. warp/examples/fem/example_magnetostatics.py +20 -12
  58. warp/examples/fem/example_mixed_elasticity.py +14 -6
  59. warp/examples/fem/example_navier_stokes.py +14 -6
  60. warp/examples/fem/example_nonconforming_contact.py +14 -6
  61. warp/examples/fem/example_stokes.py +14 -6
  62. warp/examples/fem/example_stokes_transfer.py +14 -6
  63. warp/examples/fem/example_streamlines.py +14 -6
  64. warp/examples/fem/utils.py +24 -3
  65. warp/examples/interop/example_jax_callable.py +116 -0
  66. warp/examples/interop/example_jax_ffi_callback.py +132 -0
  67. warp/examples/interop/example_jax_kernel.py +205 -0
  68. warp/examples/optim/example_bounce.py +14 -6
  69. warp/examples/optim/example_cloth_throw.py +14 -6
  70. warp/examples/optim/example_diffray.py +14 -6
  71. warp/examples/optim/example_drone.py +14 -6
  72. warp/examples/optim/example_fluid_checkpoint.py +497 -0
  73. warp/examples/optim/example_inverse_kinematics.py +14 -6
  74. warp/examples/optim/example_inverse_kinematics_torch.py +14 -6
  75. warp/examples/optim/example_softbody_properties.py +14 -6
  76. warp/examples/optim/example_spring_cage.py +14 -6
  77. warp/examples/optim/example_trajectory.py +14 -6
  78. warp/examples/sim/example_cartpole.py +14 -6
  79. warp/examples/sim/example_cloth.py +14 -6
  80. warp/examples/sim/example_cloth_self_contact.py +14 -6
  81. warp/examples/sim/example_granular.py +14 -6
  82. warp/examples/sim/example_granular_collision_sdf.py +14 -6
  83. warp/examples/sim/example_jacobian_ik.py +14 -6
  84. warp/examples/sim/example_particle_chain.py +14 -6
  85. warp/examples/sim/example_quadruped.py +14 -6
  86. warp/examples/sim/example_rigid_chain.py +14 -6
  87. warp/examples/sim/example_rigid_contact.py +14 -6
  88. warp/examples/sim/example_rigid_force.py +14 -6
  89. warp/examples/sim/example_rigid_gyroscopic.py +14 -6
  90. warp/examples/sim/example_rigid_soft_contact.py +14 -6
  91. warp/examples/sim/example_soft_body.py +14 -6
  92. warp/examples/tile/example_tile_cholesky.py +14 -6
  93. warp/examples/tile/example_tile_convolution.py +14 -6
  94. warp/examples/tile/example_tile_fft.py +14 -6
  95. warp/examples/tile/example_tile_filtering.py +14 -6
  96. warp/examples/tile/example_tile_matmul.py +16 -10
  97. warp/examples/tile/example_tile_mlp.py +14 -6
  98. warp/examples/tile/example_tile_nbody.py +14 -6
  99. warp/examples/tile/example_tile_walker.py +14 -6
  100. warp/fabric.py +15 -0
  101. warp/fem/__init__.py +26 -1
  102. warp/fem/adaptivity.py +19 -4
  103. warp/fem/cache.py +15 -0
  104. warp/fem/dirichlet.py +15 -0
  105. warp/fem/domain.py +15 -0
  106. warp/fem/field/__init__.py +15 -0
  107. warp/fem/field/field.py +15 -0
  108. warp/fem/field/nodal_field.py +37 -68
  109. warp/fem/field/restriction.py +15 -0
  110. warp/fem/field/virtual.py +77 -23
  111. warp/fem/geometry/__init__.py +15 -0
  112. warp/fem/geometry/adaptive_nanogrid.py +24 -10
  113. warp/fem/geometry/closest_point.py +16 -1
  114. warp/fem/geometry/deformed_geometry.py +20 -2
  115. warp/fem/geometry/element.py +15 -0
  116. warp/fem/geometry/geometry.py +20 -0
  117. warp/fem/geometry/grid_2d.py +27 -12
  118. warp/fem/geometry/grid_3d.py +27 -15
  119. warp/fem/geometry/hexmesh.py +20 -7
  120. warp/fem/geometry/nanogrid.py +24 -11
  121. warp/fem/geometry/partition.py +15 -0
  122. warp/fem/geometry/quadmesh.py +28 -13
  123. warp/fem/geometry/tetmesh.py +18 -4
  124. warp/fem/geometry/trimesh.py +18 -8
  125. warp/fem/integrate.py +277 -93
  126. warp/fem/linalg.py +20 -5
  127. warp/fem/operator.py +15 -0
  128. warp/fem/polynomial.py +15 -0
  129. warp/fem/quadrature/__init__.py +15 -0
  130. warp/fem/quadrature/pic_quadrature.py +52 -22
  131. warp/fem/quadrature/quadrature.py +209 -25
  132. warp/fem/space/__init__.py +16 -1
  133. warp/fem/space/basis_function_space.py +19 -2
  134. warp/fem/space/basis_space.py +40 -18
  135. warp/fem/space/dof_mapper.py +15 -0
  136. warp/fem/space/function_space.py +15 -0
  137. warp/fem/space/grid_2d_function_space.py +15 -0
  138. warp/fem/space/grid_3d_function_space.py +15 -0
  139. warp/fem/space/hexmesh_function_space.py +17 -2
  140. warp/fem/space/nanogrid_function_space.py +15 -0
  141. warp/fem/space/partition.py +21 -2
  142. warp/fem/space/quadmesh_function_space.py +23 -8
  143. warp/fem/space/restriction.py +15 -0
  144. warp/fem/space/shape/__init__.py +15 -0
  145. warp/fem/space/shape/cube_shape_function.py +38 -23
  146. warp/fem/space/shape/shape_function.py +15 -0
  147. warp/fem/space/shape/square_shape_function.py +27 -12
  148. warp/fem/space/shape/tet_shape_function.py +15 -0
  149. warp/fem/space/shape/triangle_shape_function.py +16 -1
  150. warp/fem/space/tetmesh_function_space.py +18 -3
  151. warp/fem/space/topology.py +15 -0
  152. warp/fem/space/trimesh_function_space.py +17 -2
  153. warp/fem/types.py +15 -0
  154. warp/fem/utils.py +27 -6
  155. warp/jax.py +28 -7
  156. warp/jax_experimental/__init__.py +16 -0
  157. warp/{jax_experimental.py → jax_experimental/custom_call.py} +28 -33
  158. warp/jax_experimental/ffi.py +698 -0
  159. warp/jax_experimental/xla_ffi.py +602 -0
  160. warp/math.py +103 -6
  161. warp/native/array.h +28 -6
  162. warp/native/builtin.h +44 -9
  163. warp/native/bvh.cpp +18 -7
  164. warp/native/bvh.cu +57 -20
  165. warp/native/bvh.h +17 -7
  166. warp/native/clang/clang.cpp +45 -9
  167. warp/native/coloring.cpp +15 -6
  168. warp/native/crt.cpp +15 -6
  169. warp/native/crt.h +15 -6
  170. warp/native/cuda_crt.h +15 -6
  171. warp/native/cuda_util.cpp +29 -6
  172. warp/native/cuda_util.h +17 -6
  173. warp/native/error.cpp +15 -6
  174. warp/native/error.h +15 -6
  175. warp/native/exports.h +85 -63
  176. warp/native/fabric.h +15 -6
  177. warp/native/hashgrid.cpp +15 -6
  178. warp/native/hashgrid.cu +15 -6
  179. warp/native/hashgrid.h +15 -6
  180. warp/native/initializer_array.h +15 -6
  181. warp/native/intersect.h +41 -32
  182. warp/native/intersect_adj.h +48 -39
  183. warp/native/intersect_tri.h +17 -0
  184. warp/native/marching.cpp +16 -0
  185. warp/native/marching.cu +16 -7
  186. warp/native/marching.h +17 -0
  187. warp/native/mat.h +528 -15
  188. warp/native/mathdx.cpp +15 -6
  189. warp/native/matnn.h +15 -6
  190. warp/native/mesh.cpp +15 -6
  191. warp/native/mesh.cu +15 -6
  192. warp/native/mesh.h +25 -16
  193. warp/native/noise.h +15 -6
  194. warp/native/quat.h +114 -17
  195. warp/native/rand.h +21 -6
  196. warp/native/range.h +15 -6
  197. warp/native/reduce.cpp +15 -6
  198. warp/native/reduce.cu +15 -6
  199. warp/native/runlength_encode.cpp +15 -6
  200. warp/native/runlength_encode.cu +15 -6
  201. warp/native/scan.cpp +15 -6
  202. warp/native/scan.cu +15 -6
  203. warp/native/scan.h +15 -6
  204. warp/native/solid_angle.h +17 -0
  205. warp/native/sort.cpp +137 -65
  206. warp/native/sort.cu +167 -21
  207. warp/native/sort.h +23 -7
  208. warp/native/sparse.cpp +58 -28
  209. warp/native/sparse.cu +67 -23
  210. warp/native/spatial.h +15 -6
  211. warp/native/svd.h +131 -6
  212. warp/native/temp_buffer.h +15 -6
  213. warp/native/tile.h +316 -111
  214. warp/native/tile_reduce.h +61 -9
  215. warp/native/vec.h +83 -13
  216. warp/native/volume.cpp +100 -119
  217. warp/native/volume.cu +15 -6
  218. warp/native/volume.h +15 -6
  219. warp/native/volume_builder.cu +40 -16
  220. warp/native/volume_builder.h +21 -6
  221. warp/native/volume_impl.h +15 -6
  222. warp/native/warp.cpp +20 -12
  223. warp/native/warp.cu +114 -16
  224. warp/native/warp.h +34 -16
  225. warp/optim/__init__.py +14 -6
  226. warp/optim/adam.py +14 -6
  227. warp/optim/linear.py +25 -10
  228. warp/optim/sgd.py +14 -6
  229. warp/paddle.py +14 -6
  230. warp/render/__init__.py +14 -6
  231. warp/render/render_opengl.py +14 -6
  232. warp/render/render_usd.py +14 -6
  233. warp/render/utils.py +14 -6
  234. warp/sim/__init__.py +14 -7
  235. warp/sim/articulation.py +18 -10
  236. warp/sim/collide.py +35 -16
  237. warp/sim/graph_coloring.py +14 -6
  238. warp/sim/import_mjcf.py +463 -162
  239. warp/sim/import_snu.py +14 -7
  240. warp/sim/import_urdf.py +46 -18
  241. warp/sim/import_usd.py +14 -7
  242. warp/sim/inertia.py +14 -6
  243. warp/sim/integrator.py +14 -6
  244. warp/sim/integrator_euler.py +19 -11
  245. warp/sim/integrator_featherstone.py +17 -16
  246. warp/sim/integrator_vbd.py +222 -8
  247. warp/sim/integrator_xpbd.py +19 -11
  248. warp/sim/model.py +56 -19
  249. warp/sim/particles.py +14 -6
  250. warp/sim/render.py +14 -6
  251. warp/sim/utils.py +17 -2
  252. warp/sparse.py +657 -555
  253. warp/stubs.py +231 -19
  254. warp/tape.py +14 -6
  255. warp/tests/aux_test_class_kernel.py +14 -6
  256. warp/tests/aux_test_compile_consts_dummy.py +14 -6
  257. warp/tests/aux_test_conditional_unequal_types_kernels.py +14 -6
  258. warp/tests/aux_test_dependent.py +14 -6
  259. warp/tests/aux_test_grad_customs.py +14 -6
  260. warp/tests/aux_test_instancing_gc.py +14 -6
  261. warp/tests/aux_test_module_unload.py +14 -6
  262. warp/tests/aux_test_name_clash1.py +14 -6
  263. warp/tests/aux_test_name_clash2.py +14 -6
  264. warp/tests/aux_test_unresolved_func.py +14 -6
  265. warp/tests/aux_test_unresolved_symbol.py +14 -6
  266. warp/tests/cuda/__init__.py +0 -0
  267. warp/tests/{test_async.py → cuda/test_async.py} +14 -6
  268. warp/tests/{test_ipc.py → cuda/test_ipc.py} +14 -6
  269. warp/tests/{test_mempool.py → cuda/test_mempool.py} +53 -6
  270. warp/tests/{test_multigpu.py → cuda/test_multigpu.py} +14 -6
  271. warp/tests/{test_peer.py → cuda/test_peer.py} +14 -6
  272. warp/tests/{test_pinned.py → cuda/test_pinned.py} +14 -6
  273. warp/tests/{test_streams.py → cuda/test_streams.py} +85 -6
  274. warp/tests/geometry/__init__.py +0 -0
  275. warp/tests/{test_bvh.py → geometry/test_bvh.py} +14 -6
  276. warp/tests/{test_hash_grid.py → geometry/test_hash_grid.py} +14 -6
  277. warp/tests/{test_marching_cubes.py → geometry/test_marching_cubes.py} +14 -6
  278. warp/tests/{test_mesh.py → geometry/test_mesh.py} +14 -6
  279. warp/tests/{test_mesh_query_aabb.py → geometry/test_mesh_query_aabb.py} +14 -6
  280. warp/tests/{test_mesh_query_point.py → geometry/test_mesh_query_point.py} +80 -69
  281. warp/tests/{test_mesh_query_ray.py → geometry/test_mesh_query_ray.py} +15 -7
  282. warp/tests/{test_volume.py → geometry/test_volume.py} +55 -12
  283. warp/tests/{test_volume_write.py → geometry/test_volume_write.py} +14 -6
  284. warp/tests/interop/__init__.py +0 -0
  285. warp/tests/{test_dlpack.py → interop/test_dlpack.py} +42 -11
  286. warp/tests/{test_jax.py → interop/test_jax.py} +14 -6
  287. warp/tests/{test_paddle.py → interop/test_paddle.py} +14 -6
  288. warp/tests/{test_torch.py → interop/test_torch.py} +14 -6
  289. warp/tests/run_coverage_serial.py +14 -6
  290. warp/tests/sim/__init__.py +0 -0
  291. warp/tests/{disabled_kinematics.py → sim/disabled_kinematics.py} +23 -16
  292. warp/tests/{flaky_test_sim_grad.py → sim/flaky_test_sim_grad.py} +14 -6
  293. warp/tests/{test_collision.py → sim/test_collision.py} +16 -8
  294. warp/tests/{test_coloring.py → sim/test_coloring.py} +14 -7
  295. warp/tests/{test_model.py → sim/test_model.py} +55 -7
  296. warp/tests/{test_sim_grad_bounce_linear.py → sim/test_sim_grad_bounce_linear.py} +14 -6
  297. warp/tests/{test_sim_kinematics.py → sim/test_sim_kinematics.py} +16 -7
  298. warp/tests/sim/test_vbd.py +597 -0
  299. warp/tests/test_adam.py +14 -6
  300. warp/tests/test_arithmetic.py +14 -6
  301. warp/tests/test_array.py +14 -6
  302. warp/tests/test_array_reduce.py +14 -6
  303. warp/tests/test_assert.py +14 -6
  304. warp/tests/test_atomic.py +14 -6
  305. warp/tests/test_bool.py +15 -7
  306. warp/tests/test_builtins_resolution.py +14 -6
  307. warp/tests/test_closest_point_edge_edge.py +14 -6
  308. warp/tests/test_codegen.py +14 -6
  309. warp/tests/test_codegen_instancing.py +14 -6
  310. warp/tests/test_compile_consts.py +14 -6
  311. warp/tests/test_conditional.py +14 -6
  312. warp/tests/test_context.py +14 -6
  313. warp/tests/test_copy.py +14 -6
  314. warp/tests/test_ctypes.py +14 -6
  315. warp/tests/test_dense.py +14 -6
  316. warp/tests/test_devices.py +14 -6
  317. warp/tests/test_examples.py +42 -42
  318. warp/tests/test_fabricarray.py +14 -6
  319. warp/tests/test_fast_math.py +14 -6
  320. warp/tests/test_fem.py +37 -10
  321. warp/tests/test_fp16.py +14 -6
  322. warp/tests/test_func.py +14 -6
  323. warp/tests/test_future_annotations.py +14 -6
  324. warp/tests/test_generics.py +14 -6
  325. warp/tests/test_grad.py +14 -6
  326. warp/tests/test_grad_customs.py +14 -6
  327. warp/tests/test_grad_debug.py +14 -6
  328. warp/tests/test_implicit_init.py +14 -6
  329. warp/tests/test_import.py +14 -6
  330. warp/tests/test_indexedarray.py +14 -6
  331. warp/tests/test_intersect.py +14 -6
  332. warp/tests/test_iter.py +14 -6
  333. warp/tests/test_large.py +14 -6
  334. warp/tests/test_launch.py +14 -6
  335. warp/tests/test_lerp.py +14 -6
  336. warp/tests/test_linear_solvers.py +15 -11
  337. warp/tests/test_lvalue.py +14 -6
  338. warp/tests/test_mat.py +247 -85
  339. warp/tests/test_mat_lite.py +14 -6
  340. warp/tests/test_mat_scalar_ops.py +18 -10
  341. warp/tests/test_math.py +14 -6
  342. warp/tests/test_mlp.py +14 -6
  343. warp/tests/test_module_hashing.py +14 -6
  344. warp/tests/test_modules_lite.py +14 -6
  345. warp/tests/test_noise.py +14 -6
  346. warp/tests/test_operators.py +14 -6
  347. warp/tests/test_options.py +14 -6
  348. warp/tests/test_overwrite.py +15 -60
  349. warp/tests/test_print.py +14 -6
  350. warp/tests/test_quat.py +81 -52
  351. warp/tests/test_rand.py +58 -43
  352. warp/tests/test_reload.py +14 -6
  353. warp/tests/test_rounding.py +14 -6
  354. warp/tests/test_runlength_encode.py +14 -6
  355. warp/tests/test_scalar_ops.py +14 -6
  356. warp/tests/test_smoothstep.py +14 -6
  357. warp/tests/test_snippet.py +15 -0
  358. warp/tests/test_sparse.py +61 -12
  359. warp/tests/test_spatial.py +89 -6
  360. warp/tests/test_special_values.py +14 -6
  361. warp/tests/test_static.py +15 -7
  362. warp/tests/test_struct.py +14 -6
  363. warp/tests/test_tape.py +14 -6
  364. warp/tests/test_transient_module.py +14 -6
  365. warp/tests/test_triangle_closest_point.py +14 -6
  366. warp/tests/test_types.py +14 -6
  367. warp/tests/test_utils.py +98 -10
  368. warp/tests/test_vec.py +60 -40
  369. warp/tests/test_vec_lite.py +14 -6
  370. warp/tests/test_vec_scalar_ops.py +14 -6
  371. warp/tests/test_verify_fp.py +14 -6
  372. warp/tests/tile/__init__.py +0 -0
  373. warp/tests/{test_tile.py → tile/test_tile.py} +150 -57
  374. warp/tests/{test_tile_load.py → tile/test_tile_load.py} +15 -7
  375. warp/tests/{test_tile_mathdx.py → tile/test_tile_mathdx.py} +23 -12
  376. warp/tests/{test_tile_mlp.py → tile/test_tile_mlp.py} +39 -20
  377. warp/tests/{test_tile_reduce.py → tile/test_tile_reduce.py} +74 -7
  378. warp/tests/{test_tile_shared_memory.py → tile/test_tile_shared_memory.py} +14 -6
  379. warp/tests/{test_tile_view.py → tile/test_tile_view.py} +15 -7
  380. warp/tests/unittest_serial.py +15 -6
  381. warp/tests/unittest_suites.py +59 -65
  382. warp/tests/unittest_utils.py +16 -7
  383. warp/tests/walkthrough_debug.py +14 -6
  384. warp/thirdparty/unittest_parallel.py +15 -8
  385. warp/torch.py +14 -6
  386. warp/types.py +124 -664
  387. warp/utils.py +151 -78
  388. {warp_lang-1.6.1.dist-info → warp_lang-1.7.0.dist-info}/METADATA +39 -12
  389. warp_lang-1.7.0.dist-info/RECORD +429 -0
  390. {warp_lang-1.6.1.dist-info → warp_lang-1.7.0.dist-info}/WHEEL +1 -1
  391. warp_lang-1.7.0.dist-info/licenses/LICENSE.md +202 -0
  392. warp/examples/optim/example_walker.py +0 -309
  393. warp/native/cutlass_gemm.cpp +0 -34
  394. warp/native/cutlass_gemm.cu +0 -373
  395. warp/tests/test_matmul.py +0 -503
  396. warp/tests/test_matmul_lite.py +0 -403
  397. warp/tests/test_vbd.py +0 -378
  398. warp/tests/unused_test_misc.py +0 -69
  399. warp_lang-1.6.1.dist-info/LICENSE.md +0 -126
  400. warp_lang-1.6.1.dist-info/RECORD +0 -419
  401. {warp_lang-1.6.1.dist-info → warp_lang-1.7.0.dist-info}/top_level.txt +0 -0
warp/native/mat.h CHANGED
@@ -1,9 +1,18 @@
- /** Copyright (c) 2022 NVIDIA CORPORATION. All rights reserved.
- * NVIDIA CORPORATION and its licensors retain all intellectual property
- * and proprietary rights in and to this software, related documentation
- * and any modifications thereto. Any use, reproduction, disclosure or
- * distribution of this software and related documentation without an express
- * license agreement from NVIDIA CORPORATION is strictly prohibited.
+ /*
+ * SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
  */

  #pragma once
@@ -198,6 +207,159 @@ struct mat_t
  Type data[Rows][Cols];
  };

+ template<typename Type>
+ inline CUDA_CALLABLE mat_t<2, 2, Type> matrix_from_cols(vec_t<2, Type> c0, vec_t<2, Type> c1)
+ {
+ mat_t<2, 2, Type> m;
+
+ m.data[0][0] = c0[0];
+ m.data[1][0] = c0[1];
+
+ m.data[0][1] = c1[0];
+ m.data[1][1] = c1[1];
+
+ return m;
+ }
+
+ template<typename Type>
+ inline CUDA_CALLABLE mat_t<3, 3, Type> matrix_from_cols(vec_t<3, Type> c0, vec_t<3, Type> c1, vec_t<3, Type> c2)
+ {
+ mat_t<3, 3, Type> m;
+
+ m.data[0][0] = c0[0];
+ m.data[1][0] = c0[1];
+ m.data[2][0] = c0[2];
+
+ m.data[0][1] = c1[0];
+ m.data[1][1] = c1[1];
+ m.data[2][1] = c1[2];
+
+ m.data[0][2] = c2[0];
+ m.data[1][2] = c2[1];
+ m.data[2][2] = c2[2];
+
+ return m;
+ }
+
+ template<typename Type>
+ inline CUDA_CALLABLE mat_t<4, 4, Type> matrix_from_cols(vec_t<4, Type> c0, vec_t<4, Type> c1, vec_t<4, Type> c2, vec_t<4, Type> c3)
+ {
+ mat_t<4, 4, Type> m;
+
+ m.data[0][0] = c0[0];
+ m.data[1][0] = c0[1];
+ m.data[2][0] = c0[2];
+ m.data[3][0] = c0[3];
+
+ m.data[0][1] = c1[0];
+ m.data[1][1] = c1[1];
+ m.data[2][1] = c1[2];
+ m.data[3][1] = c1[3];
+
+ m.data[0][2] = c2[0];
+ m.data[1][2] = c2[1];
+ m.data[2][2] = c2[2];
+ m.data[3][2] = c2[3];
+
+ m.data[0][3] = c3[0];
+ m.data[1][3] = c3[1];
+ m.data[2][3] = c3[2];
+ m.data[3][3] = c3[3];
+
+ return m;
+ }
+
+ template<unsigned Rows, unsigned Cols, typename Type>
+ inline CUDA_CALLABLE mat_t<Rows, Cols, Type> matrix_from_cols(const initializer_array<Cols, vec_t<Rows, Type> >& l)
+ {
+ mat_t<Rows, Cols, Type> m;
+ for (unsigned j=0; j < Cols; ++j)
+ {
+ for (unsigned i=0; i < Rows; ++i)
+ {
+ m.data[i][j] = l[j][i];
+ }
+ }
+
+ return m;
+ }
+
+ template<typename Type>
+ inline CUDA_CALLABLE mat_t<2, 2, Type> matrix_from_rows(vec_t<2, Type> r0, vec_t<2, Type> r1)
+ {
+ mat_t<2, 2, Type> m;
+
+ m.data[0][0] = r0[0];
+ m.data[0][1] = r0[1];
+
+ m.data[1][0] = r1[0];
+ m.data[1][1] = r1[1];
+
+ return m;
+ }
+
+ template<typename Type>
+ inline CUDA_CALLABLE mat_t<3, 3, Type> matrix_from_rows(vec_t<3, Type> r0, vec_t<3, Type> r1, vec_t<3, Type> r2)
+ {
+ mat_t<3, 3, Type> m;
+
+ m.data[0][0] = r0[0];
+ m.data[0][1] = r0[1];
+ m.data[0][2] = r0[2];
+
+ m.data[1][0] = r1[0];
+ m.data[1][1] = r1[1];
+ m.data[1][2] = r1[2];
+
+ m.data[2][0] = r2[0];
+ m.data[2][1] = r2[1];
+ m.data[2][2] = r2[2];
+
+ return m;
+ }
+
+ template<typename Type>
+ inline CUDA_CALLABLE mat_t<4, 4, Type> matrix_from_rows(vec_t<4, Type> r0, vec_t<4, Type> r1, vec_t<4, Type> r2, vec_t<4, Type> r3)
+ {
+ mat_t<4, 4, Type> m;
+
+ m.data[0][0] = r0[0];
+ m.data[0][1] = r0[1];
+ m.data[0][2] = r0[2];
+ m.data[0][3] = r0[3];
+
+ m.data[1][0] = r1[0];
+ m.data[1][1] = r1[1];
+ m.data[1][2] = r1[2];
+ m.data[1][3] = r1[3];
+
+ m.data[2][0] = r2[0];
+ m.data[2][1] = r2[1];
+ m.data[2][2] = r2[2];
+ m.data[2][3] = r2[3];
+
+ m.data[3][0] = r3[0];
+ m.data[3][1] = r3[1];
+ m.data[3][2] = r3[2];
+ m.data[3][3] = r3[3];
+
+ return m;
+ }
+
+ template<unsigned Rows, unsigned Cols, typename Type>
+ inline CUDA_CALLABLE mat_t<Rows, Cols, Type> matrix_from_rows(const initializer_array<Rows, vec_t<Cols, Type> >& l)
+ {
+ mat_t<Rows, Cols, Type> m;
+ for (unsigned i=0; i < Rows; ++i)
+ {
+ for (unsigned j=0; j < Cols; ++j)
+ {
+ m.data[i][j] = l[i][j];
+ }
+ }
+
+ return m;
+ }

  template<unsigned Rows, typename Type>
  inline CUDA_CALLABLE mat_t<Rows, Rows, Type> identity()
@@ -395,37 +557,241 @@ inline CUDA_CALLABLE void adj_index(const mat_t<Rows,Cols,Type>& m, int row, int


  template<unsigned Rows, unsigned Cols, typename Type>
- inline CUDA_CALLABLE void augassign_add(mat_t<Rows,Cols,Type>& m, int row, int col, Type value)
+ inline CUDA_CALLABLE void add_inplace(mat_t<Rows,Cols,Type>& m, int row, int col, Type value)
  {
+ #ifndef NDEBUG
+ if (row < 0 || row >= Rows)
+ {
+ printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
+ assert(0);
+ }
+ if (col < 0 || col >= Cols)
+ {
+ printf("mat col index %d out of bounds at %s %d\n", col, __FILE__, __LINE__);
+ assert(0);
+ }
+ #endif
+
  m.data[row][col] += value;
  }


  template<unsigned Rows, unsigned Cols, typename Type>
- inline CUDA_CALLABLE void adj_augassign_add(mat_t<Rows,Cols,Type>& m, int row, int col, Type value,
+ inline CUDA_CALLABLE void add_inplace(mat_t<Rows,Cols,Type>& m, int row, vec_t<Cols,Type>& value)
+ {
+ #ifndef NDEBUG
+ if (row < 0 || row >= Rows)
+ {
+ printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
+ assert(0);
+ }
+ #endif
+
+ for(unsigned i=0; i < Cols; ++i)
+ {
+ m.data[row][i] += value[i];
+ }
+ }
+
+
+ template<unsigned Rows, unsigned Cols, typename Type>
+ inline CUDA_CALLABLE void adj_add_inplace(mat_t<Rows,Cols,Type>& m, int row, int col, Type value,
  mat_t<Rows,Cols,Type>& adj_m, int adj_row, int adj_col, Type& adj_value)
  {
+ #ifndef NDEBUG
+ if (row < 0 || row >= Rows)
+ {
+ printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
+ assert(0);
+ }
+ if (col < 0 || col >= Cols)
+ {
+ printf("mat col index %d out of bounds at %s %d\n", col, __FILE__, __LINE__);
+ assert(0);
+ }
+ #endif
+
  adj_value += adj_m.data[row][col];
  }


  template<unsigned Rows, unsigned Cols, typename Type>
- inline CUDA_CALLABLE void augassign_sub(mat_t<Rows,Cols,Type>& m, int row, int col, Type value)
+ inline CUDA_CALLABLE void adj_add_inplace(mat_t<Rows,Cols,Type>& m, int row, vec_t<Cols,Type>& value,
+ mat_t<Rows,Cols,Type>& adj_m, int adj_row, vec_t<Cols,Type>& adj_value)
+ {
+ #ifndef NDEBUG
+ if (row < 0 || row >= Rows)
+ {
+ printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
+ assert(0);
+ }
+ #endif
+
+ for(unsigned i=0; i < Cols; ++i)
+ {
+ adj_value[i] += adj_m.data[row][i];
+ }
+ }
+
+
+ template<unsigned Rows, unsigned Cols, typename Type>
+ inline CUDA_CALLABLE void sub_inplace(mat_t<Rows,Cols,Type>& m, int row, int col, Type value)
  {
+ #ifndef NDEBUG
+ if (row < 0 || row >= Rows)
+ {
+ printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
+ assert(0);
+ }
+ if (col < 0 || col >= Cols)
+ {
+ printf("mat col index %d out of bounds at %s %d\n", col, __FILE__, __LINE__);
+ assert(0);
+ }
+ #endif
+
  m.data[row][col] -= value;
  }


  template<unsigned Rows, unsigned Cols, typename Type>
- inline CUDA_CALLABLE void adj_augassign_sub(mat_t<Rows,Cols,Type>& m, int row, int col, Type value,
+ inline CUDA_CALLABLE void sub_inplace(mat_t<Rows,Cols,Type>& m, int row, vec_t<Cols,Type>& value)
+ {
+ #ifndef NDEBUG
+ if (row < 0 || row >= Rows)
+ {
+ printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
+ assert(0);
+ }
+ #endif
+
+ for(unsigned i=0; i < Cols; ++i)
+ {
+ m.data[row][i] -= value[i];
+ }
+ }
+
+
+ template<unsigned Rows, unsigned Cols, typename Type>
+ inline CUDA_CALLABLE void adj_sub_inplace(mat_t<Rows,Cols,Type>& m, int row, int col, Type value,
  mat_t<Rows,Cols,Type>& adj_m, int adj_row, int adj_col, Type& adj_value)
  {
+ #ifndef NDEBUG
+ if (row < 0 || row >= Rows)
+ {
+ printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
+ assert(0);
+ }
+ if (col < 0 || col >= Cols)
+ {
+ printf("mat col index %d out of bounds at %s %d\n", col, __FILE__, __LINE__);
+ assert(0);
+ }
+ #endif
+
  adj_value -= adj_m.data[row][col];
  }


  template<unsigned Rows, unsigned Cols, typename Type>
- inline CUDA_CALLABLE mat_t<Rows,Cols,Type> assign(mat_t<Rows,Cols,Type>& m, int row, int col, Type value)
+ inline CUDA_CALLABLE void adj_sub_inplace(mat_t<Rows,Cols,Type>& m, int row, vec_t<Cols,Type>& value,
+ mat_t<Rows,Cols,Type>& adj_m, int adj_row, vec_t<Cols,Type>& adj_value)
+ {
+ #ifndef NDEBUG
+ if (row < 0 || row >= Rows)
+ {
+ printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
+ assert(0);
+ }
+ #endif
+
+ for(unsigned i=0; i < Cols; ++i)
+ {
+ adj_value[i] -= adj_m.data[row][i];
+ }
+ }
+
+
+ template<unsigned Rows, unsigned Cols, typename Type>
+ inline CUDA_CALLABLE void assign_inplace(mat_t<Rows,Cols,Type>& m, int row, int col, Type value)
+ {
+ #ifndef NDEBUG
+ if (row < 0 || row >= Rows)
+ {
+ printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
+ assert(0);
+ }
+ if (col < 0 || col >= Cols)
+ {
+ printf("mat col index %d out of bounds at %s %d\n", col, __FILE__, __LINE__);
+ assert(0);
+ }
+ #endif
+
+ m.data[row][col] = value;
+ }
+
+
+ template<unsigned Rows, unsigned Cols, typename Type>
+ inline CUDA_CALLABLE void assign_inplace(mat_t<Rows,Cols,Type>& m, int row, vec_t<Cols,Type>& value)
+ {
+ #ifndef NDEBUG
+ if (row < 0 || row >= Rows)
+ {
+ printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
+ assert(0);
+ }
+ #endif
+
+ for(unsigned i=0; i < Cols; ++i)
+ {
+ m.data[row][i] = value[i];
+ }
+ }
+
+
+ template<unsigned Rows, unsigned Cols, typename Type>
+ inline CUDA_CALLABLE void adj_assign_inplace(mat_t<Rows,Cols,Type>& m, int row, int col, Type value,
+ mat_t<Rows,Cols,Type>& adj_m, int& adj_row, int& adj_col, Type& adj_value)
+ {
+ #ifndef NDEBUG
+ if (row < 0 || row >= Rows)
+ {
+ printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
+ assert(0);
+ }
+ if (col < 0 || col >= Cols)
+ {
+ printf("mat col index %d out of bounds at %s %d\n", col, __FILE__, __LINE__);
+ assert(0);
+ }
+ #endif
+
+ adj_value += adj_m.data[row][col];
+ }
+
+
+ template<unsigned Rows, unsigned Cols, typename Type>
+ inline CUDA_CALLABLE void adj_assign_inplace(mat_t<Rows,Cols,Type>& m, int row, vec_t<Cols,Type>& value,
+ mat_t<Rows,Cols,Type>& adj_m, int& adj_row, vec_t<Cols,Type>& adj_value)
+ {
+ #ifndef NDEBUG
+ if (row < 0 || row >= Rows)
+ {
+ printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
+ assert(0);
+ }
+ #endif
+
+ for(unsigned i=0; i < Cols; ++i)
+ {
+ adj_value[i] += adj_m.data[row][i];
+ }
+ }
+
+
+ template<unsigned Rows, unsigned Cols, typename Type>
+ inline CUDA_CALLABLE mat_t<Rows,Cols,Type> assign_copy(mat_t<Rows,Cols,Type>& m, int row, int col, Type value)
  {
  #ifndef NDEBUG
  if (row < 0 || row >= Rows)
@@ -447,7 +813,7 @@ inline CUDA_CALLABLE mat_t<Rows,Cols,Type> assign(mat_t<Rows,Cols,Type>& m, int


  template<unsigned Rows, unsigned Cols, typename Type>
- inline CUDA_CALLABLE mat_t<Rows,Cols,Type> assign(mat_t<Rows,Cols,Type>& m, int row, vec_t<Cols,Type>& value)
+ inline CUDA_CALLABLE mat_t<Rows,Cols,Type> assign_copy(mat_t<Rows,Cols,Type>& m, int row, vec_t<Cols,Type>& value)
  {
  #ifndef NDEBUG
  if (row < 0 || row >= Rows)
@@ -467,7 +833,7 @@ inline CUDA_CALLABLE mat_t<Rows,Cols,Type> assign(mat_t<Rows,Cols,Type>& m, int


  template<unsigned Rows, unsigned Cols, typename Type>
- inline CUDA_CALLABLE void adj_assign(mat_t<Rows,Cols,Type>& m, int row, int col, Type value,
+ inline CUDA_CALLABLE void adj_assign_copy(mat_t<Rows,Cols,Type>& m, int row, int col, Type value,
  mat_t<Rows,Cols,Type>& adj_m, int& adj_row, int& adj_col, Type& adj_value, const mat_t<Rows,Cols,Type>& adj_ret)
  {
  #ifndef NDEBUG
@@ -496,7 +862,7 @@ inline CUDA_CALLABLE void adj_assign(mat_t<Rows,Cols,Type>& m, int row, int col,


  template<unsigned Rows, unsigned Cols, typename Type>
- inline CUDA_CALLABLE void adj_assign(mat_t<Rows,Cols,Type>& m, int row, vec_t<Cols,Type>& value,
+ inline CUDA_CALLABLE void adj_assign_copy(mat_t<Rows,Cols,Type>& m, int row, vec_t<Cols,Type>& value,
  mat_t<Rows,Cols,Type>& adj_m, int& adj_row, vec_t<Cols,Type>& adj_value, const mat_t<Rows,Cols,Type>& adj_ret)
  {
  #ifndef NDEBUG
@@ -701,7 +1067,7 @@ inline CUDA_CALLABLE mat_t<Rows,ColsOut,Type> mul(const mat_t<Rows,Cols,Type>& a
  mat_t<Rows,ColsOut,Type> t(0);
  for (unsigned i=0; i < Rows; ++i)
  {
- for (unsigned j=0; j < ColsOut; ++j)
+ for (unsigned j=0; j < ColsOut; ++j)
  {
  Type sum(0.0);

@@ -1564,6 +1930,128 @@ inline CUDA_CALLABLE void adj_mat_t(const vec_t<4,Type> &cmps0, const vec_t<4,Ty
  }
  }

+ template<typename Type>
+ inline CUDA_CALLABLE void adj_matrix_from_cols(
+ const vec_t<2, Type>& c0, const vec_t<2, Type>& c1,
+ vec_t<2, Type>& adj_c0, vec_t<2, Type>& adj_c1,
+ const mat_t<2, 2, Type>& adj_ret
+ )
+ {
+ for (unsigned i=0; i < 2; ++i)
+ {
+ adj_c0[i] += adj_ret.data[i][0];
+ adj_c1[i] += adj_ret.data[i][1];
+ }
+ }
+
+ template<typename Type>
+ inline CUDA_CALLABLE void adj_matrix_from_cols(
+ const vec_t<3, Type>& c0, const vec_t<3, Type>& c1, const vec_t<3, Type>& c2,
+ vec_t<3, Type>& adj_c0, vec_t<3, Type>& adj_c1, vec_t<3, Type>& adj_c2,
+ const mat_t<3, 3, Type>& adj_ret
+ )
+ {
+ for (unsigned i=0; i < 3; ++i)
+ {
+ adj_c0[i] += adj_ret.data[i][0];
+ adj_c1[i] += adj_ret.data[i][1];
+ adj_c2[i] += adj_ret.data[i][2];
+ }
+ }
+
+ template<typename Type>
+ inline CUDA_CALLABLE void adj_matrix_from_cols(
+ const vec_t<4, Type>& c0, const vec_t<4, Type>& c1, const vec_t<4, Type>& c2, const vec_t<4, Type>& c3,
+ vec_t<4, Type>& adj_c0, vec_t<4, Type>& adj_c1, vec_t<4, Type>& adj_c2, vec_t<4, Type>& adj_c3,
+ const mat_t<4, 4, Type>& adj_ret
+ )
+ {
+ for (unsigned i=0; i < 4; ++i)
+ {
+ adj_c0[i] += adj_ret.data[i][0];
+ adj_c1[i] += adj_ret.data[i][1];
+ adj_c2[i] += adj_ret.data[i][2];
+ adj_c3[i] += adj_ret.data[i][3];
+ }
+ }
+
+ template<unsigned Rows, unsigned Cols, typename Type>
+ inline CUDA_CALLABLE void adj_matrix_from_cols(
+ const initializer_array<Cols, vec_t<Rows, Type> >& l,
+ const initializer_array<Cols, vec_t<Rows, Type>* >& adj_l,
+ const mat_t<Rows, Cols, Type>& adj_ret
+ )
+ {
+ for (unsigned j=0; j < Cols; ++j)
+ {
+ for (unsigned i=0; i < Rows; ++i)
+ {
+ (*adj_l[j])[i] += adj_ret.data[i][j];
+ }
+ }
+ }
+
+ template<typename Type>
+ inline CUDA_CALLABLE void adj_matrix_from_rows(
+ const vec_t<2, Type>& r0, const vec_t<2, Type>& r1,
+ vec_t<2, Type>& adj_r0, vec_t<2, Type>& adj_r1,
+ const mat_t<2, 2, Type>& adj_ret
+ )
+ {
+ for (unsigned j=0; j < 2; ++j)
+ {
+ adj_r0[j] += adj_ret.data[0][j];
+ adj_r1[j] += adj_ret.data[1][j];
+ }
+ }
+
+ template<typename Type>
+ inline CUDA_CALLABLE void adj_matrix_from_rows(
+ const vec_t<3, Type>& c0, const vec_t<3, Type>& c1, const vec_t<3, Type>& c2,
+ vec_t<3, Type>& adj_c0, vec_t<3, Type>& adj_c1, vec_t<3, Type>& adj_c2,
+ const mat_t<3, 3, Type>& adj_ret
+ )
+ {
+ for (unsigned j=0; j < 3; ++j)
+ {
+ adj_c0[j] += adj_ret.data[0][j];
+ adj_c1[j] += adj_ret.data[1][j];
+ adj_c2[j] += adj_ret.data[2][j];
+ }
+ }
+
+ template<typename Type>
+ inline CUDA_CALLABLE void adj_matrix_from_rows(
+ const vec_t<4, Type>& c0, const vec_t<4, Type>& c1, const vec_t<4, Type>& c2, const vec_t<4, Type>& c3,
+ vec_t<4, Type>& adj_c0, vec_t<4, Type>& adj_c1, vec_t<4, Type>& adj_c2, vec_t<4, Type>& adj_c3,
+ const mat_t<4, 4, Type>& adj_ret
+ )
+ {
+ for (unsigned j=0; j < 4; ++j)
+ {
+ adj_c0[j] += adj_ret.data[0][j];
+ adj_c1[j] += adj_ret.data[1][j];
+ adj_c2[j] += adj_ret.data[2][j];
+ adj_c3[j] += adj_ret.data[3][j];
+ }
+ }
+
+ template<unsigned Rows, unsigned Cols, typename Type>
+ inline CUDA_CALLABLE void adj_matrix_from_rows(
+ const initializer_array<Rows, vec_t<Cols, Type> >& l,
+ const initializer_array<Rows, vec_t<Cols, Type>* >& adj_l,
+ const mat_t<Rows, Cols, Type>& adj_ret
+ )
+ {
+ for (unsigned i=0; i < Rows; ++i)
+ {
+ for (unsigned j=0; j < Cols; ++j)
+ {
+ (*adj_l[i])[j] += adj_ret.data[i][j];
+ }
+ }
+ }
+
  template<unsigned Rows, unsigned Cols, typename Type>
  CUDA_CALLABLE inline mat_t<Rows, Cols, Type> lerp(const mat_t<Rows, Cols, Type>& a, const mat_t<Rows, Cols, Type>& b, Type t)
  {
@@ -1704,4 +2192,29 @@ CUDA_CALLABLE inline void adj_len(const mat_t<Rows,Cols,Type>& x, mat_t<Rows,Col
  {
  }

+ template<unsigned Rows, unsigned Cols, typename Type>
+ inline CUDA_CALLABLE void expect_near(const mat_t<Rows,Cols,Type>& actual, const mat_t<Rows,Cols,Type>& expected, const Type& tolerance)
+ {
+ Type diff(0);
+ for (unsigned i = 0; i < Rows; ++i)
+ {
+ for (unsigned j = 0; j < Cols; ++j)
+ {
+ diff = max(diff, abs(actual.data[i][j] - expected.data[i][j]));
+ }
+ }
+ if (diff > tolerance)
+ {
+ printf("Error, expect_near() failed with tolerance "); print(tolerance);
+ printf("\t Expected: "); print(expected);
+ printf("\t Actual: "); print(actual);
+ }
+ }
+
+ template<unsigned Rows, unsigned Cols, typename Type>
+ inline CUDA_CALLABLE void adj_expect_near(const mat_t<Rows,Cols,Type>& actual, const mat_t<Rows,Cols,Type>& expected, Type tolerance, mat_t<Rows,Cols,Type>& adj_actual, mat_t<Rows,Cols,Type>& adj_expected, Type adj_tolerance)
+ {
+ // nop
+ }
+
  } // namespace wp
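Besides the relicensing header, the mat.h changes back two user-facing additions in 1.7.0: matrices can be constructed from column or row vectors (matrix_from_cols / matrix_from_rows plus their adjoints), and matrix components and rows can be updated in place inside kernels (assign_inplace, add_inplace, sub_inplace alongside the renamed assign_copy path), with expect_near extended to matrices for testing. The sketch below is not taken from the package; it assumes these are exposed in the Python API as wp.matrix_from_cols()/wp.matrix_from_rows() and as ordinary indexed assignment on local matrices, and the kernel and array names are purely illustrative.

# Hedged usage sketch, assuming the Python-side counterparts of the C++
# additions above: wp.matrix_from_cols() for column-wise construction and
# in-place component/row assignment on a local matrix inside a kernel.
import numpy as np
import warp as wp


@wp.kernel
def basis_demo(dirs: wp.array(dtype=wp.vec3), out: wp.array(dtype=wp.mat33)):
    tid = wp.tid()

    # build an orthonormal frame around each input direction
    z = wp.normalize(dirs[tid])
    x = wp.normalize(wp.cross(wp.vec3(0.0, 1.0, 0.0), z))
    y = wp.cross(z, x)

    # construct a 3x3 matrix whose columns are x, y, z
    m = wp.matrix_from_cols(x, y, z)

    # in-place component and row updates on the local matrix; intended to
    # exercise the new assign_inplace()/add_inplace() paths shown above
    m[2, 2] = 1.0
    m[1] += wp.vec3(0.0)

    out[tid] = m


dirs = wp.array(np.random.randn(64, 3).astype(np.float32), dtype=wp.vec3)
out = wp.zeros(64, dtype=wp.mat33)
wp.launch(basis_demo, dim=64, inputs=[dirs], outputs=[out])
print(out.numpy()[0])
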
warp/native/mathdx.cpp CHANGED
@@ -1,9 +1,18 @@
- /** Copyright (c) 2024 NVIDIA CORPORATION. All rights reserved.
- * NVIDIA CORPORATION and its licensors retain all intellectual property
- * and proprietary rights in and to this software, related documentation
- * and any modifications thereto. Any use, reproduction, disclosure or
- * distribution of this software and related documentation without an express
- * license agreement from NVIDIA CORPORATION is strictly prohibited.
+ /*
+ * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
  */

  #include "builtin.h"
warp/native/matnn.h CHANGED
@@ -1,9 +1,18 @@
- /** Copyright (c) 2022 NVIDIA CORPORATION. All rights reserved.
- * NVIDIA CORPORATION and its licensors retain all intellectual property
- * and proprietary rights in and to this software, related documentation
- * and any modifications thereto. Any use, reproduction, disclosure or
- * distribution of this software and related documentation without an express
- * license agreement from NVIDIA CORPORATION is strictly prohibited.
+ /*
+ * SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
  */

  #pragma once
warp/native/mesh.cpp CHANGED
@@ -1,9 +1,18 @@
- /** Copyright (c) 2022 NVIDIA CORPORATION. All rights reserved.
- * NVIDIA CORPORATION and its licensors retain all intellectual property
- * and proprietary rights in and to this software, related documentation
- * and any modifications thereto. Any use, reproduction, disclosure or
- * distribution of this software and related documentation without an express
- * license agreement from NVIDIA CORPORATION is strictly prohibited.
+ /*
+ * SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
  */

  #include "mesh.h"