warp-lang 0.10.1__py3-none-win_amd64.whl → 0.11.0__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of warp-lang might be problematic. Click here for more details.

Files changed (300) hide show
  1. warp/__init__.py +10 -4
  2. warp/__init__.pyi +1 -0
  3. warp/bin/warp-clang.dll +0 -0
  4. warp/bin/warp.dll +0 -0
  5. warp/build.py +5 -3
  6. warp/build_dll.py +29 -9
  7. warp/builtins.py +868 -507
  8. warp/codegen.py +1074 -638
  9. warp/config.py +3 -3
  10. warp/constants.py +6 -0
  11. warp/context.py +715 -222
  12. warp/fabric.py +326 -0
  13. warp/fem/__init__.py +27 -0
  14. warp/fem/cache.py +389 -0
  15. warp/fem/dirichlet.py +181 -0
  16. warp/fem/domain.py +263 -0
  17. warp/fem/field/__init__.py +101 -0
  18. warp/fem/field/field.py +149 -0
  19. warp/fem/field/nodal_field.py +299 -0
  20. warp/fem/field/restriction.py +21 -0
  21. warp/fem/field/test.py +181 -0
  22. warp/fem/field/trial.py +183 -0
  23. warp/fem/geometry/__init__.py +19 -0
  24. warp/fem/geometry/closest_point.py +70 -0
  25. warp/fem/geometry/deformed_geometry.py +271 -0
  26. warp/fem/geometry/element.py +744 -0
  27. warp/fem/geometry/geometry.py +186 -0
  28. warp/fem/geometry/grid_2d.py +373 -0
  29. warp/fem/geometry/grid_3d.py +435 -0
  30. warp/fem/geometry/hexmesh.py +953 -0
  31. warp/fem/geometry/partition.py +376 -0
  32. warp/fem/geometry/quadmesh_2d.py +532 -0
  33. warp/fem/geometry/tetmesh.py +840 -0
  34. warp/fem/geometry/trimesh_2d.py +577 -0
  35. warp/fem/integrate.py +1616 -0
  36. warp/fem/operator.py +191 -0
  37. warp/fem/polynomial.py +213 -0
  38. warp/fem/quadrature/__init__.py +2 -0
  39. warp/fem/quadrature/pic_quadrature.py +245 -0
  40. warp/fem/quadrature/quadrature.py +294 -0
  41. warp/fem/space/__init__.py +292 -0
  42. warp/fem/space/basis_space.py +489 -0
  43. warp/fem/space/collocated_function_space.py +105 -0
  44. warp/fem/space/dof_mapper.py +236 -0
  45. warp/fem/space/function_space.py +145 -0
  46. warp/fem/space/grid_2d_function_space.py +267 -0
  47. warp/fem/space/grid_3d_function_space.py +306 -0
  48. warp/fem/space/hexmesh_function_space.py +352 -0
  49. warp/fem/space/partition.py +350 -0
  50. warp/fem/space/quadmesh_2d_function_space.py +369 -0
  51. warp/fem/space/restriction.py +160 -0
  52. warp/fem/space/shape/__init__.py +15 -0
  53. warp/fem/space/shape/cube_shape_function.py +738 -0
  54. warp/fem/space/shape/shape_function.py +103 -0
  55. warp/fem/space/shape/square_shape_function.py +611 -0
  56. warp/fem/space/shape/tet_shape_function.py +567 -0
  57. warp/fem/space/shape/triangle_shape_function.py +429 -0
  58. warp/fem/space/tetmesh_function_space.py +292 -0
  59. warp/fem/space/topology.py +295 -0
  60. warp/fem/space/trimesh_2d_function_space.py +221 -0
  61. warp/fem/types.py +77 -0
  62. warp/fem/utils.py +495 -0
  63. warp/native/array.h +147 -44
  64. warp/native/builtin.h +122 -149
  65. warp/native/bvh.cpp +73 -325
  66. warp/native/bvh.cu +406 -23
  67. warp/native/bvh.h +34 -43
  68. warp/native/clang/clang.cpp +13 -8
  69. warp/native/crt.h +2 -0
  70. warp/native/cuda_crt.h +5 -0
  71. warp/native/cuda_util.cpp +15 -3
  72. warp/native/cuda_util.h +3 -1
  73. warp/native/cutlass/tools/library/scripts/conv2d_operation.py +463 -0
  74. warp/native/cutlass/tools/library/scripts/conv3d_operation.py +321 -0
  75. warp/native/cutlass/tools/library/scripts/gemm_operation.py +988 -0
  76. warp/native/cutlass/tools/library/scripts/generator.py +4625 -0
  77. warp/native/cutlass/tools/library/scripts/library.py +799 -0
  78. warp/native/cutlass/tools/library/scripts/manifest.py +402 -0
  79. warp/native/cutlass/tools/library/scripts/pycutlass/docs/source/conf.py +96 -0
  80. warp/native/cutlass/tools/library/scripts/pycutlass/profile/conv/conv2d_f16_sm80.py +106 -0
  81. warp/native/cutlass/tools/library/scripts/pycutlass/profile/gemm/gemm_f32_sm80.py +91 -0
  82. warp/native/cutlass/tools/library/scripts/pycutlass/setup.py +80 -0
  83. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/__init__.py +48 -0
  84. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/arguments.py +118 -0
  85. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/c_types.py +241 -0
  86. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/compiler.py +432 -0
  87. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/conv2d_operation.py +631 -0
  88. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/epilogue.py +1026 -0
  89. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/frontend.py +104 -0
  90. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/gemm_operation.py +1276 -0
  91. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/library.py +744 -0
  92. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/memory_manager.py +74 -0
  93. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/operation.py +110 -0
  94. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/parser.py +619 -0
  95. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/reduction_operation.py +398 -0
  96. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/tensor_ref.py +70 -0
  97. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/__init__.py +4 -0
  98. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/conv2d_testbed.py +646 -0
  99. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/gemm_grouped_testbed.py +235 -0
  100. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/gemm_testbed.py +557 -0
  101. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/profiler.py +70 -0
  102. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/type_hint.py +39 -0
  103. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/utils/__init__.py +1 -0
  104. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/utils/device.py +76 -0
  105. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/utils/reference_model.py +255 -0
  106. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/__init__.py +0 -0
  107. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.py +201 -0
  108. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +177 -0
  109. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.py +98 -0
  110. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.py +95 -0
  111. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_few_channels_f16nhwc_f16nhwc_f16nhwc_tensor_op_f32_sm80.py +163 -0
  112. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_fixed_channels_f16nhwc_f16nhwc_f16nhwc_tensor_op_f32_sm80.py +187 -0
  113. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.py +309 -0
  114. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +54 -0
  115. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.py +96 -0
  116. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.py +107 -0
  117. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_strided_dgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +253 -0
  118. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.py +97 -0
  119. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +242 -0
  120. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.py +96 -0
  121. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.py +107 -0
  122. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/run_all_tests.py +10 -0
  123. warp/native/cutlass/tools/library/scripts/pycutlass/test/frontend/test_frontend.py +146 -0
  124. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/__init__.py +0 -0
  125. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_bf16_sm80.py +96 -0
  126. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_f16_sm80.py +447 -0
  127. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_f32_sm80.py +146 -0
  128. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_f64_sm80.py +102 -0
  129. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_grouped_sm80.py +203 -0
  130. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_s8_sm80.py +229 -0
  131. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/run_all_tests.py +9 -0
  132. warp/native/cutlass/tools/library/scripts/pycutlass/test/unit/test_sm80.py +453 -0
  133. warp/native/cutlass/tools/library/scripts/rank_2k_operation.py +398 -0
  134. warp/native/cutlass/tools/library/scripts/rank_k_operation.py +387 -0
  135. warp/native/cutlass/tools/library/scripts/rt.py +796 -0
  136. warp/native/cutlass/tools/library/scripts/symm_operation.py +400 -0
  137. warp/native/cutlass/tools/library/scripts/trmm_operation.py +407 -0
  138. warp/native/cutlass_gemm.cu +5 -3
  139. warp/native/exports.h +1240 -952
  140. warp/native/fabric.h +228 -0
  141. warp/native/hashgrid.cpp +4 -4
  142. warp/native/hashgrid.h +22 -2
  143. warp/native/intersect.h +22 -7
  144. warp/native/intersect_adj.h +8 -8
  145. warp/native/intersect_tri.h +1 -1
  146. warp/native/marching.cu +157 -161
  147. warp/native/mat.h +80 -19
  148. warp/native/matnn.h +2 -2
  149. warp/native/mesh.cpp +33 -108
  150. warp/native/mesh.cu +114 -23
  151. warp/native/mesh.h +446 -46
  152. warp/native/noise.h +272 -329
  153. warp/native/quat.h +51 -8
  154. warp/native/rand.h +45 -35
  155. warp/native/range.h +6 -2
  156. warp/native/reduce.cpp +1 -1
  157. warp/native/reduce.cu +10 -12
  158. warp/native/runlength_encode.cu +6 -10
  159. warp/native/scan.cu +8 -11
  160. warp/native/sparse.cpp +4 -4
  161. warp/native/sparse.cu +164 -154
  162. warp/native/spatial.h +2 -2
  163. warp/native/temp_buffer.h +14 -30
  164. warp/native/vec.h +107 -23
  165. warp/native/volume.h +120 -0
  166. warp/native/warp.cpp +560 -30
  167. warp/native/warp.cu +431 -44
  168. warp/native/warp.h +13 -4
  169. warp/optim/__init__.py +1 -0
  170. warp/optim/linear.py +922 -0
  171. warp/optim/sgd.py +92 -0
  172. warp/render/render_opengl.py +335 -119
  173. warp/render/render_usd.py +11 -11
  174. warp/sim/__init__.py +2 -2
  175. warp/sim/articulation.py +385 -185
  176. warp/sim/collide.py +8 -0
  177. warp/sim/import_mjcf.py +297 -106
  178. warp/sim/import_urdf.py +389 -210
  179. warp/sim/import_usd.py +198 -97
  180. warp/sim/inertia.py +17 -18
  181. warp/sim/integrator_euler.py +14 -8
  182. warp/sim/integrator_xpbd.py +158 -16
  183. warp/sim/model.py +795 -291
  184. warp/sim/render.py +3 -3
  185. warp/sim/utils.py +3 -0
  186. warp/sparse.py +640 -150
  187. warp/stubs.py +606 -267
  188. warp/tape.py +61 -10
  189. warp/tests/__main__.py +3 -6
  190. warp/tests/assets/curlnoise_golden.npy +0 -0
  191. warp/tests/assets/pnoise_golden.npy +0 -0
  192. warp/tests/{test_class_kernel.py → aux_test_class_kernel.py} +9 -1
  193. warp/tests/aux_test_conditional_unequal_types_kernels.py +21 -0
  194. warp/tests/{test_dependent.py → aux_test_dependent.py} +2 -2
  195. warp/tests/{test_reference.py → aux_test_reference.py} +1 -1
  196. warp/tests/aux_test_unresolved_func.py +14 -0
  197. warp/tests/aux_test_unresolved_symbol.py +14 -0
  198. warp/tests/disabled_kinematics.py +239 -0
  199. warp/tests/run_coverage_serial.py +31 -0
  200. warp/tests/test_adam.py +103 -106
  201. warp/tests/test_arithmetic.py +128 -74
  202. warp/tests/test_array.py +212 -97
  203. warp/tests/test_array_reduce.py +57 -23
  204. warp/tests/test_atomic.py +64 -28
  205. warp/tests/test_bool.py +99 -0
  206. warp/tests/test_builtins_resolution.py +1292 -0
  207. warp/tests/test_bvh.py +42 -18
  208. warp/tests/test_closest_point_edge_edge.py +54 -57
  209. warp/tests/test_codegen.py +208 -130
  210. warp/tests/test_compile_consts.py +28 -20
  211. warp/tests/test_conditional.py +108 -24
  212. warp/tests/test_copy.py +10 -12
  213. warp/tests/test_ctypes.py +112 -88
  214. warp/tests/test_dense.py +21 -14
  215. warp/tests/test_devices.py +98 -0
  216. warp/tests/test_dlpack.py +75 -75
  217. warp/tests/test_examples.py +277 -0
  218. warp/tests/test_fabricarray.py +955 -0
  219. warp/tests/test_fast_math.py +15 -11
  220. warp/tests/test_fem.py +1271 -0
  221. warp/tests/test_fp16.py +53 -19
  222. warp/tests/test_func.py +187 -86
  223. warp/tests/test_generics.py +194 -49
  224. warp/tests/test_grad.py +178 -109
  225. warp/tests/test_grad_customs.py +176 -0
  226. warp/tests/test_hash_grid.py +52 -37
  227. warp/tests/test_import.py +10 -23
  228. warp/tests/test_indexedarray.py +32 -31
  229. warp/tests/test_intersect.py +18 -9
  230. warp/tests/test_large.py +141 -0
  231. warp/tests/test_launch.py +14 -41
  232. warp/tests/test_lerp.py +64 -65
  233. warp/tests/test_linear_solvers.py +154 -0
  234. warp/tests/test_lvalue.py +493 -0
  235. warp/tests/test_marching_cubes.py +12 -13
  236. warp/tests/test_mat.py +517 -2898
  237. warp/tests/test_mat_lite.py +115 -0
  238. warp/tests/test_mat_scalar_ops.py +2889 -0
  239. warp/tests/test_math.py +103 -9
  240. warp/tests/test_matmul.py +305 -69
  241. warp/tests/test_matmul_lite.py +410 -0
  242. warp/tests/test_mesh.py +71 -14
  243. warp/tests/test_mesh_query_aabb.py +41 -25
  244. warp/tests/test_mesh_query_point.py +140 -22
  245. warp/tests/test_mesh_query_ray.py +39 -22
  246. warp/tests/test_mlp.py +30 -22
  247. warp/tests/test_model.py +92 -89
  248. warp/tests/test_modules_lite.py +39 -0
  249. warp/tests/test_multigpu.py +88 -114
  250. warp/tests/test_noise.py +12 -11
  251. warp/tests/test_operators.py +16 -20
  252. warp/tests/test_options.py +11 -11
  253. warp/tests/test_pinned.py +17 -18
  254. warp/tests/test_print.py +32 -11
  255. warp/tests/test_quat.py +275 -129
  256. warp/tests/test_rand.py +18 -16
  257. warp/tests/test_reload.py +38 -34
  258. warp/tests/test_rounding.py +50 -43
  259. warp/tests/test_runlength_encode.py +168 -20
  260. warp/tests/test_smoothstep.py +9 -11
  261. warp/tests/test_snippet.py +143 -0
  262. warp/tests/test_sparse.py +261 -63
  263. warp/tests/test_spatial.py +276 -243
  264. warp/tests/test_streams.py +110 -85
  265. warp/tests/test_struct.py +268 -63
  266. warp/tests/test_tape.py +39 -21
  267. warp/tests/test_torch.py +118 -89
  268. warp/tests/test_transient_module.py +12 -13
  269. warp/tests/test_types.py +614 -0
  270. warp/tests/test_utils.py +494 -0
  271. warp/tests/test_vec.py +354 -2050
  272. warp/tests/test_vec_lite.py +73 -0
  273. warp/tests/test_vec_scalar_ops.py +2099 -0
  274. warp/tests/test_volume.py +457 -293
  275. warp/tests/test_volume_write.py +124 -134
  276. warp/tests/unittest_serial.py +35 -0
  277. warp/tests/unittest_suites.py +341 -0
  278. warp/tests/unittest_utils.py +568 -0
  279. warp/tests/unused_test_misc.py +71 -0
  280. warp/tests/{test_debug.py → walkthough_debug.py} +3 -17
  281. warp/thirdparty/appdirs.py +36 -45
  282. warp/thirdparty/unittest_parallel.py +549 -0
  283. warp/torch.py +9 -6
  284. warp/types.py +1089 -366
  285. warp/utils.py +93 -387
  286. warp_lang-0.11.0.dist-info/METADATA +238 -0
  287. warp_lang-0.11.0.dist-info/RECORD +332 -0
  288. {warp_lang-0.10.1.dist-info → warp_lang-0.11.0.dist-info}/WHEEL +1 -1
  289. warp/tests/test_all.py +0 -219
  290. warp/tests/test_array_scan.py +0 -60
  291. warp/tests/test_base.py +0 -208
  292. warp/tests/test_unresolved_func.py +0 -7
  293. warp/tests/test_unresolved_symbol.py +0 -7
  294. warp_lang-0.10.1.dist-info/METADATA +0 -21
  295. warp_lang-0.10.1.dist-info/RECORD +0 -188
  296. /warp/tests/{test_compile_consts_dummy.py → aux_test_compile_consts_dummy.py} +0 -0
  297. /warp/tests/{test_reference_reference.py → aux_test_reference_reference.py} +0 -0
  298. /warp/tests/{test_square.py → aux_test_square.py} +0 -0
  299. {warp_lang-0.10.1.dist-info → warp_lang-0.11.0.dist-info}/LICENSE.md +0 -0
  300. {warp_lang-0.10.1.dist-info → warp_lang-0.11.0.dist-info}/top_level.txt +0 -0
warp/native/vec.h CHANGED
@@ -16,9 +16,11 @@ namespace wp
16
16
  template<unsigned Length, typename Type>
17
17
  struct vec_t
18
18
  {
19
- Type c[Length] = {};
19
+ Type c[Length];
20
20
 
21
- inline vec_t() = default;
21
+ inline CUDA_CALLABLE vec_t()
22
+ : c()
23
+ {}
22
24
 
23
25
  inline CUDA_CALLABLE vec_t(Type s)
24
26
  {
@@ -33,7 +35,7 @@ struct vec_t
33
35
  {
34
36
  for( unsigned i=0; i < Length; ++i )
35
37
  {
36
- c[i] = other[i];
38
+ c[i] = static_cast<Type>(other[i]);
37
39
  }
38
40
  }
39
41
 
@@ -284,12 +286,41 @@ inline CUDA_CALLABLE vec_t<2, Type> div(vec_t<2, Type> a, Type s)
284
286
  return vec_t<2, Type>(a.c[0]/s,a.c[1]/s);
285
287
  }
286
288
 
289
+ template<unsigned Length, typename Type>
290
+ inline CUDA_CALLABLE vec_t<Length, Type> div(Type s, vec_t<Length, Type> a)
291
+ {
292
+ vec_t<Length, Type> ret;
293
+ for (unsigned i=0; i < Length; ++i)
294
+ {
295
+ ret[i] = s / a[i];
296
+ }
297
+ return ret;
298
+ }
299
+
300
+ template<typename Type>
301
+ inline CUDA_CALLABLE vec_t<3, Type> div(Type s, vec_t<3, Type> a)
302
+ {
303
+ return vec_t<3, Type>(s/a.c[0],s/a.c[1],s/a.c[2]);
304
+ }
305
+
306
+ template<typename Type>
307
+ inline CUDA_CALLABLE vec_t<2, Type> div(Type s, vec_t<2, Type> a)
308
+ {
309
+ return vec_t<2, Type>(s/a.c[0],s/a.c[1]);
310
+ }
311
+
287
312
  template<unsigned Length, typename Type>
288
313
  inline CUDA_CALLABLE vec_t<Length, Type> operator / (vec_t<Length, Type> a, Type s)
289
314
  {
290
315
  return div(a,s);
291
316
  }
292
317
 
318
+ template<unsigned Length, typename Type>
319
+ inline CUDA_CALLABLE vec_t<Length, Type> operator / (Type s, vec_t<Length, Type> a)
320
+ {
321
+ return div(s, a);
322
+ }
323
+
293
324
  // component wise division
294
325
  template<unsigned Length, typename Type>
295
326
  inline CUDA_CALLABLE vec_t<Length, Type> cw_div(vec_t<Length, Type> a, vec_t<Length, Type> b)
@@ -383,7 +414,7 @@ inline CUDA_CALLABLE Type tensordot(vec_t<Length, Type> a, vec_t<Length, Type> b
383
414
 
384
415
 
385
416
  template<unsigned Length, typename Type>
386
- inline CUDA_CALLABLE Type index(const vec_t<Length, Type> & a, int idx)
417
+ inline CUDA_CALLABLE Type extract(const vec_t<Length, Type> & a, int idx)
387
418
  {
388
419
  #ifndef NDEBUG
389
420
  if (idx < 0 || idx >= Length)
@@ -397,7 +428,21 @@ inline CUDA_CALLABLE Type index(const vec_t<Length, Type> & a, int idx)
397
428
  }
398
429
 
399
430
  template<unsigned Length, typename Type>
400
- inline CUDA_CALLABLE void indexset(vec_t<Length, Type>& v, int idx, Type value)
431
+ inline CUDA_CALLABLE Type* index(vec_t<Length, Type>& v, int idx)
432
+ {
433
+ #ifndef NDEBUG
434
+ if (idx < 0 || idx >= Length)
435
+ {
436
+ printf("vec index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
437
+ assert(0);
438
+ }
439
+ #endif
440
+
441
+ return &v[idx];
442
+ }
443
+
444
+ template<unsigned Length, typename Type>
445
+ inline CUDA_CALLABLE Type* indexref(vec_t<Length, Type>* v, int idx)
401
446
  {
402
447
  #ifndef NDEBUG
403
448
  if (idx < 0 || idx >= Length)
@@ -407,17 +452,23 @@ inline CUDA_CALLABLE void indexset(vec_t<Length, Type>& v, int idx, Type value)
407
452
  }
408
453
  #endif
409
454
 
410
- v[idx] = value;
455
+ return &((*v)[idx]);
411
456
  }
412
457
 
413
458
  template<unsigned Length, typename Type>
414
- inline CUDA_CALLABLE void adj_indexset(vec_t<Length, Type>& v, int idx, const Type& value,
459
+ inline CUDA_CALLABLE void adj_index(vec_t<Length, Type>& v, int idx,
415
460
  vec_t<Length, Type>& adj_v, int adj_idx, const Type& adj_value)
416
461
  {
417
462
  // nop
418
463
  }
419
464
 
420
465
 
466
+ template<unsigned Length, typename Type>
467
+ inline CUDA_CALLABLE void adj_indexref(vec_t<Length, Type>* v, int idx,
468
+ vec_t<Length, Type>& adj_v, int adj_idx, const Type& adj_value)
469
+ {
470
+ // nop
471
+ }
421
472
 
422
473
 
423
474
  template<unsigned Length, typename Type>
@@ -645,7 +696,7 @@ inline CUDA_CALLABLE void adj_vec_t(const vec_t<Length, OtherType>& other, vec_t
645
696
  {
646
697
  for( unsigned i=0; i < Length; ++i )
647
698
  {
648
- adj_other[i] += adj_ret[i];
699
+ adj_other[i] += static_cast<OtherType>(adj_ret[i]);
649
700
  }
650
701
  }
651
702
 
@@ -715,9 +766,30 @@ inline CUDA_CALLABLE void adj_div(vec_t<Length, Type> a, Type s, vec_t<Length, T
715
766
  }
716
767
 
717
768
  template<unsigned Length, typename Type>
718
- inline CUDA_CALLABLE void adj_cw_div(vec_t<Length, Type> a, vec_t<Length, Type> b, vec_t<Length, Type>& adj_a, vec_t<Length, Type>& adj_b, const vec_t<Length, Type>& adj_ret) {
769
+ inline CUDA_CALLABLE void adj_div(Type s, vec_t<Length, Type> a, Type& adj_s, vec_t<Length, Type>& adj_a, const vec_t<Length, Type>& adj_ret)
770
+ {
771
+
772
+ adj_s -= dot(a , adj_ret)/ (s * s); // - a / s^2
773
+
774
+ for( unsigned i=0; i < Length; ++i )
775
+ {
776
+ adj_a[i] += s / adj_ret[i];
777
+ }
778
+
779
+ #if FP_CHECK
780
+ if (!isfinite(a) || !isfinite(s) || !isfinite(adj_a) || !isfinite(adj_s) || !isfinite(adj_ret))
781
+ {
782
+ // \TODO: How shall we implement this error message?
783
+ // printf("adj_div((%f %f %f %f), %f, (%f %f %f %f), %f, (%f %f %f %f)\n", a.x, a.y, a.z, a.w, s, adj_a.x, adj_a.y, adj_a.z, adj_a.w, adj_s, adj_ret.x, adj_ret.y, adj_ret.z, adj_ret.w);
784
+ assert(0);
785
+ }
786
+ #endif
787
+ }
788
+
789
+ template<unsigned Length, typename Type>
790
+ inline CUDA_CALLABLE void adj_cw_div(vec_t<Length, Type> a, vec_t<Length, Type> b, vec_t<Length, Type>& ret, vec_t<Length, Type>& adj_a, vec_t<Length, Type>& adj_b, const vec_t<Length, Type>& adj_ret) {
719
791
  adj_a += cw_div(adj_ret, b);
720
- adj_b -= cw_mul(adj_ret, cw_div(cw_div(a, b), b));
792
+ adj_b -= cw_mul(adj_ret, cw_div(ret, b));
721
793
  }
722
794
 
723
795
  template<unsigned Length, typename Type>
@@ -816,7 +888,7 @@ inline CUDA_CALLABLE void adj_dot(vec_t<3, Type> a, vec_t<3, Type> b, vec_t<3, T
816
888
 
817
889
 
818
890
  template<unsigned Length, typename Type>
819
- inline CUDA_CALLABLE void adj_index(const vec_t<Length, Type> & a, int idx, vec_t<Length, Type> & adj_a, int & adj_idx, Type & adj_ret)
891
+ inline CUDA_CALLABLE void adj_extract(const vec_t<Length, Type> & a, int idx, vec_t<Length, Type> & adj_a, int & adj_idx, Type & adj_ret)
820
892
  {
821
893
  #ifndef NDEBUG
822
894
  if (idx < 0 || idx > Length)
@@ -830,9 +902,12 @@ inline CUDA_CALLABLE void adj_index(const vec_t<Length, Type> & a, int idx, vec_
830
902
  }
831
903
 
832
904
  template<unsigned Length, typename Type>
833
- inline CUDA_CALLABLE void adj_length(vec_t<Length, Type> a, vec_t<Length, Type>& adj_a, const Type adj_ret)
905
+ inline CUDA_CALLABLE void adj_length(vec_t<Length, Type> a, Type ret, vec_t<Length, Type>& adj_a, const Type adj_ret)
834
906
  {
835
- adj_a += normalize(a)*adj_ret;
907
+ if (ret > Type(kEps))
908
+ {
909
+ adj_a += div(a, ret) * adj_ret;
910
+ }
836
911
 
837
912
  #if FP_CHECK
838
913
  if (!isfinite(adj_a))
@@ -860,7 +935,7 @@ inline CUDA_CALLABLE void adj_length_sq(vec_t<Length, Type> a, vec_t<Length, Typ
860
935
  }
861
936
 
862
937
  template<unsigned Length, typename Type>
863
- inline CUDA_CALLABLE void adj_normalize(vec_t<Length, Type> a, vec_t<Length, Type>& adj_a, const vec_t<Length, Type>& adj_ret)
938
+ inline CUDA_CALLABLE void adj_normalize(vec_t<Length, Type> a, vec_t<Length, Type>& ret, vec_t<Length, Type>& adj_a, const vec_t<Length, Type>& adj_ret)
864
939
  {
865
940
  Type d = length(a);
866
941
 
@@ -868,9 +943,7 @@ inline CUDA_CALLABLE void adj_normalize(vec_t<Length, Type> a, vec_t<Length, Typ
868
943
  {
869
944
  Type invd = Type(1.0f)/d;
870
945
 
871
- vec_t<Length, Type> ahat = normalize(a);
872
-
873
- adj_a += (adj_ret*invd - ahat*(dot(ahat, adj_ret))*invd);
946
+ adj_a += (adj_ret*invd - ret*(dot(ret, adj_ret))*invd);
874
947
 
875
948
  #if FP_CHECK
876
949
  if (!isfinite(adj_a))
@@ -931,8 +1004,8 @@ inline CUDA_CALLABLE void adj_max(const vec_t<Length,Type> &v, vec_t<Length,Type
931
1004
 
932
1005
  // Do I need to specialize these for different lengths?
933
1006
  template<unsigned Length, typename Type>
934
- inline CUDA_CALLABLE vec_t<Length, Type> atomic_add(vec_t<Length, Type> * addr, vec_t<Length, Type> value) {
935
-
1007
+ inline CUDA_CALLABLE vec_t<Length, Type> atomic_add(vec_t<Length, Type> * addr, vec_t<Length, Type> value)
1008
+ {
936
1009
  vec_t<Length, Type> ret;
937
1010
  for( unsigned i=0; i < Length; ++i )
938
1011
  {
@@ -943,8 +1016,8 @@ inline CUDA_CALLABLE vec_t<Length, Type> atomic_add(vec_t<Length, Type> * addr,
943
1016
  }
944
1017
 
945
1018
  template<unsigned Length, typename Type>
946
- inline CUDA_CALLABLE vec_t<Length, Type> atomic_min(vec_t<Length, Type> * addr, vec_t<Length, Type> value) {
947
-
1019
+ inline CUDA_CALLABLE vec_t<Length, Type> atomic_min(vec_t<Length, Type> * addr, vec_t<Length, Type> value)
1020
+ {
948
1021
  vec_t<Length, Type> ret;
949
1022
  for( unsigned i=0; i < Length; ++i )
950
1023
  {
@@ -955,8 +1028,8 @@ inline CUDA_CALLABLE vec_t<Length, Type> atomic_min(vec_t<Length, Type> * addr,
955
1028
  }
956
1029
 
957
1030
  template<unsigned Length, typename Type>
958
- inline CUDA_CALLABLE vec_t<Length, Type> atomic_max(vec_t<Length, Type> * addr, vec_t<Length, Type> value) {
959
-
1031
+ inline CUDA_CALLABLE vec_t<Length, Type> atomic_max(vec_t<Length, Type> * addr, vec_t<Length, Type> value)
1032
+ {
960
1033
  vec_t<Length, Type> ret;
961
1034
  for( unsigned i=0; i < Length; ++i )
962
1035
  {
@@ -966,6 +1039,17 @@ inline CUDA_CALLABLE vec_t<Length, Type> atomic_max(vec_t<Length, Type> * addr,
966
1039
  return ret;
967
1040
  }
968
1041
 
1042
+ template<unsigned Length, typename Type>
1043
+ inline CUDA_CALLABLE void adj_atomic_minmax(
1044
+ vec_t<Length,Type> *addr,
1045
+ vec_t<Length,Type> *adj_addr,
1046
+ const vec_t<Length,Type> &value,
1047
+ vec_t<Length,Type> &adj_value)
1048
+ {
1049
+ for (unsigned i=0; i < Length; ++i)
1050
+ adj_atomic_minmax(&(addr->c[i]), &(adj_addr->c[i]), value[i], adj_value[i]);
1051
+ }
1052
+
969
1053
  // ok, the original implementation of this didn't take the absolute values.
970
1054
  // I wouldn't consider this expected behavior. It looks like it's only
971
1055
  // being used for bounding boxes at the moment, where this doesn't matter,
warp/native/volume.h CHANGED
@@ -232,6 +232,126 @@ CUDA_CALLABLE inline void adj_volume_sample_i(uint64_t id, vec3 uvw, uint64_t& a
232
232
  // NOP
233
233
  }
234
234
 
235
// Sampling the volume at the given index-space coordinates, uvw can be fractional
//
// Reads the NanoVDB grid identified by `id` at index-space position `uvw`,
// writes the gradient of the interpolant (w.r.t. uvw) into `grad`, and
// returns the sampled value. For any sampling mode other than CLOSEST or
// LINEAR it returns 0.0f and leaves `grad` untouched.
CUDA_CALLABLE inline float volume_sample_grad_f(uint64_t id, vec3 uvw, int sampling_mode, vec3& grad)
{
    const pnanovdb_buf_t buf = volume::id_to_buffer(id);
    const pnanovdb_root_handle_t root = volume::get_root(buf);
    const pnanovdb_vec3_t uvw_pnano{ uvw[0], uvw[1], uvw[2] };

    if (sampling_mode == volume::CLOSEST)
    {
        // Nearest-voxel lookup: the field is piecewise constant, so the
        // gradient is identically zero.
        const pnanovdb_coord_t ijk = pnanovdb_vec3_round_to_coord(uvw_pnano);
        float val;
        pnano_read(val, buf, root, PNANOVDB_REF(ijk));
        grad = vec3(0.0f, 0.0f, 0.0f);
        return val;
    }
    else if (sampling_mode == volume::LINEAR)
    {
        // NB. linear sampling is not used on int volumes
        // Trilinear interpolation over the 8 corners of the containing cell.
        constexpr pnanovdb_coord_t OFFSETS[] = {
            { 0, 0, 0 }, { 0, 0, 1 }, { 0, 1, 0 }, { 0, 1, 1 }, { 1, 0, 0 }, { 1, 0, 1 }, { 1, 1, 0 }, { 1, 1, 1 },
        };

        // Split uvw into the integer base cell coordinate and its fractional part.
        const pnanovdb_vec3_t ijk_base{ floorf(uvw_pnano.x), floorf(uvw_pnano.y), floorf(uvw_pnano.z) };
        const pnanovdb_vec3_t ijk_frac{ uvw_pnano.x - ijk_base.x, uvw_pnano.y - ijk_base.y, uvw_pnano.z - ijk_base.z };
        const pnanovdb_coord_t ijk{ (pnanovdb_int32_t)ijk_base.x, (pnanovdb_int32_t)ijk_base.y, (pnanovdb_int32_t)ijk_base.z };

        pnanovdb_readaccessor_t accessor;
        pnanovdb_readaccessor_init(PNANOVDB_REF(accessor), root);
        float val = 0.0f;
        // Per-axis linear weights: index 0 = (1 - frac) for the near corner,
        // index 1 = frac for the far corner.
        const float wx[2]{ 1 - ijk_frac.x, ijk_frac.x };
        const float wy[2]{ 1 - ijk_frac.y, ijk_frac.y };
        const float wz[2]{ 1 - ijk_frac.z, ijk_frac.z };

        // Sign of the derivative of each corner's weight along one axis:
        // -1 for corners at offset 0, +1 at offset 1 (ordering matches OFFSETS).
        const float sign_dx[8] = {-1.0f, -1.0f, -1.0f, -1.0f, 1.0f, 1.0f, 1.0f, 1.0f};
        const float sign_dy[8] = {-1.0f, -1.0f, 1.0f, 1.0f, -1.0f, -1.0f, 1.0f, 1.0f};
        const float sign_dz[8] = {-1.0f, 1.0f, -1.0f, 1.0f, -1.0f, 1.0f, -1.0f, 1.0f};

        float dfdx = 0.0f;
        float dfdy = 0.0f;
        float dfdz = 0.0f;
        for (int idx = 0; idx < 8; ++idx)
        {
            const pnanovdb_coord_t& offs = OFFSETS[idx];
            const pnanovdb_coord_t ijk_shifted = pnanovdb_coord_add(ijk, offs);
            float v;
            pnano_read(v, buf, PNANOVDB_REF(accessor), PNANOVDB_REF(ijk_shifted));
            // Accumulate the trilinear value; each partial derivative replaces
            // one axis' weight by its derivative sign (+/-1).
            val = add(val, wx[offs.x] * wy[offs.y] * wz[offs.z] * v);
            dfdx = add(dfdx, wy[offs.y] * wz[offs.z] * sign_dx[idx] * v);
            dfdy = add(dfdy, wx[offs.x] * wz[offs.z] * sign_dy[idx] * v);
            dfdz = add(dfdz, wx[offs.x] * wy[offs.y] * sign_dz[idx] * v);
        }
        grad = vec3(dfdx, dfdy, dfdz);
        return val;
    }
    return 0.0f;
}
291
+
292
// Adjoint of volume_sample_grad_f: accumulates into adj_uvw the gradient
// contributions from both the sampled value (via adj_ret) and the requested
// spatial gradient output (via adj_grad). Gradients propagate only for
// float grids sampled with LINEAR mode; all other cases are no-ops.
CUDA_CALLABLE inline void adj_volume_sample_grad_f(
    uint64_t id, vec3 uvw, int sampling_mode, vec3& grad, uint64_t& adj_id, vec3& adj_uvw, int& adj_sampling_mode, vec3& adj_grad, const float& adj_ret)
{
    // Non-float grids carry no differentiable values.
    if (volume::get_grid_type(volume::id_to_buffer(id)) != PNANOVDB_GRID_TYPE_FLOAT) return;

    // CLOSEST sampling is piecewise constant, hence zero gradient.
    if (sampling_mode != volume::LINEAR) {
        return; // NOP
    }

    const pnanovdb_buf_t buf = volume::id_to_buffer(id);
    const pnanovdb_root_handle_t root = volume::get_root(buf);
    const pnanovdb_vec3_t uvw_pnano{ uvw[0], uvw[1], uvw[2] };

    constexpr pnanovdb_coord_t OFFSETS[] = {
        { 0, 0, 0 }, { 0, 0, 1 }, { 0, 1, 0 }, { 0, 1, 1 }, { 1, 0, 0 }, { 1, 0, 1 }, { 1, 1, 0 }, { 1, 1, 1 },
    };

    // Recompute the cell decomposition used by the forward pass.
    const pnanovdb_vec3_t ijk_base{ floorf(uvw_pnano.x), floorf(uvw_pnano.y), floorf(uvw_pnano.z) };
    const pnanovdb_vec3_t ijk_frac{ uvw_pnano.x - ijk_base.x, uvw_pnano.y - ijk_base.y, uvw_pnano.z - ijk_base.z };
    const pnanovdb_coord_t ijk{ (pnanovdb_int32_t)ijk_base.x, (pnanovdb_int32_t)ijk_base.y, (pnanovdb_int32_t)ijk_base.z };

    pnanovdb_readaccessor_t accessor;
    pnanovdb_readaccessor_init(PNANOVDB_REF(accessor), root);
    // Per-axis linear weights, identical to the forward pass.
    const float wx[2]{ 1 - ijk_frac.x, ijk_frac.x };
    const float wy[2]{ 1 - ijk_frac.y, ijk_frac.y };
    const float wz[2]{ 1 - ijk_frac.z, ijk_frac.z };
    const float sign_dx[8] = {-1.0f, -1.0f, -1.0f, -1.0f, 1.0f, 1.0f, 1.0f, 1.0f};
    const float sign_dy[8] = {-1.0f, -1.0f, 1.0f, 1.0f, -1.0f, -1.0f, 1.0f, 1.0f};
    const float sign_dz[8] = {-1.0f, 1.0f, -1.0f, 1.0f, -1.0f, 1.0f, -1.0f, 1.0f};

    // Mixed second partial derivatives of the trilinear interpolant
    // (e.g. dfdxdy = d^2 f / dx dy); needed because the forward function
    // also outputs the first derivatives in `grad`.
    float dfdxdy = 0.0f;
    float dfdxdz = 0.0f;
    float dfdydx = 0.0f;
    float dfdydz = 0.0f;
    float dfdzdx = 0.0f;
    float dfdzdy = 0.0f;
    vec3 dphi(0,0,0);   // first derivative of the interpolated value, d f / d uvw
    for (int idx = 0; idx < 8; ++idx)
    {
        const pnanovdb_coord_t& offs = OFFSETS[idx];
        const pnanovdb_coord_t ijk_shifted = pnanovdb_coord_add(ijk, offs);
        float v;
        pnano_read(v, buf, PNANOVDB_REF(accessor), PNANOVDB_REF(ijk_shifted));
        // signs maps corner offset {0,1} to weight-derivative sign {-1,+1}.
        const vec3 signs(offs.x * 2 - 1, offs.y * 2 - 1, offs.z * 2 - 1);
        const vec3 grad_w(signs[0] * wy[offs.y] * wz[offs.z], signs[1] * wx[offs.x] * wz[offs.z], signs[2] * wx[offs.x] * wy[offs.y]);
        dphi = add(dphi, mul(v, grad_w));

        // Differentiating a second axis replaces that axis' weight by its sign.
        dfdxdy = add(dfdxdy, signs[1] * wz[offs.z] * sign_dx[idx] * v);
        dfdxdz = add(dfdxdz, wy[offs.y] * signs[2] * sign_dx[idx] * v);

        dfdydx = add(dfdydx, signs[0] * wz[offs.z] * sign_dy[idx] * v);
        dfdydz = add(dfdydz, wx[offs.x] * signs[2] * sign_dy[idx] * v);

        dfdzdx = add(dfdzdx, signs[0] * wy[offs.y] * sign_dz[idx] * v);
        dfdzdy = add(dfdzdy, wx[offs.x] * signs[1] * sign_dz[idx] * v);
    }

    // Value path: adj_uvw += (df/duvw) * adj_ret.
    adj_uvw += mul(dphi, adj_ret);
    // Gradient path: chain adj_grad through the mixed second derivatives.
    adj_uvw[0] += adj_grad[1] * dfdydx + adj_grad[2] * dfdzdx;
    adj_uvw[1] += adj_grad[0] * dfdxdy + adj_grad[2] * dfdzdy;
    adj_uvw[2] += adj_grad[0] * dfdxdz + adj_grad[1] * dfdydz;
}
354
+
235
355
  CUDA_CALLABLE inline float volume_lookup_f(uint64_t id, int32_t i, int32_t j, int32_t k)
236
356
  {
237
357
  if (volume::get_grid_type(volume::id_to_buffer(id)) != PNANOVDB_GRID_TYPE_FLOAT) return 0.f;