warp-lang 0.9.0-py3-none-win_amd64.whl → 0.11.0-py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of warp-lang might be problematic.
Files changed (315)
  1. warp/__init__.py +15 -7
  2. warp/__init__.pyi +1 -0
  3. warp/bin/warp-clang.dll +0 -0
  4. warp/bin/warp.dll +0 -0
  5. warp/build.py +22 -443
  6. warp/build_dll.py +384 -0
  7. warp/builtins.py +998 -488
  8. warp/codegen.py +1307 -739
  9. warp/config.py +5 -3
  10. warp/constants.py +6 -0
  11. warp/context.py +1291 -548
  12. warp/dlpack.py +31 -31
  13. warp/fabric.py +326 -0
  14. warp/fem/__init__.py +27 -0
  15. warp/fem/cache.py +389 -0
  16. warp/fem/dirichlet.py +181 -0
  17. warp/fem/domain.py +263 -0
  18. warp/fem/field/__init__.py +101 -0
  19. warp/fem/field/field.py +149 -0
  20. warp/fem/field/nodal_field.py +299 -0
  21. warp/fem/field/restriction.py +21 -0
  22. warp/fem/field/test.py +181 -0
  23. warp/fem/field/trial.py +183 -0
  24. warp/fem/geometry/__init__.py +19 -0
  25. warp/fem/geometry/closest_point.py +70 -0
  26. warp/fem/geometry/deformed_geometry.py +271 -0
  27. warp/fem/geometry/element.py +744 -0
  28. warp/fem/geometry/geometry.py +186 -0
  29. warp/fem/geometry/grid_2d.py +373 -0
  30. warp/fem/geometry/grid_3d.py +435 -0
  31. warp/fem/geometry/hexmesh.py +953 -0
  32. warp/fem/geometry/partition.py +376 -0
  33. warp/fem/geometry/quadmesh_2d.py +532 -0
  34. warp/fem/geometry/tetmesh.py +840 -0
  35. warp/fem/geometry/trimesh_2d.py +577 -0
  36. warp/fem/integrate.py +1616 -0
  37. warp/fem/operator.py +191 -0
  38. warp/fem/polynomial.py +213 -0
  39. warp/fem/quadrature/__init__.py +2 -0
  40. warp/fem/quadrature/pic_quadrature.py +245 -0
  41. warp/fem/quadrature/quadrature.py +294 -0
  42. warp/fem/space/__init__.py +292 -0
  43. warp/fem/space/basis_space.py +489 -0
  44. warp/fem/space/collocated_function_space.py +105 -0
  45. warp/fem/space/dof_mapper.py +236 -0
  46. warp/fem/space/function_space.py +145 -0
  47. warp/fem/space/grid_2d_function_space.py +267 -0
  48. warp/fem/space/grid_3d_function_space.py +306 -0
  49. warp/fem/space/hexmesh_function_space.py +352 -0
  50. warp/fem/space/partition.py +350 -0
  51. warp/fem/space/quadmesh_2d_function_space.py +369 -0
  52. warp/fem/space/restriction.py +160 -0
  53. warp/fem/space/shape/__init__.py +15 -0
  54. warp/fem/space/shape/cube_shape_function.py +738 -0
  55. warp/fem/space/shape/shape_function.py +103 -0
  56. warp/fem/space/shape/square_shape_function.py +611 -0
  57. warp/fem/space/shape/tet_shape_function.py +567 -0
  58. warp/fem/space/shape/triangle_shape_function.py +429 -0
  59. warp/fem/space/tetmesh_function_space.py +292 -0
  60. warp/fem/space/topology.py +295 -0
  61. warp/fem/space/trimesh_2d_function_space.py +221 -0
  62. warp/fem/types.py +77 -0
  63. warp/fem/utils.py +495 -0
  64. warp/native/array.h +164 -55
  65. warp/native/builtin.h +150 -174
  66. warp/native/bvh.cpp +75 -328
  67. warp/native/bvh.cu +406 -23
  68. warp/native/bvh.h +37 -45
  69. warp/native/clang/clang.cpp +136 -24
  70. warp/native/crt.cpp +1 -76
  71. warp/native/crt.h +111 -104
  72. warp/native/cuda_crt.h +1049 -0
  73. warp/native/cuda_util.cpp +15 -3
  74. warp/native/cuda_util.h +3 -1
  75. warp/native/cutlass/tools/library/scripts/conv2d_operation.py +463 -0
  76. warp/native/cutlass/tools/library/scripts/conv3d_operation.py +321 -0
  77. warp/native/cutlass/tools/library/scripts/gemm_operation.py +988 -0
  78. warp/native/cutlass/tools/library/scripts/generator.py +4625 -0
  79. warp/native/cutlass/tools/library/scripts/library.py +799 -0
  80. warp/native/cutlass/tools/library/scripts/manifest.py +402 -0
  81. warp/native/cutlass/tools/library/scripts/pycutlass/docs/source/conf.py +96 -0
  82. warp/native/cutlass/tools/library/scripts/pycutlass/profile/conv/conv2d_f16_sm80.py +106 -0
  83. warp/native/cutlass/tools/library/scripts/pycutlass/profile/gemm/gemm_f32_sm80.py +91 -0
  84. warp/native/cutlass/tools/library/scripts/pycutlass/setup.py +80 -0
  85. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/__init__.py +48 -0
  86. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/arguments.py +118 -0
  87. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/c_types.py +241 -0
  88. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/compiler.py +432 -0
  89. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/conv2d_operation.py +631 -0
  90. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/epilogue.py +1026 -0
  91. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/frontend.py +104 -0
  92. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/gemm_operation.py +1276 -0
  93. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/library.py +744 -0
  94. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/memory_manager.py +74 -0
  95. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/operation.py +110 -0
  96. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/parser.py +619 -0
  97. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/reduction_operation.py +398 -0
  98. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/tensor_ref.py +70 -0
  99. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/__init__.py +4 -0
  100. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/conv2d_testbed.py +646 -0
  101. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/gemm_grouped_testbed.py +235 -0
  102. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/gemm_testbed.py +557 -0
  103. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/profiler.py +70 -0
  104. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/type_hint.py +39 -0
  105. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/utils/__init__.py +1 -0
  106. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/utils/device.py +76 -0
  107. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/utils/reference_model.py +255 -0
  108. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/__init__.py +0 -0
  109. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.py +201 -0
  110. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +177 -0
  111. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.py +98 -0
  112. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.py +95 -0
  113. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_few_channels_f16nhwc_f16nhwc_f16nhwc_tensor_op_f32_sm80.py +163 -0
  114. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_fixed_channels_f16nhwc_f16nhwc_f16nhwc_tensor_op_f32_sm80.py +187 -0
  115. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.py +309 -0
  116. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +54 -0
  117. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.py +96 -0
  118. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.py +107 -0
  119. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_strided_dgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +253 -0
  120. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.py +97 -0
  121. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +242 -0
  122. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.py +96 -0
  123. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.py +107 -0
  124. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/run_all_tests.py +10 -0
  125. warp/native/cutlass/tools/library/scripts/pycutlass/test/frontend/test_frontend.py +146 -0
  126. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/__init__.py +0 -0
  127. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_bf16_sm80.py +96 -0
  128. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_f16_sm80.py +447 -0
  129. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_f32_sm80.py +146 -0
  130. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_f64_sm80.py +102 -0
  131. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_grouped_sm80.py +203 -0
  132. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_s8_sm80.py +229 -0
  133. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/run_all_tests.py +9 -0
  134. warp/native/cutlass/tools/library/scripts/pycutlass/test/unit/test_sm80.py +453 -0
  135. warp/native/cutlass/tools/library/scripts/rank_2k_operation.py +398 -0
  136. warp/native/cutlass/tools/library/scripts/rank_k_operation.py +387 -0
  137. warp/native/cutlass/tools/library/scripts/rt.py +796 -0
  138. warp/native/cutlass/tools/library/scripts/symm_operation.py +400 -0
  139. warp/native/cutlass/tools/library/scripts/trmm_operation.py +407 -0
  140. warp/native/cutlass_gemm.cu +5 -3
  141. warp/native/exports.h +1240 -949
  142. warp/native/fabric.h +228 -0
  143. warp/native/hashgrid.cpp +4 -4
  144. warp/native/hashgrid.h +22 -2
  145. warp/native/initializer_array.h +2 -2
  146. warp/native/intersect.h +22 -7
  147. warp/native/intersect_adj.h +8 -8
  148. warp/native/intersect_tri.h +13 -16
  149. warp/native/marching.cu +157 -161
  150. warp/native/mat.h +119 -19
  151. warp/native/matnn.h +2 -2
  152. warp/native/mesh.cpp +108 -83
  153. warp/native/mesh.cu +243 -6
  154. warp/native/mesh.h +1547 -458
  155. warp/native/nanovdb/NanoVDB.h +1 -1
  156. warp/native/noise.h +272 -329
  157. warp/native/quat.h +51 -8
  158. warp/native/rand.h +45 -35
  159. warp/native/range.h +6 -2
  160. warp/native/reduce.cpp +157 -0
  161. warp/native/reduce.cu +348 -0
  162. warp/native/runlength_encode.cpp +62 -0
  163. warp/native/runlength_encode.cu +46 -0
  164. warp/native/scan.cu +11 -13
  165. warp/native/scan.h +1 -0
  166. warp/native/solid_angle.h +442 -0
  167. warp/native/sort.cpp +13 -0
  168. warp/native/sort.cu +9 -1
  169. warp/native/sparse.cpp +338 -0
  170. warp/native/sparse.cu +545 -0
  171. warp/native/spatial.h +2 -2
  172. warp/native/temp_buffer.h +30 -0
  173. warp/native/vec.h +126 -24
  174. warp/native/volume.h +120 -0
  175. warp/native/warp.cpp +658 -53
  176. warp/native/warp.cu +660 -68
  177. warp/native/warp.h +112 -12
  178. warp/optim/__init__.py +1 -0
  179. warp/optim/linear.py +922 -0
  180. warp/optim/sgd.py +92 -0
  181. warp/render/render_opengl.py +392 -152
  182. warp/render/render_usd.py +11 -11
  183. warp/sim/__init__.py +2 -2
  184. warp/sim/articulation.py +385 -185
  185. warp/sim/collide.py +21 -8
  186. warp/sim/import_mjcf.py +297 -106
  187. warp/sim/import_urdf.py +389 -210
  188. warp/sim/import_usd.py +198 -97
  189. warp/sim/inertia.py +17 -18
  190. warp/sim/integrator_euler.py +14 -8
  191. warp/sim/integrator_xpbd.py +161 -19
  192. warp/sim/model.py +795 -291
  193. warp/sim/optimizer.py +2 -6
  194. warp/sim/render.py +65 -3
  195. warp/sim/utils.py +3 -0
  196. warp/sparse.py +1227 -0
  197. warp/stubs.py +665 -223
  198. warp/tape.py +66 -15
  199. warp/tests/__main__.py +3 -6
  200. warp/tests/assets/curlnoise_golden.npy +0 -0
  201. warp/tests/assets/pnoise_golden.npy +0 -0
  202. warp/tests/assets/torus.usda +105 -105
  203. warp/tests/{test_class_kernel.py → aux_test_class_kernel.py} +9 -1
  204. warp/tests/aux_test_conditional_unequal_types_kernels.py +21 -0
  205. warp/tests/{test_dependent.py → aux_test_dependent.py} +2 -2
  206. warp/tests/{test_reference.py → aux_test_reference.py} +1 -1
  207. warp/tests/aux_test_unresolved_func.py +14 -0
  208. warp/tests/aux_test_unresolved_symbol.py +14 -0
  209. warp/tests/disabled_kinematics.py +239 -0
  210. warp/tests/run_coverage_serial.py +31 -0
  211. warp/tests/test_adam.py +103 -106
  212. warp/tests/test_arithmetic.py +128 -74
  213. warp/tests/test_array.py +1497 -211
  214. warp/tests/test_array_reduce.py +150 -0
  215. warp/tests/test_atomic.py +64 -28
  216. warp/tests/test_bool.py +99 -0
  217. warp/tests/test_builtins_resolution.py +1292 -0
  218. warp/tests/test_bvh.py +75 -43
  219. warp/tests/test_closest_point_edge_edge.py +54 -57
  220. warp/tests/test_codegen.py +233 -128
  221. warp/tests/test_compile_consts.py +28 -20
  222. warp/tests/test_conditional.py +108 -24
  223. warp/tests/test_copy.py +10 -12
  224. warp/tests/test_ctypes.py +112 -88
  225. warp/tests/test_dense.py +21 -14
  226. warp/tests/test_devices.py +98 -0
  227. warp/tests/test_dlpack.py +136 -108
  228. warp/tests/test_examples.py +277 -0
  229. warp/tests/test_fabricarray.py +955 -0
  230. warp/tests/test_fast_math.py +15 -11
  231. warp/tests/test_fem.py +1271 -0
  232. warp/tests/test_fp16.py +53 -19
  233. warp/tests/test_func.py +187 -74
  234. warp/tests/test_generics.py +194 -49
  235. warp/tests/test_grad.py +180 -116
  236. warp/tests/test_grad_customs.py +176 -0
  237. warp/tests/test_hash_grid.py +52 -37
  238. warp/tests/test_import.py +10 -23
  239. warp/tests/test_indexedarray.py +577 -24
  240. warp/tests/test_intersect.py +18 -9
  241. warp/tests/test_large.py +141 -0
  242. warp/tests/test_launch.py +251 -15
  243. warp/tests/test_lerp.py +64 -65
  244. warp/tests/test_linear_solvers.py +154 -0
  245. warp/tests/test_lvalue.py +493 -0
  246. warp/tests/test_marching_cubes.py +12 -13
  247. warp/tests/test_mat.py +508 -2778
  248. warp/tests/test_mat_lite.py +115 -0
  249. warp/tests/test_mat_scalar_ops.py +2889 -0
  250. warp/tests/test_math.py +103 -9
  251. warp/tests/test_matmul.py +305 -69
  252. warp/tests/test_matmul_lite.py +410 -0
  253. warp/tests/test_mesh.py +71 -14
  254. warp/tests/test_mesh_query_aabb.py +41 -25
  255. warp/tests/test_mesh_query_point.py +325 -34
  256. warp/tests/test_mesh_query_ray.py +39 -22
  257. warp/tests/test_mlp.py +30 -22
  258. warp/tests/test_model.py +92 -89
  259. warp/tests/test_modules_lite.py +39 -0
  260. warp/tests/test_multigpu.py +88 -114
  261. warp/tests/test_noise.py +12 -11
  262. warp/tests/test_operators.py +16 -20
  263. warp/tests/test_options.py +11 -11
  264. warp/tests/test_pinned.py +17 -18
  265. warp/tests/test_print.py +32 -11
  266. warp/tests/test_quat.py +275 -129
  267. warp/tests/test_rand.py +18 -16
  268. warp/tests/test_reload.py +38 -34
  269. warp/tests/test_rounding.py +50 -43
  270. warp/tests/test_runlength_encode.py +190 -0
  271. warp/tests/test_smoothstep.py +9 -11
  272. warp/tests/test_snippet.py +143 -0
  273. warp/tests/test_sparse.py +460 -0
  274. warp/tests/test_spatial.py +276 -243
  275. warp/tests/test_streams.py +110 -85
  276. warp/tests/test_struct.py +331 -85
  277. warp/tests/test_tape.py +39 -21
  278. warp/tests/test_torch.py +118 -89
  279. warp/tests/test_transient_module.py +12 -13
  280. warp/tests/test_types.py +614 -0
  281. warp/tests/test_utils.py +494 -0
  282. warp/tests/test_vec.py +354 -1987
  283. warp/tests/test_vec_lite.py +73 -0
  284. warp/tests/test_vec_scalar_ops.py +2099 -0
  285. warp/tests/test_volume.py +457 -293
  286. warp/tests/test_volume_write.py +124 -134
  287. warp/tests/unittest_serial.py +35 -0
  288. warp/tests/unittest_suites.py +341 -0
  289. warp/tests/unittest_utils.py +568 -0
  290. warp/tests/unused_test_misc.py +71 -0
  291. warp/tests/{test_debug.py → walkthrough_debug.py} +3 -17
  292. warp/thirdparty/appdirs.py +36 -45
  293. warp/thirdparty/unittest_parallel.py +549 -0
  294. warp/torch.py +72 -30
  295. warp/types.py +1744 -713
  296. warp/utils.py +360 -350
  297. warp_lang-0.11.0.dist-info/LICENSE.md +36 -0
  298. warp_lang-0.11.0.dist-info/METADATA +238 -0
  299. warp_lang-0.11.0.dist-info/RECORD +332 -0
  300. {warp_lang-0.9.0.dist-info → warp_lang-0.11.0.dist-info}/WHEEL +1 -1
  301. warp/bin/warp-clang.exp +0 -0
  302. warp/bin/warp-clang.lib +0 -0
  303. warp/bin/warp.exp +0 -0
  304. warp/bin/warp.lib +0 -0
  305. warp/tests/test_all.py +0 -215
  306. warp/tests/test_array_scan.py +0 -60
  307. warp/tests/test_base.py +0 -208
  308. warp/tests/test_unresolved_func.py +0 -7
  309. warp/tests/test_unresolved_symbol.py +0 -7
  310. warp_lang-0.9.0.dist-info/METADATA +0 -20
  311. warp_lang-0.9.0.dist-info/RECORD +0 -177
  312. /warp/tests/{test_compile_consts_dummy.py → aux_test_compile_consts_dummy.py} +0 -0
  313. /warp/tests/{test_reference_reference.py → aux_test_reference_reference.py} +0 -0
  314. /warp/tests/{test_square.py → aux_test_square.py} +0 -0
  315. {warp_lang-0.9.0.dist-info → warp_lang-0.11.0.dist-info}/top_level.txt +0 -0
warp/native/vec.h CHANGED
@@ -16,9 +16,11 @@ namespace wp
 template<unsigned Length, typename Type>
 struct vec_t
 {
-    Type c[Length] = {};
+    Type c[Length];
 
-    inline vec_t() = default;
+    inline CUDA_CALLABLE vec_t()
+        : c()
+    {}
 
     inline CUDA_CALLABLE vec_t(Type s)
     {
@@ -27,6 +29,15 @@ struct vec_t
             c[i] = s;
         }
     }
+
+    template <typename OtherType>
+    inline explicit CUDA_CALLABLE vec_t(const vec_t<Length, OtherType>& other)
+    {
+        for( unsigned i=0; i < Length; ++i )
+        {
+            c[i] = static_cast<Type>(other[i]);
+        }
+    }
 
     inline CUDA_CALLABLE vec_t(Type x, Type y)
     {
@@ -275,12 +286,41 @@ inline CUDA_CALLABLE vec_t<2, Type> div(vec_t<2, Type> a, Type s)
     return vec_t<2, Type>(a.c[0]/s,a.c[1]/s);
 }
 
+template<unsigned Length, typename Type>
+inline CUDA_CALLABLE vec_t<Length, Type> div(Type s, vec_t<Length, Type> a)
+{
+    vec_t<Length, Type> ret;
+    for (unsigned i=0; i < Length; ++i)
+    {
+        ret[i] = s / a[i];
+    }
+    return ret;
+}
+
+template<typename Type>
+inline CUDA_CALLABLE vec_t<3, Type> div(Type s, vec_t<3, Type> a)
+{
+    return vec_t<3, Type>(s/a.c[0],s/a.c[1],s/a.c[2]);
+}
+
+template<typename Type>
+inline CUDA_CALLABLE vec_t<2, Type> div(Type s, vec_t<2, Type> a)
+{
+    return vec_t<2, Type>(s/a.c[0],s/a.c[1]);
+}
+
 template<unsigned Length, typename Type>
 inline CUDA_CALLABLE vec_t<Length, Type> operator / (vec_t<Length, Type> a, Type s)
 {
     return div(a,s);
 }
 
+template<unsigned Length, typename Type>
+inline CUDA_CALLABLE vec_t<Length, Type> operator / (Type s, vec_t<Length, Type> a)
+{
+    return div(s, a);
+}
+
 // component wise division
 template<unsigned Length, typename Type>
 inline CUDA_CALLABLE vec_t<Length, Type> cw_div(vec_t<Length, Type> a, vec_t<Length, Type> b)
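
The hunk above adds a scalar-over-vector form of div() and a matching operator/, so expressions such as 2.0f / v now divide componentwise. A minimal standalone sketch of those semantics, using a simplified stand-in struct rather than Warp's actual vec_t header:

    #include <cstdio>

    // Simplified stand-in for wp::vec_t, only to illustrate the new overload.
    template<unsigned Length, typename Type>
    struct vec
    {
        Type c[Length] = {};
    };

    // Componentwise scalar-over-vector division: (s / a)[i] == s / a[i].
    template<unsigned Length, typename Type>
    vec<Length, Type> operator/(Type s, const vec<Length, Type>& a)
    {
        vec<Length, Type> ret;
        for (unsigned i = 0; i < Length; ++i)
            ret.c[i] = s / a.c[i];
        return ret;
    }

    int main()
    {
        vec<3, float> v{{2.0f, 4.0f, 8.0f}};
        vec<3, float> r = 1.0f / v;  // previously ill-formed; now componentwise
        std::printf("%g %g %g\n", r.c[0], r.c[1], r.c[2]);  // prints 0.5 0.25 0.125
        return 0;
    }
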
@@ -374,7 +414,7 @@ inline CUDA_CALLABLE Type tensordot(vec_t<Length, Type> a, vec_t<Length, Type> b
 
 
 template<unsigned Length, typename Type>
-inline CUDA_CALLABLE Type index(const vec_t<Length, Type> & a, int idx)
+inline CUDA_CALLABLE Type extract(const vec_t<Length, Type> & a, int idx)
 {
 #ifndef NDEBUG
     if (idx < 0 || idx >= Length)
@@ -388,7 +428,21 @@ inline CUDA_CALLABLE Type index(const vec_t<Length, Type> & a, int idx)
 }
 
 template<unsigned Length, typename Type>
-inline CUDA_CALLABLE void indexset(vec_t<Length, Type>& v, int idx, Type value)
+inline CUDA_CALLABLE Type* index(vec_t<Length, Type>& v, int idx)
+{
+#ifndef NDEBUG
+    if (idx < 0 || idx >= Length)
+    {
+        printf("vec index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
+        assert(0);
+    }
+#endif
+
+    return &v[idx];
+}
+
+template<unsigned Length, typename Type>
+inline CUDA_CALLABLE Type* indexref(vec_t<Length, Type>* v, int idx)
 {
 #ifndef NDEBUG
     if (idx < 0 || idx >= Length)
@@ -398,17 +452,23 @@ inline CUDA_CALLABLE void indexset(vec_t<Length, Type>& v, int idx, Type value)
     }
 #endif
 
-    v[idx] = value;
+    return &((*v)[idx]);
 }
 
 template<unsigned Length, typename Type>
-inline CUDA_CALLABLE void adj_indexset(vec_t<Length, Type>& v, int idx, const Type& value,
+inline CUDA_CALLABLE void adj_index(vec_t<Length, Type>& v, int idx,
                                        vec_t<Length, Type>& adj_v, int adj_idx, const Type& adj_value)
 {
     // nop
 }
 
 
+template<unsigned Length, typename Type>
+inline CUDA_CALLABLE void adj_indexref(vec_t<Length, Type>* v, int idx,
+                                       vec_t<Length, Type>& adj_v, int adj_idx, const Type& adj_value)
+{
+    // nop
+}
 
 
 template<unsigned Length, typename Type>
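
The hunks above retire the old index()/indexset() pair: reads go through extract(), while the new index()/indexref() return a pointer to the component so that code can assign through it in place. A rough standalone illustration of that read/write split, with simplified types and without the bounds-check printf:

    #include <cassert>

    template<unsigned Length, typename Type>
    struct vec
    {
        Type c[Length] = {};
    };

    // Read path: returns the component by value (cf. extract() above).
    template<unsigned Length, typename Type>
    Type extract(const vec<Length, Type>& v, int idx)
    {
        assert(idx >= 0 && idx < (int)Length);
        return v.c[idx];
    }

    // Write path: returns a pointer so the caller can assign in place (cf. index()).
    template<unsigned Length, typename Type>
    Type* index(vec<Length, Type>& v, int idx)
    {
        assert(idx >= 0 && idx < (int)Length);
        return &v.c[idx];
    }

    int main()
    {
        vec<3, float> v;
        *index(v, 1) = 5.0f;            // replaces the old indexset(v, 1, 5.0f) pattern
        assert(extract(v, 1) == 5.0f);
        return 0;
    }
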
@@ -572,7 +632,7 @@ inline CUDA_CALLABLE void expect_near(const vec_t<Length, Type>& actual, const v
     }
     if (diff > tolerance)
     {
-        printf("Error, expect_near() failed with torerance "); print(tolerance);
+        printf("Error, expect_near() failed with tolerance "); print(tolerance);
         printf("\t Expected: "); print(expected);
         printf("\t Actual: "); print(actual);
     }
@@ -630,6 +690,15 @@ inline CUDA_CALLABLE void adj_vec_t(Type s, Type& adj_s, const vec_t<Length, Typ
     }
 }
 
+// adjoint for the casting constructor
+template<unsigned Length, typename Type, typename OtherType>
+inline CUDA_CALLABLE void adj_vec_t(const vec_t<Length, OtherType>& other, vec_t<Length, OtherType>& adj_other, const vec_t<Length, Type>& adj_ret)
+{
+    for( unsigned i=0; i < Length; ++i )
+    {
+        adj_other[i] += static_cast<OtherType>(adj_ret[i]);
+    }
+}
 
 template<typename Type>
 CUDA_CALLABLE inline void adj_vec_t(const vec_t<3,Type>& w, const vec_t<3,Type>& v, vec_t<3,Type>& adj_w, vec_t<3,Type>& adj_v, const vec_t<6,Type>& adj_ret)
@@ -697,9 +766,30 @@ inline CUDA_CALLABLE void adj_div(vec_t<Length, Type> a, Type s, vec_t<Length, T
 }
 
 template<unsigned Length, typename Type>
-inline CUDA_CALLABLE void adj_cw_div(vec_t<Length, Type> a, vec_t<Length, Type> b, vec_t<Length, Type>& adj_a, vec_t<Length, Type>& adj_b, const vec_t<Length, Type>& adj_ret) {
+inline CUDA_CALLABLE void adj_div(Type s, vec_t<Length, Type> a, Type& adj_s, vec_t<Length, Type>& adj_a, const vec_t<Length, Type>& adj_ret)
+{
+
+    adj_s -= dot(a , adj_ret)/ (s * s); // - a / s^2
+
+    for( unsigned i=0; i < Length; ++i )
+    {
+        adj_a[i] += s / adj_ret[i];
+    }
+
+#if FP_CHECK
+    if (!isfinite(a) || !isfinite(s) || !isfinite(adj_a) || !isfinite(adj_s) || !isfinite(adj_ret))
+    {
+        // \TODO: How shall we implement this error message?
+        // printf("adj_div((%f %f %f %f), %f, (%f %f %f %f), %f, (%f %f %f %f)\n", a.x, a.y, a.z, a.w, s, adj_a.x, adj_a.y, adj_a.z, adj_a.w, adj_s, adj_ret.x, adj_ret.y, adj_ret.z, adj_ret.w);
+        assert(0);
+    }
+#endif
+}
+
+template<unsigned Length, typename Type>
+inline CUDA_CALLABLE void adj_cw_div(vec_t<Length, Type> a, vec_t<Length, Type> b, vec_t<Length, Type>& ret, vec_t<Length, Type>& adj_a, vec_t<Length, Type>& adj_b, const vec_t<Length, Type>& adj_ret) {
     adj_a += cw_div(adj_ret, b);
-    adj_b -= cw_mul(adj_ret, cw_div(cw_div(a, b), b));
+    adj_b -= cw_mul(adj_ret, cw_div(ret, b));
 }
 
 template<unsigned Length, typename Type>
@@ -798,7 +888,7 @@ inline CUDA_CALLABLE void adj_dot(vec_t<3, Type> a, vec_t<3, Type> b, vec_t<3, T
 
 
 template<unsigned Length, typename Type>
-inline CUDA_CALLABLE void adj_index(const vec_t<Length, Type> & a, int idx, vec_t<Length, Type> & adj_a, int & adj_idx, Type & adj_ret)
+inline CUDA_CALLABLE void adj_extract(const vec_t<Length, Type> & a, int idx, vec_t<Length, Type> & adj_a, int & adj_idx, Type & adj_ret)
 {
 #ifndef NDEBUG
     if (idx < 0 || idx > Length)
@@ -812,9 +902,12 @@ inline CUDA_CALLABLE void adj_index(const vec_t<Length, Type> & a, int idx, vec_
 }
 
 template<unsigned Length, typename Type>
-inline CUDA_CALLABLE void adj_length(vec_t<Length, Type> a, vec_t<Length, Type>& adj_a, const Type adj_ret)
+inline CUDA_CALLABLE void adj_length(vec_t<Length, Type> a, Type ret, vec_t<Length, Type>& adj_a, const Type adj_ret)
 {
-    adj_a += normalize(a)*adj_ret;
+    if (ret > Type(kEps))
+    {
+        adj_a += div(a, ret) * adj_ret;
+    }
 
 #if FP_CHECK
     if (!isfinite(adj_a))
@@ -842,7 +935,7 @@ inline CUDA_CALLABLE void adj_length_sq(vec_t<Length, Type> a, vec_t<Length, Typ
 }
 
 template<unsigned Length, typename Type>
-inline CUDA_CALLABLE void adj_normalize(vec_t<Length, Type> a, vec_t<Length, Type>& adj_a, const vec_t<Length, Type>& adj_ret)
+inline CUDA_CALLABLE void adj_normalize(vec_t<Length, Type> a, vec_t<Length, Type>& ret, vec_t<Length, Type>& adj_a, const vec_t<Length, Type>& adj_ret)
 {
     Type d = length(a);
 
@@ -850,9 +943,7 @@ inline CUDA_CALLABLE void adj_normalize(vec_t<Length, Type> a, vec_t<Length, Typ
     {
         Type invd = Type(1.0f)/d;
 
-        vec_t<Length, Type> ahat = normalize(a);
-
-        adj_a += (adj_ret*invd - ahat*(dot(ahat, adj_ret))*invd);
+        adj_a += (adj_ret*invd - ret*(dot(ret, adj_ret))*invd);
 
 #if FP_CHECK
         if (!isfinite(adj_a))
@@ -913,8 +1004,8 @@ inline CUDA_CALLABLE void adj_max(const vec_t<Length,Type> &v, vec_t<Length,Type
 
 // Do I need to specialize these for different lengths?
 template<unsigned Length, typename Type>
-inline CUDA_CALLABLE vec_t<Length, Type> atomic_add(vec_t<Length, Type> * addr, vec_t<Length, Type> value) {
-
+inline CUDA_CALLABLE vec_t<Length, Type> atomic_add(vec_t<Length, Type> * addr, vec_t<Length, Type> value)
+{
     vec_t<Length, Type> ret;
     for( unsigned i=0; i < Length; ++i )
    {
@@ -925,8 +1016,8 @@ inline CUDA_CALLABLE vec_t<Length, Type> atomic_add(vec_t<Length, Type> * addr,
 }
 
 template<unsigned Length, typename Type>
-inline CUDA_CALLABLE vec_t<Length, Type> atomic_min(vec_t<Length, Type> * addr, vec_t<Length, Type> value) {
-
+inline CUDA_CALLABLE vec_t<Length, Type> atomic_min(vec_t<Length, Type> * addr, vec_t<Length, Type> value)
+{
     vec_t<Length, Type> ret;
     for( unsigned i=0; i < Length; ++i )
     {
@@ -937,8 +1028,8 @@ inline CUDA_CALLABLE vec_t<Length, Type> atomic_min(vec_t<Length, Type> * addr,
 }
 
 template<unsigned Length, typename Type>
-inline CUDA_CALLABLE vec_t<Length, Type> atomic_max(vec_t<Length, Type> * addr, vec_t<Length, Type> value) {
-
+inline CUDA_CALLABLE vec_t<Length, Type> atomic_max(vec_t<Length, Type> * addr, vec_t<Length, Type> value)
+{
     vec_t<Length, Type> ret;
     for( unsigned i=0; i < Length; ++i )
     {
@@ -948,6 +1039,17 @@ inline CUDA_CALLABLE vec_t<Length, Type> atomic_max(vec_t<Length, Type> * addr,
     return ret;
 }
 
+template<unsigned Length, typename Type>
+inline CUDA_CALLABLE void adj_atomic_minmax(
+    vec_t<Length,Type> *addr,
+    vec_t<Length,Type> *adj_addr,
+    const vec_t<Length,Type> &value,
+    vec_t<Length,Type> &adj_value)
+{
+    for (unsigned i=0; i < Length; ++i)
+        adj_atomic_minmax(&(addr->c[i]), &(adj_addr->c[i]), value[i], adj_value[i]);
+}
+
 // ok, the original implementation of this didn't take the absolute values.
 // I wouldn't consider this expected behavior. It looks like it's only
 // being used for bounding boxes at the moment, where this doesn't matter,
@@ -956,11 +1058,11 @@ inline CUDA_CALLABLE vec_t<Length, Type> atomic_max(vec_t<Length, Type> * addr,
 template<unsigned Length, typename Type>
 CUDA_CALLABLE inline int longest_axis(const vec_t<Length, Type>& v)
 {
-    Type lmax = fabs(v[0]);
+    Type lmax = abs(v[0]);
     int ret(0);
     for( unsigned i=1; i < Length; ++i )
     {
-        Type l = fabs(v[i]);
+        Type l = abs(v[i]);
         if( l > lmax )
         {
            ret = i;
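
For reference, here is a standalone sketch of how the explicit casting constructor introduced in the first hunks behaves; the stand-in type below is simplified, but the constructor body mirrors the per-component static_cast from the diff:

    #include <cstdio>

    template<unsigned Length, typename Type>
    struct vec
    {
        Type c[Length] = {};

        vec() = default;
        vec(Type x, Type y, Type z) : c{x, y, z} {}

        // Per-component conversion; explicit, so silent implicit conversion is rejected.
        template<typename OtherType>
        explicit vec(const vec<Length, OtherType>& other)
        {
            for (unsigned i = 0; i < Length; ++i)
                c[i] = static_cast<Type>(other.c[i]);
        }
    };

    int main()
    {
        vec<3, double> vd(0.25, 1.5, 2.75);
        vec<3, float> vf(vd);       // fine: the conversion is spelled out
        // vec<3, float> bad = vd;  // would not compile: the constructor is explicit
        std::printf("%g %g %g\n", vf.c[0], vf.c[1], vf.c[2]);
        return 0;
    }
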
warp/native/volume.h CHANGED
@@ -232,6 +232,126 @@ CUDA_CALLABLE inline void adj_volume_sample_i(uint64_t id, vec3 uvw, uint64_t& a
     // NOP
 }
 
+// Sampling the volume at the given index-space coordinates, uvw can be fractional
+CUDA_CALLABLE inline float volume_sample_grad_f(uint64_t id, vec3 uvw, int sampling_mode, vec3& grad)
+{
+    const pnanovdb_buf_t buf = volume::id_to_buffer(id);
+    const pnanovdb_root_handle_t root = volume::get_root(buf);
+    const pnanovdb_vec3_t uvw_pnano{ uvw[0], uvw[1], uvw[2] };
+
+    if (sampling_mode == volume::CLOSEST)
+    {
+        const pnanovdb_coord_t ijk = pnanovdb_vec3_round_to_coord(uvw_pnano);
+        float val;
+        pnano_read(val, buf, root, PNANOVDB_REF(ijk));
+        grad = vec3(0.0f, 0.0f, 0.0f);
+        return val;
+    }
+    else if (sampling_mode == volume::LINEAR)
+    {
+        // NB. linear sampling is not used on int volumes
+        constexpr pnanovdb_coord_t OFFSETS[] = {
+            { 0, 0, 0 }, { 0, 0, 1 }, { 0, 1, 0 }, { 0, 1, 1 }, { 1, 0, 0 }, { 1, 0, 1 }, { 1, 1, 0 }, { 1, 1, 1 },
+        };
+
+        const pnanovdb_vec3_t ijk_base{ floorf(uvw_pnano.x), floorf(uvw_pnano.y), floorf(uvw_pnano.z) };
+        const pnanovdb_vec3_t ijk_frac{ uvw_pnano.x - ijk_base.x, uvw_pnano.y - ijk_base.y, uvw_pnano.z - ijk_base.z };
+        const pnanovdb_coord_t ijk{ (pnanovdb_int32_t)ijk_base.x, (pnanovdb_int32_t)ijk_base.y, (pnanovdb_int32_t)ijk_base.z };
+
+        pnanovdb_readaccessor_t accessor;
+        pnanovdb_readaccessor_init(PNANOVDB_REF(accessor), root);
+        float val = 0.0f;
+        const float wx[2]{ 1 - ijk_frac.x, ijk_frac.x };
+        const float wy[2]{ 1 - ijk_frac.y, ijk_frac.y };
+        const float wz[2]{ 1 - ijk_frac.z, ijk_frac.z };
+
+        const float sign_dx[8] = {-1.0f, -1.0f, -1.0f, -1.0f, 1.0f, 1.0f, 1.0f, 1.0f};
+        const float sign_dy[8] = {-1.0f, -1.0f, 1.0f, 1.0f, -1.0f, -1.0f, 1.0f, 1.0f};
+        const float sign_dz[8] = {-1.0f, 1.0f, -1.0f, 1.0f, -1.0f, 1.0f, -1.0f, 1.0f};
+
+        float dfdx = 0.0f;
+        float dfdy = 0.0f;
+        float dfdz = 0.0f;
+        for (int idx = 0; idx < 8; ++idx)
+        {
+            const pnanovdb_coord_t& offs = OFFSETS[idx];
+            const pnanovdb_coord_t ijk_shifted = pnanovdb_coord_add(ijk, offs);
+            float v;
+            pnano_read(v, buf, PNANOVDB_REF(accessor), PNANOVDB_REF(ijk_shifted));
+            val = add(val, wx[offs.x] * wy[offs.y] * wz[offs.z] * v);
+            dfdx = add(dfdx, wy[offs.y] * wz[offs.z] * sign_dx[idx] * v);
+            dfdy = add(dfdy, wx[offs.x] * wz[offs.z] * sign_dy[idx] * v);
+            dfdz = add(dfdz, wx[offs.x] * wy[offs.y] * sign_dz[idx] * v);
+        }
+        grad = vec3(dfdx, dfdy, dfdz);
+        return val;
+    }
+    return 0.0f;
+}
+
+CUDA_CALLABLE inline void adj_volume_sample_grad_f(
+    uint64_t id, vec3 uvw, int sampling_mode, vec3& grad, uint64_t& adj_id, vec3& adj_uvw, int& adj_sampling_mode, vec3& adj_grad, const float& adj_ret)
+{
+    if (volume::get_grid_type(volume::id_to_buffer(id)) != PNANOVDB_GRID_TYPE_FLOAT) return;
+
+    if (sampling_mode != volume::LINEAR) {
+        return; // NOP
+    }
+
+    const pnanovdb_buf_t buf = volume::id_to_buffer(id);
+    const pnanovdb_root_handle_t root = volume::get_root(buf);
+    const pnanovdb_vec3_t uvw_pnano{ uvw[0], uvw[1], uvw[2] };
+
+    constexpr pnanovdb_coord_t OFFSETS[] = {
+        { 0, 0, 0 }, { 0, 0, 1 }, { 0, 1, 0 }, { 0, 1, 1 }, { 1, 0, 0 }, { 1, 0, 1 }, { 1, 1, 0 }, { 1, 1, 1 },
+    };
+
+    const pnanovdb_vec3_t ijk_base{ floorf(uvw_pnano.x), floorf(uvw_pnano.y), floorf(uvw_pnano.z) };
+    const pnanovdb_vec3_t ijk_frac{ uvw_pnano.x - ijk_base.x, uvw_pnano.y - ijk_base.y, uvw_pnano.z - ijk_base.z };
+    const pnanovdb_coord_t ijk{ (pnanovdb_int32_t)ijk_base.x, (pnanovdb_int32_t)ijk_base.y, (pnanovdb_int32_t)ijk_base.z };
+
+    pnanovdb_readaccessor_t accessor;
+    pnanovdb_readaccessor_init(PNANOVDB_REF(accessor), root);
+    const float wx[2]{ 1 - ijk_frac.x, ijk_frac.x };
+    const float wy[2]{ 1 - ijk_frac.y, ijk_frac.y };
+    const float wz[2]{ 1 - ijk_frac.z, ijk_frac.z };
+    const float sign_dx[8] = {-1.0f, -1.0f, -1.0f, -1.0f, 1.0f, 1.0f, 1.0f, 1.0f};
+    const float sign_dy[8] = {-1.0f, -1.0f, 1.0f, 1.0f, -1.0f, -1.0f, 1.0f, 1.0f};
+    const float sign_dz[8] = {-1.0f, 1.0f, -1.0f, 1.0f, -1.0f, 1.0f, -1.0f, 1.0f};
+
+    float dfdxdy = 0.0f;
+    float dfdxdz = 0.0f;
+    float dfdydx = 0.0f;
+    float dfdydz = 0.0f;
+    float dfdzdx = 0.0f;
+    float dfdzdy = 0.0f;
+    vec3 dphi(0,0,0);
+    for (int idx = 0; idx < 8; ++idx)
+    {
+        const pnanovdb_coord_t& offs = OFFSETS[idx];
+        const pnanovdb_coord_t ijk_shifted = pnanovdb_coord_add(ijk, offs);
+        float v;
+        pnano_read(v, buf, PNANOVDB_REF(accessor), PNANOVDB_REF(ijk_shifted));
+        const vec3 signs(offs.x * 2 - 1, offs.y * 2 - 1, offs.z * 2 - 1);
+        const vec3 grad_w(signs[0] * wy[offs.y] * wz[offs.z], signs[1] * wx[offs.x] * wz[offs.z], signs[2] * wx[offs.x] * wy[offs.y]);
+        dphi = add(dphi, mul(v, grad_w));

+        dfdxdy = add(dfdxdy, signs[1] * wz[offs.z] * sign_dx[idx] * v);
+        dfdxdz = add(dfdxdz, wy[offs.y] * signs[2] * sign_dx[idx] * v);
+
+        dfdydx = add(dfdydx, signs[0] * wz[offs.z] * sign_dy[idx] * v);
+        dfdydz = add(dfdydz, wx[offs.x] * signs[2] * sign_dy[idx] * v);
+
+        dfdzdx = add(dfdzdx, signs[0] * wy[offs.y] * sign_dz[idx] * v);
+        dfdzdy = add(dfdzdy, wx[offs.x] * signs[1] * sign_dz[idx] * v);
+    }
+
+    adj_uvw += mul(dphi, adj_ret);
+    adj_uvw[0] += adj_grad[1] * dfdydx + adj_grad[2] * dfdzdx;
+    adj_uvw[1] += adj_grad[0] * dfdxdy + adj_grad[2] * dfdzdy;
+    adj_uvw[2] += adj_grad[0] * dfdxdz + adj_grad[1] * dfdydz;
+}
+
 CUDA_CALLABLE inline float volume_lookup_f(uint64_t id, int32_t i, int32_t j, int32_t k)
 {
     if (volume::get_grid_type(volume::id_to_buffer(id)) != PNANOVDB_GRID_TYPE_FLOAT) return 0.f;
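
The new volume_sample_grad_f above fuses trilinear value interpolation with its analytic index-space gradient: for each corner, the derivative along an axis reuses the same corner value but swaps that axis' weight pair {1-f, f} for the signs {-1, +1}. A self-contained sketch of the same scheme on a plain dense field (the lookup() function here is a hypothetical stand-in for the NanoVDB pnano_read calls):

    #include <cstdio>
    #include <cmath>

    // Hypothetical dense-grid stand-in for the voxel fetch done via pnano_read.
    static float lookup(int i, int j, int k)
    {
        return float(i + 2 * j + 4 * k);  // a linear test field with gradient (1, 2, 4)
    }

    // Trilinear value plus analytic gradient, mirroring the weight/sign scheme above.
    static float sample_grad(float x, float y, float z, float grad[3])
    {
        const int   i = (int)std::floor(x), j = (int)std::floor(y), k = (int)std::floor(z);
        const float fx = x - i, fy = y - j, fz = z - k;
        const float wx[2] = {1 - fx, fx}, wy[2] = {1 - fy, fy}, wz[2] = {1 - fz, fz};
        const float sgn[2] = {-1.0f, 1.0f};  // d/df of the weight pair {1-f, f}

        float val = 0.0f;
        grad[0] = grad[1] = grad[2] = 0.0f;
        for (int dx = 0; dx < 2; ++dx)
            for (int dy = 0; dy < 2; ++dy)
                for (int dz = 0; dz < 2; ++dz)
                {
                    const float v = lookup(i + dx, j + dy, k + dz);
                    val     += wx[dx] * wy[dy] * wz[dz] * v;
                    grad[0] += sgn[dx] * wy[dy] * wz[dz] * v;  // df/dx
                    grad[1] += wx[dx] * sgn[dy] * wz[dz] * v;  // df/dy
                    grad[2] += wx[dx] * wy[dy] * sgn[dz] * v;  // df/dz
                }
        return val;
    }

    int main()
    {
        float g[3];
        const float v = sample_grad(0.25f, 0.5f, 0.75f, g);
        std::printf("val=%g grad=(%g %g %g)\n", v, g[0], g[1], g[2]);  // val=4.25 grad=(1 2 4)
        return 0;
    }
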