warp-lang 0.10.1__py3-none-win_amd64.whl → 0.11.0__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of warp-lang might be problematic. Click here for more details.

Files changed (300) hide show
  1. warp/__init__.py +10 -4
  2. warp/__init__.pyi +1 -0
  3. warp/bin/warp-clang.dll +0 -0
  4. warp/bin/warp.dll +0 -0
  5. warp/build.py +5 -3
  6. warp/build_dll.py +29 -9
  7. warp/builtins.py +868 -507
  8. warp/codegen.py +1074 -638
  9. warp/config.py +3 -3
  10. warp/constants.py +6 -0
  11. warp/context.py +715 -222
  12. warp/fabric.py +326 -0
  13. warp/fem/__init__.py +27 -0
  14. warp/fem/cache.py +389 -0
  15. warp/fem/dirichlet.py +181 -0
  16. warp/fem/domain.py +263 -0
  17. warp/fem/field/__init__.py +101 -0
  18. warp/fem/field/field.py +149 -0
  19. warp/fem/field/nodal_field.py +299 -0
  20. warp/fem/field/restriction.py +21 -0
  21. warp/fem/field/test.py +181 -0
  22. warp/fem/field/trial.py +183 -0
  23. warp/fem/geometry/__init__.py +19 -0
  24. warp/fem/geometry/closest_point.py +70 -0
  25. warp/fem/geometry/deformed_geometry.py +271 -0
  26. warp/fem/geometry/element.py +744 -0
  27. warp/fem/geometry/geometry.py +186 -0
  28. warp/fem/geometry/grid_2d.py +373 -0
  29. warp/fem/geometry/grid_3d.py +435 -0
  30. warp/fem/geometry/hexmesh.py +953 -0
  31. warp/fem/geometry/partition.py +376 -0
  32. warp/fem/geometry/quadmesh_2d.py +532 -0
  33. warp/fem/geometry/tetmesh.py +840 -0
  34. warp/fem/geometry/trimesh_2d.py +577 -0
  35. warp/fem/integrate.py +1616 -0
  36. warp/fem/operator.py +191 -0
  37. warp/fem/polynomial.py +213 -0
  38. warp/fem/quadrature/__init__.py +2 -0
  39. warp/fem/quadrature/pic_quadrature.py +245 -0
  40. warp/fem/quadrature/quadrature.py +294 -0
  41. warp/fem/space/__init__.py +292 -0
  42. warp/fem/space/basis_space.py +489 -0
  43. warp/fem/space/collocated_function_space.py +105 -0
  44. warp/fem/space/dof_mapper.py +236 -0
  45. warp/fem/space/function_space.py +145 -0
  46. warp/fem/space/grid_2d_function_space.py +267 -0
  47. warp/fem/space/grid_3d_function_space.py +306 -0
  48. warp/fem/space/hexmesh_function_space.py +352 -0
  49. warp/fem/space/partition.py +350 -0
  50. warp/fem/space/quadmesh_2d_function_space.py +369 -0
  51. warp/fem/space/restriction.py +160 -0
  52. warp/fem/space/shape/__init__.py +15 -0
  53. warp/fem/space/shape/cube_shape_function.py +738 -0
  54. warp/fem/space/shape/shape_function.py +103 -0
  55. warp/fem/space/shape/square_shape_function.py +611 -0
  56. warp/fem/space/shape/tet_shape_function.py +567 -0
  57. warp/fem/space/shape/triangle_shape_function.py +429 -0
  58. warp/fem/space/tetmesh_function_space.py +292 -0
  59. warp/fem/space/topology.py +295 -0
  60. warp/fem/space/trimesh_2d_function_space.py +221 -0
  61. warp/fem/types.py +77 -0
  62. warp/fem/utils.py +495 -0
  63. warp/native/array.h +147 -44
  64. warp/native/builtin.h +122 -149
  65. warp/native/bvh.cpp +73 -325
  66. warp/native/bvh.cu +406 -23
  67. warp/native/bvh.h +34 -43
  68. warp/native/clang/clang.cpp +13 -8
  69. warp/native/crt.h +2 -0
  70. warp/native/cuda_crt.h +5 -0
  71. warp/native/cuda_util.cpp +15 -3
  72. warp/native/cuda_util.h +3 -1
  73. warp/native/cutlass/tools/library/scripts/conv2d_operation.py +463 -0
  74. warp/native/cutlass/tools/library/scripts/conv3d_operation.py +321 -0
  75. warp/native/cutlass/tools/library/scripts/gemm_operation.py +988 -0
  76. warp/native/cutlass/tools/library/scripts/generator.py +4625 -0
  77. warp/native/cutlass/tools/library/scripts/library.py +799 -0
  78. warp/native/cutlass/tools/library/scripts/manifest.py +402 -0
  79. warp/native/cutlass/tools/library/scripts/pycutlass/docs/source/conf.py +96 -0
  80. warp/native/cutlass/tools/library/scripts/pycutlass/profile/conv/conv2d_f16_sm80.py +106 -0
  81. warp/native/cutlass/tools/library/scripts/pycutlass/profile/gemm/gemm_f32_sm80.py +91 -0
  82. warp/native/cutlass/tools/library/scripts/pycutlass/setup.py +80 -0
  83. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/__init__.py +48 -0
  84. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/arguments.py +118 -0
  85. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/c_types.py +241 -0
  86. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/compiler.py +432 -0
  87. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/conv2d_operation.py +631 -0
  88. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/epilogue.py +1026 -0
  89. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/frontend.py +104 -0
  90. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/gemm_operation.py +1276 -0
  91. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/library.py +744 -0
  92. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/memory_manager.py +74 -0
  93. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/operation.py +110 -0
  94. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/parser.py +619 -0
  95. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/reduction_operation.py +398 -0
  96. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/tensor_ref.py +70 -0
  97. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/__init__.py +4 -0
  98. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/conv2d_testbed.py +646 -0
  99. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/gemm_grouped_testbed.py +235 -0
  100. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/gemm_testbed.py +557 -0
  101. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/profiler.py +70 -0
  102. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/type_hint.py +39 -0
  103. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/utils/__init__.py +1 -0
  104. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/utils/device.py +76 -0
  105. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/utils/reference_model.py +255 -0
  106. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/__init__.py +0 -0
  107. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.py +201 -0
  108. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +177 -0
  109. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.py +98 -0
  110. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.py +95 -0
  111. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_few_channels_f16nhwc_f16nhwc_f16nhwc_tensor_op_f32_sm80.py +163 -0
  112. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_fixed_channels_f16nhwc_f16nhwc_f16nhwc_tensor_op_f32_sm80.py +187 -0
  113. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.py +309 -0
  114. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +54 -0
  115. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.py +96 -0
  116. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.py +107 -0
  117. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_strided_dgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +253 -0
  118. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.py +97 -0
  119. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +242 -0
  120. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.py +96 -0
  121. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.py +107 -0
  122. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/run_all_tests.py +10 -0
  123. warp/native/cutlass/tools/library/scripts/pycutlass/test/frontend/test_frontend.py +146 -0
  124. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/__init__.py +0 -0
  125. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_bf16_sm80.py +96 -0
  126. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_f16_sm80.py +447 -0
  127. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_f32_sm80.py +146 -0
  128. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_f64_sm80.py +102 -0
  129. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_grouped_sm80.py +203 -0
  130. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_s8_sm80.py +229 -0
  131. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/run_all_tests.py +9 -0
  132. warp/native/cutlass/tools/library/scripts/pycutlass/test/unit/test_sm80.py +453 -0
  133. warp/native/cutlass/tools/library/scripts/rank_2k_operation.py +398 -0
  134. warp/native/cutlass/tools/library/scripts/rank_k_operation.py +387 -0
  135. warp/native/cutlass/tools/library/scripts/rt.py +796 -0
  136. warp/native/cutlass/tools/library/scripts/symm_operation.py +400 -0
  137. warp/native/cutlass/tools/library/scripts/trmm_operation.py +407 -0
  138. warp/native/cutlass_gemm.cu +5 -3
  139. warp/native/exports.h +1240 -952
  140. warp/native/fabric.h +228 -0
  141. warp/native/hashgrid.cpp +4 -4
  142. warp/native/hashgrid.h +22 -2
  143. warp/native/intersect.h +22 -7
  144. warp/native/intersect_adj.h +8 -8
  145. warp/native/intersect_tri.h +1 -1
  146. warp/native/marching.cu +157 -161
  147. warp/native/mat.h +80 -19
  148. warp/native/matnn.h +2 -2
  149. warp/native/mesh.cpp +33 -108
  150. warp/native/mesh.cu +114 -23
  151. warp/native/mesh.h +446 -46
  152. warp/native/noise.h +272 -329
  153. warp/native/quat.h +51 -8
  154. warp/native/rand.h +45 -35
  155. warp/native/range.h +6 -2
  156. warp/native/reduce.cpp +1 -1
  157. warp/native/reduce.cu +10 -12
  158. warp/native/runlength_encode.cu +6 -10
  159. warp/native/scan.cu +8 -11
  160. warp/native/sparse.cpp +4 -4
  161. warp/native/sparse.cu +164 -154
  162. warp/native/spatial.h +2 -2
  163. warp/native/temp_buffer.h +14 -30
  164. warp/native/vec.h +107 -23
  165. warp/native/volume.h +120 -0
  166. warp/native/warp.cpp +560 -30
  167. warp/native/warp.cu +431 -44
  168. warp/native/warp.h +13 -4
  169. warp/optim/__init__.py +1 -0
  170. warp/optim/linear.py +922 -0
  171. warp/optim/sgd.py +92 -0
  172. warp/render/render_opengl.py +335 -119
  173. warp/render/render_usd.py +11 -11
  174. warp/sim/__init__.py +2 -2
  175. warp/sim/articulation.py +385 -185
  176. warp/sim/collide.py +8 -0
  177. warp/sim/import_mjcf.py +297 -106
  178. warp/sim/import_urdf.py +389 -210
  179. warp/sim/import_usd.py +198 -97
  180. warp/sim/inertia.py +17 -18
  181. warp/sim/integrator_euler.py +14 -8
  182. warp/sim/integrator_xpbd.py +158 -16
  183. warp/sim/model.py +795 -291
  184. warp/sim/render.py +3 -3
  185. warp/sim/utils.py +3 -0
  186. warp/sparse.py +640 -150
  187. warp/stubs.py +606 -267
  188. warp/tape.py +61 -10
  189. warp/tests/__main__.py +3 -6
  190. warp/tests/assets/curlnoise_golden.npy +0 -0
  191. warp/tests/assets/pnoise_golden.npy +0 -0
  192. warp/tests/{test_class_kernel.py → aux_test_class_kernel.py} +9 -1
  193. warp/tests/aux_test_conditional_unequal_types_kernels.py +21 -0
  194. warp/tests/{test_dependent.py → aux_test_dependent.py} +2 -2
  195. warp/tests/{test_reference.py → aux_test_reference.py} +1 -1
  196. warp/tests/aux_test_unresolved_func.py +14 -0
  197. warp/tests/aux_test_unresolved_symbol.py +14 -0
  198. warp/tests/disabled_kinematics.py +239 -0
  199. warp/tests/run_coverage_serial.py +31 -0
  200. warp/tests/test_adam.py +103 -106
  201. warp/tests/test_arithmetic.py +128 -74
  202. warp/tests/test_array.py +212 -97
  203. warp/tests/test_array_reduce.py +57 -23
  204. warp/tests/test_atomic.py +64 -28
  205. warp/tests/test_bool.py +99 -0
  206. warp/tests/test_builtins_resolution.py +1292 -0
  207. warp/tests/test_bvh.py +42 -18
  208. warp/tests/test_closest_point_edge_edge.py +54 -57
  209. warp/tests/test_codegen.py +208 -130
  210. warp/tests/test_compile_consts.py +28 -20
  211. warp/tests/test_conditional.py +108 -24
  212. warp/tests/test_copy.py +10 -12
  213. warp/tests/test_ctypes.py +112 -88
  214. warp/tests/test_dense.py +21 -14
  215. warp/tests/test_devices.py +98 -0
  216. warp/tests/test_dlpack.py +75 -75
  217. warp/tests/test_examples.py +277 -0
  218. warp/tests/test_fabricarray.py +955 -0
  219. warp/tests/test_fast_math.py +15 -11
  220. warp/tests/test_fem.py +1271 -0
  221. warp/tests/test_fp16.py +53 -19
  222. warp/tests/test_func.py +187 -86
  223. warp/tests/test_generics.py +194 -49
  224. warp/tests/test_grad.py +178 -109
  225. warp/tests/test_grad_customs.py +176 -0
  226. warp/tests/test_hash_grid.py +52 -37
  227. warp/tests/test_import.py +10 -23
  228. warp/tests/test_indexedarray.py +32 -31
  229. warp/tests/test_intersect.py +18 -9
  230. warp/tests/test_large.py +141 -0
  231. warp/tests/test_launch.py +14 -41
  232. warp/tests/test_lerp.py +64 -65
  233. warp/tests/test_linear_solvers.py +154 -0
  234. warp/tests/test_lvalue.py +493 -0
  235. warp/tests/test_marching_cubes.py +12 -13
  236. warp/tests/test_mat.py +517 -2898
  237. warp/tests/test_mat_lite.py +115 -0
  238. warp/tests/test_mat_scalar_ops.py +2889 -0
  239. warp/tests/test_math.py +103 -9
  240. warp/tests/test_matmul.py +305 -69
  241. warp/tests/test_matmul_lite.py +410 -0
  242. warp/tests/test_mesh.py +71 -14
  243. warp/tests/test_mesh_query_aabb.py +41 -25
  244. warp/tests/test_mesh_query_point.py +140 -22
  245. warp/tests/test_mesh_query_ray.py +39 -22
  246. warp/tests/test_mlp.py +30 -22
  247. warp/tests/test_model.py +92 -89
  248. warp/tests/test_modules_lite.py +39 -0
  249. warp/tests/test_multigpu.py +88 -114
  250. warp/tests/test_noise.py +12 -11
  251. warp/tests/test_operators.py +16 -20
  252. warp/tests/test_options.py +11 -11
  253. warp/tests/test_pinned.py +17 -18
  254. warp/tests/test_print.py +32 -11
  255. warp/tests/test_quat.py +275 -129
  256. warp/tests/test_rand.py +18 -16
  257. warp/tests/test_reload.py +38 -34
  258. warp/tests/test_rounding.py +50 -43
  259. warp/tests/test_runlength_encode.py +168 -20
  260. warp/tests/test_smoothstep.py +9 -11
  261. warp/tests/test_snippet.py +143 -0
  262. warp/tests/test_sparse.py +261 -63
  263. warp/tests/test_spatial.py +276 -243
  264. warp/tests/test_streams.py +110 -85
  265. warp/tests/test_struct.py +268 -63
  266. warp/tests/test_tape.py +39 -21
  267. warp/tests/test_torch.py +118 -89
  268. warp/tests/test_transient_module.py +12 -13
  269. warp/tests/test_types.py +614 -0
  270. warp/tests/test_utils.py +494 -0
  271. warp/tests/test_vec.py +354 -2050
  272. warp/tests/test_vec_lite.py +73 -0
  273. warp/tests/test_vec_scalar_ops.py +2099 -0
  274. warp/tests/test_volume.py +457 -293
  275. warp/tests/test_volume_write.py +124 -134
  276. warp/tests/unittest_serial.py +35 -0
  277. warp/tests/unittest_suites.py +341 -0
  278. warp/tests/unittest_utils.py +568 -0
  279. warp/tests/unused_test_misc.py +71 -0
  280. warp/tests/{test_debug.py → walkthough_debug.py} +3 -17
  281. warp/thirdparty/appdirs.py +36 -45
  282. warp/thirdparty/unittest_parallel.py +549 -0
  283. warp/torch.py +9 -6
  284. warp/types.py +1089 -366
  285. warp/utils.py +93 -387
  286. warp_lang-0.11.0.dist-info/METADATA +238 -0
  287. warp_lang-0.11.0.dist-info/RECORD +332 -0
  288. {warp_lang-0.10.1.dist-info → warp_lang-0.11.0.dist-info}/WHEEL +1 -1
  289. warp/tests/test_all.py +0 -219
  290. warp/tests/test_array_scan.py +0 -60
  291. warp/tests/test_base.py +0 -208
  292. warp/tests/test_unresolved_func.py +0 -7
  293. warp/tests/test_unresolved_symbol.py +0 -7
  294. warp_lang-0.10.1.dist-info/METADATA +0 -21
  295. warp_lang-0.10.1.dist-info/RECORD +0 -188
  296. /warp/tests/{test_compile_consts_dummy.py → aux_test_compile_consts_dummy.py} +0 -0
  297. /warp/tests/{test_reference_reference.py → aux_test_reference_reference.py} +0 -0
  298. /warp/tests/{test_square.py → aux_test_square.py} +0 -0
  299. {warp_lang-0.10.1.dist-info → warp_lang-0.11.0.dist-info}/LICENSE.md +0 -0
  300. {warp_lang-0.10.1.dist-info → warp_lang-0.11.0.dist-info}/top_level.txt +0 -0
warp/native/fabric.h ADDED
@@ -0,0 +1,228 @@
1
+ #pragma once
2
+
3
+ #include "builtin.h"
4
+
5
+ namespace wp
6
+ {
7
+
8
+ struct fabricbucket_t
9
+ {
10
+ size_t index_start;
11
+ size_t index_end;
12
+ void* ptr;
13
+ size_t* lengths;
14
+ };
15
+
16
+
17
+ template <typename T>
18
+ struct fabricarray_t
19
+ {
20
+ CUDA_CALLABLE inline fabricarray_t()
21
+ : nbuckets(0),
22
+ size(0)
23
+ {}
24
+
25
+ CUDA_CALLABLE inline bool empty() const { return !size; }
26
+
27
+ fabricbucket_t* buckets; // array of fabricbucket_t on the correct device
28
+
29
+ size_t nbuckets;
30
+ size_t size;
31
+ };
32
+
33
+
34
+ template <typename T>
35
+ struct indexedfabricarray_t
36
+ {
37
+ CUDA_CALLABLE inline indexedfabricarray_t()
38
+ : indices(),
39
+ size(0)
40
+ {}
41
+
42
+ CUDA_CALLABLE inline bool empty() const { return !size; }
43
+
44
+ fabricarray_t<T> fa;
45
+
46
+ // TODO: we use 32-bit indices for consistency with other Warp indexed arrays,
47
+ // but Fabric uses 64-bit indexing.
48
+ int* indices;
49
+ size_t size;
50
+ };
51
+
52
+
53
+ #ifndef FABRICARRAY_USE_BINARY_SEARCH
54
+ #define FABRICARRAY_USE_BINARY_SEARCH 1
55
+ #endif
56
+
57
+ template <typename T>
58
+ CUDA_CALLABLE inline const fabricbucket_t* fabricarray_find_bucket(const fabricarray_t<T>& fa, size_t i)
59
+ {
60
+ #if FABRICARRAY_USE_BINARY_SEARCH
61
+ // use binary search to find the right bucket
62
+ const fabricbucket_t* bucket = nullptr;
63
+ size_t lo = 0;
64
+ size_t hi = fa.nbuckets - 1;
65
+ while (hi >= lo)
66
+ {
67
+ size_t mid = (lo + hi) >> 1;
68
+ bucket = fa.buckets + mid;
69
+ if (i >= bucket->index_end)
70
+ lo = mid + 1;
71
+ else if (i < bucket->index_start)
72
+ hi = mid - 1;
73
+ else
74
+ return bucket;
75
+ }
76
+ return nullptr;
77
+ #else
78
+ // use linear search to find the right bucket
79
+ const fabricbucket_t* bucket = fa.buckets;
80
+ const fabricbucket_t* bucket_end = bucket + fa.nbuckets;
81
+ for (; bucket < bucket_end; ++bucket)
82
+ {
83
+ if (i < bucket->index_end)
84
+ return bucket;
85
+ }
86
+ return nullptr;
87
+ #endif
88
+ }
89
+
90
+
91
+ // Compute the pointer to a fabricarray element at index i.
92
+ // This function is similar to wp::index(), but the array data type doesn't need to be known at compile time.
93
+ CUDA_CALLABLE inline void* fabricarray_element_ptr(const fabricarray_t<void>& fa, size_t i, size_t elem_size)
94
+ {
95
+ const fabricbucket_t* bucket = fabricarray_find_bucket(fa, i);
96
+
97
+ size_t index_in_bucket = i - bucket->index_start;
98
+
99
+ return (char*)bucket->ptr + index_in_bucket * elem_size;
100
+ }
101
+
102
+
103
+ template <typename T>
104
+ CUDA_CALLABLE inline T& index(const fabricarray_t<T>& fa, size_t i)
105
+ {
106
+ const fabricbucket_t* bucket = fabricarray_find_bucket(fa, i);
107
+ assert(bucket && "Fabric array index out of range");
108
+
109
+ size_t index_in_bucket = i - bucket->index_start;
110
+
111
+ T& result = *((T*)bucket->ptr + index_in_bucket);
112
+
113
+ FP_VERIFY_FWD_1(result)
114
+
115
+ return result;
116
+ }
117
+
118
+
119
+ // indexing for fabric array of arrays
120
+ template <typename T>
121
+ CUDA_CALLABLE inline T& index(const fabricarray_t<T>& fa, size_t i, size_t j)
122
+ {
123
+ const fabricbucket_t* bucket = fabricarray_find_bucket(fa, i);
124
+ assert(bucket && "Fabric array index out of range");
125
+
126
+ assert(bucket->lengths && "Missing inner array lengths");
127
+
128
+ size_t index_in_bucket = i - bucket->index_start;
129
+
130
+ void* ptr = *((void**)bucket->ptr + index_in_bucket);
131
+ size_t length = *((size_t*)bucket->lengths + index_in_bucket);
132
+
133
+ assert(j < length && "Fabric array inner index out of range");
134
+
135
+ T& result = *((T*)ptr + j);
136
+
137
+ FP_VERIFY_FWD_1(result)
138
+
139
+ return result;
140
+ }
141
+
142
+
143
+ template <typename T>
144
+ CUDA_CALLABLE inline array_t<T> view(fabricarray_t<T>& fa, size_t i)
145
+ {
146
+ const fabricbucket_t* bucket = fabricarray_find_bucket(fa, i);
147
+ assert(bucket && "Fabric array index out of range");
148
+
149
+ assert(bucket->lengths && "Missing inner array lengths");
150
+
151
+ size_t index_in_bucket = i - bucket->index_start;
152
+
153
+ void* ptr = *((void**)bucket->ptr + index_in_bucket);
154
+ size_t length = *((size_t*)bucket->lengths + index_in_bucket);
155
+
156
+ return array_t<T>((T*)ptr, int(length));
157
+ }
158
+
159
+
160
+ template <typename T>
161
+ CUDA_CALLABLE inline T& index(const indexedfabricarray_t<T>& ifa, size_t i)
162
+ {
163
+ // index lookup
164
+ assert(i < ifa.size);
165
+ i = size_t(ifa.indices[i]);
166
+
167
+ const fabricbucket_t* bucket = fabricarray_find_bucket(ifa.fa, i);
168
+ assert(bucket && "Fabric array index out of range");
169
+
170
+ size_t index_in_bucket = i - bucket->index_start;
171
+
172
+ T& result = *((T*)bucket->ptr + index_in_bucket);
173
+
174
+ FP_VERIFY_FWD_1(result)
175
+
176
+ return result;
177
+ }
178
+
179
+
180
+ // indexing for fabric array of arrays
181
+ template <typename T>
182
+ CUDA_CALLABLE inline T& index(const indexedfabricarray_t<T>& ifa, size_t i, size_t j)
183
+ {
184
+ // index lookup
185
+ assert(i < ifa.size);
186
+ i = size_t(ifa.indices[i]);
187
+
188
+ const fabricbucket_t* bucket = fabricarray_find_bucket(ifa.fa, i);
189
+ assert(bucket && "Fabric array index out of range");
190
+
191
+ assert(bucket->lengths && "Missing inner array lengths");
192
+
193
+ size_t index_in_bucket = i - bucket->index_start;
194
+
195
+ void* ptr = *((void**)bucket->ptr + index_in_bucket);
196
+ size_t length = *((size_t*)bucket->lengths + index_in_bucket);
197
+
198
+ assert(j < length && "Fabric array inner index out of range");
199
+
200
+ T& result = *((T*)ptr + j);
201
+
202
+ FP_VERIFY_FWD_1(result)
203
+
204
+ return result;
205
+ }
206
+
207
+
208
+ template <typename T>
209
+ CUDA_CALLABLE inline array_t<T> view(indexedfabricarray_t<T>& ifa, size_t i)
210
+ {
211
+ // index lookup
212
+ assert(i < ifa.size);
213
+ i = size_t(ifa.indices[i]);
214
+
215
+ const fabricbucket_t* bucket = fabricarray_find_bucket(ifa.fa, i);
216
+ assert(bucket && "Fabric array index out of range");
217
+
218
+ assert(bucket->lengths && "Missing inner array lengths");
219
+
220
+ size_t index_in_bucket = i - bucket->index_start;
221
+
222
+ void* ptr = *((void**)bucket->ptr + index_in_bucket);
223
+ size_t length = *((size_t*)bucket->lengths + index_in_bucket);
224
+
225
+ return array_t<T>((T*)ptr, int(length));
226
+ }
227
+
228
+ } // namespace wp
warp/native/hashgrid.cpp CHANGED
@@ -93,8 +93,8 @@ void hash_grid_reserve_host(uint64_t id, int num_points)
93
93
  free_host(grid->point_ids);
94
94
 
95
95
  const int num_to_alloc = num_points*3/2;
96
- grid->point_cells = (int*)alloc_host(2*num_to_alloc*sizeof(int)); // *2 for auxilliary radix buffers
97
- grid->point_ids = (int*)alloc_host(2*num_to_alloc*sizeof(int)); // *2 for auxilliary radix buffers
96
+ grid->point_cells = (int*)alloc_host(2*num_to_alloc*sizeof(int)); // *2 for auxiliary radix buffers
97
+ grid->point_ids = (int*)alloc_host(2*num_to_alloc*sizeof(int)); // *2 for auxiliary radix buffers
98
98
 
99
99
  grid->max_points = num_to_alloc;
100
100
  }
@@ -212,8 +212,8 @@ void hash_grid_reserve_device(uint64_t id, int num_points)
212
212
  free_device(WP_CURRENT_CONTEXT, grid.point_ids);
213
213
 
214
214
  const int num_to_alloc = num_points*3/2;
215
- grid.point_cells = (int*)alloc_device(WP_CURRENT_CONTEXT, 2*num_to_alloc*sizeof(int)); // *2 for auxilliary radix buffers
216
- grid.point_ids = (int*)alloc_device(WP_CURRENT_CONTEXT, 2*num_to_alloc*sizeof(int)); // *2 for auxilliary radix buffers
215
+ grid.point_cells = (int*)alloc_device(WP_CURRENT_CONTEXT, 2*num_to_alloc*sizeof(int)); // *2 for auxiliary radix buffers
216
+ grid.point_ids = (int*)alloc_device(WP_CURRENT_CONTEXT, 2*num_to_alloc*sizeof(int)); // *2 for auxiliary radix buffers
217
217
  grid.max_points = num_to_alloc;
218
218
 
219
219
  // ensure we pre-size our sort routine to avoid
warp/native/hashgrid.h CHANGED
@@ -75,8 +75,28 @@ CUDA_CALLABLE inline int hash_grid_index(const HashGrid& grid, const vec3& p)
75
75
  // stores state required to traverse neighboring cells of a point
76
76
  struct hash_grid_query_t
77
77
  {
78
- CUDA_CALLABLE hash_grid_query_t() {}
79
- CUDA_CALLABLE hash_grid_query_t(int) {} // for backward pass
78
+ CUDA_CALLABLE hash_grid_query_t()
79
+ : x_start(0),
80
+ y_start(0),
81
+ z_start(0),
82
+ x_end(0),
83
+ y_end(0),
84
+ z_end(0),
85
+ x(0),
86
+ y(0),
87
+ z(0),
88
+ cell(0),
89
+ cell_index(0),
90
+ cell_end(0),
91
+ current(0),
92
+ grid()
93
+ {}
94
+
95
+ // Required for adjoint computations.
96
+ CUDA_CALLABLE inline hash_grid_query_t& operator+=(const hash_grid_query_t& other)
97
+ {
98
+ return *this;
99
+ }
80
100
 
81
101
  int x_start;
82
102
  int y_start;
warp/native/intersect.h CHANGED
@@ -114,6 +114,21 @@ CUDA_CALLABLE inline vec2 closest_point_to_triangle(const vec3& a, const vec3& b
114
114
  return vec2(u, v);
115
115
  }
116
116
 
117
+ CUDA_CALLABLE inline vec2 furthest_point_to_triangle(const vec3& a, const vec3& b, const vec3& c, const vec3& p)
118
+ {
119
+ vec3 pa = p-a;
120
+ vec3 pb = p-b;
121
+ vec3 pc = p-c;
122
+ float dist_a = dot(pa, pa);
123
+ float dist_b = dot(pb, pb);
124
+ float dist_c = dot(pc, pc);
125
+
126
+ if (dist_a > dist_b && dist_a > dist_c)
127
+ return vec2(1.0f, 0.0f); // a is furthest
128
+ if (dist_b > dist_c)
129
+ return vec2(0.0f, 1.0f); // b is furthest
130
+ return vec2(0.0f, 0.0f); // c is furthest
131
+ }
117
132
 
118
133
  CUDA_CALLABLE inline bool intersect_ray_aabb(const vec3& pos, const vec3& rcp_dir, const vec3& lower, const vec3& upper, float& t)
119
134
  {
@@ -357,8 +372,8 @@ CUDA_CALLABLE inline bool intersect_ray_tri_woop(const vec3& p, const vec3& dir,
357
372
  }
358
373
 
359
374
  CUDA_CALLABLE inline void adj_intersect_ray_tri_woop(
360
- const vec3& p, const vec3& dir, const vec3& a, const vec3& b, const vec3& c, float& t, float& u, float& v, float& sign, vec3* normal,
361
- vec3& adj_p, vec3& adj_dir, vec3& adj_a, vec3& adj_b, vec3& adj_c, float& adj_t, float& adj_u, float& adj_v, float& adj_sign, vec3* adj_normal, bool& adj_ret)
375
+ const vec3& p, const vec3& dir, const vec3& a, const vec3& b, const vec3& c, float t, float u, float v, float sign, const vec3& normal,
376
+ vec3& adj_p, vec3& adj_dir, vec3& adj_a, vec3& adj_b, vec3& adj_c, float& adj_t, float& adj_u, float& adj_v, float& adj_sign, vec3& adj_normal, bool& adj_ret)
362
377
  {
363
378
 
364
379
  // todo: precompute for ray
@@ -854,7 +869,7 @@ CUDA_CALLABLE inline void adj_closest_point_to_triangle(
854
869
  wp::adj_sub(var_9, var_71, adj_9, adj_71, adj_73);
855
870
  wp::adj_mul(var_21, var_70, adj_21, adj_70, adj_72);
856
871
  wp::adj_mul(var_41, var_70, adj_41, adj_70, adj_71);
857
- wp::adj_div(var_9, var_69, adj_9, adj_69, adj_70);
872
+ wp::adj_div(var_9, var_69, var_70, adj_9, adj_69, adj_70);
858
873
  wp::adj_add(var_68, var_21, adj_68, adj_21, adj_69);
859
874
  wp::adj_add(var_53, var_41, adj_53, adj_41, adj_68);
860
875
  wp::adj_select(var_64, var_50, var_66, adj_64, adj_50, adj_66, adj_67);
@@ -866,7 +881,7 @@ CUDA_CALLABLE inline void adj_closest_point_to_triangle(
866
881
  }
867
882
  wp::adj_sub(var_32, var_33, adj_32, adj_33, adj_62);
868
883
  wp::adj_sub(var_13, var_12, adj_13, adj_12, adj_60);
869
- wp::adj_div(var_54, var_57, adj_54, adj_57, adj_58);
884
+ wp::adj_div(var_54, var_57, var_58, adj_54, adj_57, adj_58);
870
885
  wp::adj_add(var_55, var_56, adj_55, adj_56, adj_57);
871
886
  wp::adj_sub(var_32, var_33, adj_32, adj_33, adj_56);
872
887
  wp::adj_sub(var_13, var_12, adj_13, adj_12, adj_55);
@@ -881,7 +896,7 @@ CUDA_CALLABLE inline void adj_closest_point_to_triangle(
881
896
  wp::adj_vec2(var_48, var_5, adj_48, adj_5, adj_49);
882
897
  wp::adj_sub(var_9, var_43, adj_9, adj_43, adj_48);
883
898
  }
884
- wp::adj_div(var_4, var_42, adj_4, adj_42, adj_43);
899
+ wp::adj_div(var_4, var_42, var_43, adj_4, adj_42, adj_43);
885
900
  wp::adj_sub(var_4, var_33, adj_4, adj_33, adj_42);
886
901
  wp::adj_sub(var_39, var_40, adj_39, adj_40, adj_41);
887
902
  wp::adj_mul(var_3, var_33, adj_3, adj_33, adj_40);
@@ -902,7 +917,7 @@ CUDA_CALLABLE inline void adj_closest_point_to_triangle(
902
917
  wp::adj_vec2(var_28, var_23, adj_28, adj_23, adj_29);
903
918
  wp::adj_sub(var_9, var_23, adj_9, adj_23, adj_28);
904
919
  }
905
- wp::adj_div(var_3, var_22, adj_3, adj_22, adj_23);
920
+ wp::adj_div(var_3, var_22, var_23, adj_3, adj_22, adj_23);
906
921
  wp::adj_sub(var_3, var_12, adj_3, adj_12, adj_22);
907
922
  wp::adj_sub(var_19, var_20, adj_19, adj_20, adj_21);
908
923
  wp::adj_mul(var_12, var_4, adj_12, adj_4, adj_20);
@@ -974,7 +989,7 @@ def closest_point_edge_edge(
974
989
  else:
975
990
  c = wp.dot(d1, r)
976
991
  if e <= epsilon:
977
- # second segment genereates into a point
992
+ # second segment generates into a point
978
993
  s = wp.clamp(-c / a, 0.0, 1.0) # t = 0 => s = (b*t-c)/a = -c/a
979
994
  t = float(0.0)
980
995
  else:
@@ -276,7 +276,7 @@ static CUDA_CALLABLE void adj_closest_point_edge_edge(vec3 var_p1,
276
276
  label1:;
277
277
  adj_71 += adj_ret;
278
278
  wp::adj_vec3(var_61, var_62, var_70, adj_61, adj_62, adj_70, adj_71);
279
- wp::adj_length(var_69, adj_69, adj_70);
279
+ wp::adj_length(var_69, var_70, adj_69, adj_70);
280
280
  wp::adj_sub(var_68, var_65, adj_68, adj_65, adj_69);
281
281
  wp::adj_add(var_p2, var_67, adj_p2, adj_67, adj_68);
282
282
  wp::adj_mul(var_66, var_62, adj_66, adj_62, adj_67);
@@ -297,7 +297,7 @@ static CUDA_CALLABLE void adj_closest_point_edge_edge(vec3 var_p1,
297
297
  wp::adj_select(var_51, var_49, var_54, adj_51, adj_49, adj_54, adj_55);
298
298
  if (var_51) {
299
299
  wp::adj_clamp(var_53, var_6, var_25, adj_53, adj_6, adj_25, adj_54);
300
- wp::adj_div(var_52, var_3, adj_52, adj_3, adj_53);
300
+ wp::adj_div(var_52, var_3, var_53, adj_52, adj_3, adj_53);
301
301
  wp::adj_sub(var_30, var_21, adj_30, adj_21, adj_52);
302
302
  }
303
303
  }
@@ -305,10 +305,10 @@ static CUDA_CALLABLE void adj_closest_point_edge_edge(vec3 var_p1,
305
305
  wp::adj_select(var_45, var_41, var_48, adj_45, adj_41, adj_48, adj_49);
306
306
  if (var_45) {
307
307
  wp::adj_clamp(var_47, var_6, var_25, adj_47, adj_6, adj_25, adj_48);
308
- wp::adj_div(var_46, var_3, adj_46, adj_3, adj_47);
308
+ wp::adj_div(var_46, var_3, var_47, adj_46, adj_3, adj_47);
309
309
  wp::adj_neg(var_21, adj_21, adj_46);
310
310
  }
311
- wp::adj_div(var_43, var_4, adj_43, adj_4, adj_44);
311
+ wp::adj_div(var_43, var_4, var_44, adj_43, adj_4, adj_44);
312
312
  wp::adj_add(var_42, var_5, adj_42, adj_5, adj_43);
313
313
  wp::adj_mul(var_30, var_41, adj_30, adj_41, adj_42);
314
314
  wp::adj_select(var_34, var_6, var_40, adj_34, adj_6, adj_40, adj_41);
@@ -317,7 +317,7 @@ static CUDA_CALLABLE void adj_closest_point_edge_edge(vec3 var_p1,
317
317
  wp::adj_select(var_34, var_28, var_39, adj_34, adj_28, adj_39, adj_40);
318
318
  if (var_34) {
319
319
  wp::adj_clamp(var_38, var_6, var_25, adj_38, adj_6, adj_25, adj_39);
320
- wp::adj_div(var_37, var_33, adj_37, adj_33, adj_38);
320
+ wp::adj_div(var_37, var_33, var_38, adj_37, adj_33, adj_38);
321
321
  wp::adj_sub(var_35, var_36, adj_35, adj_36, adj_37);
322
322
  wp::adj_mul(var_21, var_4, adj_21, adj_4, adj_36);
323
323
  wp::adj_mul(var_30, var_5, adj_30, adj_5, adj_35);
@@ -332,7 +332,7 @@ static CUDA_CALLABLE void adj_closest_point_edge_edge(vec3 var_p1,
332
332
  if (var_22) {
333
333
  wp::adj_cast_float(var_6, adj_6, adj_27);
334
334
  wp::adj_clamp(var_24, var_6, var_25, adj_24, adj_6, adj_25, adj_26);
335
- wp::adj_div(var_23, var_3, adj_23, adj_3, adj_24);
335
+ wp::adj_div(var_23, var_3, var_24, adj_23, adj_3, adj_24);
336
336
  wp::adj_neg(var_21, adj_21, adj_23);
337
337
  }
338
338
  wp::adj_dot(var_0, var_2, adj_0, adj_2, adj_21);
@@ -341,7 +341,7 @@ static CUDA_CALLABLE void adj_closest_point_edge_edge(vec3 var_p1,
341
341
  wp::adj_select(var_15, var_7, var_16, adj_15, adj_7, adj_16, adj_19);
342
342
  if (var_15) {
343
343
  wp::adj_cast_float(var_17, adj_17, adj_18);
344
- wp::adj_div(var_5, var_4, adj_5, adj_4, adj_17);
344
+ wp::adj_div(var_5, var_4, var_17, adj_5, adj_4, adj_17);
345
345
  wp::adj_cast_float(var_6, adj_6, adj_16);
346
346
  }
347
347
  if (var_13) {
@@ -349,7 +349,7 @@ static CUDA_CALLABLE void adj_closest_point_edge_edge(vec3 var_p1,
349
349
  adj_14 += adj_ret;
350
350
  wp::adj_vec3(var_7, var_8, var_10, adj_7, adj_8, adj_10, adj_14);
351
351
  }
352
- wp::adj_length(var_9, adj_9, adj_10);
352
+ wp::adj_length(var_9, var_10, adj_9, adj_10);
353
353
  wp::adj_sub(var_p2, var_p1, adj_p2, adj_p1, adj_9);
354
354
  wp::adj_cast_float(var_6, adj_6, adj_8);
355
355
  wp::adj_cast_float(var_6, adj_6, adj_7);
@@ -103,7 +103,7 @@
103
103
  #define POINT_IN_TRI(V0,U0,U1,U2) \
104
104
  { \
105
105
  float a,b,c,d0,d1,d2; \
106
- /* is T1 completly inside T2? */ \
106
+ /* is T1 completely inside T2? */ \
107
107
  /* check if V0 is inside tri(U0,U1,U2) */ \
108
108
  a=U1[i1]-U0[i1]; \
109
109
  b=-(U1[i0]-U0[i0]); \