warp-lang 1.0.0b2__py3-none-win_amd64.whl → 1.0.0b6__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of warp-lang might be problematic. Click here for more details.

Files changed (271) hide show
  1. docs/conf.py +17 -5
  2. examples/env/env_ant.py +1 -1
  3. examples/env/env_cartpole.py +1 -1
  4. examples/env/env_humanoid.py +1 -1
  5. examples/env/env_usd.py +4 -1
  6. examples/env/environment.py +8 -9
  7. examples/example_dem.py +34 -33
  8. examples/example_diffray.py +364 -337
  9. examples/example_fluid.py +32 -23
  10. examples/example_jacobian_ik.py +97 -93
  11. examples/example_marching_cubes.py +6 -16
  12. examples/example_mesh.py +6 -16
  13. examples/example_mesh_intersect.py +16 -14
  14. examples/example_nvdb.py +14 -16
  15. examples/example_raycast.py +14 -13
  16. examples/example_raymarch.py +16 -23
  17. examples/example_render_opengl.py +19 -10
  18. examples/example_sim_cartpole.py +82 -78
  19. examples/example_sim_cloth.py +45 -48
  20. examples/example_sim_fk_grad.py +51 -44
  21. examples/example_sim_fk_grad_torch.py +47 -40
  22. examples/example_sim_grad_bounce.py +108 -133
  23. examples/example_sim_grad_cloth.py +99 -113
  24. examples/example_sim_granular.py +5 -6
  25. examples/{example_sim_sdf_shape.py → example_sim_granular_collision_sdf.py} +37 -26
  26. examples/example_sim_neo_hookean.py +51 -55
  27. examples/example_sim_particle_chain.py +4 -4
  28. examples/example_sim_quadruped.py +126 -81
  29. examples/example_sim_rigid_chain.py +54 -61
  30. examples/example_sim_rigid_contact.py +66 -70
  31. examples/example_sim_rigid_fem.py +3 -3
  32. examples/example_sim_rigid_force.py +1 -1
  33. examples/example_sim_rigid_gyroscopic.py +3 -4
  34. examples/example_sim_rigid_kinematics.py +28 -39
  35. examples/example_sim_trajopt.py +112 -110
  36. examples/example_sph.py +9 -8
  37. examples/example_wave.py +7 -7
  38. examples/fem/bsr_utils.py +30 -17
  39. examples/fem/example_apic_fluid.py +85 -69
  40. examples/fem/example_convection_diffusion.py +97 -93
  41. examples/fem/example_convection_diffusion_dg.py +142 -149
  42. examples/fem/example_convection_diffusion_dg0.py +141 -136
  43. examples/fem/example_deformed_geometry.py +146 -0
  44. examples/fem/example_diffusion.py +115 -84
  45. examples/fem/example_diffusion_3d.py +116 -86
  46. examples/fem/example_diffusion_mgpu.py +102 -79
  47. examples/fem/example_mixed_elasticity.py +139 -100
  48. examples/fem/example_navier_stokes.py +175 -162
  49. examples/fem/example_stokes.py +143 -111
  50. examples/fem/example_stokes_transfer.py +186 -157
  51. examples/fem/mesh_utils.py +59 -97
  52. examples/fem/plot_utils.py +138 -17
  53. tools/ci/publishing/build_nodes_info.py +54 -0
  54. warp/__init__.py +4 -3
  55. warp/__init__.pyi +1 -0
  56. warp/bin/warp-clang.dll +0 -0
  57. warp/bin/warp.dll +0 -0
  58. warp/build.py +5 -3
  59. warp/build_dll.py +29 -9
  60. warp/builtins.py +836 -492
  61. warp/codegen.py +864 -553
  62. warp/config.py +3 -1
  63. warp/context.py +389 -172
  64. warp/fem/__init__.py +24 -6
  65. warp/fem/cache.py +318 -25
  66. warp/fem/dirichlet.py +7 -3
  67. warp/fem/domain.py +14 -0
  68. warp/fem/field/__init__.py +30 -38
  69. warp/fem/field/field.py +149 -0
  70. warp/fem/field/nodal_field.py +244 -138
  71. warp/fem/field/restriction.py +8 -6
  72. warp/fem/field/test.py +127 -59
  73. warp/fem/field/trial.py +117 -60
  74. warp/fem/geometry/__init__.py +5 -1
  75. warp/fem/geometry/deformed_geometry.py +271 -0
  76. warp/fem/geometry/element.py +24 -1
  77. warp/fem/geometry/geometry.py +86 -14
  78. warp/fem/geometry/grid_2d.py +112 -54
  79. warp/fem/geometry/grid_3d.py +134 -65
  80. warp/fem/geometry/hexmesh.py +953 -0
  81. warp/fem/geometry/partition.py +85 -33
  82. warp/fem/geometry/quadmesh_2d.py +532 -0
  83. warp/fem/geometry/tetmesh.py +451 -115
  84. warp/fem/geometry/trimesh_2d.py +197 -92
  85. warp/fem/integrate.py +534 -268
  86. warp/fem/operator.py +58 -31
  87. warp/fem/polynomial.py +11 -0
  88. warp/fem/quadrature/__init__.py +1 -1
  89. warp/fem/quadrature/pic_quadrature.py +150 -58
  90. warp/fem/quadrature/quadrature.py +209 -57
  91. warp/fem/space/__init__.py +230 -53
  92. warp/fem/space/basis_space.py +489 -0
  93. warp/fem/space/collocated_function_space.py +105 -0
  94. warp/fem/space/dof_mapper.py +49 -2
  95. warp/fem/space/function_space.py +90 -39
  96. warp/fem/space/grid_2d_function_space.py +149 -496
  97. warp/fem/space/grid_3d_function_space.py +173 -538
  98. warp/fem/space/hexmesh_function_space.py +352 -0
  99. warp/fem/space/partition.py +129 -76
  100. warp/fem/space/quadmesh_2d_function_space.py +369 -0
  101. warp/fem/space/restriction.py +46 -34
  102. warp/fem/space/shape/__init__.py +15 -0
  103. warp/fem/space/shape/cube_shape_function.py +738 -0
  104. warp/fem/space/shape/shape_function.py +103 -0
  105. warp/fem/space/shape/square_shape_function.py +611 -0
  106. warp/fem/space/shape/tet_shape_function.py +567 -0
  107. warp/fem/space/shape/triangle_shape_function.py +429 -0
  108. warp/fem/space/tetmesh_function_space.py +132 -1039
  109. warp/fem/space/topology.py +295 -0
  110. warp/fem/space/trimesh_2d_function_space.py +104 -742
  111. warp/fem/types.py +13 -11
  112. warp/fem/utils.py +335 -60
  113. warp/native/array.h +120 -34
  114. warp/native/builtin.h +101 -72
  115. warp/native/bvh.cpp +73 -325
  116. warp/native/bvh.cu +406 -23
  117. warp/native/bvh.h +22 -40
  118. warp/native/clang/clang.cpp +1 -0
  119. warp/native/crt.h +2 -0
  120. warp/native/cuda_util.cpp +8 -3
  121. warp/native/cuda_util.h +1 -0
  122. warp/native/exports.h +1522 -1243
  123. warp/native/intersect.h +19 -4
  124. warp/native/intersect_adj.h +8 -8
  125. warp/native/mat.h +76 -17
  126. warp/native/mesh.cpp +33 -108
  127. warp/native/mesh.cu +114 -18
  128. warp/native/mesh.h +395 -40
  129. warp/native/noise.h +272 -329
  130. warp/native/quat.h +51 -8
  131. warp/native/rand.h +44 -34
  132. warp/native/reduce.cpp +1 -1
  133. warp/native/sparse.cpp +4 -4
  134. warp/native/sparse.cu +163 -155
  135. warp/native/spatial.h +2 -2
  136. warp/native/temp_buffer.h +18 -14
  137. warp/native/vec.h +103 -21
  138. warp/native/warp.cpp +2 -1
  139. warp/native/warp.cu +28 -3
  140. warp/native/warp.h +4 -3
  141. warp/render/render_opengl.py +261 -109
  142. warp/sim/__init__.py +1 -2
  143. warp/sim/articulation.py +385 -185
  144. warp/sim/import_mjcf.py +59 -48
  145. warp/sim/import_urdf.py +15 -15
  146. warp/sim/import_usd.py +174 -102
  147. warp/sim/inertia.py +17 -18
  148. warp/sim/integrator_xpbd.py +4 -3
  149. warp/sim/model.py +330 -250
  150. warp/sim/render.py +1 -1
  151. warp/sparse.py +625 -152
  152. warp/stubs.py +341 -309
  153. warp/tape.py +9 -6
  154. warp/tests/__main__.py +3 -6
  155. warp/tests/assets/curlnoise_golden.npy +0 -0
  156. warp/tests/assets/pnoise_golden.npy +0 -0
  157. warp/tests/{test_class_kernel.py → aux_test_class_kernel.py} +9 -1
  158. warp/tests/aux_test_conditional_unequal_types_kernels.py +21 -0
  159. warp/tests/{test_dependent.py → aux_test_dependent.py} +2 -2
  160. warp/tests/{test_reference.py → aux_test_reference.py} +1 -1
  161. warp/tests/aux_test_unresolved_func.py +14 -0
  162. warp/tests/aux_test_unresolved_symbol.py +14 -0
  163. warp/tests/disabled_kinematics.py +239 -0
  164. warp/tests/run_coverage_serial.py +31 -0
  165. warp/tests/test_adam.py +103 -106
  166. warp/tests/test_arithmetic.py +94 -74
  167. warp/tests/test_array.py +82 -101
  168. warp/tests/test_array_reduce.py +57 -23
  169. warp/tests/test_atomic.py +64 -28
  170. warp/tests/test_bool.py +22 -12
  171. warp/tests/test_builtins_resolution.py +1292 -0
  172. warp/tests/test_bvh.py +18 -18
  173. warp/tests/test_closest_point_edge_edge.py +54 -57
  174. warp/tests/test_codegen.py +165 -134
  175. warp/tests/test_compile_consts.py +28 -20
  176. warp/tests/test_conditional.py +108 -24
  177. warp/tests/test_copy.py +10 -12
  178. warp/tests/test_ctypes.py +112 -88
  179. warp/tests/test_dense.py +21 -14
  180. warp/tests/test_devices.py +98 -0
  181. warp/tests/test_dlpack.py +75 -75
  182. warp/tests/test_examples.py +237 -0
  183. warp/tests/test_fabricarray.py +22 -24
  184. warp/tests/test_fast_math.py +15 -11
  185. warp/tests/test_fem.py +1034 -124
  186. warp/tests/test_fp16.py +23 -16
  187. warp/tests/test_func.py +187 -86
  188. warp/tests/test_generics.py +194 -49
  189. warp/tests/test_grad.py +123 -181
  190. warp/tests/test_grad_customs.py +176 -0
  191. warp/tests/test_hash_grid.py +35 -34
  192. warp/tests/test_import.py +10 -23
  193. warp/tests/test_indexedarray.py +24 -25
  194. warp/tests/test_intersect.py +18 -9
  195. warp/tests/test_large.py +141 -0
  196. warp/tests/test_launch.py +14 -41
  197. warp/tests/test_lerp.py +64 -65
  198. warp/tests/test_lvalue.py +493 -0
  199. warp/tests/test_marching_cubes.py +12 -13
  200. warp/tests/test_mat.py +517 -2898
  201. warp/tests/test_mat_lite.py +115 -0
  202. warp/tests/test_mat_scalar_ops.py +2889 -0
  203. warp/tests/test_math.py +103 -9
  204. warp/tests/test_matmul.py +304 -69
  205. warp/tests/test_matmul_lite.py +410 -0
  206. warp/tests/test_mesh.py +60 -22
  207. warp/tests/test_mesh_query_aabb.py +21 -25
  208. warp/tests/test_mesh_query_point.py +111 -22
  209. warp/tests/test_mesh_query_ray.py +12 -24
  210. warp/tests/test_mlp.py +30 -22
  211. warp/tests/test_model.py +92 -89
  212. warp/tests/test_modules_lite.py +39 -0
  213. warp/tests/test_multigpu.py +88 -114
  214. warp/tests/test_noise.py +12 -11
  215. warp/tests/test_operators.py +16 -20
  216. warp/tests/test_options.py +11 -11
  217. warp/tests/test_pinned.py +17 -18
  218. warp/tests/test_print.py +32 -11
  219. warp/tests/test_quat.py +275 -129
  220. warp/tests/test_rand.py +18 -16
  221. warp/tests/test_reload.py +38 -34
  222. warp/tests/test_rounding.py +50 -43
  223. warp/tests/test_runlength_encode.py +168 -20
  224. warp/tests/test_smoothstep.py +9 -11
  225. warp/tests/test_snippet.py +143 -0
  226. warp/tests/test_sparse.py +261 -63
  227. warp/tests/test_spatial.py +276 -243
  228. warp/tests/test_streams.py +110 -85
  229. warp/tests/test_struct.py +268 -63
  230. warp/tests/test_tape.py +39 -21
  231. warp/tests/test_torch.py +90 -86
  232. warp/tests/test_transient_module.py +10 -12
  233. warp/tests/test_types.py +363 -0
  234. warp/tests/test_utils.py +451 -0
  235. warp/tests/test_vec.py +354 -2050
  236. warp/tests/test_vec_lite.py +73 -0
  237. warp/tests/test_vec_scalar_ops.py +2099 -0
  238. warp/tests/test_volume.py +418 -376
  239. warp/tests/test_volume_write.py +124 -134
  240. warp/tests/unittest_serial.py +35 -0
  241. warp/tests/unittest_suites.py +291 -0
  242. warp/tests/unittest_utils.py +342 -0
  243. warp/tests/{test_misc.py → unused_test_misc.py} +13 -5
  244. warp/tests/{test_debug.py → walkthough_debug.py} +3 -17
  245. warp/thirdparty/appdirs.py +36 -45
  246. warp/thirdparty/unittest_parallel.py +589 -0
  247. warp/types.py +622 -211
  248. warp/utils.py +54 -393
  249. warp_lang-1.0.0b6.dist-info/METADATA +238 -0
  250. warp_lang-1.0.0b6.dist-info/RECORD +409 -0
  251. {warp_lang-1.0.0b2.dist-info → warp_lang-1.0.0b6.dist-info}/WHEEL +1 -1
  252. examples/example_cache_management.py +0 -40
  253. examples/example_multigpu.py +0 -54
  254. examples/example_struct.py +0 -65
  255. examples/fem/example_stokes_transfer_3d.py +0 -210
  256. warp/bin/warp-clang.so +0 -0
  257. warp/bin/warp.so +0 -0
  258. warp/fem/field/discrete_field.py +0 -80
  259. warp/fem/space/nodal_function_space.py +0 -233
  260. warp/tests/test_all.py +0 -223
  261. warp/tests/test_array_scan.py +0 -60
  262. warp/tests/test_base.py +0 -208
  263. warp/tests/test_unresolved_func.py +0 -7
  264. warp/tests/test_unresolved_symbol.py +0 -7
  265. warp_lang-1.0.0b2.dist-info/METADATA +0 -26
  266. warp_lang-1.0.0b2.dist-info/RECORD +0 -380
  267. /warp/tests/{test_compile_consts_dummy.py → aux_test_compile_consts_dummy.py} +0 -0
  268. /warp/tests/{test_reference_reference.py → aux_test_reference_reference.py} +0 -0
  269. /warp/tests/{test_square.py → aux_test_square.py} +0 -0
  270. {warp_lang-1.0.0b2.dist-info → warp_lang-1.0.0b6.dist-info}/LICENSE.md +0 -0
  271. {warp_lang-1.0.0b2.dist-info → warp_lang-1.0.0b6.dist-info}/top_level.txt +0 -0
warp/native/intersect.h CHANGED
@@ -114,6 +114,21 @@ CUDA_CALLABLE inline vec2 closest_point_to_triangle(const vec3& a, const vec3& b
114
114
  return vec2(u, v);
115
115
  }
116
116
 
117
+ CUDA_CALLABLE inline vec2 furthest_point_to_triangle(const vec3& a, const vec3& b, const vec3& c, const vec3& p)
118
+ {
119
+ vec3 pa = p-a;
120
+ vec3 pb = p-b;
121
+ vec3 pc = p-c;
122
+ float dist_a = dot(pa, pa);
123
+ float dist_b = dot(pb, pb);
124
+ float dist_c = dot(pc, pc);
125
+
126
+ if (dist_a > dist_b && dist_a > dist_c)
127
+ return vec2(1.0f, 0.0f); // a is furthest
128
+ if (dist_b > dist_c)
129
+ return vec2(0.0f, 1.0f); // b is furthest
130
+ return vec2(0.0f, 0.0f); // c is furthest
131
+ }
117
132
 
118
133
  CUDA_CALLABLE inline bool intersect_ray_aabb(const vec3& pos, const vec3& rcp_dir, const vec3& lower, const vec3& upper, float& t)
119
134
  {
@@ -854,7 +869,7 @@ CUDA_CALLABLE inline void adj_closest_point_to_triangle(
854
869
  wp::adj_sub(var_9, var_71, adj_9, adj_71, adj_73);
855
870
  wp::adj_mul(var_21, var_70, adj_21, adj_70, adj_72);
856
871
  wp::adj_mul(var_41, var_70, adj_41, adj_70, adj_71);
857
- wp::adj_div(var_9, var_69, adj_9, adj_69, adj_70);
872
+ wp::adj_div(var_9, var_69, var_70, adj_9, adj_69, adj_70);
858
873
  wp::adj_add(var_68, var_21, adj_68, adj_21, adj_69);
859
874
  wp::adj_add(var_53, var_41, adj_53, adj_41, adj_68);
860
875
  wp::adj_select(var_64, var_50, var_66, adj_64, adj_50, adj_66, adj_67);
@@ -866,7 +881,7 @@ CUDA_CALLABLE inline void adj_closest_point_to_triangle(
866
881
  }
867
882
  wp::adj_sub(var_32, var_33, adj_32, adj_33, adj_62);
868
883
  wp::adj_sub(var_13, var_12, adj_13, adj_12, adj_60);
869
- wp::adj_div(var_54, var_57, adj_54, adj_57, adj_58);
884
+ wp::adj_div(var_54, var_57, var_58, adj_54, adj_57, adj_58);
870
885
  wp::adj_add(var_55, var_56, adj_55, adj_56, adj_57);
871
886
  wp::adj_sub(var_32, var_33, adj_32, adj_33, adj_56);
872
887
  wp::adj_sub(var_13, var_12, adj_13, adj_12, adj_55);
@@ -881,7 +896,7 @@ CUDA_CALLABLE inline void adj_closest_point_to_triangle(
881
896
  wp::adj_vec2(var_48, var_5, adj_48, adj_5, adj_49);
882
897
  wp::adj_sub(var_9, var_43, adj_9, adj_43, adj_48);
883
898
  }
884
- wp::adj_div(var_4, var_42, adj_4, adj_42, adj_43);
899
+ wp::adj_div(var_4, var_42, var_43, adj_4, adj_42, adj_43);
885
900
  wp::adj_sub(var_4, var_33, adj_4, adj_33, adj_42);
886
901
  wp::adj_sub(var_39, var_40, adj_39, adj_40, adj_41);
887
902
  wp::adj_mul(var_3, var_33, adj_3, adj_33, adj_40);
@@ -902,7 +917,7 @@ CUDA_CALLABLE inline void adj_closest_point_to_triangle(
902
917
  wp::adj_vec2(var_28, var_23, adj_28, adj_23, adj_29);
903
918
  wp::adj_sub(var_9, var_23, adj_9, adj_23, adj_28);
904
919
  }
905
- wp::adj_div(var_3, var_22, adj_3, adj_22, adj_23);
920
+ wp::adj_div(var_3, var_22, var_23, adj_3, adj_22, adj_23);
906
921
  wp::adj_sub(var_3, var_12, adj_3, adj_12, adj_22);
907
922
  wp::adj_sub(var_19, var_20, adj_19, adj_20, adj_21);
908
923
  wp::adj_mul(var_12, var_4, adj_12, adj_4, adj_20);
@@ -276,7 +276,7 @@ static CUDA_CALLABLE void adj_closest_point_edge_edge(vec3 var_p1,
276
276
  label1:;
277
277
  adj_71 += adj_ret;
278
278
  wp::adj_vec3(var_61, var_62, var_70, adj_61, adj_62, adj_70, adj_71);
279
- wp::adj_length(var_69, adj_69, adj_70);
279
+ wp::adj_length(var_69, var_70, adj_69, adj_70);
280
280
  wp::adj_sub(var_68, var_65, adj_68, adj_65, adj_69);
281
281
  wp::adj_add(var_p2, var_67, adj_p2, adj_67, adj_68);
282
282
  wp::adj_mul(var_66, var_62, adj_66, adj_62, adj_67);
@@ -297,7 +297,7 @@ static CUDA_CALLABLE void adj_closest_point_edge_edge(vec3 var_p1,
297
297
  wp::adj_select(var_51, var_49, var_54, adj_51, adj_49, adj_54, adj_55);
298
298
  if (var_51) {
299
299
  wp::adj_clamp(var_53, var_6, var_25, adj_53, adj_6, adj_25, adj_54);
300
- wp::adj_div(var_52, var_3, adj_52, adj_3, adj_53);
300
+ wp::adj_div(var_52, var_3, var_53, adj_52, adj_3, adj_53);
301
301
  wp::adj_sub(var_30, var_21, adj_30, adj_21, adj_52);
302
302
  }
303
303
  }
@@ -305,10 +305,10 @@ static CUDA_CALLABLE void adj_closest_point_edge_edge(vec3 var_p1,
305
305
  wp::adj_select(var_45, var_41, var_48, adj_45, adj_41, adj_48, adj_49);
306
306
  if (var_45) {
307
307
  wp::adj_clamp(var_47, var_6, var_25, adj_47, adj_6, adj_25, adj_48);
308
- wp::adj_div(var_46, var_3, adj_46, adj_3, adj_47);
308
+ wp::adj_div(var_46, var_3, var_47, adj_46, adj_3, adj_47);
309
309
  wp::adj_neg(var_21, adj_21, adj_46);
310
310
  }
311
- wp::adj_div(var_43, var_4, adj_43, adj_4, adj_44);
311
+ wp::adj_div(var_43, var_4, var_44, adj_43, adj_4, adj_44);
312
312
  wp::adj_add(var_42, var_5, adj_42, adj_5, adj_43);
313
313
  wp::adj_mul(var_30, var_41, adj_30, adj_41, adj_42);
314
314
  wp::adj_select(var_34, var_6, var_40, adj_34, adj_6, adj_40, adj_41);
@@ -317,7 +317,7 @@ static CUDA_CALLABLE void adj_closest_point_edge_edge(vec3 var_p1,
317
317
  wp::adj_select(var_34, var_28, var_39, adj_34, adj_28, adj_39, adj_40);
318
318
  if (var_34) {
319
319
  wp::adj_clamp(var_38, var_6, var_25, adj_38, adj_6, adj_25, adj_39);
320
- wp::adj_div(var_37, var_33, adj_37, adj_33, adj_38);
320
+ wp::adj_div(var_37, var_33, var_38, adj_37, adj_33, adj_38);
321
321
  wp::adj_sub(var_35, var_36, adj_35, adj_36, adj_37);
322
322
  wp::adj_mul(var_21, var_4, adj_21, adj_4, adj_36);
323
323
  wp::adj_mul(var_30, var_5, adj_30, adj_5, adj_35);
@@ -332,7 +332,7 @@ static CUDA_CALLABLE void adj_closest_point_edge_edge(vec3 var_p1,
332
332
  if (var_22) {
333
333
  wp::adj_cast_float(var_6, adj_6, adj_27);
334
334
  wp::adj_clamp(var_24, var_6, var_25, adj_24, adj_6, adj_25, adj_26);
335
- wp::adj_div(var_23, var_3, adj_23, adj_3, adj_24);
335
+ wp::adj_div(var_23, var_3, var_24, adj_23, adj_3, adj_24);
336
336
  wp::adj_neg(var_21, adj_21, adj_23);
337
337
  }
338
338
  wp::adj_dot(var_0, var_2, adj_0, adj_2, adj_21);
@@ -341,7 +341,7 @@ static CUDA_CALLABLE void adj_closest_point_edge_edge(vec3 var_p1,
341
341
  wp::adj_select(var_15, var_7, var_16, adj_15, adj_7, adj_16, adj_19);
342
342
  if (var_15) {
343
343
  wp::adj_cast_float(var_17, adj_17, adj_18);
344
- wp::adj_div(var_5, var_4, adj_5, adj_4, adj_17);
344
+ wp::adj_div(var_5, var_4, var_17, adj_5, adj_4, adj_17);
345
345
  wp::adj_cast_float(var_6, adj_6, adj_16);
346
346
  }
347
347
  if (var_13) {
@@ -349,7 +349,7 @@ static CUDA_CALLABLE void adj_closest_point_edge_edge(vec3 var_p1,
349
349
  adj_14 += adj_ret;
350
350
  wp::adj_vec3(var_7, var_8, var_10, adj_7, adj_8, adj_10, adj_14);
351
351
  }
352
- wp::adj_length(var_9, adj_9, adj_10);
352
+ wp::adj_length(var_9, var_10, adj_9, adj_10);
353
353
  wp::adj_sub(var_p2, var_p1, adj_p2, adj_p1, adj_9);
354
354
  wp::adj_cast_float(var_6, adj_6, adj_8);
355
355
  wp::adj_cast_float(var_6, adj_6, adj_7);
warp/native/mat.h CHANGED
@@ -298,7 +298,19 @@ inline CUDA_CALLABLE mat_t<Rows,Cols,Type> atomic_max(mat_t<Rows,Cols,Type> * ad
298
298
  }
299
299
 
300
300
  template<unsigned Rows, unsigned Cols, typename Type>
301
- inline CUDA_CALLABLE vec_t<Cols,Type> index(const mat_t<Rows,Cols,Type>& m, int row)
301
+ inline CUDA_CALLABLE void adj_atomic_minmax(
302
+ mat_t<Rows,Cols,Type> *addr,
303
+ mat_t<Rows,Cols,Type> *adj_addr,
304
+ const mat_t<Rows,Cols,Type> &value,
305
+ mat_t<Rows,Cols,Type> &adj_value)
306
+ {
307
+ for (unsigned i=0; i < Rows; ++i)
308
+ for (unsigned j=0; j < Cols; ++j)
309
+ adj_atomic_minmax(&addr->data[i][j], &adj_addr->data[i][j], value.data[i][j], adj_value.data[i][j]);
310
+ }
311
+
312
+ template<unsigned Rows, unsigned Cols, typename Type>
313
+ inline CUDA_CALLABLE vec_t<Cols,Type> extract(const mat_t<Rows,Cols,Type>& m, int row)
302
314
  {
303
315
  vec_t<Cols,Type> ret;
304
316
  for(unsigned i=0; i < Cols; ++i)
@@ -309,7 +321,7 @@ inline CUDA_CALLABLE vec_t<Cols,Type> index(const mat_t<Rows,Cols,Type>& m, int
309
321
  }
310
322
 
311
323
  template<unsigned Rows, unsigned Cols, typename Type>
312
- inline CUDA_CALLABLE Type index(const mat_t<Rows,Cols,Type>& m, int row, int col)
324
+ inline CUDA_CALLABLE Type extract(const mat_t<Rows,Cols,Type>& m, int row, int col)
313
325
  {
314
326
  #ifndef NDEBUG
315
327
  if (row < 0 || row >= Rows)
@@ -327,7 +339,7 @@ inline CUDA_CALLABLE Type index(const mat_t<Rows,Cols,Type>& m, int row, int col
327
339
  }
328
340
 
329
341
  template<unsigned Rows, unsigned Cols, typename Type>
330
- inline CUDA_CALLABLE void indexset(mat_t<Rows,Cols,Type>& m, int row, vec_t<Cols, Type> value)
342
+ inline CUDA_CALLABLE vec_t<Cols, Type>* index(mat_t<Rows,Cols,Type>& m, int row)
331
343
  {
332
344
  #ifndef NDEBUG
333
345
  if (row < 0 || row >= Rows)
@@ -337,12 +349,11 @@ inline CUDA_CALLABLE void indexset(mat_t<Rows,Cols,Type>& m, int row, vec_t<Cols
337
349
  }
338
350
  #endif
339
351
 
340
- for(unsigned i=0; i < Cols; ++i)
341
- m.data[row][i] = value[i];
352
+ return reinterpret_cast<vec_t<Cols, Type>*>(&m.data[row]);
342
353
  }
343
354
 
344
355
  template<unsigned Rows, unsigned Cols, typename Type>
345
- inline CUDA_CALLABLE void indexset(mat_t<Rows,Cols,Type>& m, int row, int col, Type value)
356
+ inline CUDA_CALLABLE Type* index(mat_t<Rows,Cols,Type>& m, int row, int col)
346
357
  {
347
358
  #ifndef NDEBUG
348
359
  if (row < 0 || row >= Rows)
@@ -356,18 +367,19 @@ inline CUDA_CALLABLE void indexset(mat_t<Rows,Cols,Type>& m, int row, int col, T
356
367
  assert(0);
357
368
  }
358
369
  #endif
359
- m.data[row][col] = value;
370
+
371
+ return &m.data[row][col];
360
372
  }
361
373
 
362
374
  template<unsigned Rows, unsigned Cols, typename Type>
363
- inline CUDA_CALLABLE void adj_indexset(const mat_t<Rows,Cols,Type>& m, int row, const vec_t<Cols, Type>& value,
375
+ inline CUDA_CALLABLE void adj_index(const mat_t<Rows,Cols,Type>& m, int row,
364
376
  const mat_t<Rows,Cols,Type>& adj_m, int adj_row, const vec_t<Cols, Type>& adj_value)
365
377
  {
366
378
  // nop
367
379
  }
368
380
 
369
381
  template<unsigned Rows, unsigned Cols, typename Type>
370
- inline CUDA_CALLABLE void adj_indexset(const mat_t<Rows,Cols,Type>& m, int row, int col, Type value,
382
+ inline CUDA_CALLABLE void adj_index(const mat_t<Rows,Cols,Type>& m, int row, int col,
371
383
  const mat_t<Rows,Cols,Type>& adj_m, int adj_row, int adj_col, Type adj_value)
372
384
  {
373
385
  // nop
@@ -425,7 +437,22 @@ inline CUDA_CALLABLE mat_t<Rows,Cols,Type> div(const mat_t<Rows,Cols,Type>& a, T
425
437
  }
426
438
  }
427
439
 
428
- return t;
440
+ return t;
441
+ }
442
+
443
+ template<unsigned Rows, unsigned Cols, typename Type>
444
+ inline CUDA_CALLABLE mat_t<Rows,Cols,Type> div(Type b, const mat_t<Rows,Cols,Type>& a)
445
+ {
446
+ mat_t<Rows,Cols,Type> t;
447
+ for (unsigned i=0; i < Rows; ++i)
448
+ {
449
+ for (unsigned j=0; j < Cols; ++j)
450
+ {
451
+ t.data[i][j] = b / a.data[i][j];
452
+ }
453
+ }
454
+
455
+ return t;
429
456
  }
430
457
 
431
458
  template<unsigned Rows, unsigned Cols, typename Type>
@@ -440,7 +467,7 @@ inline CUDA_CALLABLE mat_t<Rows,Cols,Type> mul(const mat_t<Rows,Cols,Type>& a, T
440
467
  }
441
468
  }
442
469
 
443
- return t;
470
+ return t;
444
471
  }
445
472
 
446
473
  template<unsigned Rows, unsigned Cols, typename Type>
@@ -473,6 +500,17 @@ inline CUDA_CALLABLE vec_t<Rows,Type> mul(const mat_t<Rows,Cols,Type>& a, const
473
500
  return r;
474
501
  }
475
502
 
503
+ template<unsigned Rows, unsigned Cols, typename Type>
504
+ inline CUDA_CALLABLE vec_t<Cols,Type> mul(const vec_t<Rows,Type>& b, const mat_t<Rows,Cols,Type>& a)
505
+ {
506
+ vec_t<Cols,Type> r = a.get_row(0)*b[0];
507
+ for( unsigned i=1; i < Rows; ++i )
508
+ {
509
+ r += a.get_row(i)*b[i];
510
+ }
511
+ return r;
512
+ }
513
+
476
514
  template<unsigned Rows, unsigned Cols, unsigned ColsOut, typename Type>
477
515
  inline CUDA_CALLABLE mat_t<Rows,ColsOut,Type> mul(const mat_t<Rows,Cols,Type>& a, const mat_t<Cols,ColsOut,Type>& b)
478
516
  {
@@ -861,14 +899,14 @@ inline CUDA_CALLABLE vec_t<3,Type> transform_vector(const mat_t<4,4,Type>& m, co
861
899
  }
862
900
 
863
901
  template<unsigned Rows, unsigned Cols, typename Type>
864
- inline CUDA_CALLABLE void adj_index(const mat_t<Rows,Cols,Type>& m, int row, mat_t<Rows,Cols,Type>& adj_m, int& adj_row, const vec_t<Cols,Type>& adj_ret)
902
+ inline CUDA_CALLABLE void adj_extract(const mat_t<Rows,Cols,Type>& m, int row, mat_t<Rows,Cols,Type>& adj_m, int& adj_row, const vec_t<Cols,Type>& adj_ret)
865
903
  {
866
904
  for( unsigned col=0; col < Cols; ++col )
867
905
  adj_m.data[row][col] += adj_ret[col];
868
906
  }
869
907
 
870
908
  template<unsigned Rows, unsigned Cols, typename Type>
871
- inline void CUDA_CALLABLE adj_index(const mat_t<Rows,Cols,Type>& m, int row, int col, mat_t<Rows,Cols,Type>& adj_m, int& adj_row, int& adj_col, Type adj_ret)
909
+ inline void CUDA_CALLABLE adj_extract(const mat_t<Rows,Cols,Type>& m, int row, int col, mat_t<Rows,Cols,Type>& adj_m, int& adj_row, int& adj_col, Type adj_ret)
872
910
  {
873
911
  #ifndef NDEBUG
874
912
  if (row < 0 || row > Rows)
@@ -932,6 +970,20 @@ inline CUDA_CALLABLE void adj_div(const mat_t<Rows,Cols,Type>& a, Type s, mat_t<
932
970
  }
933
971
  }
934
972
 
973
+ template<unsigned Rows, unsigned Cols, typename Type>
974
+ inline CUDA_CALLABLE void adj_div(Type s, const mat_t<Rows,Cols,Type>& a, Type& adj_s, mat_t<Rows,Cols,Type>& adj_a, const mat_t<Rows,Cols,Type>& adj_ret)
975
+ {
976
+ adj_s -= tensordot(a , adj_ret)/ (s * s); // - a / s^2
977
+
978
+ for (unsigned i=0; i < Rows; ++i)
979
+ {
980
+ for (unsigned j=0; j < Cols; ++j)
981
+ {
982
+ adj_a.data[i][j] += s / adj_ret.data[i][j];
983
+ }
984
+ }
985
+ }
986
+
935
987
  template<unsigned Rows, unsigned Cols, typename Type>
936
988
  inline CUDA_CALLABLE void adj_mul(const mat_t<Rows,Cols,Type>& a, Type b, mat_t<Rows,Cols,Type>& adj_a, Type& adj_b, const mat_t<Rows,Cols,Type>& adj_ret)
937
989
  {
@@ -965,6 +1017,13 @@ inline CUDA_CALLABLE void adj_mul(const mat_t<Rows,Cols,Type>& a, const vec_t<Co
965
1017
  adj_b += mul(transpose(a), adj_ret);
966
1018
  }
967
1019
 
1020
+ template<unsigned Rows, unsigned Cols, typename Type>
1021
+ inline CUDA_CALLABLE void adj_mul(const vec_t<Rows,Type>& b, const mat_t<Rows,Cols,Type>& a, vec_t<Rows,Type>& adj_b, mat_t<Rows,Cols,Type>& adj_a, const vec_t<Cols,Type>& adj_ret)
1022
+ {
1023
+ adj_a += outer(b, adj_ret);
1024
+ adj_b += mul(adj_ret, transpose(a));
1025
+ }
1026
+
968
1027
  template<unsigned Rows, unsigned Cols, unsigned ColsOut, typename Type>
969
1028
  inline CUDA_CALLABLE void adj_mul(const mat_t<Rows,Cols,Type>& a, const mat_t<Cols,ColsOut,Type>& b, mat_t<Rows,Cols,Type>& adj_a, mat_t<Cols,ColsOut,Type>& adj_b, const mat_t<Rows,ColsOut,Type>& adj_ret)
970
1029
  {
@@ -1105,10 +1164,10 @@ inline CUDA_CALLABLE void adj_determinant(const mat_t<4,4,Type>& m, mat_t<4,4,Ty
1105
1164
  }
1106
1165
 
1107
1166
  template<unsigned Rows, typename Type>
1108
- inline CUDA_CALLABLE void adj_inverse(const mat_t<Rows,Rows,Type>& m, mat_t<Rows,Rows,Type>& adj_m, const mat_t<Rows,Rows,Type>& adj_ret)
1167
+ inline CUDA_CALLABLE void adj_inverse(const mat_t<Rows,Rows,Type>& m, mat_t<Rows,Rows,Type>& ret, mat_t<Rows,Rows,Type>& adj_m, const mat_t<Rows,Rows,Type>& adj_ret)
1109
1168
  {
1110
1169
  // todo: how to cache this from the forward pass?
1111
- mat_t<Rows,Rows,Type> invt = transpose(inverse(m));
1170
+ mat_t<Rows,Rows,Type> invt = transpose(ret);
1112
1171
 
1113
1172
  // see https://people.maths.ox.ac.uk/gilesm/files/NA-08-01.pdf 2.2.3
1114
1173
  adj_m -= mul(mul(invt, adj_ret), invt);
@@ -1150,10 +1209,10 @@ inline CUDA_CALLABLE void adj_cw_mul(const mat_t<Rows,Cols,Type>& a, const mat_t
1150
1209
  }
1151
1210
 
1152
1211
  template<unsigned Rows, unsigned Cols, typename Type>
1153
- inline CUDA_CALLABLE void adj_cw_div(const mat_t<Rows,Cols,Type>& a, const mat_t<Rows,Cols,Type>& b, mat_t<Rows,Cols,Type>& adj_a, mat_t<Rows,Cols,Type>& adj_b, const mat_t<Rows,Cols,Type>& adj_ret)
1212
+ inline CUDA_CALLABLE void adj_cw_div(const mat_t<Rows,Cols,Type>& a, const mat_t<Rows,Cols,Type>& b, mat_t<Rows,Cols,Type>& ret, mat_t<Rows,Cols,Type>& adj_a, mat_t<Rows,Cols,Type>& adj_b, const mat_t<Rows,Cols,Type>& adj_ret)
1154
1213
  {
1155
1214
  adj_a += cw_div(adj_ret, b);
1156
- adj_b -= cw_mul(adj_ret, cw_div(cw_div(a, b), b));
1215
+ adj_b -= cw_mul(adj_ret, cw_div(ret, b));
1157
1216
  }
1158
1217
 
1159
1218
  // adjoint for the constant constructor:
warp/native/mesh.cpp CHANGED
@@ -103,7 +103,8 @@ uint64_t mesh_create_host(array_t<wp::vec3> points, array_t<wp::vec3> velocities
103
103
  {
104
104
  Mesh* m = new Mesh(points, velocities, indices, num_points, num_tris);
105
105
 
106
- m->bounds = new bounds3[num_tris];
106
+ m->lowers = new vec3[num_tris];
107
+ m->uppers = new vec3[num_tris];
107
108
 
108
109
  float sum = 0.0;
109
110
  for (int i=0; i < num_tris; ++i)
@@ -111,15 +112,23 @@ uint64_t mesh_create_host(array_t<wp::vec3> points, array_t<wp::vec3> velocities
111
112
  wp::vec3& p0 = points[indices[i*3+0]];
112
113
  wp::vec3& p1 = points[indices[i*3+1]];
113
114
  wp::vec3& p2 = points[indices[i*3+2]];
114
- m->bounds[i].add_point(p0);
115
- m->bounds[i].add_point(p1);
116
- m->bounds[i].add_point(p2);
115
+
116
+ // compute triangle bounds
117
+ bounds3 b;
118
+ b.add_point(p0);
119
+ b.add_point(p1);
120
+ b.add_point(p2);
121
+
122
+ m->lowers[i] = b.lower;
123
+ m->uppers[i] = b.upper;
124
+
125
+ // compute edge lengths
117
126
  sum += length(p0-p1) + length(p0-p2) + length(p2-p1);
118
127
  }
119
128
  m->average_edge_length = sum / (num_tris*3);
120
129
 
121
- m->bvh = bvh_create(m->bounds, num_tris);
122
-
130
+ m->bvh = *(wp::BVH*)bvh_create_host(m->lowers, m->uppers, num_tris);
131
+
123
132
  if (support_winding_number)
124
133
  {
125
134
  // Let's first compute the sold
@@ -131,86 +140,14 @@ uint64_t mesh_create_host(array_t<wp::vec3> points, array_t<wp::vec3> velocities
131
140
  return (uint64_t)m;
132
141
  }
133
142
 
134
- uint64_t mesh_create_device(void* context, array_t<wp::vec3> points, array_t<wp::vec3> velocities, array_t<int> indices, int num_points, int num_tris, int support_winding_number)
135
- {
136
- ContextGuard guard(context);
137
-
138
- Mesh mesh(points, velocities, indices, num_points, num_tris);
139
-
140
- mesh.context = context ? context : cuda_context_get_current();
141
-
142
- // mesh.points = array_t<vec3>(points, num_points, points_grad);
143
- // mesh.velocities = array_t<vec3>(velocities, num_points, velocities_grad);
144
- // mesh.indices = array_t<int>(indices, num_tris, 3);
145
-
146
- // mesh.num_points = num_points;
147
- // mesh.num_tris = num_tris;
148
-
149
- {
150
- // todo: BVH creation only on CPU at the moment so temporarily bring all the data back to host
151
- vec3* points_host = (vec3*)alloc_host(sizeof(vec3)*num_points);
152
- int* indices_host = (int*)alloc_host(sizeof(int)*num_tris*3);
153
- bounds3* bounds_host = (bounds3*)alloc_host(sizeof(bounds3)*num_tris);
154
-
155
- memcpy_d2h(WP_CURRENT_CONTEXT, points_host, points, sizeof(vec3)*num_points);
156
- memcpy_d2h(WP_CURRENT_CONTEXT, indices_host, indices, sizeof(int)*num_tris*3);
157
- cuda_context_synchronize(WP_CURRENT_CONTEXT);
158
-
159
- float sum = 0.0;
160
- for (int i=0; i < num_tris; ++i)
161
- {
162
- bounds_host[i] = bounds3();
163
- wp::vec3 p0 = points_host[indices_host[i*3+0]];
164
- wp::vec3 p1 = points_host[indices_host[i*3+1]];
165
- wp::vec3 p2 = points_host[indices_host[i*3+2]];
166
- bounds_host[i].add_point(p0);
167
- bounds_host[i].add_point(p1);
168
- bounds_host[i].add_point(p2);
169
- sum += length(p0-p1) + length(p0-p2) + length(p2-p1);
170
- }
171
- mesh.average_edge_length = sum / (num_tris*3);
172
-
173
- BVH bvh_host = bvh_create(bounds_host, num_tris);
174
- BVH bvh_device = bvh_clone(WP_CURRENT_CONTEXT, bvh_host);
175
-
176
- bvh_destroy_host(bvh_host);
177
-
178
- // save gpu-side copy of bounds
179
- mesh.bounds = (bounds3*)alloc_device(WP_CURRENT_CONTEXT, sizeof(bounds3)*num_tris);
180
- memcpy_h2d(WP_CURRENT_CONTEXT, mesh.bounds, bounds_host, sizeof(bounds3)*num_tris);
181
-
182
- free_host(points_host);
183
- free_host(indices_host);
184
- free_host(bounds_host);
185
-
186
- mesh.bvh = bvh_device;
187
-
188
- if (support_winding_number)
189
- {
190
- int num_bvh_nodes = 2*num_tris-1;
191
- mesh.solid_angle_props = (SolidAngleProps*)alloc_device(WP_CURRENT_CONTEXT, sizeof(SolidAngleProps)*num_bvh_nodes);
192
- }
193
- }
194
-
195
- Mesh* mesh_device = (Mesh*)alloc_device(WP_CURRENT_CONTEXT, sizeof(Mesh));
196
- memcpy_h2d(WP_CURRENT_CONTEXT, mesh_device, &mesh, sizeof(Mesh));
197
-
198
- // save descriptor
199
- uint64_t mesh_id = (uint64_t)mesh_device;
200
- mesh_add_descriptor(mesh_id, mesh);
201
-
202
- if (support_winding_number)
203
- {
204
- mesh_refit_device(mesh_id);
205
- }
206
- return mesh_id;
207
- }
208
143
 
209
144
  void mesh_destroy_host(uint64_t id)
210
145
  {
211
146
  Mesh* m = (Mesh*)(id);
212
147
 
213
- delete[] m->bounds;
148
+ delete[] m->lowers;
149
+ delete[] m->uppers;
150
+
214
151
  if (m->solid_angle_props) {
215
152
  delete [] m->solid_angle_props;
216
153
  }
@@ -219,25 +156,6 @@ void mesh_destroy_host(uint64_t id)
219
156
  delete m;
220
157
  }
221
158
 
222
- void mesh_destroy_device(uint64_t id)
223
- {
224
- Mesh mesh;
225
- if (mesh_get_descriptor(id, mesh))
226
- {
227
- ContextGuard guard(mesh.context);
228
-
229
- bvh_destroy_device(mesh.bvh);
230
-
231
- free_device(WP_CURRENT_CONTEXT, mesh.bounds);
232
- free_device(WP_CURRENT_CONTEXT, (Mesh*)id);
233
-
234
- if (mesh.solid_angle_props) {
235
- free_device(WP_CURRENT_CONTEXT, mesh.solid_angle_props);
236
- }
237
- mesh_rem_descriptor(id);
238
- }
239
- }
240
-
241
159
  void mesh_refit_host(uint64_t id)
242
160
  {
243
161
  Mesh* m = (Mesh*)(id);
@@ -245,13 +163,19 @@ void mesh_refit_host(uint64_t id)
245
163
  float sum = 0.0;
246
164
  for (int i=0; i < m->num_tris; ++i)
247
165
  {
248
- m->bounds[i] = bounds3();
249
166
  wp::vec3 p0 = m->points.data[m->indices.data[i*3+0]];
250
167
  wp::vec3 p1 = m->points.data[m->indices.data[i*3+1]];
251
168
  wp::vec3 p2 = m->points.data[m->indices.data[i*3+2]];
252
- m->bounds[i].add_point(p0);
253
- m->bounds[i].add_point(p1);
254
- m->bounds[i].add_point(p2);
169
+
170
+ // compute triangle bounds
171
+ bounds3 b;
172
+ b.add_point(p0);
173
+ b.add_point(p1);
174
+ b.add_point(p2);
175
+
176
+ m->lowers[i] = b.lower;
177
+ m->uppers[i] = b.upper;
178
+
255
179
  sum += length(p0-p1) + length(p0-p2) + length(p2-p1);
256
180
  }
257
181
  m->average_edge_length = sum / (m->num_tris*3);
@@ -263,7 +187,7 @@ void mesh_refit_host(uint64_t id)
263
187
  }
264
188
  else
265
189
  {
266
- bvh_refit_host(m->bvh, m->bounds);
190
+ bvh_refit_host(m->bvh);
267
191
  }
268
192
  }
269
193
 
@@ -271,9 +195,10 @@ void mesh_refit_host(uint64_t id)
271
195
  // stubs for non-CUDA platforms
272
196
  #if !WP_ENABLE_CUDA
273
197
 
274
- void mesh_refit_device(uint64_t id)
275
- {
276
- }
198
+
199
+ WP_API uint64_t mesh_create_device(void* context, wp::array_t<wp::vec3> points, wp::array_t<wp::vec3> velocities, wp::array_t<int> tris, int num_points, int num_tris, int support_winding_number) { return 0; }
200
+ WP_API void mesh_destroy_device(uint64_t id) {}
201
+ WP_API void mesh_refit_device(uint64_t id) {}
277
202
 
278
203
 
279
204
  #endif // !WP_ENABLE_CUDA