warp-lang 1.6.2__py3-none-macosx_10_13_universal2.whl → 1.7.0__py3-none-macosx_10_13_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of warp-lang might be problematic; see the package's registry page for more details.

Files changed (179)
  1. warp/__init__.py +7 -1
  2. warp/bin/libwarp-clang.dylib +0 -0
  3. warp/bin/libwarp.dylib +0 -0
  4. warp/build.py +410 -0
  5. warp/build_dll.py +6 -14
  6. warp/builtins.py +452 -362
  7. warp/codegen.py +179 -119
  8. warp/config.py +42 -6
  9. warp/context.py +490 -271
  10. warp/dlpack.py +8 -6
  11. warp/examples/assets/nonuniform.usd +0 -0
  12. warp/examples/assets/nvidia_logo.png +0 -0
  13. warp/examples/benchmarks/benchmark_tile_load_store.py +103 -0
  14. warp/examples/core/example_sample_mesh.py +300 -0
  15. warp/examples/fem/example_apic_fluid.py +1 -1
  16. warp/examples/fem/example_burgers.py +2 -2
  17. warp/examples/fem/example_deformed_geometry.py +1 -1
  18. warp/examples/fem/example_distortion_energy.py +1 -1
  19. warp/examples/fem/example_magnetostatics.py +6 -6
  20. warp/examples/fem/utils.py +9 -3
  21. warp/examples/interop/example_jax_callable.py +116 -0
  22. warp/examples/interop/example_jax_ffi_callback.py +132 -0
  23. warp/examples/interop/example_jax_kernel.py +205 -0
  24. warp/examples/optim/example_fluid_checkpoint.py +497 -0
  25. warp/examples/tile/example_tile_matmul.py +2 -4
  26. warp/fem/__init__.py +11 -1
  27. warp/fem/adaptivity.py +4 -4
  28. warp/fem/field/nodal_field.py +22 -68
  29. warp/fem/field/virtual.py +62 -23
  30. warp/fem/geometry/adaptive_nanogrid.py +9 -10
  31. warp/fem/geometry/closest_point.py +1 -1
  32. warp/fem/geometry/deformed_geometry.py +5 -2
  33. warp/fem/geometry/geometry.py +5 -0
  34. warp/fem/geometry/grid_2d.py +12 -12
  35. warp/fem/geometry/grid_3d.py +12 -15
  36. warp/fem/geometry/hexmesh.py +5 -7
  37. warp/fem/geometry/nanogrid.py +9 -11
  38. warp/fem/geometry/quadmesh.py +13 -13
  39. warp/fem/geometry/tetmesh.py +3 -4
  40. warp/fem/geometry/trimesh.py +3 -8
  41. warp/fem/integrate.py +262 -93
  42. warp/fem/linalg.py +5 -5
  43. warp/fem/quadrature/pic_quadrature.py +37 -22
  44. warp/fem/quadrature/quadrature.py +194 -25
  45. warp/fem/space/__init__.py +1 -1
  46. warp/fem/space/basis_function_space.py +4 -2
  47. warp/fem/space/basis_space.py +25 -18
  48. warp/fem/space/hexmesh_function_space.py +2 -2
  49. warp/fem/space/partition.py +6 -2
  50. warp/fem/space/quadmesh_function_space.py +8 -8
  51. warp/fem/space/shape/cube_shape_function.py +23 -23
  52. warp/fem/space/shape/square_shape_function.py +12 -12
  53. warp/fem/space/shape/triangle_shape_function.py +1 -1
  54. warp/fem/space/tetmesh_function_space.py +3 -3
  55. warp/fem/space/trimesh_function_space.py +2 -2
  56. warp/fem/utils.py +12 -6
  57. warp/jax.py +14 -1
  58. warp/jax_experimental/__init__.py +16 -0
  59. warp/{jax_experimental.py → jax_experimental/custom_call.py} +14 -27
  60. warp/jax_experimental/ffi.py +698 -0
  61. warp/jax_experimental/xla_ffi.py +602 -0
  62. warp/math.py +89 -0
  63. warp/native/array.h +13 -0
  64. warp/native/builtin.h +29 -3
  65. warp/native/bvh.cpp +3 -1
  66. warp/native/bvh.cu +42 -14
  67. warp/native/bvh.h +2 -1
  68. warp/native/clang/clang.cpp +30 -3
  69. warp/native/cuda_util.cpp +14 -0
  70. warp/native/cuda_util.h +2 -0
  71. warp/native/exports.h +68 -63
  72. warp/native/intersect.h +26 -26
  73. warp/native/intersect_adj.h +33 -33
  74. warp/native/marching.cu +1 -1
  75. warp/native/mat.h +513 -9
  76. warp/native/mesh.h +10 -10
  77. warp/native/quat.h +99 -11
  78. warp/native/rand.h +6 -0
  79. warp/native/sort.cpp +122 -59
  80. warp/native/sort.cu +152 -15
  81. warp/native/sort.h +8 -1
  82. warp/native/sparse.cpp +43 -22
  83. warp/native/sparse.cu +52 -17
  84. warp/native/svd.h +116 -0
  85. warp/native/tile.h +301 -105
  86. warp/native/tile_reduce.h +46 -3
  87. warp/native/vec.h +68 -7
  88. warp/native/volume.cpp +85 -113
  89. warp/native/volume_builder.cu +25 -10
  90. warp/native/volume_builder.h +6 -0
  91. warp/native/warp.cpp +5 -6
  92. warp/native/warp.cu +99 -10
  93. warp/native/warp.h +19 -10
  94. warp/optim/linear.py +10 -10
  95. warp/sim/articulation.py +4 -4
  96. warp/sim/collide.py +21 -10
  97. warp/sim/import_mjcf.py +449 -155
  98. warp/sim/import_urdf.py +32 -12
  99. warp/sim/integrator_euler.py +5 -5
  100. warp/sim/integrator_featherstone.py +3 -10
  101. warp/sim/integrator_vbd.py +207 -2
  102. warp/sim/integrator_xpbd.py +5 -5
  103. warp/sim/model.py +42 -13
  104. warp/sim/utils.py +2 -2
  105. warp/sparse.py +642 -555
  106. warp/stubs.py +216 -19
  107. warp/tests/__main__.py +0 -15
  108. warp/tests/cuda/__init__.py +0 -0
  109. warp/tests/{test_mempool.py → cuda/test_mempool.py} +39 -0
  110. warp/tests/{test_streams.py → cuda/test_streams.py} +71 -0
  111. warp/tests/geometry/__init__.py +0 -0
  112. warp/tests/{test_mesh_query_point.py → geometry/test_mesh_query_point.py} +66 -63
  113. warp/tests/{test_mesh_query_ray.py → geometry/test_mesh_query_ray.py} +1 -1
  114. warp/tests/{test_volume.py → geometry/test_volume.py} +41 -6
  115. warp/tests/interop/__init__.py +0 -0
  116. warp/tests/{test_dlpack.py → interop/test_dlpack.py} +28 -5
  117. warp/tests/sim/__init__.py +0 -0
  118. warp/tests/{disabled_kinematics.py → sim/disabled_kinematics.py} +9 -10
  119. warp/tests/{test_collision.py → sim/test_collision.py} +2 -2
  120. warp/tests/{test_model.py → sim/test_model.py} +40 -0
  121. warp/tests/{test_sim_kinematics.py → sim/test_sim_kinematics.py} +2 -1
  122. warp/tests/sim/test_vbd.py +597 -0
  123. warp/tests/test_bool.py +1 -1
  124. warp/tests/test_examples.py +28 -36
  125. warp/tests/test_fem.py +23 -4
  126. warp/tests/test_linear_solvers.py +0 -11
  127. warp/tests/test_mat.py +233 -79
  128. warp/tests/test_mat_scalar_ops.py +4 -4
  129. warp/tests/test_overwrite.py +0 -60
  130. warp/tests/test_quat.py +67 -46
  131. warp/tests/test_rand.py +44 -37
  132. warp/tests/test_sparse.py +47 -6
  133. warp/tests/test_spatial.py +75 -0
  134. warp/tests/test_static.py +1 -1
  135. warp/tests/test_utils.py +84 -4
  136. warp/tests/test_vec.py +46 -34
  137. warp/tests/tile/__init__.py +0 -0
  138. warp/tests/{test_tile.py → tile/test_tile.py} +136 -51
  139. warp/tests/{test_tile_load.py → tile/test_tile_load.py} +1 -1
  140. warp/tests/{test_tile_mathdx.py → tile/test_tile_mathdx.py} +9 -6
  141. warp/tests/{test_tile_mlp.py → tile/test_tile_mlp.py} +25 -14
  142. warp/tests/{test_tile_reduce.py → tile/test_tile_reduce.py} +60 -1
  143. warp/tests/{test_tile_view.py → tile/test_tile_view.py} +1 -1
  144. warp/tests/unittest_serial.py +1 -0
  145. warp/tests/unittest_suites.py +45 -59
  146. warp/tests/unittest_utils.py +2 -1
  147. warp/thirdparty/unittest_parallel.py +3 -1
  148. warp/types.py +110 -658
  149. warp/utils.py +137 -72
  150. {warp_lang-1.6.2.dist-info → warp_lang-1.7.0.dist-info}/METADATA +29 -7
  151. {warp_lang-1.6.2.dist-info → warp_lang-1.7.0.dist-info}/RECORD +172 -162
  152. {warp_lang-1.6.2.dist-info → warp_lang-1.7.0.dist-info}/WHEEL +1 -1
  153. warp/examples/optim/example_walker.py +0 -317
  154. warp/native/cutlass_gemm.cpp +0 -43
  155. warp/native/cutlass_gemm.cu +0 -382
  156. warp/tests/test_matmul.py +0 -511
  157. warp/tests/test_matmul_lite.py +0 -411
  158. warp/tests/test_vbd.py +0 -386
  159. warp/tests/unused_test_misc.py +0 -77
  160. /warp/tests/{test_async.py → cuda/test_async.py} +0 -0
  161. /warp/tests/{test_ipc.py → cuda/test_ipc.py} +0 -0
  162. /warp/tests/{test_multigpu.py → cuda/test_multigpu.py} +0 -0
  163. /warp/tests/{test_peer.py → cuda/test_peer.py} +0 -0
  164. /warp/tests/{test_pinned.py → cuda/test_pinned.py} +0 -0
  165. /warp/tests/{test_bvh.py → geometry/test_bvh.py} +0 -0
  166. /warp/tests/{test_hash_grid.py → geometry/test_hash_grid.py} +0 -0
  167. /warp/tests/{test_marching_cubes.py → geometry/test_marching_cubes.py} +0 -0
  168. /warp/tests/{test_mesh.py → geometry/test_mesh.py} +0 -0
  169. /warp/tests/{test_mesh_query_aabb.py → geometry/test_mesh_query_aabb.py} +0 -0
  170. /warp/tests/{test_volume_write.py → geometry/test_volume_write.py} +0 -0
  171. /warp/tests/{test_jax.py → interop/test_jax.py} +0 -0
  172. /warp/tests/{test_paddle.py → interop/test_paddle.py} +0 -0
  173. /warp/tests/{test_torch.py → interop/test_torch.py} +0 -0
  174. /warp/tests/{flaky_test_sim_grad.py → sim/flaky_test_sim_grad.py} +0 -0
  175. /warp/tests/{test_coloring.py → sim/test_coloring.py} +0 -0
  176. /warp/tests/{test_sim_grad_bounce_linear.py → sim/test_sim_grad_bounce_linear.py} +0 -0
  177. /warp/tests/{test_tile_shared_memory.py → tile/test_tile_shared_memory.py} +0 -0
  178. {warp_lang-1.6.2.dist-info → warp_lang-1.7.0.dist-info/licenses}/LICENSE.md +0 -0
  179. {warp_lang-1.6.2.dist-info → warp_lang-1.7.0.dist-info}/top_level.txt +0 -0
warp/native/mat.h CHANGED
@@ -207,6 +207,159 @@ struct mat_t
207
207
  Type data[Rows][Cols];
208
208
  };
209
209
 
210
+ template<typename Type>
211
+ inline CUDA_CALLABLE mat_t<2, 2, Type> matrix_from_cols(vec_t<2, Type> c0, vec_t<2, Type> c1)
212
+ {
213
+ mat_t<2, 2, Type> m;
214
+
215
+ m.data[0][0] = c0[0];
216
+ m.data[1][0] = c0[1];
217
+
218
+ m.data[0][1] = c1[0];
219
+ m.data[1][1] = c1[1];
220
+
221
+ return m;
222
+ }
223
+
224
+ template<typename Type>
225
+ inline CUDA_CALLABLE mat_t<3, 3, Type> matrix_from_cols(vec_t<3, Type> c0, vec_t<3, Type> c1, vec_t<3, Type> c2)
226
+ {
227
+ mat_t<3, 3, Type> m;
228
+
229
+ m.data[0][0] = c0[0];
230
+ m.data[1][0] = c0[1];
231
+ m.data[2][0] = c0[2];
232
+
233
+ m.data[0][1] = c1[0];
234
+ m.data[1][1] = c1[1];
235
+ m.data[2][1] = c1[2];
236
+
237
+ m.data[0][2] = c2[0];
238
+ m.data[1][2] = c2[1];
239
+ m.data[2][2] = c2[2];
240
+
241
+ return m;
242
+ }
243
+
244
+ template<typename Type>
245
+ inline CUDA_CALLABLE mat_t<4, 4, Type> matrix_from_cols(vec_t<4, Type> c0, vec_t<4, Type> c1, vec_t<4, Type> c2, vec_t<4, Type> c3)
246
+ {
247
+ mat_t<4, 4, Type> m;
248
+
249
+ m.data[0][0] = c0[0];
250
+ m.data[1][0] = c0[1];
251
+ m.data[2][0] = c0[2];
252
+ m.data[3][0] = c0[3];
253
+
254
+ m.data[0][1] = c1[0];
255
+ m.data[1][1] = c1[1];
256
+ m.data[2][1] = c1[2];
257
+ m.data[3][1] = c1[3];
258
+
259
+ m.data[0][2] = c2[0];
260
+ m.data[1][2] = c2[1];
261
+ m.data[2][2] = c2[2];
262
+ m.data[3][2] = c2[3];
263
+
264
+ m.data[0][3] = c3[0];
265
+ m.data[1][3] = c3[1];
266
+ m.data[2][3] = c3[2];
267
+ m.data[3][3] = c3[3];
268
+
269
+ return m;
270
+ }
271
+
272
+ template<unsigned Rows, unsigned Cols, typename Type>
273
+ inline CUDA_CALLABLE mat_t<Rows, Cols, Type> matrix_from_cols(const initializer_array<Cols, vec_t<Rows, Type> >& l)
274
+ {
275
+ mat_t<Rows, Cols, Type> m;
276
+ for (unsigned j=0; j < Cols; ++j)
277
+ {
278
+ for (unsigned i=0; i < Rows; ++i)
279
+ {
280
+ m.data[i][j] = l[j][i];
281
+ }
282
+ }
283
+
284
+ return m;
285
+ }
286
+
287
+ template<typename Type>
288
+ inline CUDA_CALLABLE mat_t<2, 2, Type> matrix_from_rows(vec_t<2, Type> r0, vec_t<2, Type> r1)
289
+ {
290
+ mat_t<2, 2, Type> m;
291
+
292
+ m.data[0][0] = r0[0];
293
+ m.data[0][1] = r0[1];
294
+
295
+ m.data[1][0] = r1[0];
296
+ m.data[1][1] = r1[1];
297
+
298
+ return m;
299
+ }
300
+
301
+ template<typename Type>
302
+ inline CUDA_CALLABLE mat_t<3, 3, Type> matrix_from_rows(vec_t<3, Type> r0, vec_t<3, Type> r1, vec_t<3, Type> r2)
303
+ {
304
+ mat_t<3, 3, Type> m;
305
+
306
+ m.data[0][0] = r0[0];
307
+ m.data[0][1] = r0[1];
308
+ m.data[0][2] = r0[2];
309
+
310
+ m.data[1][0] = r1[0];
311
+ m.data[1][1] = r1[1];
312
+ m.data[1][2] = r1[2];
313
+
314
+ m.data[2][0] = r2[0];
315
+ m.data[2][1] = r2[1];
316
+ m.data[2][2] = r2[2];
317
+
318
+ return m;
319
+ }
320
+
321
+ template<typename Type>
322
+ inline CUDA_CALLABLE mat_t<4, 4, Type> matrix_from_rows(vec_t<4, Type> r0, vec_t<4, Type> r1, vec_t<4, Type> r2, vec_t<4, Type> r3)
323
+ {
324
+ mat_t<4, 4, Type> m;
325
+
326
+ m.data[0][0] = r0[0];
327
+ m.data[0][1] = r0[1];
328
+ m.data[0][2] = r0[2];
329
+ m.data[0][3] = r0[3];
330
+
331
+ m.data[1][0] = r1[0];
332
+ m.data[1][1] = r1[1];
333
+ m.data[1][2] = r1[2];
334
+ m.data[1][3] = r1[3];
335
+
336
+ m.data[2][0] = r2[0];
337
+ m.data[2][1] = r2[1];
338
+ m.data[2][2] = r2[2];
339
+ m.data[2][3] = r2[3];
340
+
341
+ m.data[3][0] = r3[0];
342
+ m.data[3][1] = r3[1];
343
+ m.data[3][2] = r3[2];
344
+ m.data[3][3] = r3[3];
345
+
346
+ return m;
347
+ }
348
+
349
+ template<unsigned Rows, unsigned Cols, typename Type>
350
+ inline CUDA_CALLABLE mat_t<Rows, Cols, Type> matrix_from_rows(const initializer_array<Rows, vec_t<Cols, Type> >& l)
351
+ {
352
+ mat_t<Rows, Cols, Type> m;
353
+ for (unsigned i=0; i < Rows; ++i)
354
+ {
355
+ for (unsigned j=0; j < Cols; ++j)
356
+ {
357
+ m.data[i][j] = l[i][j];
358
+ }
359
+ }
360
+
361
+ return m;
362
+ }
210
363
 
211
364
  template<unsigned Rows, typename Type>
212
365
  inline CUDA_CALLABLE mat_t<Rows, Rows, Type> identity()
@@ -404,37 +557,241 @@ inline CUDA_CALLABLE void adj_index(const mat_t<Rows,Cols,Type>& m, int row, int
404
557
 
405
558
 
406
559
  template<unsigned Rows, unsigned Cols, typename Type>
407
- inline CUDA_CALLABLE void augassign_add(mat_t<Rows,Cols,Type>& m, int row, int col, Type value)
560
+ inline CUDA_CALLABLE void add_inplace(mat_t<Rows,Cols,Type>& m, int row, int col, Type value)
408
561
  {
562
+ #ifndef NDEBUG
563
+ if (row < 0 || row >= Rows)
564
+ {
565
+ printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
566
+ assert(0);
567
+ }
568
+ if (col < 0 || col >= Cols)
569
+ {
570
+ printf("mat col index %d out of bounds at %s %d\n", col, __FILE__, __LINE__);
571
+ assert(0);
572
+ }
573
+ #endif
574
+
409
575
  m.data[row][col] += value;
410
576
  }
411
577
 
412
578
 
413
579
  template<unsigned Rows, unsigned Cols, typename Type>
414
- inline CUDA_CALLABLE void adj_augassign_add(mat_t<Rows,Cols,Type>& m, int row, int col, Type value,
580
+ inline CUDA_CALLABLE void add_inplace(mat_t<Rows,Cols,Type>& m, int row, vec_t<Cols,Type>& value)
581
+ {
582
+ #ifndef NDEBUG
583
+ if (row < 0 || row >= Rows)
584
+ {
585
+ printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
586
+ assert(0);
587
+ }
588
+ #endif
589
+
590
+ for(unsigned i=0; i < Cols; ++i)
591
+ {
592
+ m.data[row][i] += value[i];
593
+ }
594
+ }
595
+
596
+
597
+ template<unsigned Rows, unsigned Cols, typename Type>
598
+ inline CUDA_CALLABLE void adj_add_inplace(mat_t<Rows,Cols,Type>& m, int row, int col, Type value,
415
599
  mat_t<Rows,Cols,Type>& adj_m, int adj_row, int adj_col, Type& adj_value)
416
600
  {
601
+ #ifndef NDEBUG
602
+ if (row < 0 || row >= Rows)
603
+ {
604
+ printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
605
+ assert(0);
606
+ }
607
+ if (col < 0 || col >= Cols)
608
+ {
609
+ printf("mat col index %d out of bounds at %s %d\n", col, __FILE__, __LINE__);
610
+ assert(0);
611
+ }
612
+ #endif
613
+
417
614
  adj_value += adj_m.data[row][col];
418
615
  }
419
616
 
420
617
 
421
618
  template<unsigned Rows, unsigned Cols, typename Type>
422
- inline CUDA_CALLABLE void augassign_sub(mat_t<Rows,Cols,Type>& m, int row, int col, Type value)
619
+ inline CUDA_CALLABLE void adj_add_inplace(mat_t<Rows,Cols,Type>& m, int row, vec_t<Cols,Type>& value,
620
+ mat_t<Rows,Cols,Type>& adj_m, int adj_row, vec_t<Cols,Type>& adj_value)
621
+ {
622
+ #ifndef NDEBUG
623
+ if (row < 0 || row >= Rows)
624
+ {
625
+ printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
626
+ assert(0);
627
+ }
628
+ #endif
629
+
630
+ for(unsigned i=0; i < Cols; ++i)
631
+ {
632
+ adj_value[i] += adj_m.data[row][i];
633
+ }
634
+ }
635
+
636
+
637
+ template<unsigned Rows, unsigned Cols, typename Type>
638
+ inline CUDA_CALLABLE void sub_inplace(mat_t<Rows,Cols,Type>& m, int row, int col, Type value)
423
639
  {
640
+ #ifndef NDEBUG
641
+ if (row < 0 || row >= Rows)
642
+ {
643
+ printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
644
+ assert(0);
645
+ }
646
+ if (col < 0 || col >= Cols)
647
+ {
648
+ printf("mat col index %d out of bounds at %s %d\n", col, __FILE__, __LINE__);
649
+ assert(0);
650
+ }
651
+ #endif
652
+
424
653
  m.data[row][col] -= value;
425
654
  }
426
655
 
427
656
 
428
657
  template<unsigned Rows, unsigned Cols, typename Type>
429
- inline CUDA_CALLABLE void adj_augassign_sub(mat_t<Rows,Cols,Type>& m, int row, int col, Type value,
658
+ inline CUDA_CALLABLE void sub_inplace(mat_t<Rows,Cols,Type>& m, int row, vec_t<Cols,Type>& value)
659
+ {
660
+ #ifndef NDEBUG
661
+ if (row < 0 || row >= Rows)
662
+ {
663
+ printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
664
+ assert(0);
665
+ }
666
+ #endif
667
+
668
+ for(unsigned i=0; i < Cols; ++i)
669
+ {
670
+ m.data[row][i] -= value[i];
671
+ }
672
+ }
673
+
674
+
675
+ template<unsigned Rows, unsigned Cols, typename Type>
676
+ inline CUDA_CALLABLE void adj_sub_inplace(mat_t<Rows,Cols,Type>& m, int row, int col, Type value,
430
677
  mat_t<Rows,Cols,Type>& adj_m, int adj_row, int adj_col, Type& adj_value)
431
678
  {
679
+ #ifndef NDEBUG
680
+ if (row < 0 || row >= Rows)
681
+ {
682
+ printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
683
+ assert(0);
684
+ }
685
+ if (col < 0 || col >= Cols)
686
+ {
687
+ printf("mat col index %d out of bounds at %s %d\n", col, __FILE__, __LINE__);
688
+ assert(0);
689
+ }
690
+ #endif
691
+
432
692
  adj_value -= adj_m.data[row][col];
433
693
  }
434
694
 
435
695
 
436
696
  template<unsigned Rows, unsigned Cols, typename Type>
437
- inline CUDA_CALLABLE mat_t<Rows,Cols,Type> assign(mat_t<Rows,Cols,Type>& m, int row, int col, Type value)
697
+ inline CUDA_CALLABLE void adj_sub_inplace(mat_t<Rows,Cols,Type>& m, int row, vec_t<Cols,Type>& value,
698
+ mat_t<Rows,Cols,Type>& adj_m, int adj_row, vec_t<Cols,Type>& adj_value)
699
+ {
700
+ #ifndef NDEBUG
701
+ if (row < 0 || row >= Rows)
702
+ {
703
+ printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
704
+ assert(0);
705
+ }
706
+ #endif
707
+
708
+ for(unsigned i=0; i < Cols; ++i)
709
+ {
710
+ adj_value[i] -= adj_m.data[row][i];
711
+ }
712
+ }
713
+
714
+
715
+ template<unsigned Rows, unsigned Cols, typename Type>
716
+ inline CUDA_CALLABLE void assign_inplace(mat_t<Rows,Cols,Type>& m, int row, int col, Type value)
717
+ {
718
+ #ifndef NDEBUG
719
+ if (row < 0 || row >= Rows)
720
+ {
721
+ printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
722
+ assert(0);
723
+ }
724
+ if (col < 0 || col >= Cols)
725
+ {
726
+ printf("mat col index %d out of bounds at %s %d\n", col, __FILE__, __LINE__);
727
+ assert(0);
728
+ }
729
+ #endif
730
+
731
+ m.data[row][col] = value;
732
+ }
733
+
734
+
735
+ template<unsigned Rows, unsigned Cols, typename Type>
736
+ inline CUDA_CALLABLE void assign_inplace(mat_t<Rows,Cols,Type>& m, int row, vec_t<Cols,Type>& value)
737
+ {
738
+ #ifndef NDEBUG
739
+ if (row < 0 || row >= Rows)
740
+ {
741
+ printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
742
+ assert(0);
743
+ }
744
+ #endif
745
+
746
+ for(unsigned i=0; i < Cols; ++i)
747
+ {
748
+ m.data[row][i] = value[i];
749
+ }
750
+ }
751
+
752
+
753
+ template<unsigned Rows, unsigned Cols, typename Type>
754
+ inline CUDA_CALLABLE void adj_assign_inplace(mat_t<Rows,Cols,Type>& m, int row, int col, Type value,
755
+ mat_t<Rows,Cols,Type>& adj_m, int& adj_row, int& adj_col, Type& adj_value)
756
+ {
757
+ #ifndef NDEBUG
758
+ if (row < 0 || row >= Rows)
759
+ {
760
+ printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
761
+ assert(0);
762
+ }
763
+ if (col < 0 || col >= Cols)
764
+ {
765
+ printf("mat col index %d out of bounds at %s %d\n", col, __FILE__, __LINE__);
766
+ assert(0);
767
+ }
768
+ #endif
769
+
770
+ adj_value += adj_m.data[row][col];
771
+ }
772
+
773
+
774
+ template<unsigned Rows, unsigned Cols, typename Type>
775
+ inline CUDA_CALLABLE void adj_assign_inplace(mat_t<Rows,Cols,Type>& m, int row, vec_t<Cols,Type>& value,
776
+ mat_t<Rows,Cols,Type>& adj_m, int& adj_row, vec_t<Cols,Type>& adj_value)
777
+ {
778
+ #ifndef NDEBUG
779
+ if (row < 0 || row >= Rows)
780
+ {
781
+ printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
782
+ assert(0);
783
+ }
784
+ #endif
785
+
786
+ for(unsigned i=0; i < Cols; ++i)
787
+ {
788
+ adj_value[i] += adj_m.data[row][i];
789
+ }
790
+ }
791
+
792
+
793
+ template<unsigned Rows, unsigned Cols, typename Type>
794
+ inline CUDA_CALLABLE mat_t<Rows,Cols,Type> assign_copy(mat_t<Rows,Cols,Type>& m, int row, int col, Type value)
438
795
  {
439
796
  #ifndef NDEBUG
440
797
  if (row < 0 || row >= Rows)
@@ -456,7 +813,7 @@ inline CUDA_CALLABLE mat_t<Rows,Cols,Type> assign(mat_t<Rows,Cols,Type>& m, int
456
813
 
457
814
 
458
815
  template<unsigned Rows, unsigned Cols, typename Type>
459
- inline CUDA_CALLABLE mat_t<Rows,Cols,Type> assign(mat_t<Rows,Cols,Type>& m, int row, vec_t<Cols,Type>& value)
816
+ inline CUDA_CALLABLE mat_t<Rows,Cols,Type> assign_copy(mat_t<Rows,Cols,Type>& m, int row, vec_t<Cols,Type>& value)
460
817
  {
461
818
  #ifndef NDEBUG
462
819
  if (row < 0 || row >= Rows)
@@ -476,7 +833,7 @@ inline CUDA_CALLABLE mat_t<Rows,Cols,Type> assign(mat_t<Rows,Cols,Type>& m, int
476
833
 
477
834
 
478
835
  template<unsigned Rows, unsigned Cols, typename Type>
479
- inline CUDA_CALLABLE void adj_assign(mat_t<Rows,Cols,Type>& m, int row, int col, Type value,
836
+ inline CUDA_CALLABLE void adj_assign_copy(mat_t<Rows,Cols,Type>& m, int row, int col, Type value,
480
837
  mat_t<Rows,Cols,Type>& adj_m, int& adj_row, int& adj_col, Type& adj_value, const mat_t<Rows,Cols,Type>& adj_ret)
481
838
  {
482
839
  #ifndef NDEBUG
@@ -505,7 +862,7 @@ inline CUDA_CALLABLE void adj_assign(mat_t<Rows,Cols,Type>& m, int row, int col,
505
862
 
506
863
 
507
864
  template<unsigned Rows, unsigned Cols, typename Type>
508
- inline CUDA_CALLABLE void adj_assign(mat_t<Rows,Cols,Type>& m, int row, vec_t<Cols,Type>& value,
865
+ inline CUDA_CALLABLE void adj_assign_copy(mat_t<Rows,Cols,Type>& m, int row, vec_t<Cols,Type>& value,
509
866
  mat_t<Rows,Cols,Type>& adj_m, int& adj_row, vec_t<Cols,Type>& adj_value, const mat_t<Rows,Cols,Type>& adj_ret)
510
867
  {
511
868
  #ifndef NDEBUG
@@ -710,7 +1067,7 @@ inline CUDA_CALLABLE mat_t<Rows,ColsOut,Type> mul(const mat_t<Rows,Cols,Type>& a
710
1067
  mat_t<Rows,ColsOut,Type> t(0);
711
1068
  for (unsigned i=0; i < Rows; ++i)
712
1069
  {
713
- for (unsigned j=0; j < ColsOut; ++j)
1070
+ for (unsigned j=0; j < ColsOut; ++j)
714
1071
  {
715
1072
  Type sum(0.0);
716
1073
 
@@ -1573,6 +1930,128 @@ inline CUDA_CALLABLE void adj_mat_t(const vec_t<4,Type> &cmps0, const vec_t<4,Ty
1573
1930
  }
1574
1931
  }
1575
1932
 
1933
+ template<typename Type>
1934
+ inline CUDA_CALLABLE void adj_matrix_from_cols(
1935
+ const vec_t<2, Type>& c0, const vec_t<2, Type>& c1,
1936
+ vec_t<2, Type>& adj_c0, vec_t<2, Type>& adj_c1,
1937
+ const mat_t<2, 2, Type>& adj_ret
1938
+ )
1939
+ {
1940
+ for (unsigned i=0; i < 2; ++i)
1941
+ {
1942
+ adj_c0[i] += adj_ret.data[i][0];
1943
+ adj_c1[i] += adj_ret.data[i][1];
1944
+ }
1945
+ }
1946
+
1947
+ template<typename Type>
1948
+ inline CUDA_CALLABLE void adj_matrix_from_cols(
1949
+ const vec_t<3, Type>& c0, const vec_t<3, Type>& c1, const vec_t<3, Type>& c2,
1950
+ vec_t<3, Type>& adj_c0, vec_t<3, Type>& adj_c1, vec_t<3, Type>& adj_c2,
1951
+ const mat_t<3, 3, Type>& adj_ret
1952
+ )
1953
+ {
1954
+ for (unsigned i=0; i < 3; ++i)
1955
+ {
1956
+ adj_c0[i] += adj_ret.data[i][0];
1957
+ adj_c1[i] += adj_ret.data[i][1];
1958
+ adj_c2[i] += adj_ret.data[i][2];
1959
+ }
1960
+ }
1961
+
1962
+ template<typename Type>
1963
+ inline CUDA_CALLABLE void adj_matrix_from_cols(
1964
+ const vec_t<4, Type>& c0, const vec_t<4, Type>& c1, const vec_t<4, Type>& c2, const vec_t<4, Type>& c3,
1965
+ vec_t<4, Type>& adj_c0, vec_t<4, Type>& adj_c1, vec_t<4, Type>& adj_c2, vec_t<4, Type>& adj_c3,
1966
+ const mat_t<4, 4, Type>& adj_ret
1967
+ )
1968
+ {
1969
+ for (unsigned i=0; i < 4; ++i)
1970
+ {
1971
+ adj_c0[i] += adj_ret.data[i][0];
1972
+ adj_c1[i] += adj_ret.data[i][1];
1973
+ adj_c2[i] += adj_ret.data[i][2];
1974
+ adj_c3[i] += adj_ret.data[i][3];
1975
+ }
1976
+ }
1977
+
1978
+ template<unsigned Rows, unsigned Cols, typename Type>
1979
+ inline CUDA_CALLABLE void adj_matrix_from_cols(
1980
+ const initializer_array<Cols, vec_t<Rows, Type> >& l,
1981
+ const initializer_array<Cols, vec_t<Rows, Type>* >& adj_l,
1982
+ const mat_t<Rows, Cols, Type>& adj_ret
1983
+ )
1984
+ {
1985
+ for (unsigned j=0; j < Cols; ++j)
1986
+ {
1987
+ for (unsigned i=0; i < Rows; ++i)
1988
+ {
1989
+ (*adj_l[j])[i] += adj_ret.data[i][j];
1990
+ }
1991
+ }
1992
+ }
1993
+
1994
+ template<typename Type>
1995
+ inline CUDA_CALLABLE void adj_matrix_from_rows(
1996
+ const vec_t<2, Type>& r0, const vec_t<2, Type>& r1,
1997
+ vec_t<2, Type>& adj_r0, vec_t<2, Type>& adj_r1,
1998
+ const mat_t<2, 2, Type>& adj_ret
1999
+ )
2000
+ {
2001
+ for (unsigned j=0; j < 2; ++j)
2002
+ {
2003
+ adj_r0[j] += adj_ret.data[0][j];
2004
+ adj_r1[j] += adj_ret.data[1][j];
2005
+ }
2006
+ }
2007
+
2008
+ template<typename Type>
2009
+ inline CUDA_CALLABLE void adj_matrix_from_rows(
2010
+ const vec_t<3, Type>& c0, const vec_t<3, Type>& c1, const vec_t<3, Type>& c2,
2011
+ vec_t<3, Type>& adj_c0, vec_t<3, Type>& adj_c1, vec_t<3, Type>& adj_c2,
2012
+ const mat_t<3, 3, Type>& adj_ret
2013
+ )
2014
+ {
2015
+ for (unsigned j=0; j < 3; ++j)
2016
+ {
2017
+ adj_c0[j] += adj_ret.data[0][j];
2018
+ adj_c1[j] += adj_ret.data[1][j];
2019
+ adj_c2[j] += adj_ret.data[2][j];
2020
+ }
2021
+ }
2022
+
2023
+ template<typename Type>
2024
+ inline CUDA_CALLABLE void adj_matrix_from_rows(
2025
+ const vec_t<4, Type>& c0, const vec_t<4, Type>& c1, const vec_t<4, Type>& c2, const vec_t<4, Type>& c3,
2026
+ vec_t<4, Type>& adj_c0, vec_t<4, Type>& adj_c1, vec_t<4, Type>& adj_c2, vec_t<4, Type>& adj_c3,
2027
+ const mat_t<4, 4, Type>& adj_ret
2028
+ )
2029
+ {
2030
+ for (unsigned j=0; j < 4; ++j)
2031
+ {
2032
+ adj_c0[j] += adj_ret.data[0][j];
2033
+ adj_c1[j] += adj_ret.data[1][j];
2034
+ adj_c2[j] += adj_ret.data[2][j];
2035
+ adj_c3[j] += adj_ret.data[3][j];
2036
+ }
2037
+ }
2038
+
2039
+ template<unsigned Rows, unsigned Cols, typename Type>
2040
+ inline CUDA_CALLABLE void adj_matrix_from_rows(
2041
+ const initializer_array<Rows, vec_t<Cols, Type> >& l,
2042
+ const initializer_array<Rows, vec_t<Cols, Type>* >& adj_l,
2043
+ const mat_t<Rows, Cols, Type>& adj_ret
2044
+ )
2045
+ {
2046
+ for (unsigned i=0; i < Rows; ++i)
2047
+ {
2048
+ for (unsigned j=0; j < Cols; ++j)
2049
+ {
2050
+ (*adj_l[i])[j] += adj_ret.data[i][j];
2051
+ }
2052
+ }
2053
+ }
2054
+
1576
2055
  template<unsigned Rows, unsigned Cols, typename Type>
1577
2056
  CUDA_CALLABLE inline mat_t<Rows, Cols, Type> lerp(const mat_t<Rows, Cols, Type>& a, const mat_t<Rows, Cols, Type>& b, Type t)
1578
2057
  {
@@ -1713,4 +2192,29 @@ CUDA_CALLABLE inline void adj_len(const mat_t<Rows,Cols,Type>& x, mat_t<Rows,Col
1713
2192
  {
1714
2193
  }
1715
2194
 
2195
+ template<unsigned Rows, unsigned Cols, typename Type>
2196
+ inline CUDA_CALLABLE void expect_near(const mat_t<Rows,Cols,Type>& actual, const mat_t<Rows,Cols,Type>& expected, const Type& tolerance)
2197
+ {
2198
+ Type diff(0);
2199
+ for (unsigned i = 0; i < Rows; ++i)
2200
+ {
2201
+ for (unsigned j = 0; j < Cols; ++j)
2202
+ {
2203
+ diff = max(diff, abs(actual.data[i][j] - expected.data[i][j]));
2204
+ }
2205
+ }
2206
+ if (diff > tolerance)
2207
+ {
2208
+ printf("Error, expect_near() failed with tolerance "); print(tolerance);
2209
+ printf("\t Expected: "); print(expected);
2210
+ printf("\t Actual: "); print(actual);
2211
+ }
2212
+ }
2213
+
2214
+ template<unsigned Rows, unsigned Cols, typename Type>
2215
+ inline CUDA_CALLABLE void adj_expect_near(const mat_t<Rows,Cols,Type>& actual, const mat_t<Rows,Cols,Type>& expected, Type tolerance, mat_t<Rows,Cols,Type>& adj_actual, mat_t<Rows,Cols,Type>& adj_expected, Type adj_tolerance)
2216
+ {
2217
+ // nop
2218
+ }
2219
+
1716
2220
  } // namespace wp
warp/native/mesh.h CHANGED
@@ -123,7 +123,7 @@ CUDA_CALLABLE inline bool mesh_query_point(uint64_t id, const vec3& point, float
123
123
  {
124
124
  Mesh mesh = mesh_get(id);
125
125
 
126
- int stack[32];
126
+ int stack[BVH_QUERY_STACK_SIZE];
127
127
  stack[0] = *mesh.bvh.root;
128
128
 
129
129
  int count = 1;
@@ -311,7 +311,7 @@ CUDA_CALLABLE inline bool mesh_query_point_no_sign(uint64_t id, const vec3& poin
311
311
  {
312
312
  Mesh mesh = mesh_get(id);
313
313
 
314
- int stack[32];
314
+ int stack[BVH_QUERY_STACK_SIZE];
315
315
  stack[0] = *mesh.bvh.root;
316
316
 
317
317
  int count = 1;
@@ -495,7 +495,7 @@ CUDA_CALLABLE inline bool mesh_query_furthest_point_no_sign(uint64_t id, const v
495
495
  {
496
496
  Mesh mesh = mesh_get(id);
497
497
 
498
- int stack[32];
498
+ int stack[BVH_QUERY_STACK_SIZE];
499
499
  stack[0] = *mesh.bvh.root;
500
500
 
501
501
  int count = 1;
@@ -682,7 +682,7 @@ CUDA_CALLABLE inline bool mesh_query_point_sign_normal(uint64_t id, const vec3&
682
682
  {
683
683
  Mesh mesh = mesh_get(id);
684
684
 
685
- int stack[32];
685
+ int stack[BVH_QUERY_STACK_SIZE];
686
686
  stack[0] = *mesh.bvh.root;
687
687
  int count = 1;
688
688
  float min_dist = max_dist;
@@ -926,9 +926,9 @@ CUDA_CALLABLE inline float solid_angle_iterative(uint64_t id, const vec3& p, con
926
926
  {
927
927
  Mesh mesh = mesh_get(id);
928
928
 
929
- int stack[32];
930
- int at_child[32]; // 0 for left, 1 for right, 2 for done
931
- float angle[32];
929
+ int stack[BVH_QUERY_STACK_SIZE];
930
+ int at_child[BVH_QUERY_STACK_SIZE]; // 0 for left, 1 for right, 2 for done
931
+ float angle[BVH_QUERY_STACK_SIZE];
932
932
  stack[0] = *mesh.bvh.root;
933
933
  at_child[0] = 0;
934
934
 
@@ -1017,7 +1017,7 @@ CUDA_CALLABLE inline bool mesh_query_point_sign_winding_number(uint64_t id, cons
1017
1017
  {
1018
1018
  Mesh mesh = mesh_get(id);
1019
1019
 
1020
- int stack[32];
1020
+ int stack[BVH_QUERY_STACK_SIZE];
1021
1021
  stack[0] = *mesh.bvh.root;
1022
1022
 
1023
1023
  int count = 1;
@@ -1371,7 +1371,7 @@ CUDA_CALLABLE inline bool mesh_query_ray(uint64_t id, const vec3& start, const v
1371
1371
  {
1372
1372
  Mesh mesh = mesh_get(id);
1373
1373
 
1374
- int stack[32];
1374
+ int stack[BVH_QUERY_STACK_SIZE];
1375
1375
  stack[0] = *mesh.bvh.root;
1376
1376
  int count = 1;
1377
1377
 
@@ -1587,7 +1587,7 @@ struct mesh_query_aabb_t
1587
1587
  // Mesh Id
1588
1588
  Mesh mesh;
1589
1589
  // BVH traversal stack:
1590
- int stack[32];
1590
+ int stack[BVH_QUERY_STACK_SIZE];
1591
1591
  int count;
1592
1592
 
1593
1593
  // inputs