warp-lang 1.2.2__py3-none-manylinux2014_aarch64.whl → 1.3.0__py3-none-manylinux2014_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of warp-lang might be problematic. Click here for more details.

Files changed (193)
  1. warp/__init__.py +8 -6
  2. warp/autograd.py +823 -0
  3. warp/bin/warp.so +0 -0
  4. warp/build.py +6 -2
  5. warp/builtins.py +1410 -886
  6. warp/codegen.py +503 -166
  7. warp/config.py +48 -18
  8. warp/context.py +400 -198
  9. warp/dlpack.py +8 -0
  10. warp/examples/assets/bunny.usd +0 -0
  11. warp/examples/benchmarks/benchmark_cloth_warp.py +1 -1
  12. warp/examples/benchmarks/benchmark_interop_torch.py +158 -0
  13. warp/examples/benchmarks/benchmark_launches.py +1 -1
  14. warp/examples/core/example_cupy.py +78 -0
  15. warp/examples/fem/example_apic_fluid.py +17 -36
  16. warp/examples/fem/example_burgers.py +9 -18
  17. warp/examples/fem/example_convection_diffusion.py +7 -17
  18. warp/examples/fem/example_convection_diffusion_dg.py +27 -47
  19. warp/examples/fem/example_deformed_geometry.py +11 -22
  20. warp/examples/fem/example_diffusion.py +7 -18
  21. warp/examples/fem/example_diffusion_3d.py +24 -28
  22. warp/examples/fem/example_diffusion_mgpu.py +7 -14
  23. warp/examples/fem/example_magnetostatics.py +190 -0
  24. warp/examples/fem/example_mixed_elasticity.py +111 -80
  25. warp/examples/fem/example_navier_stokes.py +30 -34
  26. warp/examples/fem/example_nonconforming_contact.py +290 -0
  27. warp/examples/fem/example_stokes.py +17 -32
  28. warp/examples/fem/example_stokes_transfer.py +12 -21
  29. warp/examples/fem/example_streamlines.py +350 -0
  30. warp/examples/fem/utils.py +936 -0
  31. warp/fabric.py +5 -2
  32. warp/fem/__init__.py +13 -3
  33. warp/fem/cache.py +161 -11
  34. warp/fem/dirichlet.py +37 -28
  35. warp/fem/domain.py +105 -14
  36. warp/fem/field/__init__.py +14 -3
  37. warp/fem/field/field.py +454 -11
  38. warp/fem/field/nodal_field.py +33 -18
  39. warp/fem/geometry/deformed_geometry.py +50 -15
  40. warp/fem/geometry/hexmesh.py +12 -24
  41. warp/fem/geometry/nanogrid.py +106 -31
  42. warp/fem/geometry/quadmesh_2d.py +6 -11
  43. warp/fem/geometry/tetmesh.py +103 -61
  44. warp/fem/geometry/trimesh_2d.py +98 -47
  45. warp/fem/integrate.py +231 -186
  46. warp/fem/operator.py +14 -9
  47. warp/fem/quadrature/pic_quadrature.py +35 -9
  48. warp/fem/quadrature/quadrature.py +119 -32
  49. warp/fem/space/basis_space.py +98 -22
  50. warp/fem/space/collocated_function_space.py +3 -1
  51. warp/fem/space/function_space.py +7 -2
  52. warp/fem/space/grid_2d_function_space.py +3 -3
  53. warp/fem/space/grid_3d_function_space.py +4 -4
  54. warp/fem/space/hexmesh_function_space.py +3 -2
  55. warp/fem/space/nanogrid_function_space.py +12 -14
  56. warp/fem/space/partition.py +45 -47
  57. warp/fem/space/restriction.py +19 -16
  58. warp/fem/space/shape/cube_shape_function.py +91 -3
  59. warp/fem/space/shape/shape_function.py +7 -0
  60. warp/fem/space/shape/square_shape_function.py +32 -0
  61. warp/fem/space/shape/tet_shape_function.py +11 -7
  62. warp/fem/space/shape/triangle_shape_function.py +10 -1
  63. warp/fem/space/topology.py +116 -42
  64. warp/fem/types.py +8 -1
  65. warp/fem/utils.py +301 -83
  66. warp/native/array.h +16 -0
  67. warp/native/builtin.h +0 -15
  68. warp/native/cuda_util.cpp +14 -6
  69. warp/native/exports.h +1348 -1308
  70. warp/native/quat.h +79 -0
  71. warp/native/rand.h +27 -4
  72. warp/native/sparse.cpp +83 -81
  73. warp/native/sparse.cu +381 -453
  74. warp/native/vec.h +64 -0
  75. warp/native/volume.cpp +40 -49
  76. warp/native/volume_builder.cu +2 -3
  77. warp/native/volume_builder.h +12 -17
  78. warp/native/warp.cu +3 -3
  79. warp/native/warp.h +69 -59
  80. warp/render/render_opengl.py +17 -9
  81. warp/sim/articulation.py +117 -17
  82. warp/sim/collide.py +35 -29
  83. warp/sim/model.py +123 -18
  84. warp/sim/render.py +3 -1
  85. warp/sparse.py +867 -203
  86. warp/stubs.py +312 -541
  87. warp/tape.py +29 -1
  88. warp/tests/disabled_kinematics.py +1 -1
  89. warp/tests/test_adam.py +1 -1
  90. warp/tests/test_arithmetic.py +1 -1
  91. warp/tests/test_array.py +58 -1
  92. warp/tests/test_array_reduce.py +1 -1
  93. warp/tests/test_async.py +1 -1
  94. warp/tests/test_atomic.py +1 -1
  95. warp/tests/test_bool.py +1 -1
  96. warp/tests/test_builtins_resolution.py +1 -1
  97. warp/tests/test_bvh.py +6 -1
  98. warp/tests/test_closest_point_edge_edge.py +1 -1
  99. warp/tests/test_codegen.py +66 -1
  100. warp/tests/test_compile_consts.py +1 -1
  101. warp/tests/test_conditional.py +1 -1
  102. warp/tests/test_copy.py +1 -1
  103. warp/tests/test_ctypes.py +1 -1
  104. warp/tests/test_dense.py +1 -1
  105. warp/tests/test_devices.py +1 -1
  106. warp/tests/test_dlpack.py +1 -1
  107. warp/tests/test_examples.py +33 -4
  108. warp/tests/test_fabricarray.py +5 -2
  109. warp/tests/test_fast_math.py +1 -1
  110. warp/tests/test_fem.py +213 -6
  111. warp/tests/test_fp16.py +1 -1
  112. warp/tests/test_func.py +1 -1
  113. warp/tests/test_future_annotations.py +90 -0
  114. warp/tests/test_generics.py +1 -1
  115. warp/tests/test_grad.py +1 -1
  116. warp/tests/test_grad_customs.py +1 -1
  117. warp/tests/test_grad_debug.py +247 -0
  118. warp/tests/test_hash_grid.py +6 -1
  119. warp/tests/test_implicit_init.py +354 -0
  120. warp/tests/test_import.py +1 -1
  121. warp/tests/test_indexedarray.py +1 -1
  122. warp/tests/test_intersect.py +1 -1
  123. warp/tests/test_jax.py +1 -1
  124. warp/tests/test_large.py +1 -1
  125. warp/tests/test_launch.py +1 -1
  126. warp/tests/test_lerp.py +1 -1
  127. warp/tests/test_linear_solvers.py +1 -1
  128. warp/tests/test_lvalue.py +1 -1
  129. warp/tests/test_marching_cubes.py +5 -2
  130. warp/tests/test_mat.py +34 -35
  131. warp/tests/test_mat_lite.py +2 -1
  132. warp/tests/test_mat_scalar_ops.py +1 -1
  133. warp/tests/test_math.py +1 -1
  134. warp/tests/test_matmul.py +20 -16
  135. warp/tests/test_matmul_lite.py +1 -1
  136. warp/tests/test_mempool.py +1 -1
  137. warp/tests/test_mesh.py +5 -2
  138. warp/tests/test_mesh_query_aabb.py +1 -1
  139. warp/tests/test_mesh_query_point.py +1 -1
  140. warp/tests/test_mesh_query_ray.py +1 -1
  141. warp/tests/test_mlp.py +1 -1
  142. warp/tests/test_model.py +1 -1
  143. warp/tests/test_module_hashing.py +77 -1
  144. warp/tests/test_modules_lite.py +1 -1
  145. warp/tests/test_multigpu.py +1 -1
  146. warp/tests/test_noise.py +1 -1
  147. warp/tests/test_operators.py +1 -1
  148. warp/tests/test_options.py +1 -1
  149. warp/tests/test_overwrite.py +542 -0
  150. warp/tests/test_peer.py +1 -1
  151. warp/tests/test_pinned.py +1 -1
  152. warp/tests/test_print.py +1 -1
  153. warp/tests/test_quat.py +15 -1
  154. warp/tests/test_rand.py +1 -1
  155. warp/tests/test_reload.py +1 -1
  156. warp/tests/test_rounding.py +1 -1
  157. warp/tests/test_runlength_encode.py +1 -1
  158. warp/tests/test_scalar_ops.py +95 -0
  159. warp/tests/test_sim_grad.py +1 -1
  160. warp/tests/test_sim_kinematics.py +1 -1
  161. warp/tests/test_smoothstep.py +1 -1
  162. warp/tests/test_sparse.py +82 -15
  163. warp/tests/test_spatial.py +1 -1
  164. warp/tests/test_special_values.py +2 -11
  165. warp/tests/test_streams.py +11 -1
  166. warp/tests/test_struct.py +1 -1
  167. warp/tests/test_tape.py +1 -1
  168. warp/tests/test_torch.py +194 -1
  169. warp/tests/test_transient_module.py +1 -1
  170. warp/tests/test_types.py +1 -1
  171. warp/tests/test_utils.py +1 -1
  172. warp/tests/test_vec.py +15 -63
  173. warp/tests/test_vec_lite.py +2 -1
  174. warp/tests/test_vec_scalar_ops.py +65 -1
  175. warp/tests/test_verify_fp.py +1 -1
  176. warp/tests/test_volume.py +28 -2
  177. warp/tests/test_volume_write.py +1 -1
  178. warp/tests/unittest_serial.py +1 -1
  179. warp/tests/unittest_suites.py +9 -1
  180. warp/tests/walkthrough_debug.py +1 -1
  181. warp/thirdparty/unittest_parallel.py +2 -5
  182. warp/torch.py +103 -41
  183. warp/types.py +341 -224
  184. warp/utils.py +11 -2
  185. {warp_lang-1.2.2.dist-info → warp_lang-1.3.0.dist-info}/METADATA +99 -46
  186. warp_lang-1.3.0.dist-info/RECORD +368 -0
  187. warp/examples/fem/bsr_utils.py +0 -378
  188. warp/examples/fem/mesh_utils.py +0 -133
  189. warp/examples/fem/plot_utils.py +0 -292
  190. warp_lang-1.2.2.dist-info/RECORD +0 -359
  191. {warp_lang-1.2.2.dist-info → warp_lang-1.3.0.dist-info}/LICENSE.md +0 -0
  192. {warp_lang-1.2.2.dist-info → warp_lang-1.3.0.dist-info}/WHEEL +0 -0
  193. {warp_lang-1.2.2.dist-info → warp_lang-1.3.0.dist-info}/top_level.txt +0 -0
warp/native/vec.h CHANGED
@@ -648,6 +648,30 @@ inline CUDA_CALLABLE unsigned argmax(vec_t<Length,Type> v)
648
648
  return ret;
649
649
  }
650
650
 
651
+ template<unsigned Length, typename Type>
652
+ inline CUDA_CALLABLE vec_t<Length,Type> abs(vec_t<Length,Type> v)
653
+ {
654
+ vec_t<Length,Type> ret;
655
+ for (unsigned i=0; i < Length; ++i)
656
+ {
657
+ ret[i] = abs(v[i]);
658
+ }
659
+
660
+ return ret;
661
+ }
662
+
663
+ template<unsigned Length, typename Type>
664
+ inline CUDA_CALLABLE vec_t<Length,Type> sign(vec_t<Length,Type> v)
665
+ {
666
+ vec_t<Length,Type> ret;
667
+ for (unsigned i=0; i < Length; ++i)
668
+ {
669
+ ret[i] = v[i] < Type(0) ? Type(-1) : Type(1);
670
+ }
671
+
672
+ return ret;
673
+ }
674
+
651
675
  template<unsigned Length, typename Type>
652
676
  inline CUDA_CALLABLE void expect_near(const vec_t<Length, Type>& actual, const vec_t<Length, Type>& expected, const Type& tolerance)
653
677
  {
@@ -1046,6 +1070,46 @@ inline CUDA_CALLABLE void adj_max(const vec_t<Length,Type> &v, vec_t<Length,Type
1046
1070
  adj_v[i] += adj_ret;
1047
1071
  }
1048
1072
 
1073
+ template<unsigned Length, typename Type>
1074
+ inline CUDA_CALLABLE void adj_abs(
1075
+ const vec_t<Length,Type>& v,
1076
+ vec_t<Length,Type>& adj_v,
1077
+ const vec_t<Length,Type>& adj_ret
1078
+ )
1079
+ {
1080
+ for (unsigned i=0; i < Length; ++i)
1081
+ {
1082
+ if (v[i] < Type(0))
1083
+ {
1084
+ adj_v[i] -= adj_ret[i];
1085
+ }
1086
+ else
1087
+ {
1088
+ adj_v[i] += adj_ret[i];
1089
+ }
1090
+ }
1091
+ }
1092
+
1093
+ template<unsigned Length, typename Type>
1094
+ inline CUDA_CALLABLE void adj_sign(
1095
+ const vec_t<Length,Type>& v,
1096
+ vec_t<Length,Type>& adj_v,
1097
+ const vec_t<Length,Type>& adj_ret
1098
+ )
1099
+ {
1100
+ for (unsigned i=0; i < Length; ++i)
1101
+ {
1102
+ if (v[i] < Type(0))
1103
+ {
1104
+ adj_v[i] -= adj_ret[i];
1105
+ }
1106
+ else
1107
+ {
1108
+ adj_v[i] += adj_ret[i];
1109
+ }
1110
+ }
1111
+ }
1112
+
1049
1113
  // Do I need to specialize these for different lengths?
1050
1114
  template<unsigned Length, typename Type>
1051
1115
  inline CUDA_CALLABLE vec_t<Length, Type> atomic_add(vec_t<Length, Type> * addr, vec_t<Length, Type> value)
warp/native/volume.cpp CHANGED
@@ -34,10 +34,7 @@ struct VolumeDesc
34
34
  // CUDA context for this volume (NULL if CPU)
35
35
  void *context;
36
36
 
37
- pnanovdb_buf_t as_pnano() const
38
- {
39
- return pnanovdb_make_buf(static_cast<uint32_t *>(buffer), size_in_bytes);
40
- }
37
+ pnanovdb_buf_t as_pnano() const { return pnanovdb_make_buf(static_cast<uint32_t *>(buffer), size_in_bytes); }
41
38
  };
42
39
 
43
40
  // Host-side volume descriptors. Maps each CPU/GPU volume buffer address (id) to a CPU desc
@@ -62,14 +59,18 @@ bool volume_exists(const void *id)
62
59
  return volume_get_descriptor((uint64_t)id, volume);
63
60
  }
64
61
 
65
- void volume_add_descriptor(uint64_t id, VolumeDesc &&volumeDesc)
66
- {
67
- g_volume_descriptors[id] = std::move(volumeDesc);
68
- }
62
+ void volume_add_descriptor(uint64_t id, VolumeDesc &&volumeDesc) { g_volume_descriptors[id] = std::move(volumeDesc); }
69
63
 
70
- void volume_rem_descriptor(uint64_t id)
64
+ void volume_rem_descriptor(uint64_t id) { g_volume_descriptors.erase(id); }
65
+
66
+ void volume_set_map(nanovdb::Map &map, const float transform[9], const float translation[3])
71
67
  {
72
- g_volume_descriptors.erase(id);
68
+ // Need to transpose as Map::set is transposing again
69
+ const mat_t<3, 3, double> transpose(transform[0], transform[3], transform[6], transform[1], transform[4], transform[7],
70
+ transform[2], transform[5], transform[8]);
71
+ const mat_t<3, 3, double> inv = inverse(transpose);
72
+
73
+ map.set(transpose.data, inv.data, translation);
73
74
  }
74
75
 
75
76
  } // anonymous namespace
@@ -380,74 +381,69 @@ void volume_destroy_device(uint64_t id)
380
381
  }
381
382
 
382
383
  #if WP_ENABLE_CUDA
383
- uint64_t volume_f_from_tiles_device(void *context, void *points, int num_points, float voxel_size, float bg_value,
384
- float tx, float ty, float tz, bool points_in_world_space)
384
+
385
+ uint64_t volume_f_from_tiles_device(void *context, void *points, int num_points, float transform[9],
386
+ float translation[3], bool points_in_world_space, float bg_value)
385
387
  {
386
388
  nanovdb::FloatGrid *grid;
387
389
  size_t gridSize;
388
390
  BuildGridParams<float> params;
389
- params.voxel_size = voxel_size;
390
391
  params.background_value = bg_value;
391
- params.translation = nanovdb::Vec3f{tx, ty, tz};
392
+ volume_set_map(params.map, transform, translation);
392
393
 
393
394
  build_grid_from_points(grid, gridSize, points, num_points, points_in_world_space, params);
394
395
 
395
396
  return volume_create_device(context, grid, gridSize, false, true);
396
397
  }
397
398
 
398
- uint64_t volume_v_from_tiles_device(void *context, void *points, int num_points, float voxel_size, float bg_value_x,
399
- float bg_value_y, float bg_value_z, float tx, float ty, float tz,
400
- bool points_in_world_space)
399
+ uint64_t volume_v_from_tiles_device(void *context, void *points, int num_points, float transform[9],
400
+ float translation[3], bool points_in_world_space, float bg_value[3])
401
401
  {
402
402
  nanovdb::Vec3fGrid *grid;
403
403
  size_t gridSize;
404
404
  BuildGridParams<nanovdb::Vec3f> params;
405
- params.voxel_size = voxel_size;
406
- params.background_value = nanovdb::Vec3f{bg_value_x, bg_value_y, bg_value_z};
407
- params.translation = nanovdb::Vec3f{tx, ty, tz};
405
+ params.background_value = nanovdb::Vec3f{bg_value[0], bg_value[1], bg_value[2]};
406
+ volume_set_map(params.map, transform, translation);
408
407
 
409
408
  build_grid_from_points(grid, gridSize, points, num_points, points_in_world_space, params);
410
409
 
411
410
  return volume_create_device(context, grid, gridSize, false, true);
412
411
  }
413
412
 
414
- uint64_t volume_i_from_tiles_device(void *context, void *points, int num_points, float voxel_size, int bg_value,
415
- float tx, float ty, float tz, bool points_in_world_space)
413
+ uint64_t volume_i_from_tiles_device(void *context, void *points, int num_points, float transform[9],
414
+ float translation[3], bool points_in_world_space, int bg_value)
416
415
  {
417
416
  nanovdb::Int32Grid *grid;
418
417
  size_t gridSize;
419
418
  BuildGridParams<int32_t> params;
420
- params.voxel_size = voxel_size;
421
419
  params.background_value = (int32_t)(bg_value);
422
- params.translation = nanovdb::Vec3f{tx, ty, tz};
420
+ volume_set_map(params.map, transform, translation);
423
421
 
424
422
  build_grid_from_points(grid, gridSize, points, num_points, points_in_world_space, params);
425
423
 
426
424
  return volume_create_device(context, grid, gridSize, false, true);
427
425
  }
428
426
 
429
- uint64_t volume_index_from_tiles_device(void *context, void *points, int num_points, float voxel_size, float tx,
430
- float ty, float tz, bool points_in_world_space)
427
+ uint64_t volume_index_from_tiles_device(void *context, void *points, int num_points, float transform[9],
428
+ float translation[3], bool points_in_world_space)
431
429
  {
432
430
  nanovdb::IndexGrid *grid;
433
431
  size_t gridSize;
434
432
  BuildGridParams<nanovdb::ValueIndex> params;
435
- params.voxel_size = voxel_size;
436
- params.translation = nanovdb::Vec3f{tx, ty, tz};
433
+ volume_set_map(params.map, transform, translation);
437
434
 
438
435
  build_grid_from_points(grid, gridSize, points, num_points, points_in_world_space, params);
439
436
 
440
437
  return volume_create_device(context, grid, gridSize, false, true);
441
438
  }
442
439
 
443
- uint64_t volume_from_active_voxels_device(void *context, void *points, int num_points, float voxel_size, float tx,
444
- float ty, float tz, bool points_in_world_space)
440
+ uint64_t volume_from_active_voxels_device(void *context, void *points, int num_points, float transform[9],
441
+ float translation[3], bool points_in_world_space)
445
442
  {
446
443
  nanovdb::OnIndexGrid *grid;
447
444
  size_t gridSize;
448
445
  BuildGridParams<nanovdb::ValueOnIndex> params;
449
- params.voxel_size = voxel_size;
450
- params.translation = nanovdb::Vec3f{tx, ty, tz};
446
+ volume_set_map(params.map, transform, translation);
451
447
 
452
448
  build_grid_from_points(grid, gridSize, points, num_points, points_in_world_space, params);
453
449
 
@@ -487,43 +483,38 @@ void volume_get_voxels_device(uint64_t id, void *buf)
487
483
 
488
484
  #else
489
485
  // stubs for non-CUDA platforms
490
- uint64_t volume_f_from_tiles_device(void *context, void *points, int num_points, float voxel_size, float bg_value,
491
- float tx, float ty, float tz, bool points_in_world_space)
486
+ uint64_t volume_f_from_tiles_device(void *context, void *points, int num_points, float transform[9],
487
+ float translation[3], bool points_in_world_space, float bg_value)
492
488
  {
493
489
  return 0;
494
490
  }
495
491
 
496
- uint64_t volume_v_from_tiles_device(void *context, void *points, int num_points, float voxel_size, float bg_value_x,
497
- float bg_value_y, float bg_value_z, float tx, float ty, float tz,
498
- bool points_in_world_space)
492
+ uint64_t volume_v_from_tiles_device(void *context, void *points, int num_points, float transform[9],
493
+ float translation[3], bool points_in_world_space, float bg_value[3])
499
494
  {
500
495
  return 0;
501
496
  }
502
497
 
503
- uint64_t volume_i_from_tiles_device(void *context, void *points, int num_points, float voxel_size, int bg_value,
504
- float tx, float ty, float tz, bool points_in_world_space)
498
+ uint64_t volume_i_from_tiles_device(void *context, void *points, int num_points, float transform[9],
499
+ float translation[3], bool points_in_world_space, int bg_value)
505
500
  {
506
501
  return 0;
507
502
  }
508
503
 
509
- uint64_t volume_index_from_tiles_device(void *context, void *points, int num_points, float voxel_size,
510
- float tx, float ty, float tz, bool points_in_world_space)
504
+ uint64_t volume_index_from_tiles_device(void *context, void *points, int num_points, float transform[9],
505
+ float translation[3], bool points_in_world_space)
511
506
  {
512
507
  return 0;
513
508
  }
514
509
 
515
- uint64_t volume_from_active_voxels_device(void *context, void *points, int num_points, float voxel_size, float tx,
516
- float ty, float tz, bool points_in_world_space)
510
+ uint64_t volume_from_active_voxels_device(void *context, void *points, int num_points, float transform[9],
511
+ float translation[3], bool points_in_world_space)
517
512
  {
518
513
  return 0;
519
514
  }
520
515
 
521
- void volume_get_tiles_device(uint64_t id, void *buf)
522
- {
523
- }
516
+ void volume_get_tiles_device(uint64_t id, void *buf) {}
524
517
 
525
- void volume_get_voxels_device(uint64_t id, void *buf)
526
- {
527
- }
518
+ void volume_get_voxels_device(uint64_t id, void *buf) {}
528
519
 
529
520
  #endif
warp/native/volume_builder.cu CHANGED
@@ -403,8 +403,7 @@ void build_grid_from_points(nanovdb::Grid<nanovdb::NanoTree<BuildT>> *&out_grid,
403
403
  out_grid_size = 0;
404
404
 
405
405
  cudaStream_t stream = static_cast<cudaStream_t>(cuda_stream_get_current());
406
- nanovdb::Map map(params.voxel_size, params.translation);
407
- nanovdb::tools::cuda::PointsToGrid<BuildT, Allocator> p2g(map, stream);
406
+ nanovdb::tools::cuda::PointsToGrid<BuildT, Allocator> p2g(params.map, stream);
408
407
 
409
408
  // p2g.setVerbose(2);
410
409
  p2g.setGridName(params.name);
@@ -417,7 +416,7 @@ void build_grid_from_points(nanovdb::Grid<nanovdb::NanoTree<BuildT>> *&out_grid,
417
416
 
418
417
  if (points_in_world_space)
419
418
  {
420
- grid_handle = p2g.getHandle(WorldSpacePointsPtr{static_cast<const nanovdb::Vec3f *>(points), map}, num_points,
419
+ grid_handle = p2g.getHandle(WorldSpacePointsPtr{static_cast<const nanovdb::Vec3f *>(points), params.map}, num_points,
421
420
  DeviceBuffer());
422
421
  }
423
422
  else
warp/native/volume_builder.h CHANGED
@@ -2,33 +2,28 @@
2
2
 
3
3
  #include <nanovdb/NanoVDB.h>
4
4
 
5
- template<typename BuildT>
6
- struct BuildGridParams {
7
- double voxel_size = 1.0;
5
+ template <typename BuildT> struct BuildGridParams
6
+ {
7
+ nanovdb::Map map;
8
8
  BuildT background_value{0};
9
- nanovdb::Vec3d translation{0.0, 0.0, 0.0};
10
9
  char name[256] = "";
11
10
  };
12
11
 
13
- template<>
14
- struct BuildGridParams<nanovdb::ValueIndex> {
15
- double voxel_size = 1.0;
12
+ template <> struct BuildGridParams<nanovdb::ValueIndex>
13
+ {
14
+ nanovdb::Map map;
16
15
  nanovdb::ValueIndex background_value;
17
- nanovdb::Vec3d translation{0.0, 0.0, 0.0};
18
16
  char name[256] = "";
19
17
  };
20
18
 
21
- template<>
22
- struct BuildGridParams<nanovdb::ValueOnIndex> {
19
+ template <> struct BuildGridParams<nanovdb::ValueOnIndex>
20
+ {
21
+ nanovdb::Map map;
23
22
  double voxel_size = 1.0;
24
- nanovdb::Vec3d translation{0.0, 0.0, 0.0};
25
23
  char name[256] = "";
26
24
  };
27
25
 
28
26
  template <typename BuildT>
29
- void build_grid_from_points(nanovdb::Grid<nanovdb::NanoTree<BuildT>> *&out_grid,
30
- size_t &out_grid_size,
31
- const void *points,
32
- size_t num_points,
33
- bool points_in_world_space,
34
- const BuildGridParams<BuildT> &params);
27
+ void build_grid_from_points(nanovdb::Grid<nanovdb::NanoTree<BuildT>>*& out_grid, size_t& out_grid_size,
28
+ const void* points, size_t num_points, bool points_in_world_space,
29
+ const BuildGridParams<BuildT>& params);
warp/native/warp.cu CHANGED
@@ -755,7 +755,7 @@ bool memcpy_p2p(void* dst_context, void* dst, void* src_context, void* src, size
755
755
 
756
756
  __global__ void memset_kernel(int* dest, int value, size_t n)
757
757
  {
758
- const size_t tid = wp::grid_index();
758
+ const size_t tid = static_cast<size_t>(blockDim.x) * static_cast<size_t>(blockIdx.x) + static_cast<size_t>(threadIdx.x);
759
759
 
760
760
  if (tid < n)
761
761
  {
@@ -789,7 +789,7 @@ void memset_device(void* context, void* dest, int value, size_t n)
789
789
  // fill memory buffer with a value: generic memtile kernel using memcpy for each element
790
790
  __global__ void memtile_kernel(void* dst, const void* src, size_t srcsize, size_t n)
791
791
  {
792
- size_t tid = wp::grid_index();
792
+ size_t tid = static_cast<size_t>(blockDim.x) * static_cast<size_t>(blockIdx.x) + static_cast<size_t>(threadIdx.x);
793
793
  if (tid < n)
794
794
  {
795
795
  memcpy((int8_t*)dst + srcsize * tid, src, srcsize);
@@ -800,7 +800,7 @@ __global__ void memtile_kernel(void* dst, const void* src, size_t srcsize, size_
800
800
  template <typename T>
801
801
  __global__ void memtile_value_kernel(T* dst, T value, size_t n)
802
802
  {
803
- size_t tid = wp::grid_index();
803
+ size_t tid = static_cast<size_t>(blockDim.x) * static_cast<size_t>(blockIdx.x) + static_cast<size_t>(threadIdx.x);
804
804
  if (tid < n)
805
805
  {
806
806
  dst[tid] = value;
warp/native/warp.h CHANGED
@@ -107,11 +107,11 @@ extern "C"
107
107
  WP_API void volume_get_voxels_device(uint64_t id, void* buf);
108
108
  WP_API void volume_destroy_device(uint64_t id);
109
109
 
110
- WP_API uint64_t volume_f_from_tiles_device(void* context, void* points, int num_points, float voxel_size, float bg_value, float tx, float ty, float tz, bool points_in_world_space);
111
- WP_API uint64_t volume_v_from_tiles_device(void* context, void* points, int num_points, float voxel_size, float bg_value_x, float bg_value_y, float bg_value_z, float tx, float ty, float tz, bool points_in_world_space);
112
- WP_API uint64_t volume_i_from_tiles_device(void* context, void* points, int num_points, float voxel_size, int bg_value, float tx, float ty, float tz, bool points_in_world_space);
113
- WP_API uint64_t volume_index_from_tiles_device(void* context, void* points, int num_points, float voxel_size, float tx, float ty, float tz, bool points_in_world_space);
114
- WP_API uint64_t volume_from_active_voxels_device(void* context, void* points, int num_points, float voxel_size, float tx, float ty, float tz, bool points_in_world_space);
110
+ WP_API uint64_t volume_f_from_tiles_device(void* context, void* points, int num_points, float transform[9], float translation[3], bool points_in_world_space, float bg_value);
111
+ WP_API uint64_t volume_v_from_tiles_device(void* context, void* points, int num_points, float transform[9], float translation[3], bool points_in_world_space, float bg_value[3]);
112
+ WP_API uint64_t volume_i_from_tiles_device(void* context, void* points, int num_points, float transform[9], float translation[3], bool points_in_world_space, int bg_value);
113
+ WP_API uint64_t volume_index_from_tiles_device(void* context, void* points, int num_points, float transform[9], float translation[3], bool points_in_world_space);
114
+ WP_API uint64_t volume_from_active_voxels_device(void* context, void* points, int num_points, float transform[9], float translation[3], bool points_in_world_space);
115
115
 
116
116
  WP_API void volume_get_buffer_info(uint64_t id, void** buf, uint64_t* size);
117
117
  WP_API void volume_get_voxel_size(uint64_t id, float* dx, float* dy, float* dz);
@@ -154,81 +154,91 @@ extern "C"
154
154
  WP_API void runlength_encode_int_host(uint64_t values, uint64_t run_values, uint64_t run_lengths, uint64_t run_count, int n);
155
155
  WP_API void runlength_encode_int_device(uint64_t values, uint64_t run_values, uint64_t run_lengths, uint64_t run_count, int n);
156
156
 
157
- WP_API int bsr_matrix_from_triplets_float_host(
157
+ WP_API void bsr_matrix_from_triplets_float_host(
158
158
  int rows_per_block,
159
159
  int cols_per_block,
160
160
  int row_count,
161
- int nnz,
162
- uint64_t tpl_rows,
163
- uint64_t tpl_columns,
164
- uint64_t tpl_values,
165
- uint64_t bsr_offsets,
166
- uint64_t bsr_columns,
167
- uint64_t bsr_values);
168
- WP_API int bsr_matrix_from_triplets_double_host(
161
+ int tpl_nnz,
162
+ int* tpl_rows,
163
+ int* tpl_columns,
164
+ void* tpl_values,
165
+ bool prune_numerical_zeros,
166
+ int* bsr_offsets,
167
+ int* bsr_columns,
168
+ void* bsr_values,
169
+ int* bsr_nnz,
170
+ void* bsr_nnz_event);
171
+ WP_API void bsr_matrix_from_triplets_double_host(
169
172
  int rows_per_block,
170
173
  int cols_per_block,
171
174
  int row_count,
172
- int nnz,
173
- uint64_t tpl_rows,
174
- uint64_t tpl_columns,
175
- uint64_t tpl_values,
176
- uint64_t bsr_offsets,
177
- uint64_t bsr_columns,
178
- uint64_t bsr_values);
179
-
180
- WP_API int bsr_matrix_from_triplets_float_device(
175
+ int tpl_nnz,
176
+ int* tpl_rows,
177
+ int* tpl_columns,
178
+ void* tpl_values,
179
+ bool prune_numerical_zeros,
180
+ int* bsr_offsets,
181
+ int* bsr_columns,
182
+ void* bsr_values,
183
+ int* bsr_nnz,
184
+ void* bsr_nnz_event);
185
+ WP_API void bsr_matrix_from_triplets_float_device(
181
186
  int rows_per_block,
182
187
  int cols_per_block,
183
188
  int row_count,
184
- int nnz,
185
- uint64_t tpl_rows,
186
- uint64_t tpl_columns,
187
- uint64_t tpl_values,
188
- uint64_t bsr_offsets,
189
- uint64_t bsr_columns,
190
- uint64_t bsr_values);
191
- WP_API int bsr_matrix_from_triplets_double_device(
189
+ int tpl_nnz,
190
+ int* tpl_rows,
191
+ int* tpl_columns,
192
+ void* tpl_values,
193
+ bool prune_numerical_zeros,
194
+ int* bsr_offsets,
195
+ int* bsr_columns,
196
+ void* bsr_values,
197
+ int* bsr_nnz,
198
+ void* bsr_nnz_event);
199
+ WP_API void bsr_matrix_from_triplets_double_device(
192
200
  int rows_per_block,
193
201
  int cols_per_block,
194
202
  int row_count,
195
- int nnz,
196
- uint64_t tpl_rows,
197
- uint64_t tpl_columns,
198
- uint64_t tpl_values,
199
- uint64_t bsr_offsets,
200
- uint64_t bsr_columns,
201
- uint64_t bsr_values);
203
+ int tpl_nnz,
204
+ int* tpl_rows,
205
+ int* tpl_columns,
206
+ void* tpl_values,
207
+ bool prune_numerical_zeros,
208
+ int* bsr_offsets,
209
+ int* bsr_columns,
210
+ void* bsr_values,
211
+ int* bsr_nnz,
212
+ void* bsr_nnz_event);
202
213
 
203
214
  WP_API void bsr_transpose_float_host(int rows_per_block, int cols_per_block,
204
215
  int row_count, int col_count, int nnz,
205
- uint64_t bsr_offsets, uint64_t bsr_columns,
206
- uint64_t bsr_values,
207
- uint64_t transposed_bsr_offsets,
208
- uint64_t transposed_bsr_columns,
209
- uint64_t transposed_bsr_values);
216
+ int* bsr_offsets, int* bsr_columns,
217
+ void* bsr_values,
218
+ int* transposed_bsr_offsets,
219
+ int* transposed_bsr_columns,
220
+ void* transposed_bsr_values);
210
221
  WP_API void bsr_transpose_double_host(int rows_per_block, int cols_per_block,
211
222
  int row_count, int col_count, int nnz,
212
- uint64_t bsr_offsets, uint64_t bsr_columns,
213
- uint64_t bsr_values,
214
- uint64_t transposed_bsr_offsets,
215
- uint64_t transposed_bsr_columns,
216
- uint64_t transposed_bsr_values);
217
-
223
+ int* bsr_offsets, int* bsr_columns,
224
+ void* bsr_values,
225
+ int* transposed_bsr_offsets,
226
+ int* transposed_bsr_columns,
227
+ void* transposed_bsr_values);
218
228
  WP_API void bsr_transpose_float_device(int rows_per_block, int cols_per_block,
219
229
  int row_count, int col_count, int nnz,
220
- uint64_t bsr_offsets, uint64_t bsr_columns,
221
- uint64_t bsr_values,
222
- uint64_t transposed_bsr_offsets,
223
- uint64_t transposed_bsr_columns,
224
- uint64_t transposed_bsr_values);
230
+ int* bsr_offsets, int* bsr_columns,
231
+ void* bsr_values,
232
+ int* transposed_bsr_offsets,
233
+ int* transposed_bsr_columns,
234
+ void* transposed_bsr_values);
225
235
  WP_API void bsr_transpose_double_device(int rows_per_block, int cols_per_block,
226
236
  int row_count, int col_count, int nnz,
227
- uint64_t bsr_offsets, uint64_t bsr_columns,
228
- uint64_t bsr_values,
229
- uint64_t transposed_bsr_offsets,
230
- uint64_t transposed_bsr_columns,
231
- uint64_t transposed_bsr_values);
237
+ int* bsr_offsets, int* bsr_columns,
238
+ void* bsr_values,
239
+ int* transposed_bsr_offsets,
240
+ int* transposed_bsr_columns,
241
+ void* transposed_bsr_values);
232
242
 
233
243
 
234
244
  WP_API int cuda_driver_version(); // CUDA driver version
warp/render/render_opengl.py CHANGED
@@ -150,7 +150,7 @@ layout (location = 1) in vec3 aNormal;
150
150
  layout (location = 2) in vec2 aTexCoord;
151
151
 
152
152
  uniform mat4 view;
153
- uniform mat4 model;
153
+ uniform mat4 inv_model;
154
154
  uniform mat4 projection;
155
155
  uniform vec3 viewPos;
156
156
 
@@ -160,7 +160,8 @@ out vec2 TexCoord;
160
160
  void main()
161
161
  {
162
162
  vec4 worldPos = vec4(aPos + viewPos, 1.0);
163
- gl_Position = projection * view * worldPos;
163
+ gl_Position = projection * view * inv_model * worldPos;
164
+
164
165
  FragPos = vec3(worldPos);
165
166
  TexCoord = aTexCoord;
166
167
  }
@@ -666,12 +667,16 @@ class ShapeInstancer:
666
667
  [3D point, 3D normal, UV texture coordinates]
667
668
  """
668
669
 
670
+ def __new__(cls, *args, **kwargs):
671
+ instance = super(ShapeInstancer, cls).__new__(cls)
672
+ instance.instance_transform_gl_buffer = None
673
+ instance.vao = None
674
+ return instance
675
+
669
676
  def __init__(self, shape_shader, device):
670
677
  self.shape_shader = shape_shader
671
678
  self.device = device
672
679
  self.face_count = 0
673
- self.vao = None
674
- self.instance_transform_gl_buffer = None
675
680
  self.instance_color1_buffer = None
676
681
  self.instance_color2_buffer = None
677
682
  self.color1 = (1.0, 1.0, 1.0)
@@ -1074,6 +1079,7 @@ class OpenGLRenderer:
1074
1079
  self._scaling = scaling
1075
1080
 
1076
1081
  self._model_matrix = self.compute_model_matrix(self._camera_axis, scaling)
1082
+ self._inv_model_matrix = np.linalg.inv(self._model_matrix.reshape(4, 4)).flatten()
1077
1083
  self.update_view_matrix(cam_pos=camera_pos, cam_front=camera_front, cam_up=camera_up)
1078
1084
  self.update_projection_matrix()
1079
1085
 
@@ -1212,7 +1218,7 @@ class OpenGLRenderer:
1212
1218
 
1213
1219
  with self._sky_shader:
1214
1220
  self._loc_sky_view = gl.glGetUniformLocation(self._sky_shader.id, str_buffer("view"))
1215
- self._loc_sky_model = gl.glGetUniformLocation(self._sky_shader.id, str_buffer("model"))
1221
+ self._loc_sky_inv_model = gl.glGetUniformLocation(self._sky_shader.id, str_buffer("inv_model"))
1216
1222
  self._loc_sky_projection = gl.glGetUniformLocation(self._sky_shader.id, str_buffer("projection"))
1217
1223
 
1218
1224
  self._loc_sky_color1 = gl.glGetUniformLocation(self._sky_shader.id, str_buffer("color1"))
@@ -1748,26 +1754,28 @@ class OpenGLRenderer:
1748
1754
  if camera_axis == 0:
1749
1755
  return np.array((0, 0, scaling, 0, scaling, 0, 0, 0, 0, scaling, 0, 0, 0, 0, 0, 1), dtype=np.float32)
1750
1756
  elif camera_axis == 2:
1751
- return np.array((-scaling, 0, 0, 0, 0, 0, scaling, 0, 0, scaling, 0, 0, 0, 0, 0, 1), dtype=np.float32)
1757
+ return np.array((0, scaling, 0, 0, 0, 0, scaling, 0, scaling, 0, 0, 0, 0, 0, 0, 1), dtype=np.float32)
1752
1758
 
1753
1759
  return np.array((scaling, 0, 0, 0, 0, scaling, 0, 0, 0, 0, scaling, 0, 0, 0, 0, 1), dtype=np.float32)
1754
1760
 
1755
1761
  def update_model_matrix(self, model_matrix: Optional[Mat44] = None):
1756
1762
  from pyglet import gl
1757
1763
 
1758
- # fmt: off
1759
1764
  if model_matrix is None:
1760
1765
  self._model_matrix = self.compute_model_matrix(self._camera_axis, self._scaling)
1761
1766
  else:
1762
1767
  self._model_matrix = np.array(model_matrix).flatten()
1763
- # fmt: on
1768
+ self._inv_model_matrix = np.linalg.inv(self._model_matrix.reshape((4, 4))).flatten()
1769
+ # update model view matrix in shaders
1764
1770
  ptr = arr_pointer(self._model_matrix)
1765
1771
  gl.glUseProgram(self._shape_shader.id)
1766
1772
  gl.glUniformMatrix4fv(self._loc_shape_model, 1, gl.GL_FALSE, ptr)
1767
1773
  gl.glUseProgram(self._grid_shader.id)
1768
1774
  gl.glUniformMatrix4fv(self._loc_grid_model, 1, gl.GL_FALSE, ptr)
1775
+ # sky shader needs inverted model view matrix
1769
1776
  gl.glUseProgram(self._sky_shader.id)
1770
- gl.glUniformMatrix4fv(self._loc_sky_model, 1, gl.GL_FALSE, ptr)
1777
+ inv_ptr = arr_pointer(self._inv_model_matrix)
1778
+ gl.glUniformMatrix4fv(self._loc_sky_inv_model, 1, gl.GL_FALSE, inv_ptr)
1771
1779
 
1772
1780
  @property
1773
1781
  def num_tiles(self):