warp-lang 1.5.1__py3-none-manylinux2014_x86_64.whl → 1.6.1__py3-none-manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of warp-lang might be problematic. Click here for more details.

Files changed (131)
  1. warp/__init__.py +5 -0
  2. warp/autograd.py +414 -191
  3. warp/bin/warp-clang.so +0 -0
  4. warp/bin/warp.so +0 -0
  5. warp/build.py +40 -12
  6. warp/build_dll.py +13 -6
  7. warp/builtins.py +1077 -481
  8. warp/codegen.py +250 -122
  9. warp/config.py +65 -21
  10. warp/context.py +500 -149
  11. warp/examples/assets/square_cloth.usd +0 -0
  12. warp/examples/benchmarks/benchmark_gemm.py +27 -18
  13. warp/examples/benchmarks/benchmark_interop_paddle.py +3 -3
  14. warp/examples/benchmarks/benchmark_interop_torch.py +3 -3
  15. warp/examples/core/example_marching_cubes.py +1 -1
  16. warp/examples/core/example_mesh.py +1 -1
  17. warp/examples/core/example_torch.py +18 -34
  18. warp/examples/core/example_wave.py +1 -1
  19. warp/examples/fem/example_apic_fluid.py +1 -0
  20. warp/examples/fem/example_mixed_elasticity.py +1 -1
  21. warp/examples/optim/example_bounce.py +1 -1
  22. warp/examples/optim/example_cloth_throw.py +1 -1
  23. warp/examples/optim/example_diffray.py +4 -15
  24. warp/examples/optim/example_drone.py +1 -1
  25. warp/examples/optim/example_softbody_properties.py +392 -0
  26. warp/examples/optim/example_trajectory.py +1 -3
  27. warp/examples/optim/example_walker.py +5 -0
  28. warp/examples/sim/example_cartpole.py +0 -2
  29. warp/examples/sim/example_cloth_self_contact.py +314 -0
  30. warp/examples/sim/example_granular_collision_sdf.py +4 -5
  31. warp/examples/sim/example_jacobian_ik.py +0 -2
  32. warp/examples/sim/example_quadruped.py +5 -2
  33. warp/examples/tile/example_tile_cholesky.py +79 -0
  34. warp/examples/tile/example_tile_convolution.py +2 -2
  35. warp/examples/tile/example_tile_fft.py +2 -2
  36. warp/examples/tile/example_tile_filtering.py +3 -3
  37. warp/examples/tile/example_tile_matmul.py +4 -4
  38. warp/examples/tile/example_tile_mlp.py +12 -12
  39. warp/examples/tile/example_tile_nbody.py +191 -0
  40. warp/examples/tile/example_tile_walker.py +319 -0
  41. warp/math.py +147 -0
  42. warp/native/array.h +12 -0
  43. warp/native/builtin.h +0 -1
  44. warp/native/bvh.cpp +149 -70
  45. warp/native/bvh.cu +287 -68
  46. warp/native/bvh.h +195 -85
  47. warp/native/clang/clang.cpp +6 -2
  48. warp/native/crt.h +1 -0
  49. warp/native/cuda_util.cpp +35 -0
  50. warp/native/cuda_util.h +5 -0
  51. warp/native/exports.h +40 -40
  52. warp/native/intersect.h +17 -0
  53. warp/native/mat.h +57 -3
  54. warp/native/mathdx.cpp +19 -0
  55. warp/native/mesh.cpp +25 -8
  56. warp/native/mesh.cu +153 -101
  57. warp/native/mesh.h +482 -403
  58. warp/native/quat.h +40 -0
  59. warp/native/solid_angle.h +7 -0
  60. warp/native/sort.cpp +85 -0
  61. warp/native/sort.cu +34 -0
  62. warp/native/sort.h +3 -1
  63. warp/native/spatial.h +11 -0
  64. warp/native/tile.h +1189 -664
  65. warp/native/tile_reduce.h +8 -6
  66. warp/native/vec.h +41 -0
  67. warp/native/warp.cpp +8 -1
  68. warp/native/warp.cu +263 -40
  69. warp/native/warp.h +19 -5
  70. warp/optim/linear.py +22 -4
  71. warp/render/render_opengl.py +132 -59
  72. warp/render/render_usd.py +10 -2
  73. warp/sim/__init__.py +6 -1
  74. warp/sim/collide.py +289 -32
  75. warp/sim/import_urdf.py +20 -5
  76. warp/sim/integrator_euler.py +25 -7
  77. warp/sim/integrator_featherstone.py +147 -35
  78. warp/sim/integrator_vbd.py +842 -40
  79. warp/sim/model.py +173 -112
  80. warp/sim/render.py +2 -2
  81. warp/stubs.py +249 -116
  82. warp/tape.py +28 -30
  83. warp/tests/aux_test_module_unload.py +15 -0
  84. warp/tests/{test_sim_grad.py → flaky_test_sim_grad.py} +104 -63
  85. warp/tests/test_array.py +100 -0
  86. warp/tests/test_assert.py +242 -0
  87. warp/tests/test_codegen.py +14 -61
  88. warp/tests/test_collision.py +8 -8
  89. warp/tests/test_examples.py +16 -1
  90. warp/tests/test_grad_debug.py +87 -2
  91. warp/tests/test_hash_grid.py +1 -1
  92. warp/tests/test_ipc.py +116 -0
  93. warp/tests/test_launch.py +77 -26
  94. warp/tests/test_mat.py +213 -168
  95. warp/tests/test_math.py +47 -1
  96. warp/tests/test_matmul.py +11 -7
  97. warp/tests/test_matmul_lite.py +4 -4
  98. warp/tests/test_mesh.py +84 -60
  99. warp/tests/test_mesh_query_aabb.py +165 -0
  100. warp/tests/test_mesh_query_point.py +328 -286
  101. warp/tests/test_mesh_query_ray.py +134 -121
  102. warp/tests/test_mlp.py +2 -2
  103. warp/tests/test_operators.py +43 -0
  104. warp/tests/test_overwrite.py +6 -5
  105. warp/tests/test_quat.py +77 -0
  106. warp/tests/test_reload.py +29 -0
  107. warp/tests/test_sim_grad_bounce_linear.py +204 -0
  108. warp/tests/test_static.py +16 -0
  109. warp/tests/test_tape.py +25 -0
  110. warp/tests/test_tile.py +134 -191
  111. warp/tests/test_tile_load.py +399 -0
  112. warp/tests/test_tile_mathdx.py +61 -8
  113. warp/tests/test_tile_mlp.py +17 -17
  114. warp/tests/test_tile_reduce.py +24 -18
  115. warp/tests/test_tile_shared_memory.py +66 -17
  116. warp/tests/test_tile_view.py +165 -0
  117. warp/tests/test_torch.py +35 -0
  118. warp/tests/test_utils.py +36 -24
  119. warp/tests/test_vec.py +110 -0
  120. warp/tests/unittest_suites.py +29 -4
  121. warp/tests/unittest_utils.py +30 -11
  122. warp/thirdparty/unittest_parallel.py +5 -2
  123. warp/types.py +419 -111
  124. warp/utils.py +9 -5
  125. {warp_lang-1.5.1.dist-info → warp_lang-1.6.1.dist-info}/METADATA +86 -45
  126. {warp_lang-1.5.1.dist-info → warp_lang-1.6.1.dist-info}/RECORD +129 -118
  127. {warp_lang-1.5.1.dist-info → warp_lang-1.6.1.dist-info}/WHEEL +1 -1
  128. warp/examples/benchmarks/benchmark_tile.py +0 -179
  129. warp/native/tile_gemm.h +0 -341
  130. {warp_lang-1.5.1.dist-info → warp_lang-1.6.1.dist-info}/LICENSE.md +0 -0
  131. {warp_lang-1.5.1.dist-info → warp_lang-1.6.1.dist-info}/top_level.txt +0 -0
warp/native/mat.h CHANGED
@@ -394,6 +394,36 @@ inline CUDA_CALLABLE void adj_index(const mat_t<Rows,Cols,Type>& m, int row, int
394
394
  }
395
395
 
396
396
 
397
+ template<unsigned Rows, unsigned Cols, typename Type>
398
+ inline CUDA_CALLABLE void augassign_add(mat_t<Rows,Cols,Type>& m, int row, int col, Type value)
399
+ {
400
+ m.data[row][col] += value;
401
+ }
402
+
403
+
404
+ template<unsigned Rows, unsigned Cols, typename Type>
405
+ inline CUDA_CALLABLE void adj_augassign_add(mat_t<Rows,Cols,Type>& m, int row, int col, Type value,
406
+ mat_t<Rows,Cols,Type>& adj_m, int adj_row, int adj_col, Type& adj_value)
407
+ {
408
+ adj_value += adj_m.data[row][col];
409
+ }
410
+
411
+
412
+ template<unsigned Rows, unsigned Cols, typename Type>
413
+ inline CUDA_CALLABLE void augassign_sub(mat_t<Rows,Cols,Type>& m, int row, int col, Type value)
414
+ {
415
+ m.data[row][col] -= value;
416
+ }
417
+
418
+
419
+ template<unsigned Rows, unsigned Cols, typename Type>
420
+ inline CUDA_CALLABLE void adj_augassign_sub(mat_t<Rows,Cols,Type>& m, int row, int col, Type value,
421
+ mat_t<Rows,Cols,Type>& adj_m, int adj_row, int adj_col, Type& adj_value)
422
+ {
423
+ adj_value -= adj_m.data[row][col];
424
+ }
425
+
426
+
397
427
  template<unsigned Rows, unsigned Cols, typename Type>
398
428
  inline CUDA_CALLABLE mat_t<Rows,Cols,Type> assign(mat_t<Rows,Cols,Type>& m, int row, int col, Type value)
399
429
  {
@@ -651,6 +681,20 @@ inline CUDA_CALLABLE vec_t<Cols,Type> mul(const vec_t<Rows,Type>& b, const mat_t
651
681
  return r;
652
682
  }
653
683
 
684
+ template<typename T>
685
+ inline CUDA_CALLABLE T muladd(T a, T b, T c) {
686
+ return c + a*b;
687
+ }
688
+ template<>
689
+ inline CUDA_CALLABLE float muladd(float a, float b, float c) {
690
+ return fmaf(a, b, c);
691
+ }
692
+ template<>
693
+ inline CUDA_CALLABLE double muladd(double a, double b, double c) {
694
+ return fma(a, b, c);
695
+ }
696
+
697
+
654
698
  template<unsigned Rows, unsigned Cols, unsigned ColsOut, typename Type>
655
699
  inline CUDA_CALLABLE mat_t<Rows,ColsOut,Type> mul(const mat_t<Rows,Cols,Type>& a, const mat_t<Cols,ColsOut,Type>& b)
656
700
  {
@@ -663,8 +707,7 @@ inline CUDA_CALLABLE mat_t<Rows,ColsOut,Type> mul(const mat_t<Rows,Cols,Type>& a
663
707
 
664
708
  for (unsigned k=0; k < Cols; ++k)
665
709
  {
666
- //t.data[i][j] += a.data[i][k]*b.data[k][j];
667
- sum = fmaf(a.data[i][k], b.data[k][j], sum);
710
+ sum = muladd<Type>(a.data[i][k], b.data[k][j], sum);
668
711
  }
669
712
 
670
713
  t.data[i][j] = sum;
@@ -683,7 +726,7 @@ inline CUDA_CALLABLE Type ddot(const mat_t<Rows,Cols,Type>& a, const mat_t<Rows,
683
726
  {
684
727
  for (unsigned j=0; j < Cols; ++j)
685
728
  {
686
- r += a.data[i][j] * b.data[i][j];
729
+ r = muladd<Type>(a.data[i][j], b.data[i][j], r);
687
730
  }
688
731
  }
689
732
  return r;
@@ -1650,4 +1693,15 @@ inline CUDA_CALLABLE void adj_mat44(float m00, float m01, float m02, float m03,
1650
1693
  a33 += adj_ret.data[3][3];
1651
1694
  }
1652
1695
 
1696
+ template<unsigned Rows, unsigned Cols, typename Type>
1697
+ CUDA_CALLABLE inline int len(const mat_t<Rows,Cols,Type>& x)
1698
+ {
1699
+ return Rows;
1700
+ }
1701
+
1702
+ template<unsigned Rows, unsigned Cols, typename Type>
1703
+ CUDA_CALLABLE inline void adj_len(const mat_t<Rows,Cols,Type>& x, mat_t<Rows,Cols,Type>& adj_x, const int& adj_ret)
1704
+ {
1705
+ }
1706
+
1653
1707
  } // namespace wp
warp/native/mathdx.cpp CHANGED
@@ -32,6 +32,7 @@ bool cuda_compile_fft(
32
32
  }
33
33
 
34
34
  WP_API bool cuda_compile_dot(
35
+ const char* fatbin_output_path,
35
36
  const char* ltoir_output_path,
36
37
  const char* symbol_name,
37
38
  int num_include_dirs,
@@ -54,6 +55,24 @@ WP_API bool cuda_compile_dot(
54
55
  return false;
55
56
  }
56
57
 
58
+ WP_API bool cuda_compile_solver(
59
+ const char* ltoir_output_path,
60
+ const char* symbol_name,
61
+ int num_include_dirs,
62
+ const char** include_dirs,
63
+ const char* mathdx_include_dir,
64
+ int arch,
65
+ int M,
66
+ int N,
67
+ int function,
68
+ int precision,
69
+ int fill_mode,
70
+ int num_threads)
71
+ {
72
+ printf("CUDA is disabled and/or Warp was not compiled with MathDx support.\n");
73
+ return false;
74
+ }
75
+
57
76
  } // extern "C"
58
77
 
59
78
  #endif // !WP_ENABLE_CUDA || !WP_ENABLE_MATHDX
warp/native/mesh.cpp CHANGED
@@ -67,11 +67,28 @@ void bvh_refit_with_solid_angle_recursive_host(BVH& bvh, int index, Mesh& mesh)
67
67
  if (lower.b)
68
68
  {
69
69
  // Leaf, compute properties
70
- const int leaf_index = lower.i;
71
-
72
- precompute_triangle_solid_angle_props(mesh.points[mesh.indices[leaf_index*3+0]], mesh.points[mesh.indices[leaf_index*3+1]], mesh.points[mesh.indices[leaf_index*3+2]], mesh.solid_angle_props[index]);
73
- (vec3&)lower = mesh.solid_angle_props[index].box.lower;
74
- (vec3&)upper = mesh.solid_angle_props[index].box.upper;
70
+ const int start = lower.i;
71
+ const int end = upper.i;
72
+ // loops through primitives in the leaf
73
+ for (int primitive_counter = start; primitive_counter < end; primitive_counter++)
74
+ {
75
+ int primitive_index = mesh.bvh.primitive_indices[primitive_counter];
76
+ if (primitive_counter == start)
77
+ {
78
+ precompute_triangle_solid_angle_props(mesh.points[mesh.indices[primitive_index * 3 + 0]], mesh.points[mesh.indices[primitive_index * 3 + 1]],
79
+ mesh.points[mesh.indices[primitive_index * 3 + 2]], mesh.solid_angle_props[index]);
80
+ }
81
+ else
82
+ {
83
+ SolidAngleProps triangle_solid_angle_props;
84
+ precompute_triangle_solid_angle_props(mesh.points[mesh.indices[primitive_index * 3 + 0]], mesh.points[mesh.indices[primitive_index * 3 + 1]],
85
+ mesh.points[mesh.indices[primitive_index * 3 + 2]], triangle_solid_angle_props);
86
+ mesh.solid_angle_props[index] = combine_precomputed_solid_angle_props(&mesh.solid_angle_props[index], &triangle_solid_angle_props);
87
+ }
88
+ }
89
+
90
+ (vec3&)lower = mesh.solid_angle_props[index].box.lower;
91
+ (vec3&)upper = mesh.solid_angle_props[index].box.upper;
75
92
  }
76
93
  else
77
94
  {
@@ -109,7 +126,7 @@ void bvh_refit_with_solid_angle_host(BVH& bvh, Mesh& mesh)
109
126
  bvh_refit_with_solid_angle_recursive_host(bvh, 0, mesh);
110
127
  }
111
128
 
112
- uint64_t mesh_create_host(array_t<wp::vec3> points, array_t<wp::vec3> velocities, array_t<int> indices, int num_points, int num_tris, int support_winding_number)
129
+ uint64_t mesh_create_host(array_t<wp::vec3> points, array_t<wp::vec3> velocities, array_t<int> indices, int num_points, int num_tris, int support_winding_number, int constructor_type)
113
130
  {
114
131
  Mesh* m = new Mesh(points, velocities, indices, num_points, num_tris);
115
132
 
@@ -137,7 +154,7 @@ uint64_t mesh_create_host(array_t<wp::vec3> points, array_t<wp::vec3> velocities
137
154
  }
138
155
  m->average_edge_length = sum / (num_tris*3);
139
156
 
140
- wp::bvh_create_host(m->lowers, m->uppers, num_tris, m->bvh);
157
+ wp::bvh_create_host(m->lowers, m->uppers, num_tris, constructor_type, m->bvh);
141
158
 
142
159
  if (support_winding_number)
143
160
  {
@@ -230,7 +247,7 @@ void mesh_set_velocities_host(uint64_t id, wp::array_t<wp::vec3> velocities)
230
247
  #if !WP_ENABLE_CUDA
231
248
 
232
249
 
233
- WP_API uint64_t mesh_create_device(void* context, wp::array_t<wp::vec3> points, wp::array_t<wp::vec3> velocities, wp::array_t<int> tris, int num_points, int num_tris, int support_winding_number) { return 0; }
250
+ WP_API uint64_t mesh_create_device(void* context, wp::array_t<wp::vec3> points, wp::array_t<wp::vec3> velocities, wp::array_t<int> tris, int num_points, int num_tris, int support_winding_number, int constructor_type) { return 0; }
234
251
  WP_API void mesh_destroy_device(uint64_t id) {}
235
252
  WP_API void mesh_refit_device(uint64_t id) {}
236
253
  WP_API void mesh_set_points_device(uint64_t id, wp::array_t<wp::vec3> points) {};
warp/native/mesh.cu CHANGED
@@ -63,23 +63,61 @@ __global__ void compute_average_mesh_edge_length(int n, float* sum_edge_lengths,
63
63
  m->average_edge_length = sum_edge_lengths[n - 1] / (3*n);
64
64
  }
65
65
 
66
- __global__ void bvh_refit_with_solid_angle_kernel(int n, const int* __restrict__ parents, int* __restrict__ child_count, BVHPackedNodeHalf* __restrict__ lowers, BVHPackedNodeHalf* __restrict__ uppers, const vec3* points, const int* indices, SolidAngleProps* solid_angle_props)
66
+ __global__ void bvh_refit_with_solid_angle_kernel(int n, const int* __restrict__ parents,
67
+ int* __restrict__ child_count, BVHPackedNodeHalf* __restrict__ node_lowers, BVHPackedNodeHalf* __restrict__ node_uppers,
68
+ const vec3* points, const int* indices, const int* primitive_indices, SolidAngleProps* solid_angle_props)
67
69
  {
68
70
  int index = blockDim.x*blockIdx.x + threadIdx.x;
69
71
 
70
72
  if (index < n)
71
73
  {
72
- bool leaf = lowers[index].b;
74
+ bool leaf = node_lowers[index].b;
75
+ int parent = parents[index];
73
76
 
74
77
  if (leaf)
75
78
  {
79
+ BVHPackedNodeHalf& lower = node_lowers[index];
80
+ BVHPackedNodeHalf& upper = node_uppers[index];
81
+
76
82
  // update the leaf node
77
- const int leaf_index = lowers[index].i;
78
- precompute_triangle_solid_angle_props(points[indices[leaf_index*3+0]], points[indices[leaf_index*3+1]], points[indices[leaf_index*3+2]], solid_angle_props[index]);
83
+ bool true_leaf = true;
84
+
85
+ if (parent != -1)
86
+ {
87
+ true_leaf = !node_lowers[parent].b;
88
+ }
79
89
 
80
- make_node(lowers+index, solid_angle_props[index].box.lower, leaf_index, true);
81
- make_node(uppers+index, solid_angle_props[index].box.upper, 0, false);
90
+ if (true_leaf)
91
+ {
92
+ SolidAngleProps node_solid_angle_props;
93
+
94
+ const int start = lower.i;
95
+ const int end = upper.i;
96
+
97
+ // loops through primitives in the leaf
98
+ for (int primitive_counter = start; primitive_counter < end; primitive_counter++)
99
+ {
100
+ int primitive_index = primitive_indices[primitive_counter];
101
+ if (primitive_counter == start)
102
+ {
103
+ precompute_triangle_solid_angle_props(points[indices[primitive_index * 3 + 0]], points[indices[primitive_index * 3 + 1]],
104
+ points[indices[primitive_index * 3 + 2]], node_solid_angle_props);
105
+ }
106
+ else
107
+ {
108
+ SolidAngleProps triangle_solid_angle_props;
109
+ precompute_triangle_solid_angle_props(points[indices[primitive_index * 3 + 0]], points[indices[primitive_index * 3 + 1]],
110
+ points[indices[primitive_index * 3 + 2]], triangle_solid_angle_props);
111
+ node_solid_angle_props = combine_precomputed_solid_angle_props(&node_solid_angle_props, &triangle_solid_angle_props);
112
+ }
113
+ }
114
+
115
+ (vec3&)lower = node_solid_angle_props.box.lower;
116
+ (vec3&)upper = node_solid_angle_props.box.upper;
117
+ solid_angle_props[index] = node_solid_angle_props;
118
+ }
82
119
  }
120
+
83
121
  else
84
122
  {
85
123
  // only keep leaf threads
@@ -89,7 +127,7 @@ __global__ void bvh_refit_with_solid_angle_kernel(int n, const int* __restrict__
89
127
  // update hierarchy
90
128
  for (;;)
91
129
  {
92
- int parent = parents[index];
130
+ parent = parents[index];
93
131
 
94
132
  // reached root
95
133
  if (parent == -1)
@@ -104,41 +142,74 @@ __global__ void bvh_refit_with_solid_angle_kernel(int n, const int* __restrict__
104
142
  // then update its bounds and move onto the next parent in the hierarchy
105
143
  if (finished == 1)
106
144
  {
107
- //printf("Compute non-leaf at %d\n", index);
108
- const int left_child = lowers[parent].i;
109
- const int right_child = uppers[parent].i;
110
-
111
- vec3 left_lower = vec3(lowers[left_child].x,
112
- lowers[left_child].y,
113
- lowers[left_child].z);
114
-
115
- vec3 left_upper = vec3(uppers[left_child].x,
116
- uppers[left_child].y,
117
- uppers[left_child].z);
118
-
119
- vec3 right_lower = vec3(lowers[right_child].x,
120
- lowers[right_child].y,
121
- lowers[right_child].z);
122
-
123
-
124
- vec3 right_upper = vec3(uppers[right_child].x,
125
- uppers[right_child].y,
126
- uppers[right_child].z);
127
-
128
- // union of child bounds
129
- vec3 lower = min(left_lower, right_lower);
130
- vec3 upper = max(left_upper, right_upper);
131
-
132
- // write new BVH nodes
133
- make_node(lowers+parent, lower, left_child, false);
134
- make_node(uppers+parent, upper, right_child, false);
135
-
136
- // combine
137
- SolidAngleProps* left_child_data = &solid_angle_props[left_child];
138
- SolidAngleProps* right_child_data = (left_child != right_child) ? &solid_angle_props[right_child] : NULL;
139
-
140
- combine_precomputed_solid_angle_props(solid_angle_props[parent], left_child_data, right_child_data);
141
-
145
+ BVHPackedNodeHalf& parent_lower = node_lowers[parent];
146
+ BVHPackedNodeHalf& parent_upper = node_uppers[parent];
147
+ if (parent_lower.b)
148
+ // a packed leaf node can still be a parent in LBVH, we need to recompute its bounds
149
+ // since we've lost its left and right child node index in the muting process
150
+ {
151
+ int parent_parent = parents[parent];;
152
+ // only need to compute bound when this is a valid leaf node
153
+ bool true_leaf = true;
154
+
155
+ if (parent_parent != -1)
156
+ {
157
+ true_leaf = !node_lowers[parent_parent].b;
158
+ }
159
+
160
+ if (true_leaf)
161
+ {
162
+ SolidAngleProps node_solid_angle_props;
163
+ const int start = parent_lower.i;
164
+ const int end = parent_upper.i;
165
+ // loops through primitives in the leaf
166
+ for (int primitive_counter = start; primitive_counter < end; primitive_counter++)
167
+ {
168
+ int primitive_index = primitive_indices[primitive_counter];
169
+ if (primitive_counter == start)
170
+ {
171
+ precompute_triangle_solid_angle_props(points[indices[primitive_index * 3 + 0]], points[indices[primitive_index * 3 + 1]],
172
+ points[indices[primitive_index * 3 + 2]], node_solid_angle_props);
173
+ }
174
+ else
175
+ {
176
+ SolidAngleProps triangle_solid_angle_props;
177
+ precompute_triangle_solid_angle_props(points[indices[primitive_index * 3 + 0]], points[indices[primitive_index * 3 + 1]],
178
+ points[indices[primitive_index * 3 + 2]], triangle_solid_angle_props);
179
+ node_solid_angle_props = combine_precomputed_solid_angle_props(&node_solid_angle_props, &triangle_solid_angle_props);
180
+ }
181
+ }
182
+
183
+ (vec3&)parent_lower = node_solid_angle_props.box.lower;
184
+ (vec3&)parent_upper = node_solid_angle_props.box.upper;
185
+ solid_angle_props[parent] = node_solid_angle_props;
186
+ }
187
+ }
188
+ else
189
+ {
190
+ //printf("Compute non-leaf at %d\n", index);
191
+ const int left_child = node_lowers[parent].i;
192
+ const int right_child = node_uppers[parent].i;
193
+
194
+ vec3 left_lower = (vec3&)(node_lowers[left_child]);
195
+ vec3 left_upper = (vec3&)(node_uppers[left_child]);
196
+ vec3 right_lower = (vec3&)(node_lowers[right_child]);
197
+ vec3 right_upper = (vec3&)(node_uppers[right_child]);
198
+
199
+ // union of child bounds
200
+ vec3 lower = min(left_lower, right_lower);
201
+ vec3 upper = max(left_upper, right_upper);
202
+
203
+ // write new BVH nodes
204
+ (vec3&)parent_lower = lower;
205
+ (vec3&)parent_upper = upper;
206
+
207
+ // combine
208
+ SolidAngleProps* left_child_data = &solid_angle_props[left_child];
209
+ SolidAngleProps* right_child_data = (left_child != right_child) ? &solid_angle_props[right_child] : NULL;
210
+
211
+ combine_precomputed_solid_angle_props(solid_angle_props[parent], left_child_data, right_child_data);
212
+ }
142
213
  // move onto processing the parent
143
214
  index = parent;
144
215
  }
@@ -157,15 +228,15 @@ void bvh_refit_with_solid_angle_device(BVH& bvh, Mesh& mesh)
157
228
  ContextGuard guard(bvh.context);
158
229
 
159
230
  // clear child counters
160
- memset_device(WP_CURRENT_CONTEXT, bvh.node_counts, 0, sizeof(int)*bvh.max_nodes);
161
-
162
- wp_launch_device(WP_CURRENT_CONTEXT, bvh_refit_with_solid_angle_kernel, bvh.num_items, (bvh.num_items, bvh.node_parents, bvh.node_counts, bvh.node_lowers, bvh.node_uppers, mesh.points, mesh.indices, mesh.solid_angle_props));
231
+ memset_device(WP_CURRENT_CONTEXT, bvh.node_counts, 0, sizeof(int) * bvh.max_nodes);
232
+ wp_launch_device(WP_CURRENT_CONTEXT, bvh_refit_with_solid_angle_kernel, bvh.num_leaf_nodes,
233
+ (bvh.num_leaf_nodes, bvh.node_parents, bvh.node_counts, bvh.node_lowers, bvh.node_uppers, mesh.points, mesh.indices, bvh.primitive_indices, mesh.solid_angle_props));
163
234
  }
164
235
 
165
236
  } // namespace wp
166
237
 
167
238
 
168
- uint64_t mesh_create_device(void* context, wp::array_t<wp::vec3> points, wp::array_t<wp::vec3> velocities, wp::array_t<int> indices, int num_points, int num_tris, int support_winding_number)
239
+ uint64_t mesh_create_device(void* context, wp::array_t<wp::vec3> points, wp::array_t<wp::vec3> velocities, wp::array_t<int> indices, int num_points, int num_tris, int support_winding_number, int constructor_type)
169
240
  {
170
241
  ContextGuard guard(context);
171
242
 
@@ -173,55 +244,38 @@ uint64_t mesh_create_device(void* context, wp::array_t<wp::vec3> points, wp::arr
173
244
 
174
245
  mesh.context = context ? context : cuda_context_get_current();
175
246
 
247
+ // create lower upper arrays expected by GPU BVH builder
248
+ mesh.lowers = (wp::vec3*)alloc_device(WP_CURRENT_CONTEXT, sizeof(wp::vec3)*num_tris);
249
+ mesh.uppers = (wp::vec3*)alloc_device(WP_CURRENT_CONTEXT, sizeof(wp::vec3)*num_tris);
250
+
251
+ if (support_winding_number)
176
252
  {
177
- // // todo: BVH creation only on CPU at the moment so temporarily bring all the data back to host
178
- // vec3* points_host = (vec3*)alloc_host(sizeof(vec3)*num_points);
179
- // int* indices_host = (int*)alloc_host(sizeof(int)*num_tris*3);
180
- // bounds3* bounds_host = (bounds3*)alloc_host(sizeof(bounds3)*num_tris);
181
-
182
- // memcpy_d2h(WP_CURRENT_CONTEXT, points_host, points, sizeof(vec3)*num_points);
183
- // memcpy_d2h(WP_CURRENT_CONTEXT, indices_host, indices, sizeof(int)*num_tris*3);
184
- // cuda_context_synchronize(WP_CURRENT_CONTEXT);
185
-
186
- // float sum = 0.0;
187
- // for (int i=0; i < num_tris; ++i)
188
- // {
189
- // bounds_host[i] = bounds3();
190
- // wp::vec3 p0 = points_host[indices_host[i*3+0]];
191
- // wp::vec3 p1 = points_host[indices_host[i*3+1]];
192
- // wp::vec3 p2 = points_host[indices_host[i*3+2]];
193
- // bounds_host[i].add_point(p0);
194
- // bounds_host[i].add_point(p1);
195
- // bounds_host[i].add_point(p2);
196
- // sum += length(p0-p1) + length(p0-p2) + length(p2-p1);
197
- // }
198
- // mesh.average_edge_length = sum / (num_tris*3);
199
-
200
- // BVH bvh_host = bvh_create(bounds_host, num_tris);
201
- // BVH bvh_device = bvh_clone(WP_CURRENT_CONTEXT, bvh_host);
202
-
203
- // bvh_destroy_host(bvh_host);
204
-
205
- // create lower upper arrays expected by GPU BVH builder
206
- mesh.lowers = (wp::vec3*)alloc_device(WP_CURRENT_CONTEXT, sizeof(wp::vec3)*num_tris);
207
- mesh.uppers = (wp::vec3*)alloc_device(WP_CURRENT_CONTEXT, sizeof(wp::vec3)*num_tris);
208
-
209
- wp_launch_device(WP_CURRENT_CONTEXT, wp::compute_triangle_bounds, num_tris, (num_tris, points.data, indices.data, mesh.lowers, mesh.uppers));
210
-
211
- wp::bvh_create_device(mesh.context, mesh.lowers, mesh.uppers, num_tris, mesh.bvh);
212
-
213
- if (support_winding_number)
214
- {
215
- int num_bvh_nodes = 2*num_tris;
216
- mesh.solid_angle_props = (wp::SolidAngleProps*)alloc_device(WP_CURRENT_CONTEXT, sizeof(wp::SolidAngleProps)*num_bvh_nodes);
217
- }
253
+ int num_bvh_nodes = 2 * num_tris;
254
+ mesh.solid_angle_props = (wp::SolidAngleProps*)alloc_device(WP_CURRENT_CONTEXT, sizeof(wp::SolidAngleProps) * num_bvh_nodes);
218
255
  }
219
256
 
220
257
  wp::Mesh* mesh_device = (wp::Mesh*)alloc_device(WP_CURRENT_CONTEXT, sizeof(wp::Mesh));
221
258
  memcpy_h2d(WP_CURRENT_CONTEXT, mesh_device, &mesh, sizeof(wp::Mesh));
222
-
259
+
223
260
  // save descriptor
224
261
  uint64_t mesh_id = (uint64_t)mesh_device;
262
+
263
+ // we compute mesh the average edge length
264
+ // for use in mesh_query_point_sign_normal()
265
+ // since it relies on an epsilon for welding
266
+ // reuse bounds memory temporarily for computing edge lengths
267
+ float* length_tmp_ptr = (float*)mesh.lowers;
268
+ wp_launch_device(WP_CURRENT_CONTEXT, wp::compute_mesh_edge_lengths, mesh.num_tris, (mesh.num_tris, mesh.points, mesh.indices, length_tmp_ptr));
269
+ scan_device(length_tmp_ptr, length_tmp_ptr, mesh.num_tris, true);
270
+ wp_launch_device(WP_CURRENT_CONTEXT, wp::compute_average_mesh_edge_length, 1, (mesh.num_tris, length_tmp_ptr, mesh_device));
271
+
272
+ // compute triangle bound and construct BVH
273
+ wp_launch_device(WP_CURRENT_CONTEXT, wp::compute_triangle_bounds, mesh.num_tris, (mesh.num_tris, mesh.points, mesh.indices, mesh.lowers, mesh.uppers));
274
+ wp::bvh_create_device(mesh.context, mesh.lowers, mesh.uppers, num_tris, constructor_type, mesh.bvh);
275
+
276
+ // we need to overwrite mesh.bvh because it is not initialized when we construct it on device
277
+ memcpy_h2d(WP_CURRENT_CONTEXT, &(mesh_device->bvh), &mesh.bvh, sizeof(wp::BVH));
278
+
225
279
  mesh_add_descriptor(mesh_id, mesh);
226
280
 
227
281
  if (support_winding_number)
@@ -263,23 +317,21 @@ void mesh_refit_device(uint64_t id)
263
317
  {
264
318
  ContextGuard guard(m.context);
265
319
 
320
+ // we compute mesh the average edge length
321
+ // for use in mesh_query_point_sign_normal()
322
+ // since it relies on an epsilon for welding
323
+
324
+ // reuse bounds memory temporarily for computing edge lengths
325
+ float* length_tmp_ptr = (float*)m.lowers;
326
+ wp_launch_device(WP_CURRENT_CONTEXT, wp::compute_mesh_edge_lengths, m.num_tris, (m.num_tris, m.points, m.indices, length_tmp_ptr));
327
+
328
+ scan_device(length_tmp_ptr, length_tmp_ptr, m.num_tris, true);
329
+
330
+ wp_launch_device(WP_CURRENT_CONTEXT, wp::compute_average_mesh_edge_length, 1, (m.num_tris, length_tmp_ptr, (wp::Mesh*)id));
266
331
  wp_launch_device(WP_CURRENT_CONTEXT, wp::compute_triangle_bounds, m.num_tris, (m.num_tris, m.points, m.indices, m.lowers, m.uppers));
267
332
 
268
333
  if (m.solid_angle_props)
269
334
  {
270
- // we compute mesh the average edge length
271
- // for use in mesh_query_point_sign_normal()
272
- // since it relies on an epsilon for welding
273
-
274
- // reuse bounds memory temporarily for computing edge lengths
275
- float* length_tmp_ptr = (float*)m.lowers;
276
- wp_launch_device(WP_CURRENT_CONTEXT, wp::compute_mesh_edge_lengths, m.num_tris, (m.num_tris, m.points, m.indices, length_tmp_ptr));
277
-
278
- scan_device(length_tmp_ptr, length_tmp_ptr, m.num_tris, true);
279
-
280
- wp_launch_device(WP_CURRENT_CONTEXT, wp::compute_average_mesh_edge_length, 1, (m.num_tris, length_tmp_ptr, (wp::Mesh*)id));
281
- wp_launch_device(WP_CURRENT_CONTEXT, wp::compute_triangle_bounds, m.num_tris, (m.num_tris, m.points, m.indices, m.lowers, m.uppers));
282
-
283
335
  // update solid angle data
284
336
  bvh_refit_with_solid_angle_device(m.bvh, m);
285
337
  }