warp-lang 1.7.2rc1-py3-none-macosx_10_13_universal2.whl → 1.8.1-py3-none-macosx_10_13_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: the registry flags that this version of warp-lang might be problematic.
Files changed (192)
  1. warp/__init__.py +3 -1
  2. warp/__init__.pyi +3489 -1
  3. warp/autograd.py +45 -122
  4. warp/bin/libwarp.dylib +0 -0
  5. warp/build.py +241 -252
  6. warp/build_dll.py +130 -26
  7. warp/builtins.py +1907 -384
  8. warp/codegen.py +272 -104
  9. warp/config.py +12 -1
  10. warp/constants.py +1 -1
  11. warp/context.py +770 -238
  12. warp/dlpack.py +1 -1
  13. warp/examples/benchmarks/benchmark_cloth.py +2 -2
  14. warp/examples/benchmarks/benchmark_tile_sort.py +155 -0
  15. warp/examples/core/example_sample_mesh.py +1 -1
  16. warp/examples/core/example_spin_lock.py +93 -0
  17. warp/examples/core/example_work_queue.py +118 -0
  18. warp/examples/fem/example_adaptive_grid.py +5 -5
  19. warp/examples/fem/example_apic_fluid.py +1 -1
  20. warp/examples/fem/example_burgers.py +1 -1
  21. warp/examples/fem/example_convection_diffusion.py +9 -6
  22. warp/examples/fem/example_darcy_ls_optimization.py +489 -0
  23. warp/examples/fem/example_deformed_geometry.py +1 -1
  24. warp/examples/fem/example_diffusion.py +2 -2
  25. warp/examples/fem/example_diffusion_3d.py +1 -1
  26. warp/examples/fem/example_distortion_energy.py +1 -1
  27. warp/examples/fem/example_elastic_shape_optimization.py +387 -0
  28. warp/examples/fem/example_magnetostatics.py +5 -3
  29. warp/examples/fem/example_mixed_elasticity.py +5 -3
  30. warp/examples/fem/example_navier_stokes.py +11 -9
  31. warp/examples/fem/example_nonconforming_contact.py +5 -3
  32. warp/examples/fem/example_streamlines.py +8 -3
  33. warp/examples/fem/utils.py +9 -8
  34. warp/examples/interop/example_jax_callable.py +34 -4
  35. warp/examples/interop/example_jax_ffi_callback.py +2 -2
  36. warp/examples/interop/example_jax_kernel.py +27 -1
  37. warp/examples/optim/example_drone.py +1 -1
  38. warp/examples/sim/example_cloth.py +1 -1
  39. warp/examples/sim/example_cloth_self_contact.py +48 -54
  40. warp/examples/tile/example_tile_block_cholesky.py +502 -0
  41. warp/examples/tile/example_tile_cholesky.py +2 -1
  42. warp/examples/tile/example_tile_convolution.py +1 -1
  43. warp/examples/tile/example_tile_filtering.py +1 -1
  44. warp/examples/tile/example_tile_matmul.py +1 -1
  45. warp/examples/tile/example_tile_mlp.py +2 -0
  46. warp/fabric.py +7 -7
  47. warp/fem/__init__.py +5 -0
  48. warp/fem/adaptivity.py +1 -1
  49. warp/fem/cache.py +152 -63
  50. warp/fem/dirichlet.py +2 -2
  51. warp/fem/domain.py +136 -6
  52. warp/fem/field/field.py +141 -99
  53. warp/fem/field/nodal_field.py +85 -39
  54. warp/fem/field/virtual.py +99 -52
  55. warp/fem/geometry/adaptive_nanogrid.py +91 -86
  56. warp/fem/geometry/closest_point.py +13 -0
  57. warp/fem/geometry/deformed_geometry.py +102 -40
  58. warp/fem/geometry/element.py +56 -2
  59. warp/fem/geometry/geometry.py +323 -22
  60. warp/fem/geometry/grid_2d.py +157 -62
  61. warp/fem/geometry/grid_3d.py +116 -20
  62. warp/fem/geometry/hexmesh.py +86 -20
  63. warp/fem/geometry/nanogrid.py +166 -86
  64. warp/fem/geometry/partition.py +59 -25
  65. warp/fem/geometry/quadmesh.py +86 -135
  66. warp/fem/geometry/tetmesh.py +47 -119
  67. warp/fem/geometry/trimesh.py +77 -270
  68. warp/fem/integrate.py +181 -95
  69. warp/fem/linalg.py +25 -58
  70. warp/fem/operator.py +124 -27
  71. warp/fem/quadrature/pic_quadrature.py +36 -14
  72. warp/fem/quadrature/quadrature.py +40 -16
  73. warp/fem/space/__init__.py +1 -1
  74. warp/fem/space/basis_function_space.py +66 -46
  75. warp/fem/space/basis_space.py +17 -4
  76. warp/fem/space/dof_mapper.py +1 -1
  77. warp/fem/space/function_space.py +2 -2
  78. warp/fem/space/grid_2d_function_space.py +4 -1
  79. warp/fem/space/hexmesh_function_space.py +4 -2
  80. warp/fem/space/nanogrid_function_space.py +3 -1
  81. warp/fem/space/partition.py +11 -2
  82. warp/fem/space/quadmesh_function_space.py +4 -1
  83. warp/fem/space/restriction.py +5 -2
  84. warp/fem/space/shape/__init__.py +10 -8
  85. warp/fem/space/tetmesh_function_space.py +4 -1
  86. warp/fem/space/topology.py +52 -21
  87. warp/fem/space/trimesh_function_space.py +4 -1
  88. warp/fem/utils.py +53 -8
  89. warp/jax.py +1 -2
  90. warp/jax_experimental/ffi.py +210 -67
  91. warp/jax_experimental/xla_ffi.py +37 -24
  92. warp/math.py +171 -1
  93. warp/native/array.h +103 -4
  94. warp/native/builtin.h +182 -35
  95. warp/native/coloring.cpp +6 -2
  96. warp/native/cuda_util.cpp +1 -1
  97. warp/native/exports.h +118 -63
  98. warp/native/intersect.h +5 -5
  99. warp/native/mat.h +8 -13
  100. warp/native/mathdx.cpp +11 -5
  101. warp/native/matnn.h +1 -123
  102. warp/native/mesh.h +1 -1
  103. warp/native/quat.h +34 -6
  104. warp/native/rand.h +7 -7
  105. warp/native/sparse.cpp +121 -258
  106. warp/native/sparse.cu +181 -274
  107. warp/native/spatial.h +305 -17
  108. warp/native/svd.h +23 -8
  109. warp/native/tile.h +603 -73
  110. warp/native/tile_radix_sort.h +1112 -0
  111. warp/native/tile_reduce.h +239 -13
  112. warp/native/tile_scan.h +240 -0
  113. warp/native/tuple.h +189 -0
  114. warp/native/vec.h +10 -20
  115. warp/native/warp.cpp +36 -4
  116. warp/native/warp.cu +588 -52
  117. warp/native/warp.h +47 -74
  118. warp/optim/linear.py +5 -1
  119. warp/paddle.py +7 -8
  120. warp/py.typed +0 -0
  121. warp/render/render_opengl.py +110 -80
  122. warp/render/render_usd.py +124 -62
  123. warp/sim/__init__.py +9 -0
  124. warp/sim/collide.py +253 -80
  125. warp/sim/graph_coloring.py +8 -1
  126. warp/sim/import_mjcf.py +4 -3
  127. warp/sim/import_usd.py +11 -7
  128. warp/sim/integrator.py +5 -2
  129. warp/sim/integrator_euler.py +1 -1
  130. warp/sim/integrator_featherstone.py +1 -1
  131. warp/sim/integrator_vbd.py +761 -322
  132. warp/sim/integrator_xpbd.py +1 -1
  133. warp/sim/model.py +265 -260
  134. warp/sim/utils.py +10 -7
  135. warp/sparse.py +303 -166
  136. warp/tape.py +54 -51
  137. warp/tests/cuda/test_conditional_captures.py +1046 -0
  138. warp/tests/cuda/test_streams.py +1 -1
  139. warp/tests/geometry/test_volume.py +2 -2
  140. warp/tests/interop/test_dlpack.py +9 -9
  141. warp/tests/interop/test_jax.py +0 -1
  142. warp/tests/run_coverage_serial.py +1 -1
  143. warp/tests/sim/disabled_kinematics.py +2 -2
  144. warp/tests/sim/{test_vbd.py → test_cloth.py} +378 -112
  145. warp/tests/sim/test_collision.py +159 -51
  146. warp/tests/sim/test_coloring.py +91 -2
  147. warp/tests/test_array.py +254 -2
  148. warp/tests/test_array_reduce.py +2 -2
  149. warp/tests/test_assert.py +53 -0
  150. warp/tests/test_atomic_cas.py +312 -0
  151. warp/tests/test_codegen.py +142 -19
  152. warp/tests/test_conditional.py +47 -1
  153. warp/tests/test_ctypes.py +0 -20
  154. warp/tests/test_devices.py +8 -0
  155. warp/tests/test_fabricarray.py +4 -2
  156. warp/tests/test_fem.py +58 -25
  157. warp/tests/test_func.py +42 -1
  158. warp/tests/test_grad.py +1 -1
  159. warp/tests/test_lerp.py +1 -3
  160. warp/tests/test_map.py +481 -0
  161. warp/tests/test_mat.py +23 -24
  162. warp/tests/test_quat.py +28 -15
  163. warp/tests/test_rounding.py +10 -38
  164. warp/tests/test_runlength_encode.py +7 -7
  165. warp/tests/test_smoothstep.py +1 -1
  166. warp/tests/test_sparse.py +83 -2
  167. warp/tests/test_spatial.py +507 -1
  168. warp/tests/test_static.py +48 -0
  169. warp/tests/test_struct.py +2 -2
  170. warp/tests/test_tape.py +38 -0
  171. warp/tests/test_tuple.py +265 -0
  172. warp/tests/test_types.py +2 -2
  173. warp/tests/test_utils.py +24 -18
  174. warp/tests/test_vec.py +38 -408
  175. warp/tests/test_vec_constructors.py +325 -0
  176. warp/tests/tile/test_tile.py +438 -131
  177. warp/tests/tile/test_tile_mathdx.py +518 -14
  178. warp/tests/tile/test_tile_matmul.py +179 -0
  179. warp/tests/tile/test_tile_reduce.py +307 -5
  180. warp/tests/tile/test_tile_shared_memory.py +136 -7
  181. warp/tests/tile/test_tile_sort.py +121 -0
  182. warp/tests/unittest_suites.py +14 -6
  183. warp/types.py +462 -308
  184. warp/utils.py +647 -86
  185. {warp_lang-1.7.2rc1.dist-info → warp_lang-1.8.1.dist-info}/METADATA +20 -6
  186. {warp_lang-1.7.2rc1.dist-info → warp_lang-1.8.1.dist-info}/RECORD +189 -175
  187. warp/stubs.py +0 -3381
  188. warp/tests/sim/test_xpbd.py +0 -399
  189. warp/tests/test_mlp.py +0 -282
  190. {warp_lang-1.7.2rc1.dist-info → warp_lang-1.8.1.dist-info}/WHEEL +0 -0
  191. {warp_lang-1.7.2rc1.dist-info → warp_lang-1.8.1.dist-info}/licenses/LICENSE.md +0 -0
  192. {warp_lang-1.7.2rc1.dist-info → warp_lang-1.8.1.dist-info}/top_level.txt +0 -0
warp/native/mat.h CHANGED
@@ -389,23 +389,17 @@ inline CUDA_CALLABLE bool operator==(const mat_t<Rows,Cols,Type>& a, const mat_t
     return true;
 }
 
-
-// negation:
 template<unsigned Rows, unsigned Cols, typename Type>
-inline CUDA_CALLABLE mat_t<Rows,Cols,Type> operator - (mat_t<Rows,Cols,Type> a)
+inline CUDA_CALLABLE mat_t<Rows,Cols,Type> operator - (const mat_t<Rows,Cols,Type>& x)
 {
-    // NB: this constructor will initialize all ret's components to 0, which is
-    // unnecessary...
     mat_t<Rows,Cols,Type> ret;
     for (unsigned i=0; i < Rows; ++i)
         for (unsigned j=0; j < Cols; ++j)
-            ret.data[i][j] = -a.data[i][j];
+            ret.data[i][j] = -x.data[i][j];
 
-    // Wonder if this does a load of copying when it returns... hopefully not as it's inlined?
     return ret;
 }
 
-
 template<unsigned Rows, unsigned Cols, typename Type>
 CUDA_CALLABLE inline mat_t<Rows,Cols,Type> pos(const mat_t<Rows,Cols,Type>& x)
 {
@@ -1539,13 +1533,13 @@ inline CUDA_CALLABLE void adj_div(const mat_t<Rows,Cols,Type>& a, Type s, mat_t<
 template<unsigned Rows, unsigned Cols, typename Type>
 inline CUDA_CALLABLE void adj_div(Type s, const mat_t<Rows,Cols,Type>& a, Type& adj_s, mat_t<Rows,Cols,Type>& adj_a, const mat_t<Rows,Cols,Type>& adj_ret)
 {
-    adj_s -= tensordot(a , adj_ret)/ (s * s); // - a / s^2
-
     for (unsigned i=0; i < Rows; ++i)
     {
         for (unsigned j=0; j < Cols; ++j)
         {
-            adj_a.data[i][j] += s / adj_ret.data[i][j];
+            Type inv = Type(1) / a.data[i][j];
+            adj_a.data[i][j] -= s * adj_ret.data[i][j] * inv * inv;
+            adj_s += adj_ret.data[i][j] * inv;
         }
     }
 }
@@ -2206,8 +2200,9 @@ inline CUDA_CALLABLE void expect_near(const mat_t<Rows,Cols,Type>& actual, const
     if (diff > tolerance)
     {
         printf("Error, expect_near() failed with tolerance "); print(tolerance);
-        printf("\t Expected: "); print(expected);
-        printf("\t Actual: "); print(actual);
+        printf(" Expected: "); print(expected);
+        printf(" Actual: "); print(actual);
+        printf(" Max absolute difference: "); print(diff);
     }
 }
 
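
Note on the adj_div hunk above: the new loop is the componentwise adjoint of the scalar-by-matrix quotient r_ij = s / a_ij. Differentiating,

\[
\frac{\partial r_{ij}}{\partial a_{ij}} = -\frac{s}{a_{ij}^{2}},
\qquad
\frac{\partial r_{ij}}{\partial s} = \frac{1}{a_{ij}},
\]

which is exactly what the replacement accumulates through inv = 1 / a_ij. The removed lines were wrong on both counts: the tensordot term is the adj_s formula for a / s rather than s / a, and adj_a was incremented with s / adj_ret, dividing by the adjoint instead of the primal. The quat.h section below applies the same componentwise correction to quaternions.
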
warp/native/mathdx.cpp CHANGED
@@ -26,7 +26,8 @@ extern "C"
 WP_API
 bool cuda_compile_fft(
     const char* ltoir_output_path,
-    const char* symbol_name, int num_include_dirs,
+    const char* symbol_name,
+    int num_include_dirs,
     const char** include_dirs,
     const char* mathdx_include_dir,
     int arch,
@@ -41,7 +42,6 @@ bool cuda_compile_fft(
 }
 
 WP_API bool cuda_compile_dot(
-    const char* fatbin_output_path,
     const char* ltoir_output_path,
     const char* symbol_name,
     int num_include_dirs,
@@ -55,9 +55,9 @@ WP_API bool cuda_compile_dot(
     int precision_B,
     int precision_C,
     int type,
-    int a_arrangement,
-    int b_arrangement,
-    int c_arrangement,
+    int arrangement_A,
+    int arrangement_B,
+    int arrangement_C,
     int num_threads)
 {
     printf("CUDA is disabled and/or Warp was not compiled with MathDx support.\n");
@@ -65,6 +65,7 @@ WP_API bool cuda_compile_dot(
 }
 
 WP_API bool cuda_compile_solver(
+    const char* fatbin_output_path,
     const char* ltoir_output_path,
     const char* symbol_name,
     int num_include_dirs,
@@ -73,8 +74,13 @@ WP_API bool cuda_compile_solver(
     int arch,
     int M,
     int N,
+    int NRHS,
     int function,
+    int side,
+    int diag,
     int precision,
+    int arrangement_A,
+    int arrangement_B,
     int fill_mode,
     int num_threads)
 {
warp/native/matnn.h CHANGED
@@ -218,126 +218,4 @@ CUDA_CALLABLE inline void adj_dense_solve(int n,
     }
 }
 
-
-template <typename F>
-CUDA_CALLABLE inline void mlp(const array_t<float>& weights, const array_t<float>& bias, F activation, int index, const array_t<float>& x, array_t<float>& out)
-{
-    const int m = weights.shape[0];
-    const int n = weights.shape[1];
-    const int b = x.shape[1];
-
-    for (int i=0; i < m; ++i)
-    {
-        float tmp = bias.data[i];
-
-        for(int j=0; j < n; ++j)
-        {
-            tmp += weights.data[i*n + j]*x.data[index + b*j];
-        }
-
-        out.data[index + b*i] = activation(tmp);
-    }
-}
-
-template <typename F, typename AdjF>
-CUDA_CALLABLE inline void adj_mlp(const array_t<float>& weights, const array_t<float>& bias, F activation, int index, const array_t<float>& x, array_t<float>& out,
-                                  array_t<float>& adj_weights, array_t<float>& adj_bias, AdjF adj_activation, int adj_index, array_t<float>& adj_x, array_t<float>& adj_out)
-{
-    const int m = weights.shape[0];
-    const int n = weights.shape[1];
-    const int b = x.shape[1];
-
-    for (int i=0; i < m; ++i)
-    {
-        // recompute forward pass so we don't have to store pre-activation outputs
-        float tmp = bias.data[i];
-
-        for(int j=0; j < n; ++j)
-        {
-            tmp += weights.data[i*n + j]*x.data[index + b*j];
-        }
-
-        // adjoint w.r.t to activation
-        float adj_f = 0.0f;
-
-        if (adj_out.data)
-            adj_activation(tmp, adj_f, adj_out.data[index + b*i]);
-
-        for (int j=0; j < n; ++j)
-        {
-            // adjoint w.r.t M_i
-            if (adj_weights.data)
-                atomic_add(&adj_weights.data[i*n + j], x.data[index + b*j]*adj_f); // todo: reduce these atomic stores using warp/block level reductions
-
-            // adjoint w.r.t x
-            if (adj_x.data)
-                atomic_add(&adj_x.data[index + b*j], weights.data[i*n + j]*adj_f);
-        }
-
-        // adjoint w.r.t b
-        if (adj_bias.data)
-            atomic_add(&adj_bias.data[i], adj_f);
-
-    }
-}
-
-
-// template <typename F>
-// CUDA_CALLABLE inline void mlp(const array_t<float>& weights, const array_t<float>& bias, F activation, int m, int n, int b, int index, const array_t<float>& x, array_t<float>& out)
-// {
-//     x += index*n;
-//     out += index*m;
-
-
-//     for (int i=0; i < m; ++i)
-//     {
-//         float tmp = bias[i];
-
-//         for(int j=0; j < n; ++j)
-//         {
-//             tmp += weights[i*n + j]*x[j];
-//         }
-
-//         out[i] = activation(tmp);
-//     }
-// }
-
-// template <typename F, typename AdjF>
-// CUDA_CALLABLE inline void adj_mlp(const array_t<float>& weights, const array_t<float>& bias, F activation, int m, int n, int b, int index, const array_t<float>& x, const array_t<float>& out,
-//                                   array_t<float>& adj_weights, array_t<float>& adj_bias, AdjF adj_activation, int adj_m, int adj_n, int adj_b, int adj_index, array_t<float>& adj_x, array_t<float>& adj_out)
-// {
-//     x += index*n;
-//     out += index*m;
-
-//     adj_x += index*n;
-//     adj_out += index*m;
-
-//     for (int i=0; i < m; ++i)
-//     {
-//         // recompute forward pass so we don't have to store pre-activation outputs
-//         float tmp = bias[i];
-
-//         for(int j=0; j < n; ++j)
-//         {
-//             tmp += weights[i*n + j]*x[index + b*j];
-//         }
-
-//         // adjoint w.r.t to activation
-//         float adj_f = 0.0f;
-//         adj_activation(tmp, adj_f, adj_out[index + b*i]);
-
-//         for (int j=0; j < n; ++j)
-//         {
-//             // adjoint w.r.t M_i
-//             adj_weights[i*n + j] += x[j]*adj_f;
-
-//             // adjoint w.r.t x
-//             adj_x[index + b*j] += weights[i*n + j]*adj_f;
-//         }
-
-//         // adjoint w.r.t b
-//         adj_bias[i] += adj_f;
-//     }
-// }
-
-} // namespace wp
+} // namespace wp
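
The removed mlp()/adj_mlp() helpers computed out = activation(W x + b) for one batch column per call, with weights stored row-major as m x n and the batch as the trailing axis of x (the commented-out variants appear to be earlier drafts of the same routines); warp/tests/test_mlp.py is also deleted in this release, per the file list above. Code that still needs the forward pass can express it as an ordinary Warp kernel and let Warp's autodiff derive the adjoint. A minimal Python sketch under the same m x n / n x b layout; the kernel name and the choice of ReLU are illustrative only, not part of the package:

import warp as wp

@wp.kernel
def mlp_forward(weights: wp.array2d(dtype=float),  # m x n
                bias: wp.array(dtype=float),       # m
                x: wp.array2d(dtype=float),        # n x b, batch on the trailing axis
                out: wp.array2d(dtype=float)):     # m x b
    tid = wp.tid()  # one thread per batch column, like the removed helper's index argument
    for i in range(weights.shape[0]):
        tmp = bias[i]
        for j in range(weights.shape[1]):
            tmp += weights[i, j] * x[j, tid]
        out[i, tid] = wp.max(tmp, 0.0)  # ReLU stands in for the activation functor

Launching it as wp.launch(mlp_forward, dim=batch_size, inputs=[weights, bias, x], outputs=[out]) inside a wp.Tape scope yields the gradients that the hand-written adj_mlp() used to provide.
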
warp/native/mesh.h CHANGED
@@ -1357,7 +1357,7 @@ CUDA_CALLABLE inline void adj_mesh_query_point_sign_normal(uint64_t id, const ve
                                                            uint64_t adj_id, vec3& adj_point, float& adj_max_dist, float& adj_epsilon, mesh_query_point_t& adj_ret)
 {
     adj_mesh_query_point_sign_normal(id, point, max_dist, ret.sign, ret.face, ret.u, ret.v, epsilon,
-                                     adj_id, adj_point, adj_max_dist, adj_ret.sign, adj_ret.face, adj_ret.u, adj_ret.v, epsilon, adj_ret.result);
+                                     adj_id, adj_point, adj_max_dist, adj_ret.sign, adj_ret.face, adj_ret.u, adj_ret.v, adj_epsilon, adj_ret.result);
 }
 
 CUDA_CALLABLE inline void adj_mesh_query_point_sign_winding_number(uint64_t id, const vec3& point, float max_dist, float accuracy, float winding_number_threshold, const mesh_query_point_t& ret,
warp/native/quat.h CHANGED
@@ -274,8 +274,32 @@ inline CUDA_CALLABLE quat_t<Type> add(const quat_t<Type>& a, const quat_t<Type>&
 template<typename Type>
 inline CUDA_CALLABLE quat_t<Type> sub(const quat_t<Type>& a, const quat_t<Type>& b)
 {
-    return quat_t<Type>(a.x-b.x, a.y-b.y, a.z-b.z, a.w-b.w);}
+    return quat_t<Type>(a.x-b.x, a.y-b.y, a.z-b.z, a.w-b.w);
+}
+
+template<typename Type>
+inline CUDA_CALLABLE quat_t<Type> operator - (const quat_t<Type>& q)
+{
+    return quat_t<Type>(-q.x, -q.y, -q.z, -q.w);
+}
 
+template<typename Type>
+CUDA_CALLABLE inline quat_t<Type> pos(const quat_t<Type>& q)
+{
+    return q;
+}
+
+template<typename Type>
+CUDA_CALLABLE inline quat_t<Type> neg(const quat_t<Type>& q)
+{
+    return -q;
+}
+
+template<typename Type>
+CUDA_CALLABLE inline void adj_neg(const quat_t<Type>& q, quat_t<Type>& adj_q, const quat_t<Type>& adj_ret)
+{
+    adj_q -= adj_ret;
+}
 
 template<typename Type>
 inline CUDA_CALLABLE quat_t<Type> mul(const quat_t<Type>& a, const quat_t<Type>& b)
@@ -298,7 +322,6 @@ inline CUDA_CALLABLE quat_t<Type> mul(Type s, const quat_t<Type>& a)
     return mul(a, s);
 }
 
-// division
 template<typename Type>
 inline CUDA_CALLABLE quat_t<Type> div(quat_t<Type> q, Type s)
 {
@@ -881,8 +904,12 @@ inline CUDA_CALLABLE void adj_div(quat_t<Type> a, Type s, quat_t<Type>& adj_a, T
 template<typename Type>
 inline CUDA_CALLABLE void adj_div(Type s, quat_t<Type> a, Type& adj_s, quat_t<Type>& adj_a, const quat_t<Type>& adj_ret)
 {
-    adj_s -= dot(a, adj_ret)/ (s * s); // - a / s^2
-    adj_a += s / adj_ret;
+    for (unsigned i=0; i < 4; ++i)
+    {
+        Type inv = Type(1) / a[i];
+        adj_a[i] -= s * adj_ret[i] * inv * inv;
+        adj_s += adj_ret[i] * inv;
+    }
 }
 
 template<typename Type>
@@ -1357,8 +1384,9 @@ inline CUDA_CALLABLE void expect_near(const quat_t<Type>& actual, const quat_t<T
     if (diff > tolerance)
     {
         printf("Error, expect_near() failed with tolerance "); print(tolerance);
-        printf("\t Expected: "); print(expected);
-        printf("\t Actual: "); print(actual);
+        printf(" Expected: "); print(expected);
+        printf(" Actual: "); print(actual);
+        printf(" Max absolute difference: "); print(diff);
     }
 }
 
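
The first quat.h hunk gives quaternions the unary negation support (operator-, plus the pos()/neg()/adj_neg() wrappers that Warp's code generation calls into) that vectors and matrices already had. A minimal sketch of the Python-side effect, assuming unary minus on quaternion values inside kernels works in 1.8.x as this change suggests; the kernel is illustrative:

import warp as wp

@wp.kernel
def negate(q: wp.array(dtype=wp.quatf), out: wp.array(dtype=wp.quatf)):
    i = wp.tid()
    out[i] = -q[i]  # lowers to the quat_t operator- / neg() added above

q = wp.array([wp.quatf(1.0, 2.0, 3.0, 4.0)], dtype=wp.quatf)
out = wp.empty_like(q)
wp.launch(negate, dim=1, inputs=[q], outputs=[out])
print(out.numpy())  # [[-1. -2. -3. -4.]]
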
warp/native/rand.h CHANGED
@@ -71,14 +71,14 @@ inline CUDA_CALLABLE float randf(uint32& state, float min, float max) { return (
 // Box-Muller method
 inline CUDA_CALLABLE float randn(uint32& state) { return sqrt(-2.f * log(randf(state) + RANDN_EPSILON)) * cos(2.f * M_PI_F * randf(state)); }
 
-inline CUDA_CALLABLE void adj_rand_init(int seed, int& adj_seed, float adj_ret) {}
-inline CUDA_CALLABLE void adj_rand_init(int seed, int offset, int& adj_seed, int& adj_offset, float adj_ret) {}
+inline CUDA_CALLABLE void adj_rand_init(int seed, int& adj_seed, uint32 adj_ret) {}
+inline CUDA_CALLABLE void adj_rand_init(int seed, int offset, int& adj_seed, int& adj_offset, uint32 adj_ret) {}
 
-inline CUDA_CALLABLE void adj_randi(uint32& state, uint32& adj_state, float adj_ret) {}
-inline CUDA_CALLABLE void adj_randi(uint32& state, int min, int max, uint32& adj_state, int& adj_min, int& adj_max, float adj_ret) {}
+inline CUDA_CALLABLE void adj_randi(uint32& state, uint32& adj_state, int adj_ret) {}
+inline CUDA_CALLABLE void adj_randi(uint32& state, int min, int max, uint32& adj_state, int& adj_min, int& adj_max, int adj_ret) {}
 
-inline CUDA_CALLABLE void adj_randu(uint32& state, uint32& adj_state, float adj_ret) {}
-inline CUDA_CALLABLE void adj_randu(uint32& state, uint32 min, uint32 max, uint32& adj_state, uint32& adj_min, uint32& adj_max, float adj_ret) {}
+inline CUDA_CALLABLE void adj_randu(uint32& state, uint32& adj_state, uint32 adj_ret) {}
+inline CUDA_CALLABLE void adj_randu(uint32& state, uint32 min, uint32 max, uint32& adj_state, uint32& adj_min, uint32& adj_max, uint32 adj_ret) {}
 
 inline CUDA_CALLABLE void adj_randf(uint32& state, uint32& adj_state, float adj_ret) {}
 inline CUDA_CALLABLE void adj_randf(uint32& state, float min, float max, uint32& adj_state, float& adj_min, float& adj_max, float adj_ret) {}
@@ -195,7 +195,7 @@ inline CUDA_CALLABLE void adj_sample_unit_hemisphere_surface(uint32& state, uint
 inline CUDA_CALLABLE void adj_sample_unit_hemisphere(uint32& state, uint32& adj_state, const vec3& adj_ret) {}
 inline CUDA_CALLABLE void adj_sample_unit_square(uint32& state, uint32& adj_state, const vec2& adj_ret) {}
 inline CUDA_CALLABLE void adj_sample_unit_cube(uint32& state, uint32& adj_state, const vec3& adj_ret) {}
-inline CUDA_CALLABLE void adj_sample_unit_hypercube(uint32& state, uint32& adj_state, const vec3& adj_ret) {}
+inline CUDA_CALLABLE void adj_sample_unit_hypercube(uint32& state, uint32& adj_state, const vec4& adj_ret) {}
 
 /*
  * log-gamma function to support some of these distributions. The