warp-lang 1.7.2rc1__py3-none-manylinux_2_34_aarch64.whl → 1.8.1__py3-none-manylinux_2_34_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of warp-lang might be problematic. Click here for more details.
- warp/__init__.py +3 -1
- warp/__init__.pyi +3489 -1
- warp/autograd.py +45 -122
- warp/bin/warp-clang.so +0 -0
- warp/bin/warp.so +0 -0
- warp/build.py +241 -252
- warp/build_dll.py +130 -26
- warp/builtins.py +1907 -384
- warp/codegen.py +272 -104
- warp/config.py +12 -1
- warp/constants.py +1 -1
- warp/context.py +770 -238
- warp/dlpack.py +1 -1
- warp/examples/benchmarks/benchmark_cloth.py +2 -2
- warp/examples/benchmarks/benchmark_tile_sort.py +155 -0
- warp/examples/core/example_sample_mesh.py +1 -1
- warp/examples/core/example_spin_lock.py +93 -0
- warp/examples/core/example_work_queue.py +118 -0
- warp/examples/fem/example_adaptive_grid.py +5 -5
- warp/examples/fem/example_apic_fluid.py +1 -1
- warp/examples/fem/example_burgers.py +1 -1
- warp/examples/fem/example_convection_diffusion.py +9 -6
- warp/examples/fem/example_darcy_ls_optimization.py +489 -0
- warp/examples/fem/example_deformed_geometry.py +1 -1
- warp/examples/fem/example_diffusion.py +2 -2
- warp/examples/fem/example_diffusion_3d.py +1 -1
- warp/examples/fem/example_distortion_energy.py +1 -1
- warp/examples/fem/example_elastic_shape_optimization.py +387 -0
- warp/examples/fem/example_magnetostatics.py +5 -3
- warp/examples/fem/example_mixed_elasticity.py +5 -3
- warp/examples/fem/example_navier_stokes.py +11 -9
- warp/examples/fem/example_nonconforming_contact.py +5 -3
- warp/examples/fem/example_streamlines.py +8 -3
- warp/examples/fem/utils.py +9 -8
- warp/examples/interop/example_jax_callable.py +34 -4
- warp/examples/interop/example_jax_ffi_callback.py +2 -2
- warp/examples/interop/example_jax_kernel.py +27 -1
- warp/examples/optim/example_drone.py +1 -1
- warp/examples/sim/example_cloth.py +1 -1
- warp/examples/sim/example_cloth_self_contact.py +48 -54
- warp/examples/tile/example_tile_block_cholesky.py +502 -0
- warp/examples/tile/example_tile_cholesky.py +2 -1
- warp/examples/tile/example_tile_convolution.py +1 -1
- warp/examples/tile/example_tile_filtering.py +1 -1
- warp/examples/tile/example_tile_matmul.py +1 -1
- warp/examples/tile/example_tile_mlp.py +2 -0
- warp/fabric.py +7 -7
- warp/fem/__init__.py +5 -0
- warp/fem/adaptivity.py +1 -1
- warp/fem/cache.py +152 -63
- warp/fem/dirichlet.py +2 -2
- warp/fem/domain.py +136 -6
- warp/fem/field/field.py +141 -99
- warp/fem/field/nodal_field.py +85 -39
- warp/fem/field/virtual.py +99 -52
- warp/fem/geometry/adaptive_nanogrid.py +91 -86
- warp/fem/geometry/closest_point.py +13 -0
- warp/fem/geometry/deformed_geometry.py +102 -40
- warp/fem/geometry/element.py +56 -2
- warp/fem/geometry/geometry.py +323 -22
- warp/fem/geometry/grid_2d.py +157 -62
- warp/fem/geometry/grid_3d.py +116 -20
- warp/fem/geometry/hexmesh.py +86 -20
- warp/fem/geometry/nanogrid.py +166 -86
- warp/fem/geometry/partition.py +59 -25
- warp/fem/geometry/quadmesh.py +86 -135
- warp/fem/geometry/tetmesh.py +47 -119
- warp/fem/geometry/trimesh.py +77 -270
- warp/fem/integrate.py +181 -95
- warp/fem/linalg.py +25 -58
- warp/fem/operator.py +124 -27
- warp/fem/quadrature/pic_quadrature.py +36 -14
- warp/fem/quadrature/quadrature.py +40 -16
- warp/fem/space/__init__.py +1 -1
- warp/fem/space/basis_function_space.py +66 -46
- warp/fem/space/basis_space.py +17 -4
- warp/fem/space/dof_mapper.py +1 -1
- warp/fem/space/function_space.py +2 -2
- warp/fem/space/grid_2d_function_space.py +4 -1
- warp/fem/space/hexmesh_function_space.py +4 -2
- warp/fem/space/nanogrid_function_space.py +3 -1
- warp/fem/space/partition.py +11 -2
- warp/fem/space/quadmesh_function_space.py +4 -1
- warp/fem/space/restriction.py +5 -2
- warp/fem/space/shape/__init__.py +10 -8
- warp/fem/space/tetmesh_function_space.py +4 -1
- warp/fem/space/topology.py +52 -21
- warp/fem/space/trimesh_function_space.py +4 -1
- warp/fem/utils.py +53 -8
- warp/jax.py +1 -2
- warp/jax_experimental/ffi.py +210 -67
- warp/jax_experimental/xla_ffi.py +37 -24
- warp/math.py +171 -1
- warp/native/array.h +103 -4
- warp/native/builtin.h +182 -35
- warp/native/coloring.cpp +6 -2
- warp/native/cuda_util.cpp +1 -1
- warp/native/exports.h +118 -63
- warp/native/intersect.h +5 -5
- warp/native/mat.h +8 -13
- warp/native/mathdx.cpp +11 -5
- warp/native/matnn.h +1 -123
- warp/native/mesh.h +1 -1
- warp/native/quat.h +34 -6
- warp/native/rand.h +7 -7
- warp/native/sparse.cpp +121 -258
- warp/native/sparse.cu +181 -274
- warp/native/spatial.h +305 -17
- warp/native/svd.h +23 -8
- warp/native/tile.h +603 -73
- warp/native/tile_radix_sort.h +1112 -0
- warp/native/tile_reduce.h +239 -13
- warp/native/tile_scan.h +240 -0
- warp/native/tuple.h +189 -0
- warp/native/vec.h +10 -20
- warp/native/warp.cpp +36 -4
- warp/native/warp.cu +588 -52
- warp/native/warp.h +47 -74
- warp/optim/linear.py +5 -1
- warp/paddle.py +7 -8
- warp/py.typed +0 -0
- warp/render/render_opengl.py +110 -80
- warp/render/render_usd.py +124 -62
- warp/sim/__init__.py +9 -0
- warp/sim/collide.py +253 -80
- warp/sim/graph_coloring.py +8 -1
- warp/sim/import_mjcf.py +4 -3
- warp/sim/import_usd.py +11 -7
- warp/sim/integrator.py +5 -2
- warp/sim/integrator_euler.py +1 -1
- warp/sim/integrator_featherstone.py +1 -1
- warp/sim/integrator_vbd.py +761 -322
- warp/sim/integrator_xpbd.py +1 -1
- warp/sim/model.py +265 -260
- warp/sim/utils.py +10 -7
- warp/sparse.py +303 -166
- warp/tape.py +54 -51
- warp/tests/cuda/test_conditional_captures.py +1046 -0
- warp/tests/cuda/test_streams.py +1 -1
- warp/tests/geometry/test_volume.py +2 -2
- warp/tests/interop/test_dlpack.py +9 -9
- warp/tests/interop/test_jax.py +0 -1
- warp/tests/run_coverage_serial.py +1 -1
- warp/tests/sim/disabled_kinematics.py +2 -2
- warp/tests/sim/{test_vbd.py → test_cloth.py} +378 -112
- warp/tests/sim/test_collision.py +159 -51
- warp/tests/sim/test_coloring.py +91 -2
- warp/tests/test_array.py +254 -2
- warp/tests/test_array_reduce.py +2 -2
- warp/tests/test_assert.py +53 -0
- warp/tests/test_atomic_cas.py +312 -0
- warp/tests/test_codegen.py +142 -19
- warp/tests/test_conditional.py +47 -1
- warp/tests/test_ctypes.py +0 -20
- warp/tests/test_devices.py +8 -0
- warp/tests/test_fabricarray.py +4 -2
- warp/tests/test_fem.py +58 -25
- warp/tests/test_func.py +42 -1
- warp/tests/test_grad.py +1 -1
- warp/tests/test_lerp.py +1 -3
- warp/tests/test_map.py +481 -0
- warp/tests/test_mat.py +23 -24
- warp/tests/test_quat.py +28 -15
- warp/tests/test_rounding.py +10 -38
- warp/tests/test_runlength_encode.py +7 -7
- warp/tests/test_smoothstep.py +1 -1
- warp/tests/test_sparse.py +83 -2
- warp/tests/test_spatial.py +507 -1
- warp/tests/test_static.py +48 -0
- warp/tests/test_struct.py +2 -2
- warp/tests/test_tape.py +38 -0
- warp/tests/test_tuple.py +265 -0
- warp/tests/test_types.py +2 -2
- warp/tests/test_utils.py +24 -18
- warp/tests/test_vec.py +38 -408
- warp/tests/test_vec_constructors.py +325 -0
- warp/tests/tile/test_tile.py +438 -131
- warp/tests/tile/test_tile_mathdx.py +518 -14
- warp/tests/tile/test_tile_matmul.py +179 -0
- warp/tests/tile/test_tile_reduce.py +307 -5
- warp/tests/tile/test_tile_shared_memory.py +136 -7
- warp/tests/tile/test_tile_sort.py +121 -0
- warp/tests/unittest_suites.py +14 -6
- warp/types.py +462 -308
- warp/utils.py +647 -86
- {warp_lang-1.7.2rc1.dist-info → warp_lang-1.8.1.dist-info}/METADATA +20 -6
- {warp_lang-1.7.2rc1.dist-info → warp_lang-1.8.1.dist-info}/RECORD +190 -176
- warp/stubs.py +0 -3381
- warp/tests/sim/test_xpbd.py +0 -399
- warp/tests/test_mlp.py +0 -282
- {warp_lang-1.7.2rc1.dist-info → warp_lang-1.8.1.dist-info}/WHEEL +0 -0
- {warp_lang-1.7.2rc1.dist-info → warp_lang-1.8.1.dist-info}/licenses/LICENSE.md +0 -0
- {warp_lang-1.7.2rc1.dist-info → warp_lang-1.8.1.dist-info}/top_level.txt +0 -0
warp/native/mat.h
CHANGED
|
@@ -389,23 +389,17 @@ inline CUDA_CALLABLE bool operator==(const mat_t<Rows,Cols,Type>& a, const mat_t
|
|
|
389
389
|
return true;
|
|
390
390
|
}
|
|
391
391
|
|
|
392
|
-
|
|
393
|
-
// negation:
|
|
394
392
|
template<unsigned Rows, unsigned Cols, typename Type>
|
|
395
|
-
inline CUDA_CALLABLE mat_t<Rows,Cols,Type> operator - (mat_t<Rows,Cols,Type
|
|
393
|
+
inline CUDA_CALLABLE mat_t<Rows,Cols,Type> operator - (const mat_t<Rows,Cols,Type>& x)
|
|
396
394
|
{
|
|
397
|
-
// NB: this constructor will initialize all ret's components to 0, which is
|
|
398
|
-
// unnecessary...
|
|
399
395
|
mat_t<Rows,Cols,Type> ret;
|
|
400
396
|
for (unsigned i=0; i < Rows; ++i)
|
|
401
397
|
for (unsigned j=0; j < Cols; ++j)
|
|
402
|
-
ret.data[i][j] = -
|
|
398
|
+
ret.data[i][j] = -x.data[i][j];
|
|
403
399
|
|
|
404
|
-
// Wonder if this does a load of copying when it returns... hopefully not as it's inlined?
|
|
405
400
|
return ret;
|
|
406
401
|
}
|
|
407
402
|
|
|
408
|
-
|
|
409
403
|
template<unsigned Rows, unsigned Cols, typename Type>
|
|
410
404
|
CUDA_CALLABLE inline mat_t<Rows,Cols,Type> pos(const mat_t<Rows,Cols,Type>& x)
|
|
411
405
|
{
|
|
@@ -1539,13 +1533,13 @@ inline CUDA_CALLABLE void adj_div(const mat_t<Rows,Cols,Type>& a, Type s, mat_t<
|
|
|
1539
1533
|
template<unsigned Rows, unsigned Cols, typename Type>
|
|
1540
1534
|
inline CUDA_CALLABLE void adj_div(Type s, const mat_t<Rows,Cols,Type>& a, Type& adj_s, mat_t<Rows,Cols,Type>& adj_a, const mat_t<Rows,Cols,Type>& adj_ret)
|
|
1541
1535
|
{
|
|
1542
|
-
adj_s -= tensordot(a , adj_ret)/ (s * s); // - a / s^2
|
|
1543
|
-
|
|
1544
1536
|
for (unsigned i=0; i < Rows; ++i)
|
|
1545
1537
|
{
|
|
1546
1538
|
for (unsigned j=0; j < Cols; ++j)
|
|
1547
1539
|
{
|
|
1548
|
-
|
|
1540
|
+
Type inv = Type(1) / a.data[i][j];
|
|
1541
|
+
adj_a.data[i][j] -= s * adj_ret.data[i][j] * inv * inv;
|
|
1542
|
+
adj_s += adj_ret.data[i][j] * inv;
|
|
1549
1543
|
}
|
|
1550
1544
|
}
|
|
1551
1545
|
}
|
|
@@ -2206,8 +2200,9 @@ inline CUDA_CALLABLE void expect_near(const mat_t<Rows,Cols,Type>& actual, const
|
|
|
2206
2200
|
if (diff > tolerance)
|
|
2207
2201
|
{
|
|
2208
2202
|
printf("Error, expect_near() failed with tolerance "); print(tolerance);
|
|
2209
|
-
printf("
|
|
2210
|
-
printf("
|
|
2203
|
+
printf(" Expected: "); print(expected);
|
|
2204
|
+
printf(" Actual: "); print(actual);
|
|
2205
|
+
printf(" Max absolute difference: "); print(diff);
|
|
2211
2206
|
}
|
|
2212
2207
|
}
|
|
2213
2208
|
|
warp/native/mathdx.cpp
CHANGED
|
@@ -26,7 +26,8 @@ extern "C"
|
|
|
26
26
|
WP_API
|
|
27
27
|
bool cuda_compile_fft(
|
|
28
28
|
const char* ltoir_output_path,
|
|
29
|
-
const char* symbol_name,
|
|
29
|
+
const char* symbol_name,
|
|
30
|
+
int num_include_dirs,
|
|
30
31
|
const char** include_dirs,
|
|
31
32
|
const char* mathdx_include_dir,
|
|
32
33
|
int arch,
|
|
@@ -41,7 +42,6 @@ bool cuda_compile_fft(
|
|
|
41
42
|
}
|
|
42
43
|
|
|
43
44
|
WP_API bool cuda_compile_dot(
|
|
44
|
-
const char* fatbin_output_path,
|
|
45
45
|
const char* ltoir_output_path,
|
|
46
46
|
const char* symbol_name,
|
|
47
47
|
int num_include_dirs,
|
|
@@ -55,9 +55,9 @@ WP_API bool cuda_compile_dot(
|
|
|
55
55
|
int precision_B,
|
|
56
56
|
int precision_C,
|
|
57
57
|
int type,
|
|
58
|
-
int
|
|
59
|
-
int
|
|
60
|
-
int
|
|
58
|
+
int arrangement_A,
|
|
59
|
+
int arrangement_B,
|
|
60
|
+
int arrangement_C,
|
|
61
61
|
int num_threads)
|
|
62
62
|
{
|
|
63
63
|
printf("CUDA is disabled and/or Warp was not compiled with MathDx support.\n");
|
|
@@ -65,6 +65,7 @@ WP_API bool cuda_compile_dot(
|
|
|
65
65
|
}
|
|
66
66
|
|
|
67
67
|
WP_API bool cuda_compile_solver(
|
|
68
|
+
const char* fatbin_output_path,
|
|
68
69
|
const char* ltoir_output_path,
|
|
69
70
|
const char* symbol_name,
|
|
70
71
|
int num_include_dirs,
|
|
@@ -73,8 +74,13 @@ WP_API bool cuda_compile_solver(
|
|
|
73
74
|
int arch,
|
|
74
75
|
int M,
|
|
75
76
|
int N,
|
|
77
|
+
int NRHS,
|
|
76
78
|
int function,
|
|
79
|
+
int side,
|
|
80
|
+
int diag,
|
|
77
81
|
int precision,
|
|
82
|
+
int arrangement_A,
|
|
83
|
+
int arrangement_B,
|
|
78
84
|
int fill_mode,
|
|
79
85
|
int num_threads)
|
|
80
86
|
{
|
warp/native/matnn.h
CHANGED
|
@@ -218,126 +218,4 @@ CUDA_CALLABLE inline void adj_dense_solve(int n,
|
|
|
218
218
|
}
|
|
219
219
|
}
|
|
220
220
|
|
|
221
|
-
|
|
222
|
-
template <typename F>
|
|
223
|
-
CUDA_CALLABLE inline void mlp(const array_t<float>& weights, const array_t<float>& bias, F activation, int index, const array_t<float>& x, array_t<float>& out)
|
|
224
|
-
{
|
|
225
|
-
const int m = weights.shape[0];
|
|
226
|
-
const int n = weights.shape[1];
|
|
227
|
-
const int b = x.shape[1];
|
|
228
|
-
|
|
229
|
-
for (int i=0; i < m; ++i)
|
|
230
|
-
{
|
|
231
|
-
float tmp = bias.data[i];
|
|
232
|
-
|
|
233
|
-
for(int j=0; j < n; ++j)
|
|
234
|
-
{
|
|
235
|
-
tmp += weights.data[i*n + j]*x.data[index + b*j];
|
|
236
|
-
}
|
|
237
|
-
|
|
238
|
-
out.data[index + b*i] = activation(tmp);
|
|
239
|
-
}
|
|
240
|
-
}
|
|
241
|
-
|
|
242
|
-
template <typename F, typename AdjF>
|
|
243
|
-
CUDA_CALLABLE inline void adj_mlp(const array_t<float>& weights, const array_t<float>& bias, F activation, int index, const array_t<float>& x, array_t<float>& out,
|
|
244
|
-
array_t<float>& adj_weights, array_t<float>& adj_bias, AdjF adj_activation, int adj_index, array_t<float>& adj_x, array_t<float>& adj_out)
|
|
245
|
-
{
|
|
246
|
-
const int m = weights.shape[0];
|
|
247
|
-
const int n = weights.shape[1];
|
|
248
|
-
const int b = x.shape[1];
|
|
249
|
-
|
|
250
|
-
for (int i=0; i < m; ++i)
|
|
251
|
-
{
|
|
252
|
-
// recompute forward pass so we don't have to store pre-activation outputs
|
|
253
|
-
float tmp = bias.data[i];
|
|
254
|
-
|
|
255
|
-
for(int j=0; j < n; ++j)
|
|
256
|
-
{
|
|
257
|
-
tmp += weights.data[i*n + j]*x.data[index + b*j];
|
|
258
|
-
}
|
|
259
|
-
|
|
260
|
-
// adjoint w.r.t to activation
|
|
261
|
-
float adj_f = 0.0f;
|
|
262
|
-
|
|
263
|
-
if (adj_out.data)
|
|
264
|
-
adj_activation(tmp, adj_f, adj_out.data[index + b*i]);
|
|
265
|
-
|
|
266
|
-
for (int j=0; j < n; ++j)
|
|
267
|
-
{
|
|
268
|
-
// adjoint w.r.t M_i
|
|
269
|
-
if (adj_weights.data)
|
|
270
|
-
atomic_add(&adj_weights.data[i*n + j], x.data[index + b*j]*adj_f); // todo: reduce these atomic stores using warp/block level reductions
|
|
271
|
-
|
|
272
|
-
// adjoint w.r.t x
|
|
273
|
-
if (adj_x.data)
|
|
274
|
-
atomic_add(&adj_x.data[index + b*j], weights.data[i*n + j]*adj_f);
|
|
275
|
-
}
|
|
276
|
-
|
|
277
|
-
// adjoint w.r.t b
|
|
278
|
-
if (adj_bias.data)
|
|
279
|
-
atomic_add(&adj_bias.data[i], adj_f);
|
|
280
|
-
|
|
281
|
-
}
|
|
282
|
-
}
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
// template <typename F>
|
|
286
|
-
// CUDA_CALLABLE inline void mlp(const array_t<float>& weights, const array_t<float>& bias, F activation, int m, int n, int b, int index, const array_t<float>& x, array_t<float>& out)
|
|
287
|
-
// {
|
|
288
|
-
// x += index*n;
|
|
289
|
-
// out += index*m;
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
// for (int i=0; i < m; ++i)
|
|
293
|
-
// {
|
|
294
|
-
// float tmp = bias[i];
|
|
295
|
-
|
|
296
|
-
// for(int j=0; j < n; ++j)
|
|
297
|
-
// {
|
|
298
|
-
// tmp += weights[i*n + j]*x[j];
|
|
299
|
-
// }
|
|
300
|
-
|
|
301
|
-
// out[i] = activation(tmp);
|
|
302
|
-
// }
|
|
303
|
-
// }
|
|
304
|
-
|
|
305
|
-
// template <typename F, typename AdjF>
|
|
306
|
-
// CUDA_CALLABLE inline void adj_mlp(const array_t<float>& weights, const array_t<float>& bias, F activation, int m, int n, int b, int index, const array_t<float>& x, const array_t<float>& out,
|
|
307
|
-
// array_t<float>& adj_weights, array_t<float>& adj_bias, AdjF adj_activation, int adj_m, int adj_n, int adj_b, int adj_index, array_t<float>& adj_x, array_t<float>& adj_out)
|
|
308
|
-
// {
|
|
309
|
-
// x += index*n;
|
|
310
|
-
// out += index*m;
|
|
311
|
-
|
|
312
|
-
// adj_x += index*n;
|
|
313
|
-
// adj_out += index*m;
|
|
314
|
-
|
|
315
|
-
// for (int i=0; i < m; ++i)
|
|
316
|
-
// {
|
|
317
|
-
// // recompute forward pass so we don't have to store pre-activation outputs
|
|
318
|
-
// float tmp = bias[i];
|
|
319
|
-
|
|
320
|
-
// for(int j=0; j < n; ++j)
|
|
321
|
-
// {
|
|
322
|
-
// tmp += weights[i*n + j]*x[index + b*j];
|
|
323
|
-
// }
|
|
324
|
-
|
|
325
|
-
// // adjoint w.r.t to activation
|
|
326
|
-
// float adj_f = 0.0f;
|
|
327
|
-
// adj_activation(tmp, adj_f, adj_out[index + b*i]);
|
|
328
|
-
|
|
329
|
-
// for (int j=0; j < n; ++j)
|
|
330
|
-
// {
|
|
331
|
-
// // adjoint w.r.t M_i
|
|
332
|
-
// adj_weights[i*n + j] += x[j]*adj_f;
|
|
333
|
-
|
|
334
|
-
// // adjoint w.r.t x
|
|
335
|
-
// adj_x[index + b*j] += weights[i*n + j]*adj_f;
|
|
336
|
-
// }
|
|
337
|
-
|
|
338
|
-
// // adjoint w.r.t b
|
|
339
|
-
// adj_bias[i] += adj_f;
|
|
340
|
-
// }
|
|
341
|
-
// }
|
|
342
|
-
|
|
343
|
-
} // namespace wp
|
|
221
|
+
} // namespace wp
|
warp/native/mesh.h
CHANGED
|
@@ -1357,7 +1357,7 @@ CUDA_CALLABLE inline void adj_mesh_query_point_sign_normal(uint64_t id, const ve
|
|
|
1357
1357
|
uint64_t adj_id, vec3& adj_point, float& adj_max_dist, float& adj_epsilon, mesh_query_point_t& adj_ret)
|
|
1358
1358
|
{
|
|
1359
1359
|
adj_mesh_query_point_sign_normal(id, point, max_dist, ret.sign, ret.face, ret.u, ret.v, epsilon,
|
|
1360
|
-
adj_id, adj_point, adj_max_dist, adj_ret.sign, adj_ret.face, adj_ret.u, adj_ret.v,
|
|
1360
|
+
adj_id, adj_point, adj_max_dist, adj_ret.sign, adj_ret.face, adj_ret.u, adj_ret.v, adj_epsilon, adj_ret.result);
|
|
1361
1361
|
}
|
|
1362
1362
|
|
|
1363
1363
|
CUDA_CALLABLE inline void adj_mesh_query_point_sign_winding_number(uint64_t id, const vec3& point, float max_dist, float accuracy, float winding_number_threshold, const mesh_query_point_t& ret,
|
warp/native/quat.h
CHANGED
|
@@ -274,8 +274,32 @@ inline CUDA_CALLABLE quat_t<Type> add(const quat_t<Type>& a, const quat_t<Type>&
|
|
|
274
274
|
template<typename Type>
|
|
275
275
|
inline CUDA_CALLABLE quat_t<Type> sub(const quat_t<Type>& a, const quat_t<Type>& b)
|
|
276
276
|
{
|
|
277
|
-
return quat_t<Type>(a.x-b.x, a.y-b.y, a.z-b.z, a.w-b.w);
|
|
277
|
+
return quat_t<Type>(a.x-b.x, a.y-b.y, a.z-b.z, a.w-b.w);
|
|
278
|
+
}
|
|
279
|
+
|
|
280
|
+
template<typename Type>
|
|
281
|
+
inline CUDA_CALLABLE quat_t<Type> operator - (const quat_t<Type>& q)
|
|
282
|
+
{
|
|
283
|
+
return quat_t<Type>(-q.x, -q.y, -q.z, -q.w);
|
|
284
|
+
}
|
|
278
285
|
|
|
286
|
+
template<typename Type>
|
|
287
|
+
CUDA_CALLABLE inline quat_t<Type> pos(const quat_t<Type>& q)
|
|
288
|
+
{
|
|
289
|
+
return q;
|
|
290
|
+
}
|
|
291
|
+
|
|
292
|
+
template<typename Type>
|
|
293
|
+
CUDA_CALLABLE inline quat_t<Type> neg(const quat_t<Type>& q)
|
|
294
|
+
{
|
|
295
|
+
return -q;
|
|
296
|
+
}
|
|
297
|
+
|
|
298
|
+
template<typename Type>
|
|
299
|
+
CUDA_CALLABLE inline void adj_neg(const quat_t<Type>& q, quat_t<Type>& adj_q, const quat_t<Type>& adj_ret)
|
|
300
|
+
{
|
|
301
|
+
adj_q -= adj_ret;
|
|
302
|
+
}
|
|
279
303
|
|
|
280
304
|
template<typename Type>
|
|
281
305
|
inline CUDA_CALLABLE quat_t<Type> mul(const quat_t<Type>& a, const quat_t<Type>& b)
|
|
@@ -298,7 +322,6 @@ inline CUDA_CALLABLE quat_t<Type> mul(Type s, const quat_t<Type>& a)
|
|
|
298
322
|
return mul(a, s);
|
|
299
323
|
}
|
|
300
324
|
|
|
301
|
-
// division
|
|
302
325
|
template<typename Type>
|
|
303
326
|
inline CUDA_CALLABLE quat_t<Type> div(quat_t<Type> q, Type s)
|
|
304
327
|
{
|
|
@@ -881,8 +904,12 @@ inline CUDA_CALLABLE void adj_div(quat_t<Type> a, Type s, quat_t<Type>& adj_a, T
|
|
|
881
904
|
template<typename Type>
|
|
882
905
|
inline CUDA_CALLABLE void adj_div(Type s, quat_t<Type> a, Type& adj_s, quat_t<Type>& adj_a, const quat_t<Type>& adj_ret)
|
|
883
906
|
{
|
|
884
|
-
|
|
885
|
-
|
|
907
|
+
for (unsigned i=0; i < 4; ++i)
|
|
908
|
+
{
|
|
909
|
+
Type inv = Type(1) / a[i];
|
|
910
|
+
adj_a[i] -= s * adj_ret[i] * inv * inv;
|
|
911
|
+
adj_s += adj_ret[i] * inv;
|
|
912
|
+
}
|
|
886
913
|
}
|
|
887
914
|
|
|
888
915
|
template<typename Type>
|
|
@@ -1357,8 +1384,9 @@ inline CUDA_CALLABLE void expect_near(const quat_t<Type>& actual, const quat_t<T
|
|
|
1357
1384
|
if (diff > tolerance)
|
|
1358
1385
|
{
|
|
1359
1386
|
printf("Error, expect_near() failed with tolerance "); print(tolerance);
|
|
1360
|
-
printf("
|
|
1361
|
-
printf("
|
|
1387
|
+
printf(" Expected: "); print(expected);
|
|
1388
|
+
printf(" Actual: "); print(actual);
|
|
1389
|
+
printf(" Max absolute difference: "); print(diff);
|
|
1362
1390
|
}
|
|
1363
1391
|
}
|
|
1364
1392
|
|
warp/native/rand.h
CHANGED
|
@@ -71,14 +71,14 @@ inline CUDA_CALLABLE float randf(uint32& state, float min, float max) { return (
|
|
|
71
71
|
// Box-Muller method
|
|
72
72
|
inline CUDA_CALLABLE float randn(uint32& state) { return sqrt(-2.f * log(randf(state) + RANDN_EPSILON)) * cos(2.f * M_PI_F * randf(state)); }
|
|
73
73
|
|
|
74
|
-
inline CUDA_CALLABLE void adj_rand_init(int seed, int& adj_seed,
|
|
75
|
-
inline CUDA_CALLABLE void adj_rand_init(int seed, int offset, int& adj_seed, int& adj_offset,
|
|
74
|
+
inline CUDA_CALLABLE void adj_rand_init(int seed, int& adj_seed, uint32 adj_ret) {}
|
|
75
|
+
inline CUDA_CALLABLE void adj_rand_init(int seed, int offset, int& adj_seed, int& adj_offset, uint32 adj_ret) {}
|
|
76
76
|
|
|
77
|
-
inline CUDA_CALLABLE void adj_randi(uint32& state, uint32& adj_state,
|
|
78
|
-
inline CUDA_CALLABLE void adj_randi(uint32& state, int min, int max, uint32& adj_state, int& adj_min, int& adj_max,
|
|
77
|
+
inline CUDA_CALLABLE void adj_randi(uint32& state, uint32& adj_state, int adj_ret) {}
|
|
78
|
+
inline CUDA_CALLABLE void adj_randi(uint32& state, int min, int max, uint32& adj_state, int& adj_min, int& adj_max, int adj_ret) {}
|
|
79
79
|
|
|
80
|
-
inline CUDA_CALLABLE void adj_randu(uint32& state, uint32& adj_state,
|
|
81
|
-
inline CUDA_CALLABLE void adj_randu(uint32& state, uint32 min, uint32 max, uint32& adj_state, uint32& adj_min, uint32& adj_max,
|
|
80
|
+
inline CUDA_CALLABLE void adj_randu(uint32& state, uint32& adj_state, uint32 adj_ret) {}
|
|
81
|
+
inline CUDA_CALLABLE void adj_randu(uint32& state, uint32 min, uint32 max, uint32& adj_state, uint32& adj_min, uint32& adj_max, uint32 adj_ret) {}
|
|
82
82
|
|
|
83
83
|
inline CUDA_CALLABLE void adj_randf(uint32& state, uint32& adj_state, float adj_ret) {}
|
|
84
84
|
inline CUDA_CALLABLE void adj_randf(uint32& state, float min, float max, uint32& adj_state, float& adj_min, float& adj_max, float adj_ret) {}
|
|
@@ -195,7 +195,7 @@ inline CUDA_CALLABLE void adj_sample_unit_hemisphere_surface(uint32& state, uint
|
|
|
195
195
|
inline CUDA_CALLABLE void adj_sample_unit_hemisphere(uint32& state, uint32& adj_state, const vec3& adj_ret) {}
|
|
196
196
|
inline CUDA_CALLABLE void adj_sample_unit_square(uint32& state, uint32& adj_state, const vec2& adj_ret) {}
|
|
197
197
|
inline CUDA_CALLABLE void adj_sample_unit_cube(uint32& state, uint32& adj_state, const vec3& adj_ret) {}
|
|
198
|
-
inline CUDA_CALLABLE void adj_sample_unit_hypercube(uint32& state, uint32& adj_state, const
|
|
198
|
+
inline CUDA_CALLABLE void adj_sample_unit_hypercube(uint32& state, uint32& adj_state, const vec4& adj_ret) {}
|
|
199
199
|
|
|
200
200
|
/*
|
|
201
201
|
* log-gamma function to support some of these distributions. The
|