warp-lang 1.0.0b2__py3-none-win_amd64.whl → 1.0.0b6__py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of warp-lang might be problematic. Click here for more details.
- docs/conf.py +17 -5
- examples/env/env_ant.py +1 -1
- examples/env/env_cartpole.py +1 -1
- examples/env/env_humanoid.py +1 -1
- examples/env/env_usd.py +4 -1
- examples/env/environment.py +8 -9
- examples/example_dem.py +34 -33
- examples/example_diffray.py +364 -337
- examples/example_fluid.py +32 -23
- examples/example_jacobian_ik.py +97 -93
- examples/example_marching_cubes.py +6 -16
- examples/example_mesh.py +6 -16
- examples/example_mesh_intersect.py +16 -14
- examples/example_nvdb.py +14 -16
- examples/example_raycast.py +14 -13
- examples/example_raymarch.py +16 -23
- examples/example_render_opengl.py +19 -10
- examples/example_sim_cartpole.py +82 -78
- examples/example_sim_cloth.py +45 -48
- examples/example_sim_fk_grad.py +51 -44
- examples/example_sim_fk_grad_torch.py +47 -40
- examples/example_sim_grad_bounce.py +108 -133
- examples/example_sim_grad_cloth.py +99 -113
- examples/example_sim_granular.py +5 -6
- examples/{example_sim_sdf_shape.py → example_sim_granular_collision_sdf.py} +37 -26
- examples/example_sim_neo_hookean.py +51 -55
- examples/example_sim_particle_chain.py +4 -4
- examples/example_sim_quadruped.py +126 -81
- examples/example_sim_rigid_chain.py +54 -61
- examples/example_sim_rigid_contact.py +66 -70
- examples/example_sim_rigid_fem.py +3 -3
- examples/example_sim_rigid_force.py +1 -1
- examples/example_sim_rigid_gyroscopic.py +3 -4
- examples/example_sim_rigid_kinematics.py +28 -39
- examples/example_sim_trajopt.py +112 -110
- examples/example_sph.py +9 -8
- examples/example_wave.py +7 -7
- examples/fem/bsr_utils.py +30 -17
- examples/fem/example_apic_fluid.py +85 -69
- examples/fem/example_convection_diffusion.py +97 -93
- examples/fem/example_convection_diffusion_dg.py +142 -149
- examples/fem/example_convection_diffusion_dg0.py +141 -136
- examples/fem/example_deformed_geometry.py +146 -0
- examples/fem/example_diffusion.py +115 -84
- examples/fem/example_diffusion_3d.py +116 -86
- examples/fem/example_diffusion_mgpu.py +102 -79
- examples/fem/example_mixed_elasticity.py +139 -100
- examples/fem/example_navier_stokes.py +175 -162
- examples/fem/example_stokes.py +143 -111
- examples/fem/example_stokes_transfer.py +186 -157
- examples/fem/mesh_utils.py +59 -97
- examples/fem/plot_utils.py +138 -17
- tools/ci/publishing/build_nodes_info.py +54 -0
- warp/__init__.py +4 -3
- warp/__init__.pyi +1 -0
- warp/bin/warp-clang.dll +0 -0
- warp/bin/warp.dll +0 -0
- warp/build.py +5 -3
- warp/build_dll.py +29 -9
- warp/builtins.py +836 -492
- warp/codegen.py +864 -553
- warp/config.py +3 -1
- warp/context.py +389 -172
- warp/fem/__init__.py +24 -6
- warp/fem/cache.py +318 -25
- warp/fem/dirichlet.py +7 -3
- warp/fem/domain.py +14 -0
- warp/fem/field/__init__.py +30 -38
- warp/fem/field/field.py +149 -0
- warp/fem/field/nodal_field.py +244 -138
- warp/fem/field/restriction.py +8 -6
- warp/fem/field/test.py +127 -59
- warp/fem/field/trial.py +117 -60
- warp/fem/geometry/__init__.py +5 -1
- warp/fem/geometry/deformed_geometry.py +271 -0
- warp/fem/geometry/element.py +24 -1
- warp/fem/geometry/geometry.py +86 -14
- warp/fem/geometry/grid_2d.py +112 -54
- warp/fem/geometry/grid_3d.py +134 -65
- warp/fem/geometry/hexmesh.py +953 -0
- warp/fem/geometry/partition.py +85 -33
- warp/fem/geometry/quadmesh_2d.py +532 -0
- warp/fem/geometry/tetmesh.py +451 -115
- warp/fem/geometry/trimesh_2d.py +197 -92
- warp/fem/integrate.py +534 -268
- warp/fem/operator.py +58 -31
- warp/fem/polynomial.py +11 -0
- warp/fem/quadrature/__init__.py +1 -1
- warp/fem/quadrature/pic_quadrature.py +150 -58
- warp/fem/quadrature/quadrature.py +209 -57
- warp/fem/space/__init__.py +230 -53
- warp/fem/space/basis_space.py +489 -0
- warp/fem/space/collocated_function_space.py +105 -0
- warp/fem/space/dof_mapper.py +49 -2
- warp/fem/space/function_space.py +90 -39
- warp/fem/space/grid_2d_function_space.py +149 -496
- warp/fem/space/grid_3d_function_space.py +173 -538
- warp/fem/space/hexmesh_function_space.py +352 -0
- warp/fem/space/partition.py +129 -76
- warp/fem/space/quadmesh_2d_function_space.py +369 -0
- warp/fem/space/restriction.py +46 -34
- warp/fem/space/shape/__init__.py +15 -0
- warp/fem/space/shape/cube_shape_function.py +738 -0
- warp/fem/space/shape/shape_function.py +103 -0
- warp/fem/space/shape/square_shape_function.py +611 -0
- warp/fem/space/shape/tet_shape_function.py +567 -0
- warp/fem/space/shape/triangle_shape_function.py +429 -0
- warp/fem/space/tetmesh_function_space.py +132 -1039
- warp/fem/space/topology.py +295 -0
- warp/fem/space/trimesh_2d_function_space.py +104 -742
- warp/fem/types.py +13 -11
- warp/fem/utils.py +335 -60
- warp/native/array.h +120 -34
- warp/native/builtin.h +101 -72
- warp/native/bvh.cpp +73 -325
- warp/native/bvh.cu +406 -23
- warp/native/bvh.h +22 -40
- warp/native/clang/clang.cpp +1 -0
- warp/native/crt.h +2 -0
- warp/native/cuda_util.cpp +8 -3
- warp/native/cuda_util.h +1 -0
- warp/native/exports.h +1522 -1243
- warp/native/intersect.h +19 -4
- warp/native/intersect_adj.h +8 -8
- warp/native/mat.h +76 -17
- warp/native/mesh.cpp +33 -108
- warp/native/mesh.cu +114 -18
- warp/native/mesh.h +395 -40
- warp/native/noise.h +272 -329
- warp/native/quat.h +51 -8
- warp/native/rand.h +44 -34
- warp/native/reduce.cpp +1 -1
- warp/native/sparse.cpp +4 -4
- warp/native/sparse.cu +163 -155
- warp/native/spatial.h +2 -2
- warp/native/temp_buffer.h +18 -14
- warp/native/vec.h +103 -21
- warp/native/warp.cpp +2 -1
- warp/native/warp.cu +28 -3
- warp/native/warp.h +4 -3
- warp/render/render_opengl.py +261 -109
- warp/sim/__init__.py +1 -2
- warp/sim/articulation.py +385 -185
- warp/sim/import_mjcf.py +59 -48
- warp/sim/import_urdf.py +15 -15
- warp/sim/import_usd.py +174 -102
- warp/sim/inertia.py +17 -18
- warp/sim/integrator_xpbd.py +4 -3
- warp/sim/model.py +330 -250
- warp/sim/render.py +1 -1
- warp/sparse.py +625 -152
- warp/stubs.py +341 -309
- warp/tape.py +9 -6
- warp/tests/__main__.py +3 -6
- warp/tests/assets/curlnoise_golden.npy +0 -0
- warp/tests/assets/pnoise_golden.npy +0 -0
- warp/tests/{test_class_kernel.py → aux_test_class_kernel.py} +9 -1
- warp/tests/aux_test_conditional_unequal_types_kernels.py +21 -0
- warp/tests/{test_dependent.py → aux_test_dependent.py} +2 -2
- warp/tests/{test_reference.py → aux_test_reference.py} +1 -1
- warp/tests/aux_test_unresolved_func.py +14 -0
- warp/tests/aux_test_unresolved_symbol.py +14 -0
- warp/tests/disabled_kinematics.py +239 -0
- warp/tests/run_coverage_serial.py +31 -0
- warp/tests/test_adam.py +103 -106
- warp/tests/test_arithmetic.py +94 -74
- warp/tests/test_array.py +82 -101
- warp/tests/test_array_reduce.py +57 -23
- warp/tests/test_atomic.py +64 -28
- warp/tests/test_bool.py +22 -12
- warp/tests/test_builtins_resolution.py +1292 -0
- warp/tests/test_bvh.py +18 -18
- warp/tests/test_closest_point_edge_edge.py +54 -57
- warp/tests/test_codegen.py +165 -134
- warp/tests/test_compile_consts.py +28 -20
- warp/tests/test_conditional.py +108 -24
- warp/tests/test_copy.py +10 -12
- warp/tests/test_ctypes.py +112 -88
- warp/tests/test_dense.py +21 -14
- warp/tests/test_devices.py +98 -0
- warp/tests/test_dlpack.py +75 -75
- warp/tests/test_examples.py +237 -0
- warp/tests/test_fabricarray.py +22 -24
- warp/tests/test_fast_math.py +15 -11
- warp/tests/test_fem.py +1034 -124
- warp/tests/test_fp16.py +23 -16
- warp/tests/test_func.py +187 -86
- warp/tests/test_generics.py +194 -49
- warp/tests/test_grad.py +123 -181
- warp/tests/test_grad_customs.py +176 -0
- warp/tests/test_hash_grid.py +35 -34
- warp/tests/test_import.py +10 -23
- warp/tests/test_indexedarray.py +24 -25
- warp/tests/test_intersect.py +18 -9
- warp/tests/test_large.py +141 -0
- warp/tests/test_launch.py +14 -41
- warp/tests/test_lerp.py +64 -65
- warp/tests/test_lvalue.py +493 -0
- warp/tests/test_marching_cubes.py +12 -13
- warp/tests/test_mat.py +517 -2898
- warp/tests/test_mat_lite.py +115 -0
- warp/tests/test_mat_scalar_ops.py +2889 -0
- warp/tests/test_math.py +103 -9
- warp/tests/test_matmul.py +304 -69
- warp/tests/test_matmul_lite.py +410 -0
- warp/tests/test_mesh.py +60 -22
- warp/tests/test_mesh_query_aabb.py +21 -25
- warp/tests/test_mesh_query_point.py +111 -22
- warp/tests/test_mesh_query_ray.py +12 -24
- warp/tests/test_mlp.py +30 -22
- warp/tests/test_model.py +92 -89
- warp/tests/test_modules_lite.py +39 -0
- warp/tests/test_multigpu.py +88 -114
- warp/tests/test_noise.py +12 -11
- warp/tests/test_operators.py +16 -20
- warp/tests/test_options.py +11 -11
- warp/tests/test_pinned.py +17 -18
- warp/tests/test_print.py +32 -11
- warp/tests/test_quat.py +275 -129
- warp/tests/test_rand.py +18 -16
- warp/tests/test_reload.py +38 -34
- warp/tests/test_rounding.py +50 -43
- warp/tests/test_runlength_encode.py +168 -20
- warp/tests/test_smoothstep.py +9 -11
- warp/tests/test_snippet.py +143 -0
- warp/tests/test_sparse.py +261 -63
- warp/tests/test_spatial.py +276 -243
- warp/tests/test_streams.py +110 -85
- warp/tests/test_struct.py +268 -63
- warp/tests/test_tape.py +39 -21
- warp/tests/test_torch.py +90 -86
- warp/tests/test_transient_module.py +10 -12
- warp/tests/test_types.py +363 -0
- warp/tests/test_utils.py +451 -0
- warp/tests/test_vec.py +354 -2050
- warp/tests/test_vec_lite.py +73 -0
- warp/tests/test_vec_scalar_ops.py +2099 -0
- warp/tests/test_volume.py +418 -376
- warp/tests/test_volume_write.py +124 -134
- warp/tests/unittest_serial.py +35 -0
- warp/tests/unittest_suites.py +291 -0
- warp/tests/unittest_utils.py +342 -0
- warp/tests/{test_misc.py → unused_test_misc.py} +13 -5
- warp/tests/{test_debug.py → walkthough_debug.py} +3 -17
- warp/thirdparty/appdirs.py +36 -45
- warp/thirdparty/unittest_parallel.py +589 -0
- warp/types.py +622 -211
- warp/utils.py +54 -393
- warp_lang-1.0.0b6.dist-info/METADATA +238 -0
- warp_lang-1.0.0b6.dist-info/RECORD +409 -0
- {warp_lang-1.0.0b2.dist-info → warp_lang-1.0.0b6.dist-info}/WHEEL +1 -1
- examples/example_cache_management.py +0 -40
- examples/example_multigpu.py +0 -54
- examples/example_struct.py +0 -65
- examples/fem/example_stokes_transfer_3d.py +0 -210
- warp/bin/warp-clang.so +0 -0
- warp/bin/warp.so +0 -0
- warp/fem/field/discrete_field.py +0 -80
- warp/fem/space/nodal_function_space.py +0 -233
- warp/tests/test_all.py +0 -223
- warp/tests/test_array_scan.py +0 -60
- warp/tests/test_base.py +0 -208
- warp/tests/test_unresolved_func.py +0 -7
- warp/tests/test_unresolved_symbol.py +0 -7
- warp_lang-1.0.0b2.dist-info/METADATA +0 -26
- warp_lang-1.0.0b2.dist-info/RECORD +0 -380
- /warp/tests/{test_compile_consts_dummy.py → aux_test_compile_consts_dummy.py} +0 -0
- /warp/tests/{test_reference_reference.py → aux_test_reference_reference.py} +0 -0
- /warp/tests/{test_square.py → aux_test_square.py} +0 -0
- {warp_lang-1.0.0b2.dist-info → warp_lang-1.0.0b6.dist-info}/LICENSE.md +0 -0
- {warp_lang-1.0.0b2.dist-info → warp_lang-1.0.0b6.dist-info}/top_level.txt +0 -0
warp/native/quat.h
CHANGED
|
@@ -19,6 +19,15 @@ struct quat_t
|
|
|
19
19
|
// zero constructor for adjoint variable initialization
|
|
20
20
|
inline CUDA_CALLABLE quat_t(Type x=Type(0), Type y=Type(0), Type z=Type(0), Type w=Type(0)) : x(x), y(y), z(z), w(w) {}
|
|
21
21
|
explicit inline CUDA_CALLABLE quat_t(const vec_t<3,Type>& v, Type w=Type(0)) : x(v[0]), y(v[1]), z(v[2]), w(w) {}
|
|
22
|
+
|
|
23
|
+
template<typename OtherType>
|
|
24
|
+
explicit inline CUDA_CALLABLE quat_t(const quat_t<OtherType>& other)
|
|
25
|
+
{
|
|
26
|
+
x = static_cast<Type>(other.x);
|
|
27
|
+
y = static_cast<Type>(other.y);
|
|
28
|
+
z = static_cast<Type>(other.z);
|
|
29
|
+
w = static_cast<Type>(other.w);
|
|
30
|
+
}
|
|
22
31
|
|
|
23
32
|
// imaginary part
|
|
24
33
|
Type x;
|
|
@@ -73,7 +82,17 @@ inline CUDA_CALLABLE void adj_quat_t(const vec_t<3,Type>& v, Type w, vec_t<3,Typ
|
|
|
73
82
|
adj_v[0] += adj_ret.x;
|
|
74
83
|
adj_v[1] += adj_ret.y;
|
|
75
84
|
adj_v[2] += adj_ret.z;
|
|
76
|
-
adj_w
|
|
85
|
+
adj_w += adj_ret.w;
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
// casting constructor adjoint
|
|
89
|
+
template<typename Type, typename OtherType>
|
|
90
|
+
inline CUDA_CALLABLE void adj_quat_t(const quat_t<OtherType>& other, quat_t<OtherType>& adj_other, const quat_t<Type>& adj_ret)
|
|
91
|
+
{
|
|
92
|
+
adj_other.x += static_cast<OtherType>(adj_ret.x);
|
|
93
|
+
adj_other.y += static_cast<OtherType>(adj_ret.y);
|
|
94
|
+
adj_other.z += static_cast<OtherType>(adj_ret.z);
|
|
95
|
+
adj_other.w += static_cast<OtherType>(adj_ret.w);
|
|
77
96
|
}
|
|
78
97
|
|
|
79
98
|
// forward methods
|
|
@@ -206,12 +225,24 @@ inline CUDA_CALLABLE quat_t<Type> div(quat_t<Type> q, Type s)
|
|
|
206
225
|
return quat_t<Type>(q.x/s, q.y/s, q.z/s, q.w/s);
|
|
207
226
|
}
|
|
208
227
|
|
|
228
|
+
template<typename Type>
|
|
229
|
+
inline CUDA_CALLABLE quat_t<Type> div(Type s, quat_t<Type> q)
|
|
230
|
+
{
|
|
231
|
+
return quat_t<Type>(s/q.x, s/q.y, s/q.z, s/q.w);
|
|
232
|
+
}
|
|
233
|
+
|
|
209
234
|
template<typename Type>
|
|
210
235
|
inline CUDA_CALLABLE quat_t<Type> operator / (quat_t<Type> a, Type s)
|
|
211
236
|
{
|
|
212
237
|
return div(a,s);
|
|
213
238
|
}
|
|
214
239
|
|
|
240
|
+
template<typename Type>
|
|
241
|
+
inline CUDA_CALLABLE quat_t<Type> operator / (Type s, quat_t<Type> a)
|
|
242
|
+
{
|
|
243
|
+
return div(s,a);
|
|
244
|
+
}
|
|
245
|
+
|
|
215
246
|
template<typename Type>
|
|
216
247
|
inline CUDA_CALLABLE quat_t<Type> operator*(Type s, const quat_t<Type>& a)
|
|
217
248
|
{
|
|
@@ -321,7 +352,7 @@ inline CUDA_CALLABLE quat_t<Type> quat_from_matrix(const mat_t<3,3,Type>& m)
|
|
|
321
352
|
}
|
|
322
353
|
|
|
323
354
|
template<typename Type>
|
|
324
|
-
inline CUDA_CALLABLE Type
|
|
355
|
+
inline CUDA_CALLABLE Type extract(const quat_t<Type>& a, int idx)
|
|
325
356
|
{
|
|
326
357
|
#if FP_CHECK
|
|
327
358
|
if (idx < 0 || idx > 3)
|
|
@@ -357,7 +388,7 @@ CUDA_CALLABLE inline void adj_lerp(const quat_t<Type>& a, const quat_t<Type>& b,
|
|
|
357
388
|
}
|
|
358
389
|
|
|
359
390
|
template<typename Type>
|
|
360
|
-
inline CUDA_CALLABLE void
|
|
391
|
+
inline CUDA_CALLABLE void adj_extract(const quat_t<Type>& a, int idx, quat_t<Type>& adj_a, int & adj_idx, Type & adj_ret)
|
|
361
392
|
{
|
|
362
393
|
#if FP_CHECK
|
|
363
394
|
if (idx < 0 || idx > 3)
|
|
@@ -367,7 +398,7 @@ inline CUDA_CALLABLE void adj_index(const quat_t<Type>& a, int idx, quat_t<Type>
|
|
|
367
398
|
}
|
|
368
399
|
#endif
|
|
369
400
|
|
|
370
|
-
// See wp::
|
|
401
|
+
// See wp::extract(const quat_t<Type>& a, int idx) note
|
|
371
402
|
if (idx == 0) {adj_a.x += adj_ret;}
|
|
372
403
|
else if (idx == 1) {adj_a.y += adj_ret;}
|
|
373
404
|
else if (idx == 2) {adj_a.z += adj_ret;}
|
|
@@ -504,9 +535,14 @@ inline CUDA_CALLABLE void tensordot(const quat_t<Type>& a, const quat_t<Type>& b
|
|
|
504
535
|
}
|
|
505
536
|
|
|
506
537
|
template<typename Type>
|
|
507
|
-
inline CUDA_CALLABLE void adj_length(const quat_t<Type>& a, quat_t<Type>& adj_a, const Type adj_ret)
|
|
538
|
+
inline CUDA_CALLABLE void adj_length(const quat_t<Type>& a, Type ret, quat_t<Type>& adj_a, const Type adj_ret)
|
|
508
539
|
{
|
|
509
|
-
|
|
540
|
+
if (ret > Type(kEps))
|
|
541
|
+
{
|
|
542
|
+
Type inv_l = Type(1)/ret;
|
|
543
|
+
|
|
544
|
+
adj_a += quat_t<Type>(a.x*inv_l, a.y*inv_l, a.z*inv_l, a.w*inv_l) * adj_ret;
|
|
545
|
+
}
|
|
510
546
|
}
|
|
511
547
|
|
|
512
548
|
template<typename Type>
|
|
@@ -589,6 +625,13 @@ inline CUDA_CALLABLE void adj_div(quat_t<Type> a, Type s, quat_t<Type>& adj_a, T
|
|
|
589
625
|
adj_a += adj_ret / s;
|
|
590
626
|
}
|
|
591
627
|
|
|
628
|
+
template<typename Type>
|
|
629
|
+
inline CUDA_CALLABLE void adj_div(Type s, quat_t<Type> a, Type& adj_s, quat_t<Type>& adj_a, const quat_t<Type>& adj_ret)
|
|
630
|
+
{
|
|
631
|
+
adj_s -= dot(a, adj_ret)/ (s * s); // - a / s^2
|
|
632
|
+
adj_a += s / adj_ret;
|
|
633
|
+
}
|
|
634
|
+
|
|
592
635
|
template<typename Type>
|
|
593
636
|
inline CUDA_CALLABLE void adj_quat_rotate(const quat_t<Type>& q, const vec_t<3,Type>& p, quat_t<Type>& adj_q, vec_t<3,Type>& adj_p, const vec_t<3,Type>& adj_ret)
|
|
594
637
|
{
|
|
@@ -658,7 +701,7 @@ inline CUDA_CALLABLE void adj_quat_rotate_inv(const quat_t<Type>& q, const vec_t
|
|
|
658
701
|
}
|
|
659
702
|
|
|
660
703
|
template<typename Type>
|
|
661
|
-
inline CUDA_CALLABLE void adj_quat_slerp(const quat_t<Type>& q0, const quat_t<Type>& q1, Type t, quat_t<Type>& adj_q0, quat_t<Type>& adj_q1, Type& adj_t, const quat_t<Type>& adj_ret)
|
|
704
|
+
inline CUDA_CALLABLE void adj_quat_slerp(const quat_t<Type>& q0, const quat_t<Type>& q1, Type t, quat_t<Type>& ret, quat_t<Type>& adj_q0, quat_t<Type>& adj_q1, Type& adj_t, const quat_t<Type>& adj_ret)
|
|
662
705
|
{
|
|
663
706
|
vec_t<3,Type> axis;
|
|
664
707
|
Type angle;
|
|
@@ -669,7 +712,7 @@ inline CUDA_CALLABLE void adj_quat_slerp(const quat_t<Type>& q0, const quat_t<Ty
|
|
|
669
712
|
angle = angle * 0.5;
|
|
670
713
|
|
|
671
714
|
// adj_t
|
|
672
|
-
adj_t += dot(mul(
|
|
715
|
+
adj_t += dot(mul(ret, quat_t<Type>(angle*axis[0], angle*axis[1], angle*axis[2], Type(0))), adj_ret);
|
|
673
716
|
|
|
674
717
|
// adj_q0
|
|
675
718
|
quat_t<Type> q_inc_x_q0;
|
warp/native/rand.h
CHANGED
|
@@ -9,8 +9,8 @@
|
|
|
9
9
|
# pragma once
|
|
10
10
|
#include "array.h"
|
|
11
11
|
|
|
12
|
-
#ifndef
|
|
13
|
-
#define
|
|
12
|
+
#ifndef M_PI_F
|
|
13
|
+
#define M_PI_F 3.14159265358979323846f
|
|
14
14
|
#endif
|
|
15
15
|
|
|
16
16
|
namespace wp
|
|
@@ -33,7 +33,7 @@ inline CUDA_CALLABLE float randf(uint32& state) { state = rand_pcg(state); retur
|
|
|
33
33
|
inline CUDA_CALLABLE float randf(uint32& state, float min, float max) { return (max - min) * randf(state) + min; }
|
|
34
34
|
|
|
35
35
|
// Box-Muller method
|
|
36
|
-
inline CUDA_CALLABLE float randn(uint32& state) { return sqrt(-2.f * log(randf(state))) * cos(2.f *
|
|
36
|
+
inline CUDA_CALLABLE float randn(uint32& state) { return sqrt(-2.f * log(randf(state))) * cos(2.f * M_PI_F * randf(state)); }
|
|
37
37
|
|
|
38
38
|
inline CUDA_CALLABLE void adj_rand_init(int seed, int& adj_seed, float adj_ret) {}
|
|
39
39
|
inline CUDA_CALLABLE void adj_rand_init(int seed, int offset, int& adj_seed, int& adj_offset, float adj_ret) {}
|
|
@@ -55,14 +55,14 @@ inline CUDA_CALLABLE int sample_cdf(uint32& state, const array_t<float>& cdf)
|
|
|
55
55
|
inline CUDA_CALLABLE vec2 sample_triangle(uint32& state)
|
|
56
56
|
{
|
|
57
57
|
float r = sqrt(randf(state));
|
|
58
|
-
float u = 1.
|
|
58
|
+
float u = 1.f - r;
|
|
59
59
|
float v = randf(state) * r;
|
|
60
60
|
return vec2(u, v);
|
|
61
61
|
}
|
|
62
62
|
|
|
63
63
|
inline CUDA_CALLABLE vec2 sample_unit_ring(uint32& state)
|
|
64
64
|
{
|
|
65
|
-
float theta = randf(state, 0.f, 2.f*
|
|
65
|
+
float theta = randf(state, 0.f, 2.f*M_PI_F);
|
|
66
66
|
float x = cos(theta);
|
|
67
67
|
float y = sin(theta);
|
|
68
68
|
return vec2(x, y);
|
|
@@ -71,7 +71,7 @@ inline CUDA_CALLABLE vec2 sample_unit_ring(uint32& state)
|
|
|
71
71
|
inline CUDA_CALLABLE vec2 sample_unit_disk(uint32& state)
|
|
72
72
|
{
|
|
73
73
|
float r = sqrt(randf(state));
|
|
74
|
-
float theta = randf(state, 0.f, 2.f*
|
|
74
|
+
float theta = randf(state, 0.f, 2.f*M_PI_F);
|
|
75
75
|
float x = r * cos(theta);
|
|
76
76
|
float y = r * sin(theta);
|
|
77
77
|
return vec2(x, y);
|
|
@@ -80,7 +80,7 @@ inline CUDA_CALLABLE vec2 sample_unit_disk(uint32& state)
|
|
|
80
80
|
inline CUDA_CALLABLE vec3 sample_unit_sphere_surface(uint32& state)
|
|
81
81
|
{
|
|
82
82
|
float phi = acos(1.f - 2.f * randf(state));
|
|
83
|
-
float theta = randf(state, 0.f, 2.f*
|
|
83
|
+
float theta = randf(state, 0.f, 2.f*M_PI_F);
|
|
84
84
|
float x = cos(theta) * sin(phi);
|
|
85
85
|
float y = sin(theta) * sin(phi);
|
|
86
86
|
float z = cos(phi);
|
|
@@ -90,7 +90,7 @@ inline CUDA_CALLABLE vec3 sample_unit_sphere_surface(uint32& state)
|
|
|
90
90
|
inline CUDA_CALLABLE vec3 sample_unit_sphere(uint32& state)
|
|
91
91
|
{
|
|
92
92
|
float phi = acos(1.f - 2.f * randf(state));
|
|
93
|
-
float theta = randf(state, 0.f, 2.f*
|
|
93
|
+
float theta = randf(state, 0.f, 2.f*M_PI_F);
|
|
94
94
|
float r = pow(randf(state), 1.f/3.f);
|
|
95
95
|
float x = r * cos(theta) * sin(phi);
|
|
96
96
|
float y = r * sin(theta) * sin(phi);
|
|
@@ -101,7 +101,7 @@ inline CUDA_CALLABLE vec3 sample_unit_sphere(uint32& state)
|
|
|
101
101
|
inline CUDA_CALLABLE vec3 sample_unit_hemisphere_surface(uint32& state)
|
|
102
102
|
{
|
|
103
103
|
float phi = acos(1.f - randf(state));
|
|
104
|
-
float theta = randf(state, 0.f, 2.f*
|
|
104
|
+
float theta = randf(state, 0.f, 2.f*M_PI_F);
|
|
105
105
|
float x = cos(theta) * sin(phi);
|
|
106
106
|
float y = sin(theta) * sin(phi);
|
|
107
107
|
float z = cos(phi);
|
|
@@ -111,7 +111,7 @@ inline CUDA_CALLABLE vec3 sample_unit_hemisphere_surface(uint32& state)
|
|
|
111
111
|
inline CUDA_CALLABLE vec3 sample_unit_hemisphere(uint32& state)
|
|
112
112
|
{
|
|
113
113
|
float phi = acos(1.f - randf(state));
|
|
114
|
-
float theta = randf(state, 0.f, 2.f*
|
|
114
|
+
float theta = randf(state, 0.f, 2.f*M_PI_F);
|
|
115
115
|
float r = pow(randf(state), 1.f/3.f);
|
|
116
116
|
float x = r * cos(theta) * sin(phi);
|
|
117
117
|
float y = r * sin(theta) * sin(phi);
|
|
@@ -134,6 +134,15 @@ inline CUDA_CALLABLE vec3 sample_unit_cube(uint32& state)
|
|
|
134
134
|
return vec3(x, y, z);
|
|
135
135
|
}
|
|
136
136
|
|
|
137
|
+
inline CUDA_CALLABLE vec4 sample_unit_hypercube(uint32& state)
|
|
138
|
+
{
|
|
139
|
+
float a = randf(state) - 0.5f;
|
|
140
|
+
float b = randf(state) - 0.5f;
|
|
141
|
+
float c = randf(state) - 0.5f;
|
|
142
|
+
float d = randf(state) - 0.5f;
|
|
143
|
+
return vec4(a, b, c, d);
|
|
144
|
+
}
|
|
145
|
+
|
|
137
146
|
inline CUDA_CALLABLE void adj_sample_cdf(uint32& state, const array_t<float>& cdf, uint32& adj_state, array_t<float>& adj_cdf, const int& adj_ret) {}
|
|
138
147
|
inline CUDA_CALLABLE void adj_sample_triangle(uint32& state, uint32& adj_state, const vec2& adj_ret) {}
|
|
139
148
|
inline CUDA_CALLABLE void adj_sample_unit_ring(uint32& state, uint32& adj_state, const vec2& adj_ret) {}
|
|
@@ -144,6 +153,7 @@ inline CUDA_CALLABLE void adj_sample_unit_hemisphere_surface(uint32& state, uint
|
|
|
144
153
|
inline CUDA_CALLABLE void adj_sample_unit_hemisphere(uint32& state, uint32& adj_state, const vec3& adj_ret) {}
|
|
145
154
|
inline CUDA_CALLABLE void adj_sample_unit_square(uint32& state, uint32& adj_state, const vec2& adj_ret) {}
|
|
146
155
|
inline CUDA_CALLABLE void adj_sample_unit_cube(uint32& state, uint32& adj_state, const vec3& adj_ret) {}
|
|
156
|
+
inline CUDA_CALLABLE void adj_sample_unit_hypercube(uint32& state, uint32& adj_state, const vec3& adj_ret) {}
|
|
147
157
|
|
|
148
158
|
/*
|
|
149
159
|
* log-gamma function to support some of these distributions. The
|
|
@@ -158,17 +168,17 @@ inline CUDA_CALLABLE float random_loggam(float x)
|
|
|
158
168
|
float x0, x2, lg2pi, gl, gl0;
|
|
159
169
|
uint32 n;
|
|
160
170
|
|
|
161
|
-
const float a[10] = {8.333333333333333e-
|
|
162
|
-
7.936507936507937e-
|
|
163
|
-
8.417508417508418e-
|
|
164
|
-
6.410256410256410e-
|
|
165
|
-
1.796443723688307e-
|
|
171
|
+
const float a[10] = {8.333333333333333e-02f, -2.777777777777778e-03f,
|
|
172
|
+
7.936507936507937e-04f, -5.952380952380952e-04f,
|
|
173
|
+
8.417508417508418e-04f, -1.917526917526918e-03f,
|
|
174
|
+
6.410256410256410e-03f, -2.955065359477124e-02f,
|
|
175
|
+
1.796443723688307e-01f, -1.39243221690590e+00f};
|
|
166
176
|
|
|
167
|
-
if ((x == 1.
|
|
177
|
+
if ((x == 1.f) || (x == 2.f))
|
|
168
178
|
{
|
|
169
|
-
return 0.
|
|
179
|
+
return 0.f;
|
|
170
180
|
}
|
|
171
|
-
else if (x < 7.
|
|
181
|
+
else if (x < 7.f)
|
|
172
182
|
{
|
|
173
183
|
n = uint32((7 - x));
|
|
174
184
|
}
|
|
@@ -178,8 +188,8 @@ inline CUDA_CALLABLE float random_loggam(float x)
|
|
|
178
188
|
}
|
|
179
189
|
|
|
180
190
|
x0 = x + float(n);
|
|
181
|
-
x2 = (1.
|
|
182
|
-
// log(2 *
|
|
191
|
+
x2 = (1.f / x0) * (1.f / x0);
|
|
192
|
+
// log(2 * M_PI_F)
|
|
183
193
|
lg2pi = 1.8378770664093453f;
|
|
184
194
|
gl0 = a[9];
|
|
185
195
|
for (int i = 8; i >= 0; i--)
|
|
@@ -187,13 +197,13 @@ inline CUDA_CALLABLE float random_loggam(float x)
|
|
|
187
197
|
gl0 *= x2;
|
|
188
198
|
gl0 += a[i];
|
|
189
199
|
}
|
|
190
|
-
gl = gl0 / x0 + 0.
|
|
191
|
-
if (x < 7.
|
|
200
|
+
gl = gl0 / x0 + 0.5f * lg2pi + (x0 - 0.5f) * log(x0) - x0;
|
|
201
|
+
if (x < 7.f)
|
|
192
202
|
{
|
|
193
203
|
for (uint32 k = 1; k <= n; k++)
|
|
194
204
|
{
|
|
195
|
-
gl -= log(x0 - 1.
|
|
196
|
-
x0 -= 1.
|
|
205
|
+
gl -= log(x0 - 1.f);
|
|
206
|
+
x0 -= 1.f;
|
|
197
207
|
}
|
|
198
208
|
}
|
|
199
209
|
return gl;
|
|
@@ -205,7 +215,7 @@ inline CUDA_CALLABLE uint32 random_poisson_mult(uint32& state, float lam) {
|
|
|
205
215
|
|
|
206
216
|
enlam = exp(-lam);
|
|
207
217
|
X = 0;
|
|
208
|
-
prod = 1.
|
|
218
|
+
prod = 1.f;
|
|
209
219
|
|
|
210
220
|
while (1)
|
|
211
221
|
{
|
|
@@ -234,22 +244,22 @@ inline CUDA_CALLABLE uint32 random_poisson(uint32& state, float lam)
|
|
|
234
244
|
|
|
235
245
|
slam = sqrt(lam);
|
|
236
246
|
loglam = log(lam);
|
|
237
|
-
b = 0.
|
|
238
|
-
a = -0.
|
|
239
|
-
invalpha = 1.
|
|
240
|
-
vr = 0.
|
|
247
|
+
b = 0.931f + 2.53f * slam;
|
|
248
|
+
a = -0.059f + 0.02483f * b;
|
|
249
|
+
invalpha = 1.1239f + 1.1328f / (b - 3.4f);
|
|
250
|
+
vr = 0.9277f - 3.6224f / (b - 2.f);
|
|
241
251
|
|
|
242
252
|
while (1)
|
|
243
253
|
{
|
|
244
|
-
U = randf(state) - 0.
|
|
254
|
+
U = randf(state) - 0.5f;
|
|
245
255
|
V = randf(state);
|
|
246
|
-
us = 0.
|
|
247
|
-
k = uint32(floor((2 * a / us + b) * U + lam + 0.
|
|
248
|
-
if ((us >= 0.
|
|
256
|
+
us = 0.5f - abs(U);
|
|
257
|
+
k = uint32(floor((2.f * a / us + b) * U + lam + 0.43f));
|
|
258
|
+
if ((us >= 0.07f) && (V <= vr))
|
|
249
259
|
{
|
|
250
260
|
return k;
|
|
251
261
|
}
|
|
252
|
-
if ((us < 0.
|
|
262
|
+
if ((us < 0.013f) && (V > us))
|
|
253
263
|
{
|
|
254
264
|
continue;
|
|
255
265
|
}
|
warp/native/reduce.cpp
CHANGED
|
@@ -97,7 +97,7 @@ template <typename T> void array_sum_host(const T *ptr_a, T *ptr_out, int count,
|
|
|
97
97
|
accumulate_func = dyn_len_sum<T>;
|
|
98
98
|
}
|
|
99
99
|
|
|
100
|
-
|
|
100
|
+
memset(ptr_out, 0, sizeof(T)*type_length);
|
|
101
101
|
for (int i = 0; i < count; ++i)
|
|
102
102
|
accumulate_func(ptr_a + i * stride, ptr_out, type_length);
|
|
103
103
|
}
|
warp/native/sparse.cpp
CHANGED
|
@@ -179,10 +179,10 @@ void bsr_transpose_host(int rows_per_block, int cols_per_block, int row_count, i
|
|
|
179
179
|
const int block_size = rows_per_block * cols_per_block;
|
|
180
180
|
|
|
181
181
|
void (*block_transpose_func)(const T *, T *, int, int) = bsr_dyn_block_transpose<T>;
|
|
182
|
-
switch (
|
|
182
|
+
switch (rows_per_block)
|
|
183
183
|
{
|
|
184
184
|
case 1:
|
|
185
|
-
switch (
|
|
185
|
+
switch (cols_per_block)
|
|
186
186
|
{
|
|
187
187
|
case 1:
|
|
188
188
|
block_transpose_func = bsr_fixed_block_transpose<1, 1, T>;
|
|
@@ -196,7 +196,7 @@ void bsr_transpose_host(int rows_per_block, int cols_per_block, int row_count, i
|
|
|
196
196
|
}
|
|
197
197
|
break;
|
|
198
198
|
case 2:
|
|
199
|
-
switch (
|
|
199
|
+
switch (cols_per_block)
|
|
200
200
|
{
|
|
201
201
|
case 1:
|
|
202
202
|
block_transpose_func = bsr_fixed_block_transpose<2, 1, T>;
|
|
@@ -210,7 +210,7 @@ void bsr_transpose_host(int rows_per_block, int cols_per_block, int row_count, i
|
|
|
210
210
|
}
|
|
211
211
|
break;
|
|
212
212
|
case 3:
|
|
213
|
-
switch (
|
|
213
|
+
switch (cols_per_block)
|
|
214
214
|
{
|
|
215
215
|
case 1:
|
|
216
216
|
block_transpose_func = bsr_fixed_block_transpose<3, 1, T>;
|