PyPI - warp-lang - Versions diffs - 1.0.0b2__py3-none-manylinux2014_x86_64.whl → 1.0.0b6__py3-none-manylinux2014_x86_64.whl - Mend

warp-lang 1.0.0b2__py3-none-manylinux2014_x86_64.whl → 1.0.0b6__py3-none-manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (269)

docs/conf.py +17 -5
examples/env/env_ant.py +1 -1
examples/env/env_cartpole.py +1 -1
examples/env/env_humanoid.py +1 -1
examples/env/env_usd.py +4 -1
examples/env/environment.py +8 -9
examples/example_dem.py +34 -33
examples/example_diffray.py +364 -337
examples/example_fluid.py +32 -23
examples/example_jacobian_ik.py +97 -93
examples/example_marching_cubes.py +6 -16
examples/example_mesh.py +6 -16
examples/example_mesh_intersect.py +16 -14
examples/example_nvdb.py +14 -16
examples/example_raycast.py +14 -13
examples/example_raymarch.py +16 -23
examples/example_render_opengl.py +19 -10
examples/example_sim_cartpole.py +82 -78
examples/example_sim_cloth.py +45 -48
examples/example_sim_fk_grad.py +51 -44
examples/example_sim_fk_grad_torch.py +47 -40
examples/example_sim_grad_bounce.py +108 -133
examples/example_sim_grad_cloth.py +99 -113
examples/example_sim_granular.py +5 -6
examples/{example_sim_sdf_shape.py → example_sim_granular_collision_sdf.py} +37 -26
examples/example_sim_neo_hookean.py +51 -55
examples/example_sim_particle_chain.py +4 -4
examples/example_sim_quadruped.py +126 -81
examples/example_sim_rigid_chain.py +54 -61
examples/example_sim_rigid_contact.py +66 -70
examples/example_sim_rigid_fem.py +3 -3
examples/example_sim_rigid_force.py +1 -1
examples/example_sim_rigid_gyroscopic.py +3 -4
examples/example_sim_rigid_kinematics.py +28 -39
examples/example_sim_trajopt.py +112 -110
examples/example_sph.py +9 -8
examples/example_wave.py +7 -7
examples/fem/bsr_utils.py +30 -17
examples/fem/example_apic_fluid.py +85 -69
examples/fem/example_convection_diffusion.py +97 -93
examples/fem/example_convection_diffusion_dg.py +142 -149
examples/fem/example_convection_diffusion_dg0.py +141 -136
examples/fem/example_deformed_geometry.py +146 -0
examples/fem/example_diffusion.py +115 -84
examples/fem/example_diffusion_3d.py +116 -86
examples/fem/example_diffusion_mgpu.py +102 -79
examples/fem/example_mixed_elasticity.py +139 -100
examples/fem/example_navier_stokes.py +175 -162
examples/fem/example_stokes.py +143 -111
examples/fem/example_stokes_transfer.py +186 -157
examples/fem/mesh_utils.py +59 -97
examples/fem/plot_utils.py +138 -17
tools/ci/publishing/build_nodes_info.py +54 -0
warp/__init__.py +4 -3
warp/__init__.pyi +1 -0
warp/bin/warp-clang.so +0 -0
warp/bin/warp.so +0 -0
warp/build.py +5 -3
warp/build_dll.py +29 -9
warp/builtins.py +836 -492
warp/codegen.py +864 -553
warp/config.py +3 -1
warp/context.py +389 -172
warp/fem/__init__.py +24 -6
warp/fem/cache.py +318 -25
warp/fem/dirichlet.py +7 -3
warp/fem/domain.py +14 -0
warp/fem/field/__init__.py +30 -38
warp/fem/field/field.py +149 -0
warp/fem/field/nodal_field.py +244 -138
warp/fem/field/restriction.py +8 -6
warp/fem/field/test.py +127 -59
warp/fem/field/trial.py +117 -60
warp/fem/geometry/__init__.py +5 -1
warp/fem/geometry/deformed_geometry.py +271 -0
warp/fem/geometry/element.py +24 -1
warp/fem/geometry/geometry.py +86 -14
warp/fem/geometry/grid_2d.py +112 -54
warp/fem/geometry/grid_3d.py +134 -65
warp/fem/geometry/hexmesh.py +953 -0
warp/fem/geometry/partition.py +85 -33
warp/fem/geometry/quadmesh_2d.py +532 -0
warp/fem/geometry/tetmesh.py +451 -115
warp/fem/geometry/trimesh_2d.py +197 -92
warp/fem/integrate.py +534 -268
warp/fem/operator.py +58 -31
warp/fem/polynomial.py +11 -0
warp/fem/quadrature/__init__.py +1 -1
warp/fem/quadrature/pic_quadrature.py +150 -58
warp/fem/quadrature/quadrature.py +209 -57
warp/fem/space/__init__.py +230 -53
warp/fem/space/basis_space.py +489 -0
warp/fem/space/collocated_function_space.py +105 -0
warp/fem/space/dof_mapper.py +49 -2
warp/fem/space/function_space.py +90 -39
warp/fem/space/grid_2d_function_space.py +149 -496
warp/fem/space/grid_3d_function_space.py +173 -538
warp/fem/space/hexmesh_function_space.py +352 -0
warp/fem/space/partition.py +129 -76
warp/fem/space/quadmesh_2d_function_space.py +369 -0
warp/fem/space/restriction.py +46 -34
warp/fem/space/shape/__init__.py +15 -0
warp/fem/space/shape/cube_shape_function.py +738 -0
warp/fem/space/shape/shape_function.py +103 -0
warp/fem/space/shape/square_shape_function.py +611 -0
warp/fem/space/shape/tet_shape_function.py +567 -0
warp/fem/space/shape/triangle_shape_function.py +429 -0
warp/fem/space/tetmesh_function_space.py +132 -1039
warp/fem/space/topology.py +295 -0
warp/fem/space/trimesh_2d_function_space.py +104 -742
warp/fem/types.py +13 -11
warp/fem/utils.py +335 -60
warp/native/array.h +120 -34
warp/native/builtin.h +101 -72
warp/native/bvh.cpp +73 -325
warp/native/bvh.cu +406 -23
warp/native/bvh.h +22 -40
warp/native/clang/clang.cpp +1 -0
warp/native/crt.h +2 -0
warp/native/cuda_util.cpp +8 -3
warp/native/cuda_util.h +1 -0
warp/native/exports.h +1522 -1243
warp/native/intersect.h +19 -4
warp/native/intersect_adj.h +8 -8
warp/native/mat.h +76 -17
warp/native/mesh.cpp +33 -108
warp/native/mesh.cu +114 -18
warp/native/mesh.h +395 -40
warp/native/noise.h +272 -329
warp/native/quat.h +51 -8
warp/native/rand.h +44 -34
warp/native/reduce.cpp +1 -1
warp/native/sparse.cpp +4 -4
warp/native/sparse.cu +163 -155
warp/native/spatial.h +2 -2
warp/native/temp_buffer.h +18 -14
warp/native/vec.h +103 -21
warp/native/warp.cpp +2 -1
warp/native/warp.cu +28 -3
warp/native/warp.h +4 -3
warp/render/render_opengl.py +261 -109
warp/sim/__init__.py +1 -2
warp/sim/articulation.py +385 -185
warp/sim/import_mjcf.py +59 -48
warp/sim/import_urdf.py +15 -15
warp/sim/import_usd.py +174 -102
warp/sim/inertia.py +17 -18
warp/sim/integrator_xpbd.py +4 -3
warp/sim/model.py +330 -250
warp/sim/render.py +1 -1
warp/sparse.py +625 -152
warp/stubs.py +341 -309
warp/tape.py +9 -6
warp/tests/__main__.py +3 -6
warp/tests/assets/curlnoise_golden.npy +0 -0
warp/tests/assets/pnoise_golden.npy +0 -0
warp/tests/{test_class_kernel.py → aux_test_class_kernel.py} +9 -1
warp/tests/aux_test_conditional_unequal_types_kernels.py +21 -0
warp/tests/{test_dependent.py → aux_test_dependent.py} +2 -2
warp/tests/{test_reference.py → aux_test_reference.py} +1 -1
warp/tests/aux_test_unresolved_func.py +14 -0
warp/tests/aux_test_unresolved_symbol.py +14 -0
warp/tests/disabled_kinematics.py +239 -0
warp/tests/run_coverage_serial.py +31 -0
warp/tests/test_adam.py +103 -106
warp/tests/test_arithmetic.py +94 -74
warp/tests/test_array.py +82 -101
warp/tests/test_array_reduce.py +57 -23
warp/tests/test_atomic.py +64 -28
warp/tests/test_bool.py +22 -12
warp/tests/test_builtins_resolution.py +1292 -0
warp/tests/test_bvh.py +18 -18
warp/tests/test_closest_point_edge_edge.py +54 -57
warp/tests/test_codegen.py +165 -134
warp/tests/test_compile_consts.py +28 -20
warp/tests/test_conditional.py +108 -24
warp/tests/test_copy.py +10 -12
warp/tests/test_ctypes.py +112 -88
warp/tests/test_dense.py +21 -14
warp/tests/test_devices.py +98 -0
warp/tests/test_dlpack.py +75 -75
warp/tests/test_examples.py +237 -0
warp/tests/test_fabricarray.py +22 -24
warp/tests/test_fast_math.py +15 -11
warp/tests/test_fem.py +1034 -124
warp/tests/test_fp16.py +23 -16
warp/tests/test_func.py +187 -86
warp/tests/test_generics.py +194 -49
warp/tests/test_grad.py +123 -181
warp/tests/test_grad_customs.py +176 -0
warp/tests/test_hash_grid.py +35 -34
warp/tests/test_import.py +10 -23
warp/tests/test_indexedarray.py +24 -25
warp/tests/test_intersect.py +18 -9
warp/tests/test_large.py +141 -0
warp/tests/test_launch.py +14 -41
warp/tests/test_lerp.py +64 -65
warp/tests/test_lvalue.py +493 -0
warp/tests/test_marching_cubes.py +12 -13
warp/tests/test_mat.py +517 -2898
warp/tests/test_mat_lite.py +115 -0
warp/tests/test_mat_scalar_ops.py +2889 -0
warp/tests/test_math.py +103 -9
warp/tests/test_matmul.py +304 -69
warp/tests/test_matmul_lite.py +410 -0
warp/tests/test_mesh.py +60 -22
warp/tests/test_mesh_query_aabb.py +21 -25
warp/tests/test_mesh_query_point.py +111 -22
warp/tests/test_mesh_query_ray.py +12 -24
warp/tests/test_mlp.py +30 -22
warp/tests/test_model.py +92 -89
warp/tests/test_modules_lite.py +39 -0
warp/tests/test_multigpu.py +88 -114
warp/tests/test_noise.py +12 -11
warp/tests/test_operators.py +16 -20
warp/tests/test_options.py +11 -11
warp/tests/test_pinned.py +17 -18
warp/tests/test_print.py +32 -11
warp/tests/test_quat.py +275 -129
warp/tests/test_rand.py +18 -16
warp/tests/test_reload.py +38 -34
warp/tests/test_rounding.py +50 -43
warp/tests/test_runlength_encode.py +168 -20
warp/tests/test_smoothstep.py +9 -11
warp/tests/test_snippet.py +143 -0
warp/tests/test_sparse.py +261 -63
warp/tests/test_spatial.py +276 -243
warp/tests/test_streams.py +110 -85
warp/tests/test_struct.py +268 -63
warp/tests/test_tape.py +39 -21
warp/tests/test_torch.py +90 -86
warp/tests/test_transient_module.py +10 -12
warp/tests/test_types.py +363 -0
warp/tests/test_utils.py +451 -0
warp/tests/test_vec.py +354 -2050
warp/tests/test_vec_lite.py +73 -0
warp/tests/test_vec_scalar_ops.py +2099 -0
warp/tests/test_volume.py +418 -376
warp/tests/test_volume_write.py +124 -134
warp/tests/unittest_serial.py +35 -0
warp/tests/unittest_suites.py +291 -0
warp/tests/unittest_utils.py +342 -0
warp/tests/{test_misc.py → unused_test_misc.py} +13 -5
warp/tests/{test_debug.py → walkthough_debug.py} +3 -17
warp/thirdparty/appdirs.py +36 -45
warp/thirdparty/unittest_parallel.py +589 -0
warp/types.py +622 -211
warp/utils.py +54 -393
warp_lang-1.0.0b6.dist-info/METADATA +238 -0
warp_lang-1.0.0b6.dist-info/RECORD +409 -0
{warp_lang-1.0.0b2.dist-info → warp_lang-1.0.0b6.dist-info}/WHEEL +1 -1
examples/example_cache_management.py +0 -40
examples/example_multigpu.py +0 -54
examples/example_struct.py +0 -65
examples/fem/example_stokes_transfer_3d.py +0 -210
warp/fem/field/discrete_field.py +0 -80
warp/fem/space/nodal_function_space.py +0 -233
warp/tests/test_all.py +0 -223
warp/tests/test_array_scan.py +0 -60
warp/tests/test_base.py +0 -208
warp/tests/test_unresolved_func.py +0 -7
warp/tests/test_unresolved_symbol.py +0 -7
warp_lang-1.0.0b2.dist-info/METADATA +0 -26
warp_lang-1.0.0b2.dist-info/RECORD +0 -378
/warp/tests/{test_compile_consts_dummy.py → aux_test_compile_consts_dummy.py} +0 -0
/warp/tests/{test_reference_reference.py → aux_test_reference_reference.py} +0 -0
/warp/tests/{test_square.py → aux_test_square.py} +0 -0
{warp_lang-1.0.0b2.dist-info → warp_lang-1.0.0b6.dist-info}/LICENSE.md +0 -0
{warp_lang-1.0.0b2.dist-info → warp_lang-1.0.0b6.dist-info}/top_level.txt +0 -0

warp/native/quat.h CHANGED

@@ -19,6 +19,15 @@ struct quat_t
     // zero constructor for adjoint variable initialization
     inline CUDA_CALLABLE quat_t(Type x=Type(0), Type y=Type(0), Type z=Type(0), Type w=Type(0)) : x(x), y(y), z(z), w(w) {}
     explicit inline CUDA_CALLABLE quat_t(const vec_t<3,Type>& v, Type w=Type(0)) : x(v[0]), y(v[1]), z(v[2]), w(w) {}
+    template<typename OtherType>
+    explicit inline CUDA_CALLABLE quat_t(const quat_t<OtherType>& other)
+    {
+        x = static_cast<Type>(other.x);
+        y = static_cast<Type>(other.y);
+        z = static_cast<Type>(other.z);
+        w = static_cast<Type>(other.w);
+    }
     // imaginary part
     Type x;
@@ -73,7 +82,17 @@ inline CUDA_CALLABLE void adj_quat_t(const vec_t<3,Type>& v, Type w, vec_t<3,Typ
     adj_v[0] += adj_ret.x;
     adj_v[1] += adj_ret.y;
     adj_v[2] += adj_ret.z;
-    adj_w   += adj_ret.w;
+    adj_w    += adj_ret.w;
+}
+// casting constructor adjoint
+template<typename Type, typename OtherType>
+inline CUDA_CALLABLE void adj_quat_t(const quat_t<OtherType>& other, quat_t<OtherType>& adj_other, const quat_t<Type>& adj_ret)
+{
+    adj_other.x += static_cast<OtherType>(adj_ret.x);
+    adj_other.y += static_cast<OtherType>(adj_ret.y);
+    adj_other.z += static_cast<OtherType>(adj_ret.z);
+    adj_other.w += static_cast<OtherType>(adj_ret.w);
 }
 // forward methods
@@ -206,12 +225,24 @@ inline CUDA_CALLABLE quat_t<Type> div(quat_t<Type> q, Type s)
     return quat_t<Type>(q.x/s, q.y/s, q.z/s, q.w/s);
 }
+template<typename Type>
+inline CUDA_CALLABLE quat_t<Type> div(Type s, quat_t<Type> q)
+{
+    return quat_t<Type>(s/q.x, s/q.y, s/q.z, s/q.w);
+}
 template<typename Type>
 inline CUDA_CALLABLE quat_t<Type> operator / (quat_t<Type> a, Type s)
 {
     return div(a,s);
 }
+template<typename Type>
+inline CUDA_CALLABLE quat_t<Type> operator / (Type s, quat_t<Type> a)
+{
+    return div(s,a);
+}
 template<typename Type>
 inline CUDA_CALLABLE quat_t<Type> operator*(Type s, const quat_t<Type>& a)
 {
@@ -321,7 +352,7 @@ inline CUDA_CALLABLE quat_t<Type> quat_from_matrix(const mat_t<3,3,Type>& m)
 }
 template<typename Type>
-inline CUDA_CALLABLE Type index(const quat_t<Type>& a, int idx)
+inline CUDA_CALLABLE Type extract(const quat_t<Type>& a, int idx)
 {
 #if FP_CHECK
     if (idx < 0 || idx > 3)
@@ -357,7 +388,7 @@ CUDA_CALLABLE inline void adj_lerp(const quat_t<Type>& a, const quat_t<Type>& b,
 }
 template<typename Type>
-inline CUDA_CALLABLE void adj_index(const quat_t<Type>& a, int idx, quat_t<Type>& adj_a, int & adj_idx, Type & adj_ret)
+inline CUDA_CALLABLE void adj_extract(const quat_t<Type>& a, int idx, quat_t<Type>& adj_a, int & adj_idx, Type & adj_ret)
 {
 #if FP_CHECK
     if (idx < 0 || idx > 3)
@@ -367,7 +398,7 @@ inline CUDA_CALLABLE void adj_index(const quat_t<Type>& a, int idx, quat_t<Type>
     }
 #endif
-    // See wp::index(const quat_t<Type>& a, int idx) note
+    // See wp::extract(const quat_t<Type>& a, int idx) note
     if (idx == 0)       {adj_a.x += adj_ret;}
     else if (idx == 1)  {adj_a.y += adj_ret;}
     else if (idx == 2)  {adj_a.z += adj_ret;}
@@ -504,9 +535,14 @@ inline CUDA_CALLABLE void tensordot(const quat_t<Type>& a, const quat_t<Type>& b
 }
 template<typename Type>
-inline CUDA_CALLABLE void adj_length(const quat_t<Type>& a, quat_t<Type>& adj_a, const Type adj_ret)
+inline CUDA_CALLABLE void adj_length(const quat_t<Type>& a, Type ret, quat_t<Type>& adj_a, const Type adj_ret)
 {
-    adj_a += normalize(a)*adj_ret;
+    if (ret > Type(kEps))
+    {
+        Type inv_l = Type(1)/ret;
+        adj_a += quat_t<Type>(a.x*inv_l, a.y*inv_l, a.z*inv_l, a.w*inv_l) * adj_ret;
+    }
 }
 template<typename Type>
@@ -589,6 +625,13 @@ inline CUDA_CALLABLE void adj_div(quat_t<Type> a, Type s, quat_t<Type>& adj_a, T
     adj_a += adj_ret / s;
 }
+template<typename Type>
+inline CUDA_CALLABLE void adj_div(Type s, quat_t<Type> a, Type& adj_s, quat_t<Type>& adj_a, const quat_t<Type>& adj_ret)
+{
+    adj_s -= dot(a, adj_ret)/ (s * s); // - a / s^2
+    adj_a += s / adj_ret;
+}
 template<typename Type>
 inline CUDA_CALLABLE void adj_quat_rotate(const quat_t<Type>& q, const vec_t<3,Type>& p, quat_t<Type>& adj_q, vec_t<3,Type>& adj_p, const vec_t<3,Type>& adj_ret)
 {
@@ -658,7 +701,7 @@ inline CUDA_CALLABLE void adj_quat_rotate_inv(const quat_t<Type>& q, const vec_t
 }
 template<typename Type>
-inline CUDA_CALLABLE void adj_quat_slerp(const quat_t<Type>& q0, const quat_t<Type>& q1, Type t, quat_t<Type>& adj_q0, quat_t<Type>& adj_q1, Type& adj_t, const quat_t<Type>& adj_ret)
+inline CUDA_CALLABLE void adj_quat_slerp(const quat_t<Type>& q0, const quat_t<Type>& q1, Type t, quat_t<Type>& ret, quat_t<Type>& adj_q0, quat_t<Type>& adj_q1, Type& adj_t, const quat_t<Type>& adj_ret)
 {
     vec_t<3,Type> axis;
     Type angle;
@@ -669,7 +712,7 @@ inline CUDA_CALLABLE void adj_quat_slerp(const quat_t<Type>& q0, const quat_t<Ty
     angle = angle * 0.5;
     // adj_t
-    adj_t += dot(mul(quat_slerp(q0, q1, t), quat_t<Type>(angle*axis[0], angle*axis[1], angle*axis[2], Type(0))), adj_ret);
+    adj_t += dot(mul(ret, quat_t<Type>(angle*axis[0], angle*axis[1], angle*axis[2], Type(0))), adj_ret);
     // adj_q0
     quat_t<Type> q_inc_x_q0;

warp/native/rand.h CHANGED

@@ -9,8 +9,8 @@
 # pragma once
 #include "array.h"
-#ifndef M_PI
-#define M_PI 3.14159265358979323846f
+#ifndef M_PI_F
+#define M_PI_F 3.14159265358979323846f
 #endif
 namespace wp
@@ -33,7 +33,7 @@ inline CUDA_CALLABLE float randf(uint32& state) { state = rand_pcg(state); retur
 inline CUDA_CALLABLE float randf(uint32& state, float min, float max) { return (max - min) * randf(state) + min; }
 // Box-Muller method
-inline CUDA_CALLABLE float randn(uint32& state) { return sqrt(-2.f * log(randf(state))) * cos(2.f * M_PI * randf(state)); }
+inline CUDA_CALLABLE float randn(uint32& state) { return sqrt(-2.f * log(randf(state))) * cos(2.f * M_PI_F * randf(state)); }
 inline CUDA_CALLABLE void adj_rand_init(int seed, int& adj_seed, float adj_ret) {}
 inline CUDA_CALLABLE void adj_rand_init(int seed, int offset, int& adj_seed, int& adj_offset, float adj_ret) {}
@@ -55,14 +55,14 @@ inline CUDA_CALLABLE int sample_cdf(uint32& state, const array_t<float>& cdf)
 inline CUDA_CALLABLE vec2 sample_triangle(uint32& state)
 {
     float r = sqrt(randf(state));
-    float u = 1.0 - r;
+    float u = 1.f - r;
     float v = randf(state) * r;
     return vec2(u, v);
 }
 inline CUDA_CALLABLE vec2 sample_unit_ring(uint32& state)
 {
-    float theta = randf(state, 0.f, 2.f*M_PI);
+    float theta = randf(state, 0.f, 2.f*M_PI_F);
     float x = cos(theta);
     float y = sin(theta);
     return vec2(x, y);
@@ -71,7 +71,7 @@ inline CUDA_CALLABLE vec2 sample_unit_ring(uint32& state)
 inline CUDA_CALLABLE vec2 sample_unit_disk(uint32& state)
 {
     float r = sqrt(randf(state));
-    float theta = randf(state, 0.f, 2.f*M_PI);
+    float theta = randf(state, 0.f, 2.f*M_PI_F);
     float x = r * cos(theta);
     float y = r * sin(theta);
     return vec2(x, y);
@@ -80,7 +80,7 @@ inline CUDA_CALLABLE vec2 sample_unit_disk(uint32& state)
 inline CUDA_CALLABLE vec3 sample_unit_sphere_surface(uint32& state)
 {
     float phi = acos(1.f - 2.f * randf(state));
-    float theta = randf(state, 0.f, 2.f*M_PI);
+    float theta = randf(state, 0.f, 2.f*M_PI_F);
     float x = cos(theta) * sin(phi);
     float y = sin(theta) * sin(phi);
     float z = cos(phi);
@@ -90,7 +90,7 @@ inline CUDA_CALLABLE vec3 sample_unit_sphere_surface(uint32& state)
 inline CUDA_CALLABLE vec3 sample_unit_sphere(uint32& state)
 {
     float phi = acos(1.f  - 2.f * randf(state));
-    float theta = randf(state, 0.f, 2.f*M_PI);
+    float theta = randf(state, 0.f, 2.f*M_PI_F);
     float r = pow(randf(state), 1.f/3.f);
     float x = r * cos(theta) * sin(phi);
     float y = r * sin(theta) * sin(phi);
@@ -101,7 +101,7 @@ inline CUDA_CALLABLE vec3 sample_unit_sphere(uint32& state)
 inline CUDA_CALLABLE vec3 sample_unit_hemisphere_surface(uint32& state)
 {
     float phi = acos(1.f - randf(state));
-    float theta = randf(state, 0.f, 2.f*M_PI);
+    float theta = randf(state, 0.f, 2.f*M_PI_F);
     float x = cos(theta) * sin(phi);
     float y = sin(theta) * sin(phi);
     float z = cos(phi);
@@ -111,7 +111,7 @@ inline CUDA_CALLABLE vec3 sample_unit_hemisphere_surface(uint32& state)
 inline CUDA_CALLABLE vec3 sample_unit_hemisphere(uint32& state)
 {
     float phi = acos(1.f - randf(state));
-    float theta = randf(state, 0.f, 2.f*M_PI);
+    float theta = randf(state, 0.f, 2.f*M_PI_F);
     float r = pow(randf(state), 1.f/3.f);
     float x = r * cos(theta) * sin(phi);
     float y = r * sin(theta) * sin(phi);
@@ -134,6 +134,15 @@ inline CUDA_CALLABLE vec3 sample_unit_cube(uint32& state)
     return vec3(x, y, z);
 }
+inline CUDA_CALLABLE vec4 sample_unit_hypercube(uint32& state)
+{
+    float a = randf(state) - 0.5f;
+    float b = randf(state) - 0.5f;
+    float c = randf(state) - 0.5f;
+    float d = randf(state) - 0.5f;
+    return vec4(a, b, c, d);
+}
 inline CUDA_CALLABLE void adj_sample_cdf(uint32& state, const array_t<float>& cdf, uint32& adj_state, array_t<float>& adj_cdf, const int& adj_ret) {}
 inline CUDA_CALLABLE void adj_sample_triangle(uint32& state, uint32& adj_state, const vec2& adj_ret) {}
 inline CUDA_CALLABLE void adj_sample_unit_ring(uint32& state, uint32& adj_state, const vec2& adj_ret) {}
@@ -144,6 +153,7 @@ inline CUDA_CALLABLE void adj_sample_unit_hemisphere_surface(uint32& state, uint
 inline CUDA_CALLABLE void adj_sample_unit_hemisphere(uint32& state, uint32& adj_state, const vec3& adj_ret) {}
 inline CUDA_CALLABLE void adj_sample_unit_square(uint32& state, uint32& adj_state, const vec2& adj_ret) {}
 inline CUDA_CALLABLE void adj_sample_unit_cube(uint32& state, uint32& adj_state, const vec3& adj_ret) {}
+inline CUDA_CALLABLE void adj_sample_unit_hypercube(uint32& state, uint32& adj_state, const vec3& adj_ret) {}
 /*
  * log-gamma function to support some of these distributions. The
@@ -158,17 +168,17 @@ inline CUDA_CALLABLE float random_loggam(float x)
     float x0, x2, lg2pi, gl, gl0;
     uint32 n;
-    const float a[10] = {8.333333333333333e-02, -2.777777777777778e-03,
-                        7.936507936507937e-04, -5.952380952380952e-04,
-                        8.417508417508418e-04, -1.917526917526918e-03,
-                        6.410256410256410e-03, -2.955065359477124e-02,
-                        1.796443723688307e-01, -1.39243221690590e+00};
+    const float a[10] = {8.333333333333333e-02f, -2.777777777777778e-03f,
+                        7.936507936507937e-04f, -5.952380952380952e-04f,
+                        8.417508417508418e-04f, -1.917526917526918e-03f,
+                        6.410256410256410e-03f, -2.955065359477124e-02f,
+                        1.796443723688307e-01f, -1.39243221690590e+00f};
-    if ((x == 1.0) || (x == 2.0))
+    if ((x == 1.f) || (x == 2.f))
     {
-        return 0.0;
+        return 0.f;
     }
-    else if (x < 7.0)
+    else if (x < 7.f)
     {
         n = uint32((7 - x));
     }
@@ -178,8 +188,8 @@ inline CUDA_CALLABLE float random_loggam(float x)
     }
     x0 = x + float(n);
-    x2 = (1.0 / x0) * (1.0 / x0);
-    // log(2 * M_PI)
+    x2 = (1.f / x0) * (1.f / x0);
+    // log(2 * M_PI_F)
     lg2pi = 1.8378770664093453f;
     gl0 = a[9];
     for (int i = 8; i >= 0; i--)
@@ -187,13 +197,13 @@ inline CUDA_CALLABLE float random_loggam(float x)
         gl0 *= x2;
         gl0 += a[i];
     }
-    gl = gl0 / x0 + 0.5 * lg2pi + (x0 - 0.5) * log(x0) - x0;
-    if (x < 7.0)
+    gl = gl0 / x0 + 0.5f * lg2pi + (x0 - 0.5f) * log(x0) - x0;
+    if (x < 7.f)
     {
         for (uint32 k = 1; k <= n; k++)
         {
-            gl -= log(x0 - 1.0);
-            x0 -= 1.0;
+            gl -= log(x0 - 1.f);
+            x0 -= 1.f;
         }
     }
     return gl;
@@ -205,7 +215,7 @@ inline CUDA_CALLABLE uint32 random_poisson_mult(uint32& state, float lam) {
     enlam = exp(-lam);
     X = 0;
-    prod = 1.0;
+    prod = 1.f;
     while (1)
     {
@@ -234,22 +244,22 @@ inline CUDA_CALLABLE uint32 random_poisson(uint32& state, float lam)
     slam = sqrt(lam);
     loglam = log(lam);
-    b = 0.931 + 2.53 * slam;
-    a = -0.059 + 0.02483 * b;
-    invalpha = 1.1239 + 1.1328 / (b - 3.4);
-    vr = 0.9277 - 3.6224 / (b - 2.0);
+    b = 0.931f + 2.53f * slam;
+    a = -0.059f + 0.02483f * b;
+    invalpha = 1.1239f + 1.1328f / (b - 3.4f);
+    vr = 0.9277f - 3.6224f / (b - 2.f);
     while (1)
     {
-        U = randf(state) - 0.5;
+        U = randf(state) - 0.5f;
         V = randf(state);
-        us = 0.5 - abs(U);
-        k = uint32(floor((2 * a / us + b) * U + lam + 0.43));
-        if ((us >= 0.07) && (V <= vr))
+        us = 0.5f - abs(U);
+        k = uint32(floor((2.f * a / us + b) * U + lam + 0.43f));
+        if ((us >= 0.07f) && (V <= vr))
         {
             return k;
         }
-        if ((us < 0.013) && (V > us))
+        if ((us < 0.013f) && (V > us))
         {
             continue;
         }

warp/native/reduce.cpp CHANGED

@@ -97,7 +97,7 @@ template <typename T> void array_sum_host(const T *ptr_a, T *ptr_out, int count,
         accumulate_func = dyn_len_sum<T>;
     }
-    *ptr_out = 0.0f;
+    memset(ptr_out, 0, sizeof(T)*type_length);
     for (int i = 0; i < count; ++i)
         accumulate_func(ptr_a + i * stride, ptr_out, type_length);
 }

warp/native/sparse.cpp CHANGED

@@ -179,10 +179,10 @@ void bsr_transpose_host(int rows_per_block, int cols_per_block, int row_count, i
     const int block_size = rows_per_block * cols_per_block;
     void (*block_transpose_func)(const T *, T *, int, int) = bsr_dyn_block_transpose<T>;
-    switch (row_count)
+    switch (rows_per_block)
     {
     case 1:
-        switch (col_count)
+        switch (cols_per_block)
         {
         case 1:
             block_transpose_func = bsr_fixed_block_transpose<1, 1, T>;
@@ -196,7 +196,7 @@ void bsr_transpose_host(int rows_per_block, int cols_per_block, int row_count, i
         }
         break;
     case 2:
-        switch (col_count)
+        switch (cols_per_block)
         {
         case 1:
             block_transpose_func = bsr_fixed_block_transpose<2, 1, T>;
@@ -210,7 +210,7 @@ void bsr_transpose_host(int rows_per_block, int cols_per_block, int row_count, i
         }
         break;
     case 3:
-        switch (col_count)
+        switch (cols_per_block)
         {
         case 1:
             block_transpose_func = bsr_fixed_block_transpose<3, 1, T>;