warp-lang 1.9.1__py3-none-manylinux_2_34_aarch64.whl → 1.10.0rc2__py3-none-manylinux_2_34_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- warp/__init__.py +301 -287
- warp/__init__.pyi +794 -305
- warp/_src/__init__.py +14 -0
- warp/_src/autograd.py +1075 -0
- warp/_src/build.py +618 -0
- warp/_src/build_dll.py +640 -0
- warp/{builtins.py → _src/builtins.py} +1382 -377
- warp/_src/codegen.py +4359 -0
- warp/{config.py → _src/config.py} +178 -169
- warp/_src/constants.py +57 -0
- warp/_src/context.py +8294 -0
- warp/_src/dlpack.py +462 -0
- warp/_src/fabric.py +355 -0
- warp/_src/fem/__init__.py +14 -0
- warp/_src/fem/adaptivity.py +508 -0
- warp/_src/fem/cache.py +687 -0
- warp/_src/fem/dirichlet.py +188 -0
- warp/{fem → _src/fem}/domain.py +40 -30
- warp/_src/fem/field/__init__.py +131 -0
- warp/_src/fem/field/field.py +701 -0
- warp/{fem → _src/fem}/field/nodal_field.py +30 -15
- warp/{fem → _src/fem}/field/restriction.py +1 -1
- warp/{fem → _src/fem}/field/virtual.py +53 -27
- warp/_src/fem/geometry/__init__.py +32 -0
- warp/{fem → _src/fem}/geometry/adaptive_nanogrid.py +77 -163
- warp/_src/fem/geometry/closest_point.py +97 -0
- warp/{fem → _src/fem}/geometry/deformed_geometry.py +14 -22
- warp/{fem → _src/fem}/geometry/element.py +32 -10
- warp/{fem → _src/fem}/geometry/geometry.py +48 -20
- warp/{fem → _src/fem}/geometry/grid_2d.py +12 -23
- warp/{fem → _src/fem}/geometry/grid_3d.py +12 -23
- warp/{fem → _src/fem}/geometry/hexmesh.py +40 -63
- warp/{fem → _src/fem}/geometry/nanogrid.py +255 -248
- warp/{fem → _src/fem}/geometry/partition.py +121 -63
- warp/{fem → _src/fem}/geometry/quadmesh.py +26 -45
- warp/{fem → _src/fem}/geometry/tetmesh.py +40 -63
- warp/{fem → _src/fem}/geometry/trimesh.py +26 -45
- warp/{fem → _src/fem}/integrate.py +164 -158
- warp/_src/fem/linalg.py +383 -0
- warp/_src/fem/operator.py +396 -0
- warp/_src/fem/polynomial.py +229 -0
- warp/{fem → _src/fem}/quadrature/pic_quadrature.py +15 -20
- warp/{fem → _src/fem}/quadrature/quadrature.py +95 -47
- warp/_src/fem/space/__init__.py +248 -0
- warp/{fem → _src/fem}/space/basis_function_space.py +20 -11
- warp/_src/fem/space/basis_space.py +679 -0
- warp/{fem → _src/fem}/space/dof_mapper.py +3 -3
- warp/{fem → _src/fem}/space/function_space.py +14 -13
- warp/{fem → _src/fem}/space/grid_2d_function_space.py +4 -7
- warp/{fem → _src/fem}/space/grid_3d_function_space.py +4 -4
- warp/{fem → _src/fem}/space/hexmesh_function_space.py +4 -10
- warp/{fem → _src/fem}/space/nanogrid_function_space.py +3 -9
- warp/{fem → _src/fem}/space/partition.py +117 -60
- warp/{fem → _src/fem}/space/quadmesh_function_space.py +4 -10
- warp/{fem → _src/fem}/space/restriction.py +66 -33
- warp/_src/fem/space/shape/__init__.py +152 -0
- warp/{fem → _src/fem}/space/shape/cube_shape_function.py +9 -9
- warp/{fem → _src/fem}/space/shape/shape_function.py +8 -9
- warp/{fem → _src/fem}/space/shape/square_shape_function.py +6 -6
- warp/{fem → _src/fem}/space/shape/tet_shape_function.py +3 -3
- warp/{fem → _src/fem}/space/shape/triangle_shape_function.py +3 -3
- warp/{fem → _src/fem}/space/tetmesh_function_space.py +3 -9
- warp/_src/fem/space/topology.py +459 -0
- warp/{fem → _src/fem}/space/trimesh_function_space.py +3 -9
- warp/_src/fem/types.py +112 -0
- warp/_src/fem/utils.py +486 -0
- warp/_src/jax.py +186 -0
- warp/_src/jax_experimental/__init__.py +14 -0
- warp/_src/jax_experimental/custom_call.py +387 -0
- warp/_src/jax_experimental/ffi.py +1284 -0
- warp/_src/jax_experimental/xla_ffi.py +656 -0
- warp/_src/marching_cubes.py +708 -0
- warp/_src/math.py +414 -0
- warp/_src/optim/__init__.py +14 -0
- warp/_src/optim/adam.py +163 -0
- warp/_src/optim/linear.py +1606 -0
- warp/_src/optim/sgd.py +112 -0
- warp/_src/paddle.py +406 -0
- warp/_src/render/__init__.py +14 -0
- warp/_src/render/imgui_manager.py +289 -0
- warp/_src/render/render_opengl.py +3636 -0
- warp/_src/render/render_usd.py +937 -0
- warp/_src/render/utils.py +160 -0
- warp/_src/sparse.py +2716 -0
- warp/_src/tape.py +1206 -0
- warp/{thirdparty → _src/thirdparty}/unittest_parallel.py +9 -2
- warp/_src/torch.py +391 -0
- warp/_src/types.py +5870 -0
- warp/_src/utils.py +1693 -0
- warp/autograd.py +12 -1054
- warp/bin/warp-clang.so +0 -0
- warp/bin/warp.so +0 -0
- warp/build.py +8 -588
- warp/build_dll.py +6 -721
- warp/codegen.py +6 -4251
- warp/constants.py +6 -39
- warp/context.py +12 -8062
- warp/dlpack.py +6 -444
- warp/examples/distributed/example_jacobi_mpi.py +4 -5
- warp/examples/fem/example_adaptive_grid.py +1 -1
- warp/examples/fem/example_apic_fluid.py +1 -1
- warp/examples/fem/example_burgers.py +8 -8
- warp/examples/fem/example_diffusion.py +1 -1
- warp/examples/fem/example_distortion_energy.py +1 -1
- warp/examples/fem/example_mixed_elasticity.py +2 -2
- warp/examples/fem/example_navier_stokes.py +1 -1
- warp/examples/fem/example_nonconforming_contact.py +7 -7
- warp/examples/fem/example_stokes.py +1 -1
- warp/examples/fem/example_stokes_transfer.py +1 -1
- warp/examples/fem/utils.py +2 -2
- warp/examples/interop/example_jax_callable.py +1 -1
- warp/examples/interop/example_jax_ffi_callback.py +1 -1
- warp/examples/interop/example_jax_kernel.py +1 -1
- warp/examples/tile/example_tile_mcgp.py +191 -0
- warp/fabric.py +6 -337
- warp/fem/__init__.py +159 -97
- warp/fem/adaptivity.py +7 -489
- warp/fem/cache.py +9 -648
- warp/fem/dirichlet.py +6 -184
- warp/fem/field/__init__.py +8 -109
- warp/fem/field/field.py +7 -652
- warp/fem/geometry/__init__.py +7 -18
- warp/fem/geometry/closest_point.py +11 -77
- warp/fem/linalg.py +18 -366
- warp/fem/operator.py +11 -369
- warp/fem/polynomial.py +9 -209
- warp/fem/space/__init__.py +5 -211
- warp/fem/space/basis_space.py +6 -662
- warp/fem/space/shape/__init__.py +41 -118
- warp/fem/space/topology.py +6 -437
- warp/fem/types.py +6 -81
- warp/fem/utils.py +11 -444
- warp/jax.py +8 -165
- warp/jax_experimental/__init__.py +14 -1
- warp/jax_experimental/custom_call.py +8 -365
- warp/jax_experimental/ffi.py +17 -873
- warp/jax_experimental/xla_ffi.py +5 -605
- warp/marching_cubes.py +5 -689
- warp/math.py +16 -393
- warp/native/array.h +385 -37
- warp/native/builtin.h +314 -37
- warp/native/bvh.cpp +43 -9
- warp/native/bvh.cu +62 -27
- warp/native/bvh.h +310 -309
- warp/native/clang/clang.cpp +102 -97
- warp/native/coloring.cpp +0 -1
- warp/native/crt.h +208 -0
- warp/native/exports.h +156 -0
- warp/native/hashgrid.cu +2 -0
- warp/native/intersect.h +24 -1
- warp/native/intersect_tri.h +44 -35
- warp/native/mat.h +1456 -276
- warp/native/mesh.cpp +4 -4
- warp/native/mesh.cu +4 -2
- warp/native/mesh.h +176 -61
- warp/native/quat.h +0 -52
- warp/native/scan.cu +2 -0
- warp/native/sparse.cu +7 -3
- warp/native/spatial.h +12 -0
- warp/native/tile.h +681 -89
- warp/native/tile_radix_sort.h +1 -1
- warp/native/tile_reduce.h +394 -46
- warp/native/tile_scan.h +4 -4
- warp/native/vec.h +469 -0
- warp/native/version.h +23 -0
- warp/native/volume.cpp +1 -1
- warp/native/volume.cu +1 -0
- warp/native/volume.h +1 -1
- warp/native/volume_builder.cu +2 -0
- warp/native/warp.cpp +57 -29
- warp/native/warp.cu +253 -171
- warp/native/warp.h +11 -8
- warp/optim/__init__.py +6 -3
- warp/optim/adam.py +6 -145
- warp/optim/linear.py +14 -1585
- warp/optim/sgd.py +6 -94
- warp/paddle.py +6 -388
- warp/render/__init__.py +8 -4
- warp/render/imgui_manager.py +7 -267
- warp/render/render_opengl.py +6 -3618
- warp/render/render_usd.py +6 -919
- warp/render/utils.py +6 -142
- warp/sparse.py +37 -2563
- warp/tape.py +6 -1188
- warp/tests/__main__.py +1 -1
- warp/tests/cuda/test_async.py +4 -4
- warp/tests/cuda/test_conditional_captures.py +1 -1
- warp/tests/cuda/test_multigpu.py +1 -1
- warp/tests/cuda/test_streams.py +58 -1
- warp/tests/geometry/test_bvh.py +157 -22
- warp/tests/geometry/test_marching_cubes.py +0 -1
- warp/tests/geometry/test_mesh.py +5 -3
- warp/tests/geometry/test_mesh_query_aabb.py +5 -12
- warp/tests/geometry/test_mesh_query_point.py +5 -2
- warp/tests/geometry/test_mesh_query_ray.py +15 -3
- warp/tests/geometry/test_volume_write.py +5 -5
- warp/tests/interop/test_dlpack.py +14 -14
- warp/tests/interop/test_jax.py +772 -49
- warp/tests/interop/test_paddle.py +1 -1
- warp/tests/test_adam.py +0 -1
- warp/tests/test_arithmetic.py +9 -9
- warp/tests/test_array.py +527 -100
- warp/tests/test_array_reduce.py +3 -3
- warp/tests/test_atomic.py +12 -8
- warp/tests/test_atomic_bitwise.py +209 -0
- warp/tests/test_atomic_cas.py +4 -4
- warp/tests/test_bool.py +2 -2
- warp/tests/test_builtins_resolution.py +5 -571
- warp/tests/test_codegen.py +33 -14
- warp/tests/test_conditional.py +1 -1
- warp/tests/test_context.py +6 -6
- warp/tests/test_copy.py +242 -161
- warp/tests/test_ctypes.py +3 -3
- warp/tests/test_devices.py +24 -2
- warp/tests/test_examples.py +16 -84
- warp/tests/test_fabricarray.py +35 -35
- warp/tests/test_fast_math.py +0 -2
- warp/tests/test_fem.py +56 -10
- warp/tests/test_fixedarray.py +3 -3
- warp/tests/test_func.py +8 -5
- warp/tests/test_generics.py +1 -1
- warp/tests/test_indexedarray.py +24 -24
- warp/tests/test_intersect.py +39 -9
- warp/tests/test_large.py +1 -1
- warp/tests/test_lerp.py +3 -1
- warp/tests/test_linear_solvers.py +1 -1
- warp/tests/test_map.py +35 -4
- warp/tests/test_mat.py +52 -62
- warp/tests/test_mat_constructors.py +4 -5
- warp/tests/test_mat_lite.py +1 -1
- warp/tests/test_mat_scalar_ops.py +121 -121
- warp/tests/test_math.py +34 -0
- warp/tests/test_module_aot.py +4 -4
- warp/tests/test_modules_lite.py +28 -2
- warp/tests/test_print.py +11 -11
- warp/tests/test_quat.py +93 -58
- warp/tests/test_runlength_encode.py +1 -1
- warp/tests/test_scalar_ops.py +38 -10
- warp/tests/test_smoothstep.py +1 -1
- warp/tests/test_sparse.py +126 -15
- warp/tests/test_spatial.py +105 -87
- warp/tests/test_special_values.py +6 -6
- warp/tests/test_static.py +7 -7
- warp/tests/test_struct.py +13 -2
- warp/tests/test_triangle_closest_point.py +48 -1
- warp/tests/test_types.py +27 -15
- warp/tests/test_utils.py +52 -52
- warp/tests/test_vec.py +29 -29
- warp/tests/test_vec_constructors.py +5 -5
- warp/tests/test_vec_scalar_ops.py +97 -97
- warp/tests/test_version.py +75 -0
- warp/tests/tile/test_tile.py +178 -0
- warp/tests/tile/test_tile_atomic_bitwise.py +403 -0
- warp/tests/tile/test_tile_cholesky.py +7 -4
- warp/tests/tile/test_tile_load.py +26 -2
- warp/tests/tile/test_tile_mathdx.py +3 -3
- warp/tests/tile/test_tile_matmul.py +1 -1
- warp/tests/tile/test_tile_mlp.py +2 -4
- warp/tests/tile/test_tile_reduce.py +214 -13
- warp/tests/unittest_suites.py +6 -14
- warp/tests/unittest_utils.py +10 -9
- warp/tests/walkthrough_debug.py +3 -1
- warp/torch.py +6 -373
- warp/types.py +29 -5764
- warp/utils.py +10 -1659
- {warp_lang-1.9.1.dist-info → warp_lang-1.10.0rc2.dist-info}/METADATA +46 -99
- warp_lang-1.10.0rc2.dist-info/RECORD +468 -0
- warp_lang-1.10.0rc2.dist-info/licenses/licenses/Gaia-LICENSE.txt +6 -0
- warp_lang-1.10.0rc2.dist-info/licenses/licenses/appdirs-LICENSE.txt +22 -0
- warp_lang-1.10.0rc2.dist-info/licenses/licenses/asset_pixel_jpg-LICENSE.txt +3 -0
- warp_lang-1.10.0rc2.dist-info/licenses/licenses/cuda-LICENSE.txt +1582 -0
- warp_lang-1.10.0rc2.dist-info/licenses/licenses/dlpack-LICENSE.txt +201 -0
- warp_lang-1.10.0rc2.dist-info/licenses/licenses/fp16-LICENSE.txt +28 -0
- warp_lang-1.10.0rc2.dist-info/licenses/licenses/libmathdx-LICENSE.txt +220 -0
- warp_lang-1.10.0rc2.dist-info/licenses/licenses/llvm-LICENSE.txt +279 -0
- warp_lang-1.10.0rc2.dist-info/licenses/licenses/moller-LICENSE.txt +16 -0
- warp_lang-1.10.0rc2.dist-info/licenses/licenses/nanovdb-LICENSE.txt +2 -0
- warp_lang-1.10.0rc2.dist-info/licenses/licenses/nvrtc-LICENSE.txt +1592 -0
- warp_lang-1.10.0rc2.dist-info/licenses/licenses/svd-LICENSE.txt +23 -0
- warp_lang-1.10.0rc2.dist-info/licenses/licenses/unittest_parallel-LICENSE.txt +21 -0
- warp_lang-1.10.0rc2.dist-info/licenses/licenses/usd-LICENSE.txt +213 -0
- warp_lang-1.10.0rc2.dist-info/licenses/licenses/windingnumber-LICENSE.txt +21 -0
- warp/examples/assets/cartpole.urdf +0 -110
- warp/examples/assets/crazyflie.usd +0 -0
- warp/examples/assets/nv_ant.xml +0 -92
- warp/examples/assets/nv_humanoid.xml +0 -183
- warp/examples/assets/quadruped.urdf +0 -268
- warp/examples/optim/example_bounce.py +0 -266
- warp/examples/optim/example_cloth_throw.py +0 -228
- warp/examples/optim/example_drone.py +0 -870
- warp/examples/optim/example_inverse_kinematics.py +0 -182
- warp/examples/optim/example_inverse_kinematics_torch.py +0 -191
- warp/examples/optim/example_softbody_properties.py +0 -400
- warp/examples/optim/example_spring_cage.py +0 -245
- warp/examples/optim/example_trajectory.py +0 -227
- warp/examples/sim/example_cartpole.py +0 -143
- warp/examples/sim/example_cloth.py +0 -225
- warp/examples/sim/example_cloth_self_contact.py +0 -316
- warp/examples/sim/example_granular.py +0 -130
- warp/examples/sim/example_granular_collision_sdf.py +0 -202
- warp/examples/sim/example_jacobian_ik.py +0 -244
- warp/examples/sim/example_particle_chain.py +0 -124
- warp/examples/sim/example_quadruped.py +0 -203
- warp/examples/sim/example_rigid_chain.py +0 -203
- warp/examples/sim/example_rigid_contact.py +0 -195
- warp/examples/sim/example_rigid_force.py +0 -133
- warp/examples/sim/example_rigid_gyroscopic.py +0 -115
- warp/examples/sim/example_rigid_soft_contact.py +0 -140
- warp/examples/sim/example_soft_body.py +0 -196
- warp/examples/tile/example_tile_walker.py +0 -327
- warp/sim/__init__.py +0 -74
- warp/sim/articulation.py +0 -793
- warp/sim/collide.py +0 -2570
- warp/sim/graph_coloring.py +0 -307
- warp/sim/import_mjcf.py +0 -791
- warp/sim/import_snu.py +0 -227
- warp/sim/import_urdf.py +0 -579
- warp/sim/import_usd.py +0 -898
- warp/sim/inertia.py +0 -357
- warp/sim/integrator.py +0 -245
- warp/sim/integrator_euler.py +0 -2000
- warp/sim/integrator_featherstone.py +0 -2101
- warp/sim/integrator_vbd.py +0 -2487
- warp/sim/integrator_xpbd.py +0 -3295
- warp/sim/model.py +0 -4821
- warp/sim/particles.py +0 -121
- warp/sim/render.py +0 -431
- warp/sim/utils.py +0 -431
- warp/tests/sim/disabled_kinematics.py +0 -244
- warp/tests/sim/test_cloth.py +0 -863
- warp/tests/sim/test_collision.py +0 -743
- warp/tests/sim/test_coloring.py +0 -347
- warp/tests/sim/test_inertia.py +0 -161
- warp/tests/sim/test_model.py +0 -226
- warp/tests/sim/test_sim_grad.py +0 -287
- warp/tests/sim/test_sim_grad_bounce_linear.py +0 -212
- warp/tests/sim/test_sim_kinematics.py +0 -98
- warp/thirdparty/__init__.py +0 -0
- warp_lang-1.9.1.dist-info/RECORD +0 -456
- /warp/{fem → _src/fem}/quadrature/__init__.py +0 -0
- /warp/{tests/sim → _src/thirdparty}/__init__.py +0 -0
- /warp/{thirdparty → _src/thirdparty}/appdirs.py +0 -0
- /warp/{thirdparty → _src/thirdparty}/dlpack.py +0 -0
- {warp_lang-1.9.1.dist-info → warp_lang-1.10.0rc2.dist-info}/WHEEL +0 -0
- {warp_lang-1.9.1.dist-info → warp_lang-1.10.0rc2.dist-info}/licenses/LICENSE.md +0 -0
- {warp_lang-1.9.1.dist-info → warp_lang-1.10.0rc2.dist-info}/top_level.txt +0 -0
warp/native/vec.h
CHANGED
@@ -149,6 +149,17 @@ using vec2d = vec_t<2,double>;
 using vec3d = vec_t<3,double>;
 using vec4d = vec_t<4,double>;
 
+// Type trait to detect if a type is a vec_t
+template<typename T>
+struct is_vector {
+    static constexpr bool value = false;
+};
+
+template<unsigned Length, typename Type>
+struct is_vector<vec_t<Length, Type>> {
+    static constexpr bool value = true;
+};
+
 template<unsigned Length, typename Type>
 inline CUDA_CALLABLE vec_t<Length, Type> operator - (const vec_t<Length, Type>& x)
 {
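The is_vector trait added above is the standard partial-specialization idiom: the primary template defaults to false, and a specialization for vec_t<Length, Type> flips it to true, so generic code can branch at compile time. A minimal standalone sketch of how such a trait is typically consumed; the vec_t here is a stand-in for illustration, not Warp's actual header:

    #include <cstdio>

    // Stand-in vector type (hypothetical, mirrors the shape of Warp's vec_t).
    template<unsigned Length, typename Type>
    struct vec_t { Type c[Length]; };

    // Primary template: by default, nothing is a vector.
    template<typename T>
    struct is_vector { static constexpr bool value = false; };

    // Partial specialization: any vec_t instantiation is a vector.
    template<unsigned Length, typename Type>
    struct is_vector<vec_t<Length, Type>> { static constexpr bool value = true; };

    int main()
    {
        // The trait lets generic code dispatch at compile time.
        static_assert(is_vector<vec_t<3, float>>::value, "vec_t is detected");
        static_assert(!is_vector<float>::value, "scalars are not");
        std::printf("is_vector checks passed\n");
        return 0;
    }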
@@ -404,6 +415,139 @@ inline CUDA_CALLABLE vec_t<3, Type> mod(vec_t<3, Type> a, vec_t<3, Type> b)
     return vec_t<3, Type>(mod(a.c[0], b.c[0]), mod(a.c[1], b.c[1]), mod(a.c[2], b.c[2]));
 }
 
+// bitwise AND
+template<unsigned Length, typename Type>
+inline CUDA_CALLABLE vec_t<Length, Type> bit_and(vec_t<Length, Type> a, vec_t<Length, Type> b)
+{
+    vec_t<Length, Type> ret;
+    for( unsigned i=0; i < Length; ++i )
+    {
+        ret[i] = Type(a[i] & b[i]);
+    }
+    return ret;
+}
+
+template<typename Type>
+inline CUDA_CALLABLE vec_t<2, Type> bit_and(vec_t<2, Type> a, vec_t<2, Type> b)
+{
+    return vec_t<2, Type>( a.c[0] & b.c[0], a.c[1] & b.c[1]);
+}
+
+template<typename Type>
+inline CUDA_CALLABLE vec_t<3, Type> bit_and(vec_t<3, Type> a, vec_t<3, Type> b)
+{
+    return vec_t<3, Type>( a.c[0] & b.c[0], a.c[1] & b.c[1], a.c[2] & b.c[2]);
+}
+
+// bitwise OR
+template<unsigned Length, typename Type>
+inline CUDA_CALLABLE vec_t<Length, Type> bit_or(vec_t<Length, Type> a, vec_t<Length, Type> b)
+{
+    vec_t<Length, Type> ret;
+    for( unsigned i=0; i < Length; ++i )
+    {
+        ret[i] = Type(a[i] | b[i]);
+    }
+    return ret;
+}
+
+template<typename Type>
+inline CUDA_CALLABLE vec_t<2, Type> bit_or(vec_t<2, Type> a, vec_t<2, Type> b)
+{
+    return vec_t<2, Type>( a.c[0] | b.c[0], a.c[1] | b.c[1]);
+}
+
+template<typename Type>
+inline CUDA_CALLABLE vec_t<3, Type> bit_or(vec_t<3, Type> a, vec_t<3, Type> b)
+{
+    return vec_t<3, Type>( a.c[0] | b.c[0], a.c[1] | b.c[1], a.c[2] | b.c[2]);
+}
+
+// bitwise XOR
+template<unsigned Length, typename Type>
+inline CUDA_CALLABLE vec_t<Length, Type> bit_xor(vec_t<Length, Type> a, vec_t<Length, Type> b)
+{
+    vec_t<Length, Type> ret;
+    for( unsigned i=0; i < Length; ++i )
+    {
+        ret[i] = Type(a[i] ^ b[i]);
+    }
+    return ret;
+}
+
+template<typename Type>
+inline CUDA_CALLABLE vec_t<2, Type> bit_xor(vec_t<2, Type> a, vec_t<2, Type> b)
+{
+    return vec_t<2, Type>( a.c[0] ^ b.c[0], a.c[1] ^ b.c[1]);
+}
+
+template<typename Type>
+inline CUDA_CALLABLE vec_t<3, Type> bit_xor(vec_t<3, Type> a, vec_t<3, Type> b)
+{
+    return vec_t<3, Type>( a.c[0] ^ b.c[0], a.c[1] ^ b.c[1], a.c[2] ^ b.c[2]);
+}
+
+// left shift
+template<unsigned Length, typename Type>
+inline CUDA_CALLABLE vec_t<Length, Type> lshift(vec_t<Length, Type> a, vec_t<Length, Type> b)
+{
+    vec_t<Length, Type> ret;
+    for( unsigned i=0; i < Length; ++i )
+    {
+        ret[i] = Type(a[i] << b[i]);
+    }
+    return ret;
+}
+
+template<typename Type>
+inline CUDA_CALLABLE vec_t<2, Type> lshift(vec_t<2, Type> a, vec_t<2, Type> b)
+{
+    return vec_t<2, Type>( a.c[0] << b.c[0], a.c[1] << b.c[1]);
+}
+
+template<typename Type>
+inline CUDA_CALLABLE vec_t<3, Type> lshift(vec_t<3, Type> a, vec_t<3, Type> b)
+{
+    return vec_t<3, Type>( a.c[0] << b.c[0], a.c[1] << b.c[1], a.c[2] << b.c[2]);
+}
+
+// right shift
+template<unsigned Length, typename Type>
+inline CUDA_CALLABLE vec_t<Length, Type> rshift(vec_t<Length, Type> a, vec_t<Length, Type> b)
+{
+    vec_t<Length, Type> ret;
+    for( unsigned i=0; i < Length; ++i )
+    {
+        ret[i] = Type(a[i] >> b[i]);
+    }
+    return ret;
+}
+
+template<typename Type>
+inline CUDA_CALLABLE vec_t<2, Type> rshift(vec_t<2, Type> a, vec_t<2, Type> b)
+{
+    return vec_t<2, Type>( a.c[0] >> b.c[0], a.c[1] >> b.c[1]);
+}
+
+template<typename Type>
+inline CUDA_CALLABLE vec_t<3, Type> rshift(vec_t<3, Type> a, vec_t<3, Type> b)
+{
+    return vec_t<3, Type>( a.c[0] >> b.c[0], a.c[1] >> b.c[1], a.c[2] >> b.c[2]);
+}
+
+// invert
+template<unsigned Length, typename Type>
+inline CUDA_CALLABLE vec_t<Length,Type> invert(vec_t<Length,Type> v)
+{
+    vec_t<Length,Type> ret;
+    for (unsigned i=0; i < Length; ++i)
+    {
+        ret[i] = ~v[i];
+    }
+
+    return ret;
+}
+
 // dot product:
 template<unsigned Length, typename Type>
 inline CUDA_CALLABLE Type dot(vec_t<Length, Type> a, vec_t<Length, Type> b)
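Each new builtin above follows the same shape: a generic loop over Length, plus hand-unrolled overloads for lengths 2 and 3, applying the scalar operator lane by lane. A self-contained sketch of the componentwise semantics, using a stand-in vec_t rather than Warp's header:

    #include <cassert>

    // Stand-in vector type for illustration.
    template<unsigned Length, typename Type>
    struct vec_t
    {
        Type c[Length];
        Type& operator[](unsigned i) { return c[i]; }
    };

    // Componentwise AND, matching the generic loop in the diff.
    template<unsigned Length, typename Type>
    vec_t<Length, Type> bit_and(vec_t<Length, Type> a, vec_t<Length, Type> b)
    {
        vec_t<Length, Type> ret;
        for (unsigned i = 0; i < Length; ++i)
            ret[i] = Type(a[i] & b[i]);  // the scalar op is applied lane by lane
        return ret;
    }

    int main()
    {
        vec_t<3, int> a{{0b1100, 0b1010, 0xFF}};
        vec_t<3, int> b{{0b1010, 0b1010, 0x0F}};
        vec_t<3, int> r = bit_and(a, b);
        assert(r[0] == 0b1000 && r[1] == 0b1010 && r[2] == 0x0F);
        return 0;
    }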
@@ -724,6 +868,183 @@ inline CUDA_CALLABLE void adj_sub_inplace(
 }
 
 
+template<unsigned Length, typename Type>
+inline CUDA_CALLABLE void bit_and_inplace(vec_t<Length, Type>& v, int idx, Type value)
+{
+#ifndef NDEBUG
+    if (idx < -(int)Length || idx >= (int)Length)
+    {
+        printf("vec index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
+        assert(0);
+    }
+#endif
+
+    if (idx < 0)
+    {
+        idx += Length;
+    }
+
+    v[idx] &= value;
+}
+
+
+template<unsigned SliceLength, unsigned Length, typename Type>
+inline CUDA_CALLABLE void bit_and_inplace(vec_t<Length, Type>& v, slice_t slice, const vec_t<SliceLength, Type> &a)
+{
+    assert(slice.start >= 0 && slice.start <= (int)Length);
+    assert(slice.stop >= -1 && slice.stop <= (int)Length);
+    assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
+    assert(slice_get_length(slice) == SliceLength);
+
+    bool is_reversed = slice.step < 0;
+
+    int ii = 0;
+    for (
+        int i = slice.start;
+        is_reversed ? (i > slice.stop) : (i < slice.stop);
+        i += slice.step
+    )
+    {
+        v[i] &= a[ii];
+        ++ii;
+    }
+
+    assert(ii == SliceLength);
+}
+
+
+template<unsigned Length, typename Type>
+inline CUDA_CALLABLE void adj_bit_and_inplace(
+    vec_t<Length, Type>& v, int idx, Type value,
+    vec_t<Length, Type>& adj_v, int adj_idx, Type& adj_value
+) {}
+
+
+template<unsigned SliceLength, unsigned Length, typename Type>
+inline CUDA_CALLABLE void adj_bit_and_inplace(
+    const vec_t<Length, Type>& v, slice_t slice, const vec_t<SliceLength, Type> &a,
+    vec_t<Length, Type>& adj_v, slice_t& adj_slice, vec_t<SliceLength, Type>& adj_a
+) {}
+
+
+template<unsigned Length, typename Type>
+inline CUDA_CALLABLE void bit_or_inplace(vec_t<Length, Type>& v, int idx, Type value)
+{
+#ifndef NDEBUG
+    if (idx < -(int)Length || idx >= (int)Length)
+    {
+        printf("vec index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
+        assert(0);
+    }
+#endif
+
+    if (idx < 0)
+    {
+        idx += Length;
+    }
+
+    v[idx] |= value;
+}
+
+
+template<unsigned SliceLength, unsigned Length, typename Type>
+inline CUDA_CALLABLE void bit_or_inplace(vec_t<Length, Type>& v, slice_t slice, const vec_t<SliceLength, Type> &a)
+{
+    assert(slice.start >= 0 && slice.start <= (int)Length);
+    assert(slice.stop >= -1 && slice.stop <= (int)Length);
+    assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
+    assert(slice_get_length(slice) == SliceLength);
+
+    bool is_reversed = slice.step < 0;
+
+    int ii = 0;
+    for (
+        int i = slice.start;
+        is_reversed ? (i > slice.stop) : (i < slice.stop);
+        i += slice.step
+    )
+    {
+        v[i] |= a[ii];
+        ++ii;
+    }
+
+    assert(ii == SliceLength);
+}
+
+
+template<unsigned Length, typename Type>
+inline CUDA_CALLABLE void adj_bit_or_inplace(
+    vec_t<Length, Type>& v, int idx, Type value,
+    vec_t<Length, Type>& adj_v, int adj_idx, Type& adj_value
+) {}
+
+
+template<unsigned SliceLength, unsigned Length, typename Type>
+inline CUDA_CALLABLE void adj_bit_or_inplace(
+    const vec_t<Length, Type>& v, slice_t slice, const vec_t<SliceLength, Type> &a,
+    vec_t<Length, Type>& adj_v, slice_t& adj_slice, vec_t<SliceLength, Type>& adj_a
+) {}
+
+
+template<unsigned Length, typename Type>
+inline CUDA_CALLABLE void bit_xor_inplace(vec_t<Length, Type>& v, int idx, Type value)
+{
+#ifndef NDEBUG
+    if (idx < -(int)Length || idx >= (int)Length)
+    {
+        printf("vec index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
+        assert(0);
+    }
+#endif
+
+    if (idx < 0)
+    {
+        idx += Length;
+    }
+
+    v[idx] ^= value;
+}
+
+
+template<unsigned SliceLength, unsigned Length, typename Type>
+inline CUDA_CALLABLE void bit_xor_inplace(vec_t<Length, Type>& v, slice_t slice, const vec_t<SliceLength, Type> &a)
+{
+    assert(slice.start >= 0 && slice.start <= (int)Length);
+    assert(slice.stop >= -1 && slice.stop <= (int)Length);
+    assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
+    assert(slice_get_length(slice) == SliceLength);
+
+    bool is_reversed = slice.step < 0;
+
+    int ii = 0;
+    for (
+        int i = slice.start;
+        is_reversed ? (i > slice.stop) : (i < slice.stop);
+        i += slice.step
+    )
+    {
+        v[i] ^= a[ii];
+        ++ii;
+    }
+
+    assert(ii == SliceLength);
+}
+
+
+template<unsigned Length, typename Type>
+inline CUDA_CALLABLE void adj_bit_xor_inplace(
+    vec_t<Length, Type>& v, int idx, Type value,
+    vec_t<Length, Type>& adj_v, int adj_idx, Type& adj_value
+) {}
+
+
+template<unsigned SliceLength, unsigned Length, typename Type>
+inline CUDA_CALLABLE void adj_bit_xor_inplace(
+    const vec_t<Length, Type>& v, slice_t slice, const vec_t<SliceLength, Type> &a,
+    vec_t<Length, Type>& adj_v, slice_t& adj_slice, vec_t<SliceLength, Type>& adj_a
+) {}
+
+
 template<unsigned Length, typename Type>
 inline CUDA_CALLABLE void assign_inplace(vec_t<Length, Type>& v, int idx, Type value)
 {
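The slice overloads above walk the vector from slice.start toward slice.stop in strides of slice.step, flipping the loop comparison for negative steps, and apply the operator to each visited lane. A standalone sketch of that walk, assuming a slice_t with Python-style start/stop/step semantics:

    #include <cassert>

    // Hypothetical slice descriptor matching the fields used in the diff.
    struct slice_t { int start, stop, step; };

    int main()
    {
        int v[5] = {1, 2, 4, 8, 16};
        int a[2] = {0x3, 0x3};

        // v[3:1:-1] &= a  ->  visits indices 3, then 2 (stop is exclusive)
        slice_t slice{3, 1, -1};
        bool is_reversed = slice.step < 0;

        int ii = 0;
        for (int i = slice.start;
             is_reversed ? (i > slice.stop) : (i < slice.stop);
             i += slice.step)
        {
            v[i] &= a[ii];
            ++ii;
        }

        assert(ii == 2);
        assert(v[3] == 0 && v[2] == 0);  // 8 & 3 == 0, 4 & 3 == 0
        return 0;
    }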
@@ -1342,6 +1663,90 @@ inline CUDA_CALLABLE void adj_mod(vec_t<3, Type> a, vec_t<3, Type> b, vec_t<3, T
 {
 }
 
+template<unsigned Length, typename Type>
+inline CUDA_CALLABLE void adj_bit_and(vec_t<Length, Type> a, vec_t<Length, Type> b, vec_t<Length, Type>& adj_a, vec_t<Length, Type>& adj_b, const vec_t<Length, Type>& adj_ret)
+{
+}
+
+template<typename Type>
+inline CUDA_CALLABLE void adj_bit_and(vec_t<2, Type> a, vec_t<2, Type> b, vec_t<2, Type>& adj_a, vec_t<2, Type>& adj_b, const vec_t<2, Type>& adj_ret)
+{
+}
+
+template<typename Type>
+inline CUDA_CALLABLE void adj_bit_and(vec_t<3, Type> a, vec_t<3, Type> b, vec_t<3, Type>& adj_a, vec_t<3, Type>& adj_b, const vec_t<3, Type>& adj_ret)
+{
+}
+
+template<unsigned Length, typename Type>
+inline CUDA_CALLABLE void adj_bit_or(vec_t<Length, Type> a, vec_t<Length, Type> b, vec_t<Length, Type>& adj_a, vec_t<Length, Type>& adj_b, const vec_t<Length, Type>& adj_ret)
+{
+}
+
+template<typename Type>
+inline CUDA_CALLABLE void adj_bit_or(vec_t<2, Type> a, vec_t<2, Type> b, vec_t<2, Type>& adj_a, vec_t<2, Type>& adj_b, const vec_t<2, Type>& adj_ret)
+{
+}
+
+template<typename Type>
+inline CUDA_CALLABLE void adj_bit_or(vec_t<3, Type> a, vec_t<3, Type> b, vec_t<3, Type>& adj_a, vec_t<3, Type>& adj_b, const vec_t<3, Type>& adj_ret)
+{
+}
+
+template<unsigned Length, typename Type>
+inline CUDA_CALLABLE void adj_bit_xor(vec_t<Length, Type> a, vec_t<Length, Type> b, vec_t<Length, Type>& adj_a, vec_t<Length, Type>& adj_b, const vec_t<Length, Type>& adj_ret)
+{
+}
+
+template<typename Type>
+inline CUDA_CALLABLE void adj_bit_xor(vec_t<2, Type> a, vec_t<2, Type> b, vec_t<2, Type>& adj_a, vec_t<2, Type>& adj_b, const vec_t<2, Type>& adj_ret)
+{
+}
+
+template<typename Type>
+inline CUDA_CALLABLE void adj_bit_xor(vec_t<3, Type> a, vec_t<3, Type> b, vec_t<3, Type>& adj_a, vec_t<3, Type>& adj_b, const vec_t<3, Type>& adj_ret)
+{
+}
+
+template<unsigned Length, typename Type>
+inline CUDA_CALLABLE void adj_lshift(vec_t<Length, Type> a, vec_t<Length, Type> b, vec_t<Length, Type>& adj_a, vec_t<Length, Type>& adj_b, const vec_t<Length, Type>& adj_ret)
+{
+}
+
+template<typename Type>
+inline CUDA_CALLABLE void adj_lshift(vec_t<2, Type> a, vec_t<2, Type> b, vec_t<2, Type>& adj_a, vec_t<2, Type>& adj_b, const vec_t<2, Type>& adj_ret)
+{
+}
+
+template<typename Type>
+inline CUDA_CALLABLE void adj_lshift(vec_t<3, Type> a, vec_t<3, Type> b, vec_t<3, Type>& adj_a, vec_t<3, Type>& adj_b, const vec_t<3, Type>& adj_ret)
+{
+}
+
+template<unsigned Length, typename Type>
+inline CUDA_CALLABLE void adj_rshift(vec_t<Length, Type> a, vec_t<Length, Type> b, vec_t<Length, Type>& adj_a, vec_t<Length, Type>& adj_b, const vec_t<Length, Type>& adj_ret)
+{
+}
+
+template<typename Type>
+inline CUDA_CALLABLE void adj_rshift(vec_t<2, Type> a, vec_t<2, Type> b, vec_t<2, Type>& adj_a, vec_t<2, Type>& adj_b, const vec_t<2, Type>& adj_ret)
+{
+}
+
+template<typename Type>
+inline CUDA_CALLABLE void adj_rshift(vec_t<3, Type> a, vec_t<3, Type> b, vec_t<3, Type>& adj_a, vec_t<3, Type>& adj_b, const vec_t<3, Type>& adj_ret)
+{
+}
+
+template<unsigned Length, typename Type>
+inline CUDA_CALLABLE void adj_invert(
+    const vec_t<Length,Type>& v,
+    vec_t<Length,Type>& adj_v,
+    const vec_t<Length,Type>& adj_ret
+)
+{
+}
+
 template<unsigned Length, typename Type>
 inline CUDA_CALLABLE void adj_dot(vec_t<Length, Type> a, vec_t<Length, Type> b, vec_t<Length, Type>& adj_a, vec_t<Length, Type>& adj_b, const Type adj_ret)
 {
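All of the adjoints above are deliberately empty: bitwise operations have no useful derivative, so the incoming gradient is dropped rather than accumulated into adj_a/adj_b (the adj_atomic_* stubs further down follow the same convention). A toy scalar sketch of that convention; the names mirror the diff, but these stand-ins are not Warp's actual signatures:

    #include <cassert>

    // Empty adjoint: the incoming gradient adj_ret is intentionally discarded.
    void adj_bit_and(int a, int b, int& adj_a, int& adj_b, int adj_ret) {}

    // Contrast with a differentiable op, which accumulates into its inputs.
    void adj_add(int a, int b, int& adj_a, int& adj_b, int adj_ret)
    {
        adj_a += adj_ret;
        adj_b += adj_ret;
    }

    int main()
    {
        int adj_a = 0, adj_b = 0;
        adj_bit_and(12, 10, adj_a, adj_b, /*adj_ret=*/1);
        assert(adj_a == 0 && adj_b == 0);  // gradient blocked

        adj_add(12, 10, adj_a, adj_b, /*adj_ret=*/1);
        assert(adj_a == 1 && adj_b == 1);  // gradient flows
        return 0;
    }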
@@ -1626,6 +2031,42 @@ inline CUDA_CALLABLE vec_t<Length, Type> atomic_max(vec_t<Length, Type> * addr,
     return ret;
 }
 
+template<unsigned Length, typename Type>
+inline CUDA_CALLABLE vec_t<Length, Type> atomic_and(vec_t<Length, Type> * addr, vec_t<Length, Type> value)
+{
+    vec_t<Length, Type> ret;
+    for( unsigned i=0; i < Length; ++i )
+    {
+        ret[i] = atomic_and(&(addr -> c[i]), value[i]);
+    }
+
+    return ret;
+}
+
+template<unsigned Length, typename Type>
+inline CUDA_CALLABLE vec_t<Length, Type> atomic_or(vec_t<Length, Type> * addr, vec_t<Length, Type> value)
+{
+    vec_t<Length, Type> ret;
+    for( unsigned i=0; i < Length; ++i )
+    {
+        ret[i] = atomic_or(&(addr -> c[i]), value[i]);
+    }
+
+    return ret;
+}
+
+template<unsigned Length, typename Type>
+inline CUDA_CALLABLE vec_t<Length, Type> atomic_xor(vec_t<Length, Type> * addr, vec_t<Length, Type> value)
+{
+    vec_t<Length, Type> ret;
+    for( unsigned i=0; i < Length; ++i )
+    {
+        ret[i] = atomic_xor(&(addr -> c[i]), value[i]);
+    }
+
+    return ret;
+}
+
 template<unsigned Length, typename Type>
 inline CUDA_CALLABLE void adj_atomic_minmax(
     vec_t<Length,Type> *addr,
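The vector atomics above decompose into one scalar atomic per component and return the prior value of each lane. Each lane therefore updates atomically, but the vector as a whole is not modified in a single indivisible step. A sketch of the same decomposition using std::atomic; this illustrates the idea and is not Warp's implementation:

    #include <atomic>
    #include <cassert>

    int main()
    {
        // Three independent lanes, each individually atomic.
        std::atomic<int> lanes[3] = {{0xF}, {0xF}, {0xF}};
        int value[3] = {0x1, 0x2, 0x4};
        int old[3];

        for (unsigned i = 0; i < 3; ++i)
            old[i] = lanes[i].fetch_and(value[i]);  // returns the prior lane value, like atomic_and

        assert(old[0] == 0xF && lanes[0].load() == 0x1);
        assert(lanes[1].load() == 0x2 && lanes[2].load() == 0x4);
        return 0;
    }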
@@ -1683,6 +2124,34 @@ template<unsigned Length> CUDA_CALLABLE inline void adj_atomic_add(vec_t<Length,
 template<unsigned Length> CUDA_CALLABLE inline void adj_atomic_add(vec_t<Length, int64>* buf, const vec_t<Length, int64> &value) { }
 template<unsigned Length> CUDA_CALLABLE inline void adj_atomic_add(vec_t<Length, uint64>* buf, const vec_t<Length, uint64> &value) { }
 
+// for bitwise operations we do not accumulate gradients
+template<unsigned Length> CUDA_CALLABLE inline void adj_atomic_and(vec_t<Length, int8>* buf, const vec_t<Length, int8> &value) { }
+template<unsigned Length> CUDA_CALLABLE inline void adj_atomic_and(vec_t<Length, uint8>* buf, const vec_t<Length, uint8> &value) { }
+template<unsigned Length> CUDA_CALLABLE inline void adj_atomic_and(vec_t<Length, int16>* buf, const vec_t<Length, int16> &value) { }
+template<unsigned Length> CUDA_CALLABLE inline void adj_atomic_and(vec_t<Length, uint16>* buf, const vec_t<Length, uint16> &value) { }
+template<unsigned Length> CUDA_CALLABLE inline void adj_atomic_and(vec_t<Length, int32>* buf, const vec_t<Length, int32> &value) { }
+template<unsigned Length> CUDA_CALLABLE inline void adj_atomic_and(vec_t<Length, uint32>* buf, const vec_t<Length, uint32> &value) { }
+template<unsigned Length> CUDA_CALLABLE inline void adj_atomic_and(vec_t<Length, int64>* buf, const vec_t<Length, int64> &value) { }
+template<unsigned Length> CUDA_CALLABLE inline void adj_atomic_and(vec_t<Length, uint64>* buf, const vec_t<Length, uint64> &value) { }
+
+template<unsigned Length> CUDA_CALLABLE inline void adj_atomic_or(vec_t<Length, int8>* buf, const vec_t<Length, int8> &value) { }
+template<unsigned Length> CUDA_CALLABLE inline void adj_atomic_or(vec_t<Length, uint8>* buf, const vec_t<Length, uint8> &value) { }
+template<unsigned Length> CUDA_CALLABLE inline void adj_atomic_or(vec_t<Length, int16>* buf, const vec_t<Length, int16> &value) { }
+template<unsigned Length> CUDA_CALLABLE inline void adj_atomic_or(vec_t<Length, uint16>* buf, const vec_t<Length, uint16> &value) { }
+template<unsigned Length> CUDA_CALLABLE inline void adj_atomic_or(vec_t<Length, int32>* buf, const vec_t<Length, int32> &value) { }
+template<unsigned Length> CUDA_CALLABLE inline void adj_atomic_or(vec_t<Length, uint32>* buf, const vec_t<Length, uint32> &value) { }
+template<unsigned Length> CUDA_CALLABLE inline void adj_atomic_or(vec_t<Length, int64>* buf, const vec_t<Length, int64> &value) { }
+template<unsigned Length> CUDA_CALLABLE inline void adj_atomic_or(vec_t<Length, uint64>* buf, const vec_t<Length, uint64> &value) { }
+
+template<unsigned Length> CUDA_CALLABLE inline void adj_atomic_xor(vec_t<Length, int8>* buf, const vec_t<Length, int8> &value) { }
+template<unsigned Length> CUDA_CALLABLE inline void adj_atomic_xor(vec_t<Length, uint8>* buf, const vec_t<Length, uint8> &value) { }
+template<unsigned Length> CUDA_CALLABLE inline void adj_atomic_xor(vec_t<Length, int16>* buf, const vec_t<Length, int16> &value) { }
+template<unsigned Length> CUDA_CALLABLE inline void adj_atomic_xor(vec_t<Length, uint16>* buf, const vec_t<Length, uint16> &value) { }
+template<unsigned Length> CUDA_CALLABLE inline void adj_atomic_xor(vec_t<Length, int32>* buf, const vec_t<Length, int32> &value) { }
+template<unsigned Length> CUDA_CALLABLE inline void adj_atomic_xor(vec_t<Length, uint32>* buf, const vec_t<Length, uint32> &value) { }
+template<unsigned Length> CUDA_CALLABLE inline void adj_atomic_xor(vec_t<Length, int64>* buf, const vec_t<Length, int64> &value) { }
+template<unsigned Length> CUDA_CALLABLE inline void adj_atomic_xor(vec_t<Length, uint64>* buf, const vec_t<Length, uint64> &value) { }
+
 
 // adjoints for some of the constructors, used in intersect.h
 inline CUDA_CALLABLE void adj_vec2(float x, float y, float& adj_x, float& adj_y, const vec2& adj_ret)
warp/native/version.h
ADDED
@@ -0,0 +1,23 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef WP_VERSION_H
+#define WP_VERSION_H
+
+#define WP_VERSION_STRING "1.10.0rc2"
+
+#endif // WP_VERSION_H
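The new header bakes the package version into the native binaries as a preprocessor string (note that the file list above also adds warp/tests/test_version.py). A sketch of how native code might consume it; the define is inlined here in place of including version.h:

    #include <cstdio>

    #define WP_VERSION_STRING "1.10.0rc2"  // stand-in for #include "version.h"

    int main()
    {
        // A native module can now report the wheel's version at runtime.
        std::printf("warp native version: %s\n", WP_VERSION_STRING);
        return 0;
    }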
warp/native/volume.cpp
CHANGED
@@ -205,7 +205,7 @@ void wp_volume_get_buffer_info(uint64_t id, void** buf, uint64_t* size)
 
 void wp_volume_get_voxel_size(uint64_t id, float* dx, float* dy, float* dz)
 {
-    *dx = *
+    *dx = *dy = *dz = 0.0f;
 
     const VolumeDesc* volume;
     if (volume_get_descriptor(id, volume))
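The fixed line zero-initializes all three out-parameters before the descriptor lookup, so a caller passing an invalid volume id still reads defined values. A sketch of that defensive pattern with a hypothetical lookup function:

    #include <cstdint>

    static bool lookup_volume(uint64_t id) { return id != 0; }  // toy stand-in for volume_get_descriptor

    void get_voxel_size(uint64_t id, float* dx, float* dy, float* dz)
    {
        *dx = *dy = *dz = 0.0f;  // outputs are defined even on the failure path

        if (!lookup_volume(id))
            return;

        *dx = *dy = *dz = 0.1f;  // placeholder for the real per-axis voxel sizes
    }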
warp/native/volume.cu
CHANGED
warp/native/volume.h
CHANGED
@@ -161,7 +161,7 @@ CUDA_CALLABLE inline void pnano_read(T &result, pnanovdb_buf_t buf, PNANOVDB_INO
     pnano_read<T>(result, buf, address);
 }
 
-/// regular grid accessor (values stored in
+/// regular grid accessor (values stored in leaves)
 
 struct value_accessor_base
 {