warp-lang 1.9.0__py3-none-win_amd64.whl → 1.10.0rc2__py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of warp-lang might be problematic. Click here for more details.
- warp/__init__.py +301 -287
- warp/__init__.pyi +2220 -313
- warp/_src/__init__.py +14 -0
- warp/_src/autograd.py +1075 -0
- warp/_src/build.py +618 -0
- warp/_src/build_dll.py +640 -0
- warp/{builtins.py → _src/builtins.py} +1497 -226
- warp/_src/codegen.py +4359 -0
- warp/{config.py → _src/config.py} +178 -169
- warp/_src/constants.py +57 -0
- warp/_src/context.py +8294 -0
- warp/_src/dlpack.py +462 -0
- warp/_src/fabric.py +355 -0
- warp/_src/fem/__init__.py +14 -0
- warp/_src/fem/adaptivity.py +508 -0
- warp/_src/fem/cache.py +687 -0
- warp/_src/fem/dirichlet.py +188 -0
- warp/{fem → _src/fem}/domain.py +40 -30
- warp/_src/fem/field/__init__.py +131 -0
- warp/_src/fem/field/field.py +701 -0
- warp/{fem → _src/fem}/field/nodal_field.py +30 -15
- warp/{fem → _src/fem}/field/restriction.py +1 -1
- warp/{fem → _src/fem}/field/virtual.py +53 -27
- warp/_src/fem/geometry/__init__.py +32 -0
- warp/{fem → _src/fem}/geometry/adaptive_nanogrid.py +77 -163
- warp/_src/fem/geometry/closest_point.py +97 -0
- warp/{fem → _src/fem}/geometry/deformed_geometry.py +14 -22
- warp/{fem → _src/fem}/geometry/element.py +32 -10
- warp/{fem → _src/fem}/geometry/geometry.py +48 -20
- warp/{fem → _src/fem}/geometry/grid_2d.py +12 -23
- warp/{fem → _src/fem}/geometry/grid_3d.py +12 -23
- warp/{fem → _src/fem}/geometry/hexmesh.py +40 -63
- warp/{fem → _src/fem}/geometry/nanogrid.py +255 -248
- warp/{fem → _src/fem}/geometry/partition.py +121 -63
- warp/{fem → _src/fem}/geometry/quadmesh.py +26 -45
- warp/{fem → _src/fem}/geometry/tetmesh.py +40 -63
- warp/{fem → _src/fem}/geometry/trimesh.py +26 -45
- warp/{fem → _src/fem}/integrate.py +164 -158
- warp/_src/fem/linalg.py +383 -0
- warp/_src/fem/operator.py +396 -0
- warp/_src/fem/polynomial.py +229 -0
- warp/{fem → _src/fem}/quadrature/pic_quadrature.py +15 -20
- warp/{fem → _src/fem}/quadrature/quadrature.py +95 -47
- warp/_src/fem/space/__init__.py +248 -0
- warp/{fem → _src/fem}/space/basis_function_space.py +20 -11
- warp/_src/fem/space/basis_space.py +679 -0
- warp/{fem → _src/fem}/space/dof_mapper.py +3 -3
- warp/{fem → _src/fem}/space/function_space.py +14 -13
- warp/{fem → _src/fem}/space/grid_2d_function_space.py +4 -7
- warp/{fem → _src/fem}/space/grid_3d_function_space.py +4 -4
- warp/{fem → _src/fem}/space/hexmesh_function_space.py +4 -10
- warp/{fem → _src/fem}/space/nanogrid_function_space.py +3 -9
- warp/{fem → _src/fem}/space/partition.py +117 -60
- warp/{fem → _src/fem}/space/quadmesh_function_space.py +4 -10
- warp/{fem → _src/fem}/space/restriction.py +66 -33
- warp/_src/fem/space/shape/__init__.py +152 -0
- warp/{fem → _src/fem}/space/shape/cube_shape_function.py +9 -9
- warp/{fem → _src/fem}/space/shape/shape_function.py +8 -9
- warp/{fem → _src/fem}/space/shape/square_shape_function.py +6 -6
- warp/{fem → _src/fem}/space/shape/tet_shape_function.py +3 -3
- warp/{fem → _src/fem}/space/shape/triangle_shape_function.py +3 -3
- warp/{fem → _src/fem}/space/tetmesh_function_space.py +3 -9
- warp/_src/fem/space/topology.py +459 -0
- warp/{fem → _src/fem}/space/trimesh_function_space.py +3 -9
- warp/_src/fem/types.py +112 -0
- warp/_src/fem/utils.py +486 -0
- warp/_src/jax.py +186 -0
- warp/_src/jax_experimental/__init__.py +14 -0
- warp/_src/jax_experimental/custom_call.py +387 -0
- warp/_src/jax_experimental/ffi.py +1284 -0
- warp/_src/jax_experimental/xla_ffi.py +656 -0
- warp/_src/marching_cubes.py +708 -0
- warp/_src/math.py +414 -0
- warp/_src/optim/__init__.py +14 -0
- warp/_src/optim/adam.py +163 -0
- warp/_src/optim/linear.py +1606 -0
- warp/_src/optim/sgd.py +112 -0
- warp/_src/paddle.py +406 -0
- warp/_src/render/__init__.py +14 -0
- warp/_src/render/imgui_manager.py +289 -0
- warp/_src/render/render_opengl.py +3636 -0
- warp/_src/render/render_usd.py +937 -0
- warp/_src/render/utils.py +160 -0
- warp/_src/sparse.py +2716 -0
- warp/_src/tape.py +1206 -0
- warp/{thirdparty → _src/thirdparty}/unittest_parallel.py +9 -2
- warp/_src/torch.py +391 -0
- warp/_src/types.py +5870 -0
- warp/_src/utils.py +1693 -0
- warp/autograd.py +12 -1054
- warp/bin/warp-clang.dll +0 -0
- warp/bin/warp.dll +0 -0
- warp/build.py +8 -588
- warp/build_dll.py +6 -471
- warp/codegen.py +6 -4246
- warp/constants.py +6 -39
- warp/context.py +12 -7851
- warp/dlpack.py +6 -444
- warp/examples/distributed/example_jacobi_mpi.py +4 -5
- warp/examples/fem/example_adaptive_grid.py +1 -1
- warp/examples/fem/example_apic_fluid.py +1 -1
- warp/examples/fem/example_burgers.py +8 -8
- warp/examples/fem/example_diffusion.py +1 -1
- warp/examples/fem/example_distortion_energy.py +1 -1
- warp/examples/fem/example_mixed_elasticity.py +2 -2
- warp/examples/fem/example_navier_stokes.py +1 -1
- warp/examples/fem/example_nonconforming_contact.py +7 -7
- warp/examples/fem/example_stokes.py +1 -1
- warp/examples/fem/example_stokes_transfer.py +1 -1
- warp/examples/fem/utils.py +2 -2
- warp/examples/interop/example_jax_callable.py +1 -1
- warp/examples/interop/example_jax_ffi_callback.py +1 -1
- warp/examples/interop/example_jax_kernel.py +3 -2
- warp/examples/tile/example_tile_mcgp.py +191 -0
- warp/fabric.py +6 -337
- warp/fem/__init__.py +159 -97
- warp/fem/adaptivity.py +7 -489
- warp/fem/cache.py +9 -648
- warp/fem/dirichlet.py +6 -184
- warp/fem/field/__init__.py +8 -109
- warp/fem/field/field.py +7 -652
- warp/fem/geometry/__init__.py +7 -18
- warp/fem/geometry/closest_point.py +11 -77
- warp/fem/linalg.py +18 -366
- warp/fem/operator.py +11 -369
- warp/fem/polynomial.py +9 -209
- warp/fem/space/__init__.py +5 -211
- warp/fem/space/basis_space.py +6 -662
- warp/fem/space/shape/__init__.py +41 -118
- warp/fem/space/topology.py +6 -437
- warp/fem/types.py +6 -81
- warp/fem/utils.py +11 -444
- warp/jax.py +8 -165
- warp/jax_experimental/__init__.py +14 -1
- warp/jax_experimental/custom_call.py +8 -342
- warp/jax_experimental/ffi.py +17 -853
- warp/jax_experimental/xla_ffi.py +5 -596
- warp/marching_cubes.py +5 -689
- warp/math.py +16 -393
- warp/native/array.h +385 -37
- warp/native/builtin.h +316 -39
- warp/native/bvh.cpp +43 -9
- warp/native/bvh.cu +62 -27
- warp/native/bvh.h +310 -309
- warp/native/clang/clang.cpp +102 -97
- warp/native/coloring.cpp +0 -1
- warp/native/crt.h +208 -0
- warp/native/exports.h +156 -0
- warp/native/hashgrid.cu +2 -0
- warp/native/intersect.h +24 -1
- warp/native/intersect_tri.h +44 -35
- warp/native/mat.h +1456 -276
- warp/native/mesh.cpp +4 -4
- warp/native/mesh.cu +4 -2
- warp/native/mesh.h +176 -61
- warp/native/quat.h +0 -52
- warp/native/scan.cu +2 -0
- warp/native/sort.cu +22 -13
- warp/native/sort.h +2 -0
- warp/native/sparse.cu +7 -3
- warp/native/spatial.h +12 -0
- warp/native/tile.h +837 -70
- warp/native/tile_radix_sort.h +1 -1
- warp/native/tile_reduce.h +394 -46
- warp/native/tile_scan.h +4 -4
- warp/native/vec.h +469 -53
- warp/native/version.h +23 -0
- warp/native/volume.cpp +1 -1
- warp/native/volume.cu +1 -0
- warp/native/volume.h +1 -1
- warp/native/volume_builder.cu +2 -0
- warp/native/warp.cpp +60 -32
- warp/native/warp.cu +313 -201
- warp/native/warp.h +14 -11
- warp/optim/__init__.py +6 -3
- warp/optim/adam.py +6 -145
- warp/optim/linear.py +14 -1585
- warp/optim/sgd.py +6 -94
- warp/paddle.py +6 -388
- warp/render/__init__.py +8 -4
- warp/render/imgui_manager.py +7 -267
- warp/render/render_opengl.py +6 -3616
- warp/render/render_usd.py +6 -918
- warp/render/utils.py +6 -142
- warp/sparse.py +37 -2563
- warp/tape.py +6 -1188
- warp/tests/__main__.py +1 -1
- warp/tests/cuda/test_async.py +4 -4
- warp/tests/cuda/test_conditional_captures.py +1 -1
- warp/tests/cuda/test_multigpu.py +1 -1
- warp/tests/cuda/test_streams.py +58 -1
- warp/tests/geometry/test_bvh.py +157 -22
- warp/tests/geometry/test_hash_grid.py +38 -0
- warp/tests/geometry/test_marching_cubes.py +0 -1
- warp/tests/geometry/test_mesh.py +5 -3
- warp/tests/geometry/test_mesh_query_aabb.py +5 -12
- warp/tests/geometry/test_mesh_query_point.py +5 -2
- warp/tests/geometry/test_mesh_query_ray.py +15 -3
- warp/tests/geometry/test_volume_write.py +5 -5
- warp/tests/interop/test_dlpack.py +14 -14
- warp/tests/interop/test_jax.py +1382 -79
- warp/tests/interop/test_paddle.py +1 -1
- warp/tests/test_adam.py +0 -1
- warp/tests/test_arithmetic.py +9 -9
- warp/tests/test_array.py +529 -100
- warp/tests/test_array_reduce.py +3 -3
- warp/tests/test_atomic.py +12 -8
- warp/tests/test_atomic_bitwise.py +209 -0
- warp/tests/test_atomic_cas.py +4 -4
- warp/tests/test_bool.py +2 -2
- warp/tests/test_builtins_resolution.py +5 -571
- warp/tests/test_codegen.py +34 -15
- warp/tests/test_conditional.py +1 -1
- warp/tests/test_context.py +6 -6
- warp/tests/test_copy.py +242 -161
- warp/tests/test_ctypes.py +3 -3
- warp/tests/test_devices.py +24 -2
- warp/tests/test_examples.py +16 -84
- warp/tests/test_fabricarray.py +35 -35
- warp/tests/test_fast_math.py +0 -2
- warp/tests/test_fem.py +60 -14
- warp/tests/test_fixedarray.py +3 -3
- warp/tests/test_func.py +8 -5
- warp/tests/test_generics.py +1 -1
- warp/tests/test_indexedarray.py +24 -24
- warp/tests/test_intersect.py +39 -9
- warp/tests/test_large.py +1 -1
- warp/tests/test_lerp.py +3 -1
- warp/tests/test_linear_solvers.py +1 -1
- warp/tests/test_map.py +49 -4
- warp/tests/test_mat.py +52 -62
- warp/tests/test_mat_constructors.py +4 -5
- warp/tests/test_mat_lite.py +1 -1
- warp/tests/test_mat_scalar_ops.py +121 -121
- warp/tests/test_math.py +34 -0
- warp/tests/test_module_aot.py +4 -4
- warp/tests/test_modules_lite.py +28 -2
- warp/tests/test_print.py +11 -11
- warp/tests/test_quat.py +93 -58
- warp/tests/test_runlength_encode.py +1 -1
- warp/tests/test_scalar_ops.py +38 -10
- warp/tests/test_smoothstep.py +1 -1
- warp/tests/test_sparse.py +126 -15
- warp/tests/test_spatial.py +105 -87
- warp/tests/test_special_values.py +6 -6
- warp/tests/test_static.py +7 -7
- warp/tests/test_struct.py +13 -2
- warp/tests/test_triangle_closest_point.py +48 -1
- warp/tests/test_tuple.py +96 -0
- warp/tests/test_types.py +82 -9
- warp/tests/test_utils.py +52 -52
- warp/tests/test_vec.py +29 -29
- warp/tests/test_vec_constructors.py +5 -5
- warp/tests/test_vec_scalar_ops.py +97 -97
- warp/tests/test_version.py +75 -0
- warp/tests/tile/test_tile.py +239 -0
- warp/tests/tile/test_tile_atomic_bitwise.py +403 -0
- warp/tests/tile/test_tile_cholesky.py +7 -4
- warp/tests/tile/test_tile_load.py +26 -2
- warp/tests/tile/test_tile_mathdx.py +3 -3
- warp/tests/tile/test_tile_matmul.py +1 -1
- warp/tests/tile/test_tile_mlp.py +2 -4
- warp/tests/tile/test_tile_reduce.py +214 -13
- warp/tests/unittest_suites.py +6 -14
- warp/tests/unittest_utils.py +10 -9
- warp/tests/walkthrough_debug.py +3 -1
- warp/torch.py +6 -373
- warp/types.py +29 -5750
- warp/utils.py +10 -1659
- {warp_lang-1.9.0.dist-info → warp_lang-1.10.0rc2.dist-info}/METADATA +47 -103
- warp_lang-1.10.0rc2.dist-info/RECORD +468 -0
- warp_lang-1.10.0rc2.dist-info/licenses/licenses/Gaia-LICENSE.txt +6 -0
- warp_lang-1.10.0rc2.dist-info/licenses/licenses/appdirs-LICENSE.txt +22 -0
- warp_lang-1.10.0rc2.dist-info/licenses/licenses/asset_pixel_jpg-LICENSE.txt +3 -0
- warp_lang-1.10.0rc2.dist-info/licenses/licenses/cuda-LICENSE.txt +1582 -0
- warp_lang-1.10.0rc2.dist-info/licenses/licenses/dlpack-LICENSE.txt +201 -0
- warp_lang-1.10.0rc2.dist-info/licenses/licenses/fp16-LICENSE.txt +28 -0
- warp_lang-1.10.0rc2.dist-info/licenses/licenses/libmathdx-LICENSE.txt +220 -0
- warp_lang-1.10.0rc2.dist-info/licenses/licenses/llvm-LICENSE.txt +279 -0
- warp_lang-1.10.0rc2.dist-info/licenses/licenses/moller-LICENSE.txt +16 -0
- warp_lang-1.10.0rc2.dist-info/licenses/licenses/nanovdb-LICENSE.txt +2 -0
- warp_lang-1.10.0rc2.dist-info/licenses/licenses/nvrtc-LICENSE.txt +1592 -0
- warp_lang-1.10.0rc2.dist-info/licenses/licenses/svd-LICENSE.txt +23 -0
- warp_lang-1.10.0rc2.dist-info/licenses/licenses/unittest_parallel-LICENSE.txt +21 -0
- warp_lang-1.10.0rc2.dist-info/licenses/licenses/usd-LICENSE.txt +213 -0
- warp_lang-1.10.0rc2.dist-info/licenses/licenses/windingnumber-LICENSE.txt +21 -0
- warp/examples/assets/cartpole.urdf +0 -110
- warp/examples/assets/crazyflie.usd +0 -0
- warp/examples/assets/nv_ant.xml +0 -92
- warp/examples/assets/nv_humanoid.xml +0 -183
- warp/examples/assets/quadruped.urdf +0 -268
- warp/examples/optim/example_bounce.py +0 -266
- warp/examples/optim/example_cloth_throw.py +0 -228
- warp/examples/optim/example_drone.py +0 -870
- warp/examples/optim/example_inverse_kinematics.py +0 -182
- warp/examples/optim/example_inverse_kinematics_torch.py +0 -191
- warp/examples/optim/example_softbody_properties.py +0 -400
- warp/examples/optim/example_spring_cage.py +0 -245
- warp/examples/optim/example_trajectory.py +0 -227
- warp/examples/sim/example_cartpole.py +0 -143
- warp/examples/sim/example_cloth.py +0 -225
- warp/examples/sim/example_cloth_self_contact.py +0 -316
- warp/examples/sim/example_granular.py +0 -130
- warp/examples/sim/example_granular_collision_sdf.py +0 -202
- warp/examples/sim/example_jacobian_ik.py +0 -244
- warp/examples/sim/example_particle_chain.py +0 -124
- warp/examples/sim/example_quadruped.py +0 -203
- warp/examples/sim/example_rigid_chain.py +0 -203
- warp/examples/sim/example_rigid_contact.py +0 -195
- warp/examples/sim/example_rigid_force.py +0 -133
- warp/examples/sim/example_rigid_gyroscopic.py +0 -115
- warp/examples/sim/example_rigid_soft_contact.py +0 -140
- warp/examples/sim/example_soft_body.py +0 -196
- warp/examples/tile/example_tile_walker.py +0 -327
- warp/sim/__init__.py +0 -74
- warp/sim/articulation.py +0 -793
- warp/sim/collide.py +0 -2570
- warp/sim/graph_coloring.py +0 -307
- warp/sim/import_mjcf.py +0 -791
- warp/sim/import_snu.py +0 -227
- warp/sim/import_urdf.py +0 -579
- warp/sim/import_usd.py +0 -898
- warp/sim/inertia.py +0 -357
- warp/sim/integrator.py +0 -245
- warp/sim/integrator_euler.py +0 -2000
- warp/sim/integrator_featherstone.py +0 -2101
- warp/sim/integrator_vbd.py +0 -2487
- warp/sim/integrator_xpbd.py +0 -3295
- warp/sim/model.py +0 -4821
- warp/sim/particles.py +0 -121
- warp/sim/render.py +0 -431
- warp/sim/utils.py +0 -431
- warp/tests/sim/disabled_kinematics.py +0 -244
- warp/tests/sim/test_cloth.py +0 -863
- warp/tests/sim/test_collision.py +0 -743
- warp/tests/sim/test_coloring.py +0 -347
- warp/tests/sim/test_inertia.py +0 -161
- warp/tests/sim/test_model.py +0 -226
- warp/tests/sim/test_sim_grad.py +0 -287
- warp/tests/sim/test_sim_grad_bounce_linear.py +0 -212
- warp/tests/sim/test_sim_kinematics.py +0 -98
- warp/thirdparty/__init__.py +0 -0
- warp_lang-1.9.0.dist-info/RECORD +0 -456
- /warp/{fem → _src/fem}/quadrature/__init__.py +0 -0
- /warp/{tests/sim → _src/thirdparty}/__init__.py +0 -0
- /warp/{thirdparty → _src/thirdparty}/appdirs.py +0 -0
- /warp/{thirdparty → _src/thirdparty}/dlpack.py +0 -0
- {warp_lang-1.9.0.dist-info → warp_lang-1.10.0rc2.dist-info}/WHEEL +0 -0
- {warp_lang-1.9.0.dist-info → warp_lang-1.10.0rc2.dist-info}/licenses/LICENSE.md +0 -0
- {warp_lang-1.9.0.dist-info → warp_lang-1.10.0rc2.dist-info}/top_level.txt +0 -0
warp/native/vec.h
CHANGED
|
@@ -149,6 +149,17 @@ using vec2d = vec_t<2,double>;
|
|
|
149
149
|
using vec3d = vec_t<3,double>;
|
|
150
150
|
using vec4d = vec_t<4,double>;
|
|
151
151
|
|
|
152
|
+
// Type trait to detect if a type is a vec_t
|
|
153
|
+
template<typename T>
|
|
154
|
+
struct is_vector {
|
|
155
|
+
static constexpr bool value = false;
|
|
156
|
+
};
|
|
157
|
+
|
|
158
|
+
template<unsigned Length, typename Type>
|
|
159
|
+
struct is_vector<vec_t<Length, Type>> {
|
|
160
|
+
static constexpr bool value = true;
|
|
161
|
+
};
|
|
162
|
+
|
|
152
163
|
template<unsigned Length, typename Type>
|
|
153
164
|
inline CUDA_CALLABLE vec_t<Length, Type> operator - (const vec_t<Length, Type>& x)
|
|
154
165
|
{
|
|
@@ -343,17 +354,6 @@ inline CUDA_CALLABLE vec_t<Length, Type> add(vec_t<Length, Type> a, vec_t<Length
|
|
|
343
354
|
return ret;
|
|
344
355
|
}
|
|
345
356
|
|
|
346
|
-
template<unsigned Length, typename Type>
|
|
347
|
-
inline CUDA_CALLABLE vec_t<Length, Type> add(Type a, vec_t<Length, Type> b)
|
|
348
|
-
{
|
|
349
|
-
vec_t<Length, Type> ret;
|
|
350
|
-
for( unsigned i=0; i < Length; ++i )
|
|
351
|
-
{
|
|
352
|
-
ret[i] = a + b[i];
|
|
353
|
-
}
|
|
354
|
-
return ret;
|
|
355
|
-
}
|
|
356
|
-
|
|
357
357
|
template<typename Type>
|
|
358
358
|
inline CUDA_CALLABLE vec_t<2, Type> add(vec_t<2, Type> a, vec_t<2, Type> b)
|
|
359
359
|
{
|
|
@@ -378,18 +378,6 @@ inline CUDA_CALLABLE vec_t<Length, Type> sub(vec_t<Length, Type> a, vec_t<Length
|
|
|
378
378
|
return ret;
|
|
379
379
|
}
|
|
380
380
|
|
|
381
|
-
template<unsigned Length, typename Type>
|
|
382
|
-
inline CUDA_CALLABLE vec_t<Length, Type> sub(Type a, vec_t<Length, Type> b)
|
|
383
|
-
{
|
|
384
|
-
vec_t<Length, Type> ret;
|
|
385
|
-
for (unsigned i=0; i < Length; ++i)
|
|
386
|
-
{
|
|
387
|
-
ret[i] = Type(a - b[i]);
|
|
388
|
-
}
|
|
389
|
-
|
|
390
|
-
return ret;
|
|
391
|
-
}
|
|
392
|
-
|
|
393
381
|
template<typename Type>
|
|
394
382
|
inline CUDA_CALLABLE vec_t<2, Type> sub(vec_t<2, Type> a, vec_t<2, Type> b)
|
|
395
383
|
{
|
|
@@ -427,6 +415,139 @@ inline CUDA_CALLABLE vec_t<3, Type> mod(vec_t<3, Type> a, vec_t<3, Type> b)
|
|
|
427
415
|
return vec_t<3, Type>(mod(a.c[0], b.c[0]), mod(a.c[1], b.c[1]), mod(a.c[2], b.c[2]));
|
|
428
416
|
}
|
|
429
417
|
|
|
418
|
+
// bitwise AND
|
|
419
|
+
template<unsigned Length, typename Type>
|
|
420
|
+
inline CUDA_CALLABLE vec_t<Length, Type> bit_and(vec_t<Length, Type> a, vec_t<Length, Type> b)
|
|
421
|
+
{
|
|
422
|
+
vec_t<Length, Type> ret;
|
|
423
|
+
for( unsigned i=0; i < Length; ++i )
|
|
424
|
+
{
|
|
425
|
+
ret[i] = Type(a[i] & b[i]);
|
|
426
|
+
}
|
|
427
|
+
return ret;
|
|
428
|
+
}
|
|
429
|
+
|
|
430
|
+
template<typename Type>
|
|
431
|
+
inline CUDA_CALLABLE vec_t<2, Type> bit_and(vec_t<2, Type> a, vec_t<2, Type> b)
|
|
432
|
+
{
|
|
433
|
+
return vec_t<2, Type>( a.c[0] & b.c[0], a.c[1] & b.c[1]);
|
|
434
|
+
}
|
|
435
|
+
|
|
436
|
+
template<typename Type>
|
|
437
|
+
inline CUDA_CALLABLE vec_t<3, Type> bit_and(vec_t<3, Type> a, vec_t<3, Type> b)
|
|
438
|
+
{
|
|
439
|
+
return vec_t<3, Type>( a.c[0] & b.c[0], a.c[1] & b.c[1], a.c[2] & b.c[2]);
|
|
440
|
+
}
|
|
441
|
+
|
|
442
|
+
// bitwise OR
|
|
443
|
+
template<unsigned Length, typename Type>
|
|
444
|
+
inline CUDA_CALLABLE vec_t<Length, Type> bit_or(vec_t<Length, Type> a, vec_t<Length, Type> b)
|
|
445
|
+
{
|
|
446
|
+
vec_t<Length, Type> ret;
|
|
447
|
+
for( unsigned i=0; i < Length; ++i )
|
|
448
|
+
{
|
|
449
|
+
ret[i] = Type(a[i] | b[i]);
|
|
450
|
+
}
|
|
451
|
+
return ret;
|
|
452
|
+
}
|
|
453
|
+
|
|
454
|
+
template<typename Type>
|
|
455
|
+
inline CUDA_CALLABLE vec_t<2, Type> bit_or(vec_t<2, Type> a, vec_t<2, Type> b)
|
|
456
|
+
{
|
|
457
|
+
return vec_t<2, Type>( a.c[0] | b.c[0], a.c[1] | b.c[1]);
|
|
458
|
+
}
|
|
459
|
+
|
|
460
|
+
template<typename Type>
|
|
461
|
+
inline CUDA_CALLABLE vec_t<3, Type> bit_or(vec_t<3, Type> a, vec_t<3, Type> b)
|
|
462
|
+
{
|
|
463
|
+
return vec_t<3, Type>( a.c[0] | b.c[0], a.c[1] | b.c[1], a.c[2] | b.c[2]);
|
|
464
|
+
}
|
|
465
|
+
|
|
466
|
+
// bitwise XOR
|
|
467
|
+
template<unsigned Length, typename Type>
|
|
468
|
+
inline CUDA_CALLABLE vec_t<Length, Type> bit_xor(vec_t<Length, Type> a, vec_t<Length, Type> b)
|
|
469
|
+
{
|
|
470
|
+
vec_t<Length, Type> ret;
|
|
471
|
+
for( unsigned i=0; i < Length; ++i )
|
|
472
|
+
{
|
|
473
|
+
ret[i] = Type(a[i] ^ b[i]);
|
|
474
|
+
}
|
|
475
|
+
return ret;
|
|
476
|
+
}
|
|
477
|
+
|
|
478
|
+
template<typename Type>
|
|
479
|
+
inline CUDA_CALLABLE vec_t<2, Type> bit_xor(vec_t<2, Type> a, vec_t<2, Type> b)
|
|
480
|
+
{
|
|
481
|
+
return vec_t<2, Type>( a.c[0] ^ b.c[0], a.c[1] ^ b.c[1]);
|
|
482
|
+
}
|
|
483
|
+
|
|
484
|
+
template<typename Type>
|
|
485
|
+
inline CUDA_CALLABLE vec_t<3, Type> bit_xor(vec_t<3, Type> a, vec_t<3, Type> b)
|
|
486
|
+
{
|
|
487
|
+
return vec_t<3, Type>( a.c[0] ^ b.c[0], a.c[1] ^ b.c[1], a.c[2] ^ b.c[2]);
|
|
488
|
+
}
|
|
489
|
+
|
|
490
|
+
// left shift
|
|
491
|
+
template<unsigned Length, typename Type>
|
|
492
|
+
inline CUDA_CALLABLE vec_t<Length, Type> lshift(vec_t<Length, Type> a, vec_t<Length, Type> b)
|
|
493
|
+
{
|
|
494
|
+
vec_t<Length, Type> ret;
|
|
495
|
+
for( unsigned i=0; i < Length; ++i )
|
|
496
|
+
{
|
|
497
|
+
ret[i] = Type(a[i] << b[i]);
|
|
498
|
+
}
|
|
499
|
+
return ret;
|
|
500
|
+
}
|
|
501
|
+
|
|
502
|
+
template<typename Type>
|
|
503
|
+
inline CUDA_CALLABLE vec_t<2, Type> lshift(vec_t<2, Type> a, vec_t<2, Type> b)
|
|
504
|
+
{
|
|
505
|
+
return vec_t<2, Type>( a.c[0] << b.c[0], a.c[1] << b.c[1]);
|
|
506
|
+
}
|
|
507
|
+
|
|
508
|
+
template<typename Type>
|
|
509
|
+
inline CUDA_CALLABLE vec_t<3, Type> lshift(vec_t<3, Type> a, vec_t<3, Type> b)
|
|
510
|
+
{
|
|
511
|
+
return vec_t<3, Type>( a.c[0] << b.c[0], a.c[1] << b.c[1], a.c[2] << b.c[2]);
|
|
512
|
+
}
|
|
513
|
+
|
|
514
|
+
// right shift
|
|
515
|
+
template<unsigned Length, typename Type>
|
|
516
|
+
inline CUDA_CALLABLE vec_t<Length, Type> rshift(vec_t<Length, Type> a, vec_t<Length, Type> b)
|
|
517
|
+
{
|
|
518
|
+
vec_t<Length, Type> ret;
|
|
519
|
+
for( unsigned i=0; i < Length; ++i )
|
|
520
|
+
{
|
|
521
|
+
ret[i] = Type(a[i] >> b[i]);
|
|
522
|
+
}
|
|
523
|
+
return ret;
|
|
524
|
+
}
|
|
525
|
+
|
|
526
|
+
template<typename Type>
|
|
527
|
+
inline CUDA_CALLABLE vec_t<2, Type> rshift(vec_t<2, Type> a, vec_t<2, Type> b)
|
|
528
|
+
{
|
|
529
|
+
return vec_t<2, Type>( a.c[0] >> b.c[0], a.c[1] >> b.c[1]);
|
|
530
|
+
}
|
|
531
|
+
|
|
532
|
+
template<typename Type>
|
|
533
|
+
inline CUDA_CALLABLE vec_t<3, Type> rshift(vec_t<3, Type> a, vec_t<3, Type> b)
|
|
534
|
+
{
|
|
535
|
+
return vec_t<3, Type>( a.c[0] >> b.c[0], a.c[1] >> b.c[1], a.c[2] >> b.c[2]);
|
|
536
|
+
}
|
|
537
|
+
|
|
538
|
+
// invert
|
|
539
|
+
template<unsigned Length, typename Type>
|
|
540
|
+
inline CUDA_CALLABLE vec_t<Length,Type> invert(vec_t<Length,Type> v)
|
|
541
|
+
{
|
|
542
|
+
vec_t<Length,Type> ret;
|
|
543
|
+
for (unsigned i=0; i < Length; ++i)
|
|
544
|
+
{
|
|
545
|
+
ret[i] = ~v[i];
|
|
546
|
+
}
|
|
547
|
+
|
|
548
|
+
return ret;
|
|
549
|
+
}
|
|
550
|
+
|
|
430
551
|
// dot product:
|
|
431
552
|
template<unsigned Length, typename Type>
|
|
432
553
|
inline CUDA_CALLABLE Type dot(vec_t<Length, Type> a, vec_t<Length, Type> b)
|
|
@@ -747,6 +868,183 @@ inline CUDA_CALLABLE void adj_sub_inplace(
|
|
|
747
868
|
}
|
|
748
869
|
|
|
749
870
|
|
|
871
|
+
template<unsigned Length, typename Type>
|
|
872
|
+
inline CUDA_CALLABLE void bit_and_inplace(vec_t<Length, Type>& v, int idx, Type value)
|
|
873
|
+
{
|
|
874
|
+
#ifndef NDEBUG
|
|
875
|
+
if (idx < -(int)Length || idx >= (int)Length)
|
|
876
|
+
{
|
|
877
|
+
printf("vec index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
|
|
878
|
+
assert(0);
|
|
879
|
+
}
|
|
880
|
+
#endif
|
|
881
|
+
|
|
882
|
+
if (idx < 0)
|
|
883
|
+
{
|
|
884
|
+
idx += Length;
|
|
885
|
+
}
|
|
886
|
+
|
|
887
|
+
v[idx] &= value;
|
|
888
|
+
}
|
|
889
|
+
|
|
890
|
+
|
|
891
|
+
template<unsigned SliceLength, unsigned Length, typename Type>
|
|
892
|
+
inline CUDA_CALLABLE void bit_and_inplace(vec_t<Length, Type>& v, slice_t slice, const vec_t<SliceLength, Type> &a)
|
|
893
|
+
{
|
|
894
|
+
assert(slice.start >= 0 && slice.start <= (int)Length);
|
|
895
|
+
assert(slice.stop >= -1 && slice.stop <= (int)Length);
|
|
896
|
+
assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
|
|
897
|
+
assert(slice_get_length(slice) == SliceLength);
|
|
898
|
+
|
|
899
|
+
bool is_reversed = slice.step < 0;
|
|
900
|
+
|
|
901
|
+
int ii = 0;
|
|
902
|
+
for (
|
|
903
|
+
int i = slice.start;
|
|
904
|
+
is_reversed ? (i > slice.stop) : (i < slice.stop);
|
|
905
|
+
i += slice.step
|
|
906
|
+
)
|
|
907
|
+
{
|
|
908
|
+
v[i] &= a[ii];
|
|
909
|
+
++ii;
|
|
910
|
+
}
|
|
911
|
+
|
|
912
|
+
assert(ii == SliceLength);
|
|
913
|
+
}
|
|
914
|
+
|
|
915
|
+
|
|
916
|
+
template<unsigned Length, typename Type>
|
|
917
|
+
inline CUDA_CALLABLE void adj_bit_and_inplace(
|
|
918
|
+
vec_t<Length, Type>& v, int idx, Type value,
|
|
919
|
+
vec_t<Length, Type>& adj_v, int adj_idx, Type& adj_value
|
|
920
|
+
) {}
|
|
921
|
+
|
|
922
|
+
|
|
923
|
+
template<unsigned SliceLength, unsigned Length, typename Type>
|
|
924
|
+
inline CUDA_CALLABLE void adj_bit_and_inplace(
|
|
925
|
+
const vec_t<Length, Type>& v, slice_t slice, const vec_t<SliceLength, Type> &a,
|
|
926
|
+
vec_t<Length, Type>& adj_v, slice_t& adj_slice, vec_t<SliceLength, Type>& adj_a
|
|
927
|
+
) {}
|
|
928
|
+
|
|
929
|
+
|
|
930
|
+
template<unsigned Length, typename Type>
|
|
931
|
+
inline CUDA_CALLABLE void bit_or_inplace(vec_t<Length, Type>& v, int idx, Type value)
|
|
932
|
+
{
|
|
933
|
+
#ifndef NDEBUG
|
|
934
|
+
if (idx < -(int)Length || idx >= (int)Length)
|
|
935
|
+
{
|
|
936
|
+
printf("vec index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
|
|
937
|
+
assert(0);
|
|
938
|
+
}
|
|
939
|
+
#endif
|
|
940
|
+
|
|
941
|
+
if (idx < 0)
|
|
942
|
+
{
|
|
943
|
+
idx += Length;
|
|
944
|
+
}
|
|
945
|
+
|
|
946
|
+
v[idx] |= value;
|
|
947
|
+
}
|
|
948
|
+
|
|
949
|
+
|
|
950
|
+
template<unsigned SliceLength, unsigned Length, typename Type>
|
|
951
|
+
inline CUDA_CALLABLE void bit_or_inplace(vec_t<Length, Type>& v, slice_t slice, const vec_t<SliceLength, Type> &a)
|
|
952
|
+
{
|
|
953
|
+
assert(slice.start >= 0 && slice.start <= (int)Length);
|
|
954
|
+
assert(slice.stop >= -1 && slice.stop <= (int)Length);
|
|
955
|
+
assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
|
|
956
|
+
assert(slice_get_length(slice) == SliceLength);
|
|
957
|
+
|
|
958
|
+
bool is_reversed = slice.step < 0;
|
|
959
|
+
|
|
960
|
+
int ii = 0;
|
|
961
|
+
for (
|
|
962
|
+
int i = slice.start;
|
|
963
|
+
is_reversed ? (i > slice.stop) : (i < slice.stop);
|
|
964
|
+
i += slice.step
|
|
965
|
+
)
|
|
966
|
+
{
|
|
967
|
+
v[i] |= a[ii];
|
|
968
|
+
++ii;
|
|
969
|
+
}
|
|
970
|
+
|
|
971
|
+
assert(ii == SliceLength);
|
|
972
|
+
}
|
|
973
|
+
|
|
974
|
+
|
|
975
|
+
template<unsigned Length, typename Type>
|
|
976
|
+
inline CUDA_CALLABLE void adj_bit_or_inplace(
|
|
977
|
+
vec_t<Length, Type>& v, int idx, Type value,
|
|
978
|
+
vec_t<Length, Type>& adj_v, int adj_idx, Type& adj_value
|
|
979
|
+
) {}
|
|
980
|
+
|
|
981
|
+
|
|
982
|
+
template<unsigned SliceLength, unsigned Length, typename Type>
|
|
983
|
+
inline CUDA_CALLABLE void adj_bit_or_inplace(
|
|
984
|
+
const vec_t<Length, Type>& v, slice_t slice, const vec_t<SliceLength, Type> &a,
|
|
985
|
+
vec_t<Length, Type>& adj_v, slice_t& adj_slice, vec_t<SliceLength, Type>& adj_a
|
|
986
|
+
) {}
|
|
987
|
+
|
|
988
|
+
|
|
989
|
+
template<unsigned Length, typename Type>
|
|
990
|
+
inline CUDA_CALLABLE void bit_xor_inplace(vec_t<Length, Type>& v, int idx, Type value)
|
|
991
|
+
{
|
|
992
|
+
#ifndef NDEBUG
|
|
993
|
+
if (idx < -(int)Length || idx >= (int)Length)
|
|
994
|
+
{
|
|
995
|
+
printf("vec index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
|
|
996
|
+
assert(0);
|
|
997
|
+
}
|
|
998
|
+
#endif
|
|
999
|
+
|
|
1000
|
+
if (idx < 0)
|
|
1001
|
+
{
|
|
1002
|
+
idx += Length;
|
|
1003
|
+
}
|
|
1004
|
+
|
|
1005
|
+
v[idx] ^= value;
|
|
1006
|
+
}
|
|
1007
|
+
|
|
1008
|
+
|
|
1009
|
+
template<unsigned SliceLength, unsigned Length, typename Type>
|
|
1010
|
+
inline CUDA_CALLABLE void bit_xor_inplace(vec_t<Length, Type>& v, slice_t slice, const vec_t<SliceLength, Type> &a)
|
|
1011
|
+
{
|
|
1012
|
+
assert(slice.start >= 0 && slice.start <= (int)Length);
|
|
1013
|
+
assert(slice.stop >= -1 && slice.stop <= (int)Length);
|
|
1014
|
+
assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
|
|
1015
|
+
assert(slice_get_length(slice) == SliceLength);
|
|
1016
|
+
|
|
1017
|
+
bool is_reversed = slice.step < 0;
|
|
1018
|
+
|
|
1019
|
+
int ii = 0;
|
|
1020
|
+
for (
|
|
1021
|
+
int i = slice.start;
|
|
1022
|
+
is_reversed ? (i > slice.stop) : (i < slice.stop);
|
|
1023
|
+
i += slice.step
|
|
1024
|
+
)
|
|
1025
|
+
{
|
|
1026
|
+
v[i] ^= a[ii];
|
|
1027
|
+
++ii;
|
|
1028
|
+
}
|
|
1029
|
+
|
|
1030
|
+
assert(ii == SliceLength);
|
|
1031
|
+
}
|
|
1032
|
+
|
|
1033
|
+
|
|
1034
|
+
template<unsigned Length, typename Type>
|
|
1035
|
+
inline CUDA_CALLABLE void adj_bit_xor_inplace(
|
|
1036
|
+
vec_t<Length, Type>& v, int idx, Type value,
|
|
1037
|
+
vec_t<Length, Type>& adj_v, int adj_idx, Type& adj_value
|
|
1038
|
+
) {}
|
|
1039
|
+
|
|
1040
|
+
|
|
1041
|
+
template<unsigned SliceLength, unsigned Length, typename Type>
|
|
1042
|
+
inline CUDA_CALLABLE void adj_bit_xor_inplace(
|
|
1043
|
+
const vec_t<Length, Type>& v, slice_t slice, const vec_t<SliceLength, Type> &a,
|
|
1044
|
+
vec_t<Length, Type>& adj_v, slice_t& adj_slice, vec_t<SliceLength, Type>& adj_a
|
|
1045
|
+
) {}
|
|
1046
|
+
|
|
1047
|
+
|
|
750
1048
|
template<unsigned Length, typename Type>
|
|
751
1049
|
inline CUDA_CALLABLE void assign_inplace(vec_t<Length, Type>& v, int idx, Type value)
|
|
752
1050
|
{
|
|
@@ -1303,21 +1601,6 @@ inline CUDA_CALLABLE void adj_add(vec_t<Length, Type> a, vec_t<Length, Type> b,
|
|
|
1303
1601
|
adj_b += adj_ret;
|
|
1304
1602
|
}
|
|
1305
1603
|
|
|
1306
|
-
template<unsigned Length, typename Type>
|
|
1307
|
-
inline CUDA_CALLABLE void adj_add(
|
|
1308
|
-
Type a, vec_t<Length, Type> b,
|
|
1309
|
-
Type& adj_a, vec_t<Length, Type>& adj_b,
|
|
1310
|
-
const vec_t<Length, Type>& adj_ret
|
|
1311
|
-
)
|
|
1312
|
-
{
|
|
1313
|
-
for (unsigned i = 0; i < Length; ++i)
|
|
1314
|
-
{
|
|
1315
|
-
adj_a += adj_ret.c[i];
|
|
1316
|
-
}
|
|
1317
|
-
|
|
1318
|
-
adj_b += adj_ret;
|
|
1319
|
-
}
|
|
1320
|
-
|
|
1321
1604
|
template<typename Type>
|
|
1322
1605
|
inline CUDA_CALLABLE void adj_add(vec_t<2, Type> a, vec_t<2, Type> b, vec_t<2, Type>& adj_a, vec_t<2, Type>& adj_b, const vec_t<2, Type>& adj_ret)
|
|
1323
1606
|
{
|
|
@@ -1345,21 +1628,6 @@ inline CUDA_CALLABLE void adj_sub(vec_t<Length, Type> a, vec_t<Length, Type> b,
|
|
|
1345
1628
|
adj_b -= adj_ret;
|
|
1346
1629
|
}
|
|
1347
1630
|
|
|
1348
|
-
template<unsigned Length, typename Type>
|
|
1349
|
-
inline CUDA_CALLABLE void adj_sub(
|
|
1350
|
-
Type a, vec_t<Length, Type> b,
|
|
1351
|
-
Type& adj_a, vec_t<Length, Type>& adj_b,
|
|
1352
|
-
const vec_t<Length, Type>& adj_ret
|
|
1353
|
-
)
|
|
1354
|
-
{
|
|
1355
|
-
for (unsigned i = 0; i < Length; ++i)
|
|
1356
|
-
{
|
|
1357
|
-
adj_a += adj_ret.c[i];
|
|
1358
|
-
}
|
|
1359
|
-
|
|
1360
|
-
adj_b -= adj_ret;
|
|
1361
|
-
}
|
|
1362
|
-
|
|
1363
1631
|
template<typename Type>
|
|
1364
1632
|
inline CUDA_CALLABLE void adj_sub(vec_t<2, Type> a, vec_t<2, Type> b, vec_t<2, Type>& adj_a, vec_t<2, Type>& adj_b, const vec_t<2, Type>& adj_ret)
|
|
1365
1633
|
{
|
|
@@ -1395,6 +1663,90 @@ inline CUDA_CALLABLE void adj_mod(vec_t<3, Type> a, vec_t<3, Type> b, vec_t<3, T
|
|
|
1395
1663
|
{
|
|
1396
1664
|
}
|
|
1397
1665
|
|
|
1666
|
+
template<unsigned Length, typename Type>
|
|
1667
|
+
inline CUDA_CALLABLE void adj_bit_and(vec_t<Length, Type> a, vec_t<Length, Type> b, vec_t<Length, Type>& adj_a, vec_t<Length, Type>& adj_b, const vec_t<Length, Type>& adj_ret)
|
|
1668
|
+
{
|
|
1669
|
+
}
|
|
1670
|
+
|
|
1671
|
+
template<typename Type>
|
|
1672
|
+
inline CUDA_CALLABLE void adj_bit_and(vec_t<2, Type> a, vec_t<2, Type> b, vec_t<2, Type>& adj_a, vec_t<2, Type>& adj_b, const vec_t<2, Type>& adj_ret)
|
|
1673
|
+
{
|
|
1674
|
+
}
|
|
1675
|
+
|
|
1676
|
+
template<typename Type>
|
|
1677
|
+
inline CUDA_CALLABLE void adj_bit_and(vec_t<3, Type> a, vec_t<3, Type> b, vec_t<3, Type>& adj_a, vec_t<3, Type>& adj_b, const vec_t<3, Type>& adj_ret)
|
|
1678
|
+
{
|
|
1679
|
+
}
|
|
1680
|
+
|
|
1681
|
+
template<unsigned Length, typename Type>
|
|
1682
|
+
inline CUDA_CALLABLE void adj_bit_or(vec_t<Length, Type> a, vec_t<Length, Type> b, vec_t<Length, Type>& adj_a, vec_t<Length, Type>& adj_b, const vec_t<Length, Type>& adj_ret)
|
|
1683
|
+
{
|
|
1684
|
+
}
|
|
1685
|
+
|
|
1686
|
+
template<typename Type>
|
|
1687
|
+
inline CUDA_CALLABLE void adj_bit_or(vec_t<2, Type> a, vec_t<2, Type> b, vec_t<2, Type>& adj_a, vec_t<2, Type>& adj_b, const vec_t<2, Type>& adj_ret)
|
|
1688
|
+
{
|
|
1689
|
+
}
|
|
1690
|
+
|
|
1691
|
+
template<typename Type>
|
|
1692
|
+
inline CUDA_CALLABLE void adj_bit_or(vec_t<3, Type> a, vec_t<3, Type> b, vec_t<3, Type>& adj_a, vec_t<3, Type>& adj_b, const vec_t<3, Type>& adj_ret)
|
|
1693
|
+
{
|
|
1694
|
+
}
|
|
1695
|
+
|
|
1696
|
+
template<unsigned Length, typename Type>
|
|
1697
|
+
inline CUDA_CALLABLE void adj_bit_xor(vec_t<Length, Type> a, vec_t<Length, Type> b, vec_t<Length, Type>& adj_a, vec_t<Length, Type>& adj_b, const vec_t<Length, Type>& adj_ret)
|
|
1698
|
+
{
|
|
1699
|
+
}
|
|
1700
|
+
|
|
1701
|
+
template<typename Type>
|
|
1702
|
+
inline CUDA_CALLABLE void adj_bit_xor(vec_t<2, Type> a, vec_t<2, Type> b, vec_t<2, Type>& adj_a, vec_t<2, Type>& adj_b, const vec_t<2, Type>& adj_ret)
|
|
1703
|
+
{
|
|
1704
|
+
}
|
|
1705
|
+
|
|
1706
|
+
template<typename Type>
|
|
1707
|
+
inline CUDA_CALLABLE void adj_bit_xor(vec_t<3, Type> a, vec_t<3, Type> b, vec_t<3, Type>& adj_a, vec_t<3, Type>& adj_b, const vec_t<3, Type>& adj_ret)
|
|
1708
|
+
{
|
|
1709
|
+
}
|
|
1710
|
+
|
|
1711
|
+
template<unsigned Length, typename Type>
|
|
1712
|
+
inline CUDA_CALLABLE void adj_lshift(vec_t<Length, Type> a, vec_t<Length, Type> b, vec_t<Length, Type>& adj_a, vec_t<Length, Type>& adj_b, const vec_t<Length, Type>& adj_ret)
|
|
1713
|
+
{
|
|
1714
|
+
}
|
|
1715
|
+
|
|
1716
|
+
template<typename Type>
|
|
1717
|
+
inline CUDA_CALLABLE void adj_lshift(vec_t<2, Type> a, vec_t<2, Type> b, vec_t<2, Type>& adj_a, vec_t<2, Type>& adj_b, const vec_t<2, Type>& adj_ret)
|
|
1718
|
+
{
|
|
1719
|
+
}
|
|
1720
|
+
|
|
1721
|
+
template<typename Type>
|
|
1722
|
+
inline CUDA_CALLABLE void adj_lshift(vec_t<3, Type> a, vec_t<3, Type> b, vec_t<3, Type>& adj_a, vec_t<3, Type>& adj_b, const vec_t<3, Type>& adj_ret)
|
|
1723
|
+
{
|
|
1724
|
+
}
|
|
1725
|
+
|
|
1726
|
+
template<unsigned Length, typename Type>
|
|
1727
|
+
inline CUDA_CALLABLE void adj_rshift(vec_t<Length, Type> a, vec_t<Length, Type> b, vec_t<Length, Type>& adj_a, vec_t<Length, Type>& adj_b, const vec_t<Length, Type>& adj_ret)
|
|
1728
|
+
{
|
|
1729
|
+
}
|
|
1730
|
+
|
|
1731
|
+
template<typename Type>
|
|
1732
|
+
inline CUDA_CALLABLE void adj_rshift(vec_t<2, Type> a, vec_t<2, Type> b, vec_t<2, Type>& adj_a, vec_t<2, Type>& adj_b, const vec_t<2, Type>& adj_ret)
|
|
1733
|
+
{
|
|
1734
|
+
}
|
|
1735
|
+
|
|
1736
|
+
template<typename Type>
|
|
1737
|
+
inline CUDA_CALLABLE void adj_rshift(vec_t<3, Type> a, vec_t<3, Type> b, vec_t<3, Type>& adj_a, vec_t<3, Type>& adj_b, const vec_t<3, Type>& adj_ret)
|
|
1738
|
+
{
|
|
1739
|
+
}
|
|
1740
|
+
|
|
1741
|
+
template<unsigned Length, typename Type>
|
|
1742
|
+
inline CUDA_CALLABLE void adj_invert(
|
|
1743
|
+
const vec_t<Length,Type>& v,
|
|
1744
|
+
vec_t<Length,Type>& adj_v,
|
|
1745
|
+
const vec_t<Length,Type>& adj_ret
|
|
1746
|
+
)
|
|
1747
|
+
{
|
|
1748
|
+
}
|
|
1749
|
+
|
|
1398
1750
|
template<unsigned Length, typename Type>
|
|
1399
1751
|
inline CUDA_CALLABLE void adj_dot(vec_t<Length, Type> a, vec_t<Length, Type> b, vec_t<Length, Type>& adj_a, vec_t<Length, Type>& adj_b, const Type adj_ret)
|
|
1400
1752
|
{
|
|
@@ -1679,6 +2031,42 @@ inline CUDA_CALLABLE vec_t<Length, Type> atomic_max(vec_t<Length, Type> * addr,
|
|
|
1679
2031
|
return ret;
|
|
1680
2032
|
}
|
|
1681
2033
|
|
|
2034
|
+
template<unsigned Length, typename Type>
|
|
2035
|
+
inline CUDA_CALLABLE vec_t<Length, Type> atomic_and(vec_t<Length, Type> * addr, vec_t<Length, Type> value)
|
|
2036
|
+
{
|
|
2037
|
+
vec_t<Length, Type> ret;
|
|
2038
|
+
for( unsigned i=0; i < Length; ++i )
|
|
2039
|
+
{
|
|
2040
|
+
ret[i] = atomic_and(&(addr -> c[i]), value[i]);
|
|
2041
|
+
}
|
|
2042
|
+
|
|
2043
|
+
return ret;
|
|
2044
|
+
}
|
|
2045
|
+
|
|
2046
|
+
template<unsigned Length, typename Type>
|
|
2047
|
+
inline CUDA_CALLABLE vec_t<Length, Type> atomic_or(vec_t<Length, Type> * addr, vec_t<Length, Type> value)
|
|
2048
|
+
{
|
|
2049
|
+
vec_t<Length, Type> ret;
|
|
2050
|
+
for( unsigned i=0; i < Length; ++i )
|
|
2051
|
+
{
|
|
2052
|
+
ret[i] = atomic_or(&(addr -> c[i]), value[i]);
|
|
2053
|
+
}
|
|
2054
|
+
|
|
2055
|
+
return ret;
|
|
2056
|
+
}
|
|
2057
|
+
|
|
2058
|
+
template<unsigned Length, typename Type>
|
|
2059
|
+
inline CUDA_CALLABLE vec_t<Length, Type> atomic_xor(vec_t<Length, Type> * addr, vec_t<Length, Type> value)
|
|
2060
|
+
{
|
|
2061
|
+
vec_t<Length, Type> ret;
|
|
2062
|
+
for( unsigned i=0; i < Length; ++i )
|
|
2063
|
+
{
|
|
2064
|
+
ret[i] = atomic_xor(&(addr -> c[i]), value[i]);
|
|
2065
|
+
}
|
|
2066
|
+
|
|
2067
|
+
return ret;
|
|
2068
|
+
}
|
|
2069
|
+
|
|
1682
2070
|
template<unsigned Length, typename Type>
|
|
1683
2071
|
inline CUDA_CALLABLE void adj_atomic_minmax(
|
|
1684
2072
|
vec_t<Length,Type> *addr,
|
|
@@ -1736,6 +2124,34 @@ template<unsigned Length> CUDA_CALLABLE inline void adj_atomic_add(vec_t<Length,
|
|
|
1736
2124
|
template<unsigned Length> CUDA_CALLABLE inline void adj_atomic_add(vec_t<Length, int64>* buf, const vec_t<Length, int64> &value) { }
|
|
1737
2125
|
template<unsigned Length> CUDA_CALLABLE inline void adj_atomic_add(vec_t<Length, uint64>* buf, const vec_t<Length, uint64> &value) { }
|
|
1738
2126
|
|
|
2127
|
+
// for bitwise operations we do not accumulate gradients
|
|
2128
|
+
template<unsigned Length> CUDA_CALLABLE inline void adj_atomic_and(vec_t<Length, int8>* buf, const vec_t<Length, int8> &value) { }
|
|
2129
|
+
template<unsigned Length> CUDA_CALLABLE inline void adj_atomic_and(vec_t<Length, uint8>* buf, const vec_t<Length, uint8> &value) { }
|
|
2130
|
+
template<unsigned Length> CUDA_CALLABLE inline void adj_atomic_and(vec_t<Length, int16>* buf, const vec_t<Length, int16> &value) { }
|
|
2131
|
+
template<unsigned Length> CUDA_CALLABLE inline void adj_atomic_and(vec_t<Length, uint16>* buf, const vec_t<Length, uint16> &value) { }
|
|
2132
|
+
template<unsigned Length> CUDA_CALLABLE inline void adj_atomic_and(vec_t<Length, int32>* buf, const vec_t<Length, int32> &value) { }
|
|
2133
|
+
template<unsigned Length> CUDA_CALLABLE inline void adj_atomic_and(vec_t<Length, uint32>* buf, const vec_t<Length, uint32> &value) { }
|
|
2134
|
+
template<unsigned Length> CUDA_CALLABLE inline void adj_atomic_and(vec_t<Length, int64>* buf, const vec_t<Length, int64> &value) { }
|
|
2135
|
+
template<unsigned Length> CUDA_CALLABLE inline void adj_atomic_and(vec_t<Length, uint64>* buf, const vec_t<Length, uint64> &value) { }
|
|
2136
|
+
|
|
2137
|
+
template<unsigned Length> CUDA_CALLABLE inline void adj_atomic_or(vec_t<Length, int8>* buf, const vec_t<Length, int8> &value) { }
|
|
2138
|
+
template<unsigned Length> CUDA_CALLABLE inline void adj_atomic_or(vec_t<Length, uint8>* buf, const vec_t<Length, uint8> &value) { }
|
|
2139
|
+
template<unsigned Length> CUDA_CALLABLE inline void adj_atomic_or(vec_t<Length, int16>* buf, const vec_t<Length, int16> &value) { }
|
|
2140
|
+
template<unsigned Length> CUDA_CALLABLE inline void adj_atomic_or(vec_t<Length, uint16>* buf, const vec_t<Length, uint16> &value) { }
|
|
2141
|
+
template<unsigned Length> CUDA_CALLABLE inline void adj_atomic_or(vec_t<Length, int32>* buf, const vec_t<Length, int32> &value) { }
|
|
2142
|
+
template<unsigned Length> CUDA_CALLABLE inline void adj_atomic_or(vec_t<Length, uint32>* buf, const vec_t<Length, uint32> &value) { }
|
|
2143
|
+
template<unsigned Length> CUDA_CALLABLE inline void adj_atomic_or(vec_t<Length, int64>* buf, const vec_t<Length, int64> &value) { }
|
|
2144
|
+
template<unsigned Length> CUDA_CALLABLE inline void adj_atomic_or(vec_t<Length, uint64>* buf, const vec_t<Length, uint64> &value) { }
|
|
2145
|
+
|
|
2146
|
+
template<unsigned Length> CUDA_CALLABLE inline void adj_atomic_xor(vec_t<Length, int8>* buf, const vec_t<Length, int8> &value) { }
|
|
2147
|
+
template<unsigned Length> CUDA_CALLABLE inline void adj_atomic_xor(vec_t<Length, uint8>* buf, const vec_t<Length, uint8> &value) { }
|
|
2148
|
+
template<unsigned Length> CUDA_CALLABLE inline void adj_atomic_xor(vec_t<Length, int16>* buf, const vec_t<Length, int16> &value) { }
|
|
2149
|
+
template<unsigned Length> CUDA_CALLABLE inline void adj_atomic_xor(vec_t<Length, uint16>* buf, const vec_t<Length, uint16> &value) { }
|
|
2150
|
+
template<unsigned Length> CUDA_CALLABLE inline void adj_atomic_xor(vec_t<Length, int32>* buf, const vec_t<Length, int32> &value) { }
|
|
2151
|
+
template<unsigned Length> CUDA_CALLABLE inline void adj_atomic_xor(vec_t<Length, uint32>* buf, const vec_t<Length, uint32> &value) { }
|
|
2152
|
+
template<unsigned Length> CUDA_CALLABLE inline void adj_atomic_xor(vec_t<Length, int64>* buf, const vec_t<Length, int64> &value) { }
|
|
2153
|
+
template<unsigned Length> CUDA_CALLABLE inline void adj_atomic_xor(vec_t<Length, uint64>* buf, const vec_t<Length, uint64> &value) { }
|
|
2154
|
+
|
|
1739
2155
|
|
|
1740
2156
|
// adjoints for some of the constructors, used in intersect.h
|
|
1741
2157
|
inline CUDA_CALLABLE void adj_vec2(float x, float y, float& adj_x, float& adj_y, const vec2& adj_ret)
|
warp/native/version.h
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
3
|
+
* SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
*
|
|
5
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
+
* you may not use this file except in compliance with the License.
|
|
7
|
+
* You may obtain a copy of the License at
|
|
8
|
+
*
|
|
9
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
*
|
|
11
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
* See the License for the specific language governing permissions and
|
|
15
|
+
* limitations under the License.
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
#ifndef WP_VERSION_H
|
|
19
|
+
#define WP_VERSION_H
|
|
20
|
+
|
|
21
|
+
#define WP_VERSION_STRING "1.10.0rc2"
|
|
22
|
+
|
|
23
|
+
#endif // WP_VERSION_H
|
warp/native/volume.cpp
CHANGED
|
@@ -205,7 +205,7 @@ void wp_volume_get_buffer_info(uint64_t id, void** buf, uint64_t* size)
|
|
|
205
205
|
|
|
206
206
|
void wp_volume_get_voxel_size(uint64_t id, float* dx, float* dy, float* dz)
|
|
207
207
|
{
|
|
208
|
-
*dx = *
|
|
208
|
+
*dx = *dy = *dz = 0.0f;
|
|
209
209
|
|
|
210
210
|
const VolumeDesc* volume;
|
|
211
211
|
if (volume_get_descriptor(id, volume))
|
warp/native/volume.cu
CHANGED
warp/native/volume.h
CHANGED
|
@@ -161,7 +161,7 @@ CUDA_CALLABLE inline void pnano_read(T &result, pnanovdb_buf_t buf, PNANOVDB_INO
|
|
|
161
161
|
pnano_read<T>(result, buf, address);
|
|
162
162
|
}
|
|
163
163
|
|
|
164
|
-
/// regular grid accessor (values stored in
|
|
164
|
+
/// regular grid accessor (values stored in leaves)
|
|
165
165
|
|
|
166
166
|
struct value_accessor_base
|
|
167
167
|
{
|