warp-lang 1.0.1__py3-none-manylinux2014_x86_64.whl → 1.1.0__py3-none-manylinux2014_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of warp-lang might be problematic. Click here for more details.
- warp/__init__.py +108 -97
- warp/__init__.pyi +1 -1
- warp/bin/warp-clang.so +0 -0
- warp/bin/warp.so +0 -0
- warp/build.py +115 -113
- warp/build_dll.py +383 -375
- warp/builtins.py +3425 -3354
- warp/codegen.py +2878 -2792
- warp/config.py +40 -36
- warp/constants.py +45 -45
- warp/context.py +5194 -5102
- warp/dlpack.py +442 -442
- warp/examples/__init__.py +16 -16
- warp/examples/assets/bear.usd +0 -0
- warp/examples/assets/bunny.usd +0 -0
- warp/examples/assets/cartpole.urdf +110 -110
- warp/examples/assets/crazyflie.usd +0 -0
- warp/examples/assets/cube.usd +0 -0
- warp/examples/assets/nv_ant.xml +92 -92
- warp/examples/assets/nv_humanoid.xml +183 -183
- warp/examples/assets/quadruped.urdf +267 -267
- warp/examples/assets/rocks.nvdb +0 -0
- warp/examples/assets/rocks.usd +0 -0
- warp/examples/assets/sphere.usd +0 -0
- warp/examples/benchmarks/benchmark_api.py +383 -383
- warp/examples/benchmarks/benchmark_cloth.py +278 -279
- warp/examples/benchmarks/benchmark_cloth_cupy.py +88 -88
- warp/examples/benchmarks/benchmark_cloth_jax.py +97 -100
- warp/examples/benchmarks/benchmark_cloth_numba.py +146 -142
- warp/examples/benchmarks/benchmark_cloth_numpy.py +77 -77
- warp/examples/benchmarks/benchmark_cloth_pytorch.py +86 -86
- warp/examples/benchmarks/benchmark_cloth_taichi.py +112 -112
- warp/examples/benchmarks/benchmark_cloth_warp.py +146 -146
- warp/examples/benchmarks/benchmark_launches.py +295 -295
- warp/examples/browse.py +29 -28
- warp/examples/core/example_dem.py +234 -221
- warp/examples/core/example_fluid.py +293 -267
- warp/examples/core/example_graph_capture.py +144 -129
- warp/examples/core/example_marching_cubes.py +188 -176
- warp/examples/core/example_mesh.py +174 -154
- warp/examples/core/example_mesh_intersect.py +205 -193
- warp/examples/core/example_nvdb.py +176 -169
- warp/examples/core/example_raycast.py +105 -89
- warp/examples/core/example_raymarch.py +199 -178
- warp/examples/core/example_render_opengl.py +185 -141
- warp/examples/core/example_sph.py +405 -389
- warp/examples/core/example_torch.py +222 -181
- warp/examples/core/example_wave.py +263 -249
- warp/examples/fem/bsr_utils.py +378 -380
- warp/examples/fem/example_apic_fluid.py +407 -391
- warp/examples/fem/example_convection_diffusion.py +182 -168
- warp/examples/fem/example_convection_diffusion_dg.py +219 -209
- warp/examples/fem/example_convection_diffusion_dg0.py +204 -194
- warp/examples/fem/example_deformed_geometry.py +177 -159
- warp/examples/fem/example_diffusion.py +201 -173
- warp/examples/fem/example_diffusion_3d.py +177 -152
- warp/examples/fem/example_diffusion_mgpu.py +221 -214
- warp/examples/fem/example_mixed_elasticity.py +244 -222
- warp/examples/fem/example_navier_stokes.py +259 -243
- warp/examples/fem/example_stokes.py +220 -192
- warp/examples/fem/example_stokes_transfer.py +265 -249
- warp/examples/fem/mesh_utils.py +133 -109
- warp/examples/fem/plot_utils.py +292 -287
- warp/examples/optim/example_bounce.py +260 -248
- warp/examples/optim/example_cloth_throw.py +222 -210
- warp/examples/optim/example_diffray.py +566 -535
- warp/examples/optim/example_drone.py +864 -835
- warp/examples/optim/example_inverse_kinematics.py +176 -169
- warp/examples/optim/example_inverse_kinematics_torch.py +185 -170
- warp/examples/optim/example_spring_cage.py +239 -234
- warp/examples/optim/example_trajectory.py +223 -201
- warp/examples/optim/example_walker.py +306 -292
- warp/examples/sim/example_cartpole.py +139 -128
- warp/examples/sim/example_cloth.py +196 -184
- warp/examples/sim/example_granular.py +124 -113
- warp/examples/sim/example_granular_collision_sdf.py +197 -185
- warp/examples/sim/example_jacobian_ik.py +236 -213
- warp/examples/sim/example_particle_chain.py +118 -106
- warp/examples/sim/example_quadruped.py +193 -179
- warp/examples/sim/example_rigid_chain.py +197 -189
- warp/examples/sim/example_rigid_contact.py +189 -176
- warp/examples/sim/example_rigid_force.py +127 -126
- warp/examples/sim/example_rigid_gyroscopic.py +109 -97
- warp/examples/sim/example_rigid_soft_contact.py +134 -124
- warp/examples/sim/example_soft_body.py +190 -178
- warp/fabric.py +337 -335
- warp/fem/__init__.py +60 -27
- warp/fem/cache.py +401 -388
- warp/fem/dirichlet.py +178 -179
- warp/fem/domain.py +262 -263
- warp/fem/field/__init__.py +100 -101
- warp/fem/field/field.py +148 -149
- warp/fem/field/nodal_field.py +298 -299
- warp/fem/field/restriction.py +22 -21
- warp/fem/field/test.py +180 -181
- warp/fem/field/trial.py +183 -183
- warp/fem/geometry/__init__.py +15 -19
- warp/fem/geometry/closest_point.py +69 -70
- warp/fem/geometry/deformed_geometry.py +270 -271
- warp/fem/geometry/element.py +744 -744
- warp/fem/geometry/geometry.py +184 -186
- warp/fem/geometry/grid_2d.py +380 -373
- warp/fem/geometry/grid_3d.py +441 -435
- warp/fem/geometry/hexmesh.py +953 -953
- warp/fem/geometry/partition.py +374 -376
- warp/fem/geometry/quadmesh_2d.py +532 -532
- warp/fem/geometry/tetmesh.py +840 -840
- warp/fem/geometry/trimesh_2d.py +577 -577
- warp/fem/integrate.py +1630 -1615
- warp/fem/operator.py +190 -191
- warp/fem/polynomial.py +214 -213
- warp/fem/quadrature/__init__.py +2 -2
- warp/fem/quadrature/pic_quadrature.py +243 -245
- warp/fem/quadrature/quadrature.py +295 -294
- warp/fem/space/__init__.py +294 -292
- warp/fem/space/basis_space.py +488 -489
- warp/fem/space/collocated_function_space.py +100 -105
- warp/fem/space/dof_mapper.py +236 -236
- warp/fem/space/function_space.py +148 -145
- warp/fem/space/grid_2d_function_space.py +267 -267
- warp/fem/space/grid_3d_function_space.py +305 -306
- warp/fem/space/hexmesh_function_space.py +350 -352
- warp/fem/space/partition.py +350 -350
- warp/fem/space/quadmesh_2d_function_space.py +368 -369
- warp/fem/space/restriction.py +158 -160
- warp/fem/space/shape/__init__.py +13 -15
- warp/fem/space/shape/cube_shape_function.py +738 -738
- warp/fem/space/shape/shape_function.py +102 -103
- warp/fem/space/shape/square_shape_function.py +611 -611
- warp/fem/space/shape/tet_shape_function.py +565 -567
- warp/fem/space/shape/triangle_shape_function.py +429 -429
- warp/fem/space/tetmesh_function_space.py +294 -292
- warp/fem/space/topology.py +297 -295
- warp/fem/space/trimesh_2d_function_space.py +223 -221
- warp/fem/types.py +77 -77
- warp/fem/utils.py +495 -495
- warp/jax.py +166 -141
- warp/jax_experimental.py +341 -339
- warp/native/array.h +1072 -1025
- warp/native/builtin.h +1560 -1560
- warp/native/bvh.cpp +398 -398
- warp/native/bvh.cu +525 -525
- warp/native/bvh.h +429 -429
- warp/native/clang/clang.cpp +495 -464
- warp/native/crt.cpp +31 -31
- warp/native/crt.h +334 -334
- warp/native/cuda_crt.h +1049 -1049
- warp/native/cuda_util.cpp +549 -540
- warp/native/cuda_util.h +288 -203
- warp/native/cutlass_gemm.cpp +34 -34
- warp/native/cutlass_gemm.cu +372 -372
- warp/native/error.cpp +66 -66
- warp/native/error.h +27 -27
- warp/native/fabric.h +228 -228
- warp/native/hashgrid.cpp +301 -278
- warp/native/hashgrid.cu +78 -77
- warp/native/hashgrid.h +227 -227
- warp/native/initializer_array.h +32 -32
- warp/native/intersect.h +1204 -1204
- warp/native/intersect_adj.h +365 -365
- warp/native/intersect_tri.h +322 -322
- warp/native/marching.cpp +2 -2
- warp/native/marching.cu +497 -497
- warp/native/marching.h +2 -2
- warp/native/mat.h +1498 -1498
- warp/native/matnn.h +333 -333
- warp/native/mesh.cpp +203 -203
- warp/native/mesh.cu +293 -293
- warp/native/mesh.h +1887 -1887
- warp/native/nanovdb/NanoVDB.h +4782 -4782
- warp/native/nanovdb/PNanoVDB.h +2553 -2553
- warp/native/nanovdb/PNanoVDBWrite.h +294 -294
- warp/native/noise.h +850 -850
- warp/native/quat.h +1084 -1084
- warp/native/rand.h +299 -299
- warp/native/range.h +108 -108
- warp/native/reduce.cpp +156 -156
- warp/native/reduce.cu +348 -348
- warp/native/runlength_encode.cpp +61 -61
- warp/native/runlength_encode.cu +46 -46
- warp/native/scan.cpp +30 -30
- warp/native/scan.cu +36 -36
- warp/native/scan.h +7 -7
- warp/native/solid_angle.h +442 -442
- warp/native/sort.cpp +94 -94
- warp/native/sort.cu +97 -97
- warp/native/sort.h +14 -14
- warp/native/sparse.cpp +337 -337
- warp/native/sparse.cu +544 -544
- warp/native/spatial.h +630 -630
- warp/native/svd.h +562 -562
- warp/native/temp_buffer.h +30 -30
- warp/native/vec.h +1132 -1132
- warp/native/volume.cpp +297 -297
- warp/native/volume.cu +32 -32
- warp/native/volume.h +538 -538
- warp/native/volume_builder.cu +425 -425
- warp/native/volume_builder.h +19 -19
- warp/native/warp.cpp +1057 -1052
- warp/native/warp.cu +2943 -2828
- warp/native/warp.h +313 -305
- warp/optim/__init__.py +9 -9
- warp/optim/adam.py +120 -120
- warp/optim/linear.py +1104 -939
- warp/optim/sgd.py +104 -92
- warp/render/__init__.py +10 -10
- warp/render/render_opengl.py +3217 -3204
- warp/render/render_usd.py +768 -749
- warp/render/utils.py +152 -150
- warp/sim/__init__.py +52 -59
- warp/sim/articulation.py +685 -685
- warp/sim/collide.py +1594 -1590
- warp/sim/import_mjcf.py +489 -481
- warp/sim/import_snu.py +220 -221
- warp/sim/import_urdf.py +536 -516
- warp/sim/import_usd.py +887 -881
- warp/sim/inertia.py +316 -317
- warp/sim/integrator.py +234 -233
- warp/sim/integrator_euler.py +1956 -1956
- warp/sim/integrator_featherstone.py +1910 -1991
- warp/sim/integrator_xpbd.py +3294 -3312
- warp/sim/model.py +4473 -4314
- warp/sim/particles.py +113 -112
- warp/sim/render.py +417 -403
- warp/sim/utils.py +413 -410
- warp/sparse.py +1227 -1227
- warp/stubs.py +2109 -2469
- warp/tape.py +1162 -225
- warp/tests/__init__.py +1 -1
- warp/tests/__main__.py +4 -4
- warp/tests/assets/torus.usda +105 -105
- warp/tests/aux_test_class_kernel.py +26 -26
- warp/tests/aux_test_compile_consts_dummy.py +10 -10
- warp/tests/aux_test_conditional_unequal_types_kernels.py +21 -21
- warp/tests/aux_test_dependent.py +22 -22
- warp/tests/aux_test_grad_customs.py +23 -23
- warp/tests/aux_test_reference.py +11 -11
- warp/tests/aux_test_reference_reference.py +10 -10
- warp/tests/aux_test_square.py +17 -17
- warp/tests/aux_test_unresolved_func.py +14 -14
- warp/tests/aux_test_unresolved_symbol.py +14 -14
- warp/tests/disabled_kinematics.py +239 -239
- warp/tests/run_coverage_serial.py +31 -31
- warp/tests/test_adam.py +157 -157
- warp/tests/test_arithmetic.py +1124 -1124
- warp/tests/test_array.py +2417 -2326
- warp/tests/test_array_reduce.py +150 -150
- warp/tests/test_async.py +668 -656
- warp/tests/test_atomic.py +141 -141
- warp/tests/test_bool.py +204 -149
- warp/tests/test_builtins_resolution.py +1292 -1292
- warp/tests/test_bvh.py +164 -171
- warp/tests/test_closest_point_edge_edge.py +228 -228
- warp/tests/test_codegen.py +566 -553
- warp/tests/test_compile_consts.py +97 -101
- warp/tests/test_conditional.py +246 -246
- warp/tests/test_copy.py +232 -215
- warp/tests/test_ctypes.py +632 -632
- warp/tests/test_dense.py +67 -67
- warp/tests/test_devices.py +91 -98
- warp/tests/test_dlpack.py +530 -529
- warp/tests/test_examples.py +400 -378
- warp/tests/test_fabricarray.py +955 -955
- warp/tests/test_fast_math.py +62 -54
- warp/tests/test_fem.py +1277 -1278
- warp/tests/test_fp16.py +130 -130
- warp/tests/test_func.py +338 -337
- warp/tests/test_generics.py +571 -571
- warp/tests/test_grad.py +746 -640
- warp/tests/test_grad_customs.py +333 -336
- warp/tests/test_hash_grid.py +210 -164
- warp/tests/test_import.py +39 -39
- warp/tests/test_indexedarray.py +1134 -1134
- warp/tests/test_intersect.py +67 -67
- warp/tests/test_jax.py +307 -307
- warp/tests/test_large.py +167 -164
- warp/tests/test_launch.py +354 -354
- warp/tests/test_lerp.py +261 -261
- warp/tests/test_linear_solvers.py +191 -171
- warp/tests/test_lvalue.py +421 -493
- warp/tests/test_marching_cubes.py +65 -65
- warp/tests/test_mat.py +1801 -1827
- warp/tests/test_mat_lite.py +115 -115
- warp/tests/test_mat_scalar_ops.py +2907 -2889
- warp/tests/test_math.py +126 -193
- warp/tests/test_matmul.py +500 -499
- warp/tests/test_matmul_lite.py +410 -410
- warp/tests/test_mempool.py +188 -190
- warp/tests/test_mesh.py +284 -324
- warp/tests/test_mesh_query_aabb.py +228 -241
- warp/tests/test_mesh_query_point.py +692 -702
- warp/tests/test_mesh_query_ray.py +292 -303
- warp/tests/test_mlp.py +276 -276
- warp/tests/test_model.py +110 -110
- warp/tests/test_modules_lite.py +39 -39
- warp/tests/test_multigpu.py +163 -163
- warp/tests/test_noise.py +248 -248
- warp/tests/test_operators.py +250 -250
- warp/tests/test_options.py +123 -125
- warp/tests/test_peer.py +133 -137
- warp/tests/test_pinned.py +78 -78
- warp/tests/test_print.py +54 -54
- warp/tests/test_quat.py +2086 -2086
- warp/tests/test_rand.py +288 -288
- warp/tests/test_reload.py +217 -217
- warp/tests/test_rounding.py +179 -179
- warp/tests/test_runlength_encode.py +190 -190
- warp/tests/test_sim_grad.py +243 -0
- warp/tests/test_sim_kinematics.py +91 -97
- warp/tests/test_smoothstep.py +168 -168
- warp/tests/test_snippet.py +305 -266
- warp/tests/test_sparse.py +468 -460
- warp/tests/test_spatial.py +2148 -2148
- warp/tests/test_streams.py +486 -473
- warp/tests/test_struct.py +710 -675
- warp/tests/test_tape.py +173 -148
- warp/tests/test_torch.py +743 -743
- warp/tests/test_transient_module.py +87 -87
- warp/tests/test_types.py +556 -659
- warp/tests/test_utils.py +490 -499
- warp/tests/test_vec.py +1264 -1268
- warp/tests/test_vec_lite.py +73 -73
- warp/tests/test_vec_scalar_ops.py +2099 -2099
- warp/tests/test_verify_fp.py +94 -94
- warp/tests/test_volume.py +737 -736
- warp/tests/test_volume_write.py +255 -265
- warp/tests/unittest_serial.py +37 -37
- warp/tests/unittest_suites.py +363 -359
- warp/tests/unittest_utils.py +603 -578
- warp/tests/unused_test_misc.py +71 -71
- warp/tests/walkthrough_debug.py +85 -85
- warp/thirdparty/appdirs.py +598 -598
- warp/thirdparty/dlpack.py +143 -143
- warp/thirdparty/unittest_parallel.py +566 -561
- warp/torch.py +321 -295
- warp/types.py +4504 -4450
- warp/utils.py +1008 -821
- {warp_lang-1.0.1.dist-info → warp_lang-1.1.0.dist-info}/LICENSE.md +126 -126
- {warp_lang-1.0.1.dist-info → warp_lang-1.1.0.dist-info}/METADATA +338 -400
- warp_lang-1.1.0.dist-info/RECORD +352 -0
- warp/examples/assets/cube.usda +0 -42
- warp/examples/assets/sphere.usda +0 -56
- warp/examples/assets/torus.usda +0 -105
- warp_lang-1.0.1.dist-info/RECORD +0 -352
- {warp_lang-1.0.1.dist-info → warp_lang-1.1.0.dist-info}/WHEEL +0 -0
- {warp_lang-1.0.1.dist-info → warp_lang-1.1.0.dist-info}/top_level.txt +0 -0
warp/native/hashgrid.cpp
CHANGED
|
@@ -1,279 +1,302 @@
|
|
|
1
|
-
/** Copyright (c) 2022 NVIDIA CORPORATION. All rights reserved.
|
|
2
|
-
* NVIDIA CORPORATION and its licensors retain all intellectual property
|
|
3
|
-
* and proprietary rights in and to this software, related documentation
|
|
4
|
-
* and any modifications thereto. Any use, reproduction, disclosure or
|
|
5
|
-
* distribution of this software and related documentation without an express
|
|
6
|
-
* license agreement from NVIDIA CORPORATION is strictly prohibited.
|
|
7
|
-
*/
|
|
8
|
-
|
|
9
|
-
#include "warp.h"
|
|
10
|
-
#include "cuda_util.h"
|
|
11
|
-
#include "hashgrid.h"
|
|
12
|
-
#include "sort.h"
|
|
13
|
-
#include "string.h"
|
|
14
|
-
|
|
15
|
-
using namespace wp;
|
|
16
|
-
|
|
17
|
-
#include <map>
|
|
18
|
-
|
|
19
|
-
namespace
|
|
20
|
-
{
|
|
21
|
-
// host-side copy of mesh descriptors, maps GPU mesh address (id) to a CPU desc
|
|
22
|
-
std::map<uint64_t, HashGrid> g_hash_grid_descriptors;
|
|
23
|
-
|
|
24
|
-
} // anonymous namespace
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
namespace wp
|
|
28
|
-
{
|
|
29
|
-
|
|
30
|
-
bool hash_grid_get_descriptor(uint64_t id, HashGrid& grid)
|
|
31
|
-
{
|
|
32
|
-
const auto& iter = g_hash_grid_descriptors.find(id);
|
|
33
|
-
if (iter == g_hash_grid_descriptors.end())
|
|
34
|
-
return false;
|
|
35
|
-
else
|
|
36
|
-
grid = iter->second;
|
|
37
|
-
return true;
|
|
38
|
-
}
|
|
39
|
-
|
|
40
|
-
void hash_grid_add_descriptor(uint64_t id, const HashGrid& grid)
|
|
41
|
-
{
|
|
42
|
-
g_hash_grid_descriptors[id] = grid;
|
|
43
|
-
}
|
|
44
|
-
|
|
45
|
-
void hash_grid_rem_descriptor(uint64_t id)
|
|
46
|
-
{
|
|
47
|
-
g_hash_grid_descriptors.erase(id);
|
|
48
|
-
|
|
49
|
-
}
|
|
50
|
-
|
|
51
|
-
// implemented in hashgrid.cu
|
|
52
|
-
void hash_grid_rebuild_device(const HashGrid& grid, const wp::vec3
|
|
53
|
-
|
|
54
|
-
} // namespace wp
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
// host methods
|
|
58
|
-
uint64_t hash_grid_create_host(int dim_x, int dim_y, int dim_z)
|
|
59
|
-
{
|
|
60
|
-
HashGrid* grid = new HashGrid();
|
|
61
|
-
memset(grid, 0, sizeof(HashGrid));
|
|
62
|
-
|
|
63
|
-
grid->dim_x = dim_x;
|
|
64
|
-
grid->dim_y = dim_y;
|
|
65
|
-
grid->dim_z = dim_z;
|
|
66
|
-
|
|
67
|
-
const int num_cells = dim_x*dim_y*dim_z;
|
|
68
|
-
grid->cell_starts = (int*)alloc_host(num_cells*sizeof(int));
|
|
69
|
-
grid->cell_ends = (int*)alloc_host(num_cells*sizeof(int));
|
|
70
|
-
|
|
71
|
-
return (uint64_t)(grid);
|
|
72
|
-
}
|
|
73
|
-
|
|
74
|
-
void hash_grid_destroy_host(uint64_t id)
|
|
75
|
-
{
|
|
76
|
-
HashGrid* grid = (HashGrid*)(id);
|
|
77
|
-
|
|
78
|
-
free_host(grid->point_ids);
|
|
79
|
-
free_host(grid->point_cells);
|
|
80
|
-
free_host(grid->cell_starts);
|
|
81
|
-
free_host(grid->cell_ends);
|
|
82
|
-
|
|
83
|
-
delete grid;
|
|
84
|
-
}
|
|
85
|
-
|
|
86
|
-
void hash_grid_reserve_host(uint64_t id, int num_points)
|
|
87
|
-
{
|
|
88
|
-
HashGrid* grid = (HashGrid*)(id);
|
|
89
|
-
|
|
90
|
-
if (num_points > grid->max_points)
|
|
91
|
-
{
|
|
92
|
-
free_host(grid->point_cells);
|
|
93
|
-
free_host(grid->point_ids);
|
|
94
|
-
|
|
95
|
-
const int num_to_alloc = num_points*3/2;
|
|
96
|
-
grid->point_cells = (int*)alloc_host(2*num_to_alloc*sizeof(int)); // *2 for auxiliary radix buffers
|
|
97
|
-
grid->point_ids = (int*)alloc_host(2*num_to_alloc*sizeof(int)); // *2 for auxiliary radix buffers
|
|
98
|
-
|
|
99
|
-
grid->max_points = num_to_alloc;
|
|
100
|
-
}
|
|
101
|
-
|
|
102
|
-
grid->num_points = num_points;
|
|
103
|
-
}
|
|
104
|
-
|
|
105
|
-
void hash_grid_update_host(uint64_t id, float cell_width, const wp::vec3
|
|
106
|
-
{
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
//
|
|
129
|
-
for (int i=0; i < num_points; ++i)
|
|
130
|
-
{
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
{
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
//
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
{
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
1
|
+
/** Copyright (c) 2022 NVIDIA CORPORATION. All rights reserved.
|
|
2
|
+
* NVIDIA CORPORATION and its licensors retain all intellectual property
|
|
3
|
+
* and proprietary rights in and to this software, related documentation
|
|
4
|
+
* and any modifications thereto. Any use, reproduction, disclosure or
|
|
5
|
+
* distribution of this software and related documentation without an express
|
|
6
|
+
* license agreement from NVIDIA CORPORATION is strictly prohibited.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
#include "warp.h"
|
|
10
|
+
#include "cuda_util.h"
|
|
11
|
+
#include "hashgrid.h"
|
|
12
|
+
#include "sort.h"
|
|
13
|
+
#include "string.h"
|
|
14
|
+
|
|
15
|
+
using namespace wp;
|
|
16
|
+
|
|
17
|
+
#include <map>
|
|
18
|
+
|
|
19
|
+
namespace
|
|
20
|
+
{
|
|
21
|
+
// host-side copy of mesh descriptors, maps GPU mesh address (id) to a CPU desc
|
|
22
|
+
std::map<uint64_t, HashGrid> g_hash_grid_descriptors;
|
|
23
|
+
|
|
24
|
+
} // anonymous namespace
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
namespace wp
|
|
28
|
+
{
|
|
29
|
+
|
|
30
|
+
bool hash_grid_get_descriptor(uint64_t id, HashGrid& grid)
|
|
31
|
+
{
|
|
32
|
+
const auto& iter = g_hash_grid_descriptors.find(id);
|
|
33
|
+
if (iter == g_hash_grid_descriptors.end())
|
|
34
|
+
return false;
|
|
35
|
+
else
|
|
36
|
+
grid = iter->second;
|
|
37
|
+
return true;
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
void hash_grid_add_descriptor(uint64_t id, const HashGrid& grid)
|
|
41
|
+
{
|
|
42
|
+
g_hash_grid_descriptors[id] = grid;
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
void hash_grid_rem_descriptor(uint64_t id)
|
|
46
|
+
{
|
|
47
|
+
g_hash_grid_descriptors.erase(id);
|
|
48
|
+
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
// implemented in hashgrid.cu
|
|
52
|
+
void hash_grid_rebuild_device(const HashGrid& grid, const wp::array_t<wp::vec3>& points);
|
|
53
|
+
|
|
54
|
+
} // namespace wp
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
// host methods
|
|
58
|
+
uint64_t hash_grid_create_host(int dim_x, int dim_y, int dim_z)
|
|
59
|
+
{
|
|
60
|
+
HashGrid* grid = new HashGrid();
|
|
61
|
+
memset(grid, 0, sizeof(HashGrid));
|
|
62
|
+
|
|
63
|
+
grid->dim_x = dim_x;
|
|
64
|
+
grid->dim_y = dim_y;
|
|
65
|
+
grid->dim_z = dim_z;
|
|
66
|
+
|
|
67
|
+
const int num_cells = dim_x*dim_y*dim_z;
|
|
68
|
+
grid->cell_starts = (int*)alloc_host(num_cells*sizeof(int));
|
|
69
|
+
grid->cell_ends = (int*)alloc_host(num_cells*sizeof(int));
|
|
70
|
+
|
|
71
|
+
return (uint64_t)(grid);
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
void hash_grid_destroy_host(uint64_t id)
|
|
75
|
+
{
|
|
76
|
+
HashGrid* grid = (HashGrid*)(id);
|
|
77
|
+
|
|
78
|
+
free_host(grid->point_ids);
|
|
79
|
+
free_host(grid->point_cells);
|
|
80
|
+
free_host(grid->cell_starts);
|
|
81
|
+
free_host(grid->cell_ends);
|
|
82
|
+
|
|
83
|
+
delete grid;
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
void hash_grid_reserve_host(uint64_t id, int num_points)
|
|
87
|
+
{
|
|
88
|
+
HashGrid* grid = (HashGrid*)(id);
|
|
89
|
+
|
|
90
|
+
if (num_points > grid->max_points)
|
|
91
|
+
{
|
|
92
|
+
free_host(grid->point_cells);
|
|
93
|
+
free_host(grid->point_ids);
|
|
94
|
+
|
|
95
|
+
const int num_to_alloc = num_points*3/2;
|
|
96
|
+
grid->point_cells = (int*)alloc_host(2*num_to_alloc*sizeof(int)); // *2 for auxiliary radix buffers
|
|
97
|
+
grid->point_ids = (int*)alloc_host(2*num_to_alloc*sizeof(int)); // *2 for auxiliary radix buffers
|
|
98
|
+
|
|
99
|
+
grid->max_points = num_to_alloc;
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
grid->num_points = num_points;
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
void hash_grid_update_host(uint64_t id, float cell_width, const wp::array_t<wp::vec3>* points)
|
|
106
|
+
{
|
|
107
|
+
// Python enforces this, but let's be defensive anyways
|
|
108
|
+
if (!points || points->ndim != 1)
|
|
109
|
+
{
|
|
110
|
+
fprintf(stderr, "Warp error: Invalid points array passed to %s\n", __FUNCTION__);
|
|
111
|
+
return;
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
if (!id)
|
|
115
|
+
{
|
|
116
|
+
fprintf(stderr, "Warp error: Invalid grid passed to %s\n", __FUNCTION__);
|
|
117
|
+
return;
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
HashGrid* grid = (HashGrid*)(id);
|
|
121
|
+
int num_points = points->shape[0];
|
|
122
|
+
|
|
123
|
+
hash_grid_reserve_host(id, num_points);
|
|
124
|
+
|
|
125
|
+
grid->cell_width = cell_width;
|
|
126
|
+
grid->cell_width_inv = 1.0f / cell_width;
|
|
127
|
+
|
|
128
|
+
// calculate cell for each position
|
|
129
|
+
for (int i=0; i < num_points; ++i)
|
|
130
|
+
{
|
|
131
|
+
const vec3& point = wp::index(*points, i);
|
|
132
|
+
grid->point_cells[i] = hash_grid_index(*grid, point);
|
|
133
|
+
grid->point_ids[i] = i;
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
// sort indices
|
|
137
|
+
radix_sort_pairs_host(grid->point_cells, grid->point_ids, num_points);
|
|
138
|
+
|
|
139
|
+
const int num_cells = grid->dim_x * grid->dim_y * grid->dim_z;
|
|
140
|
+
memset(grid->cell_starts, 0, sizeof(int) * num_cells);
|
|
141
|
+
memset(grid->cell_ends, 0, sizeof(int) * num_cells);
|
|
142
|
+
|
|
143
|
+
// compute cell start / end
|
|
144
|
+
for (int i=0; i < num_points; ++i)
|
|
145
|
+
{
|
|
146
|
+
// scan the particle-cell array to find the start and end
|
|
147
|
+
const int c = grid->point_cells[i];
|
|
148
|
+
|
|
149
|
+
if (i == 0)
|
|
150
|
+
grid->cell_starts[c] = 0;
|
|
151
|
+
else
|
|
152
|
+
{
|
|
153
|
+
const int p = grid->point_cells[i-1];
|
|
154
|
+
|
|
155
|
+
if (c != p)
|
|
156
|
+
{
|
|
157
|
+
grid->cell_starts[c] = i;
|
|
158
|
+
grid->cell_ends[p] = i;
|
|
159
|
+
}
|
|
160
|
+
}
|
|
161
|
+
|
|
162
|
+
if (i == num_points - 1)
|
|
163
|
+
{
|
|
164
|
+
grid->cell_ends[c] = i + 1;
|
|
165
|
+
}
|
|
166
|
+
}
|
|
167
|
+
}
|
|
168
|
+
|
|
169
|
+
// device methods
|
|
170
|
+
uint64_t hash_grid_create_device(void* context, int dim_x, int dim_y, int dim_z)
|
|
171
|
+
{
|
|
172
|
+
ContextGuard guard(context);
|
|
173
|
+
|
|
174
|
+
HashGrid grid;
|
|
175
|
+
memset(&grid, 0, sizeof(HashGrid));
|
|
176
|
+
|
|
177
|
+
grid.context = context ? context : cuda_context_get_current();
|
|
178
|
+
|
|
179
|
+
grid.dim_x = dim_x;
|
|
180
|
+
grid.dim_y = dim_y;
|
|
181
|
+
grid.dim_z = dim_z;
|
|
182
|
+
|
|
183
|
+
const int num_cells = dim_x*dim_y*dim_z;
|
|
184
|
+
grid.cell_starts = (int*)alloc_device(WP_CURRENT_CONTEXT, num_cells*sizeof(int));
|
|
185
|
+
grid.cell_ends = (int*)alloc_device(WP_CURRENT_CONTEXT, num_cells*sizeof(int));
|
|
186
|
+
|
|
187
|
+
// upload to device
|
|
188
|
+
HashGrid* grid_device = (HashGrid*)(alloc_device(WP_CURRENT_CONTEXT, sizeof(HashGrid)));
|
|
189
|
+
memcpy_h2d(WP_CURRENT_CONTEXT, grid_device, &grid, sizeof(HashGrid));
|
|
190
|
+
|
|
191
|
+
uint64_t grid_id = (uint64_t)(grid_device);
|
|
192
|
+
hash_grid_add_descriptor(grid_id, grid);
|
|
193
|
+
|
|
194
|
+
return grid_id;
|
|
195
|
+
}
|
|
196
|
+
|
|
197
|
+
void hash_grid_destroy_device(uint64_t id)
|
|
198
|
+
{
|
|
199
|
+
HashGrid grid;
|
|
200
|
+
if (hash_grid_get_descriptor(id, grid))
|
|
201
|
+
{
|
|
202
|
+
ContextGuard guard(grid.context);
|
|
203
|
+
|
|
204
|
+
free_device(WP_CURRENT_CONTEXT, grid.point_ids);
|
|
205
|
+
free_device(WP_CURRENT_CONTEXT, grid.point_cells);
|
|
206
|
+
free_device(WP_CURRENT_CONTEXT, grid.cell_starts);
|
|
207
|
+
free_device(WP_CURRENT_CONTEXT, grid.cell_ends);
|
|
208
|
+
|
|
209
|
+
free_device(WP_CURRENT_CONTEXT, (HashGrid*)id);
|
|
210
|
+
|
|
211
|
+
hash_grid_rem_descriptor(id);
|
|
212
|
+
}
|
|
213
|
+
}
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
void hash_grid_reserve_device(uint64_t id, int num_points)
|
|
217
|
+
{
|
|
218
|
+
HashGrid grid;
|
|
219
|
+
|
|
220
|
+
if (hash_grid_get_descriptor(id, grid))
|
|
221
|
+
{
|
|
222
|
+
if (num_points > grid.max_points)
|
|
223
|
+
{
|
|
224
|
+
ContextGuard guard(grid.context);
|
|
225
|
+
|
|
226
|
+
free_device(WP_CURRENT_CONTEXT, grid.point_cells);
|
|
227
|
+
free_device(WP_CURRENT_CONTEXT, grid.point_ids);
|
|
228
|
+
|
|
229
|
+
const int num_to_alloc = num_points*3/2;
|
|
230
|
+
grid.point_cells = (int*)alloc_device(WP_CURRENT_CONTEXT, 2*num_to_alloc*sizeof(int)); // *2 for auxiliary radix buffers
|
|
231
|
+
grid.point_ids = (int*)alloc_device(WP_CURRENT_CONTEXT, 2*num_to_alloc*sizeof(int)); // *2 for auxiliary radix buffers
|
|
232
|
+
grid.max_points = num_to_alloc;
|
|
233
|
+
|
|
234
|
+
// ensure we pre-size our sort routine to avoid
|
|
235
|
+
// allocations during graph capture
|
|
236
|
+
radix_sort_reserve(WP_CURRENT_CONTEXT, num_to_alloc);
|
|
237
|
+
|
|
238
|
+
// update device side grid descriptor, todo: this is
|
|
239
|
+
// slightly redundant since it is performed again
|
|
240
|
+
// inside hash_grid_update_device(), but since
|
|
241
|
+
// reserve can be called from Python we need to make
|
|
242
|
+
// sure it is consistent
|
|
243
|
+
memcpy_h2d(WP_CURRENT_CONTEXT, (HashGrid*)id, &grid, sizeof(HashGrid));
|
|
244
|
+
|
|
245
|
+
// update host side grid descriptor
|
|
246
|
+
hash_grid_add_descriptor(id, grid);
|
|
247
|
+
}
|
|
248
|
+
}
|
|
249
|
+
}
|
|
250
|
+
|
|
251
|
+
void hash_grid_update_device(uint64_t id, float cell_width, const wp::array_t<wp::vec3>* points)
|
|
252
|
+
{
|
|
253
|
+
// Python enforces this, but let's be defensive anyways
|
|
254
|
+
if (!points || points->ndim != 1)
|
|
255
|
+
{
|
|
256
|
+
fprintf(stderr, "Warp error: Invalid points array passed to %s\n", __FUNCTION__);
|
|
257
|
+
return;
|
|
258
|
+
}
|
|
259
|
+
|
|
260
|
+
int num_points = points->shape[0];
|
|
261
|
+
|
|
262
|
+
// ensure we have enough memory reserved for update
|
|
263
|
+
// this must be done before retrieving the descriptor
|
|
264
|
+
// below since it may update it
|
|
265
|
+
hash_grid_reserve_device(id, num_points);
|
|
266
|
+
|
|
267
|
+
// host grid must be static so that we can
|
|
268
|
+
// perform host->device memcpy from this variable
|
|
269
|
+
// and have it safely recorded inside CUDA graphs
|
|
270
|
+
static HashGrid grid;
|
|
271
|
+
|
|
272
|
+
if (hash_grid_get_descriptor(id, grid))
|
|
273
|
+
{
|
|
274
|
+
ContextGuard guard(grid.context);
|
|
275
|
+
|
|
276
|
+
grid.num_points = num_points;
|
|
277
|
+
grid.cell_width = cell_width;
|
|
278
|
+
grid.cell_width_inv = 1.0f / cell_width;
|
|
279
|
+
|
|
280
|
+
hash_grid_rebuild_device(grid, *points);
|
|
281
|
+
|
|
282
|
+
// update device side grid descriptor
|
|
283
|
+
memcpy_h2d(WP_CURRENT_CONTEXT, (HashGrid*)id, &grid, sizeof(HashGrid));
|
|
284
|
+
|
|
285
|
+
// update host side grid descriptor
|
|
286
|
+
hash_grid_add_descriptor(id, grid);
|
|
287
|
+
}
|
|
288
|
+
}
|
|
289
|
+
|
|
290
|
+
#if !WP_ENABLE_CUDA
|
|
291
|
+
|
|
292
|
+
namespace wp
|
|
293
|
+
{
|
|
294
|
+
|
|
295
|
+
void hash_grid_rebuild_device(const HashGrid& grid, const wp::array_t<wp::vec3>& points)
|
|
296
|
+
{
|
|
297
|
+
|
|
298
|
+
}
|
|
299
|
+
|
|
300
|
+
} // namespace wp
|
|
301
|
+
|
|
279
302
|
#endif // !WP_ENABLE_CUDA
|