warp-lang 1.4.2__py3-none-manylinux2014_x86_64.whl → 1.5.1__py3-none-manylinux2014_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of warp-lang might be problematic. Click here for more details.
- warp/__init__.py +4 -0
- warp/autograd.py +43 -8
- warp/bin/warp-clang.so +0 -0
- warp/bin/warp.so +0 -0
- warp/build.py +21 -2
- warp/build_dll.py +23 -6
- warp/builtins.py +1819 -7
- warp/codegen.py +197 -61
- warp/config.py +2 -2
- warp/context.py +379 -107
- warp/examples/assets/pixel.jpg +0 -0
- warp/examples/benchmarks/benchmark_cloth_paddle.py +86 -0
- warp/examples/benchmarks/benchmark_gemm.py +121 -0
- warp/examples/benchmarks/benchmark_interop_paddle.py +158 -0
- warp/examples/benchmarks/benchmark_tile.py +179 -0
- warp/examples/fem/example_adaptive_grid.py +37 -10
- warp/examples/fem/example_apic_fluid.py +3 -2
- warp/examples/fem/example_convection_diffusion_dg.py +4 -5
- warp/examples/fem/example_deformed_geometry.py +1 -1
- warp/examples/fem/example_diffusion_3d.py +47 -4
- warp/examples/fem/example_distortion_energy.py +220 -0
- warp/examples/fem/example_magnetostatics.py +127 -85
- warp/examples/fem/example_nonconforming_contact.py +5 -5
- warp/examples/fem/example_stokes.py +3 -1
- warp/examples/fem/example_streamlines.py +12 -19
- warp/examples/fem/utils.py +38 -15
- warp/examples/sim/example_cloth.py +4 -25
- warp/examples/sim/example_quadruped.py +2 -1
- warp/examples/tile/example_tile_convolution.py +58 -0
- warp/examples/tile/example_tile_fft.py +47 -0
- warp/examples/tile/example_tile_filtering.py +105 -0
- warp/examples/tile/example_tile_matmul.py +79 -0
- warp/examples/tile/example_tile_mlp.py +375 -0
- warp/fem/__init__.py +8 -0
- warp/fem/cache.py +16 -12
- warp/fem/dirichlet.py +1 -1
- warp/fem/domain.py +44 -1
- warp/fem/field/__init__.py +1 -2
- warp/fem/field/field.py +31 -19
- warp/fem/field/nodal_field.py +101 -49
- warp/fem/field/virtual.py +794 -0
- warp/fem/geometry/__init__.py +2 -2
- warp/fem/geometry/deformed_geometry.py +3 -105
- warp/fem/geometry/element.py +13 -0
- warp/fem/geometry/geometry.py +165 -7
- warp/fem/geometry/grid_2d.py +3 -6
- warp/fem/geometry/grid_3d.py +31 -28
- warp/fem/geometry/hexmesh.py +3 -46
- warp/fem/geometry/nanogrid.py +3 -2
- warp/fem/geometry/{quadmesh_2d.py → quadmesh.py} +280 -159
- warp/fem/geometry/tetmesh.py +2 -43
- warp/fem/geometry/{trimesh_2d.py → trimesh.py} +354 -186
- warp/fem/integrate.py +683 -261
- warp/fem/linalg.py +404 -0
- warp/fem/operator.py +101 -18
- warp/fem/polynomial.py +5 -5
- warp/fem/quadrature/quadrature.py +45 -21
- warp/fem/space/__init__.py +45 -11
- warp/fem/space/basis_function_space.py +451 -0
- warp/fem/space/basis_space.py +58 -11
- warp/fem/space/function_space.py +146 -5
- warp/fem/space/grid_2d_function_space.py +80 -66
- warp/fem/space/grid_3d_function_space.py +113 -68
- warp/fem/space/hexmesh_function_space.py +96 -108
- warp/fem/space/nanogrid_function_space.py +62 -110
- warp/fem/space/quadmesh_function_space.py +208 -0
- warp/fem/space/shape/__init__.py +45 -7
- warp/fem/space/shape/cube_shape_function.py +328 -54
- warp/fem/space/shape/shape_function.py +10 -1
- warp/fem/space/shape/square_shape_function.py +328 -60
- warp/fem/space/shape/tet_shape_function.py +269 -19
- warp/fem/space/shape/triangle_shape_function.py +238 -19
- warp/fem/space/tetmesh_function_space.py +69 -37
- warp/fem/space/topology.py +38 -0
- warp/fem/space/trimesh_function_space.py +179 -0
- warp/fem/utils.py +6 -331
- warp/jax_experimental.py +3 -1
- warp/native/array.h +15 -0
- warp/native/builtin.h +66 -26
- warp/native/bvh.h +4 -0
- warp/native/coloring.cpp +604 -0
- warp/native/cuda_util.cpp +68 -51
- warp/native/cuda_util.h +2 -1
- warp/native/fabric.h +8 -0
- warp/native/hashgrid.h +4 -0
- warp/native/marching.cu +8 -0
- warp/native/mat.h +14 -3
- warp/native/mathdx.cpp +59 -0
- warp/native/mesh.h +4 -0
- warp/native/range.h +13 -1
- warp/native/reduce.cpp +9 -1
- warp/native/reduce.cu +7 -0
- warp/native/runlength_encode.cpp +9 -1
- warp/native/runlength_encode.cu +7 -1
- warp/native/scan.cpp +8 -0
- warp/native/scan.cu +8 -0
- warp/native/scan.h +8 -1
- warp/native/sparse.cpp +8 -0
- warp/native/sparse.cu +8 -0
- warp/native/temp_buffer.h +7 -0
- warp/native/tile.h +1854 -0
- warp/native/tile_gemm.h +341 -0
- warp/native/tile_reduce.h +210 -0
- warp/native/volume_builder.cu +8 -0
- warp/native/volume_builder.h +8 -0
- warp/native/warp.cpp +10 -2
- warp/native/warp.cu +369 -15
- warp/native/warp.h +12 -2
- warp/optim/adam.py +39 -4
- warp/paddle.py +29 -12
- warp/render/render_opengl.py +140 -67
- warp/sim/graph_coloring.py +292 -0
- warp/sim/import_urdf.py +8 -8
- warp/sim/integrator_euler.py +4 -2
- warp/sim/integrator_featherstone.py +115 -44
- warp/sim/integrator_vbd.py +6 -0
- warp/sim/model.py +109 -32
- warp/sparse.py +1 -1
- warp/stubs.py +569 -4
- warp/tape.py +12 -7
- warp/tests/assets/pixel.npy +0 -0
- warp/tests/aux_test_instancing_gc.py +18 -0
- warp/tests/test_array.py +39 -0
- warp/tests/test_codegen.py +81 -1
- warp/tests/test_codegen_instancing.py +30 -0
- warp/tests/test_collision.py +110 -0
- warp/tests/test_coloring.py +251 -0
- warp/tests/test_context.py +34 -0
- warp/tests/test_examples.py +21 -5
- warp/tests/test_fem.py +453 -113
- warp/tests/test_func.py +34 -4
- warp/tests/test_generics.py +52 -0
- warp/tests/test_iter.py +68 -0
- warp/tests/test_lerp.py +13 -87
- warp/tests/test_mat_scalar_ops.py +1 -1
- warp/tests/test_matmul.py +6 -9
- warp/tests/test_matmul_lite.py +6 -11
- warp/tests/test_mesh_query_point.py +1 -1
- warp/tests/test_module_hashing.py +23 -0
- warp/tests/test_overwrite.py +45 -0
- warp/tests/test_paddle.py +27 -87
- warp/tests/test_print.py +56 -1
- warp/tests/test_smoothstep.py +17 -83
- warp/tests/test_spatial.py +1 -1
- warp/tests/test_static.py +3 -3
- warp/tests/test_tile.py +744 -0
- warp/tests/test_tile_mathdx.py +144 -0
- warp/tests/test_tile_mlp.py +383 -0
- warp/tests/test_tile_reduce.py +374 -0
- warp/tests/test_tile_shared_memory.py +190 -0
- warp/tests/test_vbd.py +12 -20
- warp/tests/test_volume.py +43 -0
- warp/tests/unittest_suites.py +19 -2
- warp/tests/unittest_utils.py +4 -2
- warp/types.py +340 -74
- warp/utils.py +23 -3
- {warp_lang-1.4.2.dist-info → warp_lang-1.5.1.dist-info}/METADATA +32 -7
- {warp_lang-1.4.2.dist-info → warp_lang-1.5.1.dist-info}/RECORD +161 -134
- {warp_lang-1.4.2.dist-info → warp_lang-1.5.1.dist-info}/WHEEL +1 -1
- warp/fem/field/test.py +0 -180
- warp/fem/field/trial.py +0 -183
- warp/fem/space/collocated_function_space.py +0 -102
- warp/fem/space/quadmesh_2d_function_space.py +0 -261
- warp/fem/space/trimesh_2d_function_space.py +0 -153
- {warp_lang-1.4.2.dist-info → warp_lang-1.5.1.dist-info}/LICENSE.md +0 -0
- {warp_lang-1.4.2.dist-info → warp_lang-1.5.1.dist-info}/top_level.txt +0 -0
warp/native/cuda_util.cpp
CHANGED
|
@@ -100,6 +100,8 @@ static PFN_cuGraphicsUnmapResources_v3000 pfn_cuGraphicsUnmapResources;
|
|
|
100
100
|
static PFN_cuGraphicsResourceGetMappedPointer_v3020 pfn_cuGraphicsResourceGetMappedPointer;
|
|
101
101
|
static PFN_cuGraphicsGLRegisterBuffer_v3000 pfn_cuGraphicsGLRegisterBuffer;
|
|
102
102
|
static PFN_cuGraphicsUnregisterResource_v3000 pfn_cuGraphicsUnregisterResource;
|
|
103
|
+
static PFN_cuModuleGetGlobal_v3020 pfn_cuModuleGetGlobal;
|
|
104
|
+
static PFN_cuFuncSetAttribute_v9000 pfn_cuFuncSetAttribute;
|
|
103
105
|
|
|
104
106
|
static bool cuda_driver_initialized = false;
|
|
105
107
|
|
|
@@ -118,15 +120,17 @@ static inline int get_minor(int version)
|
|
|
118
120
|
return (version % 1000) / 10;
|
|
119
121
|
}
|
|
120
122
|
|
|
121
|
-
|
|
123
|
+
// Get versioned driver entry point. The version argument should match the function pointer type.
|
|
124
|
+
// For example, to initialize PFN_cuCtxCreate_v3020 use version 3020.
|
|
125
|
+
static bool get_driver_entry_point(const char* name, int version, void** pfn)
|
|
122
126
|
{
|
|
123
127
|
if (!pfn_cuGetProcAddress || !name || !pfn)
|
|
124
128
|
return false;
|
|
125
129
|
|
|
126
130
|
#if CUDA_VERSION < 12000
|
|
127
|
-
CUresult r = pfn_cuGetProcAddress(name, pfn,
|
|
131
|
+
CUresult r = pfn_cuGetProcAddress(name, pfn, version, CU_GET_PROC_ADDRESS_DEFAULT);
|
|
128
132
|
#else
|
|
129
|
-
CUresult r = pfn_cuGetProcAddress(name, pfn,
|
|
133
|
+
CUresult r = pfn_cuGetProcAddress(name, pfn, version, CU_GET_PROC_ADDRESS_DEFAULT, NULL);
|
|
130
134
|
#endif
|
|
131
135
|
|
|
132
136
|
if (r != CUDA_SUCCESS)
|
|
@@ -168,7 +172,8 @@ bool init_cuda_driver()
|
|
|
168
172
|
|
|
169
173
|
// check the CUDA driver version and report an error if it's too low
|
|
170
174
|
int driver_version = 0;
|
|
171
|
-
if (get_driver_entry_point("cuDriverGetVersion", &(void*&)pfn_cuDriverGetVersion) &&
|
|
175
|
+
if (get_driver_entry_point("cuDriverGetVersion", 2020, &(void*&)pfn_cuDriverGetVersion) &&
|
|
176
|
+
check_cu(pfn_cuDriverGetVersion(&driver_version)))
|
|
172
177
|
{
|
|
173
178
|
if (driver_version < WP_CUDA_DRIVER_VERSION)
|
|
174
179
|
{
|
|
@@ -184,53 +189,55 @@ bool init_cuda_driver()
|
|
|
184
189
|
}
|
|
185
190
|
|
|
186
191
|
// initialize driver entry points
|
|
187
|
-
get_driver_entry_point("cuGetErrorString", &(void*&)pfn_cuGetErrorString);
|
|
188
|
-
get_driver_entry_point("cuGetErrorName", &(void*&)pfn_cuGetErrorName);
|
|
189
|
-
get_driver_entry_point("cuInit", &(void*&)pfn_cuInit);
|
|
190
|
-
get_driver_entry_point("cuDeviceGet", &(void*&)pfn_cuDeviceGet);
|
|
191
|
-
get_driver_entry_point("cuDeviceGetCount", &(void*&)pfn_cuDeviceGetCount);
|
|
192
|
-
get_driver_entry_point("cuDeviceGetName", &(void*&)pfn_cuDeviceGetName);
|
|
193
|
-
get_driver_entry_point("cuDeviceGetAttribute", &(void*&)pfn_cuDeviceGetAttribute);
|
|
194
|
-
get_driver_entry_point("cuDeviceGetUuid", &(void*&)pfn_cuDeviceGetUuid);
|
|
195
|
-
get_driver_entry_point("cuDevicePrimaryCtxRetain", &(void*&)pfn_cuDevicePrimaryCtxRetain);
|
|
196
|
-
get_driver_entry_point("cuDevicePrimaryCtxRelease", &(void*&)pfn_cuDevicePrimaryCtxRelease);
|
|
197
|
-
get_driver_entry_point("cuDeviceCanAccessPeer", &(void*&)pfn_cuDeviceCanAccessPeer);
|
|
198
|
-
get_driver_entry_point("cuMemGetInfo", &(void*&)pfn_cuMemGetInfo);
|
|
199
|
-
get_driver_entry_point("cuCtxSetCurrent", &(void*&)pfn_cuCtxSetCurrent);
|
|
200
|
-
get_driver_entry_point("cuCtxGetCurrent", &(void*&)pfn_cuCtxGetCurrent);
|
|
201
|
-
get_driver_entry_point("cuCtxPushCurrent", &(void*&)pfn_cuCtxPushCurrent);
|
|
202
|
-
get_driver_entry_point("cuCtxPopCurrent", &(void*&)pfn_cuCtxPopCurrent);
|
|
203
|
-
get_driver_entry_point("cuCtxSynchronize", &(void*&)pfn_cuCtxSynchronize);
|
|
204
|
-
get_driver_entry_point("cuCtxGetDevice", &(void*&)pfn_cuCtxGetDevice);
|
|
205
|
-
get_driver_entry_point("cuCtxCreate", &(void*&)pfn_cuCtxCreate);
|
|
206
|
-
get_driver_entry_point("cuCtxDestroy", &(void*&)pfn_cuCtxDestroy);
|
|
207
|
-
get_driver_entry_point("cuCtxEnablePeerAccess", &(void*&)pfn_cuCtxEnablePeerAccess);
|
|
208
|
-
get_driver_entry_point("cuCtxDisablePeerAccess", &(void*&)pfn_cuCtxDisablePeerAccess);
|
|
209
|
-
get_driver_entry_point("cuStreamCreate", &(void*&)pfn_cuStreamCreate);
|
|
210
|
-
get_driver_entry_point("cuStreamDestroy", &(void*&)pfn_cuStreamDestroy);
|
|
211
|
-
get_driver_entry_point("cuStreamSynchronize", &(void*&)pfn_cuStreamSynchronize);
|
|
212
|
-
get_driver_entry_point("cuStreamWaitEvent", &(void*&)pfn_cuStreamWaitEvent);
|
|
213
|
-
get_driver_entry_point("cuStreamGetCtx", &(void*&)pfn_cuStreamGetCtx);
|
|
214
|
-
get_driver_entry_point("cuStreamGetCaptureInfo", &(void*&)pfn_cuStreamGetCaptureInfo);
|
|
215
|
-
get_driver_entry_point("cuStreamUpdateCaptureDependencies", &(void*&)pfn_cuStreamUpdateCaptureDependencies);
|
|
216
|
-
get_driver_entry_point("cuStreamCreateWithPriority", &(void*&)pfn_cuStreamCreateWithPriority);
|
|
217
|
-
get_driver_entry_point("cuStreamGetPriority", &(void*&)pfn_cuStreamGetPriority);
|
|
218
|
-
get_driver_entry_point("cuEventCreate", &(void*&)pfn_cuEventCreate);
|
|
219
|
-
get_driver_entry_point("cuEventDestroy", &(void*&)pfn_cuEventDestroy);
|
|
220
|
-
get_driver_entry_point("cuEventRecord", &(void*&)pfn_cuEventRecord);
|
|
221
|
-
get_driver_entry_point("cuEventRecordWithFlags", &(void*&)pfn_cuEventRecordWithFlags);
|
|
222
|
-
get_driver_entry_point("cuEventSynchronize", &(void*&)pfn_cuEventSynchronize);
|
|
223
|
-
get_driver_entry_point("cuModuleLoadDataEx", &(void*&)pfn_cuModuleLoadDataEx);
|
|
224
|
-
get_driver_entry_point("cuModuleUnload", &(void*&)pfn_cuModuleUnload);
|
|
225
|
-
get_driver_entry_point("cuModuleGetFunction", &(void*&)pfn_cuModuleGetFunction);
|
|
226
|
-
get_driver_entry_point("cuLaunchKernel", &(void*&)pfn_cuLaunchKernel);
|
|
227
|
-
get_driver_entry_point("cuMemcpyPeerAsync", &(void*&)pfn_cuMemcpyPeerAsync);
|
|
228
|
-
get_driver_entry_point("cuPointerGetAttribute", &(void*&)pfn_cuPointerGetAttribute);
|
|
229
|
-
get_driver_entry_point("cuGraphicsMapResources", &(void*&)pfn_cuGraphicsMapResources);
|
|
230
|
-
get_driver_entry_point("cuGraphicsUnmapResources", &(void*&)pfn_cuGraphicsUnmapResources);
|
|
231
|
-
get_driver_entry_point("cuGraphicsResourceGetMappedPointer", &(void*&)pfn_cuGraphicsResourceGetMappedPointer);
|
|
232
|
-
get_driver_entry_point("cuGraphicsGLRegisterBuffer", &(void*&)pfn_cuGraphicsGLRegisterBuffer);
|
|
233
|
-
get_driver_entry_point("cuGraphicsUnregisterResource", &(void*&)pfn_cuGraphicsUnregisterResource);
|
|
192
|
+
get_driver_entry_point("cuGetErrorString", 6000, &(void*&)pfn_cuGetErrorString);
|
|
193
|
+
get_driver_entry_point("cuGetErrorName", 6000, &(void*&)pfn_cuGetErrorName);
|
|
194
|
+
get_driver_entry_point("cuInit", 2000, &(void*&)pfn_cuInit);
|
|
195
|
+
get_driver_entry_point("cuDeviceGet", 2000, &(void*&)pfn_cuDeviceGet);
|
|
196
|
+
get_driver_entry_point("cuDeviceGetCount", 2000, &(void*&)pfn_cuDeviceGetCount);
|
|
197
|
+
get_driver_entry_point("cuDeviceGetName", 2000, &(void*&)pfn_cuDeviceGetName);
|
|
198
|
+
get_driver_entry_point("cuDeviceGetAttribute", 2000, &(void*&)pfn_cuDeviceGetAttribute);
|
|
199
|
+
get_driver_entry_point("cuDeviceGetUuid", 110400, &(void*&)pfn_cuDeviceGetUuid);
|
|
200
|
+
get_driver_entry_point("cuDevicePrimaryCtxRetain", 7000, &(void*&)pfn_cuDevicePrimaryCtxRetain);
|
|
201
|
+
get_driver_entry_point("cuDevicePrimaryCtxRelease", 11000, &(void*&)pfn_cuDevicePrimaryCtxRelease);
|
|
202
|
+
get_driver_entry_point("cuDeviceCanAccessPeer", 4000, &(void*&)pfn_cuDeviceCanAccessPeer);
|
|
203
|
+
get_driver_entry_point("cuMemGetInfo", 3020, &(void*&)pfn_cuMemGetInfo);
|
|
204
|
+
get_driver_entry_point("cuCtxSetCurrent", 4000, &(void*&)pfn_cuCtxSetCurrent);
|
|
205
|
+
get_driver_entry_point("cuCtxGetCurrent", 4000, &(void*&)pfn_cuCtxGetCurrent);
|
|
206
|
+
get_driver_entry_point("cuCtxPushCurrent", 4000, &(void*&)pfn_cuCtxPushCurrent);
|
|
207
|
+
get_driver_entry_point("cuCtxPopCurrent", 4000, &(void*&)pfn_cuCtxPopCurrent);
|
|
208
|
+
get_driver_entry_point("cuCtxSynchronize", 2000, &(void*&)pfn_cuCtxSynchronize);
|
|
209
|
+
get_driver_entry_point("cuCtxGetDevice", 2000, &(void*&)pfn_cuCtxGetDevice);
|
|
210
|
+
get_driver_entry_point("cuCtxCreate", 3020, &(void*&)pfn_cuCtxCreate);
|
|
211
|
+
get_driver_entry_point("cuCtxDestroy", 4000, &(void*&)pfn_cuCtxDestroy);
|
|
212
|
+
get_driver_entry_point("cuCtxEnablePeerAccess", 4000, &(void*&)pfn_cuCtxEnablePeerAccess);
|
|
213
|
+
get_driver_entry_point("cuCtxDisablePeerAccess", 4000, &(void*&)pfn_cuCtxDisablePeerAccess);
|
|
214
|
+
get_driver_entry_point("cuStreamCreate", 2000, &(void*&)pfn_cuStreamCreate);
|
|
215
|
+
get_driver_entry_point("cuStreamDestroy", 4000, &(void*&)pfn_cuStreamDestroy);
|
|
216
|
+
get_driver_entry_point("cuStreamSynchronize", 2000, &(void*&)pfn_cuStreamSynchronize);
|
|
217
|
+
get_driver_entry_point("cuStreamWaitEvent", 3020, &(void*&)pfn_cuStreamWaitEvent);
|
|
218
|
+
get_driver_entry_point("cuStreamGetCtx", 9020, &(void*&)pfn_cuStreamGetCtx);
|
|
219
|
+
get_driver_entry_point("cuStreamGetCaptureInfo", 11030, &(void*&)pfn_cuStreamGetCaptureInfo);
|
|
220
|
+
get_driver_entry_point("cuStreamUpdateCaptureDependencies", 11030, &(void*&)pfn_cuStreamUpdateCaptureDependencies);
|
|
221
|
+
get_driver_entry_point("cuStreamCreateWithPriority", 5050, &(void*&)pfn_cuStreamCreateWithPriority);
|
|
222
|
+
get_driver_entry_point("cuStreamGetPriority", 5050, &(void*&)pfn_cuStreamGetPriority);
|
|
223
|
+
get_driver_entry_point("cuEventCreate", 2000, &(void*&)pfn_cuEventCreate);
|
|
224
|
+
get_driver_entry_point("cuEventDestroy", 4000, &(void*&)pfn_cuEventDestroy);
|
|
225
|
+
get_driver_entry_point("cuEventRecord", 2000, &(void*&)pfn_cuEventRecord);
|
|
226
|
+
get_driver_entry_point("cuEventRecordWithFlags", 11010, &(void*&)pfn_cuEventRecordWithFlags);
|
|
227
|
+
get_driver_entry_point("cuEventSynchronize", 2000, &(void*&)pfn_cuEventSynchronize);
|
|
228
|
+
get_driver_entry_point("cuModuleLoadDataEx", 2010, &(void*&)pfn_cuModuleLoadDataEx);
|
|
229
|
+
get_driver_entry_point("cuModuleUnload", 2000, &(void*&)pfn_cuModuleUnload);
|
|
230
|
+
get_driver_entry_point("cuModuleGetFunction", 2000, &(void*&)pfn_cuModuleGetFunction);
|
|
231
|
+
get_driver_entry_point("cuLaunchKernel", 4000, &(void*&)pfn_cuLaunchKernel);
|
|
232
|
+
get_driver_entry_point("cuMemcpyPeerAsync", 4000, &(void*&)pfn_cuMemcpyPeerAsync);
|
|
233
|
+
get_driver_entry_point("cuPointerGetAttribute", 4000, &(void*&)pfn_cuPointerGetAttribute);
|
|
234
|
+
get_driver_entry_point("cuGraphicsMapResources", 3000, &(void*&)pfn_cuGraphicsMapResources);
|
|
235
|
+
get_driver_entry_point("cuGraphicsUnmapResources", 3000, &(void*&)pfn_cuGraphicsUnmapResources);
|
|
236
|
+
get_driver_entry_point("cuGraphicsResourceGetMappedPointer", 3020, &(void*&)pfn_cuGraphicsResourceGetMappedPointer);
|
|
237
|
+
get_driver_entry_point("cuGraphicsGLRegisterBuffer", 3000, &(void*&)pfn_cuGraphicsGLRegisterBuffer);
|
|
238
|
+
get_driver_entry_point("cuGraphicsUnregisterResource", 3000, &(void*&)pfn_cuGraphicsUnregisterResource);
|
|
239
|
+
get_driver_entry_point("cuModuleGetGlobal", 3020, &(void*&)pfn_cuModuleGetGlobal);
|
|
240
|
+
get_driver_entry_point("cuFuncSetAttribute", 9000, &(void*&)pfn_cuFuncSetAttribute);
|
|
234
241
|
|
|
235
242
|
if (pfn_cuInit)
|
|
236
243
|
cuda_driver_initialized = check_cu(pfn_cuInit(0));
|
|
@@ -568,4 +575,14 @@ CUresult cuGraphicsUnregisterResource_f(CUgraphicsResource resource)
|
|
|
568
575
|
return pfn_cuGraphicsUnregisterResource ? pfn_cuGraphicsUnregisterResource(resource) : DRIVER_ENTRY_POINT_ERROR;
|
|
569
576
|
}
|
|
570
577
|
|
|
578
|
+
CUresult cuModuleGetGlobal_f(CUdeviceptr* dptr, size_t* bytes, CUmodule hmod, const char* name )
|
|
579
|
+
{
|
|
580
|
+
return pfn_cuModuleGetGlobal ? pfn_cuModuleGetGlobal(dptr, bytes, hmod, name) : DRIVER_ENTRY_POINT_ERROR;
|
|
581
|
+
}
|
|
582
|
+
|
|
583
|
+
CUresult cuFuncSetAttribute_f(CUfunction hfunc, CUfunction_attribute attrib, int value)
|
|
584
|
+
{
|
|
585
|
+
return pfn_cuFuncSetAttribute ? pfn_cuFuncSetAttribute(hfunc, attrib, value) : DRIVER_ENTRY_POINT_ERROR;
|
|
586
|
+
}
|
|
587
|
+
|
|
571
588
|
#endif // WP_ENABLE_CUDA
|
warp/native/cuda_util.h
CHANGED
|
@@ -99,7 +99,8 @@ CUresult cuGraphicsUnmapResources_f(unsigned int count, CUgraphicsResource* reso
|
|
|
99
99
|
CUresult cuGraphicsResourceGetMappedPointer_f(CUdeviceptr* pDevPtr, size_t* pSize, CUgraphicsResource resource);
|
|
100
100
|
CUresult cuGraphicsGLRegisterBuffer_f(CUgraphicsResource *pCudaResource, unsigned int buffer, unsigned int flags);
|
|
101
101
|
CUresult cuGraphicsUnregisterResource_f(CUgraphicsResource resource);
|
|
102
|
-
|
|
102
|
+
CUresult cuModuleGetGlobal_f(CUdeviceptr* dptr, size_t* bytes, CUmodule hmod, const char* name );
|
|
103
|
+
CUresult cuFuncSetAttribute_f(CUfunction hfunc, CUfunction_attribute attrib, int value);
|
|
103
104
|
|
|
104
105
|
bool init_cuda_driver();
|
|
105
106
|
bool is_cuda_driver_initialized();
|
warp/native/fabric.h
CHANGED
|
@@ -1,3 +1,11 @@
|
|
|
1
|
+
/** Copyright (c) 2023 NVIDIA CORPORATION. All rights reserved.
|
|
2
|
+
* NVIDIA CORPORATION and its licensors retain all intellectual property
|
|
3
|
+
* and proprietary rights in and to this software, related documentation
|
|
4
|
+
* and any modifications thereto. Any use, reproduction, disclosure or
|
|
5
|
+
* distribution of this software and related documentation without an express
|
|
6
|
+
* license agreement from NVIDIA CORPORATION is strictly prohibited.
|
|
7
|
+
*/
|
|
8
|
+
|
|
1
9
|
#pragma once
|
|
2
10
|
|
|
3
11
|
#include "builtin.h"
|
warp/native/hashgrid.h
CHANGED
|
@@ -209,6 +209,10 @@ CUDA_CALLABLE inline hash_grid_query_t iter_reverse(const hash_grid_query_t& que
|
|
|
209
209
|
return query;
|
|
210
210
|
}
|
|
211
211
|
|
|
212
|
+
CUDA_CALLABLE inline void adj_iter_reverse(const hash_grid_query_t& query, hash_grid_query_t& adj_query, hash_grid_query_t& adj_ret)
|
|
213
|
+
{
|
|
214
|
+
}
|
|
215
|
+
|
|
212
216
|
|
|
213
217
|
|
|
214
218
|
CUDA_CALLABLE inline int hash_grid_point_id(uint64_t id, int& index)
|
warp/native/marching.cu
CHANGED
|
@@ -1,3 +1,11 @@
|
|
|
1
|
+
/** Copyright (c) 2022 NVIDIA CORPORATION. All rights reserved.
|
|
2
|
+
* NVIDIA CORPORATION and its licensors retain all intellectual property
|
|
3
|
+
* and proprietary rights in and to this software, related documentation
|
|
4
|
+
* and any modifications thereto. Any use, reproduction, disclosure or
|
|
5
|
+
* distribution of this software and related documentation without an express
|
|
6
|
+
* license agreement from NVIDIA CORPORATION is strictly prohibited.
|
|
7
|
+
*/
|
|
8
|
+
|
|
1
9
|
#include "warp.h"
|
|
2
10
|
#include "cuda_util.h"
|
|
3
11
|
#include "scan.h"
|
warp/native/mat.h
CHANGED
|
@@ -210,6 +210,12 @@ inline CUDA_CALLABLE mat_t<Rows, Rows, Type> identity()
|
|
|
210
210
|
return m;
|
|
211
211
|
}
|
|
212
212
|
|
|
213
|
+
template<unsigned Rows, typename Type>
|
|
214
|
+
inline CUDA_CALLABLE void adj_identity(const mat_t<Rows, Rows, Type>& adj_ret)
|
|
215
|
+
{
|
|
216
|
+
// nop
|
|
217
|
+
}
|
|
218
|
+
|
|
213
219
|
template<unsigned Rows, unsigned Cols, typename Type>
|
|
214
220
|
inline CUDA_CALLABLE bool operator==(const mat_t<Rows,Cols,Type>& a, const mat_t<Rows,Cols,Type>& b)
|
|
215
221
|
{
|
|
@@ -650,13 +656,18 @@ inline CUDA_CALLABLE mat_t<Rows,ColsOut,Type> mul(const mat_t<Rows,Cols,Type>& a
|
|
|
650
656
|
{
|
|
651
657
|
mat_t<Rows,ColsOut,Type> t(0);
|
|
652
658
|
for (unsigned i=0; i < Rows; ++i)
|
|
653
|
-
{
|
|
654
|
-
for (unsigned j=0; j < ColsOut; ++j)
|
|
659
|
+
{
|
|
660
|
+
for (unsigned j=0; j < ColsOut; ++j)
|
|
655
661
|
{
|
|
662
|
+
Type sum(0.0);
|
|
663
|
+
|
|
656
664
|
for (unsigned k=0; k < Cols; ++k)
|
|
657
665
|
{
|
|
658
|
-
t.data[i][j] += a.data[i][k]*b.data[k][j];
|
|
666
|
+
//t.data[i][j] += a.data[i][k]*b.data[k][j];
|
|
667
|
+
sum = fmaf(a.data[i][k], b.data[k][j], sum);
|
|
659
668
|
}
|
|
669
|
+
|
|
670
|
+
t.data[i][j] = sum;
|
|
660
671
|
}
|
|
661
672
|
}
|
|
662
673
|
|
warp/native/mathdx.cpp
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
/** Copyright (c) 2024 NVIDIA CORPORATION. All rights reserved.
|
|
2
|
+
* NVIDIA CORPORATION and its licensors retain all intellectual property
|
|
3
|
+
* and proprietary rights in and to this software, related documentation
|
|
4
|
+
* and any modifications thereto. Any use, reproduction, disclosure or
|
|
5
|
+
* distribution of this software and related documentation without an express
|
|
6
|
+
* license agreement from NVIDIA CORPORATION is strictly prohibited.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
#include "builtin.h"
|
|
10
|
+
|
|
11
|
+
// stubs for platforms where there is no CUDA
|
|
12
|
+
#if !WP_ENABLE_CUDA || !WP_ENABLE_MATHDX
|
|
13
|
+
|
|
14
|
+
extern "C"
|
|
15
|
+
{
|
|
16
|
+
|
|
17
|
+
WP_API
|
|
18
|
+
bool cuda_compile_fft(
|
|
19
|
+
const char* ltoir_output_path,
|
|
20
|
+
const char* symbol_name, int num_include_dirs,
|
|
21
|
+
const char** include_dirs,
|
|
22
|
+
const char* mathdx_include_dir,
|
|
23
|
+
int arch,
|
|
24
|
+
int size,
|
|
25
|
+
int elements_per_thread,
|
|
26
|
+
int direction,
|
|
27
|
+
int precision,
|
|
28
|
+
int* shared_memory_size)
|
|
29
|
+
{
|
|
30
|
+
printf("CUDA is disabled and/or Warp was not compiled with MathDx support.\n");
|
|
31
|
+
return false;
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
WP_API bool cuda_compile_dot(
|
|
35
|
+
const char* ltoir_output_path,
|
|
36
|
+
const char* symbol_name,
|
|
37
|
+
int num_include_dirs,
|
|
38
|
+
const char** include_dirs,
|
|
39
|
+
const char* mathdx_include_dir,
|
|
40
|
+
int arch,
|
|
41
|
+
int M,
|
|
42
|
+
int N,
|
|
43
|
+
int K,
|
|
44
|
+
int precision_A,
|
|
45
|
+
int precision_B,
|
|
46
|
+
int precision_C,
|
|
47
|
+
int type,
|
|
48
|
+
int a_arrangement,
|
|
49
|
+
int b_arrangement,
|
|
50
|
+
int c_arrangement,
|
|
51
|
+
int num_threads)
|
|
52
|
+
{
|
|
53
|
+
printf("CUDA is disabled and/or Warp was not compiled with MathDx support.\n");
|
|
54
|
+
return false;
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
} // extern "C"
|
|
58
|
+
|
|
59
|
+
#endif // !WP_ENABLE_CUDA || !WP_ENABLE_MATHDX
|
warp/native/mesh.h
CHANGED
|
@@ -1693,6 +1693,10 @@ CUDA_CALLABLE inline mesh_query_aabb_t iter_reverse(const mesh_query_aabb_t& que
|
|
|
1693
1693
|
return query;
|
|
1694
1694
|
}
|
|
1695
1695
|
|
|
1696
|
+
CUDA_CALLABLE inline void adj_iter_reverse(const mesh_query_aabb_t& query, mesh_query_aabb_t& adj_query, mesh_query_aabb_t& adj_ret)
|
|
1697
|
+
{
|
|
1698
|
+
}
|
|
1699
|
+
|
|
1696
1700
|
|
|
1697
1701
|
// stub
|
|
1698
1702
|
CUDA_CALLABLE inline void adj_mesh_query_aabb_next(mesh_query_aabb_t& query, int& index, mesh_query_aabb_t&, int&, bool&)
|
warp/native/range.h
CHANGED
|
@@ -1,3 +1,11 @@
|
|
|
1
|
+
/** Copyright (c) 2022 NVIDIA CORPORATION. All rights reserved.
|
|
2
|
+
* NVIDIA CORPORATION and its licensors retain all intellectual property
|
|
3
|
+
* and proprietary rights in and to this software, related documentation
|
|
4
|
+
* and any modifications thereto. Any use, reproduction, disclosure or
|
|
5
|
+
* distribution of this software and related documentation without an express
|
|
6
|
+
* license agreement from NVIDIA CORPORATION is strictly prohibited.
|
|
7
|
+
*/
|
|
8
|
+
|
|
1
9
|
#pragma once
|
|
2
10
|
|
|
3
11
|
namespace wp
|
|
@@ -115,4 +123,8 @@ CUDA_CALLABLE inline range_t iter_reverse(const range_t& r)
|
|
|
115
123
|
return rev;
|
|
116
124
|
}
|
|
117
125
|
|
|
118
|
-
|
|
126
|
+
CUDA_CALLABLE inline void adj_iter_reverse(const range_t& r, range_t& adj_r, range_t& adj_ret)
|
|
127
|
+
{
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
} // namespace wp
|
warp/native/reduce.cpp
CHANGED
|
@@ -1,3 +1,11 @@
|
|
|
1
|
+
/** Copyright (c) 2023 NVIDIA CORPORATION. All rights reserved.
|
|
2
|
+
* NVIDIA CORPORATION and its licensors retain all intellectual property
|
|
3
|
+
* and proprietary rights in and to this software, related documentation
|
|
4
|
+
* and any modifications thereto. Any use, reproduction, disclosure or
|
|
5
|
+
* distribution of this software and related documentation without an express
|
|
6
|
+
* license agreement from NVIDIA CORPORATION is strictly prohibited.
|
|
7
|
+
*/
|
|
8
|
+
|
|
1
9
|
#include "warp.h"
|
|
2
10
|
|
|
3
11
|
namespace
|
|
@@ -154,4 +162,4 @@ void array_sum_float_device(uint64_t a, uint64_t out, int count, int byte_stride
|
|
|
154
162
|
void array_sum_double_device(uint64_t a, uint64_t out, int count, int byte_stride_a, int type_length)
|
|
155
163
|
{
|
|
156
164
|
}
|
|
157
|
-
#endif
|
|
165
|
+
#endif
|
warp/native/reduce.cu
CHANGED
|
@@ -1,3 +1,10 @@
|
|
|
1
|
+
/** Copyright (c) 2023 NVIDIA CORPORATION. All rights reserved.
|
|
2
|
+
* NVIDIA CORPORATION and its licensors retain all intellectual property
|
|
3
|
+
* and proprietary rights in and to this software, related documentation
|
|
4
|
+
* and any modifications thereto. Any use, reproduction, disclosure or
|
|
5
|
+
* distribution of this software and related documentation without an express
|
|
6
|
+
* license agreement from NVIDIA CORPORATION is strictly prohibited.
|
|
7
|
+
*/
|
|
1
8
|
|
|
2
9
|
#include "cuda_util.h"
|
|
3
10
|
#include "warp.h"
|
warp/native/runlength_encode.cpp
CHANGED
|
@@ -1,3 +1,11 @@
|
|
|
1
|
+
/** Copyright (c) 2023 NVIDIA CORPORATION. All rights reserved.
|
|
2
|
+
* NVIDIA CORPORATION and its licensors retain all intellectual property
|
|
3
|
+
* and proprietary rights in and to this software, related documentation
|
|
4
|
+
* and any modifications thereto. Any use, reproduction, disclosure or
|
|
5
|
+
* distribution of this software and related documentation without an express
|
|
6
|
+
* license agreement from NVIDIA CORPORATION is strictly prohibited.
|
|
7
|
+
*/
|
|
8
|
+
|
|
1
9
|
#include "warp.h"
|
|
2
10
|
|
|
3
11
|
#include <cstdint>
|
|
@@ -59,4 +67,4 @@ void runlength_encode_int_device(
|
|
|
59
67
|
int n)
|
|
60
68
|
{
|
|
61
69
|
}
|
|
62
|
-
#endif
|
|
70
|
+
#endif
|
warp/native/runlength_encode.cu
CHANGED
|
@@ -1,4 +1,10 @@
|
|
|
1
|
-
|
|
1
|
+
/** Copyright (c) 2023 NVIDIA CORPORATION. All rights reserved.
|
|
2
|
+
* NVIDIA CORPORATION and its licensors retain all intellectual property
|
|
3
|
+
* and proprietary rights in and to this software, related documentation
|
|
4
|
+
* and any modifications thereto. Any use, reproduction, disclosure or
|
|
5
|
+
* distribution of this software and related documentation without an express
|
|
6
|
+
* license agreement from NVIDIA CORPORATION is strictly prohibited.
|
|
7
|
+
*/
|
|
2
8
|
|
|
3
9
|
#include "warp.h"
|
|
4
10
|
#include "cuda_util.h"
|
warp/native/scan.cpp
CHANGED
|
@@ -1,3 +1,11 @@
|
|
|
1
|
+
/** Copyright (c) 2022 NVIDIA CORPORATION. All rights reserved.
|
|
2
|
+
* NVIDIA CORPORATION and its licensors retain all intellectual property
|
|
3
|
+
* and proprietary rights in and to this software, related documentation
|
|
4
|
+
* and any modifications thereto. Any use, reproduction, disclosure or
|
|
5
|
+
* distribution of this software and related documentation without an express
|
|
6
|
+
* license agreement from NVIDIA CORPORATION is strictly prohibited.
|
|
7
|
+
*/
|
|
8
|
+
|
|
1
9
|
#include "scan.h"
|
|
2
10
|
|
|
3
11
|
#include <numeric>
|
warp/native/scan.cu
CHANGED
|
@@ -1,3 +1,11 @@
|
|
|
1
|
+
/** Copyright (c) 2022 NVIDIA CORPORATION. All rights reserved.
|
|
2
|
+
* NVIDIA CORPORATION and its licensors retain all intellectual property
|
|
3
|
+
* and proprietary rights in and to this software, related documentation
|
|
4
|
+
* and any modifications thereto. Any use, reproduction, disclosure or
|
|
5
|
+
* distribution of this software and related documentation without an express
|
|
6
|
+
* license agreement from NVIDIA CORPORATION is strictly prohibited.
|
|
7
|
+
*/
|
|
8
|
+
|
|
1
9
|
#include "warp.h"
|
|
2
10
|
#include "scan.h"
|
|
3
11
|
|
warp/native/scan.h
CHANGED
|
@@ -1,7 +1,14 @@
|
|
|
1
|
+
/** Copyright (c) 2022 NVIDIA CORPORATION. All rights reserved.
|
|
2
|
+
* NVIDIA CORPORATION and its licensors retain all intellectual property
|
|
3
|
+
* and proprietary rights in and to this software, related documentation
|
|
4
|
+
* and any modifications thereto. Any use, reproduction, disclosure or
|
|
5
|
+
* distribution of this software and related documentation without an express
|
|
6
|
+
* license agreement from NVIDIA CORPORATION is strictly prohibited.
|
|
7
|
+
*/
|
|
8
|
+
|
|
1
9
|
#pragma once
|
|
2
10
|
|
|
3
11
|
template<typename T>
|
|
4
12
|
void scan_host(const T* values_in, T* values_out, int n, bool inclusive = true);
|
|
5
13
|
template<typename T>
|
|
6
14
|
void scan_device(const T* values_in, T* values_out, int n, bool inclusive = true);
|
|
7
|
-
|
warp/native/sparse.cpp
CHANGED
|
@@ -1,3 +1,11 @@
|
|
|
1
|
+
/** Copyright (c) 2023 NVIDIA CORPORATION. All rights reserved.
|
|
2
|
+
* NVIDIA CORPORATION and its licensors retain all intellectual property
|
|
3
|
+
* and proprietary rights in and to this software, related documentation
|
|
4
|
+
* and any modifications thereto. Any use, reproduction, disclosure or
|
|
5
|
+
* distribution of this software and related documentation without an express
|
|
6
|
+
* license agreement from NVIDIA CORPORATION is strictly prohibited.
|
|
7
|
+
*/
|
|
8
|
+
|
|
1
9
|
#include "warp.h"
|
|
2
10
|
|
|
3
11
|
#include <algorithm>
|
warp/native/sparse.cu
CHANGED
|
@@ -1,3 +1,11 @@
|
|
|
1
|
+
/** Copyright (c) 2023 NVIDIA CORPORATION. All rights reserved.
|
|
2
|
+
* NVIDIA CORPORATION and its licensors retain all intellectual property
|
|
3
|
+
* and proprietary rights in and to this software, related documentation
|
|
4
|
+
* and any modifications thereto. Any use, reproduction, disclosure or
|
|
5
|
+
* distribution of this software and related documentation without an express
|
|
6
|
+
* license agreement from NVIDIA CORPORATION is strictly prohibited.
|
|
7
|
+
*/
|
|
8
|
+
|
|
1
9
|
#include "cuda_util.h"
|
|
2
10
|
#include "warp.h"
|
|
3
11
|
|
warp/native/temp_buffer.h
CHANGED
|
@@ -1,3 +1,10 @@
|
|
|
1
|
+
/** Copyright (c) 2023 NVIDIA CORPORATION. All rights reserved.
|
|
2
|
+
* NVIDIA CORPORATION and its licensors retain all intellectual property
|
|
3
|
+
* and proprietary rights in and to this software, related documentation
|
|
4
|
+
* and any modifications thereto. Any use, reproduction, disclosure or
|
|
5
|
+
* distribution of this software and related documentation without an express
|
|
6
|
+
* license agreement from NVIDIA CORPORATION is strictly prohibited.
|
|
7
|
+
*/
|
|
1
8
|
|
|
2
9
|
#pragma once
|
|
3
10
|
|