warp-lang 1.3.2 (py3-none-win_amd64.whl) → 1.4.0 (py3-none-win_amd64.whl)
This diff reflects the content of the publicly released package versions as they appear in their public registry and is provided for informational purposes only.
Potentially problematic release.
This version of warp-lang might be problematic.
- warp/__init__.py +6 -0
- warp/autograd.py +59 -6
- warp/bin/warp-clang.dll +0 -0
- warp/bin/warp.dll +0 -0
- warp/build_dll.py +8 -10
- warp/builtins.py +126 -4
- warp/codegen.py +435 -53
- warp/config.py +1 -1
- warp/context.py +678 -403
- warp/dlpack.py +2 -0
- warp/examples/benchmarks/benchmark_cloth.py +10 -0
- warp/examples/core/example_render_opengl.py +12 -10
- warp/examples/fem/example_adaptive_grid.py +251 -0
- warp/examples/fem/example_apic_fluid.py +1 -1
- warp/examples/fem/example_diffusion_3d.py +2 -2
- warp/examples/fem/example_magnetostatics.py +1 -1
- warp/examples/fem/example_streamlines.py +1 -0
- warp/examples/fem/utils.py +23 -4
- warp/examples/sim/example_cloth.py +50 -6
- warp/fem/__init__.py +2 -0
- warp/fem/adaptivity.py +493 -0
- warp/fem/field/field.py +2 -1
- warp/fem/field/nodal_field.py +18 -26
- warp/fem/field/test.py +4 -4
- warp/fem/field/trial.py +4 -4
- warp/fem/geometry/__init__.py +1 -0
- warp/fem/geometry/adaptive_nanogrid.py +843 -0
- warp/fem/geometry/nanogrid.py +55 -28
- warp/fem/space/__init__.py +1 -1
- warp/fem/space/nanogrid_function_space.py +69 -35
- warp/fem/utils.py +113 -107
- warp/jax_experimental.py +28 -15
- warp/native/array.h +0 -1
- warp/native/builtin.h +103 -6
- warp/native/bvh.cu +2 -0
- warp/native/cuda_util.cpp +14 -0
- warp/native/cuda_util.h +2 -0
- warp/native/error.cpp +4 -2
- warp/native/exports.h +99 -17
- warp/native/mat.h +97 -0
- warp/native/mesh.cpp +36 -0
- warp/native/mesh.cu +51 -0
- warp/native/mesh.h +1 -0
- warp/native/quat.h +43 -0
- warp/native/spatial.h +6 -0
- warp/native/vec.h +74 -0
- warp/native/warp.cpp +2 -1
- warp/native/warp.cu +10 -3
- warp/native/warp.h +8 -1
- warp/paddle.py +382 -0
- warp/sim/__init__.py +1 -0
- warp/sim/collide.py +519 -0
- warp/sim/integrator_euler.py +18 -5
- warp/sim/integrator_featherstone.py +5 -5
- warp/sim/integrator_vbd.py +1026 -0
- warp/sim/model.py +49 -23
- warp/stubs.py +459 -0
- warp/tape.py +2 -0
- warp/tests/aux_test_dependent.py +1 -0
- warp/tests/aux_test_name_clash1.py +32 -0
- warp/tests/aux_test_name_clash2.py +32 -0
- warp/tests/aux_test_square.py +1 -0
- warp/tests/test_array.py +222 -0
- warp/tests/test_async.py +3 -3
- warp/tests/test_atomic.py +6 -0
- warp/tests/test_closest_point_edge_edge.py +93 -1
- warp/tests/test_codegen.py +62 -15
- warp/tests/test_codegen_instancing.py +1457 -0
- warp/tests/test_collision.py +486 -0
- warp/tests/test_compile_consts.py +3 -28
- warp/tests/test_dlpack.py +170 -0
- warp/tests/test_examples.py +22 -8
- warp/tests/test_fast_math.py +10 -4
- warp/tests/test_fem.py +64 -0
- warp/tests/test_func.py +46 -0
- warp/tests/test_implicit_init.py +49 -0
- warp/tests/test_jax.py +58 -0
- warp/tests/test_mat.py +84 -0
- warp/tests/test_mesh_query_point.py +188 -0
- warp/tests/test_module_hashing.py +40 -0
- warp/tests/test_multigpu.py +3 -3
- warp/tests/test_overwrite.py +8 -0
- warp/tests/test_paddle.py +852 -0
- warp/tests/test_print.py +89 -0
- warp/tests/test_quat.py +111 -0
- warp/tests/test_reload.py +31 -1
- warp/tests/test_scalar_ops.py +2 -0
- warp/tests/test_static.py +412 -0
- warp/tests/test_streams.py +64 -3
- warp/tests/test_struct.py +4 -4
- warp/tests/test_torch.py +24 -0
- warp/tests/test_triangle_closest_point.py +137 -0
- warp/tests/test_types.py +1 -1
- warp/tests/test_vbd.py +386 -0
- warp/tests/test_vec.py +143 -0
- warp/tests/test_vec_scalar_ops.py +139 -0
- warp/tests/test_volume.py +30 -0
- warp/tests/unittest_suites.py +12 -0
- warp/tests/unittest_utils.py +9 -5
- warp/thirdparty/dlpack.py +3 -1
- warp/types.py +157 -34
- warp/utils.py +37 -14
- {warp_lang-1.3.2.dist-info → warp_lang-1.4.0.dist-info}/METADATA +10 -8
- {warp_lang-1.3.2.dist-info → warp_lang-1.4.0.dist-info}/RECORD +107 -95
- warp/tests/test_point_triangle_closest_point.py +0 -143
- {warp_lang-1.3.2.dist-info → warp_lang-1.4.0.dist-info}/LICENSE.md +0 -0
- {warp_lang-1.3.2.dist-info → warp_lang-1.4.0.dist-info}/WHEEL +0 -0
- {warp_lang-1.3.2.dist-info → warp_lang-1.4.0.dist-info}/top_level.txt +0 -0
warp/native/mesh.cu
CHANGED
@@ -290,3 +290,54 @@ void mesh_refit_device(uint64_t id)
     }
 }
 
+void mesh_set_points_device(uint64_t id, wp::array_t<wp::vec3> points)
+{
+    wp::Mesh m;
+    if (mesh_get_descriptor(id, m))
+    {
+        if (points.ndim != 1 || points.shape[0] != m.points.shape[0])
+        {
+            fprintf(stderr, "The new points input for mesh_set_points_device does not match the shape of the original points!\n");
+            return;
+        }
+
+        m.points = points;
+
+        wp::Mesh* mesh_device = (wp::Mesh*)id;
+        memcpy_h2d(WP_CURRENT_CONTEXT, mesh_device, &m, sizeof(wp::Mesh));
+
+        // update the cpu copy as well
+        mesh_set_descriptor(id, m);
+
+        mesh_refit_device(id);
+    }
+    else
+    {
+        fprintf(stderr, "The mesh id provided to mesh_set_points_device is not valid!\n");
+        return;
+    }
+}
+
+void mesh_set_velocities_device(uint64_t id, wp::array_t<wp::vec3> velocities)
+{
+    wp::Mesh m;
+    if (mesh_get_descriptor(id, m))
+    {
+        if (velocities.ndim != 1 || velocities.shape[0] != m.velocities.shape[0])
+        {
+            fprintf(stderr, "The new velocities input for mesh_set_velocities_device does not match the shape of the original velocities\n");
+            return;
+        }
+
+        m.velocities = velocities;
+
+        wp::Mesh* mesh_device = (wp::Mesh*)id;
+        memcpy_h2d(WP_CURRENT_CONTEXT, mesh_device, &m, sizeof(wp::Mesh));
+        mesh_set_descriptor(id, m);
+    }
+    else
+    {
+        fprintf(stderr, "The mesh id provided to mesh_set_velocities_device is not valid!\n");
+        return;
+    }
+}
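The two device-side setters above let an existing mesh be re-pointed at new vertex data without rebuilding it: the descriptor is copied back to the device, the CPU-side copy is kept in sync via mesh_set_descriptor, and the BVH is refit. A minimal sketch of how this might be driven from Python is shown below; it assumes the wp.Mesh wrapper in 1.4.0 exposes points (and velocities) as assignable properties that route to these entry points, so treat the property assignment as illustrative rather than a confirmed API.

# sketch only: assumes wp.Mesh.points forwards to mesh_set_points_device and refits the BVH
import warp as wp

wp.init()

points = wp.array([(0.0, 0.0, 0.0), (1.0, 0.0, 0.0), (0.0, 1.0, 0.0)], dtype=wp.vec3, device="cuda")
indices = wp.array([0, 1, 2], dtype=wp.int32, device="cuda")
mesh = wp.Mesh(points=points, indices=indices)

# replacement positions must keep the original shape; a mismatch is rejected
# with the fprintf error shown in mesh_set_points_device above
new_points = wp.array([(0.0, 0.0, 1.0), (1.0, 0.0, 1.0), (0.0, 1.0, 1.0)], dtype=wp.vec3, device="cuda")
mesh.points = new_points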
warp/native/mesh.h
CHANGED
@@ -1881,6 +1881,7 @@ CUDA_CALLABLE inline void adj_mesh_get_index(uint64_t id, int index,
 }
 
 CUDA_CALLABLE bool mesh_get_descriptor(uint64_t id, Mesh& mesh);
+CUDA_CALLABLE bool mesh_set_descriptor(uint64_t id, const Mesh& mesh);
 CUDA_CALLABLE void mesh_add_descriptor(uint64_t id, const Mesh& mesh);
 CUDA_CALLABLE void mesh_rem_descriptor(uint64_t id);
 
warp/native/quat.h
CHANGED
@@ -29,6 +29,14 @@ struct quat_t
         w = static_cast<Type>(other.w);
     }
 
+    inline CUDA_CALLABLE quat_t(const initializer_array<4, Type> &l)
+    {
+        x = l[0];
+        y = l[1];
+        z = l[2];
+        w = l[3];
+    }
+
     // imaginary part
     Type x;
     Type y;

@@ -479,6 +487,41 @@ inline CUDA_CALLABLE void adj_indexref(quat_t<Type>* q, int idx,
     // nop
 }
 
+template<typename Type>
+inline CUDA_CALLABLE quat_t<Type> assign(quat_t<Type>& q, int idx, Type value)
+{
+#ifndef NDEBUG
+    if (idx < 0 || idx > 3)
+    {
+        printf("quat index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
+        assert(0);
+    }
+#endif
+
+    quat_t<Type> ret(q);
+    ret[idx] = value;
+    return ret;
+}
+
+template<typename Type>
+inline CUDA_CALLABLE void adj_assign(quat_t<Type>& q, int idx, Type value, quat_t<Type>& adj_q, int& adj_idx, Type& adj_value, const quat_t<Type>& adj_ret)
+{
+#ifndef NDEBUG
+    if (idx < 0 || idx > 3)
+    {
+        printf("quat index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
+        assert(0);
+    }
+#endif
+
+    adj_value += adj_ret[idx];
+    for(unsigned i=0; i < 4; ++i)
+    {
+        if(i != idx)
+            adj_q[i] += adj_ret[i];
+    }
+}
+
 template<typename Type>
 CUDA_CALLABLE inline quat_t<Type> lerp(const quat_t<Type>& a, const quat_t<Type>& b, Type t)
 {
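assign/adj_assign implement indexed assignment on a quaternion as a pure function that returns a modified copy, which is what makes writes like q[2] = v differentiable. The adjoint rule is simple: the gradient of the written component flows to the assigned value, and the gradients of the untouched components flow back to the original quaternion. A plain-Python model of that rule (illustrative only; the names below are not the Warp API):

# forward: return a copy with one component replaced
def assign(q, idx, value):
    ret = list(q)
    ret[idx] = value
    return ret

# backward: route adj_ret[idx] to the value, everything else to the input quaternion
def adj_assign(q, idx, value, adj_q, adj_value, adj_ret):
    adj_value += adj_ret[idx]
    for i in range(4):
        if i != idx:
            adj_q[i] += adj_ret[i]
    return adj_q, adj_value

q = [0.0, 0.0, 0.0, 1.0]
out = assign(q, 2, 0.5)                                   # [0.0, 0.0, 0.5, 1.0]
adj_q, adj_v = adj_assign(q, 2, 0.5, [0.0] * 4, 0.0, [1.0] * 4)
# adj_v == 1.0, adj_q == [1.0, 1.0, 0.0, 1.0]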
warp/native/spatial.h
CHANGED
@@ -127,6 +127,12 @@ struct transform_t
     CUDA_CALLABLE inline transform_t(vec_t<3,Type> p=vec_t<3,Type>(), quat_t<Type> q=quat_t<Type>()) : p(p), q(q) {}
     CUDA_CALLABLE inline transform_t(Type) {} // helps uniform initialization
 
+    CUDA_CALLABLE inline transform_t(const initializer_array<7, Type> &l)
+    {
+        p = vec_t<3,Type>(l[0], l[1], l[2]);
+        q = quat_t<Type>(l[3], l[4], l[5], l[6]);
+    }
+
     CUDA_CALLABLE inline Type operator[](int index) const
     {
         assert(index < 7);
warp/native/vec.h
CHANGED
@@ -381,6 +381,31 @@ inline CUDA_CALLABLE vec_t<3, Type> sub(vec_t<3, Type> a, vec_t<3, Type> b)
     return vec_t<3, Type>( a.c[0] - b.c[0], a.c[1] - b.c[1], a.c[2] - b.c[2]);
 }
 
+// modulo
+template<unsigned Length, typename Type>
+inline CUDA_CALLABLE vec_t<Length, Type> mod(vec_t<Length, Type> a, vec_t<Length, Type> b)
+{
+    vec_t<Length, Type> ret;
+    for (unsigned i=0; i < Length; ++i)
+    {
+        ret[i] = mod(a[i], b[i]);
+    }
+
+    return ret;
+}
+
+template<typename Type>
+inline CUDA_CALLABLE vec_t<2, Type> mod(vec_t<2, Type> a, vec_t<2, Type> b)
+{
+    return vec_t<2, Type>(mod(a.c[0], b.c[0]), mod(a.c[1], b.c[1]));
+}
+
+template<typename Type>
+inline CUDA_CALLABLE vec_t<3, Type> mod(vec_t<3, Type> a, vec_t<3, Type> b)
+{
+    return vec_t<3, Type>(mod(a.c[0], b.c[0]), mod(a.c[1], b.c[1]), mod(a.c[2], b.c[2]));
+}
+
 // dot product:
 template<unsigned Length, typename Type>
 inline CUDA_CALLABLE Type dot(vec_t<Length, Type> a, vec_t<Length, Type> b)

@@ -470,6 +495,40 @@ inline CUDA_CALLABLE void adj_indexref(vec_t<Length, Type>* v, int idx,
     // nop
 }
 
+template<unsigned Length, typename Type>
+inline CUDA_CALLABLE vec_t<Length, Type> assign(vec_t<Length, Type>& v, int idx, Type value)
+{
+#ifndef NDEBUG
+    if (idx < 0 || idx >= Length)
+    {
+        printf("vec index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
+        assert(0);
+    }
+#endif
+
+    vec_t<Length, Type> ret(v);
+    ret[idx] = value;
+    return ret;
+}
+
+template<unsigned Length, typename Type>
+inline CUDA_CALLABLE void adj_assign(vec_t<Length, Type>& v, int idx, Type value, vec_t<Length, Type>& adj_v, int& adj_idx, Type& adj_value, const vec_t<Length, Type>& adj_ret)
+{
+#ifndef NDEBUG
+    if (idx < 0 || idx >= Length)
+    {
+        printf("vec index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
+        assert(0);
+    }
+#endif
+
+    adj_value += adj_ret[idx];
+    for(unsigned i=0; i < Length; ++i)
+    {
+        if (i != idx)
+            adj_v[i] += adj_ret[i];
+    }
+}
 
 template<unsigned Length, typename Type>
 inline CUDA_CALLABLE Type length(vec_t<Length, Type> a)

@@ -896,6 +955,21 @@ inline CUDA_CALLABLE void adj_sub(vec_t<3, Type> a, vec_t<3, Type> b, vec_t<3, T
     adj_b.c[2] -= adj_ret.c[2];
 }
 
+template<unsigned Length, typename Type>
+inline CUDA_CALLABLE void adj_mod(vec_t<Length, Type> a, vec_t<Length, Type> b, vec_t<Length, Type>& adj_a, vec_t<Length, Type>& adj_b, const vec_t<Length, Type>& adj_ret)
+{
+}
+
+template<typename Type>
+inline CUDA_CALLABLE void adj_mod(vec_t<2, Type> a, vec_t<2, Type> b, vec_t<2, Type>& adj_a, vec_t<2, Type>& adj_b, const vec_t<2, Type>& adj_ret)
+{
+}
+
+template<typename Type>
+inline CUDA_CALLABLE void adj_mod(vec_t<3, Type> a, vec_t<3, Type> b, vec_t<3, Type>& adj_a, vec_t<3, Type>& adj_b, const vec_t<3, Type>& adj_ret)
+{
+}
+
 template<unsigned Length, typename Type>
 inline CUDA_CALLABLE void adj_dot(vec_t<Length, Type> a, vec_t<Length, Type> b, vec_t<Length, Type>& adj_a, vec_t<Length, Type>& adj_b, const Type adj_ret)
 {
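The new mod overloads apply the scalar modulo componentwise, with explicit 2- and 3-component specializations alongside the generic loop; the matching adj_mod overloads are intentionally empty, so no gradient propagates through the operation, and the assign/adj_assign pair mirrors the quaternion version above for arbitrary-length vectors. A short plain-Python sketch of the componentwise modulo semantics (illustrative only; math.fmod stands in for the scalar mod, which is an assumption about the float behaviour):

import math

def vec_mod(a, b):
    # componentwise modulo, mirroring the generic vec_t mod() overload
    return tuple(math.fmod(x, y) for x, y in zip(a, b))

print(vec_mod((5.0, -3.0, 7.5), (2.0, 2.0, 2.5)))  # (1.0, -1.0, 0.0)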
warp/native/warp.cpp
CHANGED
@@ -1010,7 +1010,7 @@ WP_API int cuda_set_peer_access_enabled(void* target_context, void* peer_context
 WP_API int cuda_is_mempool_access_enabled(int target_ordinal, int peer_ordinal) { return 0; }
 WP_API int cuda_set_mempool_access_enabled(int target_ordinal, int peer_ordinal, int enable) { return 0; }
 
-WP_API void* cuda_stream_create(void* context) { return NULL; }
+WP_API void* cuda_stream_create(void* context, int priority) { return NULL; }
 WP_API void cuda_stream_destroy(void* context, void* stream) {}
 WP_API void cuda_stream_register(void* context, void* stream) {}
 WP_API void cuda_stream_unregister(void* context, void* stream) {}

@@ -1020,6 +1020,7 @@ WP_API void cuda_stream_wait_event(void* stream, void* event) {}
 WP_API void cuda_stream_wait_stream(void* stream, void* other_stream, void* event) {}
 WP_API int cuda_stream_is_capturing(void* stream) { return 0; }
 WP_API uint64_t cuda_stream_get_capture_id(void* stream) { return 0; }
+WP_API int cuda_stream_get_priority(void* stream) { return 0; }
 
 WP_API void* cuda_event_create(void* context, unsigned flags) { return NULL; }
 WP_API void cuda_event_destroy(void* event) {}
warp/native/warp.cu
CHANGED
@@ -1963,7 +1963,6 @@ void cuda_context_set_stream(void* context, void* stream, int sync)
     }
 }
 
-
 int cuda_is_peer_access_supported(int target_ordinal, int peer_ordinal)
 {
     int num_devices = int(g_devices.size());

@@ -2174,12 +2173,12 @@ int cuda_set_mempool_access_enabled(int target_ordinal, int peer_ordinal, int en
 }
 
 
-void* cuda_stream_create(void* context)
+void* cuda_stream_create(void* context, int priority)
 {
     ContextGuard guard(context, true);
 
     CUstream stream;
-    if (check_cu(
+    if (check_cu(cuStreamCreateWithPriority_f(&stream, CU_STREAM_DEFAULT, priority)))
     {
         cuda_stream_register(WP_CURRENT_CONTEXT, stream);
         return stream;

@@ -2268,6 +2267,14 @@ uint64_t cuda_stream_get_capture_id(void* stream)
     return get_capture_id(static_cast<CUstream>(stream));
 }
 
+int cuda_stream_get_priority(void* stream)
+{
+    int priority = 0;
+    check_cuda(cuStreamGetPriority_f(static_cast<CUstream>(stream), &priority));
+
+    return priority;
+}
+
 void* cuda_event_create(void* context, unsigned flags)
 {
     ContextGuard guard(context, true);
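cuda_stream_create now accepts a priority that is passed to cuStreamCreateWithPriority, and cuda_stream_get_priority reads it back with cuStreamGetPriority. A hedged sketch of how this could surface in the Python API, assuming wp.Stream in 1.4.0 forwards a priority argument to this entry point (the exact keyword should be checked against the release notes):

import warp as wp

wp.init()

# assumption: `priority` is forwarded to cuda_stream_create(context, priority);
# CUDA clamps the value to the device's supported priority range
fast = wp.Stream("cuda:0", priority=-1)   # more negative means higher priority in CUDA
normal = wp.Stream("cuda:0")

with wp.ScopedStream(fast):
    pass  # kernels launched in this scope target the higher-priority stream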
warp/native/warp.h
CHANGED
@@ -83,6 +83,12 @@ extern "C"
     WP_API void mesh_destroy_device(uint64_t id);
     WP_API void mesh_refit_device(uint64_t id);
 
+    WP_API void mesh_set_points_host(uint64_t id, wp::array_t<wp::vec3> points);
+    WP_API void mesh_set_points_device(uint64_t id, wp::array_t<wp::vec3> points);
+
+    WP_API void mesh_set_velocities_host(uint64_t id, wp::array_t<wp::vec3> velocities);
+    WP_API void mesh_set_velocities_device(uint64_t id, wp::array_t<wp::vec3> velocities);
+
     WP_API uint64_t hash_grid_create_host(int dim_x, int dim_y, int dim_z);
     WP_API void hash_grid_reserve_host(uint64_t id, int num_points);
     WP_API void hash_grid_destroy_host(uint64_t id);

@@ -286,7 +292,7 @@ extern "C"
     WP_API int cuda_is_mempool_access_enabled(int target_ordinal, int peer_ordinal);
     WP_API int cuda_set_mempool_access_enabled(int target_ordinal, int peer_ordinal, int enable);
 
-    WP_API void* cuda_stream_create(void* context);
+    WP_API void* cuda_stream_create(void* context, int priority);
     WP_API void cuda_stream_destroy(void* context, void* stream);
     WP_API void cuda_stream_register(void* context, void* stream);
     WP_API void cuda_stream_unregister(void* context, void* stream);

@@ -296,6 +302,7 @@ extern "C"
     WP_API void cuda_stream_wait_stream(void* stream, void* other_stream, void* event);
     WP_API int cuda_stream_is_capturing(void* stream);
    WP_API uint64_t cuda_stream_get_capture_id(void* stream);
+    WP_API int cuda_stream_get_priority(void* stream);
 
     WP_API void* cuda_event_create(void* context, unsigned flags);
     WP_API void cuda_event_destroy(void* event);
warp/paddle.py
ADDED
# Copyright (c) 2022 NVIDIA CORPORATION. All rights reserved.
# NVIDIA CORPORATION and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto. Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION is strictly prohibited.

from __future__ import annotations

import ctypes
from typing import TYPE_CHECKING, Optional, Union

import numpy

import warp
import warp.context

if TYPE_CHECKING:
    import paddle


# return the warp device corresponding to a paddle device
def device_from_paddle(paddle_device: Union[paddle.base.libpaddle.Place, str]) -> warp.context.Device:
    """Return the Warp device corresponding to a Paddle device.

    Args:
        paddle_device (`paddle.base.libpaddle.Place` or `str`): Paddle device identifier

    Raises:
        RuntimeError: Paddle device does not have a corresponding Warp device
    """
    if type(paddle_device) is str:
        warp_device = warp.context.runtime.device_map.get(paddle_device)
        if warp_device is not None:
            return warp_device
        elif paddle_device.startswith("gpu"):
            return warp.context.runtime.get_current_cuda_device()
        else:
            raise RuntimeError(f"Unsupported Paddle device {paddle_device}")
    else:
        import paddle

        try:
            if paddle_device.is_gpu_place():
                return warp.context.runtime.cuda_devices[paddle_device.gpu_device_id()]
            elif paddle_device.is_cpu_place():
                return warp.context.runtime.cpu_device
            else:
                raise RuntimeError(f"Unsupported Paddle device type {paddle_device}")
        except Exception as e:
            import paddle

            if not isinstance(paddle_device, paddle.base.libpaddle.Place):
                raise ValueError("Argument must be a paddle.base.libpaddle.Place object or a string") from e
            raise


def device_to_paddle(warp_device: warp.context.Devicelike) -> str:
    """Return the Paddle device string corresponding to a Warp device.

    Args:
        warp_device: An identifier that can be resolved to a :class:`warp.context.Device`.

    Raises:
        RuntimeError: The Warp device is not compatible with PyPaddle.
    """
    device = warp.get_device(warp_device)
    if device.is_cpu or device.is_primary:
        return str(device).replace("cuda", "gpu")
    elif device.is_cuda and device.is_uva:
        # it's not a primary context, but paddle can access the data ptr directly thanks to UVA
        return f"gpu:{device.ordinal}"
    raise RuntimeError(f"Warp device {device} is not compatible with paddle")


def dtype_to_paddle(warp_dtype):
    """Return the Paddle dtype corresponding to a Warp dtype.

    Args:
        warp_dtype: A Warp data type that has a corresponding ``paddle.dtype``.
            ``warp.uint16``, ``warp.uint32``, and ``warp.uint64`` are mapped
            to the signed integer ``paddle.dtype`` of the same width.
    Raises:
        TypeError: Unable to find a corresponding PyPaddle data type.
    """
    # initialize lookup table on first call to defer paddle import
    if dtype_to_paddle.type_map is None:
        import paddle

        dtype_to_paddle.type_map = {
            warp.float16: paddle.float16,
            warp.float32: paddle.float32,
            warp.float64: paddle.float64,
            warp.int8: paddle.int8,
            warp.int16: paddle.int16,
            warp.int32: paddle.int32,
            warp.int64: paddle.int64,
            warp.uint8: paddle.uint8,
            warp.bool: paddle.bool,
            # paddle doesn't support unsigned ints bigger than 8 bits
            warp.uint16: paddle.int16,
            warp.uint32: paddle.int32,
            warp.uint64: paddle.int64,
        }

    paddle_dtype = dtype_to_paddle.type_map.get(warp_dtype)
    if paddle_dtype is not None:
        return paddle_dtype
    else:
        raise TypeError(f"Cannot convert {warp_dtype} to a Paddle type")


def dtype_from_paddle(paddle_dtype):
    """Return the Warp dtype corresponding to a Paddle dtype.

    Args:
        paddle_dtype: A ``paddle.dtype`` that has a corresponding Warp data type.
            Currently ``paddle.bfloat16``, ``paddle.complex64``, and
            ``paddle.complex128`` are not supported.

    Raises:
        TypeError: Unable to find a corresponding Warp data type.
    """
    # initialize lookup table on first call to defer paddle import
    if dtype_from_paddle.type_map is None:
        import paddle

        dtype_from_paddle.type_map = {
            paddle.float16: warp.float16,
            paddle.float32: warp.float32,
            paddle.float64: warp.float64,
            paddle.int8: warp.int8,
            paddle.int16: warp.int16,
            paddle.int32: warp.int32,
            paddle.int64: warp.int64,
            paddle.uint8: warp.uint8,
            paddle.bool: warp.bool,
            # currently unsupported by Warp
            # paddle.bfloat16:
            # paddle.complex64:
            # paddle.complex128:
        }

    warp_dtype = dtype_from_paddle.type_map.get(paddle_dtype)

    if warp_dtype is not None:
        return warp_dtype
    else:
        raise TypeError(f"Cannot convert {paddle_dtype} to a Warp type")


def dtype_is_compatible(paddle_dtype: paddle.dtype, warp_dtype) -> bool:
    """Evaluates whether the given paddle dtype is compatible with the given Warp dtype."""
    # initialize lookup table on first call to defer paddle import
    if dtype_is_compatible.compatible_sets is None:
        import paddle

        dtype_is_compatible.compatible_sets = {
            paddle.float64: {warp.float64},
            paddle.float32: {warp.float32},
            paddle.float16: {warp.float16},
            # allow aliasing integer tensors as signed or unsigned integer arrays
            paddle.int64: {warp.int64, warp.uint64},
            paddle.int32: {warp.int32, warp.uint32},
            paddle.int16: {warp.int16, warp.uint16},
            paddle.int8: {warp.int8, warp.uint8},
            paddle.uint8: {warp.uint8, warp.int8},
            paddle.bool: {warp.bool, warp.uint8, warp.int8},
            # currently unsupported by Warp
            # paddle.bfloat16:
            # paddle.complex64:
            # paddle.complex128:
        }

    compatible_set = dtype_is_compatible.compatible_sets.get(paddle_dtype)

    if compatible_set is not None:
        if warp_dtype in compatible_set:
            return True
        # check if it's a vector or matrix type
        if hasattr(warp_dtype, "_wp_scalar_type_"):
            return warp_dtype._wp_scalar_type_ in compatible_set

    return False


# lookup tables initialized when needed
dtype_from_paddle.type_map = None
dtype_to_paddle.type_map = None
dtype_is_compatible.compatible_sets = None


# wrap a paddle tensor to a wp array, data is not copied
def from_paddle(
    t: paddle.Tensor,
    dtype: Optional[paddle.dtype] = None,
    requires_grad: Optional[bool] = None,
    grad: Optional[paddle.Tensor] = None,
    return_ctype: bool = False,
) -> warp.array:
    """Convert a Paddle tensor to a Warp array without copying the data.

    Args:
        t (paddle.Tensor): The paddle tensor to wrap.
        dtype (warp.dtype, optional): The target data type of the resulting Warp array. Defaults to the tensor value type mapped to a Warp array value type.
        requires_grad (bool, optional): Whether the resulting array should wrap the tensor's gradient, if it exists (the grad tensor will be allocated otherwise). Defaults to the tensor's `requires_grad` value.
        grad (paddle.Tensor, optional): The grad attached to given tensor. Defaults to None.
        return_ctype (bool, optional): Whether to return a low-level array descriptor instead of a ``wp.array`` object (faster). The descriptor can be passed to Warp kernels.

    Returns:
        warp.array: The wrapped array or array descriptor.
    """
    if dtype is None:
        dtype = dtype_from_paddle(t.dtype)
    elif not dtype_is_compatible(t.dtype, dtype):
        raise RuntimeError(f"Cannot convert Paddle type {t.dtype} to Warp type {dtype}")

    # get size of underlying data type to compute strides
    ctype_size = ctypes.sizeof(dtype._type_)

    shape = tuple(t.shape)
    strides = tuple(s * ctype_size for s in t.strides)

    # if target is a vector or matrix type
    # then check if trailing dimensions match
    # the target type and update the shape
    if hasattr(dtype, "_shape_"):
        dtype_shape = dtype._shape_
        dtype_dims = len(dtype._shape_)
        # ensure inner shape matches
        if dtype_dims > len(shape) or dtype_shape != shape[-dtype_dims:]:
            raise RuntimeError(
                f"Could not convert Paddle tensor with shape {shape} to Warp array with dtype={dtype}, ensure that source inner shape is {dtype_shape}"
            )
        # ensure inner strides are contiguous
        if strides[-1] != ctype_size or (dtype_dims > 1 and strides[-2] != ctype_size * dtype_shape[-1]):
            raise RuntimeError(
                f"Could not convert Paddle tensor with shape {shape} to Warp array with dtype={dtype}, because the source inner strides are not contiguous"
            )
        # trim shape and strides
        shape = tuple(shape[:-dtype_dims]) or (1,)
        strides = tuple(strides[:-dtype_dims]) or (ctype_size,)

    # gradient
    # - if return_ctype is False, we set `grad` to a wp.array or None
    # - if return_ctype is True, we set `grad_ptr` and set `grad` as the owner (wp.array or paddle.Tensor)
    requires_grad = (not t.stop_gradient) if requires_grad is None else requires_grad
    grad_ptr = 0
    if grad is not None:
        if isinstance(grad, warp.array):
            if return_ctype:
                if grad.strides != strides:
                    raise RuntimeError(
                        f"Gradient strides must match array strides, expected {strides} but got {grad.strides}"
                    )
                grad_ptr = grad.ptr
        else:
            # assume grad is a paddle.Tensor
            if return_ctype:
                if t.strides != grad.strides:
                    raise RuntimeError(
                        f"Gradient strides must match array strides, expected {t.strides} but got {grad.strides}"
                    )
                grad_ptr = grad.data_ptr()
            else:
                grad = from_paddle(grad, dtype=dtype, requires_grad=False)
    elif requires_grad:
        # wrap the tensor gradient, allocate if necessary
        if t.grad is not None:
            if return_ctype:
                grad = t.grad
                if t.strides != grad.strides:
                    raise RuntimeError(
                        f"Gradient strides must match array strides, expected {t.strides} but got {grad.strides}"
                    )
                grad_ptr = grad.data_ptr()
            else:
                grad = from_paddle(t.grad, dtype=dtype, requires_grad=False)
        else:
            # allocate a zero-filled gradient if it doesn't exist
            # Note: we use Warp to allocate the shared gradient with compatible strides
            grad = warp.zeros(dtype=dtype, shape=shape, strides=strides, device=device_from_paddle(t.place))
            # use .grad_ for zero-copy
            t.grad_ = to_paddle(grad, requires_grad=False)
            grad_ptr = grad.ptr

    if return_ctype:
        ptr = t.data_ptr()

        # create array descriptor
        array_ctype = warp.types.array_t(ptr, grad_ptr, len(shape), shape, strides)

        # keep data and gradient alive
        array_ctype._ref = t
        array_ctype._gradref = grad

        return array_ctype

    else:
        a = warp.array(
            ptr=t.data_ptr(),
            dtype=dtype,
            shape=shape,
            strides=strides,
            device=device_from_paddle(t.place),
            copy=False,
            grad=grad,
            requires_grad=requires_grad,
        )

        # save a reference to the source tensor, otherwise it may get deallocated
        a._tensor = t

        return a


def to_paddle(a: warp.array, requires_grad: bool = None) -> paddle.Tensor:
    """
    Convert a Warp array to a Paddle tensor without copying the data.

    Args:
        a (warp.array): The Warp array to convert.
        requires_grad (bool, optional): Whether the resulting tensor should convert the array's gradient, if it exists, to a grad tensor. Defaults to the array's `requires_grad` value.

    Returns:
        paddle.Tensor: The converted tensor.
    """
    import paddle
    import paddle.utils.dlpack

    if requires_grad is None:
        requires_grad = a.requires_grad

    # Paddle does not support structured arrays
    if isinstance(a.dtype, warp.codegen.Struct):
        raise RuntimeError("Cannot convert structured Warp arrays to Paddle.")

    if a.device.is_cpu:
        # Paddle has an issue wrapping CPU objects
        # that support the __array_interface__ protocol
        # in this case we need to workaround by going
        # to an ndarray first, see https://pearu.github.io/array_interface_pypaddle.html
        t = paddle.to_tensor(numpy.asarray(a), place="cpu")
        t.stop_gradient = not requires_grad
        if requires_grad and a.requires_grad:
            # use .grad_ for zero-copy
            t.grad_ = paddle.to_tensor(numpy.asarray(a.grad), place="cpu")
        return t

    elif a.device.is_cuda:
        # Paddle does support the __cuda_array_interface__
        # correctly, but we must be sure to maintain a reference
        # to the owning object to prevent memory allocs going out of scope
        t = paddle.utils.dlpack.from_dlpack(warp.to_dlpack(a)).to(device=device_to_paddle(a.device))
        t.stop_gradient = not requires_grad
        if requires_grad and a.requires_grad:
            # use .grad_ for zero-copy
            t.grad_ = paddle.utils.dlpack.from_dlpack(warp.to_dlpack(a.grad)).to(device=device_to_paddle(a.device))
        return t

    else:
        raise RuntimeError("Unsupported device")


def stream_from_paddle(stream_or_device=None):
    """Convert from a Paddle CUDA stream to a Warp CUDA stream."""
    import paddle

    if isinstance(stream_or_device, paddle.device.Stream):
        stream = stream_or_device
    else:
        # assume arg is a paddle device
        stream = paddle.device.current_stream(stream_or_device)

    device = device_from_paddle(stream.device)

    warp_stream = warp.Stream(device, cuda_stream=stream.stream_base.cuda_stream)

    # save a reference to the source stream, otherwise it may be destroyed
    warp_stream._paddle_stream = stream

    return warp_stream