warp_lang-1.3.3-py3-none-macosx_10_13_universal2.whl → warp_lang-1.4.1-py3-none-macosx_10_13_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of warp-lang has been flagged for review.

Files changed (110)
  1. warp/__init__.py +6 -0
  2. warp/autograd.py +59 -6
  3. warp/bin/libwarp.dylib +0 -0
  4. warp/build_dll.py +8 -10
  5. warp/builtins.py +103 -3
  6. warp/codegen.py +447 -53
  7. warp/config.py +1 -1
  8. warp/context.py +682 -405
  9. warp/dlpack.py +2 -0
  10. warp/examples/benchmarks/benchmark_cloth.py +10 -0
  11. warp/examples/core/example_render_opengl.py +12 -10
  12. warp/examples/fem/example_adaptive_grid.py +251 -0
  13. warp/examples/fem/example_apic_fluid.py +1 -1
  14. warp/examples/fem/example_diffusion_3d.py +2 -2
  15. warp/examples/fem/example_magnetostatics.py +1 -1
  16. warp/examples/fem/example_streamlines.py +1 -0
  17. warp/examples/fem/utils.py +25 -5
  18. warp/examples/sim/example_cloth.py +50 -6
  19. warp/fem/__init__.py +2 -0
  20. warp/fem/adaptivity.py +493 -0
  21. warp/fem/field/field.py +2 -1
  22. warp/fem/field/nodal_field.py +18 -26
  23. warp/fem/field/test.py +4 -4
  24. warp/fem/field/trial.py +4 -4
  25. warp/fem/geometry/__init__.py +1 -0
  26. warp/fem/geometry/adaptive_nanogrid.py +843 -0
  27. warp/fem/geometry/nanogrid.py +55 -28
  28. warp/fem/space/__init__.py +1 -1
  29. warp/fem/space/nanogrid_function_space.py +69 -35
  30. warp/fem/utils.py +118 -107
  31. warp/jax_experimental.py +28 -15
  32. warp/native/array.h +0 -1
  33. warp/native/builtin.h +103 -6
  34. warp/native/bvh.cu +4 -2
  35. warp/native/cuda_util.cpp +14 -0
  36. warp/native/cuda_util.h +2 -0
  37. warp/native/error.cpp +4 -2
  38. warp/native/exports.h +99 -0
  39. warp/native/mat.h +97 -0
  40. warp/native/mesh.cpp +36 -0
  41. warp/native/mesh.cu +52 -1
  42. warp/native/mesh.h +1 -0
  43. warp/native/quat.h +43 -0
  44. warp/native/range.h +11 -2
  45. warp/native/spatial.h +6 -0
  46. warp/native/vec.h +74 -0
  47. warp/native/warp.cpp +2 -1
  48. warp/native/warp.cu +10 -3
  49. warp/native/warp.h +8 -1
  50. warp/paddle.py +382 -0
  51. warp/sim/__init__.py +1 -0
  52. warp/sim/collide.py +519 -0
  53. warp/sim/integrator_euler.py +18 -5
  54. warp/sim/integrator_featherstone.py +5 -5
  55. warp/sim/integrator_vbd.py +1026 -0
  56. warp/sim/integrator_xpbd.py +2 -6
  57. warp/sim/model.py +50 -25
  58. warp/sparse.py +9 -7
  59. warp/stubs.py +459 -0
  60. warp/tape.py +2 -0
  61. warp/tests/aux_test_dependent.py +1 -0
  62. warp/tests/aux_test_name_clash1.py +32 -0
  63. warp/tests/aux_test_name_clash2.py +32 -0
  64. warp/tests/aux_test_square.py +1 -0
  65. warp/tests/test_array.py +188 -0
  66. warp/tests/test_async.py +3 -3
  67. warp/tests/test_atomic.py +6 -0
  68. warp/tests/test_closest_point_edge_edge.py +93 -1
  69. warp/tests/test_codegen.py +93 -15
  70. warp/tests/test_codegen_instancing.py +1457 -0
  71. warp/tests/test_collision.py +486 -0
  72. warp/tests/test_compile_consts.py +3 -28
  73. warp/tests/test_dlpack.py +170 -0
  74. warp/tests/test_examples.py +22 -8
  75. warp/tests/test_fast_math.py +10 -4
  76. warp/tests/test_fem.py +81 -1
  77. warp/tests/test_func.py +46 -0
  78. warp/tests/test_implicit_init.py +49 -0
  79. warp/tests/test_jax.py +58 -0
  80. warp/tests/test_mat.py +84 -0
  81. warp/tests/test_mesh_query_point.py +188 -0
  82. warp/tests/test_model.py +13 -0
  83. warp/tests/test_module_hashing.py +40 -0
  84. warp/tests/test_multigpu.py +3 -3
  85. warp/tests/test_overwrite.py +8 -0
  86. warp/tests/test_paddle.py +852 -0
  87. warp/tests/test_print.py +89 -0
  88. warp/tests/test_quat.py +111 -0
  89. warp/tests/test_reload.py +31 -1
  90. warp/tests/test_scalar_ops.py +2 -0
  91. warp/tests/test_static.py +568 -0
  92. warp/tests/test_streams.py +64 -3
  93. warp/tests/test_struct.py +4 -4
  94. warp/tests/test_torch.py +24 -0
  95. warp/tests/test_triangle_closest_point.py +137 -0
  96. warp/tests/test_types.py +1 -1
  97. warp/tests/test_vbd.py +386 -0
  98. warp/tests/test_vec.py +143 -0
  99. warp/tests/test_vec_scalar_ops.py +139 -0
  100. warp/tests/unittest_suites.py +12 -0
  101. warp/tests/unittest_utils.py +9 -5
  102. warp/thirdparty/dlpack.py +3 -1
  103. warp/types.py +167 -36
  104. warp/utils.py +37 -14
  105. {warp_lang-1.3.3.dist-info → warp_lang-1.4.1.dist-info}/METADATA +10 -8
  106. {warp_lang-1.3.3.dist-info → warp_lang-1.4.1.dist-info}/RECORD +109 -97
  107. warp/tests/test_point_triangle_closest_point.py +0 -143
  108. {warp_lang-1.3.3.dist-info → warp_lang-1.4.1.dist-info}/LICENSE.md +0 -0
  109. {warp_lang-1.3.3.dist-info → warp_lang-1.4.1.dist-info}/WHEEL +0 -0
  110. {warp_lang-1.3.3.dist-info → warp_lang-1.4.1.dist-info}/top_level.txt +0 -0
warp/native/mesh.cu CHANGED
@@ -101,7 +101,7 @@ __global__ void bvh_refit_with_solid_angle_kernel(int n, const int* __restrict__
     int finished = atomicAdd(&child_count[parent], 1);
 
     // if we have are the last thread (such that the parent node is now complete)
-    // then update its bounds and move onto the the next parent in the hierarchy
+    // then update its bounds and move onto the next parent in the hierarchy
     if (finished == 1)
     {
         //printf("Compute non-leaf at %d\n", index);
@@ -290,3 +290,54 @@ void mesh_refit_device(uint64_t id)
     }
 }
 
+void mesh_set_points_device(uint64_t id, wp::array_t<wp::vec3> points)
+{
+    wp::Mesh m;
+    if (mesh_get_descriptor(id, m))
+    {
+        if (points.ndim != 1 || points.shape[0] != m.points.shape[0])
+        {
+            fprintf(stderr, "The new points input for mesh_set_points_device does not match the shape of the original points!\n");
+            return;
+        }
+
+        m.points = points;
+
+        wp::Mesh* mesh_device = (wp::Mesh*)id;
+        memcpy_h2d(WP_CURRENT_CONTEXT, mesh_device, &m, sizeof(wp::Mesh));
+
+        // update the cpu copy as well
+        mesh_set_descriptor(id, m);
+
+        mesh_refit_device(id);
+    }
+    else
+    {
+        fprintf(stderr, "The mesh id provided to mesh_set_points_device is not valid!\n");
+        return;
+    }
+}
+
+void mesh_set_velocities_device(uint64_t id, wp::array_t<wp::vec3> velocities)
+{
+    wp::Mesh m;
+    if (mesh_get_descriptor(id, m))
+    {
+        if (velocities.ndim != 1 || velocities.shape[0] != m.velocities.shape[0])
+        {
+            fprintf(stderr, "The new velocities input for mesh_set_velocities_device does not match the shape of the original velocities\n");
+            return;
+        }
+
+        m.velocities = velocities;
+
+        wp::Mesh* mesh_device = (wp::Mesh*)id;
+        memcpy_h2d(WP_CURRENT_CONTEXT, mesh_device, &m, sizeof(wp::Mesh));
+        mesh_set_descriptor(id, m);
+    }
+    else
+    {
+        fprintf(stderr, "The mesh id provided to mesh_set_velocities_device is not valid!\n");
+        return;
+    }
+}
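
The two functions above let callers swap a mesh's point or velocity buffer in place: the input shape is validated against the original array, the device-side Mesh struct and its host descriptor are both updated, and (for points) the BVH is refit. A minimal Python-side sketch of how this is typically driven, assuming the wp.Mesh points attribute forwards to mesh_set_points_device (only the native entry points appear in this diff, so the attribute wiring is an assumption):

    import numpy as np
    import warp as wp

    wp.init()

    # build a small random triangle soup on the GPU
    pts = wp.array(np.random.rand(100, 3).astype(np.float32), dtype=wp.vec3, device="cuda")
    tris = wp.array(np.random.randint(0, 100, size=300).astype(np.int32), dtype=wp.int32, device="cuda")
    mesh = wp.Mesh(points=pts, indices=tris)

    # hypothetical: assigning a new, same-shape points array routes through
    # mesh_set_points_device, which validates the shape and refits the BVH
    mesh.points = wp.array(np.random.rand(100, 3).astype(np.float32), dtype=wp.vec3, device="cuda")
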
warp/native/mesh.h CHANGED
@@ -1881,6 +1881,7 @@ CUDA_CALLABLE inline void adj_mesh_get_index(uint64_t id, int index,
 }
 
 CUDA_CALLABLE bool mesh_get_descriptor(uint64_t id, Mesh& mesh);
+CUDA_CALLABLE bool mesh_set_descriptor(uint64_t id, const Mesh& mesh);
 CUDA_CALLABLE void mesh_add_descriptor(uint64_t id, const Mesh& mesh);
 CUDA_CALLABLE void mesh_rem_descriptor(uint64_t id);
 
warp/native/quat.h CHANGED
@@ -29,6 +29,14 @@ struct quat_t
         w = static_cast<Type>(other.w);
     }
 
+    inline CUDA_CALLABLE quat_t(const initializer_array<4, Type> &l)
+    {
+        x = l[0];
+        y = l[1];
+        z = l[2];
+        w = l[3];
+    }
+
     // imaginary part
     Type x;
     Type y;
@@ -479,6 +487,41 @@ inline CUDA_CALLABLE void adj_indexref(quat_t<Type>* q, int idx,
     // nop
 }
 
+template<typename Type>
+inline CUDA_CALLABLE quat_t<Type> assign(quat_t<Type>& q, int idx, Type value)
+{
+#ifndef NDEBUG
+    if (idx < 0 || idx > 3)
+    {
+        printf("quat index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
+        assert(0);
+    }
+#endif
+
+    quat_t<Type> ret(q);
+    ret[idx] = value;
+    return ret;
+}
+
+template<typename Type>
+inline CUDA_CALLABLE void adj_assign(quat_t<Type>& q, int idx, Type value, quat_t<Type>& adj_q, int& adj_idx, Type& adj_value, const quat_t<Type>& adj_ret)
+{
+#ifndef NDEBUG
+    if (idx < 0 || idx > 3)
+    {
+        printf("quat index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
+        assert(0);
+    }
+#endif
+
+    adj_value += adj_ret[idx];
+    for(unsigned i=0; i < 4; ++i)
+    {
+        if(i != idx)
+            adj_q[i] += adj_ret[i];
+    }
+}
+
 template<typename Type>
 CUDA_CALLABLE inline quat_t<Type> lerp(const quat_t<Type>& a, const quat_t<Type>& b, Type t)
 {
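
assign() returns a copy of the quaternion with one component replaced rather than mutating in place, which keeps the reverse pass simple: adj_assign routes the adjoint of the overwritten component to the assigned value and passes the remaining components through to the original quaternion. A sketch of the kernel-level feature this plausibly enables (that component writes in kernels lower to the new assign() builtin is an assumption; only the native side is shown here):

    import warp as wp

    wp.init()

    @wp.kernel
    def set_w(out: wp.array(dtype=wp.quat)):
        q = wp.quat(0.0, 0.0, 0.0, 1.0)
        # hypothetical: writing one component produces a copy via assign(),
        # so gradients for the other three components still flow back to q
        q[3] = 0.5
        out[0] = q

    out = wp.zeros(1, dtype=wp.quat)
    wp.launch(set_w, dim=1, outputs=[out])
    print(out)
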
warp/native/range.h CHANGED
@@ -97,8 +97,17 @@ CUDA_CALLABLE inline range_t iter_reverse(const range_t& r)
 {
     // generates a reverse range, equivalent to reversed(range())
     range_t rev;
-    rev.start = r.end-1;
-    rev.end = r.start-1;
+
+    if (r.step > 0)
+    {
+        rev.start = r.start + int((r.end - r.start - 1) / r.step) * r.step;
+    }
+    else
+    {
+        rev.start = r.start + int((r.end - r.start + 1) / r.step) * r.step;
+    }
+
+    rev.end = r.start - r.step;
     rev.step = -r.step;
 
     rev.i = rev.start;
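
The old code assumed step == 1: starting the reverse iteration at end - 1 can begin from a value the forward range never visits when |step| > 1. The fix snaps the reverse start to the last element the forward range actually produces. For example, range(0, 11, 3) visits 0, 3, 6, 9, so its reverse must start at 9 = 0 + ((11 - 0 - 1) / 3) * 3, not at end - 1 = 10. Since the comment says iter_reverse backs reversed(range()), a kernel-side check might look like this:

    import warp as wp

    wp.init()

    @wp.kernel
    def rev_scan():
        # forward range(0, 11, 3) visits 0, 3, 6, 9; with the fix, the reverse
        # iteration starts at 9 (the last visited element), not end - 1 = 10
        for i in reversed(range(0, 11, 3)):
            wp.printf("%d\n", i)  # expected: 9, 6, 3, 0

    wp.launch(rev_scan, dim=1)
    wp.synchronize()
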
warp/native/spatial.h CHANGED
@@ -127,6 +127,12 @@ struct transform_t
     CUDA_CALLABLE inline transform_t(vec_t<3,Type> p=vec_t<3,Type>(), quat_t<Type> q=quat_t<Type>()) : p(p), q(q) {}
     CUDA_CALLABLE inline transform_t(Type) {}  // helps uniform initialization
 
+    CUDA_CALLABLE inline transform_t(const initializer_array<7, Type> &l)
+    {
+        p = vec_t<3,Type>(l[0], l[1], l[2]);
+        q = quat_t<Type>(l[3], l[4], l[5], l[6]);
+    }
+
     CUDA_CALLABLE inline Type operator[](int index) const
     {
         assert(index < 7);
warp/native/vec.h CHANGED
@@ -381,6 +381,31 @@ inline CUDA_CALLABLE vec_t<3, Type> sub(vec_t<3, Type> a, vec_t<3, Type> b)
     return vec_t<3, Type>( a.c[0] - b.c[0], a.c[1] - b.c[1], a.c[2] - b.c[2]);
 }
 
+// modulo
+template<unsigned Length, typename Type>
+inline CUDA_CALLABLE vec_t<Length, Type> mod(vec_t<Length, Type> a, vec_t<Length, Type> b)
+{
+    vec_t<Length, Type> ret;
+    for (unsigned i=0; i < Length; ++i)
+    {
+        ret[i] = mod(a[i], b[i]);
+    }
+
+    return ret;
+}
+
+template<typename Type>
+inline CUDA_CALLABLE vec_t<2, Type> mod(vec_t<2, Type> a, vec_t<2, Type> b)
+{
+    return vec_t<2, Type>(mod(a.c[0], b.c[0]), mod(a.c[1], b.c[1]));
+}
+
+template<typename Type>
+inline CUDA_CALLABLE vec_t<3, Type> mod(vec_t<3, Type> a, vec_t<3, Type> b)
+{
+    return vec_t<3, Type>(mod(a.c[0], b.c[0]), mod(a.c[1], b.c[1]), mod(a.c[2], b.c[2]));
+}
+
 // dot product:
 template<unsigned Length, typename Type>
 inline CUDA_CALLABLE Type dot(vec_t<Length, Type> a, vec_t<Length, Type> b)
@@ -470,6 +495,40 @@ inline CUDA_CALLABLE void adj_indexref(vec_t<Length, Type>* v, int idx,
     // nop
 }
 
+template<unsigned Length, typename Type>
+inline CUDA_CALLABLE vec_t<Length, Type> assign(vec_t<Length, Type>& v, int idx, Type value)
+{
+#ifndef NDEBUG
+    if (idx < 0 || idx >= Length)
+    {
+        printf("vec index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
+        assert(0);
+    }
+#endif
+
+    vec_t<Length, Type> ret(v);
+    ret[idx] = value;
+    return ret;
+}
+
+template<unsigned Length, typename Type>
+inline CUDA_CALLABLE void adj_assign(vec_t<Length, Type>& v, int idx, Type value, vec_t<Length, Type>& adj_v, int& adj_idx, Type& adj_value, const vec_t<Length, Type>& adj_ret)
+{
+#ifndef NDEBUG
+    if (idx < 0 || idx >= Length)
+    {
+        printf("vec index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
+        assert(0);
+    }
+#endif
+
+    adj_value += adj_ret[idx];
+    for(unsigned i=0; i < Length; ++i)
+    {
+        if (i != idx)
+            adj_v[i] += adj_ret[i];
+    }
+}
 
 template<unsigned Length, typename Type>
 inline CUDA_CALLABLE Type length(vec_t<Length, Type> a)
@@ -896,6 +955,21 @@ inline CUDA_CALLABLE void adj_sub(vec_t<3, Type> a, vec_t<3, Type> b, vec_t<3, T
     adj_b.c[2] -= adj_ret.c[2];
 }
 
+template<unsigned Length, typename Type>
+inline CUDA_CALLABLE void adj_mod(vec_t<Length, Type> a, vec_t<Length, Type> b, vec_t<Length, Type>& adj_a, vec_t<Length, Type>& adj_b, const vec_t<Length, Type>& adj_ret)
+{
+}
+
+template<typename Type>
+inline CUDA_CALLABLE void adj_mod(vec_t<2, Type> a, vec_t<2, Type> b, vec_t<2, Type>& adj_a, vec_t<2, Type>& adj_b, const vec_t<2, Type>& adj_ret)
+{
+}
+
+template<typename Type>
+inline CUDA_CALLABLE void adj_mod(vec_t<3, Type> a, vec_t<3, Type> b, vec_t<3, Type>& adj_a, vec_t<3, Type>& adj_b, const vec_t<3, Type>& adj_ret)
+{
+}
+
 template<unsigned Length, typename Type>
 inline CUDA_CALLABLE void adj_dot(vec_t<Length, Type> a, vec_t<Length, Type> b, vec_t<Length, Type>& adj_a, vec_t<Length, Type>& adj_b, const Type adj_ret)
 {
@@ -1010,7 +1010,7 @@ WP_API int cuda_set_peer_access_enabled(void* target_context, void* peer_context
1010
1010
  WP_API int cuda_is_mempool_access_enabled(int target_ordinal, int peer_ordinal) { return 0; }
1011
1011
  WP_API int cuda_set_mempool_access_enabled(int target_ordinal, int peer_ordinal, int enable) { return 0; }
1012
1012
 
1013
- WP_API void* cuda_stream_create(void* context) { return NULL; }
1013
+ WP_API void* cuda_stream_create(void* context, int priority) { return NULL; }
1014
1014
  WP_API void cuda_stream_destroy(void* context, void* stream) {}
1015
1015
  WP_API void cuda_stream_register(void* context, void* stream) {}
1016
1016
  WP_API void cuda_stream_unregister(void* context, void* stream) {}
@@ -1020,6 +1020,7 @@ WP_API void cuda_stream_wait_event(void* stream, void* event) {}
1020
1020
  WP_API void cuda_stream_wait_stream(void* stream, void* other_stream, void* event) {}
1021
1021
  WP_API int cuda_stream_is_capturing(void* stream) { return 0; }
1022
1022
  WP_API uint64_t cuda_stream_get_capture_id(void* stream) { return 0; }
1023
+ WP_API int cuda_stream_get_priority(void* stream) { return 0; }
1023
1024
 
1024
1025
  WP_API void* cuda_event_create(void* context, unsigned flags) { return NULL; }
1025
1026
  WP_API void cuda_event_destroy(void* event) {}
warp/native/warp.cu CHANGED
@@ -1963,7 +1963,6 @@ void cuda_context_set_stream(void* context, void* stream, int sync)
     }
 }
 
-
 int cuda_is_peer_access_supported(int target_ordinal, int peer_ordinal)
 {
     int num_devices = int(g_devices.size());
@@ -2174,12 +2173,12 @@ int cuda_set_mempool_access_enabled(int target_ordinal, int peer_ordinal, int en
 }
 
 
-void* cuda_stream_create(void* context)
+void* cuda_stream_create(void* context, int priority)
 {
     ContextGuard guard(context, true);
 
     CUstream stream;
-    if (check_cu(cuStreamCreate_f(&stream, CU_STREAM_DEFAULT)))
+    if (check_cu(cuStreamCreateWithPriority_f(&stream, CU_STREAM_DEFAULT, priority)))
     {
         cuda_stream_register(WP_CURRENT_CONTEXT, stream);
         return stream;
@@ -2268,6 +2267,14 @@ uint64_t cuda_stream_get_capture_id(void* stream)
     return get_capture_id(static_cast<CUstream>(stream));
 }
 
+int cuda_stream_get_priority(void* stream)
+{
+    int priority = 0;
+    check_cuda(cuStreamGetPriority_f(static_cast<CUstream>(stream), &priority));
+
+    return priority;
+}
+
 void* cuda_event_create(void* context, unsigned flags)
 {
     ContextGuard guard(context, true);
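
cuda_stream_create() now forwards an explicit priority to cuStreamCreateWithPriority (in CUDA, lower numbers mean higher priority, and out-of-range values are clamped to the device's supported range), and cuda_stream_get_priority() reads it back via cuStreamGetPriority. A sketch of how this might surface in Python, assuming wp.Stream grew a matching priority argument (only the native signatures appear in this diff):

    import warp as wp

    wp.init()

    # hypothetical: priority forwards to cuda_stream_create(context, priority)
    high = wp.Stream("cuda:0", priority=-1)

    with wp.ScopedStream(high):
        # work launched here competes favorably with default-priority streams
        x = wp.zeros(1024, dtype=float, device="cuda:0")
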
warp/native/warp.h CHANGED
@@ -83,6 +83,12 @@ extern "C"
 WP_API void mesh_destroy_device(uint64_t id);
 WP_API void mesh_refit_device(uint64_t id);
 
+WP_API void mesh_set_points_host(uint64_t id, wp::array_t<wp::vec3> points);
+WP_API void mesh_set_points_device(uint64_t id, wp::array_t<wp::vec3> points);
+
+WP_API void mesh_set_velocities_host(uint64_t id, wp::array_t<wp::vec3> velocities);
+WP_API void mesh_set_velocities_device(uint64_t id, wp::array_t<wp::vec3> velocities);
+
 WP_API uint64_t hash_grid_create_host(int dim_x, int dim_y, int dim_z);
 WP_API void hash_grid_reserve_host(uint64_t id, int num_points);
 WP_API void hash_grid_destroy_host(uint64_t id);
@@ -286,7 +292,7 @@ extern "C"
 WP_API int cuda_is_mempool_access_enabled(int target_ordinal, int peer_ordinal);
 WP_API int cuda_set_mempool_access_enabled(int target_ordinal, int peer_ordinal, int enable);
 
-WP_API void* cuda_stream_create(void* context);
+WP_API void* cuda_stream_create(void* context, int priority);
 WP_API void cuda_stream_destroy(void* context, void* stream);
 WP_API void cuda_stream_register(void* context, void* stream);
 WP_API void cuda_stream_unregister(void* context, void* stream);
@@ -296,6 +302,7 @@ extern "C"
 WP_API void cuda_stream_wait_stream(void* stream, void* other_stream, void* event);
 WP_API int cuda_stream_is_capturing(void* stream);
 WP_API uint64_t cuda_stream_get_capture_id(void* stream);
+WP_API int cuda_stream_get_priority(void* stream);
 
 WP_API void* cuda_event_create(void* context, unsigned flags);
 WP_API void cuda_event_destroy(void* event);