warp-lang 1.7.2__py3-none-manylinux_2_34_aarch64.whl → 1.8.0__py3-none-manylinux_2_34_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of warp-lang might be problematic. Click here for more details.

Files changed (180)
  1. warp/__init__.py +3 -1
  2. warp/__init__.pyi +3489 -1
  3. warp/autograd.py +45 -122
  4. warp/bin/warp.so +0 -0
  5. warp/build.py +241 -252
  6. warp/build_dll.py +125 -26
  7. warp/builtins.py +1907 -384
  8. warp/codegen.py +257 -101
  9. warp/config.py +12 -1
  10. warp/constants.py +1 -1
  11. warp/context.py +657 -223
  12. warp/dlpack.py +1 -1
  13. warp/examples/benchmarks/benchmark_cloth.py +2 -2
  14. warp/examples/benchmarks/benchmark_tile_sort.py +155 -0
  15. warp/examples/core/example_sample_mesh.py +1 -1
  16. warp/examples/core/example_spin_lock.py +93 -0
  17. warp/examples/core/example_work_queue.py +118 -0
  18. warp/examples/fem/example_adaptive_grid.py +5 -5
  19. warp/examples/fem/example_apic_fluid.py +1 -1
  20. warp/examples/fem/example_burgers.py +1 -1
  21. warp/examples/fem/example_convection_diffusion.py +9 -6
  22. warp/examples/fem/example_darcy_ls_optimization.py +489 -0
  23. warp/examples/fem/example_deformed_geometry.py +1 -1
  24. warp/examples/fem/example_diffusion.py +2 -2
  25. warp/examples/fem/example_diffusion_3d.py +1 -1
  26. warp/examples/fem/example_distortion_energy.py +1 -1
  27. warp/examples/fem/example_elastic_shape_optimization.py +387 -0
  28. warp/examples/fem/example_magnetostatics.py +5 -3
  29. warp/examples/fem/example_mixed_elasticity.py +5 -3
  30. warp/examples/fem/example_navier_stokes.py +11 -9
  31. warp/examples/fem/example_nonconforming_contact.py +5 -3
  32. warp/examples/fem/example_streamlines.py +8 -3
  33. warp/examples/fem/utils.py +9 -8
  34. warp/examples/interop/example_jax_ffi_callback.py +2 -2
  35. warp/examples/optim/example_drone.py +1 -1
  36. warp/examples/sim/example_cloth.py +1 -1
  37. warp/examples/sim/example_cloth_self_contact.py +48 -54
  38. warp/examples/tile/example_tile_block_cholesky.py +502 -0
  39. warp/examples/tile/example_tile_cholesky.py +2 -1
  40. warp/examples/tile/example_tile_convolution.py +1 -1
  41. warp/examples/tile/example_tile_filtering.py +1 -1
  42. warp/examples/tile/example_tile_matmul.py +1 -1
  43. warp/examples/tile/example_tile_mlp.py +2 -0
  44. warp/fabric.py +7 -7
  45. warp/fem/__init__.py +5 -0
  46. warp/fem/adaptivity.py +1 -1
  47. warp/fem/cache.py +152 -63
  48. warp/fem/dirichlet.py +2 -2
  49. warp/fem/domain.py +136 -6
  50. warp/fem/field/field.py +141 -99
  51. warp/fem/field/nodal_field.py +85 -39
  52. warp/fem/field/virtual.py +97 -52
  53. warp/fem/geometry/adaptive_nanogrid.py +91 -86
  54. warp/fem/geometry/closest_point.py +13 -0
  55. warp/fem/geometry/deformed_geometry.py +102 -40
  56. warp/fem/geometry/element.py +56 -2
  57. warp/fem/geometry/geometry.py +323 -22
  58. warp/fem/geometry/grid_2d.py +157 -62
  59. warp/fem/geometry/grid_3d.py +116 -20
  60. warp/fem/geometry/hexmesh.py +86 -20
  61. warp/fem/geometry/nanogrid.py +166 -86
  62. warp/fem/geometry/partition.py +59 -25
  63. warp/fem/geometry/quadmesh.py +86 -135
  64. warp/fem/geometry/tetmesh.py +47 -119
  65. warp/fem/geometry/trimesh.py +77 -270
  66. warp/fem/integrate.py +107 -52
  67. warp/fem/linalg.py +25 -58
  68. warp/fem/operator.py +124 -27
  69. warp/fem/quadrature/pic_quadrature.py +36 -14
  70. warp/fem/quadrature/quadrature.py +40 -16
  71. warp/fem/space/__init__.py +1 -1
  72. warp/fem/space/basis_function_space.py +66 -46
  73. warp/fem/space/basis_space.py +17 -4
  74. warp/fem/space/dof_mapper.py +1 -1
  75. warp/fem/space/function_space.py +2 -2
  76. warp/fem/space/grid_2d_function_space.py +4 -1
  77. warp/fem/space/hexmesh_function_space.py +4 -2
  78. warp/fem/space/nanogrid_function_space.py +3 -1
  79. warp/fem/space/partition.py +11 -2
  80. warp/fem/space/quadmesh_function_space.py +4 -1
  81. warp/fem/space/restriction.py +5 -2
  82. warp/fem/space/shape/__init__.py +10 -8
  83. warp/fem/space/tetmesh_function_space.py +4 -1
  84. warp/fem/space/topology.py +52 -21
  85. warp/fem/space/trimesh_function_space.py +4 -1
  86. warp/fem/utils.py +53 -8
  87. warp/jax.py +1 -2
  88. warp/jax_experimental/ffi.py +12 -17
  89. warp/jax_experimental/xla_ffi.py +37 -24
  90. warp/math.py +171 -1
  91. warp/native/array.h +99 -0
  92. warp/native/builtin.h +174 -31
  93. warp/native/coloring.cpp +1 -1
  94. warp/native/exports.h +118 -63
  95. warp/native/intersect.h +3 -3
  96. warp/native/mat.h +5 -10
  97. warp/native/mathdx.cpp +11 -5
  98. warp/native/matnn.h +1 -123
  99. warp/native/quat.h +28 -4
  100. warp/native/sparse.cpp +121 -258
  101. warp/native/sparse.cu +181 -274
  102. warp/native/spatial.h +305 -17
  103. warp/native/tile.h +583 -72
  104. warp/native/tile_radix_sort.h +1108 -0
  105. warp/native/tile_reduce.h +237 -2
  106. warp/native/tile_scan.h +240 -0
  107. warp/native/tuple.h +189 -0
  108. warp/native/vec.h +6 -16
  109. warp/native/warp.cpp +36 -4
  110. warp/native/warp.cu +574 -51
  111. warp/native/warp.h +47 -74
  112. warp/optim/linear.py +5 -1
  113. warp/paddle.py +7 -8
  114. warp/py.typed +0 -0
  115. warp/render/render_opengl.py +58 -29
  116. warp/render/render_usd.py +124 -61
  117. warp/sim/__init__.py +9 -0
  118. warp/sim/collide.py +252 -78
  119. warp/sim/graph_coloring.py +8 -1
  120. warp/sim/import_mjcf.py +4 -3
  121. warp/sim/import_usd.py +11 -7
  122. warp/sim/integrator.py +5 -2
  123. warp/sim/integrator_euler.py +1 -1
  124. warp/sim/integrator_featherstone.py +1 -1
  125. warp/sim/integrator_vbd.py +751 -320
  126. warp/sim/integrator_xpbd.py +1 -1
  127. warp/sim/model.py +265 -260
  128. warp/sim/utils.py +10 -7
  129. warp/sparse.py +303 -166
  130. warp/tape.py +52 -51
  131. warp/tests/cuda/test_conditional_captures.py +1046 -0
  132. warp/tests/cuda/test_streams.py +1 -1
  133. warp/tests/geometry/test_volume.py +2 -2
  134. warp/tests/interop/test_dlpack.py +9 -9
  135. warp/tests/interop/test_jax.py +0 -1
  136. warp/tests/run_coverage_serial.py +1 -1
  137. warp/tests/sim/disabled_kinematics.py +2 -2
  138. warp/tests/sim/{test_vbd.py → test_cloth.py} +296 -113
  139. warp/tests/sim/test_collision.py +159 -51
  140. warp/tests/sim/test_coloring.py +15 -1
  141. warp/tests/test_array.py +254 -2
  142. warp/tests/test_array_reduce.py +2 -2
  143. warp/tests/test_atomic_cas.py +299 -0
  144. warp/tests/test_codegen.py +142 -19
  145. warp/tests/test_conditional.py +47 -1
  146. warp/tests/test_ctypes.py +0 -20
  147. warp/tests/test_devices.py +8 -0
  148. warp/tests/test_fabricarray.py +4 -2
  149. warp/tests/test_fem.py +58 -25
  150. warp/tests/test_func.py +42 -1
  151. warp/tests/test_grad.py +1 -1
  152. warp/tests/test_lerp.py +1 -3
  153. warp/tests/test_map.py +481 -0
  154. warp/tests/test_mat.py +1 -24
  155. warp/tests/test_quat.py +6 -15
  156. warp/tests/test_rounding.py +10 -38
  157. warp/tests/test_runlength_encode.py +7 -7
  158. warp/tests/test_smoothstep.py +1 -1
  159. warp/tests/test_sparse.py +51 -2
  160. warp/tests/test_spatial.py +507 -1
  161. warp/tests/test_struct.py +2 -2
  162. warp/tests/test_tuple.py +265 -0
  163. warp/tests/test_types.py +2 -2
  164. warp/tests/test_utils.py +24 -18
  165. warp/tests/tile/test_tile.py +420 -1
  166. warp/tests/tile/test_tile_mathdx.py +518 -14
  167. warp/tests/tile/test_tile_reduce.py +213 -0
  168. warp/tests/tile/test_tile_shared_memory.py +130 -1
  169. warp/tests/tile/test_tile_sort.py +117 -0
  170. warp/tests/unittest_suites.py +4 -6
  171. warp/types.py +462 -308
  172. warp/utils.py +647 -86
  173. {warp_lang-1.7.2.dist-info → warp_lang-1.8.0.dist-info}/METADATA +20 -6
  174. {warp_lang-1.7.2.dist-info → warp_lang-1.8.0.dist-info}/RECORD +177 -165
  175. warp/stubs.py +0 -3381
  176. warp/tests/sim/test_xpbd.py +0 -399
  177. warp/tests/test_mlp.py +0 -282
  178. {warp_lang-1.7.2.dist-info → warp_lang-1.8.0.dist-info}/WHEEL +0 -0
  179. {warp_lang-1.7.2.dist-info → warp_lang-1.8.0.dist-info}/licenses/LICENSE.md +0 -0
  180. {warp_lang-1.7.2.dist-info → warp_lang-1.8.0.dist-info}/top_level.txt +0 -0
warp/native/vec.h CHANGED
@@ -149,26 +149,15 @@ using vec2d = vec_t<2,double>;
149
149
  using vec3d = vec_t<3,double>;
150
150
  using vec4d = vec_t<4,double>;
151
151
 
152
- //--------------
153
- // vec<Length, Type> methods
154
-
155
- // Should these accept const references as arguments? It's all
156
- // inlined so maybe it doesn't matter? Even if it does, it
157
- // probably depends on the Length of the vector...
158
-
159
- // negation:
160
152
  template<unsigned Length, typename Type>
161
- inline CUDA_CALLABLE vec_t<Length, Type> operator - (vec_t<Length, Type> a)
153
+ inline CUDA_CALLABLE vec_t<Length, Type> operator - (const vec_t<Length, Type>& x)
162
154
  {
163
- // NB: this constructor will initialize all ret's components to 0, which is
164
- // unnecessary...
165
155
  vec_t<Length, Type> ret;
166
- for( unsigned i=0; i < Length; ++i )
156
+ for(unsigned i=0; i < Length; ++i)
167
157
  {
168
- ret[i] = -a[i];
158
+ ret[i] = -x[i];
169
159
  }
170
160
 
171
- // Wonder if this does a load of copying when it returns... hopefully not as it's inlined?
172
161
  return ret;
173
162
  }
174
163
 
@@ -843,8 +832,9 @@ inline CUDA_CALLABLE void expect_near(const vec_t<Length, Type>& actual, const v
843
832
  if (diff > tolerance)
844
833
  {
845
834
  printf("Error, expect_near() failed with tolerance "); print(tolerance);
846
- printf("\t Expected: "); print(expected);
847
- printf("\t Actual: "); print(actual);
835
+ printf(" Expected: "); print(expected);
836
+ printf(" Actual: "); print(actual);
837
+ printf(" Max absolute difference: "); print(diff);
848
838
  }
849
839
  }
850
840
 
warp/native/warp.cpp CHANGED
@@ -24,6 +24,11 @@
24
24
  #include <stdlib.h>
25
25
  #include <string.h>
26
26
 
27
+ // MSVC provides _aligned_malloc() instead of the standard aligned_alloc()
28
+ #if defined(_MSC_VER)
29
+ #include <malloc.h>
30
+ #endif
31
+
27
32
  uint16_t float_to_half_bits(float x)
28
33
  {
29
34
  // adapted from Fabien Giesen's post: https://gist.github.com/rygorous/2156668
@@ -114,7 +119,7 @@ float half_bits_to_float(uint16_t u)
114
119
  int init()
115
120
  {
116
121
  #if WP_ENABLE_CUDA
117
- int cuda_init();
122
+ int cuda_init(void);
118
123
  // note: it's safe to proceed even if CUDA initialization failed
119
124
  cuda_init();
120
125
  #endif
@@ -163,12 +168,28 @@ int is_debug_enabled()
163
168
 
164
169
  void* alloc_host(size_t s)
165
170
  {
166
- return malloc(s);
171
+ // increase CPU array alignment for compatibility with other libs, e.g., JAX, XLA, Eigen.
172
+ size_t alignment = 64;
173
+
174
+ // msvc does not provide the standard aligned_alloc()
175
+ #if defined(_MSC_VER)
176
+ return _aligned_malloc(s, alignment);
177
+ #else
178
+ // ensure that the size is a multiple of alignment
179
+ size_t remainder = s % alignment;
180
+ if (remainder != 0)
181
+ s += alignment - remainder;
182
+ return aligned_alloc(alignment, s);
183
+ #endif
167
184
  }
168
185
 
169
186
  void free_host(void* ptr)
170
187
  {
171
- free(ptr);
188
+ #if defined(_MSC_VER)
189
+ _aligned_free(ptr);
190
+ #else
191
+ free(ptr);
192
+ #endif
172
193
  }
173
194
 
174
195
  bool memcpy_h2h(void* dest, void* src, size_t n)
@@ -990,6 +1011,7 @@ WP_API int cuda_device_get_count() { return 0; }
990
1011
  WP_API void* cuda_device_get_primary_context(int ordinal) { return NULL; }
991
1012
  WP_API const char* cuda_device_get_name(int ordinal) { return NULL; }
992
1013
  WP_API int cuda_device_get_arch(int ordinal) { return 0; }
1014
+ WP_API int cuda_device_get_sm_count(int ordinal) { return 0; }
993
1015
  WP_API void cuda_device_get_uuid(int ordinal, char uuid[16]) {}
994
1016
  WP_API int cuda_device_get_pci_domain_id(int ordinal) { return -1; }
995
1017
  WP_API int cuda_device_get_pci_bus_id(int ordinal) { return -1; }
@@ -1050,10 +1072,20 @@ WP_API float cuda_event_elapsed_time(void* start_event, void* end_event) { retur
1050
1072
 
1051
1073
  WP_API bool cuda_graph_begin_capture(void* context, void* stream, int external) { return false; }
1052
1074
  WP_API bool cuda_graph_end_capture(void* context, void* stream, void** graph_ret) { return false; }
1075
+ WP_API bool cuda_graph_create_exec(void* context, void* graph, void** graph_exec_ret) { return false; }
1053
1076
  WP_API bool cuda_graph_launch(void* graph, void* stream) { return false; }
1054
1077
  WP_API bool cuda_graph_destroy(void* context, void* graph) { return false; }
1078
+ WP_API bool cuda_graph_exec_destroy(void* context, void* graph_exec) { return false; }
1079
+ WP_API bool capture_debug_dot_print(void* graph, const char *path, uint32_t flags) { return false; }
1080
+
1081
+ WP_API bool cuda_graph_insert_if_else(void* context, void* stream, int* condition, void** if_graph_ret, void** else_graph_ret) { return false; }
1082
+ WP_API bool cuda_graph_insert_while(void* context, void* stream, int* condition, void** body_graph_ret, uint64_t* handle_ret) { return false; }
1083
+ WP_API bool cuda_graph_set_condition(void* context, void* stream, int* condition, uint64_t handle) { return false; }
1084
+ WP_API bool cuda_graph_pause_capture(void* context, void* stream, void** graph_ret) { return false; }
1085
+ WP_API bool cuda_graph_resume_capture(void* context, void* stream, void* graph) { return false; }
1086
+ WP_API bool cuda_graph_insert_child_graph(void* context, void* stream, void* child_graph) { return false; }
1055
1087
 
1056
- WP_API size_t cuda_compile_program(const char* cuda_src, const char* program_name, int arch, const char* include_dir, int num_cuda_include_dirs, const char** cuda_include_dirs, bool debug, bool verbose, bool verify_fp, bool fast_math, bool fuse_fp, bool lineinfo, const char* output_path, size_t num_ltoirs, char** ltoirs, size_t* ltoir_sizes, int* ltoir_input_types) { return 0; }
1088
+ WP_API size_t cuda_compile_program(const char* cuda_src, const char* program_name, int arch, const char* include_dir, int num_cuda_include_dirs, const char** cuda_include_dirs, bool debug, bool verbose, bool verify_fp, bool fast_math, bool fuse_fp, bool lineinfo, bool compile_time_trace, const char* output_path, size_t num_ltoirs, char** ltoirs, size_t* ltoir_sizes, int* ltoir_input_types) { return 0; }
1057
1089
 
1058
1090
  WP_API void* cuda_load_module(void* context, const char* ptx) { return NULL; }
1059
1091
  WP_API void cuda_unload_module(void* context, void* module) {}