PyPI - warp-lang - Versions diffs - 1.7.2rc1__py3-none-win_amd64.whl → 1.8.1__py3-none-win_amd64.whl - Mend

warp-lang 1.7.2rc1__py3-none-win_amd64.whl → 1.8.1__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of warp-lang might be problematic. Click here for more details.

Files changed (193)

warp/__init__.py +3 -1
warp/__init__.pyi +3489 -1
warp/autograd.py +45 -122
warp/bin/warp-clang.dll +0 -0
warp/bin/warp.dll +0 -0
warp/build.py +241 -252
warp/build_dll.py +130 -26
warp/builtins.py +1907 -384
warp/codegen.py +272 -104
warp/config.py +12 -1
warp/constants.py +1 -1
warp/context.py +770 -238
warp/dlpack.py +1 -1
warp/examples/benchmarks/benchmark_cloth.py +2 -2
warp/examples/benchmarks/benchmark_tile_sort.py +155 -0
warp/examples/core/example_sample_mesh.py +1 -1
warp/examples/core/example_spin_lock.py +93 -0
warp/examples/core/example_work_queue.py +118 -0
warp/examples/fem/example_adaptive_grid.py +5 -5
warp/examples/fem/example_apic_fluid.py +1 -1
warp/examples/fem/example_burgers.py +1 -1
warp/examples/fem/example_convection_diffusion.py +9 -6
warp/examples/fem/example_darcy_ls_optimization.py +489 -0
warp/examples/fem/example_deformed_geometry.py +1 -1
warp/examples/fem/example_diffusion.py +2 -2
warp/examples/fem/example_diffusion_3d.py +1 -1
warp/examples/fem/example_distortion_energy.py +1 -1
warp/examples/fem/example_elastic_shape_optimization.py +387 -0
warp/examples/fem/example_magnetostatics.py +5 -3
warp/examples/fem/example_mixed_elasticity.py +5 -3
warp/examples/fem/example_navier_stokes.py +11 -9
warp/examples/fem/example_nonconforming_contact.py +5 -3
warp/examples/fem/example_streamlines.py +8 -3
warp/examples/fem/utils.py +9 -8
warp/examples/interop/example_jax_callable.py +34 -4
warp/examples/interop/example_jax_ffi_callback.py +2 -2
warp/examples/interop/example_jax_kernel.py +27 -1
warp/examples/optim/example_drone.py +1 -1
warp/examples/sim/example_cloth.py +1 -1
warp/examples/sim/example_cloth_self_contact.py +48 -54
warp/examples/tile/example_tile_block_cholesky.py +502 -0
warp/examples/tile/example_tile_cholesky.py +2 -1
warp/examples/tile/example_tile_convolution.py +1 -1
warp/examples/tile/example_tile_filtering.py +1 -1
warp/examples/tile/example_tile_matmul.py +1 -1
warp/examples/tile/example_tile_mlp.py +2 -0
warp/fabric.py +7 -7
warp/fem/__init__.py +5 -0
warp/fem/adaptivity.py +1 -1
warp/fem/cache.py +152 -63
warp/fem/dirichlet.py +2 -2
warp/fem/domain.py +136 -6
warp/fem/field/field.py +141 -99
warp/fem/field/nodal_field.py +85 -39
warp/fem/field/virtual.py +99 -52
warp/fem/geometry/adaptive_nanogrid.py +91 -86
warp/fem/geometry/closest_point.py +13 -0
warp/fem/geometry/deformed_geometry.py +102 -40
warp/fem/geometry/element.py +56 -2
warp/fem/geometry/geometry.py +323 -22
warp/fem/geometry/grid_2d.py +157 -62
warp/fem/geometry/grid_3d.py +116 -20
warp/fem/geometry/hexmesh.py +86 -20
warp/fem/geometry/nanogrid.py +166 -86
warp/fem/geometry/partition.py +59 -25
warp/fem/geometry/quadmesh.py +86 -135
warp/fem/geometry/tetmesh.py +47 -119
warp/fem/geometry/trimesh.py +77 -270
warp/fem/integrate.py +181 -95
warp/fem/linalg.py +25 -58
warp/fem/operator.py +124 -27
warp/fem/quadrature/pic_quadrature.py +36 -14
warp/fem/quadrature/quadrature.py +40 -16
warp/fem/space/__init__.py +1 -1
warp/fem/space/basis_function_space.py +66 -46
warp/fem/space/basis_space.py +17 -4
warp/fem/space/dof_mapper.py +1 -1
warp/fem/space/function_space.py +2 -2
warp/fem/space/grid_2d_function_space.py +4 -1
warp/fem/space/hexmesh_function_space.py +4 -2
warp/fem/space/nanogrid_function_space.py +3 -1
warp/fem/space/partition.py +11 -2
warp/fem/space/quadmesh_function_space.py +4 -1
warp/fem/space/restriction.py +5 -2
warp/fem/space/shape/__init__.py +10 -8
warp/fem/space/tetmesh_function_space.py +4 -1
warp/fem/space/topology.py +52 -21
warp/fem/space/trimesh_function_space.py +4 -1
warp/fem/utils.py +53 -8
warp/jax.py +1 -2
warp/jax_experimental/ffi.py +210 -67
warp/jax_experimental/xla_ffi.py +37 -24
warp/math.py +171 -1
warp/native/array.h +103 -4
warp/native/builtin.h +182 -35
warp/native/coloring.cpp +6 -2
warp/native/cuda_util.cpp +1 -1
warp/native/exports.h +118 -63
warp/native/intersect.h +5 -5
warp/native/mat.h +8 -13
warp/native/mathdx.cpp +11 -5
warp/native/matnn.h +1 -123
warp/native/mesh.h +1 -1
warp/native/quat.h +34 -6
warp/native/rand.h +7 -7
warp/native/sparse.cpp +121 -258
warp/native/sparse.cu +181 -274
warp/native/spatial.h +305 -17
warp/native/svd.h +23 -8
warp/native/tile.h +603 -73
warp/native/tile_radix_sort.h +1112 -0
warp/native/tile_reduce.h +239 -13
warp/native/tile_scan.h +240 -0
warp/native/tuple.h +189 -0
warp/native/vec.h +10 -20
warp/native/warp.cpp +36 -4
warp/native/warp.cu +588 -52
warp/native/warp.h +47 -74
warp/optim/linear.py +5 -1
warp/paddle.py +7 -8
warp/py.typed +0 -0
warp/render/render_opengl.py +110 -80
warp/render/render_usd.py +124 -62
warp/sim/__init__.py +9 -0
warp/sim/collide.py +253 -80
warp/sim/graph_coloring.py +8 -1
warp/sim/import_mjcf.py +4 -3
warp/sim/import_usd.py +11 -7
warp/sim/integrator.py +5 -2
warp/sim/integrator_euler.py +1 -1
warp/sim/integrator_featherstone.py +1 -1
warp/sim/integrator_vbd.py +761 -322
warp/sim/integrator_xpbd.py +1 -1
warp/sim/model.py +265 -260
warp/sim/utils.py +10 -7
warp/sparse.py +303 -166
warp/tape.py +54 -51
warp/tests/cuda/test_conditional_captures.py +1046 -0
warp/tests/cuda/test_streams.py +1 -1
warp/tests/geometry/test_volume.py +2 -2
warp/tests/interop/test_dlpack.py +9 -9
warp/tests/interop/test_jax.py +0 -1
warp/tests/run_coverage_serial.py +1 -1
warp/tests/sim/disabled_kinematics.py +2 -2
warp/tests/sim/{test_vbd.py → test_cloth.py} +378 -112
warp/tests/sim/test_collision.py +159 -51
warp/tests/sim/test_coloring.py +91 -2
warp/tests/test_array.py +254 -2
warp/tests/test_array_reduce.py +2 -2
warp/tests/test_assert.py +53 -0
warp/tests/test_atomic_cas.py +312 -0
warp/tests/test_codegen.py +142 -19
warp/tests/test_conditional.py +47 -1
warp/tests/test_ctypes.py +0 -20
warp/tests/test_devices.py +8 -0
warp/tests/test_fabricarray.py +4 -2
warp/tests/test_fem.py +58 -25
warp/tests/test_func.py +42 -1
warp/tests/test_grad.py +1 -1
warp/tests/test_lerp.py +1 -3
warp/tests/test_map.py +481 -0
warp/tests/test_mat.py +23 -24
warp/tests/test_quat.py +28 -15
warp/tests/test_rounding.py +10 -38
warp/tests/test_runlength_encode.py +7 -7
warp/tests/test_smoothstep.py +1 -1
warp/tests/test_sparse.py +83 -2
warp/tests/test_spatial.py +507 -1
warp/tests/test_static.py +48 -0
warp/tests/test_struct.py +2 -2
warp/tests/test_tape.py +38 -0
warp/tests/test_tuple.py +265 -0
warp/tests/test_types.py +2 -2
warp/tests/test_utils.py +24 -18
warp/tests/test_vec.py +38 -408
warp/tests/test_vec_constructors.py +325 -0
warp/tests/tile/test_tile.py +438 -131
warp/tests/tile/test_tile_mathdx.py +518 -14
warp/tests/tile/test_tile_matmul.py +179 -0
warp/tests/tile/test_tile_reduce.py +307 -5
warp/tests/tile/test_tile_shared_memory.py +136 -7
warp/tests/tile/test_tile_sort.py +121 -0
warp/tests/unittest_suites.py +14 -6
warp/types.py +462 -308
warp/utils.py +647 -86
{warp_lang-1.7.2rc1.dist-info → warp_lang-1.8.1.dist-info}/METADATA +20 -6
{warp_lang-1.7.2rc1.dist-info → warp_lang-1.8.1.dist-info}/RECORD +190 -176
warp/stubs.py +0 -3381
warp/tests/sim/test_xpbd.py +0 -399
warp/tests/test_mlp.py +0 -282
{warp_lang-1.7.2rc1.dist-info → warp_lang-1.8.1.dist-info}/WHEEL +0 -0
{warp_lang-1.7.2rc1.dist-info → warp_lang-1.8.1.dist-info}/licenses/LICENSE.md +0 -0
{warp_lang-1.7.2rc1.dist-info → warp_lang-1.8.1.dist-info}/top_level.txt +0 -0

warp/native/tuple.h ADDED Viewed

@@ -0,0 +1,189 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+namespace wp
+{
+template <typename... Types>
+struct tuple_t;
+template <>
+struct tuple_t<>
+{
+    static constexpr int size() { return 0; }
+    // Base case: empty tuple.
+    template <typename Callable>
+    void apply(Callable&&) const { }
+};
+template <typename Head, typename... Tail>
+struct tuple_t<Head, Tail...>
+{
+    Head head;
+    tuple_t<Tail...> tail;
+    CUDA_CALLABLE inline tuple_t() {}
+    CUDA_CALLABLE inline tuple_t(Head h, Tail... t) : head(h), tail(t...) {}
+    static constexpr int size() { return 1 + tuple_t<Tail...>::size(); }
+    // Applies a callable to each element.
+    template <typename Callable>
+    void apply(Callable&& func) const
+    {
+        func(head);        // Apply the callable to the current element.
+        tail.apply(func);  // Recursively process the rest of the tuple.
+    }
+};
+// Tuple constructor.
+template <typename... Args>
+CUDA_CALLABLE inline tuple_t<Args...>
+tuple(
+    Args... args
+)
+{
+    return tuple_t<Args...>(args...);
+}
+// Helper to extract a value from the tuple.
+// Can be replaced with simpler member function version when our CPU compiler
+// backend supports constexpr if statements.
+template <int N, typename Head, typename... Tail>
+struct tuple_get
+{
+    static CUDA_CALLABLE inline const auto&
+    value(
+        const tuple_t<Head, Tail...>& t
+    )
+    {
+        return tuple_get<N - 1, Tail...>::value(t.tail);
+    }
+};
+// Specialization for the base case N == 0. Simply return the head of the tuple.
+template <typename Head, typename... Tail>
+struct tuple_get<0, Head, Tail...>
+{
+    static CUDA_CALLABLE inline const auto&
+    value(
+        const tuple_t<Head, Tail...>& t
+    )
+    {
+        return t.head;
+    }
+};
+template <int Index, typename... Args>
+CUDA_CALLABLE inline auto
+extract(
+    const tuple_t<Args...>& t
+)
+{
+    return tuple_get<Index, Args...>::value(t);
+}
+template <typename... Args>
+CUDA_CALLABLE inline int
+len(
+    const tuple_t<Args...>& t
+)
+{
+    return t.size();
+}
+template <typename... Args>
+CUDA_CALLABLE inline void
+adj_len(
+    const tuple_t<Args...>& t,
+    tuple_t<Args...>& adj_t,
+    int adj_ret
+)
+{
+}
+template <typename... Args>
+CUDA_CALLABLE inline void
+print(
+    const tuple_t<Args...>& t
+)
+{
+    t.apply([&](auto a) { print(a); });
+}
+template <typename... Args>
+CUDA_CALLABLE inline void
+adj_print(
+    const tuple_t<Args...>& t,
+    tuple_t<Args...>& adj_t
+)
+{
+    adj_t.apply([&](auto a) { print(a); });
+}
+CUDA_CALLABLE inline tuple_t<>
+add(
+    const tuple_t<>& a,
+    const tuple_t<>& b
+)
+{
+    return tuple_t<>();
+}
+template <typename Head, typename... Tail>
+CUDA_CALLABLE inline tuple_t<Head, Tail...>
+add(
+    const tuple_t<Head, Tail...>& a,
+    const tuple_t<Head, Tail...>& b
+)
+{
+    tuple_t<Head, Tail...> out;
+    out.head = add(a.head, b.head);
+    out.tail = add(a.tail, b.tail);
+    return out;
+}
+CUDA_CALLABLE inline void
+adj_add(
+    const tuple_t<>& a,
+    const tuple_t<>& b,
+    tuple_t<>& adj_a,
+    tuple_t<>& adj_b,
+    const tuple_t<>& adj_ret
+)
+{
+}
+template <typename Head, typename... Tail>
+CUDA_CALLABLE inline void
+adj_add(
+    const tuple_t<Head, Tail...>& a,
+    const tuple_t<Head, Tail...>& b,
+    tuple_t<Head, Tail...>& adj_a,
+    tuple_t<Head, Tail...>& adj_b,
+    const tuple_t<Head, Tail...>& adj_ret
+)
+{
+    adj_add(a.head, b.head, adj_a.head, adj_b.head, adj_ret.head);
+    adj_add(a.tail, b.tail, adj_a.tail, adj_b.tail, adj_ret.tail);
+}
+} // namespace wp

warp/native/vec.h CHANGED Viewed

@@ -149,26 +149,15 @@ using vec2d = vec_t<2,double>;
 using vec3d = vec_t<3,double>;
 using vec4d = vec_t<4,double>;
-//--------------
-// vec<Length, Type> methods
-// Should these accept const references as arguments? It's all
-// inlined so maybe it doesn't matter? Even if it does, it
-// probably depends on the Length of the vector...
-// negation:
 template<unsigned Length, typename Type>
-inline CUDA_CALLABLE vec_t<Length, Type> operator - (vec_t<Length, Type> a)
+inline CUDA_CALLABLE vec_t<Length, Type> operator - (const vec_t<Length, Type>& x)
 {
-    // NB: this constructor will initialize all ret's components to 0, which is
-    // unnecessary...
     vec_t<Length, Type> ret;
-    for( unsigned i=0; i < Length; ++i )
+    for(unsigned i=0; i < Length; ++i)
     {
-        ret[i] = -a[i];
+        ret[i] = -x[i];
     }
-    // Wonder if this does a load of copying when it returns... hopefully not as it's inlined?
     return ret;
 }
@@ -843,8 +832,9 @@ inline CUDA_CALLABLE void expect_near(const vec_t<Length, Type>& actual, const v
     if (diff > tolerance)
     {
         printf("Error, expect_near() failed with tolerance "); print(tolerance);
-        printf("\t Expected: "); print(expected);
-        printf("\t Actual: "); print(actual);
+        printf("    Expected: "); print(expected);
+        printf("    Actual: "); print(actual);
+        printf("    Max absolute difference: "); print(diff);
     }
 }
@@ -979,11 +969,11 @@ template<unsigned Length, typename Type>
 inline CUDA_CALLABLE void adj_div(Type s, vec_t<Length, Type> a, Type& adj_s, vec_t<Length, Type>& adj_a, const vec_t<Length, Type>& adj_ret)
 {
-    adj_s -= dot(a , adj_ret)/ (s * s); // - a / s^2
-    for( unsigned i=0; i < Length; ++i )
+    for (unsigned i=0; i < Length; ++i)
     {
-        adj_a[i] += s / adj_ret[i];
+        Type inv = Type(1) / a[i];
+        adj_a[i] -= s * adj_ret[i] * inv * inv;
+        adj_s += adj_ret[i] * inv;
     }
 #if FP_CHECK

warp/native/warp.cpp CHANGED Viewed

@@ -24,6 +24,11 @@
 #include <stdlib.h>
 #include <string.h>
+// MSVC provides _aligned_malloc() instead of the standard aligned_alloc()
+#if defined(_MSC_VER)
+#include <malloc.h>
+#endif
 uint16_t float_to_half_bits(float x)
 {
     // adapted from Fabien Giesen's post: https://gist.github.com/rygorous/2156668
@@ -114,7 +119,7 @@ float half_bits_to_float(uint16_t u)
 int init()
 {
 #if WP_ENABLE_CUDA
-    int cuda_init();
+    int cuda_init(void);
     // note: it's safe to proceed even if CUDA initialization failed
     cuda_init();
 #endif
@@ -163,12 +168,28 @@ int is_debug_enabled()
 void* alloc_host(size_t s)
 {
-    return malloc(s);
+    // increase CPU array alignment for compatibility with other libs, e.g., JAX, XLA, Eigen.
+    size_t alignment = 64;
+    // msvc does not provide the standard aligned_alloc()
+    #if defined(_MSC_VER)
+        return _aligned_malloc(s, alignment);
+    #else
+        // ensure that the size is a multiple of alignment
+        size_t remainder = s % alignment;
+        if (remainder != 0)
+            s += alignment - remainder;
+        return aligned_alloc(alignment, s);
+    #endif
 }
 void free_host(void* ptr)
 {
-    free(ptr);
+    #if defined(_MSC_VER)
+        _aligned_free(ptr);
+    #else
+        free(ptr);
+    #endif
 }
 bool memcpy_h2h(void* dest, void* src, size_t n)
@@ -990,6 +1011,7 @@ WP_API int cuda_device_get_count() { return 0; }
 WP_API void* cuda_device_get_primary_context(int ordinal) { return NULL; }
 WP_API const char* cuda_device_get_name(int ordinal) { return NULL; }
 WP_API int cuda_device_get_arch(int ordinal) { return 0; }
+WP_API int cuda_device_get_sm_count(int ordinal) { return 0; }
 WP_API void cuda_device_get_uuid(int ordinal, char uuid[16]) {}
 WP_API int cuda_device_get_pci_domain_id(int ordinal) { return -1; }
 WP_API int cuda_device_get_pci_bus_id(int ordinal) { return -1; }
@@ -1050,10 +1072,20 @@ WP_API float cuda_event_elapsed_time(void* start_event, void* end_event) { retur
 WP_API bool cuda_graph_begin_capture(void* context, void* stream, int external) { return false; }
 WP_API bool cuda_graph_end_capture(void* context, void* stream, void** graph_ret) { return false; }
+WP_API bool cuda_graph_create_exec(void* context, void* stream, void* graph, void** graph_exec_ret) { return false; }
 WP_API bool cuda_graph_launch(void* graph, void* stream) { return false; }
 WP_API bool cuda_graph_destroy(void* context, void* graph) { return false; }
+WP_API bool cuda_graph_exec_destroy(void* context, void* graph_exec) { return false; }
+WP_API bool capture_debug_dot_print(void* graph, const char *path, uint32_t flags) { return false; }
+WP_API bool cuda_graph_insert_if_else(void* context, void* stream, int* condition, void** if_graph_ret, void** else_graph_ret) { return false; }
+WP_API bool cuda_graph_insert_while(void* context, void* stream, int* condition, void** body_graph_ret, uint64_t* handle_ret) { return false; }
+WP_API bool cuda_graph_set_condition(void* context, void* stream, int* condition, uint64_t handle) { return false; }
+WP_API bool cuda_graph_pause_capture(void* context, void* stream, void** graph_ret) { return false; }
+WP_API bool cuda_graph_resume_capture(void* context, void* stream, void* graph) { return false; }
+WP_API bool cuda_graph_insert_child_graph(void* context, void* stream, void* child_graph) { return false; }
-WP_API size_t cuda_compile_program(const char* cuda_src, const char* program_name, int arch, const char* include_dir, int num_cuda_include_dirs, const char** cuda_include_dirs, bool debug, bool verbose, bool verify_fp, bool fast_math, bool fuse_fp, bool lineinfo, const char* output_path, size_t num_ltoirs, char** ltoirs, size_t* ltoir_sizes, int* ltoir_input_types) { return 0; }
+WP_API size_t cuda_compile_program(const char* cuda_src, const char* program_name, int arch, const char* include_dir, int num_cuda_include_dirs, const char** cuda_include_dirs, bool debug, bool verbose, bool verify_fp, bool fast_math, bool fuse_fp, bool lineinfo, bool compile_time_trace, const char* output_path, size_t num_ltoirs, char** ltoirs, size_t* ltoir_sizes, int* ltoir_input_types) { return 0; }
 WP_API void* cuda_load_module(void* context, const char* ptx) { return NULL; }
 WP_API void cuda_unload_module(void* context, void* module) {}