PyPI - warp-lang - Versions diffs - 1.3.3__py3-none-macosx_10_13_universal2.whl → 1.4.1__py3-none-macosx_10_13_universal2.whl - Mend

warp-lang 1.3.3__py3-none-macosx_10_13_universal2.whl → 1.4.1__py3-none-macosx_10_13_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of warp-lang might be problematic. Click here for more details.

Files changed (110) hide show

warp/__init__.py +6 -0
warp/autograd.py +59 -6
warp/bin/libwarp.dylib +0 -0
warp/build_dll.py +8 -10
warp/builtins.py +103 -3
warp/codegen.py +447 -53
warp/config.py +1 -1
warp/context.py +682 -405
warp/dlpack.py +2 -0
warp/examples/benchmarks/benchmark_cloth.py +10 -0
warp/examples/core/example_render_opengl.py +12 -10
warp/examples/fem/example_adaptive_grid.py +251 -0
warp/examples/fem/example_apic_fluid.py +1 -1
warp/examples/fem/example_diffusion_3d.py +2 -2
warp/examples/fem/example_magnetostatics.py +1 -1
warp/examples/fem/example_streamlines.py +1 -0
warp/examples/fem/utils.py +25 -5
warp/examples/sim/example_cloth.py +50 -6
warp/fem/__init__.py +2 -0
warp/fem/adaptivity.py +493 -0
warp/fem/field/field.py +2 -1
warp/fem/field/nodal_field.py +18 -26
warp/fem/field/test.py +4 -4
warp/fem/field/trial.py +4 -4
warp/fem/geometry/__init__.py +1 -0
warp/fem/geometry/adaptive_nanogrid.py +843 -0
warp/fem/geometry/nanogrid.py +55 -28
warp/fem/space/__init__.py +1 -1
warp/fem/space/nanogrid_function_space.py +69 -35
warp/fem/utils.py +118 -107
warp/jax_experimental.py +28 -15
warp/native/array.h +0 -1
warp/native/builtin.h +103 -6
warp/native/bvh.cu +4 -2
warp/native/cuda_util.cpp +14 -0
warp/native/cuda_util.h +2 -0
warp/native/error.cpp +4 -2
warp/native/exports.h +99 -0
warp/native/mat.h +97 -0
warp/native/mesh.cpp +36 -0
warp/native/mesh.cu +52 -1
warp/native/mesh.h +1 -0
warp/native/quat.h +43 -0
warp/native/range.h +11 -2
warp/native/spatial.h +6 -0
warp/native/vec.h +74 -0
warp/native/warp.cpp +2 -1
warp/native/warp.cu +10 -3
warp/native/warp.h +8 -1
warp/paddle.py +382 -0
warp/sim/__init__.py +1 -0
warp/sim/collide.py +519 -0
warp/sim/integrator_euler.py +18 -5
warp/sim/integrator_featherstone.py +5 -5
warp/sim/integrator_vbd.py +1026 -0
warp/sim/integrator_xpbd.py +2 -6
warp/sim/model.py +50 -25
warp/sparse.py +9 -7
warp/stubs.py +459 -0
warp/tape.py +2 -0
warp/tests/aux_test_dependent.py +1 -0
warp/tests/aux_test_name_clash1.py +32 -0
warp/tests/aux_test_name_clash2.py +32 -0
warp/tests/aux_test_square.py +1 -0
warp/tests/test_array.py +188 -0
warp/tests/test_async.py +3 -3
warp/tests/test_atomic.py +6 -0
warp/tests/test_closest_point_edge_edge.py +93 -1
warp/tests/test_codegen.py +93 -15
warp/tests/test_codegen_instancing.py +1457 -0
warp/tests/test_collision.py +486 -0
warp/tests/test_compile_consts.py +3 -28
warp/tests/test_dlpack.py +170 -0
warp/tests/test_examples.py +22 -8
warp/tests/test_fast_math.py +10 -4
warp/tests/test_fem.py +81 -1
warp/tests/test_func.py +46 -0
warp/tests/test_implicit_init.py +49 -0
warp/tests/test_jax.py +58 -0
warp/tests/test_mat.py +84 -0
warp/tests/test_mesh_query_point.py +188 -0
warp/tests/test_model.py +13 -0
warp/tests/test_module_hashing.py +40 -0
warp/tests/test_multigpu.py +3 -3
warp/tests/test_overwrite.py +8 -0
warp/tests/test_paddle.py +852 -0
warp/tests/test_print.py +89 -0
warp/tests/test_quat.py +111 -0
warp/tests/test_reload.py +31 -1
warp/tests/test_scalar_ops.py +2 -0
warp/tests/test_static.py +568 -0
warp/tests/test_streams.py +64 -3
warp/tests/test_struct.py +4 -4
warp/tests/test_torch.py +24 -0
warp/tests/test_triangle_closest_point.py +137 -0
warp/tests/test_types.py +1 -1
warp/tests/test_vbd.py +386 -0
warp/tests/test_vec.py +143 -0
warp/tests/test_vec_scalar_ops.py +139 -0
warp/tests/unittest_suites.py +12 -0
warp/tests/unittest_utils.py +9 -5
warp/thirdparty/dlpack.py +3 -1
warp/types.py +167 -36
warp/utils.py +37 -14
{warp_lang-1.3.3.dist-info → warp_lang-1.4.1.dist-info}/METADATA +10 -8
{warp_lang-1.3.3.dist-info → warp_lang-1.4.1.dist-info}/RECORD +109 -97
warp/tests/test_point_triangle_closest_point.py +0 -143
{warp_lang-1.3.3.dist-info → warp_lang-1.4.1.dist-info}/LICENSE.md +0 -0
{warp_lang-1.3.3.dist-info → warp_lang-1.4.1.dist-info}/WHEEL +0 -0
{warp_lang-1.3.3.dist-info → warp_lang-1.4.1.dist-info}/top_level.txt +0 -0

warp/jax_experimental.py CHANGED Viewed

@@ -21,17 +21,22 @@ _registered_kernels = [None]
 _registered_kernel_to_id = {}
-def jax_kernel(wp_kernel):
+def jax_kernel(wp_kernel, launch_dims=None):
     """Create a Jax primitive from a Warp kernel.
     NOTE: This is an experimental feature under development.
+    Args:
+        wp_kernel: The Warp kernel to be wrapped.
+        launch_dims: Optional. Specify the kernel launch dimensions. If None,
+                     dimensions are inferred from the shape of the first argument.
+                     This option when set will specify the output dimensions.
     Current limitations:
     - All kernel arguments must be arrays.
-    - Kernel launch dimensions are inferred from the shape of the first argument.
+    - If launch_dims is not provided, kernel launch dimensions are inferred from the shape of the first argument.
     - Input arguments are followed by output arguments in the Warp kernel definition.
     - There must be at least one input argument and at least one output argument.
-    - Output shapes must match the launch dimensions (i.e., output shapes must match the shape of the first argument).
     - All arrays must be contiguous.
     - Only the CUDA backend is supported.
     """
@@ -47,7 +52,7 @@ def jax_kernel(wp_kernel):
         id = _registered_kernel_to_id[wp_kernel]
     def bind(*args):
-        return _jax_warp_p.bind(*args, kernel=id)
+        return _jax_warp_p.bind(*args, kernel=id, launch_dims=launch_dims)
     return bind
@@ -106,7 +111,7 @@ def _get_jax_device():
     device = jax.config.jax_default_device
     # if default device is not set, use first device
     if device is None:
-        device = jax.devices()[0]
+        device = jax.local_devices()[0]
     return device
@@ -223,12 +228,17 @@ def _create_jax_warp_primitive():
             raise TypeError(f"Invalid or unsupported data type: {jax_ir_type}")
     # Abstract evaluation.
-    def jax_warp_abstract(*args, kernel=None):
+    def jax_warp_abstract(*args, kernel=None, launch_dims=None):
         wp_kernel = _registered_kernels[kernel]
         # All the extra arguments to the warp kernel are outputs.
         warp_outputs = [o.type for o in wp_kernel.adj.args[len(args) :]]
-        # TODO. Let's just use the first input dimension to infer the output's dimensions.
-        dims = strip_vecmat_dimensions(wp_kernel.adj.args[0], list(args[0].shape))
+        if launch_dims is None:
+            # Use the first input dimension to infer the output's dimensions if launch_dims is not provided
+            dims = strip_vecmat_dimensions(wp_kernel.adj.args[0], list(args[0].shape))
+        else:
+            dims = launch_dims
         jax_outputs = []
         for o in warp_outputs:
             shape = list(dims) + list(get_vecmat_shape(o))
@@ -260,7 +270,7 @@ def _create_jax_warp_primitive():
     def default_layout(shape):
         return range(len(shape) - 1, -1, -1)
-    def warp_call_lowering(ctx, *args, kernel=None):
+    def warp_call_lowering(ctx, *args, kernel=None, launch_dims=None):
         if not kernel:
             raise Exception("Unknown kernel id " + str(kernel))
         wp_kernel = _registered_kernels[kernel]
@@ -272,12 +282,15 @@ def _create_jax_warp_primitive():
         if not module.load(device):
             raise Exception("Could not load kernel on device")
-        # Infer dimensions from the first input.
-        warp_arg0 = wp_kernel.adj.args[0]
-        actual_shape0 = ir.RankedTensorType(args[0].type).shape
-        dims = strip_vecmat_dimensions(warp_arg0, actual_shape0)
-        warp_dims = collapse_into_leading_dimension(warp_arg0, dims)
+        if launch_dims is None:
+            # Infer dimensions from the first input.
+            warp_arg0 = wp_kernel.adj.args[0]
+            actual_shape0 = ir.RankedTensorType(args[0].type).shape
+            dims = strip_vecmat_dimensions(warp_arg0, actual_shape0)
+            warp_dims = collapse_into_leading_dimension(warp_arg0, dims)
+        else:
+            dims = launch_dims
+            warp_dims = launch_dims
         # Figure out the types and shapes of the input arrays.
         arg_strings = []
         operand_layouts = []

warp/native/array.h CHANGED Viewed

@@ -938,7 +938,6 @@ inline CUDA_CALLABLE void adj_atomic_add(const array_t<T>& buf, int i, int j, in
     FP_VERIFY_ADJ_4(value, adj_value)
 }
 template<typename T>
 inline CUDA_CALLABLE void adj_atomic_sub(const array_t<T>& buf, int i, T value, const array_t<T>& adj_buf, int& adj_i, T& adj_value, const T& adj_ret)
 {

warp/native/builtin.h CHANGED Viewed

@@ -1240,7 +1240,7 @@ inline CUDA_CALLABLE float16 atomic_add(float16* buf, float16 value)
 }
-// emulate atomic float max
+// emulate atomic float max with atomicCAS()
 inline CUDA_CALLABLE float atomic_max(float* address, float val)
 {
 #if defined(__CUDA_ARCH__)
@@ -1263,7 +1263,7 @@ inline CUDA_CALLABLE float atomic_max(float* address, float val)
 #endif
 }
-// emulate atomic float min/max with atomicCAS()
+// emulate atomic float min with atomicCAS()
 inline CUDA_CALLABLE float atomic_min(float* address, float val)
 {
 #if defined(__CUDA_ARCH__)
@@ -1286,6 +1286,88 @@ inline CUDA_CALLABLE float atomic_min(float* address, float val)
 #endif
 }
+template<>
+inline CUDA_CALLABLE float64 atomic_add(float64* buf, float64 value)
+{
+#if !defined(__CUDA_ARCH__)
+    float64 old = buf[0];
+    buf[0] += value;
+    return old;
+#elif defined(__clang__)  // CUDA compiled by Clang
+	return atomicAdd(buf, value);
+#else  // CUDA compiled by NVRTC
+    /* Define __PTR for atomicAdd prototypes below, undef after done */
+    #if (defined(_MSC_VER) && defined(_WIN64)) || defined(__LP64__) || defined(__CUDACC_RTC__)
+    #define __PTR   "l"
+    #else
+    #define __PTR   "r"
+    #endif /*(defined(_MSC_VER) && defined(_WIN64)) || defined(__LP64__) || defined(__CUDACC_RTC__)*/
+    double r = 0.0;
+    #if __CUDA_ARCH__ >= 600
+        asm volatile ("{ atom.add.f64 %0,[%1],%2; }\n"
+                    : "=d"(r)
+                    : __PTR(buf), "d"(value)
+                    : "memory");
+    #endif
+    return r;
+    #undef __PTR
+#endif  // CUDA compiled by NVRTC
+}
+// emulate atomic double max with atomicCAS()
+inline CUDA_CALLABLE double atomic_max(double* address, double val)
+{
+#if defined(__CUDA_ARCH__)
+        unsigned long long int *address_as_ull = (unsigned long long int*)address;
+        unsigned long long int old = *address_as_ull, assumed;
+	while (val > __longlong_as_double(old))
+	{
+        assumed = old;
+        old = atomicCAS(address_as_ull, assumed,
+                        __double_as_longlong(val));
+    }
+    return __longlong_as_double(old);
+#else
+    double old = *address;
+    *address = max(old, val);
+    return old;
+#endif
+}
+// emulate atomic double min with atomicCAS()
+inline CUDA_CALLABLE double atomic_min(double* address, double val)
+{
+#if defined(__CUDA_ARCH__)
+    unsigned long long int *address_as_ull = (unsigned long long int*)address;
+    unsigned long long int old = *address_as_ull, assumed;
+    while (val < __longlong_as_double(old))
+	{
+        assumed = old;
+        old = atomicCAS(address_as_ull, assumed,
+                        __double_as_longlong(val));
+    }
+    return __longlong_as_double(old);
+#else
+    double old = *address;
+    *address = min(old, val);
+    return old;
+#endif
+}
 inline CUDA_CALLABLE int atomic_max(int* address, int val)
 {
 #if defined(__CUDA_ARCH__)
@@ -1403,14 +1485,19 @@ inline CUDA_CALLABLE void print(const str s)
     printf("%s\n", s);
 }
-inline CUDA_CALLABLE void print(int i)
+inline CUDA_CALLABLE void print(signed char i)
 {
     printf("%d\n", i);
 }
 inline CUDA_CALLABLE void print(short i)
 {
-    printf("%hd\n", i);
+    printf("%d\n", i);
+}
+inline CUDA_CALLABLE void print(int i)
+{
+    printf("%d\n", i);
 }
 inline CUDA_CALLABLE void print(long i)
@@ -1423,14 +1510,19 @@ inline CUDA_CALLABLE void print(long long i)
     printf("%lld\n", i);
 }
-inline CUDA_CALLABLE void print(unsigned i)
+inline CUDA_CALLABLE void print(unsigned char i)
 {
     printf("%u\n", i);
 }
 inline CUDA_CALLABLE void print(unsigned short i)
 {
-    printf("%hu\n", i);
+    printf("%u\n", i);
+}
+inline CUDA_CALLABLE void print(unsigned int i)
+{
+    printf("%u\n", i);
 }
 inline CUDA_CALLABLE void print(unsigned long i)
@@ -1443,6 +1535,11 @@ inline CUDA_CALLABLE void print(unsigned long long i)
     printf("%llu\n", i);
 }
+inline CUDA_CALLABLE void print(bool b)
+{
+    printf(b ? "True\n" : "False\n");
+}
 template<unsigned Length, typename Type>
 inline CUDA_CALLABLE void print(vec_t<Length, Type> v)
 {

warp/native/bvh.cu CHANGED Viewed

@@ -65,7 +65,7 @@ __global__ void bvh_refit_kernel(int n, const int* __restrict__ parents, int* __
             int finished = atomicAdd(&child_count[parent], 1);
             // if we are the last thread (such that the parent node is now complete)
-            // then update its bounds and move onto the the next parent in the hierarchy
+            // then update its bounds and move onto the next parent in the hierarchy
             if (finished == 1)
             {
                 const int left_child = node_lowers[parent].i;
@@ -273,7 +273,7 @@ __global__ void build_hierarchy(int n, int* root, const int* __restrict__ deltas
             }
             // if we are the last thread (such that the parent node is now complete)
-            // then update its bounds and move onto the the next parent in the hierarchy
+            // then update its bounds and move onto the next parent in the hierarchy
             if (childCount == 1)
             {
                 const int left_child = lowers[parent].i;
@@ -463,7 +463,9 @@ void bvh_create_device(void* context, vec3* lowers, vec3* uppers, int num_items,
     bvh_host.num_items = num_items;
     bvh_host.max_nodes = 2*num_items;
     bvh_host.node_lowers = (BVHPackedNodeHalf*)alloc_device(WP_CURRENT_CONTEXT, sizeof(BVHPackedNodeHalf)*bvh_host.max_nodes);
+    memset_device(WP_CURRENT_CONTEXT, bvh_host.node_lowers, 0, sizeof(BVHPackedNodeHalf)*bvh_host.max_nodes);
     bvh_host.node_uppers = (BVHPackedNodeHalf*)alloc_device(WP_CURRENT_CONTEXT, sizeof(BVHPackedNodeHalf)*bvh_host.max_nodes);
+    memset_device(WP_CURRENT_CONTEXT, bvh_host.node_uppers, 0, sizeof(BVHPackedNodeHalf)*bvh_host.max_nodes);
     bvh_host.node_parents = (int*)alloc_device(WP_CURRENT_CONTEXT, sizeof(int)*bvh_host.max_nodes);
     bvh_host.node_counts = (int*)alloc_device(WP_CURRENT_CONTEXT, sizeof(int)*bvh_host.max_nodes);
     bvh_host.root = (int*)alloc_device(WP_CURRENT_CONTEXT, sizeof(int));

warp/native/cuda_util.cpp CHANGED Viewed

@@ -82,6 +82,8 @@ static PFN_cuStreamWaitEvent_v3020 pfn_cuStreamWaitEvent;
 static PFN_cuStreamGetCtx_v9020 pfn_cuStreamGetCtx;
 static PFN_cuStreamGetCaptureInfo_v11030 pfn_cuStreamGetCaptureInfo;
 static PFN_cuStreamUpdateCaptureDependencies_v11030 pfn_cuStreamUpdateCaptureDependencies;
+static PFN_cuStreamCreateWithPriority_v5050 pfn_cuStreamCreateWithPriority;
+static PFN_cuStreamGetPriority_v5050 pfn_cuStreamGetPriority;
 static PFN_cuEventCreate_v2000 pfn_cuEventCreate;
 static PFN_cuEventDestroy_v4000 pfn_cuEventDestroy;
 static PFN_cuEventRecord_v2000 pfn_cuEventRecord;
@@ -211,6 +213,8 @@ bool init_cuda_driver()
     get_driver_entry_point("cuStreamGetCtx", &(void*&)pfn_cuStreamGetCtx);
     get_driver_entry_point("cuStreamGetCaptureInfo", &(void*&)pfn_cuStreamGetCaptureInfo);
     get_driver_entry_point("cuStreamUpdateCaptureDependencies", &(void*&)pfn_cuStreamUpdateCaptureDependencies);
+    get_driver_entry_point("cuStreamCreateWithPriority", &(void*&)pfn_cuStreamCreateWithPriority);
+    get_driver_entry_point("cuStreamGetPriority", &(void*&)pfn_cuStreamGetPriority);
     get_driver_entry_point("cuEventCreate", &(void*&)pfn_cuEventCreate);
     get_driver_entry_point("cuEventDestroy", &(void*&)pfn_cuEventDestroy);
     get_driver_entry_point("cuEventRecord", &(void*&)pfn_cuEventRecord);
@@ -474,6 +478,16 @@ CUresult cuStreamUpdateCaptureDependencies_f(CUstream stream, CUgraphNode *depen
     return pfn_cuStreamUpdateCaptureDependencies ? pfn_cuStreamUpdateCaptureDependencies(stream, dependencies, numDependencies, flags) : DRIVER_ENTRY_POINT_ERROR;
 }
+CUresult cuStreamCreateWithPriority_f(CUstream* phStream, unsigned int flags, int priority)
+{
+    return pfn_cuStreamCreateWithPriority ? pfn_cuStreamCreateWithPriority(phStream, flags, priority) : DRIVER_ENTRY_POINT_ERROR;
+}
+CUresult cuStreamGetPriority_f(CUstream hStream, int* priority)
+{
+    return pfn_cuStreamGetPriority ? pfn_cuStreamGetPriority(hStream, priority) : DRIVER_ENTRY_POINT_ERROR;
+}
 CUresult cuEventCreate_f(CUevent* event, unsigned int flags)
 {
     return pfn_cuEventCreate ? pfn_cuEventCreate(event, flags) : DRIVER_ENTRY_POINT_ERROR;

warp/native/cuda_util.h CHANGED Viewed

@@ -81,6 +81,8 @@ CUresult cuStreamWaitEvent_f(CUstream stream, CUevent event, unsigned int flags)
 CUresult cuStreamGetCtx_f(CUstream stream, CUcontext* pctx);
 CUresult cuStreamGetCaptureInfo_f(CUstream stream, CUstreamCaptureStatus *captureStatus_out, cuuint64_t *id_out, CUgraph *graph_out, const CUgraphNode **dependencies_out, size_t *numDependencies_out);
 CUresult cuStreamUpdateCaptureDependencies_f(CUstream stream, CUgraphNode *dependencies, size_t numDependencies, unsigned int flags);
+CUresult cuStreamCreateWithPriority_f(CUstream* phStream, unsigned int flags, int priority);
+CUresult cuStreamGetPriority_f(CUstream hStream, int* priority);
 CUresult cuEventCreate_f(CUevent* event, unsigned int flags);
 CUresult cuEventDestroy_f(CUevent event);
 CUresult cuEventRecord_f(CUevent event, CUstream stream);

warp/native/error.cpp CHANGED Viewed

@@ -28,7 +28,8 @@ void set_error_string(const char* fmt, ...)
     vsnprintf(g_error_buffer, sizeof(g_error_buffer), fmt, args);
     if (g_error_output_enabled)
     {
-        vfprintf(g_error_stream, fmt, args);
+        // note: we deliberately avoid vfprintf() due to problems with runtime glibc mismatch
+        fputs(g_error_buffer, g_error_stream);
         fputc('\n', g_error_stream);
         fflush(g_error_stream);
     }
@@ -46,7 +47,8 @@ void append_error_string(const char* fmt, ...)
     vsnprintf(g_error_buffer + offset, sizeof(g_error_buffer) - offset, fmt, args);
     if (g_error_output_enabled)
     {
-        vfprintf(g_error_stream, fmt, args);
+        // note: we deliberately avoid vfprintf() due to problems with runtime glibc mismatch
+        fputs(g_error_buffer + offset, g_error_stream);
         fputc('\n', g_error_stream);
         fflush(g_error_stream);
     }

warp/native/exports.h CHANGED Viewed

@@ -1041,6 +1041,69 @@ WP_API void builtin_pnoise_uint32_vec4f_int32_int32_int32_int32(uint32 state, ve
 WP_API void builtin_curlnoise_uint32_vec2f_uint32_float32_float32(uint32 state, vec2f& xy, uint32 octaves, float32 lacunarity, float32 gain, vec2f* ret) { *ret = wp::curlnoise(state, xy, octaves, lacunarity, gain); }
 WP_API void builtin_curlnoise_uint32_vec3f_uint32_float32_float32(uint32 state, vec3f& xyz, uint32 octaves, float32 lacunarity, float32 gain, vec3f* ret) { *ret = wp::curlnoise(state, xyz, octaves, lacunarity, gain); }
 WP_API void builtin_curlnoise_uint32_vec4f_uint32_float32_float32(uint32 state, vec4f& xyzt, uint32 octaves, float32 lacunarity, float32 gain, vec3f* ret) { *ret = wp::curlnoise(state, xyzt, octaves, lacunarity, gain); }
+WP_API void builtin_assign_vec2h_int32_float16(vec2h& a, int32 i, float16 value, vec2h* ret) { *ret = wp::assign(a, i, value); }
+WP_API void builtin_assign_vec3h_int32_float16(vec3h& a, int32 i, float16 value, vec3h* ret) { *ret = wp::assign(a, i, value); }
+WP_API void builtin_assign_vec4h_int32_float16(vec4h& a, int32 i, float16 value, vec4h* ret) { *ret = wp::assign(a, i, value); }
+WP_API void builtin_assign_spatial_vectorh_int32_float16(spatial_vectorh& a, int32 i, float16 value, spatial_vectorh* ret) { *ret = wp::assign(a, i, value); }
+WP_API void builtin_assign_vec2f_int32_float32(vec2f& a, int32 i, float32 value, vec2f* ret) { *ret = wp::assign(a, i, value); }
+WP_API void builtin_assign_vec3f_int32_float32(vec3f& a, int32 i, float32 value, vec3f* ret) { *ret = wp::assign(a, i, value); }
+WP_API void builtin_assign_vec4f_int32_float32(vec4f& a, int32 i, float32 value, vec4f* ret) { *ret = wp::assign(a, i, value); }
+WP_API void builtin_assign_spatial_vectorf_int32_float32(spatial_vectorf& a, int32 i, float32 value, spatial_vectorf* ret) { *ret = wp::assign(a, i, value); }
+WP_API void builtin_assign_vec2d_int32_float64(vec2d& a, int32 i, float64 value, vec2d* ret) { *ret = wp::assign(a, i, value); }
+WP_API void builtin_assign_vec3d_int32_float64(vec3d& a, int32 i, float64 value, vec3d* ret) { *ret = wp::assign(a, i, value); }
+WP_API void builtin_assign_vec4d_int32_float64(vec4d& a, int32 i, float64 value, vec4d* ret) { *ret = wp::assign(a, i, value); }
+WP_API void builtin_assign_spatial_vectord_int32_float64(spatial_vectord& a, int32 i, float64 value, spatial_vectord* ret) { *ret = wp::assign(a, i, value); }
+WP_API void builtin_assign_vec2s_int32_int16(vec2s& a, int32 i, int16 value, vec2s* ret) { *ret = wp::assign(a, i, value); }
+WP_API void builtin_assign_vec3s_int32_int16(vec3s& a, int32 i, int16 value, vec3s* ret) { *ret = wp::assign(a, i, value); }
+WP_API void builtin_assign_vec4s_int32_int16(vec4s& a, int32 i, int16 value, vec4s* ret) { *ret = wp::assign(a, i, value); }
+WP_API void builtin_assign_vec2i_int32_int32(vec2i& a, int32 i, int32 value, vec2i* ret) { *ret = wp::assign(a, i, value); }
+WP_API void builtin_assign_vec3i_int32_int32(vec3i& a, int32 i, int32 value, vec3i* ret) { *ret = wp::assign(a, i, value); }
+WP_API void builtin_assign_vec4i_int32_int32(vec4i& a, int32 i, int32 value, vec4i* ret) { *ret = wp::assign(a, i, value); }
+WP_API void builtin_assign_vec2l_int32_int64(vec2l& a, int32 i, int64 value, vec2l* ret) { *ret = wp::assign(a, i, value); }
+WP_API void builtin_assign_vec3l_int32_int64(vec3l& a, int32 i, int64 value, vec3l* ret) { *ret = wp::assign(a, i, value); }
+WP_API void builtin_assign_vec4l_int32_int64(vec4l& a, int32 i, int64 value, vec4l* ret) { *ret = wp::assign(a, i, value); }
+WP_API void builtin_assign_vec2b_int32_int8(vec2b& a, int32 i, int8 value, vec2b* ret) { *ret = wp::assign(a, i, value); }
+WP_API void builtin_assign_vec3b_int32_int8(vec3b& a, int32 i, int8 value, vec3b* ret) { *ret = wp::assign(a, i, value); }
+WP_API void builtin_assign_vec4b_int32_int8(vec4b& a, int32 i, int8 value, vec4b* ret) { *ret = wp::assign(a, i, value); }
+WP_API void builtin_assign_vec2us_int32_uint16(vec2us& a, int32 i, uint16 value, vec2us* ret) { *ret = wp::assign(a, i, value); }
+WP_API void builtin_assign_vec3us_int32_uint16(vec3us& a, int32 i, uint16 value, vec3us* ret) { *ret = wp::assign(a, i, value); }
+WP_API void builtin_assign_vec4us_int32_uint16(vec4us& a, int32 i, uint16 value, vec4us* ret) { *ret = wp::assign(a, i, value); }
+WP_API void builtin_assign_vec2ui_int32_uint32(vec2ui& a, int32 i, uint32 value, vec2ui* ret) { *ret = wp::assign(a, i, value); }
+WP_API void builtin_assign_vec3ui_int32_uint32(vec3ui& a, int32 i, uint32 value, vec3ui* ret) { *ret = wp::assign(a, i, value); }
+WP_API void builtin_assign_vec4ui_int32_uint32(vec4ui& a, int32 i, uint32 value, vec4ui* ret) { *ret = wp::assign(a, i, value); }
+WP_API void builtin_assign_vec2ul_int32_uint64(vec2ul& a, int32 i, uint64 value, vec2ul* ret) { *ret = wp::assign(a, i, value); }
+WP_API void builtin_assign_vec3ul_int32_uint64(vec3ul& a, int32 i, uint64 value, vec3ul* ret) { *ret = wp::assign(a, i, value); }
+WP_API void builtin_assign_vec4ul_int32_uint64(vec4ul& a, int32 i, uint64 value, vec4ul* ret) { *ret = wp::assign(a, i, value); }
+WP_API void builtin_assign_vec2ub_int32_uint8(vec2ub& a, int32 i, uint8 value, vec2ub* ret) { *ret = wp::assign(a, i, value); }
+WP_API void builtin_assign_vec3ub_int32_uint8(vec3ub& a, int32 i, uint8 value, vec3ub* ret) { *ret = wp::assign(a, i, value); }
+WP_API void builtin_assign_vec4ub_int32_uint8(vec4ub& a, int32 i, uint8 value, vec4ub* ret) { *ret = wp::assign(a, i, value); }
+WP_API void builtin_assign_quath_int32_float16(quath& a, int32 i, float16 value, quath* ret) { *ret = wp::assign(a, i, value); }
+WP_API void builtin_assign_quatf_int32_float32(quatf& a, int32 i, float32 value, quatf* ret) { *ret = wp::assign(a, i, value); }
+WP_API void builtin_assign_quatd_int32_float64(quatd& a, int32 i, float64 value, quatd* ret) { *ret = wp::assign(a, i, value); }
+WP_API void builtin_assign_mat22h_int32_int32_float16(mat22h& a, int32 i, int32 j, float16 value, mat22h* ret) { *ret = wp::assign(a, i, j, value); }
+WP_API void builtin_assign_mat33h_int32_int32_float16(mat33h& a, int32 i, int32 j, float16 value, mat33h* ret) { *ret = wp::assign(a, i, j, value); }
+WP_API void builtin_assign_mat44h_int32_int32_float16(mat44h& a, int32 i, int32 j, float16 value, mat44h* ret) { *ret = wp::assign(a, i, j, value); }
+WP_API void builtin_assign_spatial_matrixh_int32_int32_float16(spatial_matrixh& a, int32 i, int32 j, float16 value, spatial_matrixh* ret) { *ret = wp::assign(a, i, j, value); }
+WP_API void builtin_assign_mat22f_int32_int32_float32(mat22f& a, int32 i, int32 j, float32 value, mat22f* ret) { *ret = wp::assign(a, i, j, value); }
+WP_API void builtin_assign_mat33f_int32_int32_float32(mat33f& a, int32 i, int32 j, float32 value, mat33f* ret) { *ret = wp::assign(a, i, j, value); }
+WP_API void builtin_assign_mat44f_int32_int32_float32(mat44f& a, int32 i, int32 j, float32 value, mat44f* ret) { *ret = wp::assign(a, i, j, value); }
+WP_API void builtin_assign_spatial_matrixf_int32_int32_float32(spatial_matrixf& a, int32 i, int32 j, float32 value, spatial_matrixf* ret) { *ret = wp::assign(a, i, j, value); }
+WP_API void builtin_assign_mat22d_int32_int32_float64(mat22d& a, int32 i, int32 j, float64 value, mat22d* ret) { *ret = wp::assign(a, i, j, value); }
+WP_API void builtin_assign_mat33d_int32_int32_float64(mat33d& a, int32 i, int32 j, float64 value, mat33d* ret) { *ret = wp::assign(a, i, j, value); }
+WP_API void builtin_assign_mat44d_int32_int32_float64(mat44d& a, int32 i, int32 j, float64 value, mat44d* ret) { *ret = wp::assign(a, i, j, value); }
+WP_API void builtin_assign_spatial_matrixd_int32_int32_float64(spatial_matrixd& a, int32 i, int32 j, float64 value, spatial_matrixd* ret) { *ret = wp::assign(a, i, j, value); }
+WP_API void builtin_assign_mat22h_int32_vec2h(mat22h& a, int32 i, vec2h& value, mat22h* ret) { *ret = wp::assign(a, i, value); }
+WP_API void builtin_assign_mat33h_int32_vec3h(mat33h& a, int32 i, vec3h& value, mat33h* ret) { *ret = wp::assign(a, i, value); }
+WP_API void builtin_assign_mat44h_int32_vec4h(mat44h& a, int32 i, vec4h& value, mat44h* ret) { *ret = wp::assign(a, i, value); }
+WP_API void builtin_assign_spatial_matrixh_int32_spatial_vectorh(spatial_matrixh& a, int32 i, spatial_vectorh& value, spatial_matrixh* ret) { *ret = wp::assign(a, i, value); }
+WP_API void builtin_assign_mat22f_int32_vec2f(mat22f& a, int32 i, vec2f& value, mat22f* ret) { *ret = wp::assign(a, i, value); }
+WP_API void builtin_assign_mat33f_int32_vec3f(mat33f& a, int32 i, vec3f& value, mat33f* ret) { *ret = wp::assign(a, i, value); }
+WP_API void builtin_assign_mat44f_int32_vec4f(mat44f& a, int32 i, vec4f& value, mat44f* ret) { *ret = wp::assign(a, i, value); }
+WP_API void builtin_assign_spatial_matrixf_int32_spatial_vectorf(spatial_matrixf& a, int32 i, spatial_vectorf& value, spatial_matrixf* ret) { *ret = wp::assign(a, i, value); }
+WP_API void builtin_assign_mat22d_int32_vec2d(mat22d& a, int32 i, vec2d& value, mat22d* ret) { *ret = wp::assign(a, i, value); }
+WP_API void builtin_assign_mat33d_int32_vec3d(mat33d& a, int32 i, vec3d& value, mat33d* ret) { *ret = wp::assign(a, i, value); }
+WP_API void builtin_assign_mat44d_int32_vec4d(mat44d& a, int32 i, vec4d& value, mat44d* ret) { *ret = wp::assign(a, i, value); }
+WP_API void builtin_assign_spatial_matrixd_int32_spatial_vectord(spatial_matrixd& a, int32 i, spatial_vectord& value, spatial_matrixd* ret) { *ret = wp::assign(a, i, value); }
 WP_API void builtin_extract_vec2h_int32(vec2h& a, int32 i, float16* ret) { *ret = wp::extract(a, i); }
 WP_API void builtin_extract_vec3h_int32(vec3h& a, int32 i, float16* ret) { *ret = wp::extract(a, i); }
 WP_API void builtin_extract_vec4h_int32(vec4h& a, int32 i, float16* ret) { *ret = wp::extract(a, i); }
@@ -1494,6 +1557,42 @@ WP_API void builtin_mod_uint16_uint16(uint16 a, uint16 b, uint16* ret) { *ret =
 WP_API void builtin_mod_uint32_uint32(uint32 a, uint32 b, uint32* ret) { *ret = wp::mod(a, b); }
 WP_API void builtin_mod_uint64_uint64(uint64 a, uint64 b, uint64* ret) { *ret = wp::mod(a, b); }
 WP_API void builtin_mod_uint8_uint8(uint8 a, uint8 b, uint8* ret) { *ret = wp::mod(a, b); }
+WP_API void builtin_mod_vec2h_vec2h(vec2h& a, vec2h& b, vec2h* ret) { *ret = wp::mod(a, b); }
+WP_API void builtin_mod_vec3h_vec3h(vec3h& a, vec3h& b, vec3h* ret) { *ret = wp::mod(a, b); }
+WP_API void builtin_mod_vec4h_vec4h(vec4h& a, vec4h& b, vec4h* ret) { *ret = wp::mod(a, b); }
+WP_API void builtin_mod_spatial_vectorh_spatial_vectorh(spatial_vectorh& a, spatial_vectorh& b, spatial_vectorh* ret) { *ret = wp::mod(a, b); }
+WP_API void builtin_mod_vec2f_vec2f(vec2f& a, vec2f& b, vec2f* ret) { *ret = wp::mod(a, b); }
+WP_API void builtin_mod_vec3f_vec3f(vec3f& a, vec3f& b, vec3f* ret) { *ret = wp::mod(a, b); }
+WP_API void builtin_mod_vec4f_vec4f(vec4f& a, vec4f& b, vec4f* ret) { *ret = wp::mod(a, b); }
+WP_API void builtin_mod_spatial_vectorf_spatial_vectorf(spatial_vectorf& a, spatial_vectorf& b, spatial_vectorf* ret) { *ret = wp::mod(a, b); }
+WP_API void builtin_mod_vec2d_vec2d(vec2d& a, vec2d& b, vec2d* ret) { *ret = wp::mod(a, b); }
+WP_API void builtin_mod_vec3d_vec3d(vec3d& a, vec3d& b, vec3d* ret) { *ret = wp::mod(a, b); }
+WP_API void builtin_mod_vec4d_vec4d(vec4d& a, vec4d& b, vec4d* ret) { *ret = wp::mod(a, b); }
+WP_API void builtin_mod_spatial_vectord_spatial_vectord(spatial_vectord& a, spatial_vectord& b, spatial_vectord* ret) { *ret = wp::mod(a, b); }
+WP_API void builtin_mod_vec2s_vec2s(vec2s& a, vec2s& b, vec2s* ret) { *ret = wp::mod(a, b); }
+WP_API void builtin_mod_vec3s_vec3s(vec3s& a, vec3s& b, vec3s* ret) { *ret = wp::mod(a, b); }
+WP_API void builtin_mod_vec4s_vec4s(vec4s& a, vec4s& b, vec4s* ret) { *ret = wp::mod(a, b); }
+WP_API void builtin_mod_vec2i_vec2i(vec2i& a, vec2i& b, vec2i* ret) { *ret = wp::mod(a, b); }
+WP_API void builtin_mod_vec3i_vec3i(vec3i& a, vec3i& b, vec3i* ret) { *ret = wp::mod(a, b); }
+WP_API void builtin_mod_vec4i_vec4i(vec4i& a, vec4i& b, vec4i* ret) { *ret = wp::mod(a, b); }
+WP_API void builtin_mod_vec2l_vec2l(vec2l& a, vec2l& b, vec2l* ret) { *ret = wp::mod(a, b); }
+WP_API void builtin_mod_vec3l_vec3l(vec3l& a, vec3l& b, vec3l* ret) { *ret = wp::mod(a, b); }
+WP_API void builtin_mod_vec4l_vec4l(vec4l& a, vec4l& b, vec4l* ret) { *ret = wp::mod(a, b); }
+WP_API void builtin_mod_vec2b_vec2b(vec2b& a, vec2b& b, vec2b* ret) { *ret = wp::mod(a, b); }
+WP_API void builtin_mod_vec3b_vec3b(vec3b& a, vec3b& b, vec3b* ret) { *ret = wp::mod(a, b); }
+WP_API void builtin_mod_vec4b_vec4b(vec4b& a, vec4b& b, vec4b* ret) { *ret = wp::mod(a, b); }
+WP_API void builtin_mod_vec2us_vec2us(vec2us& a, vec2us& b, vec2us* ret) { *ret = wp::mod(a, b); }
+WP_API void builtin_mod_vec3us_vec3us(vec3us& a, vec3us& b, vec3us* ret) { *ret = wp::mod(a, b); }
+WP_API void builtin_mod_vec4us_vec4us(vec4us& a, vec4us& b, vec4us* ret) { *ret = wp::mod(a, b); }
+WP_API void builtin_mod_vec2ui_vec2ui(vec2ui& a, vec2ui& b, vec2ui* ret) { *ret = wp::mod(a, b); }
+WP_API void builtin_mod_vec3ui_vec3ui(vec3ui& a, vec3ui& b, vec3ui* ret) { *ret = wp::mod(a, b); }
+WP_API void builtin_mod_vec4ui_vec4ui(vec4ui& a, vec4ui& b, vec4ui* ret) { *ret = wp::mod(a, b); }
+WP_API void builtin_mod_vec2ul_vec2ul(vec2ul& a, vec2ul& b, vec2ul* ret) { *ret = wp::mod(a, b); }
+WP_API void builtin_mod_vec3ul_vec3ul(vec3ul& a, vec3ul& b, vec3ul* ret) { *ret = wp::mod(a, b); }
+WP_API void builtin_mod_vec4ul_vec4ul(vec4ul& a, vec4ul& b, vec4ul* ret) { *ret = wp::mod(a, b); }
+WP_API void builtin_mod_vec2ub_vec2ub(vec2ub& a, vec2ub& b, vec2ub* ret) { *ret = wp::mod(a, b); }
+WP_API void builtin_mod_vec3ub_vec3ub(vec3ub& a, vec3ub& b, vec3ub* ret) { *ret = wp::mod(a, b); }
+WP_API void builtin_mod_vec4ub_vec4ub(vec4ub& a, vec4ub& b, vec4ub* ret) { *ret = wp::mod(a, b); }
 WP_API void builtin_div_float16_float16(float16 a, float16 b, float16* ret) { *ret = wp::div(a, b); }
 WP_API void builtin_div_float32_float32(float32 a, float32 b, float32* ret) { *ret = wp::div(a, b); }
 WP_API void builtin_div_float64_float64(float64 a, float64 b, float64* ret) { *ret = wp::div(a, b); }

warp/native/mat.h CHANGED Viewed

@@ -387,6 +387,103 @@ inline CUDA_CALLABLE void adj_index(const mat_t<Rows,Cols,Type>& m, int row, int
     // nop
 }
+// Element write: returns a copy of m whose (row, col) entry is replaced by
+// value. The matrix passed in is not written to.
+// Builds without NDEBUG print a message and assert on an out-of-range index.
+template<unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE mat_t<Rows,Cols,Type> assign(mat_t<Rows,Cols,Type>& m, int row, int col, Type value)
+{
+#ifndef NDEBUG
+    const bool row_in_range = (row >= 0) && (row < Rows);
+    if (!row_in_range)
+    {
+        printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
+        assert(0);
+    }
+    const bool col_in_range = (col >= 0) && (col < Cols);
+    if (!col_in_range)
+    {
+        printf("mat col index %d out of bounds at %s %d\n", col, __FILE__, __LINE__);
+        assert(0);
+    }
+#endif
+    // work on a copy so the caller's matrix keeps its old contents
+    mat_t<Rows,Cols,Type> updated(m);
+    updated.data[row][col] = value;
+    return updated;
+}
+// Row write: returns a copy of m whose row `row` is filled from the Cols
+// components of value. The matrix passed in is not written to.
+// Builds without NDEBUG print a message and assert on an out-of-range row.
+template<unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE mat_t<Rows,Cols,Type> assign(mat_t<Rows,Cols,Type>& m, int row, vec_t<Cols,Type>& value)
+{
+#ifndef NDEBUG
+    const bool row_in_range = (row >= 0) && (row < Rows);
+    if (!row_in_range)
+    {
+        printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
+        assert(0);
+    }
+#endif
+    mat_t<Rows,Cols,Type> updated(m);
+    // copy the vector into the selected row, one column at a time
+    for (unsigned c = 0; c != Cols; ++c)
+        updated.data[row][c] = value[c];
+    return updated;
+}
+// Adjoint counterpart of the element-wise assign(m, row, col, value).
+// adj_ret's (row, col) entry is accumulated into adj_value; every other entry
+// of adj_ret is accumulated into the matching entry of adj_m.
+// adj_row and adj_col are not written.
+// Builds without NDEBUG print a message and assert on an out-of-range index.
+template<unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE void adj_assign(mat_t<Rows,Cols,Type>& m, int row, int col, Type value,
+                                        mat_t<Rows,Cols,Type>& adj_m, int& adj_row, int& adj_col, Type& adj_value, const mat_t<Rows,Cols,Type>& adj_ret)
+{
+#ifndef NDEBUG
+    const bool row_in_range = (row >= 0) && (row < Rows);
+    if (!row_in_range)
+    {
+        printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
+        assert(0);
+    }
+    const bool col_in_range = (col >= 0) && (col < Cols);
+    if (!col_in_range)
+    {
+        printf("mat col index %d out of bounds at %s %d\n", col, __FILE__, __LINE__);
+        assert(0);
+    }
+#endif
+    // the overwritten slot feeds the adjoint of the assigned value ...
+    adj_value += adj_ret.data[row][col];
+    // ... and all remaining slots feed the adjoint of the source matrix
+    for (unsigned r = 0; r < Rows; ++r)
+    {
+        for (unsigned c = 0; c < Cols; ++c)
+        {
+            const bool overwritten = (r == row) && (c == col);
+            if (overwritten)
+                continue;
+            adj_m.data[r][c] += adj_ret.data[r][c];
+        }
+    }
+}
+// Adjoint counterpart of the row-wise assign(m, row, value).
+// Entries of adj_ret in row `row` are accumulated into adj_value; entries in
+// all other rows are accumulated into the matching entries of adj_m.
+// adj_row is not written.
+// Builds without NDEBUG print a message and assert on an out-of-range row.
+template<unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE void adj_assign(mat_t<Rows,Cols,Type>& m, int row, vec_t<Cols,Type>& value,
+                                        mat_t<Rows,Cols,Type>& adj_m, int& adj_row, vec_t<Cols,Type>& adj_value, const mat_t<Rows,Cols,Type>& adj_ret)
+{
+#ifndef NDEBUG
+    const bool row_in_range = (row >= 0) && (row < Rows);
+    if (!row_in_range)
+    {
+        printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
+        assert(0);
+    }
+#endif
+    for (unsigned r = 0; r < Rows; ++r)
+    {
+        // decided once per row: does this row belong to the assigned vector?
+        const bool is_assigned_row = (r == row);
+        for (unsigned c = 0; c < Cols; ++c)
+        {
+            if (is_assigned_row)
+                adj_value[c] += adj_ret.data[r][c];
+            else
+                adj_m.data[r][c] += adj_ret.data[r][c];
+        }
+    }
+}
 template<unsigned Rows, unsigned Cols, typename Type>
 inline bool CUDA_CALLABLE isfinite(const mat_t<Rows,Cols,Type>& m)
 {

warp/native/mesh.cpp CHANGED Viewed

@@ -36,6 +36,16 @@ bool mesh_get_descriptor(uint64_t id, Mesh& mesh)
         return true;
 }
+// Overwrites the descriptor stored under id in g_mesh_descriptors with mesh.
+// Returns true when an entry for id existed and was replaced; returns false
+// and leaves the table unchanged when there is no such entry.
+bool mesh_set_descriptor(uint64_t id, const Mesh& mesh)
+{
+    auto entry = g_mesh_descriptors.find(id);
+    const bool registered = (entry != g_mesh_descriptors.end());
+    if (registered)
+        entry->second = mesh;
+    return registered;
+}
 void mesh_add_descriptor(uint64_t id, const Mesh& mesh)
 {
     g_mesh_descriptors[id] = mesh;
@@ -191,6 +201,30 @@ void mesh_refit_host(uint64_t id)
     }
 }
+// Replaces the points array of the mesh addressed by id and then calls
+// mesh_refit_host on it. The new array must be one-dimensional and have the
+// same length as the current points array; otherwise a message is written to
+// stderr and the mesh is left as it was.
+void mesh_set_points_host(uint64_t id, wp::array_t<wp::vec3> points)
+{
+    // the id is interpreted directly as the Mesh address
+    Mesh* mesh = reinterpret_cast<Mesh*>(id);
+    const bool shape_matches = (points.ndim == 1) && (points.shape[0] == mesh->points.shape[0]);
+    if (shape_matches)
+    {
+        mesh->points = points;
+        mesh_refit_host(id);
+    }
+    else
+    {
+        fprintf(stderr, "The new points input for mesh_set_points_host does not match the shape of the original points!\n");
+    }
+}
+// Replaces the velocities array of the mesh addressed by id. The new array
+// must be one-dimensional and have the same length as the current velocities
+// array; otherwise a message is written to stderr and the mesh is left as it
+// was.
+void mesh_set_velocities_host(uint64_t id, wp::array_t<wp::vec3> velocities)
+{
+    // the id is interpreted directly as the Mesh address
+    Mesh* mesh = reinterpret_cast<Mesh*>(id);
+    const bool shape_matches = (velocities.ndim == 1) && (velocities.shape[0] == mesh->velocities.shape[0]);
+    if (shape_matches)
+    {
+        mesh->velocities = velocities;
+    }
+    else
+    {
+        fprintf(stderr, "The new velocities input for mesh_set_velocities_host does not match the shape of the original velocities!\n");
+    }
+}
 // stubs for non-CUDA platforms
 #if !WP_ENABLE_CUDA
@@ -199,6 +233,8 @@ void mesh_refit_host(uint64_t id)
 WP_API uint64_t mesh_create_device(void* context, wp::array_t<wp::vec3> points, wp::array_t<wp::vec3> velocities, wp::array_t<int> tris, int num_points, int num_tris, int support_winding_number) { return 0; }
 WP_API void mesh_destroy_device(uint64_t id) {}
 WP_API void mesh_refit_device(uint64_t id) {}
+// Setter stubs: empty bodies, like the other device stubs in this !WP_ENABLE_CUDA section.
+// No trailing ';' after the bodies (it would be a stray empty declaration), and the
+// velocities setter names its array parameter after what it carries.
+WP_API void mesh_set_points_device(uint64_t id, wp::array_t<wp::vec3> points) {}
+WP_API void mesh_set_velocities_device(uint64_t id, wp::array_t<wp::vec3> velocities) {}
 #endif // !WP_ENABLE_CUDA