PyPI - warp-lang - Versions diffs - 1.3.0__py3-none-manylinux2014_x86_64.whl → 1.3.2__py3-none-manylinux2014_x86_64.whl - Mend

warp-lang 1.3.0__py3-none-manylinux2014_x86_64.whl → 1.3.2__py3-none-manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of warp-lang might be problematic. Click here for more details.

Files changed (38)

warp/autograd.py +6 -6
warp/bin/warp.so +0 -0
warp/builtins.py +46 -43
warp/codegen.py +27 -38
warp/config.py +1 -1
warp/context.py +160 -111
warp/examples/fem/example_mixed_elasticity.py +33 -23
warp/fem/field/nodal_field.py +1 -1
warp/fem/quadrature/quadrature.py +1 -0
warp/native/builtin.h +3 -3
warp/native/bvh.h +1 -1
warp/native/svd.h +22 -7
warp/native/warp.cpp +1 -0
warp/native/warp.cu +5 -0
warp/native/warp.h +1 -0
warp/sim/collide.py +1 -1
warp/sim/model.py +16 -3
warp/sim/utils.py +1 -1
warp/stubs.py +112 -112
warp/tape.py +3 -3
warp/tests/test_array.py +11 -0
warp/tests/test_async.py +3 -1
warp/tests/test_bvh.py +33 -8
warp/tests/test_codegen.py +25 -0
warp/tests/test_compile_consts.py +15 -0
warp/tests/test_examples.py +6 -1
warp/tests/test_fem.py +51 -0
warp/tests/test_grad_debug.py +2 -1
warp/tests/test_model.py +55 -0
warp/tests/test_point_triangle_closest_point.py +143 -0
warp/tests/test_reload.py +28 -0
warp/tests/test_struct.py +48 -30
warp/types.py +4 -2
{warp_lang-1.3.0.dist-info → warp_lang-1.3.2.dist-info}/METADATA +14 -14
{warp_lang-1.3.0.dist-info → warp_lang-1.3.2.dist-info}/RECORD +38 -37
{warp_lang-1.3.0.dist-info → warp_lang-1.3.2.dist-info}/WHEEL +1 -1
{warp_lang-1.3.0.dist-info → warp_lang-1.3.2.dist-info}/LICENSE.md +0 -0
{warp_lang-1.3.0.dist-info → warp_lang-1.3.2.dist-info}/top_level.txt +0 -0

warp/context.py CHANGED Viewed

@@ -1411,12 +1411,65 @@ class ModuleBuilder:
         return source
+# ModuleExec holds the compiled executable code for a specific device.
+# It can be used to obtain kernel hooks on that device and serves
+# as a reference-counted wrapper of the loaded module.
+# Clients can keep a reference to a ModuleExec object to prevent the
+# executable code from being unloaded prematurely.
+# For example, the Graph class retains references to all the CUDA modules
+# needed by a graph.  This ensures that graphs remain valid even if
+# the original Modules get reloaded.
+class ModuleExec:
+    def __new__(cls, *args, **kwargs):
+        instance = super(ModuleExec, cls).__new__(cls)
+        instance.handle = None
+        return instance
+    def __init__(self, handle, device):
+        self.handle = handle
+        self.device = device
+        self.kernel_hooks = {}
+    # release the loaded module
+    def __del__(self):
+        if self.handle is not None:
+            if self.device.is_cuda:
+                # use CUDA context guard to avoid side effects during garbage collection
+                with self.device.context_guard:
+                    runtime.core.cuda_unload_module(self.device.context, self.handle)
+            else:
+                runtime.llvm.unload_obj(self.handle.encode("utf-8"))
+    # lookup and cache kernel entry points
+    def get_kernel_hooks(self, kernel):
+        hooks = self.kernel_hooks.get(kernel)
+        if hooks is not None:
+            return hooks
+        name = kernel.get_mangled_name()
+        if self.device.is_cuda:
+            forward = runtime.core.cuda_get_kernel(
+                self.device.context, self.handle, (name + "_cuda_kernel_forward").encode("utf-8")
+            )
+            backward = runtime.core.cuda_get_kernel(
+                self.device.context, self.handle, (name + "_cuda_kernel_backward").encode("utf-8")
+            )
+        else:
+            func = ctypes.CFUNCTYPE(None)
+            forward = func(runtime.llvm.lookup(self.handle.encode("utf-8"), (name + "_cpu_forward").encode("utf-8")))
+            backward = func(runtime.llvm.lookup(self.handle.encode("utf-8"), (name + "_cpu_backward").encode("utf-8")))
+        hooks = KernelHooks(forward, backward)
+        self.kernel_hooks[kernel] = hooks
+        return hooks
 # -----------------------------------------------------
 # stores all functions and kernels for a Python module
 # creates a hash of the function to use for checking
 # build cache
 class Module:
     def __init__(self, name, loader):
         self.name = name
@@ -1427,8 +1480,8 @@ class Module:
         self.constants = {}  # Any constants referenced in this module including those defined in other modules
         self.structs = {}
-        self.cpu_module = None
-        self.cuda_modules = {}  # module lookup by CUDA context
+        self.cpu_exec = None  # executable CPU module
+        self.cuda_execs = {}  # executable CUDA module lookup by CUDA context
         self.cpu_build_failed = False
         self.cuda_build_failed = False
@@ -1441,11 +1494,6 @@ class Module:
             "mode": warp.config.mode,
         }
-        # kernel hook lookup per device
-        # hooks are stored with the module so they can be easily cleared when the module is reloaded.
-        # -> See ``Module.get_kernel_hooks()``
-        self.kernel_hooks = {}
         # Module dependencies are determined by scanning each function
         # and kernel for references to external functions and structs.
         #
@@ -1558,10 +1606,13 @@ class Module:
         computed ``content_hash`` will be used.
         """
-        def get_type_name(type_hint):
+        def get_type_name(type_hint) -> str:
             if isinstance(type_hint, warp.codegen.Struct):
                 return get_type_name(type_hint.cls)
-            return type_hint
+            elif isinstance(type_hint, warp.array) and isinstance(type_hint.dtype, warp.codegen.Struct):
+                return f"array{get_type_name(type_hint.dtype)}"
+            return str(type_hint)
         def hash_recursive(module, visited):
             # Hash this module, including all referenced modules recursively.
@@ -1682,27 +1733,26 @@ class Module:
         return hash_recursive(self, visited=set())
-    def load(self, device) -> bool:
-        from warp.utils import ScopedTimer
-        device = get_device(device)
+    def load(self, device) -> ModuleExec:
+        device = runtime.get_device(device)
         if device.is_cpu:
             # check if already loaded
-            if self.cpu_module:
-                return True
+            if self.cpu_exec:
+                return self.cpu_exec
             # avoid repeated build attempts
             if self.cpu_build_failed:
-                return False
+                return None
             if not warp.is_cpu_available():
                 raise RuntimeError("Failed to build CPU module because no CPU buildchain was found")
         else:
             # check if already loaded
-            if device.context in self.cuda_modules:
-                return True
+            cuda_exec = self.cuda_execs.get(device.context)
+            if cuda_exec is not None:
+                return cuda_exec
             # avoid repeated build attempts
             if self.cuda_build_failed:
-                return False
+                return None
             if not warp.is_cuda_available():
                 raise RuntimeError("Failed to build CUDA module because CUDA is not available")
@@ -1712,7 +1762,7 @@ class Module:
         # use a unique module path using the module short hash
         module_dir = os.path.join(warp.config.kernel_cache_dir, f"{module_name}_{module_hash.hex()[:7]}")
-        with ScopedTimer(
+        with warp.ScopedTimer(
             f"Module {self.name} {module_hash.hex()[:7]} load on device '{device}'", active=not warp.config.quiet
         ) as module_load_timer:
             # -----------------------------------------------------------
@@ -1784,7 +1834,7 @@ class Module:
                         output_path = os.path.join(build_dir, output_name)
                         # build object code
-                        with ScopedTimer("Compile x86", active=warp.config.verbose):
+                        with warp.ScopedTimer("Compile x86", active=warp.config.verbose):
                             warp.build.build_cpu(
                                 output_path,
                                 source_code_path,
@@ -1812,7 +1862,7 @@ class Module:
                         output_path = os.path.join(build_dir, output_name)
                         # generate PTX or CUBIN
-                        with ScopedTimer("Compile CUDA", active=warp.config.verbose):
+                        with warp.ScopedTimer("Compile CUDA", active=warp.config.verbose):
                             warp.build.build_cuda(
                                 source_code_path,
                                 output_arch,
@@ -1865,12 +1915,14 @@ class Module:
             # Load CPU or CUDA binary
             if device.is_cpu:
                 runtime.llvm.load_obj(binary_path.encode("utf-8"), module_name.encode("utf-8"))
-                self.cpu_module = module_name
+                module_exec = ModuleExec(module_name, device)
+                self.cpu_exec = module_exec
             elif device.is_cuda:
                 cuda_module = warp.build.load_cuda(binary_path, device)
                 if cuda_module is not None:
-                    self.cuda_modules[device.context] = cuda_module
+                    module_exec = ModuleExec(cuda_module, device)
+                    self.cuda_execs[device.context] = module_exec
                 else:
                     module_load_timer.extra_msg = " (error)"
                     raise Exception(f"Failed to load CUDA module '{self.name}'")
@@ -1881,65 +1933,27 @@ class Module:
                 # clean up build_dir used for this process regardless
                 shutil.rmtree(build_dir, ignore_errors=True)
-        return True
+        return module_exec
     def unload(self):
-        if self.cpu_module:
-            runtime.llvm.unload_obj(self.cpu_module.encode("utf-8"))
-            self.cpu_module = None
-        # need to unload the CUDA module from all CUDA contexts where it is loaded
-        # note: we ensure that this doesn't change the current CUDA context
-        if self.cuda_modules:
-            saved_context = runtime.core.cuda_context_get_current()
-            for context, module in self.cuda_modules.items():
-                device = runtime.context_map[context]
-                if device.is_capturing:
-                    raise RuntimeError(f"Failed to unload CUDA module '{self.name}' because graph capture is active")
-                runtime.core.cuda_unload_module(context, module)
-            runtime.core.cuda_context_set_current(saved_context)
-            self.cuda_modules = {}
-        # clear kernel hooks
-        self.kernel_hooks = {}
+        # clear loaded modules
+        self.cpu_exec = None
+        self.cuda_execs = {}
         # clear content hash
         self.content_hash = None
-    # lookup and cache kernel entry points based on name, called after compilation / module load
+    # lookup kernel entry points based on name, called after compilation / module load
     def get_kernel_hooks(self, kernel, device):
-        # get all hooks for this device
-        device_hooks = self.kernel_hooks.get(device.context)
-        if device_hooks is None:
-            self.kernel_hooks[device.context] = device_hooks = {}
-        # look up this kernel
-        hooks = device_hooks.get(kernel)
-        if hooks is not None:
-            return hooks
-        name = kernel.get_mangled_name()
-        if device.is_cpu:
-            func = ctypes.CFUNCTYPE(None)
-            forward = func(
-                runtime.llvm.lookup(self.cpu_module.encode("utf-8"), (name + "_cpu_forward").encode("utf-8"))
-            )
-            backward = func(
-                runtime.llvm.lookup(self.cpu_module.encode("utf-8"), (name + "_cpu_backward").encode("utf-8"))
-            )
+        if device.is_cuda:
+            module_exec = self.cuda_execs.get(device.context)
         else:
-            cu_module = self.cuda_modules[device.context]
-            forward = runtime.core.cuda_get_kernel(
-                device.context, cu_module, (name + "_cuda_kernel_forward").encode("utf-8")
-            )
-            backward = runtime.core.cuda_get_kernel(
-                device.context, cu_module, (name + "_cuda_kernel_backward").encode("utf-8")
-            )
+            module_exec = self.cpu_exec
-        hooks = KernelHooks(forward, backward)
-        device_hooks[kernel] = hooks
-        return hooks
+        if module_exec is not None:
+            return module_exec.get_kernel_hooks(kernel)
+        else:
+            raise RuntimeError(f"Module is not loaded on device {device}")
 # -------------------------------------------
@@ -2196,8 +2210,8 @@ class Device:
         self._stream = None
         self.null_stream = None
-        # set of streams where capture has started
-        self.captures = set()
+        # maps streams to started graph captures
+        self.captures = {}
         self.context_guard = ContextGuard(self)
@@ -2434,20 +2448,25 @@ Devicelike = Union[Device, str, None]
 class Graph:
     def __new__(cls, *args, **kwargs):
         instance = super(Graph, cls).__new__(cls)
-        instance.exec = None
+        instance.graph_exec = None
         return instance
-    def __init__(self, device: Device, exec: ctypes.c_void_p):
+    def __init__(self, device: Device, capture_id: int):
         self.device = device
-        self.exec = exec
+        self.capture_id = capture_id
+        self.module_execs = set()
     def __del__(self):
-        if not self.exec:
+        if not self.graph_exec:
             return
         # use CUDA context guard to avoid side effects during garbage collection
         with self.device.context_guard:
-            runtime.core.cuda_graph_destroy(self.device.context, self.exec)
+            runtime.core.cuda_graph_destroy(self.device.context, self.graph_exec)
+    # retain executable CUDA modules used by this graph, which prevents them from being unloaded
+    def retain_module_exec(self, module_exec: ModuleExec):
+        self.module_execs.add(module_exec)
 class Runtime:
@@ -2488,6 +2507,9 @@ class Runtime:
         else:
             self.llvm = None
+        # maps capture ids to graphs
+        self.captures = {}
         # setup c-types for warp.dll
         try:
             self.core.get_error_string.argtypes = []
@@ -3023,6 +3045,8 @@ class Runtime:
             self.core.cuda_stream_wait_stream.restype = None
             self.core.cuda_stream_is_capturing.argtypes = [ctypes.c_void_p]
             self.core.cuda_stream_is_capturing.restype = ctypes.c_int
+            self.core.cuda_stream_get_capture_id.argtypes = [ctypes.c_void_p]
+            self.core.cuda_stream_get_capture_id.restype = ctypes.c_uint64
             self.core.cuda_event_create.argtypes = [ctypes.c_void_p, ctypes.c_uint]
             self.core.cuda_event_create.restype = ctypes.c_void_p
@@ -4490,13 +4514,14 @@ def pack_arg(kernel, arg_type, arg_name, value, device, adjoint=False):
 # so that launches can be replayed quickly, use `wp.launch(..., record_cmd=True)`
 class Launch:
     def __init__(self, kernel, device, hooks=None, params=None, params_addr=None, bounds=None, max_blocks=0):
+        # retain the module executable so it doesn't get unloaded
+        self.module_exec = kernel.module.load(device)
+        if not self.module_exec:
+            raise RuntimeError(f"Failed to load module {kernel.module.name} on device {device}")
         # if not specified look up hooks
         if not hooks:
-            module = kernel.module
-            if not module.load(device):
-                return
-            hooks = module.get_kernel_hooks(kernel, device)
+            hooks = self.module_exec.get_kernel_hooks(kernel)
         # if not specified set a zero bound
         if not bounds:
@@ -4594,6 +4619,15 @@ class Launch:
         else:
             if stream is None:
                 stream = self.device.stream
+            # If the stream is capturing, we retain the CUDA module so that it doesn't get unloaded
+            # before the captured graph is released.
+            if runtime.core.cuda_stream_is_capturing(stream.cuda_stream):
+                capture_id = runtime.core.cuda_stream_get_capture_id(stream.cuda_stream)
+                graph = runtime.captures.get(capture_id)
+                if graph is not None:
+                    graph.retain_module_exec(self.module_exec)
             runtime.core.cuda_launch_kernel(
                 self.device.context,
                 self.hooks.forward,
@@ -4689,12 +4723,12 @@ def launch(
             kernel = kernel.add_overload(fwd_types)
         # delay load modules, including new overload if needed
-        module = kernel.module
-        if not module.load(device):
+        module_exec = kernel.module.load(device)
+        if not module_exec:
             return
         # late bind
-        hooks = module.get_kernel_hooks(kernel, device)
+        hooks = module_exec.get_kernel_hooks(kernel)
         pack_args(fwd_args, params)
         pack_args(adj_args, params, adjoint=True)
@@ -4730,6 +4764,14 @@ def launch(
             if stream is None:
                 stream = device.stream
+            # If the stream is capturing, we retain the CUDA module so that it doesn't get unloaded
+            # before the captured graph is released.
+            if runtime.core.cuda_stream_is_capturing(stream.cuda_stream):
+                capture_id = runtime.core.cuda_stream_get_capture_id(stream.cuda_stream)
+                graph = runtime.captures.get(capture_id)
+                if graph is not None:
+                    graph.retain_module_exec(module_exec)
             if adjoint:
                 if hooks.backward is None:
                     raise RuntimeError(
@@ -4778,7 +4820,7 @@ def launch(
         # detect illegal inter-kernel read/write access patterns if verification flag is set
         if warp.config.verify_autograd_array_access:
-            runtime.tape.check_kernel_array_access(kernel, fwd_args)
+            runtime.tape._check_kernel_array_access(kernel, fwd_args)
 def synchronize():
@@ -5014,11 +5056,18 @@ def capture_begin(device: Devicelike = None, stream=None, force_module_load=None
         if force_module_load:
             force_load(device)
-    device.captures.add(stream)
     if not runtime.core.cuda_graph_begin_capture(device.context, stream.cuda_stream, int(external)):
         raise RuntimeError(runtime.get_error_string())
+    capture_id = runtime.core.cuda_stream_get_capture_id(stream.cuda_stream)
+    graph = Graph(device, capture_id)
+    # add to ongoing captures on the device
+    device.captures[stream] = graph
+    # add to lookup table by globally unique capture id
+    runtime.captures[capture_id] = graph
 def capture_end(device: Devicelike = None, stream: Stream = None) -> Graph:
     """Ends the capture of a CUDA graph
@@ -5040,21 +5089,27 @@ def capture_end(device: Devicelike = None, stream: Stream = None) -> Graph:
             raise RuntimeError("Must be a CUDA device")
         stream = device.stream
-    if stream not in device.captures:
+    # get the graph being captured
+    graph = device.captures.get(stream)
+    if graph is None:
         raise RuntimeError("Graph capture is not active on this stream")
-    device.captures.remove(stream)
+    del device.captures[stream]
+    del runtime.captures[graph.capture_id]
-    graph = ctypes.c_void_p()
-    result = runtime.core.cuda_graph_end_capture(device.context, stream.cuda_stream, ctypes.byref(graph))
+    # get the graph executable
+    graph_exec = ctypes.c_void_p()
+    result = runtime.core.cuda_graph_end_capture(device.context, stream.cuda_stream, ctypes.byref(graph_exec))
     if not result:
         # A concrete error should've already been reported, so we don't need to go into details here
         raise RuntimeError(f"CUDA graph capture failed. {runtime.get_error_string()}")
-    # note that for external captures, we do not return a graph, because we don't instantiate it ourselves
-    if graph:
-        return Graph(device, graph)
+    # set the graph executable
+    graph.graph_exec = graph_exec
+    return graph
 def capture_launch(graph: Graph, stream: Stream = None):
@@ -5073,7 +5128,7 @@ def capture_launch(graph: Graph, stream: Stream = None):
         device = graph.device
         stream = device.stream
-    if not runtime.core.cuda_graph_launch(graph.exec, stream.cuda_stream):
+    if not runtime.core.cuda_graph_launch(graph.graph_exec, stream.cuda_stream):
         raise RuntimeError(f"Graph launch error: {runtime.get_error_string()}")
@@ -5522,15 +5577,9 @@ def export_stubs(file):  # pragma: no cover
             if not f.export or f.hidden:  # or f.generic:
                 continue
-            try:
-                # todo: construct a default value for each of the functions args
-                # so we can generate the return type for overloaded functions
-                return_type = f.value_func(None, None)
-                if return_type:
-                    return_str = " -> " + type_str(return_type)
-            except Exception:
-                pass
+            return_type = f.value_func(None, None)
+            if return_type:
+                return_str = " -> " + type_str(return_type)
             print("@over", file=file)
             print(f"def {f.key}({args}){return_str}:", file=file)

warp/examples/fem/example_mixed_elasticity.py CHANGED Viewed

@@ -12,8 +12,9 @@
 #
 # Div[ d/dF Psi(F(u)) ] = 0
 #
-# with Dirichlet boundary conditions on vertical sides,
-# and Psi an elastic potential function of the deformation gradient (here Neo-Hookean)
+# with Dirichlet boundary conditions on vertical sides and Psi an elastic potential function of the deformation gradient.
+# Here we choose Psi Neo-Hookean, as per Sec 3.2 of "Stable Neo-Hookean Flesh Simulation" (Smith et al. 2018),
+# Psi(F) = mu ||F||^2 + lambda (det J - 1 - mu/lambda)^2
 #
 # which we write as a sequence of Newton iterations:
 # int {sigma : grad v}  = 0   for all displacement test functions v
@@ -37,19 +38,28 @@ def displacement_gradient_form(
     return wp.ddot(tau(s), fem.grad(u, s))
+@wp.func
+def nh_parameters_from_lame(lame: wp.vec2):
+    """Parameters such that for small strains model behaves according to Hooke's law"""
+    mu_nh = lame[1]
+    lambda_nh = lame[0] + lame[1]
+    return mu_nh, lambda_nh
 @fem.integrand
 def nh_stress_form(s: fem.Sample, tau: fem.Field, u_cur: fem.Field, lame: wp.vec2):
     """d Psi/dF : tau"""
+    # Deformation gradient
     F = wp.identity(n=2, dtype=float) + fem.grad(u_cur, s)
+    # Area term and its derivative w.r.t F
     J = wp.determinant(F)
-    mu_nh = 2.0 * lame[1]
-    lambda_nh = lame[0] + lame[1]
-    gamma = 1.0 + mu_nh / lambda_nh
+    dJ_dF = wp.mat22(F[1, 1], -F[1, 0], -F[0, 1], F[0, 0])
-    dJ_dS = wp.mat22(F[1, 1], -F[1, 0], -F[0, 1], F[0, 0])
-    nh_stress = mu_nh * F + lambda_nh * (J - gamma) * dJ_dS
+    mu_nh, lambda_nh = nh_parameters_from_lame(lame)
+    nh_stress = mu_nh * F + (lambda_nh * (J - 1.0) - mu_nh) * dJ_dF
     return wp.ddot(tau(s), nh_stress)
@@ -62,23 +72,11 @@ def nh_stress_delta_form(s: fem.Sample, tau: fem.Field, u: fem.Field, u_cur: fem
     sigma_s = fem.grad(u, s)
     F = wp.identity(n=2, dtype=float) + fem.grad(u_cur, s)
     dJ_dF = wp.mat22(F[1, 1], -F[1, 0], -F[0, 1], F[0, 0])
-    mu_nh = 2.0 * lame[1]
-    lambda_nh = lame[0] + lame[1]
-    dpsi_dpsi = mu_nh * wp.ddot(tau_s, sigma_s) + lambda_nh * wp.ddot(dJ_dF * tau_s, dJ_dF * sigma_s)
-    # positive part of d2J_dS2
-    gamma = 1.0 + mu_nh / lambda_nh
-    J = wp.determinant(F)
-    if J >= gamma:
-        d2J_dF_sig = wp.mat22(sigma_s[1, 1], 0.0, 0.0, sigma_s[0, 0])
-    else:
-        d2J_dF_sig = wp.mat22(0.0, -sigma_s[1, 0], -sigma_s[0, 1], 0.0)
-    return dpsi_dpsi + lambda_nh * (J - gamma) * wp.ddot(d2J_dF_sig, tau_s)
+    # Gauss--Newton approximation; ignore d2J/dF2 term
+    mu_nh, lambda_nh = nh_parameters_from_lame(lame)
+    return mu_nh * wp.ddot(tau_s, sigma_s) + lambda_nh * wp.ddot(dJ_dF, tau_s) * wp.ddot(dJ_dF, sigma_s)
 @fem.integrand
@@ -114,6 +112,12 @@ def tensor_mass_form(
     return wp.ddot(tau(s), sig(s))
+@fem.integrand
+def area_form(s: fem.Sample, u_cur: fem.Field):
+    F = wp.identity(n=2, dtype=float) + fem.grad(u_cur, s)
+    return wp.determinant(F)
 class Example:
     def __init__(
         self,
@@ -228,6 +232,12 @@ class Example:
             wp.utils.array_cast(in_array=x, out_array=delta_u)
             fem.utils.array_axpy(x=delta_u, y=self._u_field.dof_values)
+        # Evaluate area conservation, should converge to 1.0 as Poisson ratio approaches 1.0
+        final_area = fem.integrate(
+            area_form, quadrature=fem.RegularQuadrature(domain, order=4), fields={"u_cur": self._u_field}
+        )
+        print(f"Area gain: {final_area}  (using Poisson ratio={self._lame[0] / (self._lame[0] + 2.0*self._lame[1])})")
     def render(self):
         self.renderer.add_field("solution", self._u_field)
@@ -242,7 +252,7 @@ if __name__ == "__main__":
     parser.add_argument("--resolution", type=int, default=25, help="Grid resolution.")
     parser.add_argument("--degree", type=int, default=2, help="Polynomial degree of shape functions.")
     parser.add_argument("--displacement", type=float, default=-0.5)
-    parser.add_argument("--poisson_ratio", type=float, default=0.5)
+    parser.add_argument("--poisson_ratio", type=float, default=0.99)
     parser.add_argument("--mesh", choices=("grid", "tri", "quad"), default="grid", help="Mesh type")
     parser.add_argument(
         "--nonconforming_stresses", action="store_true", help="For grid, use non-conforming stresses (Q_d/P_d)"

warp/fem/field/nodal_field.py CHANGED Viewed

@@ -247,7 +247,7 @@ class NodalFieldBase(DiscreteField):
     def _make_node_partition_index(self):
         @cache.dynamic_func(suffix=self.name)
-        def node_partition_index(args: self.EvalArg, node_index: int):
+        def node_partition_index(args: self.ElementEvalArg, node_index: int):
             return self.space_partition.partition_node_index(args.eval_arg.partition_arg, node_index)
         return node_partition_index

warp/fem/quadrature/quadrature.py CHANGED Viewed

@@ -336,6 +336,7 @@ class ExplicitQuadrature(Quadrature):
     @cache.cached_arg_value
     def arg_value(self, device):
         arg = self.Arg()
+        arg.points_per_cell = self._points_per_cell
         arg.points = self._points.to(device)
         arg.weights = self._weights.to(device)

warp/native/builtin.h CHANGED Viewed

@@ -748,7 +748,7 @@ inline CUDA_CALLABLE half floordiv(half a, half b)
 #if FP_CHECK
     if (!isfinite(a) || !isfinite(b) || float(b) == 0.0f)
     {
-        printf("%s:%d mod(%f, %f)\n", __FILE__, __LINE__, float(a), float(b));
+        printf("%s:%d floordiv(%f, %f)\n", __FILE__, __LINE__, float(a), float(b));
         assert(0);
     }
 #endif
@@ -759,7 +759,7 @@ inline CUDA_CALLABLE float floordiv(float a, float b)
 #if FP_CHECK
     if (!isfinite(a) || !isfinite(b) || b == 0.0f)
     {
-        printf("%s:%d mod(%f, %f)\n", __FILE__, __LINE__, a, b);
+        printf("%s:%d floordiv(%f, %f)\n", __FILE__, __LINE__, a, b);
         assert(0);
     }
 #endif
@@ -770,7 +770,7 @@ inline CUDA_CALLABLE double floordiv(double a, double b)
 #if FP_CHECK
     if (!isfinite(a) || !isfinite(b) || b == 0.0)
     {
-        printf("%s:%d mod(%f, %f)\n", __FILE__, __LINE__, a, b);
+        printf("%s:%d floordiv(%f, %f)\n", __FILE__, __LINE__, a, b);
         assert(0);
     }
 #endif

warp/native/bvh.h CHANGED Viewed

@@ -320,7 +320,7 @@ CUDA_CALLABLE inline bvh_query_t bvh_query_aabb(
 CUDA_CALLABLE inline bvh_query_t bvh_query_ray(
     uint64_t id, const vec3& start, const vec3& dir)
 {
-	return bvh_query(id, true, start, dir);
+	return bvh_query(id, true, start, 1.0f / dir);
 }
 //Stub