PyPI - warp-lang - Versions diffs - 1.8.1__py3-none-win_amd64.whl → 1.9.0__py3-none-win_amd64.whl - Mend

warp-lang 1.8.1__py3-none-win_amd64.whl → 1.9.0__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of warp-lang might be problematic. Click here for more details.

Files changed (134)

warp/__init__.py +282 -103
warp/__init__.pyi +482 -110
warp/bin/warp-clang.dll +0 -0
warp/bin/warp.dll +0 -0
warp/build.py +93 -30
warp/build_dll.py +47 -67
warp/builtins.py +955 -137
warp/codegen.py +312 -206
warp/config.py +1 -1
warp/context.py +1249 -784
warp/examples/core/example_marching_cubes.py +1 -0
warp/examples/core/example_render_opengl.py +100 -3
warp/examples/fem/example_apic_fluid.py +98 -52
warp/examples/fem/example_convection_diffusion_dg.py +25 -4
warp/examples/fem/example_diffusion_mgpu.py +8 -3
warp/examples/fem/utils.py +68 -22
warp/fabric.py +1 -1
warp/fem/cache.py +27 -19
warp/fem/domain.py +2 -2
warp/fem/field/nodal_field.py +2 -2
warp/fem/field/virtual.py +264 -166
warp/fem/geometry/geometry.py +5 -5
warp/fem/integrate.py +129 -51
warp/fem/space/restriction.py +4 -0
warp/fem/space/shape/tet_shape_function.py +3 -10
warp/jax_experimental/custom_call.py +1 -1
warp/jax_experimental/ffi.py +2 -1
warp/marching_cubes.py +708 -0
warp/native/array.h +99 -4
warp/native/builtin.h +82 -5
warp/native/bvh.cpp +64 -28
warp/native/bvh.cu +58 -58
warp/native/bvh.h +2 -2
warp/native/clang/clang.cpp +7 -7
warp/native/coloring.cpp +8 -2
warp/native/crt.cpp +2 -2
warp/native/crt.h +3 -5
warp/native/cuda_util.cpp +41 -10
warp/native/cuda_util.h +10 -4
warp/native/exports.h +1842 -1908
warp/native/fabric.h +2 -1
warp/native/hashgrid.cpp +37 -37
warp/native/hashgrid.cu +2 -2
warp/native/initializer_array.h +1 -1
warp/native/intersect.h +2 -2
warp/native/mat.h +1910 -116
warp/native/mathdx.cpp +43 -43
warp/native/mesh.cpp +24 -24
warp/native/mesh.cu +26 -26
warp/native/mesh.h +4 -2
warp/native/nanovdb/GridHandle.h +179 -12
warp/native/nanovdb/HostBuffer.h +8 -7
warp/native/nanovdb/NanoVDB.h +517 -895
warp/native/nanovdb/NodeManager.h +323 -0
warp/native/nanovdb/PNanoVDB.h +2 -2
warp/native/quat.h +331 -14
warp/native/range.h +7 -1
warp/native/reduce.cpp +10 -10
warp/native/reduce.cu +13 -14
warp/native/runlength_encode.cpp +2 -2
warp/native/runlength_encode.cu +5 -5
warp/native/scan.cpp +3 -3
warp/native/scan.cu +4 -4
warp/native/sort.cpp +10 -10
warp/native/sort.cu +22 -22
warp/native/sparse.cpp +8 -8
warp/native/sparse.cu +13 -13
warp/native/spatial.h +366 -17
warp/native/temp_buffer.h +2 -2
warp/native/tile.h +283 -69
warp/native/vec.h +381 -14
warp/native/volume.cpp +54 -54
warp/native/volume.cu +1 -1
warp/native/volume.h +2 -1
warp/native/volume_builder.cu +30 -37
warp/native/warp.cpp +150 -149
warp/native/warp.cu +323 -192
warp/native/warp.h +227 -226
warp/optim/linear.py +736 -271
warp/render/imgui_manager.py +289 -0
warp/render/render_opengl.py +85 -6
warp/sim/graph_coloring.py +2 -2
warp/sparse.py +558 -175
warp/tests/aux_test_module_aot.py +7 -0
warp/tests/cuda/test_async.py +3 -3
warp/tests/cuda/test_conditional_captures.py +101 -0
warp/tests/geometry/test_marching_cubes.py +233 -12
warp/tests/sim/test_coloring.py +6 -6
warp/tests/test_array.py +56 -5
warp/tests/test_codegen.py +3 -2
warp/tests/test_context.py +8 -15
warp/tests/test_enum.py +136 -0
warp/tests/test_examples.py +2 -2
warp/tests/test_fem.py +45 -2
warp/tests/test_fixedarray.py +229 -0
warp/tests/test_func.py +18 -15
warp/tests/test_future_annotations.py +7 -5
warp/tests/test_linear_solvers.py +30 -0
warp/tests/test_map.py +1 -1
warp/tests/test_mat.py +1518 -378
warp/tests/test_mat_assign_copy.py +178 -0
warp/tests/test_mat_constructors.py +574 -0
warp/tests/test_module_aot.py +287 -0
warp/tests/test_print.py +69 -0
warp/tests/test_quat.py +140 -34
warp/tests/test_quat_assign_copy.py +145 -0
warp/tests/test_reload.py +2 -1
warp/tests/test_sparse.py +71 -0
warp/tests/test_spatial.py +140 -34
warp/tests/test_spatial_assign_copy.py +160 -0
warp/tests/test_struct.py +43 -3
warp/tests/test_types.py +0 -20
warp/tests/test_vec.py +179 -34
warp/tests/test_vec_assign_copy.py +143 -0
warp/tests/tile/test_tile.py +184 -18
warp/tests/tile/test_tile_cholesky.py +605 -0
warp/tests/tile/test_tile_load.py +169 -0
warp/tests/tile/test_tile_mathdx.py +2 -558
warp/tests/tile/test_tile_matmul.py +1 -1
warp/tests/tile/test_tile_mlp.py +1 -1
warp/tests/tile/test_tile_shared_memory.py +5 -5
warp/tests/unittest_suites.py +6 -0
warp/tests/walkthrough_debug.py +1 -1
warp/thirdparty/unittest_parallel.py +108 -9
warp/types.py +554 -264
warp/utils.py +68 -86
{warp_lang-1.8.1.dist-info → warp_lang-1.9.0.dist-info}/METADATA +28 -65
{warp_lang-1.8.1.dist-info → warp_lang-1.9.0.dist-info}/RECORD +131 -121
warp/native/marching.cpp +0 -19
warp/native/marching.cu +0 -514
warp/native/marching.h +0 -19
{warp_lang-1.8.1.dist-info → warp_lang-1.9.0.dist-info}/WHEEL +0 -0
{warp_lang-1.8.1.dist-info → warp_lang-1.9.0.dist-info}/licenses/LICENSE.md +0 -0
{warp_lang-1.8.1.dist-info → warp_lang-1.9.0.dist-info}/top_level.txt +0 -0

warp/fem/integrate.py CHANGED Viewed

@@ -16,7 +16,7 @@
 import ast
 import inspect
 import textwrap
-from typing import Any, Callable, Dict, List, NamedTuple, Optional, Set, Union
+from typing import Any, Callable, Dict, List, NamedTuple, Optional, Set, Tuple, Union
 import warp as wp
 import warp.fem.operator as operator
@@ -34,7 +34,10 @@ from warp.fem.field import (
     TrialField,
     make_restriction,
 )
-from warp.fem.field.virtual import make_bilinear_dispatch_kernel, make_linear_dispatch_kernel
+from warp.fem.field.virtual import (
+    make_bilinear_dispatch_kernel,
+    make_linear_dispatch_kernel,
+)
 from warp.fem.linalg import array_axpy, basis_coefficient
 from warp.fem.operator import (
     Integrand,
@@ -101,7 +104,8 @@ class IntegrandVisitor(ast.NodeTransformer):
         field: FieldLike
         abstract_type: type
         concrete_type: type
-        root_arg_name: type
+        root_arg_name: str
+        local_arg_name: str
     def __init__(
         self,
@@ -111,6 +115,7 @@ class IntegrandVisitor(ast.NodeTransformer):
         self._integrand = integrand
         self._field_symbols = field_info.copy()
         self._field_nodes = {}
+        self._field_arg_annotation_nodes = {}
     @staticmethod
     def _build_field_info(integrand: Integrand, field_args: Dict[str, FieldLike]):
@@ -127,6 +132,7 @@ class IntegrandVisitor(ast.NodeTransformer):
                 abstract_type=integrand.argspec.annotations[name],
                 concrete_type=get_concrete_type(field),
                 root_arg_name=name,
+                local_arg_name=name,
             )
             for name, field in field_args.items()
         }
@@ -167,6 +173,7 @@ class IntegrandVisitor(ast.NodeTransformer):
                         field=res[0],
                         abstract_type=res[1],
                         concrete_type=res[2],
+                        local_arg_name=field_info.local_arg_name,
                         root_arg_name=f"{field_info.root_arg_name}.{func.name}",
                     )
@@ -191,6 +198,13 @@ class IntegrandVisitor(ast.NodeTransformer):
         return node
+    def visit_FunctionDef(self, node: ast.FunctionDef):
+        # record field arg annotation nodes
+        for arg in node.args.args:
+            self._field_arg_annotation_nodes[arg.arg] = arg.annotation
+        return self.generic_visit(node)
     def _get_callee_field_args(self, callee: Integrand, args: List[ast.AST]):
         # Get field types for call site arguments
         call_site_field_args: List[IntegrandVisitor.FieldInfo] = []
@@ -211,7 +225,13 @@ class IntegrandVisitor(ast.NodeTransformer):
                     raise TypeError(
                         f"Attempting to pass a {passed_field_info.abstract_type.__name__} to argument '{arg}' of '{callee.name}' expecting a {arg_type.__name__}"
                     )
-                callee_field_args[arg] = passed_field_info
+                callee_field_args[arg] = IntegrandVisitor.FieldInfo(
+                    field=passed_field_info.field,
+                    abstract_type=passed_field_info.abstract_type,
+                    concrete_type=passed_field_info.concrete_type,
+                    local_arg_name=arg,
+                    root_arg_name=passed_field_info.root_arg_name,
+                )
         return callee_field_args
@@ -263,18 +283,14 @@ class IntegrandTransformer(IntegrandVisitor):
                 f"Operator {operator.func.__name__} is not defined for {field_info.abstract_type.__name__} {field.name}"
             ) from e
-        # Update the ast Call node to use the new function pointer
-        call.func = ast.Attribute(value=call.func, attr=pointer.key, ctx=ast.Load())
         # Save the pointer as an attribute than can be accessed from the calling scope
-        # For usual operator call syntax, we can use the operator itself, but for the
-        # shortcut default operator syntax, we store it on the callee's concrete type
-        if isinstance(callee, Operator):
-            setattr(callee, pointer.key, pointer)
-        else:
-            setattr(field_info.concrete_type, pointer.key, pointer)
+        # (use the annotation node of the argument this field is constructed from)
+        callee_node = self._field_arg_annotation_nodes[field_info.local_arg_name]
+        setattr(self._field_symbols[field_info.local_arg_name].abstract_type, pointer.key, pointer)
+        call.func = ast.Attribute(value=callee_node, attr=pointer.key, ctx=ast.Load())
-            # also insert callee as first argument
+        # For shortcut default operator syntax, insert callee as first argument
+        if not isinstance(callee, Operator):
             call.args = [ast.Name(id=callee, ctx=ast.Load()), *call.args]
         # replace first argument with selected attribute
@@ -592,6 +608,9 @@ def _combined_kernel_options(integrand_options: Optional[Dict[str, Any]], call_s
     return options
+_INTEGRATE_CONSTANT_TILE_SIZE = 256
 def get_integrate_constant_kernel(
     integrand_func: wp.Function,
     domain: GeometryDomain,
@@ -599,8 +618,12 @@ def get_integrate_constant_kernel(
     FieldStruct: wp.codegen.Struct,
     ValueStruct: wp.codegen.Struct,
     accumulate_dtype,
+    tile_size: int = _INTEGRATE_CONSTANT_TILE_SIZE,
 ):
+    zero_element = type_zero_element(accumulate_dtype)
     def integrate_kernel_fn(
+        qp_count: int,
         qp_arg: quadrature.Arg,
         qp_element_index_arg: quadrature.ElementIndexArg,
         domain_arg: domain.ElementArg,
@@ -609,26 +632,33 @@ def get_integrate_constant_kernel(
         values: ValueStruct,
         result: wp.array(dtype=accumulate_dtype),
     ):
-        qp_eval_index = wp.tid()
-        domain_element_index, qp = quadrature.evaluation_point_element_index(qp_element_index_arg, qp_eval_index)
-        if domain_element_index == NULL_ELEMENT_INDEX:
-            return
+        block_index, lane = wp.tid()
+        qp_eval_index = block_index * tile_size + lane
-        element_index = domain.element_index(domain_index_arg, domain_element_index)
+        if qp_eval_index >= qp_count:
+            domain_element_index, qp = NULL_ELEMENT_INDEX, 0
+        else:
+            domain_element_index, qp = quadrature.evaluation_point_element_index(qp_element_index_arg, qp_eval_index)
-        qp_coords = quadrature.point_coords(domain_arg, qp_arg, domain_element_index, element_index, qp)
-        qp_weight = quadrature.point_weight(domain_arg, qp_arg, domain_element_index, element_index, qp)
-        qp_index = quadrature.point_index(domain_arg, qp_arg, domain_element_index, element_index, qp)
+        if domain_element_index == NULL_ELEMENT_INDEX:
+            val = zero_element()
+        else:
+            element_index = domain.element_index(domain_index_arg, domain_element_index)
-        test_dof_index = NULL_DOF_INDEX
-        trial_dof_index = NULL_DOF_INDEX
+            qp_coords = quadrature.point_coords(domain_arg, qp_arg, domain_element_index, element_index, qp)
+            qp_weight = quadrature.point_weight(domain_arg, qp_arg, domain_element_index, element_index, qp)
+            qp_index = quadrature.point_index(domain_arg, qp_arg, domain_element_index, element_index, qp)
-        sample = Sample(element_index, qp_coords, qp_index, qp_weight, test_dof_index, trial_dof_index)
-        vol = domain.element_measure(domain_arg, sample)
+            test_dof_index = NULL_DOF_INDEX
+            trial_dof_index = NULL_DOF_INDEX
-        val = integrand_func(sample, fields, values)
+            sample = Sample(element_index, qp_coords, qp_index, qp_weight, test_dof_index, trial_dof_index)
+            vol = domain.element_measure(domain_arg, sample)
-        wp.atomic_add(result, 0, accumulate_dtype(qp_weight * vol * val))
+            val = accumulate_dtype(qp_weight * vol * integrand_func(sample, fields, values))
+        tile_integral = wp.tile_sum(wp.tile(val))
+        wp.tile_atomic_add(result, tile_integral, offset=0)
     return integrate_kernel_fn
@@ -1020,7 +1050,7 @@ def get_integrate_bilinear_local_kernel(
             sample = Sample(element_index, qp_coords, qp_index, qp_weight, test_dof_index, trial_dof_index)
             val = integrand_func(sample, fields, values)
-            result[qp_eval_index, test_dof, trial_dof, taylor_dof] = qp_vol * val
+            result[test_dof, trial_dof, qp_eval_index, taylor_dof] = qp_vol * val
     return integrate_kernel_fn
@@ -1150,9 +1180,46 @@ def _generate_integrate_kernel(
     return kernel, FieldStruct, ValueStruct
+def _generate_auxiliary_kernels(
+    quadrature: Quadrature,
+    test: Optional[TestField],
+    trial: Optional[TrialField],
+    accumulate_dtype: type,
+    device,
+    kernel_options: Optional[Dict[str, Any]] = None,
+) -> List[Tuple[wp.Kernel, int]]:
+    if test is None or not isinstance(test, LocalTestField):
+        return ()
+    # For dispatched assembly, generate additional kernels
+    # heuristic to use tiles for "long" quadratures
+    dispatch_tile_size = 32
+    qp_eval_count = quadrature.evaluation_point_count()
+    if trial is None:
+        if (
+            not device.is_cuda
+            or qp_eval_count * test.space_restriction.total_node_element_count()
+            < 3 * dispatch_tile_size * test.space_restriction.node_count() * test.domain.element_count()
+        ):
+            dispatch_tile_size = 1
+        dispatch_kernel = make_linear_dispatch_kernel(
+            test, quadrature, accumulate_dtype, dispatch_tile_size, kernel_options
+        )
+    else:
+        if not device.is_cuda or qp_eval_count < 3 * dispatch_tile_size * test.domain.element_count():
+            dispatch_tile_size = 1
+        dispatch_kernel = make_bilinear_dispatch_kernel(
+            test, trial, quadrature, accumulate_dtype, dispatch_tile_size, kernel_options
+        )
+    return ((dispatch_kernel, dispatch_tile_size),)
 def _launch_integrate_kernel(
     integrand: Integrand,
     kernel: wp.Kernel,
+    auxiliary_kernels: List[Tuple[wp.Kernel, int]],
     FieldStruct: wp.codegen.Struct,
     ValueStruct: wp.codegen.Struct,
     domain: GeometryDomain,
@@ -1202,10 +1269,15 @@ def _launch_integrate_kernel(
         if output != accumulate_array or not add_to_output:
             accumulate_array.zero_()
+        qp_count = quadrature.evaluation_point_count()
+        tile_size = _INTEGRATE_CONSTANT_TILE_SIZE
+        block_count = (qp_count + tile_size - 1) // tile_size
         wp.launch(
             kernel=kernel,
-            dim=quadrature.evaluation_point_count(),
+            dim=(block_count, tile_size),
+            block_dim=tile_size,
             inputs=[
+                qp_count,
                 qp_arg,
                 quadrature.element_index_arg_value(device),
                 domain_elt_arg,
@@ -1335,10 +1407,11 @@ def _launch_integrate_kernel(
                     stacklevel=2,
                 )
             else:
-                dispatch_kernel = make_linear_dispatch_kernel(test, quadrature, accumulate_dtype)
+                dispatch_kernel, dispatch_tile_size = auxiliary_kernels[0]
                 wp.launch(
                     kernel=dispatch_kernel,
-                    dim=(test.space_restriction.node_count(), test.node_dof_count),
+                    dim=(test.space_restriction.node_count(), dispatch_tile_size),
+                    block_dim=dispatch_tile_size if dispatch_tile_size > 1 else 256,
                     inputs=[
                         qp_arg,
                         domain_elt_arg,
@@ -1422,14 +1495,15 @@ def _launch_integrate_kernel(
             device=device,
         )
     elif isinstance(test, LocalTestField):
+        qp_eval_count = quadrature.evaluation_point_count()
         local_result = cache.borrow_temporary(
             temporary_store=temporary_store,
             device=device,
             requires_grad=False,
             shape=(
-                quadrature.evaluation_point_count(),
                 test.value_dof_count,
                 trial.value_dof_count,
+                qp_eval_count,
                 test.TAYLOR_DOF_COUNT * trial.TAYLOR_DOF_COUNT,
             ),
             dtype=float,
@@ -1438,7 +1512,7 @@ def _launch_integrate_kernel(
         wp.launch(
             kernel=kernel,
             dim=(
-                quadrature.evaluation_point_count(),
+                qp_eval_count,
                 test.value_dof_count,
                 trial.value_dof_count,
                 trial.TAYLOR_DOF_COUNT,
@@ -1455,17 +1529,6 @@ def _launch_integrate_kernel(
             device=device,
         )
-        vec_array_shape = (*local_result.array.shape[:-1], test.TAYLOR_DOF_COUNT)
-        vec_array_dtype = cache.cached_vec_type(length=trial.TAYLOR_DOF_COUNT, dtype=float)
-        local_result_as_vec = wp.array(
-            data=None,
-            ptr=local_result.array.ptr,
-            capacity=local_result.array.capacity,
-            device=local_result.array.device,
-            shape=vec_array_shape,
-            dtype=vec_array_dtype,
-        )
         if test.TAYLOR_DOF_COUNT * trial.TAYLOR_DOF_COUNT == 0:
             wp.utils.warn(
                 f"Test and/or trial fields are never evaluated in integrand '{integrand.name}', result will be zero",
@@ -1474,18 +1537,17 @@ def _launch_integrate_kernel(
             )
             triplet_rows.fill_(-1)
         else:
-            dispatch_kernel = make_bilinear_dispatch_kernel(test, trial, quadrature, accumulate_dtype)
+            dispatch_kernel, dispatch_tile_size = auxiliary_kernels[0]
             trial_partition_arg = trial.space_partition.partition_arg_value(device)
             trial_topology_arg = trial.space_partition.space_topology.topo_arg_value(device)
             wp.launch(
                 kernel=dispatch_kernel,
                 dim=(
-                    test.space_restriction.node_count(),
-                    test.node_dof_count,
-                    trial.node_dof_count,
+                    test.space_restriction.total_node_element_count(),
                     trial.space.topology.MAX_NODES_PER_ELEMENT,
+                    dispatch_tile_size,
                 ),
+                block_dim=dispatch_tile_size if dispatch_tile_size > 1 else 256,
                 inputs=[
                     qp_arg,
                     domain_elt_arg,
@@ -1495,7 +1557,7 @@ def _launch_integrate_kernel(
                     trial_partition_arg,
                     trial_topology_arg,
                     trial.space.space_arg_value(device),
-                    local_result_as_vec,
+                    local_result.array,
                     triplet_rows,
                     triplet_cols,
                     triplet_values,
@@ -1636,6 +1698,9 @@ def integrate(
     if values is None:
         values = {}
+    if device is None:
+        device = wp.get_device()
     if not isinstance(integrand, Integrand):
         raise ValueError("integrand must be tagged with @warp.fem.integrand decorator")
@@ -1728,9 +1793,19 @@ def integrate(
         kernel_options=kernel_options,
     )
+    auxiliary_kernels = _generate_auxiliary_kernels(
+        quadrature=quadrature,
+        test=test,
+        trial=trial,
+        accumulate_dtype=accumulate_dtype,
+        device=device,
+        kernel_options=kernel_options,
+    )
     return _launch_integrate_kernel(
         integrand=integrand,
         kernel=kernel,
+        auxiliary_kernels=auxiliary_kernels,
         FieldStruct=FieldStruct,
         ValueStruct=ValueStruct,
         domain=domain,
@@ -2355,6 +2430,9 @@ def interpolate(
     if values is None:
         values = {}
+    if device is None:
+        device = wp.get_device()
     if not isinstance(integrand, Integrand):
         raise ValueError("integrand must be tagged with @integrand decorator")

warp/fem/space/restriction.py CHANGED Viewed

@@ -159,6 +159,10 @@ class SpaceRestriction:
     def node_partition_index(args: NodeArg, restriction_node_index: int):
         return args.dof_partition_indices[restriction_node_index]
+    @wp.func
+    def node_partition_index_from_element_offset(args: NodeArg, element_offset: int):
+        return wp.lower_bound(args.dof_element_offsets, element_offset + 1) - 1
     @wp.func
     def node_element_range(args: NodeArg, partition_node_index: int):
         return args.dof_element_offsets[partition_node_index], args.dof_element_offsets[partition_node_index + 1]

warp/fem/space/shape/tet_shape_function.py CHANGED Viewed

@@ -168,19 +168,12 @@ class TetrahedronPolynomialShapeFunctions(TetrahedronShapeFunction):
         self.VERTEX_NODE_COUNT = wp.constant(1)
         self.EDGE_NODE_COUNT = wp.constant(degree - 1)
+        self.FACE_NODE_COUNT = wp.constant(max(0, degree - 2) * max(0, degree - 1) // 2)
+        self.INTERIOR_NODE_COUNT = wp.constant(max(0, degree - 1) * max(0, degree - 2) * max(0, degree - 3) // 6)
         self.NODES_PER_ELEMENT = wp.constant((degree + 1) * (degree + 2) * (degree + 3) // 6)
         self.NODES_PER_SIDE = wp.constant((degree + 1) * (degree + 2) // 2)
-        self.SIDE_NODE_COUNT = wp.constant(self.NODES_PER_ELEMENT - 3 * (self.VERTEX_NODE_COUNT + self.EDGE_NODE_COUNT))
-        self.INTERIOR_NODE_COUNT = wp.constant(
-            self.NODES_PER_ELEMENT - 3 * (self.VERTEX_NODE_COUNT + self.EDGE_NODE_COUNT)
-        )
-        self.VERTEX_NODE_COUNT = wp.constant(1)
-        self.EDGE_NODE_COUNT = wp.constant(degree - 1)
-        self.FACE_NODE_COUNT = wp.constant(max(0, degree - 2) * max(0, degree - 1) // 2)
-        self.INERIOR_NODE_COUNT = wp.constant(max(0, degree - 1) * max(0, degree - 2) * max(0, degree - 3) // 6)
         tet_coords = np.empty((self.NODES_PER_ELEMENT, 3), dtype=int)
         for tx in range(degree + 1):

warp/jax_experimental/custom_call.py CHANGED Viewed

@@ -107,7 +107,7 @@ def _warp_custom_callback(stream, buffers, opaque, opaque_len):
     assert hooks.forward, "Failed to find kernel entry point"
     # Launch the kernel.
-    wp.context.runtime.core.cuda_launch_kernel(
+    wp.context.runtime.core.wp_cuda_launch_kernel(
         device.context, hooks.forward, bounds.size, 0, 256, hooks.forward_smem_bytes, kernel_params, stream
     )

warp/jax_experimental/ffi.py CHANGED Viewed

@@ -317,7 +317,7 @@ class FfiKernel:
             assert hooks.forward, "Failed to find kernel entry point"
             # launch the kernel
-            wp.context.runtime.core.cuda_launch_kernel(
+            wp.context.runtime.core.wp_cuda_launch_kernel(
                 device.context,
                 hooks.forward,
                 launch_bounds.size,
@@ -381,6 +381,7 @@ class FfiCallable:
             if arg_name == "return":
                 if arg_type is not None:
                     raise TypeError("Function must not return a value")
+                continue
             else:
                 arg = FfiArg(arg_name, arg_type, arg_name in in_out_argnames)
                 if arg_name in in_out_argnames: