PyPI - warp-lang - Versions diffs - 1.3.3__py3-none-manylinux2014_aarch64.whl → 1.4.0__py3-none-manylinux2014_aarch64.whl - Mend

warp-lang 1.3.3__py3-none-manylinux2014_aarch64.whl → 1.4.0__py3-none-manylinux2014_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of warp-lang might be problematic. Click here for more details.

Files changed (106) hide show

warp/__init__.py +6 -0
warp/autograd.py +59 -6
warp/bin/warp.so +0 -0
warp/build_dll.py +8 -10
warp/builtins.py +126 -4
warp/codegen.py +435 -53
warp/config.py +1 -1
warp/context.py +678 -403
warp/dlpack.py +2 -0
warp/examples/benchmarks/benchmark_cloth.py +10 -0
warp/examples/core/example_render_opengl.py +12 -10
warp/examples/fem/example_adaptive_grid.py +251 -0
warp/examples/fem/example_apic_fluid.py +1 -1
warp/examples/fem/example_diffusion_3d.py +2 -2
warp/examples/fem/example_magnetostatics.py +1 -1
warp/examples/fem/example_streamlines.py +1 -0
warp/examples/fem/utils.py +23 -4
warp/examples/sim/example_cloth.py +50 -6
warp/fem/__init__.py +2 -0
warp/fem/adaptivity.py +493 -0
warp/fem/field/field.py +2 -1
warp/fem/field/nodal_field.py +18 -26
warp/fem/field/test.py +4 -4
warp/fem/field/trial.py +4 -4
warp/fem/geometry/__init__.py +1 -0
warp/fem/geometry/adaptive_nanogrid.py +843 -0
warp/fem/geometry/nanogrid.py +55 -28
warp/fem/space/__init__.py +1 -1
warp/fem/space/nanogrid_function_space.py +69 -35
warp/fem/utils.py +113 -107
warp/jax_experimental.py +28 -15
warp/native/array.h +0 -1
warp/native/builtin.h +103 -6
warp/native/bvh.cu +2 -0
warp/native/cuda_util.cpp +14 -0
warp/native/cuda_util.h +2 -0
warp/native/error.cpp +4 -2
warp/native/exports.h +99 -17
warp/native/mat.h +97 -0
warp/native/mesh.cpp +36 -0
warp/native/mesh.cu +51 -0
warp/native/mesh.h +1 -0
warp/native/quat.h +43 -0
warp/native/spatial.h +6 -0
warp/native/vec.h +74 -0
warp/native/warp.cpp +2 -1
warp/native/warp.cu +10 -3
warp/native/warp.h +8 -1
warp/paddle.py +382 -0
warp/sim/__init__.py +1 -0
warp/sim/collide.py +519 -0
warp/sim/integrator_euler.py +18 -5
warp/sim/integrator_featherstone.py +5 -5
warp/sim/integrator_vbd.py +1026 -0
warp/sim/model.py +49 -23
warp/stubs.py +459 -0
warp/tape.py +2 -0
warp/tests/aux_test_dependent.py +1 -0
warp/tests/aux_test_name_clash1.py +32 -0
warp/tests/aux_test_name_clash2.py +32 -0
warp/tests/aux_test_square.py +1 -0
warp/tests/test_array.py +188 -0
warp/tests/test_async.py +3 -3
warp/tests/test_atomic.py +6 -0
warp/tests/test_closest_point_edge_edge.py +93 -1
warp/tests/test_codegen.py +62 -15
warp/tests/test_codegen_instancing.py +1457 -0
warp/tests/test_collision.py +486 -0
warp/tests/test_compile_consts.py +3 -28
warp/tests/test_dlpack.py +170 -0
warp/tests/test_examples.py +22 -8
warp/tests/test_fast_math.py +10 -4
warp/tests/test_fem.py +64 -0
warp/tests/test_func.py +46 -0
warp/tests/test_implicit_init.py +49 -0
warp/tests/test_jax.py +58 -0
warp/tests/test_mat.py +84 -0
warp/tests/test_mesh_query_point.py +188 -0
warp/tests/test_module_hashing.py +40 -0
warp/tests/test_multigpu.py +3 -3
warp/tests/test_overwrite.py +8 -0
warp/tests/test_paddle.py +852 -0
warp/tests/test_print.py +89 -0
warp/tests/test_quat.py +111 -0
warp/tests/test_reload.py +31 -1
warp/tests/test_scalar_ops.py +2 -0
warp/tests/test_static.py +412 -0
warp/tests/test_streams.py +64 -3
warp/tests/test_struct.py +4 -4
warp/tests/test_torch.py +24 -0
warp/tests/test_triangle_closest_point.py +137 -0
warp/tests/test_types.py +1 -1
warp/tests/test_vbd.py +386 -0
warp/tests/test_vec.py +143 -0
warp/tests/test_vec_scalar_ops.py +139 -0
warp/tests/unittest_suites.py +12 -0
warp/tests/unittest_utils.py +9 -5
warp/thirdparty/dlpack.py +3 -1
warp/types.py +150 -28
warp/utils.py +37 -14
{warp_lang-1.3.3.dist-info → warp_lang-1.4.0.dist-info}/METADATA +10 -8
{warp_lang-1.3.3.dist-info → warp_lang-1.4.0.dist-info}/RECORD +105 -93
warp/tests/test_point_triangle_closest_point.py +0 -143
{warp_lang-1.3.3.dist-info → warp_lang-1.4.0.dist-info}/LICENSE.md +0 -0
{warp_lang-1.3.3.dist-info → warp_lang-1.4.0.dist-info}/WHEEL +0 -0
{warp_lang-1.3.3.dist-info → warp_lang-1.4.0.dist-info}/top_level.txt +0 -0

warp/fem/geometry/nanogrid.py CHANGED Viewed

@@ -10,10 +10,12 @@ from .element import Cube, Square
 from .geometry import Geometry
 # Flag used for building edge/face grids to disambiguate axis within the grid
+# Morton indexing allows for
 GRID_AXIS_FLAG = wp.constant(wp.int32(1 << 20))
-FACE_AXIS_MASK = wp.constant(wp.uint8((1 << 3) - 1))
-FACE_INNER_OFFSET_BIT = wp.constant(wp.uint8(3))
-FACE_OUTER_OFFSET_BIT = wp.constant(wp.uint8(4))
+FACE_AXIS_MASK = wp.constant(wp.uint8((1 << 2) - 1))
+FACE_INNER_OFFSET_BIT = wp.constant(wp.uint8(2))
+FACE_OUTER_OFFSET_BIT = wp.constant(wp.uint8(3))
 _mat32 = wp.mat(shape=(3, 2), dtype=float)
@@ -93,7 +95,7 @@ class Nanogrid(Geometry):
         self._face_ijk = None
         self._edge_grid = None
-        self._edge_ijk = None
+        self._edge_count = 0
     @property
     def cell_grid(self) -> wp.Volume:
@@ -129,7 +131,7 @@ class Nanogrid(Geometry):
     def edge_count(self):
         self._ensure_edge_grid()
-        return self._edge_ijk.shape[0]
+        return self._edge_count
     def reference_cell(self) -> Cube:
         return Cube()
@@ -168,13 +170,30 @@ class Nanogrid(Geometry):
     def cell_lookup(args: CellArg, pos: wp.vec3):
         uvw = wp.volume_world_to_index(args.cell_grid, pos) + wp.vec3(0.5)
         ijk = wp.vec3i(int(wp.floor(uvw[0])), int(wp.floor(uvw[1])), int(wp.floor(uvw[2])))
-        element_index = wp.volume_lookup_index(args.cell_grid, ijk[0], ijk[1], ijk[2])
+        cell_index = wp.volume_lookup_index(args.cell_grid, ijk[0], ijk[1], ijk[2])
-        return wp.select(
-            element_index == -1,
-            make_free_sample(element_index, uvw - wp.vec3(ijk)),
-            make_free_sample(NULL_ELEMENT_INDEX, Coords(OUTSIDE)),
-        )
+        coords = uvw - wp.vec3(ijk)
+        if cell_index == -1:
+            if wp.min(coords) == 0.0 or wp.max(coords) == 1.0:
+                il = wp.select(coords[0] > 0.5, -1, 0)
+                jl = wp.select(coords[1] > 0.5, -1, 0)
+                kl = wp.select(coords[2] > 0.5, -1, 0)
+                for n in range(8):
+                    ni = n >> 2
+                    nj = (n & 2) >> 1
+                    nk = n & 1
+                    nijk = ijk + wp.vec3i(ni + il, nj + jl, nk + kl)
+                    coords = uvw - wp.vec3(nijk)
+                    if wp.min(coords) >= 0.0 and wp.max(coords) <= 1.0:
+                        cell_index = wp.volume_lookup_index(args.cell_grid, nijk[0], nijk[1], nijk[2])
+                        if cell_index != -1:
+                            return make_free_sample(cell_index, coords)
+            return make_free_sample(NULL_ELEMENT_INDEX, Coords(OUTSIDE))
+        return make_free_sample(cell_index, coords)
     @wp.func
     def _project_on_voxel_at_origin(coords: wp.vec3):
@@ -280,12 +299,10 @@ class Nanogrid(Geometry):
         return wp.volume_index_to_world(cell_grid, uvw - wp.vec3(0.5))
     @wp.func
-    def _face_tangent_vecs(args: SideArg, axis: int, flip: int):
+    def _face_tangent_vecs(cell_grid: wp.uint64, axis: int, flip: int):
         u_axis = utils.unit_element(wp.vec3(), (axis + 1 + flip) % 3)
         v_axis = utils.unit_element(wp.vec3(), (axis + 2 - flip) % 3)
-        cell_grid = args.cell_arg.cell_grid
         return wp.volume_index_to_world_dir(cell_grid, u_axis), wp.volume_index_to_world_dir(cell_grid, v_axis)
     @wp.func
@@ -293,7 +310,7 @@ class Nanogrid(Geometry):
         flags = args.face_flags[s.element_index]
         axis = Nanogrid._get_face_axis(flags)
         flip = Nanogrid._get_face_inner_offset(flags)
-        v1, v2 = Nanogrid._face_tangent_vecs(args, axis, flip)
+        v1, v2 = Nanogrid._face_tangent_vecs(args.cell_arg.cell_grid, axis, flip)
         return _mat32(v1, v2)
     @wp.func
@@ -320,7 +337,7 @@ class Nanogrid(Geometry):
         axis = Nanogrid._get_face_axis(flags)
         flip = Nanogrid._get_face_inner_offset(flags)
-        v1, v2 = Nanogrid._face_tangent_vecs(args, axis, flip)
+        v1, v2 = Nanogrid._face_tangent_vecs(args.cell_arg.cell_grid, axis, flip)
         return wp.cross(v1, v2) / args.face_areas[axis]
     @wp.func
@@ -405,16 +422,14 @@ class Nanogrid(Geometry):
     def _build_edge_grid(self, temporary_store: Optional[cache.TemporaryStore] = None):
         self._edge_grid = _build_edge_grid(self._cell_ijk, self._cell_grid, temporary_store)
-        edge_count = self._edge_grid.get_voxel_count()
-        self._edge_ijk = wp.array(shape=(edge_count,), dtype=wp.vec3i, device=self._edge_grid.device)
-        self._edge_grid.get_voxels(out=self._edge_ijk)
+        self._edge_count = self._edge_grid.get_voxel_count()
     def _ensure_face_grid(self):
         if self._face_ijk is None:
             self._build_face_grid()
     def _ensure_edge_grid(self):
-        if self._edge_ijk is None:
+        if self._edge_grid is None:
             self._build_edge_grid()
@@ -475,7 +490,7 @@ def _build_node_grid(cell_ijk, grid: wp.Volume, temporary_store: cache.Temporary
         _cell_node_indices, dim=cell_nodes.array.shape, inputs=[cell_ijk, cell_nodes.array], device=cell_ijk.device
     )
     node_grid = wp.Volume.allocate_by_voxels(
-        cell_nodes.array.flatten(), voxel_size=grid.get_voxel_size()[0], device=cell_ijk.device
+        cell_nodes.array.flatten(), voxel_size=grid.get_voxel_size(), device=cell_ijk.device
     )
     return node_grid
@@ -487,7 +502,7 @@ def _build_face_grid(cell_ijk, grid: wp.Volume, temporary_store: cache.Temporary
     cell_faces = cache.borrow_temporary(temporary_store, shape=(cell_count, 6), dtype=wp.vec3i, device=cell_ijk.device)
     wp.launch(_cell_face_indices, dim=cell_count, inputs=[cell_ijk, cell_faces.array], device=cell_ijk.device)
     face_grid = wp.Volume.allocate_by_voxels(
-        cell_faces.array.flatten(), voxel_size=grid.get_voxel_size()[0], device=cell_ijk.device
+        cell_faces.array.flatten(), voxel_size=grid.get_voxel_size(), device=cell_ijk.device
     )
     return face_grid
@@ -499,12 +514,25 @@ def _build_edge_grid(cell_ijk, grid: wp.Volume, temporary_store: cache.Temporary
     cell_edges = cache.borrow_temporary(temporary_store, shape=(cell_count, 12), dtype=wp.vec3i, device=cell_ijk.device)
     wp.launch(_cell_edge_indices, dim=cell_count, inputs=[cell_ijk, cell_edges.array], device=cell_ijk.device)
     edge_grid = wp.Volume.allocate_by_voxels(
-        cell_edges.array.flatten(), voxel_size=grid.get_voxel_size()[0], device=cell_ijk.device
+        cell_edges.array.flatten(), voxel_size=grid.get_voxel_size(), device=cell_ijk.device
     )
     return edge_grid
+@wp.func
+def _make_face_flags(axis: int, plus_cell_index: int, minus_cell_index: int):
+    plus_boundary = wp.uint8(wp.select(plus_cell_index == -1, 0, 1)) << FACE_OUTER_OFFSET_BIT
+    minus_boundary = wp.uint8(wp.select(minus_cell_index == -1, 0, 1)) << FACE_INNER_OFFSET_BIT
+    return wp.uint8(axis) | plus_boundary | minus_boundary
+@wp.func
+def _get_boundary_mask(flags: wp.uint8):
+    return int((flags >> FACE_OUTER_OFFSET_BIT) | (flags >> FACE_INNER_OFFSET_BIT)) & 1
 @wp.kernel
 def _build_face_flags(
     cell_grid: wp.uint64,
@@ -522,9 +550,8 @@ def _build_face_flags(
     plus_cell_index = wp.volume_lookup_index(cell_grid, ijk[0], ijk[1], ijk[2])
     minus_cell_index = wp.volume_lookup_index(cell_grid, ijk_minus[0], ijk_minus[1], ijk_minus[2])
-    plus_boundary = wp.uint8(wp.select(plus_cell_index == -1, 0, 1)) << FACE_OUTER_OFFSET_BIT
-    minus_boundary = wp.uint8(wp.select(minus_cell_index == -1, 0, 1)) << FACE_INNER_OFFSET_BIT
     face_ijk[face] = ijk
-    face_flags[face] = wp.uint8(axis) | plus_boundary | minus_boundary
-    boundary_face_mask[face] = wp.select((plus_boundary | minus_boundary) == 0, 1, 0)
+    flags = _make_face_flags(axis, plus_cell_index, minus_cell_index)
+    face_flags[face] = flags
+    boundary_face_mask[face] = _get_boundary_mask(flags)

warp/fem/space/__init__.py CHANGED Viewed

@@ -123,7 +123,7 @@ def make_polynomial_basis_space(
         topology = make_quadmesh_2d_space_topology(geo, shape)
     elif isinstance(base_geo, _geometry.Hexmesh):
         topology = make_hexmesh_space_topology(geo, shape)
-    elif isinstance(base_geo, _geometry.Nanogrid):
+    elif isinstance(base_geo, _geometry.Nanogrid) or isinstance(base_geo, _geometry.AdaptiveNanogrid):
         topology = make_nanogrid_space_topology(geo, shape)
     if topology is None:

warp/fem/space/nanogrid_function_space.py CHANGED Viewed

@@ -1,7 +1,8 @@
+from typing import Union
 import warp as wp
 from warp.fem import cache
-from warp.fem.geometry import Nanogrid
-from warp.fem.geometry.nanogrid import _add_axis_flag
+from warp.fem.geometry import AdaptiveNanogrid, Nanogrid
 from warp.fem.polynomial import is_closed
 from warp.fem.types import ElementIndex
@@ -29,7 +30,7 @@ class NanogridSpaceTopology(SpaceTopology):
     def __init__(
         self,
-        grid: Nanogrid,
+        grid: Union[Nanogrid, AdaptiveNanogrid],
         shape: ShapeFunction,
         need_edge_indices: bool = True,
         need_face_indices: bool = True,
@@ -43,10 +44,16 @@ class NanogridSpaceTopology(SpaceTopology):
         self._vertex_grid = grid.vertex_grid.id
-        self._edge_grid = grid.edge_grid.id if need_edge_indices else -1
-        self._face_grid = grid.face_grid.id if need_face_indices else -1
-        self._edge_count = grid.edge_count() if need_edge_indices else 0
-        self._face_count = grid.side_count() if need_face_indices else 0
+        if isinstance(grid, Nanogrid):
+            self._edge_grid = grid.edge_grid.id if need_edge_indices else -1
+            self._face_grid = grid.face_grid.id if need_face_indices else -1
+            self._edge_count = grid.edge_count() if need_edge_indices else 0
+            self._face_count = grid.side_count() if need_face_indices else 0
+        else:
+            self._edge_grid = grid.stacked_edge_grid.id if need_edge_indices else -1
+            self._face_grid = grid.stacked_face_grid.id if need_face_indices else -1
+            self._edge_count = grid.stacked_edge_count() if need_edge_indices else 0
+            self._face_count = grid.stacked_face_count() if need_face_indices else 0
     @cache.cached_arg_value
     def topo_arg_value(self, device):
@@ -61,29 +68,58 @@ class NanogridSpaceTopology(SpaceTopology):
         arg.edge_count = self._edge_count
         return arg
+    def _make_element_node_index(self):
+        element_node_index_generic = self._make_element_node_index_generic()
+        @cache.dynamic_func(suffix=self.name)
+        def element_node_index(
+            geo_arg: Nanogrid.CellArg,
+            topo_arg: NanogridTopologyArg,
+            element_index: ElementIndex,
+            node_index_in_elt: int,
+        ):
+            ijk = geo_arg.cell_ijk[element_index]
+            return element_node_index_generic(topo_arg, element_index, node_index_in_elt, ijk, 0)
+        if isinstance(self._grid, Nanogrid):
+            return element_node_index
+        @cache.dynamic_func(suffix=self.name)
+        def element_node_index_adaptive(
+            geo_arg: AdaptiveNanogrid.CellArg,
+            topo_arg: NanogridTopologyArg,
+            element_index: ElementIndex,
+            node_index_in_elt: int,
+        ):
+            ijk = geo_arg.cell_ijk[element_index]
+            level = int(geo_arg.cell_level[element_index])
+            return element_node_index_generic(topo_arg, element_index, node_index_in_elt, ijk, level)
+        return element_node_index_adaptive
 @wp.func
-def _cell_vertex_coord(cell_ijk: wp.vec3i, n: int):
-    return cell_ijk + wp.vec3i((n & 4) >> 2, (n & 2) >> 1, n & 1)
+def _cell_vertex_coord(cell_ijk: wp.vec3i, cell_level: int, n: int):
+    return cell_ijk + AdaptiveNanogrid.fine_ijk(wp.vec3i((n & 4) >> 2, (n & 2) >> 1, n & 1), cell_level)
 @wp.func
-def _cell_edge_coord(cell_ijk: wp.vec3i, axis: int, offset: int):
-    e_ijk = cell_ijk
+def _cell_edge_coord(cell_ijk: wp.vec3i, cell_level: int, axis: int, offset: int):
+    e_ijk = AdaptiveNanogrid.coarse_ijk(cell_ijk, cell_level)
     e_ijk[(axis + 1) % 3] += offset >> 1
     e_ijk[(axis + 2) % 3] += offset & 1
-    return _add_axis_flag(e_ijk, axis)
+    return AdaptiveNanogrid.encode_axis_and_level(e_ijk, axis, cell_level)
 @wp.func
-def _cell_face_coord(cell_ijk: wp.vec3i, axis: int, offset: int):
-    f_ijk = cell_ijk
+def _cell_face_coord(cell_ijk: wp.vec3i, cell_level: int, axis: int, offset: int):
+    f_ijk = AdaptiveNanogrid.coarse_ijk(cell_ijk, cell_level)
     f_ijk[axis] += offset
-    return _add_axis_flag(f_ijk, axis)
+    return AdaptiveNanogrid.encode_axis_and_level(f_ijk, axis, cell_level)
 class NanogridTripolynomialSpaceTopology(NanogridSpaceTopology):
-    def __init__(self, grid: Nanogrid, shape: CubeTripolynomialShapeFunctions):
+    def __init__(self, grid: Union[Nanogrid, AdaptiveNanogrid], shape: CubeTripolynomialShapeFunctions):
         super().__init__(grid, shape, need_edge_indices=shape.ORDER >= 2, need_face_indices=shape.ORDER >= 2)
         self.element_node_index = self._make_element_node_index()
@@ -101,25 +137,24 @@ class NanogridTripolynomialSpaceTopology(NanogridSpaceTopology):
             + self._grid.cell_count() * INTERIOR_NODES_PER_CELL
         )
-    def _make_element_node_index(self):
+    def _make_element_node_index_generic(self):
         ORDER = self._shape.ORDER
         INTERIOR_NODES_PER_EDGE = wp.constant(max(0, ORDER - 1))
         INTERIOR_NODES_PER_FACE = wp.constant(INTERIOR_NODES_PER_EDGE**2)
         INTERIOR_NODES_PER_CELL = wp.constant(INTERIOR_NODES_PER_EDGE**3)
         @cache.dynamic_func(suffix=self.name)
-        def element_node_index(
-            geo_arg: Nanogrid.CellArg,
+        def element_node_index_generic(
             topo_arg: NanogridTopologyArg,
             element_index: ElementIndex,
             node_index_in_elt: int,
+            ijk: wp.vec3i,
+            level: int,
         ):
             node_type, type_instance, type_index = self._shape.node_type_and_type_index(node_index_in_elt)
-            ijk = geo_arg.cell_ijk[element_index]
             if node_type == CubeTripolynomialShapeFunctions.VERTEX:
-                n_ijk = _cell_vertex_coord(ijk, type_instance)
+                n_ijk = _cell_vertex_coord(ijk, level, type_instance)
                 return wp.volume_lookup_index(topo_arg.vertex_grid, n_ijk[0], n_ijk[1], n_ijk[2])
             offset = topo_arg.vertex_count
@@ -128,7 +163,7 @@ class NanogridTripolynomialSpaceTopology(NanogridSpaceTopology):
                 axis = type_instance >> 2
                 node_offset = type_instance & 3
-                n_ijk = _cell_edge_coord(ijk, axis, node_offset)
+                n_ijk = _cell_edge_coord(ijk, level, axis, node_offset)
                 edge_index = wp.volume_lookup_index(topo_arg.edge_grid, n_ijk[0], n_ijk[1], n_ijk[2])
                 return offset + INTERIOR_NODES_PER_EDGE * edge_index + type_index
@@ -139,7 +174,7 @@ class NanogridTripolynomialSpaceTopology(NanogridSpaceTopology):
                 axis = type_instance >> 1
                 node_offset = type_instance & 1
-                n_ijk = _cell_face_coord(ijk, axis, node_offset)
+                n_ijk = _cell_face_coord(ijk, level, axis, node_offset)
                 face_index = wp.volume_lookup_index(topo_arg.face_grid, n_ijk[0], n_ijk[1], n_ijk[2])
                 return offset + INTERIOR_NODES_PER_FACE * face_index + type_index
@@ -148,7 +183,7 @@ class NanogridTripolynomialSpaceTopology(NanogridSpaceTopology):
             return offset + INTERIOR_NODES_PER_CELL * element_index + type_index
-        return element_node_index
+        return element_node_index_generic
 class NanogridSerendipitySpaceTopology(NanogridSpaceTopology):
@@ -160,37 +195,36 @@ class NanogridSerendipitySpaceTopology(NanogridSpaceTopology):
     def node_count(self) -> int:
         return self.geometry.vertex_count() + (self._shape.ORDER - 1) * self._edge_count
-    def _make_element_node_index(self):
+    def _make_element_node_index_generic(self):
         ORDER = self._shape.ORDER
         @cache.dynamic_func(suffix=self.name)
-        def element_node_index(
-            cell_arg: Nanogrid.CellArg,
-            topo_arg: NanogridSpaceTopology.TopologyArg,
+        def element_node_index_generic(
+            topo_arg: NanogridTopologyArg,
             element_index: ElementIndex,
             node_index_in_elt: int,
+            ijk: wp.vec3i,
+            level: int,
         ):
             node_type, type_index = self._shape.node_type_and_type_index(node_index_in_elt)
-            ijk = cell_arg.cell_ijk[element_index]
             if node_type == CubeSerendipityShapeFunctions.VERTEX:
-                n_ijk = _cell_vertex_coord(ijk, type_index)
+                n_ijk = _cell_vertex_coord(ijk, level, type_index)
                 return wp.volume_lookup_index(topo_arg.vertex_grid, n_ijk[0], n_ijk[1], n_ijk[2])
             type_instance, index_in_edge = CubeSerendipityShapeFunctions._cube_edge_index(node_type, type_index)
             axis = type_instance >> 2
             node_offset = type_instance & 3
-            n_ijk = _cell_edge_coord(ijk, axis, node_offset)
+            n_ijk = _cell_edge_coord(ijk, level, axis, node_offset)
             edge_index = wp.volume_lookup_index(topo_arg.edge_grid, n_ijk[0], n_ijk[1], n_ijk[2])
             return topo_arg.vertex_count + (ORDER - 1) * edge_index + index_in_edge
-        return element_node_index
+        return element_node_index_generic
-def make_nanogrid_space_topology(grid: Nanogrid, shape: ShapeFunction):
+def make_nanogrid_space_topology(grid: Union[Nanogrid, AdaptiveNanogrid], shape: ShapeFunction):
     if isinstance(shape, CubeSerendipityShapeFunctions):
         return forward_base_topology(NanogridSerendipitySpaceTopology, grid, shape)

warp/fem/utils.py CHANGED Viewed

@@ -40,24 +40,6 @@ def generalized_inner(x: wp.mat33, y: wp.vec3):
     return x[0] * y[0] + x[1] * y[1] + x[2] * y[2]
-@wp.func
-def apply_right(x: Any, y: Any):
-    """Performs x y multiplication with y a square matrix and x either a row-vector or a matrix.
-    Will be removed once native @ operator is implemented.
-    """
-    return x * y
-@wp.func
-def apply_right(x: wp.vec2, y: wp.mat22):
-    return x[0] * y[0] + x[1] * y[1]
-@wp.func
-def apply_right(x: wp.vec3, y: wp.mat33):
-    return x[0] * y[0] + x[1] * y[1] + x[2] * y[2]
 @wp.func
 def unit_element(template_type: Any, coord: int):
     """Returns a instance of `template_type` with a single coordinate set to 1 in the canonical basis"""
@@ -200,111 +182,135 @@ def inverse_qr(A: Any):
 @wp.func
-def symmetric_eigenvalues_qr(A: Any, tol: Any):
+def _wilkinson_shift(a: Any, b: Any, c: Any, tol: Any):
+    # Wilkinson shift: estimate eigenvalue of 2x2 symmetric matrix [a, c, c, b]
+    d = (a - b) * type(tol)(0.5)
+    return b + d - wp.sign(d) * wp.sqrt(d * d + c * c)
+@wp.func
+def _givens_rotation(a: Any, b: Any):
+    # Givens rotation [[c -s], [s c]] such that s*a + c*b = 0
+    zero = type(a)(0.0)
+    one = type(a)(1.0)
+    abn_sq = a * a + b * b
+    abn = wp.select(abn_sq == zero, one / wp.sqrt(abn_sq), zero)
+    return a * abn, -b * abn
+@wp.func
+def tridiagonal_symmetric_eigenvalues_qr(D: Any, L: Any, Q: Any, tol: Any):
     """
-    Computes the eigenvalues and eigen vectors of a square symmetric matrix A using the QR algorithm
+    Computes the eigenvalues and eigen vectors of a symmetric tridiagonal matrix using the
+    Symmetric tridiagonal QR algorithm with implicit Wilkinson shift
     Args:
-        A: square symmetric matrix
-        tol: Tolerance for the diagonalization residual (squared L2 norm of off-diagonal terms)
+        D: Main diagonal of the matrix
+        L: Lower diagonal of the matrix, indexed such that L[i] = A[i+1, i]
+        Q: Initialization for the eigenvectors, useful if a pre-transformation has been applied, otherwise may be identity
+        tol: Tolerance for the diagonalization residual (Linf norm of off-diagonal over diagonal terms)
     Returns a tuple (D: vector of eigenvalues, P: matrix with one eigenvector per row) such that A = P^T D P
-    """
-    two = A.dtype(2.0)
-    zero = A.dtype(0.0)
-    # temp storage for matrix rows
-    ri = type(A[0])()
-    rn = type(ri)()
+    Ref: Arbenz P, Numerical Methods for Solving Large Scale Eigenvalue Problems, Chapter 4 (QR algorithm, Mar 13, 2018)
+    """
-    # tridiagonal storage for R
-    R_L = type(ri)()
-    R_L = type(ri)(zero)
-    R_U = type(ri)(zero)
+    two = D.dtype(2.0)
-    # so that we can use the type length in expression
+    # so that we can use the type length in expressions
     # this will prevent unrolling by warp, but should be ok for native code
     m = int(0)
-    for _ in range(type(ri).length):
+    for _ in range(type(D).length):
         m += 1
+    start = int(0)
+    y = D.dtype(0.0)  # moving bulge
+    x = D.dtype(0.0)  # coeff atop bulge
+    for _ in range(32 * m):  # failsafe, usually converges faster than that
+        # Iterate over all independent (deflated) blocks
+        end = int(-1)
+        for k in range(m - 1):
+            if k >= end:
+                # Check if new block is starting
+                if k == end or wp.abs(L[k]) <= tol * (wp.abs(D[k]) + wp.abs(D[k + 1])):
+                    continue
+                # Find end of block
+                start = k
+                end = start + 1
+                while end + 1 < m:
+                    if wp.abs(L[end]) <= tol * (wp.abs(D[end + 1]) + wp.abs(D[end])):
+                        break
+                    end += 1
+                # Wilkinson shift (an eigenvalue of the last 2x2 block)
+                shift = _wilkinson_shift(D[end - 1], D[end], L[end - 1], tol)
+                # start with eliminating lower diag of first column of shifted matrix
+                # (i.e. first step of explicit QR factorization)
+                # Then all further steps eliminate the bulge (second diag) of the non-shifted matrix
+                x = D[start] - shift
+                y = L[start]
+            c, s = _givens_rotation(x, y)
+            # Apply Givens rotation on both sides of tridiagonal matrix
+            # middle block
+            d = D[k] - D[k + 1]
+            z = (two * c * L[k] + d * s) * s
+            D[k] -= z
+            D[k + 1] += z
+            L[k] = d * c * s + (c * c - s * s) * L[k]
+            if k > start:
+                L[k - 1] = c * x - s * y
+            x = L[k]
+            y = -s * L[k + 1]  # new bulge
+            L[k + 1] *= c
+            # apply givens rotation on left of Q
+            # note: Q is transposed compared to usual impls, as Warp makes it easier to index rows
+            Qk0 = Q[k]
+            Qk1 = Q[k + 1]
+            Q[k] = c * Qk0 - s * Qk1
+            Q[k + 1] = c * Qk1 + s * Qk0
+        if end <= 0:
+            # We did nothing, so diagonalization must have been achieved
+            break
+    return D, Q
+@wp.func
+def symmetric_eigenvalues_qr(A: Any, tol: Any):
+    """
+    Computes the eigenvalues and eigen vectors of a square symmetric matrix A using the QR algorithm
+    Args:
+        A: square symmetric matrix
+        tol: Tolerance for the diagonalization residual (Linf norm of off-diagonal over diagonal terms)
+    Returns a tuple (D: vector of eigenvalues, P: matrix with one eigenvector per row) such that A = P^T D P
+    """
     # Put A under Hessenberg form (tridiagonal)
     Q, H = householder_make_hessenberg(A)
-    Q = wp.transpose(Q)  # algorithm below works and transposed Q as rows are easier to index
-    for _ in range(16 * m):  # failsafe, usually converges faster than that
-        # Initialize R with current H
-        R_D = wp.get_diag(H)
-        for i in range(1, type(ri).length):
-            R_L[i - 1] = H[i, i - 1]
-            R_U[i - 1] = H[i - 1, i]
-        # compute QR decomposition, directly transform H and eigenvectors
-        for n in range(1, m):
-            i = n - 1
-            # compute reflection
-            xi = R_D[i]
-            xn = R_L[i]
-            xii = xi * xi
-            xnn = xn * xn
-            alpha = wp.sqrt(xii + xnn) * wp.sign(xi)
-            xi += alpha
-            xii = xi * xi
-            xin = xi * xn
-            two_over_x_sq = wp.select(alpha == zero, two / (xii + xnn), zero)
-            xii *= two_over_x_sq
-            xin *= two_over_x_sq
-            xnn *= two_over_x_sq
-            # Left-multiply R and Q, multiply H on both sides
-            # Note that R should get non-zero coefficients on the second upper diagonal,
-            # but those won't get read afterwards, so we can ignore them
-            R_D[n] -= R_U[i] * xin + R_D[n] * xnn
-            R_U[n] -= R_U[n] * xnn
-            ri = Q[i]
-            rn = Q[n]
-            Q[i] -= ri * xii + rn * xin
-            Q[n] -= ri * xin + rn * xnn
-            # H is multiplied on both sides, but stays tridiagonal except for moving buldge
-            # Note: we could reduce the stencil to for 4 columns qui we do below,
-            # but unlikely to be worth it for our small matrix sizes
-            ri = H[i]
-            rn = H[n]
-            H[i] -= ri * xii + rn * xin
-            H[n] -= ri * xin + rn * xnn
-            # multiply on right, manually. We just need to consider 4 rows
-            if i > 0:
-                ci = H[i - 1, i]
-                cn = H[i - 1, n]
-                H[i - 1, i] -= ci * xii + cn * xin
-                H[i - 1, n] -= ci * xin + cn * xnn
-            for k in range(2):
-                ci = H[i + k, i]
-                cn = H[i + k, n]
-                H[i + k, i] -= ci * xii + cn * xin
-                H[i + k, n] -= ci * xin + cn * xnn
-            if n + 1 < m:
-                ci = H[n + 1, i]
-                cn = H[n + 1, n]
-                H[n + 1, i] -= ci * xii + cn * xin
-                H[n + 1, n] -= ci * xin + cn * xnn
-        # Terminate if the upper diagonal of R is near zero
-        if wp.length_sq(R_U) < tol:
-            break
-    return wp.get_diag(H), Q
+    # tridiagonal storage for H
+    D = wp.get_diag(H)
+    L = type(D)(A.dtype(0.0))
+    for i in range(1, type(D).length):
+        L[i - 1] = H[i, i - 1]
+    Qt = wp.transpose(Q)
+    ev, P = tridiagonal_symmetric_eigenvalues_qr(D, L, Qt, tol)
+    return ev, P
 def compress_node_indices(