warp-lang 1.3.3__py3-none-manylinux2014_aarch64.whl → 1.4.0__py3-none-manylinux2014_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of warp-lang might be problematic. Click here for more details.

Files changed (106) hide show
  1. warp/__init__.py +6 -0
  2. warp/autograd.py +59 -6
  3. warp/bin/warp.so +0 -0
  4. warp/build_dll.py +8 -10
  5. warp/builtins.py +126 -4
  6. warp/codegen.py +435 -53
  7. warp/config.py +1 -1
  8. warp/context.py +678 -403
  9. warp/dlpack.py +2 -0
  10. warp/examples/benchmarks/benchmark_cloth.py +10 -0
  11. warp/examples/core/example_render_opengl.py +12 -10
  12. warp/examples/fem/example_adaptive_grid.py +251 -0
  13. warp/examples/fem/example_apic_fluid.py +1 -1
  14. warp/examples/fem/example_diffusion_3d.py +2 -2
  15. warp/examples/fem/example_magnetostatics.py +1 -1
  16. warp/examples/fem/example_streamlines.py +1 -0
  17. warp/examples/fem/utils.py +23 -4
  18. warp/examples/sim/example_cloth.py +50 -6
  19. warp/fem/__init__.py +2 -0
  20. warp/fem/adaptivity.py +493 -0
  21. warp/fem/field/field.py +2 -1
  22. warp/fem/field/nodal_field.py +18 -26
  23. warp/fem/field/test.py +4 -4
  24. warp/fem/field/trial.py +4 -4
  25. warp/fem/geometry/__init__.py +1 -0
  26. warp/fem/geometry/adaptive_nanogrid.py +843 -0
  27. warp/fem/geometry/nanogrid.py +55 -28
  28. warp/fem/space/__init__.py +1 -1
  29. warp/fem/space/nanogrid_function_space.py +69 -35
  30. warp/fem/utils.py +113 -107
  31. warp/jax_experimental.py +28 -15
  32. warp/native/array.h +0 -1
  33. warp/native/builtin.h +103 -6
  34. warp/native/bvh.cu +2 -0
  35. warp/native/cuda_util.cpp +14 -0
  36. warp/native/cuda_util.h +2 -0
  37. warp/native/error.cpp +4 -2
  38. warp/native/exports.h +99 -17
  39. warp/native/mat.h +97 -0
  40. warp/native/mesh.cpp +36 -0
  41. warp/native/mesh.cu +51 -0
  42. warp/native/mesh.h +1 -0
  43. warp/native/quat.h +43 -0
  44. warp/native/spatial.h +6 -0
  45. warp/native/vec.h +74 -0
  46. warp/native/warp.cpp +2 -1
  47. warp/native/warp.cu +10 -3
  48. warp/native/warp.h +8 -1
  49. warp/paddle.py +382 -0
  50. warp/sim/__init__.py +1 -0
  51. warp/sim/collide.py +519 -0
  52. warp/sim/integrator_euler.py +18 -5
  53. warp/sim/integrator_featherstone.py +5 -5
  54. warp/sim/integrator_vbd.py +1026 -0
  55. warp/sim/model.py +49 -23
  56. warp/stubs.py +459 -0
  57. warp/tape.py +2 -0
  58. warp/tests/aux_test_dependent.py +1 -0
  59. warp/tests/aux_test_name_clash1.py +32 -0
  60. warp/tests/aux_test_name_clash2.py +32 -0
  61. warp/tests/aux_test_square.py +1 -0
  62. warp/tests/test_array.py +188 -0
  63. warp/tests/test_async.py +3 -3
  64. warp/tests/test_atomic.py +6 -0
  65. warp/tests/test_closest_point_edge_edge.py +93 -1
  66. warp/tests/test_codegen.py +62 -15
  67. warp/tests/test_codegen_instancing.py +1457 -0
  68. warp/tests/test_collision.py +486 -0
  69. warp/tests/test_compile_consts.py +3 -28
  70. warp/tests/test_dlpack.py +170 -0
  71. warp/tests/test_examples.py +22 -8
  72. warp/tests/test_fast_math.py +10 -4
  73. warp/tests/test_fem.py +64 -0
  74. warp/tests/test_func.py +46 -0
  75. warp/tests/test_implicit_init.py +49 -0
  76. warp/tests/test_jax.py +58 -0
  77. warp/tests/test_mat.py +84 -0
  78. warp/tests/test_mesh_query_point.py +188 -0
  79. warp/tests/test_module_hashing.py +40 -0
  80. warp/tests/test_multigpu.py +3 -3
  81. warp/tests/test_overwrite.py +8 -0
  82. warp/tests/test_paddle.py +852 -0
  83. warp/tests/test_print.py +89 -0
  84. warp/tests/test_quat.py +111 -0
  85. warp/tests/test_reload.py +31 -1
  86. warp/tests/test_scalar_ops.py +2 -0
  87. warp/tests/test_static.py +412 -0
  88. warp/tests/test_streams.py +64 -3
  89. warp/tests/test_struct.py +4 -4
  90. warp/tests/test_torch.py +24 -0
  91. warp/tests/test_triangle_closest_point.py +137 -0
  92. warp/tests/test_types.py +1 -1
  93. warp/tests/test_vbd.py +386 -0
  94. warp/tests/test_vec.py +143 -0
  95. warp/tests/test_vec_scalar_ops.py +139 -0
  96. warp/tests/unittest_suites.py +12 -0
  97. warp/tests/unittest_utils.py +9 -5
  98. warp/thirdparty/dlpack.py +3 -1
  99. warp/types.py +150 -28
  100. warp/utils.py +37 -14
  101. {warp_lang-1.3.3.dist-info → warp_lang-1.4.0.dist-info}/METADATA +10 -8
  102. {warp_lang-1.3.3.dist-info → warp_lang-1.4.0.dist-info}/RECORD +105 -93
  103. warp/tests/test_point_triangle_closest_point.py +0 -143
  104. {warp_lang-1.3.3.dist-info → warp_lang-1.4.0.dist-info}/LICENSE.md +0 -0
  105. {warp_lang-1.3.3.dist-info → warp_lang-1.4.0.dist-info}/WHEEL +0 -0
  106. {warp_lang-1.3.3.dist-info → warp_lang-1.4.0.dist-info}/top_level.txt +0 -0
@@ -10,10 +10,12 @@ from .element import Cube, Square
10
10
  from .geometry import Geometry
11
11
 
12
12
  # Flag used for building edge/face grids to disambiguiate axis within the grid
13
+ # Morton indexing allows for
13
14
  GRID_AXIS_FLAG = wp.constant(wp.int32(1 << 20))
14
- FACE_AXIS_MASK = wp.constant(wp.uint8((1 << 3) - 1))
15
- FACE_INNER_OFFSET_BIT = wp.constant(wp.uint8(3))
16
- FACE_OUTER_OFFSET_BIT = wp.constant(wp.uint8(4))
15
+
16
+ FACE_AXIS_MASK = wp.constant(wp.uint8((1 << 2) - 1))
17
+ FACE_INNER_OFFSET_BIT = wp.constant(wp.uint8(2))
18
+ FACE_OUTER_OFFSET_BIT = wp.constant(wp.uint8(3))
17
19
 
18
20
  _mat32 = wp.mat(shape=(3, 2), dtype=float)
19
21
 
@@ -93,7 +95,7 @@ class Nanogrid(Geometry):
93
95
  self._face_ijk = None
94
96
 
95
97
  self._edge_grid = None
96
- self._edge_ijk = None
98
+ self._edge_count = 0
97
99
 
98
100
  @property
99
101
  def cell_grid(self) -> wp.Volume:
@@ -129,7 +131,7 @@ class Nanogrid(Geometry):
129
131
 
130
132
  def edge_count(self):
131
133
  self._ensure_edge_grid()
132
- return self._edge_ijk.shape[0]
134
+ return self._edge_count
133
135
 
134
136
  def reference_cell(self) -> Cube:
135
137
  return Cube()
@@ -168,13 +170,30 @@ class Nanogrid(Geometry):
168
170
  def cell_lookup(args: CellArg, pos: wp.vec3):
169
171
  uvw = wp.volume_world_to_index(args.cell_grid, pos) + wp.vec3(0.5)
170
172
  ijk = wp.vec3i(int(wp.floor(uvw[0])), int(wp.floor(uvw[1])), int(wp.floor(uvw[2])))
171
- element_index = wp.volume_lookup_index(args.cell_grid, ijk[0], ijk[1], ijk[2])
173
+ cell_index = wp.volume_lookup_index(args.cell_grid, ijk[0], ijk[1], ijk[2])
172
174
 
173
- return wp.select(
174
- element_index == -1,
175
- make_free_sample(element_index, uvw - wp.vec3(ijk)),
176
- make_free_sample(NULL_ELEMENT_INDEX, Coords(OUTSIDE)),
177
- )
175
+ coords = uvw - wp.vec3(ijk)
176
+ if cell_index == -1:
177
+ if wp.min(coords) == 0.0 or wp.max(coords) == 1.0:
178
+ il = wp.select(coords[0] > 0.5, -1, 0)
179
+ jl = wp.select(coords[1] > 0.5, -1, 0)
180
+ kl = wp.select(coords[2] > 0.5, -1, 0)
181
+
182
+ for n in range(8):
183
+ ni = n >> 2
184
+ nj = (n & 2) >> 1
185
+ nk = n & 1
186
+ nijk = ijk + wp.vec3i(ni + il, nj + jl, nk + kl)
187
+
188
+ coords = uvw - wp.vec3(nijk)
189
+ if wp.min(coords) >= 0.0 and wp.max(coords) <= 1.0:
190
+ cell_index = wp.volume_lookup_index(args.cell_grid, nijk[0], nijk[1], nijk[2])
191
+ if cell_index != -1:
192
+ return make_free_sample(cell_index, coords)
193
+
194
+ return make_free_sample(NULL_ELEMENT_INDEX, Coords(OUTSIDE))
195
+
196
+ return make_free_sample(cell_index, coords)
178
197
 
179
198
  @wp.func
180
199
  def _project_on_voxel_at_origin(coords: wp.vec3):
@@ -280,12 +299,10 @@ class Nanogrid(Geometry):
280
299
  return wp.volume_index_to_world(cell_grid, uvw - wp.vec3(0.5))
281
300
 
282
301
  @wp.func
283
- def _face_tangent_vecs(args: SideArg, axis: int, flip: int):
302
+ def _face_tangent_vecs(cell_grid: wp.uint64, axis: int, flip: int):
284
303
  u_axis = utils.unit_element(wp.vec3(), (axis + 1 + flip) % 3)
285
304
  v_axis = utils.unit_element(wp.vec3(), (axis + 2 - flip) % 3)
286
305
 
287
- cell_grid = args.cell_arg.cell_grid
288
-
289
306
  return wp.volume_index_to_world_dir(cell_grid, u_axis), wp.volume_index_to_world_dir(cell_grid, v_axis)
290
307
 
291
308
  @wp.func
@@ -293,7 +310,7 @@ class Nanogrid(Geometry):
293
310
  flags = args.face_flags[s.element_index]
294
311
  axis = Nanogrid._get_face_axis(flags)
295
312
  flip = Nanogrid._get_face_inner_offset(flags)
296
- v1, v2 = Nanogrid._face_tangent_vecs(args, axis, flip)
313
+ v1, v2 = Nanogrid._face_tangent_vecs(args.cell_arg.cell_grid, axis, flip)
297
314
  return _mat32(v1, v2)
298
315
 
299
316
  @wp.func
@@ -320,7 +337,7 @@ class Nanogrid(Geometry):
320
337
  axis = Nanogrid._get_face_axis(flags)
321
338
  flip = Nanogrid._get_face_inner_offset(flags)
322
339
 
323
- v1, v2 = Nanogrid._face_tangent_vecs(args, axis, flip)
340
+ v1, v2 = Nanogrid._face_tangent_vecs(args.cell_arg.cell_grid, axis, flip)
324
341
  return wp.cross(v1, v2) / args.face_areas[axis]
325
342
 
326
343
  @wp.func
@@ -405,16 +422,14 @@ class Nanogrid(Geometry):
405
422
 
406
423
  def _build_edge_grid(self, temporary_store: Optional[cache.TemporaryStore] = None):
407
424
  self._edge_grid = _build_edge_grid(self._cell_ijk, self._cell_grid, temporary_store)
408
- edge_count = self._edge_grid.get_voxel_count()
409
- self._edge_ijk = wp.array(shape=(edge_count,), dtype=wp.vec3i, device=self._edge_grid.device)
410
- self._edge_grid.get_voxels(out=self._edge_ijk)
425
+ self._edge_count = self._edge_grid.get_voxel_count()
411
426
 
412
427
  def _ensure_face_grid(self):
413
428
  if self._face_ijk is None:
414
429
  self._build_face_grid()
415
430
 
416
431
  def _ensure_edge_grid(self):
417
- if self._edge_ijk is None:
432
+ if self._edge_grid is None:
418
433
  self._build_edge_grid()
419
434
 
420
435
 
@@ -475,7 +490,7 @@ def _build_node_grid(cell_ijk, grid: wp.Volume, temporary_store: cache.Temporary
475
490
  _cell_node_indices, dim=cell_nodes.array.shape, inputs=[cell_ijk, cell_nodes.array], device=cell_ijk.device
476
491
  )
477
492
  node_grid = wp.Volume.allocate_by_voxels(
478
- cell_nodes.array.flatten(), voxel_size=grid.get_voxel_size()[0], device=cell_ijk.device
493
+ cell_nodes.array.flatten(), voxel_size=grid.get_voxel_size(), device=cell_ijk.device
479
494
  )
480
495
 
481
496
  return node_grid
@@ -487,7 +502,7 @@ def _build_face_grid(cell_ijk, grid: wp.Volume, temporary_store: cache.Temporary
487
502
  cell_faces = cache.borrow_temporary(temporary_store, shape=(cell_count, 6), dtype=wp.vec3i, device=cell_ijk.device)
488
503
  wp.launch(_cell_face_indices, dim=cell_count, inputs=[cell_ijk, cell_faces.array], device=cell_ijk.device)
489
504
  face_grid = wp.Volume.allocate_by_voxels(
490
- cell_faces.array.flatten(), voxel_size=grid.get_voxel_size()[0], device=cell_ijk.device
505
+ cell_faces.array.flatten(), voxel_size=grid.get_voxel_size(), device=cell_ijk.device
491
506
  )
492
507
 
493
508
  return face_grid
@@ -499,12 +514,25 @@ def _build_edge_grid(cell_ijk, grid: wp.Volume, temporary_store: cache.Temporary
499
514
  cell_edges = cache.borrow_temporary(temporary_store, shape=(cell_count, 12), dtype=wp.vec3i, device=cell_ijk.device)
500
515
  wp.launch(_cell_edge_indices, dim=cell_count, inputs=[cell_ijk, cell_edges.array], device=cell_ijk.device)
501
516
  edge_grid = wp.Volume.allocate_by_voxels(
502
- cell_edges.array.flatten(), voxel_size=grid.get_voxel_size()[0], device=cell_ijk.device
517
+ cell_edges.array.flatten(), voxel_size=grid.get_voxel_size(), device=cell_ijk.device
503
518
  )
504
519
 
505
520
  return edge_grid
506
521
 
507
522
 
523
+ @wp.func
524
+ def _make_face_flags(axis: int, plus_cell_index: int, minus_cell_index: int):
525
+ plus_boundary = wp.uint8(wp.select(plus_cell_index == -1, 0, 1)) << FACE_OUTER_OFFSET_BIT
526
+ minus_boundary = wp.uint8(wp.select(minus_cell_index == -1, 0, 1)) << FACE_INNER_OFFSET_BIT
527
+
528
+ return wp.uint8(axis) | plus_boundary | minus_boundary
529
+
530
+
531
+ @wp.func
532
+ def _get_boundary_mask(flags: wp.uint8):
533
+ return int((flags >> FACE_OUTER_OFFSET_BIT) | (flags >> FACE_INNER_OFFSET_BIT)) & 1
534
+
535
+
508
536
  @wp.kernel
509
537
  def _build_face_flags(
510
538
  cell_grid: wp.uint64,
@@ -522,9 +550,8 @@ def _build_face_flags(
522
550
  plus_cell_index = wp.volume_lookup_index(cell_grid, ijk[0], ijk[1], ijk[2])
523
551
  minus_cell_index = wp.volume_lookup_index(cell_grid, ijk_minus[0], ijk_minus[1], ijk_minus[2])
524
552
 
525
- plus_boundary = wp.uint8(wp.select(plus_cell_index == -1, 0, 1)) << FACE_OUTER_OFFSET_BIT
526
- minus_boundary = wp.uint8(wp.select(minus_cell_index == -1, 0, 1)) << FACE_INNER_OFFSET_BIT
527
-
528
553
  face_ijk[face] = ijk
529
- face_flags[face] = wp.uint8(axis) | plus_boundary | minus_boundary
530
- boundary_face_mask[face] = wp.select((plus_boundary | minus_boundary) == 0, 1, 0)
554
+
555
+ flags = _make_face_flags(axis, plus_cell_index, minus_cell_index)
556
+ face_flags[face] = flags
557
+ boundary_face_mask[face] = _get_boundary_mask(flags)
@@ -123,7 +123,7 @@ def make_polynomial_basis_space(
123
123
  topology = make_quadmesh_2d_space_topology(geo, shape)
124
124
  elif isinstance(base_geo, _geometry.Hexmesh):
125
125
  topology = make_hexmesh_space_topology(geo, shape)
126
- elif isinstance(base_geo, _geometry.Nanogrid):
126
+ elif isinstance(base_geo, _geometry.Nanogrid) or isinstance(base_geo, _geometry.AdaptiveNanogrid):
127
127
  topology = make_nanogrid_space_topology(geo, shape)
128
128
 
129
129
  if topology is None:
@@ -1,7 +1,8 @@
1
+ from typing import Union
2
+
1
3
  import warp as wp
2
4
  from warp.fem import cache
3
- from warp.fem.geometry import Nanogrid
4
- from warp.fem.geometry.nanogrid import _add_axis_flag
5
+ from warp.fem.geometry import AdaptiveNanogrid, Nanogrid
5
6
  from warp.fem.polynomial import is_closed
6
7
  from warp.fem.types import ElementIndex
7
8
 
@@ -29,7 +30,7 @@ class NanogridSpaceTopology(SpaceTopology):
29
30
 
30
31
  def __init__(
31
32
  self,
32
- grid: Nanogrid,
33
+ grid: Union[Nanogrid, AdaptiveNanogrid],
33
34
  shape: ShapeFunction,
34
35
  need_edge_indices: bool = True,
35
36
  need_face_indices: bool = True,
@@ -43,10 +44,16 @@ class NanogridSpaceTopology(SpaceTopology):
43
44
 
44
45
  self._vertex_grid = grid.vertex_grid.id
45
46
 
46
- self._edge_grid = grid.edge_grid.id if need_edge_indices else -1
47
- self._face_grid = grid.face_grid.id if need_face_indices else -1
48
- self._edge_count = grid.edge_count() if need_edge_indices else 0
49
- self._face_count = grid.side_count() if need_face_indices else 0
47
+ if isinstance(grid, Nanogrid):
48
+ self._edge_grid = grid.edge_grid.id if need_edge_indices else -1
49
+ self._face_grid = grid.face_grid.id if need_face_indices else -1
50
+ self._edge_count = grid.edge_count() if need_edge_indices else 0
51
+ self._face_count = grid.side_count() if need_face_indices else 0
52
+ else:
53
+ self._edge_grid = grid.stacked_edge_grid.id if need_edge_indices else -1
54
+ self._face_grid = grid.stacked_face_grid.id if need_face_indices else -1
55
+ self._edge_count = grid.stacked_edge_count() if need_edge_indices else 0
56
+ self._face_count = grid.stacked_face_count() if need_face_indices else 0
50
57
 
51
58
  @cache.cached_arg_value
52
59
  def topo_arg_value(self, device):
@@ -61,29 +68,58 @@ class NanogridSpaceTopology(SpaceTopology):
61
68
  arg.edge_count = self._edge_count
62
69
  return arg
63
70
 
71
+ def _make_element_node_index(self):
72
+ element_node_index_generic = self._make_element_node_index_generic()
73
+
74
+ @cache.dynamic_func(suffix=self.name)
75
+ def element_node_index(
76
+ geo_arg: Nanogrid.CellArg,
77
+ topo_arg: NanogridTopologyArg,
78
+ element_index: ElementIndex,
79
+ node_index_in_elt: int,
80
+ ):
81
+ ijk = geo_arg.cell_ijk[element_index]
82
+ return element_node_index_generic(topo_arg, element_index, node_index_in_elt, ijk, 0)
83
+
84
+ if isinstance(self._grid, Nanogrid):
85
+ return element_node_index
86
+
87
+ @cache.dynamic_func(suffix=self.name)
88
+ def element_node_index_adaptive(
89
+ geo_arg: AdaptiveNanogrid.CellArg,
90
+ topo_arg: NanogridTopologyArg,
91
+ element_index: ElementIndex,
92
+ node_index_in_elt: int,
93
+ ):
94
+ ijk = geo_arg.cell_ijk[element_index]
95
+ level = int(geo_arg.cell_level[element_index])
96
+ return element_node_index_generic(topo_arg, element_index, node_index_in_elt, ijk, level)
97
+
98
+ return element_node_index_adaptive
99
+
64
100
 
65
101
  @wp.func
66
- def _cell_vertex_coord(cell_ijk: wp.vec3i, n: int):
67
- return cell_ijk + wp.vec3i((n & 4) >> 2, (n & 2) >> 1, n & 1)
102
+ def _cell_vertex_coord(cell_ijk: wp.vec3i, cell_level: int, n: int):
103
+ return cell_ijk + AdaptiveNanogrid.fine_ijk(wp.vec3i((n & 4) >> 2, (n & 2) >> 1, n & 1), cell_level)
68
104
 
69
105
 
70
106
  @wp.func
71
- def _cell_edge_coord(cell_ijk: wp.vec3i, axis: int, offset: int):
72
- e_ijk = cell_ijk
107
+ def _cell_edge_coord(cell_ijk: wp.vec3i, cell_level: int, axis: int, offset: int):
108
+ e_ijk = AdaptiveNanogrid.coarse_ijk(cell_ijk, cell_level)
73
109
  e_ijk[(axis + 1) % 3] += offset >> 1
74
110
  e_ijk[(axis + 2) % 3] += offset & 1
75
- return _add_axis_flag(e_ijk, axis)
111
+ return AdaptiveNanogrid.encode_axis_and_level(e_ijk, axis, cell_level)
76
112
 
77
113
 
78
114
  @wp.func
79
- def _cell_face_coord(cell_ijk: wp.vec3i, axis: int, offset: int):
80
- f_ijk = cell_ijk
115
+ def _cell_face_coord(cell_ijk: wp.vec3i, cell_level: int, axis: int, offset: int):
116
+ f_ijk = AdaptiveNanogrid.coarse_ijk(cell_ijk, cell_level)
81
117
  f_ijk[axis] += offset
82
- return _add_axis_flag(f_ijk, axis)
118
+ return AdaptiveNanogrid.encode_axis_and_level(f_ijk, axis, cell_level)
83
119
 
84
120
 
85
121
  class NanogridTripolynomialSpaceTopology(NanogridSpaceTopology):
86
- def __init__(self, grid: Nanogrid, shape: CubeTripolynomialShapeFunctions):
122
+ def __init__(self, grid: Union[Nanogrid, AdaptiveNanogrid], shape: CubeTripolynomialShapeFunctions):
87
123
  super().__init__(grid, shape, need_edge_indices=shape.ORDER >= 2, need_face_indices=shape.ORDER >= 2)
88
124
 
89
125
  self.element_node_index = self._make_element_node_index()
@@ -101,25 +137,24 @@ class NanogridTripolynomialSpaceTopology(NanogridSpaceTopology):
101
137
  + self._grid.cell_count() * INTERIOR_NODES_PER_CELL
102
138
  )
103
139
 
104
- def _make_element_node_index(self):
140
+ def _make_element_node_index_generic(self):
105
141
  ORDER = self._shape.ORDER
106
142
  INTERIOR_NODES_PER_EDGE = wp.constant(max(0, ORDER - 1))
107
143
  INTERIOR_NODES_PER_FACE = wp.constant(INTERIOR_NODES_PER_EDGE**2)
108
144
  INTERIOR_NODES_PER_CELL = wp.constant(INTERIOR_NODES_PER_EDGE**3)
109
145
 
110
146
  @cache.dynamic_func(suffix=self.name)
111
- def element_node_index(
112
- geo_arg: Nanogrid.CellArg,
147
+ def element_node_index_generic(
113
148
  topo_arg: NanogridTopologyArg,
114
149
  element_index: ElementIndex,
115
150
  node_index_in_elt: int,
151
+ ijk: wp.vec3i,
152
+ level: int,
116
153
  ):
117
154
  node_type, type_instance, type_index = self._shape.node_type_and_type_index(node_index_in_elt)
118
155
 
119
- ijk = geo_arg.cell_ijk[element_index]
120
-
121
156
  if node_type == CubeTripolynomialShapeFunctions.VERTEX:
122
- n_ijk = _cell_vertex_coord(ijk, type_instance)
157
+ n_ijk = _cell_vertex_coord(ijk, level, type_instance)
123
158
  return wp.volume_lookup_index(topo_arg.vertex_grid, n_ijk[0], n_ijk[1], n_ijk[2])
124
159
 
125
160
  offset = topo_arg.vertex_count
@@ -128,7 +163,7 @@ class NanogridTripolynomialSpaceTopology(NanogridSpaceTopology):
128
163
  axis = type_instance >> 2
129
164
  node_offset = type_instance & 3
130
165
 
131
- n_ijk = _cell_edge_coord(ijk, axis, node_offset)
166
+ n_ijk = _cell_edge_coord(ijk, level, axis, node_offset)
132
167
 
133
168
  edge_index = wp.volume_lookup_index(topo_arg.edge_grid, n_ijk[0], n_ijk[1], n_ijk[2])
134
169
  return offset + INTERIOR_NODES_PER_EDGE * edge_index + type_index
@@ -139,7 +174,7 @@ class NanogridTripolynomialSpaceTopology(NanogridSpaceTopology):
139
174
  axis = type_instance >> 1
140
175
  node_offset = type_instance & 1
141
176
 
142
- n_ijk = _cell_face_coord(ijk, axis, node_offset)
177
+ n_ijk = _cell_face_coord(ijk, level, axis, node_offset)
143
178
 
144
179
  face_index = wp.volume_lookup_index(topo_arg.face_grid, n_ijk[0], n_ijk[1], n_ijk[2])
145
180
  return offset + INTERIOR_NODES_PER_FACE * face_index + type_index
@@ -148,7 +183,7 @@ class NanogridTripolynomialSpaceTopology(NanogridSpaceTopology):
148
183
 
149
184
  return offset + INTERIOR_NODES_PER_CELL * element_index + type_index
150
185
 
151
- return element_node_index
186
+ return element_node_index_generic
152
187
 
153
188
 
154
189
  class NanogridSerendipitySpaceTopology(NanogridSpaceTopology):
@@ -160,37 +195,36 @@ class NanogridSerendipitySpaceTopology(NanogridSpaceTopology):
160
195
  def node_count(self) -> int:
161
196
  return self.geometry.vertex_count() + (self._shape.ORDER - 1) * self._edge_count
162
197
 
163
- def _make_element_node_index(self):
198
+ def _make_element_node_index_generic(self):
164
199
  ORDER = self._shape.ORDER
165
200
 
166
201
  @cache.dynamic_func(suffix=self.name)
167
- def element_node_index(
168
- cell_arg: Nanogrid.CellArg,
169
- topo_arg: NanogridSpaceTopology.TopologyArg,
202
+ def element_node_index_generic(
203
+ topo_arg: NanogridTopologyArg,
170
204
  element_index: ElementIndex,
171
205
  node_index_in_elt: int,
206
+ ijk: wp.vec3i,
207
+ level: int,
172
208
  ):
173
209
  node_type, type_index = self._shape.node_type_and_type_index(node_index_in_elt)
174
210
 
175
- ijk = cell_arg.cell_ijk[element_index]
176
-
177
211
  if node_type == CubeSerendipityShapeFunctions.VERTEX:
178
- n_ijk = _cell_vertex_coord(ijk, type_index)
212
+ n_ijk = _cell_vertex_coord(ijk, level, type_index)
179
213
  return wp.volume_lookup_index(topo_arg.vertex_grid, n_ijk[0], n_ijk[1], n_ijk[2])
180
214
 
181
215
  type_instance, index_in_edge = CubeSerendipityShapeFunctions._cube_edge_index(node_type, type_index)
182
216
  axis = type_instance >> 2
183
217
  node_offset = type_instance & 3
184
218
 
185
- n_ijk = _cell_edge_coord(ijk, axis, node_offset)
219
+ n_ijk = _cell_edge_coord(ijk, level, axis, node_offset)
186
220
 
187
221
  edge_index = wp.volume_lookup_index(topo_arg.edge_grid, n_ijk[0], n_ijk[1], n_ijk[2])
188
222
  return topo_arg.vertex_count + (ORDER - 1) * edge_index + index_in_edge
189
223
 
190
- return element_node_index
224
+ return element_node_index_generic
191
225
 
192
226
 
193
- def make_nanogrid_space_topology(grid: Nanogrid, shape: ShapeFunction):
227
+ def make_nanogrid_space_topology(grid: Union[Nanogrid, AdaptiveNanogrid], shape: ShapeFunction):
194
228
  if isinstance(shape, CubeSerendipityShapeFunctions):
195
229
  return forward_base_topology(NanogridSerendipitySpaceTopology, grid, shape)
196
230
 
warp/fem/utils.py CHANGED
@@ -40,24 +40,6 @@ def generalized_inner(x: wp.mat33, y: wp.vec3):
40
40
  return x[0] * y[0] + x[1] * y[1] + x[2] * y[2]
41
41
 
42
42
 
43
- @wp.func
44
- def apply_right(x: Any, y: Any):
45
- """Performs x y multiplication with y a square matrix and x either a row-vector or a matrix.
46
- Will be removed once native @ operator is implemented.
47
- """
48
- return x * y
49
-
50
-
51
- @wp.func
52
- def apply_right(x: wp.vec2, y: wp.mat22):
53
- return x[0] * y[0] + x[1] * y[1]
54
-
55
-
56
- @wp.func
57
- def apply_right(x: wp.vec3, y: wp.mat33):
58
- return x[0] * y[0] + x[1] * y[1] + x[2] * y[2]
59
-
60
-
61
43
  @wp.func
62
44
  def unit_element(template_type: Any, coord: int):
63
45
  """Returns a instance of `template_type` with a single coordinate set to 1 in the canonical basis"""
@@ -200,111 +182,135 @@ def inverse_qr(A: Any):
200
182
 
201
183
 
202
184
  @wp.func
203
- def symmetric_eigenvalues_qr(A: Any, tol: Any):
185
+ def _wilkinson_shift(a: Any, b: Any, c: Any, tol: Any):
186
+ # Wilkinson shift: estimate eigenvalue of 2x2 symmetric matrix [a, c, c, b]
187
+ d = (a - b) * type(tol)(0.5)
188
+ return b + d - wp.sign(d) * wp.sqrt(d * d + c * c)
189
+
190
+
191
+ @wp.func
192
+ def _givens_rotation(a: Any, b: Any):
193
+ # Givens rotation [[c -s], [s c]] such that sa+cb =0
194
+ zero = type(a)(0.0)
195
+ one = type(a)(1.0)
196
+ abn_sq = a * a + b * b
197
+ abn = wp.select(abn_sq == zero, one / wp.sqrt(abn_sq), zero)
198
+ return a * abn, -b * abn
199
+
200
+
201
+ @wp.func
202
+ def tridiagonal_symmetric_eigenvalues_qr(D: Any, L: Any, Q: Any, tol: Any):
204
203
  """
205
- Computes the eigenvalues and eigen vectors of a square symmetric matrix A using the QR algorithm
204
+ Computes the eigenvalues and eigenvectors of a symmetric tridiagonal matrix using the
205
+ Symmetric tridiagonal QR algorithm with implicit Wilkinson shift
206
206
 
207
207
  Args:
208
- A: square symmetric matrix
209
- tol: Tolerance for the diagonalization residual (squared L2 norm of off-diagonal terms)
208
+ D: Main diagonal of the matrix
209
+ L: Lower diagonal of the matrix, indexed such that L[i] = A[i+1, i]
210
+ Q: Initialization for the eigenvectors, useful if a pre-transformation has been applied, otherwise may be identity
211
+ tol: Tolerance for the diagonalization residual (Linf norm of off-diagonal over diagonal terms)
210
212
 
211
213
  Returns a tuple (D: vector of eigenvalues, P: matrix with one eigenvector per row) such that A = P^T D P
212
- """
213
214
 
214
- two = A.dtype(2.0)
215
- zero = A.dtype(0.0)
216
215
 
217
- # temp storage for matrix rows
218
- ri = type(A[0])()
219
- rn = type(ri)()
216
+ Ref: Arbenz P, Numerical Methods for Solving Large Scale Eigenvalue Problems, Chapter 4 (QR algorithm, Mar 13, 2018)
217
+ """
220
218
 
221
- # tridiagonal storage for R
222
- R_L = type(ri)()
223
- R_L = type(ri)(zero)
224
- R_U = type(ri)(zero)
219
+ two = D.dtype(2.0)
225
220
 
226
- # so that we can use the type length in expression
221
+ # so that we can use the type length in expressions
227
222
  # this will prevent unrolling by warp, but should be ok for native code
228
223
  m = int(0)
229
- for _ in range(type(ri).length):
224
+ for _ in range(type(D).length):
230
225
  m += 1
231
226
 
227
+ start = int(0)
228
+ y = D.dtype(0.0) # moving bulge
229
+ x = D.dtype(0.0) # coeff atop bulge
230
+
231
+ for _ in range(32 * m): # failsafe, usually converges faster than that
232
+ # Iterate over all independent (deflated) blocks
233
+ end = int(-1)
234
+
235
+ for k in range(m - 1):
236
+ if k >= end:
237
+ # Check if new block is starting
238
+ if k == end or wp.abs(L[k]) <= tol * (wp.abs(D[k]) + wp.abs(D[k + 1])):
239
+ continue
240
+
241
+ # Find end of block
242
+ start = k
243
+ end = start + 1
244
+ while end + 1 < m:
245
+ if wp.abs(L[end]) <= tol * (wp.abs(D[end + 1]) + wp.abs(D[end])):
246
+ break
247
+ end += 1
248
+
249
+ # Wilkinson shift (an eigenvalue of the last 2x2 block)
250
+ shift = _wilkinson_shift(D[end - 1], D[end], L[end - 1], tol)
251
+
252
+ # start with eliminating lower diag of first column of shifted matrix
253
+ # (i.e. first step of explicit QR factorization)
254
+ # Then all further steps eliminate the bulge (second diag) of the non-shifted matrix
255
+ x = D[start] - shift
256
+ y = L[start]
257
+
258
+ c, s = _givens_rotation(x, y)
259
+
260
+ # Apply Givens rotation on both sides of tridiagonal matrix
261
+
262
+ # middle block
263
+ d = D[k] - D[k + 1]
264
+ z = (two * c * L[k] + d * s) * s
265
+ D[k] -= z
266
+ D[k + 1] += z
267
+ L[k] = d * c * s + (c * c - s * s) * L[k]
268
+
269
+ if k > start:
270
+ L[k - 1] = c * x - s * y
271
+
272
+ x = L[k]
273
+ y = -s * L[k + 1] # new bulge
274
+ L[k + 1] *= c
275
+
276
+ # apply givens rotation on left of Q
277
+ # note: Q is transposed compared to usual impls, as Warp makes it easier to index rows
278
+ Qk0 = Q[k]
279
+ Qk1 = Q[k + 1]
280
+ Q[k] = c * Qk0 - s * Qk1
281
+ Q[k + 1] = c * Qk1 + s * Qk0
282
+
283
+ if end <= 0:
284
+ # We did nothing, so diagonalization must have been achieved
285
+ break
286
+
287
+ return D, Q
288
+
289
+
290
+ @wp.func
291
+ def symmetric_eigenvalues_qr(A: Any, tol: Any):
292
+ """
293
+ Computes the eigenvalues and eigenvectors of a square symmetric matrix A using the QR algorithm
294
+
295
+ Args:
296
+ A: square symmetric matrix
297
+ tol: Tolerance for the diagonalization residual (Linf norm of off-diagonal over diagonal terms)
298
+
299
+ Returns a tuple (D: vector of eigenvalues, P: matrix with one eigenvector per row) such that A = P^T D P
300
+ """
301
+
232
302
  # Put A under Hessenberg form (tridiagonal)
233
303
  Q, H = householder_make_hessenberg(A)
234
- Q = wp.transpose(Q) # algorithm below works and transposed Q as rows are easier to index
235
-
236
- for _ in range(16 * m): # failsafe, usually converges faster than that
237
- # Initialize R with current H
238
- R_D = wp.get_diag(H)
239
- for i in range(1, type(ri).length):
240
- R_L[i - 1] = H[i, i - 1]
241
- R_U[i - 1] = H[i - 1, i]
242
-
243
- # compute QR decomposition, directly transform H and eigenvectors
244
- for n in range(1, m):
245
- i = n - 1
246
-
247
- # compute reflection
248
- xi = R_D[i]
249
- xn = R_L[i]
250
-
251
- xii = xi * xi
252
- xnn = xn * xn
253
- alpha = wp.sqrt(xii + xnn) * wp.sign(xi)
254
-
255
- xi += alpha
256
- xii = xi * xi
257
- xin = xi * xn
258
-
259
- two_over_x_sq = wp.select(alpha == zero, two / (xii + xnn), zero)
260
- xii *= two_over_x_sq
261
- xin *= two_over_x_sq
262
- xnn *= two_over_x_sq
263
-
264
- # Left-multiply R and Q, multiply H on both sides
265
- # Note that R should get non-zero coefficients on the second upper diagonal,
266
- # but those won't get read afterwards, so we can ignore them
267
-
268
- R_D[n] -= R_U[i] * xin + R_D[n] * xnn
269
- R_U[n] -= R_U[n] * xnn
270
-
271
- ri = Q[i]
272
- rn = Q[n]
273
- Q[i] -= ri * xii + rn * xin
274
- Q[n] -= ri * xin + rn * xnn
275
-
276
- # H is multiplied on both sides, but stays tridiagonal except for moving buldge
277
- # Note: we could reduce the stencil to for 4 columns qui we do below,
278
- # but unlikely to be worth it for our small matrix sizes
279
- ri = H[i]
280
- rn = H[n]
281
- H[i] -= ri * xii + rn * xin
282
- H[n] -= ri * xin + rn * xnn
283
-
284
- # multiply on right, manually. We just need to consider 4 rows
285
- if i > 0:
286
- ci = H[i - 1, i]
287
- cn = H[i - 1, n]
288
- H[i - 1, i] -= ci * xii + cn * xin
289
- H[i - 1, n] -= ci * xin + cn * xnn
290
-
291
- for k in range(2):
292
- ci = H[i + k, i]
293
- cn = H[i + k, n]
294
- H[i + k, i] -= ci * xii + cn * xin
295
- H[i + k, n] -= ci * xin + cn * xnn
296
-
297
- if n + 1 < m:
298
- ci = H[n + 1, i]
299
- cn = H[n + 1, n]
300
- H[n + 1, i] -= ci * xii + cn * xin
301
- H[n + 1, n] -= ci * xin + cn * xnn
302
-
303
- # Terminate if the upper diagonal of R is near zero
304
- if wp.length_sq(R_U) < tol:
305
- break
306
304
 
307
- return wp.get_diag(H), Q
305
+ # tridiagonal storage for H
306
+ D = wp.get_diag(H)
307
+ L = type(D)(A.dtype(0.0))
308
+ for i in range(1, type(D).length):
309
+ L[i - 1] = H[i, i - 1]
310
+
311
+ Qt = wp.transpose(Q)
312
+ ev, P = tridiagonal_symmetric_eigenvalues_qr(D, L, Qt, tol)
313
+ return ev, P
308
314
 
309
315
 
310
316
  def compress_node_indices(