warp-lang 1.3.2__py3-none-macosx_10_13_universal2.whl → 1.4.0__py3-none-macosx_10_13_universal2.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of warp-lang might be problematic. Click here for more details.
- warp/__init__.py +6 -0
- warp/autograd.py +59 -6
- warp/bin/libwarp.dylib +0 -0
- warp/build_dll.py +8 -10
- warp/builtins.py +126 -4
- warp/codegen.py +435 -53
- warp/config.py +1 -1
- warp/context.py +678 -403
- warp/dlpack.py +2 -0
- warp/examples/benchmarks/benchmark_cloth.py +10 -0
- warp/examples/core/example_render_opengl.py +12 -10
- warp/examples/fem/example_adaptive_grid.py +251 -0
- warp/examples/fem/example_apic_fluid.py +1 -1
- warp/examples/fem/example_diffusion_3d.py +2 -2
- warp/examples/fem/example_magnetostatics.py +1 -1
- warp/examples/fem/example_streamlines.py +1 -0
- warp/examples/fem/utils.py +23 -4
- warp/examples/sim/example_cloth.py +50 -6
- warp/fem/__init__.py +2 -0
- warp/fem/adaptivity.py +493 -0
- warp/fem/field/field.py +2 -1
- warp/fem/field/nodal_field.py +18 -26
- warp/fem/field/test.py +4 -4
- warp/fem/field/trial.py +4 -4
- warp/fem/geometry/__init__.py +1 -0
- warp/fem/geometry/adaptive_nanogrid.py +843 -0
- warp/fem/geometry/nanogrid.py +55 -28
- warp/fem/space/__init__.py +1 -1
- warp/fem/space/nanogrid_function_space.py +69 -35
- warp/fem/utils.py +113 -107
- warp/jax_experimental.py +28 -15
- warp/native/array.h +0 -1
- warp/native/builtin.h +103 -6
- warp/native/bvh.cu +2 -0
- warp/native/cuda_util.cpp +14 -0
- warp/native/cuda_util.h +2 -0
- warp/native/error.cpp +4 -2
- warp/native/exports.h +99 -17
- warp/native/mat.h +97 -0
- warp/native/mesh.cpp +36 -0
- warp/native/mesh.cu +51 -0
- warp/native/mesh.h +1 -0
- warp/native/quat.h +43 -0
- warp/native/spatial.h +6 -0
- warp/native/vec.h +74 -0
- warp/native/warp.cpp +2 -1
- warp/native/warp.cu +10 -3
- warp/native/warp.h +8 -1
- warp/paddle.py +382 -0
- warp/sim/__init__.py +1 -0
- warp/sim/collide.py +519 -0
- warp/sim/integrator_euler.py +18 -5
- warp/sim/integrator_featherstone.py +5 -5
- warp/sim/integrator_vbd.py +1026 -0
- warp/sim/model.py +49 -23
- warp/stubs.py +459 -0
- warp/tape.py +2 -0
- warp/tests/aux_test_dependent.py +1 -0
- warp/tests/aux_test_name_clash1.py +32 -0
- warp/tests/aux_test_name_clash2.py +32 -0
- warp/tests/aux_test_square.py +1 -0
- warp/tests/test_array.py +222 -0
- warp/tests/test_async.py +3 -3
- warp/tests/test_atomic.py +6 -0
- warp/tests/test_closest_point_edge_edge.py +93 -1
- warp/tests/test_codegen.py +62 -15
- warp/tests/test_codegen_instancing.py +1457 -0
- warp/tests/test_collision.py +486 -0
- warp/tests/test_compile_consts.py +3 -28
- warp/tests/test_dlpack.py +170 -0
- warp/tests/test_examples.py +22 -8
- warp/tests/test_fast_math.py +10 -4
- warp/tests/test_fem.py +64 -0
- warp/tests/test_func.py +46 -0
- warp/tests/test_implicit_init.py +49 -0
- warp/tests/test_jax.py +58 -0
- warp/tests/test_mat.py +84 -0
- warp/tests/test_mesh_query_point.py +188 -0
- warp/tests/test_module_hashing.py +40 -0
- warp/tests/test_multigpu.py +3 -3
- warp/tests/test_overwrite.py +8 -0
- warp/tests/test_paddle.py +852 -0
- warp/tests/test_print.py +89 -0
- warp/tests/test_quat.py +111 -0
- warp/tests/test_reload.py +31 -1
- warp/tests/test_scalar_ops.py +2 -0
- warp/tests/test_static.py +412 -0
- warp/tests/test_streams.py +64 -3
- warp/tests/test_struct.py +4 -4
- warp/tests/test_torch.py +24 -0
- warp/tests/test_triangle_closest_point.py +137 -0
- warp/tests/test_types.py +1 -1
- warp/tests/test_vbd.py +386 -0
- warp/tests/test_vec.py +143 -0
- warp/tests/test_vec_scalar_ops.py +139 -0
- warp/tests/test_volume.py +30 -0
- warp/tests/unittest_suites.py +12 -0
- warp/tests/unittest_utils.py +9 -5
- warp/thirdparty/dlpack.py +3 -1
- warp/types.py +157 -34
- warp/utils.py +37 -14
- {warp_lang-1.3.2.dist-info → warp_lang-1.4.0.dist-info}/METADATA +10 -8
- {warp_lang-1.3.2.dist-info → warp_lang-1.4.0.dist-info}/RECORD +106 -94
- warp/tests/test_point_triangle_closest_point.py +0 -143
- {warp_lang-1.3.2.dist-info → warp_lang-1.4.0.dist-info}/LICENSE.md +0 -0
- {warp_lang-1.3.2.dist-info → warp_lang-1.4.0.dist-info}/WHEEL +0 -0
- {warp_lang-1.3.2.dist-info → warp_lang-1.4.0.dist-info}/top_level.txt +0 -0
warp/fem/geometry/nanogrid.py
CHANGED
|
@@ -10,10 +10,12 @@ from .element import Cube, Square
|
|
|
10
10
|
from .geometry import Geometry
|
|
11
11
|
|
|
12
12
|
# Flag used for building edge/face grids to disambiguate the axis within the grid
|
|
13
|
+
# Morton indexing allows for
|
|
13
14
|
GRID_AXIS_FLAG = wp.constant(wp.int32(1 << 20))
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
15
|
+
|
|
16
|
+
FACE_AXIS_MASK = wp.constant(wp.uint8((1 << 2) - 1))
|
|
17
|
+
FACE_INNER_OFFSET_BIT = wp.constant(wp.uint8(2))
|
|
18
|
+
FACE_OUTER_OFFSET_BIT = wp.constant(wp.uint8(3))
|
|
17
19
|
|
|
18
20
|
_mat32 = wp.mat(shape=(3, 2), dtype=float)
|
|
19
21
|
|
|
@@ -93,7 +95,7 @@ class Nanogrid(Geometry):
|
|
|
93
95
|
self._face_ijk = None
|
|
94
96
|
|
|
95
97
|
self._edge_grid = None
|
|
96
|
-
self.
|
|
98
|
+
self._edge_count = 0
|
|
97
99
|
|
|
98
100
|
@property
|
|
99
101
|
def cell_grid(self) -> wp.Volume:
|
|
@@ -129,7 +131,7 @@ class Nanogrid(Geometry):
|
|
|
129
131
|
|
|
130
132
|
def edge_count(self):
|
|
131
133
|
self._ensure_edge_grid()
|
|
132
|
-
return self.
|
|
134
|
+
return self._edge_count
|
|
133
135
|
|
|
134
136
|
def reference_cell(self) -> Cube:
|
|
135
137
|
return Cube()
|
|
@@ -168,13 +170,30 @@ class Nanogrid(Geometry):
|
|
|
168
170
|
def cell_lookup(args: CellArg, pos: wp.vec3):
|
|
169
171
|
uvw = wp.volume_world_to_index(args.cell_grid, pos) + wp.vec3(0.5)
|
|
170
172
|
ijk = wp.vec3i(int(wp.floor(uvw[0])), int(wp.floor(uvw[1])), int(wp.floor(uvw[2])))
|
|
171
|
-
|
|
173
|
+
cell_index = wp.volume_lookup_index(args.cell_grid, ijk[0], ijk[1], ijk[2])
|
|
172
174
|
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
175
|
+
coords = uvw - wp.vec3(ijk)
|
|
176
|
+
if cell_index == -1:
|
|
177
|
+
if wp.min(coords) == 0.0 or wp.max(coords) == 1.0:
|
|
178
|
+
il = wp.select(coords[0] > 0.5, -1, 0)
|
|
179
|
+
jl = wp.select(coords[1] > 0.5, -1, 0)
|
|
180
|
+
kl = wp.select(coords[2] > 0.5, -1, 0)
|
|
181
|
+
|
|
182
|
+
for n in range(8):
|
|
183
|
+
ni = n >> 2
|
|
184
|
+
nj = (n & 2) >> 1
|
|
185
|
+
nk = n & 1
|
|
186
|
+
nijk = ijk + wp.vec3i(ni + il, nj + jl, nk + kl)
|
|
187
|
+
|
|
188
|
+
coords = uvw - wp.vec3(nijk)
|
|
189
|
+
if wp.min(coords) >= 0.0 and wp.max(coords) <= 1.0:
|
|
190
|
+
cell_index = wp.volume_lookup_index(args.cell_grid, nijk[0], nijk[1], nijk[2])
|
|
191
|
+
if cell_index != -1:
|
|
192
|
+
return make_free_sample(cell_index, coords)
|
|
193
|
+
|
|
194
|
+
return make_free_sample(NULL_ELEMENT_INDEX, Coords(OUTSIDE))
|
|
195
|
+
|
|
196
|
+
return make_free_sample(cell_index, coords)
|
|
178
197
|
|
|
179
198
|
@wp.func
|
|
180
199
|
def _project_on_voxel_at_origin(coords: wp.vec3):
|
|
@@ -280,12 +299,10 @@ class Nanogrid(Geometry):
|
|
|
280
299
|
return wp.volume_index_to_world(cell_grid, uvw - wp.vec3(0.5))
|
|
281
300
|
|
|
282
301
|
@wp.func
|
|
283
|
-
def _face_tangent_vecs(
|
|
302
|
+
def _face_tangent_vecs(cell_grid: wp.uint64, axis: int, flip: int):
|
|
284
303
|
u_axis = utils.unit_element(wp.vec3(), (axis + 1 + flip) % 3)
|
|
285
304
|
v_axis = utils.unit_element(wp.vec3(), (axis + 2 - flip) % 3)
|
|
286
305
|
|
|
287
|
-
cell_grid = args.cell_arg.cell_grid
|
|
288
|
-
|
|
289
306
|
return wp.volume_index_to_world_dir(cell_grid, u_axis), wp.volume_index_to_world_dir(cell_grid, v_axis)
|
|
290
307
|
|
|
291
308
|
@wp.func
|
|
@@ -293,7 +310,7 @@ class Nanogrid(Geometry):
|
|
|
293
310
|
flags = args.face_flags[s.element_index]
|
|
294
311
|
axis = Nanogrid._get_face_axis(flags)
|
|
295
312
|
flip = Nanogrid._get_face_inner_offset(flags)
|
|
296
|
-
v1, v2 = Nanogrid._face_tangent_vecs(args, axis, flip)
|
|
313
|
+
v1, v2 = Nanogrid._face_tangent_vecs(args.cell_arg.cell_grid, axis, flip)
|
|
297
314
|
return _mat32(v1, v2)
|
|
298
315
|
|
|
299
316
|
@wp.func
|
|
@@ -320,7 +337,7 @@ class Nanogrid(Geometry):
|
|
|
320
337
|
axis = Nanogrid._get_face_axis(flags)
|
|
321
338
|
flip = Nanogrid._get_face_inner_offset(flags)
|
|
322
339
|
|
|
323
|
-
v1, v2 = Nanogrid._face_tangent_vecs(args, axis, flip)
|
|
340
|
+
v1, v2 = Nanogrid._face_tangent_vecs(args.cell_arg.cell_grid, axis, flip)
|
|
324
341
|
return wp.cross(v1, v2) / args.face_areas[axis]
|
|
325
342
|
|
|
326
343
|
@wp.func
|
|
@@ -405,16 +422,14 @@ class Nanogrid(Geometry):
|
|
|
405
422
|
|
|
406
423
|
def _build_edge_grid(self, temporary_store: Optional[cache.TemporaryStore] = None):
|
|
407
424
|
self._edge_grid = _build_edge_grid(self._cell_ijk, self._cell_grid, temporary_store)
|
|
408
|
-
|
|
409
|
-
self._edge_ijk = wp.array(shape=(edge_count,), dtype=wp.vec3i, device=self._edge_grid.device)
|
|
410
|
-
self._edge_grid.get_voxels(out=self._edge_ijk)
|
|
425
|
+
self._edge_count = self._edge_grid.get_voxel_count()
|
|
411
426
|
|
|
412
427
|
def _ensure_face_grid(self):
|
|
413
428
|
if self._face_ijk is None:
|
|
414
429
|
self._build_face_grid()
|
|
415
430
|
|
|
416
431
|
def _ensure_edge_grid(self):
|
|
417
|
-
if self.
|
|
432
|
+
if self._edge_grid is None:
|
|
418
433
|
self._build_edge_grid()
|
|
419
434
|
|
|
420
435
|
|
|
@@ -475,7 +490,7 @@ def _build_node_grid(cell_ijk, grid: wp.Volume, temporary_store: cache.Temporary
|
|
|
475
490
|
_cell_node_indices, dim=cell_nodes.array.shape, inputs=[cell_ijk, cell_nodes.array], device=cell_ijk.device
|
|
476
491
|
)
|
|
477
492
|
node_grid = wp.Volume.allocate_by_voxels(
|
|
478
|
-
cell_nodes.array.flatten(), voxel_size=grid.get_voxel_size()
|
|
493
|
+
cell_nodes.array.flatten(), voxel_size=grid.get_voxel_size(), device=cell_ijk.device
|
|
479
494
|
)
|
|
480
495
|
|
|
481
496
|
return node_grid
|
|
@@ -487,7 +502,7 @@ def _build_face_grid(cell_ijk, grid: wp.Volume, temporary_store: cache.Temporary
|
|
|
487
502
|
cell_faces = cache.borrow_temporary(temporary_store, shape=(cell_count, 6), dtype=wp.vec3i, device=cell_ijk.device)
|
|
488
503
|
wp.launch(_cell_face_indices, dim=cell_count, inputs=[cell_ijk, cell_faces.array], device=cell_ijk.device)
|
|
489
504
|
face_grid = wp.Volume.allocate_by_voxels(
|
|
490
|
-
cell_faces.array.flatten(), voxel_size=grid.get_voxel_size()
|
|
505
|
+
cell_faces.array.flatten(), voxel_size=grid.get_voxel_size(), device=cell_ijk.device
|
|
491
506
|
)
|
|
492
507
|
|
|
493
508
|
return face_grid
|
|
@@ -499,12 +514,25 @@ def _build_edge_grid(cell_ijk, grid: wp.Volume, temporary_store: cache.Temporary
|
|
|
499
514
|
cell_edges = cache.borrow_temporary(temporary_store, shape=(cell_count, 12), dtype=wp.vec3i, device=cell_ijk.device)
|
|
500
515
|
wp.launch(_cell_edge_indices, dim=cell_count, inputs=[cell_ijk, cell_edges.array], device=cell_ijk.device)
|
|
501
516
|
edge_grid = wp.Volume.allocate_by_voxels(
|
|
502
|
-
cell_edges.array.flatten(), voxel_size=grid.get_voxel_size()
|
|
517
|
+
cell_edges.array.flatten(), voxel_size=grid.get_voxel_size(), device=cell_ijk.device
|
|
503
518
|
)
|
|
504
519
|
|
|
505
520
|
return edge_grid
|
|
506
521
|
|
|
507
522
|
|
|
523
|
+
@wp.func
|
|
524
|
+
def _make_face_flags(axis: int, plus_cell_index: int, minus_cell_index: int):
|
|
525
|
+
plus_boundary = wp.uint8(wp.select(plus_cell_index == -1, 0, 1)) << FACE_OUTER_OFFSET_BIT
|
|
526
|
+
minus_boundary = wp.uint8(wp.select(minus_cell_index == -1, 0, 1)) << FACE_INNER_OFFSET_BIT
|
|
527
|
+
|
|
528
|
+
return wp.uint8(axis) | plus_boundary | minus_boundary
|
|
529
|
+
|
|
530
|
+
|
|
531
|
+
@wp.func
|
|
532
|
+
def _get_boundary_mask(flags: wp.uint8):
|
|
533
|
+
return int((flags >> FACE_OUTER_OFFSET_BIT) | (flags >> FACE_INNER_OFFSET_BIT)) & 1
|
|
534
|
+
|
|
535
|
+
|
|
508
536
|
@wp.kernel
|
|
509
537
|
def _build_face_flags(
|
|
510
538
|
cell_grid: wp.uint64,
|
|
@@ -522,9 +550,8 @@ def _build_face_flags(
|
|
|
522
550
|
plus_cell_index = wp.volume_lookup_index(cell_grid, ijk[0], ijk[1], ijk[2])
|
|
523
551
|
minus_cell_index = wp.volume_lookup_index(cell_grid, ijk_minus[0], ijk_minus[1], ijk_minus[2])
|
|
524
552
|
|
|
525
|
-
plus_boundary = wp.uint8(wp.select(plus_cell_index == -1, 0, 1)) << FACE_OUTER_OFFSET_BIT
|
|
526
|
-
minus_boundary = wp.uint8(wp.select(minus_cell_index == -1, 0, 1)) << FACE_INNER_OFFSET_BIT
|
|
527
|
-
|
|
528
553
|
face_ijk[face] = ijk
|
|
529
|
-
|
|
530
|
-
|
|
554
|
+
|
|
555
|
+
flags = _make_face_flags(axis, plus_cell_index, minus_cell_index)
|
|
556
|
+
face_flags[face] = flags
|
|
557
|
+
boundary_face_mask[face] = _get_boundary_mask(flags)
|
warp/fem/space/__init__.py
CHANGED
|
@@ -123,7 +123,7 @@ def make_polynomial_basis_space(
|
|
|
123
123
|
topology = make_quadmesh_2d_space_topology(geo, shape)
|
|
124
124
|
elif isinstance(base_geo, _geometry.Hexmesh):
|
|
125
125
|
topology = make_hexmesh_space_topology(geo, shape)
|
|
126
|
-
elif isinstance(base_geo, _geometry.Nanogrid):
|
|
126
|
+
elif isinstance(base_geo, _geometry.Nanogrid) or isinstance(base_geo, _geometry.AdaptiveNanogrid):
|
|
127
127
|
topology = make_nanogrid_space_topology(geo, shape)
|
|
128
128
|
|
|
129
129
|
if topology is None:
|
|
@@ -1,7 +1,8 @@
|
|
|
1
|
+
from typing import Union
|
|
2
|
+
|
|
1
3
|
import warp as wp
|
|
2
4
|
from warp.fem import cache
|
|
3
|
-
from warp.fem.geometry import Nanogrid
|
|
4
|
-
from warp.fem.geometry.nanogrid import _add_axis_flag
|
|
5
|
+
from warp.fem.geometry import AdaptiveNanogrid, Nanogrid
|
|
5
6
|
from warp.fem.polynomial import is_closed
|
|
6
7
|
from warp.fem.types import ElementIndex
|
|
7
8
|
|
|
@@ -29,7 +30,7 @@ class NanogridSpaceTopology(SpaceTopology):
|
|
|
29
30
|
|
|
30
31
|
def __init__(
|
|
31
32
|
self,
|
|
32
|
-
grid: Nanogrid,
|
|
33
|
+
grid: Union[Nanogrid, AdaptiveNanogrid],
|
|
33
34
|
shape: ShapeFunction,
|
|
34
35
|
need_edge_indices: bool = True,
|
|
35
36
|
need_face_indices: bool = True,
|
|
@@ -43,10 +44,16 @@ class NanogridSpaceTopology(SpaceTopology):
|
|
|
43
44
|
|
|
44
45
|
self._vertex_grid = grid.vertex_grid.id
|
|
45
46
|
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
47
|
+
if isinstance(grid, Nanogrid):
|
|
48
|
+
self._edge_grid = grid.edge_grid.id if need_edge_indices else -1
|
|
49
|
+
self._face_grid = grid.face_grid.id if need_face_indices else -1
|
|
50
|
+
self._edge_count = grid.edge_count() if need_edge_indices else 0
|
|
51
|
+
self._face_count = grid.side_count() if need_face_indices else 0
|
|
52
|
+
else:
|
|
53
|
+
self._edge_grid = grid.stacked_edge_grid.id if need_edge_indices else -1
|
|
54
|
+
self._face_grid = grid.stacked_face_grid.id if need_face_indices else -1
|
|
55
|
+
self._edge_count = grid.stacked_edge_count() if need_edge_indices else 0
|
|
56
|
+
self._face_count = grid.stacked_face_count() if need_face_indices else 0
|
|
50
57
|
|
|
51
58
|
@cache.cached_arg_value
|
|
52
59
|
def topo_arg_value(self, device):
|
|
@@ -61,29 +68,58 @@ class NanogridSpaceTopology(SpaceTopology):
|
|
|
61
68
|
arg.edge_count = self._edge_count
|
|
62
69
|
return arg
|
|
63
70
|
|
|
71
|
+
def _make_element_node_index(self):
|
|
72
|
+
element_node_index_generic = self._make_element_node_index_generic()
|
|
73
|
+
|
|
74
|
+
@cache.dynamic_func(suffix=self.name)
|
|
75
|
+
def element_node_index(
|
|
76
|
+
geo_arg: Nanogrid.CellArg,
|
|
77
|
+
topo_arg: NanogridTopologyArg,
|
|
78
|
+
element_index: ElementIndex,
|
|
79
|
+
node_index_in_elt: int,
|
|
80
|
+
):
|
|
81
|
+
ijk = geo_arg.cell_ijk[element_index]
|
|
82
|
+
return element_node_index_generic(topo_arg, element_index, node_index_in_elt, ijk, 0)
|
|
83
|
+
|
|
84
|
+
if isinstance(self._grid, Nanogrid):
|
|
85
|
+
return element_node_index
|
|
86
|
+
|
|
87
|
+
@cache.dynamic_func(suffix=self.name)
|
|
88
|
+
def element_node_index_adaptive(
|
|
89
|
+
geo_arg: AdaptiveNanogrid.CellArg,
|
|
90
|
+
topo_arg: NanogridTopologyArg,
|
|
91
|
+
element_index: ElementIndex,
|
|
92
|
+
node_index_in_elt: int,
|
|
93
|
+
):
|
|
94
|
+
ijk = geo_arg.cell_ijk[element_index]
|
|
95
|
+
level = int(geo_arg.cell_level[element_index])
|
|
96
|
+
return element_node_index_generic(topo_arg, element_index, node_index_in_elt, ijk, level)
|
|
97
|
+
|
|
98
|
+
return element_node_index_adaptive
|
|
99
|
+
|
|
64
100
|
|
|
65
101
|
@wp.func
|
|
66
|
-
def _cell_vertex_coord(cell_ijk: wp.vec3i, n: int):
|
|
67
|
-
return cell_ijk + wp.vec3i((n & 4) >> 2, (n & 2) >> 1, n & 1)
|
|
102
|
+
def _cell_vertex_coord(cell_ijk: wp.vec3i, cell_level: int, n: int):
|
|
103
|
+
return cell_ijk + AdaptiveNanogrid.fine_ijk(wp.vec3i((n & 4) >> 2, (n & 2) >> 1, n & 1), cell_level)
|
|
68
104
|
|
|
69
105
|
|
|
70
106
|
@wp.func
|
|
71
|
-
def _cell_edge_coord(cell_ijk: wp.vec3i, axis: int, offset: int):
|
|
72
|
-
e_ijk = cell_ijk
|
|
107
|
+
def _cell_edge_coord(cell_ijk: wp.vec3i, cell_level: int, axis: int, offset: int):
|
|
108
|
+
e_ijk = AdaptiveNanogrid.coarse_ijk(cell_ijk, cell_level)
|
|
73
109
|
e_ijk[(axis + 1) % 3] += offset >> 1
|
|
74
110
|
e_ijk[(axis + 2) % 3] += offset & 1
|
|
75
|
-
return
|
|
111
|
+
return AdaptiveNanogrid.encode_axis_and_level(e_ijk, axis, cell_level)
|
|
76
112
|
|
|
77
113
|
|
|
78
114
|
@wp.func
|
|
79
|
-
def _cell_face_coord(cell_ijk: wp.vec3i, axis: int, offset: int):
|
|
80
|
-
f_ijk = cell_ijk
|
|
115
|
+
def _cell_face_coord(cell_ijk: wp.vec3i, cell_level: int, axis: int, offset: int):
|
|
116
|
+
f_ijk = AdaptiveNanogrid.coarse_ijk(cell_ijk, cell_level)
|
|
81
117
|
f_ijk[axis] += offset
|
|
82
|
-
return
|
|
118
|
+
return AdaptiveNanogrid.encode_axis_and_level(f_ijk, axis, cell_level)
|
|
83
119
|
|
|
84
120
|
|
|
85
121
|
class NanogridTripolynomialSpaceTopology(NanogridSpaceTopology):
|
|
86
|
-
def __init__(self, grid: Nanogrid, shape: CubeTripolynomialShapeFunctions):
|
|
122
|
+
def __init__(self, grid: Union[Nanogrid, AdaptiveNanogrid], shape: CubeTripolynomialShapeFunctions):
|
|
87
123
|
super().__init__(grid, shape, need_edge_indices=shape.ORDER >= 2, need_face_indices=shape.ORDER >= 2)
|
|
88
124
|
|
|
89
125
|
self.element_node_index = self._make_element_node_index()
|
|
@@ -101,25 +137,24 @@ class NanogridTripolynomialSpaceTopology(NanogridSpaceTopology):
|
|
|
101
137
|
+ self._grid.cell_count() * INTERIOR_NODES_PER_CELL
|
|
102
138
|
)
|
|
103
139
|
|
|
104
|
-
def
|
|
140
|
+
def _make_element_node_index_generic(self):
|
|
105
141
|
ORDER = self._shape.ORDER
|
|
106
142
|
INTERIOR_NODES_PER_EDGE = wp.constant(max(0, ORDER - 1))
|
|
107
143
|
INTERIOR_NODES_PER_FACE = wp.constant(INTERIOR_NODES_PER_EDGE**2)
|
|
108
144
|
INTERIOR_NODES_PER_CELL = wp.constant(INTERIOR_NODES_PER_EDGE**3)
|
|
109
145
|
|
|
110
146
|
@cache.dynamic_func(suffix=self.name)
|
|
111
|
-
def
|
|
112
|
-
geo_arg: Nanogrid.CellArg,
|
|
147
|
+
def element_node_index_generic(
|
|
113
148
|
topo_arg: NanogridTopologyArg,
|
|
114
149
|
element_index: ElementIndex,
|
|
115
150
|
node_index_in_elt: int,
|
|
151
|
+
ijk: wp.vec3i,
|
|
152
|
+
level: int,
|
|
116
153
|
):
|
|
117
154
|
node_type, type_instance, type_index = self._shape.node_type_and_type_index(node_index_in_elt)
|
|
118
155
|
|
|
119
|
-
ijk = geo_arg.cell_ijk[element_index]
|
|
120
|
-
|
|
121
156
|
if node_type == CubeTripolynomialShapeFunctions.VERTEX:
|
|
122
|
-
n_ijk = _cell_vertex_coord(ijk, type_instance)
|
|
157
|
+
n_ijk = _cell_vertex_coord(ijk, level, type_instance)
|
|
123
158
|
return wp.volume_lookup_index(topo_arg.vertex_grid, n_ijk[0], n_ijk[1], n_ijk[2])
|
|
124
159
|
|
|
125
160
|
offset = topo_arg.vertex_count
|
|
@@ -128,7 +163,7 @@ class NanogridTripolynomialSpaceTopology(NanogridSpaceTopology):
|
|
|
128
163
|
axis = type_instance >> 2
|
|
129
164
|
node_offset = type_instance & 3
|
|
130
165
|
|
|
131
|
-
n_ijk = _cell_edge_coord(ijk, axis, node_offset)
|
|
166
|
+
n_ijk = _cell_edge_coord(ijk, level, axis, node_offset)
|
|
132
167
|
|
|
133
168
|
edge_index = wp.volume_lookup_index(topo_arg.edge_grid, n_ijk[0], n_ijk[1], n_ijk[2])
|
|
134
169
|
return offset + INTERIOR_NODES_PER_EDGE * edge_index + type_index
|
|
@@ -139,7 +174,7 @@ class NanogridTripolynomialSpaceTopology(NanogridSpaceTopology):
|
|
|
139
174
|
axis = type_instance >> 1
|
|
140
175
|
node_offset = type_instance & 1
|
|
141
176
|
|
|
142
|
-
n_ijk = _cell_face_coord(ijk, axis, node_offset)
|
|
177
|
+
n_ijk = _cell_face_coord(ijk, level, axis, node_offset)
|
|
143
178
|
|
|
144
179
|
face_index = wp.volume_lookup_index(topo_arg.face_grid, n_ijk[0], n_ijk[1], n_ijk[2])
|
|
145
180
|
return offset + INTERIOR_NODES_PER_FACE * face_index + type_index
|
|
@@ -148,7 +183,7 @@ class NanogridTripolynomialSpaceTopology(NanogridSpaceTopology):
|
|
|
148
183
|
|
|
149
184
|
return offset + INTERIOR_NODES_PER_CELL * element_index + type_index
|
|
150
185
|
|
|
151
|
-
return
|
|
186
|
+
return element_node_index_generic
|
|
152
187
|
|
|
153
188
|
|
|
154
189
|
class NanogridSerendipitySpaceTopology(NanogridSpaceTopology):
|
|
@@ -160,37 +195,36 @@ class NanogridSerendipitySpaceTopology(NanogridSpaceTopology):
|
|
|
160
195
|
def node_count(self) -> int:
|
|
161
196
|
return self.geometry.vertex_count() + (self._shape.ORDER - 1) * self._edge_count
|
|
162
197
|
|
|
163
|
-
def
|
|
198
|
+
def _make_element_node_index_generic(self):
|
|
164
199
|
ORDER = self._shape.ORDER
|
|
165
200
|
|
|
166
201
|
@cache.dynamic_func(suffix=self.name)
|
|
167
|
-
def
|
|
168
|
-
|
|
169
|
-
topo_arg: NanogridSpaceTopology.TopologyArg,
|
|
202
|
+
def element_node_index_generic(
|
|
203
|
+
topo_arg: NanogridTopologyArg,
|
|
170
204
|
element_index: ElementIndex,
|
|
171
205
|
node_index_in_elt: int,
|
|
206
|
+
ijk: wp.vec3i,
|
|
207
|
+
level: int,
|
|
172
208
|
):
|
|
173
209
|
node_type, type_index = self._shape.node_type_and_type_index(node_index_in_elt)
|
|
174
210
|
|
|
175
|
-
ijk = cell_arg.cell_ijk[element_index]
|
|
176
|
-
|
|
177
211
|
if node_type == CubeSerendipityShapeFunctions.VERTEX:
|
|
178
|
-
n_ijk = _cell_vertex_coord(ijk, type_index)
|
|
212
|
+
n_ijk = _cell_vertex_coord(ijk, level, type_index)
|
|
179
213
|
return wp.volume_lookup_index(topo_arg.vertex_grid, n_ijk[0], n_ijk[1], n_ijk[2])
|
|
180
214
|
|
|
181
215
|
type_instance, index_in_edge = CubeSerendipityShapeFunctions._cube_edge_index(node_type, type_index)
|
|
182
216
|
axis = type_instance >> 2
|
|
183
217
|
node_offset = type_instance & 3
|
|
184
218
|
|
|
185
|
-
n_ijk = _cell_edge_coord(ijk, axis, node_offset)
|
|
219
|
+
n_ijk = _cell_edge_coord(ijk, level, axis, node_offset)
|
|
186
220
|
|
|
187
221
|
edge_index = wp.volume_lookup_index(topo_arg.edge_grid, n_ijk[0], n_ijk[1], n_ijk[2])
|
|
188
222
|
return topo_arg.vertex_count + (ORDER - 1) * edge_index + index_in_edge
|
|
189
223
|
|
|
190
|
-
return
|
|
224
|
+
return element_node_index_generic
|
|
191
225
|
|
|
192
226
|
|
|
193
|
-
def make_nanogrid_space_topology(grid: Nanogrid, shape: ShapeFunction):
|
|
227
|
+
def make_nanogrid_space_topology(grid: Union[Nanogrid, AdaptiveNanogrid], shape: ShapeFunction):
|
|
194
228
|
if isinstance(shape, CubeSerendipityShapeFunctions):
|
|
195
229
|
return forward_base_topology(NanogridSerendipitySpaceTopology, grid, shape)
|
|
196
230
|
|
warp/fem/utils.py
CHANGED
|
@@ -40,24 +40,6 @@ def generalized_inner(x: wp.mat33, y: wp.vec3):
|
|
|
40
40
|
return x[0] * y[0] + x[1] * y[1] + x[2] * y[2]
|
|
41
41
|
|
|
42
42
|
|
|
43
|
-
@wp.func
|
|
44
|
-
def apply_right(x: Any, y: Any):
|
|
45
|
-
"""Performs x y multiplication with y a square matrix and x either a row-vector or a matrix.
|
|
46
|
-
Will be removed once native @ operator is implemented.
|
|
47
|
-
"""
|
|
48
|
-
return x * y
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
@wp.func
|
|
52
|
-
def apply_right(x: wp.vec2, y: wp.mat22):
|
|
53
|
-
return x[0] * y[0] + x[1] * y[1]
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
@wp.func
|
|
57
|
-
def apply_right(x: wp.vec3, y: wp.mat33):
|
|
58
|
-
return x[0] * y[0] + x[1] * y[1] + x[2] * y[2]
|
|
59
|
-
|
|
60
|
-
|
|
61
43
|
@wp.func
|
|
62
44
|
def unit_element(template_type: Any, coord: int):
|
|
63
45
|
"""Returns a instance of `template_type` with a single coordinate set to 1 in the canonical basis"""
|
|
@@ -200,111 +182,135 @@ def inverse_qr(A: Any):
|
|
|
200
182
|
|
|
201
183
|
|
|
202
184
|
@wp.func
|
|
203
|
-
def
|
|
185
|
+
def _wilkinson_shift(a: Any, b: Any, c: Any, tol: Any):
|
|
186
|
+
# Wilkinson shift: estimate eigenvalue of 2x2 symmetric matrix [a, c, c, b]
|
|
187
|
+
d = (a - b) * type(tol)(0.5)
|
|
188
|
+
return b + d - wp.sign(d) * wp.sqrt(d * d + c * c)
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
@wp.func
|
|
192
|
+
def _givens_rotation(a: Any, b: Any):
|
|
193
|
+
# Givens rotation [[c, -s], [s, c]] such that s*a + c*b = 0
|
|
194
|
+
zero = type(a)(0.0)
|
|
195
|
+
one = type(a)(1.0)
|
|
196
|
+
abn_sq = a * a + b * b
|
|
197
|
+
abn = wp.select(abn_sq == zero, one / wp.sqrt(abn_sq), zero)
|
|
198
|
+
return a * abn, -b * abn
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
@wp.func
|
|
202
|
+
def tridiagonal_symmetric_eigenvalues_qr(D: Any, L: Any, Q: Any, tol: Any):
|
|
204
203
|
"""
|
|
205
|
-
Computes the eigenvalues and eigen vectors of a
|
|
204
|
+
Computes the eigenvalues and eigenvectors of a symmetric tridiagonal matrix using the
|
|
205
|
+
Symmetric tridiagonal QR algorithm with implicit Wilkinson shift
|
|
206
206
|
|
|
207
207
|
Args:
|
|
208
|
-
|
|
209
|
-
|
|
208
|
+
D: Main diagonal of the matrix
|
|
209
|
+
L: Lower diagonal of the matrix, indexed such that L[i] = A[i+1, i]
|
|
210
|
+
Q: Initialization for the eigenvectors, useful if a pre-transformation has been applied, otherwise may be identity
|
|
211
|
+
tol: Tolerance for the diagonalization residual (Linf norm of off-diagonal over diagonal terms)
|
|
210
212
|
|
|
211
213
|
Returns a tuple (D: vector of eigenvalues, P: matrix with one eigenvector per row) such that A = P^T D P
|
|
212
|
-
"""
|
|
213
214
|
|
|
214
|
-
two = A.dtype(2.0)
|
|
215
|
-
zero = A.dtype(0.0)
|
|
216
215
|
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
rn = type(ri)()
|
|
216
|
+
Ref: Arbenz P, Numerical Methods for Solving Large Scale Eigenvalue Problems, Chapter 4 (QR algorithm, Mar 13, 2018)
|
|
217
|
+
"""
|
|
220
218
|
|
|
221
|
-
|
|
222
|
-
R_L = type(ri)()
|
|
223
|
-
R_L = type(ri)(zero)
|
|
224
|
-
R_U = type(ri)(zero)
|
|
219
|
+
two = D.dtype(2.0)
|
|
225
220
|
|
|
226
|
-
# so that we can use the type length in
|
|
221
|
+
# so that we can use the type length in expressions
|
|
227
222
|
# this will prevent unrolling by warp, but should be ok for native code
|
|
228
223
|
m = int(0)
|
|
229
|
-
for _ in range(type(
|
|
224
|
+
for _ in range(type(D).length):
|
|
230
225
|
m += 1
|
|
231
226
|
|
|
227
|
+
start = int(0)
|
|
228
|
+
y = D.dtype(0.0) # moving bulge
|
|
229
|
+
x = D.dtype(0.0) # coefficient atop the bulge
|
|
230
|
+
|
|
231
|
+
for _ in range(32 * m): # failsafe, usually converges faster than that
|
|
232
|
+
# Iterate over all independent (deflated) blocks
|
|
233
|
+
end = int(-1)
|
|
234
|
+
|
|
235
|
+
for k in range(m - 1):
|
|
236
|
+
if k >= end:
|
|
237
|
+
# Check if new block is starting
|
|
238
|
+
if k == end or wp.abs(L[k]) <= tol * (wp.abs(D[k]) + wp.abs(D[k + 1])):
|
|
239
|
+
continue
|
|
240
|
+
|
|
241
|
+
# Find end of block
|
|
242
|
+
start = k
|
|
243
|
+
end = start + 1
|
|
244
|
+
while end + 1 < m:
|
|
245
|
+
if wp.abs(L[end]) <= tol * (wp.abs(D[end + 1]) + wp.abs(D[end])):
|
|
246
|
+
break
|
|
247
|
+
end += 1
|
|
248
|
+
|
|
249
|
+
# Wilkinson shift (an eigenvalue of the last 2x2 block)
|
|
250
|
+
shift = _wilkinson_shift(D[end - 1], D[end], L[end - 1], tol)
|
|
251
|
+
|
|
252
|
+
# start with eliminating lower diag of first column of shifted matrix
|
|
253
|
+
# (i.e. first step of explicit QR factorization)
|
|
254
|
+
# Then all further steps eliminate the bulge (second diag) of the non-shifted matrix
|
|
255
|
+
x = D[start] - shift
|
|
256
|
+
y = L[start]
|
|
257
|
+
|
|
258
|
+
c, s = _givens_rotation(x, y)
|
|
259
|
+
|
|
260
|
+
# Apply Givens rotation on both sides of tridiagonal matrix
|
|
261
|
+
|
|
262
|
+
# middle block
|
|
263
|
+
d = D[k] - D[k + 1]
|
|
264
|
+
z = (two * c * L[k] + d * s) * s
|
|
265
|
+
D[k] -= z
|
|
266
|
+
D[k + 1] += z
|
|
267
|
+
L[k] = d * c * s + (c * c - s * s) * L[k]
|
|
268
|
+
|
|
269
|
+
if k > start:
|
|
270
|
+
L[k - 1] = c * x - s * y
|
|
271
|
+
|
|
272
|
+
x = L[k]
|
|
273
|
+
y = -s * L[k + 1] # new bulge
|
|
274
|
+
L[k + 1] *= c
|
|
275
|
+
|
|
276
|
+
# apply givens rotation on left of Q
|
|
277
|
+
# note: Q is transposed compared to usual impls, as Warp makes it easier to index rows
|
|
278
|
+
Qk0 = Q[k]
|
|
279
|
+
Qk1 = Q[k + 1]
|
|
280
|
+
Q[k] = c * Qk0 - s * Qk1
|
|
281
|
+
Q[k + 1] = c * Qk1 + s * Qk0
|
|
282
|
+
|
|
283
|
+
if end <= 0:
|
|
284
|
+
# We did nothing, so diagonalization must have been achieved
|
|
285
|
+
break
|
|
286
|
+
|
|
287
|
+
return D, Q
|
|
288
|
+
|
|
289
|
+
|
|
290
|
+
@wp.func
|
|
291
|
+
def symmetric_eigenvalues_qr(A: Any, tol: Any):
|
|
292
|
+
"""
|
|
293
|
+
Computes the eigenvalues and eigenvectors of a square symmetric matrix A using the QR algorithm
|
|
294
|
+
|
|
295
|
+
Args:
|
|
296
|
+
A: square symmetric matrix
|
|
297
|
+
tol: Tolerance for the diagonalization residual (Linf norm of off-diagonal over diagonal terms)
|
|
298
|
+
|
|
299
|
+
Returns a tuple (D: vector of eigenvalues, P: matrix with one eigenvector per row) such that A = P^T D P
|
|
300
|
+
"""
|
|
301
|
+
|
|
232
302
|
# Put A under Hessenberg form (tridiagonal)
|
|
233
303
|
Q, H = householder_make_hessenberg(A)
|
|
234
|
-
Q = wp.transpose(Q) # algorithm below works and transposed Q as rows are easier to index
|
|
235
|
-
|
|
236
|
-
for _ in range(16 * m): # failsafe, usually converges faster than that
|
|
237
|
-
# Initialize R with current H
|
|
238
|
-
R_D = wp.get_diag(H)
|
|
239
|
-
for i in range(1, type(ri).length):
|
|
240
|
-
R_L[i - 1] = H[i, i - 1]
|
|
241
|
-
R_U[i - 1] = H[i - 1, i]
|
|
242
|
-
|
|
243
|
-
# compute QR decomposition, directly transform H and eigenvectors
|
|
244
|
-
for n in range(1, m):
|
|
245
|
-
i = n - 1
|
|
246
|
-
|
|
247
|
-
# compute reflection
|
|
248
|
-
xi = R_D[i]
|
|
249
|
-
xn = R_L[i]
|
|
250
|
-
|
|
251
|
-
xii = xi * xi
|
|
252
|
-
xnn = xn * xn
|
|
253
|
-
alpha = wp.sqrt(xii + xnn) * wp.sign(xi)
|
|
254
|
-
|
|
255
|
-
xi += alpha
|
|
256
|
-
xii = xi * xi
|
|
257
|
-
xin = xi * xn
|
|
258
|
-
|
|
259
|
-
two_over_x_sq = wp.select(alpha == zero, two / (xii + xnn), zero)
|
|
260
|
-
xii *= two_over_x_sq
|
|
261
|
-
xin *= two_over_x_sq
|
|
262
|
-
xnn *= two_over_x_sq
|
|
263
|
-
|
|
264
|
-
# Left-multiply R and Q, multiply H on both sides
|
|
265
|
-
# Note that R should get non-zero coefficients on the second upper diagonal,
|
|
266
|
-
# but those won't get read afterwards, so we can ignore them
|
|
267
|
-
|
|
268
|
-
R_D[n] -= R_U[i] * xin + R_D[n] * xnn
|
|
269
|
-
R_U[n] -= R_U[n] * xnn
|
|
270
|
-
|
|
271
|
-
ri = Q[i]
|
|
272
|
-
rn = Q[n]
|
|
273
|
-
Q[i] -= ri * xii + rn * xin
|
|
274
|
-
Q[n] -= ri * xin + rn * xnn
|
|
275
|
-
|
|
276
|
-
# H is multiplied on both sides, but stays tridiagonal except for moving buldge
|
|
277
|
-
# Note: we could reduce the stencil to for 4 columns qui we do below,
|
|
278
|
-
# but unlikely to be worth it for our small matrix sizes
|
|
279
|
-
ri = H[i]
|
|
280
|
-
rn = H[n]
|
|
281
|
-
H[i] -= ri * xii + rn * xin
|
|
282
|
-
H[n] -= ri * xin + rn * xnn
|
|
283
|
-
|
|
284
|
-
# multiply on right, manually. We just need to consider 4 rows
|
|
285
|
-
if i > 0:
|
|
286
|
-
ci = H[i - 1, i]
|
|
287
|
-
cn = H[i - 1, n]
|
|
288
|
-
H[i - 1, i] -= ci * xii + cn * xin
|
|
289
|
-
H[i - 1, n] -= ci * xin + cn * xnn
|
|
290
|
-
|
|
291
|
-
for k in range(2):
|
|
292
|
-
ci = H[i + k, i]
|
|
293
|
-
cn = H[i + k, n]
|
|
294
|
-
H[i + k, i] -= ci * xii + cn * xin
|
|
295
|
-
H[i + k, n] -= ci * xin + cn * xnn
|
|
296
|
-
|
|
297
|
-
if n + 1 < m:
|
|
298
|
-
ci = H[n + 1, i]
|
|
299
|
-
cn = H[n + 1, n]
|
|
300
|
-
H[n + 1, i] -= ci * xii + cn * xin
|
|
301
|
-
H[n + 1, n] -= ci * xin + cn * xnn
|
|
302
|
-
|
|
303
|
-
# Terminate if the upper diagonal of R is near zero
|
|
304
|
-
if wp.length_sq(R_U) < tol:
|
|
305
|
-
break
|
|
306
304
|
|
|
307
|
-
|
|
305
|
+
# tridiagonal storage for H
|
|
306
|
+
D = wp.get_diag(H)
|
|
307
|
+
L = type(D)(A.dtype(0.0))
|
|
308
|
+
for i in range(1, type(D).length):
|
|
309
|
+
L[i - 1] = H[i, i - 1]
|
|
310
|
+
|
|
311
|
+
Qt = wp.transpose(Q)
|
|
312
|
+
ev, P = tridiagonal_symmetric_eigenvalues_qr(D, L, Qt, tol)
|
|
313
|
+
return ev, P
|
|
308
314
|
|
|
309
315
|
|
|
310
316
|
def compress_node_indices(
|