PyPI - warp-lang - Versions diffs - 1.4.2__py3-none-manylinux2014_aarch64.whl → 1.5.1__py3-none-manylinux2014_aarch64.whl - Mend

warp-lang 1.4.2__py3-none-manylinux2014_aarch64.whl → 1.5.1__py3-none-manylinux2014_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of warp-lang might be problematic. Click here for more details.

Files changed (166) hide show

warp/__init__.py +4 -0
warp/autograd.py +43 -8
warp/bin/warp-clang.so +0 -0
warp/bin/warp.so +0 -0
warp/build.py +21 -2
warp/build_dll.py +23 -6
warp/builtins.py +1819 -7
warp/codegen.py +197 -61
warp/config.py +2 -2
warp/context.py +379 -107
warp/examples/assets/pixel.jpg +0 -0
warp/examples/benchmarks/benchmark_cloth_paddle.py +86 -0
warp/examples/benchmarks/benchmark_gemm.py +121 -0
warp/examples/benchmarks/benchmark_interop_paddle.py +158 -0
warp/examples/benchmarks/benchmark_tile.py +179 -0
warp/examples/fem/example_adaptive_grid.py +37 -10
warp/examples/fem/example_apic_fluid.py +3 -2
warp/examples/fem/example_convection_diffusion_dg.py +4 -5
warp/examples/fem/example_deformed_geometry.py +1 -1
warp/examples/fem/example_diffusion_3d.py +47 -4
warp/examples/fem/example_distortion_energy.py +220 -0
warp/examples/fem/example_magnetostatics.py +127 -85
warp/examples/fem/example_nonconforming_contact.py +5 -5
warp/examples/fem/example_stokes.py +3 -1
warp/examples/fem/example_streamlines.py +12 -19
warp/examples/fem/utils.py +38 -15
warp/examples/sim/example_cloth.py +4 -25
warp/examples/sim/example_quadruped.py +2 -1
warp/examples/tile/example_tile_convolution.py +58 -0
warp/examples/tile/example_tile_fft.py +47 -0
warp/examples/tile/example_tile_filtering.py +105 -0
warp/examples/tile/example_tile_matmul.py +79 -0
warp/examples/tile/example_tile_mlp.py +375 -0
warp/fem/__init__.py +8 -0
warp/fem/cache.py +16 -12
warp/fem/dirichlet.py +1 -1
warp/fem/domain.py +44 -1
warp/fem/field/__init__.py +1 -2
warp/fem/field/field.py +31 -19
warp/fem/field/nodal_field.py +101 -49
warp/fem/field/virtual.py +794 -0
warp/fem/geometry/__init__.py +2 -2
warp/fem/geometry/deformed_geometry.py +3 -105
warp/fem/geometry/element.py +13 -0
warp/fem/geometry/geometry.py +165 -7
warp/fem/geometry/grid_2d.py +3 -6
warp/fem/geometry/grid_3d.py +31 -28
warp/fem/geometry/hexmesh.py +3 -46
warp/fem/geometry/nanogrid.py +3 -2
warp/fem/geometry/{quadmesh_2d.py → quadmesh.py} +280 -159
warp/fem/geometry/tetmesh.py +2 -43
warp/fem/geometry/{trimesh_2d.py → trimesh.py} +354 -186
warp/fem/integrate.py +683 -261
warp/fem/linalg.py +404 -0
warp/fem/operator.py +101 -18
warp/fem/polynomial.py +5 -5
warp/fem/quadrature/quadrature.py +45 -21
warp/fem/space/__init__.py +45 -11
warp/fem/space/basis_function_space.py +451 -0
warp/fem/space/basis_space.py +58 -11
warp/fem/space/function_space.py +146 -5
warp/fem/space/grid_2d_function_space.py +80 -66
warp/fem/space/grid_3d_function_space.py +113 -68
warp/fem/space/hexmesh_function_space.py +96 -108
warp/fem/space/nanogrid_function_space.py +62 -110
warp/fem/space/quadmesh_function_space.py +208 -0
warp/fem/space/shape/__init__.py +45 -7
warp/fem/space/shape/cube_shape_function.py +328 -54
warp/fem/space/shape/shape_function.py +10 -1
warp/fem/space/shape/square_shape_function.py +328 -60
warp/fem/space/shape/tet_shape_function.py +269 -19
warp/fem/space/shape/triangle_shape_function.py +238 -19
warp/fem/space/tetmesh_function_space.py +69 -37
warp/fem/space/topology.py +38 -0
warp/fem/space/trimesh_function_space.py +179 -0
warp/fem/utils.py +6 -331
warp/jax_experimental.py +3 -1
warp/native/array.h +15 -0
warp/native/builtin.h +66 -26
warp/native/bvh.h +4 -0
warp/native/coloring.cpp +604 -0
warp/native/cuda_util.cpp +68 -51
warp/native/cuda_util.h +2 -1
warp/native/fabric.h +8 -0
warp/native/hashgrid.h +4 -0
warp/native/marching.cu +8 -0
warp/native/mat.h +14 -3
warp/native/mathdx.cpp +59 -0
warp/native/mesh.h +4 -0
warp/native/range.h +13 -1
warp/native/reduce.cpp +9 -1
warp/native/reduce.cu +7 -0
warp/native/runlength_encode.cpp +9 -1
warp/native/runlength_encode.cu +7 -1
warp/native/scan.cpp +8 -0
warp/native/scan.cu +8 -0
warp/native/scan.h +8 -1
warp/native/sparse.cpp +8 -0
warp/native/sparse.cu +8 -0
warp/native/temp_buffer.h +7 -0
warp/native/tile.h +1854 -0
warp/native/tile_gemm.h +341 -0
warp/native/tile_reduce.h +210 -0
warp/native/volume_builder.cu +8 -0
warp/native/volume_builder.h +8 -0
warp/native/warp.cpp +10 -2
warp/native/warp.cu +369 -15
warp/native/warp.h +12 -2
warp/optim/adam.py +39 -4
warp/paddle.py +29 -12
warp/render/render_opengl.py +140 -67
warp/sim/graph_coloring.py +292 -0
warp/sim/import_urdf.py +8 -8
warp/sim/integrator_euler.py +4 -2
warp/sim/integrator_featherstone.py +115 -44
warp/sim/integrator_vbd.py +6 -0
warp/sim/model.py +109 -32
warp/sparse.py +1 -1
warp/stubs.py +569 -4
warp/tape.py +12 -7
warp/tests/assets/pixel.npy +0 -0
warp/tests/aux_test_instancing_gc.py +18 -0
warp/tests/test_array.py +39 -0
warp/tests/test_codegen.py +81 -1
warp/tests/test_codegen_instancing.py +30 -0
warp/tests/test_collision.py +110 -0
warp/tests/test_coloring.py +251 -0
warp/tests/test_context.py +34 -0
warp/tests/test_examples.py +21 -5
warp/tests/test_fem.py +453 -113
warp/tests/test_func.py +34 -4
warp/tests/test_generics.py +52 -0
warp/tests/test_iter.py +68 -0
warp/tests/test_lerp.py +13 -87
warp/tests/test_mat_scalar_ops.py +1 -1
warp/tests/test_matmul.py +6 -9
warp/tests/test_matmul_lite.py +6 -11
warp/tests/test_mesh_query_point.py +1 -1
warp/tests/test_module_hashing.py +23 -0
warp/tests/test_overwrite.py +45 -0
warp/tests/test_paddle.py +27 -87
warp/tests/test_print.py +56 -1
warp/tests/test_smoothstep.py +17 -83
warp/tests/test_spatial.py +1 -1
warp/tests/test_static.py +3 -3
warp/tests/test_tile.py +744 -0
warp/tests/test_tile_mathdx.py +144 -0
warp/tests/test_tile_mlp.py +383 -0
warp/tests/test_tile_reduce.py +374 -0
warp/tests/test_tile_shared_memory.py +190 -0
warp/tests/test_vbd.py +12 -20
warp/tests/test_volume.py +43 -0
warp/tests/unittest_suites.py +19 -2
warp/tests/unittest_utils.py +4 -2
warp/types.py +340 -74
warp/utils.py +23 -3
{warp_lang-1.4.2.dist-info → warp_lang-1.5.1.dist-info}/METADATA +32 -7
{warp_lang-1.4.2.dist-info → warp_lang-1.5.1.dist-info}/RECORD +161 -134
{warp_lang-1.4.2.dist-info → warp_lang-1.5.1.dist-info}/WHEEL +1 -1
warp/fem/field/test.py +0 -180
warp/fem/field/trial.py +0 -183
warp/fem/space/collocated_function_space.py +0 -102
warp/fem/space/quadmesh_2d_function_space.py +0 -261
warp/fem/space/trimesh_2d_function_space.py +0 -153
{warp_lang-1.4.2.dist-info → warp_lang-1.5.1.dist-info}/LICENSE.md +0 -0
{warp_lang-1.4.2.dist-info → warp_lang-1.5.1.dist-info}/top_level.txt +0 -0

warp/examples/tile/example_tile_mlp.py ADDED Viewed

@@ -0,0 +1,375 @@
+# Copyright (c) 2024 NVIDIA CORPORATION.  All rights reserved.
+# NVIDIA CORPORATION and its licensors retain all intellectual property
+# and proprietary rights in and to this software, related documentation
+# and any modifications thereto.  Any use, reproduction, disclosure or
+# distribution of this software and related documentation without an express
+# license agreement from NVIDIA CORPORATION is strictly prohibited.
+###########################################################################
+# Example Image Multilayer Perceptron (MLP)
+#
+# Shows how to train a coordinate-based MLP on an image to predict the RGB
+# color at a given input position. A positional encoding is applied to the
+# input coordinates to improve the ability of the MLP to represent
+# higher-frequency content. Note: this header originally advertised a
+# '--no_encoding' option to disable the encoding, but the argument parser in
+# this file only defines '--train_iters'.
+#
+# References:
+#   Ben Mildenhall et al. 2021. NeRF: representing scenes
+#   as neural radiance fields for view synthesis. Commun. ACM 65, 1
+#   (January 2022), 99–106. https://doi.org/10.1145/3503250
+#
+###########################################################################
+import math
+import os
+import numpy as np
+from PIL import Image
+import warp as wp
+import warp.examples
+import warp.optim
+rng = np.random.default_rng(45)
+def create_layer(dim_in, dim_hid, dtype=float):
+    """Create the randomly initialised parameters of one dense layer.
+
+    Both arrays are sampled uniformly between ``-1/sqrt(dim_in)`` and
+    ``1/sqrt(dim_in)`` from the module-level generator and are created with
+    ``requires_grad=True``.
+
+    Args:
+        dim_in: Number of layer inputs (columns of the weight matrix).
+        dim_hid: Number of layer outputs (rows of the weight matrix).
+        dtype: Element type of the returned Warp arrays.
+
+    Returns:
+        A ``(weights, bias)`` tuple with shapes ``(dim_hid, dim_in)`` and
+        ``(dim_hid, 1)``.
+    """
+    bound = 1.0 / np.sqrt(dim_in)
+    # the weights are drawn before the bias so the shared generator is
+    # consumed in the same order as before
+    weight_init = rng.uniform(-bound, bound, (dim_hid, dim_in))
+    bias_init = rng.uniform(-bound, bound, (dim_hid, 1))
+    return (
+        wp.array(weight_init, dtype=dtype, requires_grad=True),
+        wp.array(bias_init, dtype=dtype, requires_grad=True),
+    )
+def create_array(dim_in, dim_hid, dtype=float):
+    """Create a randomly initialised ``(dim_hid, dim_in)`` Warp array.
+
+    Values are sampled uniformly between ``-1/sqrt(dim_in)`` and
+    ``1/sqrt(dim_in)`` from the module-level generator; the array is created
+    with ``requires_grad=True``.
+    """
+    bound = 1.0 / np.sqrt(dim_in)
+    return wp.array(rng.uniform(-bound, bound, (dim_hid, dim_in)), dtype=dtype, requires_grad=True)
+# number of frequencies for the positional encoding
+NUM_FREQ = wp.constant(8)
+DIM_IN = wp.constant(4 * NUM_FREQ)  # sin,cos for both x,y at each frequency
+# width of the hidden layers and number of output channels (RGB)
+DIM_HID = 32
+DIM_OUT = 3
+# threads per-block
+NUM_THREADS = 32
+# resolution the reference image is resized to
+IMG_WIDTH = 512
+IMG_HEIGHT = 512
+# number of pixels processed per training batch
+BATCH_SIZE = min(1024, int((IMG_WIDTH * IMG_HEIGHT) / 8))
+# dtype for our weights and bias matrices
+dtype = wp.float16
+# Rectified linear unit: returns max(x, 0) in the network dtype.
+# Passed to wp.tile_map by the compute kernel as the layer activation.
+@wp.func
+def relu(x: dtype):
+    return wp.max(x, dtype(0.0))
+# Forward pass of the MLP for one pixel per thread, with optional loss
+# accumulation and optional image output.
+#
+# Each thread builds the positional encoding of its pixel; wp.tile() combines
+# the per-thread vectors of a block into one tile, the four layers are
+# evaluated as tile matmuls with a broadcast bias followed by relu, and
+# wp.untile() returns the result to per-thread values.
+#
+#   indices    linear pixel indices (row * IMG_WIDTH + col), one per thread
+#   weights_*  layer weight matrices, read with wp.tile_load
+#   bias_*     layer bias columns, read with wp.tile_load
+#   reference  target colors, read as reference[channel, linear]
+#   loss       optional accumulator; element 0 receives the squared-error terms
+#   out        optional buffer, written as out[channel, linear]
+@wp.kernel
+def compute(
+    indices: wp.array(dtype=int),
+    weights_0: wp.array2d(dtype=dtype),
+    bias_0: wp.array2d(dtype=dtype),
+    weights_1: wp.array2d(dtype=dtype),
+    bias_1: wp.array2d(dtype=dtype),
+    weights_2: wp.array2d(dtype=dtype),
+    bias_2: wp.array2d(dtype=dtype),
+    weights_3: wp.array2d(dtype=dtype),
+    bias_3: wp.array2d(dtype=dtype),
+    reference: wp.array2d(dtype=float),
+    loss: wp.array1d(dtype=float),
+    out: wp.array2d(dtype=float),
+):
+    # batch indices
+    linear = indices[wp.tid()]
+    # NOTE(review): presumably '/' on two ints is integer division inside a Warp
+    # kernel (the torch path uses '//' for the same step) - confirm
+    row = linear / IMG_WIDTH
+    col = linear % IMG_WIDTH
+    # normalize input coordinates to [-1, 1]
+    # NOTE(review): row is scaled by IMG_WIDTH and col by IMG_HEIGHT; this only
+    # matches the image axes because both constants are 512
+    x = (float(row) / float(IMG_WIDTH) - 0.5) * 2.0
+    y = (float(col) / float(IMG_HEIGHT) - 0.5) * 2.0
+    local = wp.vector(dtype=dtype, length=DIM_IN)
+    # construct positional encoding
+    for s in range(NUM_FREQ):
+        # frequency doubles with every step: pi, 2*pi, 4*pi, ...
+        scale = wp.pow(2.0, float(s)) * wp.pi
+        # x-coord
+        local[s * 4 + 0] = dtype(wp.sin(x * scale))
+        local[s * 4 + 1] = dtype(wp.cos(x * scale))
+        # y-coord
+        local[s * 4 + 2] = dtype(wp.sin(y * scale))
+        local[s * 4 + 3] = dtype(wp.cos(y * scale))
+    # tile feature vectors across the block, returns [dim(f), NUM_THREADS]
+    f = wp.tile(local)
+    # input layer
+    w0 = wp.tile_load(weights_0, 0, 0, m=DIM_HID, n=DIM_IN)
+    b0 = wp.tile_load(bias_0, 0, 0, m=DIM_HID, n=1)
+    z = wp.tile_map(relu, wp.tile_matmul(w0, f) + wp.tile_broadcast(b0, m=DIM_HID, n=NUM_THREADS))
+    # hidden layer
+    w1 = wp.tile_load(weights_1, 0, 0, m=DIM_HID, n=DIM_HID)
+    b1 = wp.tile_load(bias_1, 0, 0, m=DIM_HID, n=1)
+    z = wp.tile_map(relu, wp.tile_matmul(w1, z) + wp.tile_broadcast(b1, m=DIM_HID, n=NUM_THREADS))
+    w2 = wp.tile_load(weights_2, 0, 0, m=DIM_HID, n=DIM_HID)
+    b2 = wp.tile_load(bias_2, 0, 0, m=DIM_HID, n=1)
+    z = wp.tile_map(relu, wp.tile_matmul(w2, z) + wp.tile_broadcast(b2, m=DIM_HID, n=NUM_THREADS))
+    # output layer (relu is applied here as well, so predictions are >= 0)
+    w3 = wp.tile_load(weights_3, 0, 0, m=DIM_OUT, n=DIM_HID)
+    b3 = wp.tile_load(bias_3, 0, 0, m=DIM_OUT, n=1)
+    o = wp.tile_map(relu, wp.tile_matmul(w3, z) + wp.tile_broadcast(b3, m=DIM_OUT, n=NUM_THREADS))
+    # untile back to SIMT
+    output = wp.untile(o)
+    # compute error
+    error = wp.vec3(
+        float(output[0]) - reference[0, linear],
+        float(output[1]) - reference[1, linear],
+        float(output[2]) - reference[2, linear],
+    )
+    # write MSE loss
+    # the divisor is always 3 * BATCH_SIZE, independent of the launch dimension
+    if loss:
+        wp.atomic_add(loss, 0, wp.length_sq(error) / float(3 * BATCH_SIZE))
+    #  write image output
+    # skipped when the caller passes None for out (as the training launch does)
+    if out:
+        for i in range(DIM_OUT):
+            out[i, linear] = float(output[i])
+class Example:
+    def __init__(self, train_iters):
+        """Create the network parameters, load the reference image and plan the schedule.
+
+        Args:
+            train_iters: Total number of training iterations; converted below
+                into a whole number of epochs (at least one).
+        """
+        # four layers: DIM_IN -> DIM_HID -> DIM_HID -> DIM_HID -> DIM_OUT
+        self.weights_0, self.bias_0 = create_layer(DIM_IN, DIM_HID, dtype=dtype)
+        self.weights_1, self.bias_1 = create_layer(DIM_HID, DIM_HID, dtype=dtype)
+        self.weights_2, self.bias_2 = create_layer(DIM_HID, DIM_HID, dtype=dtype)
+        self.weights_3, self.bias_3 = create_layer(DIM_HID, DIM_OUT, dtype=dtype)
+        # reference
+        reference_path = os.path.join(wp.examples.get_asset_directory(), "pixel.jpg")
+        with Image.open(reference_path) as im:
+            # resize, force RGB and scale the 8-bit channel values by 1/255
+            reference_image = np.asarray(im.resize((IMG_WIDTH, IMG_HEIGHT)).convert("RGB")) / 255.0
+        # flatten the pixels and transpose to (3, IMG_WIDTH * IMG_HEIGHT): one row per channel
+        self.reference = wp.array(reference_image.reshape(IMG_WIDTH * IMG_HEIGHT, 3).T, dtype=float)
+        # create randomized batch indices
+        indices = np.arange(0, IMG_WIDTH * IMG_HEIGHT, dtype=np.int32)
+        rng.shuffle(indices)
+        self.indices = wp.array(indices)
+        # one epoch visits every pixel once, BATCH_SIZE pixels at a time
+        self.num_batches = int((IMG_WIDTH * IMG_HEIGHT) / BATCH_SIZE)
+        self.max_iters = train_iters
+        self.max_epochs = max(1, int(self.max_iters / self.num_batches))
+    def train_warp(self):
+        """Train the MLP with the Warp tile kernel and save the predicted image.
+
+        One full epoch (kernel launch, backward pass and Adam step for every
+        batch) is recorded between ``wp.capture_begin()`` and
+        ``wp.capture_end()`` and then replayed ``self.max_epochs`` times. The
+        trained network is finally evaluated on all pixels and the result is
+        written to ``example_tile_mlp.jpg``.
+        """
+        params = [
+            self.weights_0,
+            self.bias_0,
+            self.weights_1,
+            self.bias_1,
+            self.weights_2,
+            self.bias_2,
+            self.weights_3,
+            self.bias_3,
+        ]
+        # the optimizer is handed flattened parameter and gradient arrays
+        optimizer_grads = [p.grad.flatten() for p in params]
+        optimizer_inputs = [p.flatten() for p in params]
+        optimizer = warp.optim.Adam(optimizer_inputs, lr=0.01)
+        loss = wp.zeros(1, dtype=float, requires_grad=True)
+        # (DIM_OUT, IMG_WIDTH * IMG_HEIGHT) image buffer; create_array fills it with
+        # random values, which the evaluation launch below overwrites for every pixel
+        output = create_array(IMG_WIDTH * IMG_HEIGHT, DIM_OUT)
+        # capture graph for whole epoch
+        wp.capture_begin()
+        for b in range(0, IMG_WIDTH * IMG_HEIGHT, BATCH_SIZE):
+            # the loss is reset per batch, so after a replay it holds the last batch's value
+            loss.zero_()
+            with wp.Tape() as tape:
+                wp.launch(
+                    compute,
+                    dim=[BATCH_SIZE],
+                    inputs=[
+                        self.indices[b : b + BATCH_SIZE],
+                        self.weights_0,
+                        self.bias_0,
+                        self.weights_1,
+                        self.bias_1,
+                        self.weights_2,
+                        self.bias_2,
+                        self.weights_3,
+                        self.bias_3,
+                        self.reference,
+                        loss,
+                        # no image output while training
+                        None,
+                    ],
+                    block_dim=NUM_THREADS,
+                )
+            tape.backward(loss)
+            optimizer.step(optimizer_grads)
+            # clear the gradients before the next batch
+            tape.zero()
+        graph = wp.capture_end()
+        with wp.ScopedTimer("Training"):
+            for i in range(self.max_epochs):
+                with wp.ScopedTimer("Epoch"):
+                    wp.capture_launch(graph)
+                    print(f"Epoch: {i} Loss: {loss.numpy()}")
+        # evaluate full image
+        # NOTE(review): loss is passed here too and is not zeroed first, so this
+        # launch keeps accumulating into it; only `output` is used afterwards
+        wp.launch(
+            compute,
+            dim=[IMG_WIDTH * IMG_HEIGHT],
+            inputs=[
+                self.indices,
+                self.weights_0,
+                self.bias_0,
+                self.weights_1,
+                self.bias_1,
+                self.weights_2,
+                self.bias_2,
+                self.weights_3,
+                self.bias_3,
+                self.reference,
+                loss,
+                output,
+            ],
+            block_dim=NUM_THREADS,
+        )
+        self.save_image("example_tile_mlp.jpg", output.numpy())
+    def train_torch(self):
+        """Train the same network with PyTorch and save the predicted image.
+
+        Mirrors ``train_warp``: a warm-up step runs on a side stream, one full
+        epoch is captured into a ``torch.cuda.CUDAGraph`` and replayed
+        ``self.max_epochs`` times, and the result is written to
+        ``example_tile_mlp_torch.jpg``. The call in the ``__main__`` section
+        is commented out, so this path does not run by default.
+        """
+        import torch as tc
+        # wrap the Warp parameter arrays as torch parameters
+        # NOTE(review): presumably wp.to_torch shares memory with the Warp arrays - confirm
+        weights_0 = tc.nn.Parameter(wp.to_torch(self.weights_0))
+        weights_1 = tc.nn.Parameter(wp.to_torch(self.weights_1))
+        weights_2 = tc.nn.Parameter(wp.to_torch(self.weights_2))
+        weights_3 = tc.nn.Parameter(wp.to_torch(self.weights_3))
+        bias_0 = tc.nn.Parameter(wp.to_torch(self.bias_0))
+        bias_1 = tc.nn.Parameter(wp.to_torch(self.bias_1))
+        bias_2 = tc.nn.Parameter(wp.to_torch(self.bias_2))
+        bias_3 = tc.nn.Parameter(wp.to_torch(self.bias_3))
+        indices = wp.to_torch(self.indices)
+        reference = wp.to_torch(self.reference)
+        # note: learning rate, betas and eps differ from the Warp path (lr=0.01 there)
+        optimizer = tc.optim.Adam(
+            [weights_0, bias_0, weights_1, bias_1, weights_2, bias_2, weights_3, bias_3],
+            capturable=True,
+            lr=0.0001,
+            betas=(0.9, 0.95),
+            eps=1.0e-6,
+        )
+        # generate frequency space encoding of pixels
+        # based on their linear index in the image
+        def encode(linear):
+            row = (linear // IMG_WIDTH).float()
+            col = (linear % IMG_WIDTH).float()
+            # same normalization as the Warp kernel: row / IMG_WIDTH, col / IMG_HEIGHT
+            x = (row / float(IMG_WIDTH) - 0.5) * 2.0
+            y = (col / float(IMG_HEIGHT) - 0.5) * 2.0
+            encoding = tc.zeros((NUM_FREQ * 4, len(linear)), dtype=tc.float16, device="cuda")
+            for s in range(NUM_FREQ):
+                scale = math.pow(2.0, float(s)) * math.pi
+                # Directly write the computed values into the encoding tensor
+                encoding[s * 4 + 0, :] = tc.sin(scale * x)
+                encoding[s * 4 + 1, :] = tc.cos(scale * x)
+                encoding[s * 4 + 2, :] = tc.sin(scale * y)
+                encoding[s * 4 + 3, :] = tc.cos(scale * y)
+            return encoding
+        stream = tc.cuda.Stream()
+        graph = tc.cuda.CUDAGraph()
+        # warm-up
+        # one forward/backward/step on random data before the capture below
+        with tc.cuda.stream(stream):
+            f = tc.rand((NUM_FREQ * 4, BATCH_SIZE), dtype=tc.float16, device="cuda")
+            z = tc.relu(weights_0 @ f + bias_0)
+            z = tc.relu(weights_1 @ z + bias_1)
+            z = tc.relu(weights_2 @ z + bias_2)
+            z = tc.relu(weights_3 @ z + bias_3)
+            ref = tc.rand((3, BATCH_SIZE), dtype=tc.float16, device="cuda")
+            loss = tc.mean((z - ref) ** 2)
+            optimizer.zero_grad()
+            loss.backward()
+            optimizer.step()
+        # capture one full epoch (every batch) into the graph
+        with tc.cuda.graph(graph):
+            for b in range(0, IMG_WIDTH * IMG_HEIGHT, BATCH_SIZE):
+                linear = indices[b : b + BATCH_SIZE]
+                f = encode(linear)
+                z = tc.relu(weights_0 @ f + bias_0)
+                z = tc.relu(weights_1 @ z + bias_1)
+                z = tc.relu(weights_2 @ z + bias_2)
+                z = tc.relu(weights_3 @ z + bias_3)
+                ref = reference[:, linear]
+                loss = tc.mean((z - ref) ** 2)
+                optimizer.zero_grad()
+                loss.backward()
+                optimizer.step()
+        with wp.ScopedTimer("Training (Torch)"):
+            for _i in range(self.max_epochs):
+                with wp.ScopedTimer("Epoch"):
+                    graph.replay()
+                    # `loss` is the tensor bound by the last captured batch
+                    print(loss)
+        # evaluate every pixel in linear order
+        # NOTE(review): tc.arange is created without a device argument while
+        # encode() allocates on "cuda" - confirm the mixed-device writes behave as intended
+        f = encode(tc.arange(0, IMG_WIDTH * IMG_HEIGHT))
+        z = tc.relu(weights_0 @ f + bias_0)
+        z = tc.relu(weights_1 @ z + bias_1)
+        z = tc.relu(weights_2 @ z + bias_2)
+        z = tc.relu(weights_3 @ z + bias_3)
+        self.save_image("example_tile_mlp_torch.jpg", z.detach().cpu().numpy())
+    def save_image(self, name, output):
+        """Save a channel-major prediction as an 8-bit RGB image file.
+
+        Args:
+            name: Output path handed to the PIL image's ``save`` method.
+            output: NumPy array of shape ``(3, IMG_WIDTH * IMG_HEIGHT)`` with
+                channel values nominally in ``[0, 1]``.
+        """
+        # pixels are laid out row-major (linear = row * IMG_WIDTH + col), so the
+        # leading image axis is the height
+        predicted_image = output.T.reshape(IMG_HEIGHT, IMG_WIDTH, 3)
+        # the relu output layer is unbounded above; clamp before quantizing so
+        # over-range values saturate instead of wrapping in the uint8 cast
+        predicted_image = (np.clip(predicted_image, 0.0, 1.0) * 255).astype(np.uint8)
+        predicted_image_pil = Image.fromarray(predicted_image)
+        predicted_image_pil.save(name)
+if __name__ == "__main__":
+    import argparse
+    # only --train_iters is defined; parse_known_args leaves unrecognised
+    # arguments in the second return value, which is discarded here
+    cli = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+    cli.add_argument("--train_iters", type=int, default=20000, help="Total number of training iterations.")
+    known_args, _unknown = cli.parse_known_args()
+    # the example runs on the first CUDA device
+    with wp.ScopedDevice("cuda:0"):
+        example = Example(known_args.train_iters)
+        example.train_warp()
+        # example.train_torch()

warp/fem/__init__.py CHANGED Viewed

@@ -24,14 +24,17 @@ from .geometry import (
     LinearGeometryPartition,
     Nanogrid,
     Quadmesh2D,
+    Quadmesh3D,
     Tetmesh,
     Trimesh2D,
+    Trimesh3D,
 )
 from .integrate import integrate, interpolate
 from .operator import (
     D,
     at_node,
     average,
+    cells,
     curl,
     deformation_gradient,
     degree,
@@ -50,6 +53,9 @@ from .operator import (
     normal,
     outer,
     position,
+    to_cell_side,
+    to_inner_cell,
+    to_outer_cell,
 )
 from .polynomial import Polynomial
 from .quadrature import ExplicitQuadrature, NodalQuadrature, PicQuadrature, Quadrature, RegularQuadrature
@@ -65,6 +71,8 @@ from .space import (
     SpaceTopology,
     SymmetricTensorMapper,
     make_collocated_function_space,
+    make_contravariant_function_space,
+    make_covariant_function_space,
     make_polynomial_basis_space,
     make_polynomial_space,
     make_space_partition,

warp/fem/cache.py CHANGED Viewed

@@ -6,6 +6,7 @@ from copy import copy
 from typing import Any, Callable, Dict, Optional, Tuple, Union
 import warp as wp
+from warp.fem.operator import Integrand
 _kernel_cache = {}
 _struct_cache = {}
@@ -186,7 +187,7 @@ class ExpandStarredArgumentStruct(ast.NodeTransformer):
 def get_integrand_function(
-    integrand: "warp.fem.operator.Integrand",  # noqa: F821
+    integrand: Integrand,
     suffix: str,
     func=None,
     annotations=None,
@@ -208,27 +209,30 @@ def get_integrand_function(
 def get_integrand_kernel(
-    integrand: "warp.fem.operator.Integrand",  # noqa: F821
+    integrand: Integrand,
     suffix: str,
     kernel_fn: Optional[Callable] = None,
     kernel_options: Dict[str, Any] = None,
     code_transformers=None,
 ):
-    if kernel_options is None:
-        kernel_options = {}
+    options = integrand.module.options.copy()
+    options.update(integrand.kernel_options)
+    if kernel_options is not None:
+        options.update(kernel_options)
-    key = _make_key(integrand.func, suffix, use_qualified_name=True)
+    kernel_key = _make_key(integrand.func, suffix, use_qualified_name=True)
+    opts_key = "".join([f"{k}:{v}" for k, v in sorted(options.items())])
+    cache_key = kernel_key + opts_key
-    if key not in _kernel_cache:
+    if cache_key not in _kernel_cache:
         if kernel_fn is None:
             return None
         module = wp.get_module(f"{integrand.module.name}.{integrand.name}")
-        module.options = copy(integrand.module.options)
-        module.options.update(kernel_options)
-        _kernel_cache[key] = wp.Kernel(func=kernel_fn, key=key, module=module, code_transformers=code_transformers)
-    return _kernel_cache[key]
+        _kernel_cache[cache_key] = wp.Kernel(
+            func=kernel_fn, key=kernel_key, module=module, code_transformers=code_transformers, options=options
+        )
+    return _kernel_cache[cache_key]
 def cached_arg_value(func: Callable):
@@ -478,7 +482,7 @@ def borrow_temporary(
     if temporary_store is None:
         temporary_store = TemporaryStore._default_store
-    if temporary_store is None:
+    if temporary_store is None or (requires_grad and wp.context.runtime.tape is not None):
         return Temporary(
             array=wp.empty(shape=shape, dtype=dtype, pinned=pinned, device=device, requires_grad=requires_grad)
         )

warp/fem/dirichlet.py CHANGED Viewed

@@ -1,7 +1,7 @@
 from typing import Any, Optional
 import warp as wp
-from warp.fem.utils import array_axpy, symmetric_eigenvalues_qr
+from warp.fem.linalg import array_axpy, symmetric_eigenvalues_qr
 from warp.sparse import BsrMatrix, bsr_assign, bsr_axpy, bsr_copy, bsr_mm, bsr_mv
 from warp.types import type_is_matrix, type_length

warp/fem/domain.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from typing import Optional, Union
+from typing import Any, Optional, Set, Union
 import warp as wp
 import warp.codegen
@@ -11,6 +11,7 @@ from warp.fem.geometry import (
     GeometryPartition,
     WholeGeometryPartition,
 )
+from warp.fem.operator import Operator
 from warp.fem.types import ElementKind
 GeometryOrPartition = Union[Geometry, GeometryPartition]
@@ -94,6 +95,10 @@ class GeometryDomain:
     element_lookup: wp.Function
     """Device function returning the sample point corresponding to a world position"""
+    def notify_operator_usage(self, ops: Set[Operator]):
+        """Makes the Domain aware that the operators `ops` will be applied"""
+        pass
 class Cells(GeometryDomain):
     """A Domain containing all cells of the geometry or geometry partition"""
@@ -160,6 +165,17 @@ class Cells(GeometryDomain):
     def element_lookup(self) -> wp.Function:
         return self.geometry.cell_lookup
+    @property
+    def domain_cell_arg(self) -> wp.Function:
+        return Cells._identity_fn
+    def cell_domain(self):
+        return self
+    @wp.func
+    def _identity_fn(x: Any):
+        return x
 class Sides(GeometryDomain):
     """A Domain containing all (interior and boundary) sides of the geometry or geometry partition"""
@@ -225,6 +241,33 @@ class Sides(GeometryDomain):
     def element_normal(self) -> wp.Function:
         return self.geometry.side_normal
+    @property
+    def element_inner_cell_index(self) -> wp.Function:
+        return self.geometry.side_inner_cell_index
+    @property
+    def element_outer_cell_index(self) -> wp.Function:
+        return self.geometry.side_outer_cell_index
+    @property
+    def element_inner_cell_coords(self) -> wp.Function:
+        return self.geometry.side_inner_cell_coords
+    @property
+    def element_outer_cell_coords(self) -> wp.Function:
+        return self.geometry.side_outer_cell_coords
+    @property
+    def cell_to_element_coords(self) -> wp.Function:
+        return self.geometry.side_from_cell_coords
+    @property
+    def domain_cell_arg(self) -> wp.Function:
+        return self.geometry.side_to_cell_arg
+    def cell_domain(self):
+        return Cells(self.geometry_partition)
 class BoundarySides(Sides):
     """A Domain containing boundary sides of the geometry or geometry partition"""

warp/fem/field/__init__.py CHANGED Viewed

@@ -6,8 +6,7 @@ from warp.fem.space import FunctionSpace, SpacePartition, SpaceRestriction, make
 from .field import DiscreteField, FieldLike, GeometryField, ImplicitField, NonconformingField, SpaceField, UniformField
 from .nodal_field import NodalField
 from .restriction import FieldRestriction
-from .test import TestField
-from .trial import TrialField
+from .virtual import LocalTestField, LocalTrialField, TestField, TrialField
 def make_restriction(

warp/fem/field/field.py CHANGED Viewed

@@ -1,10 +1,10 @@
-from typing import Any, Dict, Optional
+from typing import Any, Dict, Optional, Set
 import warp as wp
 from warp.fem import cache
 from warp.fem.domain import GeometryDomain, Sides
 from warp.fem.geometry import DeformedGeometry, Geometry
-from warp.fem.operator import integrand
+from warp.fem.operator import Operator, integrand
 from warp.fem.space import FunctionSpace, SpacePartition
 from warp.fem.types import NULL_ELEMENT_INDEX, ElementKind, Sample
@@ -48,32 +48,32 @@ class FieldLike:
         return False
     @staticmethod
-    def eval_inner(args: "ElementEvalArg", s: "Sample"):  # noqa: F821
+    def eval_inner(args: "ElementEvalArg", s: Sample):  # noqa: F821
         """Device function evaluating the inner field value at a sample point"""
         raise NotImplementedError
     @staticmethod
-    def eval_grad_inner(args: "ElementEvalArg", s: "Sample"):  # noqa: F821
+    def eval_grad_inner(args: "ElementEvalArg", s: Sample):  # noqa: F821
         """Device function evaluating the inner field gradient at a sample point"""
         raise NotImplementedError
     @staticmethod
-    def eval_div_inner(args: "ElementEvalArg", s: "Sample"):  # noqa: F821
+    def eval_div_inner(args: "ElementEvalArg", s: Sample):  # noqa: F821
         """Device function evaluating the inner field divergence at a sample point"""
         raise NotImplementedError
     @staticmethod
-    def eval_outer(args: "ElementEvalArg", s: "Sample"):  # noqa: F821
+    def eval_outer(args: "ElementEvalArg", s: Sample):  # noqa: F821
         """Device function evaluating the outer field value at a sample point"""
         raise NotImplementedError
     @staticmethod
-    def eval_grad_outer(args: "ElementEvalArg", s: "Sample"):  # noqa: F821
+    def eval_grad_outer(args: "ElementEvalArg", s: Sample):  # noqa: F821
         """Device function evaluating the outer field gradient at a sample point"""
         raise NotImplementedError
     @staticmethod
-    def eval_div_outer(args: "ElementEvalArg", s: "Sample"):  # noqa: F821
+    def eval_div_outer(args: "ElementEvalArg", s: Sample):  # noqa: F821
         """Device function evaluating the outer field divergence at a sample point"""
         raise NotImplementedError
@@ -82,6 +82,10 @@ class FieldLike:
         """Polynomial degree of the field is applicable, or hint for determination of interpolation order"""
         raise NotImplementedError
+    def notify_operator_usage(self, ops: Set[Operator]):
+        """Makes the Domain aware that the operators `ops` will be applied"""
+        pass
 class GeometryField(FieldLike):
     """Base class for fields defined over a geometry"""
@@ -97,12 +101,12 @@ class GeometryField(FieldLike):
         raise NotImplementedError
     @staticmethod
-    def eval_reference_grad_inner(args: "ElementEvalArg", s: "Sample"):  # noqa: F821
+    def eval_reference_grad_inner(args: "ElementEvalArg", s: Sample):  # noqa: F821
         """Device function evaluating the inner field gradient with respect to reference element coordinates at a sample point"""
         raise NotImplementedError
     @staticmethod
-    def eval_reference_grad_outer(args: "ElementEvalArg", s: "Sample"):  # noqa: F821
+    def eval_reference_grad_outer(args: "ElementEvalArg", s: Sample):  # noqa: F821
         """Device function evaluating the outer field gradient with respect to reference element coordinates at a sample point"""
         raise NotImplementedError
@@ -128,6 +132,9 @@ class SpaceField(GeometryField):
         self._space = space
         self._space_partition = space_partition
+        self.gradient_valid = self.space.gradient_valid
+        self.divergence_valid = self.space.divergence_valid
     @property
     def geometry(self) -> Geometry:
         return self._space.geometry
@@ -156,17 +163,22 @@ class SpaceField(GeometryField):
     def dof_dtype(self) -> type:
         return self.space.dof_dtype
-    def gradient_valid(self) -> bool:
-        """Whether gradient operator can be computed. Only for scalar and vector fields as higher-order tensors are not support yet"""
-        return not wp.types.type_is_matrix(self.dtype)
+    @property
+    def gradient_dtype(self):
+        """Return type of the gradient operator. Assumes self.gradient_valid()"""
+        if wp.types.type_is_vector(self.dtype):
+            return cache.cached_mat_type(
+                shape=(wp.types.type_length(self.dtype), self.geometry.dimension),
+                dtype=wp.types.type_scalar_type(self.dtype),
+            )
+        return cache.cached_vec_type(length=self.geometry.dimension, dtype=wp.types.type_scalar_type(self.dtype))
-    def divergence_valid(self) -> bool:
-        """Whether divergence of this field can be computed. Only for vector and tensor fields with same dimension as embedding geometry"""
+    @property
+    def divergence_dtype(self):
+        """Return type of the divergence operator. Assumes self.gradient_valid()"""
         if wp.types.type_is_vector(self.dtype):
-            return wp.types.type_length(self.dtype) == self.space.geometry.dimension
-        if wp.types.type_is_matrix(self.dtype):
-            return self.dtype._shape_[0] == self.space.geometry.dimension
-        return False
+            return wp.types.type_scalar_type(self.dtype)
+        return cache.cached_vec_type(length=self.dtype._shape_[1], dtype=wp.types.type_scalar_type(self.dtype))
     def _make_eval_degree(self):
         ORDER = self.space.ORDER