PyPI - warp-lang - Versions diffs - 0.9.0__py3-none-win_amd64.whl → 0.11.0__py3-none-win_amd64.whl - Mend

warp-lang 0.9.0__py3-none-win_amd64.whl → 0.11.0__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of warp-lang might be problematic. Click here for more details.

Files changed (315) hide show

warp/__init__.py +15 -7
warp/__init__.pyi +1 -0
warp/bin/warp-clang.dll +0 -0
warp/bin/warp.dll +0 -0
warp/build.py +22 -443
warp/build_dll.py +384 -0
warp/builtins.py +998 -488
warp/codegen.py +1307 -739
warp/config.py +5 -3
warp/constants.py +6 -0
warp/context.py +1291 -548
warp/dlpack.py +31 -31
warp/fabric.py +326 -0
warp/fem/__init__.py +27 -0
warp/fem/cache.py +389 -0
warp/fem/dirichlet.py +181 -0
warp/fem/domain.py +263 -0
warp/fem/field/__init__.py +101 -0
warp/fem/field/field.py +149 -0
warp/fem/field/nodal_field.py +299 -0
warp/fem/field/restriction.py +21 -0
warp/fem/field/test.py +181 -0
warp/fem/field/trial.py +183 -0
warp/fem/geometry/__init__.py +19 -0
warp/fem/geometry/closest_point.py +70 -0
warp/fem/geometry/deformed_geometry.py +271 -0
warp/fem/geometry/element.py +744 -0
warp/fem/geometry/geometry.py +186 -0
warp/fem/geometry/grid_2d.py +373 -0
warp/fem/geometry/grid_3d.py +435 -0
warp/fem/geometry/hexmesh.py +953 -0
warp/fem/geometry/partition.py +376 -0
warp/fem/geometry/quadmesh_2d.py +532 -0
warp/fem/geometry/tetmesh.py +840 -0
warp/fem/geometry/trimesh_2d.py +577 -0
warp/fem/integrate.py +1616 -0
warp/fem/operator.py +191 -0
warp/fem/polynomial.py +213 -0
warp/fem/quadrature/__init__.py +2 -0
warp/fem/quadrature/pic_quadrature.py +245 -0
warp/fem/quadrature/quadrature.py +294 -0
warp/fem/space/__init__.py +292 -0
warp/fem/space/basis_space.py +489 -0
warp/fem/space/collocated_function_space.py +105 -0
warp/fem/space/dof_mapper.py +236 -0
warp/fem/space/function_space.py +145 -0
warp/fem/space/grid_2d_function_space.py +267 -0
warp/fem/space/grid_3d_function_space.py +306 -0
warp/fem/space/hexmesh_function_space.py +352 -0
warp/fem/space/partition.py +350 -0
warp/fem/space/quadmesh_2d_function_space.py +369 -0
warp/fem/space/restriction.py +160 -0
warp/fem/space/shape/__init__.py +15 -0
warp/fem/space/shape/cube_shape_function.py +738 -0
warp/fem/space/shape/shape_function.py +103 -0
warp/fem/space/shape/square_shape_function.py +611 -0
warp/fem/space/shape/tet_shape_function.py +567 -0
warp/fem/space/shape/triangle_shape_function.py +429 -0
warp/fem/space/tetmesh_function_space.py +292 -0
warp/fem/space/topology.py +295 -0
warp/fem/space/trimesh_2d_function_space.py +221 -0
warp/fem/types.py +77 -0
warp/fem/utils.py +495 -0
warp/native/array.h +164 -55
warp/native/builtin.h +150 -174
warp/native/bvh.cpp +75 -328
warp/native/bvh.cu +406 -23
warp/native/bvh.h +37 -45
warp/native/clang/clang.cpp +136 -24
warp/native/crt.cpp +1 -76
warp/native/crt.h +111 -104
warp/native/cuda_crt.h +1049 -0
warp/native/cuda_util.cpp +15 -3
warp/native/cuda_util.h +3 -1
warp/native/cutlass/tools/library/scripts/conv2d_operation.py +463 -0
warp/native/cutlass/tools/library/scripts/conv3d_operation.py +321 -0
warp/native/cutlass/tools/library/scripts/gemm_operation.py +988 -0
warp/native/cutlass/tools/library/scripts/generator.py +4625 -0
warp/native/cutlass/tools/library/scripts/library.py +799 -0
warp/native/cutlass/tools/library/scripts/manifest.py +402 -0
warp/native/cutlass/tools/library/scripts/pycutlass/docs/source/conf.py +96 -0
warp/native/cutlass/tools/library/scripts/pycutlass/profile/conv/conv2d_f16_sm80.py +106 -0
warp/native/cutlass/tools/library/scripts/pycutlass/profile/gemm/gemm_f32_sm80.py +91 -0
warp/native/cutlass/tools/library/scripts/pycutlass/setup.py +80 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/__init__.py +48 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/arguments.py +118 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/c_types.py +241 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/compiler.py +432 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/conv2d_operation.py +631 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/epilogue.py +1026 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/frontend.py +104 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/gemm_operation.py +1276 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/library.py +744 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/memory_manager.py +74 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/operation.py +110 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/parser.py +619 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/reduction_operation.py +398 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/tensor_ref.py +70 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/__init__.py +4 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/conv2d_testbed.py +646 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/gemm_grouped_testbed.py +235 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/gemm_testbed.py +557 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/profiler.py +70 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/type_hint.py +39 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/utils/__init__.py +1 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/utils/device.py +76 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/utils/reference_model.py +255 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/__init__.py +0 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.py +201 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +177 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.py +98 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.py +95 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_few_channels_f16nhwc_f16nhwc_f16nhwc_tensor_op_f32_sm80.py +163 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_fixed_channels_f16nhwc_f16nhwc_f16nhwc_tensor_op_f32_sm80.py +187 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.py +309 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +54 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.py +96 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.py +107 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_strided_dgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +253 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.py +97 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +242 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.py +96 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.py +107 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/run_all_tests.py +10 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/frontend/test_frontend.py +146 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/__init__.py +0 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_bf16_sm80.py +96 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_f16_sm80.py +447 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_f32_sm80.py +146 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_f64_sm80.py +102 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_grouped_sm80.py +203 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_s8_sm80.py +229 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/run_all_tests.py +9 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/unit/test_sm80.py +453 -0
warp/native/cutlass/tools/library/scripts/rank_2k_operation.py +398 -0
warp/native/cutlass/tools/library/scripts/rank_k_operation.py +387 -0
warp/native/cutlass/tools/library/scripts/rt.py +796 -0
warp/native/cutlass/tools/library/scripts/symm_operation.py +400 -0
warp/native/cutlass/tools/library/scripts/trmm_operation.py +407 -0
warp/native/cutlass_gemm.cu +5 -3
warp/native/exports.h +1240 -949
warp/native/fabric.h +228 -0
warp/native/hashgrid.cpp +4 -4
warp/native/hashgrid.h +22 -2
warp/native/initializer_array.h +2 -2
warp/native/intersect.h +22 -7
warp/native/intersect_adj.h +8 -8
warp/native/intersect_tri.h +13 -16
warp/native/marching.cu +157 -161
warp/native/mat.h +119 -19
warp/native/matnn.h +2 -2
warp/native/mesh.cpp +108 -83
warp/native/mesh.cu +243 -6
warp/native/mesh.h +1547 -458
warp/native/nanovdb/NanoVDB.h +1 -1
warp/native/noise.h +272 -329
warp/native/quat.h +51 -8
warp/native/rand.h +45 -35
warp/native/range.h +6 -2
warp/native/reduce.cpp +157 -0
warp/native/reduce.cu +348 -0
warp/native/runlength_encode.cpp +62 -0
warp/native/runlength_encode.cu +46 -0
warp/native/scan.cu +11 -13
warp/native/scan.h +1 -0
warp/native/solid_angle.h +442 -0
warp/native/sort.cpp +13 -0
warp/native/sort.cu +9 -1
warp/native/sparse.cpp +338 -0
warp/native/sparse.cu +545 -0
warp/native/spatial.h +2 -2
warp/native/temp_buffer.h +30 -0
warp/native/vec.h +126 -24
warp/native/volume.h +120 -0
warp/native/warp.cpp +658 -53
warp/native/warp.cu +660 -68
warp/native/warp.h +112 -12
warp/optim/__init__.py +1 -0
warp/optim/linear.py +922 -0
warp/optim/sgd.py +92 -0
warp/render/render_opengl.py +392 -152
warp/render/render_usd.py +11 -11
warp/sim/__init__.py +2 -2
warp/sim/articulation.py +385 -185
warp/sim/collide.py +21 -8
warp/sim/import_mjcf.py +297 -106
warp/sim/import_urdf.py +389 -210
warp/sim/import_usd.py +198 -97
warp/sim/inertia.py +17 -18
warp/sim/integrator_euler.py +14 -8
warp/sim/integrator_xpbd.py +161 -19
warp/sim/model.py +795 -291
warp/sim/optimizer.py +2 -6
warp/sim/render.py +65 -3
warp/sim/utils.py +3 -0
warp/sparse.py +1227 -0
warp/stubs.py +665 -223
warp/tape.py +66 -15
warp/tests/__main__.py +3 -6
warp/tests/assets/curlnoise_golden.npy +0 -0
warp/tests/assets/pnoise_golden.npy +0 -0
warp/tests/assets/torus.usda +105 -105
warp/tests/{test_class_kernel.py → aux_test_class_kernel.py} +9 -1
warp/tests/aux_test_conditional_unequal_types_kernels.py +21 -0
warp/tests/{test_dependent.py → aux_test_dependent.py} +2 -2
warp/tests/{test_reference.py → aux_test_reference.py} +1 -1
warp/tests/aux_test_unresolved_func.py +14 -0
warp/tests/aux_test_unresolved_symbol.py +14 -0
warp/tests/disabled_kinematics.py +239 -0
warp/tests/run_coverage_serial.py +31 -0
warp/tests/test_adam.py +103 -106
warp/tests/test_arithmetic.py +128 -74
warp/tests/test_array.py +1497 -211
warp/tests/test_array_reduce.py +150 -0
warp/tests/test_atomic.py +64 -28
warp/tests/test_bool.py +99 -0
warp/tests/test_builtins_resolution.py +1292 -0
warp/tests/test_bvh.py +75 -43
warp/tests/test_closest_point_edge_edge.py +54 -57
warp/tests/test_codegen.py +233 -128
warp/tests/test_compile_consts.py +28 -20
warp/tests/test_conditional.py +108 -24
warp/tests/test_copy.py +10 -12
warp/tests/test_ctypes.py +112 -88
warp/tests/test_dense.py +21 -14
warp/tests/test_devices.py +98 -0
warp/tests/test_dlpack.py +136 -108
warp/tests/test_examples.py +277 -0
warp/tests/test_fabricarray.py +955 -0
warp/tests/test_fast_math.py +15 -11
warp/tests/test_fem.py +1271 -0
warp/tests/test_fp16.py +53 -19
warp/tests/test_func.py +187 -74
warp/tests/test_generics.py +194 -49
warp/tests/test_grad.py +180 -116
warp/tests/test_grad_customs.py +176 -0
warp/tests/test_hash_grid.py +52 -37
warp/tests/test_import.py +10 -23
warp/tests/test_indexedarray.py +577 -24
warp/tests/test_intersect.py +18 -9
warp/tests/test_large.py +141 -0
warp/tests/test_launch.py +251 -15
warp/tests/test_lerp.py +64 -65
warp/tests/test_linear_solvers.py +154 -0
warp/tests/test_lvalue.py +493 -0
warp/tests/test_marching_cubes.py +12 -13
warp/tests/test_mat.py +508 -2778
warp/tests/test_mat_lite.py +115 -0
warp/tests/test_mat_scalar_ops.py +2889 -0
warp/tests/test_math.py +103 -9
warp/tests/test_matmul.py +305 -69
warp/tests/test_matmul_lite.py +410 -0
warp/tests/test_mesh.py +71 -14
warp/tests/test_mesh_query_aabb.py +41 -25
warp/tests/test_mesh_query_point.py +325 -34
warp/tests/test_mesh_query_ray.py +39 -22
warp/tests/test_mlp.py +30 -22
warp/tests/test_model.py +92 -89
warp/tests/test_modules_lite.py +39 -0
warp/tests/test_multigpu.py +88 -114
warp/tests/test_noise.py +12 -11
warp/tests/test_operators.py +16 -20
warp/tests/test_options.py +11 -11
warp/tests/test_pinned.py +17 -18
warp/tests/test_print.py +32 -11
warp/tests/test_quat.py +275 -129
warp/tests/test_rand.py +18 -16
warp/tests/test_reload.py +38 -34
warp/tests/test_rounding.py +50 -43
warp/tests/test_runlength_encode.py +190 -0
warp/tests/test_smoothstep.py +9 -11
warp/tests/test_snippet.py +143 -0
warp/tests/test_sparse.py +460 -0
warp/tests/test_spatial.py +276 -243
warp/tests/test_streams.py +110 -85
warp/tests/test_struct.py +331 -85
warp/tests/test_tape.py +39 -21
warp/tests/test_torch.py +118 -89
warp/tests/test_transient_module.py +12 -13
warp/tests/test_types.py +614 -0
warp/tests/test_utils.py +494 -0
warp/tests/test_vec.py +354 -1987
warp/tests/test_vec_lite.py +73 -0
warp/tests/test_vec_scalar_ops.py +2099 -0
warp/tests/test_volume.py +457 -293
warp/tests/test_volume_write.py +124 -134
warp/tests/unittest_serial.py +35 -0
warp/tests/unittest_suites.py +341 -0
warp/tests/unittest_utils.py +568 -0
warp/tests/unused_test_misc.py +71 -0
warp/tests/{test_debug.py → walkthough_debug.py} +3 -17
warp/thirdparty/appdirs.py +36 -45
warp/thirdparty/unittest_parallel.py +549 -0
warp/torch.py +72 -30
warp/types.py +1744 -713
warp/utils.py +360 -350
warp_lang-0.11.0.dist-info/LICENSE.md +36 -0
warp_lang-0.11.0.dist-info/METADATA +238 -0
warp_lang-0.11.0.dist-info/RECORD +332 -0
{warp_lang-0.9.0.dist-info → warp_lang-0.11.0.dist-info}/WHEEL +1 -1
warp/bin/warp-clang.exp +0 -0
warp/bin/warp-clang.lib +0 -0
warp/bin/warp.exp +0 -0
warp/bin/warp.lib +0 -0
warp/tests/test_all.py +0 -215
warp/tests/test_array_scan.py +0 -60
warp/tests/test_base.py +0 -208
warp/tests/test_unresolved_func.py +0 -7
warp/tests/test_unresolved_symbol.py +0 -7
warp_lang-0.9.0.dist-info/METADATA +0 -20
warp_lang-0.9.0.dist-info/RECORD +0 -177
/warp/tests/{test_compile_consts_dummy.py → aux_test_compile_consts_dummy.py} +0 -0
/warp/tests/{test_reference_reference.py → aux_test_reference_reference.py} +0 -0
/warp/tests/{test_square.py → aux_test_square.py} +0 -0
{warp_lang-0.9.0.dist-info → warp_lang-0.11.0.dist-info}/top_level.txt +0 -0

warp/fem/space/shape/square_shape_function.py ADDED Viewed

@@ -0,0 +1,611 @@
+import math
+import warp as wp
+import numpy as np
+from warp.fem.polynomial import Polynomial, quadrature_1d, lagrange_scales, is_closed
+from warp.fem.types import Coords
+from warp.fem import cache
+from .triangle_shape_function import Triangle2DPolynomialShapeFunctions
+class SquareBipolynomialShapeFunctions:  # Builds per-node functions for a square element carrying (degree + 1) * (degree + 1) nodes.
+    def __init__(self, degree: int, family: Polynomial):  # Stores `family` and precomputes per-node 1D data as wp constants.
+        self.family = family
+        self.ORDER = wp.constant(degree)
+        self.NODES_PER_ELEMENT = wp.constant((degree + 1) * (degree + 1))
+        self.NODES_PER_SIDE = wp.constant(degree + 1)
+        lobatto_coords, lobatto_weight = quadrature_1d(point_count=degree + 1, family=family)  # Despite the "lobatto" names, the points follow whichever `family` is passed.
+        lagrange_scale = lagrange_scales(lobatto_coords)  # Presumably 1 / prod_{k != i}(x_i - x_k) per node; confirm in warp.fem.polynomial.
+        NodeVec = wp.types.vector(length=degree + 1, dtype=wp.float32)  # Fixed-length float32 vector type, one entry per 1D node.
+        self.LOBATTO_COORDS = wp.constant(NodeVec(lobatto_coords))
+        self.LOBATTO_WEIGHT = wp.constant(NodeVec(lobatto_weight))
+        self.LAGRANGE_SCALE = wp.constant(NodeVec(lagrange_scale))
+        self.ORDER_PLUS_ONE = wp.constant(self.ORDER + 1)
+    @property
+    def name(self) -> str:  # Identifier built from ORDER and family; passed as the suffix/key to the cache helpers below.
+        return f"Square_Q{self.ORDER}_{self.family}"
+    def make_node_coords_in_element(self):  # Returns a function mapping an in-element node index to its Coords.
+        ORDER = self.ORDER
+        LOBATTO_COORDS = self.LOBATTO_COORDS
+        @cache.dynamic_func(suffix=self.name)
+        def node_coords_in_element(
+            node_index_in_elt: int,
+        ):
+            node_i = node_index_in_elt // (ORDER + 1)  # Index along the first coordinate (slow-varying).
+            node_j = node_index_in_elt - (ORDER + 1) * node_i  # Remainder: index along the second coordinate.
+            return Coords(LOBATTO_COORDS[node_i], LOBATTO_COORDS[node_j], 0.0)  # Third component is always 0.0.
+        return node_coords_in_element
+    def make_node_quadrature_weight(self):  # Returns the function giving the quadrature weight attached to a node.
+        ORDER = self.ORDER
+        LOBATTO_WEIGHT = self.LOBATTO_WEIGHT
+        def node_quadrature_weight(
+            node_index_in_elt: int,
+        ):
+            node_i = node_index_in_elt // (ORDER + 1)
+            node_j = node_index_in_elt - (ORDER + 1) * node_i
+            return LOBATTO_WEIGHT[node_i] * LOBATTO_WEIGHT[node_j]  # Product of the two 1D weights.
+        def node_quadrature_weight_linear(
+            node_index_in_elt: int,
+        ):
+            return 0.25  # Constant weight used for ORDER == 1 (four nodes).
+        if ORDER == 1:
+            return cache.get_func(node_quadrature_weight_linear, self.name)
+        return cache.get_func(node_quadrature_weight, self.name)
+    @wp.func
+    def _vertex_coords_f(vidx_in_cell: int):  # Takes no `self`; called through the class name from the nested functions below.
+        x = vidx_in_cell // 2
+        y = vidx_in_cell - 2 * x  # Parity of the vertex index.
+        return wp.vec2(float(x), float(y))  # Vertex index 0..3 -> (0,0), (0,1), (1,0), (1,1).
+    def make_trace_node_quadrature_weight(self):  # Returns the function giving a node's 1D weight along an element side.
+        ORDER = self.ORDER
+        LOBATTO_WEIGHT = self.LOBATTO_WEIGHT
+        def trace_node_quadrature_weight(
+            node_index_in_elt: int,
+        ):
+            # We're either on a side interior or at a vertex
+            # I.e., either both indices are at extrema, or only one is
+            # Pick the interior one if possible, if both are at extrema pick any one
+            node_i = node_index_in_elt // (ORDER + 1)
+            if node_i > 0 and node_i < ORDER:  # node_i is strictly interior: use its weight.
+                return LOBATTO_WEIGHT[node_i]
+            node_j = node_index_in_elt - (ORDER + 1) * node_i
+            return LOBATTO_WEIGHT[node_j]
+        def trace_node_quadrature_weight_linear(
+            node_index_in_elt: int,
+        ):
+            return 0.5  # Constant weight used for closed families with ORDER == 1.
+        def trace_node_quadrature_weight_open(
+            node_index_in_elt: int,
+        ):
+            return 0.0  # Selected when is_closed(family) is false.
+        if not is_closed(self.family):  # The open-family check takes precedence over the ORDER == 1 shortcut.
+            return cache.get_func(trace_node_quadrature_weight_open, self.name)
+        if ORDER == 1:
+            return cache.get_func(trace_node_quadrature_weight_linear, self.name)
+        return cache.get_func(trace_node_quadrature_weight, self.name)
+    def make_element_inner_weight(self):  # Returns the function evaluating one node's shape-function value at `coords`.
+        ORDER_PLUS_ONE = self.ORDER_PLUS_ONE
+        LOBATTO_COORDS = self.LOBATTO_COORDS
+        LAGRANGE_SCALE = self.LAGRANGE_SCALE
+        def element_inner_weight(
+            coords: Coords,
+            node_index_in_elt: int,
+        ):
+            node_i = node_index_in_elt // ORDER_PLUS_ONE
+            node_j = node_index_in_elt - ORDER_PLUS_ONE * node_i
+            w = float(1.0)
+            for k in range(ORDER_PLUS_ONE):  # Accumulate prod_{k != node_i}(x - x_k) * prod_{k != node_j}(y - y_k).
+                if k != node_i:
+                    w *= coords[0] - LOBATTO_COORDS[k]
+                if k != node_j:
+                    w *= coords[1] - LOBATTO_COORDS[k]
+            w *= LAGRANGE_SCALE[node_i] * LAGRANGE_SCALE[node_j]  # Apply the per-node scale of each direction.
+            return w
+        def element_inner_weight_linear(
+            coords: Coords,
+            node_index_in_elt: int,
+        ):
+            v = SquareBipolynomialShapeFunctions._vertex_coords_f(node_index_in_elt)
+            wx = (1.0 - coords[0]) * (1.0 - v[0]) + v[0] * coords[0]  # Equals 1 - x when v[0] == 0 and x when v[0] == 1.
+            wy = (1.0 - coords[1]) * (1.0 - v[1]) + v[1] * coords[1]
+            return wx * wy
+        if self.ORDER == 1 and is_closed(self.family):  # Closed-form bilinear variant only for closed families of order 1.
+            return cache.get_func(element_inner_weight_linear, self.name)
+        return cache.get_func(element_inner_weight, self.name)
+    def make_element_inner_weight_gradient(self):  # Returns the function evaluating the wp.vec2 gradient of one node's shape function.
+        ORDER_PLUS_ONE = self.ORDER_PLUS_ONE
+        LOBATTO_COORDS = self.LOBATTO_COORDS
+        LAGRANGE_SCALE = self.LAGRANGE_SCALE
+        def element_inner_weight_gradient(
+            coords: Coords,
+            node_index_in_elt: int,
+        ):
+            node_i = node_index_in_elt // ORDER_PLUS_ONE
+            node_j = node_index_in_elt - ORDER_PLUS_ONE * node_i
+            prefix_x = float(1.0)  # Will hold the y-direction product: the factor held constant when differentiating in x.
+            prefix_y = float(1.0)  # Will hold the x-direction product: the factor held constant when differentiating in y.
+            for k in range(ORDER_PLUS_ONE):
+                if k != node_i:
+                    prefix_y *= coords[0] - LOBATTO_COORDS[k]
+                if k != node_j:
+                    prefix_x *= coords[1] - LOBATTO_COORDS[k]
+            grad_x = float(0.0)
+            grad_y = float(0.0)
+            for k in range(ORDER_PLUS_ONE):  # Product rule, one factor at a time: grad <- grad * delta + prefix, then prefix <- prefix * delta.
+                if k != node_i:
+                    delta_x = coords[0] - LOBATTO_COORDS[k]
+                    grad_x = grad_x * delta_x + prefix_x
+                    prefix_x *= delta_x
+                if k != node_j:
+                    delta_y = coords[1] - LOBATTO_COORDS[k]
+                    grad_y = grad_y * delta_y + prefix_y
+                    prefix_y *= delta_y
+            grad = LAGRANGE_SCALE[node_i] * LAGRANGE_SCALE[node_j] * wp.vec2(grad_x, grad_y)  # Same scaling as the weight itself.
+            return grad
+        def element_inner_weight_gradient_linear(
+            coords: Coords,
+            node_index_in_elt: int,
+        ):
+            v = SquareBipolynomialShapeFunctions._vertex_coords_f(node_index_in_elt)
+            wx = (1.0 - coords[0]) * (1.0 - v[0]) + v[0] * coords[0]
+            wy = (1.0 - coords[1]) * (1.0 - v[1]) + v[1] * coords[1]
+            dx = 2.0 * v[0] - 1.0  # d(wx)/dx: -1 when v[0] == 0, +1 when v[0] == 1.
+            dy = 2.0 * v[1] - 1.0
+            return wp.vec2(dx * wy, dy * wx)
+        if self.ORDER == 1 and is_closed(self.family):  # Mirrors the selection rule in make_element_inner_weight.
+            return cache.get_func(element_inner_weight_gradient_linear, self.name)
+        return cache.get_func(element_inner_weight_gradient, self.name)
+    def element_node_triangulation(self):  # Delegates to grid_to_tris for an ORDER x ORDER grid; return format is defined there.
+        from warp.fem.utils import grid_to_tris
+        return grid_to_tris(self.ORDER, self.ORDER)
+class SquareSerendipityShapeFunctions:
+    """
+    Serendipity element ~ tensor product space without interior nodes
+    Side shape functions are usual Lagrange shape functions times a linear function in the normal direction
+    Corner shape functions are bilinear shape functions times a function of (x^{d-1} + y^{d-1})
+    """
+    # Node categories
+    VERTEX = wp.constant(0)
+    EDGE_X = wp.constant(1)
+    EDGE_Y = wp.constant(2)
+    def __init__(self, degree: int, family: Polynomial):  # Validates inputs, then precomputes per-node 1D data as wp constants.
+        if not is_closed(family):  # Raises ValueError for families that is_closed() rejects.
+            raise ValueError("A closed polynomial family is required to define serendipity elements")
+        if degree not in [2, 3]:  # Raises NotImplementedError for any other degree.
+            raise NotImplementedError("Serendipity element only implemented for order 2 or 3")
+        self.family = family
+        self.ORDER = wp.constant(degree)
+        self.NODES_PER_ELEMENT = wp.constant(4 * degree)  # 4 vertices plus 4 * (degree - 1) side nodes.
+        self.NODES_PER_SIDE = wp.constant(degree + 1)
+        lobatto_coords, lobatto_weight = quadrature_1d(point_count=degree + 1, family=family)  # degree + 1 1D points and weights for `family`.
+        lagrange_scale = lagrange_scales(lobatto_coords)  # Presumably 1 / prod_{k != i}(x_i - x_k) per node; confirm in warp.fem.polynomial.
+        NodeVec = wp.types.vector(length=degree + 1, dtype=wp.float32)  # Fixed-length float32 vector type, one entry per 1D node.
+        self.LOBATTO_COORDS = wp.constant(NodeVec(lobatto_coords))
+        self.LOBATTO_WEIGHT = wp.constant(NodeVec(lobatto_weight))
+        self.LAGRANGE_SCALE = wp.constant(NodeVec(lagrange_scale))
+        self.ORDER_PLUS_ONE = wp.constant(self.ORDER + 1)
+        self.node_type_and_type_index = self._get_node_type_and_type_index()  # Built here because both helpers read self.name (hence ORDER and family).
+        self._node_lobatto_indices = self._get_node_lobatto_indices()
+    @property
+    def name(self) -> str:  # Identifier built from ORDER and family; used as the suffix for cache.dynamic_func.
+        return f"Square_S{self.ORDER}_{self.family}"
+    def _get_node_type_and_type_index(self):  # Returns a function mapping a node index to (node category, index within that category).
+        @cache.dynamic_func(suffix=self.name)
+        def node_type_and_index(
+            node_index_in_elt: int,
+        ):
+            if node_index_in_elt < 4:  # The first four node indices are the vertices.
+                return SquareSerendipityShapeFunctions.VERTEX, node_index_in_elt
+            type_index = (node_index_in_elt - 4) // 2  # Remaining nodes come in pairs.
+            side = node_index_in_elt - 4 - 2 * type_index  # Parity within the pair: 0 -> EDGE_X, 1 -> EDGE_Y.
+            return SquareSerendipityShapeFunctions.EDGE_X + side, type_index
+        return node_type_and_index
+    @wp.func
+    def side_offset_and_index(type_index: int):  # Takes no `self`; splits an edge type_index into (parity, half).
+        index_in_side = type_index // 2
+        side_offset = type_index - 2 * index_in_side  # 0 or 1; callers multiply it by ORDER to pick one of the two extreme indices.
+        return side_offset, index_in_side
+    def _get_node_lobatto_indices(self):  # Returns a function mapping (node_type, type_index) to a pair of 1D node indices (node_i, node_j).
+        ORDER = self.ORDER
+        @cache.dynamic_func(suffix=self.name)
+        def node_lobatto_indices(node_type: int, type_index: int):
+            if node_type == SquareSerendipityShapeFunctions.VERTEX:
+                node_i = type_index // 2
+                node_j = type_index - 2 * node_i
+                return node_i * ORDER, node_j * ORDER  # Each index is either 0 or ORDER, i.e. an extreme 1D node.
+            side_offset, index_in_side = SquareSerendipityShapeFunctions.side_offset_and_index(type_index)
+            if node_type == SquareSerendipityShapeFunctions.EDGE_X:
+                node_i = 1 + index_in_side  # Index along the first coordinate, offset past node 0.
+                node_j = side_offset * ORDER  # Second coordinate pinned to index 0 or ORDER.
+            else:
+                node_j = 1 + index_in_side  # EDGE_Y: same layout with the two directions swapped.
+                node_i = side_offset * ORDER
+            return node_i, node_j
+        return node_lobatto_indices
+    def make_node_coords_in_element(self):  # Returns a function mapping an in-element node index to its Coords.
+        LOBATTO_COORDS = self.LOBATTO_COORDS
+        @cache.dynamic_func(suffix=self.name)
+        def node_coords_in_element(
+            node_index_in_elt: int,
+        ):
+            node_type, type_index = self.node_type_and_type_index(node_index_in_elt)
+            node_i, node_j = self._node_lobatto_indices(node_type, type_index)
+            return Coords(LOBATTO_COORDS[node_i], LOBATTO_COORDS[node_j], 0.0)  # Third component is always 0.0.
+        return node_coords_in_element
+    def make_node_quadrature_weight(self):  # Returns the function giving the quadrature weight attached to a node.
+        ORDER = self.ORDER
+        @cache.dynamic_func(suffix=self.name)
+        def node_quadrature_weight(
+            node_index_in_elt: int,
+        ):
+            node_type, type_index = self.node_type_and_type_index(node_index_in_elt)  # type_index is unpacked but not used here.
+            if node_type == SquareSerendipityShapeFunctions.VERTEX:
+                return 0.25 / float(ORDER * ORDER)  # Weight of each of the 4 vertices.
+            return (0.25 - 0.25 / float(ORDER * ORDER)) / float(ORDER - 1)  # Edge nodes share the remainder; all 4 * ORDER weights sum to 1.
+        return node_quadrature_weight
+    def make_trace_node_quadrature_weight(self):  # Returns the function giving a node's 1D weight along an element side.
+        LOBATTO_WEIGHT = self.LOBATTO_WEIGHT
+        @cache.dynamic_func(suffix=self.name)
+        def trace_node_quadrature_weight(
+            node_index_in_elt: int,
+        ):
+            node_type, type_index = self.node_type_and_type_index(node_index_in_elt)
+            if node_type == SquareSerendipityShapeFunctions.VERTEX:
+                return LOBATTO_WEIGHT[0]  # Vertices use the weight of the first 1D node.
+            side_offset, index_in_side = SquareSerendipityShapeFunctions.side_offset_and_index(type_index)  # side_offset is unpacked but not used here.
+            return LOBATTO_WEIGHT[1 + index_in_side]  # Edge nodes use the weight of their 1D index along the side.
+        return trace_node_quadrature_weight
+    def make_element_inner_weight(self):  # Returns the function evaluating one node's shape-function value at `coords`.
+        ORDER = self.ORDER
+        ORDER_PLUS_ONE = self.ORDER_PLUS_ONE
+        LOBATTO_COORDS = self.LOBATTO_COORDS
+        LAGRANGE_SCALE = self.LAGRANGE_SCALE
+        DEGREE_3_CIRCLE_RAD = wp.constant(0.5**2 + (0.5 - LOBATTO_COORDS[1]) ** 2)  # Only read by the ORDER == 3 vertex branch below.
+        DEGREE_3_CIRCLE_SCALE = 1.0 / (0.5 - DEGREE_3_CIRCLE_RAD)  # Makes the ORDER == 3 vertex weight equal 1 at cx == cy == 1.
+        @cache.dynamic_func(suffix=self.name)
+        def element_inner_weight(
+            coords: Coords,
+            node_index_in_elt: int,
+        ):
+            node_type, type_index = self.node_type_and_type_index(node_index_in_elt)
+            node_i, node_j = self._node_lobatto_indices(node_type, type_index)
+            if node_type == SquareSerendipityShapeFunctions.VERTEX:
+                cx = wp.select(node_i == 0, coords[0], 1.0 - coords[0])  # NOTE(review): per Warp docs select(cond, a, b) yields b when cond is true, so cx = 1 - x for node_i == 0.
+                cy = wp.select(node_j == 0, coords[1], 1.0 - coords[1])
+                w = cx * cy  # Bilinear factor.
+                if ORDER == 2:
+                    w *= cx + cy - 2.0 + LOBATTO_COORDS[1]
+                    return w * LAGRANGE_SCALE[0]
+                if ORDER == 3:  # __init__ only accepts degree 2 or 3, so one of these two returns is taken for vertices.
+                    w *= (cx - 0.5) * (cx - 0.5) + (cy - 0.5) * (cy - 0.5) - DEGREE_3_CIRCLE_RAD
+                    return w * DEGREE_3_CIRCLE_SCALE
+            w = float(1.0)  # Edge nodes: linear factor across the side times a scaled 1D product along it.
+            if node_type == SquareSerendipityShapeFunctions.EDGE_Y:
+                w *= wp.select(node_i == 0, coords[0], 1.0 - coords[0])  # Linear factor in the first coordinate.
+            else:
+                for k in range(ORDER_PLUS_ONE):  # prod_{k != node_i}(x - x_k), then scaled.
+                    if k != node_i:
+                        w *= coords[0] - LOBATTO_COORDS[k]
+                w *= LAGRANGE_SCALE[node_i]
+            if node_type == SquareSerendipityShapeFunctions.EDGE_X:
+                w *= wp.select(node_j == 0, coords[1], 1.0 - coords[1])  # Linear factor in the second coordinate.
+            else:
+                for k in range(ORDER_PLUS_ONE):  # prod_{k != node_j}(y - y_k), then scaled.
+                    if k != node_j:
+                        w *= coords[1] - LOBATTO_COORDS[k]
+                w *= LAGRANGE_SCALE[node_j]
+            return w
+        return element_inner_weight
+    def make_element_inner_weight_gradient(self):  # Returns the function evaluating the wp.vec2 gradient of one node's shape function.
+        ORDER = self.ORDER
+        ORDER_PLUS_ONE = self.ORDER_PLUS_ONE
+        LOBATTO_COORDS = self.LOBATTO_COORDS
+        LAGRANGE_SCALE = self.LAGRANGE_SCALE
+        DEGREE_3_CIRCLE_RAD = wp.constant(0.5**2 + (0.5 - LOBATTO_COORDS[1]) ** 2)  # Same expressions as in make_element_inner_weight.
+        DEGREE_3_CIRCLE_SCALE = 1.0 / (0.5 - DEGREE_3_CIRCLE_RAD)
+        @cache.dynamic_func(suffix=self.name)
+        def element_inner_weight_gradient(
+            coords: Coords,
+            node_index_in_elt: int,
+        ):
+            node_type, type_index = self.node_type_and_type_index(node_index_in_elt)
+            node_i, node_j = self._node_lobatto_indices(node_type, type_index)
+            if node_type == SquareSerendipityShapeFunctions.VERTEX:
+                cx = wp.select(node_i == 0, coords[0], 1.0 - coords[0])  # NOTE(review): per Warp docs select(cond, a, b) yields b when cond is true.
+                cy = wp.select(node_j == 0, coords[1], 1.0 - coords[1])
+                gx = wp.select(node_i == 0, 1.0, -1.0)  # d(cx)/dx: -1 when cx = 1 - x, +1 when cx = x.
+                gy = wp.select(node_j == 0, 1.0, -1.0)
+                if ORDER == 2:
+                    w = cx + cy - 2.0 + LOBATTO_COORDS[1]
+                    grad_x = cy * gx * (w + cx)  # d/dcx of cx * cy * w, with dw/dcx == 1, chained through gx.
+                    grad_y = cx * gy * (w + cy)
+                    return wp.vec2(grad_x, grad_y) * LAGRANGE_SCALE[0]
+                if ORDER == 3:
+                    w = (cx - 0.5) * (cx - 0.5) + (cy - 0.5) * (cy - 0.5) - DEGREE_3_CIRCLE_RAD
+                    dw_dcx = 2.0 * cx - 1.0  # Derivative of (cx - 0.5)^2 with respect to cx.
+                    dw_dcy = 2.0 * cy - 1.0
+                    grad_x = cy * gx * (w + cx * dw_dcx)
+                    grad_y = cx * gy * (w + cy * dw_dcy)
+                    return wp.vec2(grad_x, grad_y) * DEGREE_3_CIRCLE_SCALE
+            if node_type == SquareSerendipityShapeFunctions.EDGE_X:  # prefix_x: the y-dependent factor, held constant when differentiating in x.
+                prefix_x = wp.select(node_j == 0, coords[1], 1.0 - coords[1])
+            else:
+                prefix_x = LAGRANGE_SCALE[node_j]
+                for k in range(ORDER_PLUS_ONE):
+                    if k != node_j:
+                        prefix_x *= coords[1] - LOBATTO_COORDS[k]
+            if node_type == SquareSerendipityShapeFunctions.EDGE_Y:  # prefix_y: the x-dependent factor, held constant when differentiating in y.
+                prefix_y = wp.select(node_i == 0, coords[0], 1.0 - coords[0])
+            else:
+                prefix_y = LAGRANGE_SCALE[node_i]
+                for k in range(ORDER_PLUS_ONE):
+                    if k != node_i:
+                        prefix_y *= coords[0] - LOBATTO_COORDS[k]
+            if node_type == SquareSerendipityShapeFunctions.EDGE_X:
+                grad_y = wp.select(node_j == 0, 1.0, -1.0) * prefix_y  # The y factor is linear, so its derivative is +/- 1.
+            else:
+                prefix_y *= LAGRANGE_SCALE[node_j]
+                grad_y = float(0.0)
+                for k in range(ORDER_PLUS_ONE):  # Product rule, one factor at a time: grad <- grad * delta + prefix, then prefix <- prefix * delta.
+                    if k != node_j:
+                        delta_y = coords[1] - LOBATTO_COORDS[k]
+                        grad_y = grad_y * delta_y + prefix_y
+                        prefix_y *= delta_y
+            if node_type == SquareSerendipityShapeFunctions.EDGE_Y:
+                grad_x = wp.select(node_i == 0, 1.0, -1.0) * prefix_x  # The x factor is linear, so its derivative is +/- 1.
+            else:
+                prefix_x *= LAGRANGE_SCALE[node_i]
+                grad_x = float(0.0)
+                for k in range(ORDER_PLUS_ONE):  # Same product-rule accumulation along x.
+                    if k != node_i:
+                        delta_x = coords[0] - LOBATTO_COORDS[k]
+                        grad_x = grad_x * delta_x + prefix_x
+                        prefix_x *= delta_x
+            grad = wp.vec2(grad_x, grad_y)
+            return grad
+        return element_inner_weight_gradient
+    def element_node_triangulation(self):
+        if self.ORDER == 2:
+            element_triangles = [
+                [0, 4, 5],
+                [5, 4, 6],
+                [5, 6, 1],
+                [4, 2, 7],
+                [4, 7, 6],
+                [6, 7, 3],
+            ]
+        else:
+            element_triangles = [
+                [0, 4, 5],
+                [2, 7, 8],
+                [3, 10, 11],
+                [1, 9, 6],
+                [5, 6, 9],
+                [5, 4, 6],
+                [8, 11, 10],
+                [8, 7, 11],
+                [4, 8, 10],
+                [4, 10, 6],
+            ]
+        return element_triangles
+class SquareNonConformingPolynomialShapeFunctions:
+    """Shape functions on the reference square that delegate to triangle shape functions.
+
+    The class wraps a ``Triangle2DPolynomialShapeFunctions`` instance and maps
+    between square coordinates and the coordinates of a triangle placed inside
+    the square, using the linear map and offset defined below.
+    """
+    # embeds the largest equilateral triangle centered at (0.5, 0.5) into the reference square
+    _tri_height = 0.75
+    # side length of an equilateral triangle with the height above (side = 2 * height / sqrt(3))
+    _tri_side = 2.0 / math.sqrt(3.0) * _tri_height
+    # 2x2 matrix whose columns are the edge vectors (side, 0) and (side / 2, height)
+    _tri_to_square = np.array([[_tri_side, _tri_side / 2.0], [0.0, _tri_height]])
+    # translation added after applying `_tri_to_square`; with it the triangle's centroid
+    # (one third of the height above the base, halfway along it) lands on (0.5, 0.5)
+    _TRI_OFFSET = wp.constant(wp.vec2(0.5 - 0.5 * _tri_side, 0.5 - _tri_height / 3.0))
+    def __init__(self, degree: int):
+        self._tri_shape = Triangle2DPolynomialShapeFunctions(degree=degree)
+        self.ORDER = self._tri_shape.ORDER
+        self.NODES_PER_ELEMENT = self._tri_shape.NODES_PER_ELEMENT
+        self.element_node_triangulation = self._tri_shape.element_node_triangulation
+    @property
+    def name(self) -> str:
+        """Identifier built from ``ORDER``; used as the ``suffix`` of the generated functions."""
+        return f"Square_P{self.ORDER}d"
+    def make_node_coords_in_element(self):
+        """Build the function returning a node's coordinates in the reference square.
+
+        Returns:
+            A function registered through ``cache.dynamic_func`` (suffixed with
+            ``self.name``) taking ``node_index_in_elt`` and returning ``Coords``
+            whose third component is always 0.0.
+        """
+        # NOTE: the local is named "..._in_tet" but it comes from the wrapped triangle shape functions
+        node_coords_in_tet = self._tri_shape.make_node_coords_in_element()
+        # 2x2 matrix built from the class-level `_tri_to_square` array
+        TRI_TO_SQUARE = wp.constant(wp.mat22(self._tri_to_square))
+        @cache.dynamic_func(suffix=self.name)
+        def node_coords_in_element(
+            node_index_in_elt: int,
+        ):
+            # node position as returned by the triangle shape functions
+            tri_coords = node_coords_in_tet(node_index_in_elt)
+            # only components 1 and 2 are used: apply the linear map, then add the offset
+            coords = (
+                TRI_TO_SQUARE * wp.vec2(tri_coords[1], tri_coords[2])
+            ) + SquareNonConformingPolynomialShapeFunctions._TRI_OFFSET
+            return Coords(coords[0], coords[1], 0.0)
+        return node_coords_in_element
+    def make_node_quadrature_weight(self):
+        """Build the function returning the quadrature weight associated with a node.
+
+        Returns:
+            For ``ORDER == 2``, a function returning one of two hard-coded
+            weights depending on whether the node is a triangle vertex; for any
+            other order, a function returning ``1 / NODES_PER_ELEMENT`` for
+            every node. Both are registered through ``cache.dynamic_func``.
+        """
+        # only read by the uniform-weight function at the end of this method
+        NODES_PER_ELEMENT = self.NODES_PER_ELEMENT
+        if self.ORDER == 2:
+            # Intrinsic quadrature (order 2)
+            @cache.dynamic_func(suffix=self.name)
+            def node_quadrature_weight_quadratic(
+                node_index_in_elt: int,
+            ):
+                # type_index is not used here; only the node type selects the weight
+                node_type, type_index = self._tri_shape.node_type_and_type_index(node_index_in_elt)
+                if node_type == Triangle2DPolynomialShapeFunctions.VERTEX:
+                    # approximately 5/27
+                    return 0.18518521
+                # approximately 4/27
+                return 0.14814811
+            return node_quadrature_weight_quadratic
+        @cache.dynamic_func(suffix=self.name)
+        def node_uniform_quadrature_weight(
+            node_index_in_elt: int,
+        ):
+            # same weight for every node; the weights sum to 1 over the element's nodes
+            return 1.0 / float(NODES_PER_ELEMENT)
+        return node_uniform_quadrature_weight
+    def make_trace_node_quadrature_weight(self):
+        """Build the function returning a node's quadrature weight on the element sides.
+
+        Returns:
+            A ``wp.func`` that ignores its ``node_index_in_elt`` argument and
+            always returns 0.0.
+        """
+        # Non-conforming, zero measure on sides
+        # NOTE: declared with plain `wp.func` rather than `cache.dynamic_func`; it captures nothing from `self`
+        @wp.func
+        def zero(node_index_in_elt: int):
+            return 0.0
+        return zero
+    def make_element_inner_weight(self):
+        """Build the function evaluating a node's shape function at square coordinates.
+
+        Returns:
+            A function registered through ``cache.dynamic_func`` (suffixed with
+            ``self.name``) taking ``(coords, node_index_in_elt)``; it converts
+            ``coords`` to triangle coordinates and returns the wrapped triangle
+            shape's inner weight there.
+        """
+        tri_inner_weight = self._tri_shape.make_element_inner_weight()
+        # inverse of the class-level triangle-to-square matrix
+        SQUARE_TO_TRI = wp.constant(wp.mat22(np.linalg.inv(self._tri_to_square)))
+        @cache.dynamic_func(suffix=self.name)
+        def element_inner_weight(
+            coords: Coords,
+            node_index_in_elt: int,
+        ):
+            # undo the offset, then the linear map (only coords[0] and coords[1] are used)
+            tri_param = SQUARE_TO_TRI * (
+                wp.vec2(coords[0], coords[1]) - SquareNonConformingPolynomialShapeFunctions._TRI_OFFSET
+            )
+            # first component is chosen so that the three triangle coordinates sum to 1
+            tri_coords = Coords(1.0 - tri_param[0] - tri_param[1], tri_param[0], tri_param[1])
+            return tri_inner_weight(tri_coords, node_index_in_elt)
+        return element_inner_weight
+    def make_element_inner_weight_gradient(self):
+        """Build the function evaluating the gradient of a node's shape function.
+
+        Returns:
+            A function registered through ``cache.dynamic_func`` (suffixed with
+            ``self.name``) taking ``(coords, node_index_in_elt)``; it evaluates
+            the wrapped triangle shape's gradient at the corresponding triangle
+            coordinates and multiplies it by the transpose of the
+            square-to-triangle matrix.
+        """
+        tri_inner_weight_gradient = self._tri_shape.make_element_inner_weight_gradient()
+        # inverse of the class-level triangle-to-square matrix
+        SQUARE_TO_TRI = wp.constant(wp.mat22(np.linalg.inv(self._tri_to_square)))
+        @cache.dynamic_func(suffix=self.name)
+        def element_inner_weight_gradient(
+            coords: Coords,
+            node_index_in_elt: int,
+        ):
+            # undo the offset, then the linear map (only coords[0] and coords[1] are used)
+            tri_param = SQUARE_TO_TRI * (
+                wp.vec2(coords[0], coords[1]) - SquareNonConformingPolynomialShapeFunctions._TRI_OFFSET
+            )
+            # first component is chosen so that the three triangle coordinates sum to 1
+            tri_coords = Coords(1.0 - tri_param[0] - tri_param[1], tri_param[0], tri_param[1])
+            grad = tri_inner_weight_gradient(tri_coords, node_index_in_elt)
+            # chain rule for the linear change of variables
+            # NOTE(review): assumes the triangle gradient is taken w.r.t. (tri_param[0], tri_param[1]) - confirm
+            return wp.transpose(SQUARE_TO_TRI) * grad
+        return element_inner_weight_gradient