PyPI - warp-lang - Versions diffs - 1.0.0b2__py3-none-win_amd64.whl → 1.0.0b6__py3-none-win_amd64.whl - Mend

warp-lang 1.0.0b2__py3-none-win_amd64.whl → 1.0.0b6__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of warp-lang might be problematic. See the release's details page on the registry viewer for more information.

Files changed (271)

docs/conf.py +17 -5
examples/env/env_ant.py +1 -1
examples/env/env_cartpole.py +1 -1
examples/env/env_humanoid.py +1 -1
examples/env/env_usd.py +4 -1
examples/env/environment.py +8 -9
examples/example_dem.py +34 -33
examples/example_diffray.py +364 -337
examples/example_fluid.py +32 -23
examples/example_jacobian_ik.py +97 -93
examples/example_marching_cubes.py +6 -16
examples/example_mesh.py +6 -16
examples/example_mesh_intersect.py +16 -14
examples/example_nvdb.py +14 -16
examples/example_raycast.py +14 -13
examples/example_raymarch.py +16 -23
examples/example_render_opengl.py +19 -10
examples/example_sim_cartpole.py +82 -78
examples/example_sim_cloth.py +45 -48
examples/example_sim_fk_grad.py +51 -44
examples/example_sim_fk_grad_torch.py +47 -40
examples/example_sim_grad_bounce.py +108 -133
examples/example_sim_grad_cloth.py +99 -113
examples/example_sim_granular.py +5 -6
examples/{example_sim_sdf_shape.py → example_sim_granular_collision_sdf.py} +37 -26
examples/example_sim_neo_hookean.py +51 -55
examples/example_sim_particle_chain.py +4 -4
examples/example_sim_quadruped.py +126 -81
examples/example_sim_rigid_chain.py +54 -61
examples/example_sim_rigid_contact.py +66 -70
examples/example_sim_rigid_fem.py +3 -3
examples/example_sim_rigid_force.py +1 -1
examples/example_sim_rigid_gyroscopic.py +3 -4
examples/example_sim_rigid_kinematics.py +28 -39
examples/example_sim_trajopt.py +112 -110
examples/example_sph.py +9 -8
examples/example_wave.py +7 -7
examples/fem/bsr_utils.py +30 -17
examples/fem/example_apic_fluid.py +85 -69
examples/fem/example_convection_diffusion.py +97 -93
examples/fem/example_convection_diffusion_dg.py +142 -149
examples/fem/example_convection_diffusion_dg0.py +141 -136
examples/fem/example_deformed_geometry.py +146 -0
examples/fem/example_diffusion.py +115 -84
examples/fem/example_diffusion_3d.py +116 -86
examples/fem/example_diffusion_mgpu.py +102 -79
examples/fem/example_mixed_elasticity.py +139 -100
examples/fem/example_navier_stokes.py +175 -162
examples/fem/example_stokes.py +143 -111
examples/fem/example_stokes_transfer.py +186 -157
examples/fem/mesh_utils.py +59 -97
examples/fem/plot_utils.py +138 -17
tools/ci/publishing/build_nodes_info.py +54 -0
warp/__init__.py +4 -3
warp/__init__.pyi +1 -0
warp/bin/warp-clang.dll +0 -0
warp/bin/warp.dll +0 -0
warp/build.py +5 -3
warp/build_dll.py +29 -9
warp/builtins.py +836 -492
warp/codegen.py +864 -553
warp/config.py +3 -1
warp/context.py +389 -172
warp/fem/__init__.py +24 -6
warp/fem/cache.py +318 -25
warp/fem/dirichlet.py +7 -3
warp/fem/domain.py +14 -0
warp/fem/field/__init__.py +30 -38
warp/fem/field/field.py +149 -0
warp/fem/field/nodal_field.py +244 -138
warp/fem/field/restriction.py +8 -6
warp/fem/field/test.py +127 -59
warp/fem/field/trial.py +117 -60
warp/fem/geometry/__init__.py +5 -1
warp/fem/geometry/deformed_geometry.py +271 -0
warp/fem/geometry/element.py +24 -1
warp/fem/geometry/geometry.py +86 -14
warp/fem/geometry/grid_2d.py +112 -54
warp/fem/geometry/grid_3d.py +134 -65
warp/fem/geometry/hexmesh.py +953 -0
warp/fem/geometry/partition.py +85 -33
warp/fem/geometry/quadmesh_2d.py +532 -0
warp/fem/geometry/tetmesh.py +451 -115
warp/fem/geometry/trimesh_2d.py +197 -92
warp/fem/integrate.py +534 -268
warp/fem/operator.py +58 -31
warp/fem/polynomial.py +11 -0
warp/fem/quadrature/__init__.py +1 -1
warp/fem/quadrature/pic_quadrature.py +150 -58
warp/fem/quadrature/quadrature.py +209 -57
warp/fem/space/__init__.py +230 -53
warp/fem/space/basis_space.py +489 -0
warp/fem/space/collocated_function_space.py +105 -0
warp/fem/space/dof_mapper.py +49 -2
warp/fem/space/function_space.py +90 -39
warp/fem/space/grid_2d_function_space.py +149 -496
warp/fem/space/grid_3d_function_space.py +173 -538
warp/fem/space/hexmesh_function_space.py +352 -0
warp/fem/space/partition.py +129 -76
warp/fem/space/quadmesh_2d_function_space.py +369 -0
warp/fem/space/restriction.py +46 -34
warp/fem/space/shape/__init__.py +15 -0
warp/fem/space/shape/cube_shape_function.py +738 -0
warp/fem/space/shape/shape_function.py +103 -0
warp/fem/space/shape/square_shape_function.py +611 -0
warp/fem/space/shape/tet_shape_function.py +567 -0
warp/fem/space/shape/triangle_shape_function.py +429 -0
warp/fem/space/tetmesh_function_space.py +132 -1039
warp/fem/space/topology.py +295 -0
warp/fem/space/trimesh_2d_function_space.py +104 -742
warp/fem/types.py +13 -11
warp/fem/utils.py +335 -60
warp/native/array.h +120 -34
warp/native/builtin.h +101 -72
warp/native/bvh.cpp +73 -325
warp/native/bvh.cu +406 -23
warp/native/bvh.h +22 -40
warp/native/clang/clang.cpp +1 -0
warp/native/crt.h +2 -0
warp/native/cuda_util.cpp +8 -3
warp/native/cuda_util.h +1 -0
warp/native/exports.h +1522 -1243
warp/native/intersect.h +19 -4
warp/native/intersect_adj.h +8 -8
warp/native/mat.h +76 -17
warp/native/mesh.cpp +33 -108
warp/native/mesh.cu +114 -18
warp/native/mesh.h +395 -40
warp/native/noise.h +272 -329
warp/native/quat.h +51 -8
warp/native/rand.h +44 -34
warp/native/reduce.cpp +1 -1
warp/native/sparse.cpp +4 -4
warp/native/sparse.cu +163 -155
warp/native/spatial.h +2 -2
warp/native/temp_buffer.h +18 -14
warp/native/vec.h +103 -21
warp/native/warp.cpp +2 -1
warp/native/warp.cu +28 -3
warp/native/warp.h +4 -3
warp/render/render_opengl.py +261 -109
warp/sim/__init__.py +1 -2
warp/sim/articulation.py +385 -185
warp/sim/import_mjcf.py +59 -48
warp/sim/import_urdf.py +15 -15
warp/sim/import_usd.py +174 -102
warp/sim/inertia.py +17 -18
warp/sim/integrator_xpbd.py +4 -3
warp/sim/model.py +330 -250
warp/sim/render.py +1 -1
warp/sparse.py +625 -152
warp/stubs.py +341 -309
warp/tape.py +9 -6
warp/tests/__main__.py +3 -6
warp/tests/assets/curlnoise_golden.npy +0 -0
warp/tests/assets/pnoise_golden.npy +0 -0
warp/tests/{test_class_kernel.py → aux_test_class_kernel.py} +9 -1
warp/tests/aux_test_conditional_unequal_types_kernels.py +21 -0
warp/tests/{test_dependent.py → aux_test_dependent.py} +2 -2
warp/tests/{test_reference.py → aux_test_reference.py} +1 -1
warp/tests/aux_test_unresolved_func.py +14 -0
warp/tests/aux_test_unresolved_symbol.py +14 -0
warp/tests/disabled_kinematics.py +239 -0
warp/tests/run_coverage_serial.py +31 -0
warp/tests/test_adam.py +103 -106
warp/tests/test_arithmetic.py +94 -74
warp/tests/test_array.py +82 -101
warp/tests/test_array_reduce.py +57 -23
warp/tests/test_atomic.py +64 -28
warp/tests/test_bool.py +22 -12
warp/tests/test_builtins_resolution.py +1292 -0
warp/tests/test_bvh.py +18 -18
warp/tests/test_closest_point_edge_edge.py +54 -57
warp/tests/test_codegen.py +165 -134
warp/tests/test_compile_consts.py +28 -20
warp/tests/test_conditional.py +108 -24
warp/tests/test_copy.py +10 -12
warp/tests/test_ctypes.py +112 -88
warp/tests/test_dense.py +21 -14
warp/tests/test_devices.py +98 -0
warp/tests/test_dlpack.py +75 -75
warp/tests/test_examples.py +237 -0
warp/tests/test_fabricarray.py +22 -24
warp/tests/test_fast_math.py +15 -11
warp/tests/test_fem.py +1034 -124
warp/tests/test_fp16.py +23 -16
warp/tests/test_func.py +187 -86
warp/tests/test_generics.py +194 -49
warp/tests/test_grad.py +123 -181
warp/tests/test_grad_customs.py +176 -0
warp/tests/test_hash_grid.py +35 -34
warp/tests/test_import.py +10 -23
warp/tests/test_indexedarray.py +24 -25
warp/tests/test_intersect.py +18 -9
warp/tests/test_large.py +141 -0
warp/tests/test_launch.py +14 -41
warp/tests/test_lerp.py +64 -65
warp/tests/test_lvalue.py +493 -0
warp/tests/test_marching_cubes.py +12 -13
warp/tests/test_mat.py +517 -2898
warp/tests/test_mat_lite.py +115 -0
warp/tests/test_mat_scalar_ops.py +2889 -0
warp/tests/test_math.py +103 -9
warp/tests/test_matmul.py +304 -69
warp/tests/test_matmul_lite.py +410 -0
warp/tests/test_mesh.py +60 -22
warp/tests/test_mesh_query_aabb.py +21 -25
warp/tests/test_mesh_query_point.py +111 -22
warp/tests/test_mesh_query_ray.py +12 -24
warp/tests/test_mlp.py +30 -22
warp/tests/test_model.py +92 -89
warp/tests/test_modules_lite.py +39 -0
warp/tests/test_multigpu.py +88 -114
warp/tests/test_noise.py +12 -11
warp/tests/test_operators.py +16 -20
warp/tests/test_options.py +11 -11
warp/tests/test_pinned.py +17 -18
warp/tests/test_print.py +32 -11
warp/tests/test_quat.py +275 -129
warp/tests/test_rand.py +18 -16
warp/tests/test_reload.py +38 -34
warp/tests/test_rounding.py +50 -43
warp/tests/test_runlength_encode.py +168 -20
warp/tests/test_smoothstep.py +9 -11
warp/tests/test_snippet.py +143 -0
warp/tests/test_sparse.py +261 -63
warp/tests/test_spatial.py +276 -243
warp/tests/test_streams.py +110 -85
warp/tests/test_struct.py +268 -63
warp/tests/test_tape.py +39 -21
warp/tests/test_torch.py +90 -86
warp/tests/test_transient_module.py +10 -12
warp/tests/test_types.py +363 -0
warp/tests/test_utils.py +451 -0
warp/tests/test_vec.py +354 -2050
warp/tests/test_vec_lite.py +73 -0
warp/tests/test_vec_scalar_ops.py +2099 -0
warp/tests/test_volume.py +418 -376
warp/tests/test_volume_write.py +124 -134
warp/tests/unittest_serial.py +35 -0
warp/tests/unittest_suites.py +291 -0
warp/tests/unittest_utils.py +342 -0
warp/tests/{test_misc.py → unused_test_misc.py} +13 -5
warp/tests/{test_debug.py → walkthough_debug.py} +3 -17
warp/thirdparty/appdirs.py +36 -45
warp/thirdparty/unittest_parallel.py +589 -0
warp/types.py +622 -211
warp/utils.py +54 -393
warp_lang-1.0.0b6.dist-info/METADATA +238 -0
warp_lang-1.0.0b6.dist-info/RECORD +409 -0
{warp_lang-1.0.0b2.dist-info → warp_lang-1.0.0b6.dist-info}/WHEEL +1 -1
examples/example_cache_management.py +0 -40
examples/example_multigpu.py +0 -54
examples/example_struct.py +0 -65
examples/fem/example_stokes_transfer_3d.py +0 -210
warp/bin/warp-clang.so +0 -0
warp/bin/warp.so +0 -0
warp/fem/field/discrete_field.py +0 -80
warp/fem/space/nodal_function_space.py +0 -233
warp/tests/test_all.py +0 -223
warp/tests/test_array_scan.py +0 -60
warp/tests/test_base.py +0 -208
warp/tests/test_unresolved_func.py +0 -7
warp/tests/test_unresolved_symbol.py +0 -7
warp_lang-1.0.0b2.dist-info/METADATA +0 -26
warp_lang-1.0.0b2.dist-info/RECORD +0 -380
/warp/tests/{test_compile_consts_dummy.py → aux_test_compile_consts_dummy.py} +0 -0
/warp/tests/{test_reference_reference.py → aux_test_reference_reference.py} +0 -0
/warp/tests/{test_square.py → aux_test_square.py} +0 -0
{warp_lang-1.0.0b2.dist-info → warp_lang-1.0.0b6.dist-info}/LICENSE.md +0 -0
{warp_lang-1.0.0b2.dist-info → warp_lang-1.0.0b6.dist-info}/top_level.txt +0 -0

warp/fem/types.py CHANGED Viewed

@@ -1,13 +1,9 @@
 import warp as wp
-vec1i = wp.types.vector(length=1, dtype=wp.int32)
-vec2i = wp.types.vector(length=2, dtype=wp.int32)
-vec3i = wp.types.vector(length=3, dtype=wp.int32)
-vec4i = wp.types.vector(length=4, dtype=wp.int32)
-vec8i = wp.types.vector(length=8, dtype=wp.int32)
-vec6 = wp.types.vector(length=6, dtype=wp.float32)
+# kept to avoid breaking existing example code, no longer used internally
+vec2i = wp.vec2i
+vec3i = wp.vec3i
+vec4i = wp.vec4i
 Coords = wp.vec3
 OUTSIDE = wp.constant(-1.0e8)
@@ -20,7 +16,7 @@ NULL_ELEMENT_INDEX = wp.constant(-1)
 NULL_QP_INDEX = wp.constant(-1)
 NULL_NODE_INDEX = wp.constant(-1)
-DofIndex = vec2i
+DofIndex = wp.vec2i
 """Opaque descriptor for indexing degrees of freedom within elements"""
 NULL_DOF_INDEX = wp.constant(DofIndex(-1, -1))
@@ -59,12 +55,18 @@ class Sample:
     """For bilinear form assembly, index of the trial degree-of-freedom currently being considered"""
+@wp.func
+def make_free_sample(element_index: ElementIndex, element_coords: Coords):
+    """Returns a :class:`Sample` that is not associated to any quadrature point or dof"""
+    return Sample(element_index, element_coords, NULL_QP_INDEX, 0.0, NULL_DOF_INDEX, NULL_DOF_INDEX)
 class Field:
     """
     Tag for field-like integrand arguments
     """
-    call_operator: "wp.fem.Operator" = None  # Set in operator.py
+    call_operator: "warp.fem.operator.Operator" = None  # Set in operator.py
 class Domain:
@@ -72,4 +74,4 @@ class Domain:
     Tag for domain-like integrand arguments
     """
-    call_operator: "wp.fem.Operator" = None  # Set in operator.py
+    call_operator: "warp.fem.operator.Operator" = None  # Set in operator.py

warp/fem/utils.py CHANGED Viewed

@@ -1,83 +1,123 @@
 from typing import Any, Tuple
-import warp as wp
-from warp.utils import radix_sort_pairs, runlength_encode, array_scan
+import numpy as np
-from .types import vec6
+import warp as wp
+from warp.fem.cache import (
+    Temporary,
+    TemporaryStore,
+    borrow_temporary,
+    borrow_temporary_like,
+)
+from warp.utils import array_scan, radix_sort_pairs, runlength_encode
 @wp.func
-def generalized_outer(x: Any, y: wp.vec2):
+def generalized_outer(x: Any, y: Any):
+    """Generalized outer product allowing for the first argument to be a scalar"""
     return wp.outer(x, y)
 @wp.func
-def generalized_outer(x: Any, y: wp.vec3):
-    return wp.outer(x, y)
+def generalized_outer(x: wp.float32, y: wp.vec2):
+    return x * y
 @wp.func
-def generalized_outer(x: Any, y: wp.float32):
+def generalized_outer(x: wp.float32, y: wp.vec3):
     return x * y
 @wp.func
-def unit_element(template_type: wp.float32, coord: int):
-    return 1.0
+def generalized_inner(x: Any, y: Any):
+    """Generalized inner product allowing for the first argument to be a tensor"""
+    return wp.dot(x, y)
 @wp.func
-def unit_element(template_type: wp.vec2, coord: int):
-    t = wp.vec2(0.0)
-    t[coord] = 1.0
-    return t
+def generalized_inner(x: wp.mat22, y: wp.vec2):
+    return x[0] * y[0] + x[1] * y[1]
 @wp.func
-def unit_element(template_type: wp.vec3, coord: int):
-    t = wp.vec3(0.0)
-    t[coord] = 1.0
-    return t
+def generalized_inner(x: wp.mat33, y: wp.vec3):
+    return x[0] * y[0] + x[1] * y[1] + x[2] * y[2]
+@wp.func
+def apply_right(x: Any, y: Any):
+    """Performs x y multiplication with y a square matrix and x either a row-vector or a matrix.
+    Will be removed once native @ operator is implemented.
+    """
+    return x * y
 @wp.func
-def unit_element(template_type: vec6, coord: int):
-    t = vec6(0.0)
+def apply_right(x: wp.vec2, y: wp.mat22):
+    return x[0] * y[0] + x[1] * y[1]
+@wp.func
+def apply_right(x: wp.vec3, y: wp.mat33):
+    return x[0] * y[0] + x[1] * y[1] + x[2] * y[2]
+@wp.func
+def unit_element(template_type: Any, coord: int):
+    """Returns a instance of `template_type` with a single coordinate set to 1 in the canonical basis"""
+    t = type(template_type)(0.0)
     t[coord] = 1.0
     return t
+@wp.func
+def unit_element(template_type: wp.float32, coord: int):
+    return 1.0
 @wp.func
 def unit_element(template_type: wp.mat22, coord: int):
     t = wp.mat22(0.0)
-    t[coord // 2, coord % 2] = 1.0
+    row = coord // 2
+    col = coord - 2 * row
+    t[row, col] = 1.0
     return t
 @wp.func
 def unit_element(template_type: wp.mat33, coord: int):
-    t = wp.mat22(0.0)
-    t[coord // 3, coord % 3] = 1.0
+    t = wp.mat33(0.0)
+    row = coord // 3
+    col = coord - 3 * row
+    t[row, col] = 1.0
     return t
 @wp.func
-def symmetric_part(x: wp.mat22):
-    off_diag = 0.5 * (x[0, 1] + x[1, 0])
-    return wp.mat22(x[0, 0], off_diag, off_diag, x[1, 1])
+def symmetric_part(x: Any):
+    """Symmetric part of a square tensor"""
+    return 0.5 * (x + wp.transpose(x))
 @wp.func
-def symmetric_part(x: wp.mat33):
-    d = 0.5 * (x[1, 2] + x[2, 1])
-    e = 0.5 * (x[2, 0] + x[0, 2])
-    f = 0.5 * (x[0, 1] + x[1, 0])
-    return wp.mat33(x[0, 0], f, e, f, x[1, 1], d, e, d, x[2, 2])
+def skew_part(x: wp.mat22):
+    """Skew part of a 2x2 tensor as corresponding rotation angle"""
+    return 0.5 * (x[1, 0] - x[0, 1])
+@wp.func
+def skew_part(x: wp.mat33):
+    """Skew part of a 3x3 tensor as the corresponding rotation vector"""
+    a = 0.5 * (x[2, 1] - x[1, 2])
+    b = 0.5 * (x[0, 2] - x[2, 0])
+    c = 0.5 * (x[1, 0] - x[0, 1])
+    return wp.vec3(a, b, c)
 def compress_node_indices(
-    node_count: int, node_indices: wp.array(dtype=int)
-) -> Tuple[wp.array, wp.array, int, wp.array]:
+    node_count: int, node_indices: wp.array(dtype=int), temporary_store: TemporaryStore = None
+) -> Tuple[Temporary, Temporary, int, Temporary]:
     """
     Compress an unsorted list of node indices into:
      - a node_offsets array, giving for each node the start offset of corresponding indices in sorted_array_indices
@@ -87,8 +127,14 @@ def compress_node_indices(
     """
     index_count = node_indices.size
-    sorted_node_indices = wp.empty(2 * index_count, dtype=int, device=node_indices.device)
-    sorted_array_indices = wp.empty_like(sorted_node_indices)
+    sorted_node_indices_temp = borrow_temporary(
+        temporary_store, shape=2 * index_count, dtype=int, device=node_indices.device
+    )
+    sorted_array_indices_temp = borrow_temporary_like(sorted_node_indices_temp, temporary_store)
+    sorted_node_indices = sorted_node_indices_temp.array
+    sorted_array_indices = sorted_array_indices_temp.array
     wp.copy(dest=sorted_node_indices, src=node_indices, count=index_count)
@@ -104,14 +150,44 @@ def compress_node_indices(
     radix_sort_pairs(sorted_node_indices, sorted_array_indices, count=index_count)
     # Build prefix sum of number of elements per node
-    unique_node_indices = wp.empty(n=index_count, dtype=int, device=node_indices.device)
-    node_element_counts = wp.empty(n=index_count, dtype=int, device=node_indices.device)
-    unique_node_count = runlength_encode(
-        sorted_node_indices, unique_node_indices, node_element_counts, value_count=index_count
+    unique_node_indices_temp = borrow_temporary(
+        temporary_store, shape=index_count, dtype=int, device=node_indices.device
+    )
+    node_element_counts_temp = borrow_temporary(
+        temporary_store, shape=index_count, dtype=int, device=node_indices.device
+    )
+    unique_node_indices = unique_node_indices_temp.array
+    node_element_counts = node_element_counts_temp.array
+    unique_node_count_dev = borrow_temporary(temporary_store, shape=(1,), dtype=int, device=sorted_node_indices.device)
+    runlength_encode(
+        sorted_node_indices,
+        unique_node_indices,
+        node_element_counts,
+        value_count=index_count,
+        run_count=unique_node_count_dev.array,
     )
+    # Transfer unique node count to host
+    if node_indices.device.is_cuda:
+        unique_node_count_host = borrow_temporary(temporary_store, shape=(1,), dtype=int, pinned=True, device="cpu")
+        wp.copy(src=unique_node_count_dev.array, dest=unique_node_count_host.array, count=1)
+        wp.synchronize_stream(wp.get_stream(node_indices.device))
+        unique_node_count_dev.release()
+        unique_node_count = int(unique_node_count_host.array.numpy()[0])
+        unique_node_count_host.release()
+    else:
+        unique_node_count = int(unique_node_count_dev.array.numpy()[0])
+        unique_node_count_dev.release()
     # Scatter seen run counts to global array of element count per node
-    node_offsets = wp.zeros(shape=(node_count + 1), device=node_element_counts.device, dtype=int)
+    node_offsets_temp = borrow_temporary(
+        temporary_store, shape=(node_count + 1), device=node_element_counts.device, dtype=int
+    )
+    node_offsets = node_offsets_temp.array
+    node_offsets.zero_()
     wp.launch(
         kernel=_scatter_node_counts,
         dim=unique_node_count,
@@ -122,51 +198,47 @@ def compress_node_indices(
     # Prefix sum of number of elements per node
     array_scan(node_offsets, node_offsets, inclusive=True)
-    return node_offsets, sorted_array_indices, unique_node_count, unique_node_indices
-_pinned_temp_count_buffer = {}
+    sorted_node_indices_temp.release()
+    node_element_counts_temp.release()
-def _get_pinned_temp_count_buffer(device):
-    device = str(device)
-    if device not in _pinned_temp_count_buffer:
-        _pinned_temp_count_buffer[device] = wp.empty(shape=(1,), dtype=int, pinned=True, device="cpu")
+    return node_offsets_temp, sorted_array_indices_temp, unique_node_count, unique_node_indices_temp
-    return _pinned_temp_count_buffer[device]
-def masked_indices(mask: wp.array(dtype=int), missing_index=-1) -> Tuple[wp.array, wp.array]:
+def masked_indices(
+    mask: wp.array, missing_index=-1, temporary_store: TemporaryStore = None
+) -> Tuple[Temporary, Temporary]:
     """
     From an array of boolean masks (must be either 0 or 1), returns:
       - The list of indices for which the mask is 1
       - A map associating to each element of the input mask array its local index if non-zero, or missing_index if zero.
     """
-    offsets = wp.empty_like(mask)
+    offsets_temp = borrow_temporary_like(mask, temporary_store)
+    offsets = offsets_temp.array
     wp.utils.array_scan(mask, offsets, inclusive=True)
     # Get back total counts on host
     if offsets.device.is_cuda:
-        masked_count = _get_pinned_temp_count_buffer(offsets.device)
-        wp.copy(dest=masked_count, src=offsets, src_offset=offsets.shape[0] - 1, count=1)
-        wp.synchronize_stream(wp.get_stream())
-        masked_count = int(masked_count.numpy()[0])
+        masked_count_temp = borrow_temporary(temporary_store, shape=1, dtype=int, pinned=True, device="cpu")
+        wp.copy(dest=masked_count_temp.array, src=offsets, src_offset=offsets.shape[0] - 1, count=1)
+        wp.synchronize_stream(wp.get_stream(offsets.device))
+        masked_count = int(masked_count_temp.array.numpy()[0])
+        masked_count_temp.release()
     else:
         masked_count = int(offsets.numpy()[-1])
     # Convert counts to indices
-    indices = wp.empty(n=masked_count, device=mask.device, dtype=int)
+    indices_temp = borrow_temporary(temporary_store, shape=masked_count, device=mask.device, dtype=int)
     wp.launch(
         kernel=_masked_indices_kernel,
         dim=offsets.shape,
-        inputs=[missing_index, mask, offsets, indices, offsets],
+        inputs=[missing_index, mask, offsets, indices_temp.array, offsets],
         device=mask.device,
     )
-    return indices, offsets
+    return indices_temp, offsets_temp
 def array_axpy(x: wp.array, y: wp.array, alpha: float = 1.0, beta: float = 1.0):
@@ -177,7 +249,7 @@ def array_axpy(x: wp.array, y: wp.array, alpha: float = 1.0, beta: float = 1.0):
     alpha = dtype(alpha)
     beta = dtype(beta)
-    if x.dtype != y.dtype or x.shape != y.shape or x.device != y.device:
+    if not wp.types.types_equal(x.dtype, y.dtype) or x.shape != y.shape or x.device != y.device:
         raise ValueError("x and y arrays must have same dat atype, shape and device")
     wp.launch(kernel=_array_axpy_kernel, dim=x.shape, device=x.device, inputs=[x, y, alpha, beta])
@@ -218,3 +290,206 @@ def _masked_indices_kernel(
 def _array_axpy_kernel(x: wp.array(dtype=Any), y: wp.array(dtype=Any), alpha: Any, beta: Any):
     i = wp.tid()
     y[i] = beta * y[i] + alpha * x[i]
+def grid_to_tris(Nx: int, Ny: int):
+    """Constructs a triangular mesh topology by dividing each cell of a dense 2D grid into two triangles.
+    The resulting triangles will be oriented counter-clockwise assuming that `y` is the fastest moving index direction
+    Args:
+        Nx: Resolution of the grid along `x` dimension
+        Ny: Resolution of the grid along `y` dimension
+    Returns:
+        Array of shape (2 * Nx * Ny, 3) containing vertex indices for each triangle
+    """
+    cx, cy = np.meshgrid(np.arange(Nx, dtype=int), np.arange(Ny, dtype=int), indexing="ij")
+    vidx = np.transpose(
+        np.array(
+            [
+                (Ny + 1) * cx + cy,
+                (Ny + 1) * (cx + 1) + cy,
+                (Ny + 1) * (cx + 1) + (cy + 1),
+                (Ny + 1) * cx + cy,
+                (Ny + 1) * (cx + 1) + (cy + 1),
+                (Ny + 1) * (cx) + (cy + 1),
+            ]
+        )
+    ).reshape((-1, 3))
+    return vidx
+def grid_to_tets(Nx: int, Ny: int, Nz: int):
+    """Constructs a tetrahedral mesh topology by diving each cell of a dense 3D grid into five tetrahedrons
+    The resulting tets have positive volume assuming that `z` is the fastest moving index direction
+    Args:
+        Nx: Resolution of the grid along `x` dimension
+        Ny: Resolution of the grid along `y` dimension
+        Nz: Resolution of the grid along `z` dimension
+    Returns:
+        Array of shape (5 * Nx * Ny * Nz, 4) containing vertex indices for each tet
+    """
+    # Global node indices for each cell
+    cx, cy, cz = np.meshgrid(
+        np.arange(Nx, dtype=int), np.arange(Ny, dtype=int), np.arange(Nz, dtype=int), indexing="ij"
+    )
+    grid_vidx = np.array(
+        [
+            (Ny + 1) * (Nz + 1) * cx + (Nz + 1) * cy + cz,
+            (Ny + 1) * (Nz + 1) * cx + (Nz + 1) * cy + cz + 1,
+            (Ny + 1) * (Nz + 1) * cx + (Nz + 1) * (cy + 1) + cz,
+            (Ny + 1) * (Nz + 1) * cx + (Nz + 1) * (cy + 1) + cz + 1,
+            (Ny + 1) * (Nz + 1) * (cx + 1) + (Nz + 1) * cy + cz,
+            (Ny + 1) * (Nz + 1) * (cx + 1) + (Nz + 1) * cy + cz + 1,
+            (Ny + 1) * (Nz + 1) * (cx + 1) + (Nz + 1) * (cy + 1) + cz,
+            (Ny + 1) * (Nz + 1) * (cx + 1) + (Nz + 1) * (cy + 1) + cz + 1,
+        ]
+    )
+    # decompose grid cells into 5 tets
+    tet_vidx = np.array(
+        [
+            [0, 1, 2, 4],
+            [3, 2, 1, 7],
+            [5, 1, 7, 4],
+            [6, 7, 4, 2],
+            [4, 1, 2, 7],
+        ]
+    )
+    # Convert to 3d index coordinates
+    vidx_coords = np.array(
+        [
+            [0, 0, 0],
+            [0, 0, 1],
+            [0, 1, 0],
+            [0, 1, 1],
+            [1, 0, 0],
+            [1, 0, 1],
+            [1, 1, 0],
+            [1, 1, 1],
+        ]
+    )
+    tet_coords = vidx_coords[tet_vidx]
+    # Symmetry bits for each cell
+    ox, oy, oz = np.meshgrid(
+        np.arange(Nx, dtype=int) % 2, np.arange(Ny, dtype=int) % 2, np.arange(Nz, dtype=int) % 2, indexing="ij"
+    )
+    tet_coords = np.broadcast_to(tet_coords, shape=(*ox.shape, *tet_coords.shape))
+    # Flip coordinates according to symmetry
+    ox_bk = np.broadcast_to(ox.reshape(*ox.shape, 1, 1), tet_coords.shape[:-1])
+    oy_bk = np.broadcast_to(oy.reshape(*oy.shape, 1, 1), tet_coords.shape[:-1])
+    oz_bk = np.broadcast_to(oz.reshape(*oz.shape, 1, 1), tet_coords.shape[:-1])
+    tet_coords_x = tet_coords[..., 0] ^ ox_bk
+    tet_coords_y = tet_coords[..., 1] ^ oy_bk
+    tet_coords_z = tet_coords[..., 2] ^ oz_bk
+    # Back to local vertex indices
+    corner_indices = 4 * tet_coords_x + 2 * tet_coords_y + tet_coords_z
+    # Now go from cell-local to global node indices
+    # There must be a nicer way than this, but for small grids this works
+    corner_indices = corner_indices.reshape(-1, 4)
+    grid_vidx = grid_vidx.reshape((8, -1, 1))
+    grid_vidx = np.broadcast_to(grid_vidx, shape=(8, grid_vidx.shape[1], 5))
+    grid_vidx = grid_vidx.reshape((8, -1))
+    node_indices = np.arange(corner_indices.shape[0])
+    tet_grid_vidx = np.transpose(
+        [
+            grid_vidx[corner_indices[:, 0], node_indices],
+            grid_vidx[corner_indices[:, 1], node_indices],
+            grid_vidx[corner_indices[:, 2], node_indices],
+            grid_vidx[corner_indices[:, 3], node_indices],
+        ]
+    )
+    return tet_grid_vidx
+def grid_to_quads(Nx: int, Ny: int):
+    """Constructs a quadrilateral mesh topology from a dense 2D grid
+    The resulting quads will be indexed counter-clockwise
+    Args:
+        Nx: Resolution of the grid along `x` dimension
+        Ny: Resolution of the grid along `y` dimension
+    Returns:
+        Array of shape (Nx * Ny, 4) containing vertex indices for each quadrilateral
+    """
+    quad_vtx = np.array(
+        [
+            [0, 0],
+            [1, 0],
+            [1, 1],
+            [0, 1],
+        ]
+    ).T
+    quads = np.stack(np.meshgrid(np.arange(0, Nx), np.arange(0, Ny), indexing="ij"))
+    quads_vtx_shape = (*quads.shape, quad_vtx.shape[1])
+    quads_vtx = np.broadcast_to(quads.reshape(*quads.shape, 1), quads_vtx_shape) + np.broadcast_to(
+        quad_vtx.reshape(2, 1, 1, quad_vtx.shape[1]), quads_vtx_shape
+    )
+    quad_vtx_indices = quads_vtx[0] * (Ny + 1) + quads_vtx[1]
+    return quad_vtx_indices.reshape(-1, 4)
+def grid_to_hexes(Nx: int, Ny: int, Nz: int):
+    """Constructs a hexahedral mesh topology from a dense 3D grid
+    The resulting hexes will be indexed following usual convention assuming that `z` is the fastest moving index direction
+    (counter-clockwise bottom vertices, then counter-clockwise top vertices)
+    Args:
+        Nx: Resolution of the grid along `x` dimension
+        Ny: Resolution of the grid along `y` dimension
+        Nz: Resolution of the grid along `z` dimension
+    Returns:
+        Array of shape (Nx * Ny * Nz, 8) containing vertex indices for each hexaedron
+    """
+    hex_vtx = np.array(
+        [
+            [0, 0, 0],
+            [1, 0, 0],
+            [1, 1, 0],
+            [0, 1, 0],
+            [0, 0, 1],
+            [1, 0, 1],
+            [1, 1, 1],
+            [0, 1, 1],
+        ]
+    ).T
+    hexes = np.stack(np.meshgrid(np.arange(0, Nx), np.arange(0, Ny), np.arange(0, Nz), indexing="ij"))
+    hexes_vtx_shape = (*hexes.shape, hex_vtx.shape[1])
+    hexes_vtx = np.broadcast_to(hexes.reshape(*hexes.shape, 1), hexes_vtx_shape) + np.broadcast_to(
+        hex_vtx.reshape(3, 1, 1, 1, hex_vtx.shape[1]), hexes_vtx_shape
+    )
+    hexes_vtx_indices = hexes_vtx[0] * (Nz + 1) * (Ny + 1) + hexes_vtx[1] * (Nz + 1) + hexes_vtx[2]
+    return hexes_vtx_indices.reshape(-1, 8)