PyPI - warp-lang - Versions diffs - 1.8.1__py3-none-win_amd64.whl → 1.9.0__py3-none-win_amd64.whl - Mend

warp-lang 1.8.1__py3-none-win_amd64.whl → 1.9.0__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of warp-lang might be problematic. Click here for more details.

Files changed (134)

warp/__init__.py +282 -103
warp/__init__.pyi +482 -110
warp/bin/warp-clang.dll +0 -0
warp/bin/warp.dll +0 -0
warp/build.py +93 -30
warp/build_dll.py +47 -67
warp/builtins.py +955 -137
warp/codegen.py +312 -206
warp/config.py +1 -1
warp/context.py +1249 -784
warp/examples/core/example_marching_cubes.py +1 -0
warp/examples/core/example_render_opengl.py +100 -3
warp/examples/fem/example_apic_fluid.py +98 -52
warp/examples/fem/example_convection_diffusion_dg.py +25 -4
warp/examples/fem/example_diffusion_mgpu.py +8 -3
warp/examples/fem/utils.py +68 -22
warp/fabric.py +1 -1
warp/fem/cache.py +27 -19
warp/fem/domain.py +2 -2
warp/fem/field/nodal_field.py +2 -2
warp/fem/field/virtual.py +264 -166
warp/fem/geometry/geometry.py +5 -5
warp/fem/integrate.py +129 -51
warp/fem/space/restriction.py +4 -0
warp/fem/space/shape/tet_shape_function.py +3 -10
warp/jax_experimental/custom_call.py +1 -1
warp/jax_experimental/ffi.py +2 -1
warp/marching_cubes.py +708 -0
warp/native/array.h +99 -4
warp/native/builtin.h +82 -5
warp/native/bvh.cpp +64 -28
warp/native/bvh.cu +58 -58
warp/native/bvh.h +2 -2
warp/native/clang/clang.cpp +7 -7
warp/native/coloring.cpp +8 -2
warp/native/crt.cpp +2 -2
warp/native/crt.h +3 -5
warp/native/cuda_util.cpp +41 -10
warp/native/cuda_util.h +10 -4
warp/native/exports.h +1842 -1908
warp/native/fabric.h +2 -1
warp/native/hashgrid.cpp +37 -37
warp/native/hashgrid.cu +2 -2
warp/native/initializer_array.h +1 -1
warp/native/intersect.h +2 -2
warp/native/mat.h +1910 -116
warp/native/mathdx.cpp +43 -43
warp/native/mesh.cpp +24 -24
warp/native/mesh.cu +26 -26
warp/native/mesh.h +4 -2
warp/native/nanovdb/GridHandle.h +179 -12
warp/native/nanovdb/HostBuffer.h +8 -7
warp/native/nanovdb/NanoVDB.h +517 -895
warp/native/nanovdb/NodeManager.h +323 -0
warp/native/nanovdb/PNanoVDB.h +2 -2
warp/native/quat.h +331 -14
warp/native/range.h +7 -1
warp/native/reduce.cpp +10 -10
warp/native/reduce.cu +13 -14
warp/native/runlength_encode.cpp +2 -2
warp/native/runlength_encode.cu +5 -5
warp/native/scan.cpp +3 -3
warp/native/scan.cu +4 -4
warp/native/sort.cpp +10 -10
warp/native/sort.cu +22 -22
warp/native/sparse.cpp +8 -8
warp/native/sparse.cu +13 -13
warp/native/spatial.h +366 -17
warp/native/temp_buffer.h +2 -2
warp/native/tile.h +283 -69
warp/native/vec.h +381 -14
warp/native/volume.cpp +54 -54
warp/native/volume.cu +1 -1
warp/native/volume.h +2 -1
warp/native/volume_builder.cu +30 -37
warp/native/warp.cpp +150 -149
warp/native/warp.cu +323 -192
warp/native/warp.h +227 -226
warp/optim/linear.py +736 -271
warp/render/imgui_manager.py +289 -0
warp/render/render_opengl.py +85 -6
warp/sim/graph_coloring.py +2 -2
warp/sparse.py +558 -175
warp/tests/aux_test_module_aot.py +7 -0
warp/tests/cuda/test_async.py +3 -3
warp/tests/cuda/test_conditional_captures.py +101 -0
warp/tests/geometry/test_marching_cubes.py +233 -12
warp/tests/sim/test_coloring.py +6 -6
warp/tests/test_array.py +56 -5
warp/tests/test_codegen.py +3 -2
warp/tests/test_context.py +8 -15
warp/tests/test_enum.py +136 -0
warp/tests/test_examples.py +2 -2
warp/tests/test_fem.py +45 -2
warp/tests/test_fixedarray.py +229 -0
warp/tests/test_func.py +18 -15
warp/tests/test_future_annotations.py +7 -5
warp/tests/test_linear_solvers.py +30 -0
warp/tests/test_map.py +1 -1
warp/tests/test_mat.py +1518 -378
warp/tests/test_mat_assign_copy.py +178 -0
warp/tests/test_mat_constructors.py +574 -0
warp/tests/test_module_aot.py +287 -0
warp/tests/test_print.py +69 -0
warp/tests/test_quat.py +140 -34
warp/tests/test_quat_assign_copy.py +145 -0
warp/tests/test_reload.py +2 -1
warp/tests/test_sparse.py +71 -0
warp/tests/test_spatial.py +140 -34
warp/tests/test_spatial_assign_copy.py +160 -0
warp/tests/test_struct.py +43 -3
warp/tests/test_types.py +0 -20
warp/tests/test_vec.py +179 -34
warp/tests/test_vec_assign_copy.py +143 -0
warp/tests/tile/test_tile.py +184 -18
warp/tests/tile/test_tile_cholesky.py +605 -0
warp/tests/tile/test_tile_load.py +169 -0
warp/tests/tile/test_tile_mathdx.py +2 -558
warp/tests/tile/test_tile_matmul.py +1 -1
warp/tests/tile/test_tile_mlp.py +1 -1
warp/tests/tile/test_tile_shared_memory.py +5 -5
warp/tests/unittest_suites.py +6 -0
warp/tests/walkthrough_debug.py +1 -1
warp/thirdparty/unittest_parallel.py +108 -9
warp/types.py +554 -264
warp/utils.py +68 -86
{warp_lang-1.8.1.dist-info → warp_lang-1.9.0.dist-info}/METADATA +28 -65
{warp_lang-1.8.1.dist-info → warp_lang-1.9.0.dist-info}/RECORD +131 -121
warp/native/marching.cpp +0 -19
warp/native/marching.cu +0 -514
warp/native/marching.h +0 -19
{warp_lang-1.8.1.dist-info → warp_lang-1.9.0.dist-info}/WHEEL +0 -0
{warp_lang-1.8.1.dist-info → warp_lang-1.9.0.dist-info}/licenses/LICENSE.md +0 -0
{warp_lang-1.8.1.dist-info → warp_lang-1.9.0.dist-info}/top_level.txt +0 -0

warp/tests/test_types.py CHANGED Viewed

@@ -267,11 +267,6 @@ class TestTypes(unittest.TestCase):
         with self.assertRaisesRegex(TypeError, r"Expected to assign a `int32` value but got `str` instead"):
             v1[0] = "123.0"
-        with self.assertRaisesRegex(
-            TypeError, r"Expected to assign a slice from a sequence of values but got `int` instead"
-        ):
-            v1[:] = 123
         with self.assertRaisesRegex(
             TypeError, r"Expected to assign a slice from a sequence of `int32` values but got `vec3i` instead"
         ):
@@ -483,26 +478,11 @@ class TestTypes(unittest.TestCase):
         with self.assertRaisesRegex(TypeError, r"Expected to assign a `float16` value but got `str` instead"):
             m[0][0] = "123.0"
-        with self.assertRaisesRegex(
-            TypeError, r"Expected to assign a slice from a sequence of values but got `int` instead"
-        ):
-            m[0] = 123
         with self.assertRaisesRegex(
             TypeError, r"Expected to assign a slice from a sequence of `float16` values but got `mat22h` instead"
         ):
             m[0] = (m,)
-        with self.assertRaisesRegex(
-            KeyError, r"Slices are not supported when indexing matrices using the `m\[start:end\]` notation"
-        ):
-            m[:] = 123
-        with self.assertRaisesRegex(
-            KeyError, r"Slices are not supported when indexing matrices using the `m\[i, j\]` notation"
-        ):
-            m[0, :1] = (123,)
         with self.assertRaisesRegex(ValueError, r"Can only assign sequence of same size"):
             m[0][:1] = (1, 2)

warp/tests/test_vec.py CHANGED Viewed

@@ -922,39 +922,6 @@ def test_vec_assign(test, device):
     run(vec_assign_attribute)
-def test_vec_assign_copy(test, device):
-    saved_enable_vector_component_overwrites_setting = wp.config.enable_vector_component_overwrites
-    try:
-        wp.config.enable_vector_component_overwrites = True
-        @wp.kernel(module="unique")
-        def vec_assign_overwrite(x: wp.array(dtype=wp.vec3), y: wp.array(dtype=wp.vec3)):
-            tid = wp.tid()
-            a = wp.vec3()
-            b = x[tid]
-            a = b
-            a[1] = 3.0
-            y[tid] = a
-        x = wp.ones(1, dtype=wp.vec3, device=device, requires_grad=True)
-        y = wp.zeros(1, dtype=wp.vec3, device=device, requires_grad=True)
-        tape = wp.Tape()
-        with tape:
-            wp.launch(vec_assign_overwrite, dim=1, inputs=[x, y], device=device)
-        y.grad = wp.ones_like(y, requires_grad=False)
-        tape.backward()
-        assert_np_equal(y.numpy(), np.array([[1.0, 3.0, 1.0]], dtype=float))
-        assert_np_equal(x.grad.numpy(), np.array([[1.0, 0.0, 1.0]], dtype=float))
-    finally:
-        wp.config.enable_vector_component_overwrites = saved_enable_vector_component_overwrites_setting
 @wp.kernel
 def vec_array_extract_subscript(x: wp.array2d(dtype=wp.vec3), y: wp.array2d(dtype=float)):
     i, j = wp.tid()
@@ -1189,6 +1156,181 @@ def test_scalar_vec_div(test, device):
     assert_np_equal(x.grad.numpy(), np.array(((-1.0, -0.25, -0.0625),), dtype=float))
+def test_vec_indexing_assign(test, device):
+    @wp.func
+    def fn():
+        v = wp.vec4(1.0, 2.0, 3.0, 4.0)
+        v[0] = 123.0
+        v[1] *= 2.0
+        wp.expect_eq(v[0], 123.0)
+        wp.expect_eq(v[1], 4.0)
+        wp.expect_eq(v[2], 3.0)
+        wp.expect_eq(v[3], 4.0)
+        v[-1] = 123.0
+        v[-2] *= 2.0
+        wp.expect_eq(v[-1], 123.0)
+        wp.expect_eq(v[-2], 6.0)
+        wp.expect_eq(v[-3], 4.0)
+        wp.expect_eq(v[-4], 123.0)
+    @wp.kernel(module="unique")
+    def kernel():
+        fn()
+    wp.launch(kernel, 1, device=device)
+    wp.synchronize()
+    fn()
+def test_vec_slicing_assign(test, device):
+    vec0 = wp.vec(0, float)
+    vec1 = wp.vec(1, float)
+    vec2 = wp.vec(2, float)
+    vec3 = wp.vec(3, float)
+    vec4 = wp.vec(4, float)
+    @wp.func
+    def fn():
+        v = wp.vec4(1.0, 2.0, 3.0, 4.0)
+        wp.expect_eq(v[:] == vec4(1.0, 2.0, 3.0, 4.0), True)
+        wp.expect_eq(v[-123:123] == vec4(1.0, 2.0, 3.0, 4.0), True)
+        wp.expect_eq(v[123:] == vec0(), True)
+        wp.expect_eq(v[:-123] == vec0(), True)
+        wp.expect_eq(v[::123] == vec1(1.0), True)
+        wp.expect_eq(v[1:] == vec3(2.0, 3.0, 4.0), True)
+        wp.expect_eq(v[-2:] == vec2(3.0, 4.0), True)
+        wp.expect_eq(v[:2] == vec2(1.0, 2.0), True)
+        wp.expect_eq(v[:-1] == vec3(1.0, 2.0, 3.0), True)
+        wp.expect_eq(v[::2] == vec2(1.0, 3.0), True)
+        wp.expect_eq(v[1::2] == vec2(2.0, 4.0), True)
+        wp.expect_eq(v[::-1] == vec4(4.0, 3.0, 2.0, 1.0), True)
+        wp.expect_eq(v[::-2] == vec2(4.0, 2.0), True)
+        wp.expect_eq(v[1::-2] == vec1(2.0), True)
+        v[1:] = vec3(5.0, 6.0, 7.0)
+        wp.expect_eq(v == wp.vec4(1.0, 5.0, 6.0, 7.0), True)
+        v[-2:] = vec2(8.0, 9.0)
+        wp.expect_eq(v == wp.vec4(1.0, 5.0, 8.0, 9.0), True)
+        v[:2] = vec2(10.0, 11.0)
+        wp.expect_eq(v == wp.vec4(10.0, 11.0, 8.0, 9.0), True)
+        v[:-1] = vec3(12.0, 13.0, 14.0)
+        wp.expect_eq(v == wp.vec4(12.0, 13.0, 14.0, 9.0), True)
+        v[::2] = vec2(15.0, 16.0)
+        wp.expect_eq(v == wp.vec4(15.0, 13.0, 16.0, 9.0), True)
+        v[1::2] = vec2(17.0, 18.0)
+        wp.expect_eq(v == wp.vec4(15.0, 17.0, 16.0, 18.0), True)
+        v[::-1] = vec4(19.0, 20.0, 21.0, 22.0)
+        wp.expect_eq(v == wp.vec4(22.0, 21.0, 20.0, 19.0), True)
+        v[::-2] = vec2(23.0, 24.0)
+        wp.expect_eq(v == wp.vec4(22.0, 24.0, 20.0, 23.0), True)
+        v[1::-2] = vec1(25.0)
+        wp.expect_eq(v == wp.vec4(22.0, 25.0, 20.0, 23.0), True)
+        v[1:] += vec3(26.0, 27.0, 28.0)
+        wp.expect_eq(v == wp.vec4(22.0, 51.0, 47.0, 51.0), True)
+        v[:-1] -= vec3(29.0, 30.0, 31.0)
+        wp.expect_eq(v == wp.vec4(-7.0, 21.0, 16.0, 51.0), True)
+        v[:] %= vec4(32.0, 33.0, 34.0, 35.0)
+        wp.expect_eq(v == wp.vec4(-7.0, 21.0, 16.0, 16.0), True)
+    @wp.kernel(module="unique")
+    def kernel():
+        fn()
+    wp.launch(kernel, 1, device=device)
+    wp.synchronize()
+    fn()
+def test_vec_assign_inplace_errors(test, device):
+    @wp.kernel
+    def kernel_1():
+        v = wp.vec4(1.0, 2.0, 3.0, 4.0)
+        v[1:] = wp.vec3d(wp.float64(5.0), wp.float64(6.0), wp.float64(7.0))
+    with test.assertRaisesRegex(
+        ValueError,
+        r"The provided vector is expected to be of length 3 with dtype float32.$",
+    ):
+        wp.launch(kernel_1, dim=1, device=device)
+    @wp.kernel
+    def kernel_2():
+        v = wp.vec4(1.0, 2.0, 3.0, 4.0)
+        v[1:] = wp.float64(5.0)
+    with test.assertRaisesRegex(
+        ValueError,
+        r"The provided value is expected to be a vector of length 3, with dtype float32.$",
+    ):
+        wp.launch(kernel_2, dim=1, device=device)
+    @wp.kernel
+    def kernel_3():
+        v = wp.vec4(1.0, 2.0, 3.0, 4.0)
+        v[1:] = wp.mat22(5.0, 6.0, 7.0, 8.0)
+    with test.assertRaisesRegex(
+        ValueError,
+        r"The provided value is expected to be a vector of length 3, with dtype float32.$",
+    ):
+        wp.launch(kernel_3, dim=1, device=device)
+    @wp.kernel
+    def kernel_4():
+        v = wp.vec4(1.0, 2.0, 3.0, 4.0)
+        v[1:] = wp.vec2(5.0, 6.0)
+    with test.assertRaisesRegex(
+        ValueError,
+        r"The length of the provided vector \(2\) isn't compatible with the given slice \(expected 3\).$",
+    ):
+        wp.launch(kernel_4, dim=1, device=device)
+def test_vec_slicing_assign_backward(test, device):
+    @wp.kernel(module="unique")
+    def kernel(arr_x: wp.array(dtype=wp.vec2), arr_y: wp.array(dtype=wp.vec4)):
+        i = wp.tid()
+        y = arr_y[i]
+        y[:2] = arr_x[i]
+        y[1:-1] += arr_x[i][:2]
+        y[3:1:-1] -= arr_x[i][0:]
+        arr_y[i] = y
+    x = wp.ones(1, dtype=wp.vec2, requires_grad=True, device=device)
+    y = wp.zeros(1, dtype=wp.vec4, requires_grad=True, device=device)
+    tape = wp.Tape()
+    with tape:
+        wp.launch(kernel, 1, inputs=(x,), outputs=(y,), device=device)
+    y.grad = wp.ones_like(y)
+    tape.backward()
+    assert_np_equal(y.numpy(), np.array(((1.0, 2.0, 0.0, -1.0),), dtype=float))
+    assert_np_equal(x.grad.numpy(), np.array(((1.0, 1.0),), dtype=float))
 devices = get_test_devices()
@@ -1248,7 +1390,6 @@ add_function_test(TestVec, "test_length_mismatch", test_length_mismatch, devices
 add_function_test(TestVec, "test_vector_len", test_vector_len, devices=devices)
 add_function_test(TestVec, "test_vec_extract", test_vec_extract, devices=devices)
 add_function_test(TestVec, "test_vec_assign", test_vec_assign, devices=devices)
-add_function_test(TestVec, "test_vec_assign_copy", test_vec_assign_copy, devices=devices)
 add_function_test(TestVec, "test_vec_array_extract", test_vec_array_extract, devices=devices)
 add_function_test(TestVec, "test_vec_array_assign", test_vec_array_assign, devices=devices)
 add_function_test(TestVec, "test_vec_add_inplace", test_vec_add_inplace, devices=devices)
@@ -1256,6 +1397,10 @@ add_function_test(TestVec, "test_vec_sub_inplace", test_vec_sub_inplace, devices
 add_function_test(TestVec, "test_vec_array_add_inplace", test_vec_array_add_inplace, devices=devices)
 add_function_test(TestVec, "test_vec_array_sub_inplace", test_vec_array_sub_inplace, devices=devices)
 add_function_test(TestVec, "test_scalar_vec_div", test_scalar_vec_div, devices=devices)
+add_function_test(TestVec, "test_vec_indexing_assign", test_vec_indexing_assign, devices=devices)
+add_function_test(TestVec, "test_vec_slicing_assign", test_vec_slicing_assign, devices=devices)
+add_function_test(TestVec, "test_vec_assign_inplace_errors", test_vec_assign_inplace_errors, devices=devices)
+add_function_test(TestVec, "test_vec_slicing_assign_backward", test_vec_slicing_assign_backward, devices=devices)
 if __name__ == "__main__":

warp/tests/test_vec_assign_copy.py ADDED Viewed

@@ -0,0 +1,143 @@
+# SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import unittest
+import numpy as np
+import warp as wp
+from warp.tests.unittest_utils import *
+def setUpModule():
+    wp.config.enable_vector_component_overwrites = True
+def tearDownModule():
+    wp.config.enable_vector_component_overwrites = False
+@wp.kernel
+def vec_assign_subscript(x: wp.array(dtype=float), y: wp.array(dtype=wp.vec3)):
+    i = wp.tid()
+    a = wp.vec3()
+    a[0] = 1.0 * x[i]
+    a[1] = 2.0 * x[i]
+    a[2] = 3.0 * x[i]
+    y[i] = a
+@wp.kernel
+def vec_assign_attribute(x: wp.array(dtype=float), y: wp.array(dtype=wp.vec3)):
+    i = wp.tid()
+    a = wp.vec3()
+    a.x = 1.0 * x[i]
+    a.y = 2.0 * x[i]
+    a.z = 3.0 * x[i]
+    y[i] = a
+def test_vec_assign(test, device):
+    def run(kernel):
+        x = wp.ones(1, dtype=float, requires_grad=True, device=device)
+        y = wp.zeros(1, dtype=wp.vec3, requires_grad=True, device=device)
+        tape = wp.Tape()
+        with tape:
+            wp.launch(kernel, 1, inputs=[x], outputs=[y], device=device)
+        y.grad = wp.ones_like(y)
+        tape.backward()
+        assert_np_equal(y.numpy(), np.array([[1.0, 2.0, 3.0]], dtype=float))
+        assert_np_equal(x.grad.numpy(), np.array([6.0], dtype=float))
+    run(vec_assign_subscript)
+    run(vec_assign_attribute)
+def test_vec_assign_copy(test, device):
+    @wp.kernel(module="unique")
+    def vec_assign_overwrite(x: wp.array(dtype=wp.vec3), y: wp.array(dtype=wp.vec3)):
+        tid = wp.tid()
+        a = wp.vec3()
+        b = x[tid]
+        a = b
+        a[1] = 3.0
+        y[tid] = a
+    x = wp.ones(1, dtype=wp.vec3, device=device, requires_grad=True)
+    y = wp.zeros(1, dtype=wp.vec3, device=device, requires_grad=True)
+    tape = wp.Tape()
+    with tape:
+        wp.launch(vec_assign_overwrite, dim=1, inputs=[x, y], device=device)
+    y.grad = wp.ones_like(y, requires_grad=False)
+    tape.backward()
+    assert_np_equal(y.numpy(), np.array([[1.0, 3.0, 1.0]], dtype=float))
+    assert_np_equal(x.grad.numpy(), np.array([[1.0, 0.0, 1.0]], dtype=float))
+def test_vec_slicing_assign_backward(test, device):
+    @wp.kernel(module="unique")
+    def kernel(arr_x: wp.array(dtype=wp.vec2), arr_y: wp.array(dtype=wp.vec4)):
+        i = wp.tid()
+        x = arr_x[i]
+        y = arr_y[i]
+        y[:2] = x
+        y[1:-1] += x[:2]
+        y[3:1:-1] -= x[0:]
+        arr_y[i] = y
+    x = wp.ones(1, dtype=wp.vec2, requires_grad=True, device=device)
+    y = wp.zeros(1, dtype=wp.vec4, requires_grad=True, device=device)
+    tape = wp.Tape()
+    with tape:
+        wp.launch(kernel, 1, inputs=(x,), outputs=(y,), device=device)
+    y.grad = wp.ones_like(y)
+    tape.backward()
+    assert_np_equal(y.numpy(), np.array(((1.0, 2.0, 0.0, -1.0),), dtype=float))
+    assert_np_equal(x.grad.numpy(), np.array(((1.0, 1.0),), dtype=float))
+devices = get_test_devices()
+class TestVecAssignCopy(unittest.TestCase):
+    pass
+add_function_test(TestVecAssignCopy, "test_vec_assign", test_vec_assign, devices=devices)
+add_function_test(TestVecAssignCopy, "test_vec_assign_copy", test_vec_assign_copy, devices=devices)
+add_function_test(
+    TestVecAssignCopy, "test_vec_slicing_assign_backward", test_vec_slicing_assign_backward, devices=devices
+)
+if __name__ == "__main__":
+    wp.clear_kernel_cache()
+    unittest.main(verbosity=2, failfast=True)

warp/tests/tile/test_tile.py CHANGED Viewed

@@ -109,12 +109,29 @@ def test_tile_copy_2d(test, device):
 @wp.func
-def unary_func(x: float):
+def unary_func(x: wp.float32):
+    return wp.sin(x)
+@wp.func
+def unary_func(x: wp.float64):
     return wp.sin(x)
 @wp.kernel
-def tile_unary_map(input: wp.array2d(dtype=float), output: wp.array2d(dtype=float)):
+def tile_unary_map_user_func(input: wp.array2d(dtype=Any), output: wp.array2d(dtype=Any)):
+    # tile index
+    i, j = wp.tid()
+    a = wp.tile_load(input, shape=(TILE_M, TILE_N), offset=(i * TILE_M, j * TILE_N))
+    sa = wp.tile_map(unary_func, a)
+    wp.tile_store(output, sa, offset=(i * TILE_M, j * TILE_N))
+@wp.kernel
+def tile_unary_map_builtin_func(input: wp.array2d(dtype=Any), output: wp.array2d(dtype=Any)):
     # tile index
     i, j = wp.tid()
@@ -131,17 +148,76 @@ def test_tile_unary_map(test, device):
     M = TILE_M * 7
     N = TILE_N * 5
-    A = rng.random((M, N), dtype=np.float32)
-    B = np.sin(A)
+    def run(kernel, dtype):
+        A = rng.random((M, N), dtype=dtype)
+        B = np.sin(A)
+        A_grad = np.cos(A)
+        A_wp = wp.array(A, requires_grad=True, device=device)
+        B_wp = wp.zeros_like(A_wp, requires_grad=True, device=device)
+        with wp.Tape() as tape:
+            wp.launch_tiled(
+                kernel,
+                dim=[int(M / TILE_M), int(N / TILE_N)],
+                inputs=[A_wp, B_wp],
+                block_dim=TILE_DIM,
+                device=device,
+            )
+        tol = 1.0e-6 if dtype == np.float64 else 1.0e-4
+        # verify forward pass
+        assert_np_equal(B_wp.numpy(), B, tol=tol)
+        # verify backward pass
+        B_wp.grad = wp.ones_like(B_wp, device=device)
+        tape.backward()
-    A_grad = np.cos(A)
+        assert_np_equal(A_wp.grad.numpy(), A_grad, tol=tol)
+    dtypes = [np.float32, np.float64]
+    for dtype in dtypes:
+        run(tile_unary_map_user_func, dtype)
+        run(tile_unary_map_builtin_func, dtype)
+@wp.func
+def unary_func_mixed_types(x: int) -> float:
+    return wp.sin(float(x))
+@wp.kernel
+def tile_unary_map_mixed_types(input: wp.array2d(dtype=int), output: wp.array2d(dtype=float)):
+    # tile index
+    i, j = wp.tid()
+    a = wp.tile_load(input, shape=(TILE_M, TILE_N), offset=(i * TILE_M, j * TILE_N))
+    sa = wp.tile_map(unary_func_mixed_types, a)
+    wp.tile_store(output, sa, offset=(i * TILE_M, j * TILE_N))
+def test_tile_unary_map_mixed_types(test, device):
+    rng = np.random.default_rng(42)
+    M = TILE_M * 7
+    N = TILE_N * 5
+    A = rng.integers(0, 100, size=(M, N), dtype=np.int32)
+    B = np.sin(A.astype(np.float32))
+    A_grad = np.cos(A.astype(np.float32))
     A_wp = wp.array(A, requires_grad=True, device=device)
-    B_wp = wp.zeros_like(A_wp, requires_grad=True, device=device)
+    B_wp = wp.zeros((M, N), dtype=float, requires_grad=True, device=device)
     with wp.Tape() as tape:
         wp.launch_tiled(
-            tile_unary_map,
+            tile_unary_map_mixed_types,
             dim=[int(M / TILE_M), int(N / TILE_N)],
             inputs=[A_wp, B_wp],
             block_dim=TILE_DIM,
@@ -155,17 +231,23 @@ def test_tile_unary_map(test, device):
     B_wp.grad = wp.ones_like(B_wp, device=device)
     tape.backward()
-    assert_np_equal(A_wp.grad.numpy(), A_grad, tol=1.0e-6)
+    # The gradients of `a` are now stored as ints and can't capture the correct values
+    # assert_np_equal(A_wp.grad.numpy(), A_grad, tol=1.0e-6)
 @wp.func
-def binary_func(x: float, y: float):
-    return wp.sin(x) + y
+def binary_func(x: wp.float32, y: wp.float32):
+    return x + y
+@wp.func
+def binary_func(x: wp.float64, y: wp.float64):
+    return x + y
 @wp.kernel
-def tile_binary_map(
-    input_a: wp.array2d(dtype=float), input_b: wp.array2d(dtype=float), output: wp.array2d(dtype=float)
+def tile_binary_map_user_func(
+    input_a: wp.array2d(dtype=Any), input_b: wp.array2d(dtype=Any), output: wp.array2d(dtype=Any)
 ):
     # tile index
     i, j = wp.tid()
@@ -178,26 +260,107 @@ def tile_binary_map(
     wp.tile_store(output, sa, offset=(i * TILE_M, j * TILE_N))
+@wp.kernel
+def tile_binary_map_builtin_func(
+    input_a: wp.array2d(dtype=Any), input_b: wp.array2d(dtype=Any), output: wp.array2d(dtype=Any)
+):
+    # tile index
+    i, j = wp.tid()
+    a = wp.tile_load(input_a, shape=(TILE_M, TILE_N), offset=(i * TILE_M, j * TILE_N))
+    b = wp.tile_load(input_b, shape=(TILE_M, TILE_N), offset=(i * TILE_M, j * TILE_N))
+    sa = wp.tile_map(wp.add, a, b)
+    wp.tile_store(output, sa, offset=(i * TILE_M, j * TILE_N))
 def test_tile_binary_map(test, device):
     rng = np.random.default_rng(42)
     M = TILE_M * 7
     N = TILE_N * 5
-    A = rng.random((M, N), dtype=np.float32)
+    def run(kernel, dtype):
+        A = rng.random((M, N), dtype=dtype)
+        B = rng.random((M, N), dtype=dtype)
+        C = A + B
+        A_grad = np.ones_like(A)
+        B_grad = np.ones_like(B)
+        A_wp = wp.array(A, requires_grad=True, device=device)
+        B_wp = wp.array(B, requires_grad=True, device=device)
+        C_wp = wp.zeros_like(A_wp, requires_grad=True, device=device)
+        with wp.Tape() as tape:
+            wp.launch_tiled(
+                kernel,
+                dim=[int(M / TILE_M), int(N / TILE_N)],
+                inputs=[A_wp, B_wp, C_wp],
+                block_dim=TILE_DIM,
+                device=device,
+            )
+        tol = 1.0e-6 if dtype == np.float64 else 1.0e-4
+        # verify forward pass
+        assert_np_equal(C_wp.numpy(), C, tol=tol)
+        # verify backward pass
+        C_wp.grad = wp.ones_like(C_wp, device=device)
+        tape.backward()
+        assert_np_equal(A_wp.grad.numpy(), A_grad, tol=tol)
+        assert_np_equal(B_wp.grad.numpy(), B_grad, tol=tol)
+    dtypes = [np.float32, np.float64]
+    for dtype in dtypes:
+        run(tile_binary_map_builtin_func, dtype)
+        run(tile_binary_map_user_func, dtype)
+@wp.func
+def binary_func_mixed_types(x: int, y: float) -> float:
+    return wp.sin(float(x)) + y
+@wp.kernel
+def tile_binary_map_mixed_types(
+    input_a: wp.array2d(dtype=int), input_b: wp.array2d(dtype=float), output: wp.array2d(dtype=float)
+):
+    # tile index
+    i, j = wp.tid()
+    a = wp.tile_load(input_a, shape=(TILE_M, TILE_N), offset=(i * TILE_M, j * TILE_N))
+    b = wp.tile_load(input_b, shape=(TILE_M, TILE_N), offset=(i * TILE_M, j * TILE_N))
+    sa = wp.tile_map(binary_func_mixed_types, a, b)
+    wp.tile_store(output, sa, offset=(i * TILE_M, j * TILE_N))
+def test_tile_binary_map_mixed_types(test, device):
+    rng = np.random.default_rng(42)
+    M = TILE_M * 7
+    N = TILE_N * 5
+    A = rng.integers(0, 100, size=(M, N), dtype=np.int32)
     B = rng.random((M, N), dtype=np.float32)
-    C = np.sin(A) + B
+    C = np.sin(A.astype(np.float32)) + B
-    A_grad = np.cos(A)
+    A_grad = np.cos(A.astype(np.float32))
     B_grad = np.ones_like(B)
     A_wp = wp.array(A, requires_grad=True, device=device)
     B_wp = wp.array(B, requires_grad=True, device=device)
-    C_wp = wp.zeros_like(A_wp, requires_grad=True, device=device)
+    C_wp = wp.zeros_like(B_wp, requires_grad=True, device=device)
     with wp.Tape() as tape:
         wp.launch_tiled(
-            tile_binary_map,
+            tile_binary_map_mixed_types,
             dim=[int(M / TILE_M), int(N / TILE_N)],
             inputs=[A_wp, B_wp, C_wp],
             block_dim=TILE_DIM,
@@ -211,7 +374,8 @@ def test_tile_binary_map(test, device):
     C_wp.grad = wp.ones_like(C_wp, device=device)
     tape.backward()
-    assert_np_equal(A_wp.grad.numpy(), A_grad, tol=1.0e-6)
+    # The gradients of `a` are now stored as ints and can't capture the correct values
+    # assert_np_equal(A_wp.grad.numpy(), A_grad, tol=1.0e-6)
     assert_np_equal(B_wp.grad.numpy(), B_grad)
@@ -1085,7 +1249,9 @@ class TestTile(unittest.TestCase):
 add_function_test(TestTile, "test_tile_copy_1d", test_tile_copy_1d, devices=devices)
 add_function_test(TestTile, "test_tile_copy_2d", test_tile_copy_2d, devices=devices)
 add_function_test(TestTile, "test_tile_unary_map", test_tile_unary_map, devices=devices)
+add_function_test(TestTile, "test_tile_unary_map_mixed_types", test_tile_unary_map_mixed_types, devices=devices)
 add_function_test(TestTile, "test_tile_binary_map", test_tile_binary_map, devices=devices)
+add_function_test(TestTile, "test_tile_binary_map_mixed_types", test_tile_binary_map_mixed_types, devices=devices)
 add_function_test(TestTile, "test_tile_transpose", test_tile_transpose, devices=devices)
 add_function_test(TestTile, "test_tile_operators", test_tile_operators, devices=devices)
 add_function_test(TestTile, "test_tile_tile", test_tile_tile, devices=get_cuda_test_devices())