warp-lang 1.6.2__py3-none-win_amd64.whl → 1.7.0__py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of warp-lang might be problematic. Click here for more details.
- warp/__init__.py +7 -1
- warp/bin/warp-clang.dll +0 -0
- warp/bin/warp.dll +0 -0
- warp/build.py +410 -0
- warp/build_dll.py +6 -14
- warp/builtins.py +452 -362
- warp/codegen.py +179 -119
- warp/config.py +42 -6
- warp/context.py +490 -271
- warp/dlpack.py +8 -6
- warp/examples/assets/nonuniform.usd +0 -0
- warp/examples/assets/nvidia_logo.png +0 -0
- warp/examples/benchmarks/benchmark_tile_load_store.py +103 -0
- warp/examples/core/example_sample_mesh.py +300 -0
- warp/examples/fem/example_apic_fluid.py +1 -1
- warp/examples/fem/example_burgers.py +2 -2
- warp/examples/fem/example_deformed_geometry.py +1 -1
- warp/examples/fem/example_distortion_energy.py +1 -1
- warp/examples/fem/example_magnetostatics.py +6 -6
- warp/examples/fem/utils.py +9 -3
- warp/examples/interop/example_jax_callable.py +116 -0
- warp/examples/interop/example_jax_ffi_callback.py +132 -0
- warp/examples/interop/example_jax_kernel.py +205 -0
- warp/examples/optim/example_fluid_checkpoint.py +497 -0
- warp/examples/tile/example_tile_matmul.py +2 -4
- warp/fem/__init__.py +11 -1
- warp/fem/adaptivity.py +4 -4
- warp/fem/field/nodal_field.py +22 -68
- warp/fem/field/virtual.py +62 -23
- warp/fem/geometry/adaptive_nanogrid.py +9 -10
- warp/fem/geometry/closest_point.py +1 -1
- warp/fem/geometry/deformed_geometry.py +5 -2
- warp/fem/geometry/geometry.py +5 -0
- warp/fem/geometry/grid_2d.py +12 -12
- warp/fem/geometry/grid_3d.py +12 -15
- warp/fem/geometry/hexmesh.py +5 -7
- warp/fem/geometry/nanogrid.py +9 -11
- warp/fem/geometry/quadmesh.py +13 -13
- warp/fem/geometry/tetmesh.py +3 -4
- warp/fem/geometry/trimesh.py +3 -8
- warp/fem/integrate.py +262 -93
- warp/fem/linalg.py +5 -5
- warp/fem/quadrature/pic_quadrature.py +37 -22
- warp/fem/quadrature/quadrature.py +194 -25
- warp/fem/space/__init__.py +1 -1
- warp/fem/space/basis_function_space.py +4 -2
- warp/fem/space/basis_space.py +25 -18
- warp/fem/space/hexmesh_function_space.py +2 -2
- warp/fem/space/partition.py +6 -2
- warp/fem/space/quadmesh_function_space.py +8 -8
- warp/fem/space/shape/cube_shape_function.py +23 -23
- warp/fem/space/shape/square_shape_function.py +12 -12
- warp/fem/space/shape/triangle_shape_function.py +1 -1
- warp/fem/space/tetmesh_function_space.py +3 -3
- warp/fem/space/trimesh_function_space.py +2 -2
- warp/fem/utils.py +12 -6
- warp/jax.py +14 -1
- warp/jax_experimental/__init__.py +16 -0
- warp/{jax_experimental.py → jax_experimental/custom_call.py} +14 -27
- warp/jax_experimental/ffi.py +698 -0
- warp/jax_experimental/xla_ffi.py +602 -0
- warp/math.py +89 -0
- warp/native/array.h +13 -0
- warp/native/builtin.h +29 -3
- warp/native/bvh.cpp +3 -1
- warp/native/bvh.cu +42 -14
- warp/native/bvh.h +2 -1
- warp/native/clang/clang.cpp +30 -3
- warp/native/cuda_util.cpp +14 -0
- warp/native/cuda_util.h +2 -0
- warp/native/exports.h +68 -63
- warp/native/intersect.h +26 -26
- warp/native/intersect_adj.h +33 -33
- warp/native/marching.cu +1 -1
- warp/native/mat.h +513 -9
- warp/native/mesh.h +10 -10
- warp/native/quat.h +99 -11
- warp/native/rand.h +6 -0
- warp/native/sort.cpp +122 -59
- warp/native/sort.cu +152 -15
- warp/native/sort.h +8 -1
- warp/native/sparse.cpp +43 -22
- warp/native/sparse.cu +52 -17
- warp/native/svd.h +116 -0
- warp/native/tile.h +301 -105
- warp/native/tile_reduce.h +46 -3
- warp/native/vec.h +68 -7
- warp/native/volume.cpp +85 -113
- warp/native/volume_builder.cu +25 -10
- warp/native/volume_builder.h +6 -0
- warp/native/warp.cpp +5 -6
- warp/native/warp.cu +99 -10
- warp/native/warp.h +19 -10
- warp/optim/linear.py +10 -10
- warp/sim/articulation.py +4 -4
- warp/sim/collide.py +21 -10
- warp/sim/import_mjcf.py +449 -155
- warp/sim/import_urdf.py +32 -12
- warp/sim/integrator_euler.py +5 -5
- warp/sim/integrator_featherstone.py +3 -10
- warp/sim/integrator_vbd.py +207 -2
- warp/sim/integrator_xpbd.py +5 -5
- warp/sim/model.py +42 -13
- warp/sim/utils.py +2 -2
- warp/sparse.py +642 -555
- warp/stubs.py +216 -19
- warp/tests/__main__.py +0 -15
- warp/tests/cuda/__init__.py +0 -0
- warp/tests/{test_mempool.py → cuda/test_mempool.py} +39 -0
- warp/tests/{test_streams.py → cuda/test_streams.py} +71 -0
- warp/tests/geometry/__init__.py +0 -0
- warp/tests/{test_mesh_query_point.py → geometry/test_mesh_query_point.py} +66 -63
- warp/tests/{test_mesh_query_ray.py → geometry/test_mesh_query_ray.py} +1 -1
- warp/tests/{test_volume.py → geometry/test_volume.py} +41 -6
- warp/tests/interop/__init__.py +0 -0
- warp/tests/{test_dlpack.py → interop/test_dlpack.py} +28 -5
- warp/tests/sim/__init__.py +0 -0
- warp/tests/{disabled_kinematics.py → sim/disabled_kinematics.py} +9 -10
- warp/tests/{test_collision.py → sim/test_collision.py} +2 -2
- warp/tests/{test_model.py → sim/test_model.py} +40 -0
- warp/tests/{test_sim_kinematics.py → sim/test_sim_kinematics.py} +2 -1
- warp/tests/sim/test_vbd.py +597 -0
- warp/tests/test_bool.py +1 -1
- warp/tests/test_examples.py +28 -36
- warp/tests/test_fem.py +23 -4
- warp/tests/test_linear_solvers.py +0 -11
- warp/tests/test_mat.py +233 -79
- warp/tests/test_mat_scalar_ops.py +4 -4
- warp/tests/test_overwrite.py +0 -60
- warp/tests/test_quat.py +67 -46
- warp/tests/test_rand.py +44 -37
- warp/tests/test_sparse.py +47 -6
- warp/tests/test_spatial.py +75 -0
- warp/tests/test_static.py +1 -1
- warp/tests/test_utils.py +84 -4
- warp/tests/test_vec.py +46 -34
- warp/tests/tile/__init__.py +0 -0
- warp/tests/{test_tile.py → tile/test_tile.py} +136 -51
- warp/tests/{test_tile_load.py → tile/test_tile_load.py} +1 -1
- warp/tests/{test_tile_mathdx.py → tile/test_tile_mathdx.py} +9 -6
- warp/tests/{test_tile_mlp.py → tile/test_tile_mlp.py} +25 -14
- warp/tests/{test_tile_reduce.py → tile/test_tile_reduce.py} +60 -1
- warp/tests/{test_tile_view.py → tile/test_tile_view.py} +1 -1
- warp/tests/unittest_serial.py +1 -0
- warp/tests/unittest_suites.py +45 -59
- warp/tests/unittest_utils.py +2 -1
- warp/thirdparty/unittest_parallel.py +3 -1
- warp/types.py +110 -658
- warp/utils.py +137 -72
- {warp_lang-1.6.2.dist-info → warp_lang-1.7.0.dist-info}/METADATA +29 -7
- {warp_lang-1.6.2.dist-info → warp_lang-1.7.0.dist-info}/RECORD +172 -162
- {warp_lang-1.6.2.dist-info → warp_lang-1.7.0.dist-info}/WHEEL +1 -1
- warp/examples/optim/example_walker.py +0 -317
- warp/native/cutlass_gemm.cpp +0 -43
- warp/native/cutlass_gemm.cu +0 -382
- warp/tests/test_matmul.py +0 -511
- warp/tests/test_matmul_lite.py +0 -411
- warp/tests/test_vbd.py +0 -386
- warp/tests/unused_test_misc.py +0 -77
- /warp/tests/{test_async.py → cuda/test_async.py} +0 -0
- /warp/tests/{test_ipc.py → cuda/test_ipc.py} +0 -0
- /warp/tests/{test_multigpu.py → cuda/test_multigpu.py} +0 -0
- /warp/tests/{test_peer.py → cuda/test_peer.py} +0 -0
- /warp/tests/{test_pinned.py → cuda/test_pinned.py} +0 -0
- /warp/tests/{test_bvh.py → geometry/test_bvh.py} +0 -0
- /warp/tests/{test_hash_grid.py → geometry/test_hash_grid.py} +0 -0
- /warp/tests/{test_marching_cubes.py → geometry/test_marching_cubes.py} +0 -0
- /warp/tests/{test_mesh.py → geometry/test_mesh.py} +0 -0
- /warp/tests/{test_mesh_query_aabb.py → geometry/test_mesh_query_aabb.py} +0 -0
- /warp/tests/{test_volume_write.py → geometry/test_volume_write.py} +0 -0
- /warp/tests/{test_jax.py → interop/test_jax.py} +0 -0
- /warp/tests/{test_paddle.py → interop/test_paddle.py} +0 -0
- /warp/tests/{test_torch.py → interop/test_torch.py} +0 -0
- /warp/tests/{flaky_test_sim_grad.py → sim/flaky_test_sim_grad.py} +0 -0
- /warp/tests/{test_coloring.py → sim/test_coloring.py} +0 -0
- /warp/tests/{test_sim_grad_bounce_linear.py → sim/test_sim_grad_bounce_linear.py} +0 -0
- /warp/tests/{test_tile_shared_memory.py → tile/test_tile_shared_memory.py} +0 -0
- {warp_lang-1.6.2.dist-info → warp_lang-1.7.0.dist-info/licenses}/LICENSE.md +0 -0
- {warp_lang-1.6.2.dist-info → warp_lang-1.7.0.dist-info}/top_level.txt +0 -0
warp/dlpack.py
CHANGED
|
@@ -48,10 +48,6 @@ Py_DecRef.restype = None
|
|
|
48
48
|
|
|
49
49
|
PyCapsule_Destructor = ctypes.CFUNCTYPE(None, ctypes.c_void_p)
|
|
50
50
|
|
|
51
|
-
PyCapsule_New = ctypes.pythonapi.PyCapsule_New
|
|
52
|
-
PyCapsule_New.argtypes = [ctypes.c_void_p, ctypes.c_char_p, PyCapsule_Destructor]
|
|
53
|
-
PyCapsule_New.restype = ctypes.py_object
|
|
54
|
-
|
|
55
51
|
PyCapsule_IsValid = ctypes.pythonapi.PyCapsule_IsValid
|
|
56
52
|
PyCapsule_IsValid.argtypes = [ctypes.py_object, ctypes.c_char_p]
|
|
57
53
|
PyCapsule_IsValid.restype = ctypes.c_int
|
|
@@ -105,8 +101,8 @@ def _dlpack_capsule_deleter(ptr) -> None:
|
|
|
105
101
|
|
|
106
102
|
capsule = ctypes.cast(ptr, ctypes.py_object)
|
|
107
103
|
|
|
108
|
-
if
|
|
109
|
-
managed_ptr =
|
|
104
|
+
if PyCapsule_IsValid(capsule, _c_str_dltensor):
|
|
105
|
+
managed_ptr = PyCapsule_GetPointer(capsule, _c_str_dltensor)
|
|
110
106
|
managed_tensor = DLManagedTensor.from_address(managed_ptr)
|
|
111
107
|
if managed_tensor.deleter:
|
|
112
108
|
managed_tensor.deleter(managed_ptr)
|
|
@@ -302,6 +298,12 @@ def to_dlpack(wp_array: warp.array):
|
|
|
302
298
|
|
|
303
299
|
managed_tensor.deleter = _dlpack_tensor_deleter
|
|
304
300
|
|
|
301
|
+
# NOTE: jax.ffi.pycapsule() defines the PyCapsule_New() argtypes incorrectly, which causes problems.
|
|
302
|
+
# Here we make sure that the PyCapsule_Destructor callback is correctly defined.
|
|
303
|
+
PyCapsule_New = ctypes.pythonapi.PyCapsule_New
|
|
304
|
+
PyCapsule_New.argtypes = [ctypes.c_void_p, ctypes.c_char_p, PyCapsule_Destructor]
|
|
305
|
+
PyCapsule_New.restype = ctypes.py_object
|
|
306
|
+
|
|
305
307
|
capsule = PyCapsule_New(
|
|
306
308
|
ctypes.byref(managed_tensor),
|
|
307
309
|
_c_str_dltensor,
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
16
|
+
import numpy as np
|
|
17
|
+
|
|
18
|
+
import warp as wp
|
|
19
|
+
|
|
20
|
+
BLOCK_DIM = 128
|
|
21
|
+
|
|
22
|
+
TILE = 32
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def create_test_kernel(storage_type: str):
|
|
26
|
+
@wp.kernel
|
|
27
|
+
def load_store(a: wp.array2d(dtype=wp.float32), b: wp.array2d(dtype=wp.float32)):
|
|
28
|
+
i, j = wp.tid()
|
|
29
|
+
|
|
30
|
+
if wp.static(storage_type == "shared"):
|
|
31
|
+
a_tile = wp.tile_load(a, shape=(TILE, TILE), offset=(i * TILE, j * TILE), storage="shared")
|
|
32
|
+
else:
|
|
33
|
+
a_tile = wp.tile_load(a, shape=(TILE, TILE), offset=(i * TILE, j * TILE), storage="register")
|
|
34
|
+
|
|
35
|
+
wp.tile_store(b, a_tile, offset=(i * TILE, j * TILE))
|
|
36
|
+
|
|
37
|
+
return load_store
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
if __name__ == "__main__":
|
|
41
|
+
wp.config.quiet = True
|
|
42
|
+
wp.init()
|
|
43
|
+
wp.clear_kernel_cache()
|
|
44
|
+
wp.set_module_options({"fast_math": True, "enable_backward": False})
|
|
45
|
+
|
|
46
|
+
iterations = 100
|
|
47
|
+
rng = np.random.default_rng(42)
|
|
48
|
+
|
|
49
|
+
shared_benchmark_data = {}
|
|
50
|
+
register_benchmark_data = {}
|
|
51
|
+
memcpy_benchmark_data = {}
|
|
52
|
+
|
|
53
|
+
sizes = list(range(128, 4097, 128))
|
|
54
|
+
|
|
55
|
+
print(f"{'Transfer Size (Bytes)':<23s} {'Shared (GiB/s)':<16s} {'Register (GiB/s)':<18s} {'memcpy (GiB/s)':<16s}")
|
|
56
|
+
print("-" * 79)
|
|
57
|
+
|
|
58
|
+
for size in sizes:
|
|
59
|
+
a = wp.array(rng.random((size, size), dtype=np.float32), dtype=wp.float32)
|
|
60
|
+
b = wp.empty_like(a)
|
|
61
|
+
|
|
62
|
+
for storage_type in ("shared", "register"):
|
|
63
|
+
load_store = create_test_kernel(storage_type)
|
|
64
|
+
|
|
65
|
+
cmd = wp.launch_tiled(
|
|
66
|
+
load_store,
|
|
67
|
+
dim=(a.shape[0] // TILE, a.shape[1] // TILE),
|
|
68
|
+
inputs=[a],
|
|
69
|
+
outputs=[b],
|
|
70
|
+
block_dim=BLOCK_DIM,
|
|
71
|
+
record_cmd=True,
|
|
72
|
+
)
|
|
73
|
+
# Warmup
|
|
74
|
+
for _ in range(5):
|
|
75
|
+
cmd.launch()
|
|
76
|
+
|
|
77
|
+
with wp.ScopedTimer("benchmark", cuda_filter=wp.TIMING_KERNEL, print=False, synchronize=True) as timer:
|
|
78
|
+
for _ in range(iterations):
|
|
79
|
+
cmd.launch()
|
|
80
|
+
|
|
81
|
+
np.testing.assert_equal(a.numpy(), b.numpy())
|
|
82
|
+
|
|
83
|
+
timing_results = [result.elapsed for result in timer.timing_results]
|
|
84
|
+
avg_bw = 2.0 * (a.capacity / (1024 * 1024 * 1024)) / (1e-3 * np.mean(timing_results))
|
|
85
|
+
|
|
86
|
+
if storage_type == "shared":
|
|
87
|
+
shared_benchmark_data[a.capacity] = avg_bw
|
|
88
|
+
else:
|
|
89
|
+
register_benchmark_data[a.capacity] = avg_bw
|
|
90
|
+
|
|
91
|
+
# Compare with memcpy
|
|
92
|
+
with wp.ScopedTimer("benchmark", cuda_filter=wp.TIMING_MEMCPY, print=False, synchronize=True) as timer:
|
|
93
|
+
for _ in range(iterations):
|
|
94
|
+
wp.copy(b, a)
|
|
95
|
+
|
|
96
|
+
timing_results = [result.elapsed for result in timer.timing_results]
|
|
97
|
+
avg_bw = 2.0 * (a.capacity / (1024 * 1024 * 1024)) / (1e-3 * np.mean(timing_results))
|
|
98
|
+
memcpy_benchmark_data[a.capacity] = avg_bw
|
|
99
|
+
|
|
100
|
+
# Print results
|
|
101
|
+
print(
|
|
102
|
+
f"{a.capacity:<23d} {shared_benchmark_data[a.capacity]:<#16.4g} {register_benchmark_data[a.capacity]:<#18.4g} {memcpy_benchmark_data[a.capacity]:<#16.4g}"
|
|
103
|
+
)
|
|
@@ -0,0 +1,300 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
16
|
+
###########################################################################
|
|
17
|
+
# Example Sample Mesh
|
|
18
|
+
#
|
|
19
|
+
# Shows how to sample points on a mesh's surface using
|
|
20
|
+
# a Cumulative Distribution Function (CDF).
|
|
21
|
+
#
|
|
22
|
+
# The CDF enables uniform sampling of points across the mesh's surface,
|
|
23
|
+
# even when the density of triangles varies. It represents the cumulative
|
|
24
|
+
# probability of selecting a triangle from the mesh, with each triangle
|
|
25
|
+
# weighted by its area relative to the total surface area of the mesh.
|
|
26
|
+
#
|
|
27
|
+
###########################################################################
|
|
28
|
+
|
|
29
|
+
import numpy as np
|
|
30
|
+
|
|
31
|
+
import warp as wp
|
|
32
|
+
import warp.render
|
|
33
|
+
|
|
34
|
+
# fmt: off
|
|
35
|
+
POINTS = np.array(
|
|
36
|
+
(
|
|
37
|
+
(-0.986598, -0.400638, -0.175759), (-0.81036 , -0.482105, -0.541125),
|
|
38
|
+
(-1.079616, 0.022652, -0.023381), (-0.894468, -0.080795, -0.618379),
|
|
39
|
+
(-0.607365, -0.702012, -0.556551), (-0.366107, -0.800096, -0.620734),
|
|
40
|
+
(-0.801777, -0.690991, -0.239593), (-0.553576, -0.871746, -0.335518),
|
|
41
|
+
(-0.309133, -0.370805, -0.965784), (-0.288299, -0.956987, -0.402091),
|
|
42
|
+
(-0.051878, -0.894342, -0.597583), (-0.386774, -1.003107, -0.145116),
|
|
43
|
+
(-0.19062 , -1.061165, 0.012418), (-0.176053, -1.044838, -0.217194),
|
|
44
|
+
( 0.001479, -1.020045, -0.356905), (-0.105375, -0.655117, -0.861365),
|
|
45
|
+
(-0.542102, -0.517255, -0.795259), (-0.476599, -0.105709, -0.981171),
|
|
46
|
+
(-1.047915, -0.121584, 0.322098), (-0.527852, 0.137252, 0.501813),
|
|
47
|
+
(-0.721762, -0.803275, 0.117162), (-0.904992, -0.573281, 0.168408),
|
|
48
|
+
(-0.796762, -0.473428, 0.569649), (-0.606446, -0.753374, 0.492938),
|
|
49
|
+
(-0.466481, -0.576566, 0.802562), (-0.50476 , -0.908596, 0.300064),
|
|
50
|
+
(-0.337425, -1.008902, 0.170911), (-0.048676, -1.055594, 0.246732),
|
|
51
|
+
(-0.212871, -0.760442, 0.738447), (-0.281356, -0.9322 , 0.474965),
|
|
52
|
+
(-0.560476, 0.062512, -0.561019), (-0.003252, 0.083237, -1.049784),
|
|
53
|
+
(-0.009392, 0.593703, -0.522479), (-0.530465, 0.577231, 0.007172),
|
|
54
|
+
(-0.02106 , 0.064189, 1.066722), (-0.003512, 0.59714 , 0.516904),
|
|
55
|
+
( 0.000194, 1.093899, 0.001113), ( 0.256861, -0.955856, -0.445325),
|
|
56
|
+
( 0.251205, -1.038759, -0.174212), ( 0.170201, -0.800019, -0.712158),
|
|
57
|
+
( 0.364385, -0.560298, -0.866843), ( 0.092809, -0.269437, -1.058467),
|
|
58
|
+
( 0.628127, -0.12359 , -0.9012 ), ( 0.507433, -0.930658, -0.215908),
|
|
59
|
+
( 0.496448, -0.800205, -0.545904), ( 0.757415, -0.527449, -0.565395),
|
|
60
|
+
( 0.908704, -0.596257, 0.028995), ( 0.754069, -0.731365, -0.256687),
|
|
61
|
+
( 0.921362, -0.09028 , -0.546421), ( 1.017846, -0.335787, -0.263017),
|
|
62
|
+
( 0.016768, -1.080014, -0.058473), ( 0.204245, -1.056388, 0.078346),
|
|
63
|
+
( 0.260892, -1.001704, 0.322104), ( 0.16608 , -0.739172, 0.788097),
|
|
64
|
+
( 0.021091, -0.931327, 0.557789), (-0.046158, -0.408417, 1.011046),
|
|
65
|
+
( 0.429623, -0.987237, 0.088537), ( 0.704993, -0.739396, 0.386838),
|
|
66
|
+
( 0.37277 , -0.825639, 0.591102), ( 0.493947, -0.896091, 0.339163),
|
|
67
|
+
( 0.321112, -0.540547, 0.890161), ( 0.654753, -0.520495, 0.690104),
|
|
68
|
+
( 0.922472, -0.124429, 0.530498), ( 0.662544, -0.85601 , 0.054375),
|
|
69
|
+
( 0.950976, -0.422783, 0.327726), ( 0.536849, 0.109943, -0.52279 ),
|
|
70
|
+
( 0.517242, 0.120634, 0.535708), ( 0.532707, 0.598943, -0.000767),
|
|
71
|
+
( 1.086691, 0.048722, 0.032517), ( 0.528734, -0.109809, 0.96863 ),
|
|
72
|
+
(-0.581832, -0.916941, -0.027829), (-0.625071, -0.14445 , 0.906538),
|
|
73
|
+
),
|
|
74
|
+
dtype=np.float32,
|
|
75
|
+
)
|
|
76
|
+
|
|
77
|
+
FACE_VERTEX_INDICES = np.array(
|
|
78
|
+
(
|
|
79
|
+
6, 0, 1, 6, 21, 0, 2, 0, 18, 0, 3, 1, 2, 3, 0, 5,
|
|
80
|
+
7, 4, 70, 7, 11, 4, 6, 1, 16, 1, 3, 7, 6, 4, 4, 1,
|
|
81
|
+
16, 9, 7, 5, 3, 17, 16, 16, 17, 8, 41, 8, 17, 30, 17, 3,
|
|
82
|
+
10, 14, 9, 5, 10, 9, 10, 37, 14, 15, 10, 5, 7, 9, 11, 11,
|
|
83
|
+
9, 13, 11, 13, 12, 50, 12, 13, 9, 14, 13, 15, 16, 8, 15, 8,
|
|
84
|
+
41, 16, 5, 4, 16, 15, 5, 17, 31, 41, 21, 22, 18, 20, 21, 6,
|
|
85
|
+
18, 0, 21, 20, 25, 23, 20, 70, 25, 70, 11, 26, 26, 25, 70, 25,
|
|
86
|
+
29, 23, 21, 20, 23, 21, 23, 22, 23, 24, 22, 24, 71, 22, 26, 29,
|
|
87
|
+
25, 26, 11, 12, 12, 27, 26, 26, 27, 29, 27, 54, 29, 27, 12, 50,
|
|
88
|
+
28, 29, 54, 54, 53, 28, 23, 28, 24, 29, 28, 23, 28, 55, 24, 28,
|
|
89
|
+
53, 55, 53, 60, 55, 24, 55, 71, 55, 34, 71, 30, 3, 2, 2, 33,
|
|
90
|
+
30, 17, 30, 31, 32, 31, 30, 33, 36, 32, 19, 33, 2, 19, 35, 33,
|
|
91
|
+
19, 71, 34, 35, 19, 34, 34, 66, 35, 35, 36, 33, 35, 67, 36, 15,
|
|
92
|
+
39, 10, 10, 39, 37, 44, 37, 39, 14, 50, 13, 14, 38, 50, 14, 37,
|
|
93
|
+
38, 37, 43, 38, 40, 15, 41, 40, 39, 15, 41, 42, 40, 44, 39, 40,
|
|
94
|
+
31, 42, 41, 38, 43, 56, 44, 43, 37, 44, 47, 43, 47, 63, 43, 44,
|
|
95
|
+
40, 45, 42, 45, 40, 46, 63, 47, 45, 47, 44, 65, 48, 42, 46, 47,
|
|
96
|
+
49, 49, 47, 45, 48, 45, 42, 45, 48, 49, 68, 49, 48, 27, 52, 54,
|
|
97
|
+
50, 51, 27, 27, 51, 52, 50, 38, 51, 38, 56, 51, 51, 56, 52, 54,
|
|
98
|
+
52, 58, 52, 59, 58, 53, 54, 58, 60, 69, 55, 55, 69, 34, 43, 63,
|
|
99
|
+
56, 59, 52, 56, 63, 59, 56, 63, 57, 59, 58, 60, 53, 57, 58, 59,
|
|
100
|
+
58, 57, 61, 60, 58, 61, 57, 64, 61, 62, 61, 64, 60, 61, 69, 62,
|
|
101
|
+
69, 61, 46, 57, 63, 64, 57, 46, 46, 49, 64, 68, 64, 49, 62, 64,
|
|
102
|
+
68, 32, 65, 31, 65, 32, 67, 32, 36, 67, 65, 42, 31, 67, 68, 65,
|
|
103
|
+
48, 65, 68, 34, 69, 66, 67, 35, 66, 68, 66, 62, 66, 69, 62, 67,
|
|
104
|
+
66, 68, 33, 32, 30, 19, 2, 18, 20, 6, 70, 7, 70, 6, 18, 71,
|
|
105
|
+
19, 22, 71, 18,
|
|
106
|
+
),
|
|
107
|
+
dtype=np.int32,
|
|
108
|
+
)
|
|
109
|
+
# fmt: on
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
@wp.kernel(enable_backward=False)
|
|
113
|
+
def compute_tri_areas(
|
|
114
|
+
points: wp.array(dtype=wp.vec3),
|
|
115
|
+
face_vertex_indices: wp.array(dtype=wp.int32),
|
|
116
|
+
out_tri_areas: wp.array(dtype=wp.float32),
|
|
117
|
+
out_total_area: wp.array(dtype=wp.float32),
|
|
118
|
+
):
|
|
119
|
+
tri = wp.tid()
|
|
120
|
+
|
|
121
|
+
# Retrieve the indices of the three vertices that form the current triangle.
|
|
122
|
+
vtx_0 = face_vertex_indices[tri * 3]
|
|
123
|
+
vtx_1 = face_vertex_indices[tri * 3 + 1]
|
|
124
|
+
vtx_2 = face_vertex_indices[tri * 3 + 2]
|
|
125
|
+
|
|
126
|
+
# Retrieve their 3D position.
|
|
127
|
+
pt_0 = points[vtx_0]
|
|
128
|
+
pt_1 = points[vtx_1]
|
|
129
|
+
pt_2 = points[vtx_2]
|
|
130
|
+
|
|
131
|
+
# Calculate the cross product of two edges of the triangle,
|
|
132
|
+
# which gives a vector whose magnitude is twice the area of the triangle.
|
|
133
|
+
cross = wp.cross((pt_1 - pt_0), (pt_2 - pt_0))
|
|
134
|
+
area = wp.length(cross) * 0.5
|
|
135
|
+
|
|
136
|
+
# Store the result.
|
|
137
|
+
out_tri_areas[tri] = area
|
|
138
|
+
wp.atomic_add(out_total_area, 0, area)
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
@wp.kernel(enable_backward=False)
|
|
142
|
+
def compute_probability_distribution(
|
|
143
|
+
tri_areas: wp.array(dtype=wp.float32),
|
|
144
|
+
total_area: wp.array(dtype=wp.float32),
|
|
145
|
+
out_probabilities: wp.array(dtype=wp.float32),
|
|
146
|
+
):
|
|
147
|
+
tri = wp.tid()
|
|
148
|
+
|
|
149
|
+
# Calculate the probability of selecting this triangle,
|
|
150
|
+
# which is proportional to the triangle's area relative to total mesh area.
|
|
151
|
+
out_probabilities[tri] = tri_areas[tri] / total_area[0]
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
@wp.kernel(enable_backward=False)
|
|
155
|
+
def accumulate_cdf(
|
|
156
|
+
tri_count: wp.int32,
|
|
157
|
+
out_cdf: wp.array(dtype=wp.float32),
|
|
158
|
+
):
|
|
159
|
+
# Transform probability values into a Cumulative Distribution Function (CDF).
|
|
160
|
+
for tri in range(1, tri_count):
|
|
161
|
+
out_cdf[tri] += out_cdf[tri - 1]
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
@wp.kernel(enable_backward=False)
|
|
165
|
+
def sample_mesh(
|
|
166
|
+
mesh: wp.uint64,
|
|
167
|
+
cdf: wp.array(dtype=wp.float32),
|
|
168
|
+
seed: wp.int32,
|
|
169
|
+
out_points: wp.array(dtype=wp.vec3),
|
|
170
|
+
):
|
|
171
|
+
tid = wp.tid()
|
|
172
|
+
|
|
173
|
+
rng = wp.rand_init(seed, tid)
|
|
174
|
+
|
|
175
|
+
# Sample the triangle index using the CDF.
|
|
176
|
+
sample = wp.randf(rng)
|
|
177
|
+
tri = wp.lower_bound(cdf, sample)
|
|
178
|
+
|
|
179
|
+
# Sample the location in that triangle using random barycentric coordinates.
|
|
180
|
+
ru = wp.randf(rng)
|
|
181
|
+
rv = wp.randf(rng)
|
|
182
|
+
tri_u = 1.0 - wp.sqrt(ru)
|
|
183
|
+
tri_v = wp.sqrt(ru) * (1.0 - rv)
|
|
184
|
+
pos = wp.mesh_eval_position(mesh, tri, tri_u, tri_v)
|
|
185
|
+
|
|
186
|
+
# Store the result.
|
|
187
|
+
out_points[tid] = pos
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
class Example:
|
|
191
|
+
def __init__(self, stage_path="example_sample_mesh.usd"):
|
|
192
|
+
self.mesh = wp.Mesh(
|
|
193
|
+
points=wp.array(POINTS, dtype=wp.vec3),
|
|
194
|
+
indices=wp.array(FACE_VERTEX_INDICES, dtype=wp.int32),
|
|
195
|
+
)
|
|
196
|
+
self.tri_count = len(FACE_VERTEX_INDICES) // 3
|
|
197
|
+
|
|
198
|
+
# Compute the area of each triangle and the total area of the mesh.
|
|
199
|
+
tri_areas = wp.empty(shape=(self.tri_count,), dtype=wp.float32)
|
|
200
|
+
total_area = wp.zeros(shape=(1,), dtype=wp.float32)
|
|
201
|
+
wp.launch(
|
|
202
|
+
compute_tri_areas,
|
|
203
|
+
dim=tri_areas.shape,
|
|
204
|
+
inputs=(
|
|
205
|
+
self.mesh.points,
|
|
206
|
+
self.mesh.indices,
|
|
207
|
+
),
|
|
208
|
+
outputs=(
|
|
209
|
+
tri_areas,
|
|
210
|
+
total_area,
|
|
211
|
+
),
|
|
212
|
+
)
|
|
213
|
+
|
|
214
|
+
# Build a Cumulative Distribution Function (CDF) where the probability
|
|
215
|
+
# of sampling a given triangle is proportional to its area.
|
|
216
|
+
self.cdf = wp.empty(shape=(self.tri_count,), dtype=wp.float32)
|
|
217
|
+
wp.launch(
|
|
218
|
+
compute_probability_distribution,
|
|
219
|
+
dim=self.cdf.shape,
|
|
220
|
+
inputs=(
|
|
221
|
+
tri_areas,
|
|
222
|
+
total_area,
|
|
223
|
+
),
|
|
224
|
+
outputs=(self.cdf,),
|
|
225
|
+
)
|
|
226
|
+
wp.launch(
|
|
227
|
+
accumulate_cdf,
|
|
228
|
+
dim=(1,),
|
|
229
|
+
inputs=(self.tri_count,),
|
|
230
|
+
outputs=(self.cdf,),
|
|
231
|
+
)
|
|
232
|
+
|
|
233
|
+
# Array to store the sampled points.
|
|
234
|
+
self.points = wp.empty(shape=(100,), dtype=wp.vec3)
|
|
235
|
+
|
|
236
|
+
self.fps = 4
|
|
237
|
+
self.frame = 0
|
|
238
|
+
|
|
239
|
+
if stage_path:
|
|
240
|
+
self.renderer = wp.render.UsdRenderer(stage_path, fps=self.fps)
|
|
241
|
+
else:
|
|
242
|
+
self.renderer = None
|
|
243
|
+
|
|
244
|
+
def step(self):
|
|
245
|
+
with wp.ScopedTimer("step"):
|
|
246
|
+
# Sample new points on the mesh using the CDF and the current frame
|
|
247
|
+
# number as seed to ensure different samples each frame.
|
|
248
|
+
wp.launch(
|
|
249
|
+
sample_mesh,
|
|
250
|
+
dim=self.points.shape,
|
|
251
|
+
inputs=(
|
|
252
|
+
self.mesh.id,
|
|
253
|
+
self.cdf,
|
|
254
|
+
self.frame,
|
|
255
|
+
),
|
|
256
|
+
outputs=(self.points,),
|
|
257
|
+
)
|
|
258
|
+
|
|
259
|
+
self.frame += 1
|
|
260
|
+
|
|
261
|
+
def render(self):
|
|
262
|
+
if self.renderer is None:
|
|
263
|
+
return
|
|
264
|
+
|
|
265
|
+
with wp.ScopedTimer("render"):
|
|
266
|
+
self.renderer.begin_frame(self.frame / self.fps)
|
|
267
|
+
self.renderer.render_mesh(
|
|
268
|
+
name="mesh",
|
|
269
|
+
points=self.mesh.points.numpy(),
|
|
270
|
+
indices=self.mesh.indices.numpy(),
|
|
271
|
+
colors=(0.35, 0.55, 0.9),
|
|
272
|
+
)
|
|
273
|
+
self.renderer.render_points(name="points", points=self.points.numpy(), radius=0.05, colors=(0.8, 0.3, 0.2))
|
|
274
|
+
self.renderer.end_frame()
|
|
275
|
+
|
|
276
|
+
|
|
277
|
+
if __name__ == "__main__":
|
|
278
|
+
import argparse
|
|
279
|
+
|
|
280
|
+
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
|
|
281
|
+
parser.add_argument("--device", type=str, default=None, help="Override the default Warp device.")
|
|
282
|
+
parser.add_argument(
|
|
283
|
+
"--stage_path",
|
|
284
|
+
type=lambda x: None if x == "None" else str(x),
|
|
285
|
+
default="example_sample_mesh.usd",
|
|
286
|
+
help="Path to the output USD file.",
|
|
287
|
+
)
|
|
288
|
+
parser.add_argument("--num_frames", type=int, default=16, help="Total number of frames.")
|
|
289
|
+
|
|
290
|
+
args = parser.parse_known_args()[0]
|
|
291
|
+
|
|
292
|
+
with wp.ScopedDevice(args.device):
|
|
293
|
+
example = Example(stage_path=args.stage_path)
|
|
294
|
+
|
|
295
|
+
for _ in range(args.num_frames):
|
|
296
|
+
example.step()
|
|
297
|
+
example.render()
|
|
298
|
+
|
|
299
|
+
if example.renderer:
|
|
300
|
+
example.renderer.save()
|
|
@@ -117,7 +117,7 @@ def divergence_form(s: Sample, domain: Domain, u: Field, psi: Field):
|
|
|
117
117
|
def invert_volume_kernel(values: wp.array(dtype=float)):
|
|
118
118
|
i = wp.tid()
|
|
119
119
|
m = values[i]
|
|
120
|
-
values[i] = wp.
|
|
120
|
+
values[i] = wp.where(m == 0.0, 0.0, 1.0 / m)
|
|
121
121
|
|
|
122
122
|
|
|
123
123
|
@wp.kernel
|
|
@@ -75,7 +75,7 @@ def cell_transport_form(s: fem.Sample, domain: fem.Domain, u: fem.Field, v: fem.
|
|
|
75
75
|
def initial_condition(s: fem.Sample, domain: fem.Domain):
|
|
76
76
|
x = domain(s)[0] * 2.0
|
|
77
77
|
wave = wp.sin(x * wp.pi)
|
|
78
|
-
return wp.vec2(wp.
|
|
78
|
+
return wp.vec2(wp.where(x <= 1.0, wave, 0.0), 0.0)
|
|
79
79
|
|
|
80
80
|
|
|
81
81
|
@fem.integrand
|
|
@@ -87,7 +87,7 @@ def velocity_norm(s: fem.Sample, u: fem.Field):
|
|
|
87
87
|
def minmod(a: float, b: float):
|
|
88
88
|
sa = wp.sign(a)
|
|
89
89
|
sb = wp.sign(b)
|
|
90
|
-
return wp.
|
|
90
|
+
return wp.where(sa == sb, sa * wp.min(wp.abs(a), wp.abs(b)), 0.0)
|
|
91
91
|
|
|
92
92
|
|
|
93
93
|
@fem.integrand
|
|
@@ -57,7 +57,7 @@ def boundary_projector_form(
|
|
|
57
57
|
Bilinear boundary condition projector form, non-zero on radial edges
|
|
58
58
|
"""
|
|
59
59
|
nor = fem.normal(domain, s)
|
|
60
|
-
active = wp.
|
|
60
|
+
active = wp.where(nor[0] < -0.9999 or nor[1] < -0.9999, 1.0, 0.0)
|
|
61
61
|
return active * u(s) * v(s)
|
|
62
62
|
|
|
63
63
|
|
|
@@ -60,8 +60,8 @@ def cube_to_cylinder_grad(x: wp.vec3):
|
|
|
60
60
|
dir_grad = (wp.identity(n=3, dtype=float) - wp.outer(dir_xz, dir_xz)) / wp.length(pos_xz)
|
|
61
61
|
|
|
62
62
|
abs_xz = wp.abs(pos_xz)
|
|
63
|
-
xinf_grad = wp.
|
|
64
|
-
abs_xz[0] > abs_xz[2], wp.
|
|
63
|
+
xinf_grad = wp.where(
|
|
64
|
+
abs_xz[0] > abs_xz[2], wp.vec(wp.sign(pos_xz[0]), 0.0, 0.0), wp.vec3(0.0, 0.0, wp.sign(pos_xz[2]))
|
|
65
65
|
)
|
|
66
66
|
grad = dir_grad * wp.max(abs_xz) + wp.outer(dir_xz, xinf_grad)
|
|
67
67
|
|
|
@@ -85,10 +85,10 @@ def permeability_field(
|
|
|
85
85
|
r = wp.sqrt(x * x + z * z)
|
|
86
86
|
|
|
87
87
|
if r <= core_radius:
|
|
88
|
-
return wp.
|
|
88
|
+
return wp.where(y < core_height, MU_i, MU_0)
|
|
89
89
|
|
|
90
90
|
if r >= coil_internal_radius and r <= coil_external_radius:
|
|
91
|
-
return wp.
|
|
91
|
+
return wp.where(y < coil_height, MU_c, MU_0)
|
|
92
92
|
|
|
93
93
|
return MU_0
|
|
94
94
|
|
|
@@ -107,10 +107,10 @@ def current_field(
|
|
|
107
107
|
|
|
108
108
|
r = wp.sqrt(x * x + z * z)
|
|
109
109
|
|
|
110
|
-
return wp.
|
|
110
|
+
return wp.where(
|
|
111
111
|
y < coil_height and r >= coil_internal_radius and r <= coil_external_radius,
|
|
112
|
-
wp.vec3(0.0),
|
|
113
112
|
wp.vec3(z, 0.0, -x) * current / r,
|
|
113
|
+
wp.vec3(0.0),
|
|
114
114
|
)
|
|
115
115
|
|
|
116
116
|
|
warp/examples/fem/utils.py
CHANGED
|
@@ -34,6 +34,9 @@ __all__ = [
|
|
|
34
34
|
"Plot",
|
|
35
35
|
]
|
|
36
36
|
|
|
37
|
+
# matrix inversion routines contain nested loops,
|
|
38
|
+
# default unrolling leads to code explosion
|
|
39
|
+
wp.set_module_options({"max_unroll": 6})
|
|
37
40
|
|
|
38
41
|
#
|
|
39
42
|
# Mesh utilities
|
|
@@ -225,6 +228,7 @@ def bsr_cg(
|
|
|
225
228
|
mv_routine=None,
|
|
226
229
|
quiet=False,
|
|
227
230
|
method: str = "cg",
|
|
231
|
+
M: BsrMatrix = None,
|
|
228
232
|
) -> Tuple[float, int]:
|
|
229
233
|
"""Solves the linear system A x = b using an iterative solver, optionally with diagonal preconditioning
|
|
230
234
|
|
|
@@ -245,7 +249,9 @@ def bsr_cg(
|
|
|
245
249
|
|
|
246
250
|
"""
|
|
247
251
|
|
|
248
|
-
if
|
|
252
|
+
if M is not None:
|
|
253
|
+
M = aslinearoperator(M)
|
|
254
|
+
elif mv_routine is None:
|
|
249
255
|
M = preconditioner(A, "diag") if use_diag_precond else None
|
|
250
256
|
else:
|
|
251
257
|
A = LinearOperator(A.shape, A.dtype, A.device, matvec=mv_routine)
|
|
@@ -458,7 +464,7 @@ def bsr_solve_saddle(
|
|
|
458
464
|
return err, end_iter
|
|
459
465
|
|
|
460
466
|
|
|
461
|
-
@wp.kernel
|
|
467
|
+
@wp.kernel(enable_backward=False)
|
|
462
468
|
def _compute_schur_inverse_diagonal(
|
|
463
469
|
B_offsets: wp.array(dtype=int),
|
|
464
470
|
B_indices: wp.array(dtype=int),
|
|
@@ -500,7 +506,7 @@ def invert_diagonal_bsr_matrix(A: BsrMatrix):
|
|
|
500
506
|
)
|
|
501
507
|
|
|
502
508
|
|
|
503
|
-
@wp.kernel
|
|
509
|
+
@wp.kernel(enable_backward=False)
|
|
504
510
|
def _block_diagonal_invert(values: wp.array(dtype=Any)):
|
|
505
511
|
i = wp.tid()
|
|
506
512
|
values[i] = fem.utils.inverse_qr(values[i])
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
16
|
+
###########################################################################
|
|
17
|
+
# Example jax_callable()
|
|
18
|
+
#
|
|
19
|
+
# Examples of calling annotated Python functions from JAX.
|
|
20
|
+
###########################################################################
|
|
21
|
+
|
|
22
|
+
from functools import partial
|
|
23
|
+
|
|
24
|
+
import jax
|
|
25
|
+
import jax.numpy as jnp
|
|
26
|
+
|
|
27
|
+
import warp as wp
|
|
28
|
+
from warp.jax_experimental.ffi import jax_callable
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
@wp.kernel
|
|
32
|
+
def scale_kernel(a: wp.array(dtype=float), s: float, output: wp.array(dtype=float)):
|
|
33
|
+
tid = wp.tid()
|
|
34
|
+
output[tid] = a[tid] * s
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@wp.kernel
|
|
38
|
+
def scale_vec_kernel(a: wp.array(dtype=wp.vec2), s: float, output: wp.array(dtype=wp.vec2)):
|
|
39
|
+
tid = wp.tid()
|
|
40
|
+
output[tid] = a[tid] * s
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
# The Python function to call.
|
|
44
|
+
# Note the argument annotations, just like Warp kernels.
|
|
45
|
+
def example_func(
|
|
46
|
+
# inputs
|
|
47
|
+
a: wp.array(dtype=float),
|
|
48
|
+
b: wp.array(dtype=wp.vec2),
|
|
49
|
+
s: float,
|
|
50
|
+
# outputs
|
|
51
|
+
c: wp.array(dtype=float),
|
|
52
|
+
d: wp.array(dtype=wp.vec2),
|
|
53
|
+
):
|
|
54
|
+
wp.launch(scale_kernel, dim=a.shape, inputs=[a, s], outputs=[c])
|
|
55
|
+
wp.launch(scale_vec_kernel, dim=b.shape, inputs=[b, s], outputs=[d])
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def example1():
|
|
59
|
+
jax_func = jax_callable(example_func, num_outputs=2, vmap_method="broadcast_all")
|
|
60
|
+
|
|
61
|
+
@jax.jit
|
|
62
|
+
def f():
|
|
63
|
+
# inputs
|
|
64
|
+
a = jnp.arange(10, dtype=jnp.float32)
|
|
65
|
+
b = jnp.arange(10, dtype=jnp.float32).reshape((5, 2)) # wp.vec2
|
|
66
|
+
s = 2.0
|
|
67
|
+
|
|
68
|
+
# output shapes
|
|
69
|
+
output_dims = {"c": a.shape, "d": b.shape}
|
|
70
|
+
|
|
71
|
+
c, d = jax_func(a, b, s, output_dims=output_dims)
|
|
72
|
+
|
|
73
|
+
return c, d
|
|
74
|
+
|
|
75
|
+
r1, r2 = f()
|
|
76
|
+
print(r1)
|
|
77
|
+
print(r2)
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def example2():
|
|
81
|
+
jax_func = jax_callable(example_func, num_outputs=2, vmap_method="broadcast_all")
|
|
82
|
+
|
|
83
|
+
# NOTE: scalar arguments must be static compile-time constants
|
|
84
|
+
@partial(jax.jit, static_argnames=["s"])
|
|
85
|
+
def f(a, b, s):
|
|
86
|
+
# output shapes
|
|
87
|
+
output_dims = {"c": a.shape, "d": b.shape}
|
|
88
|
+
|
|
89
|
+
c, d = jax_func(a, b, s, output_dims=output_dims)
|
|
90
|
+
|
|
91
|
+
return c, d
|
|
92
|
+
|
|
93
|
+
# inputs
|
|
94
|
+
a = jnp.arange(10, dtype=jnp.float32)
|
|
95
|
+
b = jnp.arange(10, dtype=jnp.float32).reshape((5, 2)) # wp.vec2
|
|
96
|
+
s = 3.0
|
|
97
|
+
|
|
98
|
+
r1, r2 = f(a, b, s)
|
|
99
|
+
print(r1)
|
|
100
|
+
print(r2)
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def main():
|
|
104
|
+
wp.init()
|
|
105
|
+
wp.load_module(device=wp.get_device())
|
|
106
|
+
|
|
107
|
+
examples = [example1, example2]
|
|
108
|
+
|
|
109
|
+
for example in examples:
|
|
110
|
+
print("\n===========================================================================")
|
|
111
|
+
print(f"{example.__name__}:")
|
|
112
|
+
example()
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
if __name__ == "__main__":
|
|
116
|
+
main()
|