warp-lang 1.6.2__py3-none-win_amd64.whl → 1.7.1__py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of warp-lang might be problematic. Click here for more details.
- warp/__init__.py +7 -1
- warp/autograd.py +12 -2
- warp/bin/warp-clang.dll +0 -0
- warp/bin/warp.dll +0 -0
- warp/build.py +410 -0
- warp/build_dll.py +6 -14
- warp/builtins.py +463 -372
- warp/codegen.py +196 -124
- warp/config.py +42 -6
- warp/context.py +496 -271
- warp/dlpack.py +8 -6
- warp/examples/assets/nonuniform.usd +0 -0
- warp/examples/assets/nvidia_logo.png +0 -0
- warp/examples/benchmarks/benchmark_cloth.py +1 -1
- warp/examples/benchmarks/benchmark_tile_load_store.py +103 -0
- warp/examples/core/example_sample_mesh.py +300 -0
- warp/examples/distributed/example_jacobi_mpi.py +507 -0
- warp/examples/fem/example_apic_fluid.py +1 -1
- warp/examples/fem/example_burgers.py +2 -2
- warp/examples/fem/example_deformed_geometry.py +1 -1
- warp/examples/fem/example_distortion_energy.py +1 -1
- warp/examples/fem/example_magnetostatics.py +6 -6
- warp/examples/fem/utils.py +9 -3
- warp/examples/interop/example_jax_callable.py +116 -0
- warp/examples/interop/example_jax_ffi_callback.py +132 -0
- warp/examples/interop/example_jax_kernel.py +205 -0
- warp/examples/optim/example_fluid_checkpoint.py +497 -0
- warp/examples/tile/example_tile_matmul.py +2 -4
- warp/fem/__init__.py +11 -1
- warp/fem/adaptivity.py +4 -4
- warp/fem/field/field.py +11 -1
- warp/fem/field/nodal_field.py +56 -88
- warp/fem/field/virtual.py +62 -23
- warp/fem/geometry/adaptive_nanogrid.py +16 -13
- warp/fem/geometry/closest_point.py +1 -1
- warp/fem/geometry/deformed_geometry.py +5 -2
- warp/fem/geometry/geometry.py +5 -0
- warp/fem/geometry/grid_2d.py +12 -12
- warp/fem/geometry/grid_3d.py +12 -15
- warp/fem/geometry/hexmesh.py +5 -7
- warp/fem/geometry/nanogrid.py +9 -11
- warp/fem/geometry/quadmesh.py +13 -13
- warp/fem/geometry/tetmesh.py +3 -4
- warp/fem/geometry/trimesh.py +7 -20
- warp/fem/integrate.py +262 -93
- warp/fem/linalg.py +5 -5
- warp/fem/quadrature/pic_quadrature.py +37 -22
- warp/fem/quadrature/quadrature.py +194 -25
- warp/fem/space/__init__.py +1 -1
- warp/fem/space/basis_function_space.py +4 -2
- warp/fem/space/basis_space.py +25 -18
- warp/fem/space/hexmesh_function_space.py +2 -2
- warp/fem/space/partition.py +6 -2
- warp/fem/space/quadmesh_function_space.py +8 -8
- warp/fem/space/shape/cube_shape_function.py +23 -23
- warp/fem/space/shape/square_shape_function.py +12 -12
- warp/fem/space/shape/triangle_shape_function.py +1 -1
- warp/fem/space/tetmesh_function_space.py +3 -3
- warp/fem/space/trimesh_function_space.py +2 -2
- warp/fem/utils.py +12 -6
- warp/jax.py +14 -1
- warp/jax_experimental/__init__.py +16 -0
- warp/{jax_experimental.py → jax_experimental/custom_call.py} +28 -29
- warp/jax_experimental/ffi.py +702 -0
- warp/jax_experimental/xla_ffi.py +602 -0
- warp/math.py +89 -0
- warp/native/array.h +13 -0
- warp/native/builtin.h +29 -3
- warp/native/bvh.cpp +3 -1
- warp/native/bvh.cu +42 -14
- warp/native/bvh.h +2 -1
- warp/native/clang/clang.cpp +30 -3
- warp/native/cuda_util.cpp +14 -0
- warp/native/cuda_util.h +2 -0
- warp/native/exports.h +68 -63
- warp/native/intersect.h +26 -26
- warp/native/intersect_adj.h +33 -33
- warp/native/marching.cu +1 -1
- warp/native/mat.h +513 -9
- warp/native/mesh.h +10 -10
- warp/native/quat.h +99 -11
- warp/native/rand.h +6 -0
- warp/native/sort.cpp +122 -59
- warp/native/sort.cu +152 -15
- warp/native/sort.h +8 -1
- warp/native/sparse.cpp +43 -22
- warp/native/sparse.cu +52 -17
- warp/native/svd.h +116 -0
- warp/native/tile.h +312 -116
- warp/native/tile_reduce.h +46 -3
- warp/native/vec.h +68 -7
- warp/native/volume.cpp +85 -113
- warp/native/volume_builder.cu +25 -10
- warp/native/volume_builder.h +6 -0
- warp/native/warp.cpp +5 -6
- warp/native/warp.cu +100 -11
- warp/native/warp.h +19 -10
- warp/optim/linear.py +10 -10
- warp/render/render_opengl.py +19 -17
- warp/render/render_usd.py +93 -3
- warp/sim/articulation.py +4 -4
- warp/sim/collide.py +32 -19
- warp/sim/import_mjcf.py +449 -155
- warp/sim/import_urdf.py +32 -12
- warp/sim/inertia.py +189 -156
- warp/sim/integrator_euler.py +8 -5
- warp/sim/integrator_featherstone.py +3 -10
- warp/sim/integrator_vbd.py +207 -2
- warp/sim/integrator_xpbd.py +8 -5
- warp/sim/model.py +71 -25
- warp/sim/render.py +4 -0
- warp/sim/utils.py +2 -2
- warp/sparse.py +642 -555
- warp/stubs.py +217 -20
- warp/tests/__main__.py +0 -15
- warp/tests/assets/torus.usda +1 -1
- warp/tests/cuda/__init__.py +0 -0
- warp/tests/{test_mempool.py → cuda/test_mempool.py} +39 -0
- warp/tests/{test_streams.py → cuda/test_streams.py} +71 -0
- warp/tests/geometry/__init__.py +0 -0
- warp/tests/{test_mesh_query_point.py → geometry/test_mesh_query_point.py} +66 -63
- warp/tests/{test_mesh_query_ray.py → geometry/test_mesh_query_ray.py} +1 -1
- warp/tests/{test_volume.py → geometry/test_volume.py} +41 -6
- warp/tests/interop/__init__.py +0 -0
- warp/tests/{test_dlpack.py → interop/test_dlpack.py} +28 -5
- warp/tests/sim/__init__.py +0 -0
- warp/tests/{disabled_kinematics.py → sim/disabled_kinematics.py} +9 -10
- warp/tests/{test_collision.py → sim/test_collision.py} +236 -205
- warp/tests/sim/test_inertia.py +161 -0
- warp/tests/{test_model.py → sim/test_model.py} +40 -0
- warp/tests/{flaky_test_sim_grad.py → sim/test_sim_grad.py} +4 -0
- warp/tests/{test_sim_kinematics.py → sim/test_sim_kinematics.py} +2 -1
- warp/tests/sim/test_vbd.py +597 -0
- warp/tests/sim/test_xpbd.py +399 -0
- warp/tests/test_bool.py +1 -1
- warp/tests/test_codegen.py +24 -3
- warp/tests/test_examples.py +40 -38
- warp/tests/test_fem.py +98 -14
- warp/tests/test_linear_solvers.py +0 -11
- warp/tests/test_mat.py +577 -156
- warp/tests/test_mat_scalar_ops.py +4 -4
- warp/tests/test_overwrite.py +0 -60
- warp/tests/test_quat.py +356 -151
- warp/tests/test_rand.py +44 -37
- warp/tests/test_sparse.py +47 -6
- warp/tests/test_spatial.py +75 -0
- warp/tests/test_static.py +1 -1
- warp/tests/test_utils.py +84 -4
- warp/tests/test_vec.py +336 -178
- warp/tests/tile/__init__.py +0 -0
- warp/tests/{test_tile.py → tile/test_tile.py} +136 -51
- warp/tests/{test_tile_load.py → tile/test_tile_load.py} +98 -1
- warp/tests/{test_tile_mathdx.py → tile/test_tile_mathdx.py} +9 -6
- warp/tests/{test_tile_mlp.py → tile/test_tile_mlp.py} +25 -14
- warp/tests/{test_tile_reduce.py → tile/test_tile_reduce.py} +60 -1
- warp/tests/{test_tile_view.py → tile/test_tile_view.py} +1 -1
- warp/tests/unittest_serial.py +1 -0
- warp/tests/unittest_suites.py +45 -62
- warp/tests/unittest_utils.py +2 -1
- warp/thirdparty/unittest_parallel.py +3 -1
- warp/types.py +175 -666
- warp/utils.py +137 -72
- {warp_lang-1.6.2.dist-info → warp_lang-1.7.1.dist-info}/METADATA +46 -12
- {warp_lang-1.6.2.dist-info → warp_lang-1.7.1.dist-info}/RECORD +184 -171
- {warp_lang-1.6.2.dist-info → warp_lang-1.7.1.dist-info}/WHEEL +1 -1
- {warp_lang-1.6.2.dist-info → warp_lang-1.7.1.dist-info/licenses}/LICENSE.md +0 -26
- warp/examples/optim/example_walker.py +0 -317
- warp/native/cutlass_gemm.cpp +0 -43
- warp/native/cutlass_gemm.cu +0 -382
- warp/tests/test_matmul.py +0 -511
- warp/tests/test_matmul_lite.py +0 -411
- warp/tests/test_vbd.py +0 -386
- warp/tests/unused_test_misc.py +0 -77
- /warp/tests/{test_async.py → cuda/test_async.py} +0 -0
- /warp/tests/{test_ipc.py → cuda/test_ipc.py} +0 -0
- /warp/tests/{test_multigpu.py → cuda/test_multigpu.py} +0 -0
- /warp/tests/{test_peer.py → cuda/test_peer.py} +0 -0
- /warp/tests/{test_pinned.py → cuda/test_pinned.py} +0 -0
- /warp/tests/{test_bvh.py → geometry/test_bvh.py} +0 -0
- /warp/tests/{test_hash_grid.py → geometry/test_hash_grid.py} +0 -0
- /warp/tests/{test_marching_cubes.py → geometry/test_marching_cubes.py} +0 -0
- /warp/tests/{test_mesh.py → geometry/test_mesh.py} +0 -0
- /warp/tests/{test_mesh_query_aabb.py → geometry/test_mesh_query_aabb.py} +0 -0
- /warp/tests/{test_volume_write.py → geometry/test_volume_write.py} +0 -0
- /warp/tests/{test_jax.py → interop/test_jax.py} +0 -0
- /warp/tests/{test_paddle.py → interop/test_paddle.py} +0 -0
- /warp/tests/{test_torch.py → interop/test_torch.py} +0 -0
- /warp/tests/{test_coloring.py → sim/test_coloring.py} +0 -0
- /warp/tests/{test_sim_grad_bounce_linear.py → sim/test_sim_grad_bounce_linear.py} +0 -0
- /warp/tests/{test_tile_shared_memory.py → tile/test_tile_shared_memory.py} +0 -0
- {warp_lang-1.6.2.dist-info → warp_lang-1.7.1.dist-info}/top_level.txt +0 -0
warp/utils.py
CHANGED
|
@@ -13,13 +13,15 @@
|
|
|
13
13
|
# See the License for the specific language governing permissions and
|
|
14
14
|
# limitations under the License.
|
|
15
15
|
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
16
18
|
import cProfile
|
|
17
19
|
import ctypes
|
|
18
20
|
import os
|
|
19
21
|
import sys
|
|
20
22
|
import time
|
|
21
23
|
import warnings
|
|
22
|
-
from typing import Any, Optional
|
|
24
|
+
from typing import Any, Callable, Dict, List, Optional, Union
|
|
23
25
|
|
|
24
26
|
import numpy as np
|
|
25
27
|
|
|
@@ -141,6 +143,8 @@ def radix_sort_pairs(keys, values, count: int):
|
|
|
141
143
|
runtime.core.radix_sort_pairs_int_host(keys.ptr, values.ptr, count)
|
|
142
144
|
elif keys.dtype == wp.float32 and values.dtype == wp.int32:
|
|
143
145
|
runtime.core.radix_sort_pairs_float_host(keys.ptr, values.ptr, count)
|
|
146
|
+
elif keys.dtype == wp.int64 and values.dtype == wp.int32:
|
|
147
|
+
runtime.core.radix_sort_pairs_int64_host(keys.ptr, values.ptr, count)
|
|
144
148
|
else:
|
|
145
149
|
raise RuntimeError("Unsupported data type")
|
|
146
150
|
elif keys.device.is_cuda:
|
|
@@ -148,6 +152,90 @@ def radix_sort_pairs(keys, values, count: int):
|
|
|
148
152
|
runtime.core.radix_sort_pairs_int_device(keys.ptr, values.ptr, count)
|
|
149
153
|
elif keys.dtype == wp.float32 and values.dtype == wp.int32:
|
|
150
154
|
runtime.core.radix_sort_pairs_float_device(keys.ptr, values.ptr, count)
|
|
155
|
+
elif keys.dtype == wp.int64 and values.dtype == wp.int32:
|
|
156
|
+
runtime.core.radix_sort_pairs_int64_device(keys.ptr, values.ptr, count)
|
|
157
|
+
else:
|
|
158
|
+
raise RuntimeError("Unsupported data type")
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def segmented_sort_pairs(
|
|
162
|
+
keys,
|
|
163
|
+
values,
|
|
164
|
+
count: int,
|
|
165
|
+
segment_start_indices: wp.array(dtype=wp.int32),
|
|
166
|
+
segment_end_indices: wp.array(dtype=wp.int32) = None,
|
|
167
|
+
):
|
|
168
|
+
"""Sort key-value pairs within segments.
|
|
169
|
+
|
|
170
|
+
This function performs a segmented sort of key-value pairs, where the sorting is done independently within each segment.
|
|
171
|
+
The segments are defined by their start and optionally end indices.
|
|
172
|
+
|
|
173
|
+
Args:
|
|
174
|
+
keys: Array of keys to sort. Must be of type int32 or float32.
|
|
175
|
+
values: Array of values to sort along with keys. Must be of type int32.
|
|
176
|
+
count: Number of elements to sort.
|
|
177
|
+
segment_start_indices: Array containing start index of each segment. Must be of type int32.
|
|
178
|
+
If segment_end_indices is None, this array must have length at least num_segments + 1,
|
|
179
|
+
and segment_end_indices will be inferred as segment_start_indices[1:].
|
|
180
|
+
If segment_end_indices is provided, this array must have length at least num_segments.
|
|
181
|
+
segment_end_indices: Optional array containing end index of each segment. Must be of type int32 if provided.
|
|
182
|
+
If None, segment_end_indices will be inferred from segment_start_indices[1:].
|
|
183
|
+
If provided, must have length at least num_segments.
|
|
184
|
+
|
|
185
|
+
Raises:
|
|
186
|
+
RuntimeError: If array storage devices don't match, if storage size is insufficient,
|
|
187
|
+
if segment_start_indices is not of type int32, or if data types are unsupported.
|
|
188
|
+
"""
|
|
189
|
+
if keys.device != values.device:
|
|
190
|
+
raise RuntimeError("Array storage devices do not match")
|
|
191
|
+
|
|
192
|
+
if count == 0:
|
|
193
|
+
return
|
|
194
|
+
|
|
195
|
+
if keys.size < 2 * count or values.size < 2 * count:
|
|
196
|
+
raise RuntimeError("Array storage must be large enough to contain 2*count elements")
|
|
197
|
+
|
|
198
|
+
from warp.context import runtime
|
|
199
|
+
|
|
200
|
+
if segment_start_indices.dtype != wp.int32:
|
|
201
|
+
raise RuntimeError("segment_start_indices array must be of type int32")
|
|
202
|
+
|
|
203
|
+
# Handle case where segment_end_indices is not provided
|
|
204
|
+
if segment_end_indices is None:
|
|
205
|
+
num_segments = max(0, segment_start_indices.size - 1)
|
|
206
|
+
|
|
207
|
+
segment_end_indices = segment_start_indices[1:]
|
|
208
|
+
segment_end_indices_ptr = segment_end_indices.ptr
|
|
209
|
+
segment_start_indices_ptr = segment_start_indices.ptr
|
|
210
|
+
else:
|
|
211
|
+
if segment_end_indices.dtype != wp.int32:
|
|
212
|
+
raise RuntimeError("segment_end_indices array must be of type int32")
|
|
213
|
+
|
|
214
|
+
num_segments = segment_start_indices.size
|
|
215
|
+
|
|
216
|
+
segment_end_indices_ptr = segment_end_indices.ptr
|
|
217
|
+
segment_start_indices_ptr = segment_start_indices.ptr
|
|
218
|
+
|
|
219
|
+
if keys.device.is_cpu:
|
|
220
|
+
if keys.dtype == wp.int32 and values.dtype == wp.int32:
|
|
221
|
+
runtime.core.segmented_sort_pairs_int_host(
|
|
222
|
+
keys.ptr, values.ptr, count, segment_start_indices_ptr, segment_end_indices_ptr, num_segments
|
|
223
|
+
)
|
|
224
|
+
elif keys.dtype == wp.float32 and values.dtype == wp.int32:
|
|
225
|
+
runtime.core.segmented_sort_pairs_float_host(
|
|
226
|
+
keys.ptr, values.ptr, count, segment_start_indices_ptr, segment_end_indices_ptr, num_segments
|
|
227
|
+
)
|
|
228
|
+
else:
|
|
229
|
+
raise RuntimeError("Unsupported data type")
|
|
230
|
+
elif keys.device.is_cuda:
|
|
231
|
+
if keys.dtype == wp.int32 and values.dtype == wp.int32:
|
|
232
|
+
runtime.core.segmented_sort_pairs_int_device(
|
|
233
|
+
keys.ptr, values.ptr, count, segment_start_indices_ptr, segment_end_indices_ptr, num_segments
|
|
234
|
+
)
|
|
235
|
+
elif keys.dtype == wp.float32 and values.dtype == wp.int32:
|
|
236
|
+
runtime.core.segmented_sort_pairs_float_device(
|
|
237
|
+
keys.ptr, values.ptr, count, segment_start_indices_ptr, segment_end_indices_ptr, num_segments
|
|
238
|
+
)
|
|
151
239
|
else:
|
|
152
240
|
raise RuntimeError("Unsupported data type")
|
|
153
241
|
|
|
@@ -673,37 +761,38 @@ class ScopedTimer:
|
|
|
673
761
|
|
|
674
762
|
def __init__(
|
|
675
763
|
self,
|
|
676
|
-
name,
|
|
677
|
-
active=True,
|
|
678
|
-
print=True,
|
|
679
|
-
detailed=False,
|
|
680
|
-
dict=None,
|
|
681
|
-
use_nvtx=False,
|
|
682
|
-
color="rapids",
|
|
683
|
-
synchronize=False,
|
|
684
|
-
cuda_filter=0,
|
|
685
|
-
report_func=None,
|
|
686
|
-
skip_tape=False,
|
|
764
|
+
name: str,
|
|
765
|
+
active: bool = True,
|
|
766
|
+
print: bool = True,
|
|
767
|
+
detailed: bool = False,
|
|
768
|
+
dict: Optional[Dict[str, List[float]]] = None,
|
|
769
|
+
use_nvtx: bool = False,
|
|
770
|
+
color: Union[int, str] = "rapids",
|
|
771
|
+
synchronize: bool = False,
|
|
772
|
+
cuda_filter: int = 0,
|
|
773
|
+
report_func: Optional[Callable[[List[TimingResult], str], None]] = None,
|
|
774
|
+
skip_tape: bool = False,
|
|
687
775
|
):
|
|
688
776
|
"""Context manager object for a timer
|
|
689
777
|
|
|
690
778
|
Parameters:
|
|
691
|
-
name
|
|
692
|
-
active
|
|
693
|
-
print
|
|
694
|
-
detailed
|
|
695
|
-
dict
|
|
696
|
-
use_nvtx
|
|
697
|
-
color
|
|
698
|
-
synchronize
|
|
699
|
-
cuda_filter
|
|
700
|
-
report_func
|
|
701
|
-
|
|
779
|
+
name: Name of timer
|
|
780
|
+
active: Enables this timer
|
|
781
|
+
print: At context manager exit, print elapsed time to ``sys.stdout``
|
|
782
|
+
detailed: Collects additional profiling data using cProfile and calls ``print_stats()`` at context exit
|
|
783
|
+
dict: A dictionary of lists to which the elapsed time will be appended using ``name`` as a key
|
|
784
|
+
use_nvtx: If true, timing functionality is replaced by an NVTX range
|
|
785
|
+
color: ARGB value (e.g. 0x00FFFF) or color name (e.g. 'cyan') associated with the NVTX range
|
|
786
|
+
synchronize: Synchronize the CPU thread with any outstanding CUDA work to return accurate GPU timings
|
|
787
|
+
cuda_filter: Filter flags for CUDA activity timing, e.g. ``warp.TIMING_KERNEL`` or ``warp.TIMING_ALL``
|
|
788
|
+
report_func: A callback function to print the activity report.
|
|
789
|
+
If ``None``, :func:`wp.timing_print() <timing_print>` will be used.
|
|
790
|
+
skip_tape: If true, the timer will not be recorded in the tape
|
|
702
791
|
|
|
703
792
|
Attributes:
|
|
704
793
|
extra_msg (str): Can be set to a string that will be added to the printout at context exit.
|
|
705
794
|
elapsed (float): The duration of the ``with`` block used with this object
|
|
706
|
-
timing_results (
|
|
795
|
+
timing_results (List[TimingResult]): The list of activity timing results, if collection was requested using ``cuda_filter``
|
|
707
796
|
"""
|
|
708
797
|
self.name = name
|
|
709
798
|
self.active = active and self.enabled
|
|
@@ -799,7 +888,7 @@ class ScopedTimer:
|
|
|
799
888
|
|
|
800
889
|
# Allow temporarily enabling/disabling mempool allocators
|
|
801
890
|
class ScopedMempool:
|
|
802
|
-
def __init__(self, device, enable: bool):
|
|
891
|
+
def __init__(self, device: Devicelike, enable: bool):
|
|
803
892
|
self.device = wp.get_device(device)
|
|
804
893
|
self.enable = enable
|
|
805
894
|
|
|
@@ -813,7 +902,7 @@ class ScopedMempool:
|
|
|
813
902
|
|
|
814
903
|
# Allow temporarily enabling/disabling mempool access
|
|
815
904
|
class ScopedMempoolAccess:
|
|
816
|
-
def __init__(self, target_device, peer_device, enable: bool):
|
|
905
|
+
def __init__(self, target_device: Devicelike, peer_device: Devicelike, enable: bool):
|
|
817
906
|
self.target_device = target_device
|
|
818
907
|
self.peer_device = peer_device
|
|
819
908
|
self.enable = enable
|
|
@@ -828,7 +917,7 @@ class ScopedMempoolAccess:
|
|
|
828
917
|
|
|
829
918
|
# Allow temporarily enabling/disabling peer access
|
|
830
919
|
class ScopedPeerAccess:
|
|
831
|
-
def __init__(self, target_device, peer_device, enable: bool):
|
|
920
|
+
def __init__(self, target_device: Devicelike, peer_device: Devicelike, enable: bool):
|
|
832
921
|
self.target_device = target_device
|
|
833
922
|
self.peer_device = peer_device
|
|
834
923
|
self.enable = enable
|
|
@@ -842,7 +931,7 @@ class ScopedPeerAccess:
|
|
|
842
931
|
|
|
843
932
|
|
|
844
933
|
class ScopedCapture:
|
|
845
|
-
def __init__(self, device=None, stream=None, force_module_load=None, external=False):
|
|
934
|
+
def __init__(self, device: Devicelike = None, stream=None, force_module_load=None, external=False):
|
|
846
935
|
self.device = device
|
|
847
936
|
self.stream = stream
|
|
848
937
|
self.force_module_load = force_module_load
|
|
@@ -868,27 +957,6 @@ class ScopedCapture:
|
|
|
868
957
|
self.active = False
|
|
869
958
|
|
|
870
959
|
|
|
871
|
-
# helper kernels for adj_matmul
|
|
872
|
-
@wp.kernel
|
|
873
|
-
def add_kernel_2d(x: wp.array2d(dtype=Any), acc: wp.array2d(dtype=Any), beta: Any):
|
|
874
|
-
i, j = wp.tid()
|
|
875
|
-
|
|
876
|
-
x[i, j] = x[i, j] + beta * acc[i, j]
|
|
877
|
-
|
|
878
|
-
|
|
879
|
-
@wp.kernel
|
|
880
|
-
def add_kernel_3d(x: wp.array3d(dtype=Any), acc: wp.array3d(dtype=Any), beta: Any):
|
|
881
|
-
i, j, k = wp.tid()
|
|
882
|
-
|
|
883
|
-
x[i, j, k] = x[i, j, k] + beta * acc[i, j, k]
|
|
884
|
-
|
|
885
|
-
|
|
886
|
-
# explicit instantiations of generic kernels for adj_matmul
|
|
887
|
-
for T in [wp.float16, wp.float32, wp.float64]:
|
|
888
|
-
wp.overload(add_kernel_2d, [wp.array2d(dtype=T), wp.array2d(dtype=T), T])
|
|
889
|
-
wp.overload(add_kernel_3d, [wp.array3d(dtype=T), wp.array3d(dtype=T), T])
|
|
890
|
-
|
|
891
|
-
|
|
892
960
|
def check_p2p():
|
|
893
961
|
"""Check if the machine is configured properly for peer-to-peer transfers.
|
|
894
962
|
|
|
@@ -927,31 +995,28 @@ class timing_result_t(ctypes.Structure):
|
|
|
927
995
|
|
|
928
996
|
|
|
929
997
|
class TimingResult:
|
|
930
|
-
"""Timing result for a single activity.
|
|
998
|
+
"""Timing result for a single activity."""
|
|
931
999
|
|
|
932
|
-
|
|
933
|
-
|
|
1000
|
+
def __init__(self, device, name, filter, elapsed):
|
|
1001
|
+
self.device: warp.context.Device = device
|
|
1002
|
+
"""The device where the activity was recorded."""
|
|
934
1003
|
|
|
935
|
-
|
|
936
|
-
|
|
937
|
-
name (str): The activity name.
|
|
938
|
-
filter (int): The type of activity (e.g., ``warp.TIMING_KERNEL``).
|
|
939
|
-
elapsed (float): The elapsed time in milliseconds.
|
|
940
|
-
"""
|
|
1004
|
+
self.name: str = name
|
|
1005
|
+
"""The activity name."""
|
|
941
1006
|
|
|
942
|
-
|
|
943
|
-
|
|
944
|
-
|
|
945
|
-
self.
|
|
946
|
-
|
|
1007
|
+
self.filter: int = filter
|
|
1008
|
+
"""The type of activity (e.g., ``warp.TIMING_KERNEL``)."""
|
|
1009
|
+
|
|
1010
|
+
self.elapsed: float = elapsed
|
|
1011
|
+
"""The elapsed time in milliseconds."""
|
|
947
1012
|
|
|
948
1013
|
|
|
949
|
-
def timing_begin(cuda_filter=TIMING_ALL, synchronize=True):
|
|
1014
|
+
def timing_begin(cuda_filter: int = TIMING_ALL, synchronize: bool = True) -> None:
|
|
950
1015
|
"""Begin detailed activity timing.
|
|
951
1016
|
|
|
952
1017
|
Parameters:
|
|
953
|
-
cuda_filter
|
|
954
|
-
synchronize
|
|
1018
|
+
cuda_filter: Filter flags for CUDA activity timing, e.g. ``warp.TIMING_KERNEL`` or ``warp.TIMING_ALL``
|
|
1019
|
+
synchronize: Whether to synchronize all CUDA devices before timing starts
|
|
955
1020
|
"""
|
|
956
1021
|
|
|
957
1022
|
if synchronize:
|
|
@@ -960,14 +1025,14 @@ def timing_begin(cuda_filter=TIMING_ALL, synchronize=True):
|
|
|
960
1025
|
warp.context.runtime.core.cuda_timing_begin(cuda_filter)
|
|
961
1026
|
|
|
962
1027
|
|
|
963
|
-
def timing_end(synchronize=True):
|
|
1028
|
+
def timing_end(synchronize: bool = True) -> List[TimingResult]:
|
|
964
1029
|
"""End detailed activity timing.
|
|
965
1030
|
|
|
966
1031
|
Parameters:
|
|
967
|
-
synchronize
|
|
1032
|
+
synchronize: Whether to synchronize all CUDA devices before timing ends
|
|
968
1033
|
|
|
969
1034
|
Returns:
|
|
970
|
-
|
|
1035
|
+
A list of :class:`TimingResult` objects for all recorded activities.
|
|
971
1036
|
"""
|
|
972
1037
|
|
|
973
1038
|
if synchronize:
|
|
@@ -1006,12 +1071,12 @@ def timing_end(synchronize=True):
|
|
|
1006
1071
|
return results
|
|
1007
1072
|
|
|
1008
1073
|
|
|
1009
|
-
def timing_print(results, indent=""):
|
|
1074
|
+
def timing_print(results: List[TimingResult], indent: str = "") -> None:
|
|
1010
1075
|
"""Print timing results.
|
|
1011
1076
|
|
|
1012
1077
|
Parameters:
|
|
1013
|
-
results
|
|
1014
|
-
indent
|
|
1078
|
+
results: List of :class:`TimingResult` objects to print.
|
|
1079
|
+
indent: Optional indentation to prepend to all output lines.
|
|
1015
1080
|
"""
|
|
1016
1081
|
|
|
1017
1082
|
if not results:
|
|
@@ -1,12 +1,18 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: warp-lang
|
|
3
|
-
Version: 1.6.2
|
|
3
|
+
Version: 1.7.1
|
|
4
4
|
Summary: A Python framework for high-performance simulation and graphics programming
|
|
5
5
|
Author-email: NVIDIA Corporation <warp-python@nvidia.com>
|
|
6
6
|
License: Apache-2.0
|
|
7
|
-
Project-URL:
|
|
7
|
+
Project-URL: Homepage, https://developer.nvidia.com/warp-python
|
|
8
8
|
Project-URL: Documentation, https://nvidia.github.io/warp
|
|
9
|
+
Project-URL: Repository, https://github.com/NVIDIA/warp
|
|
10
|
+
Project-URL: Issues, https://github.com/NVIDIA/warp/issues
|
|
9
11
|
Project-URL: Changelog, https://github.com/NVIDIA/warp/blob/main/CHANGELOG.md
|
|
12
|
+
Classifier: Development Status :: 5 - Production/Stable
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: Intended Audience :: Science/Research
|
|
15
|
+
Classifier: Natural Language :: English
|
|
10
16
|
Classifier: Programming Language :: Python :: 3.8
|
|
11
17
|
Classifier: Programming Language :: Python :: 3.9
|
|
12
18
|
Classifier: Programming Language :: Python :: 3.10
|
|
@@ -14,15 +20,22 @@ Classifier: Programming Language :: Python :: 3.11
|
|
|
14
20
|
Classifier: Programming Language :: Python :: 3.12
|
|
15
21
|
Classifier: Programming Language :: Python :: 3.13
|
|
16
22
|
Classifier: Programming Language :: Python :: 3 :: Only
|
|
17
|
-
Classifier:
|
|
23
|
+
Classifier: Environment :: GPU :: NVIDIA CUDA
|
|
24
|
+
Classifier: Environment :: GPU :: NVIDIA CUDA :: 12
|
|
18
25
|
Classifier: Operating System :: OS Independent
|
|
26
|
+
Classifier: Topic :: Scientific/Engineering
|
|
19
27
|
Requires-Python: >=3.8
|
|
20
28
|
Description-Content-Type: text/markdown
|
|
21
29
|
License-File: LICENSE.md
|
|
22
30
|
Requires-Dist: numpy
|
|
31
|
+
Provides-Extra: docs
|
|
32
|
+
Requires-Dist: nvidia-sphinx-theme; python_version >= "3.9" and extra == "docs"
|
|
33
|
+
Requires-Dist: sphinx-copybutton; extra == "docs"
|
|
34
|
+
Requires-Dist: ruff==0.11.5; extra == "docs"
|
|
35
|
+
Requires-Dist: myst_parser; extra == "docs"
|
|
23
36
|
Provides-Extra: dev
|
|
24
37
|
Requires-Dist: pre-commit; extra == "dev"
|
|
25
|
-
Requires-Dist: ruff; extra == "dev"
|
|
38
|
+
Requires-Dist: ruff==0.11.5; extra == "dev"
|
|
26
39
|
Requires-Dist: nvtx; extra == "dev"
|
|
27
40
|
Requires-Dist: nvidia-sphinx-theme; python_version >= "3.9" and extra == "dev"
|
|
28
41
|
Requires-Dist: sphinx-copybutton; extra == "dev"
|
|
@@ -32,6 +45,7 @@ Requires-Dist: usd-core; extra == "extras"
|
|
|
32
45
|
Requires-Dist: matplotlib; extra == "extras"
|
|
33
46
|
Requires-Dist: pillow; extra == "extras"
|
|
34
47
|
Requires-Dist: pyglet; extra == "extras"
|
|
48
|
+
Dynamic: license-file
|
|
35
49
|
|
|
36
50
|
[![PyPI version](https://badge.fury.io/py/warp-lang.svg)](https://badge.fury.io/py/warp-lang)
|
|
37
51
|
[![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
|
|
@@ -81,9 +95,9 @@ the `pip install` command, e.g.
|
|
|
81
95
|
|
|
82
96
|
| Platform | Install Command |
|
|
83
97
|
| --------------- | ----------------------------------------------------------------------------------------------------------------------------- |
|
|
84
|
-
| Linux aarch64 | `pip install https://github.com/NVIDIA/warp/releases/download/v1.6.2/warp_lang-1.6.2+cu11-py3-none-manylinux2014_aarch64.whl` |
|
|
85
|
-
| Linux x86-64 | `pip install https://github.com/NVIDIA/warp/releases/download/v1.6.2/warp_lang-1.6.2+cu11-py3-none-manylinux2014_x86_64.whl` |
|
|
86
|
-
| Windows x86-64 | `pip install https://github.com/NVIDIA/warp/releases/download/v1.6.2/warp_lang-1.6.2+cu11-py3-none-win_amd64.whl` |
|
|
98
|
+
| Linux aarch64 | `pip install https://github.com/NVIDIA/warp/releases/download/v1.7.1/warp_lang-1.7.1+cu11-py3-none-manylinux2014_aarch64.whl` |
|
|
99
|
+
| Linux x86-64 | `pip install https://github.com/NVIDIA/warp/releases/download/v1.7.1/warp_lang-1.7.1+cu11-py3-none-manylinux2014_x86_64.whl` |
|
|
100
|
+
| Windows x86-64 | `pip install https://github.com/NVIDIA/warp/releases/download/v1.7.1/warp_lang-1.7.1+cu11-py3-none-win_amd64.whl` |
|
|
87
101
|
|
|
88
102
|
The `--force-reinstall` option may need to be used to overwrite a previous installation.
|
|
89
103
|
|
|
@@ -99,6 +113,15 @@ pip install -U --pre warp-lang --extra-index-url=https://pypi.nvidia.com/
|
|
|
99
113
|
|
|
100
114
|
Note that the nightly builds are built with the CUDA 12 runtime and are not published for macOS.
|
|
101
115
|
|
|
116
|
+
If you plan to install nightly builds regularly, you can simplify future installations by adding NVIDIA's package
|
|
117
|
+
repository as an extra index via the `PIP_EXTRA_INDEX_URL` environment variable. For example:
|
|
118
|
+
|
|
119
|
+
```text
|
|
120
|
+
export PIP_EXTRA_INDEX_URL="https://pypi.nvidia.com"
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
This ensures the index is automatically used for `pip` commands, avoiding the need to specify it explicitly.
|
|
124
|
+
|
|
102
125
|
### CUDA Requirements
|
|
103
126
|
|
|
104
127
|
* Warp packages built with CUDA Toolkit 11.x require NVIDIA driver 470 or newer.
|
|
@@ -250,16 +273,16 @@ python -m warp.tests
|
|
|
250
273
|
<td align="center">raymarch</td>
|
|
251
274
|
</tr>
|
|
252
275
|
<tr>
|
|
276
|
+
<td><a href="https://github.com/NVIDIA/warp/tree/main/warp/examples/core/example_sample_mesh.py"><img src="https://media.githubusercontent.com/media/NVIDIA/warp/refs/heads/main/docs/img/examples/core_sample_mesh.png"></a></td>
|
|
253
277
|
<td><a href="https://github.com/NVIDIA/warp/tree/main/warp/examples/core/example_sph.py"><img src="https://media.githubusercontent.com/media/NVIDIA/warp/refs/heads/main/docs/img/examples/core_sph.png"></a></td>
|
|
254
278
|
<td><a href="https://github.com/NVIDIA/warp/tree/main/warp/examples/core/example_torch.py"><img src="https://media.githubusercontent.com/media/NVIDIA/warp/refs/heads/main/docs/img/examples/core_torch.png"></a></td>
|
|
255
279
|
<td><a href="https://github.com/NVIDIA/warp/tree/main/warp/examples/core/example_wave.py"><img src="https://media.githubusercontent.com/media/NVIDIA/warp/refs/heads/main/docs/img/examples/core_wave.png"></a></td>
|
|
256
|
-
<td></td>
|
|
257
280
|
</tr>
|
|
258
281
|
<tr>
|
|
282
|
+
<td align="center">sample mesh</td>
|
|
259
283
|
<td align="center">sph</td>
|
|
260
284
|
<td align="center">torch</td>
|
|
261
285
|
<td align="center">wave</td>
|
|
262
|
-
<td align="center"></td>
|
|
263
286
|
</tr>
|
|
264
287
|
</tbody>
|
|
265
288
|
</table>
|
|
@@ -323,6 +346,18 @@ python -m warp.tests
|
|
|
323
346
|
<td align="center">trajectory</td>
|
|
324
347
|
<td align="center">soft body properties</td>
|
|
325
348
|
</tr>
|
|
349
|
+
<tr>
|
|
350
|
+
<td><a href="https://github.com/NVIDIA/warp/tree/main/warp/examples/optim/example_fluid_checkpoint.py"><img src="https://media.githubusercontent.com/media/NVIDIA/warp/refs/heads/main/docs/img/examples/optim_fluid_checkpoint.png"></a></td>
|
|
351
|
+
<td></td>
|
|
352
|
+
<td></td>
|
|
353
|
+
<td></td>
|
|
354
|
+
</tr>
|
|
355
|
+
<tr>
|
|
356
|
+
<td align="center">fluid checkpoint</td>
|
|
357
|
+
<td align="center"></td>
|
|
358
|
+
<td align="center"></td>
|
|
359
|
+
<td align="center"></td>
|
|
360
|
+
</tr>
|
|
326
361
|
</tbody>
|
|
327
362
|
</table>
|
|
328
363
|
|
|
@@ -473,8 +508,7 @@ Warp is provided under the Apache License, Version 2.0. Please see [LICENSE.md](
|
|
|
473
508
|
|
|
474
509
|
## Contributing
|
|
475
510
|
|
|
476
|
-
Contributions and pull requests from the community are welcome and are taken under the
|
|
477
|
-
terms described in the **Feedback** section of [LICENSE.md](LICENSE.md#9-feedback).
|
|
511
|
+
Contributions and pull requests from the community are welcome.
|
|
478
512
|
Please see the [Contribution Guide](https://nvidia.github.io/warp/modules/contribution_guide.html) for more
|
|
479
513
|
information on contributing to the development of Warp.
|
|
480
514
|
|