warp-lang 0.9.0-py3-none-win_amd64.whl → 0.11.0-py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of warp-lang might be problematic.
- warp/__init__.py +15 -7
- warp/__init__.pyi +1 -0
- warp/bin/warp-clang.dll +0 -0
- warp/bin/warp.dll +0 -0
- warp/build.py +22 -443
- warp/build_dll.py +384 -0
- warp/builtins.py +998 -488
- warp/codegen.py +1307 -739
- warp/config.py +5 -3
- warp/constants.py +6 -0
- warp/context.py +1291 -548
- warp/dlpack.py +31 -31
- warp/fabric.py +326 -0
- warp/fem/__init__.py +27 -0
- warp/fem/cache.py +389 -0
- warp/fem/dirichlet.py +181 -0
- warp/fem/domain.py +263 -0
- warp/fem/field/__init__.py +101 -0
- warp/fem/field/field.py +149 -0
- warp/fem/field/nodal_field.py +299 -0
- warp/fem/field/restriction.py +21 -0
- warp/fem/field/test.py +181 -0
- warp/fem/field/trial.py +183 -0
- warp/fem/geometry/__init__.py +19 -0
- warp/fem/geometry/closest_point.py +70 -0
- warp/fem/geometry/deformed_geometry.py +271 -0
- warp/fem/geometry/element.py +744 -0
- warp/fem/geometry/geometry.py +186 -0
- warp/fem/geometry/grid_2d.py +373 -0
- warp/fem/geometry/grid_3d.py +435 -0
- warp/fem/geometry/hexmesh.py +953 -0
- warp/fem/geometry/partition.py +376 -0
- warp/fem/geometry/quadmesh_2d.py +532 -0
- warp/fem/geometry/tetmesh.py +840 -0
- warp/fem/geometry/trimesh_2d.py +577 -0
- warp/fem/integrate.py +1616 -0
- warp/fem/operator.py +191 -0
- warp/fem/polynomial.py +213 -0
- warp/fem/quadrature/__init__.py +2 -0
- warp/fem/quadrature/pic_quadrature.py +245 -0
- warp/fem/quadrature/quadrature.py +294 -0
- warp/fem/space/__init__.py +292 -0
- warp/fem/space/basis_space.py +489 -0
- warp/fem/space/collocated_function_space.py +105 -0
- warp/fem/space/dof_mapper.py +236 -0
- warp/fem/space/function_space.py +145 -0
- warp/fem/space/grid_2d_function_space.py +267 -0
- warp/fem/space/grid_3d_function_space.py +306 -0
- warp/fem/space/hexmesh_function_space.py +352 -0
- warp/fem/space/partition.py +350 -0
- warp/fem/space/quadmesh_2d_function_space.py +369 -0
- warp/fem/space/restriction.py +160 -0
- warp/fem/space/shape/__init__.py +15 -0
- warp/fem/space/shape/cube_shape_function.py +738 -0
- warp/fem/space/shape/shape_function.py +103 -0
- warp/fem/space/shape/square_shape_function.py +611 -0
- warp/fem/space/shape/tet_shape_function.py +567 -0
- warp/fem/space/shape/triangle_shape_function.py +429 -0
- warp/fem/space/tetmesh_function_space.py +292 -0
- warp/fem/space/topology.py +295 -0
- warp/fem/space/trimesh_2d_function_space.py +221 -0
- warp/fem/types.py +77 -0
- warp/fem/utils.py +495 -0
- warp/native/array.h +164 -55
- warp/native/builtin.h +150 -174
- warp/native/bvh.cpp +75 -328
- warp/native/bvh.cu +406 -23
- warp/native/bvh.h +37 -45
- warp/native/clang/clang.cpp +136 -24
- warp/native/crt.cpp +1 -76
- warp/native/crt.h +111 -104
- warp/native/cuda_crt.h +1049 -0
- warp/native/cuda_util.cpp +15 -3
- warp/native/cuda_util.h +3 -1
- warp/native/cutlass/tools/library/scripts/conv2d_operation.py +463 -0
- warp/native/cutlass/tools/library/scripts/conv3d_operation.py +321 -0
- warp/native/cutlass/tools/library/scripts/gemm_operation.py +988 -0
- warp/native/cutlass/tools/library/scripts/generator.py +4625 -0
- warp/native/cutlass/tools/library/scripts/library.py +799 -0
- warp/native/cutlass/tools/library/scripts/manifest.py +402 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/docs/source/conf.py +96 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/profile/conv/conv2d_f16_sm80.py +106 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/profile/gemm/gemm_f32_sm80.py +91 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/setup.py +80 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/__init__.py +48 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/arguments.py +118 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/c_types.py +241 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/compiler.py +432 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/conv2d_operation.py +631 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/epilogue.py +1026 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/frontend.py +104 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/gemm_operation.py +1276 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/library.py +744 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/memory_manager.py +74 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/operation.py +110 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/parser.py +619 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/reduction_operation.py +398 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/tensor_ref.py +70 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/__init__.py +4 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/conv2d_testbed.py +646 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/gemm_grouped_testbed.py +235 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/gemm_testbed.py +557 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/profiler.py +70 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/type_hint.py +39 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/utils/__init__.py +1 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/utils/device.py +76 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/utils/reference_model.py +255 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/__init__.py +0 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.py +201 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +177 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.py +98 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.py +95 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_few_channels_f16nhwc_f16nhwc_f16nhwc_tensor_op_f32_sm80.py +163 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_fixed_channels_f16nhwc_f16nhwc_f16nhwc_tensor_op_f32_sm80.py +187 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.py +309 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +54 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.py +96 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.py +107 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_strided_dgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +253 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.py +97 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +242 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.py +96 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.py +107 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/run_all_tests.py +10 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/frontend/test_frontend.py +146 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/__init__.py +0 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_bf16_sm80.py +96 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_f16_sm80.py +447 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_f32_sm80.py +146 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_f64_sm80.py +102 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_grouped_sm80.py +203 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_s8_sm80.py +229 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/run_all_tests.py +9 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/unit/test_sm80.py +453 -0
- warp/native/cutlass/tools/library/scripts/rank_2k_operation.py +398 -0
- warp/native/cutlass/tools/library/scripts/rank_k_operation.py +387 -0
- warp/native/cutlass/tools/library/scripts/rt.py +796 -0
- warp/native/cutlass/tools/library/scripts/symm_operation.py +400 -0
- warp/native/cutlass/tools/library/scripts/trmm_operation.py +407 -0
- warp/native/cutlass_gemm.cu +5 -3
- warp/native/exports.h +1240 -949
- warp/native/fabric.h +228 -0
- warp/native/hashgrid.cpp +4 -4
- warp/native/hashgrid.h +22 -2
- warp/native/initializer_array.h +2 -2
- warp/native/intersect.h +22 -7
- warp/native/intersect_adj.h +8 -8
- warp/native/intersect_tri.h +13 -16
- warp/native/marching.cu +157 -161
- warp/native/mat.h +119 -19
- warp/native/matnn.h +2 -2
- warp/native/mesh.cpp +108 -83
- warp/native/mesh.cu +243 -6
- warp/native/mesh.h +1547 -458
- warp/native/nanovdb/NanoVDB.h +1 -1
- warp/native/noise.h +272 -329
- warp/native/quat.h +51 -8
- warp/native/rand.h +45 -35
- warp/native/range.h +6 -2
- warp/native/reduce.cpp +157 -0
- warp/native/reduce.cu +348 -0
- warp/native/runlength_encode.cpp +62 -0
- warp/native/runlength_encode.cu +46 -0
- warp/native/scan.cu +11 -13
- warp/native/scan.h +1 -0
- warp/native/solid_angle.h +442 -0
- warp/native/sort.cpp +13 -0
- warp/native/sort.cu +9 -1
- warp/native/sparse.cpp +338 -0
- warp/native/sparse.cu +545 -0
- warp/native/spatial.h +2 -2
- warp/native/temp_buffer.h +30 -0
- warp/native/vec.h +126 -24
- warp/native/volume.h +120 -0
- warp/native/warp.cpp +658 -53
- warp/native/warp.cu +660 -68
- warp/native/warp.h +112 -12
- warp/optim/__init__.py +1 -0
- warp/optim/linear.py +922 -0
- warp/optim/sgd.py +92 -0
- warp/render/render_opengl.py +392 -152
- warp/render/render_usd.py +11 -11
- warp/sim/__init__.py +2 -2
- warp/sim/articulation.py +385 -185
- warp/sim/collide.py +21 -8
- warp/sim/import_mjcf.py +297 -106
- warp/sim/import_urdf.py +389 -210
- warp/sim/import_usd.py +198 -97
- warp/sim/inertia.py +17 -18
- warp/sim/integrator_euler.py +14 -8
- warp/sim/integrator_xpbd.py +161 -19
- warp/sim/model.py +795 -291
- warp/sim/optimizer.py +2 -6
- warp/sim/render.py +65 -3
- warp/sim/utils.py +3 -0
- warp/sparse.py +1227 -0
- warp/stubs.py +665 -223
- warp/tape.py +66 -15
- warp/tests/__main__.py +3 -6
- warp/tests/assets/curlnoise_golden.npy +0 -0
- warp/tests/assets/pnoise_golden.npy +0 -0
- warp/tests/assets/torus.usda +105 -105
- warp/tests/{test_class_kernel.py → aux_test_class_kernel.py} +9 -1
- warp/tests/aux_test_conditional_unequal_types_kernels.py +21 -0
- warp/tests/{test_dependent.py → aux_test_dependent.py} +2 -2
- warp/tests/{test_reference.py → aux_test_reference.py} +1 -1
- warp/tests/aux_test_unresolved_func.py +14 -0
- warp/tests/aux_test_unresolved_symbol.py +14 -0
- warp/tests/disabled_kinematics.py +239 -0
- warp/tests/run_coverage_serial.py +31 -0
- warp/tests/test_adam.py +103 -106
- warp/tests/test_arithmetic.py +128 -74
- warp/tests/test_array.py +1497 -211
- warp/tests/test_array_reduce.py +150 -0
- warp/tests/test_atomic.py +64 -28
- warp/tests/test_bool.py +99 -0
- warp/tests/test_builtins_resolution.py +1292 -0
- warp/tests/test_bvh.py +75 -43
- warp/tests/test_closest_point_edge_edge.py +54 -57
- warp/tests/test_codegen.py +233 -128
- warp/tests/test_compile_consts.py +28 -20
- warp/tests/test_conditional.py +108 -24
- warp/tests/test_copy.py +10 -12
- warp/tests/test_ctypes.py +112 -88
- warp/tests/test_dense.py +21 -14
- warp/tests/test_devices.py +98 -0
- warp/tests/test_dlpack.py +136 -108
- warp/tests/test_examples.py +277 -0
- warp/tests/test_fabricarray.py +955 -0
- warp/tests/test_fast_math.py +15 -11
- warp/tests/test_fem.py +1271 -0
- warp/tests/test_fp16.py +53 -19
- warp/tests/test_func.py +187 -74
- warp/tests/test_generics.py +194 -49
- warp/tests/test_grad.py +180 -116
- warp/tests/test_grad_customs.py +176 -0
- warp/tests/test_hash_grid.py +52 -37
- warp/tests/test_import.py +10 -23
- warp/tests/test_indexedarray.py +577 -24
- warp/tests/test_intersect.py +18 -9
- warp/tests/test_large.py +141 -0
- warp/tests/test_launch.py +251 -15
- warp/tests/test_lerp.py +64 -65
- warp/tests/test_linear_solvers.py +154 -0
- warp/tests/test_lvalue.py +493 -0
- warp/tests/test_marching_cubes.py +12 -13
- warp/tests/test_mat.py +508 -2778
- warp/tests/test_mat_lite.py +115 -0
- warp/tests/test_mat_scalar_ops.py +2889 -0
- warp/tests/test_math.py +103 -9
- warp/tests/test_matmul.py +305 -69
- warp/tests/test_matmul_lite.py +410 -0
- warp/tests/test_mesh.py +71 -14
- warp/tests/test_mesh_query_aabb.py +41 -25
- warp/tests/test_mesh_query_point.py +325 -34
- warp/tests/test_mesh_query_ray.py +39 -22
- warp/tests/test_mlp.py +30 -22
- warp/tests/test_model.py +92 -89
- warp/tests/test_modules_lite.py +39 -0
- warp/tests/test_multigpu.py +88 -114
- warp/tests/test_noise.py +12 -11
- warp/tests/test_operators.py +16 -20
- warp/tests/test_options.py +11 -11
- warp/tests/test_pinned.py +17 -18
- warp/tests/test_print.py +32 -11
- warp/tests/test_quat.py +275 -129
- warp/tests/test_rand.py +18 -16
- warp/tests/test_reload.py +38 -34
- warp/tests/test_rounding.py +50 -43
- warp/tests/test_runlength_encode.py +190 -0
- warp/tests/test_smoothstep.py +9 -11
- warp/tests/test_snippet.py +143 -0
- warp/tests/test_sparse.py +460 -0
- warp/tests/test_spatial.py +276 -243
- warp/tests/test_streams.py +110 -85
- warp/tests/test_struct.py +331 -85
- warp/tests/test_tape.py +39 -21
- warp/tests/test_torch.py +118 -89
- warp/tests/test_transient_module.py +12 -13
- warp/tests/test_types.py +614 -0
- warp/tests/test_utils.py +494 -0
- warp/tests/test_vec.py +354 -1987
- warp/tests/test_vec_lite.py +73 -0
- warp/tests/test_vec_scalar_ops.py +2099 -0
- warp/tests/test_volume.py +457 -293
- warp/tests/test_volume_write.py +124 -134
- warp/tests/unittest_serial.py +35 -0
- warp/tests/unittest_suites.py +341 -0
- warp/tests/unittest_utils.py +568 -0
- warp/tests/unused_test_misc.py +71 -0
- warp/tests/{test_debug.py → walkthough_debug.py} +3 -17
- warp/thirdparty/appdirs.py +36 -45
- warp/thirdparty/unittest_parallel.py +549 -0
- warp/torch.py +72 -30
- warp/types.py +1744 -713
- warp/utils.py +360 -350
- warp_lang-0.11.0.dist-info/LICENSE.md +36 -0
- warp_lang-0.11.0.dist-info/METADATA +238 -0
- warp_lang-0.11.0.dist-info/RECORD +332 -0
- {warp_lang-0.9.0.dist-info → warp_lang-0.11.0.dist-info}/WHEEL +1 -1
- warp/bin/warp-clang.exp +0 -0
- warp/bin/warp-clang.lib +0 -0
- warp/bin/warp.exp +0 -0
- warp/bin/warp.lib +0 -0
- warp/tests/test_all.py +0 -215
- warp/tests/test_array_scan.py +0 -60
- warp/tests/test_base.py +0 -208
- warp/tests/test_unresolved_func.py +0 -7
- warp/tests/test_unresolved_symbol.py +0 -7
- warp_lang-0.9.0.dist-info/METADATA +0 -20
- warp_lang-0.9.0.dist-info/RECORD +0 -177
- /warp/tests/{test_compile_consts_dummy.py → aux_test_compile_consts_dummy.py} +0 -0
- /warp/tests/{test_reference_reference.py → aux_test_reference_reference.py} +0 -0
- /warp/tests/{test_square.py → aux_test_square.py} +0 -0
- {warp_lang-0.9.0.dist-info → warp_lang-0.11.0.dist-info}/top_level.txt +0 -0
warp/tests/test_mlp.py
CHANGED
@@ -5,9 +5,12 @@
 # distribution of this software and related documentation without an express
 # license agreement from NVIDIA CORPORATION is strictly prohibited.

+import unittest
+
 import numpy as np
+
 import warp as wp
-from warp.tests.
+from warp.tests.unittest_utils import *

 wp.init()

@@ -35,17 +38,17 @@ def loss_kernel(x: wp.array2d(dtype=float), loss: wp.array(dtype=float)):


 def test_mlp(test, device):
-    np.random.
+    rng = np.random.default_rng(123)

     m = 10
     n = 200

     batches = 20000

-    weights = wp.array(
-    bias = wp.array(
+    weights = wp.array(rng.random(size=(m, n)) * 0.5 - 0.5, dtype=float, device=device)
+    bias = wp.array(rng.random(size=m) * 0.5 - 0.5, dtype=float, device=device)

-    x = wp.array(
+    x = wp.array(rng.random(size=(n, batches)), dtype=float, device=device)
     y = wp.zeros(shape=(m, batches), device=device)

     with wp.ScopedTimer("warp", active=False):
@@ -86,13 +89,15 @@ def create_mlp(m, n):
 def create_golden():
     import torch

+    rng = np.random.default_rng(123)
+
     input_size = 32
     hidden_size = 16
     batch_size = 64

     network = create_mlp(input_size, hidden_size)

-    x = torch.Tensor(
+    x = torch.Tensor(rng.random(size=(batch_size, input_size)))
     x.requires_grad = True

     y = network.forward(x)
@@ -169,6 +174,8 @@ def test_mlp_grad(test, device):
 def profile_mlp_torch(device):
     import torch

+    rng = np.random.default_rng(123)
+
     m = 128
     n = 64

@@ -179,7 +186,7 @@ def profile_mlp_torch(device):

         network = create_mlp(m, n)

-        x = torch.Tensor(
+        x = torch.Tensor(rng.random(size=(b, m)))

         with wp.ScopedTimer("torch_forward" + str(b)):
             y = network.forward(x)
@@ -190,7 +197,7 @@ def profile_mlp_torch(device):

         network = create_mlp(m, n)

-        x = torch.Tensor(
+        x = torch.Tensor(rng.random(size=(b, m)))
         y = network.forward(x)

         loss = torch.norm(y)
@@ -204,6 +211,8 @@ def profile_mlp_torch(device):


 def profile_mlp_warp(device):
+    rng = np.random.default_rng(123)
+
     m = 128
     n = 64

@@ -212,10 +221,10 @@ def profile_mlp_warp(device):
     for i in range(steps):
         b = 2**i

-        weights = wp.array(
-        bias = wp.array(
+        weights = wp.array(rng.random(size=(m, n)) * 0.5 - 0.5, dtype=float, device=device)
+        bias = wp.array(rng.random(size=m) * 0.5 - 0.5, dtype=float, device=device)

-        x = wp.array(
+        x = wp.array(rng.random(size=(n, b)), dtype=float, device=device)
         y = wp.zeros(shape=(m, b), device=device)

         with wp.ScopedTimer("warp-forward" + str(b)):
@@ -225,10 +234,10 @@ def profile_mlp_warp(device):
     for i in range(steps):
         b = 2**i

-        weights = wp.array(
-        bias = wp.array(
+        weights = wp.array(rng.random(size=(m, n)) * 0.5 - 0.5, dtype=float, device=device, requires_grad=True)
+        bias = wp.array(rng.random(size=m) * 0.5 - 0.5, dtype=float, device=device, requires_grad=True)

-        x = wp.array(
+        x = wp.array(rng.random(size=(n, b)), dtype=float, device=device, requires_grad=True)
         y = wp.zeros(shape=(m, b), device=device, requires_grad=True)

         loss = wp.zeros(1, dtype=float, device=device)
@@ -251,18 +260,17 @@ def profile_mlp_warp(device):
 # profile_mlp_torch("cuda")


-
-
+devices = get_test_devices()
+

-
-
+class TestMLP(unittest.TestCase):
+    pass

-add_function_test(TestMLP, "test_mlp", test_mlp, devices=devices)
-add_function_test(TestMLP, "test_mlp_grad", test_mlp_grad, devices=devices)

-
+add_function_test(TestMLP, "test_mlp", test_mlp, devices=devices)
+add_function_test(TestMLP, "test_mlp_grad", test_mlp_grad, devices=devices)


 if __name__ == "__main__":
-
+    wp.build.clear_kernel_cache()
     unittest.main(verbosity=2, failfast=False)
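The test_mlp.py changes above swap global NumPy seeding for an explicit generator whose output is passed straight into Warp array construction. A minimal, self-contained sketch of that pattern (shapes follow the test; the "cpu" device string is an illustrative choice, assuming warp-lang is installed):

import numpy as np
import warp as wp

wp.init()

# An explicit, locally scoped generator replaces np.random.seed(...),
# so each test gets reproducible data without mutating global RNG state.
rng = np.random.default_rng(123)

m, n = 10, 200

# Same construction style as the updated test: NumPy data -> wp.array.
weights = wp.array(rng.random(size=(m, n)) * 0.5 - 0.5, dtype=float, device="cpu")
bias = wp.array(rng.random(size=m) * 0.5 - 0.5, dtype=float, device="cpu")

print(weights.shape, bias.shape)  # (10, 200) (10,)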
warp/tests/test_model.py
CHANGED
@@ -5,103 +5,106 @@
 # distribution of this software and related documentation without an express
 # license agreement from NVIDIA CORPORATION is strictly prohibited.

-import
-from warp.tests.test_base import *
-from warp.sim import ModelBuilder
+import unittest

 import numpy as np

+import warp as wp
+from warp.sim import ModelBuilder
+from warp.tests.unittest_utils import *
+
 wp.init()


-
-
-
-
-
-
-
-
-
-
-            ]
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        builder2.add_triangles(tris[:, 0], tris[:, 1], tris[:, 2], tri_ke, tri_ka, tri_kd, tri_drag, tri_lift)
-
-        assert_np_equal(np.array(builder1.tri_indices), np.array(builder2.tri_indices))
-        assert_np_equal(np.array(builder1.tri_poses), np.array(builder2.tri_poses), tol=1.0e-6)
-        assert_np_equal(np.array(builder1.tri_activations), np.array(builder2.tri_activations))
-        assert_np_equal(np.array(builder1.tri_materials), np.array(builder2.tri_materials))
-
-    def test_add_edges(self):
-        pts = np.array(
-            [
-                [-0.00585869, 0.34189449, -1.17415233],
-                [-1.894547, 0.1788074, 0.9251329],
-                [-1.26141048, 0.16140787, 0.08823282],
-                [-0.08609255, -0.82722546, 0.65995427],
-                [0.78827592, -1.77375711, -0.55582718],
-            ]
+class TestModel(unittest.TestCase):
+    def test_add_triangles(self):
+        rng = np.random.default_rng(123)
+
+        pts = np.array(
+            [
+                [-0.00585869, 0.34189449, -1.17415233],
+                [-1.894547, 0.1788074, 0.9251329],
+                [-1.26141048, 0.16140787, 0.08823282],
+                [-0.08609255, -0.82722546, 0.65995427],
+                [0.78827592, -1.77375711, -0.55582718],
+            ]
+        )
+        tris = np.array([[0, 3, 4], [0, 2, 3], [2, 1, 3], [1, 4, 3]])
+
+        builder1 = ModelBuilder()
+        builder2 = ModelBuilder()
+        for pt in pts:
+            builder1.add_particle(wp.vec3(pt), wp.vec3(), 1.0)
+            builder2.add_particle(wp.vec3(pt), wp.vec3(), 1.0)
+
+        # test add_triangle(s) with default arguments:
+        areas = builder2.add_triangles(tris[:, 0], tris[:, 1], tris[:, 2])
+        for i, t in enumerate(tris):
+            area = builder1.add_triangle(t[0], t[1], t[2])
+            self.assertAlmostEqual(area, areas[i], places=6)
+
+        # test add_triangle(s) with non default arguments:
+        tri_ke = rng.standard_normal(size=pts.shape[0])
+        tri_ka = rng.standard_normal(size=pts.shape[0])
+        tri_kd = rng.standard_normal(size=pts.shape[0])
+        tri_drag = rng.standard_normal(size=pts.shape[0])
+        tri_lift = rng.standard_normal(size=pts.shape[0])
+        for i, t in enumerate(tris):
+            builder1.add_triangle(
+                t[0],
+                t[1],
+                t[2],
+                tri_ke[i],
+                tri_ka[i],
+                tri_kd[i],
+                tri_drag[i],
+                tri_lift[i],
             )
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        builder2.add_triangles(tris[:, 0], tris[:, 1], tris[:, 2], tri_ke, tri_ka, tri_kd, tri_drag, tri_lift)
+
+        assert_np_equal(np.array(builder1.tri_indices), np.array(builder2.tri_indices))
+        assert_np_equal(np.array(builder1.tri_poses), np.array(builder2.tri_poses), tol=1.0e-6)
+        assert_np_equal(np.array(builder1.tri_activations), np.array(builder2.tri_activations))
+        assert_np_equal(np.array(builder1.tri_materials), np.array(builder2.tri_materials))
+
+    def test_add_edges(self):
+        rng = np.random.default_rng(123)
+
+        pts = np.array(
+            [
+                [-0.00585869, 0.34189449, -1.17415233],
+                [-1.894547, 0.1788074, 0.9251329],
+                [-1.26141048, 0.16140787, 0.08823282],
+                [-0.08609255, -0.82722546, 0.65995427],
+                [0.78827592, -1.77375711, -0.55582718],
+            ]
+        )
+        edges = np.array([[0, 4, 3, 1], [3, 2, 4, 1]])
+
+        builder1 = ModelBuilder()
+        builder2 = ModelBuilder()
+        for pt in pts:
+            builder1.add_particle(wp.vec3(pt), wp.vec3(), 1.0)
+            builder2.add_particle(wp.vec3(pt), wp.vec3(), 1.0)
+
+        # test defaults:
+        for i in range(2):
+            builder1.add_edge(edges[i, 0], edges[i, 1], edges[i, 2], edges[i, 3])
+        builder2.add_edges(edges[:, 0], edges[:, 1], edges[:, 2], edges[:, 3])
+
+        # test non defaults:
+        rest = rng.standard_normal(size=2)
+        edge_ke = rng.standard_normal(size=2)
+        edge_kd = rng.standard_normal(size=2)
+        for i in range(2):
+            builder1.add_edge(edges[i, 0], edges[i, 1], edges[i, 2], edges[i, 3], rest[i], edge_ke[i], edge_kd[i])
+        builder2.add_edges(edges[:, 0], edges[:, 1], edges[:, 2], edges[:, 3], rest, edge_ke, edge_kd)
+
+        assert_np_equal(np.array(builder1.edge_indices), np.array(builder2.edge_indices))
+        assert_np_equal(np.array(builder1.edge_rest_angle), np.array(builder2.edge_rest_angle), tol=1.0e-4)
+        assert_np_equal(np.array(builder1.edge_bending_properties), np.array(builder2.edge_bending_properties))


 if __name__ == "__main__":
-
+    wp.build.clear_kernel_cache()
     unittest.main(verbosity=2)
warp/tests/test_modules_lite.py
ADDED
@@ -0,0 +1,39 @@
+# Copyright (c) 2023 NVIDIA CORPORATION. All rights reserved.
+# NVIDIA CORPORATION and its licensors retain all intellectual property
+# and proprietary rights in and to this software, related documentation
+# and any modifications thereto. Any use, reproduction, disclosure or
+# distribution of this software and related documentation without an express
+# license agreement from NVIDIA CORPORATION is strictly prohibited.
+
+import unittest
+
+import warp as wp
+from warp.tests.unittest_utils import *
+
+wp.init()
+
+
+devices = get_test_devices()
+
+
+class TestModuleLite(unittest.TestCase):
+    def test_module_lite_load(self):
+        # Load current module
+        wp.load_module()
+
+        # Load named module
+        wp.load_module(wp.config)
+
+        # Load named module (string)
+        wp.load_module(wp.config, recursive=True)
+
+    def test_module_lite_options(self):
+        wp.set_module_options({"max_unroll": 8})
+        module_options = wp.get_module_options()
+        self.assertIsInstance(module_options, dict)
+        self.assertEqual(module_options["max_unroll"], 8)
+
+
+if __name__ == "__main__":
+    wp.build.clear_kernel_cache()
+    unittest.main(verbosity=2)
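The new test_modules_lite.py above exercises module loading and per-module options through the public Warp API. A small stand-alone sketch of the options round-trip, using only calls that appear in the diff (the test class name here is illustrative, not taken from the package):

import unittest

import warp as wp

wp.init()


class TestModuleOptions(unittest.TestCase):
    # Mirrors the pattern in the new test: set an option, then read it back.
    def test_options_roundtrip(self):
        wp.set_module_options({"max_unroll": 8})
        options = wp.get_module_options()
        self.assertIsInstance(options, dict)
        self.assertEqual(options["max_unroll"], 8)


if __name__ == "__main__":
    wp.build.clear_kernel_cache()
    unittest.main(verbosity=2)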
warp/tests/test_multigpu.py
CHANGED
@@ -5,16 +5,12 @@
 # distribution of this software and related documentation without an express
 # license agreement from NVIDIA CORPORATION is strictly prohibited.

-import
-import warp as wp
+import unittest

-import
+import numpy as np

 import warp as wp
-from warp.tests.
-
-import unittest
-
+from warp.tests.unittest_utils import *

 wp.init()

@@ -31,156 +27,134 @@ def arange(start: int, step: int, a: wp.array(dtype=int)):
     a[tid] = start + step * tid


-
-
-
-
-
-
-    n = 32
-
-    wp.set_device("cuda:0")
-    a0 = wp.empty(n, dtype=int)
-    wp.launch(arange, dim=a0.size, inputs=[0, 1, a0])
-
-    wp.set_device("cuda:1")
-    a1 = wp.empty(n, dtype=int)
-    wp.launch(arange, dim=a1.size, inputs=[0, 1, a1])
-
-    # restore default device
-    wp.set_device(saved_device)
+class TestMultiGPU(unittest.TestCase):
+    @unittest.skipUnless(len(wp.get_cuda_devices()) > 1, "Requires at least two CUDA devices")
+    def test_multigpu_set_device(self):
+        # save default device
+        saved_device = wp.get_device()

-
-    assert a1.device == "cuda:1"
+        n = 32

-
-
-    assert_np_equal(a0.numpy(), expected)
-    assert_np_equal(a1.numpy(), expected)
-
-
-def test_multigpu_scoped_device(test, device):
-    assert len(wp.get_cuda_devices()) > 1, "At least two CUDA devices are required"
-
-    n = 32
-
-    with wp.ScopedDevice("cuda:0"):
+        wp.set_device("cuda:0")
         a0 = wp.empty(n, dtype=int)
         wp.launch(arange, dim=a0.size, inputs=[0, 1, a0])

-
+        wp.set_device("cuda:1")
         a1 = wp.empty(n, dtype=int)
         wp.launch(arange, dim=a1.size, inputs=[0, 1, a1])

-
-
-
-    expected = np.arange(n, dtype=int)
+        # restore default device
+        wp.set_device(saved_device)

-
-
+        assert a0.device == "cuda:0"
+        assert a1.device == "cuda:1"

+        expected = np.arange(n, dtype=int)

-
-
+        assert_np_equal(a0.numpy(), expected)
+        assert_np_equal(a1.numpy(), expected)

-
-
-
-    with wp.ScopedDevice("cuda:1"):
-        assert wp.get_device() == "cuda:1"
-        assert wp.get_cuda_device() == "cuda:1"
+    @unittest.skipUnless(len(wp.get_cuda_devices()) > 1, "Requires at least two CUDA devices")
+    def test_multigpu_scoped_device(self):
+        n = 32

         with wp.ScopedDevice("cuda:0"):
-
-
-
-            with wp.ScopedDevice("cpu"):
-                assert wp.get_device() == "cpu"
-                assert wp.get_cuda_device() == "cuda:0"
+            a0 = wp.empty(n, dtype=int)
+            wp.launch(arange, dim=a0.size, inputs=[0, 1, a0])

-
+        with wp.ScopedDevice("cuda:1"):
+            a1 = wp.empty(n, dtype=int)
+            wp.launch(arange, dim=a1.size, inputs=[0, 1, a1])

-
-
+        assert a0.device == "cuda:0"
+        assert a1.device == "cuda:1"

-
-            assert wp.get_cuda_device() == "cuda:0"
+        expected = np.arange(n, dtype=int)

-
-
+        assert_np_equal(a0.numpy(), expected)
+        assert_np_equal(a1.numpy(), expected)

-
-
+    @unittest.skipUnless(len(wp.get_cuda_devices()) > 1, "Requires at least two CUDA devices")
+    def test_multigpu_nesting(self):
+        initial_device = wp.get_device()
+        initial_cuda_device = wp.get_cuda_device()

+        with wp.ScopedDevice("cuda:1"):
+            assert wp.get_device() == "cuda:1"
+            assert wp.get_cuda_device() == "cuda:1"

-
-
+            with wp.ScopedDevice("cuda:0"):
+                assert wp.get_device() == "cuda:0"
+                assert wp.get_cuda_device() == "cuda:0"

-
+                with wp.ScopedDevice("cpu"):
+                    assert wp.get_device() == "cpu"
+                    assert wp.get_cuda_device() == "cuda:0"

-
-    a1 = wp.zeros(n, dtype=float, device="cuda:1")
+                    wp.set_device("cuda:1")

-
+                    assert wp.get_device() == "cuda:1"
+                    assert wp.get_cuda_device() == "cuda:1"

-
-
-        wp.synchronize_device(a0.device)
-        wp.copy(a1, a0)
+                assert wp.get_device() == "cuda:0"
+                assert wp.get_cuda_device() == "cuda:0"

-
-
-        wp.copy(a0, a1)
+            assert wp.get_device() == "cuda:1"
+            assert wp.get_cuda_device() == "cuda:1"

-
+        assert wp.get_device() == initial_device
+        assert wp.get_cuda_device() == initial_cuda_device

-
-
+    @unittest.skipUnless(len(wp.get_cuda_devices()) > 1, "Requires at least two CUDA devices")
+    def test_multigpu_pingpong(self):
+        n = 1024 * 1024

+        a0 = wp.zeros(n, dtype=float, device="cuda:0")
+        a1 = wp.zeros(n, dtype=float, device="cuda:1")

-
-    assert len(wp.get_cuda_devices()) > 1, "At least two CUDA devices are required"
+        iters = 10

-
+        for _ in range(iters):
+            wp.launch(inc, dim=a0.size, inputs=[a0], device=a0.device)
+            wp.synchronize_device(a0.device)
+            wp.copy(a1, a0)

-
-
+            wp.launch(inc, dim=a1.size, inputs=[a1], device=a1.device)
+            wp.synchronize_device(a1.device)
+            wp.copy(a0, a1)

-
-    stream1 = wp.get_stream("cuda:1")
+        expected = np.full(n, iters * 2, dtype=np.float32)

-
+        assert_np_equal(a0.numpy(), expected)
+        assert_np_equal(a1.numpy(), expected)

-
-
-
-        wp.copy(a1, a0, stream=stream1)
+    @unittest.skipUnless(len(wp.get_cuda_devices()) > 1, "Requires at least two CUDA devices")
+    def test_multigpu_pingpong_streams(self):
+        n = 1024 * 1024

-        wp.
-
-        wp.copy(a0, a1, stream=stream0)
+        a0 = wp.zeros(n, dtype=float, device="cuda:0")
+        a1 = wp.zeros(n, dtype=float, device="cuda:1")

-
+        stream0 = wp.get_stream("cuda:0")
+        stream1 = wp.get_stream("cuda:1")

-
-    assert_np_equal(a1.numpy(), expected)
+        iters = 10

+        for _ in range(iters):
+            wp.launch(inc, dim=a0.size, inputs=[a0], stream=stream0)
+            stream1.wait_stream(stream0)
+            wp.copy(a1, a0, stream=stream1)

-
-
-
+            wp.launch(inc, dim=a1.size, inputs=[a1], stream=stream1)
+            stream0.wait_stream(stream1)
+            wp.copy(a0, a1, stream=stream0)

-
-add_function_test(TestMultigpu, "test_multigpu_set_device", test_multigpu_set_device)
-add_function_test(TestMultigpu, "test_multigpu_scoped_device", test_multigpu_scoped_device)
-add_function_test(TestMultigpu, "test_multigpu_nesting", test_multigpu_nesting)
-add_function_test(TestMultigpu, "test_multigpu_pingpong", test_multigpu_pingpong)
-add_function_test(TestMultigpu, "test_multigpu_pingpong_streams", test_multigpu_pingpong_streams)
+        expected = np.full(n, iters * 2, dtype=np.float32)

-
+        assert_np_equal(a0.numpy(), expected)
+        assert_np_equal(a1.numpy(), expected)


 if __name__ == "__main__":
-
+    wp.build.clear_kernel_cache()
     unittest.main(verbosity=2, failfast=False)