warp-lang 0.10.1-py3-none-win_amd64.whl → 0.11.0-py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of warp-lang might be problematic.
- warp/__init__.py +10 -4
- warp/__init__.pyi +1 -0
- warp/bin/warp-clang.dll +0 -0
- warp/bin/warp.dll +0 -0
- warp/build.py +5 -3
- warp/build_dll.py +29 -9
- warp/builtins.py +868 -507
- warp/codegen.py +1074 -638
- warp/config.py +3 -3
- warp/constants.py +6 -0
- warp/context.py +715 -222
- warp/fabric.py +326 -0
- warp/fem/__init__.py +27 -0
- warp/fem/cache.py +389 -0
- warp/fem/dirichlet.py +181 -0
- warp/fem/domain.py +263 -0
- warp/fem/field/__init__.py +101 -0
- warp/fem/field/field.py +149 -0
- warp/fem/field/nodal_field.py +299 -0
- warp/fem/field/restriction.py +21 -0
- warp/fem/field/test.py +181 -0
- warp/fem/field/trial.py +183 -0
- warp/fem/geometry/__init__.py +19 -0
- warp/fem/geometry/closest_point.py +70 -0
- warp/fem/geometry/deformed_geometry.py +271 -0
- warp/fem/geometry/element.py +744 -0
- warp/fem/geometry/geometry.py +186 -0
- warp/fem/geometry/grid_2d.py +373 -0
- warp/fem/geometry/grid_3d.py +435 -0
- warp/fem/geometry/hexmesh.py +953 -0
- warp/fem/geometry/partition.py +376 -0
- warp/fem/geometry/quadmesh_2d.py +532 -0
- warp/fem/geometry/tetmesh.py +840 -0
- warp/fem/geometry/trimesh_2d.py +577 -0
- warp/fem/integrate.py +1616 -0
- warp/fem/operator.py +191 -0
- warp/fem/polynomial.py +213 -0
- warp/fem/quadrature/__init__.py +2 -0
- warp/fem/quadrature/pic_quadrature.py +245 -0
- warp/fem/quadrature/quadrature.py +294 -0
- warp/fem/space/__init__.py +292 -0
- warp/fem/space/basis_space.py +489 -0
- warp/fem/space/collocated_function_space.py +105 -0
- warp/fem/space/dof_mapper.py +236 -0
- warp/fem/space/function_space.py +145 -0
- warp/fem/space/grid_2d_function_space.py +267 -0
- warp/fem/space/grid_3d_function_space.py +306 -0
- warp/fem/space/hexmesh_function_space.py +352 -0
- warp/fem/space/partition.py +350 -0
- warp/fem/space/quadmesh_2d_function_space.py +369 -0
- warp/fem/space/restriction.py +160 -0
- warp/fem/space/shape/__init__.py +15 -0
- warp/fem/space/shape/cube_shape_function.py +738 -0
- warp/fem/space/shape/shape_function.py +103 -0
- warp/fem/space/shape/square_shape_function.py +611 -0
- warp/fem/space/shape/tet_shape_function.py +567 -0
- warp/fem/space/shape/triangle_shape_function.py +429 -0
- warp/fem/space/tetmesh_function_space.py +292 -0
- warp/fem/space/topology.py +295 -0
- warp/fem/space/trimesh_2d_function_space.py +221 -0
- warp/fem/types.py +77 -0
- warp/fem/utils.py +495 -0
- warp/native/array.h +147 -44
- warp/native/builtin.h +122 -149
- warp/native/bvh.cpp +73 -325
- warp/native/bvh.cu +406 -23
- warp/native/bvh.h +34 -43
- warp/native/clang/clang.cpp +13 -8
- warp/native/crt.h +2 -0
- warp/native/cuda_crt.h +5 -0
- warp/native/cuda_util.cpp +15 -3
- warp/native/cuda_util.h +3 -1
- warp/native/cutlass/tools/library/scripts/conv2d_operation.py +463 -0
- warp/native/cutlass/tools/library/scripts/conv3d_operation.py +321 -0
- warp/native/cutlass/tools/library/scripts/gemm_operation.py +988 -0
- warp/native/cutlass/tools/library/scripts/generator.py +4625 -0
- warp/native/cutlass/tools/library/scripts/library.py +799 -0
- warp/native/cutlass/tools/library/scripts/manifest.py +402 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/docs/source/conf.py +96 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/profile/conv/conv2d_f16_sm80.py +106 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/profile/gemm/gemm_f32_sm80.py +91 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/setup.py +80 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/__init__.py +48 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/arguments.py +118 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/c_types.py +241 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/compiler.py +432 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/conv2d_operation.py +631 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/epilogue.py +1026 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/frontend.py +104 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/gemm_operation.py +1276 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/library.py +744 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/memory_manager.py +74 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/operation.py +110 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/parser.py +619 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/reduction_operation.py +398 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/tensor_ref.py +70 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/__init__.py +4 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/conv2d_testbed.py +646 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/gemm_grouped_testbed.py +235 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/gemm_testbed.py +557 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/profiler.py +70 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/type_hint.py +39 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/utils/__init__.py +1 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/utils/device.py +76 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/utils/reference_model.py +255 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/__init__.py +0 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.py +201 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +177 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.py +98 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.py +95 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_few_channels_f16nhwc_f16nhwc_f16nhwc_tensor_op_f32_sm80.py +163 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_fixed_channels_f16nhwc_f16nhwc_f16nhwc_tensor_op_f32_sm80.py +187 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.py +309 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +54 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.py +96 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.py +107 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_strided_dgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +253 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.py +97 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +242 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.py +96 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.py +107 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/run_all_tests.py +10 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/frontend/test_frontend.py +146 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/__init__.py +0 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_bf16_sm80.py +96 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_f16_sm80.py +447 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_f32_sm80.py +146 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_f64_sm80.py +102 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_grouped_sm80.py +203 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_s8_sm80.py +229 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/run_all_tests.py +9 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/unit/test_sm80.py +453 -0
- warp/native/cutlass/tools/library/scripts/rank_2k_operation.py +398 -0
- warp/native/cutlass/tools/library/scripts/rank_k_operation.py +387 -0
- warp/native/cutlass/tools/library/scripts/rt.py +796 -0
- warp/native/cutlass/tools/library/scripts/symm_operation.py +400 -0
- warp/native/cutlass/tools/library/scripts/trmm_operation.py +407 -0
- warp/native/cutlass_gemm.cu +5 -3
- warp/native/exports.h +1240 -952
- warp/native/fabric.h +228 -0
- warp/native/hashgrid.cpp +4 -4
- warp/native/hashgrid.h +22 -2
- warp/native/intersect.h +22 -7
- warp/native/intersect_adj.h +8 -8
- warp/native/intersect_tri.h +1 -1
- warp/native/marching.cu +157 -161
- warp/native/mat.h +80 -19
- warp/native/matnn.h +2 -2
- warp/native/mesh.cpp +33 -108
- warp/native/mesh.cu +114 -23
- warp/native/mesh.h +446 -46
- warp/native/noise.h +272 -329
- warp/native/quat.h +51 -8
- warp/native/rand.h +45 -35
- warp/native/range.h +6 -2
- warp/native/reduce.cpp +1 -1
- warp/native/reduce.cu +10 -12
- warp/native/runlength_encode.cu +6 -10
- warp/native/scan.cu +8 -11
- warp/native/sparse.cpp +4 -4
- warp/native/sparse.cu +164 -154
- warp/native/spatial.h +2 -2
- warp/native/temp_buffer.h +14 -30
- warp/native/vec.h +107 -23
- warp/native/volume.h +120 -0
- warp/native/warp.cpp +560 -30
- warp/native/warp.cu +431 -44
- warp/native/warp.h +13 -4
- warp/optim/__init__.py +1 -0
- warp/optim/linear.py +922 -0
- warp/optim/sgd.py +92 -0
- warp/render/render_opengl.py +335 -119
- warp/render/render_usd.py +11 -11
- warp/sim/__init__.py +2 -2
- warp/sim/articulation.py +385 -185
- warp/sim/collide.py +8 -0
- warp/sim/import_mjcf.py +297 -106
- warp/sim/import_urdf.py +389 -210
- warp/sim/import_usd.py +198 -97
- warp/sim/inertia.py +17 -18
- warp/sim/integrator_euler.py +14 -8
- warp/sim/integrator_xpbd.py +158 -16
- warp/sim/model.py +795 -291
- warp/sim/render.py +3 -3
- warp/sim/utils.py +3 -0
- warp/sparse.py +640 -150
- warp/stubs.py +606 -267
- warp/tape.py +61 -10
- warp/tests/__main__.py +3 -6
- warp/tests/assets/curlnoise_golden.npy +0 -0
- warp/tests/assets/pnoise_golden.npy +0 -0
- warp/tests/{test_class_kernel.py → aux_test_class_kernel.py} +9 -1
- warp/tests/aux_test_conditional_unequal_types_kernels.py +21 -0
- warp/tests/{test_dependent.py → aux_test_dependent.py} +2 -2
- warp/tests/{test_reference.py → aux_test_reference.py} +1 -1
- warp/tests/aux_test_unresolved_func.py +14 -0
- warp/tests/aux_test_unresolved_symbol.py +14 -0
- warp/tests/disabled_kinematics.py +239 -0
- warp/tests/run_coverage_serial.py +31 -0
- warp/tests/test_adam.py +103 -106
- warp/tests/test_arithmetic.py +128 -74
- warp/tests/test_array.py +212 -97
- warp/tests/test_array_reduce.py +57 -23
- warp/tests/test_atomic.py +64 -28
- warp/tests/test_bool.py +99 -0
- warp/tests/test_builtins_resolution.py +1292 -0
- warp/tests/test_bvh.py +42 -18
- warp/tests/test_closest_point_edge_edge.py +54 -57
- warp/tests/test_codegen.py +208 -130
- warp/tests/test_compile_consts.py +28 -20
- warp/tests/test_conditional.py +108 -24
- warp/tests/test_copy.py +10 -12
- warp/tests/test_ctypes.py +112 -88
- warp/tests/test_dense.py +21 -14
- warp/tests/test_devices.py +98 -0
- warp/tests/test_dlpack.py +75 -75
- warp/tests/test_examples.py +277 -0
- warp/tests/test_fabricarray.py +955 -0
- warp/tests/test_fast_math.py +15 -11
- warp/tests/test_fem.py +1271 -0
- warp/tests/test_fp16.py +53 -19
- warp/tests/test_func.py +187 -86
- warp/tests/test_generics.py +194 -49
- warp/tests/test_grad.py +178 -109
- warp/tests/test_grad_customs.py +176 -0
- warp/tests/test_hash_grid.py +52 -37
- warp/tests/test_import.py +10 -23
- warp/tests/test_indexedarray.py +32 -31
- warp/tests/test_intersect.py +18 -9
- warp/tests/test_large.py +141 -0
- warp/tests/test_launch.py +14 -41
- warp/tests/test_lerp.py +64 -65
- warp/tests/test_linear_solvers.py +154 -0
- warp/tests/test_lvalue.py +493 -0
- warp/tests/test_marching_cubes.py +12 -13
- warp/tests/test_mat.py +517 -2898
- warp/tests/test_mat_lite.py +115 -0
- warp/tests/test_mat_scalar_ops.py +2889 -0
- warp/tests/test_math.py +103 -9
- warp/tests/test_matmul.py +305 -69
- warp/tests/test_matmul_lite.py +410 -0
- warp/tests/test_mesh.py +71 -14
- warp/tests/test_mesh_query_aabb.py +41 -25
- warp/tests/test_mesh_query_point.py +140 -22
- warp/tests/test_mesh_query_ray.py +39 -22
- warp/tests/test_mlp.py +30 -22
- warp/tests/test_model.py +92 -89
- warp/tests/test_modules_lite.py +39 -0
- warp/tests/test_multigpu.py +88 -114
- warp/tests/test_noise.py +12 -11
- warp/tests/test_operators.py +16 -20
- warp/tests/test_options.py +11 -11
- warp/tests/test_pinned.py +17 -18
- warp/tests/test_print.py +32 -11
- warp/tests/test_quat.py +275 -129
- warp/tests/test_rand.py +18 -16
- warp/tests/test_reload.py +38 -34
- warp/tests/test_rounding.py +50 -43
- warp/tests/test_runlength_encode.py +168 -20
- warp/tests/test_smoothstep.py +9 -11
- warp/tests/test_snippet.py +143 -0
- warp/tests/test_sparse.py +261 -63
- warp/tests/test_spatial.py +276 -243
- warp/tests/test_streams.py +110 -85
- warp/tests/test_struct.py +268 -63
- warp/tests/test_tape.py +39 -21
- warp/tests/test_torch.py +118 -89
- warp/tests/test_transient_module.py +12 -13
- warp/tests/test_types.py +614 -0
- warp/tests/test_utils.py +494 -0
- warp/tests/test_vec.py +354 -2050
- warp/tests/test_vec_lite.py +73 -0
- warp/tests/test_vec_scalar_ops.py +2099 -0
- warp/tests/test_volume.py +457 -293
- warp/tests/test_volume_write.py +124 -134
- warp/tests/unittest_serial.py +35 -0
- warp/tests/unittest_suites.py +341 -0
- warp/tests/unittest_utils.py +568 -0
- warp/tests/unused_test_misc.py +71 -0
- warp/tests/{test_debug.py → walkthough_debug.py} +3 -17
- warp/thirdparty/appdirs.py +36 -45
- warp/thirdparty/unittest_parallel.py +549 -0
- warp/torch.py +9 -6
- warp/types.py +1089 -366
- warp/utils.py +93 -387
- warp_lang-0.11.0.dist-info/METADATA +238 -0
- warp_lang-0.11.0.dist-info/RECORD +332 -0
- {warp_lang-0.10.1.dist-info → warp_lang-0.11.0.dist-info}/WHEEL +1 -1
- warp/tests/test_all.py +0 -219
- warp/tests/test_array_scan.py +0 -60
- warp/tests/test_base.py +0 -208
- warp/tests/test_unresolved_func.py +0 -7
- warp/tests/test_unresolved_symbol.py +0 -7
- warp_lang-0.10.1.dist-info/METADATA +0 -21
- warp_lang-0.10.1.dist-info/RECORD +0 -188
- /warp/tests/{test_compile_consts_dummy.py → aux_test_compile_consts_dummy.py} +0 -0
- /warp/tests/{test_reference_reference.py → aux_test_reference_reference.py} +0 -0
- /warp/tests/{test_square.py → aux_test_square.py} +0 -0
- {warp_lang-0.10.1.dist-info → warp_lang-0.11.0.dist-info}/LICENSE.md +0 -0
- {warp_lang-0.10.1.dist-info → warp_lang-0.11.0.dist-info}/top_level.txt +0 -0
warp/native/cutlass/tools/library/scripts/pycutlass/setup.py (new file)
@@ -0,0 +1,80 @@
+import distutils.cmd
+from setuptools import setup
+import setuptools.command.build_py
+import os
+
+# build rmm dependency
+class BuildRMM(distutils.cmd.Command):
+    user_options = []
+    def initialize_options(self):
+        pass
+    def finalize_options(self):
+        pass
+    def run(self):
+        try:
+            import rmm
+        except ImportError:
+            print("installing rmm")
+            os.system("git clone -b branch-22.08 --recurse-submodules https://github.com/rapidsai/rmm.git")
+            os.chdir("./rmm")
+            os.system("./build.sh librmm rmm")
+            os.chdir("./python")
+            os.system("python setup.py build_ext --inplace")
+            os.system("python setup.py install")
+
+cutlass_path = os.getenv('CUTLASS_PATH')
+assert cutlass_path is not None, "Environment variable 'CUTLASS_PATH' is not defined."
+cuda_install_path = os.getenv('CUDA_INSTALL_PATH')
+assert cuda_install_path is not None, "Environment variable 'CUDA_INSTALL_PATH' is not defined."
+
+ext_modules = []
+
+try:
+    from pybind11.setup_helpers import Pybind11Extension, build_ext
+    include_dirs = [
+        cutlass_path + "/include",
+        cuda_install_path + "/include",
+        cutlass_path + "/tools/util/include",
+        cutlass_path + "/test",
+        cutlass_path + "/tools/library/scripts/pycutlass/googletest/googletest/include"
+    ]
+
+    ext_modules = [
+        Pybind11Extension("cutlass",
+            ["src/cpp/cutlass.cpp"],
+            include_dirs=include_dirs,
+            extra_compile_args=["-fpermissive", "-w"])
+    ]
+except ImportError:
+    pass
+
+setup(
+    name="PyCutlass",
+    version="0.0.1",
+    author="Zhaodong Chen; Andrew Kerr; Haicheng Wu; Szymon Migacz; Graham Markall",
+    author_email="zhaodongc@nvidia.com",
+    description="Python interface for CUTLASS",
+    classifiers=[
+        "Programming Language :: Python :: 3",
+        "License :: OSI Approved :: MIT License",
+        "Operating System :: OS Independent",
+    ],
+    package_dir={"": "src"},
+    packages=['pycutlass', 'pycutlass.utils', 'pycutlass.test'],
+    setup_requires=["pybind11", "numpy<1.23"],
+    install_requires=[
+        "numpy<1.23",
+        'pybind11',
+        'cuda-python<11.7.0',
+        'typeguard',
+        'bfloat16',
+        'typing',
+        'scikit-build',
+        'treelib'
+    ],
+    cmdclass={
+        'rmm': BuildRMM
+    },
+    ext_modules=ext_modules,
+    python_requires=">=3.6",
+)
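The vendored PyCutlass setup script above wires a custom distutils command (`rmm`) into `setup(cmdclass=...)` and only builds the pybind11 `cutlass` extension when pybind11 can be imported. A minimal, hypothetical sketch of that cmdclass pattern follows; all names and metadata here are made up for illustration and are not part of the package.

# Hypothetical, self-contained sketch of the cmdclass pattern used by BuildRMM above.
# Save as setup.py and run "python setup.py hello" (analogous to "python setup.py rmm").
import distutils.cmd
from setuptools import setup


class HelloCommand(distutils.cmd.Command):
    """Example custom command; BuildRMM clones and builds RMM at this point instead."""

    user_options = []  # no command-line options, just like BuildRMM

    def initialize_options(self):
        pass

    def finalize_options(self):
        pass

    def run(self):
        print("custom command body runs here")


setup(
    name="example-pkg",  # placeholder metadata
    version="0.0.0",
    cmdclass={"hello": HelloCommand},
)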
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/__init__.py (new file)
@@ -0,0 +1,48 @@
+import re
+
+
+def SubstituteTemplate(template, values):
+    text = template
+    changed = True
+    while changed:
+        changed = False
+        for key, value in values.items():
+            regex = "\\$\\{%s\\}" % key
+            newtext = re.sub(regex, value, text)
+            if newtext != text:
+                changed = True
+            text = newtext
+    return text
+
+from pycutlass.type_hint import *
+from pycutlass.tensor_ref import *
+from pycutlass.operation import *
+from pycutlass.epilogue import *
+from pycutlass.parser import *
+from pycutlass.compiler import ArtifactManager
+from pycutlass.memory_manager import *
+from pycutlass.arguments import *
+from pycutlass.library import *
+from pycutlass.c_types import *
+from pycutlass.gemm_operation import *
+from pycutlass.conv2d_operation import *
+from pycutlass.compiler import *
+from pycutlass.utils import *
+from pycutlass.frontend import *
+from pycutlass.reduction_operation import *
+from pycutlass.compiler import *
+
+# module-wide variables
+
+import sys
+this = sys.modules[__name__]
+
+# artifact manager
+this.compiler = ArtifactManager()
+
+def get_memory_pool(init_pool_size=0, max_pool_size=2**34):
+    this.memory_pool = PoolMemoryManager(
+        init_pool_size=init_pool_size,
+        max_pool_size=max_pool_size
+    )
+    return this.memory_pool
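The module above also ships the small `SubstituteTemplate` helper used when emitting kernel source. A self-contained sketch of its behaviour follows; the helper is re-declared so the snippet runs without pycutlass installed, and the template string is made up for illustration.

# Re-declaration of SubstituteTemplate from the diff above, for a standalone demo.
import re

def SubstituteTemplate(template, values):
    text = template
    changed = True
    while changed:
        changed = False
        for key, value in values.items():
            regex = "\\$\\{%s\\}" % key
            newtext = re.sub(regex, value, text)
            if newtext != text:
                changed = True
            text = newtext
    return text

# ${...} placeholders are substituted repeatedly until the text stops changing,
# so a value may itself introduce further placeholders.
template = 'extern "C" void ${name}_${arch}();'  # illustrative template only
print(SubstituteTemplate(template, {"name": "gemm_f16", "arch": "sm80"}))
# prints: extern "C" void gemm_f16_sm80();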
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/arguments.py (new file)
@@ -0,0 +1,118 @@
+#################################################################################################
+#
+# Copyright (c) 2017 - 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: BSD-3-Clause
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# 3. Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+#################################################################################################
+from .frontend import CupyFrontend
+from typeguard import typechecked
+from pycutlass.frontend import *
+from typing import Union
+import numpy as np
+from cuda import cuda
+try:
+    import torch
+    torch_available = True
+except ImportError:
+    torch_available = False
+from cuda import cudart
+try:
+    import cupy as cp
+    cupy_available = True
+except ImportError:
+    cupy_available = False
+
+
+# @typechecked
+class ArgumentBase:
+    """
+    Base class for operation arguments
+    """
+
+    def __init__(self,
+        A: 'Union[cuda.CUdeviceptr, np.ndarray, torch.Tensor, cp.ndarray]',
+        B: 'Union[cuda.CUdeviceptr, np.ndarray, torch.Tensor, cp.ndarray]',
+        C: 'Union[cuda.CUdeviceptr, np.ndarray, torch.Tensor, cp.ndarray]',
+        D: 'Union[cuda.CUdeviceptr, np.ndarray, torch.Tensor, cp.ndarray]',
+        **kwargs) -> None:
+
+        # tensor_C can be interpreted as the bias with bias=True in keyword args
+        if "bias" in kwargs.keys():
+            self.bias = kwargs["bias"]
+        else:
+            # by default, tensor_C is not bias
+            self.bias = False
+
+        # preprocessing input tensors
+        if isinstance(A, np.ndarray):
+            self.host_D = D
+            self.buffer_A = NumpyFrontend.argument(A, False)
+            self.buffer_B = NumpyFrontend.argument(B, False)
+            self.buffer_C = NumpyFrontend.argument(C, False)
+            self.buffer_D = NumpyFrontend.argument(D, True)
+            self.ptr_A = self.buffer_A.ptr
+            self.ptr_B = self.buffer_B.ptr
+            self.ptr_C = self.buffer_C.ptr
+            self.ptr_D = self.buffer_D.ptr
+            # number of elements in C
+            self.tensor_c_numel = C.size
+        elif torch_available and isinstance(A, torch.Tensor):
+            self.ptr_A = TorchFrontend.argument(A)
+            self.ptr_B = TorchFrontend.argument(B)
+            self.ptr_C = TorchFrontend.argument(C)
+            self.ptr_D = TorchFrontend.argument(D)
+            # number of elements in C
+            self.tensor_c_numel = C.numel()
+        elif isinstance(A, cuda.CUdeviceptr):
+            self.ptr_A = A
+            self.ptr_B = B
+            self.ptr_C = C
+            self.ptr_D = D
+
+        elif cupy_available and isinstance(A, cp.ndarray):
+            self.ptr_A = CupyFrontend.argument(A)
+            self.ptr_B = CupyFrontend.argument(B)
+            self.ptr_C = CupyFrontend.argument(C)
+            self.ptr_D = CupyFrontend.argument(D)
+            # number of elements in C
+            self.tensor_c_numel = C.size
+        else:
+            raise TypeError(
+                "Unsupported Frontend. Only support numpy and torch")
+
+    def sync(self, stream_sync=True):
+        if stream_sync:
+            err, = cudart.cudaDeviceSynchronize()
+            if err != cuda.CUresult.CUDA_SUCCESS:
+                raise RuntimeError("CUDA Error %s" % str(err))
+
+        if hasattr(self, "host_D"):
+            err, = cuda.cuMemcpyDtoH(
+                self.host_D, self.ptr_D, self.host_D.size * self.host_D.itemsize)
+            if err != cuda.CUresult.CUDA_SUCCESS:
+                raise RuntimeError("CUDA Error %s" % str(err))
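ArgumentBase above dispatches on the type of A: NumPy arrays are staged into device buffers (with D copied back to host_D in sync()), torch and cupy tensors go through their respective frontends, and raw cuda.CUdeviceptr handles are passed through unchanged. Below is a hedged, NumPy-only sketch of that dispatch; the dummy buffer and helper names are invented here because NumpyFrontend is not shown in this diff.

# Hypothetical sketch of the ArgumentBase-style frontend dispatch (numpy branch only).
import numpy as np

class DummyBuffer:
    """Stand-in for the device buffer returned by NumpyFrontend.argument."""
    def __init__(self, array):
        self.ptr = array.ctypes.data  # host address used in place of a device pointer

def make_arguments(A, B, C, D, **kwargs):
    args = {"bias": kwargs.get("bias", False)}  # tensor_C doubles as the bias when bias=True
    if isinstance(A, np.ndarray):
        args["host_D"] = D  # kept so results could be copied back on sync()
        for name, tensor in (("A", A), ("B", B), ("C", C), ("D", D)):
            args["ptr_" + name] = DummyBuffer(tensor).ptr
        args["tensor_c_numel"] = C.size
        return args
    raise TypeError("only the numpy branch is sketched here; see ArgumentBase for the others")

A = B = C = D = np.zeros((8, 8), dtype=np.float32)
print(sorted(make_arguments(A, B, C, D, bias=True)))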
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/c_types.py (new file)
@@ -0,0 +1,241 @@
+#################################################################################################
+#
+# Copyright (c) 2017 - 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: BSD-3-Clause
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# 3. Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+#################################################################################################
+
+import ctypes
+from pycutlass.library import *
+
+# 12B
+
+
+class GemmCoord_(ctypes.Structure):
+    _fields_ = [
+        ("m", ctypes.c_int),
+        ("n", ctypes.c_int),
+        ("k", ctypes.c_int)
+    ]
+
+    def __init__(self, gemm_coord) -> None:
+        for field_name, _ in self._fields_:
+            setattr(self, field_name, getattr(gemm_coord, field_name)())
+
+
+class MatrixCoord_(ctypes.Structure):
+    _fields_ = [
+        ("row", ctypes.c_int),
+        ("column", ctypes.c_int)
+    ]
+
+
+dtype2ctype = {
+    cutlass.float16: ctypes.c_uint16,
+    cutlass.float32: ctypes.c_float,
+    cutlass.float64: ctypes.c_double,
+    cutlass.int32: ctypes.c_int32
+}
+
+
+def get_gemm_arguments(epilogue_functor):
+
+    _EpilogueOutputOpParams = epilogue_functor.epilogue_type
+
+    class _GemmArguments(ctypes.Structure):
+        _fields_ = [
+            # Arguments from UniversalArgumentsBase
+            ("mode", ctypes.c_int),
+            ("problem_size", GemmCoord_),
+            ("batch_count", ctypes.c_int),
+            ("batch_stride_D", ctypes.c_longlong),
+            # Remaining arguments
+            ("epilogue", _EpilogueOutputOpParams),
+            ("ptr_A", ctypes.c_void_p),
+            ("ptr_B", ctypes.c_void_p),
+            ("ptr_C", ctypes.c_void_p),
+            ("ptr_D", ctypes.c_void_p),
+            ("batch_stride_A", ctypes.c_longlong),
+            ("batch_stride_B", ctypes.c_longlong),
+            ("batch_stride_C", ctypes.c_longlong),
+            ("stride_a", ctypes.c_longlong),
+            ("stride_b", ctypes.c_longlong),
+            ("stride_c", ctypes.c_longlong),
+            ("stride_d", ctypes.c_longlong),
+            ("lda", ctypes.c_longlong),
+            ("ldb", ctypes.c_longlong),
+            ("ldc", ctypes.c_longlong),
+            ("ldd", ctypes.c_longlong),
+            ("ptr_gather_A_indices", ctypes.c_void_p),
+            ("ptr_gether_B_indices", ctypes.c_void_p),
+            ("ptr_scatter_D_indices", ctypes.c_void_p)
+        ]
+
+    return _GemmArguments, _EpilogueOutputOpParams
+
+
+###########################################################################################
+# GEMM Grouped
+###########################################################################################
+
+# include/cutlass/gemm/kernel/gemm_grouped.h
+
+def get_gemm_grouped_arguments(epilogue_functor):
+    _EpilogueOutputOpParams = epilogue_functor.epilogue_type
+
+    class _GEMMGroupedArguments(ctypes.Structure):
+        _fields_ = [
+            ("problem_sizes", ctypes.c_void_p),
+            ("problem_count", ctypes.c_int),
+            ("threadblock_count", ctypes.c_int),
+            ("output_op", _EpilogueOutputOpParams),
+            ("ptr_A", ctypes.c_void_p),
+            ("ptr_B", ctypes.c_void_p),
+            ("ptr_C", ctypes.c_void_p),
+            ("ptr_D", ctypes.c_void_p),
+            ("lda", ctypes.c_void_p),
+            ("ldb", ctypes.c_void_p),
+            ("ldc", ctypes.c_void_p),
+            ("ldd", ctypes.c_void_p),
+            ("host_problem_sizes", ctypes.c_void_p)
+        ]
+
+    return _GEMMGroupedArguments, _EpilogueOutputOpParams
+
+############################################################################################
+# Convolution2D
+############################################################################################
+
+
+# We use the arguments as the interface
+
+
+# include/cutlass/conv/conv2d_problem_size.h
+# 64B
+class Conv2DProblemSize(ctypes.Structure):
+    _fields_ = [
+        ("N", ctypes.c_int),
+        ("H", ctypes.c_int),
+        ("W", ctypes.c_int),
+        ("C", ctypes.c_int),
+        ("P", ctypes.c_int),
+        ("Q", ctypes.c_int),
+        ("K", ctypes.c_int),
+        ("R", ctypes.c_int),
+        ("S", ctypes.c_int),
+        ("pad_h", ctypes.c_int),
+        ("pad_w", ctypes.c_int),
+        ("stride_h", ctypes.c_int),
+        ("stride_w", ctypes.c_int),
+        ("dilation_h", ctypes.c_int),
+        ("dilation_w", ctypes.c_int),
+        ("mode", ctypes.c_int),  # kCrossCorrelation: 0, kConvolution: 1
+        ("split_k_slices", ctypes.c_int),
+        ("groups", ctypes.c_int)
+    ]
+
+    def __init__(self, problem_size) -> None:
+        for field_name, _ in self._fields_:
+            setattr(self, field_name, getattr(problem_size, field_name))
+
+
+# include/cutlass/layout/tensor.h
+# 12B
+class Layout4D(ctypes.Structure):
+    _fields_ = [
+        ("stride", ctypes.c_int * 3)
+    ]
+
+    def __init__(self, tensor_ref):
+        stride = tensor_ref.stride()
+        setattr(self, "stride", (stride.at(0), stride.at(1), stride.at(2)))
+
+# TODO: Tensor 5-D takes ("stride", ctypes.c_int * 4)
+
+
+# include/cutlass/conv/threadblock/conv2d_dgrad_filter_tile_access_iterator_optimized.h
+# TensorRef is basically cutlass::TensorRef<Element, Layout>;
+# include/cutlass/tensor_ref.h
+# 24B
+class TensorRef_(ctypes.Structure):
+    _fields_ = [
+        ("ptr", ctypes.c_void_p),
+        ("layout", Layout4D)
+    ]
+
+    def __init__(self, tensor_ref):
+        setattr(self, "ptr", tensor_ref.data())
+        setattr(self, "layout", Layout4D(tensor_ref.layout()))
+
+
+class TensorRef2D_(ctypes.Structure):
+    _fields_ = [
+        ("ptr", ctypes.c_void_p),
+        ("stride", ctypes.c_int)
+    ]
+
+
+# include/cutlass/conv/kernel/implicit_gemm_convolution.h
+# split_k_mode: kNone: 0, kSerial: 1, kParallel: 2, kParallelSerial: 3, kInvalid: 4
+
+def get_conv2d_arguments(epilogue_functor):
+    _EpilogueOutputOpParams = epilogue_functor.epilogue_type
+
+    class _Conv2dArguments(ctypes.Structure):
+        _fields_ = [
+            ("problem_size", Conv2DProblemSize),  # 0
+            ("ref_A", TensorRef_),  # 72
+            ("ref_B", TensorRef_),  # 96
+            ("ref_C", TensorRef_),  # 120
+            ("ref_D", TensorRef_),  # 144
+            ("output_op", _EpilogueOutputOpParams),  # 168
+            ("split_k_mode", ctypes.c_int)  # 192
+        ]
+
+    return _Conv2dArguments, _EpilogueOutputOpParams
+
+
+############################################################################################
+# Reduction
+############################################################################################
+
+
+def get_reduction_params(epilogue_functor):
+    _EpilogueOutputParams = epilogue_functor.epilogue_type
+
+    class _ReductionParams(ctypes.Structure):
+        _fields_ = [
+            ("problem_size", MatrixCoord_),
+            ("partitions", ctypes.c_int),
+            ("partition_stride", ctypes.c_longlong),
+            ("workspace", TensorRef2D_),
+            ("destination", TensorRef2D_),
+            ("source", TensorRef2D_),
+            ("output_op", _EpilogueOutputParams)
+        ]
+    return _ReductionParams, _EpilogueOutputParams
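These ctypes mirrors exist so argument blocks can be passed by value to the compiled CUTLASS kernels with the same memory layout as the corresponding C++ structs (the "# 12B" / "# 24B" comments record the expected struct sizes in bytes). A small self-contained check of that idea follows, with MatrixCoord_ re-declared from the diff above so it runs without CUTLASS or the cutlass extension installed.

# MatrixCoord_ re-declared from the diff above for a standalone layout check.
import ctypes

class MatrixCoord_(ctypes.Structure):
    _fields_ = [
        ("row", ctypes.c_int),
        ("column", ctypes.c_int),
    ]

coord = MatrixCoord_(row=128, column=256)
# Two 32-bit ints, no padding: 8 bytes total, matching the size of a
# cutlass::MatrixCoord (two ints) passed by value on the C++ side.
assert ctypes.sizeof(MatrixCoord_) == 8
print(coord.row, coord.column, ctypes.sizeof(coord))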