warp-lang 0.9.0__py3-none-win_amd64.whl → 0.11.0__py3-none-win_amd64.whl
This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of warp-lang might be problematic. Click here for more details.
- warp/__init__.py +15 -7
- warp/__init__.pyi +1 -0
- warp/bin/warp-clang.dll +0 -0
- warp/bin/warp.dll +0 -0
- warp/build.py +22 -443
- warp/build_dll.py +384 -0
- warp/builtins.py +998 -488
- warp/codegen.py +1307 -739
- warp/config.py +5 -3
- warp/constants.py +6 -0
- warp/context.py +1291 -548
- warp/dlpack.py +31 -31
- warp/fabric.py +326 -0
- warp/fem/__init__.py +27 -0
- warp/fem/cache.py +389 -0
- warp/fem/dirichlet.py +181 -0
- warp/fem/domain.py +263 -0
- warp/fem/field/__init__.py +101 -0
- warp/fem/field/field.py +149 -0
- warp/fem/field/nodal_field.py +299 -0
- warp/fem/field/restriction.py +21 -0
- warp/fem/field/test.py +181 -0
- warp/fem/field/trial.py +183 -0
- warp/fem/geometry/__init__.py +19 -0
- warp/fem/geometry/closest_point.py +70 -0
- warp/fem/geometry/deformed_geometry.py +271 -0
- warp/fem/geometry/element.py +744 -0
- warp/fem/geometry/geometry.py +186 -0
- warp/fem/geometry/grid_2d.py +373 -0
- warp/fem/geometry/grid_3d.py +435 -0
- warp/fem/geometry/hexmesh.py +953 -0
- warp/fem/geometry/partition.py +376 -0
- warp/fem/geometry/quadmesh_2d.py +532 -0
- warp/fem/geometry/tetmesh.py +840 -0
- warp/fem/geometry/trimesh_2d.py +577 -0
- warp/fem/integrate.py +1616 -0
- warp/fem/operator.py +191 -0
- warp/fem/polynomial.py +213 -0
- warp/fem/quadrature/__init__.py +2 -0
- warp/fem/quadrature/pic_quadrature.py +245 -0
- warp/fem/quadrature/quadrature.py +294 -0
- warp/fem/space/__init__.py +292 -0
- warp/fem/space/basis_space.py +489 -0
- warp/fem/space/collocated_function_space.py +105 -0
- warp/fem/space/dof_mapper.py +236 -0
- warp/fem/space/function_space.py +145 -0
- warp/fem/space/grid_2d_function_space.py +267 -0
- warp/fem/space/grid_3d_function_space.py +306 -0
- warp/fem/space/hexmesh_function_space.py +352 -0
- warp/fem/space/partition.py +350 -0
- warp/fem/space/quadmesh_2d_function_space.py +369 -0
- warp/fem/space/restriction.py +160 -0
- warp/fem/space/shape/__init__.py +15 -0
- warp/fem/space/shape/cube_shape_function.py +738 -0
- warp/fem/space/shape/shape_function.py +103 -0
- warp/fem/space/shape/square_shape_function.py +611 -0
- warp/fem/space/shape/tet_shape_function.py +567 -0
- warp/fem/space/shape/triangle_shape_function.py +429 -0
- warp/fem/space/tetmesh_function_space.py +292 -0
- warp/fem/space/topology.py +295 -0
- warp/fem/space/trimesh_2d_function_space.py +221 -0
- warp/fem/types.py +77 -0
- warp/fem/utils.py +495 -0
- warp/native/array.h +164 -55
- warp/native/builtin.h +150 -174
- warp/native/bvh.cpp +75 -328
- warp/native/bvh.cu +406 -23
- warp/native/bvh.h +37 -45
- warp/native/clang/clang.cpp +136 -24
- warp/native/crt.cpp +1 -76
- warp/native/crt.h +111 -104
- warp/native/cuda_crt.h +1049 -0
- warp/native/cuda_util.cpp +15 -3
- warp/native/cuda_util.h +3 -1
- warp/native/cutlass/tools/library/scripts/conv2d_operation.py +463 -0
- warp/native/cutlass/tools/library/scripts/conv3d_operation.py +321 -0
- warp/native/cutlass/tools/library/scripts/gemm_operation.py +988 -0
- warp/native/cutlass/tools/library/scripts/generator.py +4625 -0
- warp/native/cutlass/tools/library/scripts/library.py +799 -0
- warp/native/cutlass/tools/library/scripts/manifest.py +402 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/docs/source/conf.py +96 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/profile/conv/conv2d_f16_sm80.py +106 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/profile/gemm/gemm_f32_sm80.py +91 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/setup.py +80 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/__init__.py +48 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/arguments.py +118 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/c_types.py +241 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/compiler.py +432 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/conv2d_operation.py +631 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/epilogue.py +1026 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/frontend.py +104 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/gemm_operation.py +1276 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/library.py +744 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/memory_manager.py +74 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/operation.py +110 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/parser.py +619 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/reduction_operation.py +398 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/tensor_ref.py +70 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/__init__.py +4 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/conv2d_testbed.py +646 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/gemm_grouped_testbed.py +235 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/gemm_testbed.py +557 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/profiler.py +70 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/type_hint.py +39 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/utils/__init__.py +1 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/utils/device.py +76 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/utils/reference_model.py +255 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/__init__.py +0 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.py +201 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +177 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.py +98 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.py +95 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_few_channels_f16nhwc_f16nhwc_f16nhwc_tensor_op_f32_sm80.py +163 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_fixed_channels_f16nhwc_f16nhwc_f16nhwc_tensor_op_f32_sm80.py +187 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.py +309 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +54 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.py +96 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.py +107 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_strided_dgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +253 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.py +97 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +242 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.py +96 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.py +107 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/run_all_tests.py +10 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/frontend/test_frontend.py +146 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/__init__.py +0 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_bf16_sm80.py +96 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_f16_sm80.py +447 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_f32_sm80.py +146 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_f64_sm80.py +102 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_grouped_sm80.py +203 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_s8_sm80.py +229 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/run_all_tests.py +9 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/unit/test_sm80.py +453 -0
- warp/native/cutlass/tools/library/scripts/rank_2k_operation.py +398 -0
- warp/native/cutlass/tools/library/scripts/rank_k_operation.py +387 -0
- warp/native/cutlass/tools/library/scripts/rt.py +796 -0
- warp/native/cutlass/tools/library/scripts/symm_operation.py +400 -0
- warp/native/cutlass/tools/library/scripts/trmm_operation.py +407 -0
- warp/native/cutlass_gemm.cu +5 -3
- warp/native/exports.h +1240 -949
- warp/native/fabric.h +228 -0
- warp/native/hashgrid.cpp +4 -4
- warp/native/hashgrid.h +22 -2
- warp/native/initializer_array.h +2 -2
- warp/native/intersect.h +22 -7
- warp/native/intersect_adj.h +8 -8
- warp/native/intersect_tri.h +13 -16
- warp/native/marching.cu +157 -161
- warp/native/mat.h +119 -19
- warp/native/matnn.h +2 -2
- warp/native/mesh.cpp +108 -83
- warp/native/mesh.cu +243 -6
- warp/native/mesh.h +1547 -458
- warp/native/nanovdb/NanoVDB.h +1 -1
- warp/native/noise.h +272 -329
- warp/native/quat.h +51 -8
- warp/native/rand.h +45 -35
- warp/native/range.h +6 -2
- warp/native/reduce.cpp +157 -0
- warp/native/reduce.cu +348 -0
- warp/native/runlength_encode.cpp +62 -0
- warp/native/runlength_encode.cu +46 -0
- warp/native/scan.cu +11 -13
- warp/native/scan.h +1 -0
- warp/native/solid_angle.h +442 -0
- warp/native/sort.cpp +13 -0
- warp/native/sort.cu +9 -1
- warp/native/sparse.cpp +338 -0
- warp/native/sparse.cu +545 -0
- warp/native/spatial.h +2 -2
- warp/native/temp_buffer.h +30 -0
- warp/native/vec.h +126 -24
- warp/native/volume.h +120 -0
- warp/native/warp.cpp +658 -53
- warp/native/warp.cu +660 -68
- warp/native/warp.h +112 -12
- warp/optim/__init__.py +1 -0
- warp/optim/linear.py +922 -0
- warp/optim/sgd.py +92 -0
- warp/render/render_opengl.py +392 -152
- warp/render/render_usd.py +11 -11
- warp/sim/__init__.py +2 -2
- warp/sim/articulation.py +385 -185
- warp/sim/collide.py +21 -8
- warp/sim/import_mjcf.py +297 -106
- warp/sim/import_urdf.py +389 -210
- warp/sim/import_usd.py +198 -97
- warp/sim/inertia.py +17 -18
- warp/sim/integrator_euler.py +14 -8
- warp/sim/integrator_xpbd.py +161 -19
- warp/sim/model.py +795 -291
- warp/sim/optimizer.py +2 -6
- warp/sim/render.py +65 -3
- warp/sim/utils.py +3 -0
- warp/sparse.py +1227 -0
- warp/stubs.py +665 -223
- warp/tape.py +66 -15
- warp/tests/__main__.py +3 -6
- warp/tests/assets/curlnoise_golden.npy +0 -0
- warp/tests/assets/pnoise_golden.npy +0 -0
- warp/tests/assets/torus.usda +105 -105
- warp/tests/{test_class_kernel.py → aux_test_class_kernel.py} +9 -1
- warp/tests/aux_test_conditional_unequal_types_kernels.py +21 -0
- warp/tests/{test_dependent.py → aux_test_dependent.py} +2 -2
- warp/tests/{test_reference.py → aux_test_reference.py} +1 -1
- warp/tests/aux_test_unresolved_func.py +14 -0
- warp/tests/aux_test_unresolved_symbol.py +14 -0
- warp/tests/disabled_kinematics.py +239 -0
- warp/tests/run_coverage_serial.py +31 -0
- warp/tests/test_adam.py +103 -106
- warp/tests/test_arithmetic.py +128 -74
- warp/tests/test_array.py +1497 -211
- warp/tests/test_array_reduce.py +150 -0
- warp/tests/test_atomic.py +64 -28
- warp/tests/test_bool.py +99 -0
- warp/tests/test_builtins_resolution.py +1292 -0
- warp/tests/test_bvh.py +75 -43
- warp/tests/test_closest_point_edge_edge.py +54 -57
- warp/tests/test_codegen.py +233 -128
- warp/tests/test_compile_consts.py +28 -20
- warp/tests/test_conditional.py +108 -24
- warp/tests/test_copy.py +10 -12
- warp/tests/test_ctypes.py +112 -88
- warp/tests/test_dense.py +21 -14
- warp/tests/test_devices.py +98 -0
- warp/tests/test_dlpack.py +136 -108
- warp/tests/test_examples.py +277 -0
- warp/tests/test_fabricarray.py +955 -0
- warp/tests/test_fast_math.py +15 -11
- warp/tests/test_fem.py +1271 -0
- warp/tests/test_fp16.py +53 -19
- warp/tests/test_func.py +187 -74
- warp/tests/test_generics.py +194 -49
- warp/tests/test_grad.py +180 -116
- warp/tests/test_grad_customs.py +176 -0
- warp/tests/test_hash_grid.py +52 -37
- warp/tests/test_import.py +10 -23
- warp/tests/test_indexedarray.py +577 -24
- warp/tests/test_intersect.py +18 -9
- warp/tests/test_large.py +141 -0
- warp/tests/test_launch.py +251 -15
- warp/tests/test_lerp.py +64 -65
- warp/tests/test_linear_solvers.py +154 -0
- warp/tests/test_lvalue.py +493 -0
- warp/tests/test_marching_cubes.py +12 -13
- warp/tests/test_mat.py +508 -2778
- warp/tests/test_mat_lite.py +115 -0
- warp/tests/test_mat_scalar_ops.py +2889 -0
- warp/tests/test_math.py +103 -9
- warp/tests/test_matmul.py +305 -69
- warp/tests/test_matmul_lite.py +410 -0
- warp/tests/test_mesh.py +71 -14
- warp/tests/test_mesh_query_aabb.py +41 -25
- warp/tests/test_mesh_query_point.py +325 -34
- warp/tests/test_mesh_query_ray.py +39 -22
- warp/tests/test_mlp.py +30 -22
- warp/tests/test_model.py +92 -89
- warp/tests/test_modules_lite.py +39 -0
- warp/tests/test_multigpu.py +88 -114
- warp/tests/test_noise.py +12 -11
- warp/tests/test_operators.py +16 -20
- warp/tests/test_options.py +11 -11
- warp/tests/test_pinned.py +17 -18
- warp/tests/test_print.py +32 -11
- warp/tests/test_quat.py +275 -129
- warp/tests/test_rand.py +18 -16
- warp/tests/test_reload.py +38 -34
- warp/tests/test_rounding.py +50 -43
- warp/tests/test_runlength_encode.py +190 -0
- warp/tests/test_smoothstep.py +9 -11
- warp/tests/test_snippet.py +143 -0
- warp/tests/test_sparse.py +460 -0
- warp/tests/test_spatial.py +276 -243
- warp/tests/test_streams.py +110 -85
- warp/tests/test_struct.py +331 -85
- warp/tests/test_tape.py +39 -21
- warp/tests/test_torch.py +118 -89
- warp/tests/test_transient_module.py +12 -13
- warp/tests/test_types.py +614 -0
- warp/tests/test_utils.py +494 -0
- warp/tests/test_vec.py +354 -1987
- warp/tests/test_vec_lite.py +73 -0
- warp/tests/test_vec_scalar_ops.py +2099 -0
- warp/tests/test_volume.py +457 -293
- warp/tests/test_volume_write.py +124 -134
- warp/tests/unittest_serial.py +35 -0
- warp/tests/unittest_suites.py +341 -0
- warp/tests/unittest_utils.py +568 -0
- warp/tests/unused_test_misc.py +71 -0
- warp/tests/{test_debug.py → walkthough_debug.py} +3 -17
- warp/thirdparty/appdirs.py +36 -45
- warp/thirdparty/unittest_parallel.py +549 -0
- warp/torch.py +72 -30
- warp/types.py +1744 -713
- warp/utils.py +360 -350
- warp_lang-0.11.0.dist-info/LICENSE.md +36 -0
- warp_lang-0.11.0.dist-info/METADATA +238 -0
- warp_lang-0.11.0.dist-info/RECORD +332 -0
- {warp_lang-0.9.0.dist-info → warp_lang-0.11.0.dist-info}/WHEEL +1 -1
- warp/bin/warp-clang.exp +0 -0
- warp/bin/warp-clang.lib +0 -0
- warp/bin/warp.exp +0 -0
- warp/bin/warp.lib +0 -0
- warp/tests/test_all.py +0 -215
- warp/tests/test_array_scan.py +0 -60
- warp/tests/test_base.py +0 -208
- warp/tests/test_unresolved_func.py +0 -7
- warp/tests/test_unresolved_symbol.py +0 -7
- warp_lang-0.9.0.dist-info/METADATA +0 -20
- warp_lang-0.9.0.dist-info/RECORD +0 -177
- /warp/tests/{test_compile_consts_dummy.py → aux_test_compile_consts_dummy.py} +0 -0
- /warp/tests/{test_reference_reference.py → aux_test_reference_reference.py} +0 -0
- /warp/tests/{test_square.py → aux_test_square.py} +0 -0
- {warp_lang-0.9.0.dist-info → warp_lang-0.11.0.dist-info}/top_level.txt +0 -0
warp/native/clang/clang.cpp
CHANGED
|
@@ -25,6 +25,8 @@
|
|
|
25
25
|
#include <llvm/PassRegistry.h>
|
|
26
26
|
#include <llvm/InitializePasses.h>
|
|
27
27
|
#include <llvm/IR/LegacyPassManager.h>
|
|
28
|
+
#include <llvm/IRReader/IRReader.h>
|
|
29
|
+
#include <llvm/Linker/Linker.h>
|
|
28
30
|
|
|
29
31
|
#include <llvm/ExecutionEngine/Orc/LLJIT.h>
|
|
30
32
|
#include <llvm/ExecutionEngine/JITEventListener.h>
|
|
@@ -45,6 +47,7 @@
|
|
|
45
47
|
#elif defined(__APPLE__)
|
|
46
48
|
extern "C" void __bzero(void*, size_t);
|
|
47
49
|
extern "C" __double2 __sincos_stret(double);
|
|
50
|
+
extern "C" __float2 __sincosf_stret(float);
|
|
48
51
|
#endif
|
|
49
52
|
|
|
50
53
|
extern "C" {
|
|
@@ -54,21 +57,20 @@ extern "C" {
|
|
|
54
57
|
// On Linux it suffices for these symbols not to be stripped out, while for Windows a .pdb has to contain
|
|
55
58
|
// their information. LLVM defines them, but we don't want a huge .pdb with all LLVM source code's debug
|
|
56
59
|
// info. By forward-declaring them here it suffices to compile this file with /Zi.
|
|
57
|
-
struct jit_descriptor;
|
|
58
|
-
extern jit_descriptor __jit_debug_descriptor;
|
|
60
|
+
extern struct jit_descriptor __jit_debug_descriptor;
|
|
59
61
|
extern void __jit_debug_register_code();
|
|
60
62
|
|
|
61
63
|
}
|
|
62
64
|
|
|
63
65
|
namespace wp {
|
|
64
|
-
|
|
66
|
+
|
|
65
67
|
#if defined (_WIN32)
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
68
|
+
// Windows defaults to using the COFF binary format (aka. "msvc" in the target triple).
|
|
69
|
+
// Override it to use the ELF format to support DWARF debug info, but keep using the
|
|
70
|
+
// Microsoft calling convention (see also https://llvm.org/docs/DebuggingJITedCode.html).
|
|
71
|
+
static const char* target_triple = "x86_64-pc-windows-elf";
|
|
70
72
|
#else
|
|
71
|
-
|
|
73
|
+
static const char* target_triple = LLVM_DEFAULT_TARGET_TRIPLE;
|
|
72
74
|
#endif
|
|
73
75
|
|
|
74
76
|
static void initialize_llvm()
|
|
@@ -93,6 +95,11 @@ static std::unique_ptr<llvm::Module> cpp_to_llvm(const std::string& input_file,
|
|
|
93
95
|
args.push_back("-triple");
|
|
94
96
|
args.push_back(target_triple);
|
|
95
97
|
|
|
98
|
+
#if defined(__x86_64__) || defined(_M_X64)
|
|
99
|
+
args.push_back("-target-feature");
|
|
100
|
+
args.push_back("+f16c"); // Enables support for _Float16
|
|
101
|
+
#endif
|
|
102
|
+
|
|
96
103
|
clang::IntrusiveRefCntPtr<clang::DiagnosticOptions> diagnostic_options = new clang::DiagnosticOptions();
|
|
97
104
|
std::unique_ptr<clang::TextDiagnosticPrinter> text_diagnostic_printer =
|
|
98
105
|
std::make_unique<clang::TextDiagnosticPrinter>(llvm::errs(), &*diagnostic_options);
|
|
@@ -114,8 +121,6 @@ static std::unique_ptr<llvm::Module> cpp_to_llvm(const std::string& input_file,
|
|
|
114
121
|
std::unique_ptr<llvm::MemoryBuffer> buffer = llvm::MemoryBuffer::getMemBufferCopy(cpp_src);
|
|
115
122
|
compiler_invocation.getPreprocessorOpts().addRemappedFile(input_file.c_str(), buffer.get());
|
|
116
123
|
|
|
117
|
-
compiler_instance.getPreprocessorOpts().addMacroDef("WP_CPU");
|
|
118
|
-
|
|
119
124
|
if(!debug)
|
|
120
125
|
{
|
|
121
126
|
compiler_instance.getPreprocessorOpts().addMacroDef("NDEBUG");
|
|
@@ -133,18 +138,71 @@ static std::unique_ptr<llvm::Module> cpp_to_llvm(const std::string& input_file,
|
|
|
133
138
|
return success ? std::move(emit_llvm_only_action.takeModule()) : nullptr;
|
|
134
139
|
}
|
|
135
140
|
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
WP_API int compile_cpp(const char* cpp_src, const char* include_dir, const char* output_file, bool debug)
|
|
141
|
+
static std::unique_ptr<llvm::Module> cuda_to_llvm(const std::string& input_file, const char* cpp_src, const char* include_dir, bool debug, llvm::LLVMContext& context)
|
|
139
142
|
{
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
143
|
+
// Compilation arguments
|
|
144
|
+
std::vector<const char*> args;
|
|
145
|
+
args.push_back(input_file.c_str());
|
|
146
|
+
|
|
147
|
+
args.push_back("-I");
|
|
148
|
+
args.push_back(include_dir);
|
|
149
|
+
|
|
150
|
+
args.push_back(debug ? "-O0" : "-O2");
|
|
151
|
+
|
|
152
|
+
args.push_back("-triple");
|
|
153
|
+
args.push_back("nvptx64-nvidia-cuda");
|
|
154
|
+
|
|
155
|
+
args.push_back("-target-cpu");
|
|
156
|
+
args.push_back("sm_70");
|
|
157
|
+
|
|
158
|
+
clang::IntrusiveRefCntPtr<clang::DiagnosticOptions> diagnostic_options = new clang::DiagnosticOptions();
|
|
159
|
+
std::unique_ptr<clang::TextDiagnosticPrinter> text_diagnostic_printer =
|
|
160
|
+
std::make_unique<clang::TextDiagnosticPrinter>(llvm::errs(), &*diagnostic_options);
|
|
161
|
+
clang::IntrusiveRefCntPtr<clang::DiagnosticIDs> diagnostic_ids;
|
|
162
|
+
std::unique_ptr<clang::DiagnosticsEngine> diagnostic_engine =
|
|
163
|
+
std::make_unique<clang::DiagnosticsEngine>(diagnostic_ids, &*diagnostic_options, text_diagnostic_printer.release());
|
|
164
|
+
|
|
165
|
+
clang::CompilerInstance compiler_instance;
|
|
166
|
+
|
|
167
|
+
auto& compiler_invocation = compiler_instance.getInvocation();
|
|
168
|
+
clang::CompilerInvocation::CreateFromArgs(compiler_invocation, args, *diagnostic_engine.release());
|
|
169
|
+
|
|
170
|
+
if(debug)
|
|
171
|
+
{
|
|
172
|
+
compiler_invocation.getCodeGenOpts().setDebugInfo(clang::codegenoptions::FullDebugInfo);
|
|
173
|
+
}
|
|
174
|
+
|
|
175
|
+
// Map code to a MemoryBuffer
|
|
176
|
+
std::unique_ptr<llvm::MemoryBuffer> buffer = llvm::MemoryBuffer::getMemBufferCopy(cpp_src);
|
|
177
|
+
compiler_invocation.getPreprocessorOpts().addRemappedFile(input_file.c_str(), buffer.get());
|
|
178
|
+
|
|
179
|
+
// According to https://llvm.org/docs/CompileCudaWithLLVM.html, "Both clang and nvcc define `__CUDACC__` during CUDA compilation."
|
|
180
|
+
// But this normally happens in the __clang_cuda_runtime_wrapper.h header, which we don't include.
|
|
181
|
+
// The __CUDA__ and __CUDA_ARCH__ macros are internally defined by llvm-project/clang/lib/Frontend/InitPreprocessor.cpp
|
|
182
|
+
compiler_instance.getPreprocessorOpts().addMacroDef("__CUDACC__");
|
|
183
|
+
|
|
184
|
+
if(!debug)
|
|
185
|
+
{
|
|
186
|
+
compiler_instance.getPreprocessorOpts().addMacroDef("NDEBUG");
|
|
187
|
+
}
|
|
188
|
+
|
|
189
|
+
compiler_instance.getLangOpts().CUDA = 1;
|
|
190
|
+
compiler_instance.getLangOpts().CUDAIsDevice = 1;
|
|
191
|
+
compiler_instance.getLangOpts().CUDAAllowVariadicFunctions = 1;
|
|
192
|
+
|
|
193
|
+
compiler_instance.createDiagnostics(text_diagnostic_printer.get(), false);
|
|
145
194
|
|
|
146
|
-
|
|
195
|
+
clang::EmitLLVMOnlyAction emit_llvm_only_action(&context);
|
|
196
|
+
bool success = compiler_instance.ExecuteAction(emit_llvm_only_action);
|
|
197
|
+
buffer.release();
|
|
198
|
+
|
|
199
|
+
return success ? std::move(emit_llvm_only_action.takeModule()) : nullptr;
|
|
200
|
+
}
|
|
147
201
|
|
|
202
|
+
extern "C" {
|
|
203
|
+
|
|
204
|
+
WP_API int compile_cpp(const char* cpp_src, const char *input_file, const char* include_dir, const char* output_file, bool debug)
|
|
205
|
+
{
|
|
148
206
|
initialize_llvm();
|
|
149
207
|
|
|
150
208
|
llvm::LLVMContext context;
|
|
@@ -155,13 +213,13 @@ WP_API int compile_cpp(const char* cpp_src, const char* include_dir, const char*
|
|
|
155
213
|
return -1;
|
|
156
214
|
}
|
|
157
215
|
|
|
158
|
-
std::string
|
|
159
|
-
const llvm::Target* target = llvm::TargetRegistry::lookupTarget(target_triple,
|
|
216
|
+
std::string error;
|
|
217
|
+
const llvm::Target* target = llvm::TargetRegistry::lookupTarget(target_triple, error);
|
|
160
218
|
|
|
161
219
|
const char* CPU = "generic";
|
|
162
220
|
const char* features = "";
|
|
163
221
|
llvm::TargetOptions target_options;
|
|
164
|
-
llvm::Reloc::Model relocation_model = llvm::Reloc::PIC_; //
|
|
222
|
+
llvm::Reloc::Model relocation_model = llvm::Reloc::PIC_; // Position Independent Code
|
|
165
223
|
llvm::CodeModel::Model code_model = llvm::CodeModel::Large; // Don't make assumptions about displacement sizes
|
|
166
224
|
llvm::TargetMachine* target_machine = target->createTargetMachine(target_triple, CPU, features, target_options, relocation_model, code_model);
|
|
167
225
|
|
|
@@ -182,6 +240,59 @@ WP_API int compile_cpp(const char* cpp_src, const char* include_dir, const char*
|
|
|
182
240
|
return 0;
|
|
183
241
|
}
|
|
184
242
|
|
|
243
|
+
WP_API int compile_cuda(const char* cpp_src, const char *input_file, const char* include_dir, const char* output_file, bool debug)
|
|
244
|
+
{
|
|
245
|
+
initialize_llvm();
|
|
246
|
+
|
|
247
|
+
llvm::LLVMContext context;
|
|
248
|
+
std::unique_ptr<llvm::Module> module = cuda_to_llvm(input_file, cpp_src, include_dir, debug, context);
|
|
249
|
+
|
|
250
|
+
if(!module)
|
|
251
|
+
{
|
|
252
|
+
return -1;
|
|
253
|
+
}
|
|
254
|
+
|
|
255
|
+
std::string error;
|
|
256
|
+
const llvm::Target* target = llvm::TargetRegistry::lookupTarget("nvptx64-nvidia-cuda", error);
|
|
257
|
+
|
|
258
|
+
const char* CPU = "sm_70";
|
|
259
|
+
const char* features = "+ptx75"; // Warp requires CUDA 11.5, which supports PTX ISA 7.5
|
|
260
|
+
llvm::TargetOptions target_options;
|
|
261
|
+
llvm::Reloc::Model relocation_model = llvm::Reloc::PIC_;
|
|
262
|
+
llvm::TargetMachine* target_machine = target->createTargetMachine("nvptx64-nvidia-cuda", CPU, features, target_options, relocation_model);
|
|
263
|
+
|
|
264
|
+
module->setDataLayout(target_machine->createDataLayout());
|
|
265
|
+
|
|
266
|
+
// Link libdevice
|
|
267
|
+
llvm::SMDiagnostic diagnostic;
|
|
268
|
+
std::string libdevice_path = std::string(include_dir) + "/libdevice/libdevice.10.bc";
|
|
269
|
+
std::unique_ptr<llvm::Module> libdevice(llvm::parseIRFile(libdevice_path, diagnostic, context));
|
|
270
|
+
if(!libdevice)
|
|
271
|
+
{
|
|
272
|
+
return -1;
|
|
273
|
+
}
|
|
274
|
+
|
|
275
|
+
llvm::Linker linker(*module.get());
|
|
276
|
+
if(linker.linkInModule(std::move(libdevice), llvm::Linker::Flags::LinkOnlyNeeded) == true)
|
|
277
|
+
{
|
|
278
|
+
return -1;
|
|
279
|
+
}
|
|
280
|
+
|
|
281
|
+
std::error_code error_code;
|
|
282
|
+
llvm::raw_fd_ostream output(output_file, error_code, llvm::sys::fs::OF_None);
|
|
283
|
+
|
|
284
|
+
llvm::legacy::PassManager pass_manager;
|
|
285
|
+
llvm::CodeGenFileType file_type = llvm::CGFT_AssemblyFile;
|
|
286
|
+
target_machine->addPassesToEmitFile(pass_manager, output, nullptr, file_type);
|
|
287
|
+
|
|
288
|
+
pass_manager.run(*module);
|
|
289
|
+
output.flush();
|
|
290
|
+
|
|
291
|
+
delete target_machine;
|
|
292
|
+
|
|
293
|
+
return 0;
|
|
294
|
+
}
|
|
295
|
+
|
|
185
296
|
// Global JIT instance
|
|
186
297
|
static llvm::orc::LLJIT* jit = nullptr;
|
|
187
298
|
|
|
@@ -248,6 +359,7 @@ WP_API int load_obj(const char* object_file, const char* module_name)
|
|
|
248
359
|
SYMBOL(log10f), SYMBOL_T(log10, double(*)(double)),
|
|
249
360
|
SYMBOL(expf), SYMBOL_T(exp, double(*)(double)),
|
|
250
361
|
SYMBOL(sqrtf), SYMBOL_T(sqrt, double(*)(double)),
|
|
362
|
+
SYMBOL(cbrtf), SYMBOL_T(cbrt, double(*)(double)),
|
|
251
363
|
SYMBOL(powf), SYMBOL_T(pow, double(*)(double, double)),
|
|
252
364
|
SYMBOL(floorf), SYMBOL_T(floor, double(*)(double)),
|
|
253
365
|
SYMBOL(ceilf), SYMBOL_T(ceil, double(*)(double)),
|
|
@@ -276,7 +388,7 @@ WP_API int load_obj(const char* object_file, const char* module_name)
|
|
|
276
388
|
SYMBOL(__chkstk),
|
|
277
389
|
#elif defined(__APPLE__)
|
|
278
390
|
SYMBOL(__bzero),
|
|
279
|
-
SYMBOL(__sincos_stret),
|
|
391
|
+
SYMBOL(__sincos_stret), SYMBOL(__sincosf_stret),
|
|
280
392
|
#else
|
|
281
393
|
SYMBOL(sincosf), SYMBOL_T(sincos, void(*)(double,double*,double*)),
|
|
282
394
|
#endif
|
|
@@ -335,7 +447,7 @@ WP_API uint64_t lookup(const char* dll_name, const char* function_name)
|
|
|
335
447
|
if(!func)
|
|
336
448
|
{
|
|
337
449
|
std::cerr << "Failed to lookup symbol: " << llvm::toString(func.takeError()) << std::endl;
|
|
338
|
-
return
|
|
450
|
+
return 0;
|
|
339
451
|
}
|
|
340
452
|
|
|
341
453
|
return func->getValue();
|
warp/native/crt.cpp
CHANGED
|
@@ -29,79 +29,4 @@ extern "C" WP_API void _wp_assert(const char* expression, const char* file, unsi
|
|
|
29
29
|
// Now invoke the standard assert(), which may abort the program or break
|
|
30
30
|
// into the debugger as decided by the runtime environment.
|
|
31
31
|
assert(false && "assert() failed");
|
|
32
|
-
}
|
|
33
|
-
|
|
34
|
-
// Export CRT symbols from warp.dll for use by compute kernel DLLs
|
|
35
|
-
// These are declared in crt.h
|
|
36
|
-
#if defined(_MSC_VER)
|
|
37
|
-
|
|
38
|
-
#pragma comment(linker,"/export:printf")
|
|
39
|
-
|
|
40
|
-
#pragma comment(linker,"/export:abs")
|
|
41
|
-
#pragma comment(linker,"/export:llabs")
|
|
42
|
-
|
|
43
|
-
#pragma comment(linker,"/export:fmodf")
|
|
44
|
-
#pragma comment(linker,"/export:fmod")
|
|
45
|
-
#pragma comment(linker,"/export:logf")
|
|
46
|
-
#pragma comment(linker,"/export:log")
|
|
47
|
-
#pragma comment(linker,"/export:log2f")
|
|
48
|
-
#pragma comment(linker,"/export:log2")
|
|
49
|
-
#pragma comment(linker,"/export:log10f")
|
|
50
|
-
#pragma comment(linker,"/export:log10")
|
|
51
|
-
#pragma comment(linker,"/export:expf")
|
|
52
|
-
#pragma comment(linker,"/export:exp")
|
|
53
|
-
#pragma comment(linker,"/export:sqrtf")
|
|
54
|
-
#pragma comment(linker,"/export:sqrt")
|
|
55
|
-
#pragma comment(linker,"/export:powf")
|
|
56
|
-
#pragma comment(linker,"/export:pow")
|
|
57
|
-
#pragma comment(linker,"/export:floorf")
|
|
58
|
-
#pragma comment(linker,"/export:floor")
|
|
59
|
-
#pragma comment(linker,"/export:ceilf")
|
|
60
|
-
#pragma comment(linker,"/export:ceil")
|
|
61
|
-
#pragma comment(linker,"/export:fabsf")
|
|
62
|
-
#pragma comment(linker,"/export:fabs")
|
|
63
|
-
#pragma comment(linker,"/export:roundf")
|
|
64
|
-
#pragma comment(linker,"/export:round")
|
|
65
|
-
#pragma comment(linker,"/export:truncf")
|
|
66
|
-
#pragma comment(linker,"/export:trunc")
|
|
67
|
-
#pragma comment(linker,"/export:rintf")
|
|
68
|
-
#pragma comment(linker,"/export:rint")
|
|
69
|
-
#pragma comment(linker,"/export:acosf")
|
|
70
|
-
#pragma comment(linker,"/export:acos")
|
|
71
|
-
#pragma comment(linker,"/export:asinf")
|
|
72
|
-
#pragma comment(linker,"/export:asin")
|
|
73
|
-
#pragma comment(linker,"/export:atanf")
|
|
74
|
-
#pragma comment(linker,"/export:atan")
|
|
75
|
-
#pragma comment(linker,"/export:atan2f")
|
|
76
|
-
#pragma comment(linker,"/export:atan2")
|
|
77
|
-
#pragma comment(linker,"/export:cosf")
|
|
78
|
-
#pragma comment(linker,"/export:cos")
|
|
79
|
-
#pragma comment(linker,"/export:sinf")
|
|
80
|
-
#pragma comment(linker,"/export:sin")
|
|
81
|
-
#pragma comment(linker,"/export:tanf")
|
|
82
|
-
#pragma comment(linker,"/export:tan")
|
|
83
|
-
#pragma comment(linker,"/export:sinhf")
|
|
84
|
-
#pragma comment(linker,"/export:sinh")
|
|
85
|
-
#pragma comment(linker,"/export:coshf")
|
|
86
|
-
#pragma comment(linker,"/export:cosh")
|
|
87
|
-
#pragma comment(linker,"/export:tanhf")
|
|
88
|
-
#pragma comment(linker,"/export:tanh")
|
|
89
|
-
#pragma comment(linker,"/export:fmaf")
|
|
90
|
-
|
|
91
|
-
#pragma comment(linker,"/export:memset")
|
|
92
|
-
#pragma comment(linker,"/export:memcpy")
|
|
93
|
-
|
|
94
|
-
#pragma comment(linker,"/export:_wp_isfinite")
|
|
95
|
-
#pragma comment(linker,"/export:_wp_assert")
|
|
96
|
-
|
|
97
|
-
// For functions with large stack frames the MSVC compiler will emit a call to
|
|
98
|
-
// __chkstk() to linearly touch each memory page. This grows the stack without
|
|
99
|
-
// triggering the stack overflow guards.
|
|
100
|
-
#pragma comment(linker,"/export:__chkstk")
|
|
101
|
-
|
|
102
|
-
// The MSVC linker checks for the _fltused symbol if any floating-point
|
|
103
|
-
// functionality is used. It's defined by the Microsoft CRT to indicate that
|
|
104
|
-
// the x87 FPU control word was properly initialized.
|
|
105
|
-
#pragma comment(linker,"/export:_fltused")
|
|
106
|
-
|
|
107
|
-
#endif // _MSC_VER
|
|
32
|
+
}
|
warp/native/crt.h
CHANGED
|
@@ -30,15 +30,15 @@
|
|
|
30
30
|
#define WP_API
|
|
31
31
|
#endif
|
|
32
32
|
|
|
33
|
-
|
|
33
|
+
#if !defined(__CUDA_ARCH__)
|
|
34
34
|
|
|
35
35
|
// Helper for implementing assert() macro
|
|
36
|
-
WP_API void _wp_assert(const char* message, const char* file, unsigned int line);
|
|
36
|
+
extern "C" WP_API void _wp_assert(const char* message, const char* file, unsigned int line);
|
|
37
37
|
|
|
38
38
|
// Helper for implementing isfinite()
|
|
39
|
-
WP_API int _wp_isfinite(double);
|
|
39
|
+
extern "C" WP_API int _wp_isfinite(double);
|
|
40
40
|
|
|
41
|
-
|
|
41
|
+
#endif // !__CUDA_ARCH__
|
|
42
42
|
|
|
43
43
|
#if !defined(WP_NO_CRT)
|
|
44
44
|
|
|
@@ -52,106 +52,6 @@ WP_API int _wp_isfinite(double);
|
|
|
52
52
|
|
|
53
53
|
#else
|
|
54
54
|
|
|
55
|
-
#if defined(__CUDACC__)
|
|
56
|
-
|
|
57
|
-
// stdio.h
|
|
58
|
-
extern "C" __device__ int printf(const char* format, ... );
|
|
59
|
-
|
|
60
|
-
#else
|
|
61
|
-
|
|
62
|
-
extern "C" {
|
|
63
|
-
|
|
64
|
-
// stdio.h
|
|
65
|
-
int printf(const char * format, ... );
|
|
66
|
-
|
|
67
|
-
// stdlib.h
|
|
68
|
-
int abs(int);
|
|
69
|
-
long long llabs(long long);
|
|
70
|
-
|
|
71
|
-
// math.h
|
|
72
|
-
float fmodf(float, float);
|
|
73
|
-
double fmod(double, double);
|
|
74
|
-
float logf(float);
|
|
75
|
-
double log(double);
|
|
76
|
-
float log2f(float);
|
|
77
|
-
double log2(double);
|
|
78
|
-
float log10f(float);
|
|
79
|
-
double log10(double);
|
|
80
|
-
float expf(float);
|
|
81
|
-
double exp(double);
|
|
82
|
-
float sqrtf(float);
|
|
83
|
-
double sqrt(double);
|
|
84
|
-
float powf(float, float);
|
|
85
|
-
double pow(double, double);
|
|
86
|
-
float floorf(float);
|
|
87
|
-
double floor(double);
|
|
88
|
-
float ceilf(float);
|
|
89
|
-
double ceil(double);
|
|
90
|
-
float fabsf(float);
|
|
91
|
-
double fabs(double);
|
|
92
|
-
float roundf(float);
|
|
93
|
-
double round(double);
|
|
94
|
-
float truncf(float);
|
|
95
|
-
double trunc(double);
|
|
96
|
-
float rintf(float);
|
|
97
|
-
double rint(double);
|
|
98
|
-
float acosf(float);
|
|
99
|
-
double acos(double);
|
|
100
|
-
float asinf(float);
|
|
101
|
-
double asin(double);
|
|
102
|
-
float atanf(float);
|
|
103
|
-
double atan(double);
|
|
104
|
-
float atan2f(float, float);
|
|
105
|
-
double atan2(double, double);
|
|
106
|
-
float cosf(float);
|
|
107
|
-
double cos(double);
|
|
108
|
-
float sinf(float);
|
|
109
|
-
double sin(double);
|
|
110
|
-
float tanf(float);
|
|
111
|
-
double tan(double);
|
|
112
|
-
float sinhf(float);
|
|
113
|
-
double sinh(double);
|
|
114
|
-
float coshf(float);
|
|
115
|
-
double cosh(double);
|
|
116
|
-
float tanhf(float);
|
|
117
|
-
double tanh(double);
|
|
118
|
-
float fmaf(float, float, float);
|
|
119
|
-
|
|
120
|
-
// stddef.h
|
|
121
|
-
#if defined(_WIN32)
|
|
122
|
-
using size_t = unsigned __int64;
|
|
123
|
-
#else
|
|
124
|
-
using size_t = unsigned long;
|
|
125
|
-
#endif
|
|
126
|
-
|
|
127
|
-
// string.h
|
|
128
|
-
void* memset(void*, int, size_t);
|
|
129
|
-
void* memcpy(void*, const void*, size_t);
|
|
130
|
-
|
|
131
|
-
// stdlib.h
|
|
132
|
-
void* malloc(size_t);
|
|
133
|
-
void free(void*);
|
|
134
|
-
|
|
135
|
-
} // extern "C"
|
|
136
|
-
|
|
137
|
-
// cmath
|
|
138
|
-
inline bool isfinite(double x)
|
|
139
|
-
{
|
|
140
|
-
return _wp_isfinite(x);
|
|
141
|
-
}
|
|
142
|
-
|
|
143
|
-
// assert.h
|
|
144
|
-
#ifdef NDEBUG
|
|
145
|
-
#define assert(expression) ((void)0)
|
|
146
|
-
#else
|
|
147
|
-
#define assert(expression) (void)( \
|
|
148
|
-
(!!(expression)) || \
|
|
149
|
-
(_wp_assert((#expression), (__FILE__), (unsigned)(__LINE__)), 0) \
|
|
150
|
-
)
|
|
151
|
-
#endif
|
|
152
|
-
|
|
153
|
-
#endif // !__CUDACC__
|
|
154
|
-
|
|
155
55
|
// These definitions are taken from Jitify: https://github.com/NVIDIA/jitify
|
|
156
56
|
|
|
157
57
|
/// float.h
|
|
@@ -221,6 +121,9 @@ enum {
|
|
|
221
121
|
#define LLONG_MIN (-LLONG_MAX - 1LL)
|
|
222
122
|
#define ULLONG_MAX 18446744073709551615ULL
|
|
223
123
|
|
|
124
|
+
#define INFINITY ((float)(DBL_MAX * DBL_MAX))
|
|
125
|
+
#define HUGE_VAL ((double)INFINITY)
|
|
126
|
+
#define HUGE_VALF ((float)INFINITY)
|
|
224
127
|
|
|
225
128
|
/// stdint.h
|
|
226
129
|
typedef signed char int8_t;
|
|
@@ -325,4 +228,108 @@ typedef unsigned long long uint64_t;
|
|
|
325
228
|
|
|
326
229
|
#define M_PI 3.14159265358979323846
|
|
327
230
|
|
|
231
|
+
#if defined(__CUDACC__)
|
|
232
|
+
|
|
233
|
+
#if defined(__clang__)
|
|
234
|
+
// When compiling CUDA with barebones Clang we need to define its builtins and runtime functions ourselves.
|
|
235
|
+
#include "cuda_crt.h"
|
|
236
|
+
#endif
|
|
237
|
+
|
|
238
|
+
#else
|
|
239
|
+
|
|
240
|
+
extern "C" {
|
|
241
|
+
|
|
242
|
+
// stdio.h
|
|
243
|
+
int printf(const char * format, ... );
|
|
244
|
+
|
|
245
|
+
// stdlib.h
|
|
246
|
+
int abs(int);
|
|
247
|
+
long long llabs(long long);
|
|
248
|
+
|
|
249
|
+
// math.h
|
|
250
|
+
float fmodf(float, float);
|
|
251
|
+
double fmod(double, double);
|
|
252
|
+
float logf(float);
|
|
253
|
+
double log(double);
|
|
254
|
+
float log2f(float);
|
|
255
|
+
double log2(double);
|
|
256
|
+
float log10f(float);
|
|
257
|
+
double log10(double);
|
|
258
|
+
float expf(float);
|
|
259
|
+
double exp(double);
|
|
260
|
+
float sqrtf(float);
|
|
261
|
+
double sqrt(double);
|
|
262
|
+
float cbrtf(float);
|
|
263
|
+
double cbrt(double);
|
|
264
|
+
float powf(float, float);
|
|
265
|
+
double pow(double, double);
|
|
266
|
+
float floorf(float);
|
|
267
|
+
double floor(double);
|
|
268
|
+
float ceilf(float);
|
|
269
|
+
double ceil(double);
|
|
270
|
+
float fabsf(float);
|
|
271
|
+
double fabs(double);
|
|
272
|
+
float roundf(float);
|
|
273
|
+
double round(double);
|
|
274
|
+
float truncf(float);
|
|
275
|
+
double trunc(double);
|
|
276
|
+
float rintf(float);
|
|
277
|
+
double rint(double);
|
|
278
|
+
float acosf(float);
|
|
279
|
+
double acos(double);
|
|
280
|
+
float asinf(float);
|
|
281
|
+
double asin(double);
|
|
282
|
+
float atanf(float);
|
|
283
|
+
double atan(double);
|
|
284
|
+
float atan2f(float, float);
|
|
285
|
+
double atan2(double, double);
|
|
286
|
+
float cosf(float);
|
|
287
|
+
double cos(double);
|
|
288
|
+
float sinf(float);
|
|
289
|
+
double sin(double);
|
|
290
|
+
float tanf(float);
|
|
291
|
+
double tan(double);
|
|
292
|
+
float sinhf(float);
|
|
293
|
+
double sinh(double);
|
|
294
|
+
float coshf(float);
|
|
295
|
+
double cosh(double);
|
|
296
|
+
float tanhf(float);
|
|
297
|
+
double tanh(double);
|
|
298
|
+
float fmaf(float, float, float);
|
|
299
|
+
|
|
300
|
+
// stddef.h
|
|
301
|
+
#if defined(_WIN32)
|
|
302
|
+
using size_t = unsigned __int64;
|
|
303
|
+
#else
|
|
304
|
+
using size_t = unsigned long;
|
|
305
|
+
#endif
|
|
306
|
+
|
|
307
|
+
// string.h
|
|
308
|
+
void* memset(void*, int, size_t);
|
|
309
|
+
void* memcpy(void*, const void*, size_t);
|
|
310
|
+
|
|
311
|
+
// stdlib.h
|
|
312
|
+
void* malloc(size_t);
|
|
313
|
+
void free(void*);
|
|
314
|
+
|
|
315
|
+
} // extern "C"
|
|
316
|
+
|
|
317
|
+
// cmath
|
|
318
|
+
inline bool isfinite(double x)
|
|
319
|
+
{
|
|
320
|
+
return _wp_isfinite(x);
|
|
321
|
+
}
|
|
322
|
+
|
|
323
|
+
// assert.h
|
|
324
|
+
#ifdef NDEBUG
|
|
325
|
+
#define assert(expression) ((void)0)
|
|
326
|
+
#else
|
|
327
|
+
#define assert(expression) (void)( \
|
|
328
|
+
(!!(expression)) || \
|
|
329
|
+
(_wp_assert((#expression), (__FILE__), (unsigned)(__LINE__)), 0) \
|
|
330
|
+
)
|
|
331
|
+
#endif
|
|
332
|
+
|
|
333
|
+
#endif // !__CUDACC__
|
|
334
|
+
|
|
328
335
|
#endif // WP_NO_CRT
|