warp-lang 1.7.2rc1__py3-none-win_amd64.whl → 1.8.1__py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of warp-lang might be problematic. Click here for more details.
- warp/__init__.py +3 -1
- warp/__init__.pyi +3489 -1
- warp/autograd.py +45 -122
- warp/bin/warp-clang.dll +0 -0
- warp/bin/warp.dll +0 -0
- warp/build.py +241 -252
- warp/build_dll.py +130 -26
- warp/builtins.py +1907 -384
- warp/codegen.py +272 -104
- warp/config.py +12 -1
- warp/constants.py +1 -1
- warp/context.py +770 -238
- warp/dlpack.py +1 -1
- warp/examples/benchmarks/benchmark_cloth.py +2 -2
- warp/examples/benchmarks/benchmark_tile_sort.py +155 -0
- warp/examples/core/example_sample_mesh.py +1 -1
- warp/examples/core/example_spin_lock.py +93 -0
- warp/examples/core/example_work_queue.py +118 -0
- warp/examples/fem/example_adaptive_grid.py +5 -5
- warp/examples/fem/example_apic_fluid.py +1 -1
- warp/examples/fem/example_burgers.py +1 -1
- warp/examples/fem/example_convection_diffusion.py +9 -6
- warp/examples/fem/example_darcy_ls_optimization.py +489 -0
- warp/examples/fem/example_deformed_geometry.py +1 -1
- warp/examples/fem/example_diffusion.py +2 -2
- warp/examples/fem/example_diffusion_3d.py +1 -1
- warp/examples/fem/example_distortion_energy.py +1 -1
- warp/examples/fem/example_elastic_shape_optimization.py +387 -0
- warp/examples/fem/example_magnetostatics.py +5 -3
- warp/examples/fem/example_mixed_elasticity.py +5 -3
- warp/examples/fem/example_navier_stokes.py +11 -9
- warp/examples/fem/example_nonconforming_contact.py +5 -3
- warp/examples/fem/example_streamlines.py +8 -3
- warp/examples/fem/utils.py +9 -8
- warp/examples/interop/example_jax_callable.py +34 -4
- warp/examples/interop/example_jax_ffi_callback.py +2 -2
- warp/examples/interop/example_jax_kernel.py +27 -1
- warp/examples/optim/example_drone.py +1 -1
- warp/examples/sim/example_cloth.py +1 -1
- warp/examples/sim/example_cloth_self_contact.py +48 -54
- warp/examples/tile/example_tile_block_cholesky.py +502 -0
- warp/examples/tile/example_tile_cholesky.py +2 -1
- warp/examples/tile/example_tile_convolution.py +1 -1
- warp/examples/tile/example_tile_filtering.py +1 -1
- warp/examples/tile/example_tile_matmul.py +1 -1
- warp/examples/tile/example_tile_mlp.py +2 -0
- warp/fabric.py +7 -7
- warp/fem/__init__.py +5 -0
- warp/fem/adaptivity.py +1 -1
- warp/fem/cache.py +152 -63
- warp/fem/dirichlet.py +2 -2
- warp/fem/domain.py +136 -6
- warp/fem/field/field.py +141 -99
- warp/fem/field/nodal_field.py +85 -39
- warp/fem/field/virtual.py +99 -52
- warp/fem/geometry/adaptive_nanogrid.py +91 -86
- warp/fem/geometry/closest_point.py +13 -0
- warp/fem/geometry/deformed_geometry.py +102 -40
- warp/fem/geometry/element.py +56 -2
- warp/fem/geometry/geometry.py +323 -22
- warp/fem/geometry/grid_2d.py +157 -62
- warp/fem/geometry/grid_3d.py +116 -20
- warp/fem/geometry/hexmesh.py +86 -20
- warp/fem/geometry/nanogrid.py +166 -86
- warp/fem/geometry/partition.py +59 -25
- warp/fem/geometry/quadmesh.py +86 -135
- warp/fem/geometry/tetmesh.py +47 -119
- warp/fem/geometry/trimesh.py +77 -270
- warp/fem/integrate.py +181 -95
- warp/fem/linalg.py +25 -58
- warp/fem/operator.py +124 -27
- warp/fem/quadrature/pic_quadrature.py +36 -14
- warp/fem/quadrature/quadrature.py +40 -16
- warp/fem/space/__init__.py +1 -1
- warp/fem/space/basis_function_space.py +66 -46
- warp/fem/space/basis_space.py +17 -4
- warp/fem/space/dof_mapper.py +1 -1
- warp/fem/space/function_space.py +2 -2
- warp/fem/space/grid_2d_function_space.py +4 -1
- warp/fem/space/hexmesh_function_space.py +4 -2
- warp/fem/space/nanogrid_function_space.py +3 -1
- warp/fem/space/partition.py +11 -2
- warp/fem/space/quadmesh_function_space.py +4 -1
- warp/fem/space/restriction.py +5 -2
- warp/fem/space/shape/__init__.py +10 -8
- warp/fem/space/tetmesh_function_space.py +4 -1
- warp/fem/space/topology.py +52 -21
- warp/fem/space/trimesh_function_space.py +4 -1
- warp/fem/utils.py +53 -8
- warp/jax.py +1 -2
- warp/jax_experimental/ffi.py +210 -67
- warp/jax_experimental/xla_ffi.py +37 -24
- warp/math.py +171 -1
- warp/native/array.h +103 -4
- warp/native/builtin.h +182 -35
- warp/native/coloring.cpp +6 -2
- warp/native/cuda_util.cpp +1 -1
- warp/native/exports.h +118 -63
- warp/native/intersect.h +5 -5
- warp/native/mat.h +8 -13
- warp/native/mathdx.cpp +11 -5
- warp/native/matnn.h +1 -123
- warp/native/mesh.h +1 -1
- warp/native/quat.h +34 -6
- warp/native/rand.h +7 -7
- warp/native/sparse.cpp +121 -258
- warp/native/sparse.cu +181 -274
- warp/native/spatial.h +305 -17
- warp/native/svd.h +23 -8
- warp/native/tile.h +603 -73
- warp/native/tile_radix_sort.h +1112 -0
- warp/native/tile_reduce.h +239 -13
- warp/native/tile_scan.h +240 -0
- warp/native/tuple.h +189 -0
- warp/native/vec.h +10 -20
- warp/native/warp.cpp +36 -4
- warp/native/warp.cu +588 -52
- warp/native/warp.h +47 -74
- warp/optim/linear.py +5 -1
- warp/paddle.py +7 -8
- warp/py.typed +0 -0
- warp/render/render_opengl.py +110 -80
- warp/render/render_usd.py +124 -62
- warp/sim/__init__.py +9 -0
- warp/sim/collide.py +253 -80
- warp/sim/graph_coloring.py +8 -1
- warp/sim/import_mjcf.py +4 -3
- warp/sim/import_usd.py +11 -7
- warp/sim/integrator.py +5 -2
- warp/sim/integrator_euler.py +1 -1
- warp/sim/integrator_featherstone.py +1 -1
- warp/sim/integrator_vbd.py +761 -322
- warp/sim/integrator_xpbd.py +1 -1
- warp/sim/model.py +265 -260
- warp/sim/utils.py +10 -7
- warp/sparse.py +303 -166
- warp/tape.py +54 -51
- warp/tests/cuda/test_conditional_captures.py +1046 -0
- warp/tests/cuda/test_streams.py +1 -1
- warp/tests/geometry/test_volume.py +2 -2
- warp/tests/interop/test_dlpack.py +9 -9
- warp/tests/interop/test_jax.py +0 -1
- warp/tests/run_coverage_serial.py +1 -1
- warp/tests/sim/disabled_kinematics.py +2 -2
- warp/tests/sim/{test_vbd.py → test_cloth.py} +378 -112
- warp/tests/sim/test_collision.py +159 -51
- warp/tests/sim/test_coloring.py +91 -2
- warp/tests/test_array.py +254 -2
- warp/tests/test_array_reduce.py +2 -2
- warp/tests/test_assert.py +53 -0
- warp/tests/test_atomic_cas.py +312 -0
- warp/tests/test_codegen.py +142 -19
- warp/tests/test_conditional.py +47 -1
- warp/tests/test_ctypes.py +0 -20
- warp/tests/test_devices.py +8 -0
- warp/tests/test_fabricarray.py +4 -2
- warp/tests/test_fem.py +58 -25
- warp/tests/test_func.py +42 -1
- warp/tests/test_grad.py +1 -1
- warp/tests/test_lerp.py +1 -3
- warp/tests/test_map.py +481 -0
- warp/tests/test_mat.py +23 -24
- warp/tests/test_quat.py +28 -15
- warp/tests/test_rounding.py +10 -38
- warp/tests/test_runlength_encode.py +7 -7
- warp/tests/test_smoothstep.py +1 -1
- warp/tests/test_sparse.py +83 -2
- warp/tests/test_spatial.py +507 -1
- warp/tests/test_static.py +48 -0
- warp/tests/test_struct.py +2 -2
- warp/tests/test_tape.py +38 -0
- warp/tests/test_tuple.py +265 -0
- warp/tests/test_types.py +2 -2
- warp/tests/test_utils.py +24 -18
- warp/tests/test_vec.py +38 -408
- warp/tests/test_vec_constructors.py +325 -0
- warp/tests/tile/test_tile.py +438 -131
- warp/tests/tile/test_tile_mathdx.py +518 -14
- warp/tests/tile/test_tile_matmul.py +179 -0
- warp/tests/tile/test_tile_reduce.py +307 -5
- warp/tests/tile/test_tile_shared_memory.py +136 -7
- warp/tests/tile/test_tile_sort.py +121 -0
- warp/tests/unittest_suites.py +14 -6
- warp/types.py +462 -308
- warp/utils.py +647 -86
- {warp_lang-1.7.2rc1.dist-info → warp_lang-1.8.1.dist-info}/METADATA +20 -6
- {warp_lang-1.7.2rc1.dist-info → warp_lang-1.8.1.dist-info}/RECORD +190 -176
- warp/stubs.py +0 -3381
- warp/tests/sim/test_xpbd.py +0 -399
- warp/tests/test_mlp.py +0 -282
- {warp_lang-1.7.2rc1.dist-info → warp_lang-1.8.1.dist-info}/WHEEL +0 -0
- {warp_lang-1.7.2rc1.dist-info → warp_lang-1.8.1.dist-info}/licenses/LICENSE.md +0 -0
- {warp_lang-1.7.2rc1.dist-info → warp_lang-1.8.1.dist-info}/top_level.txt +0 -0
warp/native/tuple.h
ADDED
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
3
|
+
* SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
*
|
|
5
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
+
* you may not use this file except in compliance with the License.
|
|
7
|
+
* You may obtain a copy of the License at
|
|
8
|
+
*
|
|
9
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
*
|
|
11
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
* See the License for the specific language governing permissions and
|
|
15
|
+
* limitations under the License.
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
#pragma once
|
|
19
|
+
|
|
20
|
+
namespace wp
|
|
21
|
+
{
|
|
22
|
+
|
|
23
|
+
template <typename... Types>
|
|
24
|
+
struct tuple_t;
|
|
25
|
+
|
|
26
|
+
template <>
|
|
27
|
+
struct tuple_t<>
|
|
28
|
+
{
|
|
29
|
+
|
|
30
|
+
static constexpr int size() { return 0; }
|
|
31
|
+
|
|
32
|
+
// Base case: empty tuple.
|
|
33
|
+
template <typename Callable>
|
|
34
|
+
void apply(Callable&&) const { }
|
|
35
|
+
};
|
|
36
|
+
|
|
37
|
+
template <typename Head, typename... Tail>
|
|
38
|
+
struct tuple_t<Head, Tail...>
|
|
39
|
+
{
|
|
40
|
+
Head head;
|
|
41
|
+
tuple_t<Tail...> tail;
|
|
42
|
+
|
|
43
|
+
CUDA_CALLABLE inline tuple_t() {}
|
|
44
|
+
CUDA_CALLABLE inline tuple_t(Head h, Tail... t) : head(h), tail(t...) {}
|
|
45
|
+
|
|
46
|
+
static constexpr int size() { return 1 + tuple_t<Tail...>::size(); }
|
|
47
|
+
|
|
48
|
+
// Applies a callable to each element.
|
|
49
|
+
template <typename Callable>
|
|
50
|
+
void apply(Callable&& func) const
|
|
51
|
+
{
|
|
52
|
+
func(head); // Apply the callable to the current element.
|
|
53
|
+
tail.apply(func); // Recursively process the rest of the tuple.
|
|
54
|
+
}
|
|
55
|
+
};
|
|
56
|
+
|
|
57
|
+
// Tuple constructor.
|
|
58
|
+
template <typename... Args>
|
|
59
|
+
CUDA_CALLABLE inline tuple_t<Args...>
|
|
60
|
+
tuple(
|
|
61
|
+
Args... args
|
|
62
|
+
)
|
|
63
|
+
{
|
|
64
|
+
return tuple_t<Args...>(args...);
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
// Helper to extract a value from the tuple.
|
|
68
|
+
// Can be replaced with simpler member function version when our CPU compiler
|
|
69
|
+
// backend supports constexpr if statements.
|
|
70
|
+
template <int N, typename Head, typename... Tail>
|
|
71
|
+
struct tuple_get
|
|
72
|
+
{
|
|
73
|
+
static CUDA_CALLABLE inline const auto&
|
|
74
|
+
value(
|
|
75
|
+
const tuple_t<Head, Tail...>& t
|
|
76
|
+
)
|
|
77
|
+
{
|
|
78
|
+
return tuple_get<N - 1, Tail...>::value(t.tail);
|
|
79
|
+
}
|
|
80
|
+
};
|
|
81
|
+
|
|
82
|
+
// Specialization for the base case N == 0. Simply return the head of the tuple.
|
|
83
|
+
template <typename Head, typename... Tail>
|
|
84
|
+
struct tuple_get<0, Head, Tail...>
|
|
85
|
+
{
|
|
86
|
+
static CUDA_CALLABLE inline const auto&
|
|
87
|
+
value(
|
|
88
|
+
const tuple_t<Head, Tail...>& t
|
|
89
|
+
)
|
|
90
|
+
{
|
|
91
|
+
return t.head;
|
|
92
|
+
}
|
|
93
|
+
};
|
|
94
|
+
|
|
95
|
+
template <int Index, typename... Args>
|
|
96
|
+
CUDA_CALLABLE inline auto
|
|
97
|
+
extract(
|
|
98
|
+
const tuple_t<Args...>& t
|
|
99
|
+
)
|
|
100
|
+
{
|
|
101
|
+
return tuple_get<Index, Args...>::value(t);
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
template <typename... Args>
|
|
105
|
+
CUDA_CALLABLE inline int
|
|
106
|
+
len(
|
|
107
|
+
const tuple_t<Args...>& t
|
|
108
|
+
)
|
|
109
|
+
{
|
|
110
|
+
return t.size();
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
template <typename... Args>
|
|
114
|
+
CUDA_CALLABLE inline void
|
|
115
|
+
adj_len(
|
|
116
|
+
const tuple_t<Args...>& t,
|
|
117
|
+
tuple_t<Args...>& adj_t,
|
|
118
|
+
int adj_ret
|
|
119
|
+
)
|
|
120
|
+
{
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
template <typename... Args>
|
|
124
|
+
CUDA_CALLABLE inline void
|
|
125
|
+
print(
|
|
126
|
+
const tuple_t<Args...>& t
|
|
127
|
+
)
|
|
128
|
+
{
|
|
129
|
+
t.apply([&](auto a) { print(a); });
|
|
130
|
+
}
|
|
131
|
+
|
|
132
|
+
template <typename... Args>
|
|
133
|
+
CUDA_CALLABLE inline void
|
|
134
|
+
adj_print(
|
|
135
|
+
const tuple_t<Args...>& t,
|
|
136
|
+
tuple_t<Args...>& adj_t
|
|
137
|
+
)
|
|
138
|
+
{
|
|
139
|
+
adj_t.apply([&](auto a) { print(a); });
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
CUDA_CALLABLE inline tuple_t<>
|
|
143
|
+
add(
|
|
144
|
+
const tuple_t<>& a,
|
|
145
|
+
const tuple_t<>& b
|
|
146
|
+
)
|
|
147
|
+
{
|
|
148
|
+
return tuple_t<>();
|
|
149
|
+
}
|
|
150
|
+
|
|
151
|
+
template <typename Head, typename... Tail>
|
|
152
|
+
CUDA_CALLABLE inline tuple_t<Head, Tail...>
|
|
153
|
+
add(
|
|
154
|
+
const tuple_t<Head, Tail...>& a,
|
|
155
|
+
const tuple_t<Head, Tail...>& b
|
|
156
|
+
)
|
|
157
|
+
{
|
|
158
|
+
tuple_t<Head, Tail...> out;
|
|
159
|
+
out.head = add(a.head, b.head);
|
|
160
|
+
out.tail = add(a.tail, b.tail);
|
|
161
|
+
return out;
|
|
162
|
+
}
|
|
163
|
+
|
|
164
|
+
CUDA_CALLABLE inline void
|
|
165
|
+
adj_add(
|
|
166
|
+
const tuple_t<>& a,
|
|
167
|
+
const tuple_t<>& b,
|
|
168
|
+
tuple_t<>& adj_a,
|
|
169
|
+
tuple_t<>& adj_b,
|
|
170
|
+
const tuple_t<>& adj_ret
|
|
171
|
+
)
|
|
172
|
+
{
|
|
173
|
+
}
|
|
174
|
+
|
|
175
|
+
template <typename Head, typename... Tail>
|
|
176
|
+
CUDA_CALLABLE inline void
|
|
177
|
+
adj_add(
|
|
178
|
+
const tuple_t<Head, Tail...>& a,
|
|
179
|
+
const tuple_t<Head, Tail...>& b,
|
|
180
|
+
tuple_t<Head, Tail...>& adj_a,
|
|
181
|
+
tuple_t<Head, Tail...>& adj_b,
|
|
182
|
+
const tuple_t<Head, Tail...>& adj_ret
|
|
183
|
+
)
|
|
184
|
+
{
|
|
185
|
+
adj_add(a.head, b.head, adj_a.head, adj_b.head, adj_ret.head);
|
|
186
|
+
adj_add(a.tail, b.tail, adj_a.tail, adj_b.tail, adj_ret.tail);
|
|
187
|
+
}
|
|
188
|
+
|
|
189
|
+
} // namespace wp
|
warp/native/vec.h
CHANGED
|
@@ -149,26 +149,15 @@ using vec2d = vec_t<2,double>;
|
|
|
149
149
|
using vec3d = vec_t<3,double>;
|
|
150
150
|
using vec4d = vec_t<4,double>;
|
|
151
151
|
|
|
152
|
-
//--------------
|
|
153
|
-
// vec<Length, Type> methods
|
|
154
|
-
|
|
155
|
-
// Should these accept const references as arguments? It's all
|
|
156
|
-
// inlined so maybe it doesn't matter? Even if it does, it
|
|
157
|
-
// probably depends on the Length of the vector...
|
|
158
|
-
|
|
159
|
-
// negation:
|
|
160
152
|
template<unsigned Length, typename Type>
|
|
161
|
-
inline CUDA_CALLABLE vec_t<Length, Type> operator - (vec_t<Length, Type
|
|
153
|
+
inline CUDA_CALLABLE vec_t<Length, Type> operator - (const vec_t<Length, Type>& x)
|
|
162
154
|
{
|
|
163
|
-
// NB: this constructor will initialize all ret's components to 0, which is
|
|
164
|
-
// unnecessary...
|
|
165
155
|
vec_t<Length, Type> ret;
|
|
166
|
-
for(
|
|
156
|
+
for(unsigned i=0; i < Length; ++i)
|
|
167
157
|
{
|
|
168
|
-
ret[i] = -
|
|
158
|
+
ret[i] = -x[i];
|
|
169
159
|
}
|
|
170
160
|
|
|
171
|
-
// Wonder if this does a load of copying when it returns... hopefully not as it's inlined?
|
|
172
161
|
return ret;
|
|
173
162
|
}
|
|
174
163
|
|
|
@@ -843,8 +832,9 @@ inline CUDA_CALLABLE void expect_near(const vec_t<Length, Type>& actual, const v
|
|
|
843
832
|
if (diff > tolerance)
|
|
844
833
|
{
|
|
845
834
|
printf("Error, expect_near() failed with tolerance "); print(tolerance);
|
|
846
|
-
printf("
|
|
847
|
-
printf("
|
|
835
|
+
printf(" Expected: "); print(expected);
|
|
836
|
+
printf(" Actual: "); print(actual);
|
|
837
|
+
printf(" Max absolute difference: "); print(diff);
|
|
848
838
|
}
|
|
849
839
|
}
|
|
850
840
|
|
|
@@ -979,11 +969,11 @@ template<unsigned Length, typename Type>
|
|
|
979
969
|
inline CUDA_CALLABLE void adj_div(Type s, vec_t<Length, Type> a, Type& adj_s, vec_t<Length, Type>& adj_a, const vec_t<Length, Type>& adj_ret)
|
|
980
970
|
{
|
|
981
971
|
|
|
982
|
-
|
|
983
|
-
|
|
984
|
-
for( unsigned i=0; i < Length; ++i )
|
|
972
|
+
for (unsigned i=0; i < Length; ++i)
|
|
985
973
|
{
|
|
986
|
-
|
|
974
|
+
Type inv = Type(1) / a[i];
|
|
975
|
+
adj_a[i] -= s * adj_ret[i] * inv * inv;
|
|
976
|
+
adj_s += adj_ret[i] * inv;
|
|
987
977
|
}
|
|
988
978
|
|
|
989
979
|
#if FP_CHECK
|
warp/native/warp.cpp
CHANGED
|
@@ -24,6 +24,11 @@
|
|
|
24
24
|
#include <stdlib.h>
|
|
25
25
|
#include <string.h>
|
|
26
26
|
|
|
27
|
+
// MSVC provides _aligned_malloc() instead of the standard aligned_alloc()
|
|
28
|
+
#if defined(_MSC_VER)
|
|
29
|
+
#include <malloc.h>
|
|
30
|
+
#endif
|
|
31
|
+
|
|
27
32
|
uint16_t float_to_half_bits(float x)
|
|
28
33
|
{
|
|
29
34
|
// adapted from Fabien Giesen's post: https://gist.github.com/rygorous/2156668
|
|
@@ -114,7 +119,7 @@ float half_bits_to_float(uint16_t u)
|
|
|
114
119
|
int init()
|
|
115
120
|
{
|
|
116
121
|
#if WP_ENABLE_CUDA
|
|
117
|
-
int cuda_init();
|
|
122
|
+
int cuda_init(void);
|
|
118
123
|
// note: it's safe to proceed even if CUDA initialization failed
|
|
119
124
|
cuda_init();
|
|
120
125
|
#endif
|
|
@@ -163,12 +168,28 @@ int is_debug_enabled()
|
|
|
163
168
|
|
|
164
169
|
void* alloc_host(size_t s)
|
|
165
170
|
{
|
|
166
|
-
|
|
171
|
+
// increase CPU array alignment for compatibility with other libs, e.g., JAX, XLA, Eigen.
|
|
172
|
+
size_t alignment = 64;
|
|
173
|
+
|
|
174
|
+
// msvc does not provide the standard aligned_alloc()
|
|
175
|
+
#if defined(_MSC_VER)
|
|
176
|
+
return _aligned_malloc(s, alignment);
|
|
177
|
+
#else
|
|
178
|
+
// ensure that the size is a multiple of alignment
|
|
179
|
+
size_t remainder = s % alignment;
|
|
180
|
+
if (remainder != 0)
|
|
181
|
+
s += alignment - remainder;
|
|
182
|
+
return aligned_alloc(alignment, s);
|
|
183
|
+
#endif
|
|
167
184
|
}
|
|
168
185
|
|
|
169
186
|
void free_host(void* ptr)
|
|
170
187
|
{
|
|
171
|
-
|
|
188
|
+
#if defined(_MSC_VER)
|
|
189
|
+
_aligned_free(ptr);
|
|
190
|
+
#else
|
|
191
|
+
free(ptr);
|
|
192
|
+
#endif
|
|
172
193
|
}
|
|
173
194
|
|
|
174
195
|
bool memcpy_h2h(void* dest, void* src, size_t n)
|
|
@@ -990,6 +1011,7 @@ WP_API int cuda_device_get_count() { return 0; }
|
|
|
990
1011
|
WP_API void* cuda_device_get_primary_context(int ordinal) { return NULL; }
|
|
991
1012
|
WP_API const char* cuda_device_get_name(int ordinal) { return NULL; }
|
|
992
1013
|
WP_API int cuda_device_get_arch(int ordinal) { return 0; }
|
|
1014
|
+
WP_API int cuda_device_get_sm_count(int ordinal) { return 0; }
|
|
993
1015
|
WP_API void cuda_device_get_uuid(int ordinal, char uuid[16]) {}
|
|
994
1016
|
WP_API int cuda_device_get_pci_domain_id(int ordinal) { return -1; }
|
|
995
1017
|
WP_API int cuda_device_get_pci_bus_id(int ordinal) { return -1; }
|
|
@@ -1050,10 +1072,20 @@ WP_API float cuda_event_elapsed_time(void* start_event, void* end_event) { retur
|
|
|
1050
1072
|
|
|
1051
1073
|
WP_API bool cuda_graph_begin_capture(void* context, void* stream, int external) { return false; }
|
|
1052
1074
|
WP_API bool cuda_graph_end_capture(void* context, void* stream, void** graph_ret) { return false; }
|
|
1075
|
+
WP_API bool cuda_graph_create_exec(void* context, void* stream, void* graph, void** graph_exec_ret) { return false; }
|
|
1053
1076
|
WP_API bool cuda_graph_launch(void* graph, void* stream) { return false; }
|
|
1054
1077
|
WP_API bool cuda_graph_destroy(void* context, void* graph) { return false; }
|
|
1078
|
+
WP_API bool cuda_graph_exec_destroy(void* context, void* graph_exec) { return false; }
|
|
1079
|
+
WP_API bool capture_debug_dot_print(void* graph, const char *path, uint32_t flags) { return false; }
|
|
1080
|
+
|
|
1081
|
+
WP_API bool cuda_graph_insert_if_else(void* context, void* stream, int* condition, void** if_graph_ret, void** else_graph_ret) { return false; }
|
|
1082
|
+
WP_API bool cuda_graph_insert_while(void* context, void* stream, int* condition, void** body_graph_ret, uint64_t* handle_ret) { return false; }
|
|
1083
|
+
WP_API bool cuda_graph_set_condition(void* context, void* stream, int* condition, uint64_t handle) { return false; }
|
|
1084
|
+
WP_API bool cuda_graph_pause_capture(void* context, void* stream, void** graph_ret) { return false; }
|
|
1085
|
+
WP_API bool cuda_graph_resume_capture(void* context, void* stream, void* graph) { return false; }
|
|
1086
|
+
WP_API bool cuda_graph_insert_child_graph(void* context, void* stream, void* child_graph) { return false; }
|
|
1055
1087
|
|
|
1056
|
-
WP_API size_t cuda_compile_program(const char* cuda_src, const char* program_name, int arch, const char* include_dir, int num_cuda_include_dirs, const char** cuda_include_dirs, bool debug, bool verbose, bool verify_fp, bool fast_math, bool fuse_fp, bool lineinfo, const char* output_path, size_t num_ltoirs, char** ltoirs, size_t* ltoir_sizes, int* ltoir_input_types) { return 0; }
|
|
1088
|
+
WP_API size_t cuda_compile_program(const char* cuda_src, const char* program_name, int arch, const char* include_dir, int num_cuda_include_dirs, const char** cuda_include_dirs, bool debug, bool verbose, bool verify_fp, bool fast_math, bool fuse_fp, bool lineinfo, bool compile_time_trace, const char* output_path, size_t num_ltoirs, char** ltoirs, size_t* ltoir_sizes, int* ltoir_input_types) { return 0; }
|
|
1057
1089
|
|
|
1058
1090
|
WP_API void* cuda_load_module(void* context, const char* ptx) { return NULL; }
|
|
1059
1091
|
WP_API void cuda_unload_module(void* context, void* module) {}
|