warp-lang 1.7.2rc1__py3-none-macosx_10_13_universal2.whl → 1.8.1__py3-none-macosx_10_13_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of warp-lang might be problematic. Click here for more details.

Files changed (192) hide show
  1. warp/__init__.py +3 -1
  2. warp/__init__.pyi +3489 -1
  3. warp/autograd.py +45 -122
  4. warp/bin/libwarp.dylib +0 -0
  5. warp/build.py +241 -252
  6. warp/build_dll.py +130 -26
  7. warp/builtins.py +1907 -384
  8. warp/codegen.py +272 -104
  9. warp/config.py +12 -1
  10. warp/constants.py +1 -1
  11. warp/context.py +770 -238
  12. warp/dlpack.py +1 -1
  13. warp/examples/benchmarks/benchmark_cloth.py +2 -2
  14. warp/examples/benchmarks/benchmark_tile_sort.py +155 -0
  15. warp/examples/core/example_sample_mesh.py +1 -1
  16. warp/examples/core/example_spin_lock.py +93 -0
  17. warp/examples/core/example_work_queue.py +118 -0
  18. warp/examples/fem/example_adaptive_grid.py +5 -5
  19. warp/examples/fem/example_apic_fluid.py +1 -1
  20. warp/examples/fem/example_burgers.py +1 -1
  21. warp/examples/fem/example_convection_diffusion.py +9 -6
  22. warp/examples/fem/example_darcy_ls_optimization.py +489 -0
  23. warp/examples/fem/example_deformed_geometry.py +1 -1
  24. warp/examples/fem/example_diffusion.py +2 -2
  25. warp/examples/fem/example_diffusion_3d.py +1 -1
  26. warp/examples/fem/example_distortion_energy.py +1 -1
  27. warp/examples/fem/example_elastic_shape_optimization.py +387 -0
  28. warp/examples/fem/example_magnetostatics.py +5 -3
  29. warp/examples/fem/example_mixed_elasticity.py +5 -3
  30. warp/examples/fem/example_navier_stokes.py +11 -9
  31. warp/examples/fem/example_nonconforming_contact.py +5 -3
  32. warp/examples/fem/example_streamlines.py +8 -3
  33. warp/examples/fem/utils.py +9 -8
  34. warp/examples/interop/example_jax_callable.py +34 -4
  35. warp/examples/interop/example_jax_ffi_callback.py +2 -2
  36. warp/examples/interop/example_jax_kernel.py +27 -1
  37. warp/examples/optim/example_drone.py +1 -1
  38. warp/examples/sim/example_cloth.py +1 -1
  39. warp/examples/sim/example_cloth_self_contact.py +48 -54
  40. warp/examples/tile/example_tile_block_cholesky.py +502 -0
  41. warp/examples/tile/example_tile_cholesky.py +2 -1
  42. warp/examples/tile/example_tile_convolution.py +1 -1
  43. warp/examples/tile/example_tile_filtering.py +1 -1
  44. warp/examples/tile/example_tile_matmul.py +1 -1
  45. warp/examples/tile/example_tile_mlp.py +2 -0
  46. warp/fabric.py +7 -7
  47. warp/fem/__init__.py +5 -0
  48. warp/fem/adaptivity.py +1 -1
  49. warp/fem/cache.py +152 -63
  50. warp/fem/dirichlet.py +2 -2
  51. warp/fem/domain.py +136 -6
  52. warp/fem/field/field.py +141 -99
  53. warp/fem/field/nodal_field.py +85 -39
  54. warp/fem/field/virtual.py +99 -52
  55. warp/fem/geometry/adaptive_nanogrid.py +91 -86
  56. warp/fem/geometry/closest_point.py +13 -0
  57. warp/fem/geometry/deformed_geometry.py +102 -40
  58. warp/fem/geometry/element.py +56 -2
  59. warp/fem/geometry/geometry.py +323 -22
  60. warp/fem/geometry/grid_2d.py +157 -62
  61. warp/fem/geometry/grid_3d.py +116 -20
  62. warp/fem/geometry/hexmesh.py +86 -20
  63. warp/fem/geometry/nanogrid.py +166 -86
  64. warp/fem/geometry/partition.py +59 -25
  65. warp/fem/geometry/quadmesh.py +86 -135
  66. warp/fem/geometry/tetmesh.py +47 -119
  67. warp/fem/geometry/trimesh.py +77 -270
  68. warp/fem/integrate.py +181 -95
  69. warp/fem/linalg.py +25 -58
  70. warp/fem/operator.py +124 -27
  71. warp/fem/quadrature/pic_quadrature.py +36 -14
  72. warp/fem/quadrature/quadrature.py +40 -16
  73. warp/fem/space/__init__.py +1 -1
  74. warp/fem/space/basis_function_space.py +66 -46
  75. warp/fem/space/basis_space.py +17 -4
  76. warp/fem/space/dof_mapper.py +1 -1
  77. warp/fem/space/function_space.py +2 -2
  78. warp/fem/space/grid_2d_function_space.py +4 -1
  79. warp/fem/space/hexmesh_function_space.py +4 -2
  80. warp/fem/space/nanogrid_function_space.py +3 -1
  81. warp/fem/space/partition.py +11 -2
  82. warp/fem/space/quadmesh_function_space.py +4 -1
  83. warp/fem/space/restriction.py +5 -2
  84. warp/fem/space/shape/__init__.py +10 -8
  85. warp/fem/space/tetmesh_function_space.py +4 -1
  86. warp/fem/space/topology.py +52 -21
  87. warp/fem/space/trimesh_function_space.py +4 -1
  88. warp/fem/utils.py +53 -8
  89. warp/jax.py +1 -2
  90. warp/jax_experimental/ffi.py +210 -67
  91. warp/jax_experimental/xla_ffi.py +37 -24
  92. warp/math.py +171 -1
  93. warp/native/array.h +103 -4
  94. warp/native/builtin.h +182 -35
  95. warp/native/coloring.cpp +6 -2
  96. warp/native/cuda_util.cpp +1 -1
  97. warp/native/exports.h +118 -63
  98. warp/native/intersect.h +5 -5
  99. warp/native/mat.h +8 -13
  100. warp/native/mathdx.cpp +11 -5
  101. warp/native/matnn.h +1 -123
  102. warp/native/mesh.h +1 -1
  103. warp/native/quat.h +34 -6
  104. warp/native/rand.h +7 -7
  105. warp/native/sparse.cpp +121 -258
  106. warp/native/sparse.cu +181 -274
  107. warp/native/spatial.h +305 -17
  108. warp/native/svd.h +23 -8
  109. warp/native/tile.h +603 -73
  110. warp/native/tile_radix_sort.h +1112 -0
  111. warp/native/tile_reduce.h +239 -13
  112. warp/native/tile_scan.h +240 -0
  113. warp/native/tuple.h +189 -0
  114. warp/native/vec.h +10 -20
  115. warp/native/warp.cpp +36 -4
  116. warp/native/warp.cu +588 -52
  117. warp/native/warp.h +47 -74
  118. warp/optim/linear.py +5 -1
  119. warp/paddle.py +7 -8
  120. warp/py.typed +0 -0
  121. warp/render/render_opengl.py +110 -80
  122. warp/render/render_usd.py +124 -62
  123. warp/sim/__init__.py +9 -0
  124. warp/sim/collide.py +253 -80
  125. warp/sim/graph_coloring.py +8 -1
  126. warp/sim/import_mjcf.py +4 -3
  127. warp/sim/import_usd.py +11 -7
  128. warp/sim/integrator.py +5 -2
  129. warp/sim/integrator_euler.py +1 -1
  130. warp/sim/integrator_featherstone.py +1 -1
  131. warp/sim/integrator_vbd.py +761 -322
  132. warp/sim/integrator_xpbd.py +1 -1
  133. warp/sim/model.py +265 -260
  134. warp/sim/utils.py +10 -7
  135. warp/sparse.py +303 -166
  136. warp/tape.py +54 -51
  137. warp/tests/cuda/test_conditional_captures.py +1046 -0
  138. warp/tests/cuda/test_streams.py +1 -1
  139. warp/tests/geometry/test_volume.py +2 -2
  140. warp/tests/interop/test_dlpack.py +9 -9
  141. warp/tests/interop/test_jax.py +0 -1
  142. warp/tests/run_coverage_serial.py +1 -1
  143. warp/tests/sim/disabled_kinematics.py +2 -2
  144. warp/tests/sim/{test_vbd.py → test_cloth.py} +378 -112
  145. warp/tests/sim/test_collision.py +159 -51
  146. warp/tests/sim/test_coloring.py +91 -2
  147. warp/tests/test_array.py +254 -2
  148. warp/tests/test_array_reduce.py +2 -2
  149. warp/tests/test_assert.py +53 -0
  150. warp/tests/test_atomic_cas.py +312 -0
  151. warp/tests/test_codegen.py +142 -19
  152. warp/tests/test_conditional.py +47 -1
  153. warp/tests/test_ctypes.py +0 -20
  154. warp/tests/test_devices.py +8 -0
  155. warp/tests/test_fabricarray.py +4 -2
  156. warp/tests/test_fem.py +58 -25
  157. warp/tests/test_func.py +42 -1
  158. warp/tests/test_grad.py +1 -1
  159. warp/tests/test_lerp.py +1 -3
  160. warp/tests/test_map.py +481 -0
  161. warp/tests/test_mat.py +23 -24
  162. warp/tests/test_quat.py +28 -15
  163. warp/tests/test_rounding.py +10 -38
  164. warp/tests/test_runlength_encode.py +7 -7
  165. warp/tests/test_smoothstep.py +1 -1
  166. warp/tests/test_sparse.py +83 -2
  167. warp/tests/test_spatial.py +507 -1
  168. warp/tests/test_static.py +48 -0
  169. warp/tests/test_struct.py +2 -2
  170. warp/tests/test_tape.py +38 -0
  171. warp/tests/test_tuple.py +265 -0
  172. warp/tests/test_types.py +2 -2
  173. warp/tests/test_utils.py +24 -18
  174. warp/tests/test_vec.py +38 -408
  175. warp/tests/test_vec_constructors.py +325 -0
  176. warp/tests/tile/test_tile.py +438 -131
  177. warp/tests/tile/test_tile_mathdx.py +518 -14
  178. warp/tests/tile/test_tile_matmul.py +179 -0
  179. warp/tests/tile/test_tile_reduce.py +307 -5
  180. warp/tests/tile/test_tile_shared_memory.py +136 -7
  181. warp/tests/tile/test_tile_sort.py +121 -0
  182. warp/tests/unittest_suites.py +14 -6
  183. warp/types.py +462 -308
  184. warp/utils.py +647 -86
  185. {warp_lang-1.7.2rc1.dist-info → warp_lang-1.8.1.dist-info}/METADATA +20 -6
  186. {warp_lang-1.7.2rc1.dist-info → warp_lang-1.8.1.dist-info}/RECORD +189 -175
  187. warp/stubs.py +0 -3381
  188. warp/tests/sim/test_xpbd.py +0 -399
  189. warp/tests/test_mlp.py +0 -282
  190. {warp_lang-1.7.2rc1.dist-info → warp_lang-1.8.1.dist-info}/WHEEL +0 -0
  191. {warp_lang-1.7.2rc1.dist-info → warp_lang-1.8.1.dist-info}/licenses/LICENSE.md +0 -0
  192. {warp_lang-1.7.2rc1.dist-info → warp_lang-1.8.1.dist-info}/top_level.txt +0 -0
warp/native/tuple.h ADDED
@@ -0,0 +1,189 @@
1
+ /*
2
+ * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
+ * SPDX-License-Identifier: Apache-2.0
4
+ *
5
+ * Licensed under the Apache License, Version 2.0 (the "License");
6
+ * you may not use this file except in compliance with the License.
7
+ * You may obtain a copy of the License at
8
+ *
9
+ * http://www.apache.org/licenses/LICENSE-2.0
10
+ *
11
+ * Unless required by applicable law or agreed to in writing, software
12
+ * distributed under the License is distributed on an "AS IS" BASIS,
13
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ * See the License for the specific language governing permissions and
15
+ * limitations under the License.
16
+ */
17
+
18
+ #pragma once
19
+
20
+ namespace wp
21
+ {
22
+
23
+ template <typename... Types>
24
+ struct tuple_t;
25
+
26
+ template <>
27
+ struct tuple_t<>
28
+ {
29
+
30
+ static constexpr int size() { return 0; }
31
+
32
+ // Base case: empty tuple.
33
+ template <typename Callable>
34
+ void apply(Callable&&) const { }
35
+ };
36
+
37
+ template <typename Head, typename... Tail>
38
+ struct tuple_t<Head, Tail...>
39
+ {
40
+ Head head;
41
+ tuple_t<Tail...> tail;
42
+
43
+ CUDA_CALLABLE inline tuple_t() {}
44
+ CUDA_CALLABLE inline tuple_t(Head h, Tail... t) : head(h), tail(t...) {}
45
+
46
+ static constexpr int size() { return 1 + tuple_t<Tail...>::size(); }
47
+
48
+ // Applies a callable to each element.
49
+ template <typename Callable>
50
+ void apply(Callable&& func) const
51
+ {
52
+ func(head); // Apply the callable to the current element.
53
+ tail.apply(func); // Recursively process the rest of the tuple.
54
+ }
55
+ };
56
+
57
+ // Tuple constructor.
58
+ template <typename... Args>
59
+ CUDA_CALLABLE inline tuple_t<Args...>
60
+ tuple(
61
+ Args... args
62
+ )
63
+ {
64
+ return tuple_t<Args...>(args...);
65
+ }
66
+
67
+ // Helper to extract a value from the tuple.
68
+ // Can be replaced with simpler member function version when our CPU compiler
69
+ // backend supports constexpr if statements.
70
+ template <int N, typename Head, typename... Tail>
71
+ struct tuple_get
72
+ {
73
+ static CUDA_CALLABLE inline const auto&
74
+ value(
75
+ const tuple_t<Head, Tail...>& t
76
+ )
77
+ {
78
+ return tuple_get<N - 1, Tail...>::value(t.tail);
79
+ }
80
+ };
81
+
82
+ // Specialization for the base case N == 0. Simply return the head of the tuple.
83
+ template <typename Head, typename... Tail>
84
+ struct tuple_get<0, Head, Tail...>
85
+ {
86
+ static CUDA_CALLABLE inline const auto&
87
+ value(
88
+ const tuple_t<Head, Tail...>& t
89
+ )
90
+ {
91
+ return t.head;
92
+ }
93
+ };
94
+
95
+ template <int Index, typename... Args>
96
+ CUDA_CALLABLE inline auto
97
+ extract(
98
+ const tuple_t<Args...>& t
99
+ )
100
+ {
101
+ return tuple_get<Index, Args...>::value(t);
102
+ }
103
+
104
+ template <typename... Args>
105
+ CUDA_CALLABLE inline int
106
+ len(
107
+ const tuple_t<Args...>& t
108
+ )
109
+ {
110
+ return t.size();
111
+ }
112
+
113
+ template <typename... Args>
114
+ CUDA_CALLABLE inline void
115
+ adj_len(
116
+ const tuple_t<Args...>& t,
117
+ tuple_t<Args...>& adj_t,
118
+ int adj_ret
119
+ )
120
+ {
121
+ }
122
+
123
+ template <typename... Args>
124
+ CUDA_CALLABLE inline void
125
+ print(
126
+ const tuple_t<Args...>& t
127
+ )
128
+ {
129
+ t.apply([&](auto a) { print(a); });
130
+ }
131
+
132
+ template <typename... Args>
133
+ CUDA_CALLABLE inline void
134
+ adj_print(
135
+ const tuple_t<Args...>& t,
136
+ tuple_t<Args...>& adj_t
137
+ )
138
+ {
139
+ adj_t.apply([&](auto a) { print(a); });
140
+ }
141
+
142
+ CUDA_CALLABLE inline tuple_t<>
143
+ add(
144
+ const tuple_t<>& a,
145
+ const tuple_t<>& b
146
+ )
147
+ {
148
+ return tuple_t<>();
149
+ }
150
+
151
+ template <typename Head, typename... Tail>
152
+ CUDA_CALLABLE inline tuple_t<Head, Tail...>
153
+ add(
154
+ const tuple_t<Head, Tail...>& a,
155
+ const tuple_t<Head, Tail...>& b
156
+ )
157
+ {
158
+ tuple_t<Head, Tail...> out;
159
+ out.head = add(a.head, b.head);
160
+ out.tail = add(a.tail, b.tail);
161
+ return out;
162
+ }
163
+
164
+ CUDA_CALLABLE inline void
165
+ adj_add(
166
+ const tuple_t<>& a,
167
+ const tuple_t<>& b,
168
+ tuple_t<>& adj_a,
169
+ tuple_t<>& adj_b,
170
+ const tuple_t<>& adj_ret
171
+ )
172
+ {
173
+ }
174
+
175
+ template <typename Head, typename... Tail>
176
+ CUDA_CALLABLE inline void
177
+ adj_add(
178
+ const tuple_t<Head, Tail...>& a,
179
+ const tuple_t<Head, Tail...>& b,
180
+ tuple_t<Head, Tail...>& adj_a,
181
+ tuple_t<Head, Tail...>& adj_b,
182
+ const tuple_t<Head, Tail...>& adj_ret
183
+ )
184
+ {
185
+ adj_add(a.head, b.head, adj_a.head, adj_b.head, adj_ret.head);
186
+ adj_add(a.tail, b.tail, adj_a.tail, adj_b.tail, adj_ret.tail);
187
+ }
188
+
189
+ } // namespace wp
warp/native/vec.h CHANGED
@@ -149,26 +149,15 @@ using vec2d = vec_t<2,double>;
149
149
  using vec3d = vec_t<3,double>;
150
150
  using vec4d = vec_t<4,double>;
151
151
 
152
- //--------------
153
- // vec<Length, Type> methods
154
-
155
- // Should these accept const references as arguments? It's all
156
- // inlined so maybe it doesn't matter? Even if it does, it
157
- // probably depends on the Length of the vector...
158
-
159
- // negation:
160
152
  template<unsigned Length, typename Type>
161
- inline CUDA_CALLABLE vec_t<Length, Type> operator - (vec_t<Length, Type> a)
153
+ inline CUDA_CALLABLE vec_t<Length, Type> operator - (const vec_t<Length, Type>& x)
162
154
  {
163
- // NB: this constructor will initialize all ret's components to 0, which is
164
- // unnecessary...
165
155
  vec_t<Length, Type> ret;
166
- for( unsigned i=0; i < Length; ++i )
156
+ for(unsigned i=0; i < Length; ++i)
167
157
  {
168
- ret[i] = -a[i];
158
+ ret[i] = -x[i];
169
159
  }
170
160
 
171
- // Wonder if this does a load of copying when it returns... hopefully not as it's inlined?
172
161
  return ret;
173
162
  }
174
163
 
@@ -843,8 +832,9 @@ inline CUDA_CALLABLE void expect_near(const vec_t<Length, Type>& actual, const v
843
832
  if (diff > tolerance)
844
833
  {
845
834
  printf("Error, expect_near() failed with tolerance "); print(tolerance);
846
- printf("\t Expected: "); print(expected);
847
- printf("\t Actual: "); print(actual);
835
+ printf(" Expected: "); print(expected);
836
+ printf(" Actual: "); print(actual);
837
+ printf(" Max absolute difference: "); print(diff);
848
838
  }
849
839
  }
850
840
 
@@ -979,11 +969,11 @@ template<unsigned Length, typename Type>
979
969
  inline CUDA_CALLABLE void adj_div(Type s, vec_t<Length, Type> a, Type& adj_s, vec_t<Length, Type>& adj_a, const vec_t<Length, Type>& adj_ret)
980
970
  {
981
971
 
982
- adj_s -= dot(a , adj_ret)/ (s * s); // - a / s^2
983
-
984
- for( unsigned i=0; i < Length; ++i )
972
+ for (unsigned i=0; i < Length; ++i)
985
973
  {
986
- adj_a[i] += s / adj_ret[i];
974
+ Type inv = Type(1) / a[i];
975
+ adj_a[i] -= s * adj_ret[i] * inv * inv;
976
+ adj_s += adj_ret[i] * inv;
987
977
  }
988
978
 
989
979
  #if FP_CHECK
warp/native/warp.cpp CHANGED
@@ -24,6 +24,11 @@
24
24
  #include <stdlib.h>
25
25
  #include <string.h>
26
26
 
27
+ // MSVC provides _aligned_malloc() instead of the standard aligned_alloc()
28
+ #if defined(_MSC_VER)
29
+ #include <malloc.h>
30
+ #endif
31
+
27
32
  uint16_t float_to_half_bits(float x)
28
33
  {
29
34
  // adapted from Fabien Giesen's post: https://gist.github.com/rygorous/2156668
@@ -114,7 +119,7 @@ float half_bits_to_float(uint16_t u)
114
119
  int init()
115
120
  {
116
121
  #if WP_ENABLE_CUDA
117
- int cuda_init();
122
+ int cuda_init(void);
118
123
  // note: it's safe to proceed even if CUDA initialization failed
119
124
  cuda_init();
120
125
  #endif
@@ -163,12 +168,28 @@ int is_debug_enabled()
163
168
 
164
169
  void* alloc_host(size_t s)
165
170
  {
166
- return malloc(s);
171
+ // increase CPU array alignment for compatibility with other libs, e.g., JAX, XLA, Eigen.
172
+ size_t alignment = 64;
173
+
174
+ // msvc does not provide the standard aligned_alloc()
175
+ #if defined(_MSC_VER)
176
+ return _aligned_malloc(s, alignment);
177
+ #else
178
+ // ensure that the size is a multiple of alignment
179
+ size_t remainder = s % alignment;
180
+ if (remainder != 0)
181
+ s += alignment - remainder;
182
+ return aligned_alloc(alignment, s);
183
+ #endif
167
184
  }
168
185
 
169
186
  void free_host(void* ptr)
170
187
  {
171
- free(ptr);
188
+ #if defined(_MSC_VER)
189
+ _aligned_free(ptr);
190
+ #else
191
+ free(ptr);
192
+ #endif
172
193
  }
173
194
 
174
195
  bool memcpy_h2h(void* dest, void* src, size_t n)
@@ -990,6 +1011,7 @@ WP_API int cuda_device_get_count() { return 0; }
990
1011
  WP_API void* cuda_device_get_primary_context(int ordinal) { return NULL; }
991
1012
  WP_API const char* cuda_device_get_name(int ordinal) { return NULL; }
992
1013
  WP_API int cuda_device_get_arch(int ordinal) { return 0; }
1014
+ WP_API int cuda_device_get_sm_count(int ordinal) { return 0; }
993
1015
  WP_API void cuda_device_get_uuid(int ordinal, char uuid[16]) {}
994
1016
  WP_API int cuda_device_get_pci_domain_id(int ordinal) { return -1; }
995
1017
  WP_API int cuda_device_get_pci_bus_id(int ordinal) { return -1; }
@@ -1050,10 +1072,20 @@ WP_API float cuda_event_elapsed_time(void* start_event, void* end_event) { retur
1050
1072
 
1051
1073
  WP_API bool cuda_graph_begin_capture(void* context, void* stream, int external) { return false; }
1052
1074
  WP_API bool cuda_graph_end_capture(void* context, void* stream, void** graph_ret) { return false; }
1075
+ WP_API bool cuda_graph_create_exec(void* context, void* stream, void* graph, void** graph_exec_ret) { return false; }
1053
1076
  WP_API bool cuda_graph_launch(void* graph, void* stream) { return false; }
1054
1077
  WP_API bool cuda_graph_destroy(void* context, void* graph) { return false; }
1078
+ WP_API bool cuda_graph_exec_destroy(void* context, void* graph_exec) { return false; }
1079
+ WP_API bool capture_debug_dot_print(void* graph, const char *path, uint32_t flags) { return false; }
1080
+
1081
+ WP_API bool cuda_graph_insert_if_else(void* context, void* stream, int* condition, void** if_graph_ret, void** else_graph_ret) { return false; }
1082
+ WP_API bool cuda_graph_insert_while(void* context, void* stream, int* condition, void** body_graph_ret, uint64_t* handle_ret) { return false; }
1083
+ WP_API bool cuda_graph_set_condition(void* context, void* stream, int* condition, uint64_t handle) { return false; }
1084
+ WP_API bool cuda_graph_pause_capture(void* context, void* stream, void** graph_ret) { return false; }
1085
+ WP_API bool cuda_graph_resume_capture(void* context, void* stream, void* graph) { return false; }
1086
+ WP_API bool cuda_graph_insert_child_graph(void* context, void* stream, void* child_graph) { return false; }
1055
1087
 
1056
- WP_API size_t cuda_compile_program(const char* cuda_src, const char* program_name, int arch, const char* include_dir, int num_cuda_include_dirs, const char** cuda_include_dirs, bool debug, bool verbose, bool verify_fp, bool fast_math, bool fuse_fp, bool lineinfo, const char* output_path, size_t num_ltoirs, char** ltoirs, size_t* ltoir_sizes, int* ltoir_input_types) { return 0; }
1088
+ WP_API size_t cuda_compile_program(const char* cuda_src, const char* program_name, int arch, const char* include_dir, int num_cuda_include_dirs, const char** cuda_include_dirs, bool debug, bool verbose, bool verify_fp, bool fast_math, bool fuse_fp, bool lineinfo, bool compile_time_trace, const char* output_path, size_t num_ltoirs, char** ltoirs, size_t* ltoir_sizes, int* ltoir_input_types) { return 0; }
1057
1089
 
1058
1090
  WP_API void* cuda_load_module(void* context, const char* ptx) { return NULL; }
1059
1091
  WP_API void cuda_unload_module(void* context, void* module) {}