warp-lang 1.5.0__py3-none-macosx_10_13_universal2.whl → 1.5.1__py3-none-macosx_10_13_universal2.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of warp-lang might be problematic. Click here for more details.
- warp/bin/libwarp-clang.dylib +0 -0
- warp/bin/libwarp.dylib +0 -0
- warp/builtins.py +59 -28
- warp/codegen.py +21 -17
- warp/config.py +1 -1
- warp/context.py +59 -35
- warp/examples/sim/example_cloth.py +3 -1
- warp/fem/geometry/geometry.py +0 -2
- warp/native/coloring.cpp +5 -1
- warp/native/cuda_util.cpp +56 -53
- warp/native/tile.h +2 -5
- warp/render/render_opengl.py +7 -6
- warp/sim/import_urdf.py +8 -8
- warp/sim/model.py +23 -19
- warp/sparse.py +1 -1
- warp/stubs.py +23 -23
- warp/tests/test_coloring.py +12 -2
- warp/tests/test_examples.py +3 -1
- warp/tests/test_func.py +21 -4
- warp/tests/test_lerp.py +13 -87
- warp/tests/test_matmul.py +6 -9
- warp/tests/test_matmul_lite.py +6 -11
- warp/tests/test_overwrite.py +45 -0
- warp/tests/test_smoothstep.py +17 -83
- warp/tests/test_static.py +3 -3
- warp/tests/test_tile.py +44 -0
- warp/tests/unittest_utils.py +0 -2
- warp/types.py +2 -2
- warp/utils.py +1 -2
- {warp_lang-1.5.0.dist-info → warp_lang-1.5.1.dist-info}/METADATA +28 -29
- {warp_lang-1.5.0.dist-info → warp_lang-1.5.1.dist-info}/RECORD +34 -34
- {warp_lang-1.5.0.dist-info → warp_lang-1.5.1.dist-info}/LICENSE.md +0 -0
- {warp_lang-1.5.0.dist-info → warp_lang-1.5.1.dist-info}/WHEEL +0 -0
- {warp_lang-1.5.0.dist-info → warp_lang-1.5.1.dist-info}/top_level.txt +0 -0
warp/native/cuda_util.cpp
CHANGED
|
@@ -120,15 +120,17 @@ static inline int get_minor(int version)
|
|
|
120
120
|
return (version % 1000) / 10;
|
|
121
121
|
}
|
|
122
122
|
|
|
123
|
-
|
|
123
|
+
// Get versioned driver entry point. The version argument should match the function pointer type.
|
|
124
|
+
// For example, to initialize PFN_cuCtxCreate_v3020 use version 3020.
|
|
125
|
+
static bool get_driver_entry_point(const char* name, int version, void** pfn)
|
|
124
126
|
{
|
|
125
127
|
if (!pfn_cuGetProcAddress || !name || !pfn)
|
|
126
128
|
return false;
|
|
127
129
|
|
|
128
130
|
#if CUDA_VERSION < 12000
|
|
129
|
-
CUresult r = pfn_cuGetProcAddress(name, pfn,
|
|
131
|
+
CUresult r = pfn_cuGetProcAddress(name, pfn, version, CU_GET_PROC_ADDRESS_DEFAULT);
|
|
130
132
|
#else
|
|
131
|
-
CUresult r = pfn_cuGetProcAddress(name, pfn,
|
|
133
|
+
CUresult r = pfn_cuGetProcAddress(name, pfn, version, CU_GET_PROC_ADDRESS_DEFAULT, NULL);
|
|
132
134
|
#endif
|
|
133
135
|
|
|
134
136
|
if (r != CUDA_SUCCESS)
|
|
@@ -170,7 +172,8 @@ bool init_cuda_driver()
|
|
|
170
172
|
|
|
171
173
|
// check the CUDA driver version and report an error if it's too low
|
|
172
174
|
int driver_version = 0;
|
|
173
|
-
if (get_driver_entry_point("cuDriverGetVersion", &(void*&)pfn_cuDriverGetVersion) &&
|
|
175
|
+
if (get_driver_entry_point("cuDriverGetVersion", 2020, &(void*&)pfn_cuDriverGetVersion) &&
|
|
176
|
+
check_cu(pfn_cuDriverGetVersion(&driver_version)))
|
|
174
177
|
{
|
|
175
178
|
if (driver_version < WP_CUDA_DRIVER_VERSION)
|
|
176
179
|
{
|
|
@@ -186,55 +189,55 @@ bool init_cuda_driver()
|
|
|
186
189
|
}
|
|
187
190
|
|
|
188
191
|
// initialize driver entry points
|
|
189
|
-
get_driver_entry_point("cuGetErrorString", &(void*&)pfn_cuGetErrorString);
|
|
190
|
-
get_driver_entry_point("cuGetErrorName", &(void*&)pfn_cuGetErrorName);
|
|
191
|
-
get_driver_entry_point("cuInit", &(void*&)pfn_cuInit);
|
|
192
|
-
get_driver_entry_point("cuDeviceGet", &(void*&)pfn_cuDeviceGet);
|
|
193
|
-
get_driver_entry_point("cuDeviceGetCount", &(void*&)pfn_cuDeviceGetCount);
|
|
194
|
-
get_driver_entry_point("cuDeviceGetName", &(void*&)pfn_cuDeviceGetName);
|
|
195
|
-
get_driver_entry_point("cuDeviceGetAttribute", &(void*&)pfn_cuDeviceGetAttribute);
|
|
196
|
-
get_driver_entry_point("cuDeviceGetUuid", &(void*&)pfn_cuDeviceGetUuid);
|
|
197
|
-
get_driver_entry_point("cuDevicePrimaryCtxRetain", &(void*&)pfn_cuDevicePrimaryCtxRetain);
|
|
198
|
-
get_driver_entry_point("cuDevicePrimaryCtxRelease", &(void*&)pfn_cuDevicePrimaryCtxRelease);
|
|
199
|
-
get_driver_entry_point("cuDeviceCanAccessPeer", &(void*&)pfn_cuDeviceCanAccessPeer);
|
|
200
|
-
get_driver_entry_point("cuMemGetInfo", &(void*&)pfn_cuMemGetInfo);
|
|
201
|
-
get_driver_entry_point("cuCtxSetCurrent", &(void*&)pfn_cuCtxSetCurrent);
|
|
202
|
-
get_driver_entry_point("cuCtxGetCurrent", &(void*&)pfn_cuCtxGetCurrent);
|
|
203
|
-
get_driver_entry_point("cuCtxPushCurrent", &(void*&)pfn_cuCtxPushCurrent);
|
|
204
|
-
get_driver_entry_point("cuCtxPopCurrent", &(void*&)pfn_cuCtxPopCurrent);
|
|
205
|
-
get_driver_entry_point("cuCtxSynchronize", &(void*&)pfn_cuCtxSynchronize);
|
|
206
|
-
get_driver_entry_point("cuCtxGetDevice", &(void*&)pfn_cuCtxGetDevice);
|
|
207
|
-
get_driver_entry_point("cuCtxCreate", &(void*&)pfn_cuCtxCreate);
|
|
208
|
-
get_driver_entry_point("cuCtxDestroy", &(void*&)pfn_cuCtxDestroy);
|
|
209
|
-
get_driver_entry_point("cuCtxEnablePeerAccess", &(void*&)pfn_cuCtxEnablePeerAccess);
|
|
210
|
-
get_driver_entry_point("cuCtxDisablePeerAccess", &(void*&)pfn_cuCtxDisablePeerAccess);
|
|
211
|
-
get_driver_entry_point("cuStreamCreate", &(void*&)pfn_cuStreamCreate);
|
|
212
|
-
get_driver_entry_point("cuStreamDestroy", &(void*&)pfn_cuStreamDestroy);
|
|
213
|
-
get_driver_entry_point("cuStreamSynchronize", &(void*&)pfn_cuStreamSynchronize);
|
|
214
|
-
get_driver_entry_point("cuStreamWaitEvent", &(void*&)pfn_cuStreamWaitEvent);
|
|
215
|
-
get_driver_entry_point("cuStreamGetCtx", &(void*&)pfn_cuStreamGetCtx);
|
|
216
|
-
get_driver_entry_point("cuStreamGetCaptureInfo", &(void*&)pfn_cuStreamGetCaptureInfo);
|
|
217
|
-
get_driver_entry_point("cuStreamUpdateCaptureDependencies", &(void*&)pfn_cuStreamUpdateCaptureDependencies);
|
|
218
|
-
get_driver_entry_point("cuStreamCreateWithPriority", &(void*&)pfn_cuStreamCreateWithPriority);
|
|
219
|
-
get_driver_entry_point("cuStreamGetPriority", &(void*&)pfn_cuStreamGetPriority);
|
|
220
|
-
get_driver_entry_point("cuEventCreate", &(void*&)pfn_cuEventCreate);
|
|
221
|
-
get_driver_entry_point("cuEventDestroy", &(void*&)pfn_cuEventDestroy);
|
|
222
|
-
get_driver_entry_point("cuEventRecord", &(void*&)pfn_cuEventRecord);
|
|
223
|
-
get_driver_entry_point("cuEventRecordWithFlags", &(void*&)pfn_cuEventRecordWithFlags);
|
|
224
|
-
get_driver_entry_point("cuEventSynchronize", &(void*&)pfn_cuEventSynchronize);
|
|
225
|
-
get_driver_entry_point("cuModuleLoadDataEx", &(void*&)pfn_cuModuleLoadDataEx);
|
|
226
|
-
get_driver_entry_point("cuModuleUnload", &(void*&)pfn_cuModuleUnload);
|
|
227
|
-
get_driver_entry_point("cuModuleGetFunction", &(void*&)pfn_cuModuleGetFunction);
|
|
228
|
-
get_driver_entry_point("cuLaunchKernel", &(void*&)pfn_cuLaunchKernel);
|
|
229
|
-
get_driver_entry_point("cuMemcpyPeerAsync", &(void*&)pfn_cuMemcpyPeerAsync);
|
|
230
|
-
get_driver_entry_point("cuPointerGetAttribute", &(void*&)pfn_cuPointerGetAttribute);
|
|
231
|
-
get_driver_entry_point("cuGraphicsMapResources", &(void*&)pfn_cuGraphicsMapResources);
|
|
232
|
-
get_driver_entry_point("cuGraphicsUnmapResources", &(void*&)pfn_cuGraphicsUnmapResources);
|
|
233
|
-
get_driver_entry_point("cuGraphicsResourceGetMappedPointer", &(void*&)pfn_cuGraphicsResourceGetMappedPointer);
|
|
234
|
-
get_driver_entry_point("cuGraphicsGLRegisterBuffer", &(void*&)pfn_cuGraphicsGLRegisterBuffer);
|
|
235
|
-
get_driver_entry_point("cuGraphicsUnregisterResource", &(void*&)pfn_cuGraphicsUnregisterResource);
|
|
236
|
-
get_driver_entry_point("cuModuleGetGlobal", &(void*&)pfn_cuModuleGetGlobal);
|
|
237
|
-
get_driver_entry_point("cuFuncSetAttribute", &(void*&)pfn_cuFuncSetAttribute);
|
|
192
|
+
get_driver_entry_point("cuGetErrorString", 6000, &(void*&)pfn_cuGetErrorString);
|
|
193
|
+
get_driver_entry_point("cuGetErrorName", 6000, &(void*&)pfn_cuGetErrorName);
|
|
194
|
+
get_driver_entry_point("cuInit", 2000, &(void*&)pfn_cuInit);
|
|
195
|
+
get_driver_entry_point("cuDeviceGet", 2000, &(void*&)pfn_cuDeviceGet);
|
|
196
|
+
get_driver_entry_point("cuDeviceGetCount", 2000, &(void*&)pfn_cuDeviceGetCount);
|
|
197
|
+
get_driver_entry_point("cuDeviceGetName", 2000, &(void*&)pfn_cuDeviceGetName);
|
|
198
|
+
get_driver_entry_point("cuDeviceGetAttribute", 2000, &(void*&)pfn_cuDeviceGetAttribute);
|
|
199
|
+
get_driver_entry_point("cuDeviceGetUuid", 110400, &(void*&)pfn_cuDeviceGetUuid);
|
|
200
|
+
get_driver_entry_point("cuDevicePrimaryCtxRetain", 7000, &(void*&)pfn_cuDevicePrimaryCtxRetain);
|
|
201
|
+
get_driver_entry_point("cuDevicePrimaryCtxRelease", 11000, &(void*&)pfn_cuDevicePrimaryCtxRelease);
|
|
202
|
+
get_driver_entry_point("cuDeviceCanAccessPeer", 4000, &(void*&)pfn_cuDeviceCanAccessPeer);
|
|
203
|
+
get_driver_entry_point("cuMemGetInfo", 3020, &(void*&)pfn_cuMemGetInfo);
|
|
204
|
+
get_driver_entry_point("cuCtxSetCurrent", 4000, &(void*&)pfn_cuCtxSetCurrent);
|
|
205
|
+
get_driver_entry_point("cuCtxGetCurrent", 4000, &(void*&)pfn_cuCtxGetCurrent);
|
|
206
|
+
get_driver_entry_point("cuCtxPushCurrent", 4000, &(void*&)pfn_cuCtxPushCurrent);
|
|
207
|
+
get_driver_entry_point("cuCtxPopCurrent", 4000, &(void*&)pfn_cuCtxPopCurrent);
|
|
208
|
+
get_driver_entry_point("cuCtxSynchronize", 2000, &(void*&)pfn_cuCtxSynchronize);
|
|
209
|
+
get_driver_entry_point("cuCtxGetDevice", 2000, &(void*&)pfn_cuCtxGetDevice);
|
|
210
|
+
get_driver_entry_point("cuCtxCreate", 3020, &(void*&)pfn_cuCtxCreate);
|
|
211
|
+
get_driver_entry_point("cuCtxDestroy", 4000, &(void*&)pfn_cuCtxDestroy);
|
|
212
|
+
get_driver_entry_point("cuCtxEnablePeerAccess", 4000, &(void*&)pfn_cuCtxEnablePeerAccess);
|
|
213
|
+
get_driver_entry_point("cuCtxDisablePeerAccess", 4000, &(void*&)pfn_cuCtxDisablePeerAccess);
|
|
214
|
+
get_driver_entry_point("cuStreamCreate", 2000, &(void*&)pfn_cuStreamCreate);
|
|
215
|
+
get_driver_entry_point("cuStreamDestroy", 4000, &(void*&)pfn_cuStreamDestroy);
|
|
216
|
+
get_driver_entry_point("cuStreamSynchronize", 2000, &(void*&)pfn_cuStreamSynchronize);
|
|
217
|
+
get_driver_entry_point("cuStreamWaitEvent", 3020, &(void*&)pfn_cuStreamWaitEvent);
|
|
218
|
+
get_driver_entry_point("cuStreamGetCtx", 9020, &(void*&)pfn_cuStreamGetCtx);
|
|
219
|
+
get_driver_entry_point("cuStreamGetCaptureInfo", 11030, &(void*&)pfn_cuStreamGetCaptureInfo);
|
|
220
|
+
get_driver_entry_point("cuStreamUpdateCaptureDependencies", 11030, &(void*&)pfn_cuStreamUpdateCaptureDependencies);
|
|
221
|
+
get_driver_entry_point("cuStreamCreateWithPriority", 5050, &(void*&)pfn_cuStreamCreateWithPriority);
|
|
222
|
+
get_driver_entry_point("cuStreamGetPriority", 5050, &(void*&)pfn_cuStreamGetPriority);
|
|
223
|
+
get_driver_entry_point("cuEventCreate", 2000, &(void*&)pfn_cuEventCreate);
|
|
224
|
+
get_driver_entry_point("cuEventDestroy", 4000, &(void*&)pfn_cuEventDestroy);
|
|
225
|
+
get_driver_entry_point("cuEventRecord", 2000, &(void*&)pfn_cuEventRecord);
|
|
226
|
+
get_driver_entry_point("cuEventRecordWithFlags", 11010, &(void*&)pfn_cuEventRecordWithFlags);
|
|
227
|
+
get_driver_entry_point("cuEventSynchronize", 2000, &(void*&)pfn_cuEventSynchronize);
|
|
228
|
+
get_driver_entry_point("cuModuleLoadDataEx", 2010, &(void*&)pfn_cuModuleLoadDataEx);
|
|
229
|
+
get_driver_entry_point("cuModuleUnload", 2000, &(void*&)pfn_cuModuleUnload);
|
|
230
|
+
get_driver_entry_point("cuModuleGetFunction", 2000, &(void*&)pfn_cuModuleGetFunction);
|
|
231
|
+
get_driver_entry_point("cuLaunchKernel", 4000, &(void*&)pfn_cuLaunchKernel);
|
|
232
|
+
get_driver_entry_point("cuMemcpyPeerAsync", 4000, &(void*&)pfn_cuMemcpyPeerAsync);
|
|
233
|
+
get_driver_entry_point("cuPointerGetAttribute", 4000, &(void*&)pfn_cuPointerGetAttribute);
|
|
234
|
+
get_driver_entry_point("cuGraphicsMapResources", 3000, &(void*&)pfn_cuGraphicsMapResources);
|
|
235
|
+
get_driver_entry_point("cuGraphicsUnmapResources", 3000, &(void*&)pfn_cuGraphicsUnmapResources);
|
|
236
|
+
get_driver_entry_point("cuGraphicsResourceGetMappedPointer", 3020, &(void*&)pfn_cuGraphicsResourceGetMappedPointer);
|
|
237
|
+
get_driver_entry_point("cuGraphicsGLRegisterBuffer", 3000, &(void*&)pfn_cuGraphicsGLRegisterBuffer);
|
|
238
|
+
get_driver_entry_point("cuGraphicsUnregisterResource", 3000, &(void*&)pfn_cuGraphicsUnregisterResource);
|
|
239
|
+
get_driver_entry_point("cuModuleGetGlobal", 3020, &(void*&)pfn_cuModuleGetGlobal);
|
|
240
|
+
get_driver_entry_point("cuFuncSetAttribute", 9000, &(void*&)pfn_cuFuncSetAttribute);
|
|
238
241
|
|
|
239
242
|
if (pfn_cuInit)
|
|
240
243
|
cuda_driver_initialized = check_cu(pfn_cuInit(0));
|
warp/native/tile.h
CHANGED
|
@@ -1125,8 +1125,6 @@ inline CUDA_CALLABLE auto untile(Tile& tile)
|
|
|
1125
1125
|
}
|
|
1126
1126
|
}
|
|
1127
1127
|
|
|
1128
|
-
|
|
1129
|
-
|
|
1130
1128
|
template <typename Tile, typename Value>
|
|
1131
1129
|
inline CUDA_CALLABLE void adj_untile(Tile& tile, Tile& adj_tile, Value& adj_ret)
|
|
1132
1130
|
{
|
|
@@ -1156,7 +1154,7 @@ inline CUDA_CALLABLE auto tile_zeros()
|
|
|
1156
1154
|
return T(0);
|
|
1157
1155
|
}
|
|
1158
1156
|
|
|
1159
|
-
//
|
|
1157
|
+
// one-initialized tile
|
|
1160
1158
|
template <typename T, int M, int N>
|
|
1161
1159
|
inline CUDA_CALLABLE auto tile_ones()
|
|
1162
1160
|
{
|
|
@@ -1164,7 +1162,7 @@ inline CUDA_CALLABLE auto tile_ones()
|
|
|
1164
1162
|
return T(1);
|
|
1165
1163
|
}
|
|
1166
1164
|
|
|
1167
|
-
//
|
|
1165
|
+
// tile with evenly spaced values
|
|
1168
1166
|
template <typename T, int M, int N>
|
|
1169
1167
|
inline CUDA_CALLABLE auto tile_arange(T start, T stop, T step)
|
|
1170
1168
|
{
|
|
@@ -1220,7 +1218,6 @@ inline CUDA_CALLABLE void tile_store(array_t<T>& dest, int x, int y, Tile& src)
|
|
|
1220
1218
|
src.copy_to_global(dest, x, y);
|
|
1221
1219
|
}
|
|
1222
1220
|
|
|
1223
|
-
// entry point for store
|
|
1224
1221
|
template <typename T, typename Tile>
|
|
1225
1222
|
inline CUDA_CALLABLE auto tile_atomic_add(array_t<T>& dest, int x, int y, Tile& src)
|
|
1226
1223
|
{
|
warp/render/render_opengl.py
CHANGED
|
@@ -1040,7 +1040,7 @@ class OpenGLRenderer:
|
|
|
1040
1040
|
self.render_depth = render_depth
|
|
1041
1041
|
self.enable_backface_culling = enable_backface_culling
|
|
1042
1042
|
|
|
1043
|
-
self._device = wp.
|
|
1043
|
+
self._device = wp.get_preferred_device()
|
|
1044
1044
|
self._title = title
|
|
1045
1045
|
|
|
1046
1046
|
self.window = pyglet.window.Window(
|
|
@@ -2278,14 +2278,9 @@ Instances: {len(self._instances)}"""
|
|
|
2278
2278
|
colors1 = np.array(colors1, dtype=np.float32)
|
|
2279
2279
|
colors2 = np.array(colors2, dtype=np.float32)
|
|
2280
2280
|
|
|
2281
|
-
# create buffer for checkerboard colors
|
|
2282
|
-
self._instance_color1_buffer = gl.GLuint()
|
|
2283
|
-
gl.glGenBuffers(1, self._instance_color1_buffer)
|
|
2284
2281
|
gl.glBindBuffer(gl.GL_ARRAY_BUFFER, self._instance_color1_buffer)
|
|
2285
2282
|
gl.glBufferData(gl.GL_ARRAY_BUFFER, colors1.nbytes, colors1.ctypes.data, gl.GL_STATIC_DRAW)
|
|
2286
2283
|
|
|
2287
|
-
self._instance_color2_buffer = gl.GLuint()
|
|
2288
|
-
gl.glGenBuffers(1, self._instance_color2_buffer)
|
|
2289
2284
|
gl.glBindBuffer(gl.GL_ARRAY_BUFFER, self._instance_color2_buffer)
|
|
2290
2285
|
gl.glBufferData(gl.GL_ARRAY_BUFFER, colors2.nbytes, colors2.ctypes.data, gl.GL_STATIC_DRAW)
|
|
2291
2286
|
|
|
@@ -2322,6 +2317,12 @@ Instances: {len(self._instances)}"""
|
|
|
2322
2317
|
int(self._instance_transform_gl_buffer.value), self._device
|
|
2323
2318
|
)
|
|
2324
2319
|
|
|
2320
|
+
# create color buffers
|
|
2321
|
+
self._instance_color1_buffer = gl.GLuint()
|
|
2322
|
+
gl.glGenBuffers(1, self._instance_color1_buffer)
|
|
2323
|
+
self._instance_color2_buffer = gl.GLuint()
|
|
2324
|
+
gl.glGenBuffers(1, self._instance_color2_buffer)
|
|
2325
|
+
|
|
2325
2326
|
self.update_instance_colors()
|
|
2326
2327
|
|
|
2327
2328
|
# set up instance attribute pointers
|
warp/sim/import_urdf.py
CHANGED
|
@@ -211,14 +211,14 @@ def parse_urdf(
|
|
|
211
211
|
if hasattr(m, "geometry"):
|
|
212
212
|
# multiple meshes are contained in a scene
|
|
213
213
|
for geom in m.geometry.values():
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
214
|
+
geom_vertices = np.array(geom.vertices, dtype=np.float32) * scaling
|
|
215
|
+
geom_faces = np.array(geom.faces.flatten(), dtype=np.int32)
|
|
216
|
+
geom_mesh = Mesh(geom_vertices, geom_faces)
|
|
217
217
|
s = builder.add_shape_mesh(
|
|
218
218
|
body=link,
|
|
219
219
|
pos=wp.vec3(tf.p),
|
|
220
220
|
rot=wp.quat(tf.q),
|
|
221
|
-
mesh=
|
|
221
|
+
mesh=geom_mesh,
|
|
222
222
|
density=density,
|
|
223
223
|
is_visible=visible,
|
|
224
224
|
has_ground_collision=not just_visual,
|
|
@@ -228,14 +228,14 @@ def parse_urdf(
|
|
|
228
228
|
shapes.append(s)
|
|
229
229
|
else:
|
|
230
230
|
# a single mesh
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
231
|
+
m_vertices = np.array(m.vertices, dtype=np.float32) * scaling
|
|
232
|
+
m_faces = np.array(m.faces.flatten(), dtype=np.int32)
|
|
233
|
+
m_mesh = Mesh(m_vertices, m_faces)
|
|
234
234
|
s = builder.add_shape_mesh(
|
|
235
235
|
body=link,
|
|
236
236
|
pos=wp.vec3(tf.p),
|
|
237
237
|
rot=wp.quat(tf.q),
|
|
238
|
-
mesh=
|
|
238
|
+
mesh=m_mesh,
|
|
239
239
|
density=density,
|
|
240
240
|
is_visible=visible,
|
|
241
241
|
has_ground_collision=not just_visual,
|
warp/sim/model.py
CHANGED
|
@@ -578,14 +578,14 @@ class Model:
|
|
|
578
578
|
|
|
579
579
|
This setting is not supported by :class:`FeatherstoneIntegrator`.
|
|
580
580
|
|
|
581
|
-
joint_limit_lower (array): Joint lower position limits, shape [
|
|
582
|
-
joint_limit_upper (array): Joint upper position limits, shape [
|
|
583
|
-
joint_limit_ke (array): Joint position limit stiffness (used by the Euler integrators), shape [
|
|
584
|
-
joint_limit_kd (array): Joint position limit damping (used by the Euler integrators), shape [
|
|
581
|
+
joint_limit_lower (array): Joint lower position limits, shape [joint_axis_count], float
|
|
582
|
+
joint_limit_upper (array): Joint upper position limits, shape [joint_axis_count], float
|
|
583
|
+
joint_limit_ke (array): Joint position limit stiffness (used by the Euler integrators), shape [joint_axis_count], float
|
|
584
|
+
joint_limit_kd (array): Joint position limit damping (used by the Euler integrators), shape [joint_axis_count], float
|
|
585
585
|
joint_twist_lower (array): Joint lower twist limit, shape [joint_count], float
|
|
586
586
|
joint_twist_upper (array): Joint upper twist limit, shape [joint_count], float
|
|
587
|
-
joint_q_start (array): Start index of the first position coordinate per joint, shape [joint_count], int
|
|
588
|
-
joint_qd_start (array): Start index of the first velocity coordinate per joint, shape [joint_count], int
|
|
587
|
+
joint_q_start (array): Start index of the first position coordinate per joint (note the last value is an additional sentinel entry to allow for querying the q dimensionality of joint i via ``joint_q_start[i+1] - joint_q_start[i]``), shape [joint_count + 1], int
|
|
588
|
+
joint_qd_start (array): Start index of the first velocity coordinate per joint (note the last value is an additional sentinel entry to allow for querying the qd dimensionality of joint i via ``joint_qd_start[i+1] - joint_qd_start[i]``), shape [joint_count + 1], int
|
|
589
589
|
articulation_start (array): Articulation start index, shape [articulation_count], int
|
|
590
590
|
joint_name (list): Joint names, shape [joint_count], str
|
|
591
591
|
joint_attach_ke (float): Joint attachment force stiffness (used by :class:`SemiImplicitIntegrator`)
|
|
@@ -1442,12 +1442,14 @@ class ModelBuilder:
|
|
|
1442
1442
|
self.shape_collision_filter_pairs.add((i + shape_count, j + shape_count))
|
|
1443
1443
|
for group, shapes in builder.shape_collision_group_map.items():
|
|
1444
1444
|
if separate_collision_group:
|
|
1445
|
-
|
|
1445
|
+
extend_group = self.last_collision_group + 1
|
|
1446
1446
|
else:
|
|
1447
|
-
|
|
1448
|
-
|
|
1449
|
-
|
|
1450
|
-
|
|
1447
|
+
extend_group = group + self.last_collision_group if group > -1 else -1
|
|
1448
|
+
|
|
1449
|
+
if extend_group not in self.shape_collision_group_map:
|
|
1450
|
+
self.shape_collision_group_map[extend_group] = []
|
|
1451
|
+
|
|
1452
|
+
self.shape_collision_group_map[extend_group].extend([s + shape_count for s in shapes])
|
|
1451
1453
|
|
|
1452
1454
|
# update last collision group counter
|
|
1453
1455
|
if separate_collision_group:
|
|
@@ -2616,11 +2618,12 @@ class ModelBuilder:
|
|
|
2616
2618
|
joint_remap[joint["original_id"]] = i
|
|
2617
2619
|
# update articulation_start
|
|
2618
2620
|
for i, old_i in enumerate(self.articulation_start):
|
|
2619
|
-
|
|
2620
|
-
|
|
2621
|
-
|
|
2621
|
+
start_i = old_i
|
|
2622
|
+
while start_i not in joint_remap:
|
|
2623
|
+
start_i += 1
|
|
2624
|
+
if start_i >= self.joint_count:
|
|
2622
2625
|
break
|
|
2623
|
-
self.articulation_start[i] = joint_remap.get(
|
|
2626
|
+
self.articulation_start[i] = joint_remap.get(start_i, start_i)
|
|
2624
2627
|
# remove empty articulation starts, i.e. where the start and end are the same
|
|
2625
2628
|
self.articulation_start = list(set(self.articulation_start))
|
|
2626
2629
|
|
|
@@ -4269,8 +4272,7 @@ class ModelBuilder:
|
|
|
4269
4272
|
pos = wp.vec3(pos[0], pos[1], pos[2])
|
|
4270
4273
|
# add particles
|
|
4271
4274
|
for v in vertices:
|
|
4272
|
-
|
|
4273
|
-
p = wp.quat_rotate(rot, v * scale) + pos
|
|
4275
|
+
p = wp.quat_rotate(rot, wp.vec3(v[0], v[1], v[2]) * scale) + pos
|
|
4274
4276
|
|
|
4275
4277
|
self.add_particle(p, vel, 0.0)
|
|
4276
4278
|
|
|
@@ -4402,16 +4404,18 @@ class ModelBuilder:
|
|
|
4402
4404
|
balance_colors: Whether to apply the color balancing algorithm to balance the size of each color
|
|
4403
4405
|
target_max_min_color_ratio: the color balancing algorithm will stop when the ratio between the largest color and
|
|
4404
4406
|
the smallest color reaches this value
|
|
4405
|
-
algorithm: Value should an enum type of ColoringAlgorithm, otherwise it will raise an error. ColoringAlgorithm.mcs means using the MCS coloring algorithm,
|
|
4407
|
+
algorithm: Value should be an enum type of ColoringAlgorithm, otherwise it will raise an error. ColoringAlgorithm.mcs means using the MCS coloring algorithm,
|
|
4406
4408
|
while ColoringAlgorithm.ordered_greedy means using the degree-ordered greedy algorithm. The MCS algorithm typically generates 30% to 50% fewer colors
|
|
4407
4409
|
compared to the ordered greedy algorithm, while maintaining the same linear complexity. Although MCS has a constant overhead that makes it about twice
|
|
4408
4410
|
as slow as the greedy algorithm, it produces significantly better coloring results. We recommend using MCS, especially if coloring is only part of the
|
|
4409
|
-
preprocessing
|
|
4411
|
+
preprocessing.
|
|
4410
4412
|
|
|
4411
4413
|
Note:
|
|
4412
4414
|
|
|
4413
4415
|
References to the coloring algorithm:
|
|
4416
|
+
|
|
4414
4417
|
MCS: Pereira, F. M. Q., & Palsberg, J. (2005, November). Register allocation via coloring of chordal graphs. In Asian Symposium on Programming Languages and Systems (pp. 315-329). Berlin, Heidelberg: Springer Berlin Heidelberg.
|
|
4418
|
+
|
|
4415
4419
|
Ordered Greedy: Ton-That, Q. M., Kry, P. G., & Andrews, S. (2023). Parallel block Neo-Hookean XPBD using graph clustering. Computers & Graphics, 110, 1-10.
|
|
4416
4420
|
|
|
4417
4421
|
"""
|
warp/sparse.py
CHANGED
warp/stubs.py
CHANGED
|
@@ -975,7 +975,7 @@ def tile_load(a: Array[Any], i: int32, j: int32, m: int32, n: int32, storage: st
|
|
|
975
975
|
|
|
976
976
|
|
|
977
977
|
@over
|
|
978
|
-
def tile_store(a: Array[Any], i: int32, t:
|
|
978
|
+
def tile_store(a: Array[Any], i: int32, t: Tile):
|
|
979
979
|
"""Stores a 1D tile to a global memory array.
|
|
980
980
|
|
|
981
981
|
This method will cooperatively store a tile to global memory using all threads in the block.
|
|
@@ -988,7 +988,7 @@ def tile_store(a: Array[Any], i: int32, t: Any):
|
|
|
988
988
|
|
|
989
989
|
|
|
990
990
|
@over
|
|
991
|
-
def tile_store(a: Array[Any], i: int32, j: int32, t:
|
|
991
|
+
def tile_store(a: Array[Any], i: int32, j: int32, t: Tile):
|
|
992
992
|
"""Stores a tile to a global memory array.
|
|
993
993
|
|
|
994
994
|
This method will cooperatively store a tile to global memory using all threads in the block.
|
|
@@ -1002,7 +1002,7 @@ def tile_store(a: Array[Any], i: int32, j: int32, t: Any):
|
|
|
1002
1002
|
|
|
1003
1003
|
|
|
1004
1004
|
@over
|
|
1005
|
-
def tile_atomic_add(a: Array[Any], x: int32, y: int32, t:
|
|
1005
|
+
def tile_atomic_add(a: Array[Any], x: int32, y: int32, t: Tile) -> Tile:
|
|
1006
1006
|
"""Atomically add a tile to the array `a`, each element will be updated atomically.
|
|
1007
1007
|
|
|
1008
1008
|
:param a: Array in global memory, should have the same ``dtype`` as the input tile
|
|
@@ -1077,7 +1077,7 @@ def tile(x: Any) -> Tile:
|
|
|
1077
1077
|
|
|
1078
1078
|
|
|
1079
1079
|
@over
|
|
1080
|
-
def untile(a:
|
|
1080
|
+
def untile(a: Tile) -> Scalar:
|
|
1081
1081
|
"""Convert a Tile back to per-thread values.
|
|
1082
1082
|
|
|
1083
1083
|
This function converts a block-wide tile back to per-thread values.
|
|
@@ -1100,7 +1100,7 @@ def untile(a: Any) -> Scalar:
|
|
|
1100
1100
|
t = wp.tile(i) * 2
|
|
1101
1101
|
|
|
1102
1102
|
# convert back to per-thread values
|
|
1103
|
-
s = wp.untile()
|
|
1103
|
+
s = wp.untile(t)
|
|
1104
1104
|
|
|
1105
1105
|
print(s)
|
|
1106
1106
|
|
|
@@ -1154,7 +1154,7 @@ def tile_transpose(a: Tile) -> Tile:
|
|
|
1154
1154
|
def tile_broadcast(a: Tile, m: int32, n: int32) -> Tile:
|
|
1155
1155
|
"""Broadcast a tile.
|
|
1156
1156
|
|
|
1157
|
-
This
|
|
1157
|
+
This function will attempt to broadcast the input tile ``a`` to the destination shape (m, n), broadcasting follows NumPy broadcast rules.
|
|
1158
1158
|
|
|
1159
1159
|
:param a: Tile to broadcast
|
|
1160
1160
|
:returns: Tile with broadcast ``shape=(m, n)``
|
|
@@ -1178,10 +1178,10 @@ def tile_sum(a: Tile) -> Tile:
|
|
|
1178
1178
|
t = wp.tile_ones(dtype=float, m=16, n=16)
|
|
1179
1179
|
s = wp.tile_sum(t)
|
|
1180
1180
|
|
|
1181
|
-
print(
|
|
1181
|
+
print(s)
|
|
1182
1182
|
|
|
1183
1183
|
|
|
1184
|
-
wp.
|
|
1184
|
+
wp.launch_tiled(compute, dim=[1], inputs=[], block_dim=64)
|
|
1185
1185
|
|
|
1186
1186
|
Prints:
|
|
1187
1187
|
|
|
@@ -1207,19 +1207,19 @@ def tile_min(a: Tile) -> Tile:
|
|
|
1207
1207
|
|
|
1208
1208
|
@wp.kernel
|
|
1209
1209
|
def compute():
|
|
1210
|
-
t = wp.tile_arange(
|
|
1210
|
+
t = wp.tile_arange(64, 128)
|
|
1211
1211
|
s = wp.tile_min(t)
|
|
1212
1212
|
|
|
1213
|
-
print(
|
|
1213
|
+
print(s)
|
|
1214
1214
|
|
|
1215
1215
|
|
|
1216
|
-
wp.
|
|
1216
|
+
wp.launch_tiled(compute, dim=[1], inputs=[], block_dim=64)
|
|
1217
1217
|
|
|
1218
1218
|
Prints:
|
|
1219
1219
|
|
|
1220
1220
|
.. code-block:: text
|
|
1221
1221
|
|
|
1222
|
-
tile(m=1, n=1, storage=register) = [[
|
|
1222
|
+
tile(m=1, n=1, storage=register) = [[64 ]]
|
|
1223
1223
|
|
|
1224
1224
|
|
|
1225
1225
|
"""
|
|
@@ -1239,19 +1239,19 @@ def tile_max(a: Tile) -> Tile:
|
|
|
1239
1239
|
|
|
1240
1240
|
@wp.kernel
|
|
1241
1241
|
def compute():
|
|
1242
|
-
t = wp.tile_arange(
|
|
1243
|
-
s = wp.
|
|
1242
|
+
t = wp.tile_arange(64, 128)
|
|
1243
|
+
s = wp.tile_max(t)
|
|
1244
1244
|
|
|
1245
|
-
print(
|
|
1245
|
+
print(s)
|
|
1246
1246
|
|
|
1247
1247
|
|
|
1248
|
-
wp.
|
|
1248
|
+
wp.launch_tiled(compute, dim=[1], inputs=[], block_dim=64)
|
|
1249
1249
|
|
|
1250
1250
|
Prints:
|
|
1251
1251
|
|
|
1252
1252
|
.. code-block:: text
|
|
1253
1253
|
|
|
1254
|
-
tile(m=1, n=1, storage=register) = [[
|
|
1254
|
+
tile(m=1, n=1, storage=register) = [[127 ]]
|
|
1255
1255
|
|
|
1256
1256
|
|
|
1257
1257
|
"""
|
|
@@ -1259,7 +1259,7 @@ def tile_max(a: Tile) -> Tile:
|
|
|
1259
1259
|
|
|
1260
1260
|
|
|
1261
1261
|
@over
|
|
1262
|
-
def tile_reduce(op: Callable, a:
|
|
1262
|
+
def tile_reduce(op: Callable, a: Tile) -> Tile:
|
|
1263
1263
|
"""Apply a custom reduction operator across the tile.
|
|
1264
1264
|
|
|
1265
1265
|
This function cooperatively performs a reduction using the provided operator across the tile.
|
|
@@ -1280,7 +1280,7 @@ def tile_reduce(op: Callable, a: Any) -> Tile:
|
|
|
1280
1280
|
print(s)
|
|
1281
1281
|
|
|
1282
1282
|
|
|
1283
|
-
wp.
|
|
1283
|
+
wp.launch_tiled(factorial, dim=[1], inputs=[], block_dim=16)
|
|
1284
1284
|
|
|
1285
1285
|
Prints:
|
|
1286
1286
|
|
|
@@ -1293,7 +1293,7 @@ def tile_reduce(op: Callable, a: Any) -> Tile:
|
|
|
1293
1293
|
|
|
1294
1294
|
|
|
1295
1295
|
@over
|
|
1296
|
-
def tile_map(op: Callable, a:
|
|
1296
|
+
def tile_map(op: Callable, a: Tile) -> Tile:
|
|
1297
1297
|
"""Apply a unary function onto the tile.
|
|
1298
1298
|
|
|
1299
1299
|
This function cooperatively applies a unary function to each element of the tile using all threads in the block.
|
|
@@ -1314,7 +1314,7 @@ def tile_map(op: Callable, a: Any) -> Tile:
|
|
|
1314
1314
|
print(s)
|
|
1315
1315
|
|
|
1316
1316
|
|
|
1317
|
-
wp.
|
|
1317
|
+
wp.launch_tiled(compute, dim=[1], inputs=[], block_dim=16)
|
|
1318
1318
|
|
|
1319
1319
|
Prints:
|
|
1320
1320
|
|
|
@@ -1327,7 +1327,7 @@ def tile_map(op: Callable, a: Any) -> Tile:
|
|
|
1327
1327
|
|
|
1328
1328
|
|
|
1329
1329
|
@over
|
|
1330
|
-
def tile_map(op: Callable, a:
|
|
1330
|
+
def tile_map(op: Callable, a: Tile, b: Tile) -> Tile:
|
|
1331
1331
|
"""Apply a binary function onto the tile.
|
|
1332
1332
|
|
|
1333
1333
|
This function cooperatively applies a binary function to each element of the tiles using all threads in the block.
|
|
@@ -1352,7 +1352,7 @@ def tile_map(op: Callable, a: Any, b: Any) -> Tile:
|
|
|
1352
1352
|
print(s)
|
|
1353
1353
|
|
|
1354
1354
|
|
|
1355
|
-
wp.
|
|
1355
|
+
wp.launch_tiled(compute, dim=[1], inputs=[], block_dim=16)
|
|
1356
1356
|
|
|
1357
1357
|
Prints:
|
|
1358
1358
|
|
warp/tests/test_coloring.py
CHANGED
|
@@ -11,7 +11,12 @@ import numpy as np
|
|
|
11
11
|
import warp as wp
|
|
12
12
|
import warp.examples
|
|
13
13
|
import warp.sim
|
|
14
|
-
from warp.sim.graph_coloring import
|
|
14
|
+
from warp.sim.graph_coloring import (
|
|
15
|
+
ColoringAlgorithm,
|
|
16
|
+
construct_trimesh_graph_edges,
|
|
17
|
+
convert_to_color_groups,
|
|
18
|
+
validate_graph_coloring,
|
|
19
|
+
)
|
|
15
20
|
from warp.tests.unittest_utils import *
|
|
16
21
|
|
|
17
22
|
|
|
@@ -120,7 +125,7 @@ def test_coloring_trimesh(test, device):
|
|
|
120
125
|
ColoringAlgorithm.MCS.value,
|
|
121
126
|
particle_colors.__ctype__(),
|
|
122
127
|
)
|
|
123
|
-
wp.context.runtime.core.balance_coloring(
|
|
128
|
+
max_min_ratio = wp.context.runtime.core.balance_coloring(
|
|
124
129
|
model.particle_count,
|
|
125
130
|
edge_indices_cpu_with_bending.__ctype__(),
|
|
126
131
|
num_colors_mcs,
|
|
@@ -134,6 +139,11 @@ def test_coloring_trimesh(test, device):
|
|
|
134
139
|
device="cpu",
|
|
135
140
|
)
|
|
136
141
|
|
|
142
|
+
color_categories_balanced = convert_to_color_groups(num_colors_mcs, particle_colors)
|
|
143
|
+
|
|
144
|
+
color_sizes = np.array([c.shape[0] for c in color_categories_balanced], dtype=np.float32)
|
|
145
|
+
test.assertTrue(np.max(color_sizes) / np.min(color_sizes) <= max_min_ratio)
|
|
146
|
+
|
|
137
147
|
|
|
138
148
|
@unittest.skipUnless(USD_AVAILABLE, "Requires usd-core")
|
|
139
149
|
def test_combine_coloring(test, device):
|
warp/tests/test_examples.py
CHANGED
|
@@ -165,7 +165,9 @@ def add_example_test(
|
|
|
165
165
|
|
|
166
166
|
# with wp.ScopedTimer(f"{name}_{sanitize_identifier(device)}"):
|
|
167
167
|
# Run the script as a subprocess
|
|
168
|
-
result = subprocess.run(
|
|
168
|
+
result = subprocess.run(
|
|
169
|
+
command, capture_output=True, text=True, env=env_vars, timeout=test_timeout, check=False
|
|
170
|
+
)
|
|
169
171
|
|
|
170
172
|
# Check the return code (0 is standard for success)
|
|
171
173
|
test.assertEqual(
|
warp/tests/test_func.py
CHANGED
|
@@ -162,7 +162,7 @@ def user_func_with_defaults(a: int = 123, b: int = 234) -> int:
|
|
|
162
162
|
|
|
163
163
|
|
|
164
164
|
@wp.kernel
|
|
165
|
-
def
|
|
165
|
+
def user_func_with_defaults_kernel():
|
|
166
166
|
a = user_func_with_defaults()
|
|
167
167
|
wp.expect_eq(a, 357)
|
|
168
168
|
|
|
@@ -179,6 +179,25 @@ def test_user_func_with_defaults():
|
|
|
179
179
|
wp.expect_eq(e, 234)
|
|
180
180
|
|
|
181
181
|
|
|
182
|
+
def test_user_func_with_defaults(test, device):
|
|
183
|
+
wp.launch(user_func_with_defaults_kernel, dim=1, device=device)
|
|
184
|
+
|
|
185
|
+
a = user_func_with_defaults()
|
|
186
|
+
assert a == 357
|
|
187
|
+
|
|
188
|
+
b = user_func_with_defaults(111)
|
|
189
|
+
assert b == 345
|
|
190
|
+
|
|
191
|
+
c = user_func_with_defaults(111, 222)
|
|
192
|
+
assert c == 333
|
|
193
|
+
|
|
194
|
+
d = user_func_with_defaults(a=111)
|
|
195
|
+
assert d == 345
|
|
196
|
+
|
|
197
|
+
e = user_func_with_defaults(b=111)
|
|
198
|
+
assert e == 234
|
|
199
|
+
|
|
200
|
+
|
|
182
201
|
@wp.func
|
|
183
202
|
def user_func_return_multiple_values(a: int, b: float) -> Tuple[int, float]:
|
|
184
203
|
return a + a, b * b
|
|
@@ -406,9 +425,7 @@ add_function_test(TestFunc, func=test_func_closure_capture, name="test_func_clos
|
|
|
406
425
|
add_function_test(TestFunc, func=test_multi_valued_func, name="test_multi_valued_func", devices=devices)
|
|
407
426
|
add_kernel_test(TestFunc, kernel=test_func_defaults, name="test_func_defaults", dim=1, devices=devices)
|
|
408
427
|
add_kernel_test(TestFunc, kernel=test_builtin_shadowing, name="test_builtin_shadowing", dim=1, devices=devices)
|
|
409
|
-
|
|
410
|
-
TestFunc, kernel=test_user_func_with_defaults, name="test_user_func_with_defaults", dim=1, devices=devices
|
|
411
|
-
)
|
|
428
|
+
add_function_test(TestFunc, func=test_user_func_with_defaults, name="test_user_func_with_defaults", devices=devices)
|
|
412
429
|
add_kernel_test(
|
|
413
430
|
TestFunc,
|
|
414
431
|
kernel=test_user_func_return_multiple_values,
|