warp-lang 1.5.0__py3-none-manylinux2014_x86_64.whl → 1.5.1__py3-none-manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of warp-lang might be problematic. Click here for more details.

warp/native/cuda_util.cpp CHANGED
@@ -120,15 +120,17 @@ static inline int get_minor(int version)
120
120
  return (version % 1000) / 10;
121
121
  }
122
122
 
123
- static bool get_driver_entry_point(const char* name, void** pfn)
123
+ // Get versioned driver entry point. The version argument should match the function pointer type.
124
+ // For example, to initialize PFN_cuCtxCreate_v3020 use version 3020.
125
+ static bool get_driver_entry_point(const char* name, int version, void** pfn)
124
126
  {
125
127
  if (!pfn_cuGetProcAddress || !name || !pfn)
126
128
  return false;
127
129
 
128
130
  #if CUDA_VERSION < 12000
129
- CUresult r = pfn_cuGetProcAddress(name, pfn, WP_CUDA_DRIVER_VERSION, CU_GET_PROC_ADDRESS_DEFAULT);
131
+ CUresult r = pfn_cuGetProcAddress(name, pfn, version, CU_GET_PROC_ADDRESS_DEFAULT);
130
132
  #else
131
- CUresult r = pfn_cuGetProcAddress(name, pfn, WP_CUDA_DRIVER_VERSION, CU_GET_PROC_ADDRESS_DEFAULT, NULL);
133
+ CUresult r = pfn_cuGetProcAddress(name, pfn, version, CU_GET_PROC_ADDRESS_DEFAULT, NULL);
132
134
  #endif
133
135
 
134
136
  if (r != CUDA_SUCCESS)
@@ -170,7 +172,8 @@ bool init_cuda_driver()
170
172
 
171
173
  // check the CUDA driver version and report an error if it's too low
172
174
  int driver_version = 0;
173
- if (get_driver_entry_point("cuDriverGetVersion", &(void*&)pfn_cuDriverGetVersion) && check_cu(pfn_cuDriverGetVersion(&driver_version)))
175
+ if (get_driver_entry_point("cuDriverGetVersion", 2020, &(void*&)pfn_cuDriverGetVersion) &&
176
+ check_cu(pfn_cuDriverGetVersion(&driver_version)))
174
177
  {
175
178
  if (driver_version < WP_CUDA_DRIVER_VERSION)
176
179
  {
@@ -186,55 +189,55 @@ bool init_cuda_driver()
186
189
  }
187
190
 
188
191
  // initialize driver entry points
189
- get_driver_entry_point("cuGetErrorString", &(void*&)pfn_cuGetErrorString);
190
- get_driver_entry_point("cuGetErrorName", &(void*&)pfn_cuGetErrorName);
191
- get_driver_entry_point("cuInit", &(void*&)pfn_cuInit);
192
- get_driver_entry_point("cuDeviceGet", &(void*&)pfn_cuDeviceGet);
193
- get_driver_entry_point("cuDeviceGetCount", &(void*&)pfn_cuDeviceGetCount);
194
- get_driver_entry_point("cuDeviceGetName", &(void*&)pfn_cuDeviceGetName);
195
- get_driver_entry_point("cuDeviceGetAttribute", &(void*&)pfn_cuDeviceGetAttribute);
196
- get_driver_entry_point("cuDeviceGetUuid", &(void*&)pfn_cuDeviceGetUuid);
197
- get_driver_entry_point("cuDevicePrimaryCtxRetain", &(void*&)pfn_cuDevicePrimaryCtxRetain);
198
- get_driver_entry_point("cuDevicePrimaryCtxRelease", &(void*&)pfn_cuDevicePrimaryCtxRelease);
199
- get_driver_entry_point("cuDeviceCanAccessPeer", &(void*&)pfn_cuDeviceCanAccessPeer);
200
- get_driver_entry_point("cuMemGetInfo", &(void*&)pfn_cuMemGetInfo);
201
- get_driver_entry_point("cuCtxSetCurrent", &(void*&)pfn_cuCtxSetCurrent);
202
- get_driver_entry_point("cuCtxGetCurrent", &(void*&)pfn_cuCtxGetCurrent);
203
- get_driver_entry_point("cuCtxPushCurrent", &(void*&)pfn_cuCtxPushCurrent);
204
- get_driver_entry_point("cuCtxPopCurrent", &(void*&)pfn_cuCtxPopCurrent);
205
- get_driver_entry_point("cuCtxSynchronize", &(void*&)pfn_cuCtxSynchronize);
206
- get_driver_entry_point("cuCtxGetDevice", &(void*&)pfn_cuCtxGetDevice);
207
- get_driver_entry_point("cuCtxCreate", &(void*&)pfn_cuCtxCreate);
208
- get_driver_entry_point("cuCtxDestroy", &(void*&)pfn_cuCtxDestroy);
209
- get_driver_entry_point("cuCtxEnablePeerAccess", &(void*&)pfn_cuCtxEnablePeerAccess);
210
- get_driver_entry_point("cuCtxDisablePeerAccess", &(void*&)pfn_cuCtxDisablePeerAccess);
211
- get_driver_entry_point("cuStreamCreate", &(void*&)pfn_cuStreamCreate);
212
- get_driver_entry_point("cuStreamDestroy", &(void*&)pfn_cuStreamDestroy);
213
- get_driver_entry_point("cuStreamSynchronize", &(void*&)pfn_cuStreamSynchronize);
214
- get_driver_entry_point("cuStreamWaitEvent", &(void*&)pfn_cuStreamWaitEvent);
215
- get_driver_entry_point("cuStreamGetCtx", &(void*&)pfn_cuStreamGetCtx);
216
- get_driver_entry_point("cuStreamGetCaptureInfo", &(void*&)pfn_cuStreamGetCaptureInfo);
217
- get_driver_entry_point("cuStreamUpdateCaptureDependencies", &(void*&)pfn_cuStreamUpdateCaptureDependencies);
218
- get_driver_entry_point("cuStreamCreateWithPriority", &(void*&)pfn_cuStreamCreateWithPriority);
219
- get_driver_entry_point("cuStreamGetPriority", &(void*&)pfn_cuStreamGetPriority);
220
- get_driver_entry_point("cuEventCreate", &(void*&)pfn_cuEventCreate);
221
- get_driver_entry_point("cuEventDestroy", &(void*&)pfn_cuEventDestroy);
222
- get_driver_entry_point("cuEventRecord", &(void*&)pfn_cuEventRecord);
223
- get_driver_entry_point("cuEventRecordWithFlags", &(void*&)pfn_cuEventRecordWithFlags);
224
- get_driver_entry_point("cuEventSynchronize", &(void*&)pfn_cuEventSynchronize);
225
- get_driver_entry_point("cuModuleLoadDataEx", &(void*&)pfn_cuModuleLoadDataEx);
226
- get_driver_entry_point("cuModuleUnload", &(void*&)pfn_cuModuleUnload);
227
- get_driver_entry_point("cuModuleGetFunction", &(void*&)pfn_cuModuleGetFunction);
228
- get_driver_entry_point("cuLaunchKernel", &(void*&)pfn_cuLaunchKernel);
229
- get_driver_entry_point("cuMemcpyPeerAsync", &(void*&)pfn_cuMemcpyPeerAsync);
230
- get_driver_entry_point("cuPointerGetAttribute", &(void*&)pfn_cuPointerGetAttribute);
231
- get_driver_entry_point("cuGraphicsMapResources", &(void*&)pfn_cuGraphicsMapResources);
232
- get_driver_entry_point("cuGraphicsUnmapResources", &(void*&)pfn_cuGraphicsUnmapResources);
233
- get_driver_entry_point("cuGraphicsResourceGetMappedPointer", &(void*&)pfn_cuGraphicsResourceGetMappedPointer);
234
- get_driver_entry_point("cuGraphicsGLRegisterBuffer", &(void*&)pfn_cuGraphicsGLRegisterBuffer);
235
- get_driver_entry_point("cuGraphicsUnregisterResource", &(void*&)pfn_cuGraphicsUnregisterResource);
236
- get_driver_entry_point("cuModuleGetGlobal", &(void*&)pfn_cuModuleGetGlobal);
237
- get_driver_entry_point("cuFuncSetAttribute", &(void*&)pfn_cuFuncSetAttribute);
192
+ get_driver_entry_point("cuGetErrorString", 6000, &(void*&)pfn_cuGetErrorString);
193
+ get_driver_entry_point("cuGetErrorName", 6000, &(void*&)pfn_cuGetErrorName);
194
+ get_driver_entry_point("cuInit", 2000, &(void*&)pfn_cuInit);
195
+ get_driver_entry_point("cuDeviceGet", 2000, &(void*&)pfn_cuDeviceGet);
196
+ get_driver_entry_point("cuDeviceGetCount", 2000, &(void*&)pfn_cuDeviceGetCount);
197
+ get_driver_entry_point("cuDeviceGetName", 2000, &(void*&)pfn_cuDeviceGetName);
198
+ get_driver_entry_point("cuDeviceGetAttribute", 2000, &(void*&)pfn_cuDeviceGetAttribute);
199
+ get_driver_entry_point("cuDeviceGetUuid", 110400, &(void*&)pfn_cuDeviceGetUuid);
200
+ get_driver_entry_point("cuDevicePrimaryCtxRetain", 7000, &(void*&)pfn_cuDevicePrimaryCtxRetain);
201
+ get_driver_entry_point("cuDevicePrimaryCtxRelease", 11000, &(void*&)pfn_cuDevicePrimaryCtxRelease);
202
+ get_driver_entry_point("cuDeviceCanAccessPeer", 4000, &(void*&)pfn_cuDeviceCanAccessPeer);
203
+ get_driver_entry_point("cuMemGetInfo", 3020, &(void*&)pfn_cuMemGetInfo);
204
+ get_driver_entry_point("cuCtxSetCurrent", 4000, &(void*&)pfn_cuCtxSetCurrent);
205
+ get_driver_entry_point("cuCtxGetCurrent", 4000, &(void*&)pfn_cuCtxGetCurrent);
206
+ get_driver_entry_point("cuCtxPushCurrent", 4000, &(void*&)pfn_cuCtxPushCurrent);
207
+ get_driver_entry_point("cuCtxPopCurrent", 4000, &(void*&)pfn_cuCtxPopCurrent);
208
+ get_driver_entry_point("cuCtxSynchronize", 2000, &(void*&)pfn_cuCtxSynchronize);
209
+ get_driver_entry_point("cuCtxGetDevice", 2000, &(void*&)pfn_cuCtxGetDevice);
210
+ get_driver_entry_point("cuCtxCreate", 3020, &(void*&)pfn_cuCtxCreate);
211
+ get_driver_entry_point("cuCtxDestroy", 4000, &(void*&)pfn_cuCtxDestroy);
212
+ get_driver_entry_point("cuCtxEnablePeerAccess", 4000, &(void*&)pfn_cuCtxEnablePeerAccess);
213
+ get_driver_entry_point("cuCtxDisablePeerAccess", 4000, &(void*&)pfn_cuCtxDisablePeerAccess);
214
+ get_driver_entry_point("cuStreamCreate", 2000, &(void*&)pfn_cuStreamCreate);
215
+ get_driver_entry_point("cuStreamDestroy", 4000, &(void*&)pfn_cuStreamDestroy);
216
+ get_driver_entry_point("cuStreamSynchronize", 2000, &(void*&)pfn_cuStreamSynchronize);
217
+ get_driver_entry_point("cuStreamWaitEvent", 3020, &(void*&)pfn_cuStreamWaitEvent);
218
+ get_driver_entry_point("cuStreamGetCtx", 9020, &(void*&)pfn_cuStreamGetCtx);
219
+ get_driver_entry_point("cuStreamGetCaptureInfo", 11030, &(void*&)pfn_cuStreamGetCaptureInfo);
220
+ get_driver_entry_point("cuStreamUpdateCaptureDependencies", 11030, &(void*&)pfn_cuStreamUpdateCaptureDependencies);
221
+ get_driver_entry_point("cuStreamCreateWithPriority", 5050, &(void*&)pfn_cuStreamCreateWithPriority);
222
+ get_driver_entry_point("cuStreamGetPriority", 5050, &(void*&)pfn_cuStreamGetPriority);
223
+ get_driver_entry_point("cuEventCreate", 2000, &(void*&)pfn_cuEventCreate);
224
+ get_driver_entry_point("cuEventDestroy", 4000, &(void*&)pfn_cuEventDestroy);
225
+ get_driver_entry_point("cuEventRecord", 2000, &(void*&)pfn_cuEventRecord);
226
+ get_driver_entry_point("cuEventRecordWithFlags", 11010, &(void*&)pfn_cuEventRecordWithFlags);
227
+ get_driver_entry_point("cuEventSynchronize", 2000, &(void*&)pfn_cuEventSynchronize);
228
+ get_driver_entry_point("cuModuleLoadDataEx", 2010, &(void*&)pfn_cuModuleLoadDataEx);
229
+ get_driver_entry_point("cuModuleUnload", 2000, &(void*&)pfn_cuModuleUnload);
230
+ get_driver_entry_point("cuModuleGetFunction", 2000, &(void*&)pfn_cuModuleGetFunction);
231
+ get_driver_entry_point("cuLaunchKernel", 4000, &(void*&)pfn_cuLaunchKernel);
232
+ get_driver_entry_point("cuMemcpyPeerAsync", 4000, &(void*&)pfn_cuMemcpyPeerAsync);
233
+ get_driver_entry_point("cuPointerGetAttribute", 4000, &(void*&)pfn_cuPointerGetAttribute);
234
+ get_driver_entry_point("cuGraphicsMapResources", 3000, &(void*&)pfn_cuGraphicsMapResources);
235
+ get_driver_entry_point("cuGraphicsUnmapResources", 3000, &(void*&)pfn_cuGraphicsUnmapResources);
236
+ get_driver_entry_point("cuGraphicsResourceGetMappedPointer", 3020, &(void*&)pfn_cuGraphicsResourceGetMappedPointer);
237
+ get_driver_entry_point("cuGraphicsGLRegisterBuffer", 3000, &(void*&)pfn_cuGraphicsGLRegisterBuffer);
238
+ get_driver_entry_point("cuGraphicsUnregisterResource", 3000, &(void*&)pfn_cuGraphicsUnregisterResource);
239
+ get_driver_entry_point("cuModuleGetGlobal", 3020, &(void*&)pfn_cuModuleGetGlobal);
240
+ get_driver_entry_point("cuFuncSetAttribute", 9000, &(void*&)pfn_cuFuncSetAttribute);
238
241
 
239
242
  if (pfn_cuInit)
240
243
  cuda_driver_initialized = check_cu(pfn_cuInit(0));
warp/native/tile.h CHANGED
@@ -1125,8 +1125,6 @@ inline CUDA_CALLABLE auto untile(Tile& tile)
1125
1125
  }
1126
1126
  }
1127
1127
 
1128
-
1129
-
1130
1128
  template <typename Tile, typename Value>
1131
1129
  inline CUDA_CALLABLE void adj_untile(Tile& tile, Tile& adj_tile, Value& adj_ret)
1132
1130
  {
@@ -1156,7 +1154,7 @@ inline CUDA_CALLABLE auto tile_zeros()
1156
1154
  return T(0);
1157
1155
  }
1158
1156
 
1159
- // zero initialized tile
1157
+ // one-initialized tile
1160
1158
  template <typename T, int M, int N>
1161
1159
  inline CUDA_CALLABLE auto tile_ones()
1162
1160
  {
@@ -1164,7 +1162,7 @@ inline CUDA_CALLABLE auto tile_ones()
1164
1162
  return T(1);
1165
1163
  }
1166
1164
 
1167
- // zero initialized tile
1165
+ // tile with evenly spaced values
1168
1166
  template <typename T, int M, int N>
1169
1167
  inline CUDA_CALLABLE auto tile_arange(T start, T stop, T step)
1170
1168
  {
@@ -1220,7 +1218,6 @@ inline CUDA_CALLABLE void tile_store(array_t<T>& dest, int x, int y, Tile& src)
1220
1218
  src.copy_to_global(dest, x, y);
1221
1219
  }
1222
1220
 
1223
- // entry point for store
1224
1221
  template <typename T, typename Tile>
1225
1222
  inline CUDA_CALLABLE auto tile_atomic_add(array_t<T>& dest, int x, int y, Tile& src)
1226
1223
  {
@@ -1040,7 +1040,7 @@ class OpenGLRenderer:
1040
1040
  self.render_depth = render_depth
1041
1041
  self.enable_backface_culling = enable_backface_culling
1042
1042
 
1043
- self._device = wp.get_cuda_device()
1043
+ self._device = wp.get_preferred_device()
1044
1044
  self._title = title
1045
1045
 
1046
1046
  self.window = pyglet.window.Window(
@@ -2278,14 +2278,9 @@ Instances: {len(self._instances)}"""
2278
2278
  colors1 = np.array(colors1, dtype=np.float32)
2279
2279
  colors2 = np.array(colors2, dtype=np.float32)
2280
2280
 
2281
- # create buffer for checkerboard colors
2282
- self._instance_color1_buffer = gl.GLuint()
2283
- gl.glGenBuffers(1, self._instance_color1_buffer)
2284
2281
  gl.glBindBuffer(gl.GL_ARRAY_BUFFER, self._instance_color1_buffer)
2285
2282
  gl.glBufferData(gl.GL_ARRAY_BUFFER, colors1.nbytes, colors1.ctypes.data, gl.GL_STATIC_DRAW)
2286
2283
 
2287
- self._instance_color2_buffer = gl.GLuint()
2288
- gl.glGenBuffers(1, self._instance_color2_buffer)
2289
2284
  gl.glBindBuffer(gl.GL_ARRAY_BUFFER, self._instance_color2_buffer)
2290
2285
  gl.glBufferData(gl.GL_ARRAY_BUFFER, colors2.nbytes, colors2.ctypes.data, gl.GL_STATIC_DRAW)
2291
2286
 
@@ -2322,6 +2317,12 @@ Instances: {len(self._instances)}"""
2322
2317
  int(self._instance_transform_gl_buffer.value), self._device
2323
2318
  )
2324
2319
 
2320
+ # create color buffers
2321
+ self._instance_color1_buffer = gl.GLuint()
2322
+ gl.glGenBuffers(1, self._instance_color1_buffer)
2323
+ self._instance_color2_buffer = gl.GLuint()
2324
+ gl.glGenBuffers(1, self._instance_color2_buffer)
2325
+
2325
2326
  self.update_instance_colors()
2326
2327
 
2327
2328
  # set up instance attribute pointers
warp/sim/import_urdf.py CHANGED
@@ -211,14 +211,14 @@ def parse_urdf(
211
211
  if hasattr(m, "geometry"):
212
212
  # multiple meshes are contained in a scene
213
213
  for geom in m.geometry.values():
214
- vertices = np.array(geom.vertices, dtype=np.float32) * scaling
215
- faces = np.array(geom.faces.flatten(), dtype=np.int32)
216
- mesh = Mesh(vertices, faces)
214
+ geom_vertices = np.array(geom.vertices, dtype=np.float32) * scaling
215
+ geom_faces = np.array(geom.faces.flatten(), dtype=np.int32)
216
+ geom_mesh = Mesh(geom_vertices, geom_faces)
217
217
  s = builder.add_shape_mesh(
218
218
  body=link,
219
219
  pos=wp.vec3(tf.p),
220
220
  rot=wp.quat(tf.q),
221
- mesh=mesh,
221
+ mesh=geom_mesh,
222
222
  density=density,
223
223
  is_visible=visible,
224
224
  has_ground_collision=not just_visual,
@@ -228,14 +228,14 @@ def parse_urdf(
228
228
  shapes.append(s)
229
229
  else:
230
230
  # a single mesh
231
- vertices = np.array(m.vertices, dtype=np.float32) * scaling
232
- faces = np.array(m.faces.flatten(), dtype=np.int32)
233
- mesh = Mesh(vertices, faces)
231
+ m_vertices = np.array(m.vertices, dtype=np.float32) * scaling
232
+ m_faces = np.array(m.faces.flatten(), dtype=np.int32)
233
+ m_mesh = Mesh(m_vertices, m_faces)
234
234
  s = builder.add_shape_mesh(
235
235
  body=link,
236
236
  pos=wp.vec3(tf.p),
237
237
  rot=wp.quat(tf.q),
238
- mesh=mesh,
238
+ mesh=m_mesh,
239
239
  density=density,
240
240
  is_visible=visible,
241
241
  has_ground_collision=not just_visual,
warp/sim/model.py CHANGED
@@ -578,14 +578,14 @@ class Model:
578
578
 
579
579
  This setting is not supported by :class:`FeatherstoneIntegrator`.
580
580
 
581
- joint_limit_lower (array): Joint lower position limits, shape [joint_count], float
582
- joint_limit_upper (array): Joint upper position limits, shape [joint_count], float
583
- joint_limit_ke (array): Joint position limit stiffness (used by the Euler integrators), shape [joint_count], float
584
- joint_limit_kd (array): Joint position limit damping (used by the Euler integrators), shape [joint_count], float
581
+ joint_limit_lower (array): Joint lower position limits, shape [joint_axis_count], float
582
+ joint_limit_upper (array): Joint upper position limits, shape [joint_axis_count], float
583
+ joint_limit_ke (array): Joint position limit stiffness (used by the Euler integrators), shape [joint_axis_count], float
584
+ joint_limit_kd (array): Joint position limit damping (used by the Euler integrators), shape [joint_axis_count], float
585
585
  joint_twist_lower (array): Joint lower twist limit, shape [joint_count], float
586
586
  joint_twist_upper (array): Joint upper twist limit, shape [joint_count], float
587
- joint_q_start (array): Start index of the first position coordinate per joint, shape [joint_count], int
588
- joint_qd_start (array): Start index of the first velocity coordinate per joint, shape [joint_count], int
587
+ joint_q_start (array): Start index of the first position coordinate per joint (note the last value is an additional sentinel entry to allow for querying the q dimensionality of joint i via ``joint_q_start[i+1] - joint_q_start[i]``), shape [joint_count + 1], int
588
+ joint_qd_start (array): Start index of the first velocity coordinate per joint (note the last value is an additional sentinel entry to allow for querying the qd dimensionality of joint i via ``joint_qd_start[i+1] - joint_qd_start[i]``), shape [joint_count + 1], int
589
589
  articulation_start (array): Articulation start index, shape [articulation_count], int
590
590
  joint_name (list): Joint names, shape [joint_count], str
591
591
  joint_attach_ke (float): Joint attachment force stiffness (used by :class:`SemiImplicitIntegrator`)
@@ -1442,12 +1442,14 @@ class ModelBuilder:
1442
1442
  self.shape_collision_filter_pairs.add((i + shape_count, j + shape_count))
1443
1443
  for group, shapes in builder.shape_collision_group_map.items():
1444
1444
  if separate_collision_group:
1445
- group = self.last_collision_group + 1
1445
+ extend_group = self.last_collision_group + 1
1446
1446
  else:
1447
- group = group + self.last_collision_group if group > -1 else -1
1448
- if group not in self.shape_collision_group_map:
1449
- self.shape_collision_group_map[group] = []
1450
- self.shape_collision_group_map[group].extend([s + shape_count for s in shapes])
1447
+ extend_group = group + self.last_collision_group if group > -1 else -1
1448
+
1449
+ if extend_group not in self.shape_collision_group_map:
1450
+ self.shape_collision_group_map[extend_group] = []
1451
+
1452
+ self.shape_collision_group_map[extend_group].extend([s + shape_count for s in shapes])
1451
1453
 
1452
1454
  # update last collision group counter
1453
1455
  if separate_collision_group:
@@ -2616,11 +2618,12 @@ class ModelBuilder:
2616
2618
  joint_remap[joint["original_id"]] = i
2617
2619
  # update articulation_start
2618
2620
  for i, old_i in enumerate(self.articulation_start):
2619
- while old_i not in joint_remap:
2620
- old_i += 1
2621
- if old_i >= self.joint_count:
2621
+ start_i = old_i
2622
+ while start_i not in joint_remap:
2623
+ start_i += 1
2624
+ if start_i >= self.joint_count:
2622
2625
  break
2623
- self.articulation_start[i] = joint_remap.get(old_i, old_i)
2626
+ self.articulation_start[i] = joint_remap.get(start_i, start_i)
2624
2627
  # remove empty articulation starts, i.e. where the start and end are the same
2625
2628
  self.articulation_start = list(set(self.articulation_start))
2626
2629
 
@@ -4269,8 +4272,7 @@ class ModelBuilder:
4269
4272
  pos = wp.vec3(pos[0], pos[1], pos[2])
4270
4273
  # add particles
4271
4274
  for v in vertices:
4272
- v = wp.vec3(v[0], v[1], v[2])
4273
- p = wp.quat_rotate(rot, v * scale) + pos
4275
+ p = wp.quat_rotate(rot, wp.vec3(v[0], v[1], v[2]) * scale) + pos
4274
4276
 
4275
4277
  self.add_particle(p, vel, 0.0)
4276
4278
 
@@ -4402,16 +4404,18 @@ class ModelBuilder:
4402
4404
  balance_colors: Whether to apply the color balancing algorithm to balance the size of each color
4403
4405
  target_max_min_color_ratio: the color balancing algorithm will stop when the ratio between the largest color and
4404
4406
  the smallest color reaches this value
4405
- algorithm: Value should an enum type of ColoringAlgorithm, otherwise it will raise an error. ColoringAlgorithm.mcs means using the MCS coloring algorithm,
4407
+ algorithm: Value should be an enum type of ColoringAlgorithm, otherwise it will raise an error. ColoringAlgorithm.mcs means using the MCS coloring algorithm,
4406
4408
  while ColoringAlgorithm.ordered_greedy means using the degree-ordered greedy algorithm. The MCS algorithm typically generates 30% to 50% fewer colors
4407
4409
  compared to the ordered greedy algorithm, while maintaining the same linear complexity. Although MCS has a constant overhead that makes it about twice
4408
4410
  as slow as the greedy algorithm, it produces significantly better coloring results. We recommend using MCS, especially if coloring is only part of the
4409
- preprocessing stage.e.
4411
+ preprocessing.
4410
4412
 
4411
4413
  Note:
4412
4414
 
4413
4415
  References to the coloring algorithm:
4416
+
4414
4417
  MCS: Pereira, F. M. Q., & Palsberg, J. (2005, November). Register allocation via coloring of chordal graphs. In Asian Symposium on Programming Languages and Systems (pp. 315-329). Berlin, Heidelberg: Springer Berlin Heidelberg.
4418
+
4415
4419
  Ordered Greedy: Ton-That, Q. M., Kry, P. G., & Andrews, S. (2023). Parallel block Neo-Hookean XPBD using graph clustering. Computers & Graphics, 110, 1-10.
4416
4420
 
4417
4421
  """
warp/sparse.py CHANGED
@@ -8,7 +8,7 @@ from warp.types import Array, Cols, Rows, Scalar, Vector
8
8
 
9
9
  # typing hints
10
10
 
11
- _BlockType = TypeVar("BlockType")
11
+ _BlockType = TypeVar("BlockType") # noqa: PLC0132
12
12
 
13
13
 
14
14
  class _MatrixBlockType(Generic[Rows, Cols, Scalar]):
warp/stubs.py CHANGED
@@ -975,7 +975,7 @@ def tile_load(a: Array[Any], i: int32, j: int32, m: int32, n: int32, storage: st
975
975
 
976
976
 
977
977
  @over
978
- def tile_store(a: Array[Any], i: int32, t: Any):
978
+ def tile_store(a: Array[Any], i: int32, t: Tile):
979
979
  """Stores a 1D tile to a global memory array.
980
980
 
981
981
  This method will cooperatively store a tile to global memory using all threads in the block.
@@ -988,7 +988,7 @@ def tile_store(a: Array[Any], i: int32, t: Any):
988
988
 
989
989
 
990
990
  @over
991
- def tile_store(a: Array[Any], i: int32, j: int32, t: Any):
991
+ def tile_store(a: Array[Any], i: int32, j: int32, t: Tile):
992
992
  """Stores a tile to a global memory array.
993
993
 
994
994
  This method will cooperatively store a tile to global memory using all threads in the block.
@@ -1002,7 +1002,7 @@ def tile_store(a: Array[Any], i: int32, j: int32, t: Any):
1002
1002
 
1003
1003
 
1004
1004
  @over
1005
- def tile_atomic_add(a: Array[Any], x: int32, y: int32, t: Any) -> Tile:
1005
+ def tile_atomic_add(a: Array[Any], x: int32, y: int32, t: Tile) -> Tile:
1006
1006
  """Atomically add a tile to the array `a`, each element will be updated atomically.
1007
1007
 
1008
1008
  :param a: Array in global memory, should have the same ``dtype`` as the input tile
@@ -1077,7 +1077,7 @@ def tile(x: Any) -> Tile:
1077
1077
 
1078
1078
 
1079
1079
  @over
1080
- def untile(a: Any) -> Scalar:
1080
+ def untile(a: Tile) -> Scalar:
1081
1081
  """Convert a Tile back to per-thread values.
1082
1082
 
1083
1083
  This function converts a block-wide tile back to per-thread values.
@@ -1100,7 +1100,7 @@ def untile(a: Any) -> Scalar:
1100
1100
  t = wp.tile(i) * 2
1101
1101
 
1102
1102
  # convert back to per-thread values
1103
- s = wp.untile()
1103
+ s = wp.untile(t)
1104
1104
 
1105
1105
  print(s)
1106
1106
 
@@ -1154,7 +1154,7 @@ def tile_transpose(a: Tile) -> Tile:
1154
1154
  def tile_broadcast(a: Tile, m: int32, n: int32) -> Tile:
1155
1155
  """Broadcast a tile.
1156
1156
 
1157
- This method will attempt to broadcast the input tile ``a`` to the destination shape (m, n), broadcasting follows NumPy broadcast rules.
1157
+ This function will attempt to broadcast the input tile ``a`` to the destination shape (m, n), broadcasting follows NumPy broadcast rules.
1158
1158
 
1159
1159
  :param a: Tile to broadcast
1160
1160
  :returns: Tile with broadcast ``shape=(m, n)``
@@ -1178,10 +1178,10 @@ def tile_sum(a: Tile) -> Tile:
1178
1178
  t = wp.tile_ones(dtype=float, m=16, n=16)
1179
1179
  s = wp.tile_sum(t)
1180
1180
 
1181
- print(t)
1181
+ print(s)
1182
1182
 
1183
1183
 
1184
- wp.launch(compute, dim=[64], inputs=[])
1184
+ wp.launch_tiled(compute, dim=[1], inputs=[], block_dim=64)
1185
1185
 
1186
1186
  Prints:
1187
1187
 
@@ -1207,19 +1207,19 @@ def tile_min(a: Tile) -> Tile:
1207
1207
 
1208
1208
  @wp.kernel
1209
1209
  def compute():
1210
- t = wp.tile_arange(start=--10, stop=10, dtype=float)
1210
+ t = wp.tile_arange(64, 128)
1211
1211
  s = wp.tile_min(t)
1212
1212
 
1213
- print(t)
1213
+ print(s)
1214
1214
 
1215
1215
 
1216
- wp.launch(compute, dim=[64], inputs=[])
1216
+ wp.launch_tiled(compute, dim=[1], inputs=[], block_dim=64)
1217
1217
 
1218
1218
  Prints:
1219
1219
 
1220
1220
  .. code-block:: text
1221
1221
 
1222
- tile(m=1, n=1, storage=register) = [[-10]]
1222
+ tile(m=1, n=1, storage=register) = [[64 ]]
1223
1223
 
1224
1224
 
1225
1225
  """
@@ -1239,19 +1239,19 @@ def tile_max(a: Tile) -> Tile:
1239
1239
 
1240
1240
  @wp.kernel
1241
1241
  def compute():
1242
- t = wp.tile_arange(start=--10, stop=10, dtype=float)
1243
- s = wp.tile_min(t)
1242
+ t = wp.tile_arange(64, 128)
1243
+ s = wp.tile_max(t)
1244
1244
 
1245
- print(t)
1245
+ print(s)
1246
1246
 
1247
1247
 
1248
- wp.launch(compute, dim=[64], inputs=[])
1248
+ wp.launch_tiled(compute, dim=[1], inputs=[], block_dim=64)
1249
1249
 
1250
1250
  Prints:
1251
1251
 
1252
1252
  .. code-block:: text
1253
1253
 
1254
- tile(m=1, n=1, storage=register) = [[10]]
1254
+ tile(m=1, n=1, storage=register) = [[127 ]]
1255
1255
 
1256
1256
 
1257
1257
  """
@@ -1259,7 +1259,7 @@ def tile_max(a: Tile) -> Tile:
1259
1259
 
1260
1260
 
1261
1261
  @over
1262
- def tile_reduce(op: Callable, a: Any) -> Tile:
1262
+ def tile_reduce(op: Callable, a: Tile) -> Tile:
1263
1263
  """Apply a custom reduction operator across the tile.
1264
1264
 
1265
1265
  This function cooperatively performs a reduction using the provided operator across the tile.
@@ -1280,7 +1280,7 @@ def tile_reduce(op: Callable, a: Any) -> Tile:
1280
1280
  print(s)
1281
1281
 
1282
1282
 
1283
- wp.launch(factorial, dim=[16], inputs=[], block_dim=16)
1283
+ wp.launch_tiled(factorial, dim=[1], inputs=[], block_dim=16)
1284
1284
 
1285
1285
  Prints:
1286
1286
 
@@ -1293,7 +1293,7 @@ def tile_reduce(op: Callable, a: Any) -> Tile:
1293
1293
 
1294
1294
 
1295
1295
  @over
1296
- def tile_map(op: Callable, a: Any) -> Tile:
1296
+ def tile_map(op: Callable, a: Tile) -> Tile:
1297
1297
  """Apply a unary function onto the tile.
1298
1298
 
1299
1299
  This function cooperatively applies a unary function to each element of the tile using all threads in the block.
@@ -1314,7 +1314,7 @@ def tile_map(op: Callable, a: Any) -> Tile:
1314
1314
  print(s)
1315
1315
 
1316
1316
 
1317
- wp.launch(compute, dim=[16], inputs=[])
1317
+ wp.launch_tiled(compute, dim=[1], inputs=[], block_dim=16)
1318
1318
 
1319
1319
  Prints:
1320
1320
 
@@ -1327,7 +1327,7 @@ def tile_map(op: Callable, a: Any) -> Tile:
1327
1327
 
1328
1328
 
1329
1329
  @over
1330
- def tile_map(op: Callable, a: Any, b: Any) -> Tile:
1330
+ def tile_map(op: Callable, a: Tile, b: Tile) -> Tile:
1331
1331
  """Apply a binary function onto the tile.
1332
1332
 
1333
1333
  This function cooperatively applies a binary function to each element of the tiles using all threads in the block.
@@ -1352,7 +1352,7 @@ def tile_map(op: Callable, a: Any, b: Any) -> Tile:
1352
1352
  print(s)
1353
1353
 
1354
1354
 
1355
- wp.launch(compute, dim=[16], inputs=[])
1355
+ wp.launch_tiled(compute, dim=[1], inputs=[], block_dim=16)
1356
1356
 
1357
1357
  Prints:
1358
1358
 
@@ -11,7 +11,12 @@ import numpy as np
11
11
  import warp as wp
12
12
  import warp.examples
13
13
  import warp.sim
14
- from warp.sim.graph_coloring import ColoringAlgorithm, construct_trimesh_graph_edges, validate_graph_coloring
14
+ from warp.sim.graph_coloring import (
15
+ ColoringAlgorithm,
16
+ construct_trimesh_graph_edges,
17
+ convert_to_color_groups,
18
+ validate_graph_coloring,
19
+ )
15
20
  from warp.tests.unittest_utils import *
16
21
 
17
22
 
@@ -120,7 +125,7 @@ def test_coloring_trimesh(test, device):
120
125
  ColoringAlgorithm.MCS.value,
121
126
  particle_colors.__ctype__(),
122
127
  )
123
- wp.context.runtime.core.balance_coloring(
128
+ max_min_ratio = wp.context.runtime.core.balance_coloring(
124
129
  model.particle_count,
125
130
  edge_indices_cpu_with_bending.__ctype__(),
126
131
  num_colors_mcs,
@@ -134,6 +139,11 @@ def test_coloring_trimesh(test, device):
134
139
  device="cpu",
135
140
  )
136
141
 
142
+ color_categories_balanced = convert_to_color_groups(num_colors_mcs, particle_colors)
143
+
144
+ color_sizes = np.array([c.shape[0] for c in color_categories_balanced], dtype=np.float32)
145
+ test.assertTrue(np.max(color_sizes) / np.min(color_sizes) <= max_min_ratio)
146
+
137
147
 
138
148
  @unittest.skipUnless(USD_AVAILABLE, "Requires usd-core")
139
149
  def test_combine_coloring(test, device):
@@ -165,7 +165,9 @@ def add_example_test(
165
165
 
166
166
  # with wp.ScopedTimer(f"{name}_{sanitize_identifier(device)}"):
167
167
  # Run the script as a subprocess
168
- result = subprocess.run(command, capture_output=True, text=True, env=env_vars, timeout=test_timeout)
168
+ result = subprocess.run(
169
+ command, capture_output=True, text=True, env=env_vars, timeout=test_timeout, check=False
170
+ )
169
171
 
170
172
  # Check the return code (0 is standard for success)
171
173
  test.assertEqual(
warp/tests/test_func.py CHANGED
@@ -162,7 +162,7 @@ def user_func_with_defaults(a: int = 123, b: int = 234) -> int:
162
162
 
163
163
 
164
164
  @wp.kernel
165
- def test_user_func_with_defaults():
165
+ def user_func_with_defaults_kernel():
166
166
  a = user_func_with_defaults()
167
167
  wp.expect_eq(a, 357)
168
168
 
@@ -179,6 +179,25 @@ def test_user_func_with_defaults():
179
179
  wp.expect_eq(e, 234)
180
180
 
181
181
 
182
+ def test_user_func_with_defaults(test, device):
183
+ wp.launch(user_func_with_defaults_kernel, dim=1, device=device)
184
+
185
+ a = user_func_with_defaults()
186
+ assert a == 357
187
+
188
+ b = user_func_with_defaults(111)
189
+ assert b == 345
190
+
191
+ c = user_func_with_defaults(111, 222)
192
+ assert c == 333
193
+
194
+ d = user_func_with_defaults(a=111)
195
+ assert d == 345
196
+
197
+ e = user_func_with_defaults(b=111)
198
+ assert e == 234
199
+
200
+
182
201
  @wp.func
183
202
  def user_func_return_multiple_values(a: int, b: float) -> Tuple[int, float]:
184
203
  return a + a, b * b
@@ -406,9 +425,7 @@ add_function_test(TestFunc, func=test_func_closure_capture, name="test_func_clos
406
425
  add_function_test(TestFunc, func=test_multi_valued_func, name="test_multi_valued_func", devices=devices)
407
426
  add_kernel_test(TestFunc, kernel=test_func_defaults, name="test_func_defaults", dim=1, devices=devices)
408
427
  add_kernel_test(TestFunc, kernel=test_builtin_shadowing, name="test_builtin_shadowing", dim=1, devices=devices)
409
- add_kernel_test(
410
- TestFunc, kernel=test_user_func_with_defaults, name="test_user_func_with_defaults", dim=1, devices=devices
411
- )
428
+ add_function_test(TestFunc, func=test_user_func_with_defaults, name="test_user_func_with_defaults", devices=devices)
412
429
  add_kernel_test(
413
430
  TestFunc,
414
431
  kernel=test_user_func_return_multiple_values,