warp-lang 1.3.3__py3-none-win_amd64.whl → 1.4.1__py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of warp-lang might be problematic. Click here for more details.
- warp/__init__.py +6 -0
- warp/autograd.py +59 -6
- warp/bin/warp-clang.dll +0 -0
- warp/bin/warp.dll +0 -0
- warp/build_dll.py +8 -10
- warp/builtins.py +103 -3
- warp/codegen.py +447 -53
- warp/config.py +1 -1
- warp/context.py +682 -405
- warp/dlpack.py +2 -0
- warp/examples/benchmarks/benchmark_cloth.py +10 -0
- warp/examples/core/example_render_opengl.py +12 -10
- warp/examples/fem/example_adaptive_grid.py +251 -0
- warp/examples/fem/example_apic_fluid.py +1 -1
- warp/examples/fem/example_diffusion_3d.py +2 -2
- warp/examples/fem/example_magnetostatics.py +1 -1
- warp/examples/fem/example_streamlines.py +1 -0
- warp/examples/fem/utils.py +25 -5
- warp/examples/sim/example_cloth.py +50 -6
- warp/fem/__init__.py +2 -0
- warp/fem/adaptivity.py +493 -0
- warp/fem/field/field.py +2 -1
- warp/fem/field/nodal_field.py +18 -26
- warp/fem/field/test.py +4 -4
- warp/fem/field/trial.py +4 -4
- warp/fem/geometry/__init__.py +1 -0
- warp/fem/geometry/adaptive_nanogrid.py +843 -0
- warp/fem/geometry/nanogrid.py +55 -28
- warp/fem/space/__init__.py +1 -1
- warp/fem/space/nanogrid_function_space.py +69 -35
- warp/fem/utils.py +118 -107
- warp/jax_experimental.py +28 -15
- warp/native/array.h +0 -1
- warp/native/builtin.h +103 -6
- warp/native/bvh.cu +4 -2
- warp/native/cuda_util.cpp +14 -0
- warp/native/cuda_util.h +2 -0
- warp/native/error.cpp +4 -2
- warp/native/exports.h +99 -0
- warp/native/mat.h +97 -0
- warp/native/mesh.cpp +36 -0
- warp/native/mesh.cu +52 -1
- warp/native/mesh.h +1 -0
- warp/native/quat.h +43 -0
- warp/native/range.h +11 -2
- warp/native/spatial.h +6 -0
- warp/native/vec.h +74 -0
- warp/native/warp.cpp +2 -1
- warp/native/warp.cu +10 -3
- warp/native/warp.h +8 -1
- warp/paddle.py +382 -0
- warp/sim/__init__.py +1 -0
- warp/sim/collide.py +519 -0
- warp/sim/integrator_euler.py +18 -5
- warp/sim/integrator_featherstone.py +5 -5
- warp/sim/integrator_vbd.py +1026 -0
- warp/sim/integrator_xpbd.py +2 -6
- warp/sim/model.py +50 -25
- warp/sparse.py +9 -7
- warp/stubs.py +459 -0
- warp/tape.py +2 -0
- warp/tests/aux_test_dependent.py +1 -0
- warp/tests/aux_test_name_clash1.py +32 -0
- warp/tests/aux_test_name_clash2.py +32 -0
- warp/tests/aux_test_square.py +1 -0
- warp/tests/test_array.py +188 -0
- warp/tests/test_async.py +3 -3
- warp/tests/test_atomic.py +6 -0
- warp/tests/test_closest_point_edge_edge.py +93 -1
- warp/tests/test_codegen.py +93 -15
- warp/tests/test_codegen_instancing.py +1457 -0
- warp/tests/test_collision.py +486 -0
- warp/tests/test_compile_consts.py +3 -28
- warp/tests/test_dlpack.py +170 -0
- warp/tests/test_examples.py +22 -8
- warp/tests/test_fast_math.py +10 -4
- warp/tests/test_fem.py +81 -1
- warp/tests/test_func.py +46 -0
- warp/tests/test_implicit_init.py +49 -0
- warp/tests/test_jax.py +58 -0
- warp/tests/test_mat.py +84 -0
- warp/tests/test_mesh_query_point.py +188 -0
- warp/tests/test_model.py +13 -0
- warp/tests/test_module_hashing.py +40 -0
- warp/tests/test_multigpu.py +3 -3
- warp/tests/test_overwrite.py +8 -0
- warp/tests/test_paddle.py +852 -0
- warp/tests/test_print.py +89 -0
- warp/tests/test_quat.py +111 -0
- warp/tests/test_reload.py +31 -1
- warp/tests/test_scalar_ops.py +2 -0
- warp/tests/test_static.py +568 -0
- warp/tests/test_streams.py +64 -3
- warp/tests/test_struct.py +4 -4
- warp/tests/test_torch.py +24 -0
- warp/tests/test_triangle_closest_point.py +137 -0
- warp/tests/test_types.py +1 -1
- warp/tests/test_vbd.py +386 -0
- warp/tests/test_vec.py +143 -0
- warp/tests/test_vec_scalar_ops.py +139 -0
- warp/tests/unittest_suites.py +12 -0
- warp/tests/unittest_utils.py +9 -5
- warp/thirdparty/dlpack.py +3 -1
- warp/types.py +167 -36
- warp/utils.py +37 -14
- {warp_lang-1.3.3.dist-info → warp_lang-1.4.1.dist-info}/METADATA +10 -8
- {warp_lang-1.3.3.dist-info → warp_lang-1.4.1.dist-info}/RECORD +110 -98
- warp/tests/test_point_triangle_closest_point.py +0 -143
- {warp_lang-1.3.3.dist-info → warp_lang-1.4.1.dist-info}/LICENSE.md +0 -0
- {warp_lang-1.3.3.dist-info → warp_lang-1.4.1.dist-info}/WHEEL +0 -0
- {warp_lang-1.3.3.dist-info → warp_lang-1.4.1.dist-info}/top_level.txt +0 -0
warp/sim/integrator_xpbd.py
CHANGED
|
@@ -2808,12 +2808,8 @@ class XPBDIntegrator(Integrator):
|
|
|
2808
2808
|
|
|
2809
2809
|
with wp.ScopedTimer("simulate", False):
|
|
2810
2810
|
if model.particle_count:
|
|
2811
|
-
|
|
2812
|
-
|
|
2813
|
-
particle_qd = state_out.particle_qd
|
|
2814
|
-
else:
|
|
2815
|
-
particle_q = state_out.particle_q
|
|
2816
|
-
particle_qd = state_out.particle_qd
|
|
2811
|
+
particle_q = state_out.particle_q
|
|
2812
|
+
particle_qd = state_out.particle_qd
|
|
2817
2813
|
|
|
2818
2814
|
self.particle_q_init = wp.clone(state_in.particle_q)
|
|
2819
2815
|
if self.enable_restitution:
|
warp/sim/model.py
CHANGED
|
@@ -527,8 +527,9 @@ class Model:
|
|
|
527
527
|
tri_poses (array): Triangle element rest pose, shape [tri_count, 2, 2], float
|
|
528
528
|
tri_activations (array): Triangle element activations, shape [tri_count], float
|
|
529
529
|
tri_materials (array): Triangle element materials, shape [tri_count, 5], float
|
|
530
|
+
tri_areas (array): Triangle element rest areas, shape [tri_count], float
|
|
530
531
|
|
|
531
|
-
edge_indices (array): Bending edge indices, shape [edge_count*4], int
|
|
532
|
+
edge_indices (array): Bending edge indices, shape [edge_count*4], int, each row is [o0, o1, v1, v2], where v1, v2 are on the edge
|
|
532
533
|
edge_rest_angle (array): Bending edge rest angle, shape [edge_count], float
|
|
533
534
|
edge_bending_properties (array): Bending edge stiffness and damping parameters, shape [edge_count, 2], float
|
|
534
535
|
|
|
@@ -558,6 +559,7 @@ class Model:
|
|
|
558
559
|
joint_type (array): Joint type, shape [joint_count], int
|
|
559
560
|
joint_parent (array): Joint parent body indices, shape [joint_count], int
|
|
560
561
|
joint_child (array): Joint child body indices, shape [joint_count], int
|
|
562
|
+
joint_ancestor (array): Maps from joint index to the index of the joint that has the current joint parent body as child (-1 if no such joint ancestor exists), shape [joint_count], int
|
|
561
563
|
joint_X_p (array): Joint transform in parent frame, shape [joint_count, 7], float
|
|
562
564
|
joint_X_c (array): Joint mass frame in child frame, shape [joint_count, 7], float
|
|
563
565
|
joint_axis (array): Joint axis in child frame, shape [joint_axis_count, 3], float
|
|
@@ -639,6 +641,8 @@ class Model:
|
|
|
639
641
|
joint_dof_count (int): Total number of velocity degrees of freedom of all joints in the system
|
|
640
642
|
joint_coord_count (int): Total number of position degrees of freedom of all joints in the system
|
|
641
643
|
|
|
644
|
+
particle_coloring (list of array): The coloring of all the particles, used for VBD's Gauss-Seidel iteration.
|
|
645
|
+
|
|
642
646
|
device (wp.Device): Device on which the Model was allocated
|
|
643
647
|
|
|
644
648
|
Note:
|
|
@@ -695,6 +699,7 @@ class Model:
|
|
|
695
699
|
self.tri_poses = None
|
|
696
700
|
self.tri_activations = None
|
|
697
701
|
self.tri_materials = None
|
|
702
|
+
self.tri_areas = None
|
|
698
703
|
|
|
699
704
|
self.edge_indices = None
|
|
700
705
|
self.edge_rest_angle = None
|
|
@@ -727,6 +732,7 @@ class Model:
|
|
|
727
732
|
self.joint_type = None
|
|
728
733
|
self.joint_parent = None
|
|
729
734
|
self.joint_child = None
|
|
735
|
+
self.joint_ancestor = None
|
|
730
736
|
self.joint_X_p = None
|
|
731
737
|
self.joint_X_c = None
|
|
732
738
|
self.joint_axis = None
|
|
@@ -806,6 +812,8 @@ class Model:
|
|
|
806
812
|
self.joint_dof_count = 0
|
|
807
813
|
self.joint_coord_count = 0
|
|
808
814
|
|
|
815
|
+
self.particle_coloring = []
|
|
816
|
+
|
|
809
817
|
self.device = wp.get_device(device)
|
|
810
818
|
|
|
811
819
|
def state(self, requires_grad=None) -> State:
|
|
@@ -1174,6 +1182,7 @@ class ModelBuilder:
|
|
|
1174
1182
|
self.tri_poses = []
|
|
1175
1183
|
self.tri_activations = []
|
|
1176
1184
|
self.tri_materials = []
|
|
1185
|
+
self.tri_areas = []
|
|
1177
1186
|
|
|
1178
1187
|
# edges (bending)
|
|
1179
1188
|
self.edge_indices = []
|
|
@@ -1395,9 +1404,8 @@ class ModelBuilder:
|
|
|
1395
1404
|
self.joint_X_p.extend(joint_X_p)
|
|
1396
1405
|
self.joint_q.extend(joint_q)
|
|
1397
1406
|
|
|
1398
|
-
self.add_articulation()
|
|
1399
|
-
|
|
1400
1407
|
# offset the indices
|
|
1408
|
+
self.articulation_start.extend([a + self.joint_count for a in builder.articulation_start])
|
|
1401
1409
|
self.joint_parent.extend([p + self.joint_count if p != -1 else -1 for p in builder.joint_parent])
|
|
1402
1410
|
self.joint_child.extend([c + self.joint_count for c in builder.joint_child])
|
|
1403
1411
|
|
|
@@ -1493,6 +1501,7 @@ class ModelBuilder:
|
|
|
1493
1501
|
"tri_poses",
|
|
1494
1502
|
"tri_activations",
|
|
1495
1503
|
"tri_materials",
|
|
1504
|
+
"tri_areas",
|
|
1496
1505
|
"tet_poses",
|
|
1497
1506
|
"tet_activations",
|
|
1498
1507
|
"tet_materials",
|
|
@@ -3529,6 +3538,7 @@ class ModelBuilder:
|
|
|
3529
3538
|
self.tri_poses.append(inv_D.tolist())
|
|
3530
3539
|
self.tri_activations.append(0.0)
|
|
3531
3540
|
self.tri_materials.append((tri_ke, tri_ka, tri_kd, tri_drag, tri_lift))
|
|
3541
|
+
self.tri_areas.append(area)
|
|
3532
3542
|
return area
|
|
3533
3543
|
|
|
3534
3544
|
def add_triangles(
|
|
@@ -3617,7 +3627,9 @@ class ModelBuilder:
|
|
|
3617
3627
|
np.array(tri_lift)[valid_inds],
|
|
3618
3628
|
)
|
|
3619
3629
|
)
|
|
3620
|
-
|
|
3630
|
+
areas = areas.tolist()
|
|
3631
|
+
self.tri_areas.extend(areas)
|
|
3632
|
+
return areas
|
|
3621
3633
|
|
|
3622
3634
|
def add_tetrahedron(
|
|
3623
3635
|
self, i: int, j: int, k: int, l: int, k_mu: float = 1.0e3, k_lambda: float = 1.0e3, k_damp: float = 0.0
|
|
@@ -3685,10 +3697,10 @@ class ModelBuilder:
|
|
|
3685
3697
|
by the `model.tri_kb` parameter.
|
|
3686
3698
|
|
|
3687
3699
|
Args:
|
|
3688
|
-
i: The index of the first particle
|
|
3689
|
-
j: The index of the second particle
|
|
3690
|
-
k: The index of the third particle
|
|
3691
|
-
l: The index of the fourth particle
|
|
3700
|
+
i: The index of the first particle, i.e., opposite vertex 0
|
|
3701
|
+
j: The index of the second particle, i.e., opposite vertex 1
|
|
3702
|
+
k: The index of the third particle, i.e., vertex 0
|
|
3703
|
+
l: The index of the fourth particle, i.e., vertex 1
|
|
3692
3704
|
rest: The rest angle across the edge in radians, if not specified it will be computed
|
|
3693
3705
|
|
|
3694
3706
|
Note:
|
|
@@ -3736,10 +3748,10 @@ class ModelBuilder:
|
|
|
3736
3748
|
by the `model.tri_kb` parameter.
|
|
3737
3749
|
|
|
3738
3750
|
Args:
|
|
3739
|
-
i: The
|
|
3740
|
-
j: The
|
|
3741
|
-
k: The
|
|
3742
|
-
l: The
|
|
3751
|
+
i: The index of the first particle, i.e., opposite vertex 0
|
|
3752
|
+
j: The index of the second particle, i.e., opposite vertex 1
|
|
3753
|
+
k: The index of the third particle, i.e., vertex 0
|
|
3754
|
+
l: The index of the fourth particle, i.e., vertex 1
|
|
3743
3755
|
rest: The rest angles across the edges in radians, if not specified they will be computed
|
|
3744
3756
|
|
|
3745
3757
|
Note:
|
|
@@ -3849,16 +3861,22 @@ class ModelBuilder:
|
|
|
3849
3861
|
p = wp.quat_rotate(rot, g) + pos
|
|
3850
3862
|
m = mass
|
|
3851
3863
|
|
|
3864
|
+
particle_flag = PARTICLE_FLAG_ACTIVE
|
|
3865
|
+
|
|
3852
3866
|
if x == 0 and fix_left:
|
|
3853
3867
|
m = 0.0
|
|
3868
|
+
particle_flag = wp.uint32(int(particle_flag) & ~int(PARTICLE_FLAG_ACTIVE))
|
|
3854
3869
|
elif x == dim_x and fix_right:
|
|
3855
3870
|
m = 0.0
|
|
3871
|
+
particle_flag = wp.uint32(int(particle_flag) & ~int(PARTICLE_FLAG_ACTIVE))
|
|
3856
3872
|
elif y == 0 and fix_bottom:
|
|
3857
3873
|
m = 0.0
|
|
3874
|
+
particle_flag = wp.uint32(int(particle_flag) & ~int(PARTICLE_FLAG_ACTIVE))
|
|
3858
3875
|
elif y == dim_y and fix_top:
|
|
3859
3876
|
m = 0.0
|
|
3877
|
+
particle_flag = wp.uint32(int(particle_flag) & ~int(PARTICLE_FLAG_ACTIVE))
|
|
3860
3878
|
|
|
3861
|
-
self.add_particle(p, vel, m)
|
|
3879
|
+
self.add_particle(p, vel, m, flags=particle_flag)
|
|
3862
3880
|
|
|
3863
3881
|
if x > 0 and y > 0:
|
|
3864
3882
|
if reverse_winding:
|
|
@@ -3902,22 +3920,20 @@ class ModelBuilder:
|
|
|
3902
3920
|
spring_indices = set()
|
|
3903
3921
|
|
|
3904
3922
|
for _k, e in adj.edges.items():
|
|
3905
|
-
# skip open edges
|
|
3906
|
-
if e.f0 == -1 or e.f1 == -1:
|
|
3907
|
-
continue
|
|
3908
|
-
|
|
3909
3923
|
self.add_edge(
|
|
3910
3924
|
e.o0, e.o1, e.v0, e.v1, edge_ke=edge_ke, edge_kd=edge_kd
|
|
3911
3925
|
) # opposite 0, opposite 1, vertex 0, vertex 1
|
|
3912
3926
|
|
|
3913
|
-
|
|
3914
|
-
|
|
3915
|
-
|
|
3927
|
+
# skip constraints open edges
|
|
3928
|
+
if e.f0 != -1 and e.f1 != -1:
|
|
3929
|
+
spring_indices.add((min(e.o0, e.o1), max(e.o0, e.o1)))
|
|
3930
|
+
spring_indices.add((min(e.o0, e.v0), max(e.o0, e.v0)))
|
|
3931
|
+
spring_indices.add((min(e.o0, e.v1), max(e.o0, e.v1)))
|
|
3916
3932
|
|
|
3917
|
-
|
|
3918
|
-
|
|
3933
|
+
spring_indices.add((min(e.o1, e.v0), max(e.o1, e.v0)))
|
|
3934
|
+
spring_indices.add((min(e.o1, e.v1), max(e.o1, e.v1)))
|
|
3919
3935
|
|
|
3920
|
-
|
|
3936
|
+
spring_indices.add((min(e.v0, e.v1), max(e.v0, e.v1)))
|
|
3921
3937
|
|
|
3922
3938
|
if add_springs:
|
|
3923
3939
|
for i, j in spring_indices:
|
|
@@ -4001,14 +4017,14 @@ class ModelBuilder:
|
|
|
4001
4017
|
adj = wp.utils.MeshAdjacency(self.tri_indices[start_tri:end_tri], end_tri - start_tri)
|
|
4002
4018
|
|
|
4003
4019
|
edgeinds = np.fromiter(
|
|
4004
|
-
(x for e in adj.edges.values()
|
|
4020
|
+
(x for e in adj.edges.values() for x in (e.o0, e.o1, e.v0, e.v1)),
|
|
4005
4021
|
int,
|
|
4006
4022
|
).reshape(-1, 4)
|
|
4007
4023
|
self.add_edges(
|
|
4008
4024
|
edgeinds[:, 0],
|
|
4009
4025
|
edgeinds[:, 1],
|
|
4010
4026
|
edgeinds[:, 2],
|
|
4011
|
-
edgeinds[:,
|
|
4027
|
+
edgeinds[:, 3],
|
|
4012
4028
|
edge_ke=[edge_ke] * len(edgeinds),
|
|
4013
4029
|
edge_kd=[edge_kd] * len(edgeinds),
|
|
4014
4030
|
)
|
|
@@ -4459,6 +4475,7 @@ class ModelBuilder:
|
|
|
4459
4475
|
m.tri_poses = wp.array(self.tri_poses, dtype=wp.mat22, requires_grad=requires_grad)
|
|
4460
4476
|
m.tri_activations = wp.array(self.tri_activations, dtype=wp.float32, requires_grad=requires_grad)
|
|
4461
4477
|
m.tri_materials = wp.array(self.tri_materials, dtype=wp.float32, requires_grad=requires_grad)
|
|
4478
|
+
m.tri_areas = wp.array(self.tri_areas, dtype=wp.float32, requires_grad=requires_grad)
|
|
4462
4479
|
|
|
4463
4480
|
# ---------------------
|
|
4464
4481
|
# edges
|
|
@@ -4514,6 +4531,14 @@ class ModelBuilder:
|
|
|
4514
4531
|
m.joint_q = wp.array(self.joint_q, dtype=wp.float32, requires_grad=requires_grad)
|
|
4515
4532
|
m.joint_qd = wp.array(self.joint_qd, dtype=wp.float32, requires_grad=requires_grad)
|
|
4516
4533
|
m.joint_name = self.joint_name
|
|
4534
|
+
# compute joint ancestors
|
|
4535
|
+
child_to_joint = {}
|
|
4536
|
+
for i, child in enumerate(self.joint_child):
|
|
4537
|
+
child_to_joint[child] = i
|
|
4538
|
+
parent_joint = []
|
|
4539
|
+
for parent in self.joint_parent:
|
|
4540
|
+
parent_joint.append(child_to_joint.get(parent, -1))
|
|
4541
|
+
m.joint_ancestor = wp.array(parent_joint, dtype=wp.int32)
|
|
4517
4542
|
|
|
4518
4543
|
# dynamics properties
|
|
4519
4544
|
m.joint_armature = wp.array(self.joint_armature, dtype=wp.float32, requires_grad=requires_grad)
|
warp/sparse.py
CHANGED
|
@@ -106,7 +106,7 @@ class BsrMatrix(Generic[_BlockType]):
|
|
|
106
106
|
return
|
|
107
107
|
|
|
108
108
|
BsrMatrix.__setattr__(
|
|
109
|
-
self, "_nnz_buf", wp.
|
|
109
|
+
self, "_nnz_buf", wp.empty(dtype=int, shape=(1,), device="cpu", pinned=self.device.is_cuda)
|
|
110
110
|
)
|
|
111
111
|
if self.device.is_cuda:
|
|
112
112
|
BsrMatrix.__setattr__(self, "_nnz_event", wp.Event(self.device))
|
|
@@ -524,7 +524,7 @@ def _bsr_assign_split_blocks(
|
|
|
524
524
|
if dest_block >= dest_offsets[dest_row_count]:
|
|
525
525
|
return
|
|
526
526
|
|
|
527
|
-
dest_row = wp.lower_bound(dest_offsets, dest_block + 1) - 1
|
|
527
|
+
dest_row = wp.lower_bound(dest_offsets, 0, dest_row_count + 1, dest_block + 1) - 1
|
|
528
528
|
src_row = dest_row // row_factor
|
|
529
529
|
|
|
530
530
|
dest_col_in_row = dest_block - dest_offsets[dest_row]
|
|
@@ -566,7 +566,7 @@ def _bsr_assign_merge_row_col(
|
|
|
566
566
|
dest_rows[block] = -1 # invalid
|
|
567
567
|
dest_cols[block] = -1
|
|
568
568
|
else:
|
|
569
|
-
row = wp.lower_bound(src_offsets, block + 1) - 1
|
|
569
|
+
row = wp.lower_bound(src_offsets, 0, src_row_count + 1, block + 1) - 1
|
|
570
570
|
dest_rows[block] = row // row_factor
|
|
571
571
|
dest_cols[block] = src_columns[block] // col_factor
|
|
572
572
|
|
|
@@ -589,7 +589,7 @@ def _bsr_assign_merge_blocks(
|
|
|
589
589
|
if src_block >= src_offsets[src_row_count]:
|
|
590
590
|
return
|
|
591
591
|
|
|
592
|
-
src_row = wp.lower_bound(src_offsets, src_block + 1) - 1
|
|
592
|
+
src_row = wp.lower_bound(src_offsets, 0, src_row_count + 1, src_block + 1) - 1
|
|
593
593
|
src_col = src_columns[src_block]
|
|
594
594
|
|
|
595
595
|
dest_row = src_row // row_factor
|
|
@@ -828,7 +828,7 @@ def bsr_copy(
|
|
|
828
828
|
block_type=block_type,
|
|
829
829
|
device=A.device,
|
|
830
830
|
)
|
|
831
|
-
bsr_assign(dest=copy, src=A)
|
|
831
|
+
bsr_assign(dest=copy, src=A, structure_only=structure_only)
|
|
832
832
|
return copy
|
|
833
833
|
|
|
834
834
|
|
|
@@ -1190,7 +1190,7 @@ def _bsr_get_block_row(dest_offset: int, row_count: int, bsr_offsets: wp.array(d
|
|
|
1190
1190
|
if i >= bsr_offsets[row_count]:
|
|
1191
1191
|
rows[dest_offset + i] = -1 # invalid
|
|
1192
1192
|
else:
|
|
1193
|
-
row = wp.lower_bound(bsr_offsets, i + 1) - 1
|
|
1193
|
+
row = wp.lower_bound(bsr_offsets, 0, row_count + 1, i + 1) - 1
|
|
1194
1194
|
rows[dest_offset + i] = row
|
|
1195
1195
|
|
|
1196
1196
|
|
|
@@ -1461,13 +1461,14 @@ def _bsr_mm_compute_values(
|
|
|
1461
1461
|
y_offsets: wp.array(dtype=int),
|
|
1462
1462
|
y_columns: wp.array(dtype=int),
|
|
1463
1463
|
y_values: wp.array(dtype=Any),
|
|
1464
|
+
mm_row_count: int,
|
|
1464
1465
|
mm_offsets: wp.array(dtype=int),
|
|
1465
1466
|
mm_cols: wp.array(dtype=int),
|
|
1466
1467
|
mm_values: wp.array(dtype=Any),
|
|
1467
1468
|
):
|
|
1468
1469
|
mm_block = wp.tid()
|
|
1469
1470
|
|
|
1470
|
-
row = wp.lower_bound(mm_offsets, mm_block + 1) - 1
|
|
1471
|
+
row = wp.lower_bound(mm_offsets, 0, mm_row_count + 1, mm_block + 1) - 1
|
|
1471
1472
|
col = mm_cols[mm_block]
|
|
1472
1473
|
|
|
1473
1474
|
mm_val = mm_values.dtype(type(alpha)(0.0))
|
|
@@ -1759,6 +1760,7 @@ def bsr_mm(
|
|
|
1759
1760
|
work_arrays._old_z_offsets if y == z else y.offsets,
|
|
1760
1761
|
work_arrays._old_z_columns if y == z else y.columns,
|
|
1761
1762
|
work_arrays._old_z_values if y == z else y.values,
|
|
1763
|
+
z.nrow,
|
|
1762
1764
|
z.offsets,
|
|
1763
1765
|
z.columns,
|
|
1764
1766
|
mm_values,
|