sparse-convolution 0.1.1__tar.gz → 0.1.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sparse_convolution
3
- Version: 0.1.1
3
+ Version: 0.1.3
4
4
  Summary: Sparse convolution in python using Toeplitz convolution matrix multiplication.
5
5
  Home-page: https://github.com/RichieHakim/sparse_convolution
6
6
  Author: Richard Hakim
@@ -21,11 +21,18 @@ Requires-Dist: numpy
21
21
  Sparse convolution in python. \
22
22
  Uses Toeplitz convolutional matrix multiplication to perform sparse convolution. \
23
23
  This allows for extremely fast convolution when:
24
- - The kernel is small (<= 30x30)
24
+ - The kernel is small (<= 100x100)
25
25
  - The input array is sparse (<= 1% density)
26
- - Many arrays are convolved with the same kernel
26
+ - The input array is small (<= 1000x1000)
27
+ - Many arrays are convolved with the same kernel (large batch size >= 1000)
27
28
 
28
29
  ## Install:
30
+ The package is available on PyPI. \
31
+ `pip install sparse_convolution`
32
+
33
+ <br>
34
+
35
+ Alternatively, you can install from source. \
29
36
  `git clone https://github.com/RichieHakim/sparse_convolution` \
30
37
  `cd sparse_convolution` \
31
38
  `pip install -e .`
@@ -45,7 +52,7 @@ A = scipy.sparse.rand(100, 100, density=0.1)
45
52
  B = np.random.rand(3, 3)
46
53
 
47
54
  # Prepare class
48
- conv = Toeplitz_convolution2d(
55
+ conv = sc.Toeplitz_convolution2d(
49
56
  x_shape=A.shape,
50
57
  k=B,
51
58
  mode='same',
@@ -56,7 +63,6 @@ conv = Toeplitz_convolution2d(
56
63
  C = conv(
57
64
  x=A,
58
65
  batching=False,
59
- mode='same',
60
66
  ).toarray()
61
67
  ```
62
68
 
@@ -92,7 +98,6 @@ conv = sc.Toeplitz_convolution2d(
92
98
  C = conv(
93
99
  x=A,
94
100
  batching=True,
95
- mode='same',
96
101
  )
97
102
 
98
103
  # Reshape the output back to (3, 100, 100)
@@ -2,11 +2,18 @@
2
2
  Sparse convolution in python. \
3
3
  Uses Toeplitz convolutional matrix multiplication to perform sparse convolution. \
4
4
  This allows for extremely fast convolution when:
5
- - The kernel is small (<= 30x30)
5
+ - The kernel is small (<= 100x100)
6
6
  - The input array is sparse (<= 1% density)
7
- - Many arrays are convolved with the same kernel
7
+ - The input array is small (<= 1000x1000)
8
+ - Many arrays are convolved with the same kernel (large batch size >= 1000)
8
9
 
9
10
  ## Install:
11
+ The package is available on PyPI. \
12
+ `pip install sparse_convolution`
13
+
14
+ <br>
15
+
16
+ Alternatively, you can install from source. \
10
17
  `git clone https://github.com/RichieHakim/sparse_convolution` \
11
18
  `cd sparse_convolution` \
12
19
  `pip install -e .`
@@ -26,7 +33,7 @@ A = scipy.sparse.rand(100, 100, density=0.1)
26
33
  B = np.random.rand(3, 3)
27
34
 
28
35
  # Prepare class
29
- conv = Toeplitz_convolution2d(
36
+ conv = sc.Toeplitz_convolution2d(
30
37
  x_shape=A.shape,
31
38
  k=B,
32
39
  mode='same',
@@ -37,7 +44,6 @@ conv = Toeplitz_convolution2d(
37
44
  C = conv(
38
45
  x=A,
39
46
  batching=False,
40
- mode='same',
41
47
  ).toarray()
42
48
  ```
43
49
 
@@ -73,7 +79,6 @@ conv = sc.Toeplitz_convolution2d(
73
79
  C = conv(
74
80
  x=A,
75
81
  batching=True,
76
- mode='same',
77
82
  )
78
83
 
79
84
  # Reshape the output back to (3, 100, 100)
@@ -1,3 +1,3 @@
1
1
  from sparse_convolution.sparse_convolution import Toeplitz_convolution2d
2
2
 
3
- __version__ = '0.1.1'
3
+ __version__ = '0.1.3'
@@ -0,0 +1,217 @@
1
+ from typing import Tuple, Optional, Union
2
+
3
+ import scipy.sparse
4
+ import numpy as np
5
+
6
class Toeplitz_convolution2d():
    """
    Convolve a 2D array with a 2D kernel using the Toeplitz matrix
    multiplication method. This class is ideal when 'x' is very sparse
    (density<0.01), 'x' is small (shape <(1000,1000)), 'k' is small (shape
    <(100,100)), and the batch size is large (e.g. 1000+). Generally, it is
    faster than scipy.signal.convolve2d when convolving multiple arrays with the
    same kernel. It maintains a low memory footprint by storing the toeplitz
    matrix as a sparse matrix.
    RH 2022

    Attributes:
        x_shape (Tuple[int, int]):
            The shape of the 2D array to be convolved (cast to python ints).
        k (np.ndarray):
            A vertically flipped (``np.flipud``) copy of the kernel passed to
            ``__init__``. Only its shape is used after construction.
        mode (str):
            Either ``'full'``, ``'same'``, or ``'valid'``. See
            scipy.signal.convolve2d for details.
        so (Tuple[int, int]):
            'Size out': the shape of the 'full' convolution output,
            *(k.shape[0] + x_shape[0] - 1, k.shape[1] + x_shape[1] - 1)*.
        dt (scipy.sparse.csr_matrix):
            The double block Toeplitz matrix. Shape:
            *(so[0]*so[1], x_shape[0]*x_shape[1])*.

    Args:
        x_shape (Tuple[int, int]):
            The shape of the 2D array to be convolved.
        k (np.ndarray):
            2D kernel to convolve with.
        mode (str):
            Convolution method to use, either ``'full'``, ``'same'``, or
            ``'valid'``.
            See scipy.signal.convolve2d for details. (Default is 'same')
        dtype (Optional[np.dtype]):
            The data type to use for the Toeplitz matrix. Ideally, this matches
            the data type of the input array. If ``None``, then the data type of
            the kernel is used. (Default is ``None``)
        verbose (Union[bool, int]):
            If greater than 0, a warning is printed when the expected number
            of non-zero elements in the Toeplitz matrix is >= 1e8. (Default is
            ``False``)

    Example:
        .. highlight:: python
        .. code-block:: python

            # create Toeplitz_convolution2d object
            toeplitz_convolution2d = Toeplitz_convolution2d(
                x_shape=(100,30),
                k=np.random.rand(10,10),
                mode='same',
            )
            toeplitz_convolution2d(
                x=scipy.sparse.csr_matrix(np.random.rand(5,3000)),
                batching=True,
            )
    """
    def __init__(
        self,
        x_shape: Tuple[int, int],
        k: np.ndarray,
        mode: str = 'same',
        dtype: Optional[np.dtype] = None,
        verbose: Union[bool, int] = False,
    ):
        """
        Initializes the Toeplitz_convolution2d object and stores the Toeplitz
        matrix.
        """
        ## Type checking
        ## NOTE: asserts are kept (rather than raising ValueError) so that the
        ##  exception type seen by existing callers does not change.
        assert isinstance(x_shape, tuple), "x_shape must be a tuple"
        ## np.integer / np.floating are the abstract scalar bases. The aliases
        ##  np.int_ / np.float_ must not be used here: np.float_ was removed in
        ##  NumPy 2.0 and raises AttributeError.
        assert all(isinstance(s, (int, float, np.integer, np.floating)) for s in x_shape), "x_shape must be a tuple of integers"
        x_shape = (int(x_shape[0]), int(x_shape[1]))

        assert isinstance(k, np.ndarray), "k must be a numpy array"
        assert k.ndim == 2, "k must be a 2D array"

        assert isinstance(mode, str), "mode must be a string"
        assert mode in ['full', 'same', 'valid'], "mode must be 'full', 'same', or 'valid'"

        ## Warn if x_shape is large
        if verbose > 0:
            n_nz_elements_expected = x_shape[0]*x_shape[1]*k.shape[0]*k.shape[1]
            if n_nz_elements_expected >= 1e8:
                print("Warning: Expected number of non-zero elements in the Toeplitz matrix is large. \n"
                    f"(x_shape[0]*x_shape[1]*k.shape[0]*k.shape[1]) = {n_nz_elements_expected} non-zero elements. \n"
                    "This will likely be slow and have a large memory footprint. \n"
                    "Consider breaking the `x` array into smaller chunks or tiles so that `x_shape` can be smaller and performing the convolution in batches.")

        ## The public attribute 'k' has always held the vertically flipped
        ##  kernel; keep that for backward compatibility.
        self.k = np.flipud(k.copy())
        self.mode = mode
        self.x_shape = x_shape
        dtype = k.dtype if dtype is None else dtype

        n_rows_k, n_cols_k = k.shape
        n_rows_x, n_cols_x = x_shape

        if mode == 'valid':
            assert n_rows_x >= n_rows_k and n_cols_x >= n_cols_k, "x must be larger than k in both dimensions for mode='valid'"

        ## 'size out' is the size of the 'full' output array
        self.so = so = (n_rows_k + n_rows_x - 1, n_cols_k + n_cols_x - 1)

        ## Make one (so[1], n_cols_x) Toeplitz matrix per kernel row, in the
        ##  kernel's original row order. Element [i, c] of each matrix is
        ##  k_row[i - c], i.e. a 1D 'full' convolution with that kernel row.
        offsets = np.arange(-n_cols_k + 1, 1)
        toeplitz_blocks = [
            scipy.sparse.diags(
                diagonals=np.ones((n_cols_k, n_cols_x), dtype=dtype) * k_row[::-1][:, None],
                offsets=offsets,
                shape=(so[1], n_cols_x),
                dtype=dtype,
            ) for k_row in k
        ]

        ## Stack the blocks and pad the bottom with empty blocks so that the
        ##  block-column has so[0] blocks. COO format is requested explicitly
        ##  because _roll_sparse edits the '.row' index array.
        empty_block = scipy.sparse.dia_matrix(toeplitz_blocks[0].shape, dtype=dtype)
        block_column = scipy.sparse.vstack(
            toeplitz_blocks + [empty_block] * (n_rows_x - 1),
            format='coo',
        )

        ## Make the double block Toeplitz matrix: the block-column for input
        ##  row ii is shifted down by ii blocks (ii*so[1] matrix rows).
        self.dt = scipy.sparse.hstack([
            self._roll_sparse(x=block_column, shift=ii * so[1])
            for ii in range(n_rows_x)
        ]).tocsr()

    def __call__(
        self,
        x: Union[np.ndarray, scipy.sparse.csc_matrix, scipy.sparse.csr_matrix],
        batching: bool = True,
        mode: Optional[str] = None,
    ) -> Union[np.ndarray, scipy.sparse.csr_matrix]:
        """
        Convolve the input array with the kernel.

        Args:
            x (Union[np.ndarray, scipy.sparse.csc_matrix,
            scipy.sparse.csr_matrix]):
                Input array(s) (i.e. image(s)) to convolve with the kernel. \n
                    * If ``batching==False``: Single 2D array to convolve with the
                      kernel. Shape: *(self.x_shape[0], self.x_shape[1])*
                    * If ``batching==True``: Multiple 2D arrays that have been
                      flattened into row vectors (with order='C'). \n
                      Shape: *(n_arrays, self.x_shape[0]*self.x_shape[1])*

            batching (bool):
                * ``False``: x is a single 2D array.
                * ``True``: x is a 2D array where each row is a flattened 2D
                  array. \n
                (Default is ``True``)

            mode (Optional[str]):
                Defines the mode of the convolution. Options are 'full', 'same'
                or 'valid'. See `scipy.signal.convolve2d` for details. Overrides
                the mode set in __init__. (Default is ``None``)

        Returns:
            (Union[np.ndarray, scipy.sparse matrix]):
                out (Union[np.ndarray, scipy.sparse matrix]):
                    Dense if ``x`` is dense, sparse if ``x`` is sparse. \n
                    * ``batching==True``: Multiple convolved 2D arrays that have
                      been flattened into row vectors (with order='C'). Shape:
                      *(n_arrays, height*width)*
                    * ``batching==False``: Single convolved 2D array of shape
                      *(height, width)*

        Raises:
            ValueError:
                If the resolved mode is not 'full', 'same' or 'valid'.
        """
        if mode is None:
            mode = self.mode  ## use the mode that was set in the init if not specified
        if mode not in ('full', 'same', 'valid'):
            ## Without this check an unknown mode would leave the crop bounds
            ##  undefined and fail later with an UnboundLocalError.
            raise ValueError("mode must be 'full', 'same', or 'valid'")

        issparse = scipy.sparse.issparse(x)

        ## Arrange the input(s) as column vector(s)
        x_v = x.T if batching else x.reshape(-1, 1)
        if issparse:
            x_v = x_v.tocsc()

        ## Each column of 'out_v' is a flattened 'full' convolution
        out_v = self.dt @ x_v

        ## Crop window [t:b, l:r] within the 'full' output
        n_rows_k, n_cols_k = self.k.shape
        if mode == 'full':
            t, l = 0, 0
            b, r = self.so
        elif mode == 'same':
            t = (n_rows_k - 1) // 2
            l = (n_cols_k - 1) // 2
            b = t + self.x_shape[0]
            r = l + self.x_shape[1]
        else:  ## mode == 'valid'
            t = n_rows_k - 1
            l = n_cols_k - 1
            b = self.x_shape[0]
            r = self.x_shape[1]

        if batching:
            ## Boolean mask over the flattened 'full' output selects the window
            idx_crop = np.zeros(self.so, dtype=np.bool_)
            idx_crop[t:b, l:r] = True
            out = out_v[idx_crop.reshape(-1), :].T
        elif issparse:
            out = out_v.reshape(self.so).tocsc()[t:b, l:r]
        else:
            out = out_v.reshape(self.so)[t:b, l:r]  ## reshape back into 2D array and crop
        return out

    def _roll_sparse(
        self,
        x: scipy.sparse.coo_matrix,
        shift: int,
    ):
        """
        Return a COO copy of ``x`` with every stored row index increased by
        ``shift``. The matrix shape is unchanged and there is no wrap-around,
        so the caller must make sure the shifted indices stay in bounds.
        """
        out = x.tocoo(copy=True)
        out.row += shift
        return out
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sparse_convolution
3
- Version: 0.1.1
3
+ Version: 0.1.3
4
4
  Summary: Sparse convolution in python using Toeplitz convolution matrix multiplication.
5
5
  Home-page: https://github.com/RichieHakim/sparse_convolution
6
6
  Author: Richard Hakim
@@ -21,11 +21,18 @@ Requires-Dist: numpy
21
21
  Sparse convolution in python. \
22
22
  Uses Toeplitz convolutional matrix multiplication to perform sparse convolution. \
23
23
  This allows for extremely fast convolution when:
24
- - The kernel is small (<= 30x30)
24
+ - The kernel is small (<= 100x100)
25
25
  - The input array is sparse (<= 1% density)
26
- - Many arrays are convolved with the same kernel
26
+ - The input array is small (<= 1000x1000)
27
+ - Many arrays are convolved with the same kernel (large batch size >= 1000)
27
28
 
28
29
  ## Install:
30
+ The package is available on PyPI. \
31
+ `pip install sparse_convolution`
32
+
33
+ <br>
34
+
35
+ Alternatively, you can install from source. \
29
36
  `git clone https://github.com/RichieHakim/sparse_convolution` \
30
37
  `cd sparse_convolution` \
31
38
  `pip install -e .`
@@ -45,7 +52,7 @@ A = scipy.sparse.rand(100, 100, density=0.1)
45
52
  B = np.random.rand(3, 3)
46
53
 
47
54
  # Prepare class
48
- conv = Toeplitz_convolution2d(
55
+ conv = sc.Toeplitz_convolution2d(
49
56
  x_shape=A.shape,
50
57
  k=B,
51
58
  mode='same',
@@ -56,7 +63,6 @@ conv = Toeplitz_convolution2d(
56
63
  C = conv(
57
64
  x=A,
58
65
  batching=False,
59
- mode='same',
60
66
  ).toarray()
61
67
  ```
62
68
 
@@ -92,7 +98,6 @@ conv = sc.Toeplitz_convolution2d(
92
98
  C = conv(
93
99
  x=A,
94
100
  batching=True,
95
- mode='same',
96
101
  )
97
102
 
98
103
  # Reshape the output back to (3, 100, 100)
@@ -1,172 +0,0 @@
1
- import scipy.sparse
2
- import numpy as np
3
-
4
class Toeplitz_convolution2d:
    """
    Convolve a 2D array with a 2D kernel using the Toeplitz matrix
    multiplication method.
    Allows for SPARSE 'x' inputs. 'k' should remain dense.
    Ideal when 'x' is very sparse (density<0.01), 'x' is small
    (shape <(1000,1000)), 'k' is small (shape <(100,100)), and
    the batch size is large (e.g. 1000+).
    Generally faster than scipy.signal.convolve2d when convolving multiple
    arrays with the same kernel. Maintains low memory footprint by
    storing the toeplitz matrix as a sparse matrix.

    See: https://stackoverflow.com/a/51865516 and https://github.com/alisaaalehi/convolution_as_multiplication
        for a nice illustration.
    See: https://docs.scipy.org/doc/scipy/reference/generated/scipy.linalg.convolution_matrix.html
        for 1D version.
    See: https://docs.scipy.org/doc/scipy/reference/generated/scipy.linalg.matmul_toeplitz.html#scipy.linalg.matmul_toeplitz
        for potential ways to make this implementation faster.

    Test with: tests.test_toeplitz_convolution2d()
    RH 2022
    """
    def __init__(
        self,
        x_shape,
        k,
        mode='same',
        dtype=None,
    ):
        """
        Initialize the convolution object.
        Makes the Toeplitz matrix and stores it.

        Args:
            x_shape (tuple):
                The shape of the 2D array to be convolved.
            k (np.ndarray):
                2D kernel to convolve with
            mode (str):
                'full', 'same' or 'valid'
                see scipy.signal.convolve2d for details
            dtype (np.dtype):
                The data type to use for the Toeplitz matrix.
                Ideally, this matches the data type of the input array.
                If None, then the data type of the kernel is used.

        Sets:
            self.k: vertically flipped (np.flipud) copy of the kernel.
            self.mode, self.x_shape, self.dtype: as passed / resolved.
            self.so: shape of the 'full' convolution output.
            self.dt: double block Toeplitz matrix (CSR) of shape
                (so[0]*so[1], x_shape[0]*x_shape[1]).
        """
        ## The public attribute 'k' has always held the vertically flipped
        ##  kernel; keep that for backward compatibility.
        self.k = np.flipud(k.copy())
        self.mode = mode
        self.x_shape = x_shape
        self.dtype = k.dtype if dtype is None else dtype

        n_rows_k, n_cols_k = k.shape
        n_rows_x, n_cols_x = x_shape[0], x_shape[1]

        if mode == 'valid':
            assert n_rows_x >= n_rows_k and n_cols_x >= n_cols_k, "x must be larger than k in both dimensions for mode='valid'"

        ## 'size out' is the size of the 'full' output array
        self.so = so = (n_rows_k + n_rows_x - 1, n_cols_k + n_cols_x - 1)

        ## Make one (so[1], n_cols_x) Toeplitz matrix per kernel row, in the
        ##  kernel's original row order. Element [i, c] of each matrix is
        ##  k_row[i - c], i.e. a 1D 'full' convolution with that kernel row.
        offsets = np.arange(-n_cols_k + 1, 1)
        toeplitz_blocks = [
            scipy.sparse.diags(
                diagonals=np.ones((n_cols_k, n_cols_x), dtype=self.dtype) * k_row[::-1][:, None],
                offsets=offsets,
                shape=(so[1], n_cols_x),
                dtype=self.dtype,
            ) for k_row in k
        ]

        ## Stack the blocks and pad the bottom with empty blocks so that the
        ##  block-column has so[0] blocks. COO format is requested explicitly
        ##  because _roll_sparse edits the '.row' index array.
        empty_block = scipy.sparse.dia_matrix(toeplitz_blocks[0].shape, dtype=self.dtype)
        block_column = scipy.sparse.vstack(
            toeplitz_blocks + [empty_block] * (n_rows_x - 1),
            format='coo',
        )

        ## Make the double block Toeplitz matrix: the block-column for input
        ##  row ii is shifted down by ii blocks (ii*so[1] matrix rows).
        self.dt = scipy.sparse.hstack([
            self._roll_sparse(x=block_column, shift=ii * so[1])
            for ii in range(n_rows_x)
        ]).tocsr()

    def __call__(
        self,
        x,
        batching=True,
        mode=None,
    ):
        """
        Convolve the input array with the kernel.

        Args:
            x (np.ndarray or scipy.sparse.csc_matrix or scipy.sparse.csr_matrix):
                Input array(s) (i.e. image(s)) to convolve with the kernel
                If batching==False: Single 2D array to convolve with the kernel.
                    shape: (self.x_shape[0], self.x_shape[1])
                If batching==True: Multiple 2D arrays that have been flattened
                 into row vectors (with order='C').
                    shape: (n_arrays, self.x_shape[0]*self.x_shape[1])
            batching (bool):
                If False, x is a single 2D array.
                If True, x is a 2D array where each row is a flattened 2D array.
            mode (str):
                'full', 'same' or 'valid'
                see scipy.signal.convolve2d for details
                Overrides the mode set in __init__.

        Returns:
            out (np.ndarray or scipy.sparse matrix):
                Dense if x is dense, sparse if x is sparse.
                If batching==True: Multiple convolved 2D arrays that have been flattened
                 into row vectors (with order='C').
                    shape: (n_arrays, height*width)
                If batching==False: Single convolved 2D array of shape (height, width)

        Raises:
            ValueError: If the resolved mode is not 'full', 'same' or 'valid'.
        """
        if mode is None:
            mode = self.mode  ## use the mode that was set in the init if not specified
        if mode not in ('full', 'same', 'valid'):
            ## Without this check an unknown mode would leave the crop bounds
            ##  undefined and fail later with an UnboundLocalError.
            raise ValueError("mode must be 'full', 'same', or 'valid'")

        issparse = scipy.sparse.issparse(x)

        ## Arrange the input(s) as column vector(s)
        x_v = x.T if batching else x.reshape(-1, 1)
        if issparse:
            x_v = x_v.tocsc()

        ## Each column of 'out_v' is a flattened 'full' convolution
        out_v = self.dt @ x_v

        ## Crop window [p_t:p_b, p_l:p_r] within the 'full' output
        n_rows_k, n_cols_k = self.k.shape
        if mode == 'full':
            p_t, p_l = 0, 0
            p_b, p_r = self.so
        elif mode == 'same':
            p_t = (n_rows_k - 1) // 2
            p_l = (n_cols_k - 1) // 2
            p_b = p_t + self.x_shape[0]
            p_r = p_l + self.x_shape[1]
        else:  ## mode == 'valid'
            p_t = n_rows_k - 1
            p_l = n_cols_k - 1
            p_b = self.x_shape[0]
            p_r = self.x_shape[1]

        if batching:
            ## Boolean mask over the flattened 'full' output selects the window
            idx_crop = np.zeros(self.so, dtype=np.bool_)
            idx_crop[p_t:p_b, p_l:p_r] = True
            out = out_v[idx_crop.reshape(-1), :].T
        elif issparse:
            out = out_v.reshape(self.so).tocsc()[p_t:p_b, p_l:p_r]
        else:
            out = out_v.reshape(self.so)[p_t:p_b, p_l:p_r]  ## reshape back into 2D array and crop
        return out

    def _roll_sparse(
        self,
        x,
        shift,
    ):
        """
        Return a COO copy of 'x' with every stored row index increased by
        'shift'. The matrix shape is unchanged and there is no wrap-around,
        so the caller must make sure the shifted indices stay in bounds.
        """
        out = x.tocoo(copy=True)
        out.row += shift
        return out