sparse-convolution 0.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sparse_convolution-0.1.1/LICENSE +21 -0
- sparse_convolution-0.1.1/MANIFEST.in +1 -0
- sparse_convolution-0.1.1/PKG-INFO +109 -0
- sparse_convolution-0.1.1/README.md +90 -0
- sparse_convolution-0.1.1/requirements.txt +2 -0
- sparse_convolution-0.1.1/setup.cfg +7 -0
- sparse_convolution-0.1.1/setup.py +118 -0
- sparse_convolution-0.1.1/sparse_convolution/__init__.py +3 -0
- sparse_convolution-0.1.1/sparse_convolution/sparse_convolution.py +172 -0
- sparse_convolution-0.1.1/sparse_convolution.egg-info/PKG-INFO +109 -0
- sparse_convolution-0.1.1/sparse_convolution.egg-info/SOURCES.txt +14 -0
- sparse_convolution-0.1.1/sparse_convolution.egg-info/dependency_links.txt +1 -0
- sparse_convolution-0.1.1/sparse_convolution.egg-info/requires.txt +2 -0
- sparse_convolution-0.1.1/sparse_convolution.egg-info/top_level.txt +1 -0
- sparse_convolution-0.1.1/tests/test_unit.py +98 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2021 RichieHakim
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
include requirements.txt
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: sparse_convolution
|
|
3
|
+
Version: 0.1.1
|
|
4
|
+
Summary: Sparse convolution in python using Toeplitz convolution matrix multiplication.
|
|
5
|
+
Home-page: https://github.com/RichieHakim/sparse_convolution
|
|
6
|
+
Author: Richard Hakim
|
|
7
|
+
Author-email: richhakim@gmail.com
|
|
8
|
+
License: MIT
|
|
9
|
+
Keywords: sparse convolution Toeplitz python
|
|
10
|
+
Platform: Any
|
|
11
|
+
Classifier: Development Status :: 5 - Production/Stable
|
|
12
|
+
Classifier: Intended Audience :: Science/Research
|
|
13
|
+
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Description-Content-Type: text/markdown
|
|
16
|
+
License-File: LICENSE
|
|
17
|
+
Requires-Dist: scipy
|
|
18
|
+
Requires-Dist: numpy
|
|
19
|
+
|
|
20
|
+
# sparse_convolution
|
|
21
|
+
Sparse convolution in python. \
|
|
22
|
+
Uses Toeplitz convolutional matrix multiplication to perform sparse convolution. \
|
|
23
|
+
This allows for extremely fast convolution when:
|
|
24
|
+
- The kernel is small (<= 30x30)
|
|
25
|
+
- The input array is sparse (<= 1% density)
|
|
26
|
+
- Many arrays are convolved with the same kernel
|
|
27
|
+
|
|
28
|
+
## Install:
|
|
29
|
+
`git clone https://github.com/RichieHakim/sparse_convolution` \
|
|
30
|
+
`cd sparse_convolution` \
|
|
31
|
+
`pip install -e .`
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
## Basic usage:
|
|
35
|
+
Convolve a single sparse 2D array with a 2D kernel.
|
|
36
|
+
```
|
|
37
|
+
import sparse_convolution as sc
|
|
38
|
+
import numpy as np
|
|
39
|
+
import scipy.sparse
|
|
40
|
+
|
|
41
|
+
# Create a single sparse matrix
|
|
42
|
+
A = scipy.sparse.rand(100, 100, density=0.1)
|
|
43
|
+
|
|
44
|
+
# Create a dense kernel
|
|
45
|
+
B = np.random.rand(3, 3)
|
|
46
|
+
|
|
47
|
+
# Prepare class
|
|
48
|
+
conv = sc.Toeplitz_convolution2d(
|
|
49
|
+
x_shape=A.shape,
|
|
50
|
+
k=B,
|
|
51
|
+
mode='same',
|
|
52
|
+
dtype=np.float32,
|
|
53
|
+
)
|
|
54
|
+
|
|
55
|
+
# Convolve
|
|
56
|
+
C = conv(
|
|
57
|
+
x=A,
|
|
58
|
+
batching=False,
|
|
59
|
+
mode='same',
|
|
60
|
+
).toarray()
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
## Batching usage:
|
|
65
|
+
Convolve multiple sparse 2D arrays with a 2D kernel. \
|
|
66
|
+
The input arrays must be reshaped into flattened vectors and stacked into a single sparse array of shape: `(n_arrays, height * width)`.
|
|
67
|
+
```
|
|
68
|
+
import sparse_convolution as sc
|
|
69
|
+
import numpy as np
|
|
70
|
+
import scipy.sparse
|
|
71
|
+
|
|
72
|
+
# Create multiple sparse matrices
|
|
73
|
+
# note that the shape of A will be (3, 100**2)
|
|
74
|
+
A = scipy.sparse.vstack([
|
|
75
|
+
scipy.sparse.rand(100, 100, density=0.1).reshape(1, -1),
|
|
76
|
+
scipy.sparse.rand(100, 100, density=0.1).reshape(1, -1),
|
|
77
|
+
scipy.sparse.rand(100, 100, density=0.1).reshape(1, -1),
|
|
78
|
+
]).tocsr()
|
|
79
|
+
|
|
80
|
+
# Create a dense kernel
|
|
81
|
+
B = np.random.rand(3, 3)
|
|
82
|
+
|
|
83
|
+
# Prepare class
|
|
84
|
+
conv = sc.Toeplitz_convolution2d(
|
|
85
|
+
x_shape=(100, 100), # note that the input shape here is (100, 100)
|
|
86
|
+
k=B,
|
|
87
|
+
mode='same',
|
|
88
|
+
dtype=np.float32,
|
|
89
|
+
)
|
|
90
|
+
|
|
91
|
+
# Convolve
|
|
92
|
+
C = conv(
|
|
93
|
+
x=A,
|
|
94
|
+
batching=True,
|
|
95
|
+
mode='same',
|
|
96
|
+
)
|
|
97
|
+
|
|
98
|
+
# Reshape the output back to (3, 100, 100)
|
|
99
|
+
C_reshaped = np.stack([c.reshape(100, 100).toarray() for c in C], axis=0)
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
## References
|
|
103
|
+
- See: https://stackoverflow.com/a/51865516 and https://github.com/alisaaalehi/convolution_as_multiplication
|
|
104
|
+
for a nice illustration.
|
|
105
|
+
- See: https://docs.scipy.org/doc/scipy/reference/generated/scipy.linalg.convolution_matrix.html
|
|
106
|
+
for 1D version.
|
|
107
|
+
- See: https://docs.scipy.org/doc/scipy/reference/generated/scipy.linalg.matmul_toeplitz.html#scipy.linalg.matmul_toeplitz
|
|
108
|
+
for potential ways to make this implementation faster.
|
|
109
|
+
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
# sparse_convolution
|
|
2
|
+
Sparse convolution in python. \
|
|
3
|
+
Uses Toeplitz convolutional matrix multiplication to perform sparse convolution. \
|
|
4
|
+
This allows for extremely fast convolution when:
|
|
5
|
+
- The kernel is small (<= 30x30)
|
|
6
|
+
- The input array is sparse (<= 1% density)
|
|
7
|
+
- Many arrays are convolved with the same kernel
|
|
8
|
+
|
|
9
|
+
## Install:
|
|
10
|
+
`git clone https://github.com/RichieHakim/sparse_convolution` \
|
|
11
|
+
`cd sparse_convolution` \
|
|
12
|
+
`pip install -e .`
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
## Basic usage:
|
|
16
|
+
Convolve a single sparse 2D array with a 2D kernel.
|
|
17
|
+
```
|
|
18
|
+
import sparse_convolution as sc
|
|
19
|
+
import numpy as np
|
|
20
|
+
import scipy.sparse
|
|
21
|
+
|
|
22
|
+
# Create a single sparse matrix
|
|
23
|
+
A = scipy.sparse.rand(100, 100, density=0.1)
|
|
24
|
+
|
|
25
|
+
# Create a dense kernel
|
|
26
|
+
B = np.random.rand(3, 3)
|
|
27
|
+
|
|
28
|
+
# Prepare class
|
|
29
|
+
conv = sc.Toeplitz_convolution2d(
|
|
30
|
+
x_shape=A.shape,
|
|
31
|
+
k=B,
|
|
32
|
+
mode='same',
|
|
33
|
+
dtype=np.float32,
|
|
34
|
+
)
|
|
35
|
+
|
|
36
|
+
# Convolve
|
|
37
|
+
C = conv(
|
|
38
|
+
x=A,
|
|
39
|
+
batching=False,
|
|
40
|
+
mode='same',
|
|
41
|
+
).toarray()
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
## Batching usage:
|
|
46
|
+
Convolve multiple sparse 2D arrays with a 2D kernel. \
|
|
47
|
+
The input arrays must be reshaped into flattened vectors and stacked into a single sparse array of shape: `(n_arrays, height * width)`.
|
|
48
|
+
```
|
|
49
|
+
import sparse_convolution as sc
|
|
50
|
+
import numpy as np
|
|
51
|
+
import scipy.sparse
|
|
52
|
+
|
|
53
|
+
# Create multiple sparse matrices
|
|
54
|
+
# note that the shape of A will be (3, 100**2)
|
|
55
|
+
A = scipy.sparse.vstack([
|
|
56
|
+
scipy.sparse.rand(100, 100, density=0.1).reshape(1, -1),
|
|
57
|
+
scipy.sparse.rand(100, 100, density=0.1).reshape(1, -1),
|
|
58
|
+
scipy.sparse.rand(100, 100, density=0.1).reshape(1, -1),
|
|
59
|
+
]).tocsr()
|
|
60
|
+
|
|
61
|
+
# Create a dense kernel
|
|
62
|
+
B = np.random.rand(3, 3)
|
|
63
|
+
|
|
64
|
+
# Prepare class
|
|
65
|
+
conv = sc.Toeplitz_convolution2d(
|
|
66
|
+
x_shape=(100, 100), # note that the input shape here is (100, 100)
|
|
67
|
+
k=B,
|
|
68
|
+
mode='same',
|
|
69
|
+
dtype=np.float32,
|
|
70
|
+
)
|
|
71
|
+
|
|
72
|
+
# Convolve
|
|
73
|
+
C = conv(
|
|
74
|
+
x=A,
|
|
75
|
+
batching=True,
|
|
76
|
+
mode='same',
|
|
77
|
+
)
|
|
78
|
+
|
|
79
|
+
# Reshape the output back to (3, 100, 100)
|
|
80
|
+
C_reshaped = np.stack([c.reshape(100, 100).toarray() for c in C], axis=0)
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
## References
|
|
84
|
+
- See: https://stackoverflow.com/a/51865516 and https://github.com/alisaaalehi/convolution_as_multiplication
|
|
85
|
+
for a nice illustration.
|
|
86
|
+
- See: https://docs.scipy.org/doc/scipy/reference/generated/scipy.linalg.convolution_matrix.html
|
|
87
|
+
for 1D version.
|
|
88
|
+
- See: https://docs.scipy.org/doc/scipy/reference/generated/scipy.linalg.matmul_toeplitz.html#scipy.linalg.matmul_toeplitz
|
|
89
|
+
for potential ways to make this implementation faster.
|
|
90
|
+
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
## setup.py file for sparse_convolution
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
from distutils.core import setup
|
|
5
|
+
|
|
6
|
+
## Get the parent directory of this file
|
|
7
|
+
dir_parent = Path(__file__).parent
|
|
8
|
+
|
|
9
|
+
## Get requirements from requirements.txt
|
|
10
|
+
def read_requirements(path=None):
    """
    Read the requirement specifiers listed in a requirements file.

    Each line is cleaned of commas, quotation marks and surrounding
    whitespace. Blank (or whitespace-only) lines and comment lines
    (first non-blank character is '#') are skipped.

    Args:
        path (str or pathlib.Path):
            Path to the requirements file.
            If None, 'requirements.txt' next to this setup.py is used.

    Returns:
        requirements (list of str):
            The cleaned requirement strings, in file order.
    """
    if path is None:
        path = dir_parent / "requirements.txt"

    with open(str(path), "r") as req_file:
        ## splitlines() handles '\n' and '\r\n' line endings alike
        lines = req_file.read().splitlines()

    requirements = []
    for line in lines:
        ## Clean first, THEN filter: otherwise indented comments and
        ##  whitespace-only lines slip through as bogus requirements
        cleaned = line.replace(",", "").replace("\"", "").replace("\'", "").strip()
        if not cleaned or cleaned.startswith("#"):
            continue
        requirements.append(cleaned)

    return requirements
|
|
23
|
+
deps_all = read_requirements()


## Dependencies: latest versions of requirements
### remove everything starting and after the first =,>,<,!,~ sign
deps_names = [
    req.split('=')[0].split('>')[0].split('<')[0].split('!')[0].split('~')[0].strip()
    for req in deps_all
]
## Keyed by package name, so a package listed twice is only required once
deps_all_dict = dict(zip(deps_names, deps_all))
deps_all_latest = dict(zip(deps_names, deps_names))


## Get README.md
with open(str(dir_parent / "README.md"), "r", encoding="utf-8") as f:
    readme = f.read()

## Get version number
version = None
with open(str(dir_parent / "sparse_convolution" / "__init__.py"), "r") as f:
    for line in f:
        if line.startswith("__version__"):
            version = line.split("=")[1].strip().replace("\"", "").replace("\'", "")
            break
if version is None:
    ## Fail with a clear message instead of a NameError inside setup()
    raise RuntimeError("Could not find __version__ in sparse_convolution/__init__.py")


setup(
    name='sparse_convolution',
    version=version,

    description='Sparse convolution in python using Toeplitz convolution matrix multiplication.',
    ## Re-use the README text read above (path is relative to this file, and
    ##  the file handle is closed) rather than opening 'README.md' from the cwd
    long_description=readme,
    long_description_content_type='text/markdown',

    # The project's main homepage.
    url='https://github.com/RichieHakim/sparse_convolution',

    # Author details
    author='Richard Hakim',
    author_email='richhakim@gmail.com',

    # Choose your license
    license='MIT',

    # Supported platforms
    platforms=['Any'],

    # See https://pypi.python.org/pypi?%3Aaction=list_classifiers
    classifiers=[
        # How mature is this project? Common values are
        #   3 - Alpha
        #   4 - Beta
        #   5 - Production/Stable
        'Development Status :: 5 - Production/Stable',

        # Indicate who your project is intended for
        'Intended Audience :: Science/Research',
        'Topic :: Scientific/Engineering :: Bio-Informatics',

        # Specify the Python versions you support here.
        'Programming Language :: Python :: 3',
    ],

    # What does your project relate to?
    keywords='sparse convolution Toeplitz python',

    # You can just specify the packages manually here if your project is
    # simple. Or you can use find_packages().
    packages=['sparse_convolution'],

    # List run-time dependencies here. These will be installed by pip when
    # your project is installed. For an analysis of "install_requires" vs pip's
    # requirements files see:
    # https://packaging.python.org/en/latest/requirements.html
    install_requires=list(deps_all_dict.values()),

    include_package_data=True,
)
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
|
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
import scipy.sparse
|
|
2
|
+
import numpy as np
|
|
3
|
+
|
|
4
|
+
class Toeplitz_convolution2d:
    """
    Convolve a 2D array with a 2D kernel using the Toeplitz matrix
    multiplication method.
    Allows for SPARSE 'x' inputs. 'k' should remain dense.
    Ideal when 'x' is very sparse (density<0.01), 'x' is small
    (shape <(1000,1000)), 'k' is small (shape <(100,100)), and
    the batch size is large (e.g. 1000+).
    Generally faster than scipy.signal.convolve2d when convolving multiple
    arrays with the same kernel. Maintains low memory footprint by
    storing the toeplitz matrix as a sparse matrix.

    See: https://stackoverflow.com/a/51865516 and https://github.com/alisaaalehi/convolution_as_multiplication
        for a nice illustration.
    See: https://docs.scipy.org/doc/scipy/reference/generated/scipy.linalg.convolution_matrix.html
        for 1D version.
    See: https://docs.scipy.org/doc/scipy/reference/generated/scipy.linalg.matmul_toeplitz.html#scipy.linalg.matmul_toeplitz
        for potential ways to make this implementation faster.

    Test with: tests.test_toeplitz_convolution2d()
    RH 2022
    """
    def __init__(
        self,
        x_shape,
        k,
        mode='same',
        dtype=None,
    ):
        """
        Initialize the convolution object.
        Makes the Toeplitz matrix and stores it.

        Args:
            x_shape (tuple):
                The shape of the 2D array to be convolved.
            k (np.ndarray):
                2D kernel to convolve with
            mode (str):
                'full', 'same' or 'valid'
                see scipy.signal.convolve2d for details
            dtype (np.dtype):
                The data type to use for the Toeplitz matrix.
                Ideally, this matches the data type of the input array.
                If None, then the data type of the kernel is used.

        Raises:
            AssertionError:
                If mode=='valid' and x_shape is smaller than the kernel
                 in either dimension.
        """
        ## NOTE: self.k holds the vertically FLIPPED copy of the kernel
        self.k = k = np.flipud(k.copy())
        self.mode = mode
        self.x_shape = x_shape
        self.dtype = k.dtype if dtype is None else dtype

        if mode == 'valid':
            assert x_shape[0] >= k.shape[0] and x_shape[1] >= k.shape[1], "x must be larger than k in both dimensions for mode='valid'"

        ## 'size out' is the size of the 'full' output array
        self.so = so = ((k.shape[0] + x_shape[0] - 1), (k.shape[1] + x_shape[1] - 1))

        ## make one 1D-convolution toeplitz matrix per row of the kernel.
        ##  k[::-1] undoes the flip above, so rows are visited in original order
        t = [scipy.sparse.diags(
            diagonals=np.ones((k.shape[1], x_shape[1]), dtype=self.dtype) * k_i[::-1][:, None],
            offsets=np.arange(-k.shape[1] + 1, 1),
            shape=(so[1], x_shape[1]),
            dtype=self.dtype,
        ) for k_i in k[::-1]]
        ## add empty matrices to the bottom of the block due to padding, then concatenate
        tc = scipy.sparse.vstack(
            t + [scipy.sparse.dia_matrix((t[0].shape), dtype=self.dtype)] * (x_shape[0] - 1)
        )

        ## make the double block toeplitz matrix:
        ##  one column-block per input row, each shifted down by one output row (so[1] entries)
        self.dt = scipy.sparse.hstack([
            self._roll_sparse(x=tc, shift=ii * so[1])
            for ii in range(x_shape[0])
        ]).tocsr()

    def __call__(
        self,
        x,
        batching=True,
        mode=None,
    ):
        """
        Convolve the input array with the kernel.

        Args:
            x (np.ndarray or scipy.sparse.csc_matrix or scipy.sparse.csr_matrix):
                Input array(s) (i.e. image(s)) to convolve with the kernel
                If batching==False: Single 2D array to convolve with the kernel.
                    shape: (self.x_shape[0], self.x_shape[1])
                If batching==True: Multiple 2D arrays that have been flattened
                 into row vectors (with order='C').
                    shape: (n_arrays, self.x_shape[0]*self.x_shape[1])
            batching (bool):
                If False, x is a single 2D array.
                If True, x is a 2D array where each row is a flattened 2D array.
            mode (str):
                'full', 'same' or 'valid'
                see scipy.signal.convolve2d for details
                Overrides the mode set in __init__.
                If None, the mode set in __init__ is used.

        Returns:
            out (np.ndarray or scipy sparse matrix):
                Dense if x is dense, sparse if x is sparse.
                If batching==True: Multiple convolved 2D arrays that have been flattened
                 into row vectors (with order='C').
                    shape: (n_arrays, height*width)
                If batching==False: Single convolved 2D array of shape (height, width)

        Raises:
            ValueError:
                If the resolved mode is not 'full', 'same' or 'valid'.
        """
        if mode is None:
            mode = self.mode  ## use the mode that was set in the init if not specified
        p_t, p_b, p_l, p_r = self._crop_bounds(mode)

        issparse = scipy.sparse.issparse(x)

        if batching:
            x_v = x.T  ## transpose into column vectors
        else:
            x_v = x.reshape(-1, 1)  ## reshape 2D array into a column vector

        if issparse:
            x_v = x_v.tocsc()

        out_v = self.dt @ x_v  ## each column is one flattened 'full' output

        ## crop the output to the correct size
        if batching:
            ## boolean mask over the flattened 'full' output selects the crop window
            idx_crop = np.zeros((self.so), dtype=np.bool_)
            idx_crop[p_t:p_b, p_l:p_r] = True
            idx_crop = idx_crop.reshape(-1)
            out = out_v[idx_crop, :].T
        else:
            if issparse:
                out = out_v.reshape((self.so)).tocsc()[p_t:p_b, p_l:p_r]
            else:
                out = out_v.reshape((self.so))[p_t:p_b, p_l:p_r]  ## reshape back into 2D array and crop
        return out

    def _crop_bounds(
        self,
        mode,
    ):
        """
        Get the window of the 'full' output that corresponds to 'mode'.

        Args:
            mode (str):
                'full', 'same' or 'valid'

        Returns:
            bounds (tuple of int):
                (top, bottom, left, right) such that the cropped output is
                 full_output[top:bottom, left:right]

        Raises:
            ValueError:
                If mode is not 'full', 'same' or 'valid'.
        """
        k_h, k_w = self.k.shape[0], self.k.shape[1]
        x_h, x_w = self.x_shape[0], self.x_shape[1]

        if mode == 'full':
            return 0, self.so[0], 0, self.so[1]
        if mode == 'same':
            ## window of the input's size, offset by half the kernel (rounded down)
            p_t = (k_h - 1) // 2
            p_l = (k_w - 1) // 2
            return p_t, p_t + x_h, p_l, p_l + x_w
        if mode == 'valid':
            ## drop (kernel size - 1) entries from every edge of the 'full' output
            return k_h - 1, x_h, k_w - 1, x_w
        raise ValueError(f"mode must be 'full', 'same' or 'valid'. Got: {mode!r}")

    def _roll_sparse(
        self,
        x,
        shift,
    ):
        """
        Shift the entries of a sparse matrix down by 'shift' rows.
        There is no wrap-around: 'shift' is simply added to the row
         indices, so the caller must ensure they stay within x.shape[0].

        Args:
            x (scipy sparse matrix):
                Matrix to shift. Not modified.
            shift (int):
                Number of rows to shift by.

        Returns:
            out (scipy.sparse.coo_matrix):
                Shifted copy of x.
        """
        ## COO format exposes the row indices directly; copy so 'x' is untouched
        out = x.tocoo(copy=True)
        out.row += shift
        return out
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: sparse_convolution
|
|
3
|
+
Version: 0.1.1
|
|
4
|
+
Summary: Sparse convolution in python using Toeplitz convolution matrix multiplication.
|
|
5
|
+
Home-page: https://github.com/RichieHakim/sparse_convolution
|
|
6
|
+
Author: Richard Hakim
|
|
7
|
+
Author-email: richhakim@gmail.com
|
|
8
|
+
License: MIT
|
|
9
|
+
Keywords: sparse convolution Toeplitz python
|
|
10
|
+
Platform: Any
|
|
11
|
+
Classifier: Development Status :: 5 - Production/Stable
|
|
12
|
+
Classifier: Intended Audience :: Science/Research
|
|
13
|
+
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Description-Content-Type: text/markdown
|
|
16
|
+
License-File: LICENSE
|
|
17
|
+
Requires-Dist: scipy
|
|
18
|
+
Requires-Dist: numpy
|
|
19
|
+
|
|
20
|
+
# sparse_convolution
|
|
21
|
+
Sparse convolution in python. \
|
|
22
|
+
Uses Toeplitz convolutional matrix multiplication to perform sparse convolution. \
|
|
23
|
+
This allows for extremely fast convolution when:
|
|
24
|
+
- The kernel is small (<= 30x30)
|
|
25
|
+
- The input array is sparse (<= 1% density)
|
|
26
|
+
- Many arrays are convolved with the same kernel
|
|
27
|
+
|
|
28
|
+
## Install:
|
|
29
|
+
`git clone https://github.com/RichieHakim/sparse_convolution` \
|
|
30
|
+
`cd sparse_convolution` \
|
|
31
|
+
`pip install -e .`
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
## Basic usage:
|
|
35
|
+
Convolve a single sparse 2D array with a 2D kernel.
|
|
36
|
+
```
|
|
37
|
+
import sparse_convolution as sc
|
|
38
|
+
import numpy as np
|
|
39
|
+
import scipy.sparse
|
|
40
|
+
|
|
41
|
+
# Create a single sparse matrix
|
|
42
|
+
A = scipy.sparse.rand(100, 100, density=0.1)
|
|
43
|
+
|
|
44
|
+
# Create a dense kernel
|
|
45
|
+
B = np.random.rand(3, 3)
|
|
46
|
+
|
|
47
|
+
# Prepare class
|
|
48
|
+
conv = sc.Toeplitz_convolution2d(
|
|
49
|
+
x_shape=A.shape,
|
|
50
|
+
k=B,
|
|
51
|
+
mode='same',
|
|
52
|
+
dtype=np.float32,
|
|
53
|
+
)
|
|
54
|
+
|
|
55
|
+
# Convolve
|
|
56
|
+
C = conv(
|
|
57
|
+
x=A,
|
|
58
|
+
batching=False,
|
|
59
|
+
mode='same',
|
|
60
|
+
).toarray()
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
## Batching usage:
|
|
65
|
+
Convolve multiple sparse 2D arrays with a 2D kernel. \
|
|
66
|
+
The input arrays must be reshaped into flattened vectors and stacked into a single sparse array of shape: `(n_arrays, height * width)`.
|
|
67
|
+
```
|
|
68
|
+
import sparse_convolution as sc
|
|
69
|
+
import numpy as np
|
|
70
|
+
import scipy.sparse
|
|
71
|
+
|
|
72
|
+
# Create multiple sparse matrices
|
|
73
|
+
# note that the shape of A will be (3, 100**2)
|
|
74
|
+
A = scipy.sparse.vstack([
|
|
75
|
+
scipy.sparse.rand(100, 100, density=0.1).reshape(1, -1),
|
|
76
|
+
scipy.sparse.rand(100, 100, density=0.1).reshape(1, -1),
|
|
77
|
+
scipy.sparse.rand(100, 100, density=0.1).reshape(1, -1),
|
|
78
|
+
]).tocsr()
|
|
79
|
+
|
|
80
|
+
# Create a dense kernel
|
|
81
|
+
B = np.random.rand(3, 3)
|
|
82
|
+
|
|
83
|
+
# Prepare class
|
|
84
|
+
conv = sc.Toeplitz_convolution2d(
|
|
85
|
+
x_shape=(100, 100), # note that the input shape here is (100, 100)
|
|
86
|
+
k=B,
|
|
87
|
+
mode='same',
|
|
88
|
+
dtype=np.float32,
|
|
89
|
+
)
|
|
90
|
+
|
|
91
|
+
# Convolve
|
|
92
|
+
C = conv(
|
|
93
|
+
x=A,
|
|
94
|
+
batching=True,
|
|
95
|
+
mode='same',
|
|
96
|
+
)
|
|
97
|
+
|
|
98
|
+
# Reshape the output back to (3, 100, 100)
|
|
99
|
+
C_reshaped = np.stack([c.reshape(100, 100).toarray() for c in C], axis=0)
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
## References
|
|
103
|
+
- See: https://stackoverflow.com/a/51865516 and https://github.com/alisaaalehi/convolution_as_multiplication
|
|
104
|
+
for a nice illustration.
|
|
105
|
+
- See: https://docs.scipy.org/doc/scipy/reference/generated/scipy.linalg.convolution_matrix.html
|
|
106
|
+
for 1D version.
|
|
107
|
+
- See: https://docs.scipy.org/doc/scipy/reference/generated/scipy.linalg.matmul_toeplitz.html#scipy.linalg.matmul_toeplitz
|
|
108
|
+
for potential ways to make this implementation faster.
|
|
109
|
+
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
LICENSE
|
|
2
|
+
MANIFEST.in
|
|
3
|
+
README.md
|
|
4
|
+
requirements.txt
|
|
5
|
+
setup.cfg
|
|
6
|
+
setup.py
|
|
7
|
+
sparse_convolution/__init__.py
|
|
8
|
+
sparse_convolution/sparse_convolution.py
|
|
9
|
+
sparse_convolution.egg-info/PKG-INFO
|
|
10
|
+
sparse_convolution.egg-info/SOURCES.txt
|
|
11
|
+
sparse_convolution.egg-info/dependency_links.txt
|
|
12
|
+
sparse_convolution.egg-info/requires.txt
|
|
13
|
+
sparse_convolution.egg-info/top_level.txt
|
|
14
|
+
tests/test_unit.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
sparse_convolution
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
import traceback
|
|
2
|
+
import time
|
|
3
|
+
|
|
4
|
+
import numpy as np
|
|
5
|
+
import scipy.signal
|
|
6
|
+
|
|
7
|
+
from sparse_convolution import Toeplitz_convolution2d
|
|
8
|
+
|
|
9
|
+
def test_toeplitz_convolution2d():
    """
    Test Toeplitz_convolution2d
    Tests for modes, shapes, values, and for sparse matrices against
     scipy.signal.convolve2d.

    Every combination of input shape and kernel shape with side lengths
     1..6 is checked for each mode, with and without batching, and for
     both dense and sparse inputs. The first mismatch fails the test
     immediately, so a later passing case cannot mask it.

    RH 2022
    """
    ## scipy.sparse is used directly below; import it here rather than
    ##  relying on another module having loaded it as a side effect
    import scipy.sparse

    n_batch = 3

    ## all combinations of (x_height, x_width, k_height, k_width)
    grids = np.meshgrid(*([np.arange(1, 7)] * 4))
    shapes_to_try = np.stack([g.reshape(-1) for g in grids], axis=1).tolist()

    for batching in (False, True):
        print(f'testing with batching={batching}')

        for mode in ['full', 'same', 'valid']:
            for x_h, x_w, k_h, k_w in shapes_to_try:
                context = f'batching=={batching}, shapes: x: {(x_h, x_w)}, k: {(k_h, k_w)} and mode: {mode}'
                k = np.random.rand(k_h, k_w)

                ## 'valid' with a kernel larger than the input must be rejected at init
                if mode == 'valid' and (x_h < k_h or x_w < k_w):
                    try:
                        Toeplitz_convolution2d(x_shape=(x_h, x_w), k=k, mode=mode, dtype=None)
                    except Exception as e:
                        assert 'x must be larger than k' in str(e), f'unexpected exception with {context}: {e}'
                    else:
                        raise AssertionError(f'expected an exception with {context}')
                    continue

                xs = np.random.rand(n_batch, x_h, x_w)
                t = Toeplitz_convolution2d(x_shape=(x_h, x_w), k=k, mode=mode, dtype=None)

                if batching:
                    out_sp = np.stack([scipy.signal.convolve2d(x_i, k, mode=mode) for x_i in xs], axis=0)
                    x = xs.reshape(n_batch, -1)  ## each row is one flattened image
                    out_t2d = t(x, batching=True, mode=mode).reshape(out_sp.shape)
                    out_t2d_s = t(scipy.sparse.csr_matrix(x), batching=True, mode=mode).toarray().reshape(out_sp.shape)
                else:
                    x = xs[0]
                    out_sp = scipy.signal.convolve2d(x, k, mode=mode)
                    out_t2d = t(x, batching=False, mode=mode)
                    ## .toarray() instead of .A, which newer SciPy versions no longer provide
                    out_t2d_s = t(scipy.sparse.csr_matrix(x), batching=False, mode=mode).toarray()

                assert out_t2d.shape == out_sp.shape, (
                    f'dense output shape {out_t2d.shape} != expected {out_sp.shape} with {context}'
                )
                assert out_t2d_s.shape == out_sp.shape, (
                    f'sparse output shape {out_t2d_s.shape} != expected {out_sp.shape} with {context}'
                )
                assert np.allclose(out_t2d, out_sp), (
                    f'dense output mismatch with {context}\nout_t2d: {out_t2d}\nout_sp: {out_sp}'
                )
                assert np.allclose(out_t2d_s, out_sp), (
                    f'sparse output mismatch with {context}\nout_t2d_s: {out_t2d_s}\nout_sp: {out_sp}'
                )

    print(f'success with all shapes and modes')
|