PyPI - sparse-convolution - Versions diffs - 0.1.1__tar.gz → 0.1.3__tar.gz - Mend

sparse-convolution 0.1.1tar.gz → 0.1.3tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

{sparse_convolution-0.1.1/sparse_convolution.egg-info → sparse_convolution-0.1.3}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: sparse_convolution
-Version: 0.1.1
+Version: 0.1.3
 Summary: Sparse convolution in python using Toeplitz convolution matrix multiplication.
 Home-page: https://github.com/RichieHakim/sparse_convolution
 Author: Richard Hakim
@@ -21,11 +21,18 @@ Requires-Dist: numpy
 Sparse convolution in python. \
 Uses Toeplitz convolutional matrix multiplication to perform sparse convolution. \
 This allows for extremely fast convolution when:
-- The kernel is small (<= 30x30)
+- The kernel is small (<= 100x100)
 - The input array is sparse (<= 1% density)
-- Many arrays are convolved with the same kernel
+- The input array is small (<= 1000x1000)
+- Many arrays are convolved with the same kernel (large batch size >= 1000)
 ## Install:
+The package is available on PyPI. \
+`pip install sparse_convolution`
+<br>
+Alternatively, you can install from source. \
 `git clone https://github.com/RichieHakim/sparse_convolution` \
 `cd sparse_convolution` \
 `pip install -e .`
@@ -45,7 +52,7 @@ A = scipy.sparse.rand(100, 100, density=0.1)
 B = np.random.rand(3, 3)
 # Prepare class
-conv = Toeplitz_convolution2d(
+conv = sc.Toeplitz_convolution2d(
     x_shape=A.shape,
     k=B,
     mode='same',
@@ -56,7 +63,6 @@ conv = Toeplitz_convolution2d(
 C = conv(
     x=A,
     batching=False,
-    mode='same',
 ).toarray()
 ```
@@ -92,7 +98,6 @@ conv = sc.Toeplitz_convolution2d(
 C = conv(
     x=A,
     batching=True,
-    mode='same',
 )
 # Reshape the output back to (3, 100, 100)

{sparse_convolution-0.1.1 → sparse_convolution-0.1.3}/README.md RENAMED Viewed

@@ -2,11 +2,18 @@
 Sparse convolution in python. \
 Uses Toeplitz convolutional matrix multiplication to perform sparse convolution. \
 This allows for extremely fast convolution when:
-- The kernel is small (<= 30x30)
+- The kernel is small (<= 100x100)
 - The input array is sparse (<= 1% density)
-- Many arrays are convolved with the same kernel
+- The input array is small (<= 1000x1000)
+- Many arrays are convolved with the same kernel (large batch size >= 1000)
 ## Install:
+The package is available on PyPI. \
+`pip install sparse_convolution`
+<br>
+Alternatively, you can install from source. \
 `git clone https://github.com/RichieHakim/sparse_convolution` \
 `cd sparse_convolution` \
 `pip install -e .`
@@ -26,7 +33,7 @@ A = scipy.sparse.rand(100, 100, density=0.1)
 B = np.random.rand(3, 3)
 # Prepare class
-conv = Toeplitz_convolution2d(
+conv = sc.Toeplitz_convolution2d(
     x_shape=A.shape,
     k=B,
     mode='same',
@@ -37,7 +44,6 @@ conv = Toeplitz_convolution2d(
 C = conv(
     x=A,
     batching=False,
-    mode='same',
 ).toarray()
 ```
@@ -73,7 +79,6 @@ conv = sc.Toeplitz_convolution2d(
 C = conv(
     x=A,
     batching=True,
-    mode='same',
 )
 # Reshape the output back to (3, 100, 100)

{sparse_convolution-0.1.1 → sparse_convolution-0.1.3}/sparse_convolution/__init__.py RENAMED Viewed

@@ -1,3 +1,3 @@
 from sparse_convolution.sparse_convolution import Toeplitz_convolution2d
-__version__ = '0.1.1'
+__version__ = '0.1.3'

sparse_convolution-0.1.3/sparse_convolution/sparse_convolution.py ADDED Viewed

@@ -0,0 +1,217 @@
+from typing import Tuple, Optional, Union
+import scipy.sparse
+import numpy as np
+class Toeplitz_convolution2d():
+    """
+    Convolve a 2D array with a 2D kernel using the Toeplitz matrix
+    multiplication method. This class is ideal when 'x' is very sparse
+    (density<0.01), 'x' is small (shape <(1000,1000)), 'k' is small (shape
+    <(100,100)), and the batch size is large (e.g. 1000+). Generally, it is
+    faster than scipy.signal.convolve2d when convolving multiple arrays with the
+    same kernel. It maintains a low memory footprint by storing the toeplitz
+    matrix as a sparse matrix.
+    RH 2022
+    Attributes:
+        x_shape (Tuple[int, int]):
+            The shape of the 2D array to be convolved.
+        k (np.ndarray):
+            2D kernel to convolve with.
+        mode (str):
+            Either ``'full'``, ``'same'``, or ``'valid'``. See
+            scipy.signal.convolve2d for details.
+        dtype (Optional[np.dtype]):
+            The data type to use for the Toeplitz matrix.
+            If ``None``, then the data type of the kernel is used.
+    Args:
+        x_shape (Tuple[int, int]):
+            The shape of the 2D array to be convolved.
+        k (np.ndarray):
+            2D kernel to convolve with.
+        mode (str):
+            Convolution method to use, either ``'full'``, ``'same'``, or
+            ``'valid'``.
+            See scipy.signal.convolve2d for details. (Default is 'same')
+        dtype (Optional[np.dtype]):
+            The data type to use for the Toeplitz matrix. Ideally, this matches
+            the data type of the input array. If ``None``, then the data type of
+            the kernel is used. (Default is ``None``)
+    Example:
+        .. highlight:: python
+        .. code-block:: python
+            # create Toeplitz_convolution2d object
+            toeplitz_convolution2d = Toeplitz_convolution2d(
+                x_shape=(100,30),
+                k=np.random.rand(10,10),
+                mode='same',
+            )
+            toeplitz_convolution2d(
+                x=scipy.sparse.csr_matrix(np.random.rand(5,3000)),
+                batching=True,
+            )
+    """
+    def __init__(
+        self,
+        x_shape: Tuple[int, int],
+        k: np.ndarray,
+        mode: str = 'same',
+        dtype: Optional[np.dtype] = None,
+        verbose: Union[bool, int] = False,
+    ):
+        """
+        Initializes the Toeplitz_convolution2d object and stores the Toeplitz
+        matrix.
+        """
+        ## Type checking
+        assert isinstance(x_shape, tuple), "x_shape must be a tuple"
+        assert all([isinstance(s, (int, float, np.int_, np.float_)) for s in x_shape]), "x_shape must be a tuple of integers"
+        x_shape = (int(x_shape[0]), int(x_shape[1]))
+        assert isinstance(k, np.ndarray), "k must be a numpy array"
+        assert k.ndim == 2, "k must be a 2D array"
+        assert isinstance(mode, str), "mode must be a string"
+        assert mode in ['full', 'same', 'valid'], "mode must be 'full', 'same', or 'valid'"
+        # if dtype is not None:
+        #     assert isinstance(dtype, np.dtype), "dtype must be a numpy dtype"
+        ## Warn if x_shape is large
+        if verbose > 0:
+            n_nz_elements_expected = x_shape[0]*x_shape[1]*k.shape[0]*k.shape[1]
+            if n_nz_elements_expected >= 1e8:
+                print("Warning: Expected number of non-zero elements in the Toeplitz matrix is large. \n"
+                      f"(x_shape[0]*x_shape[1]*k.shape[0]*k.shape[1]) = {n_nz_elements_expected} non-zero elements. \n"
+                      "This will likely be slow and have a large memory footprint. \n"
+                      "Consider breaking the `x` array into smaller chunks or tiles so that `x_shape` can be smaller and performing the convolution in batches.")
+        self.k = k = np.flipud(k.copy())
+        self.mode = mode
+        self.x_shape = x_shape
+        dtype = k.dtype if dtype is None else dtype
+        if mode == 'valid':
+            assert x_shape[0] >= k.shape[0] and x_shape[1] >= k.shape[1], "x must be larger than k in both dimensions for mode='valid'"
+        self.so = so = size_output_array = ( (k.shape[0] + x_shape[0] -1), (k.shape[1] + x_shape[1] -1))  ## 'size out' is the size of the output array
+        ## make the toeplitz matrices
+        t = toeplitz_matrices = [scipy.sparse.diags(
+            diagonals=np.ones((k.shape[1], x_shape[1]), dtype=dtype) * k_i[::-1][:,None],
+            offsets=np.arange(-k.shape[1]+1, 1),
+            shape=(so[1], x_shape[1]),
+            dtype=dtype,
+        ) for k_i in k[::-1]]  ## make the toeplitz matrices for the rows of the kernel
+        tc = toeplitz_concatenated = scipy.sparse.vstack(t + [scipy.sparse.dia_matrix((t[0].shape), dtype=dtype)]*(x_shape[0]-1))  ## add empty matrices to the bottom of the block due to padding, then concatenate
+        ## make the double block toeplitz matrix
+        self.dt = double_toeplitz = scipy.sparse.hstack([self._roll_sparse(
+            x=tc,
+            shift=(ii>0)*ii*(so[1])  ## shift the blocks by the size of the output array
+        ) for ii in range(x_shape[0])]).tocsr()
+    def __call__(
+        self,
+        x: Union[np.ndarray, scipy.sparse.csc_matrix, scipy.sparse.csr_matrix],
+        batching: bool = True,
+        mode: Optional[str] = None,
+    ) -> Union[np.ndarray, scipy.sparse.csr_matrix]:
+        """
+        Convolve the input array with the kernel.
+        Args:
+            x (Union[np.ndarray, scipy.sparse.csc_matrix,
+            scipy.sparse.csr_matrix]):
+                Input array(s) (i.e. image(s)) to convolve with the kernel. \n
+                * If ``batching==False``: Single 2D array to convolve with the
+                  kernel. Shape: *(self.x_shape[0], self.x_shape[1])*
+                * If ``batching==True``: Multiple 2D arrays that have been
+                  flattened into row vectors (with order='C'). \n
+                Shape: *(n_arrays, self.x_shape[0]*self.x_shape[1])*
+            batching (bool):
+                * ``False``: x is a single 2D array.
+                * ``True``: x is a 2D array where each row is a flattened 2D
+                  array. \n
+                (Default is ``True``)
+            mode (Optional[str]):
+                Defines the mode of the convolution. Options are 'full', 'same'
+                or 'valid'. See `scipy.signal.convolve2d` for details. Overrides
+                the mode set in __init__. (Default is ``None``)
+        Returns:
+            (Union[np.ndarray, scipy.sparse.csr_matrix]):
+                out (Union[np.ndarray, scipy.sparse.csr_matrix]):
+                    * ``batching==True``: Multiple convolved 2D arrays that have
+                      been flattened into row vectors (with order='C'). Shape:
+                      *(n_arrays, height*width)*
+                    * ``batching==False``: Single convolved 2D array of shape
+                      *(height, width)*
+        """
+        if mode is None:
+            mode = self.mode  ## use the mode that was set in the init if not specified
+        issparse = scipy.sparse.issparse(x)
+        if batching:
+            x_v = x.T  ## transpose into column vectors
+        else:
+            x_v = x.reshape(-1, 1)  ## reshape 2D array into a column vector
+        if issparse:
+            x_v = x_v.tocsc()
+        out_v = self.dt @ x_v  ## if sparse, then 'out_v' will be a csc matrix
+        ## crop the output to the correct size
+        if mode == 'full':
+            t = 0
+            b = self.so[0]+1
+            l = 0
+            r = self.so[1]+1
+        if mode == 'same':
+            t = (self.k.shape[0]-1)//2
+            b = -(self.k.shape[0]-1)//2
+            l = (self.k.shape[1]-1)//2
+            r = -(self.k.shape[1]-1)//2
+            b = self.x_shape[0]+1 if b==0 else b
+            r = self.x_shape[1]+1 if r==0 else r
+        if mode == 'valid':
+            t = (self.k.shape[0]-1)
+            b = -(self.k.shape[0]-1)
+            l = (self.k.shape[1]-1)
+            r = -(self.k.shape[1]-1)
+            b = self.x_shape[0]+1 if b==0 else b
+            r = self.x_shape[1]+1 if r==0 else r
+        if batching:
+            idx_crop = np.zeros((self.so), dtype=np.bool_)
+            idx_crop[t:b, l:r] = True
+            idx_crop = idx_crop.reshape(-1)
+            out = out_v[idx_crop,:].T
+        else:
+            if issparse:
+                out = out_v.reshape((self.so)).tocsc()[t:b, l:r]
+            else:
+                out = out_v.reshape((self.so))[t:b, l:r]  ## reshape back into 2D array and crop
+        return out
+    def _roll_sparse(
+        self,
+        x: scipy.sparse.csr_matrix,
+        shift: int,
+    ):
+        """
+        Shift the row indices of a sparse matrix by ``shift`` (columns are untouched; no wrap-around).
+        """
+        out = x.copy()
+        out.row += shift
+        return out

{sparse_convolution-0.1.1 → sparse_convolution-0.1.3/sparse_convolution.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: sparse_convolution
-Version: 0.1.1
+Version: 0.1.3
 Summary: Sparse convolution in python using Toeplitz convolution matrix multiplication.
 Home-page: https://github.com/RichieHakim/sparse_convolution
 Author: Richard Hakim
@@ -21,11 +21,18 @@ Requires-Dist: numpy
 Sparse convolution in python. \
 Uses Toeplitz convolutional matrix multiplication to perform sparse convolution. \
 This allows for extremely fast convolution when:
-- The kernel is small (<= 30x30)
+- The kernel is small (<= 100x100)
 - The input array is sparse (<= 1% density)
-- Many arrays are convolved with the same kernel
+- The input array is small (<= 1000x1000)
+- Many arrays are convolved with the same kernel (large batch size >= 1000)
 ## Install:
+The package is available on PyPI. \
+`pip install sparse_convolution`
+<br>
+Alternatively, you can install from source. \
 `git clone https://github.com/RichieHakim/sparse_convolution` \
 `cd sparse_convolution` \
 `pip install -e .`
@@ -45,7 +52,7 @@ A = scipy.sparse.rand(100, 100, density=0.1)
 B = np.random.rand(3, 3)
 # Prepare class
-conv = Toeplitz_convolution2d(
+conv = sc.Toeplitz_convolution2d(
     x_shape=A.shape,
     k=B,
     mode='same',
@@ -56,7 +63,6 @@ conv = Toeplitz_convolution2d(
 C = conv(
     x=A,
     batching=False,
-    mode='same',
 ).toarray()
 ```
@@ -92,7 +98,6 @@ conv = sc.Toeplitz_convolution2d(
 C = conv(
     x=A,
     batching=True,
-    mode='same',
 )
 # Reshape the output back to (3, 100, 100)

sparse_convolution-0.1.1/sparse_convolution/sparse_convolution.py DELETED Viewed

@@ -1,172 +0,0 @@
-import scipy.sparse
-import numpy as np
-class Toeplitz_convolution2d:
-    """
-    Convolve a 2D array with a 2D kernel using the Toeplitz matrix
-     multiplication method.
-    Allows for SPARSE 'x' inputs. 'k' should remain dense.
-    Ideal when 'x' is very sparse (density<0.01), 'x' is small
-     (shape <(1000,1000)), 'k' is small (shape <(100,100)), and
-     the batch size is large (e.g. 1000+).
-    Generally faster than scipy.signal.convolve2d when convolving multiple
-     arrays with the same kernel. Maintains low memory footprint by
-     storing the toeplitz matrix as a sparse matrix.
-    See: https://stackoverflow.com/a/51865516 and https://github.com/alisaaalehi/convolution_as_multiplication
-     for a nice illustration.
-    See: https://docs.scipy.org/doc/scipy/reference/generated/scipy.linalg.convolution_matrix.html
-     for 1D version.
-    See: https://docs.scipy.org/doc/scipy/reference/generated/scipy.linalg.matmul_toeplitz.html#scipy.linalg.matmul_toeplitz
-     for potential ways to make this implementation faster.
-    Test with: tests.test_toeplitz_convolution2d()
-    RH 2022
-    """
-    def __init__(
-        self,
-        x_shape,
-        k,
-        mode='same',
-        dtype=None,
-    ):
-        """
-        Initialize the convolution object.
-        Makes the Toeplitz matrix and stores it.
-        Args:
-            x_shape (tuple):
-                The shape of the 2D array to be convolved.
-            k (np.ndarray):
-                2D kernel to convolve with
-            mode (str):
-                'full', 'same' or 'valid'
-                see scipy.signal.convolve2d for details
-            dtype (np.dtype):
-                The data type to use for the Toeplitz matrix.
-                Ideally, this matches the data type of the input array.
-                If None, then the data type of the kernel is used.
-        """
-        self.k = k = np.flipud(k.copy())
-        self.mode = mode
-        self.x_shape = x_shape
-        self.dtype = k.dtype if dtype is None else dtype
-        if mode == 'valid':
-            assert x_shape[0] >= k.shape[0] and x_shape[1] >= k.shape[1], "x must be larger than k in both dimensions for mode='valid'"
-        self.so = so = size_output_array = ( (k.shape[0] + x_shape[0] -1), (k.shape[1] + x_shape[1] -1))  ## 'size out' is the size of the output array
-        ## make the toeplitz matrices
-        t = toeplitz_matrices = [scipy.sparse.diags(
-            diagonals=np.ones((k.shape[1], x_shape[1]), dtype=self.dtype) * k_i[::-1][:,None],
-            offsets=np.arange(-k.shape[1]+1, 1),
-            shape=(so[1], x_shape[1]),
-            dtype=self.dtype,
-        ) for k_i in k[::-1]]  ## make the toeplitz matrices for the rows of the kernel
-        tc = toeplitz_concatenated = scipy.sparse.vstack(t + [scipy.sparse.dia_matrix((t[0].shape), dtype=self.dtype)]*(x_shape[0]-1))  ## add empty matrices to the bottom of the block due to padding, then concatenate
-        ## make the double block toeplitz matrix
-        self.dt = double_toeplitz = scipy.sparse.hstack([self._roll_sparse(
-            x=tc,
-            shift=(ii>0)*ii*(so[1])  ## shift the blocks by the size of the output array
-        ) for ii in range(x_shape[0])]).tocsr()
-    def __call__(
-        self,
-        x,
-        batching=True,
-        mode=None,
-    ):
-        """
-        Convolve the input array with the kernel.
-        Args:
-            x (np.ndarray or scipy.sparse.csc_matrix or scipy.sparse.csr_matrix):
-                Input array(s) (i.e. image(s)) to convolve with the kernel
-                If batching==False: Single 2D array to convolve with the kernel.
-                    shape: (self.x_shape[0], self.x_shape[1])
-                    type: np.ndarray or scipy.sparse.csc_matrix or scipy.sparse.csr_matrix
-                If batching==True: Multiple 2D arrays that have been flattened
-                 into row vectors (with order='C').
-                    shape: (n_arrays, self.x_shape[0]*self.x_shape[1])
-                    type: np.ndarray or scipy.sparse.csc_matrix or scipy.sparse.csr_matrix
-            batching (bool):
-                If False, x is a single 2D array.
-                If True, x is a 2D array where each row is a flattened 2D array.
-            mode (str):
-                'full', 'same' or 'valid'
-                see scipy.signal.convolve2d for details
-                Overrides the mode set in __init__.
-        Returns:
-            out (np.ndarray or scipy.sparse.csr_matrix):
-                If batching==True: Multiple convolved 2D arrays that have been flattened
-                 into row vectors (with order='C').
-                    shape: (n_arrays, height*width)
-                    type: np.ndarray or scipy.sparse.csc_matrix
-                If batching==False: Single convolved 2D array of shape (height, width)
-        """
-        # if batching:
-        #     if x.shape[0] > 9999:
-        #         print("RH WARNING: scipy.sparse.lil_matrix doesn't seem to work well with arrays with large numbers of rows. Consider breaking your job into smaller batches.")
-        if mode is None:
-            mode = self.mode  ## use the mode that was set in the init if not specified
-        issparse = scipy.sparse.issparse(x)
-        if batching:
-            x_v = x.T  ## transpose into column vectors
-        else:
-            x_v = x.reshape(-1, 1)  ## reshape 2D array into a column vector
-        if issparse:
-            x_v = x_v.tocsc()
-        out_v = self.dt @ x_v  ## if sparse, then 'out_v' will be a csc matrix
-        ## crop the output to the correct size
-        if mode == 'full':
-            p_t = 0
-            p_b = self.so[0]+1
-            p_l = 0
-            p_r = self.so[1]+1
-        if mode == 'same':
-            p_t = (self.k.shape[0]-1)//2
-            p_b = -(self.k.shape[0]-1)//2
-            p_l = (self.k.shape[1]-1)//2
-            p_r = -(self.k.shape[1]-1)//2
-            p_b = self.x_shape[0]+1 if p_b==0 else p_b
-            p_r = self.x_shape[1]+1 if p_r==0 else p_r
-        if mode == 'valid':
-            p_t = (self.k.shape[0]-1)
-            p_b = -(self.k.shape[0]-1)
-            p_l = (self.k.shape[1]-1)
-            p_r = -(self.k.shape[1]-1)
-            p_b = self.x_shape[0]+1 if p_b==0 else p_b
-            p_r = self.x_shape[1]+1 if p_r==0 else p_r
-        if batching:
-            idx_crop = np.zeros((self.so), dtype=np.bool_)
-            idx_crop[p_t:p_b, p_l:p_r] = True
-            idx_crop = idx_crop.reshape(-1)
-            out = out_v[idx_crop,:].T
-        else:
-            if issparse:
-                out = out_v.reshape((self.so)).tocsc()[p_t:p_b, p_l:p_r]
-            else:
-                out = out_v.reshape((self.so))[p_t:p_b, p_l:p_r]  ## reshape back into 2D array and crop
-        return out
-    def _roll_sparse(
-        self,
-        x,
-        shift,
-    ):
-        """
-        Roll columns of a sparse matrix.
-        """
-        out = x.copy()
-        out.row += shift
-        return out