PyPI - spacr - Versions diffs - 0.0.20__py3-none-any.whl → 0.0.21__py3-none-any.whl - Mend

spacr 0.0.20__py3-none-any.whl → 0.0.21__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21)

spacr/alpha.py +291 -14
spacr/annotate_app.py +2 -2
spacr/core.py +1301 -426
spacr/foldseek.py +793 -0
spacr/get_alfafold_structures.py +72 -0
spacr/gui_mask_app.py +30 -10
spacr/gui_utils.py +17 -2
spacr/io.py +260 -102
spacr/measure.py +150 -64
spacr/plot.py +151 -12
spacr/sim.py +666 -119
spacr/timelapse.py +139 -9
spacr/train.py +18 -10
spacr/utils.py +43 -43
{spacr-0.0.20.dist-info → spacr-0.0.21.dist-info}/METADATA +5 -2
spacr-0.0.21.dist-info/RECORD +33 -0
spacr-0.0.20.dist-info/RECORD +0 -31
{spacr-0.0.20.dist-info → spacr-0.0.21.dist-info}/LICENSE +0 -0
{spacr-0.0.20.dist-info → spacr-0.0.21.dist-info}/WHEEL +0 -0
{spacr-0.0.20.dist-info → spacr-0.0.21.dist-info}/entry_points.txt +0 -0
{spacr-0.0.20.dist-info → spacr-0.0.21.dist-info}/top_level.txt +0 -0

spacr/io.py CHANGED

@@ -1,4 +1,4 @@
-import os, re, sqlite3, gc, torch, time, random, shutil, cv2, tarfile, cellpose
+import os, re, sqlite3, gc, torch, time, random, shutil, cv2, tarfile, cellpose, glob
 import numpy as np
 import pandas as pd
 import tifffile
@@ -45,19 +45,19 @@ def _load_images_and_labels(image_files, label_files, circular=False, invert=Fal
     if not image_files is None and not label_files is None:
         for img_file, lbl_file in zip(image_files, label_files):
-            image = cellpose.imread(img_file)
+            image = cellpose.io.imread(img_file)
             if invert:
                 image = invert_image(image)
             if circular:
                 image = apply_mask(image, output_value=0)
-            label = cellpose.imread(lbl_file)
+            label = cellpose.io.imread(lbl_file)
             if image.max() > 1:
                 image = image / image.max()
             images.append(image)
             labels.append(label)
     elif not image_files is None:
         for img_file in image_files:
-            image = cellpose.imread(img_file)
+            image = cellpose.io.imread(img_file)
             if invert:
                 image = invert_image(image)
             if circular:
@@ -67,7 +67,7 @@ def _load_images_and_labels(image_files, label_files, circular=False, invert=Fal
             images.append(image)
     elif not image_files is None:
             for lbl_file in label_files:
-                label = cellpose.imread(lbl_file)
+                label = cellpose.io.imread(lbl_file)
                 if circular:
                     label = apply_mask(label, output_value=0)
             labels.append(label)
@@ -109,15 +109,17 @@ def _load_normalized_images_and_labels(image_files, label_files, signal_threshol
     if label_files is not None:
         label_names = [os.path.basename(f) for f in label_files]
+        label_dir = os.path.dirname(label_files[0])
     # Load images and check percentiles
     for i,img_file in enumerate(image_files):
-        image = cellpose.imread(img_file)
+        #print(img_file)
+        image = cellpose.io.imread(img_file)
         if invert:
             image = invert_image(image)
         if circular:
             image = apply_mask(image, output_value=0)
+        #print(image.shape)
         # If specific channels are specified, select them
         if channels is not None and image.ndim == 3:
             image = image[..., channels]
@@ -169,7 +171,7 @@ def _load_normalized_images_and_labels(image_files, label_files, signal_threshol
     if label_files is not None:
         for lbl_file in label_files:
-            labels.append(cellpose.imread(lbl_file))
+            labels.append(cellpose.io.imread(lbl_file))
     else:
         label_names = []
         label_dir = None
@@ -178,85 +180,6 @@ def _load_normalized_images_and_labels(image_files, label_files, signal_threshol
     return normalized_images, labels, image_names, label_names
-class MyDataset(Dataset):
-    """
-    Custom dataset class for loading and processing image data.
-    Args:
-        data_dir (str): The directory path where the data is stored.
-        loader_classes (list): List of class names.
-        transform (callable, optional): A function/transform that takes in an PIL image and returns a transformed version. Default is None.
-        shuffle (bool, optional): Whether to shuffle the dataset. Default is True.
-        load_to_memory (bool, optional): Whether to load images into memory. Default is False.
-    Attributes:
-        data_dir (str): The directory path where the data is stored.
-        classes (list): List of class names.
-        transform (callable): A function/transform that takes in an PIL image and returns a transformed version.
-        shuffle (bool): Whether to shuffle the dataset.
-        load_to_memory (bool): Whether to load images into memory.
-        filenames (list): List of file paths.
-        labels (list): List of labels corresponding to each file.
-        images (list): List of loaded images.
-        image_cache (Cache): Cache object for storing loaded images.
-    Methods:
-        load_image: Load an image from file.
-        __len__: Get the length of the dataset.
-        shuffle_dataset: Shuffle the dataset.
-        __getitem__: Get an item from the dataset.
-    """
-    def _init__(self, data_dir, loader_classes, transform=None, shuffle=True, load_to_memory=False):
-        from .utils import Cache
-        self.data_dir = data_dir
-        self.classes = loader_classes
-        self.transform = transform
-        self.shuffle = shuffle
-        self.load_to_memory = load_to_memory
-        self.filenames = []
-        self.labels = []
-        self.images = []
-        self.image_cache = Cache(50)
-        for class_name in self.classes:
-            class_path = os.path.join(data_dir, class_name)
-            class_files = [os.path.join(class_path, f) for f in os.listdir(class_path) if os.path.isfile(os.path.join(class_path, f))]
-            self.filenames.extend(class_files)
-            self.labels.extend([self.classes.index(class_name)] * len(class_files))
-        if self.shuffle:
-            self.shuffle_dataset()
-        if self.load_to_memory:
-            self.images = [self.load_image(f) for f in self.filenames]
-    def load_image(self, img_path):
-        img = self.image_cache.get(img_path)
-        if img is None:
-            img = Image.open(img_path).convert('RGB')
-            self.image_cache.put(img_path, img)
-        return img
-    def _len__(self):
-        return len(self.filenames)
-    def shuffle_dataset(self):
-        combined = list(zip(self.filenames, self.labels))
-        random.shuffle(combined)
-        self.filenames, self.labels = zip(*combined)
-    def _getitem__(self, index):
-        label = self.labels[index]
-        filename = self.filenames[index]
-        if self.load_to_memory:
-            img = self.images[index]
-        else:
-            img = self.load_image(filename)
-        if self.transform is not None:
-            img = self.transform(img)
-        else:
-            img = ToTensor()(img)
-        return img, label, filename
 class CombineLoaders:
     """
     A class that combines multiple data loaders into a single iterator.
@@ -383,6 +306,85 @@ class NoClassDataset(Dataset):
             img = ToTensor()(img)
         # Return both the image and its filename
         return img, self.filenames[index]
+class MyDataset_v1(Dataset):
+    """
+    Custom dataset class for loading and processing image data.
+    Args:
+        data_dir (str): The directory path where the data is stored.
+        loader_classes (list): List of class names.
+        transform (callable, optional): A function/transform that takes in an PIL image and returns a transformed version. Default is None.
+        shuffle (bool, optional): Whether to shuffle the dataset. Default is True.
+        load_to_memory (bool, optional): Whether to load images into memory. Default is False.
+    Attributes:
+        data_dir (str): The directory path where the data is stored.
+        classes (list): List of class names.
+        transform (callable): A function/transform that takes in an PIL image and returns a transformed version.
+        shuffle (bool): Whether to shuffle the dataset.
+        load_to_memory (bool): Whether to load images into memory.
+        filenames (list): List of file paths.
+        labels (list): List of labels corresponding to each file.
+        images (list): List of loaded images.
+        image_cache (Cache): Cache object for storing loaded images.
+    Methods:
+        load_image: Load an image from file.
+        __len__: Get the length of the dataset.
+        shuffle_dataset: Shuffle the dataset.
+        __getitem__: Get an item from the dataset.
+    """
+    def __init__(self, data_dir, loader_classes, transform=None, shuffle=True, load_to_memory=False):
+        from .utils import Cache
+        self.data_dir = data_dir
+        self.classes = loader_classes
+        self.transform = transform
+        self.shuffle = shuffle
+        self.load_to_memory = load_to_memory
+        self.filenames = []
+        self.labels = []
+        self.images = []
+        self.image_cache = Cache(50)
+        for class_name in self.classes:
+            class_path = os.path.join(data_dir, class_name)
+            class_files = [os.path.join(class_path, f) for f in os.listdir(class_path) if os.path.isfile(os.path.join(class_path, f))]
+            self.filenames.extend(class_files)
+            self.labels.extend([self.classes.index(class_name)] * len(class_files))
+        if self.shuffle:
+            self.shuffle_dataset()
+        if self.load_to_memory:
+            self.images = [self.load_image(f) for f in self.filenames]
+    def load_image(self, img_path):
+        img = self.image_cache.get(img_path)
+        if img is None:
+            img = Image.open(img_path).convert('RGB')
+            self.image_cache.put(img_path, img)
+        return img
+    def _len__(self):
+        return len(self.filenames)
+    def shuffle_dataset(self):
+        combined = list(zip(self.filenames, self.labels))
+        random.shuffle(combined)
+        self.filenames, self.labels = zip(*combined)
+    def _getitem__(self, index):
+        label = self.labels[index]
+        filename = self.filenames[index]
+        if self.load_to_memory:
+            img = self.images[index]
+        else:
+            img = self.load_image(filename)
+        if self.transform is not None:
+            img = self.transform(img)
+        else:
+            img = ToTensor()(img)
+        return img, label, filename
 class MyDataset(Dataset):
     """
@@ -398,7 +400,7 @@ class MyDataset(Dataset):
         specific_labels (list, optional): A list of specific labels corresponding to the specific files. Default is None.
     """
-    def _init__(self, data_dir, loader_classes, transform=None, shuffle=True, pin_memory=False, specific_files=None, specific_labels=None):
+    def __init__(self, data_dir, loader_classes, transform=None, shuffle=True, pin_memory=False, specific_files=None, specific_labels=None):
         self.data_dir = data_dir
         self.classes = loader_classes
         self.transform = transform
@@ -427,7 +429,7 @@ class MyDataset(Dataset):
         img = Image.open(img_path).convert('RGB')
         return img
-    def _len__(self):
+    def __len__(self):
         return len(self.filenames)
     def shuffle_dataset(self):
@@ -439,7 +441,7 @@ class MyDataset(Dataset):
         filename = os.path.basename(filepath)  # Get just the filename from the full path
         return filename.split('_')[0]
-    def _getitem__(self, index):
+    def __getitem__(self, index):
         label = self.labels[index]
         filename = self.filenames[index]
         img = self.load_image(filename)
@@ -600,7 +602,7 @@ def _rename_and_organize_image_files(src, regex, batch_size=100, pick_slice=Fals
                     shutil.move(os.path.join(src, filename), move)
     return
-def _merge_file(chan_dirs, stack_dir, file):
+def _merge_file_v1(chan_dirs, stack_dir, file):
     """
     Merge multiple channels into a single stack and save it as a numpy array.
@@ -625,15 +627,80 @@ def _merge_file(chan_dirs, stack_dir, file):
         stack = np.concatenate(channels, axis=2)
         np.save(new_file, stack)
-def _is_dir_empty(dir_path):
+def _merge_file_v1(chan_dirs, stack_dir, file):
     """
-    Check if a directory is empty.
+    Merge multiple channels into a single stack and save it as a numpy array.
+    Args:
+        chan_dirs (list): List of directories containing channel images.
+        stack_dir (str): Directory to save the merged stack.
+        file (str): File name of the channel image.
+    Returns:
+        None
+    """
+    new_file = stack_dir / (file.stem + '.npy')
+    if not new_file.exists():
+        stack_dir.mkdir(exist_ok=True)
+        channels = []
+        for i, chan_dir in enumerate(chan_dirs):
+            img_path = str(chan_dir / file.name)
+            img = cv2.imread(img_path, -1)
+            if img is None:
+                print(f"Warning: Failed to read image {img_path}")
+                continue
+            chan = np.expand_dims(img, axis=2)
+            channels.append(chan)
+            del img  # Explicitly delete the reference to the image to free up memory
+            if i % 10 == 0:  # Periodically suggest garbage collection
+                gc.collect()
+        if channels:
+            stack = np.concatenate(channels, axis=2)
+            np.save(new_file, stack)
+        else:
+            print(f"No valid channels to merge for file {file.name}")
+def _merge_file(chan_dirs, stack_dir, file_name):
+    """
+    Merge multiple channels into a single stack and save it as a numpy array, using os module for path handling.
     Args:
-        dir_path (str): The path to the directory.
+        chan_dirs (list): List of directories containing channel images.
+        stack_dir (str): Directory to save the merged stack.
+        file_name (str): File name of the channel image.
     Returns:
-        bool: True if the directory is empty, False otherwise.
+        None
+    """
+    # Construct new file path
+    file_root, file_ext = os.path.splitext(file_name)
+    new_file = os.path.join(stack_dir, file_root + '.npy')
+    # Check if the new file exists and create the stack directory if it doesn't
+    if not os.path.exists(new_file):
+        os.makedirs(stack_dir, exist_ok=True)
+        channels = []
+        for i, chan_dir in enumerate(chan_dirs):
+            img_path = os.path.join(chan_dir, file_name)
+            img = cv2.imread(img_path, -1)
+            if img is None:
+                print(f"Warning: Failed to read image {img_path}")
+                continue
+            chan = np.expand_dims(img, axis=2)
+            channels.append(chan)
+            del img  # Explicitly delete the reference to the image to free up memory
+            if i % 10 == 0:  # Periodically suggest garbage collection
+                gc.collect()
+        if channels:
+            stack = np.concatenate(channels, axis=2)
+            np.save(new_file, stack)
+        else:
+            print(f"No valid channels to merge for file {file_name}")
+def _is_dir_empty(dir_path):
+    """
+    Check if a directory is empty using os module.
     """
     return len(os.listdir(dir_path)) == 0
@@ -733,7 +800,7 @@ def _move_to_chan_folder(src, regex, timelapse=False, metadata_type=''):
                     shutil.move(os.path.join(src, filename), move)
     return
-def _merge_channels(src, plot=False):
+def _merge_channels_v2(src, plot=False):
     from .plot import plot_arrays
     """
     Merge the channels in the given source directory and save the merged files in a 'stack' directory.
@@ -761,7 +828,8 @@ def _merge_channels(src, plot=False):
     print(f'generated folder with merged arrays: {stack_dir}')
     if _is_dir_empty(stack_dir):
-        with Pool(cpu_count()) as pool:
+        with Pool(max(cpu_count() // 2, 1)) as pool:
+        #with Pool(cpu_count()) as pool:
             merge_func = partial(_merge_file, chan_dirs, stack_dir)
             pool.map(merge_func, dir_files)
@@ -773,6 +841,47 @@ def _merge_channels(src, plot=False):
     return
+def _merge_channels(src, plot=False):
+    """
+    Merge the channels in the given source directory and save the merged files in a 'stack' directory without using multiprocessing.
+    """
+    from .plot import plot_arrays
+    stack_dir = os.path.join(src, 'stack')
+    allowed_names = ['01', '02', '03', '04', '00', '1', '2', '3', '4', '0']
+    # List directories that match the allowed names
+    chan_dirs = [d for d in os.listdir(src) if os.path.isdir(os.path.join(src, d)) and d in allowed_names]
+    chan_dirs.sort()
+    print(f'List of folders in src: {chan_dirs}. Single channel folders.')
+    start_time = time.time()
+    # Assuming chan_dirs[0] is not empty and exists, adjust according to your logic
+    first_dir_path = os.path.join(src, chan_dirs[0])
+    dir_files = os.listdir(first_dir_path)
+    # Create the 'stack' directory if it doesn't exist
+    if not os.path.exists(stack_dir):
+        os.makedirs(stack_dir, exist_ok=True)
+    print(f'Generated folder with merged arrays: {stack_dir}')
+    if _is_dir_empty(stack_dir):
+        for file_name in dir_files:
+            full_file_path = os.path.join(first_dir_path, file_name)
+            if os.path.isfile(full_file_path):
+                _merge_file([os.path.join(src, d) for d in chan_dirs], stack_dir, file_name)
+    elapsed_time = time.time() - start_time
+    avg_time = elapsed_time / len(dir_files) if dir_files else 0
+    print(f'Average Time: {avg_time:.3f} sec, Total Elapsed Time: {elapsed_time:.3f} sec')
+    if plot:
+        plot_arrays(os.path.join(src, 'stack'))
+    return
 def _mip_all(src, include_first_chan=True):
     """
@@ -1206,7 +1315,7 @@ def preprocess_img_data(settings):
             print('Found existing channel_stack folder.')
         if os.path.exists(src+'/norm_channel_stack'):
             print('Found existing norm_channel_stack folder. Skipping preprocessing')
-            return
+            return settings, src
     cmap = 'inferno'
     figuresize = 20
@@ -1214,8 +1323,10 @@ def preprocess_img_data(settings):
     save_dtype = 'uint16'
     correct_illumination = False
-    mask_channels = [settings['nucleus_channel'], settings['pathogen_channel'], settings['cell_channel']]
-    backgrounds = [settings['nucleus_background'], settings['pathogen_background'], settings['cell_background']]
+    #mask_channels = [settings['nucleus_channel'], settings['pathogen_channel'], settings['cell_channel']]
+    #backgrounds = [settings['nucleus_background'], settings['pathogen_background'], settings['cell_background']]
+    mask_channels = [settings['nucleus_channel'], settings['cell_channel'], settings['pathogen_channel']]
+    backgrounds = [settings['nucleus_background'], settings['cell_background'], settings['pathogen_background']]
     metadata_type = settings['metadata_type']
     custom_regex = settings['custom_regex']
@@ -1230,7 +1341,6 @@ def preprocess_img_data(settings):
     pick_slice = settings['pick_slice']
     skip_mode = settings['skip_mode']
     if not img_format == None:
         if metadata_type == 'cellvoyager':
             regex = f'(?P<plateID>.*)_(?P<wellID>.*)_T(?P<timeID>.*)F(?P<fieldID>.*)L(?P<laserID>..)A(?P<AID>..)Z(?P<sliceID>.*)C(?P<chanID>.*){img_format}'
@@ -1248,6 +1358,8 @@ def preprocess_img_data(settings):
         print(f'regex mode:{metadata_type} regex:{regex}')
     if settings.get('test_mode', False):
+        print(f'Running spacr in test mode')
+        settings['plot'] = True
         try:
             os.rmdir(os.path.join(src, 'test'))
             print(f"Deleted test directory: {os.path.join(src, 'test')}")
@@ -1256,6 +1368,10 @@ def preprocess_img_data(settings):
         src = _run_test_mode(settings['src'], regex, timelapse=timelapse)
         settings['src'] = src
+    if img_format == None:
+        if not os.path.exists(src+'/stack'):
+            _merge_channels(src, plot=False)
     if not os.path.exists(src+'/stack'):
         try:
@@ -2273,6 +2389,8 @@ def convert_numpy_to_tiff(folder_path, limit=None):
     for i, filename in enumerate(files):
         if limit is not None and i >= limit:
             break
+        if not filename.endswith('.npy'):
+            continue
         # Construct the full file path
         file_path = os.path.join(folder_path, filename)
@@ -2289,7 +2407,47 @@ def convert_numpy_to_tiff(folder_path, limit=None):
         print(f"Converted {filename} to {tiff_filename} and saved in 'tiff' subdirectory.")
     return
+def generate_cellpose_train_test(src, test_split=0.1):
+    mask_src = os.path.join(src, 'masks')
+    img_paths = glob.glob(os.path.join(src, '*.tif'))
+    img_filenames = [os.path.basename(file) for file in img_paths + img_paths]
+    img_filenames = [file for file in img_filenames if os.path.exists(os.path.join(mask_src, file))]
+    print(f'Found {len(img_filenames)} images with masks')
+    random.shuffle(img_filenames)
+    split_index = int(len(img_filenames) * test_split)
+    train_files = img_filenames[split_index:]
+    test_files = img_filenames[:split_index]
+    list_of_lists = [test_files, train_files]
+    print(f'Split dataset into Train {len(train_files)} and Test {len(test_files)} files')
+    train_dir = os.path.join(os.path.dirname(src), 'train')
+    train_dir_masks = os.path.join(train_dir, 'mask')
+    test_dir = os.path.join(os.path.dirname(src), 'test')
+    test_dir_masks = os.path.join(test_dir, 'mask')
+    os.makedirs(train_dir_masks, exist_ok=True)
+    os.makedirs(test_dir_masks, exist_ok=True)
+    for i, ls in enumerate(list_of_lists):
+        if i == 0:
+            dst = test_dir
+            dst_mask = test_dir_masks
+            _type = 'Test'
+        if i == 1:
+            dst = train_dir
+            dst_mask = train_dir_masks
+            _type = 'Train'
+        for idx, filename in enumerate(ls):
+            img_path = os.path.join(src, filename)
+            mask_path = os.path.join(mask_src, filename)
+            new_img_path = os.path.join(dst, filename)
+            new_mask_path = os.path.join(dst_mask, filename)
+            shutil.copy(img_path, new_img_path)
+            shutil.copy(mask_path, new_mask_path)
+            print(f'Copied {idx+1}/{len(ls)} images to {_type} set', end='\r', flush=True)

spacr 0.0.20__py3-none-any.whl → 0.0.21__py3-none-any.whl

spacr 0.0.20__py3-none-any.whl → 0.0.21__py3-none-any.whl