spacr 0.0.18__py3-none-any.whl → 0.0.21__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
spacr/io.py CHANGED
@@ -1,6 +1,7 @@
- import os, re, sqlite3, gc, torch, time, random, shutil, cv2, tarfile, cellpose
+ import os, re, sqlite3, gc, torch, time, random, shutil, cv2, tarfile, cellpose, glob
  import numpy as np
  import pandas as pd
+ import tifffile
  from PIL import Image
  from collections import defaultdict, Counter
  from pathlib import Path
@@ -22,9 +23,9 @@ import seaborn as sns
  import matplotlib.pyplot as plt
  from torchvision.transforms import ToTensor

+
  from .logger import log_function_call

- @log_function_call
  def _load_images_and_labels(image_files, label_files, circular=False, invert=False, image_extension="*.tif", label_extension="*.tif"):

  from .utils import invert_image, apply_mask
@@ -44,19 +45,19 @@ def _load_images_and_labels(image_files, label_files, circular=False, invert=Fal

  if not image_files is None and not label_files is None:
  for img_file, lbl_file in zip(image_files, label_files):
- image = cellpose.imread(img_file)
+ image = cellpose.io.imread(img_file)
  if invert:
  image = invert_image(image)
  if circular:
  image = apply_mask(image, output_value=0)
- label = cellpose.imread(lbl_file)
+ label = cellpose.io.imread(lbl_file)
  if image.max() > 1:
  image = image / image.max()
  images.append(image)
  labels.append(label)
  elif not image_files is None:
  for img_file in image_files:
- image = cellpose.imread(img_file)
+ image = cellpose.io.imread(img_file)
  if invert:
  image = invert_image(image)
  if circular:
@@ -66,7 +67,7 @@ def _load_images_and_labels(image_files, label_files, circular=False, invert=Fal
  images.append(image)
  elif not image_files is None:
  for lbl_file in label_files:
- label = cellpose.imread(lbl_file)
+ label = cellpose.io.imread(lbl_file)
  if circular:
  label = apply_mask(label, output_value=0)
  labels.append(label)
@@ -87,7 +88,6 @@ def _load_images_and_labels(image_files, label_files, circular=False, invert=Fal
  print(f'image shape: {images[0].shape}, image type: images[0].shape mask shape: {labels[0].shape}, image type: labels[0].shape')
  return images, labels, image_names, label_names

- @log_function_call
  def _load_normalized_images_and_labels(image_files, label_files, signal_thresholds=[1000], channels=None, percentiles=None, circular=False, invert=False, visualize=False):

  from .plot import normalize_and_visualize
@@ -109,15 +109,17 @@ def _load_normalized_images_and_labels(image_files, label_files, signal_threshol

  if label_files is not None:
  label_names = [os.path.basename(f) for f in label_files]
+ label_dir = os.path.dirname(label_files[0])

  # Load images and check percentiles
  for i,img_file in enumerate(image_files):
- image = cellpose.imread(img_file)
+ #print(img_file)
+ image = cellpose.io.imread(img_file)
  if invert:
  image = invert_image(image)
  if circular:
  image = apply_mask(image, output_value=0)
-
+ #print(image.shape)
  # If specific channels are specified, select them
  if channels is not None and image.ndim == 3:
  image = image[..., channels]
@@ -169,7 +171,7 @@ def _load_normalized_images_and_labels(image_files, label_files, signal_threshol

  if label_files is not None:
  for lbl_file in label_files:
- labels.append(cellpose.imread(lbl_file))
+ labels.append(cellpose.io.imread(lbl_file))
  else:
  label_names = []
  label_dir = None
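The recurring one-line change in the hunks above tracks Cellpose's reorganized namespace: imread now lives in the cellpose.io submodule rather than on the top-level package. A minimal sketch of the updated call; 'example.tif' is a hypothetical path, not a file from this package:

from cellpose import io

# io.imread returns the image as a numpy array
image = io.imread('example.tif')
print(image.shape, image.dtype)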
@@ -178,85 +180,6 @@ def _load_normalized_images_and_labels(image_files, label_files, signal_threshol

  return normalized_images, labels, image_names, label_names

- class MyDataset(Dataset):
- """
- Custom dataset class for loading and processing image data.
-
- Args:
- data_dir (str): The directory path where the data is stored.
- loader_classes (list): List of class names.
- transform (callable, optional): A function/transform that takes in an PIL image and returns a transformed version. Default is None.
- shuffle (bool, optional): Whether to shuffle the dataset. Default is True.
- load_to_memory (bool, optional): Whether to load images into memory. Default is False.
-
- Attributes:
- data_dir (str): The directory path where the data is stored.
- classes (list): List of class names.
- transform (callable): A function/transform that takes in an PIL image and returns a transformed version.
- shuffle (bool): Whether to shuffle the dataset.
- load_to_memory (bool): Whether to load images into memory.
- filenames (list): List of file paths.
- labels (list): List of labels corresponding to each file.
- images (list): List of loaded images.
- image_cache (Cache): Cache object for storing loaded images.
-
- Methods:
- load_image: Load an image from file.
- __len__: Get the length of the dataset.
- shuffle_dataset: Shuffle the dataset.
- __getitem__: Get an item from the dataset.
-
- """
-
- def _init__(self, data_dir, loader_classes, transform=None, shuffle=True, load_to_memory=False):
- from .utils import Cache
- self.data_dir = data_dir
- self.classes = loader_classes
- self.transform = transform
- self.shuffle = shuffle
- self.load_to_memory = load_to_memory
- self.filenames = []
- self.labels = []
- self.images = []
- self.image_cache = Cache(50)
- for class_name in self.classes:
- class_path = os.path.join(data_dir, class_name)
- class_files = [os.path.join(class_path, f) for f in os.listdir(class_path) if os.path.isfile(os.path.join(class_path, f))]
- self.filenames.extend(class_files)
- self.labels.extend([self.classes.index(class_name)] * len(class_files))
- if self.shuffle:
- self.shuffle_dataset()
- if self.load_to_memory:
- self.images = [self.load_image(f) for f in self.filenames]
-
- def load_image(self, img_path):
- img = self.image_cache.get(img_path)
- if img is None:
- img = Image.open(img_path).convert('RGB')
- self.image_cache.put(img_path, img)
- return img
-
- def _len__(self):
- return len(self.filenames)
-
- def shuffle_dataset(self):
- combined = list(zip(self.filenames, self.labels))
- random.shuffle(combined)
- self.filenames, self.labels = zip(*combined)
-
- def _getitem__(self, index):
- label = self.labels[index]
- filename = self.filenames[index]
- if self.load_to_memory:
- img = self.images[index]
- else:
- img = self.load_image(filename)
- if self.transform is not None:
- img = self.transform(img)
- else:
- img = ToTensor()(img)
- return img, label, filename
-
  class CombineLoaders:
  """
  A class that combines multiple data loaders into a single iterator.
@@ -383,6 +306,85 @@ class NoClassDataset(Dataset):
  img = ToTensor()(img)
  # Return both the image and its filename
  return img, self.filenames[index]
+
+ class MyDataset_v1(Dataset):
+ """
+ Custom dataset class for loading and processing image data.
+
+ Args:
+ data_dir (str): The directory path where the data is stored.
+ loader_classes (list): List of class names.
+ transform (callable, optional): A function/transform that takes in an PIL image and returns a transformed version. Default is None.
+ shuffle (bool, optional): Whether to shuffle the dataset. Default is True.
+ load_to_memory (bool, optional): Whether to load images into memory. Default is False.
+
+ Attributes:
+ data_dir (str): The directory path where the data is stored.
+ classes (list): List of class names.
+ transform (callable): A function/transform that takes in an PIL image and returns a transformed version.
+ shuffle (bool): Whether to shuffle the dataset.
+ load_to_memory (bool): Whether to load images into memory.
+ filenames (list): List of file paths.
+ labels (list): List of labels corresponding to each file.
+ images (list): List of loaded images.
+ image_cache (Cache): Cache object for storing loaded images.
+
+ Methods:
+ load_image: Load an image from file.
+ __len__: Get the length of the dataset.
+ shuffle_dataset: Shuffle the dataset.
+ __getitem__: Get an item from the dataset.
+
+ """
+
+ def __init__(self, data_dir, loader_classes, transform=None, shuffle=True, load_to_memory=False):
+ from .utils import Cache
+ self.data_dir = data_dir
+ self.classes = loader_classes
+ self.transform = transform
+ self.shuffle = shuffle
+ self.load_to_memory = load_to_memory
+ self.filenames = []
+ self.labels = []
+ self.images = []
+ self.image_cache = Cache(50)
+ for class_name in self.classes:
+ class_path = os.path.join(data_dir, class_name)
+ class_files = [os.path.join(class_path, f) for f in os.listdir(class_path) if os.path.isfile(os.path.join(class_path, f))]
+ self.filenames.extend(class_files)
+ self.labels.extend([self.classes.index(class_name)] * len(class_files))
+ if self.shuffle:
+ self.shuffle_dataset()
+ if self.load_to_memory:
+ self.images = [self.load_image(f) for f in self.filenames]
+
+ def load_image(self, img_path):
+ img = self.image_cache.get(img_path)
+ if img is None:
+ img = Image.open(img_path).convert('RGB')
+ self.image_cache.put(img_path, img)
+ return img
+
+ def _len__(self):
+ return len(self.filenames)
+
+ def shuffle_dataset(self):
+ combined = list(zip(self.filenames, self.labels))
+ random.shuffle(combined)
+ self.filenames, self.labels = zip(*combined)
+
+ def _getitem__(self, index):
+ label = self.labels[index]
+ filename = self.filenames[index]
+ if self.load_to_memory:
+ img = self.images[index]
+ else:
+ img = self.load_image(filename)
+ if self.transform is not None:
+ img = self.transform(img)
+ else:
+ img = ToTensor()(img)
+ return img, label, filename

  class MyDataset(Dataset):
  """
@@ -398,7 +400,7 @@ class MyDataset(Dataset):
  specific_labels (list, optional): A list of specific labels corresponding to the specific files. Default is None.
  """

- def _init__(self, data_dir, loader_classes, transform=None, shuffle=True, pin_memory=False, specific_files=None, specific_labels=None):
+ def __init__(self, data_dir, loader_classes, transform=None, shuffle=True, pin_memory=False, specific_files=None, specific_labels=None):
  self.data_dir = data_dir
  self.classes = loader_classes
  self.transform = transform
@@ -427,7 +429,7 @@ class MyDataset(Dataset):
  img = Image.open(img_path).convert('RGB')
  return img

- def _len__(self):
+ def __len__(self):
  return len(self.filenames)

  def shuffle_dataset(self):
@@ -439,7 +441,7 @@ class MyDataset(Dataset):
  filename = os.path.basename(filepath) # Get just the filename from the full path
  return filename.split('_')[0]

- def _getitem__(self, index):
+ def __getitem__(self, index):
  label = self.labels[index]
  filename = self.filenames[index]
  img = self.load_image(filename)
@@ -527,6 +529,7 @@ class TarImageDataset(Dataset):

  return img, m.name

+ @log_function_call
  def _rename_and_organize_image_files(src, regex, batch_size=100, pick_slice=False, skip_mode='01', metadata_type='', img_format='.tif'):
  """
  Convert z-stack images to maximum intensity projection (MIP) images.
@@ -599,7 +602,7 @@ def _rename_and_organize_image_files(src, regex, batch_size=100, pick_slice=Fals
  shutil.move(os.path.join(src, filename), move)
  return

- def _merge_file(chan_dirs, stack_dir, file):
+ def _merge_file_v1(chan_dirs, stack_dir, file):
  """
  Merge multiple channels into a single stack and save it as a numpy array.

@@ -624,15 +627,80 @@ def _merge_file(chan_dirs, stack_dir, file):
  stack = np.concatenate(channels, axis=2)
  np.save(new_file, stack)

- def _is_dir_empty(dir_path):
+ def _merge_file_v1(chan_dirs, stack_dir, file):
  """
- Check if a directory is empty.
+ Merge multiple channels into a single stack and save it as a numpy array.
+ Args:
+ chan_dirs (list): List of directories containing channel images.
+ stack_dir (str): Directory to save the merged stack.
+ file (str): File name of the channel image.

+ Returns:
+ None
+ """
+ new_file = stack_dir / (file.stem + '.npy')
+ if not new_file.exists():
+ stack_dir.mkdir(exist_ok=True)
+ channels = []
+ for i, chan_dir in enumerate(chan_dirs):
+ img_path = str(chan_dir / file.name)
+ img = cv2.imread(img_path, -1)
+ if img is None:
+ print(f"Warning: Failed to read image {img_path}")
+ continue
+ chan = np.expand_dims(img, axis=2)
+ channels.append(chan)
+ del img # Explicitly delete the reference to the image to free up memory
+ if i % 10 == 0: # Periodically suggest garbage collection
+ gc.collect()
+
+ if channels:
+ stack = np.concatenate(channels, axis=2)
+ np.save(new_file, stack)
+ else:
+ print(f"No valid channels to merge for file {file.name}")
+
+ def _merge_file(chan_dirs, stack_dir, file_name):
+ """
+ Merge multiple channels into a single stack and save it as a numpy array, using os module for path handling.
+
  Args:
- dir_path (str): The path to the directory.
+ chan_dirs (list): List of directories containing channel images.
+ stack_dir (str): Directory to save the merged stack.
+ file_name (str): File name of the channel image.

  Returns:
- bool: True if the directory is empty, False otherwise.
+ None
+ """
+ # Construct new file path
+ file_root, file_ext = os.path.splitext(file_name)
+ new_file = os.path.join(stack_dir, file_root + '.npy')
+
+ # Check if the new file exists and create the stack directory if it doesn't
+ if not os.path.exists(new_file):
+ os.makedirs(stack_dir, exist_ok=True)
+ channels = []
+ for i, chan_dir in enumerate(chan_dirs):
+ img_path = os.path.join(chan_dir, file_name)
+ img = cv2.imread(img_path, -1)
+ if img is None:
+ print(f"Warning: Failed to read image {img_path}")
+ continue
+ chan = np.expand_dims(img, axis=2)
+ channels.append(chan)
+ del img # Explicitly delete the reference to the image to free up memory
+ if i % 10 == 0: # Periodically suggest garbage collection
+ gc.collect()
+
+ if channels:
+ stack = np.concatenate(channels, axis=2)
+ np.save(new_file, stack)
+ else:
+ print(f"No valid channels to merge for file {file_name}")
+
+ def _is_dir_empty(dir_path):
+ """
+ Check if a directory is empty using os module.
  """
  return len(os.listdir(dir_path)) == 0

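The rewritten _merge_file trades pathlib.Path objects for plain strings, so callers pass directory paths and a bare file name. A hedged usage sketch; the folder layout and file names below are hypothetical:

import os
from spacr.io import _merge_file  # private helper, name as introduced in this diff

src = '/data/plate1'  # hypothetical source folder with one subfolder per channel
chan_dirs = [os.path.join(src, d) for d in ['00', '01', '02']]
stack_dir = os.path.join(src, 'stack')
# Reads plate1_A01_1.tif from each channel folder, stacks the channels on
# axis 2, and writes stack/plate1_A01_1.npy
_merge_file(chan_dirs, stack_dir, 'plate1_A01_1.tif')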
@@ -706,7 +774,7 @@ def _move_to_chan_folder(src, regex, timelapse=False, metadata_type=''):
  if metadata_type =='cq1':
  orig_wellID = wellID
  wellID = _convert_cq1_well_id(wellID)
- print(f'Converted Well ID: {orig_wellID} to {wellID}')
+ print(f'Converted Well ID: {orig_wellID} to {wellID}')#, end='\r', flush=True)

  newname = f"{plateID}_{wellID}_{fieldID}_{timeID if timelapse else ''}{ext}"
  newpath = src / chanID
@@ -732,7 +800,7 @@ def _move_to_chan_folder(src, regex, timelapse=False, metadata_type=''):
  shutil.move(os.path.join(src, filename), move)
  return

- def _merge_channels(src, plot=False):
+ def _merge_channels_v2(src, plot=False):
  from .plot import plot_arrays
  """
  Merge the channels in the given source directory and save the merged files in a 'stack' directory.
@@ -757,9 +825,11 @@

  # Create the 'stack' directory if it doesn't exist
  stack_dir.mkdir(exist_ok=True)
+ print(f'generated folder with merged arrays: {stack_dir}')

  if _is_dir_empty(stack_dir):
- with Pool(cpu_count()) as pool:
+ with Pool(max(cpu_count() // 2, 1)) as pool:
+ #with Pool(cpu_count()) as pool:
  merge_func = partial(_merge_file, chan_dirs, stack_dir)
  pool.map(merge_func, dir_files)

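Halving the worker count here is presumably a memory-pressure guard: each worker holds full-resolution channel arrays, so capping the pool at half the cores (minimum 1) trades some throughput for headroom. The same pattern in isolation, with a stand-in task:

from multiprocessing import Pool, cpu_count

def work(x):
    return x * x  # stand-in for the per-file merge

if __name__ == '__main__':
    n_workers = max(cpu_count() // 2, 1)  # never oversubscribe, never zero
    with Pool(n_workers) as pool:
        print(pool.map(work, range(8)))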
@@ -771,6 +841,47 @@

  return

+ def _merge_channels(src, plot=False):
+ """
+ Merge the channels in the given source directory and save the merged files in a 'stack' directory without using multiprocessing.
+ """
+
+ from .plot import plot_arrays
+
+ stack_dir = os.path.join(src, 'stack')
+ allowed_names = ['01', '02', '03', '04', '00', '1', '2', '3', '4', '0']
+
+ # List directories that match the allowed names
+ chan_dirs = [d for d in os.listdir(src) if os.path.isdir(os.path.join(src, d)) and d in allowed_names]
+ chan_dirs.sort()
+
+ print(f'List of folders in src: {chan_dirs}. Single channel folders.')
+ start_time = time.time()
+
+ # Assuming chan_dirs[0] is not empty and exists, adjust according to your logic
+ first_dir_path = os.path.join(src, chan_dirs[0])
+ dir_files = os.listdir(first_dir_path)
+
+ # Create the 'stack' directory if it doesn't exist
+ if not os.path.exists(stack_dir):
+ os.makedirs(stack_dir, exist_ok=True)
+ print(f'Generated folder with merged arrays: {stack_dir}')
+
+ if _is_dir_empty(stack_dir):
+ for file_name in dir_files:
+ full_file_path = os.path.join(first_dir_path, file_name)
+ if os.path.isfile(full_file_path):
+ _merge_file([os.path.join(src, d) for d in chan_dirs], stack_dir, file_name)
+
+ elapsed_time = time.time() - start_time
+ avg_time = elapsed_time / len(dir_files) if dir_files else 0
+ print(f'Average Time: {avg_time:.3f} sec, Total Elapsed Time: {elapsed_time:.3f} sec')
+
+ if plot:
+ plot_arrays(os.path.join(src, 'stack'))
+
+ return
+
  def _mip_all(src, include_first_chan=True):

  """
@@ -819,6 +930,7 @@ def _mip_all(src, include_first_chan=True):
  np.save(os.path.join(src, filename), concatenated)
  return

+ @log_function_call
  def _concatenate_channel(src, channels, randomize=True, timelapse=False, batch_size=100):
  """
  Concatenates channel data from multiple files and saves the concatenated data as numpy arrays.
@@ -853,7 +965,7 @@ def _concatenate_channel(src, channels, randomize=True, timelapse=False, batch_s
  array = np.take(array, channels, axis=2)
  stack_region.append(array)
  filenames_region.append(os.path.basename(path))
- clear_output(wait=True)
+ #clear_output(wait=True)
  print(f'Region {i+1}/ {len(time_stack_path_lists)}', end='\r', flush=True)
  stack = np.stack(stack_region)
  save_loc = os.path.join(channel_stack_loc, f'{name}.npz')
@@ -879,7 +991,7 @@ def _concatenate_channel(src, channels, randomize=True, timelapse=False, batch_s
  array = np.take(array, channels, axis=2)
  stack_ls.append(array)
  filenames_batch.append(os.path.basename(path)) # store the filename
- clear_output(wait=True)
+ #clear_output(wait=True)
  print(f'Concatenated: {i+1}/{nr_files} files')
  #print(f'Concatenated: {i+1}/{nr_files} files', end='\r', flush=True)

@@ -887,7 +999,7 @@ def _concatenate_channel(src, channels, randomize=True, timelapse=False, batch_s
  unique_shapes = {arr.shape[:-1] for arr in stack_ls}
  if len(unique_shapes) > 1:
  max_dims = np.max(np.array(list(unique_shapes)), axis=0)
- clear_output(wait=True)
+ #clear_output(wait=True)
  print(f'Warning: arrays with multiple shapes found in batch {i+1}. Padding arrays to max X,Y dimentions {max_dims}')
  #print(f'Warning: arrays with multiple shapes found in batch {i+1}. Padding arrays to max X,Y dimentions {max_dims}', end='\r', flush=True)
  padded_stack_ls = []
@@ -1015,7 +1127,7 @@ def _normalize_stack(src, backgrounds=[100,100,100], remove_background=False, lo
  duration = (stop - start)*single_channel.shape[0]
  time_ls.append(duration)
  average_time = np.mean(time_ls) if len(time_ls) > 0 else 0
- clear_output(wait=True)
+ #clear_output(wait=True)
  print(f'Progress: files {file_index+1}/{len(paths)}, channels:{chan_index}/{stack.shape[-1]-1}, arrays:{array_index+1}/{single_channel.shape[0]}, Signal:{upper:.1f}, noise:{lower:.1f}, Signal-to-noise:{average_stnr:.1f}, Time/channel:{average_time:.2f}sec')
  #print(f'Progress: files {file_index+1}/{len(paths)}, channels:{chan_index}/{stack.shape[-1]-1}, arrays:{array_index+1}/{single_channel.shape[0]}, Signal:{upper:.1f}, noise:{lower:.1f}, Signal-to-noise:{average_stnr:.1f}, Time/channel:{average_time:.2f}sec', end='\r', flush=True)
  normalized_single_channel = exposure.rescale_intensity(arr_2d_normalized, out_range='dtype')
@@ -1072,8 +1184,6 @@ def _normalize_timelapse(src, lower_quantile=0.01, save_dtype=np.float32):

  print(f'\nSaved normalized stacks: {output_fldr}')

-
-
  def _create_movies_from_npy_per_channel(src, fps=10):
  """
  Create movies from numpy files per channel.
@@ -1125,9 +1235,33 @@ def _create_movies_from_npy_per_channel(src, fps=10):
  channel_save_path = os.path.join(save_path, f'{plate}_{well}_{field}_channel_{channel}.mp4')
  _npz_to_movie(normalized_channel_arrays_3d, filenames, channel_save_path, fps)

+ def delete_empty_subdirectories(folder_path):
+ """
+ Deletes all empty subdirectories in the specified folder.
+
+ Args:
+ - folder_path (str): The path to the folder in which to look for empty subdirectories.
+ """
+ # Check each item in the specified folder
+ for dirpath, dirnames, filenames in os.walk(folder_path, topdown=False):
+ # os.walk is used with topdown=False to start from the innermost directories and work upwards.
+ for dirname in dirnames:
+ # Construct the full path to the subdirectory
+ full_dir_path = os.path.join(dirpath, dirname)
+ # Try to remove the directory and catch any error (like if the directory is not empty)
+ try:
+ os.rmdir(full_dir_path)
+ print(f"Deleted empty directory: {full_dir_path}")
+ except OSError as e:
+ continue
+ # An error occurred, likely because the directory is not empty
+ #print(f"Skipping non-empty directory: {full_dir_path}")
+
+ @log_function_call
  def preprocess_img_data(settings):

  from .plot import plot_arrays, _plot_4D_arrays
+ from .utils import _run_test_mode

  """
  Preprocesses image data by converting z-stack images to maximum intensity projection (MIP) images.
@@ -1158,12 +1292,16 @@ def preprocess_img_data(settings):
  Returns:
  None
  """
+
  src = settings['src']
  valid_ext = ['tif', 'tiff', 'png', 'jpeg']
  files = os.listdir(src)
  extensions = [file.split('.')[-1] for file in files]
  extension_counts = Counter(extensions)
  most_common_extension = extension_counts.most_common(1)[0][0]
+ img_format = None
+
+ delete_empty_subdirectories(src)

  # Check if the most common extension is one of the specified image formats
  if most_common_extension in valid_ext:
@@ -1171,16 +1309,24 @@
  print(f'Found {extension_counts[most_common_extension]} {most_common_extension} files')
  else:
  print(f'Could not find any {valid_ext} files in {src} only found {extension_counts[0]}')
- return
-
+ if os.path.exists(src+'/stack'):
+ print('Found existing stack folder.')
+ if os.path.exists(src+'/channel_stack'):
+ print('Found existing channel_stack folder.')
+ if os.path.exists(src+'/norm_channel_stack'):
+ print('Found existing norm_channel_stack folder. Skipping preprocessing')
+ return settings, src
+
  cmap = 'inferno'
  figuresize = 20
  normalize = True
  save_dtype = 'uint16'
  correct_illumination = False

- mask_channels = [settings['nucleus_channel'], settings['pathogen_channel'], settings['cell_channel']]
- backgrounds = [settings['nucleus_background'], settings['pathogen_background'], settings['cell_background']]
+ #mask_channels = [settings['nucleus_channel'], settings['pathogen_channel'], settings['cell_channel']]
+ #backgrounds = [settings['nucleus_background'], settings['pathogen_background'], settings['cell_background']]
+ mask_channels = [settings['nucleus_channel'], settings['cell_channel'], settings['pathogen_channel']]
+ backgrounds = [settings['nucleus_background'], settings['cell_background'], settings['pathogen_background']]

  metadata_type = settings['metadata_type']
  custom_regex = settings['custom_regex']
@@ -1194,57 +1340,78 @@ def preprocess_img_data(settings):
  all_to_mip = settings['all_to_mip']
  pick_slice = settings['pick_slice']
  skip_mode = settings['skip_mode']
-
- if metadata_type == 'cellvoyager':
- regex = f'(?P<plateID>.*)_(?P<wellID>.*)_T(?P<timeID>.*)F(?P<fieldID>.*)L(?P<laserID>..)A(?P<AID>..)Z(?P<sliceID>.*)C(?P<chanID>.*){img_format}'
- elif metadata_type == 'cq1':
- regex = f'W(?P<wellID>.*)F(?P<fieldID>.*)T(?P<timeID>.*)Z(?P<sliceID>.*)C(?P<chanID>.*){img_format}'
- elif metadata_type == 'nikon':
- regex = f'(?P<plateID>.*)_(?P<wellID>.*)_T(?P<timeID>.*)F(?P<fieldID>.*)L(?P<laserID>..)A(?P<AID>..)Z(?P<sliceID>.*)C(?P<chanID>.*){img_format}'
- elif metadata_type == 'zeis':
- regex = f'(?P<plateID>.*)_(?P<wellID>.*)_T(?P<timeID>.*)F(?P<fieldID>.*)L(?P<laserID>..)A(?P<AID>..)Z(?P<sliceID>.*)C(?P<chanID>.*){img_format}'
- elif metadata_type == 'leica':
- regex = f'(?P<plateID>.*)_(?P<wellID>.*)_T(?P<timeID>.*)F(?P<fieldID>.*)L(?P<laserID>..)A(?P<AID>..)Z(?P<sliceID>.*)C(?P<chanID>.*){img_format}'
- elif metadata_type == 'custom':
- regex = f'({custom_regex}){img_format}'
+
+ if not img_format == None:
+ if metadata_type == 'cellvoyager':
+ regex = f'(?P<plateID>.*)_(?P<wellID>.*)_T(?P<timeID>.*)F(?P<fieldID>.*)L(?P<laserID>..)A(?P<AID>..)Z(?P<sliceID>.*)C(?P<chanID>.*){img_format}'
+ elif metadata_type == 'cq1':
+ regex = f'W(?P<wellID>.*)F(?P<fieldID>.*)T(?P<timeID>.*)Z(?P<sliceID>.*)C(?P<chanID>.*){img_format}'
+ elif metadata_type == 'nikon':
+ regex = f'(?P<plateID>.*)_(?P<wellID>.*)_T(?P<timeID>.*)F(?P<fieldID>.*)L(?P<laserID>..)A(?P<AID>..)Z(?P<sliceID>.*)C(?P<chanID>.*){img_format}'
+ elif metadata_type == 'zeis':
+ regex = f'(?P<plateID>.*)_(?P<wellID>.*)_T(?P<timeID>.*)F(?P<fieldID>.*)L(?P<laserID>..)A(?P<AID>..)Z(?P<sliceID>.*)C(?P<chanID>.*){img_format}'
+ elif metadata_type == 'leica':
+ regex = f'(?P<plateID>.*)_(?P<wellID>.*)_T(?P<timeID>.*)F(?P<fieldID>.*)L(?P<laserID>..)A(?P<AID>..)Z(?P<sliceID>.*)C(?P<chanID>.*){img_format}'
+ elif metadata_type == 'custom':
+ regex = f'({custom_regex}){img_format}'

- print(f'regex mode:{metadata_type} regex:{regex}')
+ print(f'regex mode:{metadata_type} regex:{regex}')
+
+ if settings.get('test_mode', False):
+ print(f'Running spacr in test mode')
+ settings['plot'] = True
+ try:
+ os.rmdir(os.path.join(src, 'test'))
+ print(f"Deleted test directory: {os.path.join(src, 'test')}")
+ except OSError as e:
+ pass
+
+ src = _run_test_mode(settings['src'], regex, timelapse=timelapse)
+ settings['src'] = src
+
+ if img_format == None:
+ if not os.path.exists(src+'/stack'):
+ _merge_channels(src, plot=False)

  if not os.path.exists(src+'/stack'):
- if timelapse:
- _move_to_chan_folder(src, regex, timelapse, metadata_type)
- else:
- #_z_to_mip(src, regex, batch_size, pick_slice, skip_mode, metadata_type, img_format)
- _rename_and_organize_image_files(src, regex, batch_size, pick_slice, skip_mode, metadata_type, img_format)
-
- #Make sure no batches will be of only one image
- all_imgs = len(src+'/stack')
- full_batches = all_imgs // batch_size
- last_batch_size = all_imgs % batch_size
-
- # Check if the last batch is of size 1
- if last_batch_size == 1:
- # If there's only one batch and its size is 1, it's also an issue
- if full_batches == 0:
- raise ValueError("Only one batch of size 1 detected. Adjust the batch size.")
- # If the last batch is of size 1, merge it with the second last batch
- elif full_batches > 0:
- raise ValueError("Last batch of size 1 detected. Adjust the batch size.")
-
- _merge_channels(src, plot=False)
- if timelapse:
- _create_movies_from_npy_per_channel(src+'/stack', fps=2)
-
- if plot:
- print(f'plotting {nr} images from {src}/stack')
- plot_arrays(src+'/stack', figuresize, cmap, nr=nr, normalize=normalize)
- if all_to_mip:
- _mip_all(src+'/stack')
- if plot:
- print(f'plotting {nr} images from {src}/stack')
- plot_arrays(src+'/stack', figuresize, cmap, nr=nr, normalize=normalize)
- #nr_of_stacks = len(src+'/channel_stack')
-
+ try:
+ if not img_format == None:
+ if timelapse:
+ _move_to_chan_folder(src, regex, timelapse, metadata_type)
+ else:
+ _rename_and_organize_image_files(src, regex, batch_size, pick_slice, skip_mode, metadata_type, img_format)
+
+ #Make sure no batches will be of only one image
+ all_imgs = len(src+'/stack')
+ full_batches = all_imgs // batch_size
+ last_batch_size = all_imgs % batch_size
+
+ # Check if the last batch is of size 1
+ if last_batch_size == 1:
+ # If there's only one batch and its size is 1, it's also an issue
+ if full_batches == 0:
+ raise ValueError("Only one batch of size 1 detected. Adjust the batch size.")
+ # If the last batch is of size 1, merge it with the second last batch
+ elif full_batches > 0:
+ raise ValueError("Last batch of size 1 detected. Adjust the batch size.")
+
+ _merge_channels(src, plot=False)
+
+ if timelapse:
+ _create_movies_from_npy_per_channel(src+'/stack', fps=2)
+
+ if plot:
+ print(f'plotting {nr} images from {src}/stack')
+ plot_arrays(src+'/stack', figuresize, cmap, nr=nr, normalize=normalize)
+ if all_to_mip:
+ _mip_all(src+'/stack')
+ if plot:
+ print(f'plotting {nr} images from {src}/stack')
+ plot_arrays(src+'/stack', figuresize, cmap, nr=nr, normalize=normalize)
+ except Exception as e:
+ print(f"Error: {e}")
+
+ print('concatinating cahnnels')
  _concatenate_channel(src+'/stack',
  channels=mask_channels,
  randomize=randomize,
@@ -1254,7 +1421,6 @@ def preprocess_img_data(settings):
  if plot:
  print(f'plotting {nr} images from {src}/channel_stack')
  _plot_4D_arrays(src+'/channel_stack', figuresize, cmap, nr_npz=1, nr=nr)
- nr_of_chan_stacks = len(src+'/channel_stack')

  backgrounds, signal_to_noise, signal_thresholds = _get_lists_for_normalization(settings=settings)

@@ -1273,7 +1439,7 @@ def preprocess_img_data(settings):
  if plot:
  _plot_4D_arrays(src+'/norm_channel_stack', nr_npz=1, nr=nr)

- return
+ return settings, src

  def _check_masks(batch, batch_filenames, output_folder):
  """
@@ -1295,8 +1461,7 @@ def _check_masks(batch, batch_filenames, output_folder):
  filtered_filenames = [f for f, exists in zip(batch_filenames, existing_files_mask) if exists]

  return np.array(filtered_batch), filtered_filenames
-
-
+
  def _get_avg_object_size(masks):
  """
  Calculate the average size of objects in a list of masks.
@@ -1450,6 +1615,56 @@ def _save_settings_to_db(settings):
  settings_df.to_sql('settings', conn, if_exists='replace', index=False) # Replace the table if it already exists
  conn.close()

+ def _save_mask_timelapse_as_gif_v1(masks, path, cmap, norm, filenames):
+ """
+ Save a timelapse of masks as a GIF.
+
+ Parameters:
+ masks (list): List of mask frames.
+ path (str): Path to save the GIF.
+ cmap: Colormap for displaying the masks.
+ norm: Normalization for the masks.
+ filenames (list): List of filenames corresponding to each mask frame.
+
+ Returns:
+ None
+ """
+ def _update(frame):
+ """
+ Update the plot with the given frame.
+
+ Parameters:
+ frame (int): The frame number to update the plot with.
+
+ Returns:
+ None
+ """
+ nonlocal filename_text_obj
+ if filename_text_obj is not None:
+ filename_text_obj.remove()
+ ax.clear()
+ ax.axis('off')
+ current_mask = masks[frame]
+ ax.imshow(current_mask, cmap=cmap, norm=norm)
+ ax.set_title(f'Frame: {frame}', fontsize=24, color='white')
+ filename_text = filenames[frame]
+ filename_text_obj = fig.text(0.5, 0.01, filename_text, ha='center', va='center', fontsize=20, color='white')
+ for label_value in np.unique(current_mask):
+ if label_value == 0: continue # Skip background
+ y, x = np.mean(np.where(current_mask == label_value), axis=1)
+ ax.text(x, y, str(label_value), color='white', fontsize=24, ha='center', va='center')
+
+ fig, ax = plt.subplots(figsize=(50, 50), facecolor='black')
+ ax.set_facecolor('black')
+ ax.axis('off')
+ plt.subplots_adjust(left=0, right=1, top=1, bottom=0, wspace=0, hspace=0)
+
+ filename_text_obj = None
+ anim = FuncAnimation(fig, _update, frames=len(masks), blit=False)
+ anim.save(path, writer='pillow', fps=2, dpi=80) # Adjust DPI for size/quality
+ plt.close(fig)
+ print(f'Saved timelapse to {path}')
+
  def _save_mask_timelapse_as_gif(masks, tracks_df, path, cmap, norm, filenames):
  """
  Save a timelapse animation of masks as a GIF.
@@ -1504,9 +1719,10 @@ def _save_mask_timelapse_as_gif(masks, tracks_df, path, cmap, norm, filenames):
  ax.text(x, y, str(label_value), color='white', fontsize=24, ha='center', va='center')

  # Overlay tracks
- for track in tracks_df['track_id'].unique():
- _track = tracks_df[tracks_df['track_id'] == track]
- ax.plot(_track['x'], _track['y'], '-w', linewidth=1)
+ if tracks_df is not None:
+ for track in tracks_df['track_id'].unique():
+ _track = tracks_df[tracks_df['track_id'] == track]
+ ax.plot(_track['x'], _track['y'], '-w', linewidth=1)

  anim = FuncAnimation(fig, _update, frames=len(masks), blit=False)
  anim.save(path, writer='pillow', fps=2, dpi=80) # Adjust DPI for size/quality
@@ -1620,56 +1836,63 @@ def _load_and_concatenate_arrays(src, channels, cell_chann_dim, nucleus_chann_di

  # Iterate through each file in the reference folder
  for filename in os.listdir(reference_folder):
-
  stack_ls = []
- array_path = []
-
  if filename.endswith('.npy'):
- count+=1
- # Initialize the concatenated array with the array from the reference folder
- concatenated_array = np.load(os.path.join(reference_folder, filename))
- if channels is not None:
- concatenated_array = np.take(concatenated_array, channels, axis=2)
+ count += 1
+
+ # Check if this file exists in all the other specified folders
+ exists_in_all_folders = all(os.path.isfile(os.path.join(folder, filename)) for folder in folder_paths)
+
+ if exists_in_all_folders:
+ # Load and potentially modify the array from the reference folder
+ ref_array_path = os.path.join(reference_folder, filename)
+ concatenated_array = np.load(ref_array_path)
+
+ if channels is not None:
+ concatenated_array = np.take(concatenated_array, channels, axis=2)
+
+ # Add the array from the reference folder to 'stack_ls'
  stack_ls.append(concatenated_array)
- # For each of the other folders, load the array and concatenate it
- for folder in folder_paths[1:]:
- array_path = os.path.join(folder, filename)
- if os.path.isfile(array_path):
+
+ # For each of the other folders, load the array and add it to 'stack_ls'
+ for folder in folder_paths[1:]:
+ array_path = os.path.join(folder, filename)
  array = np.load(array_path)
  if array.ndim == 2:
- array = np.expand_dims(array, axis=-1) # add an extra dimension if the array is 2D
+ array = np.expand_dims(array, axis=-1) # Add an extra dimension if the array is 2D
  stack_ls.append(array)

- stack_ls = [np.expand_dims(arr, axis=-1) if arr.ndim == 2 else arr for arr in stack_ls]
- unique_shapes = {arr.shape[:-1] for arr in stack_ls}
- if len(unique_shapes) > 1:
- #max_dims = np.max(np.array(list(unique_shapes)), axis=0)
- # Determine the maximum length of tuples in unique_shapes
- max_tuple_length = max(len(shape) for shape in unique_shapes)
- # Pad shorter tuples with zeros to make them all the same length
- padded_shapes = [shape + (0,) * (max_tuple_length - len(shape)) for shape in unique_shapes]
- # Now create a NumPy array and find the maximum dimensions
- max_dims = np.max(np.array(padded_shapes), axis=0)
- clear_output(wait=True)
- print(f'Warning: arrays with multiple shapes found. Padding arrays to max X,Y dimentions {max_dims}')
- #print(f'Warning: arrays with multiple shapes found. Padding arrays to max X,Y dimentions {max_dims}', end='\r', flush=True)
- padded_stack_ls = []
- for arr in stack_ls:
- pad_width = [(0, max_dim - dim) for max_dim, dim in zip(max_dims, arr.shape[:-1])]
- pad_width.append((0, 0))
- padded_arr = np.pad(arr, pad_width)
- padded_stack_ls.append(padded_arr)
- # Concatenate the padded arrays along the channel dimension (last dimension)
- stack = np.concatenate(padded_stack_ls, axis=-1)
+ if len(stack_ls) > 0:
+ stack_ls = [np.expand_dims(arr, axis=-1) if arr.ndim == 2 else arr for arr in stack_ls]
+ unique_shapes = {arr.shape[:-1] for arr in stack_ls}
+ if len(unique_shapes) > 1:
+ #max_dims = np.max(np.array(list(unique_shapes)), axis=0)
+ # Determine the maximum length of tuples in unique_shapes
+ max_tuple_length = max(len(shape) for shape in unique_shapes)
+ # Pad shorter tuples with zeros to make them all the same length
+ padded_shapes = [shape + (0,) * (max_tuple_length - len(shape)) for shape in unique_shapes]
+ # Now create a NumPy array and find the maximum dimensions
+ max_dims = np.max(np.array(padded_shapes), axis=0)
+ #clear_output(wait=True)
+ print(f'Warning: arrays with multiple shapes found. Padding arrays to max X,Y dimentions {max_dims}')
+ #print(f'Warning: arrays with multiple shapes found. Padding arrays to max X,Y dimentions {max_dims}', end='\r', flush=True)
+ padded_stack_ls = []
+ for arr in stack_ls:
+ pad_width = [(0, max_dim - dim) for max_dim, dim in zip(max_dims, arr.shape[:-1])]
+ pad_width.append((0, 0))
+ padded_arr = np.pad(arr, pad_width)
+ padded_stack_ls.append(padded_arr)
+ # Concatenate the padded arrays along the channel dimension (last dimension)
+ stack = np.concatenate(padded_stack_ls, axis=-1)

- else:
- stack = np.concatenate(stack_ls, axis=-1)
+ else:
+ stack = np.concatenate(stack_ls, axis=-1)

- if stack.shape[-1] > concatenated_array.shape[-1]:
- output_path = os.path.join(output_folder, filename)
- np.save(output_path, stack)
+ if stack.shape[-1] > concatenated_array.shape[-1]:
+ output_path = os.path.join(output_folder, filename)
+ np.save(output_path, stack)

- clear_output(wait=True)
+ #clear_output(wait=True)
  print(f'Files merged: {count}/{all_imgs}')
  #print(f'Files merged: {count}/{all_imgs}', end='\r', flush=True)
  return
@@ -2145,9 +2368,86 @@ def _read_mask(mask_path):
  if mask.dtype != np.uint16:
  mask = img_as_uint(mask)
  return mask
+
+
+ def convert_numpy_to_tiff(folder_path, limit=None):
+ """
+ Converts all numpy files in a folder to TIFF format and saves them in a subdirectory 'tiff'.

+ Args:
+ folder_path (str): The path to the folder containing numpy files.
+ """
+ # Create the subdirectory 'tiff' within the specified folder if it doesn't already exist
+ tiff_subdir = os.path.join(folder_path, 'tiff')
+ os.makedirs(tiff_subdir, exist_ok=True)
+
+ files = os.listdir(folder_path)
+
+ npy_files = [f for f in files if f.endswith('.npy')]

+ # Iterate over all files in the folder
+ for i, filename in enumerate(files):
+ if limit is not None and i >= limit:
+ break
+ if not filename.endswith('.npy'):
+ continue
+
+ # Construct the full file path
+ file_path = os.path.join(folder_path, filename)
+ # Load the numpy file
+ numpy_array = np.load(file_path)
+
+ # Construct the output TIFF file path
+ tiff_filename = os.path.splitext(filename)[0] + '.tif'
+ tiff_file_path = os.path.join(tiff_subdir, tiff_filename)
+
+ # Save the numpy array as a TIFF file
+ tifffile.imwrite(tiff_file_path, numpy_array)
+
+ print(f"Converted {filename} to {tiff_filename} and saved in 'tiff' subdirectory.")
+ return

+ def generate_cellpose_train_test(src, test_split=0.1):
+
+ mask_src = os.path.join(src, 'masks')
+ img_paths = glob.glob(os.path.join(src, '*.tif'))
+ img_filenames = [os.path.basename(file) for file in img_paths + img_paths]
+ img_filenames = [file for file in img_filenames if os.path.exists(os.path.join(mask_src, file))]
+ print(f'Found {len(img_filenames)} images with masks')
+
+ random.shuffle(img_filenames)
+ split_index = int(len(img_filenames) * test_split)
+ train_files = img_filenames[split_index:]
+ test_files = img_filenames[:split_index]
+ list_of_lists = [test_files, train_files]
+ print(f'Split dataset into Train {len(train_files)} and Test {len(test_files)} files')
+
+ train_dir = os.path.join(os.path.dirname(src), 'train')
+ train_dir_masks = os.path.join(train_dir, 'mask')
+ test_dir = os.path.join(os.path.dirname(src), 'test')
+ test_dir_masks = os.path.join(test_dir, 'mask')
+
+ os.makedirs(train_dir_masks, exist_ok=True)
+ os.makedirs(test_dir_masks, exist_ok=True)
+ for i, ls in enumerate(list_of_lists):
+
+ if i == 0:
+ dst = test_dir
+ dst_mask = test_dir_masks
+ _type = 'Test'
+ if i == 1:
+ dst = train_dir
+ dst_mask = train_dir_masks
+ _type = 'Train'
+
+ for idx, filename in enumerate(ls):
+ img_path = os.path.join(src, filename)
+ mask_path = os.path.join(mask_src, filename)
+ new_img_path = os.path.join(dst, filename)
+ new_mask_path = os.path.join(dst_mask, filename)
+ shutil.copy(img_path, new_img_path)
+ shutil.copy(mask_path, new_mask_path)
+ print(f'Copied {idx+1}/{len(ls)} images to {_type} set', end='\r', flush=True)
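The two helpers added at the end of the file cover Cellpose training-data preparation: convert_numpy_to_tiff rewrites .npy stacks as TIFFs via tifffile.imwrite, and generate_cellpose_train_test splits image/mask pairs into sibling train/ and test/ folders. A hedged usage sketch with hypothetical paths:

from spacr.io import convert_numpy_to_tiff, generate_cellpose_train_test

# /data/stacks holds .npy arrays; TIFFs land in /data/stacks/tiff
convert_numpy_to_tiff('/data/stacks', limit=10)

# /data/annotated holds *.tif images plus a 'masks' subfolder with
# identically named mask files; output goes to /data/train and /data/test
generate_cellpose_train_test('/data/annotated', test_split=0.1)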