spacr 0.0.1__py3-none-any.whl → 0.0.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
spacr/io.py CHANGED
@@ -1,6 +1,7 @@
1
- import os, re, sqlite3, gc, torch, time, random, shutil, cv2, tarfile, cellpose
1
+ import os, re, sqlite3, gc, torch, time, random, shutil, cv2, tarfile, cellpose, glob
2
2
  import numpy as np
3
3
  import pandas as pd
4
+ import tifffile
4
5
  from PIL import Image
5
6
  from collections import defaultdict, Counter
6
7
  from pathlib import Path
@@ -18,13 +19,12 @@ from io import BytesIO
18
19
  from IPython.display import display, clear_output
19
20
  from multiprocessing import Pool, cpu_count
20
21
  from torch.utils.data import Dataset
21
- import seaborn as sns
22
22
  import matplotlib.pyplot as plt
23
23
  from torchvision.transforms import ToTensor
24
24
 
25
+
25
26
  from .logger import log_function_call
26
27
 
27
- @log_function_call
28
28
  def _load_images_and_labels(image_files, label_files, circular=False, invert=False, image_extension="*.tif", label_extension="*.tif"):
29
29
 
30
30
  from .utils import invert_image, apply_mask
@@ -44,19 +44,19 @@ def _load_images_and_labels(image_files, label_files, circular=False, invert=Fal
44
44
 
45
45
  if not image_files is None and not label_files is None:
46
46
  for img_file, lbl_file in zip(image_files, label_files):
47
- image = cellpose.imread(img_file)
47
+ image = cellpose.io.imread(img_file)
48
48
  if invert:
49
49
  image = invert_image(image)
50
50
  if circular:
51
51
  image = apply_mask(image, output_value=0)
52
- label = cellpose.imread(lbl_file)
52
+ label = cellpose.io.imread(lbl_file)
53
53
  if image.max() > 1:
54
54
  image = image / image.max()
55
55
  images.append(image)
56
56
  labels.append(label)
57
57
  elif not image_files is None:
58
58
  for img_file in image_files:
59
- image = cellpose.imread(img_file)
59
+ image = cellpose.io.imread(img_file)
60
60
  if invert:
61
61
  image = invert_image(image)
62
62
  if circular:
@@ -66,7 +66,7 @@ def _load_images_and_labels(image_files, label_files, circular=False, invert=Fal
66
66
  images.append(image)
67
67
  elif not image_files is None:
68
68
  for lbl_file in label_files:
69
- label = cellpose.imread(lbl_file)
69
+ label = cellpose.io.imread(lbl_file)
70
70
  if circular:
71
71
  label = apply_mask(label, output_value=0)
72
72
  labels.append(label)
@@ -87,16 +87,13 @@ def _load_images_and_labels(image_files, label_files, circular=False, invert=Fal
87
87
  print(f'image shape: {images[0].shape}, image dtype: {images[0].dtype}, mask shape: {labels[0].shape}, mask dtype: {labels[0].dtype}')
88
88
  return images, labels, image_names, label_names
89
89
 
90
- @log_function_call
91
- def _load_normalized_images_and_labels(image_files, label_files, signal_thresholds=[1000], channels=None, percentiles=None, circular=False, invert=False, visualize=False):
90
+ def _load_normalized_images_and_labels(image_files, label_files, channels=None, percentiles=None, circular=False, invert=False, visualize=False, remove_background=False, background=0, Signal_to_noise=10):
92
91
 
93
92
  from .plot import normalize_and_visualize
94
93
  from .utils import invert_image, apply_mask
95
-
96
- if isinstance(signal_thresholds, int):
97
- signal_thresholds = [signal_thresholds] * (len(channels) if channels is not None else 1)
98
- elif not isinstance(signal_thresholds, list):
99
- signal_thresholds = [signal_thresholds]
94
+
95
+ signal_thresholds = background*Signal_to_noise
96
+ lower_percentile = 2
100
97
 
101
98
  images = []
102
99
  labels = []
@@ -109,18 +106,22 @@ def _load_normalized_images_and_labels(image_files, label_files, signal_threshol
109
106
 
110
107
  if label_files is not None:
111
108
  label_names = [os.path.basename(f) for f in label_files]
109
+ label_dir = os.path.dirname(label_files[0])
112
110
 
113
111
  # Load images and check percentiles
114
112
  for i,img_file in enumerate(image_files):
115
- image = cellpose.imread(img_file)
113
+ image = cellpose.io.imread(img_file)
116
114
  if invert:
117
115
  image = invert_image(image)
118
116
  if circular:
119
117
  image = apply_mask(image, output_value=0)
120
-
118
+
121
119
  # If specific channels are specified, select them
122
120
  if channels is not None and image.ndim == 3:
123
121
  image = image[..., channels]
122
+
123
+ if remove_background:
124
+ image[image < background] = 0
124
125
 
125
126
  if image.ndim < 3:
126
127
  image = np.expand_dims(image, axis=-1)
@@ -128,11 +129,11 @@ def _load_normalized_images_and_labels(image_files, label_files, signal_threshol
128
129
  images.append(image)
129
130
  if percentiles is None:
130
131
  for c in range(image.shape[-1]):
131
- p1 = np.percentile(image[..., c], 1)
132
+ p1 = np.percentile(image[..., c], lower_percentile)
132
133
  percentiles_1[c].append(p1)
133
- for percentile in [99, 99.9, 99.99, 99.999]:
134
+ for percentile in [98, 99, 99.9, 99.99, 99.999]:
134
135
  p = np.percentile(image[..., c], percentile)
135
- if p > signal_thresholds[min(c, len(signal_thresholds)-1)]:
136
+ if p > signal_thresholds:
136
137
  percentiles_99[c].append(p)
137
138
  break
138
139
 
@@ -141,8 +142,8 @@ def _load_normalized_images_and_labels(image_files, label_files, signal_threshol
141
142
  for image in images:
142
143
  normalized_image = np.zeros_like(image, dtype=np.float32)
143
144
  for c in range(image.shape[-1]):
144
- high_p = np.percentile(image[..., c], percentiles[1])
145
145
  low_p = np.percentile(image[..., c], percentiles[0])
146
+ high_p = np.percentile(image[..., c], percentiles[1])
146
147
  normalized_image[..., c] = rescale_intensity(image[..., c], in_range=(low_p, high_p), out_range=(0, 1))
147
148
  normalized_images.append(normalized_image)
148
149
  if visualize:
@@ -153,23 +154,26 @@ def _load_normalized_images_and_labels(image_files, label_files, signal_threshol
153
154
  avg_p1 = [np.mean(p) for p in percentiles_1]
154
155
  avg_p99 = [np.mean(p) if len(p) > 0 else np.mean(percentiles_1[i]) for i, p in enumerate(percentiles_99)]
155
156
 
157
+ print(f'Average lower percentiles: {avg_p1}, average upper percentiles: {avg_p99}')
158
+
156
159
  normalized_images = []
157
160
  for image in images:
158
161
  normalized_image = np.zeros_like(image, dtype=np.float32)
159
- for c in range(image.shape[-1]):
160
- normalized_image[..., c] = rescale_intensity(image[..., c], in_range=(avg_p1[c], avg_p99[c]), out_range=(0, 1))
161
- normalized_images.append(normalized_image)
162
- if visualize:
163
- normalize_and_visualize(image, normalized_image, title=f"Channel {c+1} Normalized")
162
+ for c in range(image.shape[-1]):
163
+ normalized_image[..., c] = rescale_intensity(image[..., c], in_range=(avg_p1[c], avg_p99[c]), out_range=(0, 1))
164
+ normalized_images.append(normalized_image)
165
+ if visualize:
166
+ normalize_and_visualize(image, normalized_image, title=f"Channel {c+1} Normalized")
164
167
 
165
168
  if not image_files is None:
166
169
  image_dir = os.path.dirname(image_files[0])
170
+
167
171
  else:
168
172
  image_dir = None
169
173
 
170
174
  if label_files is not None:
171
175
  for lbl_file in label_files:
172
- labels.append(cellpose.imread(lbl_file))
176
+ labels.append(cellpose.io.imread(lbl_file))
173
177
  else:
174
178
  label_names = []
175
179
  label_dir = None
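Note: when no explicit percentiles are passed, the loader above gathers a per-image lower percentile plus the first high percentile whose value clears background*Signal_to_noise, averages those bounds over the dataset, and rescales every channel to [0, 1]. A minimal standalone sketch of that idea (single-channel images, scikit-image assumed; not the packaged implementation):

    import numpy as np
    from skimage.exposure import rescale_intensity

    def normalize_with_average_bounds(images, background=0, snr=10, lower_percentile=2):
        signal_threshold = background * snr
        lows, highs = [], []
        for img in images:
            lows.append(np.percentile(img, lower_percentile))
            for p in (98, 99, 99.9, 99.99, 99.999):
                value = np.percentile(img, p)
                if value > signal_threshold:   # first percentile that looks like real signal
                    highs.append(value)
                    break
        low = np.mean(lows)
        high = np.mean(highs) if highs else low
        return [rescale_intensity(img, in_range=(low, high), out_range=(0, 1)) for img in images]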
@@ -178,86 +182,8 @@ def _load_normalized_images_and_labels(image_files, label_files, signal_threshol
178
182
 
179
183
  return normalized_images, labels, image_names, label_names
180
184
 
181
- class MyDataset(Dataset):
182
- """
183
- Custom dataset class for loading and processing image data.
184
-
185
- Args:
186
- data_dir (str): The directory path where the data is stored.
187
- loader_classes (list): List of class names.
188
- transform (callable, optional): A function/transform that takes in an PIL image and returns a transformed version. Default is None.
189
- shuffle (bool, optional): Whether to shuffle the dataset. Default is True.
190
- load_to_memory (bool, optional): Whether to load images into memory. Default is False.
191
-
192
- Attributes:
193
- data_dir (str): The directory path where the data is stored.
194
- classes (list): List of class names.
195
- transform (callable): A function/transform that takes in an PIL image and returns a transformed version.
196
- shuffle (bool): Whether to shuffle the dataset.
197
- load_to_memory (bool): Whether to load images into memory.
198
- filenames (list): List of file paths.
199
- labels (list): List of labels corresponding to each file.
200
- images (list): List of loaded images.
201
- image_cache (Cache): Cache object for storing loaded images.
202
-
203
- Methods:
204
- load_image: Load an image from file.
205
- __len__: Get the length of the dataset.
206
- shuffle_dataset: Shuffle the dataset.
207
- __getitem__: Get an item from the dataset.
208
-
209
- """
210
-
211
- def _init__(self, data_dir, loader_classes, transform=None, shuffle=True, load_to_memory=False):
212
- from .utils import Cache
213
- self.data_dir = data_dir
214
- self.classes = loader_classes
215
- self.transform = transform
216
- self.shuffle = shuffle
217
- self.load_to_memory = load_to_memory
218
- self.filenames = []
219
- self.labels = []
220
- self.images = []
221
- self.image_cache = Cache(50)
222
- for class_name in self.classes:
223
- class_path = os.path.join(data_dir, class_name)
224
- class_files = [os.path.join(class_path, f) for f in os.listdir(class_path) if os.path.isfile(os.path.join(class_path, f))]
225
- self.filenames.extend(class_files)
226
- self.labels.extend([self.classes.index(class_name)] * len(class_files))
227
- if self.shuffle:
228
- self.shuffle_dataset()
229
- if self.load_to_memory:
230
- self.images = [self.load_image(f) for f in self.filenames]
231
-
232
- def load_image(self, img_path):
233
- img = self.image_cache.get(img_path)
234
- if img is None:
235
- img = Image.open(img_path).convert('RGB')
236
- self.image_cache.put(img_path, img)
237
- return img
238
-
239
- def _len__(self):
240
- return len(self.filenames)
241
-
242
- def shuffle_dataset(self):
243
- combined = list(zip(self.filenames, self.labels))
244
- random.shuffle(combined)
245
- self.filenames, self.labels = zip(*combined)
246
-
247
- def _getitem__(self, index):
248
- label = self.labels[index]
249
- filename = self.filenames[index]
250
- if self.load_to_memory:
251
- img = self.images[index]
252
- else:
253
- img = self.load_image(filename)
254
- if self.transform is not None:
255
- img = self.transform(img)
256
- else:
257
- img = ToTensor()(img)
258
- return img, label, filename
259
-
260
185
  class CombineLoaders:
186
+
261
187
  """
262
188
  A class that combines multiple data loaders into a single iterator.
263
189
 
@@ -398,7 +324,7 @@ class MyDataset(Dataset):
398
324
  specific_labels (list, optional): A list of specific labels corresponding to the specific files. Default is None.
399
325
  """
400
326
 
401
- def _init__(self, data_dir, loader_classes, transform=None, shuffle=True, pin_memory=False, specific_files=None, specific_labels=None):
327
+ def __init__(self, data_dir, loader_classes, transform=None, shuffle=True, pin_memory=False, specific_files=None, specific_labels=None):
402
328
  self.data_dir = data_dir
403
329
  self.classes = loader_classes
404
330
  self.transform = transform
@@ -427,7 +353,7 @@ class MyDataset(Dataset):
427
353
  img = Image.open(img_path).convert('RGB')
428
354
  return img
429
355
 
430
- def _len__(self):
356
+ def __len__(self):
431
357
  return len(self.filenames)
432
358
 
433
359
  def shuffle_dataset(self):
@@ -439,7 +365,7 @@ class MyDataset(Dataset):
439
365
  filename = os.path.basename(filepath) # Get just the filename from the full path
440
366
  return filename.split('_')[0]
441
367
 
442
- def _getitem__(self, index):
368
+ def __getitem__(self, index):
443
369
  label = self.labels[index]
444
370
  filename = self.filenames[index]
445
371
  img = self.load_image(filename)
@@ -527,6 +453,7 @@ class TarImageDataset(Dataset):
527
453
 
528
454
  return img, m.name
529
455
 
456
+ #@log_function_call
530
457
  def _rename_and_organize_image_files(src, regex, batch_size=100, pick_slice=False, skip_mode='01', metadata_type='', img_format='.tif'):
531
458
  """
532
459
  Convert z-stack images to maximum intensity projection (MIP) images.
@@ -599,40 +526,47 @@ def _rename_and_organize_image_files(src, regex, batch_size=100, pick_slice=Fals
599
526
  shutil.move(os.path.join(src, filename), move)
600
527
  return
601
528
 
602
- def _merge_file(chan_dirs, stack_dir, file):
529
+ def _merge_file(chan_dirs, stack_dir, file_name):
603
530
  """
604
- Merge multiple channels into a single stack and save it as a numpy array.
605
-
531
+ Merge multiple channels into a single stack and save it as a numpy array, using the os module for path handling.
532
+
606
533
  Args:
607
534
  chan_dirs (list): List of directories containing channel images.
608
535
  stack_dir (str): Directory to save the merged stack.
609
- file (str): File name of the channel image.
536
+ file_name (str): File name of the channel image.
610
537
 
611
538
  Returns:
612
539
  None
613
540
  """
614
- chan1 = cv2.imread(str(file), -1)
615
- chan1 = np.expand_dims(chan1, axis=2)
616
- new_file = stack_dir / (file.stem + '.npy')
617
- if not new_file.exists():
618
- stack_dir.mkdir(exist_ok=True)
619
- channels = [chan1]
620
- for chan_dir in chan_dirs[1:]:
621
- img = cv2.imread(str(chan_dir / file.name), -1)
541
+ # Construct new file path
542
+ file_root, file_ext = os.path.splitext(file_name)
543
+ new_file = os.path.join(stack_dir, file_root + '.npy')
544
+
545
+ # Check if the new file exists and create the stack directory if it doesn't
546
+ if not os.path.exists(new_file):
547
+ os.makedirs(stack_dir, exist_ok=True)
548
+ channels = []
549
+ for i, chan_dir in enumerate(chan_dirs):
550
+ img_path = os.path.join(chan_dir, file_name)
551
+ img = cv2.imread(img_path, -1)
552
+ if img is None:
553
+ print(f"Warning: Failed to read image {img_path}")
554
+ continue
622
555
  chan = np.expand_dims(img, axis=2)
623
556
  channels.append(chan)
624
- stack = np.concatenate(channels, axis=2)
625
- np.save(new_file, stack)
557
+ del img # Explicitly delete the reference to the image to free up memory
558
+ if i % 10 == 0: # Periodically suggest garbage collection
559
+ gc.collect()
560
+
561
+ if channels:
562
+ stack = np.concatenate(channels, axis=2)
563
+ np.save(new_file, stack)
564
+ else:
565
+ print(f"No valid channels to merge for file {file_name}")
626
566
 
627
567
  def _is_dir_empty(dir_path):
628
568
  """
629
- Check if a directory is empty.
630
-
631
- Args:
632
- dir_path (str): The path to the directory.
633
-
634
- Returns:
635
- bool: True if the directory is empty, False otherwise.
569
+ Check if a directory is empty, using the os module.
636
570
  """
637
571
  return len(os.listdir(dir_path)) == 0
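The rewritten _merge_file above reads the same file name from each single-channel folder, adds a channel axis, concatenates, and saves one .npy per field. A rough standalone sketch of that stacking step (folder names are hypothetical; OpenCV and NumPy assumed):

    import os
    import cv2
    import numpy as np

    def merge_one_file(chan_dirs, stack_dir, file_name):
        os.makedirs(stack_dir, exist_ok=True)
        channels = []
        for chan_dir in chan_dirs:
            img = cv2.imread(os.path.join(chan_dir, file_name), -1)  # -1 keeps the original bit depth
            if img is None:
                continue  # skip unreadable channels, mirroring the warning above
            channels.append(np.expand_dims(img, axis=2))
        if channels:
            out = os.path.join(stack_dir, os.path.splitext(file_name)[0] + '.npy')
            np.save(out, np.concatenate(channels, axis=2))

    # hypothetical layout: plate/01 and plate/02 each hold one TIFF per field
    # merge_one_file(['plate/01', 'plate/02'], 'plate/stack', 'plate1_A01_1.tif')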
638
572
 
@@ -706,7 +640,7 @@ def _move_to_chan_folder(src, regex, timelapse=False, metadata_type=''):
706
640
  if metadata_type =='cq1':
707
641
  orig_wellID = wellID
708
642
  wellID = _convert_cq1_well_id(wellID)
709
- print(f'Converted Well ID: {orig_wellID} to {wellID}')
643
+ print(f'Converted Well ID: {orig_wellID} to {wellID}')#, end='\r', flush=True)
710
644
 
711
645
  newname = f"{plateID}_{wellID}_{fieldID}_{timeID if timelapse else ''}{ext}"
712
646
  newpath = src / chanID
@@ -732,7 +666,7 @@ def _move_to_chan_folder(src, regex, timelapse=False, metadata_type=''):
732
666
  shutil.move(os.path.join(src, filename), move)
733
667
  return
734
668
 
735
- def _merge_channels(src, plot=False):
669
+ def _merge_channels_v2(src, plot=False):
736
670
  from .plot import plot_arrays
737
671
  """
738
672
  Merge the channels in the given source directory and save the merged files in a 'stack' directory.
@@ -757,9 +691,11 @@ def _merge_channels(src, plot=False):
757
691
 
758
692
  # Create the 'stack' directory if it doesn't exist
759
693
  stack_dir.mkdir(exist_ok=True)
694
+ print(f'generated folder with merged arrays: {stack_dir}')
760
695
 
761
696
  if _is_dir_empty(stack_dir):
762
- with Pool(cpu_count()) as pool:
697
+ with Pool(max(cpu_count() // 2, 1)) as pool:
698
+ #with Pool(cpu_count()) as pool:
763
699
  merge_func = partial(_merge_file, chan_dirs, stack_dir)
764
700
  pool.map(merge_func, dir_files)
765
701
 
@@ -771,6 +707,47 @@ def _merge_channels(src, plot=False):
771
707
 
772
708
  return
773
709
 
710
+ def _merge_channels(src, plot=False):
711
+ """
712
+ Merge the channels in the given source directory and save the merged files in a 'stack' directory without using multiprocessing.
713
+ """
714
+
715
+ from .plot import plot_arrays
716
+
717
+ stack_dir = os.path.join(src, 'stack')
718
+ allowed_names = ['01', '02', '03', '04', '00', '1', '2', '3', '4', '0']
719
+
720
+ # List directories that match the allowed names
721
+ chan_dirs = [d for d in os.listdir(src) if os.path.isdir(os.path.join(src, d)) and d in allowed_names]
722
+ chan_dirs.sort()
723
+
724
+ print(f'List of folders in src: {chan_dirs}. Single channel folders.')
725
+ start_time = time.time()
726
+
727
+ # Assuming chan_dirs[0] is not empty and exists, adjust according to your logic
728
+ first_dir_path = os.path.join(src, chan_dirs[0])
729
+ dir_files = os.listdir(first_dir_path)
730
+
731
+ # Create the 'stack' directory if it doesn't exist
732
+ if not os.path.exists(stack_dir):
733
+ os.makedirs(stack_dir, exist_ok=True)
734
+ print(f'Generated folder with merged arrays: {stack_dir}')
735
+
736
+ if _is_dir_empty(stack_dir):
737
+ for file_name in dir_files:
738
+ full_file_path = os.path.join(first_dir_path, file_name)
739
+ if os.path.isfile(full_file_path):
740
+ _merge_file([os.path.join(src, d) for d in chan_dirs], stack_dir, file_name)
741
+
742
+ elapsed_time = time.time() - start_time
743
+ avg_time = elapsed_time / len(dir_files) if dir_files else 0
744
+ print(f'Average Time: {avg_time:.3f} sec, Total Elapsed Time: {elapsed_time:.3f} sec')
745
+
746
+ if plot:
747
+ plot_arrays(os.path.join(src, 'stack'))
748
+
749
+ return
750
+
774
751
  def _mip_all(src, include_first_chan=True):
775
752
 
776
753
  """
@@ -819,6 +796,7 @@ def _mip_all(src, include_first_chan=True):
819
796
  np.save(os.path.join(src, filename), concatenated)
820
797
  return
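_mip_all reduces a z-stack to a maximum intensity projection, which is just a max over the z axis; a minimal NumPy illustration (array layout assumed z, y, x, channel):

    import numpy as np

    zstack = np.random.rand(5, 64, 64, 2)   # hypothetical 5-slice, 2-channel stack
    mip = zstack.max(axis=0)                # maximum intensity projection -> (64, 64, 2)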
821
798
 
799
+ #@log_function_call
822
800
  def _concatenate_channel(src, channels, randomize=True, timelapse=False, batch_size=100):
823
801
  """
824
802
  Concatenates channel data from multiple files and saves the concatenated data as numpy arrays.
@@ -853,8 +831,8 @@ def _concatenate_channel(src, channels, randomize=True, timelapse=False, batch_s
853
831
  array = np.take(array, channels, axis=2)
854
832
  stack_region.append(array)
855
833
  filenames_region.append(os.path.basename(path))
856
- clear_output(wait=True)
857
- print(f'\033[KRegion {i+1}/ {len(time_stack_path_lists)}', end='\r', flush=True)
834
+ #clear_output(wait=True)
835
+ print(f'Region {i+1}/ {len(time_stack_path_lists)}', end='\r', flush=True)
858
836
  stack = np.stack(stack_region)
859
837
  save_loc = os.path.join(channel_stack_loc, f'{name}.npz')
860
838
  np.savez(save_loc, data=stack, filenames=filenames_region)
@@ -879,15 +857,17 @@ def _concatenate_channel(src, channels, randomize=True, timelapse=False, batch_s
879
857
  array = np.take(array, channels, axis=2)
880
858
  stack_ls.append(array)
881
859
  filenames_batch.append(os.path.basename(path)) # store the filename
882
- clear_output(wait=True)
883
- print(f'\033[KConcatenated: {i+1}/{nr_files} files', end='\r', flush=True)
860
+ #clear_output(wait=True)
861
+ print(f'Concatenated: {i+1}/{nr_files} files')
862
+ #print(f'Concatenated: {i+1}/{nr_files} files', end='\r', flush=True)
884
863
 
885
864
  if (i+1) % batch_size == 0 or i+1 == nr_files:
886
865
  unique_shapes = {arr.shape[:-1] for arr in stack_ls}
887
866
  if len(unique_shapes) > 1:
888
867
  max_dims = np.max(np.array(list(unique_shapes)), axis=0)
889
- clear_output(wait=True)
890
- print(f'\033[KWarning: arrays with multiple shapes found in batch {i+1}. Padding arrays to max X,Y dimentions {max_dims}', end='\r', flush=True)
868
+ #clear_output(wait=True)
869
+ print(f'Warning: arrays with multiple shapes found in batch {i+1}. Padding arrays to max X,Y dimensions {max_dims}')
870
+ #print(f'Warning: arrays with multiple shapes found in batch {i+1}. Padding arrays to max X,Y dimensions {max_dims}', end='\r', flush=True)
891
871
  padded_stack_ls = []
892
872
  for arr in stack_ls:
893
873
  pad_width = [(0, max_dim - dim) for max_dim, dim in zip(max_dims, arr.shape[:-1])]
@@ -904,9 +884,226 @@ def _concatenate_channel(src, channels, randomize=True, timelapse=False, batch_s
904
884
  stack_ls = [] # empty the list for the next batch
905
885
  filenames_batch = [] # empty the filenames list for the next batch
906
886
  padded_stack_ls = []
907
- #print(f'\nAll files concatenated and saved to:{channel_stack_loc}')
887
+ print(f'All files concatenated and saved to:{channel_stack_loc}')
908
888
  return channel_stack_loc
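When a batch mixes image sizes, the loop above pads every array up to the batch's maximum X,Y dimensions before stacking; the padding step in isolation (shapes are made up):

    import numpy as np

    arrays = [np.zeros((100, 120, 3)), np.zeros((110, 100, 3))]
    max_dims = np.max(np.array([a.shape[:-1] for a in arrays]), axis=0)   # [110, 120]
    padded = []
    for arr in arrays:
        pad_width = [(0, m - d) for m, d in zip(max_dims, arr.shape[:-1])]
        pad_width.append((0, 0))              # never pad the channel axis
        padded.append(np.pad(arr, pad_width))
    stack = np.stack(padded)                  # shape (2, 110, 120, 3)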
909
889
 
890
+ def concatenate_and_normalize(src, channels, randomize=True, timelapse=False, batch_size=100, backgrounds=[100, 100, 100], remove_backgrounds=[False, False, False], lower_percentile=2, save_dtype=np.float32, signal_to_noise=[5, 5, 5], signal_thresholds=[1000, 1000, 1000]):
891
+ """
892
+ Concatenates and normalizes channel data from multiple files and saves the normalized data.
893
+
894
+ Args:
895
+ src (str): The source directory containing the channel data files.
896
+ channels (list): The list of channel indices to be concatenated and normalized.
897
+ randomize (bool, optional): Whether to randomize the order of the files. Defaults to True.
898
+ timelapse (bool, optional): Whether the channel data is from a timelapse experiment. Defaults to False.
899
+ batch_size (int, optional): The number of files to be processed in each batch. Defaults to 100.
900
+ backgrounds (list, optional): Background values for each channel. Defaults to [100, 100, 100].
901
+ remove_backgrounds (list, optional): Whether to remove background values for each channel. Defaults to [False, False, False].
902
+ lower_percentile (int, optional): Lower percentile value for normalization. Defaults to 2.
903
+ save_dtype (numpy.dtype, optional): Data type for saving the normalized stack. Defaults to np.float32.
904
+ signal_to_noise (list, optional): Signal-to-noise ratio thresholds for each channel. Defaults to [5, 5, 5].
905
+ signal_thresholds (list, optional): Signal thresholds for each channel. Defaults to [1000, 1000, 1000].
906
+
907
+ Returns:
908
+ str: The directory path where the concatenated and normalized channel data is saved.
909
+ """
910
+ channels = [item for item in channels if item is not None]
911
+ paths = []
912
+ output_fldr = os.path.join(os.path.dirname(src), 'norm_channel_stack')
913
+ os.makedirs(output_fldr, exist_ok=True)
914
+
915
+ if timelapse:
916
+ try:
917
+ time_stack_path_lists = _generate_time_lists(os.listdir(src))
918
+ for i, time_stack_list in enumerate(time_stack_path_lists):
919
+ stack_region = []
920
+ filenames_region = []
921
+ for idx, file in enumerate(time_stack_list):
922
+ path = os.path.join(src, file)
923
+ if idx == 0:
924
+ parts = file.split('_')
925
+ name = parts[0] + '_' + parts[1] + '_' + parts[2]
926
+ array = np.load(path)
927
+ array = np.take(array, channels, axis=2)
928
+ stack_region.append(array)
929
+ filenames_region.append(os.path.basename(path))
930
+ print(f'Region {i + 1}/ {len(time_stack_path_lists)}', end='\r', flush=True)
931
+ stack = np.stack(stack_region)
932
+ normalized_stack = _normalize_stack(stack, backgrounds, remove_backgrounds, lower_percentile, save_dtype, signal_to_noise, signal_thresholds)
933
+ save_loc = os.path.join(output_fldr, f'{name}_norm_timelapse.npz')
934
+ np.savez(save_loc, data=normalized_stack, filenames=filenames_region)
935
+ print(save_loc)
936
+ del stack, normalized_stack
937
+ except Exception as e:
938
+ print(f"Error processing files, make sure filenames metadata is structured plate_well_field_time.npy")
939
+ print(f"Error: {e}")
940
+ else:
941
+ for file in os.listdir(src):
942
+ if file.endswith('.npy'):
943
+ path = os.path.join(src, file)
944
+ paths.append(path)
945
+ if randomize:
946
+ random.shuffle(paths)
947
+ nr_files = len(paths)
948
+ batch_index = 0
949
+ stack_ls = []
950
+ filenames_batch = []
951
+
952
+ for i, path in enumerate(paths):
953
+ array = np.load(path)
954
+ array = np.take(array, channels, axis=2)
955
+ stack_ls.append(array)
956
+ filenames_batch.append(os.path.basename(path))
957
+ print(f'Concatenated: {i + 1}/{nr_files} files')
958
+
959
+ if (i + 1) % batch_size == 0 or i + 1 == nr_files:
960
+ unique_shapes = {arr.shape[:-1] for arr in stack_ls}
961
+ if len(unique_shapes) > 1:
962
+ max_dims = np.max(np.array(list(unique_shapes)), axis=0)
963
+ print(f'Warning: arrays with multiple shapes found in batch {i + 1}. Padding arrays to max X,Y dimensions {max_dims}')
964
+ padded_stack_ls = []
965
+ for arr in stack_ls:
966
+ pad_width = [(0, max_dim - dim) for max_dim, dim in zip(max_dims, arr.shape[:-1])]
967
+ pad_width.append((0, 0))
968
+ padded_arr = np.pad(arr, pad_width)
969
+ padded_stack_ls.append(padded_arr)
970
+ stack = np.stack(padded_stack_ls)
971
+ else:
972
+ stack = np.stack(stack_ls)
973
+
974
+ normalized_stack = _normalize_img_batch(stack, backgrounds, remove_backgrounds, lower_percentile, save_dtype, signal_to_noise, signal_thresholds)
975
+
976
+ save_loc = os.path.join(output_fldr, f'stack_{batch_index}_norm.npz')
977
+ np.savez(save_loc, data=normalized_stack, filenames=filenames_batch)
978
+ batch_index += 1
979
+ del stack, normalized_stack
980
+ stack_ls = []
981
+ filenames_batch = []
982
+ padded_stack_ls = []
983
+ print(f'All files concatenated and normalized. Saved to: {output_fldr}')
984
+ return output_fldr
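A hedged usage sketch for the new concatenate_and_normalize; the folder path and per-channel values are illustrative only, and the import path is assumed from the module name:

    import numpy as np
    # from spacr.io import concatenate_and_normalize   # assumed import

    out_dir = concatenate_and_normalize(
        '/data/plate1/stack',                 # hypothetical folder of per-field .npy stacks
        channels=[0, 1, 2],
        randomize=True,
        timelapse=False,
        batch_size=100,
        backgrounds=[100, 100, 200],
        remove_backgrounds=[False, False, True],
        lower_percentile=2,
        save_dtype=np.float32,
        signal_to_noise=[5, 5, 5],
        signal_thresholds=[500, 500, 1000],
    )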
985
+
986
+ def _normalize_img_batch(stack, backgrounds, remove_backgrounds, lower_percentile, save_dtype, signal_to_noise, signal_thresholds):
987
+ """
988
+ Normalize the stack of images.
989
+
990
+ Args:
991
+ stack (numpy.ndarray): The stack of images to normalize.
992
+ backgrounds (list): Background values for each channel.
993
+ remove_backgrounds (list): Whether to remove background values for each channel.
994
+ lower_percentile (int): Lower percentile value for normalization.
995
+ save_dtype (numpy.dtype): Data type for saving the normalized stack.
996
+ signal_to_noise (list): Signal-to-noise ratio thresholds for each channel.
997
+ signal_thresholds (list): Signal thresholds for each channel.
998
+
999
+ Returns:
1000
+ numpy.ndarray: The normalized stack.
1001
+ """
1002
+ normalized_stack = np.zeros_like(stack, dtype=np.float32)
1003
+
1004
+ for chan_index, channel in enumerate(range(stack.shape[-1])):
1005
+ single_channel = stack[:, :, :, channel]
1006
+ background = backgrounds[chan_index]
1007
+ signal_threshold = signal_thresholds[chan_index]
1008
+ remove_background = remove_backgrounds[chan_index]
1009
+
1010
+ print(f'Processing channel {chan_index}: background={background}, signal_threshold={signal_threshold}, remove_background={remove_background}')
1011
+
1012
+ # Step 3: Remove background if required
1013
+ if remove_background:
1014
+ single_channel[single_channel < background] = 0
1015
+
1016
+ # Step 4: Calculate global lower percentile for the channel
1017
+ non_zero_single_channel = single_channel[single_channel != 0]
1018
+ global_lower = np.percentile(non_zero_single_channel, lower_percentile)
1019
+
1020
+ # Step 5: Calculate global upper percentile for the channel
1021
+ global_upper = None
1022
+ for upper_p in np.linspace(98, 99.5, num=16):
1023
+ upper_value = np.percentile(non_zero_single_channel, upper_p)
1024
+ if upper_value >= signal_threshold:
1025
+ global_upper = upper_value
1026
+ break
1027
+
1028
+ if global_upper is None:
1029
+ global_upper = np.percentile(non_zero_single_channel, 99.5) # Fallback in case no upper percentile met the threshold
1030
+
1031
+ print(f'Channel {chan_index}: global_lower={global_lower}, global_upper={global_upper}, Signal-to-noise={global_upper / global_lower}')
1032
+
1033
+ # Step 6: Normalize each array from global_lower to global_upper between 0 and 1
1034
+ for array_index in range(single_channel.shape[0]):
1035
+ arr_2d = single_channel[array_index, :, :]
1036
+ arr_2d_normalized = exposure.rescale_intensity(arr_2d, in_range=(global_lower, global_upper), out_range=(0, 1))
1037
+ normalized_stack[array_index, :, :, channel] = arr_2d_normalized
1038
+
1039
+ return normalized_stack.astype(save_dtype)
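The upper bound used by _normalize_img_batch is the first percentile in a 98-99.5 sweep whose value clears the channel's signal threshold, computed on non-zero pixels only; the search step on its own:

    import numpy as np

    def find_global_bounds(channel_stack, lower_percentile=2, signal_threshold=1000):
        non_zero = channel_stack[channel_stack != 0]
        lower = np.percentile(non_zero, lower_percentile)
        upper = None
        for p in np.linspace(98, 99.5, num=16):
            value = np.percentile(non_zero, p)
            if value >= signal_threshold:
                upper = value
                break
        if upper is None:                     # fallback, as above, when nothing clears the threshold
            upper = np.percentile(non_zero, 99.5)
        return lower, upper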
1040
+
1041
+ def _normalize_img_batch_v1(stack, backgrounds, remove_backgrounds, lower_percentile, save_dtype, signal_to_noise, signal_thresholds):
1042
+ """
1043
+ Normalize the stack of images.
1044
+
1045
+ Args:
1046
+ stack (numpy.ndarray): The stack of images to normalize.
1047
+ backgrounds (list): Background values for each channel.
1048
+ remove_backgrounds (list): Whether to remove background values for each channel.
1049
+ lower_percentile (int): Lower percentile value for normalization.
1050
+ save_dtype (numpy.dtype): Data type for saving the normalized stack.
1051
+ signal_to_noise (list): Signal-to-noise ratio thresholds for each channel.
1052
+ signal_thresholds (list): Signal thresholds for each channel.
1053
+
1054
+ Returns:
1055
+ numpy.ndarray: The normalized stack.
1056
+ """
1057
+ normalized_stack = np.zeros_like(stack, dtype=np.float32)
1058
+ time_ls = []
1059
+
1060
+ for chan_index, channel in enumerate(range(stack.shape[-1])):
1061
+ single_channel = stack[:, :, :, channel]
1062
+ background = backgrounds[chan_index]
1063
+ signal_threshold = signal_thresholds[chan_index]
1064
+ remove_background = remove_backgrounds[chan_index]
1065
+ signal_2_noise = signal_to_noise[chan_index]
1066
+ print(f'chan_index:{chan_index} background:{background} signal_threshold:{signal_threshold} remove_background:{remove_background} signal_2_noise:{signal_2_noise}')
1067
+
1068
+ if remove_background:
1069
+ single_channel[single_channel < background] = 0
1070
+
1071
+ non_zero_single_channel = single_channel[single_channel != 0]
1072
+ global_lower = np.percentile(non_zero_single_channel, lower_percentile)
1073
+ for upper_p in np.linspace(98, 99.5, num=20).tolist():
1074
+ global_upper = np.percentile(non_zero_single_channel, upper_p)
1075
+ if global_upper >= signal_threshold:
1076
+ break
1077
+
1078
+ arr_2d_normalized = np.zeros_like(single_channel, dtype=single_channel.dtype)
1079
+ signal_to_noise_ratio_ls = []
1080
+ for array_index in range(single_channel.shape[0]):
1081
+ start = time.time()
1082
+ arr_2d = single_channel[array_index, :, :]
1083
+ non_zero_arr_2d = arr_2d[arr_2d != 0]
1084
+ if non_zero_arr_2d.size > 0:
1085
+ lower, upper = np.percentile(non_zero_arr_2d, (lower_percentile, upper_p))
1086
+ signal_to_noise_ratio = upper / lower
1087
+ else:
1088
+ signal_to_noise_ratio = 0
1089
+ signal_to_noise_ratio_ls.append(signal_to_noise_ratio)
1090
+ average_stnr = np.mean(signal_to_noise_ratio_ls) if len(signal_to_noise_ratio_ls) > 0 else 0
1091
+
1092
+ if signal_to_noise_ratio > signal_2_noise:
1093
+ arr_2d_rescaled = exposure.rescale_intensity(arr_2d, in_range=(lower, upper), out_range=(0, 1))
1094
+ arr_2d_normalized[array_index, :, :] = arr_2d_rescaled
1095
+ else:
1096
+ arr_2d_normalized[array_index, :, :] = arr_2d
1097
+ stop = time.time()
1098
+ duration = (stop - start) * single_channel.shape[0]
1099
+ time_ls.append(duration)
1100
+ average_time = np.mean(time_ls) if len(time_ls) > 0 else 0
1101
+ print(f'Progress: channels:{chan_index}/{stack.shape[-1] - 1}, arrays:{array_index + 1}/{single_channel.shape[0]}, Signal:{upper:.1f}, noise:{lower:.1f}, Signal-to-noise:{average_stnr:.1f}, Time/channel:{average_time:.2f}sec')
1102
+
1103
+ normalized_stack[:, :, :, channel] = arr_2d_normalized
1104
+
1105
+ return normalized_stack.astype(save_dtype)
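In the older _normalize_img_batch_v1 path, each 2D frame is rescaled only when its own upper/lower percentile ratio beats the requested signal-to-noise; roughly:

    import numpy as np
    from skimage import exposure

    def maybe_rescale(frame, lower_percentile=2, upper_percentile=99.5, min_snr=5):
        non_zero = frame[frame != 0]
        if non_zero.size == 0:
            return frame                                   # nothing to normalize
        lower, upper = np.percentile(non_zero, (lower_percentile, upper_percentile))
        if lower > 0 and upper / lower > min_snr:          # only frames with real signal get rescaled
            return exposure.rescale_intensity(frame, in_range=(lower, upper), out_range=(0, 1))
        return frame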
1106
+
910
1107
  def _get_lists_for_normalization(settings):
911
1108
  """
912
1109
  Get lists for normalization based on the provided settings.
@@ -921,7 +1118,8 @@ def _get_lists_for_normalization(settings):
921
1118
  # Initialize the lists
922
1119
  backgrounds = []
923
1120
  signal_to_noise = []
924
- signal_thresholds = []
1121
+ signal_thresholds = []
1122
+ remove_background = []
925
1123
 
926
1124
  # Iterate through the channels and append the corresponding values if the channel is not None
927
1125
  for ch in settings['channels']:
@@ -929,29 +1127,31 @@ def _get_lists_for_normalization(settings):
929
1127
  backgrounds.append(settings['nucleus_background'])
930
1128
  signal_to_noise.append(settings['nucleus_Signal_to_noise'])
931
1129
  signal_thresholds.append(settings['nucleus_Signal_to_noise']*settings['nucleus_background'])
1130
+ remove_background.append(settings['remove_background_nucleus'])
932
1131
  elif ch == settings['cell_channel']:
933
1132
  backgrounds.append(settings['cell_background'])
934
1133
  signal_to_noise.append(settings['cell_Signal_to_noise'])
935
1134
  signal_thresholds.append(settings['cell_Signal_to_noise']*settings['cell_background'])
1135
+ remove_background.append(settings['remove_background_cell'])
936
1136
  elif ch == settings['pathogen_channel']:
937
1137
  backgrounds.append(settings['pathogen_background'])
938
1138
  signal_to_noise.append(settings['pathogen_Signal_to_noise'])
939
1139
  signal_thresholds.append(settings['pathogen_Signal_to_noise']*settings['pathogen_background'])
940
- return backgrounds, signal_to_noise, signal_thresholds
1140
+ remove_background.append(settings['remove_background_pathogen'])
1141
+ return backgrounds, signal_to_noise, signal_thresholds, remove_background
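_get_lists_for_normalization now also returns a per-channel remove_background flag; with a hypothetical settings dict (keys as used above) the outputs line up with settings['channels'] like this:

    settings = {
        'channels': [0, 2],
        'nucleus_channel': 0, 'nucleus_background': 100, 'nucleus_Signal_to_noise': 5,
        'remove_background_nucleus': True,
        'cell_channel': 1, 'cell_background': 100, 'cell_Signal_to_noise': 5,
        'remove_background_cell': False,
        'pathogen_channel': 2, 'pathogen_background': 200, 'pathogen_Signal_to_noise': 10,
        'remove_background_pathogen': True,
    }
    # backgrounds        -> [100, 200]
    # signal_to_noise    -> [5, 10]
    # signal_thresholds  -> [500, 2000]
    # remove_background  -> [True, True]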
941
1142
 
942
- def _normalize_stack(src, backgrounds=[100,100,100], remove_background=False, lower_quantile=0.01, save_dtype=np.float32, signal_to_noise=[5,5,5], signal_thresholds=[1000,1000,1000], correct_illumination=False):
1143
+ def _normalize_stack(src, backgrounds=[100, 100, 100], remove_backgrounds=[False, False, False], lower_percentile=2, save_dtype=np.float32, signal_to_noise=[5, 5, 5], signal_thresholds=[1000, 1000, 1000]):
943
1144
  """
944
1145
  Normalize the stack of images.
945
1146
 
946
1147
  Args:
947
1148
  src (str): The source directory containing the stack of images.
948
- backgrounds (list, optional): Background values for each channel. Defaults to [100,100,100].
949
- remove_background (bool, optional): Whether to remove background values. Defaults to False.
950
- lower_quantile (float, optional): Lower quantile value for normalization. Defaults to 0.01.
1149
+ backgrounds (list, optional): Background values for each channel. Defaults to [100, 100, 100].
1150
+ remove_backgrounds (list, optional): Whether to remove background values for each channel. Defaults to [False, False, False].
1151
+ lower_percentile (int, optional): Lower percentile value for normalization. Defaults to 2.
951
1152
  save_dtype (numpy.dtype, optional): Data type for saving the normalized stack. Defaults to np.float32.
952
- signal_to_noise (list, optional): Signal-to-noise ratio thresholds for each channel. Defaults to [5,5,5].
953
- signal_thresholds (list, optional): Signal thresholds for each channel. Defaults to [1000,1000,1000].
954
- correct_illumination (bool, optional): Whether to correct illumination. Defaults to False.
1153
+ signal_to_noise (list, optional): Signal-to-noise ratio thresholds for each channel. Defaults to [5, 5, 5].
1154
+ signal_thresholds (list, optional): Signal thresholds for each channel. Defaults to [1000, 1000, 1000].
955
1155
 
956
1156
  Returns:
957
1157
  None
@@ -960,11 +1160,13 @@ def _normalize_stack(src, backgrounds=[100,100,100], remove_background=False, lo
960
1160
  output_fldr = os.path.join(os.path.dirname(src), 'norm_channel_stack')
961
1161
  os.makedirs(output_fldr, exist_ok=True)
962
1162
  time_ls = []
1163
+
963
1164
  for file_index, path in enumerate(paths):
964
1165
  with np.load(path) as data:
965
1166
  stack = data['data']
966
1167
  filenames = data['filenames']
967
- normalized_stack = np.zeros_like(stack, dtype=stack.dtype)
1168
+
1169
+ normalized_stack = np.zeros_like(stack, dtype=np.float32)
968
1170
  file = os.path.basename(path)
969
1171
  name, _ = os.path.splitext(file)
970
1172
 
@@ -972,24 +1174,22 @@ def _normalize_stack(src, backgrounds=[100,100,100], remove_background=False, lo
972
1174
  single_channel = stack[:, :, :, channel]
973
1175
  background = backgrounds[chan_index]
974
1176
  signal_threshold = signal_thresholds[chan_index]
975
- #print(f'signal_threshold:{signal_threshold} in {signal_thresholds} for {chan_index}')
976
-
1177
+ remove_background = remove_backgrounds[chan_index]
977
1178
  signal_2_noise = signal_to_noise[chan_index]
1179
+ print(f'chan_index:{chan_index} background:{background} signal_threshold:{signal_threshold} remove_background:{remove_background} signal_2_noise:{signal_2_noise}')
1180
+
978
1181
  if remove_background:
979
1182
  single_channel[single_channel < background] = 0
980
- if correct_illumination:
981
- bg = filters.gaussian(single_channel, sigma=50)
982
- single_channel = single_channel - bg
983
1183
 
984
- #Calculate the global lower and upper quantiles for non-zero pixels
1184
+ # Calculate the global lower and upper percentiles for non-zero pixels
985
1185
  non_zero_single_channel = single_channel[single_channel != 0]
986
- global_lower = np.quantile(non_zero_single_channel, lower_quantile)
987
- for upper_p in np.linspace(0.98, 1.0, num=100).tolist():
988
- global_upper = np.quantile(non_zero_single_channel, upper_p)
1186
+ global_lower = np.percentile(non_zero_single_channel, lower_percentile)
1187
+ for upper_p in np.linspace(98, 100, num=100).tolist():
1188
+ global_upper = np.percentile(non_zero_single_channel, upper_p)
989
1189
  if global_upper >= signal_threshold:
990
1190
  break
991
1191
 
992
- #Normalize the pixels in each image to the global quantiles and then dtype.
1192
+ # Normalize the pixels in each image to the global percentiles and then dtype.
993
1193
  arr_2d_normalized = np.zeros_like(single_channel, dtype=single_channel.dtype)
994
1194
  signal_to_noise_ratio_ls = []
995
1195
  for array_index in range(single_channel.shape[0]):
@@ -997,40 +1197,40 @@ def _normalize_stack(src, backgrounds=[100,100,100], remove_background=False, lo
997
1197
  arr_2d = single_channel[array_index, :, :]
998
1198
  non_zero_arr_2d = arr_2d[arr_2d != 0]
999
1199
  if non_zero_arr_2d.size > 0:
1000
- lower, upper = np.quantile(non_zero_arr_2d, (lower_quantile, upper_p))
1001
- signal_to_noise_ratio = upper/lower
1200
+ lower, upper = np.percentile(non_zero_arr_2d, (lower_percentile, upper_p))
1201
+ signal_to_noise_ratio = upper / lower
1002
1202
  else:
1003
1203
  signal_to_noise_ratio = 0
1004
1204
  signal_to_noise_ratio_ls.append(signal_to_noise_ratio)
1005
1205
  average_stnr = np.mean(signal_to_noise_ratio_ls) if len(signal_to_noise_ratio_ls) > 0 else 0
1006
1206
 
1007
1207
  if signal_to_noise_ratio > signal_2_noise:
1008
- arr_2d_rescaled = exposure.rescale_intensity(arr_2d, in_range=(lower, upper), out_range=(global_lower, global_upper))
1208
+ arr_2d_rescaled = exposure.rescale_intensity(arr_2d, in_range=(lower, upper), out_range=(0, 1))
1009
1209
  arr_2d_normalized[array_index, :, :] = arr_2d_rescaled
1010
1210
  else:
1011
1211
  arr_2d_normalized[array_index, :, :] = arr_2d
1012
1212
  stop = time.time()
1013
- duration = (stop - start)*single_channel.shape[0]
1213
+ duration = (stop - start) * single_channel.shape[0]
1014
1214
  time_ls.append(duration)
1015
1215
  average_time = np.mean(time_ls) if len(time_ls) > 0 else 0
1016
- clear_output(wait=True)
1017
- print(f'\033[KProgress: files {file_index+1}/{len(paths)}, channels:{chan_index}/{stack.shape[-1]-1}, arrays:{array_index+1}/{single_channel.shape[0]}, Signal:{upper:.1f}, noise:{lower:.1f}, Signal-to-noise:{average_stnr:.1f}, Time/channel:{average_time:.2f}sec', end='\r', flush=True)
1018
- normalized_single_channel = exposure.rescale_intensity(arr_2d_normalized, out_range='dtype')
1019
- normalized_stack[:, :, :, channel] = normalized_single_channel
1020
- save_loc = output_fldr+'/'+name+'_norm_stack.npz'
1021
- normalized_stack = normalized_stack.astype(save_dtype)
1022
- np.savez(save_loc, data=normalized_stack, filenames=filenames)
1023
- del normalized_stack, single_channel, normalized_single_channel, stack, filenames
1216
+ print(f'Progress: files {file_index + 1}/{len(paths)}, channels:{chan_index}/{stack.shape[-1] - 1}, arrays:{array_index + 1}/{single_channel.shape[0]}, Signal:{upper:.1f}, noise:{lower:.1f}, Signal-to-noise:{average_stnr:.1f}, Time/channel:{average_time:.2f}sec')
1217
+
1218
+ normalized_stack[:, :, :, channel] = arr_2d_normalized
1219
+
1220
+ save_loc = os.path.join(output_fldr, f'{name}_norm_stack.npz')
1221
+ np.savez(save_loc, data=normalized_stack.astype(save_dtype), filenames=filenames)
1222
+ del normalized_stack, single_channel, arr_2d_normalized, stack, filenames
1024
1223
  gc.collect()
1025
- return print(f'Saved stacks:{output_fldr}')
1224
+
1225
+ return print(f'Saved stacks: {output_fldr}')
1026
1226
 
1027
- def _normalize_timelapse(src, lower_quantile=0.01, save_dtype=np.float32):
1227
+ def _normalize_timelapse(src, lower_percentile=2, save_dtype=np.float32):
1028
1228
  """
1029
1229
  Normalize the timelapse data by rescaling the intensity values based on percentiles.
1030
1230
 
1031
1231
  Args:
1032
1232
  src (str): The source directory containing the timelapse data files.
1033
- lower_quantile (float, optional): The lower quantile used to calculate the intensity range. Defaults to 0.01.
1233
+ lower_percentile (int, optional): The lower percentile used to calculate the intensity range. Defaults to 2.
1034
1234
  save_dtype (numpy.dtype, optional): The data type to save the normalized stack. Defaults to np.float32.
1035
1235
  """
1036
1236
  paths = [os.path.join(src, file) for file in os.listdir(src) if file.endswith('.npz')]
@@ -1052,7 +1252,7 @@ def _normalize_timelapse(src, lower_quantile=0.01, save_dtype=np.float32):
1052
1252
  for array_index in range(single_channel.shape[0]):
1053
1253
  arr_2d = single_channel[array_index]
1054
1254
  # Calculate the 1% and 98% percentiles for this specific image
1055
- q_low = np.percentile(arr_2d[arr_2d != 0], 2)
1255
+ q_low = np.percentile(arr_2d[arr_2d != 0], lower_percentile)
1056
1256
  q_high = np.percentile(arr_2d[arr_2d != 0], 98)
1057
1257
 
1058
1258
  # Rescale intensity based on the calculated percentiles to fill the dtype range
@@ -1069,8 +1269,6 @@ def _normalize_timelapse(src, lower_quantile=0.01, save_dtype=np.float32):
1069
1269
 
1070
1270
  print(f'\nSaved normalized stacks: {output_fldr}')
1071
1271
 
1072
-
1073
-
1074
1272
  def _create_movies_from_npy_per_channel(src, fps=10):
1075
1273
  """
1076
1274
  Create movies from numpy files per channel.
@@ -1122,9 +1320,33 @@ def _create_movies_from_npy_per_channel(src, fps=10):
1122
1320
  channel_save_path = os.path.join(save_path, f'{plate}_{well}_{field}_channel_{channel}.mp4')
1123
1321
  _npz_to_movie(normalized_channel_arrays_3d, filenames, channel_save_path, fps)
1124
1322
 
1323
+ def delete_empty_subdirectories(folder_path):
1324
+ """
1325
+ Deletes all empty subdirectories in the specified folder.
1326
+
1327
+ Args:
1328
+ - folder_path (str): The path to the folder in which to look for empty subdirectories.
1329
+ """
1330
+ # Check each item in the specified folder
1331
+ for dirpath, dirnames, filenames in os.walk(folder_path, topdown=False):
1332
+ # os.walk is used with topdown=False to start from the innermost directories and work upwards.
1333
+ for dirname in dirnames:
1334
+ # Construct the full path to the subdirectory
1335
+ full_dir_path = os.path.join(dirpath, dirname)
1336
+ # Try to remove the directory and catch any error (like if the directory is not empty)
1337
+ try:
1338
+ os.rmdir(full_dir_path)
1339
+ print(f"Deleted empty directory: {full_dir_path}")
1340
+ except OSError as e:
1341
+ continue
1342
+ # An error occurred, likely because the directory is not empty
1343
+ #print(f"Skipping non-empty directory: {full_dir_path}")
1344
+
1345
+ #@log_function_call
1125
1346
  def preprocess_img_data(settings):
1126
1347
 
1127
1348
  from .plot import plot_arrays, _plot_4D_arrays
1349
+ from .utils import _run_test_mode, _get_regex, set_default_settings_preprocess_img_data
1128
1350
 
1129
1351
  """
1130
1352
  Preprocesses image data by converting z-stack images to maximum intensity projection (MIP) images.
@@ -1143,9 +1365,8 @@ def preprocess_img_data(settings):
1143
1365
  timelapse (bool, optional): Whether the images are from a timelapse experiment. Defaults to False.
1144
1366
  remove_background (bool, optional): Whether to remove the background from the images. Defaults to False.
1145
1367
  backgrounds (int, optional): The number of background images to use for background removal. Defaults to 100.
1146
- lower_quantile (float, optional): The lower quantile used for background removal. Defaults to 0.01.
1368
+ lower_percentile (float, optional): The lower percentile used for background removal. Defaults to 1.
1147
1369
  save_dtype (type, optional): The data type used for saving the preprocessed images. Defaults to np.float32.
1148
- correct_illumination (bool, optional): Whether to correct the illumination of the images. Defaults to False.
1149
1370
  randomize (bool, optional): Whether to randomize the order of the images. Defaults to True.
1150
1371
  all_to_mip (bool, optional): Whether to convert all images to MIP. Defaults to False.
1151
1372
  pick_slice (bool, optional): Whether to pick a specific slice based on the provided skip mode. Defaults to False.
@@ -1155,12 +1376,16 @@ def preprocess_img_data(settings):
1155
1376
  Returns:
1156
1377
  None
1157
1378
  """
1379
+
1158
1380
  src = settings['src']
1159
1381
  valid_ext = ['tif', 'tiff', 'png', 'jpeg']
1160
1382
  files = os.listdir(src)
1161
1383
  extensions = [file.split('.')[-1] for file in files]
1162
1384
  extension_counts = Counter(extensions)
1163
1385
  most_common_extension = extension_counts.most_common(1)[0][0]
1386
+ img_format = None
1387
+
1388
+ delete_empty_subdirectories(src)
1164
1389
 
1165
1390
  # Check if the most common extension is one of the specified image formats
1166
1391
  if most_common_extension in valid_ext:
@@ -1168,109 +1393,94 @@ def preprocess_img_data(settings):
1168
1393
  print(f'Found {extension_counts[most_common_extension]} {most_common_extension} files')
1169
1394
  else:
1170
1395
  print(f'Could not find any {valid_ext} files in {src}, only found {extension_counts[0]}')
1171
- return
1172
-
1173
- cmap = 'inferno'
1174
- figuresize = 20
1175
- normalize = True
1176
- save_dtype = 'uint16'
1177
- correct_illumination = False
1178
-
1179
- mask_channels = [settings['nucleus_channel'], settings['pathogen_channel'], settings['cell_channel']]
1180
- backgrounds = [settings['nucleus_background'], settings['pathogen_background'], settings['cell_background']]
1181
-
1182
- metadata_type = settings['metadata_type']
1183
- custom_regex = settings['custom_regex']
1184
- nr = settings['examples_to_plot']
1185
- plot = settings['plot']
1186
- batch_size = settings['batch_size']
1187
- timelapse = settings['timelapse']
1188
- remove_background = settings['remove_background']
1189
- lower_quantile = settings['lower_quantile']
1190
- randomize = settings['randomize']
1191
- all_to_mip = settings['all_to_mip']
1192
- pick_slice = settings['pick_slice']
1193
- skip_mode = settings['skip_mode']
1194
-
1195
- if metadata_type == 'cellvoyager':
1196
- regex = f'(?P<plateID>.*)_(?P<wellID>.*)_T(?P<timeID>.*)F(?P<fieldID>.*)L(?P<laserID>..)A(?P<AID>..)Z(?P<sliceID>.*)C(?P<chanID>.*){img_format}'
1197
- elif metadata_type == 'cq1':
1198
- regex = f'W(?P<wellID>.*)F(?P<fieldID>.*)T(?P<timeID>.*)Z(?P<sliceID>.*)C(?P<chanID>.*){img_format}'
1199
- elif metadata_type == 'nikon':
1200
- regex = f'(?P<plateID>.*)_(?P<wellID>.*)_T(?P<timeID>.*)F(?P<fieldID>.*)L(?P<laserID>..)A(?P<AID>..)Z(?P<sliceID>.*)C(?P<chanID>.*){img_format}'
1201
- elif metadata_type == 'zeis':
1202
- regex = f'(?P<plateID>.*)_(?P<wellID>.*)_T(?P<timeID>.*)F(?P<fieldID>.*)L(?P<laserID>..)A(?P<AID>..)Z(?P<sliceID>.*)C(?P<chanID>.*){img_format}'
1203
- elif metadata_type == 'leica':
1204
- regex = f'(?P<plateID>.*)_(?P<wellID>.*)_T(?P<timeID>.*)F(?P<fieldID>.*)L(?P<laserID>..)A(?P<AID>..)Z(?P<sliceID>.*)C(?P<chanID>.*){img_format}'
1205
- elif metadata_type == 'custom':
1206
- regex = f'({custom_regex}){img_format}'
1396
+ if os.path.exists(src+'/stack'):
1397
+ print('Found existing stack folder.')
1398
+ if os.path.exists(src+'/channel_stack'):
1399
+ print('Found existing channel_stack folder.')
1400
+ if os.path.exists(src+'/norm_channel_stack'):
1401
+ print('Found existing norm_channel_stack folder. Skipping preprocessing')
1402
+ return settings, src
1207
1403
 
1208
- print(f'regex mode:{metadata_type} regex:{regex}')
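For reference, the removed cellvoyager branch built its pattern with named groups; applied to a hypothetical filename it parses like this (standard re module, img_format assumed '.tif'):

    import re

    regex = r'(?P<plateID>.*)_(?P<wellID>.*)_T(?P<timeID>.*)F(?P<fieldID>.*)L(?P<laserID>..)A(?P<AID>..)Z(?P<sliceID>.*)C(?P<chanID>.*).tif'
    m = re.match(regex, 'plate1_B03_T0001F001L01A01Z01C01.tif')   # made-up filename
    if m:
        print(m.group('plateID'), m.group('wellID'), m.group('fieldID'), m.group('chanID'))
        # plate1 B03 001 01

The newer code builds the same kind of pattern through _get_regex(metadata_type, img_format, custom_regex).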
1404
+ mask_channels = [settings['nucleus_channel'], settings['cell_channel'], settings['pathogen_channel']]
1405
+ backgrounds = [settings['nucleus_background'], settings['cell_background'], settings['pathogen_background']]
1406
+
1407
+ settings, metadata_type, custom_regex, nr, plot, batch_size, timelapse, lower_percentile, randomize, all_to_mip, pick_slice, skip_mode, cmap, figuresize, normalize, save_dtype, test_mode, test_images, random_test = set_default_settings_preprocess_img_data(settings)
1408
+
1409
+ regex = _get_regex(metadata_type, img_format, custom_regex)
1410
+
1411
+ if test_mode:
1412
+
1413
+ print(f'Running spacr in test mode')
1414
+ settings['plot'] = True
1415
+ try:
1416
+ os.rmdir(os.path.join(src, 'test'))
1417
+ print(f"Deleted test directory: {os.path.join(src, 'test')}")
1418
+ except OSError as e:
1419
+ pass
1420
+
1421
+ src = _run_test_mode(settings['src'], regex, timelapse, test_images, random_test)
1422
+ settings['src'] = src
1423
+
1424
+ if img_format == None:
1425
+ if not os.path.exists(src+'/stack'):
1426
+ _merge_channels(src, plot=False)
1209
1427
 
1210
1428
  if not os.path.exists(src+'/stack'):
1211
- if timelapse:
1212
- _move_to_chan_folder(src, regex, timelapse, metadata_type)
1213
- else:
1214
- #_z_to_mip(src, regex, batch_size, pick_slice, skip_mode, metadata_type, img_format)
1215
- _rename_and_organize_image_files(src, regex, batch_size, pick_slice, skip_mode, metadata_type, img_format)
1216
-
1217
- #Make sure no batches will be of only one image
1218
- all_imgs = len(src+'/stack')
1219
- full_batches = all_imgs // batch_size
1220
- last_batch_size = all_imgs % batch_size
1221
-
1222
- # Check if the last batch is of size 1
1223
- if last_batch_size == 1:
1224
- # If there's only one batch and its size is 1, it's also an issue
1225
- if full_batches == 0:
1226
- raise ValueError("Only one batch of size 1 detected. Adjust the batch size.")
1227
- # If the last batch is of size 1, merge it with the second last batch
1228
- elif full_batches > 0:
1229
- raise ValueError("Last batch of size 1 detected. Adjust the batch size.")
1230
-
1231
- _merge_channels(src, plot=False)
1232
- if timelapse:
1233
- _create_movies_from_npy_per_channel(src+'/stack', fps=2)
1234
-
1235
- if plot:
1236
- print(f'plotting {nr} images from {src}/stack')
1237
- plot_arrays(src+'/stack', figuresize, cmap, nr=nr, normalize=normalize)
1238
- if all_to_mip:
1239
- _mip_all(src+'/stack')
1240
- if plot:
1241
- print(f'plotting {nr} images from {src}/stack')
1242
- plot_arrays(src+'/stack', figuresize, cmap, nr=nr, normalize=normalize)
1243
- #nr_of_stacks = len(src+'/channel_stack')
1244
-
1245
- _concatenate_channel(src+'/stack',
1246
- channels=mask_channels,
1247
- randomize=randomize,
1248
- timelapse=timelapse,
1249
- batch_size=batch_size)
1250
-
1251
- if plot:
1252
- print(f'plotting {nr} images from {src}/channel_stack')
1253
- _plot_4D_arrays(src+'/channel_stack', figuresize, cmap, nr_npz=1, nr=nr)
1254
- nr_of_chan_stacks = len(src+'/channel_stack')
1255
-
1256
- backgrounds, signal_to_noise, signal_thresholds = _get_lists_for_normalization(settings=settings)
1257
-
1258
- if not timelapse:
1259
- _normalize_stack(src+'/channel_stack',
1260
- backgrounds=backgrounds,
1261
- lower_quantile=lower_quantile,
1262
- save_dtype=save_dtype,
1263
- signal_thresholds=signal_thresholds,
1264
- correct_illumination=correct_illumination,
1265
- signal_to_noise=signal_to_noise,
1266
- remove_background=remove_background)
1267
- else:
1268
- _normalize_timelapse(src+'/channel_stack', lower_quantile=lower_quantile, save_dtype=np.float32)
1429
+ try:
1430
+ if not img_format == None:
1431
+ if timelapse:
1432
+ _move_to_chan_folder(src, regex, timelapse, metadata_type)
1433
+ else:
1434
+ _rename_and_organize_image_files(src, regex, batch_size, pick_slice, skip_mode, metadata_type, img_format)
1435
+
1436
+ #Make sure no batches will be of only one image
1437
+ all_imgs = len(src+'/stack')
1438
+ full_batches = all_imgs // batch_size
1439
+ last_batch_size = all_imgs % batch_size
1440
+
1441
+ # Check if the last batch is of size 1
1442
+ if last_batch_size == 1:
1443
+ # If there's only one batch and its size is 1, it's also an issue
1444
+ if full_batches == 0:
1445
+ raise ValueError("Only one batch of size 1 detected. Adjust the batch size.")
1446
+ # If the last batch is of size 1, merge it with the second last batch
1447
+ elif full_batches > 0:
1448
+ raise ValueError("Last batch of size 1 detected. Adjust the batch size.")
1269
1449
 
1450
+ _merge_channels(src, plot=False)
1451
+
1452
+ if timelapse:
1453
+ _create_movies_from_npy_per_channel(src+'/stack', fps=2)
1454
+
1455
+ if plot:
1456
+ print(f'plotting {nr} images from {src}/stack')
1457
+ plot_arrays(src+'/stack', figuresize, cmap, nr=nr, normalize=normalize)
1458
+ if all_to_mip:
1459
+ _mip_all(src+'/stack')
1460
+ if plot:
1461
+ print(f'plotting {nr} images from {src}/stack')
1462
+ plot_arrays(src+'/stack', figuresize, cmap, nr=nr, normalize=normalize)
1463
+ except Exception as e:
1464
+ print(f"Error: {e}")
1465
+
1466
+ backgrounds, signal_to_noise, signal_thresholds, remove_backgrounds = _get_lists_for_normalization(settings=settings)
1467
+
1468
+ concatenate_and_normalize(src+'/stack',
1469
+ mask_channels,
1470
+ randomize,
1471
+ timelapse,
1472
+ batch_size,
1473
+ backgrounds,
1474
+ remove_backgrounds,
1475
+ lower_percentile,
1476
+ np.float32,
1477
+ signal_to_noise,
1478
+ signal_thresholds)
1479
+
1270
1480
  if plot:
1271
1481
  _plot_4D_arrays(src+'/norm_channel_stack', nr_npz=1, nr=nr)
1272
1482
 
1273
- return
1483
+ return settings, src
1274
1484
 
1275
1485
  def _check_masks(batch, batch_filenames, output_folder):
1276
1486
  """
@@ -1292,8 +1502,7 @@ def _check_masks(batch, batch_filenames, output_folder):
1292
1502
  filtered_filenames = [f for f, exists in zip(batch_filenames, existing_files_mask) if exists]
1293
1503
 
1294
1504
  return np.array(filtered_batch), filtered_filenames
1295
-
1296
-
1505
+
1297
1506
  def _get_avg_object_size(masks):
1298
1507
  """
1299
1508
  Calculate the average size of objects in a list of masks.
@@ -1321,27 +1530,6 @@ def _get_avg_object_size(masks):
1321
1530
  return sum(object_areas) / len(object_areas)
1322
1531
  else:
1323
1532
  return 0 # Return 0 if no objects are found
1324
-
1325
- def _save_figure_v1(fig, src, text, dpi=300, ):
1326
- """
1327
- Save a figure to a specified location.
1328
-
1329
- Parameters:
1330
- fig (matplotlib.figure.Figure): The figure to be saved.
1331
- src (str): The source file path.
1332
- text (str): The text to be included in the figure name.
1333
- dpi (int, optional): The resolution of the saved figure. Defaults to 300.
1334
- """
1335
- save_folder = os.path.dirname(src)
1336
- obj_type = os.path.basename(src)
1337
- name = os.path.basename(save_folder)
1338
- save_folder = os.path.join(save_folder, 'figure')
1339
- os.makedirs(save_folder, exist_ok=True)
1340
- fig_name = f'{obj_type}_{name}_{text}.pdf'
1341
- save_location = os.path.join(save_folder, fig_name)
1342
- fig.savefig(save_location, bbox_inches='tight', dpi=dpi)
1343
- print(f'Saved single cell figure: {save_location}')
1344
- plt.close()
1345
1533
 
1346
1534
  def _save_figure(fig, src, text, dpi=300, i=1, all_folders=1):
1347
1535
  """
@@ -1362,7 +1550,8 @@ def _save_figure(fig, src, text, dpi=300, i=1, all_folders=1):
1362
1550
  save_location = os.path.join(save_folder, fig_name)
1363
1551
  fig.savefig(save_location, bbox_inches='tight', dpi=dpi)
1364
1552
  clear_output(wait=True)
1365
- print(f'\033[KProgress: {i}/{all_folders}, Saved single cell figure: {os.path.basename(save_location)}', end='\r', flush=True)
1553
+ print(f'Progress: {i}/{all_folders}, Saved single cell figure: {os.path.basename(save_location)}')
1554
+ #print(f'Progress: {i}/{all_folders}, Saved single cell figure: {os.path.basename(save_location)}', end='\r', flush=True)
1366
1555
  # Close and delete the figure to free up memory
1367
1556
  plt.close(fig)
1368
1557
  del fig
@@ -1500,9 +1689,10 @@ def _save_mask_timelapse_as_gif(masks, tracks_df, path, cmap, norm, filenames):
1500
1689
  ax.text(x, y, str(label_value), color='white', fontsize=24, ha='center', va='center')
1501
1690
 
1502
1691
  # Overlay tracks
1503
- for track in tracks_df['track_id'].unique():
1504
- _track = tracks_df[tracks_df['track_id'] == track]
1505
- ax.plot(_track['x'], _track['y'], '-w', linewidth=1)
1692
+ if tracks_df is not None:
1693
+ for track in tracks_df['track_id'].unique():
1694
+ _track = tracks_df[tracks_df['track_id'] == track]
1695
+ ax.plot(_track['x'], _track['y'], '-w', linewidth=1)
1506
1696
 
1507
1697
  anim = FuncAnimation(fig, _update, frames=len(masks), blit=False)
1508
1698
  anim.save(path, writer='pillow', fps=2, dpi=80) # Adjust DPI for size/quality
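The hunk above only guards the track overlay when tracks_df is None; a compact sketch of the same pattern in isolation, assuming masks is a list of 2D label arrays and tracks_df, when present, has 'track_id', 'x' and 'y' columns (the function name is illustrative):

import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation

def save_mask_gif(masks, path, tracks_df=None, fps=2):
    fig, ax = plt.subplots()

    def update(frame):
        ax.clear()
        ax.imshow(masks[frame], interpolation='nearest')
        if tracks_df is not None:
            # Only draw trajectories when tracking data was actually produced
            for track_id in tracks_df['track_id'].unique():
                track = tracks_df[tracks_df['track_id'] == track_id]
                ax.plot(track['x'], track['y'], '-w', linewidth=1)

    anim = FuncAnimation(fig, update, frames=len(masks), blit=False)
    anim.save(path, writer='pillow', fps=fps, dpi=80)
    plt.close(fig)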
@@ -1616,56 +1806,65 @@ def _load_and_concatenate_arrays(src, channels, cell_chann_dim, nucleus_chann_di
1616
1806
 
1617
1807
  # Iterate through each file in the reference folder
1618
1808
  for filename in os.listdir(reference_folder):
1619
-
1620
1809
  stack_ls = []
1621
- array_path = []
1622
-
1623
1810
  if filename.endswith('.npy'):
1624
- count+=1
1625
- # Initialize the concatenated array with the array from the reference folder
1626
- concatenated_array = np.load(os.path.join(reference_folder, filename))
1627
- if channels is not None:
1628
- concatenated_array = np.take(concatenated_array, channels, axis=2)
1811
+ count += 1
1812
+
1813
+ # Check if this file exists in all the other specified folders
1814
+ exists_in_all_folders = all(os.path.isfile(os.path.join(folder, filename)) for folder in folder_paths)
1815
+
1816
+ if exists_in_all_folders:
1817
+ # Load and potentially modify the array from the reference folder
1818
+ ref_array_path = os.path.join(reference_folder, filename)
1819
+ concatenated_array = np.load(ref_array_path)
1820
+
1821
+ if channels is not None:
1822
+ concatenated_array = np.take(concatenated_array, channels, axis=2)
1823
+
1824
+ # Add the array from the reference folder to 'stack_ls'
1629
1825
  stack_ls.append(concatenated_array)
1630
- # For each of the other folders, load the array and concatenate it
1631
- for folder in folder_paths[1:]:
1632
- array_path = os.path.join(folder, filename)
1633
- if os.path.isfile(array_path):
1826
+
1827
+ # For each of the other folders, load the array and add it to 'stack_ls'
1828
+ for folder in folder_paths[1:]:
1829
+ array_path = os.path.join(folder, filename)
1634
1830
  array = np.load(array_path)
1635
1831
  if array.ndim == 2:
1636
- array = np.expand_dims(array, axis=-1) # add an extra dimension if the array is 2D
1832
+ array = np.expand_dims(array, axis=-1) # Add an extra dimension if the array is 2D
1637
1833
  stack_ls.append(array)
1638
1834
 
1639
- stack_ls = [np.expand_dims(arr, axis=-1) if arr.ndim == 2 else arr for arr in stack_ls]
1640
- unique_shapes = {arr.shape[:-1] for arr in stack_ls}
1641
- if len(unique_shapes) > 1:
1642
- #max_dims = np.max(np.array(list(unique_shapes)), axis=0)
1643
- # Determine the maximum length of tuples in unique_shapes
1644
- max_tuple_length = max(len(shape) for shape in unique_shapes)
1645
- # Pad shorter tuples with zeros to make them all the same length
1646
- padded_shapes = [shape + (0,) * (max_tuple_length - len(shape)) for shape in unique_shapes]
1647
- # Now create a NumPy array and find the maximum dimensions
1648
- max_dims = np.max(np.array(padded_shapes), axis=0)
1649
- clear_output(wait=True)
1650
- print(f'\033[KWarning: arrays with multiple shapes found. Padding arrays to max X,Y dimentions {max_dims}', end='\r', flush=True)
1651
- padded_stack_ls = []
1652
- for arr in stack_ls:
1653
- pad_width = [(0, max_dim - dim) for max_dim, dim in zip(max_dims, arr.shape[:-1])]
1654
- pad_width.append((0, 0))
1655
- padded_arr = np.pad(arr, pad_width)
1656
- padded_stack_ls.append(padded_arr)
1657
- # Concatenate the padded arrays along the channel dimension (last dimension)
1658
- stack = np.concatenate(padded_stack_ls, axis=-1)
1835
+ if len(stack_ls) > 0:
1836
+ stack_ls = [np.expand_dims(arr, axis=-1) if arr.ndim == 2 else arr for arr in stack_ls]
1837
+ unique_shapes = {arr.shape[:-1] for arr in stack_ls}
1838
+ if len(unique_shapes) > 1:
1839
+ #max_dims = np.max(np.array(list(unique_shapes)), axis=0)
1840
+ # Determine the maximum length of tuples in unique_shapes
1841
+ max_tuple_length = max(len(shape) for shape in unique_shapes)
1842
+ # Pad shorter tuples with zeros to make them all the same length
1843
+ padded_shapes = [shape + (0,) * (max_tuple_length - len(shape)) for shape in unique_shapes]
1844
+ # Now create a NumPy array and find the maximum dimensions
1845
+ max_dims = np.max(np.array(padded_shapes), axis=0)
1846
+ #clear_output(wait=True)
1847
+ print(f'Warning: arrays with multiple shapes found. Padding arrays to max X,Y dimensions {max_dims}')
1848
+ #print(f'Warning: arrays with multiple shapes found. Padding arrays to max X,Y dimensions {max_dims}', end='\r', flush=True)
1849
+ padded_stack_ls = []
1850
+ for arr in stack_ls:
1851
+ pad_width = [(0, max_dim - dim) for max_dim, dim in zip(max_dims, arr.shape[:-1])]
1852
+ pad_width.append((0, 0))
1853
+ padded_arr = np.pad(arr, pad_width)
1854
+ padded_stack_ls.append(padded_arr)
1855
+ # Concatenate the padded arrays along the channel dimension (last dimension)
1856
+ stack = np.concatenate(padded_stack_ls, axis=-1)
1659
1857
 
1660
- else:
1661
- stack = np.concatenate(stack_ls, axis=-1)
1858
+ else:
1859
+ stack = np.concatenate(stack_ls, axis=-1)
1662
1860
 
1663
- if stack.shape[-1] > concatenated_array.shape[-1]:
1664
- output_path = os.path.join(output_folder, filename)
1665
- np.save(output_path, stack)
1861
+ if stack.shape[-1] > concatenated_array.shape[-1]:
1862
+ output_path = os.path.join(output_folder, filename)
1863
+ np.save(output_path, stack)
1666
1864
 
1667
- clear_output(wait=True)
1668
- #print(f'\033[KFiles merged: {count}/{all_imgs}', end='\r', flush=True)
1865
+ #clear_output(wait=True)
1866
+ print(f'Files merged: {count}/{all_imgs}')
1867
+ #print(f'Files merged: {count}/{all_imgs}', end='\r', flush=True)
1669
1868
  return
1670
1869
 
1671
1870
  def _read_db(db_loc, tables):
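The rewritten hunk above pads arrays of unequal X,Y size to a common shape before concatenating them along the channel axis; the same idea as a standalone sketch, assuming every input is a 2D or 3D numpy array (pad_and_stack is an illustrative name, not a spacr function):

import numpy as np

def pad_and_stack(arrays):
    # Promote 2D arrays to (H, W, 1) so every input has a channel axis
    arrays = [a[..., None] if a.ndim == 2 else a for a in arrays]
    # Pad each array up to the largest spatial shape, then join the channels
    max_dims = np.max([a.shape[:-1] for a in arrays], axis=0)
    padded = []
    for a in arrays:
        pad_width = [(0, m - d) for m, d in zip(max_dims, a.shape[:-1])] + [(0, 0)]
        padded.append(np.pad(a, pad_width))
    return np.concatenate(padded, axis=-1)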
@@ -2139,133 +2338,85 @@ def _read_mask(mask_path):
2139
2338
  if mask.dtype != np.uint16:
2140
2339
  mask = img_as_uint(mask)
2141
2340
  return mask
2341
+
2342
+
2343
+ def convert_numpy_to_tiff(folder_path, limit=None):
2344
+ """
2345
+ Converts all numpy files in a folder to TIFF format and saves them in a subdirectory 'tiff'.
2142
2346
 
2347
+ Args:
2348
+ folder_path (str): The path to the folder containing numpy files.
+ limit (int, optional): Convert at most this many files; None converts all. Defaults to None.
2349
+ """
2350
+ # Create the subdirectory 'tiff' within the specified folder if it doesn't already exist
2351
+ tiff_subdir = os.path.join(folder_path, 'tiff')
2352
+ os.makedirs(tiff_subdir, exist_ok=True)
2353
+
2354
+ files = os.listdir(folder_path)
2355
+
2356
+ npy_files = [f for f in files if f.endswith('.npy')]
2143
2357
 
2358
+ # Iterate over all files in the folder
2359
+ for i, filename in enumerate(files):
2360
+ if limit is not None and i >= limit:
2361
+ break
2362
+ if not filename.endswith('.npy'):
2363
+ continue
2364
+
2365
+ # Construct the full file path
2366
+ file_path = os.path.join(folder_path, filename)
2367
+ # Load the numpy file
2368
+ numpy_array = np.load(file_path)
2369
+
2370
+ # Construct the output TIFF file path
2371
+ tiff_filename = os.path.splitext(filename)[0] + '.tif'
2372
+ tiff_file_path = os.path.join(tiff_subdir, tiff_filename)
2373
+
2374
+ # Save the numpy array as a TIFF file
2375
+ tifffile.imwrite(tiff_file_path, numpy_array)
2376
+
2377
+ print(f"Converted {filename} to {tiff_filename} and saved in 'tiff' subdirectory.")
2378
+ return
2144
2379
 
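A usage sketch for the converter above; the path is a placeholder:

# Convert the first five .npy stacks in a folder to TIFF files under <folder>/tiff
convert_numpy_to_tiff('/path/to/experiment/stack', limit=5)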
2145
-
2146
-
2147
-
2148
-
2149
-
2150
-
2151
-
2152
-
2153
-
2154
-
2155
-
2156
-
2157
-
2158
-
2159
-
2160
-
2161
-
2162
-
2163
-
2164
-
2165
-
2166
-
2167
-
2168
-
2169
-
2170
-
2171
-
2172
-
2173
-
2174
-
2175
-
2176
-
2177
-
2178
-
2179
-
2180
-
2181
-
2182
-
2183
-
2184
-
2185
-
2186
-
2187
-
2188
-
2189
-
2190
-
2191
-
2192
-
2193
-
2194
-
2195
-
2196
-
2197
-
2198
-
2199
-
2200
-
2201
-
2202
-
2203
-
2204
-
2205
-
2206
-
2207
-
2208
-
2209
-
2210
-
2211
-
2212
-
2213
-
2214
-
2215
-
2216
-
2217
-
2218
-
2219
-
2220
-
2221
-
2222
-
2223
-
2224
-
2225
-
2226
-
2227
-
2228
-
2229
-
2230
-
2231
-
2232
-
2233
-
2234
-
2235
-
2236
-
2237
-
2238
-
2239
-
2240
-
2241
-
2242
-
2243
-
2244
-
2245
-
2246
-
2247
-
2248
-
2249
-
2250
-
2251
-
2252
-
2253
-
2254
-
2255
-
2256
-
2257
-
2258
-
2259
-
2260
-
2261
-
2262
-
2263
-
2264
-
2265
-
2266
-
2267
-
2268
-
2269
-
2270
-
2271
-
2380
+ def generate_cellpose_train_test(src, test_split=0.1):
2381
+ mask_src = os.path.join(src, 'masks')
2382
+ img_paths = glob.glob(os.path.join(src, '*.tif'))
2383
+ img_filenames = [os.path.basename(file) for file in img_paths]
2384
+ img_filenames = [file for file in img_filenames if os.path.exists(os.path.join(mask_src, file))]
2385
+ print(f'Found {len(img_filenames)} images with masks')
2386
+
2387
+ random.shuffle(img_filenames)
2388
+ split_index = int(len(img_filenames) * test_split)
2389
+ train_files = img_filenames[split_index:]
2390
+ test_files = img_filenames[:split_index]
2391
+ list_of_lists = [test_files, train_files]
2392
+ print(f'Split dataset into Train {len(train_files)} and Test {len(test_files)} files')
2393
+
2394
+ train_dir = os.path.join(os.path.dirname(src), 'train')
2395
+ train_dir_masks = os.path.join(train_dir, 'masks')
2396
+ test_dir = os.path.join(os.path.dirname(src), 'test')
2397
+ test_dir_masks = os.path.join(test_dir, 'masks')
2398
+
2399
+ os.makedirs(train_dir, exist_ok=True)
2400
+ os.makedirs(train_dir_masks, exist_ok=True)
2401
+ os.makedirs(test_dir, exist_ok=True)
2402
+ os.makedirs(test_dir_masks, exist_ok=True)
2403
+
2404
+ for i, ls in enumerate(list_of_lists):
2405
+ if i == 0:
2406
+ dst = test_dir
2407
+ dst_mask = test_dir_masks
2408
+ _type = 'Test'
2409
+ else:
2410
+ dst = train_dir
2411
+ dst_mask = train_dir_masks
2412
+ _type = 'Train'
2413
+
2414
+ for idx, filename in enumerate(ls):
2415
+ img_path = os.path.join(src, filename)
2416
+ mask_path = os.path.join(mask_src, filename)
2417
+ new_img_path = os.path.join(dst, filename)
2418
+ new_mask_path = os.path.join(dst_mask, filename)
2419
+ shutil.copy(img_path, new_img_path)
2420
+ shutil.copy(mask_path, new_mask_path)
2421
+ print(f'Copied {idx+1}/{len(ls)} images to {_type} set', end='\r', flush=True)
2422
+
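A usage sketch for the split helper above, assuming images sit directly in src and their masks in src/masks with matching file names (the path is a placeholder):

# Hold out 10% of annotated images; copies land in sibling 'train' and 'test'
# folders created next to the source directory
generate_cellpose_train_test('/path/to/annotated_images', test_split=0.1)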