spacr 0.0.2__py3-none-any.whl → 0.0.6__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
spacr/io.py CHANGED
@@ -1,4 +1,4 @@
1
- import os, re, sqlite3, gc, torch, time, random, shutil, cv2, tarfile, cellpose
1
+ import os, re, sqlite3, gc, torch, time, random, shutil, cv2, tarfile, cellpose, glob
2
2
  import numpy as np
3
3
  import pandas as pd
4
4
  import tifffile
@@ -19,7 +19,6 @@ from io import BytesIO
19
19
  from IPython.display import display, clear_output
20
20
  from multiprocessing import Pool, cpu_count
21
21
  from torch.utils.data import Dataset
22
- import seaborn as sns
23
22
  import matplotlib.pyplot as plt
24
23
  from torchvision.transforms import ToTensor
25
24
 
@@ -45,19 +44,19 @@ def _load_images_and_labels(image_files, label_files, circular=False, invert=Fal
45
44
 
46
45
  if not image_files is None and not label_files is None:
47
46
  for img_file, lbl_file in zip(image_files, label_files):
48
- image = cellpose.imread(img_file)
47
+ image = cellpose.io.imread(img_file)
49
48
  if invert:
50
49
  image = invert_image(image)
51
50
  if circular:
52
51
  image = apply_mask(image, output_value=0)
53
- label = cellpose.imread(lbl_file)
52
+ label = cellpose.io.imread(lbl_file)
54
53
  if image.max() > 1:
55
54
  image = image / image.max()
56
55
  images.append(image)
57
56
  labels.append(label)
58
57
  elif not image_files is None:
59
58
  for img_file in image_files:
60
- image = cellpose.imread(img_file)
59
+ image = cellpose.io.imread(img_file)
61
60
  if invert:
62
61
  image = invert_image(image)
63
62
  if circular:
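
Note: both versions of this loader rely on cellpose's image reader; 0.0.6 simply calls it through the cellpose.io namespace, which is where current cellpose releases expose it. A minimal sketch of the call, assuming an illustrative single-channel TIFF path:

# Hedged sketch, not part of the package: read an image the way the updated loader does.
from cellpose import io as cellpose_io

image = cellpose_io.imread('example_channel.tif')  # path is illustrative
print(image.shape, image.dtype)
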
@@ -67,7 +66,7 @@ def _load_images_and_labels(image_files, label_files, circular=False, invert=Fal
67
66
  images.append(image)
68
67
  elif not image_files is None:
69
68
  for lbl_file in label_files:
70
- label = cellpose.imread(lbl_file)
69
+ label = cellpose.io.imread(lbl_file)
71
70
  if circular:
72
71
  label = apply_mask(label, output_value=0)
73
72
  labels.append(label)
@@ -88,15 +87,13 @@ def _load_images_and_labels(image_files, label_files, circular=False, invert=Fal
88
87
  print(f'image shape: {images[0].shape}, image type: images[0].shape mask shape: {labels[0].shape}, image type: labels[0].shape')
89
88
  return images, labels, image_names, label_names
90
89
 
91
- def _load_normalized_images_and_labels(image_files, label_files, signal_thresholds=[1000], channels=None, percentiles=None, circular=False, invert=False, visualize=False):
90
+ def _load_normalized_images_and_labels(image_files, label_files, channels=None, percentiles=None, circular=False, invert=False, visualize=False, remove_background=False, background=0, Signal_to_noise=10):
92
91
 
93
92
  from .plot import normalize_and_visualize
94
93
  from .utils import invert_image, apply_mask
95
-
96
- if isinstance(signal_thresholds, int):
97
- signal_thresholds = [signal_thresholds] * (len(channels) if channels is not None else 1)
98
- elif not isinstance(signal_thresholds, list):
99
- signal_thresholds = [signal_thresholds]
94
+
95
+ signal_thresholds = background*Signal_to_noise
96
+ lower_percentile = 2
100
97
 
101
98
  images = []
102
99
  labels = []
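
In 0.0.6 the per-channel signal_thresholds argument is gone: the loader derives one threshold as background * Signal_to_noise and, further down in this function, walks a list of high percentiles until one exceeds that threshold. A standalone sketch of that selection logic, assuming synthetic data and illustrative parameter values:

import numpy as np

def pick_upper_percentile(channel, background=0, signal_to_noise=10,
                          candidates=(98, 99, 99.9, 99.99, 99.999)):
    # Mirrors the loop in the diff: return the first high percentile whose value clears
    # background * signal_to_noise; as a simplification, fall back to the last candidate.
    threshold = background * signal_to_noise
    fallback = np.percentile(channel, candidates[-1])
    for p in candidates:
        value = np.percentile(channel, p)
        if value > threshold:
            return value
    return fallback

rng = np.random.default_rng(0)
synthetic = rng.poisson(120, size=(256, 256)).astype(np.float32)
print(pick_upper_percentile(synthetic, background=100, signal_to_noise=1.2))
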
@@ -109,18 +106,22 @@ def _load_normalized_images_and_labels(image_files, label_files, signal_threshol
109
106
 
110
107
  if label_files is not None:
111
108
  label_names = [os.path.basename(f) for f in label_files]
109
+ label_dir = os.path.dirname(label_files[0])
112
110
 
113
111
  # Load images and check percentiles
114
112
  for i,img_file in enumerate(image_files):
115
- image = cellpose.imread(img_file)
113
+ image = cellpose.io.imread(img_file)
116
114
  if invert:
117
115
  image = invert_image(image)
118
116
  if circular:
119
117
  image = apply_mask(image, output_value=0)
120
-
118
+
121
119
  # If specific channels are specified, select them
122
120
  if channels is not None and image.ndim == 3:
123
121
  image = image[..., channels]
122
+
123
+ if remove_background:
124
+ image[image < background] = 0
124
125
 
125
126
  if image.ndim < 3:
126
127
  image = np.expand_dims(image, axis=-1)
@@ -128,11 +129,11 @@ def _load_normalized_images_and_labels(image_files, label_files, signal_threshol
128
129
  images.append(image)
129
130
  if percentiles is None:
130
131
  for c in range(image.shape[-1]):
131
- p1 = np.percentile(image[..., c], 1)
132
+ p1 = np.percentile(image[..., c], lower_percentile)
132
133
  percentiles_1[c].append(p1)
133
- for percentile in [99, 99.9, 99.99, 99.999]:
134
+ for percentile in [98, 99, 99.9, 99.99, 99.999]:
134
135
  p = np.percentile(image[..., c], percentile)
135
- if p > signal_thresholds[min(c, len(signal_thresholds)-1)]:
136
+ if p > signal_thresholds:
136
137
  percentiles_99[c].append(p)
137
138
  break
138
139
 
@@ -141,8 +142,8 @@ def _load_normalized_images_and_labels(image_files, label_files, signal_threshol
141
142
  for image in images:
142
143
  normalized_image = np.zeros_like(image, dtype=np.float32)
143
144
  for c in range(image.shape[-1]):
144
- high_p = np.percentile(image[..., c], percentiles[1])
145
145
  low_p = np.percentile(image[..., c], percentiles[0])
146
+ high_p = np.percentile(image[..., c], percentiles[1])
146
147
  normalized_image[..., c] = rescale_intensity(image[..., c], in_range=(low_p, high_p), out_range=(0, 1))
147
148
  normalized_images.append(normalized_image)
148
149
  if visualize:
@@ -153,23 +154,26 @@ def _load_normalized_images_and_labels(image_files, label_files, signal_threshol
153
154
  avg_p1 = [np.mean(p) for p in percentiles_1]
154
155
  avg_p99 = [np.mean(p) if len(p) > 0 else np.mean(percentiles_1[i]) for i, p in enumerate(percentiles_99)]
155
156
 
157
+ print(f'Average 1st percentiles: {avg_p1}, Average 99th percentiles: {avg_p99}')
158
+
156
159
  normalized_images = []
157
160
  for image in images:
158
161
  normalized_image = np.zeros_like(image, dtype=np.float32)
159
- for c in range(image.shape[-1]):
160
- normalized_image[..., c] = rescale_intensity(image[..., c], in_range=(avg_p1[c], avg_p99[c]), out_range=(0, 1))
161
- normalized_images.append(normalized_image)
162
- if visualize:
163
- normalize_and_visualize(image, normalized_image, title=f"Channel {c+1} Normalized")
162
+ for c in range(image.shape[-1]):
163
+ normalized_image[..., c] = rescale_intensity(image[..., c], in_range=(avg_p1[c], avg_p99[c]), out_range=(0, 1))
164
+ normalized_images.append(normalized_image)
165
+ if visualize:
166
+ normalize_and_visualize(image, normalized_image, title=f"Channel {c+1} Normalized")
164
167
 
165
168
  if not image_files is None:
166
169
  image_dir = os.path.dirname(image_files[0])
170
+
167
171
  else:
168
172
  image_dir = None
169
173
 
170
174
  if label_files is not None:
171
175
  for lbl_file in label_files:
172
- labels.append(cellpose.imread(lbl_file))
176
+ labels.append(cellpose.io.imread(lbl_file))
173
177
  else:
174
178
  label_names = []
175
179
  label_dir = None
@@ -178,86 +182,8 @@ def _load_normalized_images_and_labels(image_files, label_files, signal_threshol
178
182
 
179
183
  return normalized_images, labels, image_names, label_names
180
184
 
181
- class MyDataset(Dataset):
182
- """
183
- Custom dataset class for loading and processing image data.
184
-
185
- Args:
186
- data_dir (str): The directory path where the data is stored.
187
- loader_classes (list): List of class names.
188
- transform (callable, optional): A function/transform that takes in an PIL image and returns a transformed version. Default is None.
189
- shuffle (bool, optional): Whether to shuffle the dataset. Default is True.
190
- load_to_memory (bool, optional): Whether to load images into memory. Default is False.
191
-
192
- Attributes:
193
- data_dir (str): The directory path where the data is stored.
194
- classes (list): List of class names.
195
- transform (callable): A function/transform that takes in an PIL image and returns a transformed version.
196
- shuffle (bool): Whether to shuffle the dataset.
197
- load_to_memory (bool): Whether to load images into memory.
198
- filenames (list): List of file paths.
199
- labels (list): List of labels corresponding to each file.
200
- images (list): List of loaded images.
201
- image_cache (Cache): Cache object for storing loaded images.
202
-
203
- Methods:
204
- load_image: Load an image from file.
205
- __len__: Get the length of the dataset.
206
- shuffle_dataset: Shuffle the dataset.
207
- __getitem__: Get an item from the dataset.
208
-
209
- """
210
-
211
- def _init__(self, data_dir, loader_classes, transform=None, shuffle=True, load_to_memory=False):
212
- from .utils import Cache
213
- self.data_dir = data_dir
214
- self.classes = loader_classes
215
- self.transform = transform
216
- self.shuffle = shuffle
217
- self.load_to_memory = load_to_memory
218
- self.filenames = []
219
- self.labels = []
220
- self.images = []
221
- self.image_cache = Cache(50)
222
- for class_name in self.classes:
223
- class_path = os.path.join(data_dir, class_name)
224
- class_files = [os.path.join(class_path, f) for f in os.listdir(class_path) if os.path.isfile(os.path.join(class_path, f))]
225
- self.filenames.extend(class_files)
226
- self.labels.extend([self.classes.index(class_name)] * len(class_files))
227
- if self.shuffle:
228
- self.shuffle_dataset()
229
- if self.load_to_memory:
230
- self.images = [self.load_image(f) for f in self.filenames]
231
-
232
- def load_image(self, img_path):
233
- img = self.image_cache.get(img_path)
234
- if img is None:
235
- img = Image.open(img_path).convert('RGB')
236
- self.image_cache.put(img_path, img)
237
- return img
238
-
239
- def _len__(self):
240
- return len(self.filenames)
241
-
242
- def shuffle_dataset(self):
243
- combined = list(zip(self.filenames, self.labels))
244
- random.shuffle(combined)
245
- self.filenames, self.labels = zip(*combined)
246
-
247
- def _getitem__(self, index):
248
- label = self.labels[index]
249
- filename = self.filenames[index]
250
- if self.load_to_memory:
251
- img = self.images[index]
252
- else:
253
- img = self.load_image(filename)
254
- if self.transform is not None:
255
- img = self.transform(img)
256
- else:
257
- img = ToTensor()(img)
258
- return img, label, filename
259
-
260
185
  class CombineLoaders:
186
+
261
187
  """
262
188
  A class that combines multiple data loaders into a single iterator.
263
189
 
@@ -398,7 +324,7 @@ class MyDataset(Dataset):
398
324
  specific_labels (list, optional): A list of specific labels corresponding to the specific files. Default is None.
399
325
  """
400
326
 
401
- def _init__(self, data_dir, loader_classes, transform=None, shuffle=True, pin_memory=False, specific_files=None, specific_labels=None):
327
+ def __init__(self, data_dir, loader_classes, transform=None, shuffle=True, pin_memory=False, specific_files=None, specific_labels=None):
402
328
  self.data_dir = data_dir
403
329
  self.classes = loader_classes
404
330
  self.transform = transform
@@ -427,7 +353,7 @@ class MyDataset(Dataset):
427
353
  img = Image.open(img_path).convert('RGB')
428
354
  return img
429
355
 
430
- def _len__(self):
356
+ def __len__(self):
431
357
  return len(self.filenames)
432
358
 
433
359
  def shuffle_dataset(self):
@@ -439,7 +365,7 @@ class MyDataset(Dataset):
439
365
  filename = os.path.basename(filepath) # Get just the filename from the full path
440
366
  return filename.split('_')[0]
441
367
 
442
- def _getitem__(self, index):
368
+ def __getitem__(self, index):
443
369
  label = self.labels[index]
444
370
  filename = self.filenames[index]
445
371
  img = self.load_image(filename)
@@ -527,7 +453,7 @@ class TarImageDataset(Dataset):
527
453
 
528
454
  return img, m.name
529
455
 
530
- @log_function_call
456
+ #@log_function_call
531
457
  def _rename_and_organize_image_files(src, regex, batch_size=100, pick_slice=False, skip_mode='01', metadata_type='', img_format='.tif'):
532
458
  """
533
459
  Convert z-stack images to maximum intensity projection (MIP) images.
@@ -600,40 +526,47 @@ def _rename_and_organize_image_files(src, regex, batch_size=100, pick_slice=Fals
600
526
  shutil.move(os.path.join(src, filename), move)
601
527
  return
602
528
 
603
- def _merge_file(chan_dirs, stack_dir, file):
529
+ def _merge_file(chan_dirs, stack_dir, file_name):
604
530
  """
605
- Merge multiple channels into a single stack and save it as a numpy array.
606
-
531
+ Merge multiple channels into a single stack and save it as a numpy array, using os module for path handling.
532
+
607
533
  Args:
608
534
  chan_dirs (list): List of directories containing channel images.
609
535
  stack_dir (str): Directory to save the merged stack.
610
- file (str): File name of the channel image.
536
+ file_name (str): File name of the channel image.
611
537
 
612
538
  Returns:
613
539
  None
614
540
  """
615
- chan1 = cv2.imread(str(file), -1)
616
- chan1 = np.expand_dims(chan1, axis=2)
617
- new_file = stack_dir / (file.stem + '.npy')
618
- if not new_file.exists():
619
- stack_dir.mkdir(exist_ok=True)
620
- channels = [chan1]
621
- for chan_dir in chan_dirs[1:]:
622
- img = cv2.imread(str(chan_dir / file.name), -1)
541
+ # Construct new file path
542
+ file_root, file_ext = os.path.splitext(file_name)
543
+ new_file = os.path.join(stack_dir, file_root + '.npy')
544
+
545
+ # Check if the new file exists and create the stack directory if it doesn't
546
+ if not os.path.exists(new_file):
547
+ os.makedirs(stack_dir, exist_ok=True)
548
+ channels = []
549
+ for i, chan_dir in enumerate(chan_dirs):
550
+ img_path = os.path.join(chan_dir, file_name)
551
+ img = cv2.imread(img_path, -1)
552
+ if img is None:
553
+ print(f"Warning: Failed to read image {img_path}")
554
+ continue
623
555
  chan = np.expand_dims(img, axis=2)
624
556
  channels.append(chan)
625
- stack = np.concatenate(channels, axis=2)
626
- np.save(new_file, stack)
557
+ del img # Explicitly delete the reference to the image to free up memory
558
+ if i % 10 == 0: # Periodically suggest garbage collection
559
+ gc.collect()
560
+
561
+ if channels:
562
+ stack = np.concatenate(channels, axis=2)
563
+ np.save(new_file, stack)
564
+ else:
565
+ print(f"No valid channels to merge for file {file_name}")
627
566
 
628
567
  def _is_dir_empty(dir_path):
629
568
  """
630
- Check if a directory is empty.
631
-
632
- Args:
633
- dir_path (str): The path to the directory.
634
-
635
- Returns:
636
- bool: True if the directory is empty, False otherwise.
569
+ Check if a directory is empty using os module.
637
570
  """
638
571
  return len(os.listdir(dir_path)) == 0
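
The reworked _merge_file above takes a bare file name, builds paths with os.path, skips unreadable channels, and stacks the remaining channel images along a new third axis before saving. A minimal, self-contained sketch of that stacking step with synthetic arrays (no cv2 or file I/O):

import numpy as np

# Three fake single-channel images standing in for cv2.imread results.
channel_images = [np.full((4, 4), i, dtype=np.uint16) for i in range(3)]
stack = np.concatenate([np.expand_dims(img, axis=2) for img in channel_images], axis=2)
print(stack.shape)  # (4, 4, 3) -- the array _merge_file would np.save() into the stack folder
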
639
572
 
@@ -733,7 +666,7 @@ def _move_to_chan_folder(src, regex, timelapse=False, metadata_type=''):
733
666
  shutil.move(os.path.join(src, filename), move)
734
667
  return
735
668
 
736
- def _merge_channels(src, plot=False):
669
+ def _merge_channels_v2(src, plot=False):
737
670
  from .plot import plot_arrays
738
671
  """
739
672
  Merge the channels in the given source directory and save the merged files in a 'stack' directory.
@@ -761,7 +694,8 @@ def _merge_channels(src, plot=False):
761
694
  print(f'generated folder with merged arrays: {stack_dir}')
762
695
 
763
696
  if _is_dir_empty(stack_dir):
764
- with Pool(cpu_count()) as pool:
697
+ with Pool(max(cpu_count() // 2, 1)) as pool:
698
+ #with Pool(cpu_count()) as pool:
765
699
  merge_func = partial(_merge_file, chan_dirs, stack_dir)
766
700
  pool.map(merge_func, dir_files)
767
701
 
@@ -773,6 +707,47 @@ def _merge_channels(src, plot=False):
773
707
 
774
708
  return
775
709
 
710
+ def _merge_channels(src, plot=False):
711
+ """
712
+ Merge the channels in the given source directory and save the merged files in a 'stack' directory without using multiprocessing.
713
+ """
714
+
715
+ from .plot import plot_arrays
716
+
717
+ stack_dir = os.path.join(src, 'stack')
718
+ allowed_names = ['01', '02', '03', '04', '00', '1', '2', '3', '4', '0']
719
+
720
+ # List directories that match the allowed names
721
+ chan_dirs = [d for d in os.listdir(src) if os.path.isdir(os.path.join(src, d)) and d in allowed_names]
722
+ chan_dirs.sort()
723
+
724
+ print(f'List of folders in src: {chan_dirs}. Single channel folders.')
725
+ start_time = time.time()
726
+
727
+ # Assuming chan_dirs[0] is not empty and exists, adjust according to your logic
728
+ first_dir_path = os.path.join(src, chan_dirs[0])
729
+ dir_files = os.listdir(first_dir_path)
730
+
731
+ # Create the 'stack' directory if it doesn't exist
732
+ if not os.path.exists(stack_dir):
733
+ os.makedirs(stack_dir, exist_ok=True)
734
+ print(f'Generated folder with merged arrays: {stack_dir}')
735
+
736
+ if _is_dir_empty(stack_dir):
737
+ for file_name in dir_files:
738
+ full_file_path = os.path.join(first_dir_path, file_name)
739
+ if os.path.isfile(full_file_path):
740
+ _merge_file([os.path.join(src, d) for d in chan_dirs], stack_dir, file_name)
741
+
742
+ elapsed_time = time.time() - start_time
743
+ avg_time = elapsed_time / len(dir_files) if dir_files else 0
744
+ print(f'Average Time: {avg_time:.3f} sec, Total Elapsed Time: {elapsed_time:.3f} sec')
745
+
746
+ if plot:
747
+ plot_arrays(os.path.join(src, 'stack'))
748
+
749
+ return
750
+
776
751
  def _mip_all(src, include_first_chan=True):
777
752
 
778
753
  """
@@ -821,7 +796,7 @@ def _mip_all(src, include_first_chan=True):
821
796
  np.save(os.path.join(src, filename), concatenated)
822
797
  return
823
798
 
824
- @log_function_call
799
+ #@log_function_call
825
800
  def _concatenate_channel(src, channels, randomize=True, timelapse=False, batch_size=100):
826
801
  """
827
802
  Concatenates channel data from multiple files and saves the concatenated data as numpy arrays.
@@ -912,6 +887,223 @@ def _concatenate_channel(src, channels, randomize=True, timelapse=False, batch_s
912
887
  print(f'All files concatenated and saved to:{channel_stack_loc}')
913
888
  return channel_stack_loc
914
889
 
890
+ def concatenate_and_normalize(src, channels, randomize=True, timelapse=False, batch_size=100, backgrounds=[100, 100, 100], remove_backgrounds=[False, False, False], lower_percentile=2, save_dtype=np.float32, signal_to_noise=[5, 5, 5], signal_thresholds=[1000, 1000, 1000]):
891
+ """
892
+ Concatenates and normalizes channel data from multiple files and saves the normalized data.
893
+
894
+ Args:
895
+ src (str): The source directory containing the channel data files.
896
+ channels (list): The list of channel indices to be concatenated and normalized.
897
+ randomize (bool, optional): Whether to randomize the order of the files. Defaults to True.
898
+ timelapse (bool, optional): Whether the channel data is from a timelapse experiment. Defaults to False.
899
+ batch_size (int, optional): The number of files to be processed in each batch. Defaults to 100.
900
+ backgrounds (list, optional): Background values for each channel. Defaults to [100, 100, 100].
901
+ remove_backgrounds (list, optional): Whether to remove background values for each channel. Defaults to [False, False, False].
902
+ lower_percentile (int, optional): Lower percentile value for normalization. Defaults to 2.
903
+ save_dtype (numpy.dtype, optional): Data type for saving the normalized stack. Defaults to np.float32.
904
+ signal_to_noise (list, optional): Signal-to-noise ratio thresholds for each channel. Defaults to [5, 5, 5].
905
+ signal_thresholds (list, optional): Signal thresholds for each channel. Defaults to [1000, 1000, 1000].
906
+
907
+ Returns:
908
+ str: The directory path where the concatenated and normalized channel data is saved.
909
+ """
910
+ channels = [item for item in channels if item is not None]
911
+ paths = []
912
+ output_fldr = os.path.join(os.path.dirname(src), 'norm_channel_stack')
913
+ os.makedirs(output_fldr, exist_ok=True)
914
+
915
+ if timelapse:
916
+ try:
917
+ time_stack_path_lists = _generate_time_lists(os.listdir(src))
918
+ for i, time_stack_list in enumerate(time_stack_path_lists):
919
+ stack_region = []
920
+ filenames_region = []
921
+ for idx, file in enumerate(time_stack_list):
922
+ path = os.path.join(src, file)
923
+ if idx == 0:
924
+ parts = file.split('_')
925
+ name = parts[0] + '_' + parts[1] + '_' + parts[2]
926
+ array = np.load(path)
927
+ array = np.take(array, channels, axis=2)
928
+ stack_region.append(array)
929
+ filenames_region.append(os.path.basename(path))
930
+ print(f'Region {i + 1}/ {len(time_stack_path_lists)}', end='\r', flush=True)
931
+ stack = np.stack(stack_region)
932
+ normalized_stack = _normalize_stack(stack, backgrounds, remove_backgrounds, lower_percentile, save_dtype, signal_to_noise, signal_thresholds)
933
+ save_loc = os.path.join(output_fldr, f'{name}_norm_timelapse.npz')
934
+ np.savez(save_loc, data=normalized_stack, filenames=filenames_region)
935
+ print(save_loc)
936
+ del stack, normalized_stack
937
+ except Exception as e:
938
+ print(f"Error processing files, make sure filenames metadata is structured plate_well_field_time.npy")
939
+ print(f"Error: {e}")
940
+ else:
941
+ for file in os.listdir(src):
942
+ if file.endswith('.npy'):
943
+ path = os.path.join(src, file)
944
+ paths.append(path)
945
+ if randomize:
946
+ random.shuffle(paths)
947
+ nr_files = len(paths)
948
+ batch_index = 0
949
+ stack_ls = []
950
+ filenames_batch = []
951
+
952
+ for i, path in enumerate(paths):
953
+ array = np.load(path)
954
+ array = np.take(array, channels, axis=2)
955
+ stack_ls.append(array)
956
+ filenames_batch.append(os.path.basename(path))
957
+ print(f'Concatenated: {i + 1}/{nr_files} files')
958
+
959
+ if (i + 1) % batch_size == 0 or i + 1 == nr_files:
960
+ unique_shapes = {arr.shape[:-1] for arr in stack_ls}
961
+ if len(unique_shapes) > 1:
962
+ max_dims = np.max(np.array(list(unique_shapes)), axis=0)
963
+ print(f'Warning: arrays with multiple shapes found in batch {i + 1}. Padding arrays to max X,Y dimensions {max_dims}')
964
+ padded_stack_ls = []
965
+ for arr in stack_ls:
966
+ pad_width = [(0, max_dim - dim) for max_dim, dim in zip(max_dims, arr.shape[:-1])]
967
+ pad_width.append((0, 0))
968
+ padded_arr = np.pad(arr, pad_width)
969
+ padded_stack_ls.append(padded_arr)
970
+ stack = np.stack(padded_stack_ls)
971
+ else:
972
+ stack = np.stack(stack_ls)
973
+
974
+ normalized_stack = _normalize_img_batch(stack, backgrounds, remove_backgrounds, lower_percentile, save_dtype, signal_to_noise, signal_thresholds)
975
+
976
+ save_loc = os.path.join(output_fldr, f'stack_{batch_index}_norm.npz')
977
+ np.savez(save_loc, data=normalized_stack, filenames=filenames_batch)
978
+ batch_index += 1
979
+ del stack, normalized_stack
980
+ stack_ls = []
981
+ filenames_batch = []
982
+ padded_stack_ls = []
983
+ print(f'All files concatenated and normalized. Saved to: {output_fldr}')
984
+ return output_fldr
985
+
986
+ def _normalize_img_batch(stack, backgrounds, remove_backgrounds, lower_percentile, save_dtype, signal_to_noise, signal_thresholds):
987
+ """
988
+ Normalize the stack of images.
989
+
990
+ Args:
991
+ stack (numpy.ndarray): The stack of images to normalize.
992
+ backgrounds (list): Background values for each channel.
993
+ remove_backgrounds (list): Whether to remove background values for each channel.
994
+ lower_percentile (int): Lower percentile value for normalization.
995
+ save_dtype (numpy.dtype): Data type for saving the normalized stack.
996
+ signal_to_noise (list): Signal-to-noise ratio thresholds for each channel.
997
+ signal_thresholds (list): Signal thresholds for each channel.
998
+
999
+ Returns:
1000
+ numpy.ndarray: The normalized stack.
1001
+ """
1002
+ normalized_stack = np.zeros_like(stack, dtype=np.float32)
1003
+
1004
+ for chan_index, channel in enumerate(range(stack.shape[-1])):
1005
+ single_channel = stack[:, :, :, channel]
1006
+ background = backgrounds[chan_index]
1007
+ signal_threshold = signal_thresholds[chan_index]
1008
+ remove_background = remove_backgrounds[chan_index]
1009
+
1010
+ print(f'Processing channel {chan_index}: background={background}, signal_threshold={signal_threshold}, remove_background={remove_background}')
1011
+
1012
+ # Step 3: Remove background if required
1013
+ if remove_background:
1014
+ single_channel[single_channel < background] = 0
1015
+
1016
+ # Step 4: Calculate global lower percentile for the channel
1017
+ non_zero_single_channel = single_channel[single_channel != 0]
1018
+ global_lower = np.percentile(non_zero_single_channel, lower_percentile)
1019
+
1020
+ # Step 5: Calculate global upper percentile for the channel
1021
+ global_upper = None
1022
+ for upper_p in np.linspace(98, 99.5, num=16):
1023
+ upper_value = np.percentile(non_zero_single_channel, upper_p)
1024
+ if upper_value >= signal_threshold:
1025
+ global_upper = upper_value
1026
+ break
1027
+
1028
+ if global_upper is None:
1029
+ global_upper = np.percentile(non_zero_single_channel, 99.5) # Fallback in case no upper percentile met the threshold
1030
+
1031
+ print(f'Channel {chan_index}: global_lower={global_lower}, global_upper={global_upper}, Signal-to-noise={global_upper / global_lower}')
1032
+
1033
+ # Step 6: Normalize each array from global_lower to global_upper between 0 and 1
1034
+ for array_index in range(single_channel.shape[0]):
1035
+ arr_2d = single_channel[array_index, :, :]
1036
+ arr_2d_normalized = exposure.rescale_intensity(arr_2d, in_range=(global_lower, global_upper), out_range=(0, 1))
1037
+ normalized_stack[array_index, :, :, channel] = arr_2d_normalized
1038
+
1039
+ return normalized_stack.astype(save_dtype)
1040
+
1041
+ def _normalize_img_batch_v1(stack, backgrounds, remove_backgrounds, lower_percentile, save_dtype, signal_to_noise, signal_thresholds):
1042
+ """
1043
+ Normalize the stack of images.
1044
+
1045
+ Args:
1046
+ stack (numpy.ndarray): The stack of images to normalize.
1047
+ backgrounds (list): Background values for each channel.
1048
+ remove_backgrounds (list): Whether to remove background values for each channel.
1049
+ lower_percentile (int): Lower percentile value for normalization.
1050
+ save_dtype (numpy.dtype): Data type for saving the normalized stack.
1051
+ signal_to_noise (list): Signal-to-noise ratio thresholds for each channel.
1052
+ signal_thresholds (list): Signal thresholds for each channel.
1053
+
1054
+ Returns:
1055
+ numpy.ndarray: The normalized stack.
1056
+ """
1057
+ normalized_stack = np.zeros_like(stack, dtype=np.float32)
1058
+ time_ls = []
1059
+
1060
+ for chan_index, channel in enumerate(range(stack.shape[-1])):
1061
+ single_channel = stack[:, :, :, channel]
1062
+ background = backgrounds[chan_index]
1063
+ signal_threshold = signal_thresholds[chan_index]
1064
+ remove_background = remove_backgrounds[chan_index]
1065
+ signal_2_noise = signal_to_noise[chan_index]
1066
+ print(f'chan_index:{chan_index} background:{background} signal_threshold:{signal_threshold} remove_background:{remove_background} signal_2_noise:{signal_2_noise}')
1067
+
1068
+ if remove_background:
1069
+ single_channel[single_channel < background] = 0
1070
+
1071
+ non_zero_single_channel = single_channel[single_channel != 0]
1072
+ global_lower = np.percentile(non_zero_single_channel, lower_percentile)
1073
+ for upper_p in np.linspace(98, 99.5, num=20).tolist():
1074
+ global_upper = np.percentile(non_zero_single_channel, upper_p)
1075
+ if global_upper >= signal_threshold:
1076
+ break
1077
+
1078
+ arr_2d_normalized = np.zeros_like(single_channel, dtype=single_channel.dtype)
1079
+ signal_to_noise_ratio_ls = []
1080
+ for array_index in range(single_channel.shape[0]):
1081
+ start = time.time()
1082
+ arr_2d = single_channel[array_index, :, :]
1083
+ non_zero_arr_2d = arr_2d[arr_2d != 0]
1084
+ if non_zero_arr_2d.size > 0:
1085
+ lower, upper = np.percentile(non_zero_arr_2d, (lower_percentile, upper_p))
1086
+ signal_to_noise_ratio = upper / lower
1087
+ else:
1088
+ signal_to_noise_ratio = 0
1089
+ signal_to_noise_ratio_ls.append(signal_to_noise_ratio)
1090
+ average_stnr = np.mean(signal_to_noise_ratio_ls) if len(signal_to_noise_ratio_ls) > 0 else 0
1091
+
1092
+ if signal_to_noise_ratio > signal_2_noise:
1093
+ arr_2d_rescaled = exposure.rescale_intensity(arr_2d, in_range=(lower, upper), out_range=(0, 1))
1094
+ arr_2d_normalized[array_index, :, :] = arr_2d_rescaled
1095
+ else:
1096
+ arr_2d_normalized[array_index, :, :] = arr_2d
1097
+ stop = time.time()
1098
+ duration = (stop - start) * single_channel.shape[0]
1099
+ time_ls.append(duration)
1100
+ average_time = np.mean(time_ls) if len(time_ls) > 0 else 0
1101
+ print(f'Progress: channels:{chan_index}/{stack.shape[-1] - 1}, arrays:{array_index + 1}/{single_channel.shape[0]}, Signal:{upper:.1f}, noise:{lower:.1f}, Signal-to-noise:{average_stnr:.1f}, Time/channel:{average_time:.2f}sec')
1102
+
1103
+ normalized_stack[:, :, :, channel] = arr_2d_normalized
1104
+
1105
+ return normalized_stack.astype(save_dtype)
1106
+
915
1107
  def _get_lists_for_normalization(settings):
916
1108
  """
917
1109
  Get lists for normalization based on the provided settings.
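
concatenate_and_normalize above replaces the old concatenate-then-normalize passes: it batches the per-field .npy arrays, pads mismatched shapes, and hands each batch to _normalize_img_batch, which rescales every channel to a global [0, 1] range. The sketch below reproduces that per-channel normalization on a synthetic (N, H, W) stack; the percentile and threshold values are illustrative, not package defaults:

import numpy as np
from skimage import exposure

rng = np.random.default_rng(1)
single_channel = rng.gamma(2.0, 200.0, size=(8, 64, 64)).astype(np.float32)

lower_percentile, signal_threshold = 2, 600
non_zero = single_channel[single_channel != 0]
global_lower = np.percentile(non_zero, lower_percentile)

# Same search as _normalize_img_batch: first upper percentile that clears the threshold,
# otherwise fall back to the 99.5th percentile.
global_upper = None
for upper_p in np.linspace(98, 99.5, num=16):
    value = np.percentile(non_zero, upper_p)
    if value >= signal_threshold:
        global_upper = value
        break
if global_upper is None:
    global_upper = np.percentile(non_zero, 99.5)

# The package rescales frame by frame; applying the same global range to the whole
# stack gives an identical result.
normalized = exposure.rescale_intensity(single_channel,
                                        in_range=(global_lower, global_upper),
                                        out_range=(0, 1))
print(normalized.min(), normalized.max())
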
@@ -926,7 +1118,8 @@ def _get_lists_for_normalization(settings):
926
1118
  # Initialize the lists
927
1119
  backgrounds = []
928
1120
  signal_to_noise = []
929
- signal_thresholds = []
1121
+ signal_thresholds = []
1122
+ remove_background = []
930
1123
 
931
1124
  # Iterate through the channels and append the corresponding values if the channel is not None
932
1125
  for ch in settings['channels']:
@@ -934,29 +1127,31 @@ def _get_lists_for_normalization(settings):
934
1127
  backgrounds.append(settings['nucleus_background'])
935
1128
  signal_to_noise.append(settings['nucleus_Signal_to_noise'])
936
1129
  signal_thresholds.append(settings['nucleus_Signal_to_noise']*settings['nucleus_background'])
1130
+ remove_background.append(settings['remove_background_nucleus'])
937
1131
  elif ch == settings['cell_channel']:
938
1132
  backgrounds.append(settings['cell_background'])
939
1133
  signal_to_noise.append(settings['cell_Signal_to_noise'])
940
1134
  signal_thresholds.append(settings['cell_Signal_to_noise']*settings['cell_background'])
1135
+ remove_background.append(settings['remove_background_cell'])
941
1136
  elif ch == settings['pathogen_channel']:
942
1137
  backgrounds.append(settings['pathogen_background'])
943
1138
  signal_to_noise.append(settings['pathogen_Signal_to_noise'])
944
1139
  signal_thresholds.append(settings['pathogen_Signal_to_noise']*settings['pathogen_background'])
945
- return backgrounds, signal_to_noise, signal_thresholds
1140
+ remove_background.append(settings['remove_background_pathogen'])
1141
+ return backgrounds, signal_to_noise, signal_thresholds, remove_background
946
1142
 
947
- def _normalize_stack(src, backgrounds=[100,100,100], remove_background=False, lower_quantile=0.01, save_dtype=np.float32, signal_to_noise=[5,5,5], signal_thresholds=[1000,1000,1000], correct_illumination=False):
1143
+ def _normalize_stack(src, backgrounds=[100, 100, 100], remove_backgrounds=[False, False, False], lower_percentile=2, save_dtype=np.float32, signal_to_noise=[5, 5, 5], signal_thresholds=[1000, 1000, 1000]):
948
1144
  """
949
1145
  Normalize the stack of images.
950
1146
 
951
1147
  Args:
952
1148
  src (str): The source directory containing the stack of images.
953
- backgrounds (list, optional): Background values for each channel. Defaults to [100,100,100].
954
- remove_background (bool, optional): Whether to remove background values. Defaults to False.
955
- lower_quantile (float, optional): Lower quantile value for normalization. Defaults to 0.01.
1149
+ backgrounds (list, optional): Background values for each channel. Defaults to [100, 100, 100].
1150
+ remove_background (list, optional): Whether to remove background values for each channel. Defaults to [False, False, False].
1151
+ lower_percentile (int, optional): Lower percentile value for normalization. Defaults to 2.
956
1152
  save_dtype (numpy.dtype, optional): Data type for saving the normalized stack. Defaults to np.float32.
957
- signal_to_noise (list, optional): Signal-to-noise ratio thresholds for each channel. Defaults to [5,5,5].
958
- signal_thresholds (list, optional): Signal thresholds for each channel. Defaults to [1000,1000,1000].
959
- correct_illumination (bool, optional): Whether to correct illumination. Defaults to False.
1153
+ signal_to_noise (list, optional): Signal-to-noise ratio thresholds for each channel. Defaults to [5, 5, 5].
1154
+ signal_thresholds (list, optional): Signal thresholds for each channel. Defaults to [1000, 1000, 1000].
960
1155
 
961
1156
  Returns:
962
1157
  None
@@ -965,11 +1160,13 @@ def _normalize_stack(src, backgrounds=[100,100,100], remove_background=False, lo
965
1160
  output_fldr = os.path.join(os.path.dirname(src), 'norm_channel_stack')
966
1161
  os.makedirs(output_fldr, exist_ok=True)
967
1162
  time_ls = []
1163
+
968
1164
  for file_index, path in enumerate(paths):
969
1165
  with np.load(path) as data:
970
1166
  stack = data['data']
971
1167
  filenames = data['filenames']
972
- normalized_stack = np.zeros_like(stack, dtype=stack.dtype)
1168
+
1169
+ normalized_stack = np.zeros_like(stack, dtype=np.float32)
973
1170
  file = os.path.basename(path)
974
1171
  name, _ = os.path.splitext(file)
975
1172
 
@@ -977,24 +1174,22 @@ def _normalize_stack(src, backgrounds=[100,100,100], remove_background=False, lo
977
1174
  single_channel = stack[:, :, :, channel]
978
1175
  background = backgrounds[chan_index]
979
1176
  signal_threshold = signal_thresholds[chan_index]
980
- #print(f'signal_threshold:{signal_threshold} in {signal_thresholds} for {chan_index}')
981
-
1177
+ remove_background = remove_backgrounds[chan_index]
982
1178
  signal_2_noise = signal_to_noise[chan_index]
1179
+ print(f'chan_index:{chan_index} background:{background} signal_threshold:{signal_threshold} remove_background:{remove_background} signal_2_noise:{signal_2_noise}')
1180
+
983
1181
  if remove_background:
984
1182
  single_channel[single_channel < background] = 0
985
- if correct_illumination:
986
- bg = filters.gaussian(single_channel, sigma=50)
987
- single_channel = single_channel - bg
988
1183
 
989
- #Calculate the global lower and upper quantiles for non-zero pixels
1184
+ # Calculate the global lower and upper percentiles for non-zero pixels
990
1185
  non_zero_single_channel = single_channel[single_channel != 0]
991
- global_lower = np.quantile(non_zero_single_channel, lower_quantile)
992
- for upper_p in np.linspace(0.98, 1.0, num=100).tolist():
993
- global_upper = np.quantile(non_zero_single_channel, upper_p)
1186
+ global_lower = np.percentile(non_zero_single_channel, lower_percentile)
1187
+ for upper_p in np.linspace(98, 100, num=100).tolist():
1188
+ global_upper = np.percentile(non_zero_single_channel, upper_p)
994
1189
  if global_upper >= signal_threshold:
995
1190
  break
996
1191
 
997
- #Normalize the pixels in each image to the global quantiles and then dtype.
1192
+ # Normalize the pixels in each image to the global percentiles and then dtype.
998
1193
  arr_2d_normalized = np.zeros_like(single_channel, dtype=single_channel.dtype)
999
1194
  signal_to_noise_ratio_ls = []
1000
1195
  for array_index in range(single_channel.shape[0]):
@@ -1002,41 +1197,40 @@ def _normalize_stack(src, backgrounds=[100,100,100], remove_background=False, lo
1002
1197
  arr_2d = single_channel[array_index, :, :]
1003
1198
  non_zero_arr_2d = arr_2d[arr_2d != 0]
1004
1199
  if non_zero_arr_2d.size > 0:
1005
- lower, upper = np.quantile(non_zero_arr_2d, (lower_quantile, upper_p))
1006
- signal_to_noise_ratio = upper/lower
1200
+ lower, upper = np.percentile(non_zero_arr_2d, (lower_percentile, upper_p))
1201
+ signal_to_noise_ratio = upper / lower
1007
1202
  else:
1008
1203
  signal_to_noise_ratio = 0
1009
1204
  signal_to_noise_ratio_ls.append(signal_to_noise_ratio)
1010
1205
  average_stnr = np.mean(signal_to_noise_ratio_ls) if len(signal_to_noise_ratio_ls) > 0 else 0
1011
1206
 
1012
1207
  if signal_to_noise_ratio > signal_2_noise:
1013
- arr_2d_rescaled = exposure.rescale_intensity(arr_2d, in_range=(lower, upper), out_range=(global_lower, global_upper))
1208
+ arr_2d_rescaled = exposure.rescale_intensity(arr_2d, in_range=(lower, upper), out_range=(0, 1))
1014
1209
  arr_2d_normalized[array_index, :, :] = arr_2d_rescaled
1015
1210
  else:
1016
1211
  arr_2d_normalized[array_index, :, :] = arr_2d
1017
1212
  stop = time.time()
1018
- duration = (stop - start)*single_channel.shape[0]
1213
+ duration = (stop - start) * single_channel.shape[0]
1019
1214
  time_ls.append(duration)
1020
1215
  average_time = np.mean(time_ls) if len(time_ls) > 0 else 0
1021
- #clear_output(wait=True)
1022
- print(f'Progress: files {file_index+1}/{len(paths)}, channels:{chan_index}/{stack.shape[-1]-1}, arrays:{array_index+1}/{single_channel.shape[0]}, Signal:{upper:.1f}, noise:{lower:.1f}, Signal-to-noise:{average_stnr:.1f}, Time/channel:{average_time:.2f}sec')
1023
- #print(f'Progress: files {file_index+1}/{len(paths)}, channels:{chan_index}/{stack.shape[-1]-1}, arrays:{array_index+1}/{single_channel.shape[0]}, Signal:{upper:.1f}, noise:{lower:.1f}, Signal-to-noise:{average_stnr:.1f}, Time/channel:{average_time:.2f}sec', end='\r', flush=True)
1024
- normalized_single_channel = exposure.rescale_intensity(arr_2d_normalized, out_range='dtype')
1025
- normalized_stack[:, :, :, channel] = normalized_single_channel
1026
- save_loc = output_fldr+'/'+name+'_norm_stack.npz'
1027
- normalized_stack = normalized_stack.astype(save_dtype)
1028
- np.savez(save_loc, data=normalized_stack, filenames=filenames)
1029
- del normalized_stack, single_channel, normalized_single_channel, stack, filenames
1216
+ print(f'Progress: files {file_index + 1}/{len(paths)}, channels:{chan_index}/{stack.shape[-1] - 1}, arrays:{array_index + 1}/{single_channel.shape[0]}, Signal:{upper:.1f}, noise:{lower:.1f}, Signal-to-noise:{average_stnr:.1f}, Time/channel:{average_time:.2f}sec')
1217
+
1218
+ normalized_stack[:, :, :, channel] = arr_2d_normalized
1219
+
1220
+ save_loc = os.path.join(output_fldr, f'{name}_norm_stack.npz')
1221
+ np.savez(save_loc, data=normalized_stack.astype(save_dtype), filenames=filenames)
1222
+ del normalized_stack, single_channel, arr_2d_normalized, stack, filenames
1030
1223
  gc.collect()
1031
- return print(f'Saved stacks:{output_fldr}')
1224
+
1225
+ return print(f'Saved stacks: {output_fldr}')
1032
1226
 
1033
- def _normalize_timelapse(src, lower_quantile=0.01, save_dtype=np.float32):
1227
+ def _normalize_timelapse(src, lower_percentile=2, save_dtype=np.float32):
1034
1228
  """
1035
1229
  Normalize the timelapse data by rescaling the intensity values based on percentiles.
1036
1230
 
1037
1231
  Args:
1038
1232
  src (str): The source directory containing the timelapse data files.
1039
- lower_quantile (float, optional): The lower quantile used to calculate the intensity range. Defaults to 0.01.
1233
+ lower_percentile (int, optional): The lower percentile used to calculate the intensity range. Defaults to 1.
1040
1234
  save_dtype (numpy.dtype, optional): The data type to save the normalized stack. Defaults to np.float32.
1041
1235
  """
1042
1236
  paths = [os.path.join(src, file) for file in os.listdir(src) if file.endswith('.npz')]
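
In the reworked _normalize_stack each 2-D frame is only rescaled when its own upper/lower percentile ratio beats the per-channel signal_to_noise value, and rescaled frames now land in a float32 [0, 1] range instead of the old dtype range. A minimal sketch of that gate with synthetic data (the extra lower > 0 guard is an addition here, not in the package):

import numpy as np
from skimage import exposure

def maybe_rescale(frame, lower_percentile=2, upper_p=99.0, signal_2_noise=5):
    # Rescale to [0, 1] only if the frame's percentile ratio clears the SNR gate.
    non_zero = frame[frame != 0]
    if non_zero.size == 0:
        return frame
    lower, upper = np.percentile(non_zero, (lower_percentile, upper_p))
    if lower > 0 and upper / lower > signal_2_noise:
        return exposure.rescale_intensity(frame, in_range=(lower, upper), out_range=(0, 1))
    return frame

frame = np.random.default_rng(2).gamma(2.0, 300.0, size=(64, 64)).astype(np.float32)
print(maybe_rescale(frame).max())  # 1.0 when the gate passes, otherwise the raw maximum
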
@@ -1058,7 +1252,7 @@ def _normalize_timelapse(src, lower_quantile=0.01, save_dtype=np.float32):
1058
1252
  for array_index in range(single_channel.shape[0]):
1059
1253
  arr_2d = single_channel[array_index]
1060
1254
  # Calculate the 1% and 98% percentiles for this specific image
1061
- q_low = np.percentile(arr_2d[arr_2d != 0], 2)
1255
+ q_low = np.percentile(arr_2d[arr_2d != 0], lower_percentile)
1062
1256
  q_high = np.percentile(arr_2d[arr_2d != 0], 98)
1063
1257
 
1064
1258
  # Rescale intensity based on the calculated percentiles to fill the dtype range
@@ -1148,11 +1342,11 @@ def delete_empty_subdirectories(folder_path):
1148
1342
  # An error occurred, likely because the directory is not empty
1149
1343
  #print(f"Skipping non-empty directory: {full_dir_path}")
1150
1344
 
1151
- @log_function_call
1345
+ #@log_function_call
1152
1346
  def preprocess_img_data(settings):
1153
1347
 
1154
1348
  from .plot import plot_arrays, _plot_4D_arrays
1155
- from .utils import _run_test_mode
1349
+ from .utils import _run_test_mode, _get_regex, set_default_settings_preprocess_img_data
1156
1350
 
1157
1351
  """
1158
1352
  Preprocesses image data by converting z-stack images to maximum intensity projection (MIP) images.
@@ -1171,9 +1365,8 @@ def preprocess_img_data(settings):
1171
1365
  timelapse (bool, optional): Whether the images are from a timelapse experiment. Defaults to False.
1172
1366
  remove_background (bool, optional): Whether to remove the background from the images. Defaults to False.
1173
1367
  backgrounds (int, optional): The number of background images to use for background removal. Defaults to 100.
1174
- lower_quantile (float, optional): The lower quantile used for background removal. Defaults to 0.01.
1368
+ lower_percentile (float, optional): The lower percentile used for background removal. Defaults to 1.
1175
1369
  save_dtype (type, optional): The data type used for saving the preprocessed images. Defaults to np.float32.
1176
- correct_illumination (bool, optional): Whether to correct the illumination of the images. Defaults to False.
1177
1370
  randomize (bool, optional): Whether to randomize the order of the images. Defaults to True.
1178
1371
  all_to_mip (bool, optional): Whether to convert all images to MIP. Defaults to False.
1179
1372
  pick_slice (bool, optional): Whether to pick a specific slice based on the provided skip mode. Defaults to False.
@@ -1191,7 +1384,7 @@ def preprocess_img_data(settings):
1191
1384
  extension_counts = Counter(extensions)
1192
1385
  most_common_extension = extension_counts.most_common(1)[0][0]
1193
1386
  img_format = None
1194
-
1387
+
1195
1388
  delete_empty_subdirectories(src)
1196
1389
 
1197
1390
  # Check if the most common extension is one of the specified image formats
@@ -1206,56 +1399,31 @@ def preprocess_img_data(settings):
1206
1399
  print('Found existing channel_stack folder.')
1207
1400
  if os.path.exists(src+'/norm_channel_stack'):
1208
1401
  print('Found existing norm_channel_stack folder. Skipping preprocessing')
1209
- return
1210
-
1211
- cmap = 'inferno'
1212
- figuresize = 20
1213
- normalize = True
1214
- save_dtype = 'uint16'
1215
- correct_illumination = False
1216
-
1217
- mask_channels = [settings['nucleus_channel'], settings['pathogen_channel'], settings['cell_channel']]
1218
- backgrounds = [settings['nucleus_background'], settings['pathogen_background'], settings['cell_background']]
1219
-
1220
- metadata_type = settings['metadata_type']
1221
- custom_regex = settings['custom_regex']
1222
- nr = settings['examples_to_plot']
1223
- plot = settings['plot']
1224
- batch_size = settings['batch_size']
1225
- timelapse = settings['timelapse']
1226
- remove_background = settings['remove_background']
1227
- lower_quantile = settings['lower_quantile']
1228
- randomize = settings['randomize']
1229
- all_to_mip = settings['all_to_mip']
1230
- pick_slice = settings['pick_slice']
1231
- skip_mode = settings['skip_mode']
1232
-
1233
-
1234
- if not img_format == None:
1235
- if metadata_type == 'cellvoyager':
1236
- regex = f'(?P<plateID>.*)_(?P<wellID>.*)_T(?P<timeID>.*)F(?P<fieldID>.*)L(?P<laserID>..)A(?P<AID>..)Z(?P<sliceID>.*)C(?P<chanID>.*){img_format}'
1237
- elif metadata_type == 'cq1':
1238
- regex = f'W(?P<wellID>.*)F(?P<fieldID>.*)T(?P<timeID>.*)Z(?P<sliceID>.*)C(?P<chanID>.*){img_format}'
1239
- elif metadata_type == 'nikon':
1240
- regex = f'(?P<plateID>.*)_(?P<wellID>.*)_T(?P<timeID>.*)F(?P<fieldID>.*)L(?P<laserID>..)A(?P<AID>..)Z(?P<sliceID>.*)C(?P<chanID>.*){img_format}'
1241
- elif metadata_type == 'zeis':
1242
- regex = f'(?P<plateID>.*)_(?P<wellID>.*)_T(?P<timeID>.*)F(?P<fieldID>.*)L(?P<laserID>..)A(?P<AID>..)Z(?P<sliceID>.*)C(?P<chanID>.*){img_format}'
1243
- elif metadata_type == 'leica':
1244
- regex = f'(?P<plateID>.*)_(?P<wellID>.*)_T(?P<timeID>.*)F(?P<fieldID>.*)L(?P<laserID>..)A(?P<AID>..)Z(?P<sliceID>.*)C(?P<chanID>.*){img_format}'
1245
- elif metadata_type == 'custom':
1246
- regex = f'({custom_regex}){img_format}'
1402
+ return settings, src
1247
1403
 
1248
- print(f'regex mode:{metadata_type} regex:{regex}')
1404
+ mask_channels = [settings['nucleus_channel'], settings['cell_channel'], settings['pathogen_channel']]
1405
+ backgrounds = [settings['nucleus_background'], settings['cell_background'], settings['pathogen_background']]
1406
+
1407
+ settings, metadata_type, custom_regex, nr, plot, batch_size, timelapse, lower_percentile, randomize, all_to_mip, pick_slice, skip_mode, cmap, figuresize, normalize, save_dtype, test_mode, test_images, random_test = set_default_settings_preprocess_img_data(settings)
1408
+
1409
+ regex = _get_regex(metadata_type, img_format, custom_regex)
1410
+
1411
+ if test_mode:
1249
1412
 
1250
- if settings.get('test_mode', False):
1413
+ print(f'Running spacr in test mode')
1414
+ settings['plot'] = True
1251
1415
  try:
1252
1416
  os.rmdir(os.path.join(src, 'test'))
1253
1417
  print(f"Deleted test directory: {os.path.join(src, 'test')}")
1254
1418
  except OSError as e:
1255
1419
  pass
1256
1420
 
1257
- src = _run_test_mode(settings['src'], regex, timelapse=timelapse)
1421
+ src = _run_test_mode(settings['src'], regex, timelapse, test_images, random_test)
1258
1422
  settings['src'] = src
1423
+
1424
+ if img_format == None:
1425
+ if not os.path.exists(src+'/stack'):
1426
+ _merge_channels(src, plot=False)
1259
1427
 
1260
1428
  if not os.path.exists(src+'/stack'):
1261
1429
  try:
@@ -1295,31 +1463,20 @@ def preprocess_img_data(settings):
1295
1463
  except Exception as e:
1296
1464
  print(f"Error: {e}")
1297
1465
 
1298
- print('concatinating cahnnels')
1299
- _concatenate_channel(src+'/stack',
1300
- channels=mask_channels,
1301
- randomize=randomize,
1302
- timelapse=timelapse,
1303
- batch_size=batch_size)
1304
-
1305
- if plot:
1306
- print(f'plotting {nr} images from {src}/channel_stack')
1307
- _plot_4D_arrays(src+'/channel_stack', figuresize, cmap, nr_npz=1, nr=nr)
1308
-
1309
- backgrounds, signal_to_noise, signal_thresholds = _get_lists_for_normalization(settings=settings)
1310
-
1311
- if not timelapse:
1312
- _normalize_stack(src+'/channel_stack',
1313
- backgrounds=backgrounds,
1314
- lower_quantile=lower_quantile,
1315
- save_dtype=save_dtype,
1316
- signal_thresholds=signal_thresholds,
1317
- correct_illumination=correct_illumination,
1318
- signal_to_noise=signal_to_noise,
1319
- remove_background=remove_background)
1320
- else:
1321
- _normalize_timelapse(src+'/channel_stack', lower_quantile=lower_quantile, save_dtype=np.float32)
1322
-
1466
+ backgrounds, signal_to_noise, signal_thresholds, remove_backgrounds = _get_lists_for_normalization(settings=settings)
1467
+
1468
+ concatenate_and_normalize(src+'/stack',
1469
+ mask_channels,
1470
+ randomize,
1471
+ timelapse,
1472
+ batch_size,
1473
+ backgrounds,
1474
+ remove_backgrounds,
1475
+ lower_percentile,
1476
+ np.float32,
1477
+ signal_to_noise,
1478
+ signal_thresholds)
1479
+
1323
1480
  if plot:
1324
1481
  _plot_4D_arrays(src+'/norm_channel_stack', nr_npz=1, nr=nr)
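
Net effect of the preprocess_img_data changes above: defaults now come from set_default_settings_preprocess_img_data, the regex is resolved by _get_regex, channel folders are merged when no stack exists yet, and the old _concatenate_channel plus _normalize_stack pair is replaced by a single concatenate_and_normalize call on src/stack. A hedged outline of that flow, grounded in the hunks above (argument values are illustrative):

# 1) settings, metadata_type, custom_regex, ... = set_default_settings_preprocess_img_data(settings)
# 2) regex = _get_regex(metadata_type, img_format, custom_regex)
# 3) optional test mode: src = _run_test_mode(settings['src'], regex, timelapse, test_images, random_test)
# 4) if no src/stack folder exists yet: _merge_channels(src, plot=False)
# 5) backgrounds, signal_to_noise, signal_thresholds, remove_backgrounds = _get_lists_for_normalization(settings)
# 6) concatenate_and_normalize(src + '/stack', mask_channels, randomize, timelapse, batch_size,
#                              backgrounds, remove_backgrounds, lower_percentile, np.float32,
#                              signal_to_noise, signal_thresholds)
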
1325
1482
 
@@ -1373,27 +1530,6 @@ def _get_avg_object_size(masks):
1373
1530
  return sum(object_areas) / len(object_areas)
1374
1531
  else:
1375
1532
  return 0 # Return 0 if no objects are found
1376
-
1377
- def _save_figure_v1(fig, src, text, dpi=300, ):
1378
- """
1379
- Save a figure to a specified location.
1380
-
1381
- Parameters:
1382
- fig (matplotlib.figure.Figure): The figure to be saved.
1383
- src (str): The source file path.
1384
- text (str): The text to be included in the figure name.
1385
- dpi (int, optional): The resolution of the saved figure. Defaults to 300.
1386
- """
1387
- save_folder = os.path.dirname(src)
1388
- obj_type = os.path.basename(src)
1389
- name = os.path.basename(save_folder)
1390
- save_folder = os.path.join(save_folder, 'figure')
1391
- os.makedirs(save_folder, exist_ok=True)
1392
- fig_name = f'{obj_type}_{name}_{text}.pdf'
1393
- save_location = os.path.join(save_folder, fig_name)
1394
- fig.savefig(save_location, bbox_inches='tight', dpi=dpi)
1395
- print(f'Saved single cell figure: {save_location}')
1396
- plt.close()
1397
1533
 
1398
1534
  def _save_figure(fig, src, text, dpi=300, i=1, all_folders=1):
1399
1535
  """
@@ -1499,56 +1635,6 @@ def _save_settings_to_db(settings):
1499
1635
  settings_df.to_sql('settings', conn, if_exists='replace', index=False) # Replace the table if it already exists
1500
1636
  conn.close()
1501
1637
 
1502
- def _save_mask_timelapse_as_gif_v1(masks, path, cmap, norm, filenames):
1503
- """
1504
- Save a timelapse of masks as a GIF.
1505
-
1506
- Parameters:
1507
- masks (list): List of mask frames.
1508
- path (str): Path to save the GIF.
1509
- cmap: Colormap for displaying the masks.
1510
- norm: Normalization for the masks.
1511
- filenames (list): List of filenames corresponding to each mask frame.
1512
-
1513
- Returns:
1514
- None
1515
- """
1516
- def _update(frame):
1517
- """
1518
- Update the plot with the given frame.
1519
-
1520
- Parameters:
1521
- frame (int): The frame number to update the plot with.
1522
-
1523
- Returns:
1524
- None
1525
- """
1526
- nonlocal filename_text_obj
1527
- if filename_text_obj is not None:
1528
- filename_text_obj.remove()
1529
- ax.clear()
1530
- ax.axis('off')
1531
- current_mask = masks[frame]
1532
- ax.imshow(current_mask, cmap=cmap, norm=norm)
1533
- ax.set_title(f'Frame: {frame}', fontsize=24, color='white')
1534
- filename_text = filenames[frame]
1535
- filename_text_obj = fig.text(0.5, 0.01, filename_text, ha='center', va='center', fontsize=20, color='white')
1536
- for label_value in np.unique(current_mask):
1537
- if label_value == 0: continue # Skip background
1538
- y, x = np.mean(np.where(current_mask == label_value), axis=1)
1539
- ax.text(x, y, str(label_value), color='white', fontsize=24, ha='center', va='center')
1540
-
1541
- fig, ax = plt.subplots(figsize=(50, 50), facecolor='black')
1542
- ax.set_facecolor('black')
1543
- ax.axis('off')
1544
- plt.subplots_adjust(left=0, right=1, top=1, bottom=0, wspace=0, hspace=0)
1545
-
1546
- filename_text_obj = None
1547
- anim = FuncAnimation(fig, _update, frames=len(masks), blit=False)
1548
- anim.save(path, writer='pillow', fps=2, dpi=80) # Adjust DPI for size/quality
1549
- plt.close(fig)
1550
- print(f'Saved timelapse to {path}')
1551
-
1552
1638
  def _save_mask_timelapse_as_gif(masks, tracks_df, path, cmap, norm, filenames):
1553
1639
  """
1554
1640
  Save a timelapse animation of masks as a GIF.
@@ -2273,6 +2359,8 @@ def convert_numpy_to_tiff(folder_path, limit=None):
2273
2359
  for i, filename in enumerate(files):
2274
2360
  if limit is not None and i >= limit:
2275
2361
  break
2362
+ if not filename.endswith('.npy'):
2363
+ continue
2276
2364
 
2277
2365
  # Construct the full file path
2278
2366
  file_path = os.path.join(folder_path, filename)
@@ -2289,131 +2377,46 @@ def convert_numpy_to_tiff(folder_path, limit=None):
2289
2377
  print(f"Converted {filename} to {tiff_filename} and saved in 'tiff' subdirectory.")
2290
2378
  return
2291
2379
 
(0.0.6 removes 128 consecutive blank lines here: old lines 2292-2419.)
2380
+ def generate_cellpose_train_test(src, test_split=0.1):
2381
+ mask_src = os.path.join(src, 'masks')
2382
+ img_paths = glob.glob(os.path.join(src, '*.tif'))
2383
+ img_filenames = [os.path.basename(file) for file in img_paths]
2384
+ img_filenames = [file for file in img_filenames if os.path.exists(os.path.join(mask_src, file))]
2385
+ print(f'Found {len(img_filenames)} images with masks')
2386
+
2387
+ random.shuffle(img_filenames)
2388
+ split_index = int(len(img_filenames) * test_split)
2389
+ train_files = img_filenames[split_index:]
2390
+ test_files = img_filenames[:split_index]
2391
+ list_of_lists = [test_files, train_files]
2392
+ print(f'Split dataset into Train {len(train_files)} and Test {len(test_files)} files')
2393
+
2394
+ train_dir = os.path.join(os.path.dirname(src), 'train')
2395
+ train_dir_masks = os.path.join(train_dir, 'masks')
2396
+ test_dir = os.path.join(os.path.dirname(src), 'test')
2397
+ test_dir_masks = os.path.join(test_dir, 'masks')
2398
+
2399
+ os.makedirs(train_dir, exist_ok=True)
2400
+ os.makedirs(train_dir_masks, exist_ok=True)
2401
+ os.makedirs(test_dir, exist_ok=True)
2402
+ os.makedirs(test_dir_masks, exist_ok=True)
2403
+
2404
+ for i, ls in enumerate(list_of_lists):
2405
+ if i == 0:
2406
+ dst = test_dir
2407
+ dst_mask = test_dir_masks
2408
+ _type = 'Test'
2409
+ else:
2410
+ dst = train_dir
2411
+ dst_mask = train_dir_masks
2412
+ _type = 'Train'
2413
+
2414
+ for idx, filename in enumerate(ls):
2415
+ img_path = os.path.join(src, filename)
2416
+ mask_path = os.path.join(mask_src, filename)
2417
+ new_img_path = os.path.join(dst, filename)
2418
+ new_mask_path = os.path.join(dst_mask, filename)
2419
+ shutil.copy(img_path, new_img_path)
2420
+ shutil.copy(mask_path, new_mask_path)
2421
+ print(f'Copied {idx+1}/{len(ls)} images to {_type} set', end='\r', flush=True)
2422
+
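
The new generate_cellpose_train_test expects images as src/*.tif with identically named masks under src/masks, and copies a shuffled split into train/ and test/ folders (each with its own masks/ subfolder) created next to src. A hedged usage sketch (paths are illustrative):

# Assumed layout before the call:
#   /data/cellpose_dataset/        image_001.tif, image_002.tif, ...
#   /data/cellpose_dataset/masks/  image_001.tif, image_002.tif, ...
# from spacr.io import generate_cellpose_train_test
# generate_cellpose_train_test('/data/cellpose_dataset', test_split=0.1)
# Afterwards: /data/train/{*.tif, masks/} and /data/test/{*.tif, masks/},
# written beside the source folder (os.path.dirname(src)).
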