PyPI - spacr - Versions diffs - 0.0.36__py3-none-any.whl → 0.0.62__py3-none-any.whl - Mend

spacr: 0.0.36__py3-none-any.whl → 0.0.62__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33)

spacr/__init__.py +11 -4
spacr/__main__.py +0 -2
spacr/alpha.py +514 -2
spacr/annotate_app.py +112 -116
spacr/core.py +864 -728
spacr/deep_spacr.py +696 -0
spacr/foldseek.py +2 -16
spacr/graph_learning.py +297 -253
spacr/gui.py +9 -8
spacr/gui_2.py +90 -0
spacr/gui_classify_app.py +3 -4
spacr/gui_mask_app.py +9 -9
spacr/gui_measure_app.py +3 -5
spacr/gui_utils.py +132 -33
spacr/io.py +308 -464
spacr/mask_app.py +109 -5
spacr/measure.py +15 -1
spacr/models/cp/toxo_pv_lumen.CP_model +0 -0
spacr/old_code.py +69 -1
spacr/plot.py +23 -6
spacr/sequencing.py +1130 -0
spacr/sim.py +0 -42
spacr/timelapse.py +0 -1
spacr/train.py +172 -13
spacr/umap.py +0 -689
spacr/utils.py +1322 -75
{spacr-0.0.36.dist-info → spacr-0.0.62.dist-info}/METADATA +14 -29
spacr-0.0.62.dist-info/RECORD +39 -0
{spacr-0.0.36.dist-info → spacr-0.0.62.dist-info}/entry_points.txt +1 -0
spacr-0.0.36.dist-info/RECORD +0 -35
{spacr-0.0.36.dist-info → spacr-0.0.62.dist-info}/LICENSE +0 -0
{spacr-0.0.36.dist-info → spacr-0.0.62.dist-info}/WHEEL +0 -0
{spacr-0.0.36.dist-info → spacr-0.0.62.dist-info}/top_level.txt +0 -0

spacr/io.py CHANGED Viewed

@@ -19,7 +19,6 @@ from io import BytesIO
 from IPython.display import display, clear_output
 from multiprocessing import Pool, cpu_count
 from torch.utils.data import Dataset
-import seaborn as sns
 import matplotlib.pyplot as plt
 from torchvision.transforms import ToTensor
@@ -88,15 +87,13 @@ def _load_images_and_labels(image_files, label_files, circular=False, invert=Fal
         print(f'image shape: {images[0].shape}, image type: images[0].shape mask shape: {labels[0].shape}, image type: labels[0].shape')
     return images, labels, image_names, label_names
-def _load_normalized_images_and_labels(image_files, label_files, signal_thresholds=[1000], channels=None, percentiles=None,  circular=False, invert=False, visualize=False):
+def _load_normalized_images_and_labels(image_files, label_files, channels=None, percentiles=None,  circular=False, invert=False, visualize=False, remove_background=False, background=0, Signal_to_noise=10):
     from .plot import normalize_and_visualize
     from .utils import invert_image, apply_mask
-    if isinstance(signal_thresholds, int):
-        signal_thresholds = [signal_thresholds] * (len(channels) if channels is not None else 1)
-    elif not isinstance(signal_thresholds, list):
-        signal_thresholds = [signal_thresholds]
+    signal_thresholds = background*Signal_to_noise
+    lower_percentile = 2
     images = []
     labels = []
@@ -113,16 +110,18 @@ def _load_normalized_images_and_labels(image_files, label_files, signal_threshol
     # Load images and check percentiles
     for i,img_file in enumerate(image_files):
-        #print(img_file)
         image = cellpose.io.imread(img_file)
         if invert:
             image = invert_image(image)
         if circular:
             image = apply_mask(image, output_value=0)
-        #print(image.shape)
         # If specific channels are specified, select them
         if channels is not None and image.ndim == 3:
             image = image[..., channels]
+        if remove_background:
+            image[image < background] = 0
         if image.ndim < 3:
             image = np.expand_dims(image, axis=-1)
@@ -130,11 +129,11 @@ def _load_normalized_images_and_labels(image_files, label_files, signal_threshol
         images.append(image)
         if percentiles is None:
             for c in range(image.shape[-1]):
-                p1 = np.percentile(image[..., c], 1)
+                p1 = np.percentile(image[..., c], lower_percentile)
                 percentiles_1[c].append(p1)
-                for percentile in [99, 99.9, 99.99, 99.999]:
+                for percentile in [98, 99, 99.9, 99.99, 99.999]:
                     p = np.percentile(image[..., c], percentile)
-                    if p > signal_thresholds[min(c, len(signal_thresholds)-1)]:
+                    if p > signal_thresholds:
                         percentiles_99[c].append(p)
                         break
@@ -143,8 +142,8 @@ def _load_normalized_images_and_labels(image_files, label_files, signal_threshol
         for image in images:
             normalized_image = np.zeros_like(image, dtype=np.float32)
             for c in range(image.shape[-1]):
-                high_p = np.percentile(image[..., c], percentiles[1])
                 low_p = np.percentile(image[..., c], percentiles[0])
+                high_p = np.percentile(image[..., c], percentiles[1])
                 normalized_image[..., c] = rescale_intensity(image[..., c], in_range=(low_p, high_p), out_range=(0, 1))
             normalized_images.append(normalized_image)
             if visualize:
@@ -155,17 +154,20 @@ def _load_normalized_images_and_labels(image_files, label_files, signal_threshol
         avg_p1 = [np.mean(p) for p in percentiles_1]
         avg_p99 = [np.mean(p) if len(p) > 0 else np.mean(percentiles_1[i]) for i, p in enumerate(percentiles_99)]
+        print(f'Average 1st percentiles: {avg_p1}, Average 99th percentiles: {avg_p99}')
         normalized_images = []
         for image in images:
             normalized_image = np.zeros_like(image, dtype=np.float32)
-        for c in range(image.shape[-1]):
-            normalized_image[..., c] = rescale_intensity(image[..., c], in_range=(avg_p1[c], avg_p99[c]), out_range=(0, 1))
-        normalized_images.append(normalized_image)
-        if visualize:
-            normalize_and_visualize(image, normalized_image, title=f"Channel {c+1} Normalized")
+            for c in range(image.shape[-1]):
+                normalized_image[..., c] = rescale_intensity(image[..., c], in_range=(avg_p1[c], avg_p99[c]), out_range=(0, 1))
+            normalized_images.append(normalized_image)
+            if visualize:
+                normalize_and_visualize(image, normalized_image, title=f"Channel {c+1} Normalized")
     if not image_files is None:
         image_dir = os.path.dirname(image_files[0])
     else:
         image_dir = None
@@ -181,6 +183,7 @@ def _load_normalized_images_and_labels(image_files, label_files, signal_threshol
     return normalized_images, labels, image_names, label_names
 class CombineLoaders:
     """
     A class that combines multiple data loaders into a single iterator.
@@ -306,85 +309,6 @@ class NoClassDataset(Dataset):
             img = ToTensor()(img)
         # Return both the image and its filename
         return img, self.filenames[index]
-class MyDataset_v1(Dataset):
-    """
-    Custom dataset class for loading and processing image data.
-    Args:
-        data_dir (str): The directory path where the data is stored.
-        loader_classes (list): List of class names.
-        transform (callable, optional): A function/transform that takes in an PIL image and returns a transformed version. Default is None.
-        shuffle (bool, optional): Whether to shuffle the dataset. Default is True.
-        load_to_memory (bool, optional): Whether to load images into memory. Default is False.
-    Attributes:
-        data_dir (str): The directory path where the data is stored.
-        classes (list): List of class names.
-        transform (callable): A function/transform that takes in an PIL image and returns a transformed version.
-        shuffle (bool): Whether to shuffle the dataset.
-        load_to_memory (bool): Whether to load images into memory.
-        filenames (list): List of file paths.
-        labels (list): List of labels corresponding to each file.
-        images (list): List of loaded images.
-        image_cache (Cache): Cache object for storing loaded images.
-    Methods:
-        load_image: Load an image from file.
-        __len__: Get the length of the dataset.
-        shuffle_dataset: Shuffle the dataset.
-        __getitem__: Get an item from the dataset.
-    """
-    def __init__(self, data_dir, loader_classes, transform=None, shuffle=True, load_to_memory=False):
-        from .utils import Cache
-        self.data_dir = data_dir
-        self.classes = loader_classes
-        self.transform = transform
-        self.shuffle = shuffle
-        self.load_to_memory = load_to_memory
-        self.filenames = []
-        self.labels = []
-        self.images = []
-        self.image_cache = Cache(50)
-        for class_name in self.classes:
-            class_path = os.path.join(data_dir, class_name)
-            class_files = [os.path.join(class_path, f) for f in os.listdir(class_path) if os.path.isfile(os.path.join(class_path, f))]
-            self.filenames.extend(class_files)
-            self.labels.extend([self.classes.index(class_name)] * len(class_files))
-        if self.shuffle:
-            self.shuffle_dataset()
-        if self.load_to_memory:
-            self.images = [self.load_image(f) for f in self.filenames]
-    def load_image(self, img_path):
-        img = self.image_cache.get(img_path)
-        if img is None:
-            img = Image.open(img_path).convert('RGB')
-            self.image_cache.put(img_path, img)
-        return img
-    def _len__(self):
-        return len(self.filenames)
-    def shuffle_dataset(self):
-        combined = list(zip(self.filenames, self.labels))
-        random.shuffle(combined)
-        self.filenames, self.labels = zip(*combined)
-    def _getitem__(self, index):
-        label = self.labels[index]
-        filename = self.filenames[index]
-        if self.load_to_memory:
-            img = self.images[index]
-        else:
-            img = self.load_image(filename)
-        if self.transform is not None:
-            img = self.transform(img)
-        else:
-            img = ToTensor()(img)
-        return img, label, filename
 class MyDataset(Dataset):
     """
@@ -602,64 +526,6 @@ def _rename_and_organize_image_files(src, regex, batch_size=100, pick_slice=Fals
                     shutil.move(os.path.join(src, filename), move)
     return
-def _merge_file_v1(chan_dirs, stack_dir, file):
-    """
-    Merge multiple channels into a single stack and save it as a numpy array.
-    Args:
-        chan_dirs (list): List of directories containing channel images.
-        stack_dir (str): Directory to save the merged stack.
-        file (str): File name of the channel image.
-    Returns:
-        None
-    """
-    chan1 = cv2.imread(str(file), -1)
-    chan1 = np.expand_dims(chan1, axis=2)
-    new_file = stack_dir / (file.stem + '.npy')
-    if not new_file.exists():
-        stack_dir.mkdir(exist_ok=True)
-        channels = [chan1]
-        for chan_dir in chan_dirs[1:]:
-            img = cv2.imread(str(chan_dir / file.name), -1)
-            chan = np.expand_dims(img, axis=2)
-            channels.append(chan)
-        stack = np.concatenate(channels, axis=2)
-        np.save(new_file, stack)
-def _merge_file_v1(chan_dirs, stack_dir, file):
-    """
-    Merge multiple channels into a single stack and save it as a numpy array.
-    Args:
-        chan_dirs (list): List of directories containing channel images.
-        stack_dir (str): Directory to save the merged stack.
-        file (str): File name of the channel image.
-    Returns:
-        None
-    """
-    new_file = stack_dir / (file.stem + '.npy')
-    if not new_file.exists():
-        stack_dir.mkdir(exist_ok=True)
-        channels = []
-        for i, chan_dir in enumerate(chan_dirs):
-            img_path = str(chan_dir / file.name)
-            img = cv2.imread(img_path, -1)
-            if img is None:
-                print(f"Warning: Failed to read image {img_path}")
-                continue
-            chan = np.expand_dims(img, axis=2)
-            channels.append(chan)
-            del img  # Explicitly delete the reference to the image to free up memory
-            if i % 10 == 0:  # Periodically suggest garbage collection
-                gc.collect()
-        if channels:
-            stack = np.concatenate(channels, axis=2)
-            np.save(new_file, stack)
-        else:
-            print(f"No valid channels to merge for file {file.name}")
 def _merge_file(chan_dirs, stack_dir, file_name):
     """
     Merge multiple channels into a single stack and save it as a numpy array, using os module for path handling.
@@ -1021,6 +887,223 @@ def _concatenate_channel(src, channels, randomize=True, timelapse=False, batch_s
     print(f'All files concatenated and saved to:{channel_stack_loc}')
     return channel_stack_loc
+def concatenate_and_normalize(src, channels, randomize=True, timelapse=False, batch_size=100, backgrounds=[100, 100, 100], remove_backgrounds=[False, False, False], lower_percentile=2, save_dtype=np.float32, signal_to_noise=[5, 5, 5], signal_thresholds=[1000, 1000, 1000]):
+    """
+    Concatenates and normalizes channel data from multiple files and saves the normalized data.
+    Args:
+        src (str): The source directory containing the channel data files.
+        channels (list): The list of channel indices to be concatenated and normalized.
+        randomize (bool, optional): Whether to randomize the order of the files. Defaults to True.
+        timelapse (bool, optional): Whether the channel data is from a timelapse experiment. Defaults to False.
+        batch_size (int, optional): The number of files to be processed in each batch. Defaults to 100.
+        backgrounds (list, optional): Background values for each channel. Defaults to [100, 100, 100].
+        remove_backgrounds (list, optional): Whether to remove background values for each channel. Defaults to [False, False, False].
+        lower_percentile (int, optional): Lower percentile value for normalization. Defaults to 2.
+        save_dtype (numpy.dtype, optional): Data type for saving the normalized stack. Defaults to np.float32.
+        signal_to_noise (list, optional): Signal-to-noise ratio thresholds for each channel. Defaults to [5, 5, 5].
+        signal_thresholds (list, optional): Signal thresholds for each channel. Defaults to [1000, 1000, 1000].
+    Returns:
+        str: The directory path where the concatenated and normalized channel data is saved.
+    """
+    channels = [item for item in channels if item is not None]
+    paths = []
+    output_fldr = os.path.join(os.path.dirname(src), 'norm_channel_stack')
+    os.makedirs(output_fldr, exist_ok=True)
+    if timelapse:
+        try:
+            time_stack_path_lists = _generate_time_lists(os.listdir(src))
+            for i, time_stack_list in enumerate(time_stack_path_lists):
+                stack_region = []
+                filenames_region = []
+                for idx, file in enumerate(time_stack_list):
+                    path = os.path.join(src, file)
+                    if idx == 0:
+                        parts = file.split('_')
+                        name = parts[0] + '_' + parts[1] + '_' + parts[2]
+                    array = np.load(path)
+                    array = np.take(array, channels, axis=2)
+                    stack_region.append(array)
+                    filenames_region.append(os.path.basename(path))
+                print(f'Region {i + 1}/ {len(time_stack_path_lists)}', end='\r', flush=True)
+                stack = np.stack(stack_region)
+                normalized_stack = _normalize_stack(stack, backgrounds, remove_backgrounds, lower_percentile, save_dtype, signal_to_noise, signal_thresholds)
+                save_loc = os.path.join(output_fldr, f'{name}_norm_timelapse.npz')
+                np.savez(save_loc, data=normalized_stack, filenames=filenames_region)
+                print(save_loc)
+                del stack, normalized_stack
+        except Exception as e:
+            print(f"Error processing files, make sure filenames metadata is structured plate_well_field_time.npy")
+            print(f"Error: {e}")
+    else:
+        for file in os.listdir(src):
+            if file.endswith('.npy'):
+                path = os.path.join(src, file)
+                paths.append(path)
+        if randomize:
+            random.shuffle(paths)
+        nr_files = len(paths)
+        batch_index = 0
+        stack_ls = []
+        filenames_batch = []
+        for i, path in enumerate(paths):
+            array = np.load(path)
+            array = np.take(array, channels, axis=2)
+            stack_ls.append(array)
+            filenames_batch.append(os.path.basename(path))
+            print(f'Concatenated: {i + 1}/{nr_files} files')
+            if (i + 1) % batch_size == 0 or i + 1 == nr_files:
+                unique_shapes = {arr.shape[:-1] for arr in stack_ls}
+                if len(unique_shapes) > 1:
+                    max_dims = np.max(np.array(list(unique_shapes)), axis=0)
+                    print(f'Warning: arrays with multiple shapes found in batch {i + 1}. Padding arrays to max X,Y dimensions {max_dims}')
+                    padded_stack_ls = []
+                    for arr in stack_ls:
+                        pad_width = [(0, max_dim - dim) for max_dim, dim in zip(max_dims, arr.shape[:-1])]
+                        pad_width.append((0, 0))
+                        padded_arr = np.pad(arr, pad_width)
+                        padded_stack_ls.append(padded_arr)
+                    stack = np.stack(padded_stack_ls)
+                else:
+                    stack = np.stack(stack_ls)
+                normalized_stack = _normalize_img_batch(stack, backgrounds, remove_backgrounds, lower_percentile, save_dtype, signal_to_noise, signal_thresholds)
+                save_loc = os.path.join(output_fldr, f'stack_{batch_index}_norm.npz')
+                np.savez(save_loc, data=normalized_stack, filenames=filenames_batch)
+                batch_index += 1
+                del stack, normalized_stack
+                stack_ls = []
+                filenames_batch = []
+                padded_stack_ls = []
+    print(f'All files concatenated and normalized. Saved to: {output_fldr}')
+    return output_fldr
+def _normalize_img_batch(stack, backgrounds, remove_backgrounds, lower_percentile, save_dtype, signal_to_noise, signal_thresholds):
+    """
+    Normalize the stack of images.
+    Args:
+        stack (numpy.ndarray): The stack of images to normalize.
+        backgrounds (list): Background values for each channel.
+        remove_backgrounds (list): Whether to remove background values for each channel.
+        lower_percentile (int): Lower percentile value for normalization.
+        save_dtype (numpy.dtype): Data type for saving the normalized stack.
+        signal_to_noise (list): Signal-to-noise ratio thresholds for each channel.
+        signal_thresholds (list): Signal thresholds for each channel.
+    Returns:
+        numpy.ndarray: The normalized stack.
+    """
+    normalized_stack = np.zeros_like(stack, dtype=np.float32)
+    for chan_index, channel in enumerate(range(stack.shape[-1])):
+        single_channel = stack[:, :, :, channel]
+        background = backgrounds[chan_index]
+        signal_threshold = signal_thresholds[chan_index]
+        remove_background = remove_backgrounds[chan_index]
+        print(f'Processing channel {chan_index}: background={background}, signal_threshold={signal_threshold}, remove_background={remove_background}')
+        # Step 3: Remove background if required
+        if remove_background:
+            single_channel[single_channel < background] = 0
+        # Step 4: Calculate global lower percentile for the channel
+        non_zero_single_channel = single_channel[single_channel != 0]
+        global_lower = np.percentile(non_zero_single_channel, lower_percentile)
+        # Step 5: Calculate global upper percentile for the channel
+        global_upper = None
+        for upper_p in np.linspace(98, 99.5, num=16):
+            upper_value = np.percentile(non_zero_single_channel, upper_p)
+            if upper_value >= signal_threshold:
+                global_upper = upper_value
+                break
+        if global_upper is None:
+            global_upper = np.percentile(non_zero_single_channel, 99.5)  # Fallback in case no upper percentile met the threshold
+        print(f'Channel {chan_index}: global_lower={global_lower}, global_upper={global_upper}, Signal-to-noise={global_upper / global_lower}')
+        # Step 6: Normalize each array from global_lower to global_upper between 0 and 1
+        for array_index in range(single_channel.shape[0]):
+            arr_2d = single_channel[array_index, :, :]
+            arr_2d_normalized = exposure.rescale_intensity(arr_2d, in_range=(global_lower, global_upper), out_range=(0, 1))
+            normalized_stack[array_index, :, :, channel] = arr_2d_normalized
+    return normalized_stack.astype(save_dtype)
+def _normalize_img_batch_v1(stack, backgrounds, remove_backgrounds, lower_percentile, save_dtype, signal_to_noise, signal_thresholds):
+    """
+    Normalize the stack of images.
+    Args:
+        stack (numpy.ndarray): The stack of images to normalize.
+        backgrounds (list): Background values for each channel.
+        remove_backgrounds (list): Whether to remove background values for each channel.
+        lower_percentile (int): Lower percentile value for normalization.
+        save_dtype (numpy.dtype): Data type for saving the normalized stack.
+        signal_to_noise (list): Signal-to-noise ratio thresholds for each channel.
+        signal_thresholds (list): Signal thresholds for each channel.
+    Returns:
+        numpy.ndarray: The normalized stack.
+    """
+    normalized_stack = np.zeros_like(stack, dtype=np.float32)
+    time_ls = []
+    for chan_index, channel in enumerate(range(stack.shape[-1])):
+        single_channel = stack[:, :, :, channel]
+        background = backgrounds[chan_index]
+        signal_threshold = signal_thresholds[chan_index]
+        remove_background = remove_backgrounds[chan_index]
+        signal_2_noise = signal_to_noise[chan_index]
+        print(f'chan_index:{chan_index} background:{background} signal_threshold:{signal_threshold} remove_background:{remove_background} signal_2_noise:{signal_2_noise}')
+        if remove_background:
+            single_channel[single_channel < background] = 0
+        non_zero_single_channel = single_channel[single_channel != 0]
+        global_lower = np.percentile(non_zero_single_channel, lower_percentile)
+        for upper_p in np.linspace(98, 99.5, num=20).tolist():
+            global_upper = np.percentile(non_zero_single_channel, upper_p)
+            if global_upper >= signal_threshold:
+                break
+        arr_2d_normalized = np.zeros_like(single_channel, dtype=single_channel.dtype)
+        signal_to_noise_ratio_ls = []
+        for array_index in range(single_channel.shape[0]):
+            start = time.time()
+            arr_2d = single_channel[array_index, :, :]
+            non_zero_arr_2d = arr_2d[arr_2d != 0]
+            if non_zero_arr_2d.size > 0:
+                lower, upper = np.percentile(non_zero_arr_2d, (lower_percentile, upper_p))
+                signal_to_noise_ratio = upper / lower
+            else:
+                signal_to_noise_ratio = 0
+            signal_to_noise_ratio_ls.append(signal_to_noise_ratio)
+            average_stnr = np.mean(signal_to_noise_ratio_ls) if len(signal_to_noise_ratio_ls) > 0 else 0
+            if signal_to_noise_ratio > signal_2_noise:
+                arr_2d_rescaled = exposure.rescale_intensity(arr_2d, in_range=(lower, upper), out_range=(0, 1))
+                arr_2d_normalized[array_index, :, :] = arr_2d_rescaled
+            else:
+                arr_2d_normalized[array_index, :, :] = arr_2d
+            stop = time.time()
+            duration = (stop - start) * single_channel.shape[0]
+            time_ls.append(duration)
+            average_time = np.mean(time_ls) if len(time_ls) > 0 else 0
+            print(f'Progress: channels:{chan_index}/{stack.shape[-1] - 1}, arrays:{array_index + 1}/{single_channel.shape[0]}, Signal:{upper:.1f}, noise:{lower:.1f}, Signal-to-noise:{average_stnr:.1f}, Time/channel:{average_time:.2f}sec')
+        normalized_stack[:, :, :, channel] = arr_2d_normalized
+    return normalized_stack.astype(save_dtype)
 def _get_lists_for_normalization(settings):
     """
     Get lists for normalization based on the provided settings.
@@ -1035,7 +1118,8 @@ def _get_lists_for_normalization(settings):
     # Initialize the lists
     backgrounds = []
     signal_to_noise = []
-    signal_thresholds = []
+    signal_thresholds = []
+    remove_background = []
     # Iterate through the channels and append the corresponding values if the channel is not None
     for ch in settings['channels']:
@@ -1043,29 +1127,31 @@ def _get_lists_for_normalization(settings):
             backgrounds.append(settings['nucleus_background'])
             signal_to_noise.append(settings['nucleus_Signal_to_noise'])
             signal_thresholds.append(settings['nucleus_Signal_to_noise']*settings['nucleus_background'])
+            remove_background.append(settings['remove_background_nucleus'])
         elif ch == settings['cell_channel']:
             backgrounds.append(settings['cell_background'])
             signal_to_noise.append(settings['cell_Signal_to_noise'])
             signal_thresholds.append(settings['cell_Signal_to_noise']*settings['cell_background'])
+            remove_background.append(settings['remove_background_cell'])
         elif ch == settings['pathogen_channel']:
             backgrounds.append(settings['pathogen_background'])
             signal_to_noise.append(settings['pathogen_Signal_to_noise'])
             signal_thresholds.append(settings['pathogen_Signal_to_noise']*settings['pathogen_background'])
-    return backgrounds, signal_to_noise, signal_thresholds
+            remove_background.append(settings['remove_background_pathogen'])
+    return backgrounds, signal_to_noise, signal_thresholds, remove_background
-def _normalize_stack(src, backgrounds=[100,100,100], remove_background=False, lower_quantile=0.01, save_dtype=np.float32, signal_to_noise=[5,5,5], signal_thresholds=[1000,1000,1000], correct_illumination=False):
+def _normalize_stack(src, backgrounds=[100, 100, 100], remove_backgrounds=[False, False, False], lower_percentile=2, save_dtype=np.float32, signal_to_noise=[5, 5, 5], signal_thresholds=[1000, 1000, 1000]):
     """
     Normalize the stack of images.
     Args:
         src (str): The source directory containing the stack of images.
-        backgrounds (list, optional): Background values for each channel. Defaults to [100,100,100].
-        remove_background (bool, optional): Whether to remove background values. Defaults to False.
-        lower_quantile (float, optional): Lower quantile value for normalization. Defaults to 0.01.
+        backgrounds (list, optional): Background values for each channel. Defaults to [100, 100, 100].
+        remove_background (list, optional): Whether to remove background values for each channel. Defaults to [False, False, False].
+        lower_percentile (int, optional): Lower percentile value for normalization. Defaults to 2.
         save_dtype (numpy.dtype, optional): Data type for saving the normalized stack. Defaults to np.float32.
-        signal_to_noise (list, optional): Signal-to-noise ratio thresholds for each channel. Defaults to [5,5,5].
-        signal_thresholds (list, optional): Signal thresholds for each channel. Defaults to [1000,1000,1000].
-        correct_illumination (bool, optional): Whether to correct illumination. Defaults to False.
+        signal_to_noise (list, optional): Signal-to-noise ratio thresholds for each channel. Defaults to [5, 5, 5].
+        signal_thresholds (list, optional): Signal thresholds for each channel. Defaults to [1000, 1000, 1000].
     Returns:
         None
@@ -1074,11 +1160,13 @@ def _normalize_stack(src, backgrounds=[100,100,100], remove_background=False, lo
     output_fldr = os.path.join(os.path.dirname(src), 'norm_channel_stack')
     os.makedirs(output_fldr, exist_ok=True)
     time_ls = []
     for file_index, path in enumerate(paths):
         with np.load(path) as data:
             stack = data['data']
             filenames = data['filenames']
-        normalized_stack = np.zeros_like(stack, dtype=stack.dtype)
+        normalized_stack = np.zeros_like(stack, dtype=np.float32)
         file = os.path.basename(path)
         name, _ = os.path.splitext(file)
@@ -1086,24 +1174,22 @@ def _normalize_stack(src, backgrounds=[100,100,100], remove_background=False, lo
             single_channel = stack[:, :, :, channel]
             background = backgrounds[chan_index]
             signal_threshold = signal_thresholds[chan_index]
-            #print(f'signal_threshold:{signal_threshold} in {signal_thresholds} for {chan_index}')
+            remove_background = remove_backgrounds[chan_index]
             signal_2_noise = signal_to_noise[chan_index]
+            print(f'chan_index:{chan_index} background:{background} signal_threshold:{signal_threshold} remove_background:{remove_background} signal_2_noise:{signal_2_noise}')
             if remove_background:
                 single_channel[single_channel < background] = 0
-            if correct_illumination:
-                bg = filters.gaussian(single_channel, sigma=50)
-                single_channel = single_channel - bg
-            #Calculate the global lower and upper quantiles for non-zero pixels
+            # Calculate the global lower and upper percentiles for non-zero pixels
             non_zero_single_channel = single_channel[single_channel != 0]
-            global_lower = np.quantile(non_zero_single_channel, lower_quantile)
-            for upper_p in np.linspace(0.98, 1.0, num=100).tolist():
-                global_upper = np.quantile(non_zero_single_channel, upper_p)
+            global_lower = np.percentile(non_zero_single_channel, lower_percentile)
+            for upper_p in np.linspace(98, 100, num=100).tolist():
+                global_upper = np.percentile(non_zero_single_channel, upper_p)
                 if global_upper >= signal_threshold:
                     break
-            #Normalize the pixels in each image to the global quantiles and then dtype.
+            # Normalize the pixels in each image to the global percentiles and then dtype.
             arr_2d_normalized = np.zeros_like(single_channel, dtype=single_channel.dtype)
             signal_to_noise_ratio_ls = []
             for array_index in range(single_channel.shape[0]):
@@ -1111,41 +1197,40 @@ def _normalize_stack(src, backgrounds=[100,100,100], remove_background=False, lo
                 arr_2d = single_channel[array_index, :, :]
                 non_zero_arr_2d = arr_2d[arr_2d != 0]
                 if non_zero_arr_2d.size > 0:
-                    lower, upper = np.quantile(non_zero_arr_2d, (lower_quantile, upper_p))
-                    signal_to_noise_ratio = upper/lower
+                    lower, upper = np.percentile(non_zero_arr_2d, (lower_percentile, upper_p))
+                    signal_to_noise_ratio = upper / lower
                 else:
                     signal_to_noise_ratio = 0
                 signal_to_noise_ratio_ls.append(signal_to_noise_ratio)
                 average_stnr = np.mean(signal_to_noise_ratio_ls) if len(signal_to_noise_ratio_ls) > 0 else 0
                 if signal_to_noise_ratio > signal_2_noise:
-                    arr_2d_rescaled = exposure.rescale_intensity(arr_2d, in_range=(lower, upper), out_range=(global_lower, global_upper))
+                    arr_2d_rescaled = exposure.rescale_intensity(arr_2d, in_range=(lower, upper), out_range=(0, 1))
                     arr_2d_normalized[array_index, :, :] = arr_2d_rescaled
                 else:
                     arr_2d_normalized[array_index, :, :] = arr_2d
                 stop = time.time()
-                duration = (stop - start)*single_channel.shape[0]
+                duration = (stop - start) * single_channel.shape[0]
                 time_ls.append(duration)
                 average_time = np.mean(time_ls) if len(time_ls) > 0 else 0
-                #clear_output(wait=True)
-                print(f'Progress: files {file_index+1}/{len(paths)}, channels:{chan_index}/{stack.shape[-1]-1}, arrays:{array_index+1}/{single_channel.shape[0]}, Signal:{upper:.1f}, noise:{lower:.1f}, Signal-to-noise:{average_stnr:.1f}, Time/channel:{average_time:.2f}sec')
-                #print(f'Progress: files {file_index+1}/{len(paths)}, channels:{chan_index}/{stack.shape[-1]-1}, arrays:{array_index+1}/{single_channel.shape[0]}, Signal:{upper:.1f}, noise:{lower:.1f}, Signal-to-noise:{average_stnr:.1f}, Time/channel:{average_time:.2f}sec', end='\r', flush=True)
-            normalized_single_channel = exposure.rescale_intensity(arr_2d_normalized, out_range='dtype')
-            normalized_stack[:, :, :, channel] = normalized_single_channel
-        save_loc = output_fldr+'/'+name+'_norm_stack.npz'
-        normalized_stack = normalized_stack.astype(save_dtype)
-        np.savez(save_loc, data=normalized_stack, filenames=filenames)
-        del normalized_stack, single_channel, normalized_single_channel, stack, filenames
+                print(f'Progress: files {file_index + 1}/{len(paths)}, channels:{chan_index}/{stack.shape[-1] - 1}, arrays:{array_index + 1}/{single_channel.shape[0]}, Signal:{upper:.1f}, noise:{lower:.1f}, Signal-to-noise:{average_stnr:.1f}, Time/channel:{average_time:.2f}sec')
+            normalized_stack[:, :, :, channel] = arr_2d_normalized
+        save_loc = os.path.join(output_fldr, f'{name}_norm_stack.npz')
+        np.savez(save_loc, data=normalized_stack.astype(save_dtype), filenames=filenames)
+        del normalized_stack, single_channel, arr_2d_normalized, stack, filenames
         gc.collect()
-    return print(f'Saved stacks:{output_fldr}')
+    return print(f'Saved stacks: {output_fldr}')
-def _normalize_timelapse(src, lower_quantile=0.01, save_dtype=np.float32):
+def _normalize_timelapse(src, lower_percentile=2, save_dtype=np.float32):
     """
     Normalize the timelapse data by rescaling the intensity values based on percentiles.
     Args:
         src (str): The source directory containing the timelapse data files.
-        lower_quantile (float, optional): The lower quantile used to calculate the intensity range. Defaults to 0.01.
+        lower_percentile (int, optional): The lower percentile used to calculate the intensity range. Defaults to 2.
         save_dtype (numpy.dtype, optional): The data type to save the normalized stack. Defaults to np.float32.
     """
     paths = [os.path.join(src, file) for file in os.listdir(src) if file.endswith('.npz')]
@@ -1167,7 +1252,7 @@ def _normalize_timelapse(src, lower_quantile=0.01, save_dtype=np.float32):
             for array_index in range(single_channel.shape[0]):
                 arr_2d = single_channel[array_index]
                 # Calculate the lower (lower_percentile) and 98th percentiles of the non-zero pixels for this specific image
-                q_low = np.percentile(arr_2d[arr_2d != 0], 2)
+                q_low = np.percentile(arr_2d[arr_2d != 0], lower_percentile)
                 q_high = np.percentile(arr_2d[arr_2d != 0], 98)
                 # Rescale intensity based on the calculated percentiles to fill the dtype range
@@ -1261,7 +1346,7 @@ def delete_empty_subdirectories(folder_path):
 def preprocess_img_data(settings):
     from .plot import plot_arrays, _plot_4D_arrays
-    from .utils import _run_test_mode
+    from .utils import _run_test_mode, _get_regex, set_default_settings_preprocess_img_data
     """
     Preprocesses image data by converting z-stack images to maximum intensity projection (MIP) images.
@@ -1280,9 +1365,8 @@ def preprocess_img_data(settings):
         timelapse (bool, optional): Whether the images are from a timelapse experiment. Defaults to False.
         remove_background (bool, optional): Whether to remove the background from the images. Defaults to False.
         backgrounds (int, optional): The background intensity value for each channel; when background removal is enabled, pixels below this value are set to 0 during normalization. Defaults to 100.
-        lower_quantile (float, optional): The lower quantile used for background removal. Defaults to 0.01.
+        lower_percentile (float, optional): The lower percentile of non-zero pixel intensities used as the lower bound for intensity normalization. Defaults to 1.
         save_dtype (type, optional): The data type used for saving the preprocessed images. Defaults to np.float32.
-        correct_illumination (bool, optional): Whether to correct the illumination of the images. Defaults to False.
         randomize (bool, optional): Whether to randomize the order of the images. Defaults to True.
         all_to_mip (bool, optional): Whether to convert all images to MIP. Defaults to False.
         pick_slice (bool, optional): Whether to pick a specific slice based on the provided skip mode. Defaults to False.
@@ -1301,7 +1385,6 @@ def preprocess_img_data(settings):
     most_common_extension = extension_counts.most_common(1)[0][0]
     img_format = None
     delete_empty_subdirectories(src)
     # Check if the most common extension is one of the specified image formats
@@ -1318,47 +1401,15 @@ def preprocess_img_data(settings):
             print('Found existing norm_channel_stack folder. Skipping preprocessing')
             return settings, src
-    cmap = 'inferno'
-    figuresize = 20
-    normalize = True
-    save_dtype = 'uint16'
-    correct_illumination = False
-    #mask_channels = [settings['nucleus_channel'], settings['pathogen_channel'], settings['cell_channel']]
-    #backgrounds = [settings['nucleus_background'], settings['pathogen_background'], settings['cell_background']]
     mask_channels = [settings['nucleus_channel'], settings['cell_channel'], settings['pathogen_channel']]
     backgrounds = [settings['nucleus_background'], settings['cell_background'], settings['pathogen_background']]
-    metadata_type = settings['metadata_type']
-    custom_regex = settings['custom_regex']
-    nr = settings['examples_to_plot']
-    plot = settings['plot']
-    batch_size = settings['batch_size']
-    timelapse = settings['timelapse']
-    remove_background = settings['remove_background']
-    lower_quantile = settings['lower_quantile']
-    randomize = settings['randomize']
-    all_to_mip = settings['all_to_mip']
-    pick_slice = settings['pick_slice']
-    skip_mode = settings['skip_mode']
-    if not img_format == None:
-        if metadata_type == 'cellvoyager':
-            regex = f'(?P<plateID>.*)_(?P<wellID>.*)_T(?P<timeID>.*)F(?P<fieldID>.*)L(?P<laserID>..)A(?P<AID>..)Z(?P<sliceID>.*)C(?P<chanID>.*){img_format}'
-        elif metadata_type == 'cq1':
-            regex = f'W(?P<wellID>.*)F(?P<fieldID>.*)T(?P<timeID>.*)Z(?P<sliceID>.*)C(?P<chanID>.*){img_format}'
-        elif metadata_type == 'nikon':
-            regex = f'(?P<plateID>.*)_(?P<wellID>.*)_T(?P<timeID>.*)F(?P<fieldID>.*)L(?P<laserID>..)A(?P<AID>..)Z(?P<sliceID>.*)C(?P<chanID>.*){img_format}'
-        elif metadata_type == 'zeis':
-            regex = f'(?P<plateID>.*)_(?P<wellID>.*)_T(?P<timeID>.*)F(?P<fieldID>.*)L(?P<laserID>..)A(?P<AID>..)Z(?P<sliceID>.*)C(?P<chanID>.*){img_format}'
-        elif metadata_type == 'leica':
-            regex = f'(?P<plateID>.*)_(?P<wellID>.*)_T(?P<timeID>.*)F(?P<fieldID>.*)L(?P<laserID>..)A(?P<AID>..)Z(?P<sliceID>.*)C(?P<chanID>.*){img_format}'
-        elif metadata_type == 'custom':
-            regex = f'({custom_regex}){img_format}'
-        print(f'regex mode:{metadata_type} regex:{regex}')
-    if settings.get('test_mode', False):
+    settings, metadata_type, custom_regex, nr, plot, batch_size, timelapse, lower_percentile, randomize, all_to_mip, pick_slice, skip_mode, cmap, figuresize, normalize, save_dtype, test_mode, test_images, random_test = set_default_settings_preprocess_img_data(settings)
+    regex = _get_regex(metadata_type, img_format, custom_regex)
+    if test_mode:
         print(f'Running spacr in test mode')
         settings['plot'] = True
         try:
@@ -1367,7 +1418,7 @@ def preprocess_img_data(settings):
         except OSError as e:
             pass
-        src = _run_test_mode(settings['src'], regex, timelapse=timelapse)
+        src = _run_test_mode(settings['src'], regex, timelapse, test_images, random_test)
         settings['src'] = src
     if img_format == None:
@@ -1412,31 +1463,20 @@ def preprocess_img_data(settings):
         except Exception as e:
             print(f"Error: {e}")
-    print('concatinating cahnnels')
-    _concatenate_channel(src+'/stack',
-                        channels=mask_channels,
-                        randomize=randomize,
-                        timelapse=timelapse,
-                        batch_size=batch_size)
-    if plot:
-        print(f'plotting {nr} images from {src}/channel_stack')
-        _plot_4D_arrays(src+'/channel_stack', figuresize, cmap, nr_npz=1, nr=nr)
-    backgrounds, signal_to_noise, signal_thresholds = _get_lists_for_normalization(settings=settings)
-    if not timelapse:
-        _normalize_stack(src+'/channel_stack',
-                    backgrounds=backgrounds,
-                    lower_quantile=lower_quantile,
-                    save_dtype=save_dtype,
-                    signal_thresholds=signal_thresholds,
-                    correct_illumination=correct_illumination,
-                    signal_to_noise=signal_to_noise,
-                    remove_background=remove_background)
-    else:
-        _normalize_timelapse(src+'/channel_stack', lower_quantile=lower_quantile, save_dtype=np.float32)
+    backgrounds, signal_to_noise, signal_thresholds, remove_backgrounds = _get_lists_for_normalization(settings=settings)
+    concatenate_and_normalize(src+'/stack',
+                              mask_channels,
+                              randomize,
+                              timelapse,
+                              batch_size,
+                              backgrounds,
+                              remove_backgrounds,
+                              lower_percentile,
+                              np.float32,
+                              signal_to_noise,
+                              signal_thresholds)
     if plot:
         _plot_4D_arrays(src+'/norm_channel_stack', nr_npz=1, nr=nr)
@@ -1490,27 +1530,6 @@ def _get_avg_object_size(masks):
         return sum(object_areas) / len(object_areas)
     else:
         return 0  # Return 0 if no objects are found
-def _save_figure_v1(fig, src, text, dpi=300, ):
-    """
-    Save a figure to a specified location.
-    Parameters:
-    fig (matplotlib.figure.Figure): The figure to be saved.
-    src (str): The source file path.
-    text (str): The text to be included in the figure name.
-    dpi (int, optional): The resolution of the saved figure. Defaults to 300.
-    """
-    save_folder = os.path.dirname(src)
-    obj_type = os.path.basename(src)
-    name = os.path.basename(save_folder)
-    save_folder = os.path.join(save_folder, 'figure')
-    os.makedirs(save_folder, exist_ok=True)
-    fig_name = f'{obj_type}_{name}_{text}.pdf'
-    save_location = os.path.join(save_folder, fig_name)
-    fig.savefig(save_location, bbox_inches='tight', dpi=dpi)
-    print(f'Saved single cell figure: {save_location}')
-    plt.close()
 def _save_figure(fig, src, text, dpi=300, i=1, all_folders=1):
     """
@@ -1616,56 +1635,6 @@ def _save_settings_to_db(settings):
     settings_df.to_sql('settings', conn, if_exists='replace', index=False)  # Replace the table if it already exists
     conn.close()
-def _save_mask_timelapse_as_gif_v1(masks, path, cmap, norm, filenames):
-    """
-    Save a timelapse of masks as a GIF.
-    Parameters:
-    masks (list): List of mask frames.
-    path (str): Path to save the GIF.
-    cmap: Colormap for displaying the masks.
-    norm: Normalization for the masks.
-    filenames (list): List of filenames corresponding to each mask frame.
-    Returns:
-    None
-    """
-    def _update(frame):
-        """
-        Update the plot with the given frame.
-        Parameters:
-        frame (int): The frame number to update the plot with.
-        Returns:
-        None
-        """
-        nonlocal filename_text_obj
-        if filename_text_obj is not None:
-            filename_text_obj.remove()
-        ax.clear()
-        ax.axis('off')
-        current_mask = masks[frame]
-        ax.imshow(current_mask, cmap=cmap, norm=norm)
-        ax.set_title(f'Frame: {frame}', fontsize=24, color='white')
-        filename_text = filenames[frame]
-        filename_text_obj = fig.text(0.5, 0.01, filename_text, ha='center', va='center', fontsize=20, color='white')
-        for label_value in np.unique(current_mask):
-            if label_value == 0: continue  # Skip background
-            y, x = np.mean(np.where(current_mask == label_value), axis=1)
-            ax.text(x, y, str(label_value), color='white', fontsize=24, ha='center', va='center')
-    fig, ax = plt.subplots(figsize=(50, 50), facecolor='black')
-    ax.set_facecolor('black')
-    ax.axis('off')
-    plt.subplots_adjust(left=0, right=1, top=1, bottom=0, wspace=0, hspace=0)
-    filename_text_obj = None
-    anim = FuncAnimation(fig, _update, frames=len(masks), blit=False)
-    anim.save(path, writer='pillow', fps=2, dpi=80)  # Adjust DPI for size/quality
-    plt.close(fig)
-    print(f'Saved timelapse to {path}')
 def _save_mask_timelapse_as_gif(masks, tracks_df, path, cmap, norm, filenames):
     """
     Save a timelapse animation of masks as a GIF.
@@ -2409,10 +2378,9 @@ def convert_numpy_to_tiff(folder_path, limit=None):
     return
 def generate_cellpose_train_test(src, test_split=0.1):
     mask_src = os.path.join(src, 'masks')
     img_paths = glob.glob(os.path.join(src, '*.tif'))
-    img_filenames = [os.path.basename(file) for file in img_paths + img_paths]
+    img_filenames = [os.path.basename(file) for file in img_paths]
     img_filenames = [file for file in img_filenames if os.path.exists(os.path.join(mask_src, file))]
     print(f'Found {len(img_filenames)} images with masks')
@@ -2424,19 +2392,21 @@ def generate_cellpose_train_test(src, test_split=0.1):
     print(f'Split dataset into Train {len(train_files)} and Test {len(test_files)} files')
     train_dir = os.path.join(os.path.dirname(src), 'train')
-    train_dir_masks = os.path.join(train_dir, 'mask')
+    train_dir_masks = os.path.join(train_dir, 'masks')
     test_dir = os.path.join(os.path.dirname(src), 'test')
-    test_dir_masks = os.path.join(test_dir, 'mask')
+    test_dir_masks = os.path.join(test_dir, 'masks')
+    os.makedirs(train_dir, exist_ok=True)
     os.makedirs(train_dir_masks, exist_ok=True)
+    os.makedirs(test_dir, exist_ok=True)
     os.makedirs(test_dir_masks, exist_ok=True)
     for i, ls in enumerate(list_of_lists):
         if i == 0:
             dst = test_dir
             dst_mask = test_dir_masks
             _type = 'Test'
-        if i == 1:
+        else:
             dst = train_dir
             dst_mask = train_dir_masks
             _type = 'Train'
@@ -2449,130 +2419,4 @@ def generate_cellpose_train_test(src, test_split=0.1):
             shutil.copy(img_path, new_img_path)
             shutil.copy(mask_path, new_mask_path)
             print(f'Copied {idx+1}/{len(ls)} images to {_type} set', end='\r', flush=True)

spacr 0.0.36__py3-none-any.whl → 0.0.62__py3-none-any.whl

spacr 0.0.36py3-none-any.whl → 0.0.62py3-none-any.whl