PyPI - spacr - Versions diffs - 0.0.81__py3-none-any.whl → 0.1.0__py3-none-any.whl - Mend

spacr 0.0.81__py3-none-any.whl → 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22) hide show

spacr/__init__.py +4 -0
spacr/annotate_app.py +4 -0
spacr/annotate_app_v2.py +511 -0
spacr/core.py +258 -177
spacr/deep_spacr.py +137 -50
spacr/graph_learning.py +28 -8
spacr/io.py +332 -142
spacr/measure.py +2 -1
spacr/models/cp/toxo_plaque_cyto_e25000_X1120_Y1120.CP_model +0 -0
spacr/models/cp/toxo_plaque_cyto_e25000_X1120_Y1120.CP_model_settings.csv +23 -0
spacr/plot.py +102 -6
spacr/sequencing.py +849 -129
spacr/settings.py +477 -0
spacr/timelapse.py +0 -3
spacr/utils.py +312 -275
{spacr-0.0.81.dist-info → spacr-0.1.0.dist-info}/METADATA +1 -1
spacr-0.1.0.dist-info/RECORD +40 -0
spacr-0.0.81.dist-info/RECORD +0 -36
{spacr-0.0.81.dist-info → spacr-0.1.0.dist-info}/LICENSE +0 -0
{spacr-0.0.81.dist-info → spacr-0.1.0.dist-info}/WHEEL +0 -0
{spacr-0.0.81.dist-info → spacr-0.1.0.dist-info}/entry_points.txt +0 -0
{spacr-0.0.81.dist-info → spacr-0.1.0.dist-info}/top_level.txt +0 -0

spacr/io.py CHANGED Viewed

@@ -21,6 +21,7 @@ from multiprocessing import Pool, cpu_count
 from torch.utils.data import Dataset
 import matplotlib.pyplot as plt
 from torchvision.transforms import ToTensor
+import seaborn as sns
 from .logger import log_function_call
@@ -87,7 +88,7 @@ def _load_images_and_labels(image_files, label_files, circular=False, invert=Fal
         print(f'image shape: {images[0].shape}, image type: images[0].shape mask shape: {labels[0].shape}, image type: labels[0].shape')
     return images, labels, image_names, label_names
-def _load_normalized_images_and_labels(image_files, label_files, channels=None, percentiles=None,  circular=False, invert=False, visualize=False, remove_background=False, background=0, Signal_to_noise=10):
+def _load_normalized_images_and_labels_v1(image_files, label_files, channels=None, percentiles=None,  circular=False, invert=False, visualize=False, remove_background=False, background=0, Signal_to_noise=10):
     from .plot import normalize_and_visualize
     from .utils import invert_image, apply_mask
@@ -182,6 +183,115 @@ def _load_normalized_images_and_labels(image_files, label_files, channels=None,
     return normalized_images, labels, image_names, label_names
+def _load_normalized_images_and_labels(image_files, label_files, channels=None, percentiles=None,  circular=False, invert=False, visualize=False, remove_background=False, background=0, Signal_to_noise=10, target_height=None, target_width=None):
+    from .plot import normalize_and_visualize, plot_resize
+    from .utils import invert_image, apply_mask
+    from skimage.transform import resize as resizescikit
+    signal_thresholds = background * Signal_to_noise
+    lower_percentile = 2
+    images = []
+    labels = []
+    orig_dims = []
+    num_channels = 4
+    percentiles_1 = [[] for _ in range(num_channels)]
+    percentiles_99 = [[] for _ in range(num_channels)]
+    image_names = [os.path.basename(f) for f in image_files]
+    image_dir = os.path.dirname(image_files[0])
+    if label_files is not None:
+        label_names = [os.path.basename(f) for f in label_files]
+        label_dir = os.path.dirname(label_files[0])
+    # Load, normalize, and resize images
+    for i, img_file in enumerate(image_files):
+        image = cellpose.io.imread(img_file)
+        orig_dims.append((image.shape[0], image.shape[1]))
+        if invert:
+            image = invert_image(image)
+        if circular:
+            image = apply_mask(image, output_value=0)
+        # If specific channels are specified, select them
+        if channels is not None and image.ndim == 3:
+            image = image[..., channels]
+        if remove_background:
+            image[image < background] = 0
+        if image.ndim < 3:
+            image = np.expand_dims(image, axis=-1)
+        if percentiles is None:
+            for c in range(image.shape[-1]):
+                p1 = np.percentile(image[..., c], lower_percentile)
+                percentiles_1[c].append(p1)
+                for percentile in [98, 99, 99.9, 99.99, 99.999]:
+                    p = np.percentile(image[..., c], percentile)
+                    if p > signal_thresholds:
+                        percentiles_99[c].append(p)
+                        break
+        # Resize image
+        if target_height is not None and target_width is not None:
+            if image.ndim == 2:
+                image_shape = (target_height, target_width)
+            elif image.ndim == 3:
+                image_shape = (target_height, target_width, image.shape[-1])
+            image = resizescikit(image, image_shape, preserve_range=True, anti_aliasing=True).astype(image.dtype)
+        images.append(image)
+    if percentiles is None:
+        # Calculate average percentiles for normalization
+        avg_p1 = [np.mean(p) for p in percentiles_1]
+        avg_p99 = [np.mean(p) if len(p) > 0 else np.mean(percentiles_1[i]) for i, p in enumerate(percentiles_99)]
+        print(f'Average 1st percentiles: {avg_p1}, Average 99th percentiles: {avg_p99}')
+        normalized_images = []
+        for image in images:
+            normalized_image = np.zeros_like(image, dtype=np.float32)
+            for c in range(image.shape[-1]):
+                normalized_image[..., c] = rescale_intensity(image[..., c], in_range=(avg_p1[c], avg_p99[c]), out_range=(0, 1))
+            normalized_images.append(normalized_image)
+            if visualize:
+                normalize_and_visualize(image, normalized_image, title=f"Channel {c+1} Normalized")
+    else:
+        normalized_images = []
+        for image in images:
+            normalized_image = np.zeros_like(image, dtype=np.float32)
+            for c in range(image.shape[-1]):
+                low_p = np.percentile(image[..., c], percentiles[0])
+                high_p = np.percentile(image[..., c], percentiles[1])
+                normalized_image[..., c] = rescale_intensity(image[..., c], in_range=(low_p, high_p), out_range=(0, 1))
+            normalized_images.append(normalized_image)
+            if visualize:
+                normalize_and_visualize(image, normalized_image, title=f"Channel {c+1} Normalized")
+    if label_files is not None:
+        for lbl_file in label_files:
+            label = cellpose.io.imread(lbl_file)
+            # Resize label
+            if target_height is not None and target_width is not None:
+                label = resizescikit(label, (target_height, target_width), order=0, preserve_range=True, anti_aliasing=False).astype(label.dtype)
+            labels.append(label)
+    else:
+        label_names = []
+        label_dir = None
+    print(f'Loaded and normalized {len(normalized_images)} images and {len(labels)} labels from {image_dir} and {label_dir}')
+    if visualize and images and labels:
+        plot_resize(images, normalized_images, labels, labels)
+    return normalized_images, labels, image_names, label_names, orig_dims
 class CombineLoaders:
     """
@@ -203,14 +313,14 @@ class CombineLoaders:
     """
-    def _init__(self, train_loaders):
+    def __init__(self, train_loaders):
         self.train_loaders = train_loaders
         self.loader_iters = [iter(loader) for loader in train_loaders]
-    def _iter__(self):
+    def __iter__(self):
         return self
-    def _next__(self):
+    def __next__(self):
         while self.loader_iters:
             random.shuffle(self.loader_iters)  # Shuffle the loader_iters list
             for i, loader_iter in enumerate(self.loader_iters):
@@ -233,7 +343,7 @@ class CombinedDataset(Dataset):
         shuffle (bool, optional): Whether to shuffle the combined dataset. Defaults to True.
     """
-    def _init__(self, datasets, shuffle=True):
+    def __init__(self, datasets, shuffle=True):
         self.datasets = datasets
         self.lengths = [len(dataset) for dataset in datasets]
         self.total_length = sum(self.lengths)
@@ -243,14 +353,14 @@ class CombinedDataset(Dataset):
             random.shuffle(self.indices)
         else:
             self.indices = None
-    def _getitem__(self, index):
+    def __getitem__(self, index):
         if self.shuffle:
             index = self.indices[index]
         for dataset, length in zip(self.datasets, self.lengths):
             if index < length:
                 return dataset[index]
             index -= length
-    def _len__(self):
+    def __len__(self):
         return self.total_length
 class NoClassDataset(Dataset):
@@ -434,7 +544,7 @@ class NoClassDataset(Dataset):
 class TarImageDataset(Dataset):
-    def _init__(self, tar_path, transform=None):
+    def __init__(self, tar_path, transform=None):
         self.tar_path = tar_path
         self.transform = transform
@@ -442,10 +552,10 @@ class TarImageDataset(Dataset):
         with tarfile.open(self.tar_path, 'r') as f:
             self.members = [m for m in f.getmembers() if m.isfile()]
-    def _len__(self):
+    def __len__(self):
         return len(self.members)
-    def _getitem__(self, idx):
+    def __getitem__(self, idx):
         with tarfile.open(self.tar_path, 'r') as f:
             m = self.members[idx]
             img_file = f.extractfile(m)
@@ -890,7 +1000,75 @@ def _concatenate_channel(src, channels, randomize=True, timelapse=False, batch_s
     print(f'All files concatenated and saved to:{channel_stack_loc}')
     return channel_stack_loc
-def concatenate_and_normalize(src, channels, randomize=True, timelapse=False, batch_size=100, backgrounds=[100, 100, 100], remove_backgrounds=[False, False, False], lower_percentile=2, save_dtype=np.float32, signal_to_noise=[5, 5, 5], signal_thresholds=[1000, 1000, 1000]):
+def _normalize_img_batch(stack, channels, save_dtype, settings):
+    """
+    Normalize the stack of images.
+    Args:
+        stack (numpy.ndarray): The stack of images to normalize.
+        lower_percentile (int): Lower percentile value for normalization.
+        save_dtype (numpy.dtype): Data type for saving the normalized stack.
+        settings (dict): keword arguments
+    Returns:
+        numpy.ndarray: The normalized stack.
+    """
+    normalized_stack = np.zeros_like(stack, dtype=np.float32)
+    #for channel in range(stack.shape[-1]):
+    for channel in channels:
+        if channel == settings['nucleus_channel']:
+            background = settings['nucleus_background']
+            signal_threshold = settings['nucleus_Signal_to_noise']*settings['nucleus_background']
+            remove_background = settings['remove_background_nucleus']
+        if channel == settings['cell_channel']:
+            background = settings['cell_background']
+            signal_threshold = settings['cell_Signal_to_noise']*settings['cell_background']
+            remove_background = settings['remove_background_cell']
+        if channel == settings['pathogen_channel']:
+            background = settings['pathogen_background']
+            signal_threshold = settings['pathogen_Signal_to_noise']*settings['pathogen_background']
+            remove_background = settings['remove_background_pathogen']
+        single_channel = stack[:, :, :, channel]
+        print(f'Processing channel {channel}: background={background}, signal_threshold={signal_threshold}, remove_background={remove_background}')
+        # Step 3: Remove background if required
+        if remove_background:
+            single_channel[single_channel < background] = 0
+        # Step 4: Calculate global lower percentile for the channel
+        non_zero_single_channel = single_channel[single_channel != 0]
+        global_lower = np.percentile(non_zero_single_channel, settings['lower_percentile'])
+        # Step 5: Calculate global upper percentile for the channel
+        global_upper = None
+        for upper_p in np.linspace(98, 99.5, num=16):
+            upper_value = np.percentile(non_zero_single_channel, upper_p)
+            if upper_value >= signal_threshold:
+                global_upper = upper_value
+                break
+        if global_upper is None:
+            global_upper = np.percentile(non_zero_single_channel, 99.5)  # Fallback in case no upper percentile met the threshold
+        print(f'Channel {channel}: global_lower={global_lower}, global_upper={global_upper}, Signal-to-noise={global_upper / global_lower}')
+        # Step 6: Normalize each array from global_lower to global_upper between 0 and 1
+        for array_index in range(single_channel.shape[0]):
+            arr_2d = single_channel[array_index, :, :]
+            arr_2d_normalized = exposure.rescale_intensity(arr_2d, in_range=(global_lower, global_upper), out_range=(0, 1))
+            normalized_stack[array_index, :, :, channel] = arr_2d_normalized
+    return normalized_stack.astype(save_dtype)
+def concatenate_and_normalize(src, channels, save_dtype=np.float32, settings={}):
     """
     Concatenates and normalizes channel data from multiple files and saves the normalized data.
@@ -910,12 +1088,14 @@ def concatenate_and_normalize(src, channels, randomize=True, timelapse=False, ba
     Returns:
         str: The directory path where the concatenated and normalized channel data is saved.
     """
+    # n c p
     channels = [item for item in channels if item is not None]
     paths = []
     output_fldr = os.path.join(os.path.dirname(src), 'norm_channel_stack')
     os.makedirs(output_fldr, exist_ok=True)
-    if timelapse:
+    if settings['timelapse']:
         try:
             time_stack_path_lists = _generate_time_lists(os.listdir(src))
             for i, time_stack_list in enumerate(time_stack_path_lists):
@@ -927,12 +1107,19 @@ def concatenate_and_normalize(src, channels, randomize=True, timelapse=False, ba
                         parts = file.split('_')
                         name = parts[0] + '_' + parts[1] + '_' + parts[2]
                     array = np.load(path)
-                    array = np.take(array, channels, axis=2)
+                    #array = np.take(array, channels, axis=2)
                     stack_region.append(array)
                     filenames_region.append(os.path.basename(path))
                 print(f'Region {i + 1}/ {len(time_stack_path_lists)}', end='\r', flush=True)
                 stack = np.stack(stack_region)
-                normalized_stack = _normalize_stack(stack, backgrounds, remove_backgrounds, lower_percentile, save_dtype, signal_to_noise, signal_thresholds)
+                normalized_stack = _normalize_img_batch(stack=stack,
+                                                        channels=channels,
+                                                        save_dtype=save_dtype,
+                                                        settings=settings)
+                normalized_stack = normalized_stack[..., channels]
                 save_loc = os.path.join(output_fldr, f'{name}_norm_timelapse.npz')
                 np.savez(save_loc, data=normalized_stack, filenames=filenames_region)
                 print(save_loc)
@@ -945,7 +1132,7 @@ def concatenate_and_normalize(src, channels, randomize=True, timelapse=False, ba
             if file.endswith('.npy'):
                 path = os.path.join(src, file)
                 paths.append(path)
-        if randomize:
+        if settings['randomize']:
             random.shuffle(paths)
         nr_files = len(paths)
         batch_index = 0
@@ -954,12 +1141,12 @@ def concatenate_and_normalize(src, channels, randomize=True, timelapse=False, ba
         for i, path in enumerate(paths):
             array = np.load(path)
-            array = np.take(array, channels, axis=2)
+            #array = np.take(array, channels, axis=2)
             stack_ls.append(array)
             filenames_batch.append(os.path.basename(path))
             print(f'Concatenated: {i + 1}/{nr_files} files')
-            if (i + 1) % batch_size == 0 or i + 1 == nr_files:
+            if (i + 1) % settings['batch_size'] == 0 or i + 1 == nr_files:
                 unique_shapes = {arr.shape[:-1] for arr in stack_ls}
                 if len(unique_shapes) > 1:
                     max_dims = np.max(np.array(list(unique_shapes)), axis=0)
@@ -973,8 +1160,13 @@ def concatenate_and_normalize(src, channels, randomize=True, timelapse=False, ba
                     stack = np.stack(padded_stack_ls)
                 else:
                     stack = np.stack(stack_ls)
-                normalized_stack = _normalize_img_batch(stack, backgrounds, remove_backgrounds, lower_percentile, save_dtype, signal_to_noise, signal_thresholds)
+                normalized_stack = _normalize_img_batch(stack=stack,
+                                                        channels=channels,
+                                                        save_dtype=save_dtype,
+                                                        settings=settings)
+                normalized_stack = normalized_stack[..., channels]
                 save_loc = os.path.join(output_fldr, f'stack_{batch_index}_norm.npz')
                 np.savez(save_loc, data=normalized_stack, filenames=filenames_batch)
@@ -983,64 +1175,10 @@ def concatenate_and_normalize(src, channels, randomize=True, timelapse=False, ba
                 stack_ls = []
                 filenames_batch = []
                 padded_stack_ls = []
     print(f'All files concatenated and normalized. Saved to: {output_fldr}')
     return output_fldr
-def _normalize_img_batch(stack, backgrounds, remove_backgrounds, lower_percentile, save_dtype, signal_to_noise, signal_thresholds):
-    """
-    Normalize the stack of images.
-    Args:
-        stack (numpy.ndarray): The stack of images to normalize.
-        backgrounds (list): Background values for each channel.
-        remove_backgrounds (list): Whether to remove background values for each channel.
-        lower_percentile (int): Lower percentile value for normalization.
-        save_dtype (numpy.dtype): Data type for saving the normalized stack.
-        signal_to_noise (list): Signal-to-noise ratio thresholds for each channel.
-        signal_thresholds (list): Signal thresholds for each channel.
-    Returns:
-        numpy.ndarray: The normalized stack.
-    """
-    normalized_stack = np.zeros_like(stack, dtype=np.float32)
-    for chan_index, channel in enumerate(range(stack.shape[-1])):
-        single_channel = stack[:, :, :, channel]
-        background = backgrounds[chan_index]
-        signal_threshold = signal_thresholds[chan_index]
-        remove_background = remove_backgrounds[chan_index]
-        print(f'Processing channel {chan_index}: background={background}, signal_threshold={signal_threshold}, remove_background={remove_background}')
-        # Step 3: Remove background if required
-        if remove_background:
-            single_channel[single_channel < background] = 0
-        # Step 4: Calculate global lower percentile for the channel
-        non_zero_single_channel = single_channel[single_channel != 0]
-        global_lower = np.percentile(non_zero_single_channel, lower_percentile)
-        # Step 5: Calculate global upper percentile for the channel
-        global_upper = None
-        for upper_p in np.linspace(98, 99.5, num=16):
-            upper_value = np.percentile(non_zero_single_channel, upper_p)
-            if upper_value >= signal_threshold:
-                global_upper = upper_value
-                break
-        if global_upper is None:
-            global_upper = np.percentile(non_zero_single_channel, 99.5)  # Fallback in case no upper percentile met the threshold
-        print(f'Channel {chan_index}: global_lower={global_lower}, global_upper={global_upper}, Signal-to-noise={global_upper / global_lower}')
-        # Step 6: Normalize each array from global_lower to global_upper between 0 and 1
-        for array_index in range(single_channel.shape[0]):
-            arr_2d = single_channel[array_index, :, :]
-            arr_2d_normalized = exposure.rescale_intensity(arr_2d, in_range=(global_lower, global_upper), out_range=(0, 1))
-            normalized_stack[array_index, :, :, channel] = arr_2d_normalized
-    return normalized_stack.astype(save_dtype)
 def _get_lists_for_normalization(settings):
     """
     Get lists for normalization based on the provided settings.
@@ -1059,22 +1197,25 @@ def _get_lists_for_normalization(settings):
     remove_background = []
     # Iterate through the channels and append the corresponding values if the channel is not None
-    for ch in settings['channels']:
-        if ch == settings['nucleus_channel']:
-            backgrounds.append(settings['nucleus_background'])
-            signal_to_noise.append(settings['nucleus_Signal_to_noise'])
-            signal_thresholds.append(settings['nucleus_Signal_to_noise']*settings['nucleus_background'])
-            remove_background.append(settings['remove_background_nucleus'])
-        elif ch == settings['cell_channel']:
-            backgrounds.append(settings['cell_background'])
-            signal_to_noise.append(settings['cell_Signal_to_noise'])
-            signal_thresholds.append(settings['cell_Signal_to_noise']*settings['cell_background'])
-            remove_background.append(settings['remove_background_cell'])
-        elif ch == settings['pathogen_channel']:
-            backgrounds.append(settings['pathogen_background'])
-            signal_to_noise.append(settings['pathogen_Signal_to_noise'])
-            signal_thresholds.append(settings['pathogen_Signal_to_noise']*settings['pathogen_background'])
-            remove_background.append(settings['remove_background_pathogen'])
+    # for ch in settings['channels']:
+    for ch in [settings['nucleus_channel'], settings['cell_channel'], settings['pathogen_channel']]:
+        if not ch is None:
+            if ch == settings['nucleus_channel']:
+                backgrounds.append(settings['nucleus_background'])
+                signal_to_noise.append(settings['nucleus_Signal_to_noise'])
+                signal_thresholds.append(settings['nucleus_Signal_to_noise']*settings['nucleus_background'])
+                remove_background.append(settings['remove_background_nucleus'])
+            elif ch == settings['cell_channel']:
+                backgrounds.append(settings['cell_background'])
+                signal_to_noise.append(settings['cell_Signal_to_noise'])
+                signal_thresholds.append(settings['cell_Signal_to_noise']*settings['cell_background'])
+                remove_background.append(settings['remove_background_cell'])
+            elif ch == settings['pathogen_channel']:
+                backgrounds.append(settings['pathogen_background'])
+                signal_to_noise.append(settings['pathogen_Signal_to_noise'])
+                signal_thresholds.append(settings['pathogen_Signal_to_noise']*settings['pathogen_background'])
+                remove_background.append(settings['remove_background_pathogen'])
     return backgrounds, signal_to_noise, signal_thresholds, remove_background
 def _normalize_stack(src, backgrounds=[100, 100, 100], remove_backgrounds=[False, False, False], lower_percentile=2, save_dtype=np.float32, signal_to_noise=[5, 5, 5], signal_thresholds=[1000, 1000, 1000]):
@@ -1283,7 +1424,8 @@ def delete_empty_subdirectories(folder_path):
 def preprocess_img_data(settings):
     from .plot import plot_arrays, _plot_4D_arrays
-    from .utils import _run_test_mode, _get_regex, set_default_settings_preprocess_img_data
+    from .utils import _run_test_mode, _get_regex
+    from .settings import set_default_settings_preprocess_img_data
     """
     Preprocesses image data by converting z-stack images to maximum intensity projection (MIP) images.
@@ -1400,19 +1542,10 @@ def preprocess_img_data(settings):
         except Exception as e:
             print(f"Error: {e}")
-    backgrounds, signal_to_noise, signal_thresholds, remove_backgrounds = _get_lists_for_normalization(settings=settings)
-    concatenate_and_normalize(src+'/stack',
-                              mask_channels,
-                              randomize,
-                              timelapse,
-                              batch_size,
-                              backgrounds,
-                              remove_backgrounds,
-                              lower_percentile,
-                              np.float32,
-                              signal_to_noise,
-                              signal_thresholds)
+    concatenate_and_normalize(src=src+'/stack',
+                              channels=mask_channels,
+                              save_dtype=np.float32,
+                              settings=settings)
     if plot:
         _plot_4D_arrays(src+'/norm_channel_stack', nr_npz=1, nr=nr)
@@ -1494,13 +1627,13 @@ def _save_figure(fig, src, text, dpi=300, i=1, all_folders=1):
     del fig
     gc.collect()
-def _read_and_join_tables(db_path, table_names=['cell', 'cytoplasm', 'nucleus', 'pathogen', 'parasite', 'png_list']):
+def _read_and_join_tables(db_path, table_names=['cell', 'cytoplasm', 'nucleus', 'pathogen', 'png_list']):
     """
     Reads and joins tables from a SQLite database.
     Args:
         db_path (str): The path to the SQLite database file.
-        table_names (list, optional): The names of the tables to read and join. Defaults to ['cell', 'cytoplasm', 'nucleus', 'pathogen', 'parasite', 'png_list'].
+        table_names (list, optional): The names of the tables to read and join. Defaults to ['cell', 'cytoplasm', 'nucleus', 'pathogen', 'png_list'].
     Returns:
         pandas.DataFrame: The joined DataFrame containing the data from the specified tables, or None if an error occurs.
@@ -1522,9 +1655,9 @@ def _read_and_join_tables(db_path, table_names=['cell', 'cytoplasm', 'nucleus',
             join_cols = ['object_label', 'plate', 'row', 'col']
             dataframes['cell'] = pd.merge(dataframes['cell'], png_list_df, on=join_cols, how='left')
         else:
-            print("Cell table not found. Cannot join with png_list.")
-            return None
-    for entity in ['nucleus', 'pathogen', 'parasite']:
+            print("Cell table not found in database tables.")
+            return png_list_df
+    for entity in ['nucleus', 'pathogen']:
         if entity in dataframes:
             numeric_cols = dataframes[entity].select_dtypes(include=[np.number]).columns.tolist()
             non_numeric_cols = dataframes[entity].select_dtypes(exclude=[np.number]).columns.tolist()
@@ -1537,14 +1670,11 @@ def _read_and_join_tables(db_path, table_names=['cell', 'cytoplasm', 'nucleus',
     joined_df = None
     if 'cell' in dataframes:
         joined_df = dataframes['cell']
-        if 'cytoplasm' in dataframes:
-            joined_df = pd.merge(joined_df, dataframes['cytoplasm'], on=['object_label', 'prcf'], how='left', suffixes=('', '_cytoplasm'))
-        for entity in ['nucleus', 'pathogen']:
-            if entity in dataframes:
-                joined_df = pd.merge(joined_df, dataframes[entity], left_on=['object_label', 'prcf'], right_index=True, how='left', suffixes=('', f'_{entity}'))
-    else:
-        print("Cell table not found. Cannot proceed with joining.")
-        return None
+    if 'cytoplasm' in dataframes:
+        joined_df = pd.merge(joined_df, dataframes['cytoplasm'], on=['object_label', 'prcf'], how='left', suffixes=('', '_cytoplasm'))
+    for entity in ['nucleus', 'pathogen']:
+        if entity in dataframes:
+            joined_df = pd.merge(joined_df, dataframes[entity], left_on=['object_label', 'prcf'], right_index=True, how='left', suffixes=('', f'_{entity}'))
     return joined_df
 def _save_settings_to_db(settings):
@@ -1993,8 +2123,75 @@ def _results_to_csv(src, df, df_well):
 ###################################################
 #  Classify
 ###################################################
+def read_plot_model_stats(file_path ,save=False):
-def _save_model(model, model_type, results_df, dst, epoch, epochs, intermedeate_save=[0.99,0.98,0.95,0.94]):
+    def _plot_and_save(train_df, val_df, column='accuracy', save=False, path=None, dpi=600):
+        pdf_path = os.path.join(path, f'{column}.pdf')
+        # Create subplots
+        fig, axes = plt.subplots(1, 2, figsize=(20, 10), sharey=True)
+        # Plotting
+        sns.lineplot(ax=axes[0], x='epoch', y=column, data=train_df, marker='o', color='red')
+        sns.lineplot(ax=axes[1], x='epoch', y=column, data=val_df, marker='o', color='blue')
+        # Set titles and labels
+        axes[0].set_title(f'Train {column} vs. Epoch', fontsize=20)
+        axes[0].set_xlabel('Epoch', fontsize=16)
+        axes[0].set_ylabel(column, fontsize=16)
+        axes[0].tick_params(axis='both', which='major', labelsize=12)
+        axes[1].set_title(f'Validation {column} vs. Epoch', fontsize=20)
+        axes[1].set_xlabel('Epoch', fontsize=16)
+        axes[1].tick_params(axis='both', which='major', labelsize=12)
+        plt.tight_layout()
+        if save:
+            plt.savefig(pdf_path, format='pdf', dpi=dpi)
+        else:
+            plt.show()
+    # Read the CSV into a dataframe
+    df = pd.read_csv(file_path, index_col=0)
+    # Split the dataframe into train and validation based on the index
+    train_df = df.filter(like='_train', axis=0).copy()
+    val_df = df.filter(like='_val', axis=0).copy()
+    fldr_1 = os.path.dirname(file_path)
+    train_csv_path = os.path.join(fldr_1, 'train.csv')
+    val_csv_path = os.path.join(fldr_1, 'validation.csv')
+    fldr_2 = os.path.dirname(fldr_1)
+    fldr_3 = os.path.dirname(fldr_2)
+    bn_1 = os.path.basename(fldr_1)
+    bn_2 = os.path.basename(fldr_2)
+    bn_3 = os.path.basename(fldr_3)
+    model_name = str(f'{bn_1}_{bn_2}_{bn_3}')
+    # Extract epochs from index
+    train_df['epoch'] = [int(idx.split('_')[0]) for idx in train_df.index]
+    val_df['epoch'] = [int(idx.split('_')[0]) for idx in val_df.index]
+    # Save dataframes to a CSV file
+    train_df.to_csv(train_csv_path)
+    val_df.to_csv(val_csv_path)
+    if save:
+        # Setting the style
+        sns.set(style="whitegrid")
+    _plot_and_save(train_df, val_df, column='accuracy', save=save, path=fldr_1)
+    _plot_and_save(train_df, val_df, column='neg_accuracy', save=save, path=fldr_1)
+    _plot_and_save(train_df, val_df, column='pos_accuracy', save=save, path=fldr_1)
+    _plot_and_save(train_df, val_df, column='loss', save=save, path=fldr_1)
+    _plot_and_save(train_df, val_df, column='prauc', save=save, path=fldr_1)
+    _plot_and_save(train_df, val_df, column='optimal_threshold', save=save, path=fldr_1)
+def _save_model(model, model_type, results_df, dst, epoch, epochs, intermedeate_save=[0.99,0.98,0.95,0.94], channels=['r','g','b']):
     """
     Save the model based on certain conditions during training.
@@ -2007,35 +2204,25 @@ def _save_model(model, model_type, results_df, dst, epoch, epochs, intermedeate_
         epochs (int): The total number of epochs.
         intermedeate_save (list, optional): List of accuracy thresholds to trigger intermediate model saves.
                                             Defaults to [0.99, 0.98, 0.95, 0.94].
+        channels (list, optional): List of channels used. Defaults to ['r', 'g', 'b'].
     """
-    if epoch % 100 == 0:
-        torch.save(model, f'{dst}/{model_type}_epoch_{str(epoch)}.pth')
-    if epoch == epochs:
-        torch.save(model, f'{dst}/{model_type}_epoch_{str(epoch)}.pth')
-    if results_df['neg_accuracy'].dropna().mean() >= intermedeate_save[0] and results_df['pos_accuracy'].dropna().mean() >= intermedeate_save[0]:
-        percentile = str(intermedeate_save[0]*100)
-        print(f'\rfound: {percentile}% accurate model', end='\r', flush=True)
-        torch.save(model, f'{dst}/{model_type}_epoch_{str(epoch)}_acc_{str(percentile)}.pth')
-    elif results_df['neg_accuracy'].dropna().mean() >= intermedeate_save[1] and results_df['pos_accuracy'].dropna().mean() >= intermedeate_save[1]:
-        percentile = str(intermedeate_save[1]*100)
-        print(f'\rfound: {percentile}% accurate model', end='\r', flush=True)
-        torch.save(model, f'{dst}/{model_type}_epoch_{str(epoch)}_acc_{str(percentile)}.pth')
+    channels_str = ''.join(channels)
-    elif results_df['neg_accuracy'].dropna().mean() >= intermedeate_save[2] and results_df['pos_accuracy'].dropna().mean() >= intermedeate_save[2]:
-        percentile = str(intermedeate_save[2]*100)
+    def save_model_at_threshold(threshold, epoch, suffix=""):
+        percentile = str(threshold * 100)
         print(f'\rfound: {percentile}% accurate model', end='\r', flush=True)
-        torch.save(model, f'{dst}/{model_type}_epoch_{str(epoch)}_acc_{str(percentile)}.pth')
-    elif results_df['neg_accuracy'].dropna().mean() >= intermedeate_save[3] and results_df['pos_accuracy'].dropna().mean() >= intermedeate_save[3]:
-        percentile = str(intermedeate_save[3]*100)
-        print(f'\rfound: {percentile}% accurate model', end='\r', flush=True)
-        torch.save(model, f'{dst}/{model_type}_epoch_{str(epoch)}_acc_{str(percentile)}.pth')
+        torch.save(model, f'{dst}/{model_type}_epoch_{str(epoch)}{suffix}_acc_{percentile}_channels_{channels_str}.pth')
+    if epoch % 100 == 0 or epoch == epochs:
+        torch.save(model, f'{dst}/{model_type}_epoch_{str(epoch)}_channels_{channels_str}.pth')
-def _save_progress(dst, results_df, train_metrics_df):
+    for threshold in intermedeate_save:
+        if results_df['neg_accuracy'].dropna().mean() >= threshold and results_df['pos_accuracy'].dropna().mean() >= threshold:
+            save_model_at_threshold(threshold, epoch)
+            break  # Ensure we only save for the highest matching threshold
+def _save_progress(dst, results_df, train_metrics_df, epoch, epochs):
     """
     Save the progress of the classification model.
@@ -2054,11 +2241,14 @@ def _save_progress(dst, results_df, train_metrics_df):
         results_df.to_csv(results_path, index=True, header=True, mode='w')
     else:
         results_df.to_csv(results_path, index=True, header=False, mode='a')
     training_metrics_path = os.path.join(dst, 'training_metrics.csv')
     if not os.path.exists(training_metrics_path):
         train_metrics_df.to_csv(training_metrics_path, index=True, header=True, mode='w')
     else:
         train_metrics_df.to_csv(training_metrics_path, index=True, header=False, mode='a')
+    if epoch == epochs:
+        read_plot_model_stats(results_path, save=True)
     return
 def _save_settings(settings, src):

spacr 0.0.81__py3-none-any.whl → 0.1.0__py3-none-any.whl

spacr 0.0.81__py3-none-any.whl → 0.1.0__py3-none-any.whl