spacr 0.0.1__py3-none-any.whl → 0.0.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
spacr/utils.py CHANGED
@@ -1,10 +1,18 @@
- import os, re, sqlite3, gc, torch, torchvision, time, random, string, shutil, cv2, tarfile, glob
+ import sys, os, re, sqlite3, torch, torchvision, random, string, shutil, cv2, tarfile, glob

  import numpy as np
+ from cellpose import models as cp_models
+ from cellpose import denoise
+
  from skimage import morphology
  from skimage.measure import label, regionprops_table, regionprops
  import skimage.measure as measure
- from collections import defaultdict
+ from skimage.transform import resize as resizescikit
+ from skimage.morphology import dilation, square
+ from skimage.measure import find_contours
+ from skimage.segmentation import clear_border
+
+ from collections import defaultdict, OrderedDict
  from PIL import Image
  import pandas as pd
  from statsmodels.stats.outliers_influence import variance_inflation_factor
@@ -13,37 +21,257 @@ import statsmodels.formula.api as smf
  import statsmodels.api as sm
  from statsmodels.stats.multitest import multipletests
  from itertools import combinations
- from collections import OrderedDict
  from functools import reduce
- from IPython.display import display, clear_output
+ from IPython.display import display
+
  from multiprocessing import Pool, cpu_count
- from skimage.transform import resize as resizescikit
+ from concurrent.futures import ThreadPoolExecutor
+
  import torch.nn as nn
  import torch.nn.functional as F
- #from torchsummary import summary
  from torch.utils.checkpoint import checkpoint
  from torch.utils.data import Subset
  from torch.autograd import grad
- from torchvision import models
- from skimage.segmentation import clear_border
+
  import seaborn as sns
  import matplotlib.pyplot as plt
+ from matplotlib.offsetbox import OffsetImage, AnnotationBbox
+
  import scipy.ndimage as ndi
+ from scipy.spatial import distance
  from scipy.stats import fisher_exact
- from scipy.ndimage import binary_erosion, binary_dilation
+ from scipy.ndimage.filters import gaussian_filter
+ from scipy.spatial import ConvexHull
+ from scipy.interpolate import splprep, splev
+
+ from sklearn.preprocessing import StandardScaler
  from skimage.exposure import rescale_intensity
  from sklearn.metrics import auc, precision_recall_curve
  from sklearn.model_selection import train_test_split
  from sklearn.linear_model import Lasso, Ridge
  from sklearn.preprocessing import OneHotEncoder
+ from sklearn.cluster import KMeans
+ from sklearn.preprocessing import StandardScaler
+ from sklearn.cluster import DBSCAN
+ from sklearn.cluster import KMeans
+ from sklearn.manifold import TSNE
+
+ import umap.umap_ as umap
+
+ from torchvision import models
  from torchvision.models.resnet import ResNet18_Weights, ResNet34_Weights, ResNet50_Weights, ResNet101_Weights, ResNet152_Weights
+ import torchvision.transforms as transforms

  from .logger import log_function_call

- #from .io import _read_and_join_tables, _save_figure
- #from .timelapse import _btrack_track_cells, _trackpy_track_cells
- #from .plot import _plot_images_on_grid, plot_masks, _plot_histograms_and_stats, plot_resize, _plot_plates, _reg_v_plot, plot_masks
- #from .core import identify_masks
+ def check_mask_folder(src,mask_fldr):
+
+     mask_folder = os.path.join(src,'norm_channel_stack',mask_fldr)
+     stack_folder = os.path.join(src,'stack')
+
+     if not os.path.exists(mask_folder):
+         return True
+
+     mask_count = sum(1 for file in os.listdir(mask_folder) if file.endswith('.npy'))
+     stack_count = sum(1 for file in os.listdir(stack_folder) if file.endswith('.npy'))
+
+     if mask_count == stack_count:
+         print(f'All masks have been generated for {mask_fldr}')
+         return False
+     else:
+         return True
+
+ def set_default_plot_merge_settings():
+     settings = {}
+     settings.setdefault('include_noninfected', True)
+     settings.setdefault('include_multiinfected', True)
+     settings.setdefault('include_multinucleated', True)
+     settings.setdefault('remove_background', False)
+     settings.setdefault('filter_min_max', None)
+     settings.setdefault('channel_dims', [0,1,2,3])
+     settings.setdefault('backgrounds', [100,100,100,100])
+     settings.setdefault('cell_mask_dim', 4)
+     settings.setdefault('nucleus_mask_dim', 5)
+     settings.setdefault('pathogen_mask_dim', 6)
+     settings.setdefault('outline_thickness', 3)
+     settings.setdefault('outline_color', 'gbr')
+     settings.setdefault('overlay_chans', [1,2,3])
+     settings.setdefault('overlay', True)
+     settings.setdefault('normalization_percentiles', [2,98])
+     settings.setdefault('normalize', True)
+     settings.setdefault('print_object_number', True)
+     settings.setdefault('nr', 1)
+     settings.setdefault('figuresize', 50)
+     settings.setdefault('cmap', 'inferno')
+     settings.setdefault('verbose', True)
+
+     return settings
+
+ def set_default_settings_preprocess_generate_masks(src, settings={}):
+     # Main settings
+     settings['src'] = src
+     settings.setdefault('preprocess', True)
+     settings.setdefault('masks', True)
+     settings.setdefault('save', True)
+     settings.setdefault('batch_size', 50)
+     settings.setdefault('test_mode', False)
+     settings.setdefault('test_images', 10)
+     settings.setdefault('magnification', 20)
+     settings.setdefault('custom_regex', None)
+     settings.setdefault('metadata_type', 'cellvoyager')
+     settings.setdefault('workers', os.cpu_count()-4)
+     settings.setdefault('randomize', True)
+     settings.setdefault('verbose', True)
+
+     settings.setdefault('remove_background_cell', False)
+     settings.setdefault('remove_background_nucleus', False)
+     settings.setdefault('remove_background_pathogen', False)
+
+     # Channel settings
+     settings.setdefault('cell_channel', None)
+     settings.setdefault('nucleus_channel', None)
+     settings.setdefault('pathogen_channel', None)
+     settings.setdefault('channels', [0,1,2,3])
+     settings.setdefault('pathogen_background', 100)
+     settings.setdefault('pathogen_Signal_to_noise', 10)
+     settings.setdefault('pathogen_CP_prob', 0)
+     settings.setdefault('cell_background', 100)
+     settings.setdefault('cell_Signal_to_noise', 10)
+     settings.setdefault('cell_CP_prob', 0)
+     settings.setdefault('nucleus_background', 100)
+     settings.setdefault('nucleus_Signal_to_noise', 10)
+     settings.setdefault('nucleus_CP_prob', 0)
+
+     settings.setdefault('nucleus_FT', 100)
+     settings.setdefault('cell_FT', 100)
+     settings.setdefault('pathogen_FT', 100)
+
+     # Plot settings
+     settings.setdefault('plot', False)
+     settings.setdefault('figuresize', 50)
+     settings.setdefault('cmap', 'inferno')
+     settings.setdefault('normalize', True)
+     settings.setdefault('normalize_plots', True)
+     settings.setdefault('examples_to_plot', 1)
+
+     # Analasys settings
+     settings.setdefault('pathogen_model', None)
+     settings.setdefault('merge_pathogens', False)
+     settings.setdefault('filter', False)
+     settings.setdefault('lower_percentile', 2)
+
+     # Timelapse settings
+     settings.setdefault('timelapse', False)
+     settings.setdefault('fps', 2)
+     settings.setdefault('timelapse_displacement', None)
+     settings.setdefault('timelapse_memory', 3)
+     settings.setdefault('timelapse_frame_limits', None)
+     settings.setdefault('timelapse_remove_transient', False)
+     settings.setdefault('timelapse_mode', 'trackpy')
+     settings.setdefault('timelapse_objects', 'cells')
+
+     # Misc settings
+     settings.setdefault('all_to_mip', False)
+     settings.setdefault('pick_slice', False)
+     settings.setdefault('skip_mode', '01')
+     settings.setdefault('upscale', False)
+     settings.setdefault('upscale_factor', 2.0)
+     settings.setdefault('adjust_cells', False)
+
+     return settings
+
+ def set_default_settings_preprocess_img_data(settings):
+
+     metadata_type = settings.setdefault('metadata_type', 'cellvoyager')
+     custom_regex = settings.setdefault('custom_regex', None)
+     nr = settings.setdefault('nr', 1)
+     plot = settings.setdefault('plot', True)
+     batch_size = settings.setdefault('batch_size', 50)
+     timelapse = settings.setdefault('timelapse', False)
+     lower_percentile = settings.setdefault('lower_percentile', 2)
+     randomize = settings.setdefault('randomize', True)
+     all_to_mip = settings.setdefault('all_to_mip', False)
+     pick_slice = settings.setdefault('pick_slice', False)
+     skip_mode = settings.setdefault('skip_mode', False)
+
+     cmap = settings.setdefault('cmap', 'inferno')
+     figuresize = settings.setdefault('figuresize', 50)
+     normalize = settings.setdefault('normalize', True)
+     save_dtype = settings.setdefault('save_dtype', 'uint16')
+
+     test_mode = settings.setdefault('test_mode', False)
+     test_images = settings.setdefault('test_images', 10)
+     random_test = settings.setdefault('random_test', True)
+
+     return settings, metadata_type, custom_regex, nr, plot, batch_size, timelapse, lower_percentile, randomize, all_to_mip, pick_slice, skip_mode, cmap, figuresize, normalize, save_dtype, test_mode, test_images, random_test
+
+ def smooth_hull_lines(cluster_data):
+     hull = ConvexHull(cluster_data)
+
+     # Extract vertices of the hull
+     vertices = hull.points[hull.vertices]
+
+     # Close the loop
+     vertices = np.vstack([vertices, vertices[0, :]])
+
+     # Parameterize the vertices
+     tck, u = splprep(vertices.T, u=None, s=0.0)
+
+     # Evaluate spline at new parameter values
+     new_points = splev(np.linspace(0, 1, 100), tck)
+
+     return new_points[0], new_points[1]
+
+ def _gen_rgb_image(image, channels):
+     """
+     Generate an RGB image from the specified channels of the input image.
+
+     Args:
+         image (ndarray): The input image.
+         channels (list): List of channel indices to use for RGB.
+
+     Returns:
+         rgb_image (ndarray): The generated RGB image.
+     """
+     rgb_image = np.zeros((image.shape[0], image.shape[1], 3), dtype=np.float32)
+     for i, chan in enumerate(channels):
+         if chan < image.shape[2]:
+             rgb_image[:, :, i] = image[:, :, chan]
+     return rgb_image
+
+ def _outline_and_overlay(image, rgb_image, mask_dims, outline_colors, outline_thickness):
+     outlines = []
+     overlayed_image = rgb_image.copy()
+
+     def process_dim(mask_dim):
+         mask = np.take(image, mask_dim, axis=-1)
+         outline = np.zeros_like(mask, dtype=np.uint8) # Use uint8 for contour detection efficiency
+
+         # Find and draw contours
+         for j in np.unique(mask):
+             if j == 0:
+                 continue # Skip background
+             contours = find_contours(mask == j, 0.5)
+             # Convert contours for OpenCV format and draw directly to optimize
+             cv_contours = [np.flip(contour.astype(int), axis=1) for contour in contours]
+             cv2.drawContours(outline, cv_contours, -1, color=255, thickness=outline_thickness)
+
+         return dilation(outline, square(outline_thickness))
+
+     # Parallel processing
+     with ThreadPoolExecutor() as executor:
+         outlines = list(executor.map(process_dim, mask_dims))
+
+     # Overlay outlines onto the RGB image
+     for i, outline in enumerate(outlines):
+         color = np.array(outline_colors[i % len(outline_colors)])
+         for j in np.unique(outline):
+             if j == 0:
+                 continue # Skip background
+             mask = outline == j
+             overlayed_image[mask] = color # Direct assignment with broadcasting
+
+     return overlayed_image, outlines, image

  def _convert_cq1_well_id(well_id):
      """
@@ -114,8 +342,8 @@ def _extract_filename_metadata(filenames, src, images_by_key, regular_expression
          if metadata_type =='cq1':
              orig_wellID = wellID
              wellID = _convert_cq1_well_id(wellID)
-             clear_output(wait=True)
-             print(f'\033[KConverted Well ID: {orig_wellID} to {wellID}', end='\r', flush=True)
+             #clear_output(wait=True)
+             print(f'Converted Well ID: {orig_wellID} to {wellID}', end='\r', flush=True)

          if pick_slice:
              try:
@@ -302,43 +530,82 @@ def _annotate_conditions(df, cells=['HeLa'], cell_loc=None, pathogens=['rh'], pa
      df['condition'] = df.apply(lambda row: '_'.join(filter(None, [row.get('pathogen'), row.get('treatment')])), axis=1)
      df['condition'] = df['condition'].apply(lambda x: x if x else 'none')
      return df
-
- def normalize_to_dtype(array, q1=2,q2=98, percentiles=None):
+
+ def normalize_to_dtype(array, p1=2, p2=98):
      """
-     Normalize the input array to a specified data type.
+     Normalize each image in the stack to its own percentiles.

      Parameters:
      - array: numpy array
-       The input array to be normalized.
-     - q1: int, optional
+       The input stack to be normalized.
+     - p1: int, optional
        The lower percentile value for normalization. Default is 2.
-     - q2: int, optional
+     - p2: int, optional
        The upper percentile value for normalization. Default is 98.
-     - percentiles: list of tuples, optional
-       A list of tuples containing the percentile values for each image in the array.
-       If provided, the percentiles for each image will be used instead of q1 and q2.

      Returns:
      - new_stack: numpy array
-       The normalized array with the same shape as the input array.
+       The normalized stack with the same shape as the input stack.
      """
      nimg = array.shape[2]
      new_stack = np.empty_like(array)
-     for i,v in enumerate(range(nimg)):
-         img = np.squeeze(array[:, :, v])
+
+     for i in range(nimg):
+         img = array[:, :, i]
          non_zero_img = img[img > 0]
-         if non_zero_img.size > 0: # check if there are non-zero values
-             img_min = np.percentile(non_zero_img, q1) # change percentile from 0.02 to 2
-             img_max = np.percentile(non_zero_img, q2) # change percentile from 0.98 to 98
-             img = rescale_intensity(img, in_range=(img_min, img_max), out_range='dtype')
-         else: # if there are no non-zero values, just use the image as it is
-             if percentiles==None:
-                 img_min, img_max = img.min(), img.max()
-             else:
-                 img_min, img_max = percentiles[i]
-             img = rescale_intensity(img, in_range=(img_min, img_max), out_range='dtype')
-         img = np.expand_dims(img, axis=2)
-         new_stack[:, :, v] = img[:, :, 0]
+
+         if non_zero_img.size > 0:
+             img_min = np.percentile(non_zero_img, p1)
+             img_max = np.percentile(non_zero_img, p2)
+         else:
+             img_min = img.min()
+             img_max = img.max()
+
+         # Determine output range based on dtype
+         if np.issubdtype(array.dtype, np.integer):
+             out_range = (0, np.iinfo(array.dtype).max)
+         else:
+             out_range = (0.0, 1.0)
+
+         img = rescale_intensity(img, in_range=(img_min, img_max), out_range=out_range).astype(array.dtype)
+         new_stack[:, :, i] = img
+
+     return new_stack
+
+ def normalize_to_dtype(array, p1=2, p2=98):
+     """
+     Normalize each image in the stack to its own percentiles.
+
+     Parameters:
+     - array: numpy array
+       The input stack to be normalized.
+     - p1: int, optional
+       The lower percentile value for normalization. Default is 2.
+     - p2: int, optional
+       The upper percentile value for normalization. Default is 98.
+
+     Returns:
+     - new_stack: numpy array
+       The normalized stack with the same shape as the input stack.
+     """
+     nimg = array.shape[2]
+     new_stack = np.empty_like(array, dtype=np.float32)
+
+     for i in range(nimg):
+         img = array[:, :, i]
+         non_zero_img = img[img > 0]
+
+         if non_zero_img.size > 0:
+             img_min = np.percentile(non_zero_img, p1)
+             img_max = np.percentile(non_zero_img, p2)
+         else:
+             img_min = img.min()
+             img_max = img.max()
+
+         # Normalize to the range (0, 1) for visualization
+         img = rescale_intensity(img, in_range=(img_min, img_max), out_range=(0.0, 1.0))
+         new_stack[:, :, i] = img
+
      return new_stack

  def _list_endpoint_subdirectories(base_dir):
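Review note: the new version defines `normalize_to_dtype` twice with the same signature. Python keeps only the second definition, so the dtype-preserving variant is dead code and every caller gets the float32, (0, 1)-range variant. If both behaviours are intended, a merged sketch (the `preserve_dtype` flag is hypothetical, not part of the package):

    def normalize_to_dtype(array, p1=2, p2=98, preserve_dtype=False):
        # Percentile-normalize each image in the stack; either rescale back to
        # the input dtype's full range (preserve_dtype=True) or to float32 in
        # (0, 1) for visualization, the behaviour of the second definition.
        nimg = array.shape[2]
        out_dtype = array.dtype if preserve_dtype else np.float32
        new_stack = np.empty_like(array, dtype=out_dtype)
        for i in range(nimg):
            img = array[:, :, i]
            non_zero = img[img > 0]
            if non_zero.size > 0:
                img_min, img_max = np.percentile(non_zero, (p1, p2))
            else:
                img_min, img_max = img.min(), img.max()
            if preserve_dtype and np.issubdtype(array.dtype, np.integer):
                out_range = (0, np.iinfo(array.dtype).max)
            else:
                out_range = (0.0, 1.0)
            new_stack[:, :, i] = rescale_intensity(img, in_range=(img_min, img_max), out_range=out_range).astype(out_dtype)
        return new_stack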
@@ -673,9 +940,6 @@ def _crop_center(img, cell_mask, new_width, new_height, normalize=(2,98)):
          img = img[start_y:end_y, start_x:end_x, :]
      return img

-
-
-
  def _masks_to_masks_stack(masks):
      """
      Convert a list of masks into a stack of masks.
@@ -692,53 +956,50 @@ def _masks_to_masks_stack(masks):
      return mask_stack

  def _get_diam(mag, obj):
-     if obj == 'cell':
-         if mag == 20:
-             scale = 6
-         if mag == 40:
-             scale = 4.5
-         if mag == 60:
-             scale = 3
-     elif obj == 'nucleus':
-         if mag == 20:
-             scale = 3
-         if mag == 40:
-             scale = 2
-         if mag == 60:
-             scale = 1.5
-     elif obj == 'pathogen':
-         if mag == 20:
-             scale = 1.5
-         if mag == 40:
-             scale = 1
-         if mag == 60:
-             scale = 1.25
-     elif obj == 'pathogen_nucleus':
-         if mag == 20:
-             scale = 0.25
-         if mag == 40:
-             scale = 0.2
-         if mag == 60:
-             scale = 0.2
+
+     if mag == 20:
+         if obj == 'cell':
+             diamiter = 120
+         elif obj == 'nucleus':
+             diamiter = 60
+         elif obj == 'pathogen':
+             diamiter = 20
+         else:
+             raise ValueError("Invalid magnification: Use 20, 40 or 60")
+
+     elif mag == 40:
+         if obj == 'cell':
+             diamiter = 160
+         elif obj == 'nucleus':
+             diamiter = 80
+         elif obj == 'pathogen':
+             diamiter = 40
+         else:
+             raise ValueError("Invalid magnification: Use 20, 40 or 60")
+
+     elif mag == 60:
+         if obj == 'cell':
+             diamiter = 200
+         if obj == 'nucleus':
+             diamiter = 90
+         if obj == 'pathogen':
+             diamiter = 60
+         else:
+             raise ValueError("Invalid magnification: Use 20, 40 or 60")
      else:
-         raise ValueError("Invalid object type")
-     diamiter = mag*scale
+         raise ValueError("Invalid magnification: Use 20, 40 or 60")
+
      return diamiter

  def _get_object_settings(object_type, settings):
-
      object_settings = {}
-     object_settings['refine_masks'] = False
-     object_settings['filter_size'] = False
-     object_settings['filter_dimm'] = False
-     print(object_type)
+
      object_settings['diameter'] = _get_diam(settings['magnification'], obj=object_type)
-     object_settings['remove_border_objects'] = False
-     object_settings['minimum_size'] = (object_settings['diameter']**2)/10
-     object_settings['maximum_size'] = object_settings['minimum_size']*50
+     object_settings['minimum_size'] = (object_settings['diameter']**2)/4
+     object_settings['maximum_size'] = (object_settings['diameter']**2)*10
      object_settings['merge'] = False
-     object_settings['net_avg'] = True
      object_settings['resample'] = True
+     object_settings['remove_border_objects'] = False
      object_settings['model_name'] = 'cyto'

      if object_type == 'cell':
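Review note: `_get_diam` now returns fixed pixel diameters per magnification and object type instead of the old `mag*scale` products (also note the `diamiter` spelling is carried over from the package). In the `mag == 60` branch, however, the `nucleus` and `pathogen` checks use `if` rather than `elif`, so the trailing `else` binds only to the last `if`; at 60x, `obj='cell'` or `obj='nucleus'` assigns a diameter and then still raises. The error text also says "Invalid magnification" where an invalid object type is meant. A sketch of the presumably intended branch:

    elif mag == 60:
        if obj == 'cell':
            diamiter = 200
        elif obj == 'nucleus':
            diamiter = 90
        elif obj == 'pathogen':
            diamiter = 60
        else:
            raise ValueError("Invalid object type: Use cell, nucleus or pathogen")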
@@ -746,20 +1007,29 @@ def _get_object_settings(object_type, settings):
              object_settings['model_name'] = 'cyto'
          else:
              object_settings['model_name'] = 'cyto2'
-
+         object_settings['filter_size'] = False
+         object_settings['filter_intensity'] = False
+         object_settings['restore_type'] = settings.get('cell_restore_type', None)
+
      elif object_type == 'nucleus':
          object_settings['model_name'] = 'nuclei'
+         object_settings['filter_size'] = False
+         object_settings['filter_intensity'] = False
+         object_settings['restore_type'] = settings.get('nucleus_restore_type', None)

      elif object_type == 'pathogen':
-         object_settings['model_name'] = 'cyto3'
-
-     elif object_type == 'pathogen_nucleus':
-         object_settings['filter_size'] = True
          object_settings['model_name'] = 'cyto'
+         object_settings['filter_size'] = False
+         object_settings['filter_intensity'] = False
+         object_settings['resample'] = False
+         object_settings['restore_type'] = settings.get('pathogen_restore_type', None)
+         object_settings['merge'] = settings['merge_pathogens']

      else:
          print(f'Object type: {object_type} not supported. Supported object types are : cell, nucleus and pathogen')
-         print(f'using settings: {object_settings}')
+
+     if settings['verbose']:
+         print(object_settings)

      return object_settings

@@ -786,6 +1056,7 @@ def _pivot_counts_table(db_path):
          return df

      def _pivot_dataframe(df):
+
          """
          Pivot the DataFrame.

@@ -812,61 +1083,32 @@ def _pivot_counts_table(db_path):
      pivoted_df.to_sql('pivoted_counts', conn, if_exists='replace', index=False)
      conn.close()

- def _get_cellpose_channels_v1(mask_channels, nucleus_chann_dim, pathogen_chann_dim, cell_chann_dim):
-     cellpose_channels = {}
-     if nucleus_chann_dim in mask_channels:
-         cellpose_channels['nucleus'] = [0, mask_channels.index(nucleus_chann_dim)]
-     if pathogen_chann_dim in mask_channels:
-         cellpose_channels['pathogen'] = [0, mask_channels.index(pathogen_chann_dim)]
-     if cell_chann_dim in mask_channels:
-         cellpose_channels['cell'] = [0, mask_channels.index(cell_chann_dim)]
-     return cellpose_channels
+ def _get_cellpose_channels(src, nucleus_channel, pathogen_channel, cell_channel):

- def _get_cellpose_channels_v1(cell_channel, nucleus_channel, pathogen_channel):
-     # Initialize a dictionary to hold the new indices for the specified channels
-     cellpose_channels = {}
+     cell_mask_path = os.path.join(src, 'norm_channel_stack', 'cell_mask_stack')
+     nucleus_mask_path = os.path.join(src, 'norm_channel_stack', 'nucleus_mask_stack')
+     pathogen_mask_path = os.path.join(src, 'norm_channel_stack', 'pathogen_mask_stack')

-     # Initialize a list to keep track of the channels in their new order
-     new_channel_order = []
-
-
-     if cell_channel is not None:
-         new_channel_order.append(('cell', cell_channel))
-     if nucleus_channel is not None:
-         new_channel_order.append(('nucleus', nucleus_channel))
-     if pathogen_channel is not None:
-         new_channel_order.append(('pathogen', pathogen_channel))
-
-     # Sort the list based on the original channel indices to maintain the original order
-     new_channel_order.sort(key=lambda x: x[1])
-     print(new_channel_order)
-     # Assign new indices based on the sorted order
-     for new_index, (channel_name, _) in enumerate(new_channel_order):
-         cellpose_channels[channel_name] = [new_index, 0]
-
-     if cell_channel is not None and nucleus_channel is not None:
-         cellpose_channels['cell'][1] = cellpose_channels['nucleus'][0]
-
-     return cellpose_channels

- def _get_cellpose_channels(nucleus_channel, pathogen_channel, cell_channel):
+     if os.path.exists(cell_mask_path) or os.path.exists(nucleus_mask_path) or os.path.exists(pathogen_mask_path):
+         if nucleus_channel is None or nucleus_channel is None or nucleus_channel is None:
+             print('Warning: Cellpose masks already exist. Unexpected behaviour when setting any object dimention to None when the object masks have been created.')
+
      cellpose_channels = {}
      if not nucleus_channel is None:
          cellpose_channels['nucleus'] = [0,0]

      if not pathogen_channel is None:
          if not nucleus_channel is None:
-             cellpose_channels['pathogen'] = [0,1]
+             if not pathogen_channel is None:
+                 cellpose_channels['pathogen'] = [0,2]
+             else:
+                 cellpose_channels['pathogen'] = [0,1]
          else:
              cellpose_channels['pathogen'] = [0,0]

      if not cell_channel is None:
          if not nucleus_channel is None:
-             if not pathogen_channel is None:
-                 cellpose_channels['cell'] = [0,2]
-             else:
-                 cellpose_channels['cell'] = [0,1]
-         elif not pathogen_channel is None:
              cellpose_channels['cell'] = [0,1]
          else:
              cellpose_channels['cell'] = [0,0]
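Review note: in the rewritten `_get_cellpose_channels`, the guard `if nucleus_channel is None or nucleus_channel is None or nucleus_channel is None` repeats the same test three times; it presumably meant to check all three channel arguments. Likewise the inner `if not pathogen_channel is None` sits inside an identical outer check, so it is always true and the `[0,1]` branch is unreachable (perhaps `cell_channel` was meant). A sketch of the presumably intended guard:

    if os.path.exists(cell_mask_path) or os.path.exists(nucleus_mask_path) or os.path.exists(pathogen_mask_path):
        if nucleus_channel is None or pathogen_channel is None or cell_channel is None:
            print('Warning: Cellpose masks already exist. Unexpected behaviour when setting any object dimension to None when the object masks have been created.')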
@@ -1027,9 +1269,6 @@ def _group_by_well(df):
      # Apply mean function to numeric columns and first to non-numeric
      df_grouped = df.groupby(['plate', 'row', 'col']).agg({**{col: np.mean for col in numeric_cols}, **{col: 'first' for col in non_numeric_cols}})
      return df_grouped
-
-
-

  ###################################################
  # Classify
@@ -1044,7 +1283,7 @@ class Cache:
          cache (OrderedDict): The cache data structure.
      """

-     def _init__(self, max_size):
+     def __init__(self, max_size):
          self.cache = OrderedDict()
          self.max_size = max_size

@@ -1075,7 +1314,7 @@ class ScaledDotProductAttention(nn.Module):

      """

-     def _init__(self, d_k):
+     def __init__(self, d_k):
          super(ScaledDotProductAttention, self).__init__()
          self.d_k = d_k

@@ -1106,7 +1345,7 @@ class SelfAttention(nn.Module):
          d_k (int): Dimensionality of the key and query vectors.
      """

-     def _init__(self, in_channels, d_k):
+     def __init__(self, in_channels, d_k):
          super(SelfAttention, self).__init__()
          self.W_q = nn.Linear(in_channels, d_k)
          self.W_k = nn.Linear(in_channels, d_k)
@@ -1130,7 +1369,7 @@ class SelfAttention(nn.Module):
          return output

  class ScaledDotProductAttention(nn.Module):
-     def _init__(self, d_k):
+     def __init__(self, d_k):
          """
          Initializes the ScaledDotProductAttention module.

@@ -1167,7 +1406,7 @@ class SelfAttention(nn.Module):
          in_channels (int): Number of input channels.
          d_k (int): Dimensionality of the key and query vectors.
      """
-     def _init__(self, in_channels, d_k):
+     def __init__(self, in_channels, d_k):
          super(SelfAttention, self).__init__()
          self.W_q = nn.Linear(in_channels, d_k)
          self.W_k = nn.Linear(in_channels, d_k)
@@ -1198,7 +1437,7 @@ class EarlyFusion(nn.Module):
      Args:
          in_channels (int): Number of input channels.
      """
-     def _init__(self, in_channels):
+     def __init__(self, in_channels):
          super(EarlyFusion, self).__init__()
          self.conv1 = nn.Conv2d(in_channels, 64, kernel_size=1, stride=1)

@@ -1217,7 +1456,7 @@ class EarlyFusion(nn.Module):

  # Spatial Attention Mechanism
  class SpatialAttention(nn.Module):
-     def _init__(self, kernel_size=7):
+     def __init__(self, kernel_size=7):
          """
          Initializes the SpatialAttention module.

@@ -1262,7 +1501,7 @@ class MultiScaleBlockWithAttention(nn.Module):
          forward: Forward method for the module.
      """

-     def _init__(self, in_channels, out_channels):
+     def __init__(self, in_channels, out_channels):
          super(MultiScaleBlockWithAttention, self).__init__()
          self.dilated_conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, dilation=1, padding=1)
          self.spatial_attention = nn.Conv2d(out_channels, out_channels, kernel_size=1)
@@ -1295,7 +1534,7 @@ class MultiScaleBlockWithAttention(nn.Module):

  # Final Classifier
  class CustomCellClassifier(nn.Module):
-     def _init__(self, num_classes, pathogen_channel, use_attention, use_checkpoint, dropout_rate):
+     def __init__(self, num_classes, pathogen_channel, use_attention, use_checkpoint, dropout_rate):
          super(CustomCellClassifier, self).__init__()
          self.early_fusion = EarlyFusion(in_channels=3)

@@ -1324,7 +1563,7 @@ class CustomCellClassifier(nn.Module):

  #CNN and Transformer class, pick any Torch model.
  class TorchModel(nn.Module):
-     def _init__(self, model_name='resnet50', pretrained=True, dropout_rate=None, use_checkpoint=False):
+     def __init__(self, model_name='resnet50', pretrained=True, dropout_rate=None, use_checkpoint=False):
          super(TorchModel, self).__init__()
          self.model_name = model_name
          self.use_checkpoint = use_checkpoint
@@ -1398,7 +1637,7 @@ class TorchModel(nn.Module):
          return logits

  class FocalLossWithLogits(nn.Module):
-     def _init__(self, alpha=1, gamma=2):
+     def __init__(self, alpha=1, gamma=2):
          super(FocalLossWithLogits, self).__init__()
          self.alpha = alpha
          self.gamma = gamma
@@ -1410,7 +1649,7 @@ class FocalLossWithLogits(nn.Module):
          return focal_loss.mean()

  class ResNet(nn.Module):
-     def _init__(self, resnet_type='resnet50', dropout_rate=None, use_checkpoint=False, init_weights='imagenet'):
+     def __init__(self, resnet_type='resnet50', dropout_rate=None, use_checkpoint=False, init_weights='imagenet'):
          super(ResNet, self).__init__()

          resnet_map = {
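Review note: the twelve `_init__` to `__init__` renames in the hunks above (Cache, ScaledDotProductAttention, SelfAttention, EarlyFusion, SpatialAttention, MultiScaleBlockWithAttention, CustomCellClassifier, TorchModel, FocalLossWithLogits, ResNet) are functional fixes, not cosmetic ones. With a single leading underscore, Python never calls `_init__` as the constructor, so instantiation fell through to the parent class and none of the attributes were ever set. A minimal demonstration:

    class Cache:
        def _init__(self, max_size):   # not a dunder, never invoked by Python
            self.max_size = max_size

    Cache(10)   # TypeError: Cache() takes no arguments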
@@ -1763,25 +2002,24 @@ def annotate_predictions(csv_loc):
      df['cond'] = df.apply(assign_condition, axis=1)
      return df

- def init_globals(counter_, lock_):
+ def initiate_counter(counter_, lock_):
      global counter, lock
      counter = counter_
      lock = lock_

- def add_images_to_tar(args):
-     global counter, lock, total_images
-     paths_chunk, tar_path = args
+ def add_images_to_tar(paths_chunk, tar_path, total_images):
      with tarfile.open(tar_path, 'w') as tar:
-         for img_path in paths_chunk:
+         for i, img_path in enumerate(paths_chunk):
              arcname = os.path.basename(img_path)
              try:
                  tar.add(img_path, arcname=arcname)
                  with lock:
                      counter.value += 1
-                     print(f"\rProcessed: {counter.value}/{total_images}", end='', flush=True)
+                     if counter.value % 100 == 0: # Print every 100 updates
+                         progress = (counter.value / total_images) * 100
+                         print(f"Progress: {counter.value}/{total_images} ({progress:.2f}%)", end='\r', file=sys.stdout, flush=True)
              except FileNotFoundError:
                  print(f"File not found: {img_path}")
-     return tar_path

  def generate_fraction_map(df, gene_column, min_frequency=0.0):
      df['fraction'] = df['count']/df['well_read_sum']
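Review note: `initiate_counter` (renamed from `init_globals`) is a process-pool initializer that publishes a shared counter and lock as module globals, which `add_images_to_tar` still reads; the rewrite also drops the `return tar_path` and introduces a loop index `i` that is never used. A usage sketch under those assumptions (the pool wiring and paths are illustrative, not from the package):

    from multiprocessing import Pool, Value, Lock

    counter = Value('i', 0)   # shared progress counter across workers
    lock = Lock()             # guards counter increments
    # Each task: (paths_chunk, tar_path, total_images); paths are hypothetical.
    chunks = [(['img_0.png', 'img_1.png'], 'part_0.tar', 4),
              (['img_2.png', 'img_3.png'], 'part_1.tar', 4)]
    with Pool(processes=2, initializer=initiate_counter, initargs=(counter, lock)) as pool:
        pool.starmap(add_images_to_tar, chunks)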
@@ -2230,8 +2468,8 @@ def dice_coefficient(mask1, mask2):
  def extract_boundaries(mask, dilation_radius=1):
      binary_mask = (mask > 0).astype(np.uint8)
      struct_elem = np.ones((dilation_radius*2+1, dilation_radius*2+1))
-     dilated = binary_dilation(binary_mask, footprint=struct_elem)
-     eroded = binary_erosion(binary_mask, footprint=struct_elem)
+     dilated = morphology.binary_dilation(binary_mask, footprint=struct_elem)
+     eroded = morphology.binary_erosion(binary_mask, footprint=struct_elem)
      boundary = dilated ^ eroded
      return boundary

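Review note: this change fixes a latent TypeError. The old names came from `scipy.ndimage`, whose `binary_dilation`/`binary_erosion` take the structuring element as `structure=`, not `footprint=`, so the old calls could not run as written; the skimage `morphology` functions do accept `footprint=`. For comparison:

    from scipy import ndimage
    from skimage import morphology
    ndimage.binary_dilation(binary_mask, structure=struct_elem)     # SciPy keyword
    morphology.binary_dilation(binary_mask, footprint=struct_elem)  # skimage keyword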
@@ -2612,24 +2850,21 @@ def _filter_object(mask, min_value):
      mask[np.isin(mask, to_remove)] = 0
      return mask

- def _filter_cp_masks(masks, flows, filter_size, minimum_size, maximum_size, remove_border_objects, merge, filter_dimm, batch, moving_avg_q1, moving_avg_q3, moving_count, plot, figuresize):
+ def _filter_cp_masks(masks, flows, filter_size, filter_intensity, minimum_size, maximum_size, remove_border_objects, merge, batch, plot, figuresize):
+
      """
      Filter the masks based on various criteria such as size, border objects, merging, and intensity.

      Args:
          masks (list): List of masks.
          flows (list): List of flows.
-         refine_masks (bool): Flag indicating whether to refine masks.
          filter_size (bool): Flag indicating whether to filter based on size.
+         filter_intensity (bool): Flag indicating whether to filter based on intensity.
          minimum_size (int): Minimum size of objects to keep.
          maximum_size (int): Maximum size of objects to keep.
          remove_border_objects (bool): Flag indicating whether to remove border objects.
          merge (bool): Flag indicating whether to merge adjacent objects.
-         filter_dimm (bool): Flag indicating whether to filter based on intensity.
          batch (ndarray): Batch of images.
-         moving_avg_q1 (float): Moving average of the first quartile of object intensities.
-         moving_avg_q3 (float): Moving average of the third quartile of object intensities.
-         moving_count (int): Count of moving averages.
          plot (bool): Flag indicating whether to plot the masks.
          figuresize (tuple): Size of the figure.

@@ -2641,51 +2876,66 @@ def _filter_cp_masks(masks, flows, filter_size, minimum_size, maximum_size, remo

      mask_stack = []
      for idx, (mask, flow, image) in enumerate(zip(masks, flows[0], batch)):
+
          if plot and idx == 0:
              num_objects = mask_object_count(mask)
              print(f'Number of objects before filtration: {num_objects}')
              plot_masks(batch=image, masks=mask, flows=flow, cmap='inferno', figuresize=figuresize, nr=1, file_type='.npz', print_object_number=True)

-         if filter_size:
-             props = measure.regionprops_table(mask, properties=['label', 'area']) # Measure properties of labeled image regions.
-             valid_labels = props['label'][np.logical_and(props['area'] > minimum_size, props['area'] < maximum_size)] # Select labels of valid size.
-             masks[idx] = np.isin(mask, valid_labels) * mask # Keep only valid objects.
+         if merge:
+             mask = merge_touching_objects(mask, threshold=0.66)
              if plot and idx == 0:
                  num_objects = mask_object_count(mask)
-                 print(f'Number of objects after size filtration >{minimum_size} and <{maximum_size} : {num_objects}')
+                 print(f'Number of objects after merging adjacent objects, : {num_objects}')
                  plot_masks(batch=image, masks=mask, flows=flow, cmap='inferno', figuresize=figuresize, nr=1, file_type='.npz', print_object_number=True)
-         if remove_border_objects:
-             mask = clear_border(mask)
+
+         if filter_size:
+             props = measure.regionprops_table(mask, properties=['label', 'area'])
+             valid_labels = props['label'][np.logical_and(props['area'] > minimum_size, props['area'] < maximum_size)]
+             mask = np.isin(mask, valid_labels) * mask
              if plot and idx == 0:
                  num_objects = mask_object_count(mask)
-                 print(f'Number of objects after removing border objects, : {num_objects}')
+                 print(f'Number of objects after size filtration >{minimum_size} and <{maximum_size} : {num_objects}')
                  plot_masks(batch=image, masks=mask, flows=flow, cmap='inferno', figuresize=figuresize, nr=1, file_type='.npz', print_object_number=True)
-         if merge:
-             mask = merge_touching_objects(mask, threshold=0.25)
+
+         if filter_intensity:
+             intensity_image = image[:, :, 1]
+             props = measure.regionprops_table(mask, intensity_image=intensity_image, properties=['label', 'mean_intensity'])
+             mean_intensities = np.array(props['mean_intensity']).reshape(-1, 1)
+
+             if mean_intensities.shape[0] >= 2:
+                 kmeans = KMeans(n_clusters=2, random_state=0).fit(mean_intensities)
+                 centroids = kmeans.cluster_centers_
+
+                 # Calculate the Euclidean distance between the two centroids
+                 dist_between_centroids = distance.euclidean(centroids[0], centroids[1])
+
+                 # Set a threshold for the minimum distance to consider clusters distinct
+                 distance_threshold = 0.25
+
+                 if dist_between_centroids > distance_threshold:
+                     high_intensity_cluster = np.argmax(centroids)
+                     valid_labels = np.array(props['label'])[kmeans.labels_ == high_intensity_cluster]
+                     mask = np.isin(mask, valid_labels) * mask
+
              if plot and idx == 0:
                  num_objects = mask_object_count(mask)
-                 print(f'Number of objects after merging adjacent objects, : {num_objects}')
+                 props_after = measure.regionprops_table(mask, intensity_image=intensity_image, properties=['label', 'mean_intensity'])
+                 mean_intensities_after = np.mean(np.array(props_after['mean_intensity']))
+                 average_intensity_before = np.mean(mean_intensities)
+                 print(f'Number of objects after potential intensity clustering: {num_objects}. Mean intensity before:{average_intensity_before:.4f}. After:{mean_intensities_after:.4f}.')
                  plot_masks(batch=image, masks=mask, flows=flow, cmap='inferno', figuresize=figuresize, nr=1, file_type='.npz', print_object_number=True)
-         if filter_dimm:
-             unique_labels = np.unique(mask)
-             if len(unique_labels) == 1 and unique_labels[0] == 0:
-                 continue
-             object_intensities = [np.mean(batch[idx, :, :, 1][mask == label]) for label in unique_labels if label != 0]
-             object_q1s = [np.percentile(intensities, 25) for intensities in object_intensities if intensities.size > 0]
-             object_q3s = [np.percentile(intensities, 75) for intensities in object_intensities if intensities.size > 0]
-             if object_q1s:
-                 object_q1_mean = np.mean(object_q1s)
-                 object_q3_mean = np.mean(object_q3s)
-                 moving_avg_q1 = (moving_avg_q1 * moving_count + object_q1_mean) / (moving_count + 1)
-                 moving_avg_q3 = (moving_avg_q3 * moving_count + object_q3_mean) / (moving_count + 1)
-                 moving_count += 1
-             mask = remove_intensity_objects(batch[idx, :, :, 1], mask, intensity_threshold=moving_avg_q1, mode='low')
-             mask = remove_intensity_objects(batch[idx, :, :, 1], mask, intensity_threshold=moving_avg_q3, mode='high')
+
+
+         if remove_border_objects:
+             mask = clear_border(mask)
              if plot and idx == 0:
                  num_objects = mask_object_count(mask)
-                 print(f'Objects after intensity filtration > {moving_avg_q1} and <{moving_avg_q3}: {num_objects}')
+                 print(f'Number of objects after removing border objects, : {num_objects}')
                  plot_masks(batch=image, masks=mask, flows=flow, cmap='inferno', figuresize=figuresize, nr=1, file_type='.npz', print_object_number=True)
+
          mask_stack.append(mask)
+
      return mask_stack

  def _object_filter(df, object_type, size_range, intensity_range, mask_chans, mask_chan):
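Review note: the new `filter_intensity` branch replaces the old moving-average quartile filter with a two-means split on per-object mean intensity, keeping only the brighter cluster when the centroids are more than 0.25 intensity units apart; that fixed threshold appears to assume images normalized to (0, 1), consistent with the float `normalize_to_dtype` above. A standalone sketch of the same gate (names are illustrative, not from the package):

    import numpy as np
    from sklearn.cluster import KMeans

    def bright_cluster_labels(labels, mean_intensities, min_separation=0.25):
        # Split objects into two intensity clusters; if the clusters are
        # clearly separated, keep only the labels in the brighter one.
        X = np.asarray(mean_intensities, dtype=float).reshape(-1, 1)
        if X.shape[0] < 2:
            return list(labels)
        km = KMeans(n_clusters=2, random_state=0, n_init=10).fit(X)
        centers = km.cluster_centers_.ravel()
        if abs(centers[0] - centers[1]) <= min_separation:
            return list(labels)          # clusters overlap; filter nothing
        bright = int(np.argmax(centers))
        return [l for l, c in zip(labels, km.labels_) if c == bright]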
@@ -2721,6 +2971,1098 @@ def _object_filter(df, object_type, size_range, intensity_range, mask_chans, mas
          print(f'After {object_type} maximum mean intensity filter: {len(df)}')
      return df

- ###################################################
- # Classify
- ###################################################
+ def _get_regex(metadata_type, img_format, custom_regex=None):
+
+     if img_format == None:
+         img_format == '.tif'
+     if metadata_type == 'cellvoyager':
+         regex = f'(?P<plateID>.*)_(?P<wellID>.*)_T(?P<timeID>.*)F(?P<fieldID>.*)L(?P<laserID>..)A(?P<AID>..)Z(?P<sliceID>.*)C(?P<chanID>.*){img_format}'
+     elif metadata_type == 'cq1':
+         regex = f'W(?P<wellID>.*)F(?P<fieldID>.*)T(?P<timeID>.*)Z(?P<sliceID>.*)C(?P<chanID>.*){img_format}'
+     elif metadata_type == 'nikon':
+         regex = f'(?P<plateID>.*)_(?P<wellID>.*)_T(?P<timeID>.*)F(?P<fieldID>.*)L(?P<laserID>..)A(?P<AID>..)Z(?P<sliceID>.*)C(?P<chanID>.*){img_format}'
+     elif metadata_type == 'zeis':
+         regex = f'(?P<plateID>.*)_(?P<wellID>.*)_T(?P<timeID>.*)F(?P<fieldID>.*)L(?P<laserID>..)A(?P<AID>..)Z(?P<sliceID>.*)C(?P<chanID>.*){img_format}'
+     elif metadata_type == 'leica':
+         regex = f'(?P<plateID>.*)_(?P<wellID>.*)_T(?P<timeID>.*)F(?P<fieldID>.*)L(?P<laserID>..)A(?P<AID>..)Z(?P<sliceID>.*)C(?P<chanID>.*){img_format}'
+     elif metadata_type == 'custom':
+         regex = f'({custom_regex}){img_format}'
+
+     print(f'regex mode:{metadata_type} regex:{regex}')
+     return regex
+
+ def _run_test_mode(src, regex, timelapse=False, test_images=10, random_test=True):
+
+     if timelapse:
+         test_images = 1 # Use only 1 set for timelapse to ensure full sequence inclusion
+
+     test_folder_path = os.path.join(src, 'test')
+     os.makedirs(test_folder_path, exist_ok=True)
+     regular_expression = re.compile(regex)
+
+     if os.path.exists(os.path.join(src, 'orig')):
+         src = os.path.join(src, 'orig')
+
+     all_filenames = [filename for filename in os.listdir(src) if regular_expression.match(filename)]
+     print(f'Found {len(all_filenames)} files')
+     images_by_set = defaultdict(list)
+
+     for filename in all_filenames:
+         match = regular_expression.match(filename)
+         if match:
+             plate = match.group('plateID') if 'plateID' in match.groupdict() else os.path.basename(src)
+             well = match.group('wellID')
+             field = match.group('fieldID')
+             set_identifier = (plate, well, field)
+             images_by_set[set_identifier].append(filename)
+
+     # Prepare for random selection
+     set_identifiers = list(images_by_set.keys())
+     if random_test:
+         random.seed(42)
+         random.shuffle(set_identifiers) # Randomize the order
+
+     # Select a subset based on the test_images count
+     selected_sets = set_identifiers[:test_images]
+
+     # Print information about the number of sets used
+     print(f'Using {len(selected_sets)} random image set(s) for test model')
+
+     # Copy files for selected sets to the test folder
+     for set_identifier in selected_sets:
+         for filename in images_by_set[set_identifier]:
+             shutil.copy(os.path.join(src, filename), test_folder_path)
+
+     return test_folder_path
+
+ def _choose_model(model_name, device, object_type='cell', restore_type=None, object_settings={}):
+
+     if object_type == 'pathogen':
+         if model_name == 'toxo_pv_lumen':
+             diameter = object_settings['diameter']
+             current_dir = os.path.dirname(__file__)
+             model_path = os.path.join(current_dir, 'models', 'cp', 'toxo_pv_lumen.CP_model')
+             print(model_path)
+             model = cp_models.CellposeModel(gpu=torch.cuda.is_available(), model_type=None, pretrained_model=model_path, diam_mean=diameter, device=device)
+             #model = cp_models.Cellpose(gpu=torch.cuda.is_available(), model_type='cyto', device=device)
+             print(f'Using Toxoplasma PV lumen model to generate pathogen masks')
+             return model
+
+     restore_list = ['denoise', 'deblur', 'upsample', None]
+     if restore_type not in restore_list:
+         print(f"Invalid restore type. Choose from {restore_list} defaulting to None")
+         restore_type = None
+
+     if restore_type == None:
+         if model_name in ['cyto', 'cyto2', 'cyto3', 'nuclei']:
+             model = cp_models.Cellpose(gpu=torch.cuda.is_available(), model_type=model_name, device=device)
+
+     else:
+         if object_type == 'nucleus':
+             restore = f'{type}_nuclei'
+             model = denoise.CellposeDenoiseModel(gpu=torch.cuda.is_available(), model_type="nuclei",restore_type=restore, chan2_restore=False, device=device)
+         else:
+             restore = f'{type}_cyto3'
+             if model_name =='cyto2':
+                 chan2_restore = True
+             if model_name =='cyto':
+                 chan2_restore = False
+             model = denoise.CellposeDenoiseModel(gpu=torch.cuda.is_available(), model_type="cyto3",restore_type=restore, chan2_restore=chan2_restore, device=device)
+
+     return model
+
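Review note on the two new helpers above. In `_get_regex`, `img_format == '.tif'` is a comparison whose result is discarded, so the `.tif` default is never applied; and in `_choose_model`, the f-strings interpolate the builtin `type` rather than the `restore_type` argument, producing strings like "<class 'type'>_nuclei". Sketches of the presumably intended lines:

    # _get_regex
    if img_format == None:
        img_format = '.tif'                 # assignment, not comparison

    # _choose_model
    restore = f'{restore_type}_nuclei'      # e.g. 'denoise_nuclei'
    restore = f'{restore_type}_cyto3'       # e.g. 'denoise_cyto3'

Also note that when `restore_type` is None and `model_name` is not one of the four built-in models, `model` is never assigned and the final `return model` raises UnboundLocalError.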
+ class SelectChannels:
+     def __init__(self, channels):
+         self.channels = channels
+
+     def __call__(self, img):
+         img = img.clone()
+         if 1 not in self.channels:
+             img[0, :, :] = 0 # Zero out the red channel
+         if 2 not in self.channels:
+             img[1, :, :] = 0 # Zero out the green channel
+         if 3 not in self.channels:
+             img[2, :, :] = 0 # Zero out the blue channel
+         return img
+
+ def preprocess_image(image_path, image_size=224, channels=[1,2,3], normalize=True):
+
+     if normalize:
+         transform = transforms.Compose([
+             transforms.ToTensor(),
+             transforms.CenterCrop(size=(image_size, image_size)),
+             SelectChannels(channels),
+             transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))])
+     else:
+         transform = transforms.Compose([
+             transforms.ToTensor(),
+             transforms.CenterCrop(size=(image_size, image_size)),
+             SelectChannels(channels)])
+
+     image = Image.open(image_path).convert('RGB')
+     input_tensor = transform(image).unsqueeze(0)
+     return image, input_tensor
+
+
+ class SaliencyMapGenerator:
+     def __init__(self, model):
+         self.model = model
+
+     def compute_saliency_maps(self, X, y):
+         self.model.eval()
+         X.requires_grad_()
+
+         # Forward pass
+         scores = self.model(X).squeeze()
+
+         # For binary classification, target scores can be the single output
+         target_scores = scores * (2 * y - 1)
+
+         self.model.zero_grad()
+         target_scores.backward(torch.ones_like(target_scores))
+
+         saliency = X.grad.abs()
+         return saliency
+
+     def plot_saliency_maps(self, X, y, saliency, class_names):
+         N = X.shape[0]
+         for i in range(N):
+             plt.subplot(2, N, i + 1)
+             plt.imshow(X[i].permute(1, 2, 0).cpu().numpy())
+             plt.axis('off')
+             plt.title(class_names[y[i]])
+             plt.subplot(2, N, N + i + 1)
+             plt.imshow(saliency[i].cpu().numpy(), cmap=plt.cm.hot)
+             plt.axis('off')
+         plt.gcf().set_size_inches(12, 5)
+         plt.show()
+
+ def preprocess_image(image_path, normalize=True, image_size=224, channels=[1,2,3]):
+     preprocess = transforms.Compose([
+         transforms.Resize((image_size, image_size)),
+         transforms.ToTensor(),
+     ])
+
+     image = Image.open(image_path).convert('RGB')
+     input_tensor = preprocess(image)
+     if normalize:
+         input_tensor = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])(input_tensor)
+     input_tensor = input_tensor.unsqueeze(0)
+
+     return image, input_tensor
+
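Review note: `preprocess_image` appears to be defined twice in this version with different pipelines (CenterCrop plus `SelectChannels` with 0.5/0.5 normalization, versus Resize with ImageNet normalization and an unused `channels` argument). If both sit at module level as rendered here, the later definition silently shadows the earlier one, just as with `normalize_to_dtype` above:

    def preprocess_image(path): return 'crop pipeline'
    def preprocess_image(path): return 'resize pipeline'   # replaces the first
    print(preprocess_image('x.png'))                        # -> 'resize pipeline'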
+ def class_visualization(target_y, model_path, dtype, img_size=224, channels=[0,1,2], l2_reg=1e-3, learning_rate=25, num_iterations=100, blur_every=10, max_jitter=16, show_every=25, class_names = ['nc', 'pc']):
+
+     def jitter(img, ox, oy):
+         # Randomly jitter the image
+         return torch.roll(torch.roll(img, ox, dims=2), oy, dims=3)
+
+     def blur_image(img, sigma=1):
+         # Apply Gaussian blur to the image
+         img_np = img.cpu().numpy()
+         for i in range(img_np.shape[1]):
+             img_np[:, i] = gaussian_filter(img_np[:, i], sigma=sigma)
+         img.copy_(torch.tensor(img_np).to(img.device))
+
+     def deprocess(img_tensor):
+         # Convert the tensor image to a numpy array for visualization
+         img_tensor = img_tensor.clone()
+         for c in range(3):
+             img_tensor[:, c] = img_tensor[:, c] * SQUEEZENET_STD[c] + SQUEEZENET_MEAN[c]
+         img_tensor = img_tensor.clamp(0, 1)
+         return img_tensor.squeeze().permute(1, 2, 0).cpu().numpy()
+
+     # Assuming these are defined somewhere in your codebase
+     SQUEEZENET_MEAN = [0.485, 0.456, 0.406]
+     SQUEEZENET_STD = [0.229, 0.224, 0.225]
+
+     model = torch.load(model_path)
+
+     dtype = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor
+     len_chans = len(channels)
+     model.type(dtype)
+
+     # Randomly initialize the image as a PyTorch Tensor, and make it requires gradient.
+     img = torch.randn(1, len_chans, img_size, img_size).mul_(1.0).type(dtype).requires_grad_()
+
+     for t in range(num_iterations):
+         # Randomly jitter the image a bit; this gives slightly nicer results
+         ox, oy = random.randint(0, max_jitter), random.randint(0, max_jitter)
+         img.data.copy_(jitter(img.data, ox, oy))
+
+         # Forward pass
+         score = model(img)
+
+         if target_y == 0:
+             target_score = -score
+         else:
+             target_score = score
+
+         # Add regularization
+         target_score = target_score - l2_reg * torch.norm(img)
+
+         # Backward pass
+         target_score.backward()
+
+         # Gradient ascent step
+         with torch.no_grad():
+             img += learning_rate * img.grad / torch.norm(img.grad)
+             img.grad.zero_()
+
+         # Undo the random jitter
+         img.data.copy_(jitter(img.data, -ox, -oy))
+
+         # As regularizer, clamp and periodically blur the image
+         for c in range(3):
+             lo = float(-SQUEEZENET_MEAN[c] / SQUEEZENET_STD[c])
+             hi = float((1.0 - SQUEEZENET_MEAN[c]) / SQUEEZENET_STD[c])
+             img.data[:, c].clamp_(min=lo, max=hi)
+         if t % blur_every == 0:
+             blur_image(img.data, sigma=0.5)
+
+         # Periodically show the image
+         if t == 0 or (t + 1) % show_every == 0 or t == num_iterations - 1:
+             plt.imshow(deprocess(img.data.clone().cpu()))
+             class_name = class_names[target_y]
+             plt.title('%s\nIteration %d / %d' % (class_name, t + 1, num_iterations))
+             plt.gcf().set_size_inches(4, 4)
+             plt.axis('off')
+             plt.show()
+
+     return deprocess(img.data.cpu())
+
+ def get_submodules(model, prefix=''):
+     submodules = []
+     for name, module in model.named_children():
+         full_name = prefix + ('.' if prefix else '') + name
+         submodules.append(full_name)
+         submodules.extend(get_submodules(module, full_name))
+     return submodules
+
+ class GradCAM:
+     def __init__(self, model, target_layers=None, use_cuda=True):
+         self.model = model
+         self.model.eval()
+         self.target_layers = target_layers
+         self.cuda = use_cuda
+         if self.cuda:
+             self.model = model.cuda()
+
+     def forward(self, input):
+         return self.model(input)
+
+     def __call__(self, x, index=None):
+         if self.cuda:
+             x = x.cuda()
+
+         features = []
+         def hook(module, input, output):
+             features.append(output)
+
+         handles = []
+         for name, module in self.model.named_modules():
+             if name in self.target_layers:
+                 handles.append(module.register_forward_hook(hook))
+
+         output = self.forward(x)
+         if index is None:
+             index = np.argmax(output.data.cpu().numpy())
+
+         one_hot = np.zeros((1, output.size()[-1]), dtype=np.float32)
+         one_hot[0][index] = 1
+         one_hot = torch.from_numpy(one_hot).requires_grad_(True)
+         if self.cuda:
+             one_hot = one_hot.cuda()
+
+         one_hot = torch.sum(one_hot * output)
+         self.model.zero_grad()
+         one_hot.backward(retain_graph=True)
+
+         grads_val = features[0].grad.cpu().data.numpy()
+         target = features[0].cpu().data.numpy()[0, :]
+
+         weights = np.mean(grads_val, axis=(2, 3))[0, :]
+         cam = np.zeros(target.shape[1:], dtype=np.float32)
+
+         for i, w in enumerate(weights):
+             cam += w * target[i, :, :]
+
+         cam = np.maximum(cam, 0)
+         cam = cv2.resize(cam, (x.size(2), x.size(3)))
+         cam = cam - np.min(cam)
+         cam = cam / np.max(cam)
+
+         for handle in handles:
+             handle.remove()
+
+         return cam
+
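Review note: in `GradCAM.__call__`, `features[0]` is an activation captured by a forward hook, and PyTorch does not populate `.grad` on such non-leaf tensors by default, so `features[0].grad` is None and the `.cpu()` call on it raises. A common fix (a sketch, not from the package) is to ask autograd to retain the gradient on the hooked activation:

    def hook(module, input, output):
        output.retain_grad()        # keep .grad on this non-leaf tensor
        features.append(output)
    # ... after one_hot.backward(retain_graph=True):
    grads_val = features[0].grad.cpu().data.numpy()   # now populated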
3300
+ def show_cam_on_image(img, mask):
3301
+ heatmap = cv2.applyColorMap(np.uint8(255 * mask), cv2.COLORMAP_JET)
3302
+ heatmap = np.float32(heatmap) / 255
3303
+ cam = heatmap + np.float32(img)
3304
+ cam = cam / np.max(cam)
3305
+ return np.uint8(255 * cam)
3306
+
3307
+ def recommend_target_layers(model):
3308
+ target_layers = []
3309
+ for name, module in model.named_modules():
3310
+ if isinstance(module, torch.nn.Conv2d):
3311
+ target_layers.append(name)
3312
+ # Choose the last conv layer as the recommended target layer
3313
+ if target_layers:
3314
+ return [target_layers[-1]], target_layers
3315
+ else:
3316
+ raise ValueError("No convolutional layers found in the model.")
3317
+
3318
+ class IntegratedGradients:
3319
+ def __init__(self, model):
3320
+ self.model = model
3321
+ self.model.eval()
3322
+
3323
+ def generate_integrated_gradients(self, input_tensor, target_label_idx, baseline=None, num_steps=50):
3324
+ if baseline is None:
3325
+ baseline = torch.zeros_like(input_tensor)
3326
+
3327
+ assert baseline.shape == input_tensor.shape
3328
+
3329
+ # Scale input and compute gradients
3330
+ scaled_inputs = [(baseline + (float(i) / num_steps) * (input_tensor - baseline)).requires_grad_(True) for i in range(0, num_steps + 1)]
3331
+ grads = []
3332
+ for scaled_input in scaled_inputs:
3333
+ out = self.model(scaled_input)
3334
+ self.model.zero_grad()
3335
+ out[0, target_label_idx].backward(retain_graph=True)
3336
+ grads.append(scaled_input.grad.data.cpu().numpy())
3337
+
3338
+ avg_grads = np.mean(grads[:-1], axis=0)
3339
+ integrated_grads = (input_tensor.cpu().data.numpy() - baseline.cpu().data.numpy()) * avg_grads
3340
+ return integrated_grads
3341
+
3342
+ def get_db_paths(src):
3343
+ if isinstance(src, str):
3344
+ src = [src]
3345
+ db_paths = [os.path.join(source, 'measurements/measurements.db') for source in src]
3346
+ return db_paths
3347
+
3348
+ def get_sequencing_paths(src):
3349
+ if isinstance(src, str):
3350
+ src = [src]
3351
+ seq_paths = [os.path.join(source, 'sequencing/sequencing_data.csv') for source in src]
3352
+ return seq_paths
3353
+
3354
+ def load_image_paths(c, visualize):
3355
+ c.execute(f'SELECT * FROM png_list')
3356
+ data = c.fetchall()
3357
+ columns_info = c.execute(f'PRAGMA table_info(png_list)').fetchall()
3358
+ column_names = [col_info[1] for col_info in columns_info]
3359
+ image_paths_df = pd.DataFrame(data, columns=column_names)
3360
+ if visualize:
3361
+ object_visualize = visualize + '_png'
3362
+ image_paths_df = image_paths_df[image_paths_df['png_path'].str.contains(object_visualize)]
3363
+ image_paths_df = image_paths_df.set_index('prcfo')
3364
+ return image_paths_df
3365
+
3366
+ def merge_dataframes(df, image_paths_df, verbose):
3367
+ df.set_index('prcfo', inplace=True)
3368
+ df = image_paths_df.merge(df, left_index=True, right_index=True)
3369
+ if verbose:
3370
+ display(df)
3371
+ return df
3372
+
3373
+ def remove_highly_correlated_columns(df, threshold):
3374
+ corr_matrix = df.corr().abs()
3375
+ upper_tri = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(bool))
3376
+ to_drop = [column for column in upper_tri.columns if any(upper_tri[column] > threshold)]
3377
+ return df.drop(to_drop, axis=1)
3378
+
3379
+ def filter_columns(df, filter_by):
3380
+ if filter_by != 'morphology':
3381
+ cols_to_include = [col for col in df.columns if filter_by in str(col)]
3382
+ else:
3383
+ cols_to_include = [col for col in df.columns if 'channel' not in str(col)]
3384
+ df = df[cols_to_include]
3385
+ return df
3386
+
3387
+ def reduction_and_clustering(numeric_data, n_neighbors, min_dist, metric, eps, min_samples, clustering, reduction_method='umap', verbose=False, embedding=None, n_jobs=-1, mode='fit', model=False):
3388
+ """
3389
+ Perform dimensionality reduction and clustering on the given data.
3390
+
3391
+ Parameters:
3392
+ numeric_data (np.ndarray): Numeric data for embedding and clustering.
3393
+ n_neighbors (int or float): Number of neighbors for UMAP or perplexity for t-SNE.
3394
+ min_dist (float): Minimum distance for UMAP.
3395
+ metric (str): Metric for UMAP and DBSCAN.
3396
+ eps (float): Epsilon for DBSCAN.
3397
+ min_samples (int): Minimum samples for DBSCAN or number of clusters for KMeans.
3398
+ clustering (str): Clustering method ('dbscan' or 'kmeans').
3399
+ reduction_method (str): Dimensionality reduction method ('umap' or 'tsne').
3400
+ verbose (bool): Whether to print verbose output.
3401
+ embedding (np.ndarray, optional): Precomputed embedding. Default is None.
3402
+ n_jobs (int): Number of parallel jobs. Default is -1 (all available cores).
+ mode (str): 'fit' trains a new reducer; any other value transforms with `model`.
+ model: A fitted reducer to reuse when mode is not 'fit'. Default is False.
3403
+
3404
+ Returns:
3405
+ tuple: (embedding, labels, reducer)
3406
+ """
3407
+
3408
+ if verbose:
3409
+ v = 1
3410
+ else:
3411
+ v = 0
3412
+
3413
+ if isinstance(n_neighbors, float):
3414
+ n_neighbors = int(n_neighbors * len(numeric_data))
3415
+
3416
+ if n_neighbors <= 2:
3417
+ n_neighbors = 2
3418
+
3419
+ if mode == 'fit':
3420
+ if reduction_method == 'umap':
3421
+ reducer = umap.UMAP(n_neighbors=n_neighbors,
3422
+ n_components=2,
3423
+ metric=metric,
3424
+ n_epochs=None,
3425
+ learning_rate=1.0,
3426
+ init='spectral',
3427
+ min_dist=min_dist,
3428
+ spread=1.0,
3429
+ set_op_mix_ratio=1.0,
3430
+ local_connectivity=1,
3431
+ repulsion_strength=1.0,
3432
+ negative_sample_rate=5,
3433
+ transform_queue_size=4.0,
3434
+ a=None,
3435
+ b=None,
3436
+ random_state=42,
3437
+ metric_kwds=None,
3438
+ angular_rp_forest=False,
3439
+ target_n_neighbors=-1,
3440
+ target_metric='categorical',
3441
+ target_metric_kwds=None,
3442
+ target_weight=0.5,
3443
+ transform_seed=42,
3444
+ n_jobs=n_jobs,
3445
+ verbose=verbose)
3446
+
3447
+ elif reduction_method == 'tsne':
3448
+ reducer = TSNE(n_components=2,
3449
+ perplexity=n_neighbors,
3450
+ early_exaggeration=12.0,
3451
+ learning_rate=200.0,
3452
+ n_iter=1000,
3453
+ n_iter_without_progress=300,
3454
+ min_grad_norm=1e-7,
3455
+ metric=metric,
3456
+ init='random',
3457
+ verbose=v,
3458
+ random_state=42,
3459
+ method='barnes_hut',
3460
+ angle=0.5,
3461
+ n_jobs=n_jobs)
3462
+
3463
+ else:
3464
+ raise ValueError(f"Unsupported reduction method: {reduction_method}. Supported methods are 'umap' and 'tsne'")
3465
+
3466
+ embedding = reducer.fit_transform(numeric_data)
3467
+ if verbose:
3468
+ print('Trained and fit reducer')
3469
+
3470
+ else:
3471
+ if model is not None:
3472
+ embedding = model.transform(numeric_data)
3473
+ reducer = model
3474
+ if verbose:
3475
+ print('Transformed data with the fitted reducer')
3476
+ else:
3477
+ raise ValueError("Model is None. Please provide a fitted model when mode is not 'fit'.")
3478
+
3479
+ if clustering == 'dbscan':
3480
+ clustering_model = DBSCAN(eps=eps, min_samples=min_samples, metric=metric, n_jobs=n_jobs)
3481
+ elif clustering == 'kmeans':
3482
+ clustering_model = KMeans(n_clusters=min_samples, random_state=42)
+ else:
+ raise ValueError(f"Unsupported clustering method: {clustering}. Supported methods are 'dbscan' and 'kmeans'")
3483
+
3484
+ clustering_model.fit(embedding)
3485
+ labels = clustering_model.labels_ if clustering == 'dbscan' else clustering_model.predict(embedding)
3486
+
3487
+ if verbose:
3488
+ print(f'Embedding shape: {embedding.shape}')
3489
+
3490
+ return embedding, labels, reducer
3491
+
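+ # Hedged usage sketch for reduction_and_clustering (assumes `X` is a scaled numeric
+ # matrix, e.g. the output of preprocess_data defined below):
+ #
+ #     embedding, labels, reducer = reduction_and_clustering(
+ #         X, n_neighbors=15, min_dist=0.1, metric='euclidean',
+ #         eps=0.5, min_samples=10, clustering='dbscan',
+ #         reduction_method='umap', mode='fit')
+ #     # The fitted `reducer` can then place held-out data (here `X_new`) in the same
+ #     # space; any mode other than 'fit' takes the transform path:
+ #     embedding2, labels2, _ = reduction_and_clustering(
+ #         X_new, 15, 0.1, 'euclidean', 0.5, 10, 'dbscan',
+ #         mode='transform', model=reducer)
+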
3492
+ def reduction_and_clustering_v1(numeric_data, n_neighbors, min_dist, metric, eps, min_samples, clustering, reduction_method='umap', verbose=False, embedding=None, n_jobs=-1):
3493
+ """
3494
+ Perform dimensionality reduction and clustering on the given data.
3495
+
3496
+ Parameters:
3497
+ numeric_data (np.ndarray): Numeric data for embedding and clustering.
3498
+ n_neighbors (int or float): Number of neighbors for UMAP or perplexity for t-SNE.
3499
+ min_dist (float): Minimum distance for UMAP.
3500
+ metric (str): Metric for UMAP and DBSCAN.
3501
+ eps (float): Epsilon for DBSCAN.
3502
+ min_samples (int): Minimum samples for DBSCAN or number of clusters for KMeans.
3503
+ clustering (str): Clustering method ('dbscan' or 'kmeans').
3504
+ reduction_method (str): Dimensionality reduction method ('umap' or 'tsne').
3505
+ verbose (bool): Whether to print verbose output.
3506
+ embedding (np.ndarray, optional): Precomputed embedding. Default is None.
3507
+
3508
+ Returns:
3509
+ tuple: embedding, labels
3510
+ """
3511
+
3512
+ if verbose:
3513
+ v = 1
3514
+ else:
3515
+ v = 0
3516
+
3517
+ if isinstance(n_neighbors, float):
3518
+ n_neighbors = int(n_neighbors * len(numeric_data))
3519
+
3520
+ if n_neighbors <= 2:
3521
+ n_neighbors = 2
3522
+
3523
+ if reduction_method == 'umap':
3524
+ reducer = umap.UMAP(n_neighbors=n_neighbors,
3525
+ n_components=2,
3526
+ metric=metric,
3527
+ n_epochs=None,
3528
+ learning_rate=1.0,
3529
+ init='spectral',
3530
+ min_dist=min_dist,
3531
+ spread=1.0,
3532
+ set_op_mix_ratio=1.0,
3533
+ local_connectivity=1,
3534
+ repulsion_strength=1.0,
3535
+ negative_sample_rate=5,
3536
+ transform_queue_size=4.0,
3537
+ a=None,
3538
+ b=None,
3539
+ random_state=42,
3540
+ metric_kwds=None,
3541
+ angular_rp_forest=False,
3542
+ target_n_neighbors=-1,
3543
+ target_metric='categorical',
3544
+ target_metric_kwds=None,
3545
+ target_weight=0.5,
3546
+ transform_seed=42,
3547
+ n_jobs=n_jobs,
3548
+ verbose=verbose)
3549
+
3550
+ elif reduction_method == 'tsne':
3551
+
3552
+ #tsne_params.setdefault('n_components', 2)
3553
+ #reducer = TSNE(**tsne_params)
3554
+
3555
+ reducer = TSNE(n_components=2,
3556
+ perplexity=n_neighbors,
3557
+ early_exaggeration=12.0,
3558
+ learning_rate=200.0,
3559
+ n_iter=1000,
3560
+ n_iter_without_progress=300,
3561
+ min_grad_norm=1e-7,
3562
+ metric=metric,
3563
+ init='random',
3564
+ verbose=v,
3565
+ random_state=42,
3566
+ method='barnes_hut',
3567
+ angle=0.5,
3568
+ n_jobs=n_jobs)
3569
+
3570
+ else:
3571
+ raise ValueError(f"Unsupported reduction method: {reduction_method}. Supported methods are 'umap' and 'tsne'")
3572
+
3573
+ if embedding is None:
3574
+ embedding = reducer.fit_transform(numeric_data)
3575
+
3576
+ if clustering == 'dbscan':
3577
+ clustering_model = DBSCAN(eps=eps, min_samples=min_samples, metric=metric, n_jobs=n_jobs)
3578
+ elif clustering == 'kmeans':
3579
+ clustering_model = KMeans(n_clusters=min_samples, random_state=42)
3580
+ else:
3581
+ raise ValueError(f"Unsupported clustering method: {clustering}. Supported methods are 'dbscan' and 'kmeans'")
3582
+
3583
+ clustering_model.fit(embedding)
3584
+ labels = clustering_model.labels_ if clustering == 'dbscan' else clustering_model.predict(embedding)
3585
+
3586
+ if verbose:
3587
+ print(f'Embedding shape: {embedding.shape}')
3588
+
3589
+ return embedding, labels
3590
+
3591
+ def remove_noise(embedding, labels):
3592
+ non_noise_indices = labels != -1
3593
+ embedding = embedding[non_noise_indices]
3594
+ labels = labels[non_noise_indices]
3595
+ return embedding, labels
3596
+
3597
+ def plot_embedding(embedding, image_paths, labels, image_nr, img_zoom, colors, plot_by_cluster, plot_outlines, plot_points, plot_images, smooth_lines, black_background, figuresize, dot_size, remove_image_canvas, verbose):
3598
+ unique_labels = np.unique(labels)
3599
+ #num_clusters = len(unique_labels[unique_labels != 0])
3600
+ colors, label_to_color_index = assign_colors(unique_labels, colors)
3601
+ cluster_centers = [np.mean(embedding[labels == cluster_label], axis=0) for cluster_label in unique_labels]
3602
+ fig, ax = setup_plot(figuresize, black_background)
3603
+ plot_clusters(ax, embedding, labels, colors, cluster_centers, plot_outlines, plot_points, smooth_lines, figuresize, dot_size, verbose)
3604
+ if image_paths is not None and plot_images:
3605
+ plot_umap_images(ax, image_paths, embedding, labels, image_nr, img_zoom, colors, plot_by_cluster, remove_image_canvas, verbose)
3606
+ plt.show()
3607
+ return fig
3608
+
3609
+ def generate_colors(num_clusters, black_background):
3610
+ random_colors = np.random.rand(num_clusters + 1, 4)
3611
+ random_colors[:, 3] = 1
3612
+ specific_colors = [
3613
+ [155 / 255, 55 / 255, 155 / 255, 1],
3614
+ [55 / 255, 155 / 255, 155 / 255, 1],
3615
+ [55 / 255, 155 / 255, 255 / 255, 1],
3616
+ [255 / 255, 55 / 255, 155 / 255, 1]
3617
+ ]
3618
+ random_colors = np.vstack((specific_colors, random_colors[len(specific_colors):]))
3619
+ if not black_background:
3620
+ random_colors = np.vstack(([0, 0, 0, 1], random_colors))
3621
+ return random_colors
3622
+
3623
+ def assign_colors(unique_labels, random_colors):
3624
+ colors = [tuple(color) for color in random_colors]
3627
+ label_to_color_index = {label: index for index, label in enumerate(unique_labels)}
3628
+ return colors, label_to_color_index
3629
+
3630
+ def setup_plot(figuresize, black_background):
3631
+ if black_background:
3632
+ plt.rcParams.update({'figure.facecolor': 'black', 'axes.facecolor': 'black', 'text.color': 'white', 'xtick.color': 'white', 'ytick.color': 'white', 'axes.labelcolor': 'white'})
3633
+ else:
3634
+ plt.rcParams.update({'figure.facecolor': 'white', 'axes.facecolor': 'white', 'text.color': 'black', 'xtick.color': 'black', 'ytick.color': 'black', 'axes.labelcolor': 'black'})
3635
+ fig, ax = plt.subplots(1, 1, figsize=(figuresize, figuresize))
3636
+ return fig, ax
3637
+
3638
+ def plot_clusters(ax, embedding, labels, colors, cluster_centers, plot_outlines, plot_points, smooth_lines, figuresize=50, dot_size=50, verbose=False):
3639
+ unique_labels = np.unique(labels)
3640
+ for cluster_label, color, center in zip(unique_labels, colors, cluster_centers):
3641
+ cluster_data = embedding[labels == cluster_label]
3642
+ if smooth_lines:
3643
+ if cluster_data.shape[0] > 2:
3644
+ x_smooth, y_smooth = smooth_hull_lines(cluster_data)
3645
+ if plot_outlines:
3646
+ plt.plot(x_smooth, y_smooth, color=color, linewidth=2)
3647
+ else:
3648
+ if cluster_data.shape[0] > 2:
3649
+ hull = ConvexHull(cluster_data)
3650
+ for simplex in hull.simplices:
3651
+ if plot_outlines:
3652
+ plt.plot(hull.points[simplex, 0], hull.points[simplex, 1], color=color, linewidth=4)
3653
+ if plot_points:
3654
+ scatter = ax.scatter(cluster_data[:, 0], cluster_data[:, 1], s=dot_size, c=[color], alpha=0.5, label=f'Cluster {cluster_label if cluster_label != -1 else "Noise"}')
3655
+ else:
3656
+ scatter = ax.scatter(cluster_data[:, 0], cluster_data[:, 1], s=dot_size, c=[color], alpha=0, label=f'Cluster {cluster_label if cluster_label != -1 else "Noise"}')
3657
+ ax.text(center[0], center[1], str(cluster_label), fontsize=12, ha='center', va='center')
3658
+ plt.legend(loc='best', fontsize=int(figuresize * 0.75))
3659
+ plt.xlabel('UMAP Dimension 1', fontsize=int(figuresize * 0.75))
3660
+ plt.ylabel('UMAP Dimension 2', fontsize=int(figuresize * 0.75))
3661
+ plt.tick_params(axis='both', which='major', labelsize=int(figuresize * 0.75))
3662
+
3663
+ def plot_umap_images(ax, image_paths, embedding, labels, image_nr, img_zoom, colors, plot_by_cluster, remove_image_canvas, verbose):
3664
+ if plot_by_cluster:
3665
+ cluster_indices = {label: np.where(labels == label)[0] for label in np.unique(labels) if label != -1}
3666
+ plot_images_by_cluster(ax, image_paths, embedding, labels, image_nr, img_zoom, colors, cluster_indices, remove_image_canvas, verbose)
3667
+ else:
3668
+ indices = random.sample(range(len(embedding)), min(image_nr, len(embedding)))
3669
+ for i, index in enumerate(indices):
3670
+ x, y = embedding[index]
3671
+ img = Image.open(image_paths[index])
3672
+ plot_image(ax, x, y, img, img_zoom, remove_image_canvas)
3673
+
3674
+ def plot_images_by_cluster(ax, image_paths, embedding, labels, image_nr, img_zoom, colors, cluster_indices, remove_image_canvas, verbose):
3675
+ for cluster_label, color in zip(np.unique(labels), colors):
3676
+ if cluster_label == -1:
3677
+ continue
3678
+ indices = cluster_indices.get(cluster_label, [])
3679
+ if len(indices) > image_nr:
3680
+ indices = random.sample(list(indices), image_nr)
3681
+ for index in indices:
3682
+ x, y = embedding[index]
3683
+ img = Image.open(image_paths[index])
3684
+ plot_image(ax, x, y, img, img_zoom, remove_image_canvas)
3685
+
3686
+ def plot_image(ax, x, y, img, img_zoom, remove_image_canvas=True):
3687
+ if remove_image_canvas:
3688
+ img = remove_canvas(img)  # remove_canvas expects the PIL image (it reads img.mode)
3689
+ else:
+ img = np.array(img)
3690
+ imagebox = OffsetImage(img, zoom=img_zoom)
3691
+ ab = AnnotationBbox(imagebox, (x, y), frameon=False)
3692
+ ax.add_artist(ab)
3693
+
3694
+ def remove_canvas(img):
3695
+ if img.mode in ['L', 'I']:
3696
+ img_data = np.array(img)
3697
+ img_data = img_data / np.max(img_data)
3698
+ alpha_channel = (img_data > 0).astype(float)
3699
+ img_data_rgb = np.stack([img_data] * 3, axis=-1)
3700
+ img_data_with_alpha = np.dstack([img_data_rgb, alpha_channel])
3701
+ elif img.mode == 'RGB':
3702
+ img_data = np.array(img)
3703
+ img_data = img_data / 255.0
3704
+ alpha_channel = (np.sum(img_data, axis=-1) > 0).astype(float)
3705
+ img_data_with_alpha = np.dstack([img_data, alpha_channel])
3706
+ else:
3707
+ raise ValueError(f"Unsupported image mode: {img.mode}")
3708
+ return img_data_with_alpha
3709
+
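+ # remove_canvas expects a PIL image and returns an RGBA float array in which
+ # zero-intensity pixels become fully transparent, so only the segmented object
+ # is drawn on the embedding. For instance (path hypothetical):
+ #
+ #     img = Image.open('cell_crop.png')
+ #     rgba = remove_canvas(img)  # shape (H, W, 4); alpha is 0 where the canvas was empty
+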
3710
+ def plot_clusters_grid(embedding, labels, image_nr, image_paths, colors, figuresize, black_background, verbose):
3711
+ unique_labels = np.unique(labels)
3712
+ num_clusters = len(unique_labels[unique_labels != -1])
3713
+ if num_clusters == 0:
3714
+ print("No clusters found.")
3715
+ return
3716
+ cluster_images = {label: [] for label in unique_labels if label != -1}
3717
+ cluster_indices = {label: np.where(labels == label)[0] for label in unique_labels if label != -1}
3718
+ for cluster_label, indices in cluster_indices.items():
3719
+ if cluster_label == -1:
3720
+ continue
3721
+ if len(indices) > image_nr:
3722
+ indices = random.sample(list(indices), image_nr)
3723
+ for index in indices:
3724
+ img_path = image_paths[index]
3725
+ img_array = Image.open(img_path)
3726
+ img = np.array(img_array)
3727
+ cluster_images[cluster_label].append(img)
3728
+ fig = plot_grid(cluster_images, colors, figuresize, black_background, verbose)
3729
+ return fig
3730
+
3731
+ def plot_grid(cluster_images, colors, figuresize, black_background, verbose):
3732
+ num_clusters = len(cluster_images)
3733
+ max_figsize = 200 # Set a maximum figure size
3734
+ if figuresize * num_clusters > max_figsize:
3735
+ figuresize = max_figsize / num_clusters
3736
+
3737
+ grid_fig, grid_axes = plt.subplots(1, num_clusters, figsize=(figuresize * num_clusters, figuresize), gridspec_kw={'wspace': 0.2, 'hspace': 0})
3738
+ if num_clusters == 1:
3739
+ grid_axes = [grid_axes] # Ensure grid_axes is always iterable
3740
+ for cluster_label, axes in zip(cluster_images.keys(), grid_axes):
3741
+ images = cluster_images[cluster_label]
3742
+ num_images = len(images)
3743
+ grid_size = int(np.ceil(np.sqrt(num_images)))
3744
+ image_size = 0.9 / grid_size
3745
+ whitespace = (1 - grid_size * image_size) / (grid_size + 1)
3746
+
3747
+ if isinstance(cluster_label, str):
3748
+ idx = list(cluster_images.keys()).index(cluster_label)
3749
+ color = colors[idx]
3750
+ if verbose:
3751
+ print(f'Label: {cluster_label} index: {idx}')
3752
+ else:
3753
+ color = colors[cluster_label]
3754
+
3755
+ axes.add_patch(plt.Rectangle((0, 0), 1, 1, transform=axes.transAxes, color=color[:3]))
3756
+ axes.axis('off')
3757
+ for i, img in enumerate(images):
3758
+ row = i // grid_size
3759
+ col = i % grid_size
3760
+ x_pos = (col + 1) * whitespace + col * image_size
3761
+ y_pos = 1 - ((row + 1) * whitespace + (row + 1) * image_size)
3762
+ ax_img = axes.inset_axes([x_pos, y_pos, image_size, image_size], transform=axes.transAxes)
3763
+ ax_img.imshow(img, cmap='gray', aspect='auto')
3764
+ ax_img.axis('off')
3765
+ ax_img.set_aspect('equal')
3766
+ ax_img.set_facecolor(color[:3])
3767
+
3768
+ # Add cluster labels beside the UMAP plot
3769
+ spacing_factor = 0.5 # Adjust this value to control the spacing between labels
3770
+ for i, (cluster_label, color) in enumerate(zip(cluster_images.keys(), colors)):
3771
+ label_y = 1 - (i + 1) * (spacing_factor / num_clusters) # Adjust y position for each label
3772
+ grid_fig.text(1.05, label_y, f'Cluster {cluster_label}', verticalalignment='center', fontsize=figuresize, color='black' if not black_background else 'white')
3773
+ grid_fig.patches.append(plt.Rectangle((1, label_y - 0.02), 0.03, 0.03, transform=grid_fig.transFigure, color=color[:3], clip_on=False))
3774
+
3775
+ plt.show()
3776
+ return grid_fig
3777
+
3778
+ def correct_paths(df, base_path):
3779
+
3780
+ if 'png_path' not in df.columns:
3781
+ print("No 'png_path' column found in the dataframe.")
3782
+ return df, None
3783
+
3784
+ image_paths = df['png_path'].to_list()
3785
+
3786
+ adjusted_image_paths = []
3787
+ for path in image_paths:
3788
+ if base_path not in path:
3789
+ parts = path.split('/data/')
3790
+ if len(parts) > 1:
3791
+ new_path = os.path.join(base_path, 'data', parts[1])
3792
+ adjusted_image_paths.append(new_path)
3793
+ else:
3794
+ adjusted_image_paths.append(path)
3795
+ else:
3796
+ adjusted_image_paths.append(path)
3797
+
3798
+ df['png_path'] = adjusted_image_paths
3799
+ image_paths = df['png_path'].to_list()
3800
+ return df, image_paths
3801
+
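+ # Sketch of the rewrite performed by correct_paths (paths are hypothetical): a row
+ # with png_path '/old/location/data/plate1/img.png' and base_path '/new/location'
+ # becomes '/new/location/data/plate1/img.png'; paths already containing base_path
+ # are left untouched.
+ #
+ #     df, image_paths = correct_paths(df, base_path='/new/location')
+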
3802
+ def correct_paths_v1(df, base_path):
3803
+ if 'png_path' not in df.columns:
3804
+ print("No 'png_path' column found in the dataframe.")
3805
+ return df, None
3806
+
3807
+ image_paths = df['png_path'].to_list()
3808
+
3809
+ adjusted_image_paths = []
3810
+ for path in image_paths:
3811
+ if base_path not in path:
3812
+ print(f"Adjusting path: {path}")
3813
+ parts = path.split('data/')
3814
+ if len(parts) > 1:
3815
+ new_path = os.path.join(base_path, 'data', parts[1])
3816
+ adjusted_image_paths.append(new_path)
3817
+ else:
3818
+ adjusted_image_paths.append(path)
3819
+ else:
3820
+ adjusted_image_paths.append(path)
3821
+
3822
+ df['png_path'] = adjusted_image_paths
3823
+ image_paths = df['png_path'].to_list()
3824
+ return df, image_paths
3825
+
3826
+ def get_umap_image_settings(settings={}):
3827
+ settings.setdefault('src', 'path')
3828
+ settings.setdefault('row_limit', 1000)
3829
+ settings.setdefault('tables', ['cell', 'cytoplasm', 'nucleus', 'pathogen'])
3830
+ settings.setdefault('visualize', 'cell')
3831
+ settings.setdefault('image_nr', 16)
3832
+ settings.setdefault('dot_size', 50)
3833
+ settings.setdefault('n_neighbors', 1000)
3834
+ settings.setdefault('min_dist', 0.1)
3835
+ settings.setdefault('metric', 'euclidean')
3836
+ settings.setdefault('eps', 0.5)
3837
+ settings.setdefault('min_samples', 1000)
3838
+ settings.setdefault('filter_by', 'channel_0')
3839
+ settings.setdefault('img_zoom', 0.5)
3840
+ settings.setdefault('plot_by_cluster', True)
3841
+ settings.setdefault('plot_cluster_grids', True)
3842
+ settings.setdefault('remove_cluster_noise', True)
3843
+ settings.setdefault('remove_highly_correlated', True)
3844
+ settings.setdefault('log_data', False)
3845
+ settings.setdefault('figuresize', 60)
3846
+ settings.setdefault('black_background', True)
3847
+ settings.setdefault('remove_image_canvas', False)
3848
+ settings.setdefault('plot_outlines', True)
3849
+ settings.setdefault('plot_points', True)
3850
+ settings.setdefault('smooth_lines', True)
3851
+ settings.setdefault('clustering', 'dbscan')
3852
+ settings.setdefault('exclude', None)
3853
+ settings.setdefault('col_to_compare', 'col')
3854
+ settings.setdefault('pos', 'c1')
3855
+ settings.setdefault('neg', 'c2')
3856
+ settings.setdefault('embedding_by_controls', False)
3857
+ settings.setdefault('plot_images', True)
3858
+ settings.setdefault('reduction_method', 'umap')
3859
+ settings.setdefault('save_figure', False)
3860
+ settings.setdefault('n_jobs', -1)
3861
+ settings.setdefault('color_by', None)
3862
+ settings.setdefault('mix', 'c3')
3866
+ settings.setdefault('exclude_conditions', None)
3867
+ settings.setdefault('analyze_clusters', False)
3868
+ settings.setdefault('resnet_features', False)
3869
+ settings.setdefault('verbose', True)
3870
+ return settings
3871
+
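+ # Typical use: start from an empty dict and override only the settings that differ
+ # from the defaults above (the 'src' path here is a placeholder):
+ #
+ #     settings = get_umap_image_settings({'src': '/path/to/experiment',
+ #                                         'reduction_method': 'tsne',
+ #                                         'clustering': 'kmeans',
+ #                                         'min_samples': 8})
+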
3872
+ def preprocess_data(df, filter_by, remove_highly_correlated, log_data, exclude):
3873
+ """
3874
+ Preprocesses the given dataframe by applying filtering, removing highly correlated columns,
3875
+ applying log transformation, filling NaN values, and scaling the numeric data.
3876
+
3877
+ Args:
3878
+ df (pandas.DataFrame): The input dataframe.
3879
+ filter_by (str or None): The channel of interest to filter the dataframe by.
3880
+ remove_highly_correlated (bool or float): Whether to remove highly correlated columns.
3881
+ If a float is provided, it represents the correlation threshold.
3882
+ log_data (bool): Whether to apply log transformation to the numeric data.
3883
+ exclude (list or None): List of features to exclude from the filtering process.
3884
+
3886
+ Returns:
3887
+ numpy.ndarray: The preprocessed numeric data.
3888
+
3889
+ Raises:
3890
+ ValueError: If no numeric columns are available after filtering.
3891
+
3892
+ """
3893
+ # Apply filtering based on the `filter_by` parameter
3894
+ if filter_by is not None:
3895
+ df, _ = filter_dataframe_features(df, channel_of_interest=filter_by, exclude=exclude)
3896
+
3897
+ # Select numerical features
3898
+ numeric_data = df.select_dtypes(include=['number'])
3899
+
3900
+ # Check if numeric_data is empty
3901
+ if numeric_data.empty:
3902
+ raise ValueError("No numeric columns available after filtering. Please check the filter_by and exclude parameters.")
3903
+
3904
+ # Remove highly correlated columns
3905
+ if remove_highly_correlated is not False:
3906
+ if isinstance(remove_highly_correlated, float):
3907
+ numeric_data = remove_highly_correlated_columns(numeric_data, remove_highly_correlated)
3908
+ else:
3909
+ numeric_data = remove_highly_correlated_columns(numeric_data, 0.95)
3910
+
3911
+ # Apply log transformation
3912
+ if log_data:
3913
+ numeric_data = np.log(numeric_data + 1e-6)
3914
+
3915
+ # Fill NaN values with the column mean
3916
+ numeric_data = numeric_data.fillna(numeric_data.mean())
3917
+
3918
+ # Scale the numeric data
3919
+ scaler = StandardScaler(copy=True, with_mean=True, with_std=True)
3920
+ numeric_data = scaler.fit_transform(numeric_data)
3921
+
3922
+ return numeric_data
3923
+
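+ # Minimal sketch chaining the preprocessing into the embedding step (assumes `df`
+ # was assembled by load_image_paths/merge_dataframes above):
+ #
+ #     X = preprocess_data(df, filter_by='channel_0', remove_highly_correlated=0.95,
+ #                         log_data=False, exclude=None)
+ #     # X is a scaled numpy array, ready for reduction_and_clustering.
+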
3924
+ def filter_dataframe_features(df, channel_of_interest, exclude=None):
3925
+ """
3926
+ Filter the dataframe `df` based on the specified `channel_of_interest` and `exclude` parameters.
3927
+
3928
+ Parameters:
3929
+ - df (pandas.DataFrame): The input dataframe to be filtered.
3930
+ - channel_of_interest (str, int, list, None): The channel(s) of interest to filter the dataframe.
3931
+ If None, no filtering is applied. If 'morphology', only morphology features are included.
3932
+ If an integer, only the specified channel is included. If a list, only the specified channels are included.
3933
+ If a string, only the specified channel is included.
3934
+ - exclude (str, list, None): The feature(s) to exclude from the filtered dataframe.
3935
+ If None, no features are excluded. If a string, the specified feature is excluded.
3936
+ If a list, the specified features are excluded.
3937
+
3938
+ Returns:
3939
+ - filtered_df (pandas.DataFrame): The filtered dataframe based on the specified parameters.
3940
+ - features (list): The list of selected features after filtering.
3941
+
3942
+ """
3943
+ if channel_of_interest is None:
3944
+ feature_string = None
3945
+ elif channel_of_interest == 'morphology':
3946
+ feature_string = 'morphology'
3947
+ elif isinstance(channel_of_interest, list):
3948
+ feature_string = []
3949
+ for i in channel_of_interest:
3950
+ feature_string_tmp = f'channel_{i}'
3951
+ feature_string.append(feature_string_tmp)
3952
+ elif isinstance(channel_of_interest, int):
3953
+ feature_string = f'channel_{channel_of_interest}'
3954
+ elif isinstance(channel_of_interest, str):
3955
+ feature_string = channel_of_interest
3956
+
3957
+ # Remove columns with a single value
3958
+ df = df.loc[:, df.nunique() > 1]
3959
+
3960
+ # Select numerical features
3961
+ features = df.select_dtypes(include=[np.number]).columns.tolist()
3962
+
3963
+ if feature_string is not None:
3964
+ feature_list = ['channel_0', 'channel_1', 'channel_2', 'channel_3']
3965
+
3966
+ # Remove feature_string from the list if it exists
3967
+ if isinstance(feature_string, str):
3968
+ if feature_string in feature_list:
3969
+ feature_list.remove(feature_string)
3970
+ elif isinstance(feature_string, list):
3971
+ feature_list = [feature for feature in feature_list if feature not in feature_string]
3972
+
3973
+ if feature_string != 'morphology':
3974
+ features = [feature for feature in features if feature_string in feature]
3975
+
3976
+ # Iterate through the list and remove columns from df
3977
+ for feature_ in feature_list:
3978
+ features = [feature for feature in features if feature_ not in feature]
3979
+ print(f'After removing {feature_} features: {len(features)}')
3980
+
3981
+ if isinstance(exclude, list):
3982
+ features = [feature for feature in features if feature not in exclude]
3983
+ elif isinstance(exclude, str):
3984
+ features = [feature for feature in features if feature != exclude]
3985
+
3986
+ filtered_df = df[features]
3987
+
3988
+ return filtered_df, features
3989
+
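+ # Example of the channel filter (column names are illustrative): with
+ # channel_of_interest=1 only 'channel_1' intensity features survive, while the
+ # other channel_* columns are dropped; with 'morphology', every feature whose
+ # name lacks a channel_* tag is kept instead.
+ #
+ #     filtered_df, features = filter_dataframe_features(df, channel_of_interest=1)
+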
3990
+ # Create a function to check if images overlap
3991
+ def check_overlap(current_position, other_positions, threshold):
3992
+ for other_position in other_positions:
3993
+ distance = np.linalg.norm(np.array(current_position) - np.array(other_position))
3994
+ if distance < threshold:
3995
+ return True
3996
+ return False
3997
+
3998
+ # Define a function to try random positions around a given point
3999
+ def find_non_overlapping_position(x, y, image_positions, threshold, max_attempts=100):
4000
+ offset_range = 10 # Adjust the range for random offsets
4001
+ attempts = 0
4002
+ while attempts < max_attempts:
4003
+ random_offset_x = random.uniform(-offset_range, offset_range)
4004
+ random_offset_y = random.uniform(-offset_range, offset_range)
4005
+ new_x = x + random_offset_x
4006
+ new_y = y + random_offset_y
4007
+ if not check_overlap((new_x, new_y), image_positions, threshold):
4008
+ return new_x, new_y
4009
+ attempts += 1
4010
+ return x, y # Return the original position if no suitable position found
4011
+
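+ # Sketch of how the two helpers above cooperate when scattering thumbnails on an
+ # embedding (positions and threshold are arbitrary):
+ #
+ #     placed = []
+ #     for x, y in [(0.0, 0.0), (0.5, 0.5), (0.6, 0.55)]:
+ #         x_new, y_new = find_non_overlapping_position(x, y, placed, threshold=1.0)
+ #         placed.append((x_new, y_new))
+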
4012
+ def search_reduction_and_clustering(numeric_data, n_neighbors, min_dist, metric, eps, min_samples, clustering, reduction_method, verbose, reduction_param=None, embedding=None, n_jobs=-1):
4013
+ """
4014
+ Perform dimensionality reduction and clustering on the given data.
4015
+
4016
+ Parameters:
4017
+ numeric_data (np.array): Numeric data to process.
4018
+ n_neighbors (int): Number of neighbors for UMAP or perplexity for tSNE.
4019
+ min_dist (float): Minimum distance for UMAP.
4020
+ metric (str): Metric for UMAP, tSNE, and DBSCAN.
4021
+ eps (float): Epsilon for DBSCAN clustering.
4022
+ min_samples (int): Minimum samples for DBSCAN or number of clusters for KMeans.
4023
+ clustering (str): Clustering method ('dbscan' or 'kmeans').
4024
+ reduction_method (str): Dimensionality reduction method ('umap' or 'tsne').
4025
+ verbose (bool): Whether to print verbose output.
4026
+ reduction_param (dict): Additional parameters for the reduction method.
4027
+ embedding (np.array): Precomputed embedding (optional).
4028
+ n_jobs (int): Number of parallel jobs to run.
4029
+
4030
+ Returns:
4031
+ embedding (np.array): Embedding of the data.
4032
+ labels (np.array): Cluster labels.
4033
+ """
4034
+
4035
+ if isinstance(n_neighbors, float):
4036
+ n_neighbors = int(n_neighbors * len(numeric_data))
4037
+ if n_neighbors <= 1:
4038
+ n_neighbors = 2
4039
+ print(f'n_neighbors cannot be less than 2. Setting n_neighbors to {n_neighbors}')
4040
+
4041
+ reduction_param = reduction_param or {}
4042
+ reduction_param = {k: v for k, v in reduction_param.items() if k not in ['perplexity', 'n_neighbors', 'min_dist', 'metric', 'method']}
4043
+
4044
+ if reduction_method == 'umap':
4045
+ reducer = umap.UMAP(n_neighbors=n_neighbors, min_dist=min_dist, metric=metric, n_jobs=n_jobs, **reduction_param)
4046
+ elif reduction_method == 'tsne':
4047
+ reducer = TSNE(n_components=2, perplexity=n_neighbors, metric=metric, n_jobs=n_jobs, **reduction_param)
4048
+ else:
4049
+ raise ValueError(f"Unsupported reduction method: {reduction_method}. Supported methods are 'umap' and 'tsne'")
4050
+
4051
+ if embedding is None:
4052
+ embedding = reducer.fit_transform(numeric_data)
4053
+
4054
+ if clustering == 'dbscan':
4055
+ clustering_model = DBSCAN(eps=eps, min_samples=min_samples, metric=metric)
4056
+ elif clustering == 'kmeans':
4057
+ clustering_model = KMeans(n_clusters=min_samples, random_state=42)
4059
+ else:
4060
+ raise ValueError(f"Unsupported clustering method: {clustering}. Supported methods are 'dbscan' and 'kmeans'")
4061
+ clustering_model.fit(embedding)
4062
+ labels = clustering_model.labels_ if clustering == 'dbscan' else clustering_model.predict(embedding)
4063
+ if verbose:
4064
+ print(f'Embedding shape: {embedding.shape}')
4065
+ return embedding, labels
4066
+
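+ # Hedged sketch of a small hyperparameter search built on the function above
+ # (grid values are illustrative):
+ #
+ #     for nn in (15, 30):
+ #         for eps in (0.3, 0.5):
+ #             emb, lab = search_reduction_and_clustering(
+ #                 X, n_neighbors=nn, min_dist=0.1, metric='euclidean',
+ #                 eps=eps, min_samples=10, clustering='dbscan',
+ #                 reduction_method='umap', verbose=False)
+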
4067
+
4068
+