PyPI - spacr - Versions diffs - 0.3.2__py3-none-any.whl → 0.3.3__py3-none-any.whl - Mend

spacr 0.3.2__py3-none-any.whl → 0.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19) hide show

spacr/core.py +105 -1
spacr/deep_spacr.py +191 -141
spacr/gui.py +1 -0
spacr/gui_core.py +13 -4
spacr/gui_utils.py +29 -1
spacr/io.py +84 -125
spacr/measure.py +1 -38
spacr/ml.py +153 -66
spacr/plot.py +429 -7
spacr/settings.py +55 -10
spacr/submodules.py +7 -6
spacr/toxo.py +9 -4
spacr/utils.py +510 -16
{spacr-0.3.2.dist-info → spacr-0.3.3.dist-info}/METADATA +28 -25
{spacr-0.3.2.dist-info → spacr-0.3.3.dist-info}/RECORD +19 -19
{spacr-0.3.2.dist-info → spacr-0.3.3.dist-info}/LICENSE +0 -0
{spacr-0.3.2.dist-info → spacr-0.3.3.dist-info}/WHEEL +0 -0
{spacr-0.3.2.dist-info → spacr-0.3.3.dist-info}/entry_points.txt +0 -0
{spacr-0.3.2.dist-info → spacr-0.3.3.dist-info}/top_level.txt +0 -0

spacr/submodules.py CHANGED Viewed

@@ -36,7 +36,7 @@ def analyze_recruitment(settings={}):
     sns.color_palette("mako", as_cmap=True)
     print(f"channel:{settings['channel_of_interest']} = {settings['target']}")
-    df, _ = _read_and_merge_data(db_loc=[settings['src']+'/measurements/measurements.db'],
+    df, _ = _read_and_merge_data(locs=[settings['src']+'/measurements/measurements.db'],
                                  tables=['cell', 'nucleus', 'pathogen','cytoplasm'],
                                  verbose=True,
                                  nuclei_limit=settings['nuclei_limit'],
@@ -89,15 +89,16 @@ def analyze_recruitment(settings={}):
     if not settings['cell_chann_dim'] is None:
         df = _object_filter(df, 'cell', settings['cell_size_range'], settings['cell_intensity_range'], mask_chans, 0)
-        if not settings['target_intensity_min'] is None:
-            df = df[df[f"cell_channel_{settings['channel_of_interest']}_percentile_95'] > settings['target_intensity_min"]]
+        if not settings['target_intensity_min'] is None or not settings['target_intensity_min'] is 0:
+            df = df[df[f"cell_channel_{settings['channel_of_interest']}_percentile_95"] > settings['target_intensity_min']]
             print(f"After channel {settings['channel_of_interest']} filtration", len(df))
     if not settings['nucleus_chann_dim'] is None:
         df = _object_filter(df, 'nucleus', settings['nucleus_size_range'], settings['nucleus_intensity_range'], mask_chans, 1)
     if not settings['pathogen_chann_dim'] is None:
         df = _object_filter(df, 'pathogen', settings['pathogen_size_range'], settings['pathogen_intensity_range'], mask_chans, 2)
-    df['recruitment'] = df[f"pathogen_channel_{settings['channel_of_interest']}_mean_intensity']/df[f'cytoplasm_channel_{settings['channel_of_interest']}_mean_intensity"]
+    df['recruitment'] = df[f"pathogen_channel_{settings['channel_of_interest']}_mean_intensity"]/df[f"cytoplasm_channel_{settings['channel_of_interest']}_mean_intensity"]
     for chan in settings['channel_dims']:
         df = _calculate_recruitment(df, channel=chan)
     print(f'calculated recruitment for: {len(df)} rows')
@@ -114,9 +115,9 @@ def analyze_recruitment(settings={}):
         _plot_controls(df, mask_chans, settings['channel_of_interest'], figuresize=5)
     print(f'PV level: {len(df)} rows')
-    _plot_recruitment(df, 'by PV', settings['channel_of_interest'], settings['target'], settings['figuresize'])
+    _plot_recruitment(df, 'by PV', settings['channel_of_interest'], columns=[], figuresize=settings['figuresize'])
     print(f'well level: {len(df_well)} rows')
-    _plot_recruitment(df_well, 'by well', settings['channel_of_interest'], settings['target'], settings['figuresize'])
+    _plot_recruitment(df_well, 'by well', settings['channel_of_interest'], columns=[], figuresize=settings['figuresize'])
     cells,wells = _results_to_csv(settings['src'], df, df_well)
     return [cells,wells]

spacr/toxo.py CHANGED Viewed

@@ -112,10 +112,15 @@ def go_term_enrichment_by_column(significant_df, metadata_path, go_term_columns=
     - Plot the enrichment score vs -log10(p-value).
     """
-    significant_df['variable'].fillna(significant_df['feature'], inplace=True)
-    split_columns = significant_df['variable'].str.split('_', expand=True)
-    significant_df['gene_nr'] = split_columns[0]
-    gene_list = significant_df['gene_nr'].to_list()
+    #significant_df['variable'].fillna(significant_df['feature'], inplace=True)
+    #split_columns = significant_df['variable'].str.split('_', expand=True)
+    #significant_df['gene_nr'] = split_columns[0]
+    #gene_list = significant_df['gene_nr'].to_list()
+    significant_df = significant_df.dropna(subset=['n_gene'])
+    significant_df = significant_df[significant_df['n_gene'] != None]
+    gene_list = significant_df['n_gene'].to_list()
     # Load metadata
     metadata = pd.read_csv(metadata_path)

spacr/utils.py CHANGED Viewed

@@ -1,4 +1,4 @@
-import os, re, sqlite3, torch, torchvision, random, string, shutil, cv2, tarfile, glob, psutil, platform, gzip, subprocess, time, requests
+import os, re, sqlite3, torch, torchvision, random, string, shutil, cv2, tarfile, glob, psutil, platform, gzip, subprocess, time, requests, ast, traceback
 import numpy as np
 import pandas as pd
@@ -12,6 +12,7 @@ from skimage.transform import resize as resizescikit
 from skimage.morphology import dilation, square
 from skimage.measure import find_contours
 from skimage.segmentation import clear_border
+from scipy.stats import pearsonr
 from collections import defaultdict, OrderedDict
 from PIL import Image
@@ -37,6 +38,7 @@ from torchvision import models
 from torchvision.models.resnet import ResNet18_Weights, ResNet34_Weights, ResNet50_Weights, ResNet101_Weights, ResNet152_Weights
 import torchvision.transforms as transforms
 from torchvision.models import resnet50
+from torchvision.utils import make_grid
 import seaborn as sns
 import matplotlib.pyplot as plt
@@ -66,13 +68,270 @@ from huggingface_hub import list_repo_files
 import umap.umap_ as umap
 #import umap
+def filepaths_to_database(img_paths, settings, source_folder, crop_mode):
+    """
+    Append one row per image path to the 'png_list' table of the measurements database.
+    Args:
+        img_paths: Iterable of PNG file paths.
+        settings: Dict; only settings['timelapse'] is read here (adds a 'time_id' column when truthy).
+        source_folder: Folder that contains 'measurements/measurements.db'.
+        crop_mode: 'cell', 'nucleus', 'pathogen' or 'cytoplasm'; selects the name of the object-id column.
+    Returns:
+        None. sqlite3.OperationalError is printed with a traceback instead of being raised.
+    """
+    png_df = pd.DataFrame(img_paths, columns=['png_path'])
+    png_df['file_name'] = png_df['png_path'].apply(os.path.basename)
+    parts = png_df['file_name'].apply(lambda x: pd.Series(_map_wells_png(x, timelapse=settings['timelapse'])))
+    # Column names must line up with the tuple returned by _map_wells_png
+    columns = ['plate', 'row', 'col', 'field']
+    if settings['timelapse']:
+        columns.append('time_id')
+    columns.append('prcfo')
+    if crop_mode in ('cell', 'nucleus', 'pathogen', 'cytoplasm'):
+        columns.append(f'{crop_mode}_id')
+    png_df[columns] = parts
+    conn = None
+    try:
+        conn = sqlite3.connect(f'{source_folder}/measurements/measurements.db', timeout=5)
+        png_df.to_sql('png_list', conn, if_exists='append', index=False)
+        conn.commit()
+    except sqlite3.OperationalError as e:
+        print(f"SQLite error: {e}", flush=True)
+        traceback.print_exc()
+    finally:
+        # Release the database handle even when the write fails, so the file is not left locked
+        if conn is not None:
+            conn.close()
+def activation_maps_to_database(img_paths, source_folder, settings):
+    """
+    Append one row per activation-map image to the '<cam_type>_list' table of the dataset database.
+    The database file is '<source_folder>/measurements/<dataset_name>.db', where dataset_name is the
+    base name of settings['dataset'] without its extension; it is created when missing.
+    Args:
+        img_paths: Iterable of PNG file paths.
+        source_folder: Folder that contains the 'measurements' directory.
+        settings: Dict; settings['dataset'] and settings['cam_type'] are read here.
+    Returns:
+        None. sqlite3.OperationalError is printed with a traceback instead of being raised.
+    """
+    from .io import _create_database
+    png_df = pd.DataFrame(img_paths, columns=['png_path'])
+    png_df['file_name'] = png_df['png_path'].apply(os.path.basename)
+    parts = png_df['file_name'].apply(lambda x: pd.Series(_map_wells_png(x, timelapse=False)))
+    columns = ['plate', 'row', 'col', 'field', 'prcfo', 'object']
+    png_df[columns] = parts
+    dataset_name = os.path.splitext(os.path.basename(settings['dataset']))[0]
+    database_name = f"{source_folder}/measurements/{dataset_name}.db"
+    if not os.path.exists(database_name):
+        _create_database(database_name)
+    conn = None
+    try:
+        conn = sqlite3.connect(database_name, timeout=5)
+        png_df.to_sql(f"{settings['cam_type']}_list", conn, if_exists='append', index=False)
+        conn.commit()
+    except sqlite3.OperationalError as e:
+        print(f"SQLite error: {e}", flush=True)
+        traceback.print_exc()
+    finally:
+        # Release the database handle even when the write fails, so the file is not left locked
+        if conn is not None:
+            conn.close()
+def activation_correlations_to_database(df, img_paths, source_folder, settings):
+    """
+    Join per-image correlation values with well metadata and append them to '<cam_type>_correlations'.
+    The database file is '<source_folder>/measurements/<dataset_name>.db', where dataset_name is the
+    base name of settings['dataset'] without its extension; it is created when missing.
+    Args:
+        df: DataFrame with a 'file_name' column plus correlation columns. It is not modified.
+        img_paths: Iterable of PNG file paths.
+        source_folder: Folder that contains the 'measurements' directory.
+        settings: Dict; settings['dataset'] and settings['cam_type'] are read here.
+    Returns:
+        None. sqlite3.OperationalError is printed with a traceback instead of being raised.
+    """
+    from .io import _create_database
+    png_df = pd.DataFrame(img_paths, columns=['png_path'])
+    png_df['file_name'] = png_df['png_path'].apply(os.path.basename)
+    parts = png_df['file_name'].apply(lambda x: pd.Series(_map_wells_png(x, timelapse=False)))
+    columns = ['plate', 'row', 'col', 'field', 'prcfo', 'object']
+    png_df[columns] = parts
+    # Align both DataFrames by file_name. Indexed copies are used so the caller's df keeps its
+    # 'file_name' column and can be passed in again.
+    indexed_png_df = png_df.set_index('file_name')
+    indexed_df = df.set_index('file_name')
+    merged_df = pd.concat([indexed_png_df, indexed_df], axis=1)
+    merged_df.reset_index(inplace=True)
+    dataset_name = os.path.splitext(os.path.basename(settings['dataset']))[0]
+    database_name = f"{source_folder}/measurements/{dataset_name}.db"
+    if not os.path.exists(database_name):
+        _create_database(database_name)
+    conn = None
+    try:
+        conn = sqlite3.connect(database_name, timeout=5)
+        merged_df.to_sql(f"{settings['cam_type']}_correlations", conn, if_exists='append', index=False)
+        conn.commit()
+    except sqlite3.OperationalError as e:
+        print(f"SQLite error: {e}", flush=True)
+        traceback.print_exc()
+    finally:
+        # Release the database handle even when the write fails, so the file is not left locked
+        if conn is not None:
+            conn.close()
+def calculate_activation_correlations(inputs, activation_maps, file_names, manders_thresholds=(15, 50, 75)):
+    """
+    Calculates Pearson and Manders-style correlations between input image channels and activation map channels.
+    Pixels that are NaN or inf in either channel of a pair are dropped from both, so the two arrays
+    always stay aligned pixel by pixel.
+    Args:
+        inputs: A batch of input images, Tensor of shape (batch_size, channels, height, width)
+        activation_maps: A batch of activation maps, Tensor of shape (batch_size, channels, height, width)
+                         or (batch_size, height, width); resized to the input size when it differs.
+        file_names: List of file names corresponding to each image in the batch.
+        manders_thresholds: Intensity percentiles used to select the pixels for the Manders coefficients.
+    Returns:
+        df_correlations: A DataFrame with one row per image and, for every channel pair, the columns
+                         'channel_{i}_activation_{j}_pearsons', '..._{threshold}_M1' and '..._{threshold}_M2'.
+                         Values that cannot be computed are NaN.
+    """
+    # Ensure tensors are detached and moved to CPU before converting to numpy
+    inputs = inputs.detach().cpu()
+    activation_maps = activation_maps.detach().cpu()
+    batch_size, in_channels, height, width = inputs.shape
+    if activation_maps.dim() == 3:
+        # If activation maps have no channels, add a dummy channel dimension
+        activation_maps = activation_maps.unsqueeze(1)
+    _, act_channels, act_height, act_width = activation_maps.shape
+    # Ensure that the inputs and activation maps are the same size
+    if (height != act_height) or (width != act_width):
+        activation_maps = torch.nn.functional.interpolate(activation_maps, size=(height, width), mode='bilinear')
+    # Dictionary to collect correlation results, one list per output column
+    correlations_dict = {'file_name': []}
+    for in_c in range(in_channels):
+        for act_c in range(act_channels):
+            correlations_dict[f'channel_{in_c}_activation_{act_c}_pearsons'] = []
+            for threshold in manders_thresholds:
+                correlations_dict[f'channel_{in_c}_activation_{act_c}_{threshold}_M1'] = []
+                correlations_dict[f'channel_{in_c}_activation_{act_c}_{threshold}_M2'] = []
+    for b in range(batch_size):
+        correlations_dict['file_name'].append(file_names[b])
+        for in_c in range(in_channels):
+            input_flat = inputs[b, in_c].flatten().numpy()
+            for act_c in range(act_channels):
+                activation_flat = activation_maps[b, act_c].flatten().numpy()
+                # Joint mask: filtering each channel on its own could leave arrays of different length
+                valid = np.isfinite(input_flat) & np.isfinite(activation_flat)
+                input_channel = input_flat[valid]
+                activation_channel = activation_flat[valid]
+                prefix = f'channel_{in_c}_activation_{act_c}'
+                # pearsonr needs at least two paired observations
+                if input_channel.size > 1:
+                    pearson_corr, _ = pearsonr(input_channel, activation_channel)
+                else:
+                    pearson_corr = np.nan
+                correlations_dict[f'{prefix}_pearsons'].append(pearson_corr)
+                for threshold in manders_thresholds:
+                    manders_corr_M1 = np.nan
+                    manders_corr_M2 = np.nan
+                    if input_channel.size > 0:
+                        input_threshold = np.percentile(input_channel, threshold)
+                        activation_threshold = np.percentile(activation_channel, threshold)
+                        # Keep the pixels at or above the percentile in both channels
+                        mask = (input_channel >= input_threshold) & (activation_channel >= activation_threshold)
+                        if mask.any():
+                            overlap = np.sum(input_channel[mask] * activation_channel[mask])
+                            input_energy = np.sum(input_channel[mask] ** 2)
+                            activation_energy = np.sum(activation_channel[mask] ** 2)
+                            # A zero denominator means every selected pixel is 0; report NaN without a warning
+                            if input_energy > 0:
+                                manders_corr_M1 = overlap / input_energy
+                            if activation_energy > 0:
+                                manders_corr_M2 = overlap / activation_energy
+                    correlations_dict[f'{prefix}_{threshold}_M1'].append(manders_corr_M1)
+                    correlations_dict[f'{prefix}_{threshold}_M2'].append(manders_corr_M2)
+    # Convert the dictionary to a DataFrame
+    df_correlations = pd.DataFrame(correlations_dict)
+    return df_correlations
+def load_settings(csv_file_path, show=False, setting_key='setting_key', setting_value='setting_value'):
+    """
+    Convert a CSV file with a key column and a value column into a settings dictionary.
+    Empty cells become None, 'True'/'False' become booleans, text starting with '(', '[' or '{' is
+    evaluated with ast.literal_eval (dictionary values are parsed recursively) and numeric text becomes
+    int or float, including scientific notation such as '1e-05'. Anything else is kept as a string.
+    Note: save_settings in this module writes the columns 'Key' and 'Value'; pass
+    setting_key='Key', setting_value='Value' to read such a file.
+    Args:
+        csv_file_path (str): The path to the CSV file.
+        show (bool): Whether to display the dataframe (for debugging).
+        setting_key (str): The name of the column that contains the setting keys.
+        setting_value (str): The name of the column that contains the setting values.
+    Returns:
+        dict: A dictionary mapping each entry of the key column to its parsed value.
+    Raises:
+        ValueError: If either column is missing from the CSV file.
+    """
+    # Read the CSV file into a DataFrame
+    df = pd.read_csv(csv_file_path)
+    if show:
+        display(df)
+    if setting_key not in df.columns or setting_value not in df.columns:
+        raise ValueError(f"CSV file must contain {setting_key} and {setting_value} columns.")
+    def parse_value(value):
+        """Parse a raw cell, or an already evaluated literal, into the appropriate Python data type."""
+        # Nested dictionaries: parse every value, whatever its type
+        if isinstance(value, dict):
+            return {k: parse_value(v) for k, v in value.items()}
+        # Non-string input comes from numeric/boolean CSV columns or from ast.literal_eval results
+        if not isinstance(value, str):
+            if isinstance(value, (list, tuple, set)):
+                return value
+            if pd.isna(value):
+                return None
+            # Unwrap numpy scalars (e.g. np.int64) into plain Python numbers
+            return value.item() if isinstance(value, np.generic) else value
+        if value == '':
+            return None
+        if value == 'True':
+            return True
+        if value == 'False':
+            return False
+        # Handle lists, tuples, dictionaries, and other literals
+        if value.startswith(('(', '[', '{')):
+            try:
+                parsed_value = ast.literal_eval(value)
+            except (ValueError, SyntaxError):
+                pass  # Not a valid literal: fall through and treat it as number or text
+            else:
+                return parse_value(parsed_value) if isinstance(parsed_value, dict) else parsed_value
+        # Handle numeric values: integers first, then floats
+        try:
+            return int(value)
+        except ValueError:
+            pass
+        try:
+            number = float(value)
+        except ValueError:
+            return value
+        # Words such as 'inf' parse as floats; keep them as text
+        return number if np.isfinite(number) else value
+    # Convert the DataFrame to a dictionary, with parsing of each value
+    result_dict = {key: parse_value(value) for key, value in zip(df[setting_key], df[setting_value])}
+    return result_dict
 def save_settings(settings, name='settings', show=False):
     settings_df = pd.DataFrame(list(settings.items()), columns=['Key', 'Value'])
     if show:
         display(settings_df)
-    settings_csv = os.path.join(settings['src'],'settings',f'{name}.csv')
-    os.makedirs(os.path.join(settings['src'],'settings'), exist_ok=True)
+    if isinstance(settings['src'], list):
+        src = settings['src'][0]
+        name = f"{name}_list"
+    else:
+        src = settings['src']
+    settings_csv = os.path.join(src,'settings',f'{name}.csv')
+    os.makedirs(os.path.join(src,'settings'), exist_ok=True)
     settings_df.to_csv(settings_csv, index=False)
 def print_progress(files_processed, files_to_process, n_jobs, time_ls=None, batch_size=None, operation_type=""):
@@ -820,7 +1079,7 @@ def _map_wells_png(file_name, timelapse=False):
         print(f"Error: {e}")
         plate, row, column, field, object_id, prcfo = 'error', 'error', 'error', 'error', 'error', 'error'
     if timelapse:
-        return plate, row, column, field, timeid, prcfo, object_id,
+        return plate, row, column, field, timeid, prcfo, object_id
     else:
         return plate, row, column, field, prcfo, object_id
@@ -2987,7 +3246,6 @@ def preprocess_image(image_path, image_size=224, channels=[1,2,3], normalize=Tru
     input_tensor = transform(image).unsqueeze(0)
     return image, input_tensor
 class SaliencyMapGenerator:
     def __init__(self, model):
         self.model = model
@@ -3008,18 +3266,194 @@ class SaliencyMapGenerator:
         saliency = X.grad.abs()
         return saliency
-    def plot_saliency_maps(self, X, y, saliency, class_names):
+    def compute_saliency_and_predictions(self, X):
+        """
+        Compute saliency maps and predicted classes for a batch in one backward pass.
+        Args:
+            X: Input tensor for self.model; gradient tracking is switched on in place.
+        Returns:
+            (saliency, predictions): absolute gradient of the signed logits with respect to X, and the
+            predicted classes (1 where the logit is > 0, else 0) as a 1-D long tensor.
+        """
+        self.model.eval()
+        X.requires_grad_()
+        # Gradients accumulate across backward calls; drop any left over from an earlier pass on X
+        if X.grad is not None:
+            X.grad = None
+        # Forward pass to get predictions (logits)
+        scores = self.model(X).squeeze()
+        if scores.dim() == 0:
+            # A batch of one squeezes to a scalar; keep one entry per image so predictions[i] works
+            scores = scores.unsqueeze(0)
+        # Get predicted class (0 or 1 for binary classification)
+        predictions = (scores > 0).long()
+        # Compute saliency maps for the predicted class: flip the sign of the logit for class 0
+        self.model.zero_grad()
+        target_scores = scores * (2 * predictions - 1)
+        target_scores.backward(torch.ones_like(target_scores))
+        saliency = X.grad.abs()
+        return saliency, predictions
+    def plot_activation_grid(self, X, saliency, predictions, overlay=True, normalize=False):
+        """
+        Draw the batch as a grid with 8 columns, overlaying each saliency map on its image.
+        Args:
+            X: Batch of images, indexed as X[i] with channels first.
+            saliency: Batch of saliency maps, one per image.
+            predictions: Tensor with one predicted class per image; shown as a label on each panel.
+            overlay: When True, draw the image with the saliency map on top (alpha 0.5); when False only the label is drawn.
+            normalize: When True, rescale the image with percentile_normalize before plotting.
+        Returns:
+            The matplotlib figure.
+        """
+        N = X.shape[0]
+        rows = (N + 7) // 8
+        # squeeze=False keeps axs two-dimensional even for a single row (N <= 8)
+        fig, axs = plt.subplots(rows, 8, figsize=(16, rows * 2), squeeze=False)
+        for i in range(N):
+            ax = axs[i // 8, i % 8]
+            saliency_map = saliency[i].cpu().numpy()  # Move to CPU and convert to numpy
+            if saliency_map.shape[0] == 3:  # Channels first, reshape to (H, W, 3)
+                saliency_map = np.transpose(saliency_map, (1, 2, 0))
+            if overlay:
+                img_np = X[i].permute(1, 2, 0).detach().cpu().numpy()
+                if normalize:
+                    # Normalize image channels to 2nd and 98th percentiles
+                    img_np = self.percentile_normalize(img_np)
+                ax.imshow(img_np)
+                ax.imshow(saliency_map, cmap='jet', alpha=0.5)
+            # Add class label in the top-left corner
+            ax.text(5, 25, str(predictions[i].item()), fontsize=12, color='white', weight='bold',
+                    bbox=dict(facecolor='black', alpha=0.7, boxstyle='round,pad=0.2'))
+            ax.axis('off')
+        plt.tight_layout(pad=0)
+        return fig
+    def percentile_normalize(self, img, lower_percentile=2, upper_percentile=98):
+        """
+        Normalize each channel of the image to the given percentiles.
+        Args:
+            img: Input image as numpy array with shape (H, W, C)
+            lower_percentile: Lower percentile for normalization (default 2)
+            upper_percentile: Upper percentile for normalization (default 98)
+        Returns:
+            img: Floating-point image with every channel clipped to [0, 1]. A channel whose two
+                 percentiles coincide (e.g. a constant channel) is returned as all zeros.
+        """
+        # Floating-point output so integer images are not truncated to 0/1
+        img_normalized = np.zeros(img.shape, dtype=np.result_type(img.dtype, np.float32))
+        for c in range(img.shape[2]):  # Iterate over each channel
+            low = np.percentile(img[:, :, c], lower_percentile)
+            high = np.percentile(img[:, :, c], upper_percentile)
+            span = high - low
+            if span > 0:
+                img_normalized[:, :, c] = np.clip((img[:, :, c] - low) / span, 0, 1)
+            # else: leave zeros, avoiding a 0/0 division that would fill the channel with NaN
+        return img_normalized
+class GradCAMGenerator:
+    """
+    Grad-CAM map generator for a classifier that outputs a single logit per image.
+    Hooks on the target layer capture its output and the gradient flowing back into that output;
+    the two are combined into one normalized heat map per image.
+    """
+    def __init__(self, model, target_layer, cam_type='gradcam'):
+        """
+        Args:
+            model: Model to explain; it is switched to eval mode.
+            target_layer: Dotted attribute path of the layer to hook, resolved with getattr from the model.
+            cam_type: Stored on the instance; not used by the methods of this class.
+        """
+        self.model = model
+        self.model.eval()
+        self.target_layer = target_layer
+        self.cam_type = cam_type
+        self.gradients = None
+        self.activations = None
+        # Hook the target layer
+        self.target_layer_module = self.get_layer(self.model, self.target_layer)
+        self.hook_layers()
+    def hook_layers(self):
+        """Register the forward and backward hooks that fill self.activations and self.gradients."""
+        # Forward hook to get activations
+        def forward_hook(module, input, output):
+            self.activations = output
+        # Backward hook to get gradients with respect to the layer output
+        def backward_hook(module, grad_input, grad_output):
+            self.gradients = grad_output[0]
+        self.target_layer_module.register_forward_hook(forward_hook)
+        self.target_layer_module.register_backward_hook(backward_hook)
+    def get_layer(self, model, target_layer):
+        """Walk the dotted path in target_layer, one getattr per component, and return the layer found."""
+        layer = model
+        for module in target_layer.split('.'):
+            layer = getattr(layer, module)
+        return layer
+    def compute_gradcam_maps(self, X, y):
+        """
+        Compute one normalized Grad-CAM map.
+        Args:
+            X: Input tensor; compute_gradcam_and_predictions passes a single image with a batch axis.
+            y: Class label 0 or 1, mapped to the sign -1/+1 applied to the logit.
+        Returns:
+            numpy array scaled to [0, 1] at the spatial size of X; all zeros when the map is constant.
+        """
+        X.requires_grad_()
+        # Forward pass
+        scores = self.model(X).squeeze()
+        # Perform backward pass on the signed logit
+        target_scores = scores * (2 * y - 1)
+        self.model.zero_grad()
+        target_scores.backward(torch.ones_like(target_scores))
+        # Compute GradCAM: weight every activation channel by its mean gradient.
+        # Done out of place so the tensor captured by the forward hook is not modified.
+        pooled_gradients = torch.mean(self.gradients, dim=[0, 2, 3])
+        weighted_activations = self.activations * pooled_gradients.view(1, -1, 1, 1)
+        gradcam = torch.mean(weighted_activations, dim=1).squeeze()
+        gradcam = torch.nn.functional.relu(gradcam)
+        gradcam = torch.nn.functional.interpolate(gradcam.unsqueeze(0).unsqueeze(0), size=X.shape[2:], mode='bilinear')
+        gradcam = gradcam.squeeze().cpu().detach().numpy()
+        # Min-max scale; a constant map (common after ReLU) would otherwise become 0/0 = NaN
+        value_range = gradcam.max() - gradcam.min()
+        if value_range > 0:
+            gradcam = (gradcam - gradcam.min()) / value_range
+        else:
+            gradcam = np.zeros_like(gradcam)
+        return gradcam
+    def compute_gradcam_and_predictions(self, X):
+        """
+        Predict the class of every image in X and compute its Grad-CAM map for that class.
+        Args:
+            X: Batch of input images for self.model.
+        Returns:
+            (gradcam_maps, predictions): tensor with one map per image and a 1-D long tensor of
+            predicted classes (1 where the logit is > 0, else 0).
+        """
+        self.model.eval()
+        X.requires_grad_()
+        # Forward pass to get predictions (logits)
+        scores = self.model(X).squeeze()
+        if scores.dim() == 0:
+            # A batch of one squeezes to a scalar; keep one entry per image so predictions[i] works
+            scores = scores.unsqueeze(0)
+        # Get predicted class (0 or 1 for binary classification)
+        predictions = (scores > 0).long()
+        # Compute gradcam maps one image at a time
+        gradcam_maps = [self.compute_gradcam_maps(X[i].unsqueeze(0), predictions[i]) for i in range(X.size(0))]
+        if not gradcam_maps:
+            return torch.tensor(gradcam_maps), predictions
+        # Stack first: building a tensor from a list of arrays is slow and warns
+        return torch.from_numpy(np.stack(gradcam_maps)), predictions
+    def plot_activation_grid(self, X, gradcam, predictions, overlay=True, normalize=False):
+        """
+        Draw the batch as a grid with 8 columns, overlaying each Grad-CAM map on its image.
+        Args:
+            X: Batch of images, indexed as X[i] with channels first.
+            gradcam: Tensor with one Grad-CAM map per image.
+            predictions: Tensor with one predicted class per image; shown as a label on each panel.
+            overlay: When True, draw the image with the map on top (alpha 0.5); when False only the label is drawn.
+            normalize: When True, rescale the image with percentile_normalize before plotting.
+        Returns:
+            The matplotlib figure.
+        """
+        N = X.shape[0]
+        rows = (N + 7) // 8
+        # squeeze=False keeps axs two-dimensional even for a single row (N <= 8)
+        fig, axs = plt.subplots(rows, 8, figsize=(16, rows * 2), squeeze=False)
+        for i in range(N):
+            ax = axs[i // 8, i % 8]
+            gradcam_map = gradcam[i].cpu().numpy()
+            if overlay:
+                img_np = X[i].permute(1, 2, 0).detach().cpu().numpy()
+                if normalize:
+                    # Normalize image channels to 2nd and 98th percentiles
+                    img_np = self.percentile_normalize(img_np)
+                ax.imshow(img_np)
+                ax.imshow(gradcam_map, cmap='jet', alpha=0.5)
+            # Add class label in the top-left corner
+            ax.text(5, 25, str(predictions[i].item()), fontsize=12, color='white', weight='bold',
+                    bbox=dict(facecolor='black', alpha=0.7, boxstyle='round,pad=0.2'))
+            ax.axis('off')
+        plt.tight_layout(pad=0)
+        return fig
+    def percentile_normalize(self, img, lower_percentile=2, upper_percentile=98):
+        """
+        Normalize each channel of the image to the given percentiles.
+        Args:
+            img: Input image as numpy array with shape (H, W, C)
+            lower_percentile: Lower percentile for normalization (default 2)
+            upper_percentile: Upper percentile for normalization (default 98)
+        Returns:
+            img: Floating-point image with every channel clipped to [0, 1]. A channel whose two
+                 percentiles coincide (e.g. a constant channel) is returned as all zeros.
+        """
+        # Floating-point output so integer images are not truncated to 0/1
+        img_normalized = np.zeros(img.shape, dtype=np.result_type(img.dtype, np.float32))
+        for c in range(img.shape[2]):  # Iterate over each channel
+            low = np.percentile(img[:, :, c], lower_percentile)
+            high = np.percentile(img[:, :, c], upper_percentile)
+            span = high - low
+            if span > 0:
+                img_normalized[:, :, c] = np.clip((img[:, :, c] - low) / span, 0, 1)
+            # else: leave zeros, avoiding a 0/0 division that would fill the channel with NaN
+        return img_normalized
 def preprocess_image(image_path, normalize=True, image_size=224, channels=[1,2,3]):
     preprocess = transforms.Compose([
@@ -3560,7 +3994,7 @@ def plot_grid(cluster_images, colors, figuresize, black_background, verbose):
     plt.show()
     return grid_fig
-def generate_path_list_from_db(db_path, file_metadata):
+def generate_path_list_from_db_v1(db_path, file_metadata):
     all_paths = []
@@ -3590,6 +4024,44 @@ def generate_path_list_from_db(db_path, file_metadata):
     return all_paths
+def generate_path_list_from_db(db_path, file_metadata):
+    """
+    Read image paths from the 'png_list' table of a SQLite database.
+    Args:
+        db_path: Path to the SQLite database file.
+        file_metadata: None or empty to return every path; a string, or a list/tuple of strings, to
+                       return only paths that contain at least one of them (SQL LIKE '%text%').
+    Returns:
+        List of png_path values, or None when the database cannot be read (the error is printed).
+    """
+    all_paths = []
+    # Connect to the database and retrieve the image paths
+    print(f"Reading DataBase: {db_path}")
+    conn = None
+    try:
+        conn = sqlite3.connect(db_path)
+        cursor = conn.cursor()
+        if file_metadata:
+            # A single string and a collection of strings share one parameterized query
+            patterns = [file_metadata] if isinstance(file_metadata, str) else list(file_metadata)
+            query = "SELECT png_path FROM png_list WHERE " + " OR ".join(["png_path LIKE ?"] * len(patterns))
+            cursor.execute(query, [f"%{meta}%" for meta in patterns])
+        else:
+            # If file_metadata is None or empty
+            cursor.execute("SELECT png_path FROM png_list")
+        # Fetch in chunks so a large table is not materialized by a single call
+        while True:
+            rows = cursor.fetchmany(1000)
+            if not rows:
+                break
+            all_paths.extend(row[0] for row in rows)
+    except sqlite3.Error as e:
+        print(f"Database error: {e}")
+        return
+    except Exception as e:
+        print(f"Error: {e}")
+        return
+    finally:
+        # 'with sqlite3.connect(...)' only manages the transaction; the handle must be closed explicitly
+        if conn is not None:
+            conn.close()
+    return all_paths
 def correct_paths(df, base_path):
     if isinstance(df, pd.DataFrame):
@@ -4548,3 +5020,25 @@ def download_models(repo_id="einarolafsson/models", local_dir=None, retries=5, d
             time.sleep(delay)
     raise Exception("Failed to download model files after multiple attempts.")
+def generate_cytoplasm_mask(nucleus_mask, cell_mask):
+    """
+    Generates a cytoplasm mask from nucleus and cell masks.
+    Parameters:
+    - nucleus_mask (np.array): Binary or segmented mask of the nucleus (non-zero values represent nucleus).
+    - cell_mask (np.array): Binary or segmented mask of the whole cell (non-zero values represent cell).
+    Returns:
+    - cytoplasm_mask (np.array): Copy of cell_mask with every nucleus pixel set to 0, so cell labels
+      are preserved in the cytoplasm.
+    """
+    # Make sure the nucleus and cell masks are numpy arrays
+    nucleus_mask = np.asarray(nucleus_mask)
+    cell_mask = np.asarray(cell_mask)
+    # Zero out the nucleus; np.logical_or takes two operands and is not needed for a single condition
+    cytoplasm_mask = np.where(nucleus_mask != 0, 0, cell_mask)
+    return cytoplasm_mask

spacr 0.3.2__py3-none-any.whl → 0.3.3__py3-none-any.whl

spacr 0.3.2__py3-none-any.whl → 0.3.3__py3-none-any.whl