spacr 0.3.1__py3-none-any.whl → 0.3.22__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. spacr/__init__.py +19 -3
  2. spacr/cellpose.py +311 -0
  3. spacr/core.py +245 -2494
  4. spacr/deep_spacr.py +316 -48
  5. spacr/gui.py +1 -0
  6. spacr/gui_core.py +74 -63
  7. spacr/gui_elements.py +110 -5
  8. spacr/gui_utils.py +346 -6
  9. spacr/io.py +680 -141
  10. spacr/logger.py +28 -9
  11. spacr/measure.py +107 -95
  12. spacr/mediar.py +0 -3
  13. spacr/ml.py +1051 -0
  14. spacr/openai.py +37 -0
  15. spacr/plot.py +707 -20
  16. spacr/resources/data/lopit.csv +3833 -0
  17. spacr/resources/data/toxoplasma_metadata.csv +8843 -0
  18. spacr/resources/icons/convert.png +0 -0
  19. spacr/resources/{models/cp/toxo_plaque_cyto_e25000_X1120_Y1120.CP_model → icons/dna_matrix.mp4} +0 -0
  20. spacr/sequencing.py +241 -1311
  21. spacr/settings.py +134 -47
  22. spacr/sim.py +0 -2
  23. spacr/submodules.py +349 -0
  24. spacr/timelapse.py +0 -2
  25. spacr/toxo.py +238 -0
  26. spacr/utils.py +419 -180
  27. {spacr-0.3.1.dist-info → spacr-0.3.22.dist-info}/METADATA +31 -22
  28. {spacr-0.3.1.dist-info → spacr-0.3.22.dist-info}/RECORD +32 -33
  29. spacr/chris.py +0 -50
  30. spacr/graph_learning.py +0 -340
  31. spacr/resources/MEDIAR/.git +0 -1
  32. spacr/resources/MEDIAR_weights/.DS_Store +0 -0
  33. spacr/resources/icons/.DS_Store +0 -0
  34. spacr/resources/icons/spacr_logo_rotation.gif +0 -0
  35. spacr/resources/models/cp/toxo_plaque_cyto_e25000_X1120_Y1120.CP_model_settings.csv +0 -23
  36. spacr/resources/models/cp/toxo_pv_lumen.CP_model +0 -0
  37. spacr/sim_app.py +0 -0
  38. {spacr-0.3.1.dist-info → spacr-0.3.22.dist-info}/LICENSE +0 -0
  39. {spacr-0.3.1.dist-info → spacr-0.3.22.dist-info}/WHEEL +0 -0
  40. {spacr-0.3.1.dist-info → spacr-0.3.22.dist-info}/entry_points.txt +0 -0
  41. {spacr-0.3.1.dist-info → spacr-0.3.22.dist-info}/top_level.txt +0 -0
spacr/utils.py CHANGED
@@ -1,6 +1,7 @@
- import sys, os, re, sqlite3, torch, torchvision, random, string, shutil, cv2, tarfile, glob, psutil, platform, gzip, subprocess
+ import os, re, sqlite3, torch, torchvision, random, string, shutil, cv2, tarfile, glob, psutil, platform, gzip, subprocess, time, requests, ast

  import numpy as np
+ import pandas as pd
  from cellpose import models as cp_models
  from cellpose import denoise

@@ -14,7 +15,6 @@ from skimage.segmentation import clear_border

  from collections import defaultdict, OrderedDict
  from PIL import Image
- import pandas as pd
  from statsmodels.stats.outliers_influence import variance_inflation_factor
  from statsmodels.stats.stattools import durbin_watson
  import statsmodels.formula.api as smf
@@ -24,7 +24,7 @@ from itertools import combinations
  from functools import reduce
  from IPython.display import display

- from multiprocessing import Pool, cpu_count
+ from multiprocessing import Pool, cpu_count, set_start_method, get_start_method
  from concurrent.futures import ThreadPoolExecutor

  import torch.nn as nn
@@ -33,65 +33,118 @@ from torch.utils.checkpoint import checkpoint
  from torch.utils.data import Subset
  from torch.autograd import grad

+ from torchvision import models
+ from torchvision.models.resnet import ResNet18_Weights, ResNet34_Weights, ResNet50_Weights, ResNet101_Weights, ResNet152_Weights
+ import torchvision.transforms as transforms
+ from torchvision.models import resnet50
+ from torchvision.utils import make_grid
+
  import seaborn as sns
  import matplotlib.pyplot as plt
  from matplotlib.offsetbox import OffsetImage, AnnotationBbox

+ from scipy import stats
  import scipy.ndimage as ndi
  from scipy.spatial import distance
- from scipy.stats import fisher_exact
+ from scipy.stats import fisher_exact, f_oneway, kruskal
  from scipy.ndimage.filters import gaussian_filter
  from scipy.spatial import ConvexHull
  from scipy.interpolate import splprep, splev
  from scipy.ndimage import binary_dilation

- from sklearn.preprocessing import StandardScaler
  from skimage.exposure import rescale_intensity
  from sklearn.metrics import auc, precision_recall_curve
  from sklearn.model_selection import train_test_split
  from sklearn.linear_model import Lasso, Ridge
- from sklearn.preprocessing import OneHotEncoder
- from sklearn.cluster import KMeans
- from sklearn.preprocessing import StandardScaler
- from sklearn.cluster import DBSCAN
- from sklearn.cluster import KMeans
+ from sklearn.preprocessing import OneHotEncoder, StandardScaler
+ from sklearn.cluster import KMeans, DBSCAN
  from sklearn.manifold import TSNE
- from sklearn.cluster import KMeans
  from sklearn.decomposition import PCA
+ from sklearn.ensemble import RandomForestClassifier
+
+ from huggingface_hub import list_repo_files

  import umap.umap_ as umap
+ #import umap

- from torchvision import models
- from torchvision.models.resnet import ResNet18_Weights, ResNet34_Weights, ResNet50_Weights, ResNet101_Weights, ResNet152_Weights
- import torchvision.transforms as transforms
+ def load_settings(csv_file_path, show=False, setting_key='setting_key', setting_value='setting_value'):
+     """
+     Convert a CSV file with setting-key and setting-value columns into a dictionary.
+     Handles special cases where values are lists, tuples, booleans, None, integers, floats, and nested dictionaries.

- from sklearn.ensemble import RandomForestClassifier
- from sklearn.preprocessing import StandardScaler
- from scipy.stats import f_oneway, kruskal
- from sklearn.cluster import KMeans
- from scipy import stats
+     Args:
+         csv_file_path (str): The path to the CSV file.
+         show (bool): Whether to display the dataframe (for debugging).
+         setting_key (str): The name of the column that contains the setting keys.
+         setting_value (str): The name of the column that contains the setting values.

- from .logger import log_function_call
- from multiprocessing import set_start_method, get_start_method
+     Returns:
+         dict: A dictionary where the setting keys are the dictionary keys and the parsed setting values are the values.
+     """
+     # Read the CSV file into a DataFrame
+     df = pd.read_csv(csv_file_path)

- import torch
- import torchvision.transforms as transforms
- from torchvision.models import resnet50
- from PIL import Image
- import numpy as np
- import umap
- import pandas as pd
- from sklearn.ensemble import RandomForestClassifier
- from sklearn.preprocessing import StandardScaler
- from scipy.stats import f_oneway, kruskal
- from sklearn.cluster import KMeans
- from scipy import stats
+     if show:
+         display(df)
+
+     # Ensure the key and value columns exist
+     if setting_key not in df.columns or setting_value not in df.columns:
+         raise ValueError(f"CSV file must contain {setting_key} and {setting_value} columns.")
+
+     def parse_value(value):
+         """Parse the string value into the appropriate Python data type."""
+         # Handle empty values
+         if pd.isna(value) or value == '':
+             return None
+
+         # Handle boolean values
+         if value == 'True':
+             return True
+         if value == 'False':
+             return False
+
+         # Handle lists, tuples, dictionaries, and other literals
+         if value.startswith(('(', '[', '{')):  # If it starts with (, [ or {, use ast.literal_eval
+             try:
+                 parsed_value = ast.literal_eval(value)
+                 # If parsed_value is a dict, recursively parse its values
+                 if isinstance(parsed_value, dict):
+                     parsed_value = {k: parse_value(v) for k, v in parsed_value.items()}
+                 return parsed_value
+             except (ValueError, SyntaxError):
+                 pass  # If there's an error, return the value as-is
+
+         # Handle numeric values (integers and floats)
+         try:
+             if '.' in value:
+                 return float(value)  # If it contains a dot, convert to float
+             return int(value)  # Otherwise, convert to integer
+         except ValueError:
+             pass  # If it's not a valid number, return the value as-is
+
+         # Return the original value if no other type matched
+         return value
+
+     # Convert the DataFrame to a dictionary, parsing each value
+     result_dict = {key: parse_value(value) for key, value in zip(df[setting_key], df[setting_value])}

- def save_settings(settings, name='settings'):
+     return result_dict
+
+
+ def save_settings(settings, name='settings', show=False):

      settings_df = pd.DataFrame(list(settings.items()), columns=['Key', 'Value'])
-     settings_csv = os.path.join(settings['src'],'settings',f'{name}.csv')
-     os.makedirs(os.path.join(settings['src'],'settings'), exist_ok=True)
+     if show:
+         display(settings_df)
+
+     if isinstance(settings['src'], list):
+         src = settings['src'][0]
+         name = f"{name}_list"
+     else:
+         src = settings['src']
+
+     settings_csv = os.path.join(src,'settings',f'{name}.csv')
+     os.makedirs(os.path.join(src,'settings'), exist_ok=True)
      settings_df.to_csv(settings_csv, index=False)

  def print_progress(files_processed, files_to_process, n_jobs, time_ls=None, batch_size=None, operation_type=""):
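The new load_settings/save_settings pair round-trips a settings dictionary through a CSV under <src>/settings/. A minimal sketch of that round trip (the path and keys are hypothetical; note that save_settings writes 'Key'/'Value' column headers, so load_settings needs its column-name arguments overridden):

    from spacr.utils import load_settings, save_settings

    settings = {'src': '/data/experiment1', 'channels': [0, 1, 2], 'diameter': 30.0, 'denoise': False}
    save_settings(settings, name='segmentation')  # writes /data/experiment1/settings/segmentation.csv
    restored = load_settings('/data/experiment1/settings/segmentation.csv',
                             setting_key='Key', setting_value='Value')
    assert restored['channels'] == [0, 1, 2] and restored['denoise'] is False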
@@ -303,7 +356,7 @@ def _get_cellpose_batch_size():
      except Exception as e:
          return 8

- def _extract_filename_metadata(filenames, src, regular_expression, metadata_type='cellvoyager', pick_slice=False, skip_mode='01'):
+ def _extract_filename_metadata_v1(filenames, src, regular_expression, metadata_type='cellvoyager', pick_slice=False, skip_mode='01'):

      images_by_key = defaultdict(list)

@@ -353,6 +406,57 @@ def _extract_filename_metadata(filenames, src, regular_expression, metadata_type

      return images_by_key

+ def _extract_filename_metadata(filenames, src, regular_expression, metadata_type='cellvoyager', pick_slice=False, skip_mode='01'):
+
+     images_by_key = defaultdict(list)
+
+     for filename in filenames:
+         match = regular_expression.match(filename)
+         if match:
+             try:
+                 try:
+                     plate = match.group('plateID')
+                 except:
+                     plate = os.path.basename(src)
+
+                 well = match.group('wellID')
+                 field = match.group('fieldID')
+                 channel = match.group('chanID')
+                 mode = None
+
+                 if well[0].isdigit():
+                     well = str(_safe_int_convert(well))
+                 if field[0].isdigit():
+                     field = str(_safe_int_convert(field))
+                 if channel[0].isdigit():
+                     channel = str(_safe_int_convert(channel))
+
+                 if metadata_type == 'cq1':
+                     orig_well = well
+                     well = _convert_cq1_well_id(well)
+                     print(f'Converted Well ID: {orig_well} to {well}', end='\r', flush=True)
+
+                 if pick_slice:
+                     try:
+                         mode = match.group('AID')
+                     except IndexError:
+                         mode = '00'
+
+                     if mode == skip_mode:
+                         continue
+
+                 key = (plate, well, field, channel, mode)
+                 file_path = os.path.join(src, filename)  # Store the full path
+                 images_by_key[key].append(file_path)
+
+             except IndexError:
+                 print(f"Could not extract information from filename {filename} using provided regex")
+         else:
+             print(f"Filename {filename} did not match provided regex")
+             continue
+
+     return images_by_key
+
  def mask_object_count(mask):
      """
      Counts the number of objects in a given mask.
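The rewritten _extract_filename_metadata expects a pre-compiled regular expression with named groups; plateID is optional (falling back to the source folder name) and AID is only consulted when pick_slice=True. A hypothetical CellVoyager-style pattern, where only the plateID, wellID, fieldID, chanID, and AID groups are read by the function and the other group names are illustrative padding:

    import re

    pattern = re.compile(
        r'(?P<plateID>.*)_(?P<wellID>.*)_T(?P<timeID>.*)F(?P<fieldID>.*)'
        r'L(?P<laserID>..)A(?P<AID>..)Z(?P<sliceID>.*)C(?P<chanID>.*)\.tif'
    )
    m = pattern.match('plate1_B02_T0001F003L01A01Z01C02.tif')
    print(m.group('plateID'), m.group('wellID'), m.group('fieldID'), m.group('chanID'))
    # plate1 B02 003 02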
@@ -443,7 +547,7 @@ def _generate_representative_images(db_path, cells=['HeLa'], cell_loc=None, path
      from .plot import _plot_images_on_grid

      df = _read_and_join_tables(db_path)
-     df = _annotate_conditions(df, cells, cell_loc, pathogens, pathogen_loc, treatments,treatment_loc)
+     df = annotate_conditions(df, cells, cell_loc, pathogens, pathogen_loc, treatments, treatment_loc)

      if update_db:
          _update_database_with_merged_info(db_path, df, table='png_list', columns=['pathogen', 'treatment', 'host_cells', 'condition', 'prcfo'])
@@ -489,34 +593,6 @@ def _map_values(row, values, locs):
          return value_dict.get(row[type_], None)
      return values[0] if values else None

- def _annotate_conditions(df, cells=['HeLa'], cell_loc=None, pathogens=['rh'], pathogen_loc=None, treatments=['cm'], treatment_loc=None):
-     """
-     Annotates conditions in the given DataFrame based on the provided parameters.
-
-     Args:
-         df (pandas.DataFrame): The DataFrame to annotate.
-         cells (list, optional): The list of host cell types. Defaults to ['HeLa'].
-         cell_loc (list, optional): The list of location identifiers for host cells. Defaults to None.
-         pathogens (list, optional): The list of pathogens. Defaults to ['rh'].
-         pathogen_loc (list, optional): The list of location identifiers for pathogens. Defaults to None.
-         treatments (list, optional): The list of treatments. Defaults to ['cm'].
-         treatment_loc (list, optional): The list of location identifiers for treatments. Defaults to None.
-
-     Returns:
-         pandas.DataFrame: The annotated DataFrame with the 'host_cells', 'pathogen', 'treatment', and 'condition' columns.
-     """
-
-     # Apply mappings or defaults
-     df['host_cells'] = [cells[0]] * len(df) if cell_loc is None else df.apply(_map_values, args=(cells, cell_loc), axis=1)
-     df['pathogen'] = [pathogens[0]] * len(df) if pathogen_loc is None else df.apply(_map_values, args=(pathogens, pathogen_loc), axis=1)
-     df['treatment'] = [treatments[0]] * len(df) if treatment_loc is None else df.apply(_map_values, args=(treatments, treatment_loc), axis=1)
-
-     # Construct condition column
-     df['condition'] = df.apply(lambda row: '_'.join(filter(None, [row.get('pathogen'), row.get('treatment')])), axis=1)
-     df['condition'] = df['condition'].apply(lambda x: x if x else 'none')
-     return df
-
  def is_list_of_lists(var):
      if isinstance(var, list) and all(isinstance(i, list) for i in var):
          return True
@@ -1085,67 +1161,74 @@ def _get_cellpose_channels(src, nucleus_channel, pathogen_channel, cell_channel)
      else:
          cellpose_channels['cell'] = [0,0]
      return cellpose_channels
-
- def annotate_conditions(df, cells=['HeLa'], cell_loc=None, pathogens=['rh'], pathogen_loc=None, treatments=['cm'], treatment_loc=None, types = ['col','col','col']):
+
+ def annotate_conditions(df, cells=None, cell_loc=None, pathogens=None, pathogen_loc=None, treatments=None, treatment_loc=None):
      """
-     Annotates conditions in a DataFrame based on specified criteria.
+     Annotates conditions in a DataFrame based on specified criteria and combines them into a 'condition' column.
+     NaN is used for missing values, and they are excluded from the 'condition' column.

      Args:
          df (pandas.DataFrame): The DataFrame to annotate.
-         cells (list, optional): List of host cell types. Defaults to ['HeLa'].
-         cell_loc (list, optional): List of corresponding values for each host cell type. Defaults to None.
-         pathogens (list, optional): List of pathogens. Defaults to ['rh'].
-         pathogen_loc (list, optional): List of corresponding values for each pathogen. Defaults to None.
-         treatments (list, optional): List of treatments. Defaults to ['cm'].
-         treatment_loc (list, optional): List of corresponding values for each treatment. Defaults to None.
-         types (list, optional): List of column types for host cells, pathogens, and treatments. Defaults to ['col','col','col'].
+         cells (list/str, optional): Host cell types. Defaults to None.
+         cell_loc (list of lists, optional): Values for each host cell type. Defaults to None.
+         pathogens (list/str, optional): Pathogens. Defaults to None.
+         pathogen_loc (list of lists, optional): Values for each pathogen. Defaults to None.
+         treatments (list/str, optional): Treatments. Defaults to None.
+         treatment_loc (list of lists, optional): Values for each treatment. Defaults to None.

      Returns:
-         pandas.DataFrame: The annotated DataFrame.
+         pandas.DataFrame: Annotated DataFrame with a combined 'condition' column.
      """
+
+     def _get_type(val):
+         """Determine if a value maps to 'row' or 'col'."""
+         if isinstance(val, str) and val.startswith('c'):
+             return 'col'
+         elif isinstance(val, str) and val.startswith('r'):
+             return 'row'
+         return None

-     # Function to apply to each row
-     def _map_values(row, dict_, type_='col'):
+     def _map_or_default(column_name, values, loc, df):
          """
-         Maps the values in a row to corresponding keys in a dictionary.
+         Consolidates the logic for mapping values or assigning defaults when loc is None.

          Args:
-             row (dict): The row containing the values to be mapped.
-             dict_ (dict): The dictionary containing the mapping values.
-             type_ (str, optional): The type of mapping to perform. Defaults to 'col'.
-
-         Returns:
-             str: The mapped value if found, otherwise None.
+             column_name (str): The column in the DataFrame to annotate.
+             values (list/str): The list of values or a single string to annotate.
+             loc (list of lists): Location mapping for the values, or None if not used.
+             df (pandas.DataFrame): The DataFrame to modify.
          """
-         for values, cols in dict_.items():
-             if row[type_] in cols:
-                 return values
-         return None
+         if isinstance(values, str) or (isinstance(values, list) and loc is None):
+             # Assign all rows the first value in the list or the single string
+             df[column_name] = values if isinstance(values, str) else values[0]
+         elif values is not None and loc is not None:
+             # Perform the location-based mapping
+             value_dict = {val: key for key, loc_list in zip(values, loc) for val in loc_list}
+             df[column_name] = np.nan
+             for val, key in value_dict.items():
+                 loc_type = _get_type(val)
+                 if loc_type:
+                     df.loc[df[loc_type] == val, column_name] = key
+
+     # Handle cells, pathogens, and treatments using the consolidated logic
+     _map_or_default('host_cells', cells, cell_loc, df)
+     _map_or_default('pathogen', pathogens, pathogen_loc, df)
+     _map_or_default('treatment', treatments, treatment_loc, df)
+
+     # Conditionally fill NaN for pathogen and treatment columns if applicable
+     if pathogens is not None:
+         df['pathogen'].fillna(np.nan, inplace=True)
+     if treatments is not None:
+         df['treatment'].fillna(np.nan, inplace=True)
+
+     # Create the 'condition' column by excluding any NaN values, safely checking if 'host_cells', 'pathogen', and 'treatment' exist
+     df['condition'] = df.apply(
+         lambda x: '_'.join([str(v) for v in [x.get('host_cells'), x.get('pathogen'), x.get('treatment')] if pd.notna(v)]),
+         axis=1
+     )

-     if cell_loc is None:
-         df['host_cells'] = cells[0]
-     else:
-         cells_dict = dict(zip(cells, cell_loc))
-         df['host_cells'] = df.apply(lambda row: _map_values(row, cells_dict, type_=types[0]), axis=1)
-     if pathogen_loc is None:
-         if pathogens != None:
-             df['pathogen'] = 'none'
-     else:
-         pathogens_dict = dict(zip(pathogens, pathogen_loc))
-         df['pathogen'] = df.apply(lambda row: _map_values(row, pathogens_dict, type_=types[1]), axis=1)
-     if treatment_loc is None:
-         df['treatment'] = 'cm'
-     else:
-         treatments_dict = dict(zip(treatments, treatment_loc))
-         df['treatment'] = df.apply(lambda row: _map_values(row, treatments_dict, type_=types[2]), axis=1)
-     if pathogens != None:
-         df['condition'] = df['pathogen']+'_'+df['treatment']
-     else:
-         df['condition'] = df['treatment']
      return df
-

-
  def _split_data(df, group_by, object_type):
      """
      Splits the input dataframe into numeric and non-numeric parts, groups them by the specified column,
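A toy illustration of the new annotate_conditions mapping (the 'row'/'col' columns and well IDs are assumed to exist in the upstream DataFrame; location values starting with 'r' map against the row column and values starting with 'c' against the col column):

    import pandas as pd
    from spacr.utils import annotate_conditions  # assumed import path

    df = pd.DataFrame({'row': ['r1', 'r1', 'r2'], 'col': ['c1', 'c2', 'c1']})
    df = annotate_conditions(df,
                             cells='HeLa',
                             pathogens=['rh', 'dgra14'], pathogen_loc=[['c1'], ['c2']],
                             treatments=['cm', 'lovastatin'], treatment_loc=[['r1'], ['r2']])
    print(df['condition'].tolist())
    # ['HeLa_rh_cm', 'HeLa_dgra14_cm', 'HeLa_rh_lovastatin']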
@@ -1951,9 +2034,10 @@ def add_images_to_tar(paths_chunk, tar_path, total_images):
              tar.add(img_path, arcname=arcname)
              with lock:
                  counter.value += 1
-                 if counter.value % 100 == 0:  # Print every 100 updates
-                     progress = (counter.value / total_images) * 100
-                     print(f"Progress: {counter.value}/{total_images} ({progress:.2f}%)", end='\r', file=sys.stdout, flush=True)
+                 if counter.value % 10 == 0:  # Print every 10 updates
+                     #progress = (counter.value / total_images) * 100
+                     #print(f"Progress: {counter.value}/{total_images} ({progress:.2f}%)", end='\r', file=sys.stdout, flush=True)
+                     print_progress(counter.value, total_images, n_jobs=1, time_ls=None, batch_size=None, operation_type="generating .tar dataset")
          except FileNotFoundError:
              print(f"File not found: {img_path}")

@@ -2070,52 +2154,6 @@ def check_multicollinearity(x):
      vif_data["VIF"] = [variance_inflation_factor(x.values, i) for i in range(x.shape[1])]
      return vif_data

- def generate_dependent_variable(df, dv_loc, pc_min=0.95, nc_max=0.05, agg_type='mean'):
-
-     from .plot import _plot_histograms_and_stats, _plot_plates
-
-     def qstring_to_float(qstr):
-         number = int(qstr[1:])  # Remove the "q" and convert the rest to an integer
-         return number / 100.0
-
-     print("Unique values in plate:", df['plate'].unique())
-     dv_cell_loc = f'{dv_loc}/dv_cell.csv'
-     dv_well_loc = f'{dv_loc}/dv_well.csv'
-
-     df['pred'] = 1-df['pred']  # if you switched pc and nc
-     df = df[(df['pred'] <= nc_max) | (df['pred'] >= pc_min)]
-
-     if 'prc' not in df.columns:
-         df['prc'] = df['plate'] + '_' + df['row'] + '_' + df['col']
-
-     if agg_type.startswith('q'):
-         val = qstring_to_float(agg_type)
-         agg_type = lambda x: x.quantile(val)
-
-     # Aggregating for mean prediction and total count
-     df_grouped = df.groupby('prc').agg(
-         pred=('pred', agg_type),
-         recruitment=('recruitment', agg_type),
-         count_prc=('prc', 'size'),
-         #count_above_95=('pred', lambda x: (x > 0.95).sum()),
-         mean_pathogen_area=('pathogen_area', 'mean')
-     )
-
-     df_cell = df[['prc', 'pred', 'pathogen_area', 'recruitment']]
-
-     df_cell.to_csv(dv_cell_loc, index=True, header=True, mode='w')
-     df_grouped.to_csv(dv_well_loc, index=True, header=True, mode='w')  # Changed from loc to dv_loc
-     display(df)
-     _plot_histograms_and_stats(df)
-     df_grouped = df_grouped.sort_values(by='count_prc', ascending=True)
-     display(df_grouped)
-     print('pred')
-     _plot_plates(df=df_cell, variable='pred', grouping='mean', min_max='allq', cmap='viridis')
-     print('recruitment')
-     _plot_plates(df=df_cell, variable='recruitment', grouping='mean', min_max='allq', cmap='viridis')
-
-     return df_grouped
-
  def lasso_reg(merged_df, alpha_value=0.01, reg_type='lasso'):
      # Separate predictors and response
      X = merged_df[['gene', 'grna', 'plate', 'row', 'column']]
@@ -3021,7 +3059,6 @@ def preprocess_image(image_path, image_size=224, channels=[1,2,3], normalize=Tru
      input_tensor = transform(image).unsqueeze(0)
      return image, input_tensor

-
  class SaliencyMapGenerator:
      def __init__(self, model):
          self.model = model
@@ -3042,17 +3079,63 @@ class SaliencyMapGenerator:
          saliency = X.grad.abs()
          return saliency

-     def plot_saliency_maps(self, X, y, saliency, class_names):
+     def compute_saliency_and_predictions(self, X):
+         self.model.eval()
+         X.requires_grad_()
+
+         # Forward pass to get predictions (logits)
+         scores = self.model(X).squeeze()
+
+         # Get predicted class (0 or 1 for binary classification)
+         predictions = (scores > 0).long()
+
+         # Compute saliency maps
+         self.model.zero_grad()
+         target_scores = scores * (2 * predictions - 1)
+         target_scores.backward(torch.ones_like(target_scores))
+
+         saliency = X.grad.abs()
+
+         return saliency, predictions
+
+     def plot_saliency_grid(self, X, saliency, predictions, mode='mean'):
          N = X.shape[0]
+         rows = (N + 7) // 8  # Ensure we can handle batches of different sizes
+         fig, axs = plt.subplots(rows, 8, figsize=(16, rows * 2), squeeze=False)  # squeeze=False keeps axs 2-D even when rows == 1
+
          for i in range(N):
-             plt.subplot(2, N, i + 1)
-             plt.imshow(X[i].permute(1, 2, 0).cpu().numpy())
-             plt.axis('off')
-             plt.title(class_names[y[i]])
-             plt.subplot(2, N, N + i + 1)
-             plt.imshow(saliency[i].cpu().numpy(), cmap=plt.cm.hot)
-             plt.axis('off')
-         plt.gcf().set_size_inches(12, 5)
+             ax = axs[i // 8, i % 8]
+
+             if mode == 'mean':
+                 saliency_map = saliency[i].mean(dim=0).cpu().numpy()  # Mean saliency over channels
+                 ax.imshow(X[i].permute(1, 2, 0).detach().cpu().numpy())  # Added .detach() here
+                 ax.imshow(saliency_map, cmap='jet', alpha=0.5)
+
+             elif mode == 'channel':
+                 # Plot individual channels in a loop if the image has multiple channels
+                 for j in range(X.shape[1]):
+                     saliency_map = saliency[i, j].cpu().numpy()
+                     ax.imshow(saliency_map, cmap='jet')
+                     ax.axis('off')
+
+             elif mode == '3-channel' and X.shape[1] == 3:
+                 saliency_map = saliency[i].cpu().numpy().transpose(1, 2, 0)
+                 ax.imshow(saliency_map)
+
+             elif mode == '2-channel' and X.shape[1] == 2:
+                 saliency_map = saliency[i].cpu().numpy().transpose(1, 2, 0)
+                 ax.imshow(saliency_map)
+
+             # Add class label in top-left corner
+             ax.text(5, 25, str(predictions[i].item()), fontsize=12, color='white', weight='bold',
+                     bbox=dict(facecolor='black', alpha=0.7, boxstyle='round,pad=0.2'))
+             ax.axis('off')
+
+         # Turn off unused axes
+         for j in range(N, rows * 8):
+             fig.delaxes(axs[j // 8, j % 8])
+
+         plt.tight_layout(pad=0)
          plt.show()

  def preprocess_image(image_path, normalize=True, image_size=224, channels=[1,2,3]):
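A sketch of how the two new SaliencyMapGenerator methods chain together for a single-logit binary classifier (the ResNet-18 backbone, replacement head, and random batch below are placeholder assumptions, not the package's own pipeline):

    import torch
    import torchvision
    from spacr.utils import SaliencyMapGenerator  # assumed import path

    model = torchvision.models.resnet18(weights=None)
    model.fc = torch.nn.Linear(model.fc.in_features, 1)  # single-logit binary head

    X = torch.rand(8, 3, 224, 224)  # batch of 8 RGB images
    generator = SaliencyMapGenerator(model)
    saliency, predictions = generator.compute_saliency_and_predictions(X)
    generator.plot_saliency_grid(X, saliency, predictions, mode='mean')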
@@ -3594,13 +3677,48 @@ def plot_grid(cluster_images, colors, figuresize, black_background, verbose):
      plt.show()
      return grid_fig

- def correct_paths(df, base_path):
+ def generate_path_list_from_db(db_path, file_metadata):

-     if 'png_path' not in df.columns:
-         print("No 'png_path' column found in the dataframe.")
-         return df, None
+     all_paths = []
+
+     # Connect to the database and retrieve the image paths
+     print(f"Reading DataBase: {db_path}")
+     try:
+         with sqlite3.connect(db_path) as conn:
+             cursor = conn.cursor()
+             if file_metadata and isinstance(file_metadata, str):
+                 cursor.execute("SELECT png_path FROM png_list WHERE png_path LIKE ?", (f"%{file_metadata}%",))
+             else:
+                 cursor.execute("SELECT png_path FROM png_list")
+
+             while True:
+                 rows = cursor.fetchmany(1000)
+                 if not rows:
+                     break
+                 all_paths.extend([row[0] for row in rows])
+
+     except sqlite3.Error as e:
+         print(f"Database error: {e}")
+         return
+     except Exception as e:
+         print(f"Error: {e}")
+         return

-     image_paths = df['png_path'].to_list()
+     return all_paths
+
+ def correct_paths(df, base_path):
+
+     if isinstance(df, pd.DataFrame):
+
+         if 'png_path' not in df.columns:
+             print("No 'png_path' column found in the dataframe.")
+             return df, None
+         else:
+             image_paths = df['png_path'].to_list()
+
+     elif isinstance(df, list):
+         image_paths = df

      adjusted_image_paths = []
      for path in image_paths:
@@ -3614,9 +3732,11 @@ def correct_paths(df, base_path):
          else:
              adjusted_image_paths.append(path)

-     df['png_path'] = adjusted_image_paths
-     image_paths = df['png_path'].to_list()
-     return df, image_paths
+     if isinstance(df, pd.DataFrame):
+         df['png_path'] = adjusted_image_paths
+         return df, adjusted_image_paths
+     else:
+         return adjusted_image_paths

  def delete_folder(folder_path):
      if os.path.exists(folder_path) and os.path.isdir(folder_path):
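Together, the two functions above let PNG paths recorded in a measurements database be rebased onto the local filesystem. A sketch (the database path and base path are hypothetical):

    from spacr.utils import generate_path_list_from_db, correct_paths  # assumed import path

    paths = generate_path_list_from_db('/data/experiment1/measurements/measurements.db',
                                       file_metadata='plate1')
    # With a list input, correct_paths returns only the adjusted list
    paths = correct_paths(paths, base_path='/data/experiment1')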
@@ -4424,7 +4544,7 @@ def convert_and_relabel_masks(folder_path):

  def correct_masks(src):

-     from .utils import _load_and_concatenate_arrays
+     from .io import _load_and_concatenate_arrays

      cell_path = os.path.join(src,'norm_channel_stack', 'cell_mask_stack')
      convert_and_relabel_masks(cell_path)
@@ -4447,4 +4567,123 @@ def get_cuda_version():
      except (subprocess.CalledProcessError, FileNotFoundError):
          return None

+ def all_elements_match(list1, list2):
+     # Check if all elements in list1 are in list2
+     return all(element in list2 for element in list1)
+
+ def prepare_batch_for_segmentation(batch):
+     # Ensure the batch is of dtype float32
+     if batch.dtype != np.float32:
+         batch = batch.astype(np.float32)
+
+     # Normalize each image in the batch
+     for i in range(batch.shape[0]):
+         if batch[i].max() > 1:
+             batch[i] = batch[i] / batch[i].max()
+
+     return batch
+
+ def check_index(df, elements=5, split_char='_'):
+     problematic_indices = []
+     for idx in df.index:
+         parts = str(idx).split(split_char)
+         if len(parts) != elements:
+             problematic_indices.append(idx)
+     if problematic_indices:
+         print(f"Indices that cannot be separated into {elements} parts:")
+         for idx in problematic_indices:
+             print(idx)
+         raise ValueError(f"Found {len(problematic_indices)} problematic indices that do not split into {elements} parts.")
+
+ # Define the mapping function
+ def map_condition(col_value, neg='c1', pos='c2', mix='c3'):
+     if col_value == neg:
+         return 'neg'
+     elif col_value == pos:
+         return 'pos'
+     elif col_value == mix:
+         return 'mix'
+     else:
+         return 'screen'
+
+ def download_models(repo_id="einarolafsson/models", local_dir=None, retries=5, delay=5):
+     """
+     Downloads all model files from Hugging Face and stores them in the specified local directory.
+
+     Args:
+         repo_id (str): The repository ID on Hugging Face (default is 'einarolafsson/models').
+         local_dir (str): The local directory where the models will be saved; must be supplied by the caller.
+         retries (int): Number of retry attempts in case of failure.
+         delay (int): Delay in seconds between retries.
+
+     Returns:
+         str: The local path to the downloaded models.
+     """
+     # Create the local directory if it doesn't exist
+     if not os.path.exists(local_dir):
+         os.makedirs(local_dir)
+     elif len(os.listdir(local_dir)) > 0:
+         print(f"Models already downloaded to: {local_dir}")
+         return local_dir
+
+     attempt = 0
+     while attempt < retries:
+         try:
+             # List all files in the repo
+             files = list_repo_files(repo_id, repo_type="dataset")
+             print(f"Files in repository: {files}")  # Debugging print to check file list
+
+             # Download each file
+             for file_name in files:
+                 for download_attempt in range(retries):
+                     try:
+                         url = f"https://huggingface.co/datasets/{repo_id}/resolve/main/{file_name}?download=true"
+                         print(f"Downloading file from: {url}")  # Debugging
+
+                         response = requests.get(url, stream=True)
+                         print(f"HTTP response status: {response.status_code}")  # Debugging
+                         response.raise_for_status()
+
+                         # Save the file locally
+                         local_file_path = os.path.join(local_dir, os.path.basename(file_name))
+                         with open(local_file_path, 'wb') as file:
+                             for chunk in response.iter_content(chunk_size=8192):
+                                 file.write(chunk)
+                         print(f"Downloaded model file: {file_name} to {local_file_path}")
+                         break  # Exit the retry loop if successful
+                     except (requests.HTTPError, requests.Timeout) as e:
+                         print(f"Error downloading {file_name}: {e}. Retrying in {delay} seconds...")
+                         time.sleep(delay)
+                 else:
+                     raise Exception(f"Failed to download {file_name} after multiple attempts.")
+
+             return local_dir  # Return the directory where models are saved
+
+         except (requests.HTTPError, requests.Timeout) as e:
+             print(f"Error downloading files: {e}. Retrying in {delay} seconds...")
+             attempt += 1
+             time.sleep(delay)
+
+     raise Exception("Failed to download model files after multiple attempts.")
+
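Note that download_models needs an explicit local_dir: with the default of None, os.path.exists(None) raises a TypeError before any download starts. A minimal call with a hypothetical target directory:

    from spacr.utils import download_models  # assumed import path

    model_dir = download_models(local_dir='/tmp/spacr_models')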
+ def generate_cytoplasm_mask(nucleus_mask, cell_mask):
+
+     """
+     Generates a cytoplasm mask from nucleus and cell masks.
+
+     Parameters:
+     - nucleus_mask (np.array): Binary or segmented mask of the nucleus (non-zero values represent nucleus).
+     - cell_mask (np.array): Binary or segmented mask of the whole cell (non-zero values represent cell).
+
+     Returns:
+     - cytoplasm_mask (np.array): Mask for the cytoplasm (cell values outside the nucleus, 0 elsewhere).
+     """
+
+     # Make sure the nucleus and cell masks are numpy arrays
+     nucleus_mask = np.array(nucleus_mask)
+     cell_mask = np.array(cell_mask)
+
+     # Generate cytoplasm mask: zero out nucleus pixels, keep the cell mask elsewhere
+     cytoplasm_mask = np.where(nucleus_mask != 0, 0, cell_mask)
+
+     return cytoplasm_mask
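A toy check of the cytoplasm-mask logic (the arrays are illustrative):

    import numpy as np
    from spacr.utils import generate_cytoplasm_mask  # assumed import path

    cell_mask = np.array([[1, 1, 1],
                          [1, 1, 1],
                          [0, 0, 0]])
    nucleus_mask = np.array([[0, 1, 0],
                             [0, 1, 0],
                             [0, 0, 0]])
    print(generate_cytoplasm_mask(nucleus_mask, cell_mask))
    # [[1 0 1]
    #  [1 0 1]
    #  [0 0 0]]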