PyPI - spacr - Versions diffs - 0.0.20__py3-none-any.whl → 0.0.35__py3-none-any.whl - Mend

spacr 0.0.20__py3-none-any.whl → 0.0.35__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27) hide show

spacr/alpha.py +291 -14
spacr/annotate_app.py +7 -5
spacr/chris.py +50 -0
spacr/core.py +1301 -426
spacr/foldseek.py +793 -0
spacr/get_alfafold_structures.py +72 -0
spacr/gui.py +144 -0
spacr/gui_classify_app.py +65 -74
spacr/gui_mask_app.py +110 -87
spacr/gui_measure_app.py +104 -81
spacr/gui_utils.py +276 -31
spacr/io.py +261 -102
spacr/mask_app.py +6 -3
spacr/measure.py +150 -64
spacr/plot.py +151 -12
spacr/sim.py +666 -119
spacr/timelapse.py +139 -9
spacr/train.py +18 -10
spacr/utils.py +43 -49
{spacr-0.0.20.dist-info → spacr-0.0.35.dist-info}/METADATA +5 -2
spacr-0.0.35.dist-info/RECORD +35 -0
spacr-0.0.35.dist-info/entry_points.txt +8 -0
spacr-0.0.20.dist-info/RECORD +0 -31
spacr-0.0.20.dist-info/entry_points.txt +0 -7
{spacr-0.0.20.dist-info → spacr-0.0.35.dist-info}/LICENSE +0 -0
{spacr-0.0.20.dist-info → spacr-0.0.35.dist-info}/WHEEL +0 -0
{spacr-0.0.20.dist-info → spacr-0.0.35.dist-info}/top_level.txt +0 -0

spacr/timelapse.py CHANGED Viewed

@@ -11,6 +11,7 @@ from btrack import datasets as btrack_datasets
 from skimage.measure import regionprops
 from scipy.signal import find_peaks
 from scipy.optimize import curve_fit
+from scipy.integrate import trapz
 import matplotlib.pyplot as plt
 from .logger import log_function_call
@@ -590,6 +591,80 @@ def infected_vs_noninfected(result_df, measurement):
     plt.tight_layout()
     plt.show()
+def save_figure(fig, src, figure_number):
+    source = os.path.dirname(src)
+    results_fldr = os.path.join(source,'results')
+    os.makedirs(results_fldr, exist_ok=True)
+    fig_loc = os.path.join(results_fldr, f'figure_{figure_number}.pdf')
+    fig.savefig(fig_loc)
+    print(f'Saved figure:{fig_loc}')
+def save_results_dataframe(df, src, results_name):
+    source = os.path.dirname(src)
+    results_fldr = os.path.join(source,'results')
+    os.makedirs(results_fldr, exist_ok=True)
+    csv_loc = os.path.join(results_fldr, f'{results_name}.csv')
+    df.to_csv(csv_loc, index=True)
+    print(f'Saved results:{csv_loc}')
+def summarize_per_well(peak_details_df):
+    # Step 1: Split the 'ID' column
+    split_columns = peak_details_df['ID'].str.split('_', expand=True)
+    peak_details_df[['plate', 'row', 'column', 'field', 'object_number']] = split_columns
+    # Step 2: Create 'well_ID' by combining 'row' and 'column'
+    peak_details_df['well_ID'] = peak_details_df['row'] + '_' + peak_details_df['column']
+    # Filter entries where 'amplitude' is not null
+    filtered_df = peak_details_df[peak_details_df['amplitude'].notna()]
+    # Preparation for Step 3: Identify numeric columns for averaging from the filtered dataframe
+    numeric_cols = filtered_df.select_dtypes(include=['number']).columns
+    # Step 3: Calculate summary statistics
+    summary_df = filtered_df.groupby('well_ID').agg(
+        peaks_per_well=('ID', 'size'),
+        unique_IDs_with_amplitude=('ID', 'nunique'),  # Count unique IDs per well with non-null amplitude
+        **{col: (col, 'mean') for col in numeric_cols}  # exclude 'amplitude' from averaging if it's numeric
+    ).reset_index()
+    # Step 3: Calculate summary statistics
+    summary_df_2 = peak_details_df.groupby('well_ID').agg(
+        cells_per_well=('object_number', 'nunique'),
+    ).reset_index()
+    summary_df['cells_per_well'] = summary_df_2['cells_per_well']
+    summary_df['peaks_per_cell'] = summary_df['peaks_per_well'] / summary_df['cells_per_well']
+    return summary_df
+def summarize_per_well_inf_non_inf(peak_details_df):
+    # Step 1: Split the 'ID' column
+    split_columns = peak_details_df['ID'].str.split('_', expand=True)
+    peak_details_df[['plate', 'row', 'column', 'field', 'object_number']] = split_columns
+    # Step 2: Create 'well_ID' by combining 'row' and 'column'
+    peak_details_df['well_ID'] = peak_details_df['row'] + '_' + peak_details_df['column']
+    # Assume 'pathogen_count' indicates infection if > 0
+    # Add an 'infected_status' column to classify cells
+    peak_details_df['infected_status'] = peak_details_df['infected'].apply(lambda x: 'infected' if x > 0 else 'non_infected')
+    # Preparation for Step 3: Identify numeric columns for averaging
+    numeric_cols = peak_details_df.select_dtypes(include=['number']).columns
+    # Step 3: Calculate summary statistics
+    summary_df = peak_details_df.groupby(['well_ID', 'infected_status']).agg(
+        cells_per_well=('object_number', 'nunique'),
+        peaks_per_well=('ID', 'size'),
+        **{col: (col, 'mean') for col in numeric_cols}
+    ).reset_index()
+    # Calculate peaks per cell
+    summary_df['peaks_per_cell'] = summary_df['peaks_per_well'] / summary_df['cells_per_well']
+    return summary_df
 def analyze_calcium_oscillations(db_loc, measurement='cell_channel_1_mean_intensity', size_filter='cell_area', fluctuation_threshold=0.25, num_lines=None, peak_height=0.01, pathogen=None, cytoplasm=None, remove_transient=True, verbose=False, transience_threshold=0.9):
     # Load data
     conn = sqlite3.connect(db_loc)
@@ -626,7 +701,7 @@ def analyze_calcium_oscillations(db_loc, measurement='cell_channel_1_mean_intens
     cell_df['plate_row_column_field_object'] = cell_df['plate'].astype(str) + '_' + cell_df['row'].astype(str) + '_' + cell_df['column'].astype(str) + '_' + cell_df['field'].astype(str) + '_' + cell_df['object_label'].astype(str)
     df = cell_df.copy()
     # Fit exponential decay model to all scaled fluorescence data
     try:
         params, _ = curve_fit(exponential_decay, df['time'], df[measurement], p0=[max(df[measurement]), 0.01, min(df[measurement])], maxfev=10000)
@@ -653,11 +728,14 @@ def analyze_calcium_oscillations(db_loc, measurement='cell_channel_1_mean_intens
             if verbose:
                 print(f'Group length: {len(group)} Timelapse length: {total_timepoints}, threshold:{threshold}')
-            if not len(group) <= threshold:
+            if len(group) <= threshold:
                 transience_removed += 1
+                if verbose:
+                    print(f'removed group {unique_id} due to transience')
                 continue
         size_diff = group[size_filter].std() / group[size_filter].mean()
         if size_diff <= fluctuation_threshold:
             group['delta_' + measurement] = group['corrected_' + measurement].diff().fillna(0)
             corrected_dfs.append(group)
@@ -665,12 +743,50 @@ def analyze_calcium_oscillations(db_loc, measurement='cell_channel_1_mean_intens
             # Detect peaks
             peaks, properties = find_peaks(group['delta_' + measurement], height=peak_height)
+            # Set values < 0 to 0
+            group_filtered = group.copy()
+            group_filtered['delta_' + measurement] = group['delta_' + measurement].clip(lower=0)
+            above_zero_auc = trapz(y=group_filtered['delta_' + measurement], x=group_filtered['time'])
+            auc = trapz(y=group['delta_' + measurement], x=group_filtered['time'])
+            is_infected = (group['parasite_count'] > 0).any()
+            if is_infected:
+                is_infected = 1
+            else:
+                is_infected = 0
+            if len(peaks) == 0:
+                peak_details_list.append({
+                    'ID': unique_id,
+                    'plate': group['plate'].iloc[0],
+                    'row': group['row'].iloc[0],
+                    'column': group['column'].iloc[0],
+                    'field': group['field'].iloc[0],
+                    'object_number': group['object_number'].iloc[0],
+                    'time': np.nan,  # The time of the peak
+                    'amplitude': np.nan,
+                    'delta': np.nan,
+                    'AUC': auc,
+                    'AUC_positive': above_zero_auc,
+                    'AUC_peak': np.nan,
+                    'infected': is_infected
+                })
             # Inside the for loop where peaks are detected
             for i, peak in enumerate(peaks):
-                amplitude = properties['peak_heights'][i]  # Correctly access the amplitude
-                peak_time = group['time'].iloc[peak]  # Time corresponding to the peak
-                # Get the number of pathogens in the cell at the time of the peak
+                amplitude = properties['peak_heights'][i]
+                peak_time = group['time'].iloc[peak]
                 pathogen_count_at_peak = group['parasite_count'].iloc[peak]
+                start_idx = max(peak - 1, 0)
+                end_idx = min(peak + 1, len(group) - 1)
+                # Using indices to slice for AUC calculation
+                peak_segment_y = group['delta_' + measurement].iloc[start_idx:end_idx + 1]
+                peak_segment_x = group['time'].iloc[start_idx:end_idx + 1]
+                peak_auc = trapz(y=peak_segment_y, x=peak_segment_x)
                 peak_details_list.append({
                     'ID': unique_id,
                     'plate': group['plate'].iloc[0],
@@ -681,6 +797,9 @@ def analyze_calcium_oscillations(db_loc, measurement='cell_channel_1_mean_intens
                     'time': peak_time,  # The time of the peak
                     'amplitude': amplitude,
                     'delta': group['delta_' + measurement].iloc[peak],
+                    'AUC': auc,
+                    'AUC_positive': above_zero_auc,
+                    'AUC_peak': peak_auc,
                     'infected': pathogen_count_at_peak
                 })
         else:
@@ -697,7 +816,14 @@ def analyze_calcium_oscillations(db_loc, measurement='cell_channel_1_mean_intens
         return
     peak_details_df = pd.DataFrame(peak_details_list)
+    summary_df = summarize_per_well(peak_details_df)
+    summary_df_inf_non_inf = summarize_per_well_inf_non_inf(peak_details_df)
+    save_results_dataframe(df=peak_details_df, src=db_loc, results_name='peak_details')
+    save_results_dataframe(df=result_df, src=db_loc, results_name='results')
+    save_results_dataframe(df=summary_df, src=db_loc, results_name='well_results')
+    save_results_dataframe(df=summary_df_inf_non_inf, src=db_loc, results_name='well_results_inf_non_inf')
     # Plotting
     fig, ax = plt.subplots(figsize=(10, 8))
     sampled_groups = result_df['plate_row_column_field_object'].unique()
@@ -714,12 +840,16 @@ def analyze_calcium_oscillations(db_loc, measurement='cell_channel_1_mean_intens
     ax.set_xlabel('Time')
     ax.set_ylabel('Normalized Delta ' + measurement)
     plt.tight_layout()
     plt.show()
+    save_figure(fig, src=db_loc, figure_number=1)
     if pathogen:
         infected_vs_noninfected(result_df, measurement)
+        save_figure(fig, src=db_loc, figure_number=2)
-        # Identifying cells with and without infection
+        # Identify cells with and without pathogens
         infected_cells = result_df[result_df.groupby('plate_row_column_field_object')['parasite_count'].transform('max') > 0]['plate_row_column_field_object'].unique()
         noninfected_cells = result_df[result_df.groupby('plate_row_column_field_object')['parasite_count'].transform('max') == 0]['plate_row_column_field_object'].unique()
@@ -733,5 +863,5 @@ def analyze_calcium_oscillations(db_loc, measurement='cell_channel_1_mean_intens
         print(f'Average number of peaks per infected cell: {avg_inf_peaks_per_cell:.2f}')
         print(f'Average number of peaks per non-infected cell: {avg_non_inf_peaks_per_cell:.2f}')
-    return result_df, peak_details_df
+    print(f'done')
+    return result_df, peak_details_df, fig

spacr/train.py CHANGED Viewed

@@ -194,8 +194,8 @@ def test_model_performance(loaders, model, loader_name_list, epoch, train_mode,
 def train_test_model(src, settings, custom_model=False, custom_model_path=None):
-    from .io import save_settings, _copy_missclassified
-    from .utils import pick_best_model, test_model_performance
+    from .io import _save_settings, _copy_missclassified
+    from .utils import pick_best_model
     from .core import generate_loaders
     settings['src'] = src
@@ -208,7 +208,7 @@ def train_test_model(src, settings, custom_model=False, custom_model_path=None):
         model = torch.load(custom_model_path)
     if settings['train']:
-        save_settings(settings, src)
+        _save_settings(settings, src)
     torch.cuda.empty_cache()
     torch.cuda.memory.empty_cache()
     gc.collect()
@@ -227,7 +227,9 @@ def train_test_model(src, settings, custom_model=False, custom_model_path=None):
                                                     validation_split=settings['val_split'],
                                                     pin_memory=settings['pin_memory'],
                                                     normalize=settings['normalize'],
-                                                    verbose=settings['verbose'])
+                                                    channels=settings['channels'],
+                                                    verbose=settings['verbose'])
     if settings['test']:
         test, _, plate_names_test = generate_loaders(src,
@@ -240,6 +242,7 @@ def train_test_model(src, settings, custom_model=False, custom_model_path=None):
                                    validation_split=0.0,
                                    pin_memory=settings['pin_memory'],
                                    normalize=settings['normalize'],
+                                   channels=settings['channels'],
                                    verbose=settings['verbose'])
         if model == None:
             model_path = pick_best_model(src+'/model')
@@ -330,8 +333,8 @@ def train_model(dst, model_type, train_loaders, train_loader_names, train_mode='
         None
     """
-    from .io import save_model, save_progress
-    from .utils import evaluate_model_performance, compute_irm_penalty, calculate_loss, choose_model
+    from .io import _save_model, _save_progress
+    from .utils import compute_irm_penalty, calculate_loss, choose_model #evaluate_model_performance,
     print(f'Train batches:{len(train_loaders)}, Validation batches:{len(val_loaders)}')
@@ -347,6 +350,11 @@ def train_model(dst, model_type, train_loaders, train_loader_names, train_mode='
         break
     model = choose_model(model_type, device, init_weights, dropout_rate, use_checkpoint)
+    if model is None:
+        print(f'Model {model_type} not found')
+        return
     model.to(device)
     if optimizer_type == 'adamw':
@@ -421,10 +429,10 @@ def train_model(dst, model_type, train_loaders, train_loader_names, train_mode='
                 if schedule == 'step_lr':
                     scheduler.step()
-            save_progress(dst, results_df, train_metrics_df)
+            _save_progress(dst, results_df, train_metrics_df)
             clear_output(wait=True)
             display(results_df)
-            save_model(model, model_type, results_df, dst, epoch, epochs, intermedeate_save=[0.99,0.98,0.95,0.94])
+            _save_model(model, model_type, results_df, dst, epoch, epochs, intermedeate_save=[0.99,0.98,0.95,0.94])
     if train_mode == 'irm':
         dummy_w = torch.nn.Parameter(torch.Tensor([1.0])).to(device)
@@ -494,7 +502,7 @@ def train_model(dst, model_type, train_loaders, train_loader_names, train_mode='
             clear_output(wait=True)
             display(results_df)
-            save_progress(dst, results_df, train_metrics_df)
-            save_model(model, model_type, results_df, dst, epoch, epochs, intermedeate_save=[0.99,0.98,0.95,0.94])
+            _save_progress(dst, results_df, train_metrics_df)
+            _save_model(model, model_type, results_df, dst, epoch, epochs, intermedeate_save=[0.99,0.98,0.95,0.94])
             print(f'Saved model: {dst}')
     return

spacr/utils.py CHANGED Viewed

@@ -1,4 +1,4 @@
-import os, re, sqlite3, gc, torch, torchvision, time, random, string, shutil, cv2, tarfile, glob
+import sys, os, re, sqlite3, gc, torch, torchvision, time, random, string, shutil, cv2, tarfile, glob
 import numpy as np
 from cellpose import models as cp_models
@@ -46,12 +46,6 @@ from torchvision.models.resnet import ResNet18_Weights, ResNet34_Weights, ResNet
 from .logger import log_function_call
-#from .io import _read_and_join_tables, _save_figure
-#from .timelapse import _btrack_track_cells, _trackpy_track_cells
-#from .plot import _plot_images_on_grid, plot_masks, _plot_histograms_and_stats, plot_resize, _plot_plates, _reg_v_plot, plot_masks
-#from .core import identify_masks
 def _gen_rgb_image(image, cahnnels):
     rgb_image = np.take(image, cahnnels, axis=-1)
     rgb_image = rgb_image.astype(float)
@@ -71,7 +65,8 @@ def _outline_and_overlay(image, rgb_image, mask_dims, outline_colors, outline_th
         outline = np.zeros_like(mask, dtype=np.uint8)  # Use uint8 for contour detection efficiency
         # Find and draw contours
-        for j in np.unique(mask)[1:]:
+        for j in np.unique(mask):
+        #for j in np.unique(mask)[1:]:
             contours = find_contours(mask == j, 0.5)
             # Convert contours for OpenCV format and draw directly to optimize
             cv_contours = [np.flip(contour.astype(int), axis=1) for contour in contours]
@@ -781,8 +776,8 @@ def _get_object_settings(object_type, settings):
     object_settings = {}
     object_settings['diameter'] = _get_diam(settings['magnification'], obj=object_type)
-    object_settings['minimum_size'] = (object_settings['diameter']**2)/5
-    object_settings['maximum_size'] = (object_settings['diameter']**2)*3
+    object_settings['minimum_size'] = (object_settings['diameter']**2)/4
+    object_settings['maximum_size'] = (object_settings['diameter']**2)*10
     object_settings['merge'] = False
     object_settings['resample'] = True
     object_settings['remove_border_objects'] = False
@@ -793,21 +788,22 @@ def _get_object_settings(object_type, settings):
             object_settings['model_name'] = 'cyto'
         else:
             object_settings['model_name'] = 'cyto2'
-        object_settings['filter_size'] = True
-        object_settings['filter_intensity'] = True
+        object_settings['filter_size'] = False
+        object_settings['filter_intensity'] = False
         object_settings['restore_type'] = settings.get('cell_restore_type', None)
     elif object_type == 'nucleus':
         object_settings['model_name'] = 'nuclei'
-        object_settings['filter_size'] = True
-        object_settings['filter_intensity'] = True
+        object_settings['filter_size'] = False
+        object_settings['filter_intensity'] = False
         object_settings['restore_type'] = settings.get('nucleus_restore_type', None)
     elif object_type == 'pathogen':
         object_settings['model_name'] = 'cyto'
         object_settings['filter_size'] = True
-        object_settings['filter_intensity'] = True
+        object_settings['filter_intensity'] = False
         object_settings['restore_type'] = settings.get('pathogen_restore_type', None)
+        object_settings['merge'] = settings['merge_pathogens']
     else:
         print(f'Object type: {object_type} not supported. Supported object types are : cell, nucleus and pathogen')
@@ -884,17 +880,15 @@ def _get_cellpose_channels(src, nucleus_channel, pathogen_channel, cell_channel)
     if not pathogen_channel is None:
         if not nucleus_channel is None:
-            cellpose_channels['pathogen'] = [0,1]
+            if not pathogen_channel is None:
+                cellpose_channels['pathogen'] = [0,2]
+            else:
+                cellpose_channels['pathogen'] = [0,1]
         else:
             cellpose_channels['pathogen'] = [0,0]
     if not cell_channel is None:
         if not nucleus_channel is None:
-            if not pathogen_channel is None:
-                cellpose_channels['cell'] = [0,2]
-            else:
-                cellpose_channels['cell'] = [0,1]
-        elif not pathogen_channel is None:
             cellpose_channels['cell'] = [0,1]
         else:
             cellpose_channels['cell'] = [0,0]
@@ -1069,7 +1063,7 @@ class Cache:
         cache (OrderedDict): The cache data structure.
     """
-    def _init__(self, max_size):
+    def __init__(self, max_size):
         self.cache = OrderedDict()
         self.max_size = max_size
@@ -1100,7 +1094,7 @@ class ScaledDotProductAttention(nn.Module):
     """
-    def _init__(self, d_k):
+    def __init__(self, d_k):
         super(ScaledDotProductAttention, self).__init__()
         self.d_k = d_k
@@ -1131,7 +1125,7 @@ class SelfAttention(nn.Module):
         d_k (int): Dimensionality of the key and query vectors.
     """
-    def _init__(self, in_channels, d_k):
+    def __init__(self, in_channels, d_k):
         super(SelfAttention, self).__init__()
         self.W_q = nn.Linear(in_channels, d_k)
         self.W_k = nn.Linear(in_channels, d_k)
@@ -1155,7 +1149,7 @@ class SelfAttention(nn.Module):
         return output
 class ScaledDotProductAttention(nn.Module):
-    def _init__(self, d_k):
+    def __init__(self, d_k):
         """
         Initializes the ScaledDotProductAttention module.
@@ -1192,7 +1186,7 @@ class SelfAttention(nn.Module):
         in_channels (int): Number of input channels.
         d_k (int): Dimensionality of the key and query vectors.
     """
-    def _init__(self, in_channels, d_k):
+    def __init__(self, in_channels, d_k):
         super(SelfAttention, self).__init__()
         self.W_q = nn.Linear(in_channels, d_k)
         self.W_k = nn.Linear(in_channels, d_k)
@@ -1223,7 +1217,7 @@ class EarlyFusion(nn.Module):
     Args:
         in_channels (int): Number of input channels.
     """
-    def _init__(self, in_channels):
+    def __init__(self, in_channels):
         super(EarlyFusion, self).__init__()
         self.conv1 = nn.Conv2d(in_channels, 64, kernel_size=1, stride=1)
@@ -1242,7 +1236,7 @@ class EarlyFusion(nn.Module):
 # Spatial Attention Mechanism
 class SpatialAttention(nn.Module):
-    def _init__(self, kernel_size=7):
+    def __init__(self, kernel_size=7):
         """
         Initializes the SpatialAttention module.
@@ -1287,7 +1281,7 @@ class MultiScaleBlockWithAttention(nn.Module):
         forward: Forward method for the module.
     """
-    def _init__(self, in_channels, out_channels):
+    def __init__(self, in_channels, out_channels):
         super(MultiScaleBlockWithAttention, self).__init__()
         self.dilated_conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, dilation=1, padding=1)
         self.spatial_attention = nn.Conv2d(out_channels, out_channels, kernel_size=1)
@@ -1320,7 +1314,7 @@ class MultiScaleBlockWithAttention(nn.Module):
 # Final Classifier
 class CustomCellClassifier(nn.Module):
-    def _init__(self, num_classes, pathogen_channel, use_attention, use_checkpoint, dropout_rate):
+    def __init__(self, num_classes, pathogen_channel, use_attention, use_checkpoint, dropout_rate):
         super(CustomCellClassifier, self).__init__()
         self.early_fusion = EarlyFusion(in_channels=3)
@@ -1349,7 +1343,7 @@ class CustomCellClassifier(nn.Module):
 #CNN and Transformer class, pick any Torch model.
 class TorchModel(nn.Module):
-    def _init__(self, model_name='resnet50', pretrained=True, dropout_rate=None, use_checkpoint=False):
+    def __init__(self, model_name='resnet50', pretrained=True, dropout_rate=None, use_checkpoint=False):
         super(TorchModel, self).__init__()
         self.model_name = model_name
         self.use_checkpoint = use_checkpoint
@@ -1423,7 +1417,7 @@ class TorchModel(nn.Module):
         return logits
 class FocalLossWithLogits(nn.Module):
-    def _init__(self, alpha=1, gamma=2):
+    def __init__(self, alpha=1, gamma=2):
         super(FocalLossWithLogits, self).__init__()
         self.alpha = alpha
         self.gamma = gamma
@@ -1435,7 +1429,7 @@ class FocalLossWithLogits(nn.Module):
         return focal_loss.mean()
 class ResNet(nn.Module):
-    def _init__(self, resnet_type='resnet50', dropout_rate=None, use_checkpoint=False, init_weights='imagenet'):
+    def __init__(self, resnet_type='resnet50', dropout_rate=None, use_checkpoint=False, init_weights='imagenet'):
         super(ResNet, self).__init__()
         resnet_map = {
@@ -1788,25 +1782,24 @@ def annotate_predictions(csv_loc):
     df['cond'] = df.apply(assign_condition, axis=1)
     return df
-def init_globals(counter_, lock_):
+def initiate_counter(counter_, lock_):
     global counter, lock
     counter = counter_
     lock = lock_
-def add_images_to_tar(args):
-    global counter, lock, total_images
-    paths_chunk, tar_path = args
+def add_images_to_tar(paths_chunk, tar_path, total_images):
     with tarfile.open(tar_path, 'w') as tar:
-        for img_path in paths_chunk:
+        for i, img_path in enumerate(paths_chunk):
             arcname = os.path.basename(img_path)
             try:
                 tar.add(img_path, arcname=arcname)
                 with lock:
                     counter.value += 1
-                    print(f"\rProcessed: {counter.value}/{total_images}", end='', flush=True)
+                    if counter.value % 100 == 0:  # Print every 100 updates
+                        progress = (counter.value / total_images) * 100
+                        print(f"Progress: {counter.value}/{total_images} ({progress:.2f}%)", end='\r', file=sys.stdout, flush=True)
             except FileNotFoundError:
                 print(f"File not found: {img_path}")
-    return tar_path
 def generate_fraction_map(df, gene_column, min_frequency=0.0):
     df['fraction'] = df['count']/df['well_read_sum']
@@ -2255,8 +2248,8 @@ def dice_coefficient(mask1, mask2):
 def extract_boundaries(mask, dilation_radius=1):
     binary_mask = (mask > 0).astype(np.uint8)
     struct_elem = np.ones((dilation_radius*2+1, dilation_radius*2+1))
-    dilated = binary_dilation(binary_mask, footprint=struct_elem)
-    eroded = binary_erosion(binary_mask, footprint=struct_elem)
+    dilated = morphology.binary_dilation(binary_mask, footprint=struct_elem)
+    eroded = morphology.binary_erosion(binary_mask, footprint=struct_elem)
     boundary = dilated ^ eroded
     return boundary
@@ -2669,6 +2662,13 @@ def _filter_cp_masks(masks, flows, filter_size, filter_intensity, minimum_size,
             print(f'Number of objects before filtration: {num_objects}')
             plot_masks(batch=image, masks=mask, flows=flow, cmap='inferno', figuresize=figuresize, nr=1, file_type='.npz', print_object_number=True)
+        if merge:
+            mask = merge_touching_objects(mask, threshold=0.66)
+            if plot and idx == 0:
+                num_objects = mask_object_count(mask)
+                print(f'Number of objects after merging adjacent objects, : {num_objects}')
+                plot_masks(batch=image, masks=mask, flows=flow, cmap='inferno', figuresize=figuresize, nr=1, file_type='.npz', print_object_number=True)
         if filter_size:
             props = measure.regionprops_table(mask, properties=['label', 'area'])
             valid_labels = props['label'][np.logical_and(props['area'] > minimum_size, props['area'] < maximum_size)]
@@ -2714,13 +2714,6 @@ def _filter_cp_masks(masks, flows, filter_size, filter_intensity, minimum_size,
                 print(f'Number of objects after removing border objects, : {num_objects}')
                 plot_masks(batch=image, masks=mask, flows=flow, cmap='inferno', figuresize=figuresize, nr=1, file_type='.npz', print_object_number=True)
-        if merge:
-            mask = merge_touching_objects(mask, threshold=0.25)
-            if plot and idx == 0:
-                num_objects = mask_object_count(mask)
-                print(f'Number of objects after merging adjacent objects, : {num_objects}')
-                plot_masks(batch=image, masks=mask, flows=flow, cmap='inferno', figuresize=figuresize, nr=1, file_type='.npz', print_object_number=True)
         mask_stack.append(mask)
     return mask_stack
@@ -2789,6 +2782,7 @@ def _run_test_mode(src, regex, timelapse=False):
     # Prepare for random selection
     set_identifiers = list(images_by_set.keys())
+    random.seed(42)
     random.shuffle(set_identifiers)  # Randomize the order
     # Select a subset based on the test_images count

{spacr-0.0.20.dist-info → spacr-0.0.35.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: spacr
-Version: 0.0.20
+Version: 0.0.35
 Summary: Spatial phenotype analysis of crisp screens (SpaCr)
 Home-page: https://github.com/EinarOlafsson/spacr
 Author: Einar Birnir Olafsson
@@ -19,6 +19,7 @@ Requires-Dist: scikit-image >=0.22.0
 Requires-Dist: scikit-learn >=1.4.1
 Requires-Dist: seaborn >=0.13.2
 Requires-Dist: matplotlib >=3.8.3
+Requires-Dist: shap >=0.45.0
 Requires-Dist: pillow >=10.2.0
 Requires-Dist: imageio >=2.34.0
 Requires-Dist: scipy >=1.12.0
@@ -31,6 +32,8 @@ Requires-Dist: IPython >=8.18.1
 Requires-Dist: opencv-python-headless >=4.9.0.80
 Requires-Dist: umap >=0.1.1
 Requires-Dist: ttkthemes >=3.2.2
+Requires-Dist: xgboost >=2.0.3
+Requires-Dist: PyWavelets >=1.6.0
 Requires-Dist: lxml >=5.1.0
 Provides-Extra: dev
 Requires-Dist: pytest >=3.9 ; extra == 'dev'
@@ -49,7 +52,7 @@ Requires-Dist: opencv-python-headless ; extra == 'headless'
 <tr>
 <td>
-Spatial phenotype analysis of crisp screens (SpaCr). A collection of functions for generating cellpose masks -> single object images and measurements -> annotation and classification of single object images. Spacr uses batch normalization to facilitate accurate segmentation of objects with low foreground representation.
+Spatial phenotype analysis of CRISPR-Cas9 screens (SpaCr). The spatial organization of organelles and proteins within cells constitutes a key level of functional regulation. In the context of infectious disease, the spatial relationships between host cell structures and intracellular pathogens are critical to understand host clearance mechanisms and how pathogens evade them. Spacr is a Python-based software package for generating single cell image data for deep-learning sub-cellular/cellular phenotypic classification from pooled genetic CRISPR-Cas9 screens. Spacr provides a flexible toolset to extract single cell images and measurements from high content cell painting experiments, train deep-learning models to classify cellular/ subcellular phenotypes, simulate and analyze pooled CRISPR-Cas9 imaging screens.
 </td>
 <td>

spacr-0.0.35.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,35 @@
+spacr/__init__.py,sha256=mDi-Qu5r1vZnqIbUBV1JAoSq-mxmMEOmni1JSG2e4Wo,879
+spacr/__main__.py,sha256=_qRkhbFrH_cXr7AZs6KHL8Hh4VApqNdpNCtiKn2ePTo,285
+spacr/alpha.py,sha256=1LUtTaeVHqcTMxoCMA7mlsNYyaR0KEaXglwXsUQLwKo,13776
+spacr/annotate_app.py,sha256=8ziG6HZ6Kvany2yYDR15jtW84OnPFe8SZXykIrrNfX0,20031
+spacr/chris.py,sha256=YlBjSgeZaY8HPy6jkrT_ISAnCMAKVfvCxF0I9eAZLFM,2418
+spacr/cli.py,sha256=507jfOOEV8BoL4eeUcblvH-iiDHdBrEVJLu1ghAAPSc,1800
+spacr/core.py,sha256=_R8gXNnjf680yrnbCi2piWQUz7PDbqWYn7SL5MACLfo,156457
+spacr/foldseek.py,sha256=cWtLzvFF2O_mq5I71UMiuU9DTvDCp7wl6aaWAZRrBZc,33970
+spacr/get_alfafold_structures.py,sha256=n0g8gne-oyAV3Uo6qxZoJq5X1cUUyD8u0pOC_W2PX40,3541
+spacr/graph_learning.py,sha256=sD4eOC7Q16rr7WO20mCi_E16_LqioGUUgPamAHIIeNI,12568
+spacr/graph_learning_lap.py,sha256=MyNRLb63gsjBlui-ByZ0anHugYulL6M-OsGm8rnGBmE,3385
+spacr/gui.py,sha256=2d2JHYVWhEFUkB3u_2OarCV_V07eLtJKUMKVKuJ7nAo,6430
+spacr/gui_classify_app.py,sha256=RqVC5Ac0GiFhKg1qUyU-xfbSVMwSTjySkReWCvmsZ1U,7917
+spacr/gui_mask_app.py,sha256=c0NwgzzMSYx7xE60sp6zLE1h7ct3pnT7n7gz1SqmWbs,9750
+spacr/gui_measure_app.py,sha256=I6OXPJZZ0sG3OqRO4l7Fr9Sb_5jr6X1Y2LXLgPW_Q_4,9675
+spacr/gui_sim_app.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+spacr/gui_utils.py,sha256=URyEUfDn7RSE8iWBDwmC1jXWNA2Vl1oA2GsSjbSbuFU,40715
+spacr/io.py,sha256=-Ho1Rw46s5DfutcTAtp0RhEs3I-GPgXIphOpjJHutPU,108688
+spacr/logger.py,sha256=7Zqr3TuuOQLWT32gYr2q1qvv7x0a2JhLANmZcnBXAW8,670
+spacr/mask_app.py,sha256=p9oA0JH0Rcly2Fbsrg-Vye_iThRCZZF9axU6hkE3SAI,39376
+spacr/measure.py,sha256=_f6UDugw75rILEg0uo2-QsUGUvc4AQdrdHl-BPZk74I,54686
+spacr/old_code.py,sha256=KxljHpKNsV5EfX9ifN2xJTnUeqAhyabZyfDWd5THOOc,11226
+spacr/plot.py,sha256=VtDKTJ_zo8CAVC3ILuIN_wUP6197vq089wNZuom7T8g,61655
+spacr/sim.py,sha256=2NR5hm--HVcYQnj1SCHoUCVbh_b2XUjjjfoAUIXFwnQ,72997
+spacr/timelapse.py,sha256=plPjR8nZ7_Q50VAvMvHK2TUE4F-vh7R23JnI6tSW02g,39661
+spacr/train.py,sha256=lp66dWYkiMMlgdYlMjAsJnkIZFWLizKB-xwyVnKgFBs,25904
+spacr/umap.py,sha256=4QSrQ16Og-Ijq-SwguMQT2f20UWz1LE5HQeSLmzSl8c,29370
+spacr/utils.py,sha256=D3WRf_0w0T6dZHh3BfwScGBQjorljgWW6CQUdM0ToN8,120918
+spacr/version.py,sha256=axH5tnGwtgSnJHb5IDhiu4Zjk5GhLyAEDRe-rnaoFOA,409
+spacr-0.0.35.dist-info/LICENSE,sha256=SR-2MeGc6SCM1UORJYyarSWY_A-JaOMFDj7ReSs9tRM,1083
+spacr-0.0.35.dist-info/METADATA,sha256=0iEqhFIza7SaHVeYbl0Rc8WocPgGEVtiwMoWsIBBZzQ,4973
+spacr-0.0.35.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+spacr-0.0.35.dist-info/entry_points.txt,sha256=_khj_UcegrI5N29QcgxECsxVsfpVQzG3U5OUoKdtTcw,288
+spacr-0.0.35.dist-info/top_level.txt,sha256=GJPU8FgwRXGzKeut6JopsSRY2R8T3i9lDgya42tLInY,6
+spacr-0.0.35.dist-info/RECORD,,

spacr-0.0.35.dist-info/entry_points.txt ADDED Viewed

@@ -0,0 +1,8 @@
+[console_scripts]
+annotate = spacr.annotate_app:gui_annotation
+classify = spacr.gui_classify_app:gui_classify
+gui = spacr.gui:gui_app
+make_masks = spacr.mask_app:gui_make_masks
+mask = spacr.gui_mask_app:gui_mask
+measure = spacr.gui_measure_app:gui_measure
+sim = spacr.gui_sim_app:gui_sim

spacr 0.0.20__py3-none-any.whl → 0.0.35__py3-none-any.whl

spacr 0.0.20__py3-none-any.whl → 0.0.35__py3-none-any.whl