spacr 0.0.2__py3-none-any.whl → 0.0.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
spacr/timelapse.py CHANGED
@@ -3,7 +3,6 @@ import numpy as np
 import pandas as pd
 from collections import defaultdict
 import matplotlib.pyplot as plt
-from matplotlib.animation import FuncAnimation
 from IPython.display import display
 from IPython.display import Image as ipyimage
 import trackpy as tp
@@ -11,6 +10,7 @@ from btrack import datasets as btrack_datasets
 from skimage.measure import regionprops
 from scipy.signal import find_peaks
 from scipy.optimize import curve_fit
+from scipy.integrate import trapz
 import matplotlib.pyplot as plt
 
 from .logger import log_function_call
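
Note on the swapped imports: the unused FuncAnimation import is dropped, and the new trapz import powers the AUC calculations added to analyze_calcium_oscillations further down. As an aside, newer SciPy releases deprecate and ultimately remove scipy.integrate.trapz in favor of trapezoid, so downstream code on a recent SciPy may want a guarded import. A minimal compatibility sketch, not part of this diff:

    try:
        from scipy.integrate import trapezoid as trapz  # current SciPy name
    except ImportError:
        from scipy.integrate import trapz  # legacy name used by this module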
@@ -590,6 +590,80 @@ def infected_vs_noninfected(result_df, measurement):
     plt.tight_layout()
     plt.show()
 
+def save_figure(fig, src, figure_number):
+    source = os.path.dirname(src)
+    results_fldr = os.path.join(source, 'results')
+    os.makedirs(results_fldr, exist_ok=True)
+    fig_loc = os.path.join(results_fldr, f'figure_{figure_number}.pdf')
+    fig.savefig(fig_loc)
+    print(f'Saved figure:{fig_loc}')
+
+def save_results_dataframe(df, src, results_name):
+    source = os.path.dirname(src)
+    results_fldr = os.path.join(source, 'results')
+    os.makedirs(results_fldr, exist_ok=True)
+    csv_loc = os.path.join(results_fldr, f'{results_name}.csv')
+    df.to_csv(csv_loc, index=True)
+    print(f'Saved results:{csv_loc}')
+
+def summarize_per_well(peak_details_df):
+    # Step 1: Split the 'ID' column into its plate/row/column/field/object parts
+    split_columns = peak_details_df['ID'].str.split('_', expand=True)
+    peak_details_df[['plate', 'row', 'column', 'field', 'object_number']] = split_columns
+
+    # Step 2: Create 'well_ID' by combining 'row' and 'column'
+    peak_details_df['well_ID'] = peak_details_df['row'] + '_' + peak_details_df['column']
+
+    # Keep only entries where 'amplitude' is not null
+    filtered_df = peak_details_df[peak_details_df['amplitude'].notna()]
+
+    # Identify numeric columns for averaging from the filtered dataframe
+    numeric_cols = filtered_df.select_dtypes(include=['number']).columns
+
+    # Step 3: Calculate per-well summary statistics
+    summary_df = filtered_df.groupby('well_ID').agg(
+        peaks_per_well=('ID', 'size'),
+        unique_IDs_with_amplitude=('ID', 'nunique'),  # unique IDs per well with non-null amplitude
+        **{col: (col, 'mean') for col in numeric_cols}
+    ).reset_index()
+
+    # Step 4: Count cells per well across all entries (including those without peaks)
+    summary_df_2 = peak_details_df.groupby('well_ID').agg(
+        cells_per_well=('object_number', 'nunique'),
+    ).reset_index()
+
+    summary_df['cells_per_well'] = summary_df_2['cells_per_well']
+    summary_df['peaks_per_cell'] = summary_df['peaks_per_well'] / summary_df['cells_per_well']
+
+    return summary_df
+
+def summarize_per_well_inf_non_inf(peak_details_df):
+    # Step 1: Split the 'ID' column into its plate/row/column/field/object parts
+    split_columns = peak_details_df['ID'].str.split('_', expand=True)
+    peak_details_df[['plate', 'row', 'column', 'field', 'object_number']] = split_columns
+
+    # Step 2: Create 'well_ID' by combining 'row' and 'column'
+    peak_details_df['well_ID'] = peak_details_df['row'] + '_' + peak_details_df['column']
+
+    # A cell counts as infected when 'infected' > 0;
+    # add an 'infected_status' column to classify cells
+    peak_details_df['infected_status'] = peak_details_df['infected'].apply(lambda x: 'infected' if x > 0 else 'non_infected')
+
+    # Identify numeric columns for averaging
+    numeric_cols = peak_details_df.select_dtypes(include=['number']).columns
+
+    # Step 3: Calculate summary statistics per well and infection status
+    summary_df = peak_details_df.groupby(['well_ID', 'infected_status']).agg(
+        cells_per_well=('object_number', 'nunique'),
+        peaks_per_well=('ID', 'size'),
+        **{col: (col, 'mean') for col in numeric_cols}
+    ).reset_index()
+
+    # Calculate peaks per cell
+    summary_df['peaks_per_cell'] = summary_df['peaks_per_well'] / summary_df['cells_per_well']
+
+    return summary_df
+
 def analyze_calcium_oscillations(db_loc, measurement='cell_channel_1_mean_intensity', size_filter='cell_area', fluctuation_threshold=0.25, num_lines=None, peak_height=0.01, pathogen=None, cytoplasm=None, remove_transient=True, verbose=False, transience_threshold=0.9):
     # Load data
     conn = sqlite3.connect(db_loc)
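
To make the new summarizers concrete, here is a toy run of summarize_per_well (values invented; the ID strings follow the plate_row_column_field_object format that analyze_calcium_oscillations builds):

    import numpy as np
    import pandas as pd

    toy = pd.DataFrame({
        'ID': ['p1_A_1_f1_1', 'p1_A_1_f1_1', 'p1_A_1_f1_2'],  # two peaks for cell 1, one peakless row for cell 2
        'amplitude': [0.5, 0.7, np.nan],                       # NaN amplitude marks a cell with no detected peaks
        'infected': [1, 1, 0],
    })
    summary = summarize_per_well(toy.copy())
    # peaks_per_well=2 (the NaN-amplitude row is excluded), cells_per_well=2, peaks_per_cell=1.0

One caveat: cells_per_well is assigned positionally from a second groupby, which assumes both groupbys yield wells in the same order; that holds only while every well has at least one non-null amplitude row, so a merge on well_ID would be the defensive alternative.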
@@ -626,7 +700,7 @@ def analyze_calcium_oscillations(db_loc, measurement='cell_channel_1_mean_intens
     cell_df['plate_row_column_field_object'] = cell_df['plate'].astype(str) + '_' + cell_df['row'].astype(str) + '_' + cell_df['column'].astype(str) + '_' + cell_df['field'].astype(str) + '_' + cell_df['object_label'].astype(str)
 
     df = cell_df.copy()
-
+
     # Fit exponential decay model to all scaled fluorescence data
     try:
         params, _ = curve_fit(exponential_decay, df['time'], df[measurement], p0=[max(df[measurement]), 0.01, min(df[measurement])], maxfev=10000)
@@ -653,11 +727,14 @@ def analyze_calcium_oscillations(db_loc, measurement='cell_channel_1_mean_intens
         if verbose:
             print(f'Group length: {len(group)} Timelapse length: {total_timepoints}, threshold:{threshold}')
 
-        if not len(group) <= threshold:
+        if len(group) <= threshold:
             transience_removed += 1
+            if verbose:
+                print(f'removed group {unique_id} due to transience')
             continue
 
         size_diff = group[size_filter].std() / group[size_filter].mean()
+
         if size_diff <= fluctuation_threshold:
             group['delta_' + measurement] = group['corrected_' + measurement].diff().fillna(0)
             corrected_dfs.append(group)
@@ -665,12 +742,50 @@ def analyze_calcium_oscillations(db_loc, measurement='cell_channel_1_mean_intens
             # Detect peaks
             peaks, properties = find_peaks(group['delta_' + measurement], height=peak_height)
 
+            # Clip values < 0 to 0 before integrating
+            group_filtered = group.copy()
+            group_filtered['delta_' + measurement] = group['delta_' + measurement].clip(lower=0)
+            above_zero_auc = trapz(y=group_filtered['delta_' + measurement], x=group_filtered['time'])
+            auc = trapz(y=group['delta_' + measurement], x=group_filtered['time'])
+            is_infected = (group['parasite_count'] > 0).any()
+
+            if is_infected:
+                is_infected = 1
+            else:
+                is_infected = 0
+
+            if len(peaks) == 0:
+                peak_details_list.append({
+                    'ID': unique_id,
+                    'plate': group['plate'].iloc[0],
+                    'row': group['row'].iloc[0],
+                    'column': group['column'].iloc[0],
+                    'field': group['field'].iloc[0],
+                    'object_number': group['object_number'].iloc[0],
+                    'time': np.nan,  # no peak, so no peak time
+                    'amplitude': np.nan,
+                    'delta': np.nan,
+                    'AUC': auc,
+                    'AUC_positive': above_zero_auc,
+                    'AUC_peak': np.nan,
+                    'infected': is_infected
+                })
+
             # Inside the for loop where peaks are detected
             for i, peak in enumerate(peaks):
-                amplitude = properties['peak_heights'][i]  # Correctly access the amplitude
-                peak_time = group['time'].iloc[peak]  # Time corresponding to the peak
-                # Get the number of pathogens in the cell at the time of the peak
+
+                amplitude = properties['peak_heights'][i]
+                peak_time = group['time'].iloc[peak]
                 pathogen_count_at_peak = group['parasite_count'].iloc[peak]
+
+                start_idx = max(peak - 1, 0)
+                end_idx = min(peak + 1, len(group) - 1)
+
+                # Slice a three-point window around the peak for the AUC calculation
+                peak_segment_y = group['delta_' + measurement].iloc[start_idx:end_idx + 1]
+                peak_segment_x = group['time'].iloc[start_idx:end_idx + 1]
+                peak_auc = trapz(y=peak_segment_y, x=peak_segment_x)
+
                 peak_details_list.append({
                     'ID': unique_id,
                     'plate': group['plate'].iloc[0],
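
The per-peak AUC integrates the delta trace over a three-point window centered on the peak and clamped to the ends of the trace. A small numeric check of that slice logic (values invented for illustration):

    import numpy as np
    from scipy.integrate import trapz  # 'trapezoid' on newer SciPy

    delta = np.array([0.0, 0.2, 1.0, 0.3, 0.0])
    time = np.arange(5.0)
    peak = 2  # index of the detected peak

    start_idx = max(peak - 1, 0)
    end_idx = min(peak + 1, len(delta) - 1)
    # Trapezoid rule over t in [1, 3]: 0.5*(0.2 + 1.0) + 0.5*(1.0 + 0.3) = 1.25
    print(trapz(y=delta[start_idx:end_idx + 1], x=time[start_idx:end_idx + 1]))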
@@ -681,6 +796,9 @@ def analyze_calcium_oscillations(db_loc, measurement='cell_channel_1_mean_intens
                     'time': peak_time,  # The time of the peak
                     'amplitude': amplitude,
                     'delta': group['delta_' + measurement].iloc[peak],
+                    'AUC': auc,
+                    'AUC_positive': above_zero_auc,
+                    'AUC_peak': peak_auc,
                     'infected': pathogen_count_at_peak
                 })
         else:
@@ -697,7 +815,14 @@ def analyze_calcium_oscillations(db_loc, measurement='cell_channel_1_mean_intens
         return
 
     peak_details_df = pd.DataFrame(peak_details_list)
-
+    summary_df = summarize_per_well(peak_details_df)
+    summary_df_inf_non_inf = summarize_per_well_inf_non_inf(peak_details_df)
+
+    save_results_dataframe(df=peak_details_df, src=db_loc, results_name='peak_details')
+    save_results_dataframe(df=result_df, src=db_loc, results_name='results')
+    save_results_dataframe(df=summary_df, src=db_loc, results_name='well_results')
+    save_results_dataframe(df=summary_df_inf_non_inf, src=db_loc, results_name='well_results_inf_non_inf')
+
     # Plotting
     fig, ax = plt.subplots(figsize=(10, 8))
     sampled_groups = result_df['plate_row_column_field_object'].unique()
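
Together with save_figure and save_results_dataframe defined earlier in this diff, these calls write the analysis outputs into a results folder next to the database. With db_loc = '/data/plate1/measurements.db' (an invented path), the layout would be:

    /data/plate1/results/
        peak_details.csv
        results.csv
        well_results.csv
        well_results_inf_non_inf.csv
        figure_1.pdf    # plus figure_2.pdf when pathogen is set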
@@ -714,12 +839,16 @@ def analyze_calcium_oscillations(db_loc, measurement='cell_channel_1_mean_intens
     ax.set_xlabel('Time')
     ax.set_ylabel('Normalized Delta ' + measurement)
     plt.tight_layout()
+
     plt.show()
+
+    save_figure(fig, src=db_loc, figure_number=1)
 
     if pathogen:
         infected_vs_noninfected(result_df, measurement)
+        save_figure(fig, src=db_loc, figure_number=2)
 
-        # Identifying cells with and without infection
+        # Identify cells with and without pathogens
         infected_cells = result_df[result_df.groupby('plate_row_column_field_object')['parasite_count'].transform('max') > 0]['plate_row_column_field_object'].unique()
         noninfected_cells = result_df[result_df.groupby('plate_row_column_field_object')['parasite_count'].transform('max') == 0]['plate_row_column_field_object'].unique()
 
@@ -733,5 +862,5 @@ def analyze_calcium_oscillations(db_loc, measurement='cell_channel_1_mean_intens
 
         print(f'Average number of peaks per infected cell: {avg_inf_peaks_per_cell:.2f}')
         print(f'Average number of peaks per non-infected cell: {avg_non_inf_peaks_per_cell:.2f}')
-
-    return result_df, peak_details_df
+    print(f'done')
+    return result_df, peak_details_df, fig
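
Because analyze_calcium_oscillations now returns the figure as a third value, callers written against 0.0.2 that unpack two values will break. A usage sketch against 0.0.6 (the path and pathogen label are placeholders):

    from spacr.timelapse import analyze_calcium_oscillations

    result_df, peak_details_df, fig = analyze_calcium_oscillations(
        db_loc='path/to/measurements.db',             # placeholder path
        measurement='cell_channel_1_mean_intensity',
        pathogen='pathogen',                          # any truthy value enables the infected/non-infected analysis
    )
    # CSVs and figure_1.pdf are also written automatically to <db folder>/results/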
spacr/train.py CHANGED
@@ -6,6 +6,7 @@ from torch.autograd import grad
 from torch.optim.lr_scheduler import StepLR
 import torch.nn.functional as F
 from IPython.display import display, clear_output
+import difflib
 
 from .logger import log_function_call
 
@@ -194,8 +195,8 @@ def test_model_performance(loaders, model, loader_name_list, epoch, train_mode,
 
 def train_test_model(src, settings, custom_model=False, custom_model_path=None):
 
-    from .io import save_settings, _copy_missclassified
-    from .utils import pick_best_model, test_model_performance
+    from .io import _save_settings, _copy_missclassified
+    from .utils import pick_best_model
     from .core import generate_loaders
 
     settings['src'] = src
@@ -208,7 +209,7 @@ def train_test_model(src, settings, custom_model=False, custom_model_path=None):
         model = torch.load(custom_model_path)
 
     if settings['train']:
-        save_settings(settings, src)
+        _save_settings(settings, src)
         torch.cuda.empty_cache()
         torch.cuda.memory.empty_cache()
         gc.collect()
@@ -227,20 +228,23 @@ def train_test_model(src, settings, custom_model=False, custom_model_path=None):
                                       validation_split=settings['val_split'],
                                       pin_memory=settings['pin_memory'],
                                       normalize=settings['normalize'],
-                                      verbose=settings['verbose'])
+                                      channels=settings['channels'],
+                                      verbose=settings['verbose'])
+
 
     if settings['test']:
         test, _, plate_names_test = generate_loaders(src,
-                                                     train_mode=settings['train_mode'],
-                                                     mode='test',
-                                                     image_size=settings['image_size'],
-                                                     batch_size=settings['batch_size'],
-                                                     classes=settings['classes'],
-                                                     num_workers=settings['num_workers'],
-                                                     validation_split=0.0,
-                                                     pin_memory=settings['pin_memory'],
-                                                     normalize=settings['normalize'],
-                                                     verbose=settings['verbose'])
+                                                     train_mode=settings['train_mode'],
+                                                     mode='test',
+                                                     image_size=settings['image_size'],
+                                                     batch_size=settings['batch_size'],
+                                                     classes=settings['classes'],
+                                                     num_workers=settings['num_workers'],
+                                                     validation_split=0.0,
+                                                     pin_memory=settings['pin_memory'],
+                                                     normalize=settings['normalize'],
+                                                     channels=settings['channels'],
+                                                     verbose=settings['verbose'])
     if model == None:
         model_path = pick_best_model(src+'/model')
         print(f'Best model: {model_path}')
@@ -330,8 +334,8 @@ def train_model(dst, model_type, train_loaders, train_loader_names, train_mode='
     None
     """
 
-    from .io import save_model, save_progress
-    from .utils import evaluate_model_performance, compute_irm_penalty, calculate_loss, choose_model
+    from .io import _save_model, _save_progress
+    from .utils import compute_irm_penalty, calculate_loss, choose_model
 
     print(f'Train batches:{len(train_loaders)}, Validation batches:{len(val_loaders)}')
 
@@ -347,6 +351,11 @@ def train_model(dst, model_type, train_loaders, train_loader_names, train_mode='
             break
 
     model = choose_model(model_type, device, init_weights, dropout_rate, use_checkpoint)
+
+    if model is None:
+        print(f'Model {model_type} not found')
+        return
+
     model.to(device)
 
     if optimizer_type == 'adamw':
@@ -421,10 +430,10 @@ def train_model(dst, model_type, train_loaders, train_loader_names, train_mode='
         if schedule == 'step_lr':
             scheduler.step()
 
-        save_progress(dst, results_df, train_metrics_df)
+        _save_progress(dst, results_df, train_metrics_df)
         clear_output(wait=True)
         display(results_df)
-        save_model(model, model_type, results_df, dst, epoch, epochs, intermedeate_save=[0.99,0.98,0.95,0.94])
+        _save_model(model, model_type, results_df, dst, epoch, epochs, intermedeate_save=[0.99,0.98,0.95,0.94])
 
     if train_mode == 'irm':
         dummy_w = torch.nn.Parameter(torch.Tensor([1.0])).to(device)
@@ -494,7 +503,165 @@ def train_model(dst, model_type, train_loaders, train_loader_names, train_mode='
 
     clear_output(wait=True)
     display(results_df)
-    save_progress(dst, results_df, train_metrics_df)
-    save_model(model, model_type, results_df, dst, epoch, epochs, intermedeate_save=[0.99,0.98,0.95,0.94])
+    _save_progress(dst, results_df, train_metrics_df)
+    _save_model(model, model_type, results_df, dst, epoch, epochs, intermedeate_save=[0.99,0.98,0.95,0.94])
     print(f'Saved model: {dst}')
-    return
+    return
+
+def get_submodules(model, prefix=''):
+    # Recursively collect the dotted names of all submodules
+    submodules = []
+    for name, module in model.named_children():
+        full_name = prefix + ('.' if prefix else '') + name
+        submodules.append(full_name)
+        submodules.extend(get_submodules(module, full_name))
+    return submodules
+
+def visualize_model_attention_v2(src, model_type='maxvit', model_path='', image_size=224, channels=[1,2,3], normalize=True, class_names=None, save_saliency=False, save_dir='saliency_maps'):
+    import torch
+    import os
+    from spacr.utils import SaliencyMapGenerator, preprocess_image
+    import matplotlib.pyplot as plt
+    import numpy as np
+    from PIL import Image
+
+    use_cuda = torch.cuda.is_available()
+    device = torch.device("cuda" if use_cuda else "cpu")
+
+    # Load the entire model object
+    model = torch.load(model_path)
+    model.to(device)
+
+    # Create directory for saving saliency maps if it does not exist
+    if save_saliency and not os.path.exists(save_dir):
+        os.makedirs(save_dir)
+
+    # Collect all images and their tensors
+    images = []
+    input_tensors = []
+    filenames = []
+    for file in os.listdir(src):
+        image_path = os.path.join(src, file)
+        image, input_tensor = preprocess_image(image_path, normalize=normalize, image_size=image_size, channels=channels)
+        images.append(image)
+        input_tensors.append(input_tensor)
+        filenames.append(file)
+
+    input_tensors = torch.cat(input_tensors).to(device)
+    class_labels = torch.zeros(input_tensors.size(0), dtype=torch.long).to(device)  # Replace with actual class labels if available
+
+    # Generate saliency maps
+    cam_generator = SaliencyMapGenerator(model)
+    saliency_maps = cam_generator.compute_saliency_maps(input_tensors, class_labels)
+
+    # Plot images, saliency maps, and overlays
+    saliency_maps = saliency_maps.cpu().numpy()
+    N = len(images)
+
+    dst = os.path.join(src, 'saliency_maps')
+    os.makedirs(dst, exist_ok=True)
+
+    for i in range(N):
+        fig, axes = plt.subplots(1, 3, figsize=(15, 5))
+
+        # Original image
+        axes[0].imshow(images[i])
+        axes[0].axis('off')
+        if class_names:
+            axes[0].set_title(class_names[class_labels[i].item()])
+
+        # Saliency map
+        axes[1].imshow(saliency_maps[i], cmap=plt.cm.hot)
+        axes[1].axis('off')
+
+        # Overlay
+        overlay = np.array(images[i])
+        axes[2].imshow(overlay)
+        axes[2].imshow(saliency_maps[i], cmap='jet', alpha=0.5)
+        axes[2].axis('off')
+
+        plt.tight_layout()
+        plt.show()
+
+        # Save the saliency map if required
+        if save_saliency:
+            saliency_image = Image.fromarray((saliency_maps[i] * 255).astype(np.uint8))
+            saliency_image.save(os.path.join(dst, f'saliency_{filenames[i]}'))
+
+def visualize_model_attention(src, model_type='maxvit', model_path='', image_size=224, channels=[1,2,3], normalize=True, class_names=None, save_saliency=False, save_dir='saliency_maps'):
+    import torch
+    import os
+    from spacr.utils import SaliencyMapGenerator, preprocess_image
+    import matplotlib.pyplot as plt
+    import numpy as np
+    from PIL import Image
+
+    use_cuda = torch.cuda.is_available()
+    device = torch.device("cuda" if use_cuda else "cpu")
+
+    # Load the entire model object
+    model = torch.load(model_path)
+    model.to(device)
+
+    # Create directory for saving saliency maps if it does not exist
+    if save_saliency and not os.path.exists(save_dir):
+        os.makedirs(save_dir)
+
+    # Collect all images and their tensors
+    images = []
+    input_tensors = []
+    filenames = []
+    for file in os.listdir(src):
+        if not file.endswith('.png'):
+            continue
+        image_path = os.path.join(src, file)
+        image, input_tensor = preprocess_image(image_path, normalize=normalize, image_size=image_size, channels=channels)
+        images.append(image)
+        input_tensors.append(input_tensor)
+        filenames.append(file)
+
+    input_tensors = torch.cat(input_tensors).to(device)
+    class_labels = torch.zeros(input_tensors.size(0), dtype=torch.long).to(device)  # Replace with actual class labels if available
+
+    # Generate saliency maps
+    cam_generator = SaliencyMapGenerator(model)
+    saliency_maps = cam_generator.compute_saliency_maps(input_tensors, class_labels)
+
+    # Convert saliency maps to numpy arrays
+    saliency_maps = saliency_maps.cpu().numpy()
+
+    N = len(images)
+
+    dst = os.path.join(src, 'saliency_maps')
+
+    for i in range(N):
+        fig, axes = plt.subplots(1, 3, figsize=(20, 5))
+
+        # Original image
+        axes[0].imshow(images[i])
+        axes[0].axis('off')
+        if class_names:
+            axes[0].set_title(f"Class: {class_names[class_labels[i].item()]}")
+
+        # Saliency Map
+        axes[1].imshow(saliency_maps[i, 0], cmap='hot')
+        axes[1].axis('off')
+        axes[1].set_title("Saliency Map")
+
+        # Overlay
+        overlay = np.array(images[i])
+        overlay = overlay / overlay.max()
+        saliency_map_rgb = np.stack([saliency_maps[i, 0]] * 3, axis=-1)  # Convert saliency map to RGB
+        overlay = (overlay * 0.5 + saliency_map_rgb * 0.5).clip(0, 1)
+        axes[2].imshow(overlay)
+        axes[2].axis('off')
+        axes[2].set_title("Overlay")
+
+        plt.tight_layout()
+        plt.show()
+
+        # Save the saliency map if required
+        if save_saliency:
+            os.makedirs(dst, exist_ok=True)
+            saliency_image = Image.fromarray((saliency_maps[i, 0] * 255).astype(np.uint8))
+            saliency_image.save(os.path.join(dst, f'saliency_{filenames[i]}'))
+
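A usage sketch for the new saliency helper (paths and class names are invented; the function expects a folder of .png tiles and a model saved whole with torch.save(model, ...), not a state_dict):

    from spacr.train import visualize_model_attention

    visualize_model_attention(
        src='path/to/test_images',           # placeholder folder of .png images
        model_path='path/to/model.pth',      # full model object, not a state_dict
        image_size=224,
        channels=[1, 2, 3],
        class_names=['class_0', 'class_1'],  # invented labels
        save_saliency=True,
    )

Note that save_dir is created when save_saliency is set, but the maps themselves are written to a saliency_maps folder inside src, and class labels default to all zeros unless real labels are wired in.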