PyPI - spacr - Versions diffs - 0.0.1__py3-none-any.whl → 0.0.6__py3-none-any.whl - Mend

spacr 0.0.1py3-none-any.whl → 0.0.6py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (40) hide show

spacr/__init__.py +6 -2
spacr/__main__.py +0 -2
spacr/alpha.py +807 -0
spacr/annotate_app.py +118 -120
spacr/chris.py +50 -0
spacr/cli.py +25 -187
spacr/core.py +1611 -389
spacr/deep_spacr.py +696 -0
spacr/foldseek.py +779 -0
spacr/get_alfafold_structures.py +72 -0
spacr/graph_learning.py +320 -0
spacr/graph_learning_lap.py +84 -0
spacr/gui.py +145 -0
spacr/gui_2.py +90 -0
spacr/gui_classify_app.py +187 -0
spacr/gui_mask_app.py +149 -174
spacr/gui_measure_app.py +116 -109
spacr/gui_sim_app.py +0 -0
spacr/gui_utils.py +679 -139
spacr/io.py +620 -469
spacr/mask_app.py +116 -9
spacr/measure.py +178 -84
spacr/models/cp/toxo_pv_lumen.CP_model +0 -0
spacr/old_code.py +255 -1
spacr/plot.py +263 -100
spacr/sequencing.py +1130 -0
spacr/sim.py +634 -122
spacr/timelapse.py +343 -53
spacr/train.py +195 -22
spacr/umap.py +0 -689
spacr/utils.py +1530 -188
spacr-0.0.6.dist-info/METADATA +118 -0
spacr-0.0.6.dist-info/RECORD +39 -0
{spacr-0.0.1.dist-info → spacr-0.0.6.dist-info}/WHEEL +1 -1
spacr-0.0.6.dist-info/entry_points.txt +9 -0
spacr-0.0.1.dist-info/METADATA +0 -64
spacr-0.0.1.dist-info/RECORD +0 -26
spacr-0.0.1.dist-info/entry_points.txt +0 -5
{spacr-0.0.1.dist-info → spacr-0.0.6.dist-info}/LICENSE +0 -0
{spacr-0.0.1.dist-info → spacr-0.0.6.dist-info}/top_level.txt +0 -0

spacr/timelapse.py CHANGED Viewed

@@ -1,14 +1,17 @@
-import cv2, os, re, glob, random, btrack
+import cv2, os, re, glob, random, btrack, sqlite3
 import numpy as np
 import pandas as pd
 from collections import defaultdict
 import matplotlib.pyplot as plt
-from matplotlib.animation import FuncAnimation
 from IPython.display import display
 from IPython.display import Image as ipyimage
 import trackpy as tp
 from btrack import datasets as btrack_datasets
 from skimage.measure import regionprops
+from scipy.signal import find_peaks
+from scipy.optimize import curve_fit
+from scipy.integrate import trapz
+import matplotlib.pyplot as plt
 from .logger import log_function_call
@@ -144,56 +147,6 @@ def _sort_key(file_path):
         # Return a tuple that sorts this file as "earliest" or "lowest"
         return ('', '', '', 0)
-def _save_mask_timelapse_as_gif(masks, path, cmap, norm, filenames):
-    """
-    Save a timelapse of masks as a GIF.
-    Parameters:
-    masks (list): List of mask frames.
-    path (str): Path to save the GIF.
-    cmap: Colormap for displaying the masks.
-    norm: Normalization for the masks.
-    filenames (list): List of filenames corresponding to each mask frame.
-    Returns:
-    None
-    """
-    def _update(frame):
-        """
-        Update the plot with the given frame.
-        Parameters:
-        frame (int): The frame number to update the plot with.
-        Returns:
-        None
-        """
-        nonlocal filename_text_obj
-        if filename_text_obj is not None:
-            filename_text_obj.remove()
-        ax.clear()
-        ax.axis('off')
-        current_mask = masks[frame]
-        ax.imshow(current_mask, cmap=cmap, norm=norm)
-        ax.set_title(f'Frame: {frame}', fontsize=24, color='white')
-        filename_text = filenames[frame]
-        filename_text_obj = fig.text(0.5, 0.01, filename_text, ha='center', va='center', fontsize=20, color='white')
-        for label_value in np.unique(current_mask):
-            if label_value == 0: continue  # Skip background
-            y, x = np.mean(np.where(current_mask == label_value), axis=1)
-            ax.text(x, y, str(label_value), color='white', fontsize=24, ha='center', va='center')
-    fig, ax = plt.subplots(figsize=(50, 50), facecolor='black')
-    ax.set_facecolor('black')
-    ax.axis('off')
-    plt.subplots_adjust(left=0, right=1, top=1, bottom=0, wspace=0, hspace=0)
-    filename_text_obj = None
-    anim = FuncAnimation(fig, _update, frames=len(masks), blit=False)
-    anim.save(path, writer='pillow', fps=2, dpi=80)  # Adjust DPI for size/quality
-    plt.close(fig)
-    print(f'Saved timelapse to {path}')
 def _masks_to_gif(masks, gif_folder, name, filenames, object_type):
     """
     Converts a sequence of masks into a GIF file.
@@ -208,6 +161,9 @@ def _masks_to_gif(masks, gif_folder, name, filenames, object_type):
     Returns:
         None
     """
+    from .io import _save_mask_timelapse_as_gif
     def _display_gif(path):
         with open(path, 'rb') as file:
             display(ipyimage(file.read()))
@@ -220,7 +176,7 @@ def _masks_to_gif(masks, gif_folder, name, filenames, object_type):
     norm = plt.cm.colors.Normalize(vmin=0, vmax=highest_label)
     save_path_gif = os.path.join(gif_folder, f'timelapse_masks_{object_type}_{name}.gif')
-    _save_mask_timelapse_as_gif(masks, save_path_gif, cmap, norm, filenames)
+    _save_mask_timelapse_as_gif(masks, None, save_path_gif, cmap, norm, filenames)
     #_display_gif(save_path_gif)
 def _timelapse_masks_to_gif(folder_path, mask_channels, object_types):
@@ -450,6 +406,8 @@ def _trackpy_track_cells(src, name, batch_filenames, object_type, masks, timelap
         from .plot import _visualize_and_save_timelapse_stack_with_tracks
         from .utils import _masks_to_masks_stack
+        print(f'Tracking objects with trackpy')
         if timelapse_displacement is None:
             features = _prepare_for_tracking(masks)
             timelapse_displacement = _find_optimal_search_range(features, initial_search_range=500, increment=10, max_attempts=49, memory=3)
@@ -574,3 +532,335 @@ def _btrack_track_cells(src, name, batch_filenames, object_type, plot, save, mas
     mask_stack = _masks_to_masks_stack(masks)
     return mask_stack
+def exponential_decay(x, a, b, c):
+    """
+    Evaluate a three-parameter exponential decay curve.
+    Parameters:
+    x: Point(s) at which the curve is evaluated (anything numpy can multiply and exponentiate).
+    a: Multiplier applied to the exponential term.
+    b: Rate used inside the exponent; the exponent is -b * x.
+    c: Constant added to the result.
+    Returns:
+    The value of a * exp(-b * x) + c.
+    """
+    decay_term = np.exp(-b * x)
+    return a * decay_term + c
+def preprocess_pathogen_data(pathogen_df):
+    """
+    Collapse a pathogen table to one row per group of identifier columns.
+    Rows are grouped by plate, row, col, field, timeid and pathogen_cell_id. Columns with a
+    numeric dtype are averaged, all other columns keep their first value, and the number of
+    rows in each group is stored as 'parasite_count'. An 'object_label' column coming from the
+    input is dropped and 'pathogen_cell_id' is renamed to 'object_label'.
+    Parameters:
+    pathogen_df (pandas.DataFrame): Pathogen measurements containing the grouping columns.
+    Returns:
+    pandas.DataFrame: The aggregated table.
+    """
+    id_cols = ['plate', 'row', 'col', 'field', 'timeid', 'pathogen_cell_id']
+    skipped = set(id_cols) | {'parasite_count'}
+    # Pick an aggregation per remaining column: mean for numeric dtypes, first otherwise
+    agg_funcs = {}
+    for col in pathogen_df.columns:
+        if col in skipped:
+            continue
+        is_numeric = np.issubdtype(pathogen_df[col].dtype, np.number)
+        agg_funcs[col] = 'mean' if is_numeric else 'first'
+    # Group size, i.e. how many pathogen rows share the same identifiers
+    parasite_counts = pathogen_df.groupby(id_cols).size().reset_index(name='parasite_count')
+    pathogen_agg = pathogen_df.groupby(id_cols).agg(agg_funcs).reset_index()
+    pathogen_agg = pathogen_agg.merge(parasite_counts, on=id_cols)
+    # The input's object_label corresponds to the pathogen ID, not the cell ID, so discard it
+    if 'object_label' in pathogen_agg.columns:
+        pathogen_agg = pathogen_agg.drop(columns=['object_label'])
+    return pathogen_agg.rename(columns={'pathogen_cell_id': 'object_label'})
+def plot_data(measurement, group, ax, label, marker='o', linestyle='-'):
+    """
+    Draw one trace of the 'delta_' + measurement column against 'time'.
+    Parameters:
+    measurement (str): Measurement name; the plotted column is 'delta_' + measurement.
+    group: Mapping or DataFrame providing the 'time' and delta columns.
+    ax: Object with a matplotlib-style plot() method that receives the trace.
+    label (str): Label attached to the trace.
+    marker (str): Marker style forwarded to ax.plot.
+    linestyle (str): Line style forwarded to ax.plot.
+    Returns:
+    None
+    """
+    delta_column = 'delta_' + measurement
+    times = group['time']
+    deltas = group[delta_column]
+    ax.plot(times, deltas, marker=marker, linestyle=linestyle, label=label)
+def infected_vs_noninfected(result_df, measurement):
+    """
+    Plot per-cell delta traces for infected and never-infected cells in two stacked panels.
+    A cell is treated as infected when the maximum of 'parasite_count' over its rows is above
+    zero, and as never infected when that maximum equals zero.
+    Parameters:
+    result_df (pandas.DataFrame): Needs the columns 'plate_row_column_field_object', 'parasite_count', 'time' and 'delta_' + measurement.
+    measurement (str): Measurement name used to find the delta column and to label the y axis.
+    Returns:
+    matplotlib.figure.Figure: The figure that was drawn, so that callers can save it.
+    """
+    cell_key = 'plate_row_column_field_object'
+    # Compute the per-cell maximum once and reuse it for both masks
+    max_parasites = result_df.groupby(cell_key)['parasite_count'].transform('max')
+    panels = [
+        (result_df[max_parasites > 0], 'Infected', 'x', 'Cells Infected at Some Time'),
+        (result_df[max_parasites == 0], 'Uninfected', 'o', 'Cells Never Infected'),
+    ]
+    # The tick positions are the same for both panels, so sort them a single time
+    all_timepoints = sorted(result_df['time'].unique())
+    fig, axs = plt.subplots(2, 1, figsize=(12, 10), sharex=True)
+    for ax, (cells_df, label, marker, title) in zip(axs, panels):
+        # sort=False keeps the cells in order of first appearance
+        for _, group in cells_df.groupby(cell_key, sort=False):
+            plot_data(measurement, group, ax, label, marker=marker)
+        ax.set_title(title)
+        ax.set_xlabel('Time')
+        ax.set_ylabel('Normalized Delta ' + measurement)
+        ax.set_xticks(all_timepoints)
+        ax.set_xticklabels(all_timepoints, rotation=45, ha="right")
+    plt.tight_layout()
+    plt.show()
+    return fig
+def save_figure(fig, src, figure_number):
+    """
+    Write a figure as a PDF into a 'results' folder next to src.
+    Parameters:
+    fig: Object with a savefig(path) method, e.g. a matplotlib figure.
+    src (str): Path whose parent directory receives the 'results' folder.
+    figure_number: Value inserted into the file name 'figure_<figure_number>.pdf'.
+    Returns:
+    None
+    """
+    results_fldr = os.path.join(os.path.dirname(src), 'results')
+    # The folder is created on demand; an existing folder is reused
+    os.makedirs(results_fldr, exist_ok=True)
+    file_name = f'figure_{figure_number}.pdf'
+    fig_loc = os.path.join(results_fldr, file_name)
+    fig.savefig(fig_loc)
+    print(f'Saved figure:{fig_loc}')
+def save_results_dataframe(df, src, results_name):
+    """
+    Write a DataFrame as CSV (index included) into a 'results' folder next to src.
+    Parameters:
+    df (pandas.DataFrame): Table to write.
+    src (str): Path whose parent directory receives the 'results' folder.
+    results_name (str): File name without extension; '.csv' is appended.
+    Returns:
+    None
+    """
+    results_fldr = os.path.join(os.path.dirname(src), 'results')
+    # The folder is created on demand; an existing folder is reused
+    os.makedirs(results_fldr, exist_ok=True)
+    file_name = f'{results_name}.csv'
+    csv_loc = os.path.join(results_fldr, file_name)
+    df.to_csv(csv_loc, index=True)
+    print(f'Saved results:{csv_loc}')
+def summarize_per_well(peak_details_df):
+    """
+    Summarize detected peaks per well.
+    The 'ID' column is split on '_' into plate, row, column, field and object_number, and
+    'well_ID' is built as row + '_' + column. These columns are written into peak_details_df
+    itself, so the caller's frame is modified. Rows with a null 'amplitude' are not counted
+    as peaks but still contribute to the number of cells in the well.
+    Parameters:
+    peak_details_df (pandas.DataFrame): Needs an 'ID' column with five '_'-separated parts and an 'amplitude' column.
+    Returns:
+    pandas.DataFrame: One row per well that has at least one peak, with peaks_per_well,
+    unique_IDs_with_amplitude, the mean of every numeric column, cells_per_well and peaks_per_cell.
+    """
+    # Step 1: Split the 'ID' column
+    split_columns = peak_details_df['ID'].str.split('_', expand=True)
+    peak_details_df[['plate', 'row', 'column', 'field', 'object_number']] = split_columns
+    # Step 2: Create 'well_ID' by combining 'row' and 'column'
+    peak_details_df['well_ID'] = peak_details_df['row'] + '_' + peak_details_df['column']
+    # Only rows with an amplitude describe an actual peak
+    filtered_df = peak_details_df[peak_details_df['amplitude'].notna()]
+    numeric_cols = filtered_df.select_dtypes(include=['number']).columns
+    # Step 3: Peak statistics per well (every numeric column, amplitude included, is averaged)
+    summary_df = filtered_df.groupby('well_ID').agg(
+        peaks_per_well=('ID', 'size'),
+        unique_IDs_with_amplitude=('ID', 'nunique'),
+        **{col: (col, 'mean') for col in numeric_cols}
+    ).reset_index()
+    # Step 4: Cell counts come from ALL rows, so wells without peaks appear here but not in
+    # summary_df. Look the counts up by well_ID; copying them by row position would hand a
+    # well the count of a different well whenever the two tables list different wells.
+    # NOTE(review): object_number is only the last part of the ID, so cells from different
+    # fields that share a number are counted once - confirm this is intended.
+    cells_per_well = peak_details_df.groupby('well_ID')['object_number'].nunique()
+    summary_df['cells_per_well'] = summary_df['well_ID'].map(cells_per_well)
+    summary_df['peaks_per_cell'] = summary_df['peaks_per_well'] / summary_df['cells_per_well']
+    return summary_df
+def summarize_per_well_inf_non_inf(peak_details_df):
+    """
+    Summarize detected peaks per well, separately for infected and non-infected rows.
+    The 'ID' column is split on '_' into plate, row, column, field and object_number, and
+    'well_ID' is built as row + '_' + column. These columns and 'infected_status' are written
+    into peak_details_df itself, so the caller's frame is modified.
+    Parameters:
+    peak_details_df (pandas.DataFrame): Needs 'ID' (five '_'-separated parts), 'amplitude' and 'infected' columns.
+    Returns:
+    pandas.DataFrame: One row per (well_ID, infected_status) with cells_per_well, peaks_per_well,
+    the mean of every numeric column and peaks_per_cell.
+    """
+    # Step 1: Split the 'ID' column
+    split_columns = peak_details_df['ID'].str.split('_', expand=True)
+    peak_details_df[['plate', 'row', 'column', 'field', 'object_number']] = split_columns
+    # Step 2: Create 'well_ID' by combining 'row' and 'column'
+    peak_details_df['well_ID'] = peak_details_df['row'] + '_' + peak_details_df['column']
+    # A row is classified as infected when its 'infected' value is above zero
+    peak_details_df['infected_status'] = peak_details_df['infected'].apply(lambda x: 'infected' if x > 0 else 'non_infected')
+    numeric_cols = peak_details_df.select_dtypes(include=['number']).columns
+    # Step 3: Calculate summary statistics.
+    # peaks_per_well counts non-null amplitudes: rows with a null amplitude are not peaks
+    # (summarize_per_well filters them out as well), so counting plain rows would report
+    # one peak for every cell that has none.
+    summary_df = peak_details_df.groupby(['well_ID', 'infected_status']).agg(
+        cells_per_well=('object_number', 'nunique'),
+        peaks_per_well=('amplitude', 'count'),
+        **{col: (col, 'mean') for col in numeric_cols}
+    ).reset_index()
+    # Calculate peaks per cell
+    summary_df['peaks_per_cell'] = summary_df['peaks_per_well'] / summary_df['cells_per_well']
+    return summary_df
+def analyze_calcium_oscillations(db_loc, measurement='cell_channel_1_mean_intensity', size_filter='cell_area', fluctuation_threshold=0.25, num_lines=None, peak_height=0.01, pathogen=None, cytoplasm=None, remove_transient=True, verbose=False, transience_threshold=0.9):
+    """
+    Detect peaks in per-cell time series stored in an SQLite measurements database.
+    The 'cell' table is loaded (optionally merged with the aggregated 'pathogen' table and the
+    'cytoplasm' table), the measurement is divided by an exponential decay fitted to the whole
+    dataset, and peaks are detected on the frame-to-frame difference of the corrected signal
+    of each cell. Four CSV tables and the figures are written to a 'results' folder next to
+    the database.
+    Parameters:
+    db_loc (str): Path to the SQLite database.
+    measurement (str): Column of the cell table to analyze.
+    size_filter (str): Column whose std / mean per cell is compared with fluctuation_threshold.
+    fluctuation_threshold (float): Cells whose std / mean exceeds this value are skipped.
+    num_lines (int or None): If set, number of randomly chosen cells drawn in the summary plot.
+    peak_height (float): Minimum peak height passed to find_peaks.
+    pathogen: If truthy, load the pathogen table and compare infected with uninfected cells.
+    cytoplasm: If truthy, merge the cytoplasm table into the cell table.
+    remove_transient (bool): Skip cells that are present in too few timepoints.
+    verbose (bool): Print progress details.
+    transience_threshold (float): Fraction of all timepoints; cells with at most int(fraction * timepoints) rows are skipped.
+    Returns:
+    tuple: (result_df, peak_details_df, fig) on success.
+    None: When the curve fit fails or no cell passes the filters.
+    """
+    merge_keys = ['plate', 'row', 'col', 'field', 'timeid', 'object_label']
+    conn = sqlite3.connect(db_loc)
+    try:
+        # Load cell table
+        cell_df = pd.read_sql("SELECT * FROM cell", conn)
+        if pathogen:
+            pathogen_df = pd.read_sql("SELECT * FROM pathogen", conn)
+            pathogen_df['pathogen_cell_id'] = pathogen_df['pathogen_cell_id'].astype(float).astype('Int64')
+            pathogen_df = preprocess_pathogen_data(pathogen_df)
+            cell_df = cell_df.merge(pathogen_df, on=merge_keys, how='left', suffixes=('', '_pathogen'))
+            cell_df['parasite_count'] = cell_df['parasite_count'].fillna(0)
+            print(f'After pathogen merge: {len(cell_df)} objects')
+        # Optionally load cytoplasm table and merge
+        if cytoplasm:
+            cytoplasm_df = pd.read_sql("SELECT * FROM cytoplasm", conn)
+            cell_df = cell_df.merge(cytoplasm_df, on=merge_keys, how='left', suffixes=('', '_cytoplasm'))
+            print(f'After cytoplasm merge: {len(cell_df)} objects')
+    finally:
+        # Release the database handle even when a table is missing or a merge fails
+        conn.close()
+    if 'parasite_count' not in cell_df.columns:
+        # The peak bookkeeping below always reads this column; without pathogen data every
+        # cell is treated as uninfected instead of failing with a KeyError
+        cell_df['parasite_count'] = 0
+    # Rebuild the identifier columns from 'prcf' (plate_row_column_field_t<time>)
+    prcf_components = cell_df['prcf'].str.split('_', expand=True)
+    cell_df['plate'] = prcf_components[0]
+    cell_df['row'] = prcf_components[1]
+    cell_df['column'] = prcf_components[2]
+    cell_df['field'] = prcf_components[3]
+    # Raw string: '\d' in a normal string literal is an invalid escape sequence
+    cell_df['time'] = prcf_components[4].str.extract(r't(\d+)').astype(int)
+    cell_df['object_number'] = cell_df['object_label']
+    cell_df['plate_row_column_field_object'] = cell_df['plate'].astype(str) + '_' + cell_df['row'].astype(str) + '_' + cell_df['column'].astype(str) + '_' + cell_df['field'].astype(str) + '_' + cell_df['object_label'].astype(str)
+    df = cell_df.copy()
+    corrected_col = 'corrected_' + measurement
+    delta_col = 'delta_' + measurement
+    # Fit exponential decay model to all fluorescence data and divide it out
+    try:
+        params, _ = curve_fit(exponential_decay, df['time'], df[measurement], p0=[max(df[measurement]), 0.01, min(df[measurement])], maxfev=10000)
+        df[corrected_col] = df[measurement] / exponential_decay(df['time'], *params)
+    except RuntimeError as e:
+        print(f"Curve fitting failed for the entire dataset with error: {e}")
+        return
+    if verbose:
+        print(f'Analyzing: {len(df)} objects')
+    corrected_dfs = []
+    peak_details_list = []
+    total_timepoints = df['time'].nunique()
+    # Minimum track length is the same for every cell, so compute it once
+    threshold = int(transience_threshold * total_timepoints)
+    size_filter_removed = 0
+    transience_removed = 0
+    for unique_id, group in df.groupby('plate_row_column_field_object'):
+        group = group.sort_values('time')
+        if remove_transient:
+            if verbose:
+                print(f'Group length: {len(group)} Timelapse length: {total_timepoints}, threshold:{threshold}')
+            if len(group) <= threshold:
+                transience_removed += 1
+                if verbose:
+                    print(f'removed group {unique_id} due to transience')
+                continue
+        size_diff = group[size_filter].std() / group[size_filter].mean()
+        # Written as "not <=" so that a NaN ratio is rejected as well
+        if not (size_diff <= fluctuation_threshold):
+            size_filter_removed += 1
+            continue
+        group[delta_col] = group[corrected_col].diff().fillna(0)
+        corrected_dfs.append(group)
+        # Detect peaks
+        peaks, properties = find_peaks(group[delta_col], height=peak_height)
+        # Area under the whole trace, and under the trace with negative values set to 0
+        auc = trapz(y=group[delta_col], x=group['time'])
+        above_zero_auc = trapz(y=group[delta_col].clip(lower=0), x=group['time'])
+        # Fields shared by every record of this cell; key order fixes the CSV column order
+        base_record = {
+            'ID': unique_id,
+            'plate': group['plate'].iloc[0],
+            'row': group['row'].iloc[0],
+            'column': group['column'].iloc[0],
+            'field': group['field'].iloc[0],
+            'object_number': group['object_number'].iloc[0],
+        }
+        if len(peaks) == 0:
+            # Placeholder row so that cells without peaks still appear in the output
+            peak_details_list.append({
+                **base_record,
+                'time': np.nan,
+                'amplitude': np.nan,
+                'delta': np.nan,
+                'AUC': auc,
+                'AUC_positive': above_zero_auc,
+                'AUC_peak': np.nan,
+                'infected': int((group['parasite_count'] > 0).any()),
+            })
+        for i, peak in enumerate(peaks):
+            # Integrate over the peak and its direct neighbours, clamped to the trace
+            start_idx = max(peak - 1, 0)
+            end_idx = min(peak + 1, len(group) - 1)
+            peak_segment = slice(start_idx, end_idx + 1)
+            peak_auc = trapz(y=group[delta_col].iloc[peak_segment], x=group['time'].iloc[peak_segment])
+            # NOTE(review): peak rows store the parasite count at the peak while the
+            # placeholder rows above store a 0/1 flag - confirm which one is intended
+            peak_details_list.append({
+                **base_record,
+                'time': group['time'].iloc[peak],
+                'amplitude': properties['peak_heights'][i],
+                'delta': group[delta_col].iloc[peak],
+                'AUC': auc,
+                'AUC_positive': above_zero_auc,
+                'AUC_peak': peak_auc,
+                'infected': group['parasite_count'].iloc[peak],
+            })
+    if verbose:
+        print(f'Removed {size_filter_removed} objects due to size filter fluctuation')
+        print(f'Removed {transience_removed} objects due to transience')
+    if not corrected_dfs:
+        print("No suitable cells found for analysis")
+        return
+    result_df = pd.concat(corrected_dfs)
+    peak_details_df = pd.DataFrame(peak_details_list)
+    summary_df = summarize_per_well(peak_details_df)
+    summary_df_inf_non_inf = summarize_per_well_inf_non_inf(peak_details_df)
+    save_results_dataframe(df=peak_details_df, src=db_loc, results_name='peak_details')
+    save_results_dataframe(df=result_df, src=db_loc, results_name='results')
+    save_results_dataframe(df=summary_df, src=db_loc, results_name='well_results')
+    save_results_dataframe(df=summary_df_inf_non_inf, src=db_loc, results_name='well_results_inf_non_inf')
+    # Plotting
+    fig, ax = plt.subplots(figsize=(10, 8))
+    sampled_groups = result_df['plate_row_column_field_object'].unique()
+    if num_lines is not None and 0 < num_lines < len(sampled_groups):
+        sampled_groups = np.random.choice(sampled_groups, size=num_lines, replace=False)
+    for group_id in sampled_groups:
+        group = result_df[result_df['plate_row_column_field_object'] == group_id]
+        ax.plot(group['time'], group[delta_col], marker='o', linestyle='-')
+    all_timepoints = sorted(df['time'].unique())
+    ax.set_xticks(all_timepoints)
+    ax.set_xticklabels(all_timepoints, rotation=45, ha="right")
+    ax.set_title(f'Normalized Delta of {measurement} Over Time (Corrected for Photobleaching)')
+    ax.set_xlabel('Time')
+    ax.set_ylabel('Normalized Delta ' + measurement)
+    plt.tight_layout()
+    plt.show()
+    save_figure(fig, src=db_loc, figure_number=1)
+    if pathogen:
+        comparison_fig = infected_vs_noninfected(result_df, measurement)
+        # Save the comparison figure when the helper hands it back; if it returns nothing,
+        # fall back to the summary figure
+        save_figure(comparison_fig if comparison_fig is not None else fig, src=db_loc, figure_number=2)
+        # Identify cells with and without pathogens
+        max_parasites = result_df.groupby('plate_row_column_field_object')['parasite_count'].transform('max')
+        infected_cells = result_df[max_parasites > 0]['plate_row_column_field_object'].unique()
+        noninfected_cells = result_df[max_parasites == 0]['plate_row_column_field_object'].unique()
+        # Count real peaks only; rows with a null amplitude are placeholders for cells without peaks
+        has_peak = peak_details_df['amplitude'].notna()
+        infected_peak_count = (has_peak & peak_details_df['ID'].isin(infected_cells)).sum()
+        noninfected_peak_count = (has_peak & peak_details_df['ID'].isin(noninfected_cells)).sum()
+        # Calculate the average number of peaks per cell
+        avg_inf_peaks_per_cell = infected_peak_count / len(infected_cells) if len(infected_cells) > 0 else 0
+        avg_non_inf_peaks_per_cell = noninfected_peak_count / len(noninfected_cells) if len(noninfected_cells) > 0 else 0
+        print(f'Average number of peaks per infected cell: {avg_inf_peaks_per_cell:.2f}')
+        print(f'Average number of peaks per non-infected cell: {avg_non_inf_peaks_per_cell:.2f}')
+    print(f'done')
+    return result_df, peak_details_df, fig

spacr 0.0.1__py3-none-any.whl → 0.0.6__py3-none-any.whl

spacr 0.0.1py3-none-any.whl → 0.0.6py3-none-any.whl