spacr 0.0.1__py3-none-any.whl → 0.0.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- spacr/__init__.py +6 -2
- spacr/__main__.py +0 -2
- spacr/alpha.py +807 -0
- spacr/annotate_app.py +118 -120
- spacr/chris.py +50 -0
- spacr/cli.py +25 -187
- spacr/core.py +1611 -389
- spacr/deep_spacr.py +696 -0
- spacr/foldseek.py +779 -0
- spacr/get_alfafold_structures.py +72 -0
- spacr/graph_learning.py +320 -0
- spacr/graph_learning_lap.py +84 -0
- spacr/gui.py +145 -0
- spacr/gui_2.py +90 -0
- spacr/gui_classify_app.py +187 -0
- spacr/gui_mask_app.py +149 -174
- spacr/gui_measure_app.py +116 -109
- spacr/gui_sim_app.py +0 -0
- spacr/gui_utils.py +679 -139
- spacr/io.py +620 -469
- spacr/mask_app.py +116 -9
- spacr/measure.py +178 -84
- spacr/models/cp/toxo_pv_lumen.CP_model +0 -0
- spacr/old_code.py +255 -1
- spacr/plot.py +263 -100
- spacr/sequencing.py +1130 -0
- spacr/sim.py +634 -122
- spacr/timelapse.py +343 -53
- spacr/train.py +195 -22
- spacr/umap.py +0 -689
- spacr/utils.py +1530 -188
- spacr-0.0.6.dist-info/METADATA +118 -0
- spacr-0.0.6.dist-info/RECORD +39 -0
- {spacr-0.0.1.dist-info → spacr-0.0.6.dist-info}/WHEEL +1 -1
- spacr-0.0.6.dist-info/entry_points.txt +9 -0
- spacr-0.0.1.dist-info/METADATA +0 -64
- spacr-0.0.1.dist-info/RECORD +0 -26
- spacr-0.0.1.dist-info/entry_points.txt +0 -5
- {spacr-0.0.1.dist-info → spacr-0.0.6.dist-info}/LICENSE +0 -0
- {spacr-0.0.1.dist-info → spacr-0.0.6.dist-info}/top_level.txt +0 -0
spacr/timelapse.py
CHANGED
@@ -1,14 +1,17 @@
|
|
1
|
-
import cv2, os, re, glob, random, btrack
|
1
|
+
import cv2, os, re, glob, random, btrack, sqlite3
|
2
2
|
import numpy as np
|
3
3
|
import pandas as pd
|
4
4
|
from collections import defaultdict
|
5
5
|
import matplotlib.pyplot as plt
|
6
|
-
from matplotlib.animation import FuncAnimation
|
7
6
|
from IPython.display import display
|
8
7
|
from IPython.display import Image as ipyimage
|
9
8
|
import trackpy as tp
|
10
9
|
from btrack import datasets as btrack_datasets
|
11
10
|
from skimage.measure import regionprops
|
11
|
+
from scipy.signal import find_peaks
|
12
|
+
from scipy.optimize import curve_fit
|
13
|
+
from scipy.integrate import trapz
|
14
|
+
import matplotlib.pyplot as plt
|
12
15
|
|
13
16
|
from .logger import log_function_call
|
14
17
|
|
@@ -144,56 +147,6 @@ def _sort_key(file_path):
|
|
144
147
|
# Return a tuple that sorts this file as "earliest" or "lowest"
|
145
148
|
return ('', '', '', 0)
|
146
149
|
|
147
|
-
def _save_mask_timelapse_as_gif(masks, path, cmap, norm, filenames):
    """
    Save a timelapse of masks as a GIF.

    Every frame is rendered with ``imshow`` on a black 50x50 inch figure,
    titled with its frame index, captioned with its filename and annotated
    with each non-zero label value at that label's mean pixel position.

    Parameters:
        masks (list): List of mask frames.
        path (str): Path to save the GIF.
        cmap: Colormap for displaying the masks.
        norm: Normalization for the masks.
        filenames (list): List of filenames corresponding to each mask frame.

    Returns:
        None
    """
    def _update(frame):
        """
        Update the plot with the given frame.

        Parameters:
            frame (int): The frame number to update the plot with.

        Returns:
            None
        """
        nonlocal filename_text_obj
        # The caption is attached to the figure, not the axes, so ax.clear()
        # would not remove it; drop the previous one explicitly.
        if filename_text_obj is not None:
            filename_text_obj.remove()
        ax.clear()
        ax.axis('off')
        current_mask = masks[frame]
        ax.imshow(current_mask, cmap=cmap, norm=norm)
        ax.set_title(f'Frame: {frame}', fontsize=24, color='white')
        filename_text = filenames[frame]
        filename_text_obj = fig.text(0.5, 0.01, filename_text, ha='center', va='center', fontsize=20, color='white')
        for label_value in np.unique(current_mask):
            if label_value == 0:
                continue  # Skip background
            # Unpacking into (y, x) relies on the mask being 2-D.
            y, x = np.mean(np.where(current_mask == label_value), axis=1)
            ax.text(x, y, str(label_value), color='white', fontsize=24, ha='center', va='center')

    fig, ax = plt.subplots(figsize=(50, 50), facecolor='black')
    ax.set_facecolor('black')
    ax.axis('off')
    plt.subplots_adjust(left=0, right=1, top=1, bottom=0, wspace=0, hspace=0)

    filename_text_obj = None
    try:
        anim = FuncAnimation(fig, _update, frames=len(masks), blit=False)
        anim.save(path, writer='pillow', fps=2, dpi=80)  # Adjust DPI for size/quality
    finally:
        # Always release the (very large) figure, even when saving fails.
        plt.close(fig)
    print(f'Saved timelapse to {path}')
|
196
|
-
|
197
150
|
def _masks_to_gif(masks, gif_folder, name, filenames, object_type):
|
198
151
|
"""
|
199
152
|
Converts a sequence of masks into a GIF file.
|
@@ -208,6 +161,9 @@ def _masks_to_gif(masks, gif_folder, name, filenames, object_type):
|
|
208
161
|
Returns:
|
209
162
|
None
|
210
163
|
"""
|
164
|
+
|
165
|
+
from .io import _save_mask_timelapse_as_gif
|
166
|
+
|
211
167
|
def _display_gif(path):
|
212
168
|
with open(path, 'rb') as file:
|
213
169
|
display(ipyimage(file.read()))
|
@@ -220,7 +176,7 @@ def _masks_to_gif(masks, gif_folder, name, filenames, object_type):
|
|
220
176
|
norm = plt.cm.colors.Normalize(vmin=0, vmax=highest_label)
|
221
177
|
|
222
178
|
save_path_gif = os.path.join(gif_folder, f'timelapse_masks_{object_type}_{name}.gif')
|
223
|
-
_save_mask_timelapse_as_gif(masks, save_path_gif, cmap, norm, filenames)
|
179
|
+
_save_mask_timelapse_as_gif(masks, None, save_path_gif, cmap, norm, filenames)
|
224
180
|
#_display_gif(save_path_gif)
|
225
181
|
|
226
182
|
def _timelapse_masks_to_gif(folder_path, mask_channels, object_types):
|
@@ -450,6 +406,8 @@ def _trackpy_track_cells(src, name, batch_filenames, object_type, masks, timelap
|
|
450
406
|
from .plot import _visualize_and_save_timelapse_stack_with_tracks
|
451
407
|
from .utils import _masks_to_masks_stack
|
452
408
|
|
409
|
+
print(f'Tracking objects with trackpy')
|
410
|
+
|
453
411
|
if timelapse_displacement is None:
|
454
412
|
features = _prepare_for_tracking(masks)
|
455
413
|
timelapse_displacement = _find_optimal_search_range(features, initial_search_range=500, increment=10, max_attempts=49, memory=3)
|
@@ -574,3 +532,335 @@ def _btrack_track_cells(src, name, batch_filenames, object_type, plot, save, mas
|
|
574
532
|
|
575
533
|
mask_stack = _masks_to_masks_stack(masks)
|
576
534
|
return mask_stack
|
535
|
+
|
536
|
+
def exponential_decay(x, a, b, c):
    """
    Evaluate the exponential decay ``a * exp(-b * x) + c``.

    Parameters:
        x: Scalar or array-like of positions at which to evaluate.
        a: Multiplier applied to the exponential term.
        b: Decay rate (negated inside the exponent).
        c: Constant offset added to the result.

    Returns:
        The value(s) of ``a * exp(-b * x) + c``.
    """
    decay_term = np.exp(-b * x)
    return a * decay_term + c
|
538
|
+
|
539
|
+
def _pathogen_aggregation_for_dtype(dtype):
    """Return 'mean' for numeric dtypes and 'first' for every other dtype."""
    try:
        is_numeric = np.issubdtype(dtype, np.number)
    except TypeError:
        # NumPy cannot interpret pandas extension dtypes (nullable integers,
        # strings, ...); fall back to the pandas predicates. Booleans stay
        # non-numeric, matching what np.issubdtype reports for NumPy bools.
        is_numeric = (pd.api.types.is_numeric_dtype(dtype)
                      and not pd.api.types.is_bool_dtype(dtype))
    return 'mean' if is_numeric else 'first'


def preprocess_pathogen_data(pathogen_df):
    """
    Collapse pathogen rows to one row per host cell and timepoint.

    Rows are grouped by plate, row, col, field, timeid and pathogen_cell_id.
    Numeric columns are averaged, other columns keep their first value, and
    the number of rows in each group is stored as 'parasite_count'.

    Parameters:
        pathogen_df (pandas.DataFrame): Pathogen measurements containing the
            grouping columns listed above.

    Returns:
        pandas.DataFrame: One row per group. 'pathogen_cell_id' is renamed to
        'object_label'; the pathogen's own 'object_label' column is not kept.
    """
    group_keys = ['plate', 'row', 'col', 'field', 'timeid', 'pathogen_cell_id']
    grouped = pathogen_df.groupby(group_keys)

    # Number of pathogen rows per host cell and timepoint.
    parasite_counts = grouped.size().reset_index(name='parasite_count')

    # The pathogen's own object_label is the pathogen ID, not the cell ID, and
    # is discarded from the result, so it is not aggregated at all.
    not_aggregated = set(group_keys) | {'parasite_count', 'object_label'}
    agg_funcs = {
        col: _pathogen_aggregation_for_dtype(pathogen_df[col].dtype)
        for col in pathogen_df.columns
        if col not in not_aggregated
    }

    if agg_funcs:
        pathogen_agg = grouped.agg(agg_funcs).reset_index()
        pathogen_agg = pathogen_agg.merge(parasite_counts, on=group_keys)
    else:
        # Nothing to aggregate: the counts table already has one row per group.
        pathogen_agg = parasite_counts

    # From here on the host cell ID plays the role of object_label.
    return pathogen_agg.rename(columns={'pathogen_cell_id': 'object_label'})
|
558
|
+
|
559
|
+
def plot_data(measurement, group, ax, label, marker='o', linestyle='-'):
    """
    Draw one trace of ``delta_<measurement>`` against ``time`` on an axes.

    Parameters:
        measurement (str): Measurement name; the plotted column is
            'delta_' + measurement.
        group: Mapping/DataFrame providing 'time' and the delta column.
        ax: Object with a matplotlib-style ``plot`` method.
        label: Label passed through to ``ax.plot``.
        marker: Marker style passed through to ``ax.plot``.
        linestyle: Line style passed through to ``ax.plot``.

    Returns:
        None
    """
    delta_column = 'delta_' + measurement
    line_options = {'marker': marker, 'linestyle': linestyle, 'label': label}
    ax.plot(group['time'], group[delta_column], **line_options)
|
561
|
+
|
562
|
+
def infected_vs_noninfected(result_df, measurement):
    """
    Plot per-cell delta traces, split into ever-infected and never-infected cells.

    A cell (one 'plate_row_column_field_object' value) goes to the top panel
    when its maximum 'parasite_count' is above 0 and to the bottom panel when
    that maximum equals 0. The figure is shown with ``plt.show()``.

    Parameters:
        result_df (pandas.DataFrame): Needs the columns
            'plate_row_column_field_object', 'parasite_count', 'time' and
            'delta_' + measurement.
        measurement (str): Measurement name used to pick the delta column and
            to label the y axis.

    Returns:
        matplotlib.figure.Figure: The figure that was drawn, so callers can
        save it.
    """
    cell_col = 'plate_row_column_field_object'

    # Highest parasite count ever seen for each cell, broadcast to every row.
    max_parasites = result_df.groupby(cell_col)['parasite_count'].transform('max')
    infected_cells_df = result_df[max_parasites > 0]
    uninfected_cells_df = result_df[max_parasites == 0]

    fig, axs = plt.subplots(2, 1, figsize=(12, 10), sharex=True)

    # sort=False keeps cells in order of first appearance, so traces are drawn
    # in the same order as iterating over .unique().
    for _, group in infected_cells_df.groupby(cell_col, sort=False):
        plot_data(measurement, group, axs[0], 'Infected', marker='x')

    for _, group in uninfected_cells_df.groupby(cell_col, sort=False):
        plot_data(measurement, group, axs[1], 'Uninfected')

    axs[0].set_title('Cells Infected at Some Time')
    axs[1].set_title('Cells Never Infected')

    # Same tick positions for both panels; computed once.
    all_timepoints = sorted(result_df['time'].unique())
    for ax in axs:
        ax.set_xlabel('Time')
        ax.set_ylabel('Normalized Delta ' + measurement)
        ax.set_xticks(all_timepoints)
        ax.set_xticklabels(all_timepoints, rotation=45, ha="right")

    plt.tight_layout()
    plt.show()
    return fig
|
592
|
+
|
593
|
+
def save_figure(fig, src, figure_number):
    """
    Save a figure as ``figure_<figure_number>.pdf`` in a 'results' folder.

    The 'results' folder is created (if needed) inside the directory that
    contains ``src``.

    Parameters:
        fig: Object with a ``savefig(path)`` method.
        src (str): Path whose parent directory receives the 'results' folder.
        figure_number: Value inserted into the output file name.

    Returns:
        None
    """
    output_dir = os.path.join(os.path.dirname(src), 'results')
    os.makedirs(output_dir, exist_ok=True)
    pdf_path = os.path.join(output_dir, f'figure_{figure_number}.pdf')
    fig.savefig(pdf_path)
    print(f'Saved figure:{pdf_path}')
|
600
|
+
|
601
|
+
def save_results_dataframe(df, src, results_name):
    """
    Write a DataFrame to ``<results_name>.csv`` in a 'results' folder.

    The 'results' folder is created (if needed) inside the directory that
    contains ``src``. The index is written to the file.

    Parameters:
        df (pandas.DataFrame): Table to write.
        src (str): Path whose parent directory receives the 'results' folder.
        results_name (str): Output file name without the '.csv' extension.

    Returns:
        None
    """
    output_dir = os.path.join(os.path.dirname(src), 'results')
    os.makedirs(output_dir, exist_ok=True)
    csv_path = os.path.join(output_dir, f'{results_name}.csv')
    df.to_csv(csv_path, index=True)
    print(f'Saved results:{csv_path}')
|
608
|
+
|
609
|
+
def summarize_per_well(peak_details_df):
    """
    Summarise detected peaks per well.

    Side effect: ``peak_details_df`` is modified in place. The columns
    'plate', 'row', 'column', 'field' and 'object_number' are (re)written from
    the underscore-separated 'ID' column, and 'well_ID' ('row' + '_' +
    'column') is added.

    Parameters:
        peak_details_df (pandas.DataFrame): One row per peak (or per cell
            without peaks) with at least the columns 'ID' and 'amplitude'.
            'ID' must split on '_' into exactly five parts.

    Returns:
        pandas.DataFrame: One row per well that has at least one non-null
        'amplitude', with 'peaks_per_well', 'unique_IDs_with_amplitude', the
        mean of every numeric column, 'cells_per_well' and 'peaks_per_cell'.
    """
    # Step 1: Split the 'ID' column
    split_columns = peak_details_df['ID'].str.split('_', expand=True)
    peak_details_df[['plate', 'row', 'column', 'field', 'object_number']] = split_columns

    # Step 2: Create 'well_ID' by combining 'row' and 'column'
    peak_details_df['well_ID'] = peak_details_df['row'] + '_' + peak_details_df['column']

    # Only rows that describe an actual peak contribute to the peak statistics.
    filtered_df = peak_details_df[peak_details_df['amplitude'].notna()]

    # Every numeric column of the filtered rows is averaged per well.
    numeric_cols = filtered_df.select_dtypes(include=['number']).columns

    # Step 3: Calculate summary statistics
    summary_df = filtered_df.groupby('well_ID').agg(
        peaks_per_well=('ID', 'size'),
        unique_IDs_with_amplitude=('ID', 'nunique'),  # Count unique IDs per well with non-null amplitude
        **{col: (col, 'mean') for col in numeric_cols}
    ).reset_index()

    # Cells are counted over ALL rows, including cells without peaks. Wells
    # without any peak are missing from summary_df, so the counts are looked
    # up by well_ID rather than copied across by row position.
    cells_per_well = peak_details_df.groupby('well_ID')['object_number'].nunique()
    summary_df['cells_per_well'] = summary_df['well_ID'].map(cells_per_well)
    summary_df['peaks_per_cell'] = summary_df['peaks_per_well'] / summary_df['cells_per_well']

    return summary_df
|
639
|
+
|
640
|
+
def summarize_per_well_inf_non_inf(peak_details_df):
    """
    Summarise peak rows per well, separately for infected and non-infected rows.

    Side effect: ``peak_details_df`` is modified in place. 'plate', 'row',
    'column', 'field' and 'object_number' are (re)written from the
    underscore-separated 'ID' column, and 'well_ID' and 'infected_status' are
    added.

    Parameters:
        peak_details_df (pandas.DataFrame): Needs the columns 'ID' and
            'infected'. Rows with ``infected > 0`` are labelled 'infected',
            all others 'non_infected'.

    Returns:
        pandas.DataFrame: One row per (well_ID, infected_status) with
        'cells_per_well', 'peaks_per_well', the mean of every numeric column
        and 'peaks_per_cell'.
    """
    # Break the ID into its five underscore-separated components.
    id_parts = peak_details_df['ID'].str.split('_', expand=True)
    peak_details_df[['plate', 'row', 'column', 'field', 'object_number']] = id_parts

    # A well is identified by its row and column.
    peak_details_df['well_ID'] = peak_details_df['row'] + '_' + peak_details_df['column']

    # Classify every row by its 'infected' value.
    peak_details_df['infected_status'] = np.where(
        peak_details_df['infected'] > 0, 'infected', 'non_infected'
    )

    # All numeric columns are averaged within each group.
    mean_aggregations = {
        name: (name, 'mean')
        for name in peak_details_df.select_dtypes(include=['number']).columns
    }

    per_well_and_status = peak_details_df.groupby(['well_ID', 'infected_status'])
    summary_df = per_well_and_status.agg(
        cells_per_well=('object_number', 'nunique'),
        peaks_per_well=('ID', 'size'),
        **mean_aggregations
    ).reset_index()

    summary_df['peaks_per_cell'] = summary_df['peaks_per_well'] / summary_df['cells_per_well']
    return summary_df
|
666
|
+
|
667
|
+
def analyze_calcium_oscillations(db_loc, measurement='cell_channel_1_mean_intensity', size_filter='cell_area', fluctuation_threshold=0.25, num_lines=None, peak_height=0.01, pathogen=None, cytoplasm=None, remove_transient=True, verbose=False, transience_threshold=0.9):
    """
    Detect peaks in a per-cell measurement over time and summarise them.

    The 'cell' table is read from the SQLite database, optionally merged with
    the 'pathogen' and 'cytoplasm' tables, corrected with a single exponential
    decay fitted to all data, differenced per cell and passed to
    ``scipy.signal.find_peaks``. Result tables and figures are written to a
    'results' folder next to ``db_loc``.

    Parameters:
        db_loc (str): Path to the SQLite database.
        measurement (str): Column of the cell table to analyse.
        size_filter (str): Column whose per-cell std/mean ratio must be
            <= ``fluctuation_threshold`` for the cell to be kept.
        fluctuation_threshold (float): Upper limit for that std/mean ratio.
        num_lines (int or None): If set and smaller than the number of kept
            cells, only this many randomly chosen cells are drawn in figure 1.
        peak_height (float): ``height`` argument for ``find_peaks``.
        pathogen: If truthy, merge the 'pathogen' table and produce the
            infected / non-infected comparison.
        cytoplasm: If truthy, merge the 'cytoplasm' table.
        remove_transient (bool): If True, drop cells with no more than
            ``int(transience_threshold * number_of_timepoints)`` rows.
        verbose (bool): Print progress details.
        transience_threshold (float): Fraction used by ``remove_transient``.

    Returns:
        tuple or None: ``(result_df, peak_details_df, fig)`` on success; None
        when the decay fit fails or no cell passes the filters.
    """
    merge_keys = ['plate', 'row', 'col', 'field', 'timeid', 'object_label']
    cell_col = 'plate_row_column_field_object'
    delta_col = 'delta_' + measurement
    corrected_col = 'corrected_' + measurement

    # Load data; the connection is closed even if a query or merge fails.
    conn = sqlite3.connect(db_loc)
    try:
        cell_df = pd.read_sql("SELECT * FROM cell", conn)

        if pathogen:
            pathogen_df = pd.read_sql("SELECT * FROM pathogen", conn)
            pathogen_df['pathogen_cell_id'] = pathogen_df['pathogen_cell_id'].astype(float).astype('Int64')
            pathogen_df = preprocess_pathogen_data(pathogen_df)
            cell_df = cell_df.merge(pathogen_df, on=merge_keys, how='left', suffixes=('', '_pathogen'))
            # Cells without a matching pathogen row carry no parasites.
            cell_df['parasite_count'] = cell_df['parasite_count'].fillna(0)
            print(f'After pathogen merge: {len(cell_df)} objects')

        # Optionally load cytoplasm table and merge
        if cytoplasm:
            cytoplasm_df = pd.read_sql("SELECT * FROM cytoplasm", conn)
            cell_df = cell_df.merge(cytoplasm_df, on=merge_keys, how='left', suffixes=('', '_cytoplasm'))
            print(f'After cytoplasm merge: {len(cell_df)} objects')
    finally:
        conn.close()

    # The per-cell loop below always reads 'parasite_count'; without pathogen
    # data every cell is treated as carrying no parasites.
    if 'parasite_count' not in cell_df.columns:
        cell_df['parasite_count'] = 0

    # Derive identifiers from 'prcf' (underscore-separated; the fifth part
    # holds the timepoint as 't<digits>').
    prcf_components = cell_df['prcf'].str.split('_', expand=True)
    cell_df['plate'] = prcf_components[0]
    cell_df['row'] = prcf_components[1]
    cell_df['column'] = prcf_components[2]
    cell_df['field'] = prcf_components[3]
    cell_df['time'] = prcf_components[4].str.extract(r't(\d+)', expand=False).astype(int)
    cell_df['object_number'] = cell_df['object_label']
    cell_df[cell_col] = cell_df['plate'].astype(str) + '_' + cell_df['row'].astype(str) + '_' + cell_df['column'].astype(str) + '_' + cell_df['field'].astype(str) + '_' + cell_df['object_label'].astype(str)

    df = cell_df.copy()

    # Fit one exponential decay to all data and divide it out.
    try:
        initial_guess = [df[measurement].max(), 0.01, df[measurement].min()]
        params, _ = curve_fit(exponential_decay, df['time'], df[measurement], p0=initial_guess, maxfev=10000)
        df[corrected_col] = df[measurement] / exponential_decay(df['time'], *params)
    except RuntimeError as e:
        print(f"Curve fitting failed for the entire dataset with error: {e}")
        return
    if verbose:
        print(f'Analyzing: {len(df)} objects')

    corrected_dfs = []
    peak_details_list = []
    total_timepoints = df['time'].nunique()
    size_filter_removed = 0
    transience_removed = 0
    # Minimum number of rows a cell must exceed; identical for every cell.
    threshold = int(transience_threshold * total_timepoints) if remove_transient else None

    for unique_id, group in df.groupby(cell_col):
        group = group.sort_values('time')

        if remove_transient:
            if verbose:
                print(f'Group length: {len(group)} Timelapse length: {total_timepoints}, threshold:{threshold}')
            if len(group) <= threshold:
                transience_removed += 1
                if verbose:
                    print(f'removed group {unique_id} due to transience')
                continue

        # Relative size fluctuation (std / mean) of the cell over time.
        size_diff = group[size_filter].std() / group[size_filter].mean()
        if not size_diff <= fluctuation_threshold:
            size_filter_removed += 1
            continue

        group[delta_col] = group[corrected_col].diff().fillna(0)
        corrected_dfs.append(group)

        # Detect peaks
        peaks, properties = find_peaks(group[delta_col], height=peak_height)

        # NOTE(review): `trapz` is the name imported at file level from
        # scipy.integrate; newer SciPy releases provide `trapezoid` instead.
        positive_delta = group[delta_col].clip(lower=0)  # values < 0 set to 0
        above_zero_auc = trapz(y=positive_delta, x=group['time'])
        auc = trapz(y=group[delta_col], x=group['time'])
        is_infected = 1 if (group['parasite_count'] > 0).any() else 0

        # Fields shared by every record of this cell (key order defines the
        # column order of the resulting table).
        cell_fields = {
            'ID': unique_id,
            'plate': group['plate'].iloc[0],
            'row': group['row'].iloc[0],
            'column': group['column'].iloc[0],
            'field': group['field'].iloc[0],
            'object_number': group['object_number'].iloc[0],
        }

        if len(peaks) == 0:
            # Keep one placeholder record so cells without peaks are counted.
            peak_details_list.append({
                **cell_fields,
                'time': np.nan,  # The time of the peak
                'amplitude': np.nan,
                'delta': np.nan,
                'AUC': auc,
                'AUC_positive': above_zero_auc,
                'AUC_peak': np.nan,
                'infected': is_infected
            })

        for i, peak in enumerate(peaks):
            # Integrate over the peak and its direct neighbours.
            start_idx = max(peak - 1, 0)
            end_idx = min(peak + 1, len(group) - 1)
            peak_segment_y = group[delta_col].iloc[start_idx:end_idx + 1]
            peak_segment_x = group['time'].iloc[start_idx:end_idx + 1]

            peak_details_list.append({
                **cell_fields,
                'time': group['time'].iloc[peak],  # The time of the peak
                'amplitude': properties['peak_heights'][i],
                'delta': group[delta_col].iloc[peak],
                'AUC': auc,
                'AUC_positive': above_zero_auc,
                'AUC_peak': trapz(y=peak_segment_y, x=peak_segment_x),
                # NOTE(review): peak rows store the parasite count at the peak
                # time, while placeholder rows store a 0/1 flag - confirm this
                # difference is intended.
                'infected': group['parasite_count'].iloc[peak]
            })

    if verbose:
        print(f'Removed {size_filter_removed} objects due to size filter fluctuation')
        print(f'Removed {transience_removed} objects due to transience')

    if not corrected_dfs:
        print("No suitable cells found for analysis")
        return
    result_df = pd.concat(corrected_dfs)

    peak_details_df = pd.DataFrame(peak_details_list)
    summary_df = summarize_per_well(peak_details_df)
    summary_df_inf_non_inf = summarize_per_well_inf_non_inf(peak_details_df)

    save_results_dataframe(df=peak_details_df, src=db_loc, results_name='peak_details')
    save_results_dataframe(df=result_df, src=db_loc, results_name='results')
    save_results_dataframe(df=summary_df, src=db_loc, results_name='well_results')
    save_results_dataframe(df=summary_df_inf_non_inf, src=db_loc, results_name='well_results_inf_non_inf')

    # Plotting
    fig, ax = plt.subplots(figsize=(10, 8))
    sampled_groups = result_df[cell_col].unique()
    if num_lines is not None and 0 < num_lines < len(sampled_groups):
        sampled_groups = np.random.choice(sampled_groups, size=num_lines, replace=False)

    for group_id in sampled_groups:
        group = result_df[result_df[cell_col] == group_id]
        ax.plot(group['time'], group[delta_col], marker='o', linestyle='-')

    timepoints = sorted(df['time'].unique())
    ax.set_xticks(timepoints)
    ax.set_xticklabels(timepoints, rotation=45, ha="right")
    ax.set_title(f'Normalized Delta of {measurement} Over Time (Corrected for Photobleaching)')
    ax.set_xlabel('Time')
    ax.set_ylabel('Normalized Delta ' + measurement)
    plt.tight_layout()

    plt.show()

    save_figure(fig, src=db_loc, figure_number=1)

    if pathogen:
        # Save the comparison figure itself. If the plotting helper hands back
        # no figure there is nothing correct to save, so figure 1 is not
        # written a second time under the name of figure 2.
        comparison_fig = infected_vs_noninfected(result_df, measurement)
        if comparison_fig is not None:
            save_figure(comparison_fig, src=db_loc, figure_number=2)

        # Identify cells with and without pathogens
        max_parasites = result_df.groupby(cell_col)['parasite_count'].transform('max')
        infected_cells = result_df.loc[max_parasites > 0, cell_col].unique()
        noninfected_cells = result_df.loc[max_parasites == 0, cell_col].unique()

        # Peaks in infected and noninfected cells
        infected_peaks = peak_details_df[peak_details_df['ID'].isin(infected_cells)]
        noninfected_peaks = peak_details_df[peak_details_df['ID'].isin(noninfected_cells)]

        # Calculate the average number of peaks per cell
        avg_inf_peaks_per_cell = len(infected_peaks) / len(infected_cells) if len(infected_cells) > 0 else 0
        avg_non_inf_peaks_per_cell = len(noninfected_peaks) / len(noninfected_cells) if len(noninfected_cells) > 0 else 0

        print(f'Average number of peaks per infected cell: {avg_inf_peaks_per_cell:.2f}')
        print(f'Average number of peaks per non-infected cell: {avg_non_inf_peaks_per_cell:.2f}')
    print(f'done')
    return result_df, peak_details_df, fig
|