spacr 0.4.15__py3-none-any.whl → 0.4.60__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- spacr/core.py +52 -9
- spacr/deep_spacr.py +2 -3
- spacr/gui_core.py +247 -41
- spacr/gui_elements.py +133 -2
- spacr/gui_utils.py +17 -15
- spacr/io.py +540 -55
- spacr/ml.py +141 -258
- spacr/plot.py +76 -34
- spacr/sequencing.py +73 -38
- spacr/settings.py +136 -128
- spacr/submodules.py +619 -213
- spacr/timelapse.py +25 -25
- spacr/toxo.py +23 -23
- spacr/utils.py +162 -89
- {spacr-0.4.15.dist-info → spacr-0.4.60.dist-info}/METADATA +2 -1
- {spacr-0.4.15.dist-info → spacr-0.4.60.dist-info}/RECORD +20 -20
- {spacr-0.4.15.dist-info → spacr-0.4.60.dist-info}/LICENSE +0 -0
- {spacr-0.4.15.dist-info → spacr-0.4.60.dist-info}/WHEEL +0 -0
- {spacr-0.4.15.dist-info → spacr-0.4.60.dist-info}/entry_points.txt +0 -0
- {spacr-0.4.15.dist-info → spacr-0.4.60.dist-info}/top_level.txt +0 -0
spacr/timelapse.py
CHANGED
@@ -533,14 +533,14 @@ def exponential_decay(x, a, b, c):
|
|
533
533
|
|
534
534
|
def preprocess_pathogen_data(pathogen_df):
|
535
535
|
# Group by identifiers and count the number of parasites
|
536
|
-
parasite_counts = pathogen_df.groupby(['
|
536
|
+
parasite_counts = pathogen_df.groupby(['plateID', 'rowID', 'column_name', 'fieldID', 'timeid', 'pathogen_cell_id']).size().reset_index(name='parasite_count')
|
537
537
|
|
538
538
|
# Aggregate numerical columns and take the first of object columns
|
539
|
-
agg_funcs = {col: 'mean' if np.issubdtype(pathogen_df[col].dtype, np.number) else 'first' for col in pathogen_df.columns if col not in ['
|
540
|
-
pathogen_agg = pathogen_df.groupby(['
|
539
|
+
agg_funcs = {col: 'mean' if np.issubdtype(pathogen_df[col].dtype, np.number) else 'first' for col in pathogen_df.columns if col not in ['plateID', 'rowID', 'column_name', 'fieldID', 'timeid', 'pathogen_cell_id', 'parasite_count']}
|
540
|
+
pathogen_agg = pathogen_df.groupby(['plateID', 'rowID', 'column_name', 'fieldID', 'timeid', 'pathogen_cell_id']).agg(agg_funcs).reset_index()
|
541
541
|
|
542
542
|
# Merge the counts back into the aggregated data
|
543
|
-
pathogen_agg = pathogen_agg.merge(parasite_counts, on=['
|
543
|
+
pathogen_agg = pathogen_agg.merge(parasite_counts, on=['plateID', 'rowID', 'column_name', 'fieldID', 'timeid', 'pathogen_cell_id'])
|
544
544
|
|
545
545
|
# Remove the object_label column as it corresponds to the pathogen ID not the cell ID
|
546
546
|
if 'object_label' in pathogen_agg.columns:
|
@@ -604,10 +604,10 @@ def save_results_dataframe(df, src, results_name):
|
|
604
604
|
def summarize_per_well(peak_details_df):
|
605
605
|
# Step 1: Split the 'ID' column
|
606
606
|
split_columns = peak_details_df['ID'].str.split('_', expand=True)
|
607
|
-
peak_details_df[['
|
607
|
+
peak_details_df[['plateID', 'rowID', 'columnID', 'fieldID', 'object_number']] = split_columns
|
608
608
|
|
609
|
-
# Step 2: Create 'well_ID' by combining '
|
610
|
-
peak_details_df['well_ID'] = peak_details_df['
|
609
|
+
# Step 2: Create 'well_ID' by combining 'rowID' and 'columnID'
|
610
|
+
peak_details_df['well_ID'] = peak_details_df['rowID'] + '_' + peak_details_df['columnID']
|
611
611
|
|
612
612
|
# Filter entries where 'amplitude' is not null
|
613
613
|
filtered_df = peak_details_df[peak_details_df['amplitude'].notna()]
|
@@ -635,10 +635,10 @@ def summarize_per_well(peak_details_df):
|
|
635
635
|
def summarize_per_well_inf_non_inf(peak_details_df):
|
636
636
|
# Step 1: Split the 'ID' column
|
637
637
|
split_columns = peak_details_df['ID'].str.split('_', expand=True)
|
638
|
-
peak_details_df[['
|
638
|
+
peak_details_df[['plateID', 'rowID', 'columnID', 'fieldID', 'object_number']] = split_columns
|
639
639
|
|
640
|
-
# Step 2: Create 'well_ID' by combining '
|
641
|
-
peak_details_df['well_ID'] = peak_details_df['
|
640
|
+
# Step 2: Create 'well_ID' by combining 'rowID' and 'columnID'
|
641
|
+
peak_details_df['well_ID'] = peak_details_df['rowID'] + '_' + peak_details_df['columnID']
|
642
642
|
|
643
643
|
# Assume 'pathogen_count' indicates infection if > 0
|
644
644
|
# Add an 'infected_status' column to classify cells
|
@@ -669,7 +669,7 @@ def analyze_calcium_oscillations(db_loc, measurement='cell_channel_1_mean_intens
|
|
669
669
|
pathogen_df = pd.read_sql("SELECT * FROM pathogen", conn)
|
670
670
|
pathogen_df['pathogen_cell_id'] = pathogen_df['pathogen_cell_id'].astype(float).astype('Int64')
|
671
671
|
pathogen_df = preprocess_pathogen_data(pathogen_df)
|
672
|
-
cell_df = cell_df.merge(pathogen_df, on=['
|
672
|
+
cell_df = cell_df.merge(pathogen_df, on=['plateID', 'rowID', 'column_name', 'fieldID', 'timeid', 'object_label'], how='left', suffixes=('', '_pathogen'))
|
673
673
|
cell_df['parasite_count'] = cell_df['parasite_count'].fillna(0)
|
674
674
|
print(f'After pathogen merge: {len(cell_df)} objects')
|
675
675
|
|
@@ -677,7 +677,7 @@ def analyze_calcium_oscillations(db_loc, measurement='cell_channel_1_mean_intens
|
|
677
677
|
if cytoplasm:
|
678
678
|
cytoplasm_df = pd.read_sql(f"SELECT * FROM {'cytoplasm'}", conn)
|
679
679
|
# Merge on specified columns
|
680
|
-
cell_df = cell_df.merge(cytoplasm_df, on=['
|
680
|
+
cell_df = cell_df.merge(cytoplasm_df, on=['plateID', 'rowID', 'column_name', 'fieldID', 'timeid', 'object_label'], how='left', suffixes=('', '_cytoplasm'))
|
681
681
|
|
682
682
|
print(f'After cytoplasm merge: {len(cell_df)} objects')
|
683
683
|
|
@@ -686,13 +686,13 @@ def analyze_calcium_oscillations(db_loc, measurement='cell_channel_1_mean_intens
|
|
686
686
|
# Continue with your existing processing on cell_df now containing merged data...
|
687
687
|
# Prepare DataFrame (use cell_df instead of df)
|
688
688
|
prcf_components = cell_df['prcf'].str.split('_', expand=True)
|
689
|
-
cell_df['
|
690
|
-
cell_df['
|
691
|
-
cell_df['
|
692
|
-
cell_df['
|
689
|
+
cell_df['plateID'] = prcf_components[0]
|
690
|
+
cell_df['rowID'] = prcf_components[1]
|
691
|
+
cell_df['columnID'] = prcf_components[2]
|
692
|
+
cell_df['fieldID'] = prcf_components[3]
|
693
693
|
cell_df['time'] = prcf_components[4].str.extract('t(\d+)').astype(int)
|
694
694
|
cell_df['object_number'] = cell_df['object_label']
|
695
|
-
cell_df['plate_row_column_field_object'] = cell_df['
|
695
|
+
cell_df['plate_row_column_field_object'] = cell_df['plateID'].astype(str) + '_' + cell_df['rowID'].astype(str) + '_' + cell_df['columnID'].astype(str) + '_' + cell_df['fieldID'].astype(str) + '_' + cell_df['object_label'].astype(str)
|
696
696
|
|
697
697
|
df = cell_df.copy()
|
698
698
|
|
@@ -752,10 +752,10 @@ def analyze_calcium_oscillations(db_loc, measurement='cell_channel_1_mean_intens
|
|
752
752
|
if len(peaks) == 0:
|
753
753
|
peak_details_list.append({
|
754
754
|
'ID': unique_id,
|
755
|
-
'
|
756
|
-
'
|
757
|
-
'
|
758
|
-
'
|
755
|
+
'plateID': group['plateID'].iloc[0],
|
756
|
+
'rowID': group['rowID'].iloc[0],
|
757
|
+
'columnID': group['columnID'].iloc[0],
|
758
|
+
'fieldID': group['fieldID'].iloc[0],
|
759
759
|
'object_number': group['object_number'].iloc[0],
|
760
760
|
'time': np.nan, # The time of the peak
|
761
761
|
'amplitude': np.nan,
|
@@ -783,10 +783,10 @@ def analyze_calcium_oscillations(db_loc, measurement='cell_channel_1_mean_intens
|
|
783
783
|
|
784
784
|
peak_details_list.append({
|
785
785
|
'ID': unique_id,
|
786
|
-
'
|
787
|
-
'
|
788
|
-
'
|
789
|
-
'
|
786
|
+
'plateID': group['plateID'].iloc[0],
|
787
|
+
'rowID': group['rowID'].iloc[0],
|
788
|
+
'columnID': group['columnID'].iloc[0],
|
789
|
+
'fieldID': group['fieldID'].iloc[0],
|
790
790
|
'object_number': group['object_number'].iloc[0],
|
791
791
|
'time': peak_time, # The time of the peak
|
792
792
|
'amplitude': amplitude,
|
spacr/toxo.py
CHANGED
@@ -494,25 +494,25 @@ def generate_score_heatmap(settings):
|
|
494
494
|
if 'column_name' in df.columns:
|
495
495
|
df = df[df['column_name']==column]
|
496
496
|
elif 'column' in df.columns:
|
497
|
-
df['
|
497
|
+
df['columnID'] = df['column']
|
498
498
|
df = df[df['column_name']==column]
|
499
499
|
if not plate is None:
|
500
|
-
df['
|
501
|
-
grouped_df = df.groupby(['
|
502
|
-
grouped_df['prc'] = grouped_df['
|
500
|
+
df['plateID'] = f"plate{plate}"
|
501
|
+
grouped_df = df.groupby(['plateID', 'rowID', 'column_name'])[data_column].mean().reset_index()
|
502
|
+
grouped_df['prc'] = grouped_df['plateID'].astype(str) + '_' + grouped_df['rowID'].astype(str) + '_' + grouped_df['column_name'].astype(str)
|
503
503
|
return grouped_df
|
504
504
|
|
505
505
|
def calculate_fraction_mixed_condition(csv, plate=1, column='c3', control_sgrnas = ['TGGT1_220950_1', 'TGGT1_233460_4']):
|
506
506
|
df = pd.read_csv(csv)
|
507
507
|
df = df[df['column_name']==column]
|
508
508
|
if plate not in df.columns:
|
509
|
-
df['
|
509
|
+
df['plateID'] = f"plate{plate}"
|
510
510
|
df = df[df['grna_name'].str.match(f'^{control_sgrnas[0]}$|^{control_sgrnas[1]}$')]
|
511
|
-
grouped_df = df.groupby(['
|
511
|
+
grouped_df = df.groupby(['plateID', 'rowID', 'column_name'])['count'].sum().reset_index()
|
512
512
|
grouped_df = grouped_df.rename(columns={'count': 'total_count'})
|
513
|
-
merged_df = pd.merge(df, grouped_df, on=['
|
513
|
+
merged_df = pd.merge(df, grouped_df, on=['plateID', 'rowID', 'column_name'])
|
514
514
|
merged_df['fraction'] = merged_df['count'] / merged_df['total_count']
|
515
|
-
merged_df['prc'] = merged_df['
|
515
|
+
merged_df['prc'] = merged_df['plateID'].astype(str) + '_' + merged_df['rowID'].astype(str) + '_' + merged_df['column_name'].astype(str)
|
516
516
|
return merged_df
|
517
517
|
|
518
518
|
def plot_multi_channel_heatmap(df, column='c3'):
|
@@ -524,17 +524,17 @@ def generate_score_heatmap(settings):
|
|
524
524
|
- column: Column to filter by (default is 'c3').
|
525
525
|
"""
|
526
526
|
# Extract row number and convert to integer for sorting
|
527
|
-
df['row_num'] = df['
|
527
|
+
df['row_num'] = df['rowID'].str.extract(r'(\d+)').astype(int)
|
528
528
|
|
529
529
|
# Filter and sort by plate, row, and column
|
530
530
|
df = df[df['column_name'] == column]
|
531
|
-
df = df.sort_values(by=['
|
531
|
+
df = df.sort_values(by=['plateID', 'row_num', 'column_name'])
|
532
532
|
|
533
533
|
# Drop temporary 'row_num' column after sorting
|
534
534
|
df = df.drop('row_num', axis=1)
|
535
535
|
|
536
536
|
# Create a new column combining plate, row, and column for the index
|
537
|
-
df['plate_row_col'] = df['
|
537
|
+
df['plate_row_col'] = df['plateID'] + '-' + df['rowID'] + '-' + df['column_name']
|
538
538
|
|
539
539
|
# Set 'plate_row_col' as the index
|
540
540
|
df.set_index('plate_row_col', inplace=True)
|
@@ -593,9 +593,9 @@ def generate_score_heatmap(settings):
|
|
593
593
|
df = pd.read_csv(csv_file) # Read CSV into DataFrame
|
594
594
|
df = df[df['column_name']==column]
|
595
595
|
if not plate is None:
|
596
|
-
df['
|
597
|
-
# Group the data by '
|
598
|
-
grouped_df = df.groupby(['
|
596
|
+
df['plateID'] = f"plate{plate}"
|
597
|
+
# Group the data by 'plateID', 'rowID', and 'column_name'
|
598
|
+
grouped_df = df.groupby(['plateID', 'rowID', 'column_name'])[data_column].mean().reset_index()
|
599
599
|
# Use the CSV filename to create a new column name
|
600
600
|
folder_name = os.path.dirname(csv_file).replace(".csv", "")
|
601
601
|
new_column_name = os.path.basename(f"{folder_name}_{data_column}")
|
@@ -606,8 +606,8 @@ def generate_score_heatmap(settings):
|
|
606
606
|
if combined_df is None:
|
607
607
|
combined_df = grouped_df
|
608
608
|
else:
|
609
|
-
combined_df = pd.merge(combined_df, grouped_df, on=['
|
610
|
-
combined_df['prc'] = combined_df['
|
609
|
+
combined_df = pd.merge(combined_df, grouped_df, on=['plateID', 'rowID', 'column_name'], how='outer')
|
610
|
+
combined_df['prc'] = combined_df['plateID'].astype(str) + '_' + combined_df['rowID'].astype(str) + '_' + combined_df['column_name'].astype(str)
|
611
611
|
return combined_df
|
612
612
|
|
613
613
|
def calculate_mae(df):
|
@@ -629,16 +629,16 @@ def generate_score_heatmap(settings):
|
|
629
629
|
mae_df = pd.DataFrame(mae_data)
|
630
630
|
return mae_df
|
631
631
|
|
632
|
-
result_df = combine_classification_scores(settings['folders'], settings['csv_name'], settings['data_column'], settings['
|
633
|
-
df = calculate_fraction_mixed_condition(settings['csv'], settings['
|
632
|
+
result_df = combine_classification_scores(settings['folders'], settings['csv_name'], settings['data_column'], settings['plateID'], settings['columnID'], )
|
633
|
+
df = calculate_fraction_mixed_condition(settings['csv'], settings['plateID'], settings['columnID'], settings['control_sgrnas'])
|
634
634
|
df = df[df['grna_name']==settings['fraction_grna']]
|
635
635
|
fraction_df = df[['fraction', 'prc']]
|
636
636
|
merged_df = pd.merge(fraction_df, result_df, on=['prc'])
|
637
|
-
cv_df = group_cv_score(settings['cv_csv'], settings['
|
637
|
+
cv_df = group_cv_score(settings['cv_csv'], settings['plateID'], settings['columnID'], settings['data_column_cv'])
|
638
638
|
cv_df = cv_df[[settings['data_column_cv'], 'prc']]
|
639
639
|
merged_df = pd.merge(merged_df, cv_df, on=['prc'])
|
640
640
|
|
641
|
-
fig = plot_multi_channel_heatmap(merged_df, settings['
|
641
|
+
fig = plot_multi_channel_heatmap(merged_df, settings['columnID'])
|
642
642
|
if 'row_number' in merged_df.columns:
|
643
643
|
merged_df = merged_df.drop('row_num', axis=1)
|
644
644
|
mae_df = calculate_mae(merged_df)
|
@@ -646,9 +646,9 @@ def generate_score_heatmap(settings):
|
|
646
646
|
mae_df = mae_df.drop('row_num', axis=1)
|
647
647
|
|
648
648
|
if not settings['dst'] is None:
|
649
|
-
mae_dst = os.path.join(settings['dst'], f"mae_scores_comparison_plate_{settings['
|
650
|
-
merged_dst = os.path.join(settings['dst'], f"scores_comparison_plate_{settings['
|
651
|
-
heatmap_save = os.path.join(settings['dst'], f"scores_comparison_plate_{settings['
|
649
|
+
mae_dst = os.path.join(settings['dst'], f"mae_scores_comparison_plate_{settings['plateID']}.csv")
|
650
|
+
merged_dst = os.path.join(settings['dst'], f"scores_comparison_plate_{settings['plateID']}_data.csv")
|
651
|
+
heatmap_save = os.path.join(settings['dst'], f"scores_comparison_plate_{settings['plateID']}.pdf")
|
652
652
|
mae_df.to_csv(mae_dst, index=False)
|
653
653
|
merged_df.to_csv(merged_dst, index=False)
|
654
654
|
fig.savefig(heatmap_save, format='pdf', dpi=600, bbox_inches='tight')
|