spacr 0.3.52__py3-none-any.whl → 0.3.60__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- spacr/gui_elements.py +1 -1
- spacr/gui_utils.py +0 -111
- spacr/io.py +114 -140
- spacr/measure.py +10 -11
- spacr/ml.py +55 -41
- spacr/plot.py +24 -293
- spacr/sequencing.py +13 -9
- spacr/settings.py +15 -9
- spacr/submodules.py +19 -19
- spacr/timelapse.py +16 -16
- spacr/toxo.py +15 -15
- spacr/utils.py +72 -164
- {spacr-0.3.52.dist-info → spacr-0.3.60.dist-info}/METADATA +1 -1
- {spacr-0.3.52.dist-info → spacr-0.3.60.dist-info}/RECORD +18 -18
- {spacr-0.3.52.dist-info → spacr-0.3.60.dist-info}/LICENSE +0 -0
- {spacr-0.3.52.dist-info → spacr-0.3.60.dist-info}/WHEEL +0 -0
- {spacr-0.3.52.dist-info → spacr-0.3.60.dist-info}/entry_points.txt +0 -0
- {spacr-0.3.52.dist-info → spacr-0.3.60.dist-info}/top_level.txt +0 -0
spacr/sequencing.py
CHANGED
@@ -125,7 +125,7 @@ def process_chunk(chunk_data):
         consensus_sequences.append(consensus_seq)
         column_sequence = match.group('column')
         grna_sequence = match.group('grna')
-        row_sequence = match.group('
+        row_sequence = match.group('row_name')
         columns.append(column_sequence)
         grnas.append(grna_sequence)
         rows.append(row_sequence)
@@ -176,7 +176,7 @@ def process_chunk(chunk_data):
         consensus_sequences.append(consensus_seq)
         column_sequence = match.group('column')
         grna_sequence = match.group('grna')
-        row_sequence = match.group('
+        row_sequence = match.group('row_name')
         columns.append(column_sequence)
         grnas.append(grna_sequence)
         rows.append(row_sequence)
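Note: match.group('row_name') only resolves if the compiled pattern declares a (?P<row_name>...) named group. A minimal standalone sketch of the mechanics; the barcode layout below is hypothetical, not the regex spacr actually compiles:

    import re

    # Hypothetical barcode layout: column, gRNA, and row tokens joined by '_'.
    pattern = re.compile(r'(?P<column>c\d+)_(?P<grna>g[A-Za-z0-9]+)_(?P<row_name>r\d+)')
    match = pattern.match('c1_gTGGT1_r3')
    if match:
        # group() lookups must use the exact names declared with (?P<name>...).
        print(match.group('column'), match.group('grna'), match.group('row_name'))  # c1 gTGGT1 r3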
@@ -532,7 +532,7 @@ def graph_sequencing_stats(settings):
     # Iterate through the fraction thresholds
     for threshold in fraction_thresholds:
         filtered_df = df[df['fraction'] >= threshold]
-        unique_count = filtered_df.groupby(['plate', '
+        unique_count = filtered_df.groupby(['plate', 'row_name', 'column'])['grna'].nunique().mean()
         results.append((threshold, unique_count))

     results_df = pd.DataFrame(results, columns=['fraction_threshold', 'unique_count'])
@@ -588,17 +588,21 @@ def graph_sequencing_stats(settings):
     # Apply the closest threshold to the DataFrame
     df = df[df['fraction'] >= closest_threshold]

-    # Group by 'plate', '
-    unique_counts = df.groupby(['plate', '
-    unique_count_mean = df.groupby(['plate', '
-    unique_count_std = df.groupby(['plate', '
+    # Group by 'plate', 'row_name', 'column' and compute unique counts of 'grna'
+    unique_counts = df.groupby(['plate', 'row_name', 'column'])['grna'].nunique().reset_index(name='unique_counts')
+    unique_count_mean = df.groupby(['plate', 'row_name', 'column'])['grna'].nunique().mean()
+    unique_count_std = df.groupby(['plate', 'row_name', 'column'])['grna'].nunique().std()

     # Merge the unique counts back into the original DataFrame
-    df = pd.merge(df, unique_counts, on=['plate', '
+    df = pd.merge(df, unique_counts, on=['plate', 'row_name', 'column'], how='left')

     print(f"unique_count mean: {unique_count_mean} std: {unique_count_std}")
-    display(df)
     #_plot_density(df, dependent_variable='unique_counts')
+
+    has_underscore = df['row_name'].str.contains('_').any()
+    if has_underscore:
+        df['row_name'] = df['row_name'].apply(lambda x: x.split('_')[1])
+
     plot_plates(df=df, variable='unique_counts', grouping='mean', min_max='allq', cmap='viridis',min_count=0, verbose=True, dst=dst)

     return closest_threshold
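The per-well uniqueness metric above is plain pandas: count distinct gRNAs within each (plate, row_name, column) well, then average across wells. A minimal sketch with made-up toy data, not spacr's code:

    import pandas as pd

    # One row per observed (well, gRNA) read; values are hypothetical.
    df = pd.DataFrame({
        'plate':    ['p1', 'p1', 'p1', 'p1'],
        'row_name': ['r1', 'r1', 'r2', 'r2'],
        'column':   ['c1', 'c1', 'c1', 'c1'],
        'grna':     ['gA', 'gB', 'gA', 'gA'],
    })

    # Distinct gRNAs per well, then the mean across wells.
    per_well = (df.groupby(['plate', 'row_name', 'column'])['grna']
                  .nunique()
                  .reset_index(name='unique_counts'))
    print(per_well)                          # well r1 -> 2, well r2 -> 1
    print(per_well['unique_counts'].mean())  # 1.5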
spacr/settings.py
CHANGED
@@ -198,7 +198,7 @@ def set_default_umap_image_settings(settings={}):
     settings.setdefault('smooth_lines', True)
     settings.setdefault('clustering', 'dbscan')
     settings.setdefault('exclude', None)
-    settings.setdefault('col_to_compare', '
+    settings.setdefault('col_to_compare', 'column_name')
     settings.setdefault('pos', 'c1')
     settings.setdefault('neg', 'c2')
     settings.setdefault('embedding_by_controls', False)
@@ -289,7 +289,7 @@ def set_default_analyze_screen(settings):
     settings.setdefault('minimum_cell_count',25)
     settings.setdefault('n_estimators',100)
     settings.setdefault('test_size',0.2)
-    settings.setdefault('location_column','
+    settings.setdefault('location_column','column_name')
     settings.setdefault('positive_control','c2')
     settings.setdefault('negative_control','c1')
     settings.setdefault('exclude',None)
@@ -337,8 +337,9 @@ def set_default_train_test_model(settings):
     return settings

 def set_generate_training_dataset_defaults(settings):
-
+
     settings.setdefault('src','path')
+    settings.setdefault('tables',['cell', 'nucleus', 'pathogen', 'cytoplasm'])
     settings.setdefault('dataset_mode','metadata')
     settings.setdefault('annotation_column','test')
     settings.setdefault('annotated_classes',[1,2])
@@ -346,7 +347,7 @@ def set_generate_training_dataset_defaults(settings):
     settings.setdefault('size',224)
     settings.setdefault('test_split',0.1)
     settings.setdefault('class_metadata',[['c1'],['c2']])
-    settings.setdefault('metadata_type_by','
+    settings.setdefault('metadata_type_by','column_name')
     settings.setdefault('channel_of_interest',3)
     settings.setdefault('custom_measurement',None)
     settings.setdefault('tables',None)
@@ -369,7 +370,7 @@ def deep_spacr_defaults(settings):
     settings.setdefault('size',224)
     settings.setdefault('test_split',0.1)
     settings.setdefault('class_metadata',[['c1'],['c2']])
-    settings.setdefault('metadata_type_by','
+    settings.setdefault('metadata_type_by','column_name')
     settings.setdefault('channel_of_interest',3)
     settings.setdefault('custom_measurement',None)
     settings.setdefault('tables',None)
@@ -453,7 +454,7 @@ def get_analyze_recruitment_default_settings(settings):
     settings.setdefault('pathogen_plate_metadata',[['c1', 'c2', 'c3'],['c4','c5', 'c6']])
     settings.setdefault('treatments',['cm', 'lovastatin'])
     settings.setdefault('treatment_plate_metadata',[['r1', 'r2','r3'], ['r4', 'r5','r6']])
-    settings.setdefault('metadata_types',['
+    settings.setdefault('metadata_types',['column_name', 'column_name', 'row_name'])
     settings.setdefault('channel_dims',[0,1,2,3])
     settings.setdefault('cell_chann_dim',3)
     settings.setdefault('cell_mask_dim',4)
@@ -531,18 +532,22 @@ def get_perform_regression_default_settings(settings):
     settings.setdefault('score_data','list of paths')
     settings.setdefault('positive_control','239740')
     settings.setdefault('negative_control','233460')
+    settings.setdefault('min_n',0)
     settings.setdefault('controls',['000000_1','000000_10','000000_11','000000_12','000000_13','000000_14','000000_15','000000_16','000000_17','000000_18','000000_19','000000_20','000000_21','000000_22','000000_23','000000_24','000000_25','000000_26','000000_27','000000_28','000000_29','000000_3','000000_30','000000_31','000000_32','000000_4','000000_5','000000_6','000000_8','000000_9'])
-    settings.setdefault('fraction_threshold',
+    settings.setdefault('fraction_threshold',None)
     settings.setdefault('dependent_variable','pred')
     settings.setdefault('threshold_method','std')
     settings.setdefault('threshold_multiplier',3)
+    settings.setdefault('target_unique_count',5)
     settings.setdefault('transform',None)
+    settings.setdefault('log_x',False)
+    settings.setdefault('log_y',False)
+    settings.setdefault('x_lim',None)
     settings.setdefault('agg_type','mean')
-    settings.setdefault('min_cell_count',
+    settings.setdefault('min_cell_count',None)
     settings.setdefault('regression_type','ols')
     settings.setdefault('random_row_column_effects',False)
     settings.setdefault('split_axis_lims','')
-    settings.setdefault('plate','')
     settings.setdefault('cov_type',None)
     settings.setdefault('alpha',1)
     settings.setdefault('filter_value',['c1', 'c2', 'c3'])
@@ -557,6 +562,7 @@ def get_perform_regression_default_settings(settings):
     print(f"Using alpha as quantile for quantile regression, alpha: {settings['alpha']}")
     settings['agg_type'] = None
     print(f'agg_type set to None for quantile regression')
+
     return settings

 def get_check_cellpose_models_default_settings(settings):
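All of these defaults functions rely on dict.setdefault, which only fills keys the caller left unset, so user-supplied values always win over the defaults added or changed above. A minimal sketch with toy keys, not a full spacr settings dict:

    settings = {'fraction_threshold': 0.1}           # caller-provided value
    settings.setdefault('fraction_threshold', None)  # no-op: key already present
    settings.setdefault('min_cell_count', None)      # fills the missing key
    print(settings)  # {'fraction_threshold': 0.1, 'min_cell_count': None}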
spacr/submodules.py
CHANGED
@@ -341,17 +341,17 @@ def count_phenotypes(settings):
     unique_values_count = df[settings['annotation_column']].nunique(dropna=True)
     print(f"Unique values in {settings['annotation_column']} (excluding NaN): {unique_values_count}")

-    # Count unique values in 'value' column, grouped by 'plate', '
-    grouped_unique_count = df.groupby(['plate', '
+    # Count unique values in 'value' column, grouped by 'plate', 'row_name', 'column'
+    grouped_unique_count = df.groupby(['plate', 'row_name', 'column'])[settings['annotation_column']].nunique(dropna=True).reset_index(name='unique_count')
     display(grouped_unique_count)

     save_path = os.path.join(settings['src'], 'phenotype_counts.csv')

     # Group by plate, row, and column, then count the occurrences of each unique value
-    grouped_counts = df.groupby(['plate', '
+    grouped_counts = df.groupby(['plate', 'row_name', 'column', 'value']).size().reset_index(name='count')

     # Pivot the DataFrame so that unique values are columns and their counts are in the rows
-    pivot_df = grouped_counts.pivot_table(index=['plate', '
+    pivot_df = grouped_counts.pivot_table(index=['plate', 'row_name', 'column'], columns='value', values='count', fill_value=0)

     # Flatten the multi-level columns
     pivot_df.columns = [f"value_{int(col)}" for col in pivot_df.columns]
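The count-then-pivot step is a standard pandas idiom: size() per (well, value) pair, then pivot_table spreads each annotation value into its own column. A minimal sketch with hypothetical data, not spacr's code:

    import pandas as pd

    # Toy per-object annotations; values are made up.
    df = pd.DataFrame({
        'plate':    ['p1'] * 4,
        'row_name': ['r1', 'r1', 'r1', 'r2'],
        'column':   ['c1'] * 4,
        'value':    [1, 1, 2, 2],
    })

    counts = df.groupby(['plate', 'row_name', 'column', 'value']).size().reset_index(name='count')
    pivot_df = counts.pivot_table(index=['plate', 'row_name', 'column'],
                                  columns='value', values='count', fill_value=0)
    pivot_df.columns = [f"value_{int(col)}" for col in pivot_df.columns]
    print(pivot_df)  # value_1 and value_2 counts per well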
@@ -376,17 +376,17 @@ def compare_reads_to_scores(reads_csv, scores_csv, empirical_dict={'r1':(90,10),
                             column='column', value='c3', plate=None, save_paths=None):

     def calculate_well_score_fractions(df, class_columns='cv_predictions'):
-        if all(col in df.columns for col in ['plate', '
-            df['prc'] = df['plate'] + '_' + df['
+        if all(col in df.columns for col in ['plate', 'row_name', 'column']):
+            df['prc'] = df['plate'] + '_' + df['row_name'] + '_' + df['column']
         else:
-            raise ValueError("Cannot find 'plate', '
-        prc_summary = df.groupby(['plate', '
-        well_counts = (df.groupby(['plate', '
+            raise ValueError("Cannot find 'plate', 'row_name', or 'column' in df.columns")
+        prc_summary = df.groupby(['plate', 'row_name', 'column', 'prc']).size().reset_index(name='total_rows')
+        well_counts = (df.groupby(['plate', 'row_name', 'column', 'prc', class_columns])
                        .size()
                        .unstack(fill_value=0)
                        .reset_index()
                        .rename(columns={0: 'class_0', 1: 'class_1'}))
-        summary_df = pd.merge(prc_summary, well_counts, on=['plate', '
+        summary_df = pd.merge(prc_summary, well_counts, on=['plate', 'row_name', 'column', 'prc'], how='left')
         summary_df['class_0_fraction'] = summary_df['class_0'] / summary_df['total_rows']
         summary_df['class_1_fraction'] = summary_df['class_1'] / summary_df['total_rows']
         return summary_df
@@ -481,8 +481,8 @@ def compare_reads_to_scores(reads_csv, scores_csv, empirical_dict={'r1':(90,10),
         return result

     def calculate_well_read_fraction(df, count_column='count'):
-        if all(col in df.columns for col in ['plate', '
-            df['prc'] = df['plate'] + '_' + df['
+        if all(col in df.columns for col in ['plate', 'row_name', 'column']):
+            df['prc'] = df['plate'] + '_' + df['row_name'] + '_' + df['column']
         else:
             raise ValueError("Cannot find plate, row or column in df.columns")
         grouped_df = df.groupby('prc')[count_column].sum().reset_index()
@@ -501,18 +501,18 @@ def compare_reads_to_scores(reads_csv, scores_csv, empirical_dict={'r1':(90,10),
         reads_df_temp['plate'] = f"plate{i+1}"
         scores_df_temp['plate'] = f"plate{i+1}"

-        if 'col' in reads_df_temp.columns:
-            reads_df_temp = reads_df_temp.rename(columns={'col': 'column'})
         if 'column_name' in reads_df_temp.columns:
             reads_df_temp = reads_df_temp.rename(columns={'column_name': 'column'})
-        if '
-
+        if 'column_name' in reads_df_temp.columns:
+            reads_df_temp = reads_df_temp.rename(columns={'column_name': 'column'})
+        if 'column_name' in scores_df_temp.columns:
+            scores_df_temp = scores_df_temp.rename(columns={'column_name': 'column'})
         if 'column_name' in scores_df_temp.columns:
             scores_df_temp = scores_df_temp.rename(columns={'column_name': 'column'})
         if 'row_name' in reads_df_temp.columns:
-            reads_df_temp = reads_df_temp.rename(columns={'row_name': '
+            reads_df_temp = reads_df_temp.rename(columns={'row_name': 'row_name'})
         if 'row_name' in scores_df_temp.columns:
-            scores_df_temp = scores_df_temp.rename(columns={'row_name': '
+            scores_df_temp = scores_df_temp.rename(columns={'row_name': 'row_name'})

         reads_ls.append(reads_df_temp)
         scores_ls.append(scores_df_temp)
@@ -539,7 +539,7 @@ def compare_reads_to_scores(reads_csv, scores_csv, empirical_dict={'r1':(90,10),

     df_emp = pd.DataFrame([(key, val[0], val[1], val[0] / (val[0] + val[1]), val[1] / (val[0] + val[1])) for key, val in empirical_dict.items()],columns=['key', 'value1', 'value2', 'pc_fraction', 'nc_fraction'])

-    df = pd.merge(df, df_emp, left_on='
+    df = pd.merge(df, df_emp, left_on='row_name', right_on='key')

     if any in y_columns not in df.columns:
         print(f"columns in dataframe:")
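calculate_well_score_fractions turns per-object class predictions into per-well fractions via the groupby().size().unstack() chain shown above. A minimal sketch with hypothetical predictions:

    import pandas as pd

    # Toy per-object binary predictions; values are made up.
    df = pd.DataFrame({
        'prc': ['p1_r1_c1', 'p1_r1_c1', 'p1_r1_c1', 'p1_r2_c1', 'p1_r2_c1'],
        'cv_predictions': [0, 1, 1, 0, 0],
    })

    # Count each predicted class per well, spread classes into columns.
    well_counts = (df.groupby(['prc', 'cv_predictions'])
                     .size()
                     .unstack(fill_value=0)
                     .reset_index()
                     .rename(columns={0: 'class_0', 1: 'class_1'}))
    well_counts['class_1_fraction'] = well_counts['class_1'] / (well_counts['class_0'] + well_counts['class_1'])
    print(well_counts)  # p1_r1_c1 -> 2/3 class 1, p1_r2_c1 -> 0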
spacr/timelapse.py
CHANGED
@@ -533,14 +533,14 @@ def exponential_decay(x, a, b, c):

 def preprocess_pathogen_data(pathogen_df):
     # Group by identifiers and count the number of parasites
-    parasite_counts = pathogen_df.groupby(['plate', '
+    parasite_counts = pathogen_df.groupby(['plate', 'row_name', 'column_name', 'field', 'timeid', 'pathogen_cell_id']).size().reset_index(name='parasite_count')

     # Aggregate numerical columns and take the first of object columns
-    agg_funcs = {col: 'mean' if np.issubdtype(pathogen_df[col].dtype, np.number) else 'first' for col in pathogen_df.columns if col not in ['plate', '
-    pathogen_agg = pathogen_df.groupby(['plate', '
+    agg_funcs = {col: 'mean' if np.issubdtype(pathogen_df[col].dtype, np.number) else 'first' for col in pathogen_df.columns if col not in ['plate', 'row_name', 'column_name', 'field', 'timeid', 'pathogen_cell_id', 'parasite_count']}
+    pathogen_agg = pathogen_df.groupby(['plate', 'row_name', 'column_name', 'field', 'timeid', 'pathogen_cell_id']).agg(agg_funcs).reset_index()

     # Merge the counts back into the aggregated data
-    pathogen_agg = pathogen_agg.merge(parasite_counts, on=['plate', '
+    pathogen_agg = pathogen_agg.merge(parasite_counts, on=['plate', 'row_name', 'column_name', 'field', 'timeid', 'pathogen_cell_id'])

     # Remove the object_label column as it corresponds to the pathogen ID not the cell ID
     if 'object_label' in pathogen_agg.columns:
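The agg_funcs dict comprehension builds a per-column aggregation map: numeric columns get 'mean', everything else keeps its first value. A minimal sketch of the same idiom with toy data:

    import numpy as np
    import pandas as pd

    # Toy table: two pathogen rows belonging to the same cell.
    df = pd.DataFrame({'cell_id': [7, 7], 'area': [10.0, 20.0], 'label': ['a', 'b']})
    keys = ['cell_id']

    # Numeric columns are averaged; object columns keep their first value.
    agg_funcs = {col: 'mean' if np.issubdtype(df[col].dtype, np.number) else 'first'
                 for col in df.columns if col not in keys}
    print(df.groupby(keys).agg(agg_funcs).reset_index())  # area=15.0, label='a'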
@@ -604,10 +604,10 @@ def save_results_dataframe(df, src, results_name):
 def summarize_per_well(peak_details_df):
     # Step 1: Split the 'ID' column
     split_columns = peak_details_df['ID'].str.split('_', expand=True)
-    peak_details_df[['plate', '
+    peak_details_df[['plate', 'row_name', 'column', 'field', 'object_number']] = split_columns

-    # Step 2: Create 'well_ID' by combining '
-    peak_details_df['well_ID'] = peak_details_df['
+    # Step 2: Create 'well_ID' by combining 'row_name' and 'column'
+    peak_details_df['well_ID'] = peak_details_df['row_name'] + '_' + peak_details_df['column']

     # Filter entries where 'amplitude' is not null
     filtered_df = peak_details_df[peak_details_df['amplitude'].notna()]
@@ -635,10 +635,10 @@ def summarize_per_well(peak_details_df):
 def summarize_per_well_inf_non_inf(peak_details_df):
     # Step 1: Split the 'ID' column
     split_columns = peak_details_df['ID'].str.split('_', expand=True)
-    peak_details_df[['plate', '
+    peak_details_df[['plate', 'row_name', 'column', 'field', 'object_number']] = split_columns

-    # Step 2: Create 'well_ID' by combining '
-    peak_details_df['well_ID'] = peak_details_df['
+    # Step 2: Create 'well_ID' by combining 'row_name' and 'column'
+    peak_details_df['well_ID'] = peak_details_df['row_name'] + '_' + peak_details_df['column']

     # Assume 'pathogen_count' indicates infection if > 0
     # Add an 'infected_status' column to classify cells
@@ -669,7 +669,7 @@ def analyze_calcium_oscillations(db_loc, measurement='cell_channel_1_mean_intens
     pathogen_df = pd.read_sql("SELECT * FROM pathogen", conn)
     pathogen_df['pathogen_cell_id'] = pathogen_df['pathogen_cell_id'].astype(float).astype('Int64')
     pathogen_df = preprocess_pathogen_data(pathogen_df)
-    cell_df = cell_df.merge(pathogen_df, on=['plate', '
+    cell_df = cell_df.merge(pathogen_df, on=['plate', 'row_name', 'column_name', 'field', 'timeid', 'object_label'], how='left', suffixes=('', '_pathogen'))
     cell_df['parasite_count'] = cell_df['parasite_count'].fillna(0)
     print(f'After pathogen merge: {len(cell_df)} objects')

@@ -677,7 +677,7 @@ def analyze_calcium_oscillations(db_loc, measurement='cell_channel_1_mean_intens
     if cytoplasm:
         cytoplasm_df = pd.read_sql(f"SELECT * FROM {'cytoplasm'}", conn)
         # Merge on specified columns
-        cell_df = cell_df.merge(cytoplasm_df, on=['plate', '
+        cell_df = cell_df.merge(cytoplasm_df, on=['plate', 'row_name', 'column_name', 'field', 'timeid', 'object_label'], how='left', suffixes=('', '_cytoplasm'))

         print(f'After cytoplasm merge: {len(cell_df)} objects')

@@ -687,12 +687,12 @@ def analyze_calcium_oscillations(db_loc, measurement='cell_channel_1_mean_intens
     # Prepare DataFrame (use cell_df instead of df)
     prcf_components = cell_df['prcf'].str.split('_', expand=True)
     cell_df['plate'] = prcf_components[0]
-    cell_df['
+    cell_df['row_name'] = prcf_components[1]
     cell_df['column'] = prcf_components[2]
     cell_df['field'] = prcf_components[3]
     cell_df['time'] = prcf_components[4].str.extract('t(\d+)').astype(int)
     cell_df['object_number'] = cell_df['object_label']
-    cell_df['plate_row_column_field_object'] = cell_df['plate'].astype(str) + '_' + cell_df['
+    cell_df['plate_row_column_field_object'] = cell_df['plate'].astype(str) + '_' + cell_df['row_name'].astype(str) + '_' + cell_df['column'].astype(str) + '_' + cell_df['field'].astype(str) + '_' + cell_df['object_label'].astype(str)

     df = cell_df.copy()

@@ -753,7 +753,7 @@ def analyze_calcium_oscillations(db_loc, measurement='cell_channel_1_mean_intens
     peak_details_list.append({
         'ID': unique_id,
         'plate': group['plate'].iloc[0],
-        '
+        'row_name': group['row_name'].iloc[0],
         'column': group['column'].iloc[0],
         'field': group['field'].iloc[0],
         'object_number': group['object_number'].iloc[0],
@@ -784,7 +784,7 @@ def analyze_calcium_oscillations(db_loc, measurement='cell_channel_1_mean_intens
     peak_details_list.append({
         'ID': unique_id,
         'plate': group['plate'].iloc[0],
-        '
+        'row_name': group['row_name'].iloc[0],
         'column': group['column'].iloc[0],
         'field': group['field'].iloc[0],
         'object_number': group['object_number'].iloc[0],
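Both summarize functions recover well coordinates by splitting the underscore-delimited ID and rebuilding a per-well key. A minimal sketch with hypothetical IDs:

    import pandas as pd

    # Toy 'plate_row_column_field_object'-style IDs; values are made up.
    peak_details_df = pd.DataFrame({'ID': ['p1_r1_c1_f1_3', 'p1_r2_c1_f2_7']})

    # Split the ID into its components and rebuild a per-well key.
    split_columns = peak_details_df['ID'].str.split('_', expand=True)
    peak_details_df[['plate', 'row_name', 'column', 'field', 'object_number']] = split_columns
    peak_details_df['well_ID'] = peak_details_df['row_name'] + '_' + peak_details_df['column']
    print(peak_details_df[['well_ID', 'object_number']])  # r1_c1 / r2_c1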
spacr/toxo.py
CHANGED
@@ -468,15 +468,15 @@ def generate_score_heatmap(settings):
     def group_cv_score(csv, plate=1, column='c3', data_column='pred'):

         df = pd.read_csv(csv)
-        if '
-        df = df[df['
+        if 'column_name' in df.columns:
+            df = df[df['column_name']==column]
         elif 'column' in df.columns:
-            df['
-            df = df[df['
+            df['column_name'] = df['column']
+            df = df[df['column_name']==column]
         if not plate is None:
             df['plate'] = f"plate{plate}"
-        grouped_df = df.groupby(['plate', '
-        grouped_df['prc'] = grouped_df['plate'].astype(str) + '_' + grouped_df['
+        grouped_df = df.groupby(['plate', 'row_name', 'column_name'])[data_column].mean().reset_index()
+        grouped_df['prc'] = grouped_df['plate'].astype(str) + '_' + grouped_df['row_name'].astype(str) + '_' + grouped_df['column_name'].astype(str)
         return grouped_df

     def calculate_fraction_mixed_condition(csv, plate=1, column='c3', control_sgrnas = ['TGGT1_220950_1', 'TGGT1_233460_4']):
@@ -501,17 +501,17 @@ def generate_score_heatmap(settings):
         - column: Column to filter by (default is 'c3').
         """
         # Extract row number and convert to integer for sorting
-        df['row_num'] = df['
+        df['row_num'] = df['row_name'].str.extract(r'(\d+)').astype(int)

         # Filter and sort by plate, row, and column
-        df = df[df['
-        df = df.sort_values(by=['plate', 'row_num', '
+        df = df[df['column_name'] == column]
+        df = df.sort_values(by=['plate', 'row_num', 'column_name'])

         # Drop temporary 'row_num' column after sorting
         df = df.drop('row_num', axis=1)

         # Create a new column combining plate, row, and column for the index
-        df['plate_row_col'] = df['plate'] + '-' + df['
+        df['plate_row_col'] = df['plate'] + '-' + df['row_name'] + '-' + df['column_name']

         # Set 'plate_row_col' as the index
         df.set_index('plate_row_col', inplace=True)
@@ -568,11 +568,11 @@ def generate_score_heatmap(settings):
     # Loop through all collected CSV files and process them
     for csv_file in ls:
         df = pd.read_csv(csv_file)  # Read CSV into DataFrame
-        df = df[df['
+        df = df[df['column_name']==column]
         if not plate is None:
             df['plate'] = f"plate{plate}"
-        # Group the data by 'plate', '
-        grouped_df = df.groupby(['plate', '
+        # Group the data by 'plate', 'row_name', and 'column_name'
+        grouped_df = df.groupby(['plate', 'row_name', 'column_name'])[data_column].mean().reset_index()
         # Use the CSV filename to create a new column name
         folder_name = os.path.dirname(csv_file).replace(".csv", "")
         new_column_name = os.path.basename(f"{folder_name}_{data_column}")
@@ -583,8 +583,8 @@ def generate_score_heatmap(settings):
         if combined_df is None:
             combined_df = grouped_df
         else:
-            combined_df = pd.merge(combined_df, grouped_df, on=['plate', '
-            combined_df['prc'] = combined_df['plate'].astype(str) + '_' + combined_df['
+            combined_df = pd.merge(combined_df, grouped_df, on=['plate', 'row_name', 'column_name'], how='outer')
+            combined_df['prc'] = combined_df['plate'].astype(str) + '_' + combined_df['row_name'].astype(str) + '_' + combined_df['column_name'].astype(str)
     return combined_df

 def calculate_mae(df):
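The combine step above accumulates one column of per-well means per CSV by repeatedly outer-merging on the well keys, so wells present in only one source survive with NaN elsewhere. A minimal sketch; the data and per-source column names are hypothetical:

    import pandas as pd

    # Two per-source tables of well means; values are made up.
    a = pd.DataFrame({'plate': ['plate1'], 'row_name': ['r1'], 'column_name': ['c3'], 'src_a_pred': [0.8]})
    b = pd.DataFrame({'plate': ['plate1'], 'row_name': ['r2'], 'column_name': ['c3'], 'src_b_pred': [0.4]})

    combined = pd.merge(a, b, on=['plate', 'row_name', 'column_name'], how='outer')
    combined['prc'] = (combined['plate'].astype(str) + '_' +
                       combined['row_name'].astype(str) + '_' +
                       combined['column_name'].astype(str))
    print(combined)  # two rows; each source column is NaN where that source had no data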
|