spacr 0.3.52__py3-none-any.whl → 0.3.60__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between the versions exactly as they appear in the public registry.
spacr/sequencing.py CHANGED
@@ -125,7 +125,7 @@ def process_chunk(chunk_data):
         consensus_sequences.append(consensus_seq)
         column_sequence = match.group('column')
         grna_sequence = match.group('grna')
-        row_sequence = match.group('row')
+        row_sequence = match.group('row_name')
         columns.append(column_sequence)
         grnas.append(grna_sequence)
         rows.append(row_sequence)
@@ -176,7 +176,7 @@ def process_chunk(chunk_data):
         consensus_sequences.append(consensus_seq)
         column_sequence = match.group('column')
         grna_sequence = match.group('grna')
-        row_sequence = match.group('row')
+        row_sequence = match.group('row_name')
         columns.append(column_sequence)
         grnas.append(grna_sequence)
         rows.append(row_sequence)
@@ -532,7 +532,7 @@ def graph_sequencing_stats(settings):
     # Iterate through the fraction thresholds
     for threshold in fraction_thresholds:
         filtered_df = df[df['fraction'] >= threshold]
-        unique_count = filtered_df.groupby(['plate', 'row', 'column'])['grna'].nunique().mean()
+        unique_count = filtered_df.groupby(['plate', 'row_name', 'column'])['grna'].nunique().mean()
         results.append((threshold, unique_count))

     results_df = pd.DataFrame(results, columns=['fraction_threshold', 'unique_count'])
@@ -588,17 +588,21 @@ def graph_sequencing_stats(settings):
     # Apply the closest threshold to the DataFrame
     df = df[df['fraction'] >= closest_threshold]

-    # Group by 'plate', 'row', 'column' and compute unique counts of 'grna'
-    unique_counts = df.groupby(['plate', 'row', 'column'])['grna'].nunique().reset_index(name='unique_counts')
-    unique_count_mean = df.groupby(['plate', 'row', 'column'])['grna'].nunique().mean()
-    unique_count_std = df.groupby(['plate', 'row', 'column'])['grna'].nunique().std()
+    # Group by 'plate', 'row_name', 'column' and compute unique counts of 'grna'
+    unique_counts = df.groupby(['plate', 'row_name', 'column'])['grna'].nunique().reset_index(name='unique_counts')
+    unique_count_mean = df.groupby(['plate', 'row_name', 'column'])['grna'].nunique().mean()
+    unique_count_std = df.groupby(['plate', 'row_name', 'column'])['grna'].nunique().std()

     # Merge the unique counts back into the original DataFrame
-    df = pd.merge(df, unique_counts, on=['plate', 'row', 'column'], how='left')
+    df = pd.merge(df, unique_counts, on=['plate', 'row_name', 'column'], how='left')

     print(f"unique_count mean: {unique_count_mean} std: {unique_count_std}")
-    display(df)
     #_plot_density(df, dependent_variable='unique_counts')
+
+    has_underscore = df['row_name'].str.contains('_').any()
+    if has_underscore:
+        df['row_name'] = df['row_name'].apply(lambda x: x.split('_')[1])
+
     plot_plates(df=df, variable='unique_counts', grouping='mean', min_max='allq', cmap='viridis',min_count=0, verbose=True, dst=dst)

     return closest_threshold
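
Note: `match.group('row_name')` only succeeds when the compiled barcode regex defines a named group called `row_name`, so this rename implies a matching change to the pattern, which is defined elsewhere in sequencing.py and not shown in this diff. A minimal sketch of the renamed lookup, using a hypothetical barcode layout (group lengths and order are assumptions for illustration only):

    import re

    # Hypothetical stand-in for spacr's barcode pattern.
    pattern = re.compile(
        r'(?P<column>[ACGT]{8})'    # column barcode
        r'(?P<grna>[ACGT]{20})'     # gRNA sequence
        r'(?P<row_name>[ACGT]{8})'  # renamed from (?P<row>...) in 0.3.60
    )

    match = pattern.search('ACGTACGT' + 'A' * 20 + 'TTTTCCCC')
    if match:
        # match.group('row') would now raise IndexError ("no such group")
        row_sequence = match.group('row_name')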
spacr/settings.py CHANGED
@@ -198,7 +198,7 @@ def set_default_umap_image_settings(settings={}):
     settings.setdefault('smooth_lines', True)
     settings.setdefault('clustering', 'dbscan')
     settings.setdefault('exclude', None)
-    settings.setdefault('col_to_compare', 'col')
+    settings.setdefault('col_to_compare', 'column_name')
     settings.setdefault('pos', 'c1')
     settings.setdefault('neg', 'c2')
     settings.setdefault('embedding_by_controls', False)
@@ -289,7 +289,7 @@ def set_default_analyze_screen(settings):
     settings.setdefault('minimum_cell_count',25)
     settings.setdefault('n_estimators',100)
     settings.setdefault('test_size',0.2)
-    settings.setdefault('location_column','col')
+    settings.setdefault('location_column','column_name')
     settings.setdefault('positive_control','c2')
     settings.setdefault('negative_control','c1')
     settings.setdefault('exclude',None)
@@ -337,8 +337,9 @@ def set_default_train_test_model(settings):
     return settings

 def set_generate_training_dataset_defaults(settings):
-
+
     settings.setdefault('src','path')
+    settings.setdefault('tables',['cell', 'nucleus', 'pathogen', 'cytoplasm'])
     settings.setdefault('dataset_mode','metadata')
     settings.setdefault('annotation_column','test')
     settings.setdefault('annotated_classes',[1,2])
@@ -346,7 +347,7 @@ def set_generate_training_dataset_defaults(settings):
     settings.setdefault('size',224)
     settings.setdefault('test_split',0.1)
     settings.setdefault('class_metadata',[['c1'],['c2']])
-    settings.setdefault('metadata_type_by','col')
+    settings.setdefault('metadata_type_by','column_name')
     settings.setdefault('channel_of_interest',3)
     settings.setdefault('custom_measurement',None)
     settings.setdefault('tables',None)
@@ -369,7 +370,7 @@ def deep_spacr_defaults(settings):
     settings.setdefault('size',224)
     settings.setdefault('test_split',0.1)
     settings.setdefault('class_metadata',[['c1'],['c2']])
-    settings.setdefault('metadata_type_by','col')
+    settings.setdefault('metadata_type_by','column_name')
     settings.setdefault('channel_of_interest',3)
     settings.setdefault('custom_measurement',None)
     settings.setdefault('tables',None)
@@ -453,7 +454,7 @@ def get_analyze_recruitment_default_settings(settings):
     settings.setdefault('pathogen_plate_metadata',[['c1', 'c2', 'c3'],['c4','c5', 'c6']])
     settings.setdefault('treatments',['cm', 'lovastatin'])
     settings.setdefault('treatment_plate_metadata',[['r1', 'r2','r3'], ['r4', 'r5','r6']])
-    settings.setdefault('metadata_types',['col', 'col', 'row'])
+    settings.setdefault('metadata_types',['column_name', 'column_name', 'row_name'])
     settings.setdefault('channel_dims',[0,1,2,3])
     settings.setdefault('cell_chann_dim',3)
     settings.setdefault('cell_mask_dim',4)
@@ -531,18 +532,22 @@ def get_perform_regression_default_settings(settings):
     settings.setdefault('score_data','list of paths')
     settings.setdefault('positive_control','239740')
     settings.setdefault('negative_control','233460')
+    settings.setdefault('min_n',0)
     settings.setdefault('controls',['000000_1','000000_10','000000_11','000000_12','000000_13','000000_14','000000_15','000000_16','000000_17','000000_18','000000_19','000000_20','000000_21','000000_22','000000_23','000000_24','000000_25','000000_26','000000_27','000000_28','000000_29','000000_3','000000_30','000000_31','000000_32','000000_4','000000_5','000000_6','000000_8','000000_9'])
-    settings.setdefault('fraction_threshold',0.12)
+    settings.setdefault('fraction_threshold',None)
     settings.setdefault('dependent_variable','pred')
     settings.setdefault('threshold_method','std')
     settings.setdefault('threshold_multiplier',3)
+    settings.setdefault('target_unique_count',5)
     settings.setdefault('transform',None)
+    settings.setdefault('log_x',False)
+    settings.setdefault('log_y',False)
+    settings.setdefault('x_lim',None)
     settings.setdefault('agg_type','mean')
-    settings.setdefault('min_cell_count',25)
+    settings.setdefault('min_cell_count',None)
     settings.setdefault('regression_type','ols')
     settings.setdefault('random_row_column_effects',False)
     settings.setdefault('split_axis_lims','')
-    settings.setdefault('plate','')
     settings.setdefault('cov_type',None)
     settings.setdefault('alpha',1)
     settings.setdefault('filter_value',['c1', 'c2', 'c3'])
@@ -557,6 +562,7 @@ def get_perform_regression_default_settings(settings):
         print(f"Using alpha as quantile for quantile regression, alpha: {settings['alpha']}")
         settings['agg_type'] = None
         print(f'agg_type set to None for quantile regression')
+
     return settings

 def get_check_cellpose_models_default_settings(settings):
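
Note: every settings change above uses dict.setdefault, so a caller's explicit value always wins and the new 0.3.60 defaults apply only when a key is absent. A toy reduction of get_perform_regression_default_settings (a sketch, not the full function):

    def example_regression_defaults(settings):
        settings.setdefault('min_n', 0)                   # added in 0.3.60
        settings.setdefault('fraction_threshold', None)   # was 0.12
        settings.setdefault('target_unique_count', 5)     # added in 0.3.60
        settings.setdefault('min_cell_count', None)       # was 25
        return settings

    print(example_regression_defaults({'min_cell_count': 10}))
    # {'min_cell_count': 10, 'min_n': 0, 'fraction_threshold': None, 'target_unique_count': 5}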
spacr/submodules.py CHANGED
@@ -341,17 +341,17 @@ def count_phenotypes(settings):
     unique_values_count = df[settings['annotation_column']].nunique(dropna=True)
     print(f"Unique values in {settings['annotation_column']} (excluding NaN): {unique_values_count}")

-    # Count unique values in 'value' column, grouped by 'plate', 'row', 'column'
-    grouped_unique_count = df.groupby(['plate', 'row', 'column'])[settings['annotation_column']].nunique(dropna=True).reset_index(name='unique_count')
+    # Count unique values in 'value' column, grouped by 'plate', 'row_name', 'column'
+    grouped_unique_count = df.groupby(['plate', 'row_name', 'column'])[settings['annotation_column']].nunique(dropna=True).reset_index(name='unique_count')
     display(grouped_unique_count)

     save_path = os.path.join(settings['src'], 'phenotype_counts.csv')

     # Group by plate, row, and column, then count the occurrences of each unique value
-    grouped_counts = df.groupby(['plate', 'row', 'column', 'value']).size().reset_index(name='count')
+    grouped_counts = df.groupby(['plate', 'row_name', 'column', 'value']).size().reset_index(name='count')

     # Pivot the DataFrame so that unique values are columns and their counts are in the rows
-    pivot_df = grouped_counts.pivot_table(index=['plate', 'row', 'column'], columns='value', values='count', fill_value=0)
+    pivot_df = grouped_counts.pivot_table(index=['plate', 'row_name', 'column'], columns='value', values='count', fill_value=0)

     # Flatten the multi-level columns
     pivot_df.columns = [f"value_{int(col)}" for col in pivot_df.columns]
@@ -376,17 +376,17 @@ def compare_reads_to_scores(reads_csv, scores_csv, empirical_dict={'r1':(90,10),
                              column='column', value='c3', plate=None, save_paths=None):

     def calculate_well_score_fractions(df, class_columns='cv_predictions'):
-        if all(col in df.columns for col in ['plate', 'row', 'column']):
-            df['prc'] = df['plate'] + '_' + df['row'] + '_' + df['column']
+        if all(col in df.columns for col in ['plate', 'row_name', 'column']):
+            df['prc'] = df['plate'] + '_' + df['row_name'] + '_' + df['column']
         else:
-            raise ValueError("Cannot find 'plate', 'row', or 'column' in df.columns")
-        prc_summary = df.groupby(['plate', 'row', 'column', 'prc']).size().reset_index(name='total_rows')
-        well_counts = (df.groupby(['plate', 'row', 'column', 'prc', class_columns])
+            raise ValueError("Cannot find 'plate', 'row_name', or 'column' in df.columns")
+        prc_summary = df.groupby(['plate', 'row_name', 'column', 'prc']).size().reset_index(name='total_rows')
+        well_counts = (df.groupby(['plate', 'row_name', 'column', 'prc', class_columns])
                        .size()
                        .unstack(fill_value=0)
                        .reset_index()
                        .rename(columns={0: 'class_0', 1: 'class_1'}))
-        summary_df = pd.merge(prc_summary, well_counts, on=['plate', 'row', 'column', 'prc'], how='left')
+        summary_df = pd.merge(prc_summary, well_counts, on=['plate', 'row_name', 'column', 'prc'], how='left')
         summary_df['class_0_fraction'] = summary_df['class_0'] / summary_df['total_rows']
         summary_df['class_1_fraction'] = summary_df['class_1'] / summary_df['total_rows']
         return summary_df
@@ -481,8 +481,8 @@ def compare_reads_to_scores(reads_csv, scores_csv, empirical_dict={'r1':(90,10),
         return result

     def calculate_well_read_fraction(df, count_column='count'):
-        if all(col in df.columns for col in ['plate', 'row', 'column']):
-            df['prc'] = df['plate'] + '_' + df['row'] + '_' + df['column']
+        if all(col in df.columns for col in ['plate', 'row_name', 'column']):
+            df['prc'] = df['plate'] + '_' + df['row_name'] + '_' + df['column']
         else:
             raise ValueError("Cannot find plate, row or column in df.columns")
         grouped_df = df.groupby('prc')[count_column].sum().reset_index()
@@ -501,18 +501,18 @@ def compare_reads_to_scores(reads_csv, scores_csv, empirical_dict={'r1':(90,10),
         reads_df_temp['plate'] = f"plate{i+1}"
         scores_df_temp['plate'] = f"plate{i+1}"

-        if 'col' in reads_df_temp.columns:
-            reads_df_temp = reads_df_temp.rename(columns={'col': 'column'})
         if 'column_name' in reads_df_temp.columns:
             reads_df_temp = reads_df_temp.rename(columns={'column_name': 'column'})
-        if 'col' in scores_df_temp.columns:
-            scores_df_temp = scores_df_temp.rename(columns={'col': 'column'})
+        if 'column_name' in reads_df_temp.columns:
+            reads_df_temp = reads_df_temp.rename(columns={'column_name': 'column'})
+        if 'column_name' in scores_df_temp.columns:
+            scores_df_temp = scores_df_temp.rename(columns={'column_name': 'column'})
         if 'column_name' in scores_df_temp.columns:
             scores_df_temp = scores_df_temp.rename(columns={'column_name': 'column'})
         if 'row_name' in reads_df_temp.columns:
-            reads_df_temp = reads_df_temp.rename(columns={'row_name': 'row'})
+            reads_df_temp = reads_df_temp.rename(columns={'row_name': 'row_name'})
         if 'row_name' in scores_df_temp.columns:
-            scores_df_temp = scores_df_temp.rename(columns={'row_name': 'row'})
+            scores_df_temp = scores_df_temp.rename(columns={'row_name': 'row_name'})

         reads_ls.append(reads_df_temp)
         scores_ls.append(scores_df_temp)
@@ -539,7 +539,7 @@ def compare_reads_to_scores(reads_csv, scores_csv, empirical_dict={'r1':(90,10),

     df_emp = pd.DataFrame([(key, val[0], val[1], val[0] / (val[0] + val[1]), val[1] / (val[0] + val[1])) for key, val in empirical_dict.items()],columns=['key', 'value1', 'value2', 'pc_fraction', 'nc_fraction'])

-    df = pd.merge(df, df_emp, left_on='row', right_on='key')
+    df = pd.merge(df, df_emp, left_on='row_name', right_on='key')

     if any in y_columns not in df.columns:
         print(f"columns in dataframe:")
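
Note: the renamed columns feed the 'prc' (plate_row_column) well identifier that these helpers group on. A minimal sketch of that construction on made-up data, mirroring calculate_well_score_fractions:

    import pandas as pd

    df = pd.DataFrame({
        'plate': ['plate1', 'plate1', 'plate1'],
        'row_name': ['r1', 'r1', 'r2'],   # called 'row' before 0.3.60
        'column': ['c3', 'c3', 'c3'],
        'cv_predictions': [0, 1, 1],
    })

    df['prc'] = df['plate'] + '_' + df['row_name'] + '_' + df['column']
    well_counts = (df.groupby(['plate', 'row_name', 'column', 'prc', 'cv_predictions'])
                     .size()
                     .unstack(fill_value=0)
                     .reset_index()
                     .rename(columns={0: 'class_0', 1: 'class_1'}))
    print(well_counts)  # one row per well with class_0 / class_1 counts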
spacr/timelapse.py CHANGED
@@ -533,14 +533,14 @@ def exponential_decay(x, a, b, c):

 def preprocess_pathogen_data(pathogen_df):
     # Group by identifiers and count the number of parasites
-    parasite_counts = pathogen_df.groupby(['plate', 'row', 'col', 'field', 'timeid', 'pathogen_cell_id']).size().reset_index(name='parasite_count')
+    parasite_counts = pathogen_df.groupby(['plate', 'row_name', 'column_name', 'field', 'timeid', 'pathogen_cell_id']).size().reset_index(name='parasite_count')

     # Aggregate numerical columns and take the first of object columns
-    agg_funcs = {col: 'mean' if np.issubdtype(pathogen_df[col].dtype, np.number) else 'first' for col in pathogen_df.columns if col not in ['plate', 'row', 'col', 'field', 'timeid', 'pathogen_cell_id', 'parasite_count']}
-    pathogen_agg = pathogen_df.groupby(['plate', 'row', 'col', 'field', 'timeid', 'pathogen_cell_id']).agg(agg_funcs).reset_index()
+    agg_funcs = {col: 'mean' if np.issubdtype(pathogen_df[col].dtype, np.number) else 'first' for col in pathogen_df.columns if col not in ['plate', 'row_name', 'column_name', 'field', 'timeid', 'pathogen_cell_id', 'parasite_count']}
+    pathogen_agg = pathogen_df.groupby(['plate', 'row_name', 'column_name', 'field', 'timeid', 'pathogen_cell_id']).agg(agg_funcs).reset_index()

     # Merge the counts back into the aggregated data
-    pathogen_agg = pathogen_agg.merge(parasite_counts, on=['plate', 'row', 'col', 'field', 'timeid', 'pathogen_cell_id'])
+    pathogen_agg = pathogen_agg.merge(parasite_counts, on=['plate', 'row_name', 'column_name', 'field', 'timeid', 'pathogen_cell_id'])

     # Remove the object_label column as it corresponds to the pathogen ID not the cell ID
     if 'object_label' in pathogen_agg.columns:
@@ -604,10 +604,10 @@ def save_results_dataframe(df, src, results_name):
 def summarize_per_well(peak_details_df):
     # Step 1: Split the 'ID' column
     split_columns = peak_details_df['ID'].str.split('_', expand=True)
-    peak_details_df[['plate', 'row', 'column', 'field', 'object_number']] = split_columns
+    peak_details_df[['plate', 'row_name', 'column', 'field', 'object_number']] = split_columns

-    # Step 2: Create 'well_ID' by combining 'row' and 'column'
-    peak_details_df['well_ID'] = peak_details_df['row'] + '_' + peak_details_df['column']
+    # Step 2: Create 'well_ID' by combining 'row_name' and 'column'
+    peak_details_df['well_ID'] = peak_details_df['row_name'] + '_' + peak_details_df['column']

     # Filter entries where 'amplitude' is not null
     filtered_df = peak_details_df[peak_details_df['amplitude'].notna()]
@@ -635,10 +635,10 @@ def summarize_per_well(peak_details_df):
 def summarize_per_well_inf_non_inf(peak_details_df):
     # Step 1: Split the 'ID' column
     split_columns = peak_details_df['ID'].str.split('_', expand=True)
-    peak_details_df[['plate', 'row', 'column', 'field', 'object_number']] = split_columns
+    peak_details_df[['plate', 'row_name', 'column', 'field', 'object_number']] = split_columns

-    # Step 2: Create 'well_ID' by combining 'row' and 'column'
-    peak_details_df['well_ID'] = peak_details_df['row'] + '_' + peak_details_df['column']
+    # Step 2: Create 'well_ID' by combining 'row_name' and 'column'
+    peak_details_df['well_ID'] = peak_details_df['row_name'] + '_' + peak_details_df['column']

     # Assume 'pathogen_count' indicates infection if > 0
     # Add an 'infected_status' column to classify cells
@@ -669,7 +669,7 @@ def analyze_calcium_oscillations(db_loc, measurement='cell_channel_1_mean_intens
         pathogen_df = pd.read_sql("SELECT * FROM pathogen", conn)
         pathogen_df['pathogen_cell_id'] = pathogen_df['pathogen_cell_id'].astype(float).astype('Int64')
         pathogen_df = preprocess_pathogen_data(pathogen_df)
-        cell_df = cell_df.merge(pathogen_df, on=['plate', 'row', 'col', 'field', 'timeid', 'object_label'], how='left', suffixes=('', '_pathogen'))
+        cell_df = cell_df.merge(pathogen_df, on=['plate', 'row_name', 'column_name', 'field', 'timeid', 'object_label'], how='left', suffixes=('', '_pathogen'))
         cell_df['parasite_count'] = cell_df['parasite_count'].fillna(0)
         print(f'After pathogen merge: {len(cell_df)} objects')

@@ -677,7 +677,7 @@ def analyze_calcium_oscillations(db_loc, measurement='cell_channel_1_mean_intens
     if cytoplasm:
         cytoplasm_df = pd.read_sql(f"SELECT * FROM {'cytoplasm'}", conn)
         # Merge on specified columns
-        cell_df = cell_df.merge(cytoplasm_df, on=['plate', 'row', 'col', 'field', 'timeid', 'object_label'], how='left', suffixes=('', '_cytoplasm'))
+        cell_df = cell_df.merge(cytoplasm_df, on=['plate', 'row_name', 'column_name', 'field', 'timeid', 'object_label'], how='left', suffixes=('', '_cytoplasm'))

         print(f'After cytoplasm merge: {len(cell_df)} objects')

@@ -687,12 +687,12 @@ def analyze_calcium_oscillations(db_loc, measurement='cell_channel_1_mean_intens
     # Prepare DataFrame (use cell_df instead of df)
     prcf_components = cell_df['prcf'].str.split('_', expand=True)
     cell_df['plate'] = prcf_components[0]
-    cell_df['row'] = prcf_components[1]
+    cell_df['row_name'] = prcf_components[1]
     cell_df['column'] = prcf_components[2]
     cell_df['field'] = prcf_components[3]
     cell_df['time'] = prcf_components[4].str.extract('t(\d+)').astype(int)
     cell_df['object_number'] = cell_df['object_label']
-    cell_df['plate_row_column_field_object'] = cell_df['plate'].astype(str) + '_' + cell_df['row'].astype(str) + '_' + cell_df['column'].astype(str) + '_' + cell_df['field'].astype(str) + '_' + cell_df['object_label'].astype(str)
+    cell_df['plate_row_column_field_object'] = cell_df['plate'].astype(str) + '_' + cell_df['row_name'].astype(str) + '_' + cell_df['column'].astype(str) + '_' + cell_df['field'].astype(str) + '_' + cell_df['object_label'].astype(str)

     df = cell_df.copy()

@@ -753,7 +753,7 @@ def analyze_calcium_oscillations(db_loc, measurement='cell_channel_1_mean_intens
                 peak_details_list.append({
                     'ID': unique_id,
                     'plate': group['plate'].iloc[0],
-                    'row': group['row'].iloc[0],
+                    'row_name': group['row_name'].iloc[0],
                     'column': group['column'].iloc[0],
                     'field': group['field'].iloc[0],
                     'object_number': group['object_number'].iloc[0],
@@ -784,7 +784,7 @@ def analyze_calcium_oscillations(db_loc, measurement='cell_channel_1_mean_intens
                 peak_details_list.append({
                     'ID': unique_id,
                     'plate': group['plate'].iloc[0],
-                    'row': group['row'].iloc[0],
+                    'row_name': group['row_name'].iloc[0],
                     'column': group['column'].iloc[0],
                     'field': group['field'].iloc[0],
                     'object_number': group['object_number'].iloc[0],
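
Note: in analyze_calcium_oscillations the well coordinates come from splitting the underscore-delimited 'prcf' identifier, so the second component is now stored as 'row_name'. A minimal sketch with a made-up identifier (using expand=False for the time extraction, where the library code relies on the default extract behavior):

    import pandas as pd

    cell_df = pd.DataFrame({'prcf': ['plate1_r2_c3_f01_t12']})
    prcf_components = cell_df['prcf'].str.split('_', expand=True)
    cell_df['plate'] = prcf_components[0]
    cell_df['row_name'] = prcf_components[1]   # was 'row' before 0.3.60
    cell_df['column'] = prcf_components[2]
    cell_df['field'] = prcf_components[3]
    cell_df['time'] = prcf_components[4].str.extract(r't(\d+)', expand=False).astype(int)
    print(cell_df.iloc[0].to_dict())
    # {'prcf': 'plate1_r2_c3_f01_t12', 'plate': 'plate1', 'row_name': 'r2',
    #  'column': 'c3', 'field': 'f01', 'time': 12}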
spacr/toxo.py CHANGED
@@ -468,15 +468,15 @@ def generate_score_heatmap(settings):
     def group_cv_score(csv, plate=1, column='c3', data_column='pred'):

         df = pd.read_csv(csv)
-        if 'col' in df.columns:
-            df = df[df['col']==column]
+        if 'column_name' in df.columns:
+            df = df[df['column_name']==column]
         elif 'column' in df.columns:
-            df['col'] = df['column']
-            df = df[df['col']==column]
+            df['column_name'] = df['column']
+            df = df[df['column_name']==column]
         if not plate is None:
             df['plate'] = f"plate{plate}"
-        grouped_df = df.groupby(['plate', 'row', 'col'])[data_column].mean().reset_index()
-        grouped_df['prc'] = grouped_df['plate'].astype(str) + '_' + grouped_df['row'].astype(str) + '_' + grouped_df['col'].astype(str)
+        grouped_df = df.groupby(['plate', 'row_name', 'column_name'])[data_column].mean().reset_index()
+        grouped_df['prc'] = grouped_df['plate'].astype(str) + '_' + grouped_df['row_name'].astype(str) + '_' + grouped_df['column_name'].astype(str)
         return grouped_df

     def calculate_fraction_mixed_condition(csv, plate=1, column='c3', control_sgrnas = ['TGGT1_220950_1', 'TGGT1_233460_4']):
@@ -501,17 +501,17 @@ def generate_score_heatmap(settings):
     - column: Column to filter by (default is 'c3').
     """
     # Extract row number and convert to integer for sorting
-    df['row_num'] = df['row'].str.extract(r'(\d+)').astype(int)
+    df['row_num'] = df['row_name'].str.extract(r'(\d+)').astype(int)

     # Filter and sort by plate, row, and column
-    df = df[df['col'] == column]
-    df = df.sort_values(by=['plate', 'row_num', 'col'])
+    df = df[df['column_name'] == column]
+    df = df.sort_values(by=['plate', 'row_num', 'column_name'])

     # Drop temporary 'row_num' column after sorting
     df = df.drop('row_num', axis=1)

     # Create a new column combining plate, row, and column for the index
-    df['plate_row_col'] = df['plate'] + '-' + df['row'] + '-' + df['col']
+    df['plate_row_col'] = df['plate'] + '-' + df['row_name'] + '-' + df['column_name']

     # Set 'plate_row_col' as the index
     df.set_index('plate_row_col', inplace=True)
@@ -568,11 +568,11 @@ def generate_score_heatmap(settings):
         # Loop through all collected CSV files and process them
         for csv_file in ls:
             df = pd.read_csv(csv_file) # Read CSV into DataFrame
-            df = df[df['col']==column]
+            df = df[df['column_name']==column]
             if not plate is None:
                 df['plate'] = f"plate{plate}"
-            # Group the data by 'plate', 'row', and 'col'
-            grouped_df = df.groupby(['plate', 'row', 'col'])[data_column].mean().reset_index()
+            # Group the data by 'plate', 'row_name', and 'column_name'
+            grouped_df = df.groupby(['plate', 'row_name', 'column_name'])[data_column].mean().reset_index()
             # Use the CSV filename to create a new column name
             folder_name = os.path.dirname(csv_file).replace(".csv", "")
             new_column_name = os.path.basename(f"{folder_name}_{data_column}")
@@ -583,8 +583,8 @@ def generate_score_heatmap(settings):
             if combined_df is None:
                 combined_df = grouped_df
             else:
-                combined_df = pd.merge(combined_df, grouped_df, on=['plate', 'row', 'col'], how='outer')
-        combined_df['prc'] = combined_df['plate'].astype(str) + '_' + combined_df['row'].astype(str) + '_' + combined_df['col'].astype(str)
+                combined_df = pd.merge(combined_df, grouped_df, on=['plate', 'row_name', 'column_name'], how='outer')
+        combined_df['prc'] = combined_df['plate'].astype(str) + '_' + combined_df['row_name'].astype(str) + '_' + combined_df['column_name'].astype(str)
         return combined_df

     def calculate_mae(df):
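
Note: across all five modules, 0.3.52 → 0.3.60 standardizes well coordinates on 'row_name'/'column_name' in place of the older 'row'/'col'. DataFrames saved by earlier spacr versions can be adapted with a small rename before being passed to the 0.3.60 helpers; the shim below is a hypothetical illustration, not part of spacr:

    import pandas as pd

    # Hypothetical compatibility shim, not part of spacr.
    LEGACY_TO_CURRENT = {'row': 'row_name', 'col': 'column_name'}

    def normalize_plate_columns(df: pd.DataFrame) -> pd.DataFrame:
        # Rename legacy well-coordinate columns to their 0.3.60 names,
        # leaving DataFrames that already use the new names untouched.
        renames = {old: new for old, new in LEGACY_TO_CURRENT.items()
                   if old in df.columns and new not in df.columns}
        return df.rename(columns=renames)

    df = pd.DataFrame({'plate': ['p1'], 'row': ['r1'], 'col': ['c3']})
    print(normalize_plate_columns(df).columns.tolist())
    # ['plate', 'row_name', 'column_name']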