spacr 0.3.50__py3-none-any.whl → 0.3.55__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
spacr/settings.py CHANGED
@@ -198,7 +198,7 @@ def set_default_umap_image_settings(settings={}):
  settings.setdefault('smooth_lines', True)
  settings.setdefault('clustering', 'dbscan')
  settings.setdefault('exclude', None)
- settings.setdefault('col_to_compare', 'col')
+ settings.setdefault('col_to_compare', 'column_name')
  settings.setdefault('pos', 'c1')
  settings.setdefault('neg', 'c2')
  settings.setdefault('embedding_by_controls', False)
@@ -246,7 +246,7 @@ def get_measure_crop_settings(settings={}):
  settings.setdefault('normalize_by','png')
  settings.setdefault('crop_mode',['cell'])
  settings.setdefault('dialate_pngs', False)
- settings.setdefault('dialate_png_ratios', [0.2,0.2])
+ settings.setdefault('dialate_png_ratios', [0.2])
 
  # Timelapsed settings
  settings.setdefault('timelapse', False)
@@ -289,7 +289,7 @@ def set_default_analyze_screen(settings):
  settings.setdefault('minimum_cell_count',25)
  settings.setdefault('n_estimators',100)
  settings.setdefault('test_size',0.2)
- settings.setdefault('location_column','col')
+ settings.setdefault('location_column','column_name')
  settings.setdefault('positive_control','c2')
  settings.setdefault('negative_control','c1')
  settings.setdefault('exclude',None)
@@ -337,8 +337,9 @@ def set_default_train_test_model(settings):
  return settings
 
  def set_generate_training_dataset_defaults(settings):
-
+
  settings.setdefault('src','path')
+ settings.setdefault('tables',['cell', 'nucleus', 'pathogen', 'cytoplasm'])
  settings.setdefault('dataset_mode','metadata')
  settings.setdefault('annotation_column','test')
  settings.setdefault('annotated_classes',[1,2])
@@ -346,7 +347,7 @@ def set_generate_training_dataset_defaults(settings):
  settings.setdefault('size',224)
  settings.setdefault('test_split',0.1)
  settings.setdefault('class_metadata',[['c1'],['c2']])
- settings.setdefault('metadata_type_by','col')
+ settings.setdefault('metadata_type_by','column_name')
  settings.setdefault('channel_of_interest',3)
  settings.setdefault('custom_measurement',None)
  settings.setdefault('tables',None)
@@ -369,7 +370,7 @@ def deep_spacr_defaults(settings):
  settings.setdefault('size',224)
  settings.setdefault('test_split',0.1)
  settings.setdefault('class_metadata',[['c1'],['c2']])
- settings.setdefault('metadata_type_by','col')
+ settings.setdefault('metadata_type_by','column_name')
  settings.setdefault('channel_of_interest',3)
  settings.setdefault('custom_measurement',None)
  settings.setdefault('tables',None)
@@ -453,7 +454,7 @@ def get_analyze_recruitment_default_settings(settings):
  settings.setdefault('pathogen_plate_metadata',[['c1', 'c2', 'c3'],['c4','c5', 'c6']])
  settings.setdefault('treatments',['cm', 'lovastatin'])
  settings.setdefault('treatment_plate_metadata',[['r1', 'r2','r3'], ['r4', 'r5','r6']])
- settings.setdefault('metadata_types',['col', 'col', 'row'])
+ settings.setdefault('metadata_types',['column_name', 'column_name', 'row_name'])
  settings.setdefault('channel_dims',[0,1,2,3])
  settings.setdefault('cell_chann_dim',3)
  settings.setdefault('cell_mask_dim',4)
@@ -531,18 +532,22 @@ def get_perform_regression_default_settings(settings):
  settings.setdefault('score_data','list of paths')
  settings.setdefault('positive_control','239740')
  settings.setdefault('negative_control','233460')
+ settings.setdefault('min_n',0)
  settings.setdefault('controls',['000000_1','000000_10','000000_11','000000_12','000000_13','000000_14','000000_15','000000_16','000000_17','000000_18','000000_19','000000_20','000000_21','000000_22','000000_23','000000_24','000000_25','000000_26','000000_27','000000_28','000000_29','000000_3','000000_30','000000_31','000000_32','000000_4','000000_5','000000_6','000000_8','000000_9'])
- settings.setdefault('fraction_threshold',0.12)
+ settings.setdefault('fraction_threshold',None)
  settings.setdefault('dependent_variable','pred')
  settings.setdefault('threshold_method','std')
  settings.setdefault('threshold_multiplier',3)
+ settings.setdefault('target_unique_count',5)
  settings.setdefault('transform',None)
+ settings.setdefault('log_x',False)
+ settings.setdefault('log_y',False)
+ settings.setdefault('x_lim',None)
  settings.setdefault('agg_type','mean')
- settings.setdefault('min_cell_count',25)
+ settings.setdefault('min_cell_count',None)
  settings.setdefault('regression_type','ols')
  settings.setdefault('random_row_column_effects',False)
  settings.setdefault('split_axis_lims','')
- settings.setdefault('plate','')
  settings.setdefault('cov_type',None)
  settings.setdefault('alpha',1)
  settings.setdefault('filter_value',['c1', 'c2', 'c3'])
@@ -557,6 +562,7 @@ def get_perform_regression_default_settings(settings):
  print(f"Using alpha as quantile for quantile regression, alpha: {settings['alpha']}")
  settings['agg_type'] = None
  print(f'agg_type set to None for quantile regression')
+
  return settings
 
  def get_check_cellpose_models_default_settings(settings):
@@ -697,16 +703,6 @@ expected_types = {
  "overlay_chans": list,
  "overlay": bool,
  "normalization_percentiles": list,
- "print_object_number": bool,
- "nr": int,
- "figuresize": int,
- "cmap": str,
- "test_mode": bool,
- "test_images": int,
- "remove_background_cell": bool,
- "remove_background_nucleus": bool,
- "remove_background_pathogen": bool,
- "pathogen_model": (str, type(None)),
  "filter": bool,
  "fill_in":bool,
  "upscale": bool,
@@ -825,18 +821,6 @@ expected_types = {
  "transform": (str, type(None)),
  "agg_type": str,
  "min_cell_count": int,
- "regression_type": str,
- "random_row_column_effects": bool,
- "alpha": float,
- "fraction_threshold": float,
- "class_1_threshold": (float, type(None)),
- "batch_size": int,
- "CP_prob": float,
- "flow_threshold": float,
- "percentiles": (list, type(None)),
- "invert": bool,
- "diameter": int,
- "grayscale": bool,
  "resize": bool,
  "target_height": (int, type(None)),
  "target_width": (int, type(None)),
@@ -881,9 +865,6 @@ expected_types = {
  "metadata_type_by":str,
  "custom_measurement":str,
  "custom_model":bool,
- "size":int,
- "test_split":float,
- "class_metadata":list, # This is a list of lists
  "png_type":str,
  "custom_model_path":str,
  "generate_training_dataset":bool,
@@ -894,6 +875,7 @@ expected_types = {
  "correlate":bool,
  "target_layer":str,
  "save_to_db":bool,
+ "test_mode":bool,
  "normalize_input":bool,
  }
 
@@ -904,7 +886,7 @@ categories = {"Paths":[ "src", "grna", "barcodes", "custom_model_path", "dataset
  "Nucleus": ["nucleus_intensity_range", "nucleus_size_range", "nucleus_chann_dim", "nucleus_channel", "nucleus_background", "nucleus_Signal_to_noise", "nucleus_CP_prob", "nucleus_FT", "remove_background_nucleus", "nucleus_min_size", "nucleus_mask_dim", "nucleus_loc"],
  "Pathogen": ["pathogen_intensity_range", "pathogen_size_range", "pathogen_chann_dim", "pathogen_channel", "pathogen_background", "pathogen_Signal_to_noise", "pathogen_CP_prob", "pathogen_FT", "pathogen_model", "remove_background_pathogen", "pathogen_min_size", "pathogen_mask_dim", "pathogens", "pathogen_loc", "pathogen_types", "pathogen_plate_metadata", ],
  "Measurements": ["remove_image_canvas", "remove_highly_correlated", "homogeneity", "homogeneity_distances", "radial_dist", "calculate_correlation", "manders_thresholds", "save_measurements", "tables", "image_nr", "dot_size", "filter_by", "remove_highly_correlated_features", "remove_low_variance_features", "channel_of_interest"],
- "Object Image": ["save_png", "dialate_pngs", "dialate_png_ratios", "png_size", "png_dims", "save_arrays", "normalize_by", "crop_mode", "dialate_pngs", "normalize", "use_bounding_box"],
+ "Object Image": ["save_png", "dialate_pngs", "dialate_png_ratios", "png_size", "png_dims", "save_arrays", "normalize_by", "crop_mode", "normalize", "use_bounding_box"],
  "Sequencing": ["signal_direction","mode","comp_level","comp_type","save_h5","expected_end","offset","target_sequence","regex", "highlight"],
  "Generate Dataset":["save_to_db","file_metadata","class_metadata", "annotation_column","annotated_classes", "dataset_mode", "metadata_type_by","custom_measurement", "sample", "size"],
  "Hyperparamiters (Training)": ["png_type", "score_threshold","file_type", "train_channels", "epochs", "loss_type", "optimizer_type","image_size","val_split","learning_rate","weight_decay","dropout_rate", "init_weights", "train", "classes", "augment", "amsgrad","use_checkpoint","gradient_accumulation","gradient_accumulation_steps","intermedeate_save","pin_memory"],
@@ -939,6 +921,9 @@ def check_settings(vars_dict, expected_types, q=None):
  continue
 
  value = var.get()
+ if value == 'None':
+ value = None
+
  expected_type = expected_types.get(key, str)
 
  try:
@@ -953,14 +938,19 @@ def check_settings(vars_dict, expected_types, q=None):
  # settings[key] = None
  else:
  raise ValueError("Invalid format for list or list of lists")
+
  elif expected_type == list:
  settings[key] = parse_list(value) if value else None
+
+ if isinstance(settings[key], list) and len(settings[key]) == 1:
+ settings[key] = settings[key][0]
+
  elif expected_type == bool:
  settings[key] = value if isinstance(value, bool) else value.lower() in ['true', '1', 't', 'y', 'yes']
  elif expected_type == (int, type(None)):
- settings[key] = int(value) if value else None
+ settings[key] = settings[key] = int(value) if isinstance(value, int) or str(value).isdigit() else None
  elif expected_type == (float, type(None)):
- settings[key] = float(value) if value else None
+ settings[key] = float(value) if isinstance(value, float) or (isinstance(value, str) and value.replace(".", "", 1).isdigit()) else None
  elif expected_type == (int, float):
  settings[key] = float(value) if '.' in value else int(value)
  elif expected_type == (str, type(None)):
@@ -1000,7 +990,7 @@ def check_settings(vars_dict, expected_types, q=None):
  settings[key] = expected_type(value) if value else None
  except (ValueError, SyntaxError) as e:
  expected_type_name = ' or '.join([t.__name__ for t in expected_type]) if isinstance(expected_type, tuple) else expected_type.__name__
- q.put(f"Error: Invalid format for {key}. Expected type: {expected_type_name}. Error: {e}")
+ q.put(f"Error: Invalid format for {key}. Expected type: {expected_type_name}. Error: {e}, Value entered: {value}")
  return
 
  return settings
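
The check_settings changes above tighten how GUI string inputs are coerced: the literal string 'None' now maps to None, single-element lists are unwrapped to scalars, and optional int/float fields fall back to None instead of raising. A minimal standalone sketch of the new coercion rules (illustrative only, not the package's code; note that the isdigit()-based parsing visible in the diff rejects negative numbers):

    def coerce(value, expected_type):
        # New in 0.3.55: the literal string 'None' becomes None.
        if value == 'None':
            return None
        if expected_type == (int, type(None)):
            # Accept ints or digit-only strings; anything else falls back to None.
            return int(value) if isinstance(value, int) or str(value).isdigit() else None
        if expected_type == (float, type(None)):
            # Accept floats or strings with at most one decimal point.
            return float(value) if isinstance(value, float) or (
                isinstance(value, str) and value.replace('.', '', 1).isdigit()) else None
        return value

    assert coerce('None', (int, type(None))) is None
    assert coerce('42', (int, type(None))) == 42
    assert coerce('0.2', (float, type(None))) == 0.2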
spacr/submodules.py CHANGED
@@ -341,17 +341,17 @@ def count_phenotypes(settings):
  unique_values_count = df[settings['annotation_column']].nunique(dropna=True)
  print(f"Unique values in {settings['annotation_column']} (excluding NaN): {unique_values_count}")
 
- # Count unique values in 'value' column, grouped by 'plate', 'row', 'column'
- grouped_unique_count = df.groupby(['plate', 'row', 'column'])[settings['annotation_column']].nunique(dropna=True).reset_index(name='unique_count')
+ # Count unique values in 'value' column, grouped by 'plate', 'row_name', 'column'
+ grouped_unique_count = df.groupby(['plate', 'row_name', 'column'])[settings['annotation_column']].nunique(dropna=True).reset_index(name='unique_count')
  display(grouped_unique_count)
 
  save_path = os.path.join(settings['src'], 'phenotype_counts.csv')
 
  # Group by plate, row, and column, then count the occurrences of each unique value
- grouped_counts = df.groupby(['plate', 'row', 'column', 'value']).size().reset_index(name='count')
+ grouped_counts = df.groupby(['plate', 'row_name', 'column', 'value']).size().reset_index(name='count')
 
  # Pivot the DataFrame so that unique values are columns and their counts are in the rows
- pivot_df = grouped_counts.pivot_table(index=['plate', 'row', 'column'], columns='value', values='count', fill_value=0)
+ pivot_df = grouped_counts.pivot_table(index=['plate', 'row_name', 'column'], columns='value', values='count', fill_value=0)
 
  # Flatten the multi-level columns
  pivot_df.columns = [f"value_{int(col)}" for col in pivot_df.columns]
@@ -376,17 +376,17 @@ def compare_reads_to_scores(reads_csv, scores_csv, empirical_dict={'r1':(90,10),
  column='column', value='c3', plate=None, save_paths=None):
 
  def calculate_well_score_fractions(df, class_columns='cv_predictions'):
- if all(col in df.columns for col in ['plate', 'row', 'column']):
- df['prc'] = df['plate'] + '_' + df['row'] + '_' + df['column']
+ if all(col in df.columns for col in ['plate', 'row_name', 'column']):
+ df['prc'] = df['plate'] + '_' + df['row_name'] + '_' + df['column']
  else:
- raise ValueError("Cannot find 'plate', 'row', or 'column' in df.columns")
- prc_summary = df.groupby(['plate', 'row', 'column', 'prc']).size().reset_index(name='total_rows')
- well_counts = (df.groupby(['plate', 'row', 'column', 'prc', class_columns])
+ raise ValueError("Cannot find 'plate', 'row_name', or 'column' in df.columns")
+ prc_summary = df.groupby(['plate', 'row_name', 'column', 'prc']).size().reset_index(name='total_rows')
+ well_counts = (df.groupby(['plate', 'row_name', 'column', 'prc', class_columns])
  .size()
  .unstack(fill_value=0)
  .reset_index()
  .rename(columns={0: 'class_0', 1: 'class_1'}))
- summary_df = pd.merge(prc_summary, well_counts, on=['plate', 'row', 'column', 'prc'], how='left')
+ summary_df = pd.merge(prc_summary, well_counts, on=['plate', 'row_name', 'column', 'prc'], how='left')
  summary_df['class_0_fraction'] = summary_df['class_0'] / summary_df['total_rows']
  summary_df['class_1_fraction'] = summary_df['class_1'] / summary_df['total_rows']
  return summary_df
@@ -481,8 +481,8 @@ def compare_reads_to_scores(reads_csv, scores_csv, empirical_dict={'r1':(90,10),
  return result
 
  def calculate_well_read_fraction(df, count_column='count'):
- if all(col in df.columns for col in ['plate', 'row', 'column']):
- df['prc'] = df['plate'] + '_' + df['row'] + '_' + df['column']
+ if all(col in df.columns for col in ['plate', 'row_name', 'column']):
+ df['prc'] = df['plate'] + '_' + df['row_name'] + '_' + df['column']
  else:
  raise ValueError("Cannot find plate, row or column in df.columns")
  grouped_df = df.groupby('prc')[count_column].sum().reset_index()
@@ -501,18 +501,18 @@ def compare_reads_to_scores(reads_csv, scores_csv, empirical_dict={'r1':(90,10),
  reads_df_temp['plate'] = f"plate{i+1}"
  scores_df_temp['plate'] = f"plate{i+1}"
 
- if 'col' in reads_df_temp.columns:
- reads_df_temp = reads_df_temp.rename(columns={'col': 'column'})
  if 'column_name' in reads_df_temp.columns:
  reads_df_temp = reads_df_temp.rename(columns={'column_name': 'column'})
- if 'col' in scores_df_temp.columns:
- scores_df_temp = scores_df_temp.rename(columns={'col': 'column'})
+ if 'column_name' in reads_df_temp.columns:
+ reads_df_temp = reads_df_temp.rename(columns={'column_name': 'column'})
+ if 'column_name' in scores_df_temp.columns:
+ scores_df_temp = scores_df_temp.rename(columns={'column_name': 'column'})
  if 'column_name' in scores_df_temp.columns:
  scores_df_temp = scores_df_temp.rename(columns={'column_name': 'column'})
  if 'row_name' in reads_df_temp.columns:
- reads_df_temp = reads_df_temp.rename(columns={'row_name': 'row'})
+ reads_df_temp = reads_df_temp.rename(columns={'row_name': 'row_name'})
  if 'row_name' in scores_df_temp.columns:
- scores_df_temp = scores_df_temp.rename(columns={'row_name': 'row'})
+ scores_df_temp = scores_df_temp.rename(columns={'row_name': 'row_name'})
 
  reads_ls.append(reads_df_temp)
  scores_ls.append(scores_df_temp)
@@ -539,7 +539,7 @@ def compare_reads_to_scores(reads_csv, scores_csv, empirical_dict={'r1':(90,10),
 
  df_emp = pd.DataFrame([(key, val[0], val[1], val[0] / (val[0] + val[1]), val[1] / (val[0] + val[1])) for key, val in empirical_dict.items()],columns=['key', 'value1', 'value2', 'pc_fraction', 'nc_fraction'])
 
- df = pd.merge(df, df_emp, left_on='row', right_on='key')
+ df = pd.merge(df, df_emp, left_on='row_name', right_on='key')
 
  if any in y_columns not in df.columns:
  print(f"columns in dataframe:")
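
Throughout this file, well metadata is re-keyed from 'row' to 'row_name' in every groupby, pivot, and merge. A minimal sketch of the well-identifier ('prc') construction these renames feed, using an illustrative DataFrame (not package code):

    import pandas as pd

    df = pd.DataFrame({'plate': ['plate1'], 'row_name': ['r1'], 'column': ['c3']})
    # 0.3.55 expects 'row_name'; data written by older versions with a 'row'
    # column would need df.rename(columns={'row': 'row_name'}) first.
    if all(col in df.columns for col in ['plate', 'row_name', 'column']):
        df['prc'] = df['plate'] + '_' + df['row_name'] + '_' + df['column']
    print(df['prc'].iloc[0])  # plate1_r1_c3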
spacr/timelapse.py CHANGED
@@ -533,14 +533,14 @@ def exponential_decay(x, a, b, c):
 
  def preprocess_pathogen_data(pathogen_df):
  # Group by identifiers and count the number of parasites
- parasite_counts = pathogen_df.groupby(['plate', 'row', 'col', 'field', 'timeid', 'pathogen_cell_id']).size().reset_index(name='parasite_count')
+ parasite_counts = pathogen_df.groupby(['plate', 'row_name', 'column_name', 'field', 'timeid', 'pathogen_cell_id']).size().reset_index(name='parasite_count')
 
  # Aggregate numerical columns and take the first of object columns
- agg_funcs = {col: 'mean' if np.issubdtype(pathogen_df[col].dtype, np.number) else 'first' for col in pathogen_df.columns if col not in ['plate', 'row', 'col', 'field', 'timeid', 'pathogen_cell_id', 'parasite_count']}
- pathogen_agg = pathogen_df.groupby(['plate', 'row', 'col', 'field', 'timeid', 'pathogen_cell_id']).agg(agg_funcs).reset_index()
+ agg_funcs = {col: 'mean' if np.issubdtype(pathogen_df[col].dtype, np.number) else 'first' for col in pathogen_df.columns if col not in ['plate', 'row_name', 'column_name', 'field', 'timeid', 'pathogen_cell_id', 'parasite_count']}
+ pathogen_agg = pathogen_df.groupby(['plate', 'row_name', 'column_name', 'field', 'timeid', 'pathogen_cell_id']).agg(agg_funcs).reset_index()
 
  # Merge the counts back into the aggregated data
- pathogen_agg = pathogen_agg.merge(parasite_counts, on=['plate', 'row', 'col', 'field', 'timeid', 'pathogen_cell_id'])
+ pathogen_agg = pathogen_agg.merge(parasite_counts, on=['plate', 'row_name', 'column_name', 'field', 'timeid', 'pathogen_cell_id'])
 
  # Remove the object_label column as it corresponds to the pathogen ID not the cell ID
  if 'object_label' in pathogen_agg.columns:
@@ -604,10 +604,10 @@ def save_results_dataframe(df, src, results_name):
  def summarize_per_well(peak_details_df):
  # Step 1: Split the 'ID' column
  split_columns = peak_details_df['ID'].str.split('_', expand=True)
- peak_details_df[['plate', 'row', 'column', 'field', 'object_number']] = split_columns
+ peak_details_df[['plate', 'row_name', 'column', 'field', 'object_number']] = split_columns
 
- # Step 2: Create 'well_ID' by combining 'row' and 'column'
- peak_details_df['well_ID'] = peak_details_df['row'] + '_' + peak_details_df['column']
+ # Step 2: Create 'well_ID' by combining 'row_name' and 'column'
+ peak_details_df['well_ID'] = peak_details_df['row_name'] + '_' + peak_details_df['column']
 
  # Filter entries where 'amplitude' is not null
  filtered_df = peak_details_df[peak_details_df['amplitude'].notna()]
@@ -635,10 +635,10 @@ def summarize_per_well(peak_details_df):
  def summarize_per_well_inf_non_inf(peak_details_df):
  # Step 1: Split the 'ID' column
  split_columns = peak_details_df['ID'].str.split('_', expand=True)
- peak_details_df[['plate', 'row', 'column', 'field', 'object_number']] = split_columns
+ peak_details_df[['plate', 'row_name', 'column', 'field', 'object_number']] = split_columns
 
- # Step 2: Create 'well_ID' by combining 'row' and 'column'
- peak_details_df['well_ID'] = peak_details_df['row'] + '_' + peak_details_df['column']
+ # Step 2: Create 'well_ID' by combining 'row_name' and 'column'
+ peak_details_df['well_ID'] = peak_details_df['row_name'] + '_' + peak_details_df['column']
 
  # Assume 'pathogen_count' indicates infection if > 0
  # Add an 'infected_status' column to classify cells
@@ -669,7 +669,7 @@ def analyze_calcium_oscillations(db_loc, measurement='cell_channel_1_mean_intens
  pathogen_df = pd.read_sql("SELECT * FROM pathogen", conn)
  pathogen_df['pathogen_cell_id'] = pathogen_df['pathogen_cell_id'].astype(float).astype('Int64')
  pathogen_df = preprocess_pathogen_data(pathogen_df)
- cell_df = cell_df.merge(pathogen_df, on=['plate', 'row', 'col', 'field', 'timeid', 'object_label'], how='left', suffixes=('', '_pathogen'))
+ cell_df = cell_df.merge(pathogen_df, on=['plate', 'row_name', 'column_name', 'field', 'timeid', 'object_label'], how='left', suffixes=('', '_pathogen'))
  cell_df['parasite_count'] = cell_df['parasite_count'].fillna(0)
  print(f'After pathogen merge: {len(cell_df)} objects')
 
@@ -677,7 +677,7 @@ def analyze_calcium_oscillations(db_loc, measurement='cell_channel_1_mean_intens
  if cytoplasm:
  cytoplasm_df = pd.read_sql(f"SELECT * FROM {'cytoplasm'}", conn)
  # Merge on specified columns
- cell_df = cell_df.merge(cytoplasm_df, on=['plate', 'row', 'col', 'field', 'timeid', 'object_label'], how='left', suffixes=('', '_cytoplasm'))
+ cell_df = cell_df.merge(cytoplasm_df, on=['plate', 'row_name', 'column_name', 'field', 'timeid', 'object_label'], how='left', suffixes=('', '_cytoplasm'))
 
  print(f'After cytoplasm merge: {len(cell_df)} objects')
 
@@ -687,12 +687,12 @@ def analyze_calcium_oscillations(db_loc, measurement='cell_channel_1_mean_intens
  # Prepare DataFrame (use cell_df instead of df)
  prcf_components = cell_df['prcf'].str.split('_', expand=True)
  cell_df['plate'] = prcf_components[0]
- cell_df['row'] = prcf_components[1]
+ cell_df['row_name'] = prcf_components[1]
  cell_df['column'] = prcf_components[2]
  cell_df['field'] = prcf_components[3]
  cell_df['time'] = prcf_components[4].str.extract('t(\d+)').astype(int)
  cell_df['object_number'] = cell_df['object_label']
- cell_df['plate_row_column_field_object'] = cell_df['plate'].astype(str) + '_' + cell_df['row'].astype(str) + '_' + cell_df['column'].astype(str) + '_' + cell_df['field'].astype(str) + '_' + cell_df['object_label'].astype(str)
+ cell_df['plate_row_column_field_object'] = cell_df['plate'].astype(str) + '_' + cell_df['row_name'].astype(str) + '_' + cell_df['column'].astype(str) + '_' + cell_df['field'].astype(str) + '_' + cell_df['object_label'].astype(str)
 
  df = cell_df.copy()
 
@@ -753,7 +753,7 @@ def analyze_calcium_oscillations(db_loc, measurement='cell_channel_1_mean_intens
  peak_details_list.append({
  'ID': unique_id,
  'plate': group['plate'].iloc[0],
- 'row': group['row'].iloc[0],
+ 'row_name': group['row_name'].iloc[0],
  'column': group['column'].iloc[0],
  'field': group['field'].iloc[0],
  'object_number': group['object_number'].iloc[0],
@@ -784,7 +784,7 @@ def analyze_calcium_oscillations(db_loc, measurement='cell_channel_1_mean_intens
  peak_details_list.append({
  'ID': unique_id,
  'plate': group['plate'].iloc[0],
- 'row': group['row'].iloc[0],
+ 'row_name': group['row_name'].iloc[0],
  'column': group['column'].iloc[0],
  'field': group['field'].iloc[0],
  'object_number': group['object_number'].iloc[0],
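
The timelapse merges now key on 'row_name'/'column_name' instead of 'row'/'col'. A sketch of the merge-key contract both database tables must satisfy in 0.3.55 (empty illustrative frames with a placeholder value column, not package code):

    import pandas as pd

    keys = ['plate', 'row_name', 'column_name', 'field', 'timeid', 'object_label']
    cell_df = pd.DataFrame(columns=keys + ['measurement'])       # placeholder value column
    pathogen_df = pd.DataFrame(columns=keys + ['parasite_count'])
    # Mirrors the merge in analyze_calcium_oscillations: the left join keeps every
    # cell row, and the suffixes protect cell columns from pathogen duplicates.
    merged = cell_df.merge(pathogen_df, on=keys, how='left', suffixes=('', '_pathogen'))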
spacr/toxo.py CHANGED
@@ -10,6 +10,17 @@ from matplotlib.legend import Legend
  from matplotlib.transforms import Bbox
  from brokenaxes import brokenaxes
 
+ import os
+ import pandas as pd
+ import seaborn as sns
+ import matplotlib.pyplot as plt
+ from scipy.spatial.distance import cosine
+ from scipy.stats import pearsonr
+ import pandas as pd
+ import matplotlib.pyplot as plt
+ import seaborn as sns
+ from sklearn.metrics import mean_absolute_error
+
 
  from matplotlib.gridspec import GridSpec
 
@@ -450,4 +461,172 @@ def plot_gene_heatmaps(data, gene_list, columns, x_column='Gene ID', normalize=F
  plt.savefig(save_path, format='pdf', dpi=600, bbox_inches='tight')
  print(f"Figure saved to {save_path}")
 
- plt.show()
+ plt.show()
+
+ def generate_score_heatmap(settings):
+
+ def group_cv_score(csv, plate=1, column='c3', data_column='pred'):
+
+ df = pd.read_csv(csv)
+ if 'column_name' in df.columns:
+ df = df[df['column_name']==column]
+ elif 'column' in df.columns:
+ df['column_name'] = df['column']
+ df = df[df['column_name']==column]
+ if not plate is None:
+ df['plate'] = f"plate{plate}"
+ grouped_df = df.groupby(['plate', 'row_name', 'column_name'])[data_column].mean().reset_index()
+ grouped_df['prc'] = grouped_df['plate'].astype(str) + '_' + grouped_df['row_name'].astype(str) + '_' + grouped_df['column_name'].astype(str)
+ return grouped_df
+
+ def calculate_fraction_mixed_condition(csv, plate=1, column='c3', control_sgrnas = ['TGGT1_220950_1', 'TGGT1_233460_4']):
+ df = pd.read_csv(csv)
+ df = df[df['column_name']==column]
+ if plate not in df.columns:
+ df['plate'] = f"plate{plate}"
+ df = df[df['grna_name'].str.match(f'^{control_sgrnas[0]}$|^{control_sgrnas[1]}$')]
+ grouped_df = df.groupby(['plate', 'row_name', 'column_name'])['count'].sum().reset_index()
+ grouped_df = grouped_df.rename(columns={'count': 'total_count'})
+ merged_df = pd.merge(df, grouped_df, on=['plate', 'row_name', 'column_name'])
+ merged_df['fraction'] = merged_df['count'] / merged_df['total_count']
+ merged_df['prc'] = merged_df['plate'].astype(str) + '_' + merged_df['row_name'].astype(str) + '_' + merged_df['column_name'].astype(str)
+ return merged_df
+
+ def plot_multi_channel_heatmap(df, column='c3'):
+ """
+ Plot a heatmap with multiple channels as columns.
+
+ Parameters:
+ - df: DataFrame with scores for different channels.
+ - column: Column to filter by (default is 'c3').
+ """
+ # Extract row number and convert to integer for sorting
+ df['row_num'] = df['row_name'].str.extract(r'(\d+)').astype(int)
+
+ # Filter and sort by plate, row, and column
+ df = df[df['column_name'] == column]
+ df = df.sort_values(by=['plate', 'row_num', 'column_name'])
+
+ # Drop temporary 'row_num' column after sorting
+ df = df.drop('row_num', axis=1)
+
+ # Create a new column combining plate, row, and column for the index
+ df['plate_row_col'] = df['plate'] + '-' + df['row_name'] + '-' + df['column_name']
+
+ # Set 'plate_row_col' as the index
+ df.set_index('plate_row_col', inplace=True)
+
+ # Extract only numeric data for the heatmap
+ heatmap_data = df.select_dtypes(include=[float, int])
+
+ # Plot heatmap with square boxes, no annotations, and 'viridis' colormap
+ plt.figure(figsize=(12, 8))
+ sns.heatmap(
+ heatmap_data,
+ cmap="viridis",
+ cbar=True,
+ square=True,
+ annot=False
+ )
+
+ plt.title("Heatmap of Prediction Scores for All Channels")
+ plt.xlabel("Channels")
+ plt.ylabel("Plate-Row-Column")
+ plt.tight_layout()
+
+ # Save the figure object and return it
+ fig = plt.gcf()
+ plt.show()
+
+ return fig
+
+
+ def combine_classification_scores(folders, csv_name, data_column, plate=1, column='c3'):
+ # Ensure `folders` is a list
+ if isinstance(folders, str):
+ folders = [folders]
+
+ ls = [] # Initialize ls to store found CSV file paths
+
+ # Iterate over the provided folders
+ for folder in folders:
+ sub_folders = os.listdir(folder) # Get sub-folder list
+ for sub_folder in sub_folders: # Iterate through sub-folders
+ path = os.path.join(folder, sub_folder) # Join the full path
+
+ if os.path.isdir(path): # Check if it’s a directory
+ csv = os.path.join(path, csv_name) # Join path to the CSV file
+ if os.path.exists(csv): # If CSV exists, add to list
+ ls.append(csv)
+ else:
+ print(f'No such file: {csv}')
+
+ # Initialize combined DataFrame
+ combined_df = None
+ print(f'Found {len(ls)} CSV files')
+
+ # Loop through all collected CSV files and process them
+ for csv_file in ls:
+ df = pd.read_csv(csv_file) # Read CSV into DataFrame
+ df = df[df['column_name']==column]
+ if not plate is None:
+ df['plate'] = f"plate{plate}"
+ # Group the data by 'plate', 'row_name', and 'column_name'
+ grouped_df = df.groupby(['plate', 'row_name', 'column_name'])[data_column].mean().reset_index()
+ # Use the CSV filename to create a new column name
+ folder_name = os.path.dirname(csv_file).replace(".csv", "")
+ new_column_name = os.path.basename(f"{folder_name}_{data_column}")
+ print(new_column_name)
+ grouped_df = grouped_df.rename(columns={data_column: new_column_name})
+
+ # Merge into the combined DataFrame
+ if combined_df is None:
+ combined_df = grouped_df
+ else:
+ combined_df = pd.merge(combined_df, grouped_df, on=['plate', 'row_name', 'column_name'], how='outer')
+ combined_df['prc'] = combined_df['plate'].astype(str) + '_' + combined_df['row_name'].astype(str) + '_' + combined_df['column_name'].astype(str)
+ return combined_df
+
+ def calculate_mae(df):
+ """
+ Calculate the MAE between each channel's predictions and the fraction column for all rows.
+ """
+ # Extract numeric columns excluding 'fraction' and 'prc'
+ channels = df.drop(columns=['fraction', 'prc']).select_dtypes(include=[float, int])
+
+ mae_data = []
+
+ # Compute MAE for each channel with 'fraction' for all rows
+ for column in channels.columns:
+ for index, row in df.iterrows():
+ mae = mean_absolute_error([row['fraction']], [row[column]])
+ mae_data.append({'Channel': column, 'MAE': mae, 'Row': row['prc']})
+
+ # Convert the list of dictionaries to a DataFrame
+ mae_df = pd.DataFrame(mae_data)
+ return mae_df
+
+ result_df = combine_classification_scores(settings['folders'], settings['csv_name'], settings['data_column'], settings['plate'], settings['column'], )
+ df = calculate_fraction_mixed_condition(settings['csv'], settings['plate'], settings['column'], settings['control_sgrnas'])
+ df = df[df['grna_name']==settings['fraction_grna']]
+ fraction_df = df[['fraction', 'prc']]
+ merged_df = pd.merge(fraction_df, result_df, on=['prc'])
+ cv_df = group_cv_score(settings['cv_csv'], settings['plate'], settings['column'], settings['data_column_cv'])
+ cv_df = cv_df[[settings['data_column_cv'], 'prc']]
+ merged_df = pd.merge(merged_df, cv_df, on=['prc'])
+
+ fig = plot_multi_channel_heatmap(merged_df, settings['column'])
+ if 'row_number' in merged_df.columns:
+ merged_df = merged_df.drop('row_num', axis=1)
+ mae_df = calculate_mae(merged_df)
+ if 'row_number' in mae_df.columns:
+ mae_df = mae_df.drop('row_num', axis=1)
+
+ if not settings['dst'] is None:
+ mae_dst = os.path.join(settings['dst'], f"mae_scores_comparison_plate_{settings['plate']}.csv")
+ merged_dst = os.path.join(settings['dst'], f"scores_comparison_plate_{settings['plate']}_data.csv")
+ heatmap_save = os.path.join(settings['dst'], f"scores_comparison_plate_{settings['plate']}.pdf")
+ mae_df.to_csv(mae_dst, index=False)
+ merged_df.to_csv(merged_dst, index=False)
+ fig.savefig(heatmap_save, format='pdf', dpi=600, bbox_inches='tight')
+ return merged_df
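
generate_score_heatmap is new in this release. A hedged usage sketch inferred only from the settings keys the function reads above; every path and value below is a placeholder:

    settings = {
        'folders': ['/data/plate1_runs'],        # folders of per-channel classification results (placeholder)
        'csv_name': 'scores.csv',                # CSV filename looked up in each sub-folder (placeholder)
        'data_column': 'pred',                   # score column averaged per well
        'csv': '/data/reads_plate1.csv',         # sequencing counts with 'grna_name' and 'count' (placeholder)
        'control_sgrnas': ['TGGT1_220950_1', 'TGGT1_233460_4'],
        'fraction_grna': 'TGGT1_220950_1',       # gRNA whose well fraction is compared against scores
        'cv_csv': '/data/cv_scores.csv',         # cross-validation scores (placeholder)
        'data_column_cv': 'pred',
        'plate': 1,
        'column': 'c3',
        'dst': '/data/output',                   # set to None to skip writing the CSVs and PDF
    }
    # merged_df = generate_score_heatmap(settings)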