spacr 0.4.15__py3-none-any.whl → 0.4.60__py3-none-any.whl

This diff shows the differences in content between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
spacr/timelapse.py CHANGED
@@ -533,14 +533,14 @@ def exponential_decay(x, a, b, c):
533
533
 
534
534
  def preprocess_pathogen_data(pathogen_df):
535
535
  # Group by identifiers and count the number of parasites
536
- parasite_counts = pathogen_df.groupby(['plate', 'row_name', 'column_name', 'field', 'timeid', 'pathogen_cell_id']).size().reset_index(name='parasite_count')
536
+ parasite_counts = pathogen_df.groupby(['plateID', 'rowID', 'column_name', 'fieldID', 'timeid', 'pathogen_cell_id']).size().reset_index(name='parasite_count')
537
537
 
538
538
  # Aggregate numerical columns and take the first of object columns
539
- agg_funcs = {col: 'mean' if np.issubdtype(pathogen_df[col].dtype, np.number) else 'first' for col in pathogen_df.columns if col not in ['plate', 'row_name', 'column_name', 'field', 'timeid', 'pathogen_cell_id', 'parasite_count']}
540
- pathogen_agg = pathogen_df.groupby(['plate', 'row_name', 'column_name', 'field', 'timeid', 'pathogen_cell_id']).agg(agg_funcs).reset_index()
539
+ agg_funcs = {col: 'mean' if np.issubdtype(pathogen_df[col].dtype, np.number) else 'first' for col in pathogen_df.columns if col not in ['plateID', 'rowID', 'column_name', 'fieldID', 'timeid', 'pathogen_cell_id', 'parasite_count']}
540
+ pathogen_agg = pathogen_df.groupby(['plateID', 'rowID', 'column_name', 'fieldID', 'timeid', 'pathogen_cell_id']).agg(agg_funcs).reset_index()
541
541
 
542
542
  # Merge the counts back into the aggregated data
543
- pathogen_agg = pathogen_agg.merge(parasite_counts, on=['plate', 'row_name', 'column_name', 'field', 'timeid', 'pathogen_cell_id'])
543
+ pathogen_agg = pathogen_agg.merge(parasite_counts, on=['plateID', 'rowID', 'column_name', 'fieldID', 'timeid', 'pathogen_cell_id'])
544
544
 
545
545
  # Remove the object_label column as it corresponds to the pathogen ID not the cell ID
546
546
  if 'object_label' in pathogen_agg.columns:
@@ -604,10 +604,10 @@ def save_results_dataframe(df, src, results_name):
604
604
  def summarize_per_well(peak_details_df):
605
605
  # Step 1: Split the 'ID' column
606
606
  split_columns = peak_details_df['ID'].str.split('_', expand=True)
607
- peak_details_df[['plate', 'row_name', 'column', 'field', 'object_number']] = split_columns
607
+ peak_details_df[['plateID', 'rowID', 'columnID', 'fieldID', 'object_number']] = split_columns
608
608
 
609
- # Step 2: Create 'well_ID' by combining 'row_name' and 'column'
610
- peak_details_df['well_ID'] = peak_details_df['row_name'] + '_' + peak_details_df['column']
609
+ # Step 2: Create 'well_ID' by combining 'rowID' and 'columnID'
610
+ peak_details_df['well_ID'] = peak_details_df['rowID'] + '_' + peak_details_df['columnID']
611
611
 
612
612
  # Filter entries where 'amplitude' is not null
613
613
  filtered_df = peak_details_df[peak_details_df['amplitude'].notna()]
@@ -635,10 +635,10 @@ def summarize_per_well(peak_details_df):
635
635
  def summarize_per_well_inf_non_inf(peak_details_df):
636
636
  # Step 1: Split the 'ID' column
637
637
  split_columns = peak_details_df['ID'].str.split('_', expand=True)
638
- peak_details_df[['plate', 'row_name', 'column', 'field', 'object_number']] = split_columns
638
+ peak_details_df[['plateID', 'rowID', 'columnID', 'fieldID', 'object_number']] = split_columns
639
639
 
640
- # Step 2: Create 'well_ID' by combining 'row_name' and 'column'
641
- peak_details_df['well_ID'] = peak_details_df['row_name'] + '_' + peak_details_df['column']
640
+ # Step 2: Create 'well_ID' by combining 'rowID' and 'columnID'
641
+ peak_details_df['well_ID'] = peak_details_df['rowID'] + '_' + peak_details_df['columnID']
642
642
 
643
643
  # Assume 'pathogen_count' indicates infection if > 0
644
644
  # Add an 'infected_status' column to classify cells
@@ -669,7 +669,7 @@ def analyze_calcium_oscillations(db_loc, measurement='cell_channel_1_mean_intens
669
669
  pathogen_df = pd.read_sql("SELECT * FROM pathogen", conn)
670
670
  pathogen_df['pathogen_cell_id'] = pathogen_df['pathogen_cell_id'].astype(float).astype('Int64')
671
671
  pathogen_df = preprocess_pathogen_data(pathogen_df)
672
- cell_df = cell_df.merge(pathogen_df, on=['plate', 'row_name', 'column_name', 'field', 'timeid', 'object_label'], how='left', suffixes=('', '_pathogen'))
672
+ cell_df = cell_df.merge(pathogen_df, on=['plateID', 'rowID', 'column_name', 'fieldID', 'timeid', 'object_label'], how='left', suffixes=('', '_pathogen'))
673
673
  cell_df['parasite_count'] = cell_df['parasite_count'].fillna(0)
674
674
  print(f'After pathogen merge: {len(cell_df)} objects')
675
675
 
@@ -677,7 +677,7 @@ def analyze_calcium_oscillations(db_loc, measurement='cell_channel_1_mean_intens
677
677
  if cytoplasm:
678
678
  cytoplasm_df = pd.read_sql(f"SELECT * FROM {'cytoplasm'}", conn)
679
679
  # Merge on specified columns
680
- cell_df = cell_df.merge(cytoplasm_df, on=['plate', 'row_name', 'column_name', 'field', 'timeid', 'object_label'], how='left', suffixes=('', '_cytoplasm'))
680
+ cell_df = cell_df.merge(cytoplasm_df, on=['plateID', 'rowID', 'column_name', 'fieldID', 'timeid', 'object_label'], how='left', suffixes=('', '_cytoplasm'))
681
681
 
682
682
  print(f'After cytoplasm merge: {len(cell_df)} objects')
683
683
 
@@ -686,13 +686,13 @@ def analyze_calcium_oscillations(db_loc, measurement='cell_channel_1_mean_intens
686
686
  # Continue with your existing processing on cell_df now containing merged data...
687
687
  # Prepare DataFrame (use cell_df instead of df)
688
688
  prcf_components = cell_df['prcf'].str.split('_', expand=True)
689
- cell_df['plate'] = prcf_components[0]
690
- cell_df['row_name'] = prcf_components[1]
691
- cell_df['column'] = prcf_components[2]
692
- cell_df['field'] = prcf_components[3]
689
+ cell_df['plateID'] = prcf_components[0]
690
+ cell_df['rowID'] = prcf_components[1]
691
+ cell_df['columnID'] = prcf_components[2]
692
+ cell_df['fieldID'] = prcf_components[3]
693
693
  cell_df['time'] = prcf_components[4].str.extract('t(\d+)').astype(int)
694
694
  cell_df['object_number'] = cell_df['object_label']
695
- cell_df['plate_row_column_field_object'] = cell_df['plate'].astype(str) + '_' + cell_df['row_name'].astype(str) + '_' + cell_df['column'].astype(str) + '_' + cell_df['field'].astype(str) + '_' + cell_df['object_label'].astype(str)
695
+ cell_df['plate_row_column_field_object'] = cell_df['plateID'].astype(str) + '_' + cell_df['rowID'].astype(str) + '_' + cell_df['columnID'].astype(str) + '_' + cell_df['fieldID'].astype(str) + '_' + cell_df['object_label'].astype(str)
696
696
 
697
697
  df = cell_df.copy()
698
698
 
@@ -752,10 +752,10 @@ def analyze_calcium_oscillations(db_loc, measurement='cell_channel_1_mean_intens
752
752
  if len(peaks) == 0:
753
753
  peak_details_list.append({
754
754
  'ID': unique_id,
755
- 'plate': group['plate'].iloc[0],
756
- 'row_name': group['row_name'].iloc[0],
757
- 'column': group['column'].iloc[0],
758
- 'field': group['field'].iloc[0],
755
+ 'plateID': group['plateID'].iloc[0],
756
+ 'rowID': group['rowID'].iloc[0],
757
+ 'columnID': group['columnID'].iloc[0],
758
+ 'fieldID': group['fieldID'].iloc[0],
759
759
  'object_number': group['object_number'].iloc[0],
760
760
  'time': np.nan, # The time of the peak
761
761
  'amplitude': np.nan,
@@ -783,10 +783,10 @@ def analyze_calcium_oscillations(db_loc, measurement='cell_channel_1_mean_intens
783
783
 
784
784
  peak_details_list.append({
785
785
  'ID': unique_id,
786
- 'plate': group['plate'].iloc[0],
787
- 'row_name': group['row_name'].iloc[0],
788
- 'column': group['column'].iloc[0],
789
- 'field': group['field'].iloc[0],
786
+ 'plateID': group['plateID'].iloc[0],
787
+ 'rowID': group['rowID'].iloc[0],
788
+ 'columnID': group['columnID'].iloc[0],
789
+ 'fieldID': group['fieldID'].iloc[0],
790
790
  'object_number': group['object_number'].iloc[0],
791
791
  'time': peak_time, # The time of the peak
792
792
  'amplitude': amplitude,
spacr/toxo.py CHANGED
@@ -494,25 +494,25 @@ def generate_score_heatmap(settings):
494
494
  if 'column_name' in df.columns:
495
495
  df = df[df['column_name']==column]
496
496
  elif 'column' in df.columns:
497
- df['column_name'] = df['column']
497
+ df['columnID'] = df['column']
498
498
  df = df[df['column_name']==column]
499
499
  if not plate is None:
500
- df['plate'] = f"plate{plate}"
501
- grouped_df = df.groupby(['plate', 'row_name', 'column_name'])[data_column].mean().reset_index()
502
- grouped_df['prc'] = grouped_df['plate'].astype(str) + '_' + grouped_df['row_name'].astype(str) + '_' + grouped_df['column_name'].astype(str)
500
+ df['plateID'] = f"plate{plate}"
501
+ grouped_df = df.groupby(['plateID', 'rowID', 'column_name'])[data_column].mean().reset_index()
502
+ grouped_df['prc'] = grouped_df['plateID'].astype(str) + '_' + grouped_df['rowID'].astype(str) + '_' + grouped_df['column_name'].astype(str)
503
503
  return grouped_df
504
504
 
505
505
  def calculate_fraction_mixed_condition(csv, plate=1, column='c3', control_sgrnas = ['TGGT1_220950_1', 'TGGT1_233460_4']):
506
506
  df = pd.read_csv(csv)
507
507
  df = df[df['column_name']==column]
508
508
  if plate not in df.columns:
509
- df['plate'] = f"plate{plate}"
509
+ df['plateID'] = f"plate{plate}"
510
510
  df = df[df['grna_name'].str.match(f'^{control_sgrnas[0]}$|^{control_sgrnas[1]}$')]
511
- grouped_df = df.groupby(['plate', 'row_name', 'column_name'])['count'].sum().reset_index()
511
+ grouped_df = df.groupby(['plateID', 'rowID', 'column_name'])['count'].sum().reset_index()
512
512
  grouped_df = grouped_df.rename(columns={'count': 'total_count'})
513
- merged_df = pd.merge(df, grouped_df, on=['plate', 'row_name', 'column_name'])
513
+ merged_df = pd.merge(df, grouped_df, on=['plateID', 'rowID', 'column_name'])
514
514
  merged_df['fraction'] = merged_df['count'] / merged_df['total_count']
515
- merged_df['prc'] = merged_df['plate'].astype(str) + '_' + merged_df['row_name'].astype(str) + '_' + merged_df['column_name'].astype(str)
515
+ merged_df['prc'] = merged_df['plateID'].astype(str) + '_' + merged_df['rowID'].astype(str) + '_' + merged_df['column_name'].astype(str)
516
516
  return merged_df
517
517
 
518
518
  def plot_multi_channel_heatmap(df, column='c3'):
@@ -524,17 +524,17 @@ def generate_score_heatmap(settings):
524
524
  - column: Column to filter by (default is 'c3').
525
525
  """
526
526
  # Extract row number and convert to integer for sorting
527
- df['row_num'] = df['row_name'].str.extract(r'(\d+)').astype(int)
527
+ df['row_num'] = df['rowID'].str.extract(r'(\d+)').astype(int)
528
528
 
529
529
  # Filter and sort by plate, row, and column
530
530
  df = df[df['column_name'] == column]
531
- df = df.sort_values(by=['plate', 'row_num', 'column_name'])
531
+ df = df.sort_values(by=['plateID', 'row_num', 'column_name'])
532
532
 
533
533
  # Drop temporary 'row_num' column after sorting
534
534
  df = df.drop('row_num', axis=1)
535
535
 
536
536
  # Create a new column combining plate, row, and column for the index
537
- df['plate_row_col'] = df['plate'] + '-' + df['row_name'] + '-' + df['column_name']
537
+ df['plate_row_col'] = df['plateID'] + '-' + df['rowID'] + '-' + df['column_name']
538
538
 
539
539
  # Set 'plate_row_col' as the index
540
540
  df.set_index('plate_row_col', inplace=True)
@@ -593,9 +593,9 @@ def generate_score_heatmap(settings):
593
593
  df = pd.read_csv(csv_file) # Read CSV into DataFrame
594
594
  df = df[df['column_name']==column]
595
595
  if not plate is None:
596
- df['plate'] = f"plate{plate}"
597
- # Group the data by 'plate', 'row_name', and 'column_name'
598
- grouped_df = df.groupby(['plate', 'row_name', 'column_name'])[data_column].mean().reset_index()
596
+ df['plateID'] = f"plate{plate}"
597
+ # Group the data by 'plateID', 'rowID', and 'column_name'
598
+ grouped_df = df.groupby(['plateID', 'rowID', 'column_name'])[data_column].mean().reset_index()
599
599
  # Use the CSV filename to create a new column name
600
600
  folder_name = os.path.dirname(csv_file).replace(".csv", "")
601
601
  new_column_name = os.path.basename(f"{folder_name}_{data_column}")
@@ -606,8 +606,8 @@ def generate_score_heatmap(settings):
606
606
  if combined_df is None:
607
607
  combined_df = grouped_df
608
608
  else:
609
- combined_df = pd.merge(combined_df, grouped_df, on=['plate', 'row_name', 'column_name'], how='outer')
610
- combined_df['prc'] = combined_df['plate'].astype(str) + '_' + combined_df['row_name'].astype(str) + '_' + combined_df['column_name'].astype(str)
609
+ combined_df = pd.merge(combined_df, grouped_df, on=['plateID', 'rowID', 'column_name'], how='outer')
610
+ combined_df['prc'] = combined_df['plateID'].astype(str) + '_' + combined_df['rowID'].astype(str) + '_' + combined_df['column_name'].astype(str)
611
611
  return combined_df
612
612
 
613
613
  def calculate_mae(df):
@@ -629,16 +629,16 @@ def generate_score_heatmap(settings):
629
629
  mae_df = pd.DataFrame(mae_data)
630
630
  return mae_df
631
631
 
632
- result_df = combine_classification_scores(settings['folders'], settings['csv_name'], settings['data_column'], settings['plate'], settings['column'], )
633
- df = calculate_fraction_mixed_condition(settings['csv'], settings['plate'], settings['column'], settings['control_sgrnas'])
632
+ result_df = combine_classification_scores(settings['folders'], settings['csv_name'], settings['data_column'], settings['plateID'], settings['columnID'], )
633
+ df = calculate_fraction_mixed_condition(settings['csv'], settings['plateID'], settings['columnID'], settings['control_sgrnas'])
634
634
  df = df[df['grna_name']==settings['fraction_grna']]
635
635
  fraction_df = df[['fraction', 'prc']]
636
636
  merged_df = pd.merge(fraction_df, result_df, on=['prc'])
637
- cv_df = group_cv_score(settings['cv_csv'], settings['plate'], settings['column'], settings['data_column_cv'])
637
+ cv_df = group_cv_score(settings['cv_csv'], settings['plateID'], settings['columnID'], settings['data_column_cv'])
638
638
  cv_df = cv_df[[settings['data_column_cv'], 'prc']]
639
639
  merged_df = pd.merge(merged_df, cv_df, on=['prc'])
640
640
 
641
- fig = plot_multi_channel_heatmap(merged_df, settings['column'])
641
+ fig = plot_multi_channel_heatmap(merged_df, settings['columnID'])
642
642
  if 'row_number' in merged_df.columns:
643
643
  merged_df = merged_df.drop('row_num', axis=1)
644
644
  mae_df = calculate_mae(merged_df)
@@ -646,9 +646,9 @@ def generate_score_heatmap(settings):
646
646
  mae_df = mae_df.drop('row_num', axis=1)
647
647
 
648
648
  if not settings['dst'] is None:
649
- mae_dst = os.path.join(settings['dst'], f"mae_scores_comparison_plate_{settings['plate']}.csv")
650
- merged_dst = os.path.join(settings['dst'], f"scores_comparison_plate_{settings['plate']}_data.csv")
651
- heatmap_save = os.path.join(settings['dst'], f"scores_comparison_plate_{settings['plate']}.pdf")
649
+ mae_dst = os.path.join(settings['dst'], f"mae_scores_comparison_plate_{settings['plateID']}.csv")
650
+ merged_dst = os.path.join(settings['dst'], f"scores_comparison_plate_{settings['plateID']}_data.csv")
651
+ heatmap_save = os.path.join(settings['dst'], f"scores_comparison_plate_{settings['plateID']}.pdf")
652
652
  mae_df.to_csv(mae_dst, index=False)
653
653
  merged_df.to_csv(merged_dst, index=False)
654
654
  fig.savefig(heatmap_save, format='pdf', dpi=600, bbox_inches='tight')