spacr 0.4.15__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. spacr/__init__.py +2 -2
  2. spacr/core.py +52 -10
  3. spacr/deep_spacr.py +2 -3
  4. spacr/gui.py +0 -1
  5. spacr/gui_core.py +247 -41
  6. spacr/gui_elements.py +133 -2
  7. spacr/gui_utils.py +22 -17
  8. spacr/io.py +624 -149
  9. spacr/ml.py +141 -258
  10. spacr/plot.py +76 -34
  11. spacr/resources/MEDIAR/__pycache__/SetupDict.cpython-39.pyc +0 -0
  12. spacr/resources/MEDIAR/__pycache__/evaluate.cpython-39.pyc +0 -0
  13. spacr/resources/MEDIAR/__pycache__/generate_mapping.cpython-39.pyc +0 -0
  14. spacr/resources/MEDIAR/__pycache__/main.cpython-39.pyc +0 -0
  15. spacr/resources/MEDIAR/core/Baseline/__pycache__/Predictor.cpython-39.pyc +0 -0
  16. spacr/resources/MEDIAR/core/Baseline/__pycache__/Trainer.cpython-39.pyc +0 -0
  17. spacr/resources/MEDIAR/core/Baseline/__pycache__/__init__.cpython-39.pyc +0 -0
  18. spacr/resources/MEDIAR/core/Baseline/__pycache__/utils.cpython-39.pyc +0 -0
  19. spacr/resources/MEDIAR/core/MEDIAR/__pycache__/EnsemblePredictor.cpython-39.pyc +0 -0
  20. spacr/resources/MEDIAR/core/MEDIAR/__pycache__/Predictor.cpython-39.pyc +0 -0
  21. spacr/resources/MEDIAR/core/MEDIAR/__pycache__/Trainer.cpython-39.pyc +0 -0
  22. spacr/resources/MEDIAR/core/MEDIAR/__pycache__/__init__.cpython-39.pyc +0 -0
  23. spacr/resources/MEDIAR/core/MEDIAR/__pycache__/utils.cpython-39.pyc +0 -0
  24. spacr/resources/MEDIAR/core/__pycache__/BasePredictor.cpython-39.pyc +0 -0
  25. spacr/resources/MEDIAR/core/__pycache__/BaseTrainer.cpython-39.pyc +0 -0
  26. spacr/resources/MEDIAR/core/__pycache__/__init__.cpython-39.pyc +0 -0
  27. spacr/resources/MEDIAR/core/__pycache__/utils.cpython-39.pyc +0 -0
  28. spacr/resources/MEDIAR/train_tools/__pycache__/__init__.cpython-39.pyc +0 -0
  29. spacr/resources/MEDIAR/train_tools/__pycache__/measures.cpython-39.pyc +0 -0
  30. spacr/resources/MEDIAR/train_tools/__pycache__/utils.cpython-39.pyc +0 -0
  31. spacr/resources/MEDIAR/train_tools/data_utils/__pycache__/__init__.cpython-39.pyc +0 -0
  32. spacr/resources/MEDIAR/train_tools/data_utils/__pycache__/datasetter.cpython-39.pyc +0 -0
  33. spacr/resources/MEDIAR/train_tools/data_utils/__pycache__/transforms.cpython-39.pyc +0 -0
  34. spacr/resources/MEDIAR/train_tools/data_utils/__pycache__/utils.cpython-39.pyc +0 -0
  35. spacr/resources/MEDIAR/train_tools/data_utils/custom/__pycache__/CellAware.cpython-39.pyc +0 -0
  36. spacr/resources/MEDIAR/train_tools/data_utils/custom/__pycache__/LoadImage.cpython-39.pyc +0 -0
  37. spacr/resources/MEDIAR/train_tools/data_utils/custom/__pycache__/NormalizeImage.cpython-39.pyc +0 -0
  38. spacr/resources/MEDIAR/train_tools/data_utils/custom/__pycache__/__init__.cpython-39.pyc +0 -0
  39. spacr/resources/MEDIAR/train_tools/models/__pycache__/MEDIARFormer.cpython-39.pyc +0 -0
  40. spacr/resources/MEDIAR/train_tools/models/__pycache__/__init__.cpython-39.pyc +0 -0
  41. spacr/sequencing.py +73 -38
  42. spacr/settings.py +161 -135
  43. spacr/submodules.py +618 -215
  44. spacr/timelapse.py +197 -29
  45. spacr/toxo.py +23 -23
  46. spacr/utils.py +186 -128
  47. {spacr-0.4.15.dist-info → spacr-0.5.0.dist-info}/METADATA +5 -2
  48. {spacr-0.4.15.dist-info → spacr-0.5.0.dist-info}/RECORD +53 -24
  49. spacr/stats.py +0 -221
  50. /spacr/{cellpose.py → spacr_cellpose.py} +0 -0
  51. {spacr-0.4.15.dist-info → spacr-0.5.0.dist-info}/LICENSE +0 -0
  52. {spacr-0.4.15.dist-info → spacr-0.5.0.dist-info}/WHEEL +0 -0
  53. {spacr-0.4.15.dist-info → spacr-0.5.0.dist-info}/entry_points.txt +0 -0
  54. {spacr-0.4.15.dist-info → spacr-0.5.0.dist-info}/top_level.txt +0 -0
spacr/utils.py CHANGED
@@ -1,5 +1,4 @@
 import os, re, sqlite3, torch, torchvision, random, string, shutil, cv2, tarfile, glob, psutil, platform, gzip, subprocess, time, requests, ast, traceback
-
 import numpy as np
 import pandas as pd
 from cellpose import models as cp_models
@@ -78,7 +77,7 @@ def filepaths_to_database(img_paths, settings, source_folder, crop_mode):
 
     parts = png_df['file_name'].apply(lambda x: pd.Series(_map_wells_png(x, timelapse=settings['timelapse'])))
 
-    columns = ['plate', 'row_name', 'column_name', 'field']
+    columns = ['plateID', 'rowID', 'columnID', 'fieldID']
 
     if settings['timelapse']:
         columns = columns + ['time_id']
@@ -113,7 +112,7 @@ def activation_maps_to_database(img_paths, source_folder, settings):
     png_df = pd.DataFrame(img_paths, columns=['png_path'])
     png_df['file_name'] = png_df['png_path'].apply(lambda x: os.path.basename(x))
     parts = png_df['file_name'].apply(lambda x: pd.Series(_map_wells_png(x, timelapse=False)))
-    columns = ['plate', 'row_name', 'column_name', 'field', 'prcfo', 'object']
+    columns = ['plateID', 'rowID', 'columnID', 'fieldID', 'prcfo', 'object']
     png_df[columns] = parts
 
     dataset_name = os.path.splitext(os.path.basename(settings['dataset']))[0]
@@ -136,7 +135,7 @@ def activation_correlations_to_database(df, img_paths, source_folder, settings):
     png_df = pd.DataFrame(img_paths, columns=['png_path'])
     png_df['file_name'] = png_df['png_path'].apply(lambda x: os.path.basename(x))
     parts = png_df['file_name'].apply(lambda x: pd.Series(_map_wells_png(x, timelapse=False)))
-    columns = ['plate', 'row_name', 'column_name', 'field', 'prcfo', 'object']
+    columns = ['plateID', 'rowID', 'columnID', 'fieldID', 'prcfo', 'object']
     png_df[columns] = parts
 
     # Align both DataFrames by file_name
@@ -319,18 +318,26 @@ def load_settings(csv_file_path, show=False, setting_key='setting_key', setting_
 
     return result_dict
 
-
 def save_settings(settings, name='settings', show=False):
 
-    settings_df = pd.DataFrame(list(settings.items()), columns=['Key', 'Value'])
-    if show:
-        display(settings_df)
+    settings_2 = settings.copy()
 
-    if isinstance(settings['src'], list):
-        src = settings['src'][0]
+    if isinstance(settings_2['src'], list):
+        src = settings_2['src'][0]
         name = f"{name}_list"
     else:
-        src = settings['src']
+        src = settings_2['src']
+
+    if 'test_mode' in settings_2.keys():
+        settings_2['test_mode'] = False
+
+    if 'plot' in settings_2.keys():
+        settings_2['plot'] = False
+
+    settings_df = pd.DataFrame(list(settings_2.items()), columns=['Key', 'Value'])
+
+    if show:
+        display(settings_df)
 
     settings_csv = os.path.join(src,'settings',f'{name}.csv')
     os.makedirs(os.path.join(src,'settings'), exist_ok=True)
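The rewrite changes behavior, not just layout: save_settings now serializes a copy of the settings dict with test_mode and plot forced to False, so a reloaded settings file cannot re-trigger test runs or plotting, and the caller's dict is left untouched. A minimal standalone sketch of that copy-then-sanitize pattern (settings values invented; display() is IPython-only, so plain print is used here):

    import pandas as pd

    settings = {'src': ['/data/plate1', '/data/plate2'], 'test_mode': True, 'plot': True}

    settings_2 = settings.copy()     # shallow copy: the caller's dict stays untouched
    settings_2['test_mode'] = False  # persisted settings should not re-enter test mode
    settings_2['plot'] = False       # or pop up plots when reloaded

    settings_df = pd.DataFrame(list(settings_2.items()), columns=['Key', 'Value'])
    print(settings_df)
    print(settings['test_mode'])     # still True: the original dict is unchanged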
@@ -546,10 +553,10 @@ def _get_cellpose_batch_size():
     except Exception as e:
         return 8
 
-def _extract_filename_metadata(filenames, src, regular_expression, metadata_type='cellvoyager', pick_slice=False, skip_mode='01'):
+def _extract_filename_metadata(filenames, src, regular_expression, metadata_type='cellvoyager'):
 
     images_by_key = defaultdict(list)
-
+
     for filename in filenames:
         match = regular_expression.match(filename)
         if match:
@@ -560,39 +567,44 @@ def _extract_filename_metadata(filenames, src, regular_expression, metadata_type
                 plate = os.path.basename(src)
 
                 well = match.group('wellID')
-                field = match.group('fieldID')
-                channel = match.group('chanID')
-                mode = None
-
                 if well[0].isdigit():
                     well = str(_safe_int_convert(well))
+
+                field = match.group('fieldID')
                 if field[0].isdigit():
                     field = str(_safe_int_convert(field))
+
+                channel = match.group('chanID')
                 if channel[0].isdigit():
                     channel = str(_safe_int_convert(channel))
-
+
+                if 'timeID' in match.groupdict():
+                    timeID = match.group('timeID')
+                    if timeID[0].isdigit():
+                        timeID = str(_safe_int_convert(timeID))
+                else:
+                    timeID = None
+
+                if 'sliceID' in match.groupdict():
+                    sliceID = match.group('sliceID')
+                    if sliceID[0].isdigit():
+                        sliceID = str(_safe_int_convert(sliceID))
+                else:
+                    sliceID = None
+
                 if metadata_type =='cq1':
                     orig_wellID = wellID
                     wellID = _convert_cq1_well_id(wellID)
                     print(f'Converted Well ID: {orig_wellID} to {wellID}', end='\r', flush=True)
 
-                if pick_slice:
-                    try:
-                        mode = match.group('AID')
-                    except IndexError:
-                        sliceid = '00'
-
-                    if mode == skip_mode:
-                        continue
-
-                key = (plate, well, field, channel, mode)
-                file_path = os.path.join(src, filename)  # Store the full path
+                key = (plate, well, field, channel, timeID, sliceID)
+                file_path = os.path.join(src, filename)
                 images_by_key[key].append(file_path)
 
             except IndexError:
                 print(f"Could not extract information from filename {filename} using provided regex")
         else:
-            print(f"Filename {filename} did not match provided regex")
+            print(f"Filename {filename} did not match provided regex: {regular_expression}")
             continue
 
     return images_by_key
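The refactor replaces the pick_slice/skip_mode special case with a uniform six-part key in which timeID and sliceID are simply None when the regex defines no such groups. A self-contained sketch of that grouping idea, using a simplified made-up pattern and filenames (not spacr's real regexes):

    import re
    from collections import defaultdict

    pattern = re.compile(r'(?P<plateID>.+)_(?P<wellID>[A-P]\d{2})_F(?P<fieldID>\d+)C(?P<chanID>\d+)\.tif')
    filenames = ['plate1_A01_F1C1.tif', 'plate1_A01_F1C2.tif', 'bad_name.tif']

    images_by_key = defaultdict(list)
    for filename in filenames:
        match = pattern.match(filename)
        if match:
            d = match.groupdict()
            # absent optional groups default to None, mirroring the timeID/sliceID handling
            key = (d['plateID'], d['wellID'], d['fieldID'], d['chanID'], d.get('timeID'), d.get('sliceID'))
            images_by_key[key].append(filename)
        else:
            print(f'Filename {filename} did not match provided regex: {pattern.pattern}')

    print(dict(images_by_key))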
@@ -634,11 +646,11 @@ def _update_database_with_merged_info(db_path, df, table='png_list', columns=['p
     if 'prcfo' not in df.columns:
         print(f'generating prcfo columns')
         try:
-            df['prcfo'] = df['plate'].astype(str) + '_' + df['row_name'].astype(str) + '_' + df['column_name'].astype(str) + '_' + df['field'].astype(str) + '_o' + df['object_label'].astype(int).astype(str)
+            df['prcfo'] = df['plateID'].astype(str) + '_' + df['rowID'].astype(str) + '_' + df['columnID'].astype(str) + '_' + df['fieldID'].astype(str) + '_o' + df['object_label'].astype(int).astype(str)
         except Exception as e:
             print('Merging on cell failed, trying with cell_id')
             try:
-                df['prcfo'] = df['plate'].astype(str) + '_' + df['row_name'].astype(str) + '_' + df['column_name'].astype(str) + '_' + df['field'].astype(str) + '_o' + df['cell_id'].astype(int).astype(str)
+                df['prcfo'] = df['plateID'].astype(str) + '_' + df['rowID'].astype(str) + '_' + df['columnID'].astype(str) + '_' + df['fieldID'].astype(str) + '_o' + df['cell_id'].astype(int).astype(str)
             except Exception as e:
                 print(e)
 
@@ -730,7 +742,7 @@ def _map_values(row, values, locs):
     if locs:
         value_dict = {loc: value for value, loc_list in zip(values, locs) for loc in loc_list}
         # Determine if we're dealing with row or column based on first location identifier
-        type_ = 'row_name' if locs[0][0][0] == 'r' else 'column_name'
+        type_ = 'rowID' if locs[0][0][0] == 'r' else 'columnID'
         return value_dict.get(row[type_], None)
     return values[0] if values else None
 
@@ -915,21 +927,21 @@ def _merge_and_save_to_database(morph_df, intensity_df, table_type, source_folde
     merged_df['file_name'] = file_name
     merged_df['path_name'] = os.path.join(source_folder, file_name + '.npy')
     if timelapse:
-        merged_df[['plate', 'row_name', 'column_name', 'field', 'timeid', 'prcf']] = merged_df['file_name'].apply(lambda x: pd.Series(_map_wells(x, timelapse)))
+        merged_df[['plateID', 'rowID', 'columnID', 'fieldID', 'timeid', 'prcf']] = merged_df['file_name'].apply(lambda x: pd.Series(_map_wells(x, timelapse)))
     else:
-        merged_df[['plate', 'row_name', 'column_name', 'field', 'prcf']] = merged_df['file_name'].apply(lambda x: pd.Series(_map_wells(x, timelapse)))
+        merged_df[['plateID', 'rowID', 'columnID', 'fieldID', 'prcf']] = merged_df['file_name'].apply(lambda x: pd.Series(_map_wells(x, timelapse)))
     cols = merged_df.columns.tolist()  # get the list of all columns
     if table_type == 'cell' or table_type == 'cytoplasm':
-        column_list = ['object_label', 'plate', 'row_name', 'column_name', 'field', 'prcf', 'file_name', 'path_name']
+        column_list = ['object_label', 'plateID', 'rowID', 'columnID', 'fieldID', 'prcf', 'file_name', 'path_name']
     elif table_type == 'nucleus' or table_type == 'pathogen':
-        column_list = ['object_label', 'cell_id', 'plate', 'row_name', 'column_name', 'field', 'prcf', 'file_name', 'path_name']
+        column_list = ['object_label', 'cell_id', 'plateID', 'rowID', 'columnID', 'fieldID', 'prcf', 'file_name', 'path_name']
     else:
         raise ValueError(f"Invalid table_type: {table_type}")
     # Check if all columns in column_list are in cols
     missing_columns = [col for col in column_list if col not in cols]
     if len(missing_columns) == 1 and missing_columns[0] == 'cell_id':
         missing_columns = False
-        column_list = ['object_label', 'plate', 'row_name', 'column_name', 'field', 'prcf', 'file_name', 'path_name']
+        column_list = ['object_label', 'plateID', 'rowID', 'columnID', 'fieldID', 'prcf', 'file_name', 'path_name']
     if missing_columns:
         raise ValueError(f"Columns missing in DataFrame: {missing_columns}")
     for i, col in enumerate(column_list):
@@ -1143,43 +1155,6 @@ def _masks_to_masks_stack(masks):
     for idx, mask in enumerate(masks):
        mask_stack.append(mask)
     return mask_stack
-
-def _get_diam_v1(mag, obj):
-
-    if mag == 20:
-        if obj == 'cell':
-            diamiter = 120
-        elif obj == 'nucleus':
-            diamiter = 60
-        elif obj == 'pathogen':
-            diamiter = 20
-        else:
-            raise ValueError("Invalid magnification: Use 20, 40 or 60")
-
-    elif mag == 40:
-        if obj == 'cell':
-            diamiter = 160
-        elif obj == 'nucleus':
-            diamiter = 80
-        elif obj == 'pathogen':
-            diamiter = 40
-        else:
-            raise ValueError("Invalid magnification: Use 20, 40 or 60")
-
-    elif mag == 60:
-        if obj == 'cell':
-            diamiter = 200
-        if obj == 'nucleus':
-            diamiter = 90
-        if obj == 'pathogen':
-            diamiter = 60
-        else:
-            raise ValueError("Invalid magnification: Use 20, 40 or 60")
-
-    else:
-        raise ValueError("Invalid magnification: Use 20, 40 or 60")
-
-    return diamiter
 
 def _get_diam(mag, obj):
 
@@ -1339,11 +1314,11 @@ def annotate_conditions(df, cells=None, cell_loc=None, pathogens=None, pathogen_
     """
 
     def _get_type(val):
-        """Determine if a value maps to 'row_name' or 'column_name'."""
+        """Determine if a value maps to 'rowID' or 'columnID'."""
        if isinstance(val, str) and val.startswith('c'):
-            return 'column_name'
+            return 'columnID'
        elif isinstance(val, str) and val.startswith('r'):
-            return 'row_name'
+            return 'rowID'
        return None
 
     def _map_or_default(column_name, values, loc, df):
@@ -1411,7 +1386,7 @@ def _split_data(df, group_by, object_type):
     # Ensure 'prcf' column exists by concatenating specific columns
     if 'prcf' not in df.columns:
         try:
-            df['prcf'] = df['plate'].astype(str) + '_' + df['row_name'].astype(str) + '_' + df['column_name'].astype(str) + '_' + df['field'].astype(str)
+            df['prcf'] = df['plateID'].astype(str) + '_' + df['rowID'].astype(str) + '_' + df['columnID'].astype(str) + '_' + df['fieldID'].astype(str)
         except Exception as e:
             print(e)
 
@@ -1508,7 +1483,7 @@ def _group_by_well(df):
     non_numeric_cols = df.select_dtypes(include=['object']).columns
 
     # Apply mean function to numeric columns and first to non-numeric
-    df_grouped = df.groupby(['plate', 'row_name', 'column_name']).agg({**{col: np.mean for col in numeric_cols}, **{col: 'first' for col in non_numeric_cols}})
+    df_grouped = df.groupby(['plateID', 'rowID', 'columnID']).agg({**{col: np.mean for col in numeric_cols}, **{col: 'first' for col in non_numeric_cols}})
     return df_grouped
 
 ###################################################
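Only the grouping keys change here; the aggregation pattern itself (mean for numeric columns, first value for the rest) stays. A toy illustration of that pattern on invented data; unlike the original, the group keys are excluded from the agg dict so the snippet runs cleanly on current pandas:

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({
        'plateID':   ['p1', 'p1', 'p1'],
        'rowID':     ['r1', 'r1', 'r2'],
        'columnID':  ['c1', 'c1', 'c1'],
        'area':      [10.0, 20.0, 30.0],
        'condition': ['ctrl', 'ctrl', 'drug'],
    })

    numeric_cols = df.select_dtypes(include=np.number).columns
    non_numeric_cols = df.select_dtypes(include=['object']).columns.difference(['plateID', 'rowID', 'columnID'])

    # mean for numeric columns, first value for everything else
    df_grouped = df.groupby(['plateID', 'rowID', 'columnID']).agg(
        {**{col: 'mean' for col in numeric_cols}, **{col: 'first' for col in non_numeric_cols}})
    print(df_grouped)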
@@ -2187,11 +2162,11 @@ def augment_classes(dst, nc, pc, generate=True,move=True):
 def annotate_predictions(csv_loc):
     df = pd.read_csv(csv_loc)
     df['filename'] = df['path'].apply(lambda x: x.split('/')[-1])
-    df[['plate', 'well', 'field', 'object']] = df['filename'].str.split('_', expand=True)
+    df[['plateID', 'well', 'fieldID', 'object']] = df['filename'].str.split('_', expand=True)
     df['object'] = df['object'].str.replace('.png', '')
 
     def assign_condition(row):
-        plate = int(row['plate'])
+        plate = int(row['plateID'])
         col = int(row['well'][1:])
 
         if col > 3:
@@ -2342,7 +2317,7 @@ def check_multicollinearity(x):
 
 def lasso_reg(merged_df, alpha_value=0.01, reg_type='lasso'):
     # Separate predictors and response
-    X = merged_df[['gene', 'grna', 'plate', 'row_name', 'column']]
+    X = merged_df[['gene', 'grna', 'plateID', 'rowID', 'columnID']]
     y = merged_df['pred']
 
     # One-hot encode the categorical predictors
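lasso_reg now regresses on plateID/rowID/columnID after one-hot encoding the categorical predictors. A compact sketch of that flow on synthetic data (scikit-learn's Lasso assumed; this is an illustration of the pattern, not spacr's exact implementation):

    import numpy as np
    import pandas as pd
    from sklearn.linear_model import Lasso

    rng = np.random.default_rng(0)
    merged_df = pd.DataFrame({
        'gene':     rng.choice(['geneA', 'geneB'], 100),
        'grna':     rng.choice(['g1', 'g2', 'g3'], 100),
        'plateID':  rng.choice(['p1', 'p2'], 100),
        'rowID':    rng.choice([f'r{i}' for i in range(1, 9)], 100),
        'columnID': rng.choice([f'c{i}' for i in range(1, 13)], 100),
        'pred':     rng.random(100),
    })

    X = pd.get_dummies(merged_df[['gene', 'grna', 'plateID', 'rowID', 'columnID']])  # one-hot encode
    y = merged_df['pred']

    model = Lasso(alpha=0.01).fit(X, y)
    coefs = pd.Series(model.coef_, index=X.columns).sort_values()
    print(coefs.head())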
@@ -3120,12 +3095,8 @@ def _get_regex(metadata_type, img_format, custom_regex=None):
         regex = f'(?P<plateID>.*)_(?P<wellID>.*)_T(?P<timeID>.*)F(?P<fieldID>.*)L(?P<laserID>..)A(?P<AID>..)Z(?P<sliceID>.*)C(?P<chanID>.*){img_format}'
     elif metadata_type == 'cq1':
         regex = f'W(?P<wellID>.*)F(?P<fieldID>.*)T(?P<timeID>.*)Z(?P<sliceID>.*)C(?P<chanID>.*){img_format}'
-    elif metadata_type == 'nikon':
-        regex = f'(?P<plateID>.*)_(?P<wellID>.*)_T(?P<timeID>.*)F(?P<fieldID>.*)L(?P<laserID>..)A(?P<AID>..)Z(?P<sliceID>.*)C(?P<chanID>.*){img_format}'
-    elif metadata_type == 'zeis':
-        regex = f'(?P<plateID>.*)_(?P<wellID>.*)_T(?P<timeID>.*)F(?P<fieldID>.*)L(?P<laserID>..)A(?P<AID>..)Z(?P<sliceID>.*)C(?P<chanID>.*){img_format}'
-    elif metadata_type == 'leica':
-        regex = f'(?P<plateID>.*)_(?P<wellID>.*)_T(?P<timeID>.*)F(?P<fieldID>.*)L(?P<laserID>..)A(?P<AID>..)Z(?P<sliceID>.*)C(?P<chanID>.*){img_format}'
+    elif metadata_type == 'auto':
+        regex = f'(?P<plateID>.*)_(?P<wellID>.*)_T(?P<timeID>.*)F(?P<fieldID>.*)L(?P<laserID>.*)C(?P<chanID>.*).tif'
     elif metadata_type == 'custom':
         regex = f'({custom_regex}){img_format}'
 
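The nikon, zeis and leica branches shared one pattern and collapse into a single auto branch; note the new pattern hard-codes a .tif suffix rather than interpolating img_format. A quick check of how the named groups capture a filename of that shape (example name invented):

    import re

    regex = r'(?P<plateID>.*)_(?P<wellID>.*)_T(?P<timeID>.*)F(?P<fieldID>.*)L(?P<laserID>.*)C(?P<chanID>.*).tif'
    match = re.match(regex, 'plate1_A01_T0001F001L01C02.tif')
    print(match.groupdict())
    # {'plateID': 'plate1', 'wellID': 'A01', 'timeID': '0001', 'fieldID': '001', 'laserID': '01', 'chanID': '02'}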
@@ -3143,7 +3114,7 @@ def _run_test_mode(src, regex, timelapse=False, test_images=10, random_test=True
 
     if os.path.exists(os.path.join(src, 'orig')):
         src = os.path.join(src, 'orig')
-
+
     all_filenames = [filename for filename in os.listdir(src) if regular_expression.match(filename)]
     print(f'Found {len(all_filenames)} files')
     images_by_set = defaultdict(list)
@@ -3185,7 +3156,6 @@ def _choose_model(model_name, device, object_type='cell', restore_type=None, obj
         model_path = os.path.join(current_dir, 'models', 'cp', 'toxo_pv_lumen.CP_model')
         print(model_path)
         model = cp_models.CellposeModel(gpu=torch.cuda.is_available(), model_type=None, pretrained_model=model_path, diam_mean=diameter, device=device)
-        #model = cp_models.Cellpose(gpu=torch.cuda.is_available(), model_type='cyto', device=device)
         print(f'Using Toxoplasma PV lumen model to generate pathogen masks')
         return model
 
@@ -3313,15 +3283,6 @@ class SaliencyMapGenerator:
         return fig
 
     def percentile_normalize(self, img, lower_percentile=2, upper_percentile=98):
-        """
-        Normalize each channel of the image to the given percentiles.
-        Args:
-            img: Input image as numpy array with shape (H, W, C)
-            lower_percentile: Lower percentile for normalization (default 2)
-            upper_percentile: Upper percentile for normalization (default 98)
-        Returns:
-            img: Normalized image
-        """
         img_normalized = np.zeros_like(img)
 
         for c in range(img.shape[2]):  # Iterate over each channel
@@ -3331,7 +3292,6 @@ class SaliencyMapGenerator:
 
         return img_normalized
 
-
 class GradCAMGenerator:
     def __init__(self, model, target_layer, cam_type='gradcam'):
         self.model = model
@@ -3436,15 +3396,6 @@ class GradCAMGenerator:
         return fig
 
     def percentile_normalize(self, img, lower_percentile=2, upper_percentile=98):
-        """
-        Normalize each channel of the image to the given percentiles.
-        Args:
-            img: Input image as numpy array with shape (H, W, C)
-            lower_percentile: Lower percentile for normalization (default 2)
-            upper_percentile: Upper percentile for normalization (default 98)
-        Returns:
-            img: Normalized image
-        """
         img_normalized = np.zeros_like(img)
 
         for c in range(img.shape[2]):  # Iterate over each channel
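Both generators drop the docstring from percentile_normalize but keep its behavior: each channel is rescaled so the 2nd and 98th intensity percentiles map onto 0 and 1. A standalone sketch of that per-channel normalization (not the exact class method; a small epsilon guards against division by zero):

    import numpy as np

    def percentile_normalize(img, lower_percentile=2, upper_percentile=98):
        """Rescale each channel of an (H, W, C) array to [0, 1] between its percentiles."""
        img_normalized = np.zeros_like(img, dtype=np.float64)
        for c in range(img.shape[2]):  # iterate over each channel
            low, high = np.percentile(img[:, :, c], [lower_percentile, upper_percentile])
            img_normalized[:, :, c] = np.clip((img[:, :, c] - low) / (high - low + 1e-12), 0, 1)
        return img_normalized

    img = np.random.rand(64, 64, 3) * 255
    print(percentile_normalize(img).min(), percentile_normalize(img).max())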
@@ -4714,12 +4665,12 @@ def process_vision_results(df, threshold=0.5):
     # Split the 'path' column using _map_wells function
     mapped_values = df['path'].apply(lambda x: _map_wells(x))
 
-    df['plate'] = mapped_values.apply(lambda x: x[0])
-    df['row_name'] = mapped_values.apply(lambda x: x[1])
-    df['column'] = mapped_values.apply(lambda x: x[2])
-    df['field'] = mapped_values.apply(lambda x: x[3])
+    df['plateID'] = mapped_values.apply(lambda x: x[0])
+    df['rowID'] = mapped_values.apply(lambda x: x[1])
+    df['columnID'] = mapped_values.apply(lambda x: x[2])
+    df['fieldID'] = mapped_values.apply(lambda x: x[3])
     df['object'] = df['path'].str.split('_').str[3].str.split('.').str[0]
-    df['prc'] = df['plate'].astype(str) + '_' + df['row_name'].astype(str) + '_' + df['column'].astype(str)
+    df['prc'] = df['plateID'].astype(str) + '_' + df['rowID'].astype(str) + '_' + df['columnID'].astype(str)
     df['cv_predictions'] = (df['pred'] >= threshold).astype(int)
 
     return df
@@ -4732,10 +4683,10 @@ def get_ml_results_paths(src, model_type='xgboost', channel_of_interest=1):
     elif isinstance(channel_of_interest, int):
         feature_string = f"channel_{channel_of_interest}"
 
-    elif channel_of_interest is 'morphology':
+    elif channel_of_interest == 'morphology':
         feature_string = 'morphology'
 
-    elif channel_of_interest is None:
+    elif channel_of_interest == None:
         feature_string = 'all_features'
     else:
         raise ValueError(f"Unsupported channel_of_interest: {channel_of_interest}. Supported values are 'int', 'list', 'None', or 'morphology'.")
@@ -5134,24 +5085,24 @@ def fill_holes_in_mask(mask):
 
 def correct_metadata_column_names(df):
     if 'plate_name' in df.columns:
-        df = df.rename(columns={'plate_name': 'plate'})
+        df = df.rename(columns={'plate_name': 'plateID'})
     if 'column_name' in df.columns:
-        df = df.rename(columns={'column_name': 'column'})
+        df = df.rename(columns={'column_name': 'columnID'})
     if 'col' in df.columns:
-        df = df.rename(columns={'col': 'column'})
+        df = df.rename(columns={'col': 'columnID'})
     if 'row_name' in df.columns:
-        df = df.rename(columns={'row_name': 'row_name'})
+        df = df.rename(columns={'row_name': 'rowID'})
     if 'grna_name' in df.columns:
         df = df.rename(columns={'grna_name': 'grna'})
     if 'plate_row' in df.columns:
-        df[['plate', 'row_name']] = df['plate_row'].str.split('_', expand=True)
+        df[['plateID', 'rowID']] = df['plate_row'].str.split('_', expand=True)
     return df
 
-def control_filelist(folder, mode='column', values=['01','02']):
+def control_filelist(folder, mode='columnID', values=['01','02']):
     files = os.listdir(folder)
-    if mode is 'column':
+    if mode == 'columnID':
         filtered_files = [file for file in files if file.split('_')[1][1:] in values]
-    if mode is 'row_name':
+    if mode == 'rowID':
         filtered_files = [file for file in files if file.split('_')[1][:1] in values]
     return filtered_files
 
@@ -5169,12 +5120,12 @@ def rename_columns_in_db(db_path):
     columns_info = cursor.fetchall()
     column_names = [col[1] for col in columns_info]
 
-    # Check if columns 'row' or 'col' exist
+    # Check if columns 'rowID' or 'columnID' exist
     columns_to_rename = {}
     if 'row' in column_names:
-        columns_to_rename['row'] = 'row_name'
+        columns_to_rename['row'] = 'rowID'
     if 'col' in column_names:
-        columns_to_rename['col'] = 'column_name'
+        columns_to_rename['col'] = 'columnID'
 
     # Rename columns if necessary
     if columns_to_rename:
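With columns_to_rename populated, the renames themselves are plain SQL. A minimal end-to-end sketch of the detect-and-rename pattern against an in-memory database (ALTER TABLE ... RENAME COLUMN needs SQLite >= 3.25; an illustration, not spacr's exact code):

    import sqlite3

    conn = sqlite3.connect(':memory:')
    cursor = conn.cursor()
    cursor.execute("CREATE TABLE png_list (row TEXT, col TEXT, value REAL)")

    cursor.execute("PRAGMA table_info(png_list)")
    column_names = [col[1] for col in cursor.fetchall()]

    columns_to_rename = {}
    if 'row' in column_names:
        columns_to_rename['row'] = 'rowID'
    if 'col' in column_names:
        columns_to_rename['col'] = 'columnID'

    for old, new in columns_to_rename.items():
        cursor.execute(f'ALTER TABLE png_list RENAME COLUMN "{old}" TO "{new}"')

    cursor.execute("PRAGMA table_info(png_list)")
    print([col[1] for col in cursor.fetchall()])  # ['rowID', 'columnID', 'value']
    conn.close()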
@@ -5418,3 +5369,110 @@ def normalize_src_path(src):
         return src  # Return as a string if not a list
 
     raise ValueError(f"Invalid type for 'src': {type(src).__name__}, expected str or list")
+
+def generate_image_path_map(root_folder, valid_extensions=("tif", "tiff", "png", "jpg", "jpeg", "bmp", "czi", "nd2", "lif")):
+    """
+    Recursively scans a folder and its subfolders for images, then creates a mapping of:
+    {original_image_path: new_image_path}, where the new path includes all subfolder names.
+
+    Args:
+        root_folder (str): The root directory to scan for images.
+        valid_extensions (tuple): Tuple of valid image file extensions.
+
+    Returns:
+        dict: A dictionary mapping original image paths to their new paths.
+    """
+    image_path_map = {}
+
+    for dirpath, _, filenames in os.walk(root_folder):
+        for file in filenames:
+            ext = file.lower().split('.')[-1]
+            if ext in valid_extensions:
+                # Get relative path of the image from root_folder
+                relative_path = os.path.relpath(dirpath, root_folder)
+
+                # Construct new filename: Embed folder hierarchy into the name
+                folder_parts = relative_path.split(os.sep)  # Get all folder names
+                folder_info = "_".join(folder_parts) if folder_parts else ""  # Join with underscores
+
+                # Generate new filename
+                new_filename = f"{folder_info}_{file}" if folder_info else file
+
+                # Store in dictionary (original path -> new path)
+                original_path = os.path.join(dirpath, file)
+                new_path = os.path.join(root_folder, new_filename)
+                image_path_map[original_path] = new_path
+
+    return image_path_map
+
+def copy_images_to_consolidated(image_path_map, root_folder):
+    """
+    Copies images from their original locations to a 'consolidated' folder,
+    renaming them according to the generated dictionary.
+
+    Args:
+        image_path_map (dict): Dictionary mapping {original_path: new_path}.
+        root_folder (str): The root directory where the 'consolidated' folder will be created.
+    """
+    consolidated_folder = os.path.join(root_folder, "consolidated")
+    os.makedirs(consolidated_folder, exist_ok=True)  # Ensure 'consolidated' folder exists
+    files_processed = 0
+    files_to_process = len(image_path_map)
+    time_ls = []
+
+    for original_path, new_path in image_path_map.items():
+
+        start = time.time()
+        new_filename = os.path.basename(new_path)  # Extract only the new filename
+        new_file_path = os.path.join(consolidated_folder, new_filename)  # Place in 'consolidated' folder
+
+        shutil.copy2(original_path, new_file_path)  # Copy file with metadata preserved
+
+        files_processed += 1
+        stop = time.time()
+        duration = (stop - start)
+        time_ls.append(duration)
+
+        print_progress(files_processed, files_to_process, n_jobs=1, time_ls=time_ls, batch_size=None, operation_type=f'Consolidating images')
+        #print(f"Copied: {original_path} -> {new_file_path}")
+
+def correct_metadata(df):
+
+    #if 'object' in df.columns:
+    #    df['objectID'] = df['object']
+
+    if 'object_name' in df.columns:
+        df['objectID'] = df['object_name']
+
+    if 'field_name' in df.columns:
+        df['fieldID'] = df['field_name']
+
+    if 'plate' in df.columns:
+        df['plateID'] = df['plate']
+
+    if 'plate_name' in df.columns:
+        df['plateID'] = df['plate_name']
+
+    if 'row' in df.columns:
+        df = df.rename(columns={'row': 'rowID'})
+
+    if 'row_name' in df.columns:
+        df = df.rename(columns={'row_name': 'rowID'})
+
+    if 'col' in df.columns:
+        df = df.rename(columns={'col': 'columnID'})
+
+    if 'column' in df.columns:
+        df = df.rename(columns={'column': 'columnID'})
+
+    if 'column_name' in df.columns:
+        df = df.rename(columns={'column_name': 'columnID'})
+
+    if 'field' in df.columns:
+        df = df.rename(columns={'field': 'fieldID'})
+
+    if 'field_name' in df.columns:
+        df = df.rename(columns={'field_name': 'fieldID'})
+
+    return df
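Together, the two new helpers flatten a nested acquisition tree into a single consolidated/ directory whose filenames encode the original folder hierarchy. Hypothetical usage (path invented; print_progress is spacr's own progress helper):

    # hypothetical usage of the new helpers; '/data/experiment1' is an invented path
    root = '/data/experiment1'
    path_map = generate_image_path_map(root)     # {orig_path: root/<sub_folders>_<name>}
    copy_images_to_consolidated(path_map, root)  # copies into /data/experiment1/consolidated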
{spacr-0.4.15.dist-info → spacr-0.5.0.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: spacr
-Version: 0.4.15
+Version: 0.5.0
 Summary: Spatial phenotype analysis of crisp screens (SpaCr)
 Home-page: https://github.com/EinarOlafsson/spacr
 Author: Einar Birnir Olafsson
@@ -41,6 +41,9 @@ Requires-Dist: pillow<11.0,>=10.2.0
 Requires-Dist: tifffile>=2023.4.12
 Requires-Dist: nd2reader<4.0,>=3.3.0
 Requires-Dist: czifile
+Requires-Dist: pylibCZIrw<6.0,>=5.0.0
+Requires-Dist: aicspylibczi
+Requires-Dist: readlif
 Requires-Dist: imageio<3.0,>=2.34.0
 Requires-Dist: pingouin<1.0,>=0.5.5
 Requires-Dist: umap-learn<1.0,>=0.5.6
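The three new requirements back the broader microscopy-format support landing in spacr/io.py: pylibCZIrw and aicspylibczi read Zeiss .czi files, and readlif reads Leica .lif containers. As rough orientation only, a sketch following readlif's documented API (filename invented; exact attributes may vary by version):

    from readlif.reader import LifFile

    lif = LifFile('example.lif')             # hypothetical file name
    image = lif.get_image(0)                 # first image in the LIF container
    frame = image.get_frame(z=0, t=0, c=0)   # one plane, returned as a PIL image
    print(image.dims, image.channels)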
@@ -78,7 +81,7 @@ Provides-Extra: headless
 Requires-Dist: opencv-python-headless; extra == "headless"
 
 .. |Documentation Status| image:: https://readthedocs.org/projects/spacr/badge/?version=latest
-   :target: https://spacr.readthedocs.io/en/latest/?badge=latest
+   :target: https://einarolafsson.github.io/spacr
 .. |PyPI version| image:: https://badge.fury.io/py/spacr.svg
    :target: https://badge.fury.io/py/spacr
 .. |Python version| image:: https://img.shields.io/pypi/pyversions/spacr