spacr 0.2.46__py3-none-any.whl → 0.2.56__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. spacr/core.py +306 -21
  2. spacr/deep_spacr.py +101 -41
  3. spacr/gui.py +1 -3
  4. spacr/gui_core.py +78 -65
  5. spacr/gui_elements.py +437 -152
  6. spacr/gui_utils.py +84 -73
  7. spacr/io.py +14 -7
  8. spacr/measure.py +196 -145
  9. spacr/plot.py +2 -42
  10. spacr/resources/font/open_sans/OFL.txt +93 -0
  11. spacr/resources/font/open_sans/OpenSans-Italic-VariableFont_wdth,wght.ttf +0 -0
  12. spacr/resources/font/open_sans/OpenSans-VariableFont_wdth,wght.ttf +0 -0
  13. spacr/resources/font/open_sans/README.txt +100 -0
  14. spacr/resources/font/open_sans/static/OpenSans-Bold.ttf +0 -0
  15. spacr/resources/font/open_sans/static/OpenSans-BoldItalic.ttf +0 -0
  16. spacr/resources/font/open_sans/static/OpenSans-ExtraBold.ttf +0 -0
  17. spacr/resources/font/open_sans/static/OpenSans-ExtraBoldItalic.ttf +0 -0
  18. spacr/resources/font/open_sans/static/OpenSans-Italic.ttf +0 -0
  19. spacr/resources/font/open_sans/static/OpenSans-Light.ttf +0 -0
  20. spacr/resources/font/open_sans/static/OpenSans-LightItalic.ttf +0 -0
  21. spacr/resources/font/open_sans/static/OpenSans-Medium.ttf +0 -0
  22. spacr/resources/font/open_sans/static/OpenSans-MediumItalic.ttf +0 -0
  23. spacr/resources/font/open_sans/static/OpenSans-Regular.ttf +0 -0
  24. spacr/resources/font/open_sans/static/OpenSans-SemiBold.ttf +0 -0
  25. spacr/resources/font/open_sans/static/OpenSans-SemiBoldItalic.ttf +0 -0
  26. spacr/resources/font/open_sans/static/OpenSans_Condensed-Bold.ttf +0 -0
  27. spacr/resources/font/open_sans/static/OpenSans_Condensed-BoldItalic.ttf +0 -0
  28. spacr/resources/font/open_sans/static/OpenSans_Condensed-ExtraBold.ttf +0 -0
  29. spacr/resources/font/open_sans/static/OpenSans_Condensed-ExtraBoldItalic.ttf +0 -0
  30. spacr/resources/font/open_sans/static/OpenSans_Condensed-Italic.ttf +0 -0
  31. spacr/resources/font/open_sans/static/OpenSans_Condensed-Light.ttf +0 -0
  32. spacr/resources/font/open_sans/static/OpenSans_Condensed-LightItalic.ttf +0 -0
  33. spacr/resources/font/open_sans/static/OpenSans_Condensed-Medium.ttf +0 -0
  34. spacr/resources/font/open_sans/static/OpenSans_Condensed-MediumItalic.ttf +0 -0
  35. spacr/resources/font/open_sans/static/OpenSans_Condensed-Regular.ttf +0 -0
  36. spacr/resources/font/open_sans/static/OpenSans_Condensed-SemiBold.ttf +0 -0
  37. spacr/resources/font/open_sans/static/OpenSans_Condensed-SemiBoldItalic.ttf +0 -0
  38. spacr/resources/font/open_sans/static/OpenSans_SemiCondensed-Bold.ttf +0 -0
  39. spacr/resources/font/open_sans/static/OpenSans_SemiCondensed-BoldItalic.ttf +0 -0
  40. spacr/resources/font/open_sans/static/OpenSans_SemiCondensed-ExtraBold.ttf +0 -0
  41. spacr/resources/font/open_sans/static/OpenSans_SemiCondensed-ExtraBoldItalic.ttf +0 -0
  42. spacr/resources/font/open_sans/static/OpenSans_SemiCondensed-Italic.ttf +0 -0
  43. spacr/resources/font/open_sans/static/OpenSans_SemiCondensed-Light.ttf +0 -0
  44. spacr/resources/font/open_sans/static/OpenSans_SemiCondensed-LightItalic.ttf +0 -0
  45. spacr/resources/font/open_sans/static/OpenSans_SemiCondensed-Medium.ttf +0 -0
  46. spacr/resources/font/open_sans/static/OpenSans_SemiCondensed-MediumItalic.ttf +0 -0
  47. spacr/resources/font/open_sans/static/OpenSans_SemiCondensed-Regular.ttf +0 -0
  48. spacr/resources/font/open_sans/static/OpenSans_SemiCondensed-SemiBold.ttf +0 -0
  49. spacr/resources/font/open_sans/static/OpenSans_SemiCondensed-SemiBoldItalic.ttf +0 -0
  50. spacr/sequencing.py +481 -587
  51. spacr/settings.py +197 -122
  52. spacr/utils.py +21 -13
  53. {spacr-0.2.46.dist-info → spacr-0.2.56.dist-info}/METADATA +7 -4
  54. spacr-0.2.56.dist-info/RECORD +100 -0
  55. spacr-0.2.46.dist-info/RECORD +0 -60
  56. {spacr-0.2.46.dist-info → spacr-0.2.56.dist-info}/LICENSE +0 -0
  57. {spacr-0.2.46.dist-info → spacr-0.2.56.dist-info}/WHEEL +0 -0
  58. {spacr-0.2.46.dist-info → spacr-0.2.56.dist-info}/entry_points.txt +0 -0
  59. {spacr-0.2.46.dist-info → spacr-0.2.56.dist-info}/top_level.txt +0 -0
spacr/core.py CHANGED
@@ -877,7 +877,106 @@ def annotate_results(pred_loc):
     display(df)
     return df
 
-def generate_dataset(src, file_metadata=None, experiment='TSG101_screen', sample=None):
+def generate_dataset(settings={}):
+
+    from .utils import initiate_counter, add_images_to_tar
+
+    db_path = os.path.join(settings['src'], 'measurements', 'measurements.db')
+    dst = os.path.join(settings['src'], 'datasets')
+    all_paths = []
+
+    # Connect to the database and retrieve the image paths
+    print(f"Reading DataBase: {db_path}")
+    try:
+        with sqlite3.connect(db_path) as conn:
+            cursor = conn.cursor()
+            if settings['file_metadata']:
+                if isinstance(settings['file_metadata'], str):
+                    cursor.execute("SELECT png_path FROM png_list WHERE png_path LIKE ?", (f"%{settings['file_metadata']}%",))
+            else:
+                cursor.execute("SELECT png_path FROM png_list")
+
+            while True:
+                rows = cursor.fetchmany(1000)
+                if not rows:
+                    break
+                all_paths.extend([row[0] for row in rows])
+
+    except sqlite3.Error as e:
+        print(f"Database error: {e}")
+        return
+    except Exception as e:
+        print(f"Error: {e}")
+        return
+
+    if isinstance(settings['sample'], int):
+        selected_paths = random.sample(all_paths, settings['sample'])
+        print(f"Random selection of {len(selected_paths)} paths")
+    else:
+        selected_paths = all_paths
+        random.shuffle(selected_paths)
+        print(f"All paths: {len(selected_paths)} paths")
+
+    total_images = len(selected_paths)
+    print(f"Found {total_images} images")
+
+    # Create a temp folder in dst
+    temp_dir = os.path.join(dst, "temp_tars")
+    os.makedirs(temp_dir, exist_ok=True)
+
+    # Chunking the data
+    num_procs = max(2, cpu_count() - 2)
+    chunk_size = len(selected_paths) // num_procs
+    remainder = len(selected_paths) % num_procs
+
+    paths_chunks = []
+    start = 0
+    for i in range(num_procs):
+        end = start + chunk_size + (1 if i < remainder else 0)
+        paths_chunks.append(selected_paths[start:end])
+        start = end
+
+    temp_tar_files = [os.path.join(temp_dir, f"temp_{i}.tar") for i in range(num_procs)]
+
+    print(f"Generating temporary tar files in {dst}")
+
+    # Initialize shared counter and lock
+    counter = Value('i', 0)
+    lock = Lock()
+
+    with Pool(processes=num_procs, initializer=initiate_counter, initargs=(counter, lock)) as pool:
+        pool.starmap(add_images_to_tar, [(paths_chunks[i], temp_tar_files[i], total_images) for i in range(num_procs)])
+
+    # Combine the temporary tar files into a final tar
+    date_name = datetime.date.today().strftime('%y%m%d')
+    if not settings['file_metadata'] is None:
+        tar_name = f"{date_name}_{settings['experiment']}_{settings['file_metadata']}.tar"
+    else:
+        tar_name = f"{date_name}_{settings['experiment']}.tar"
+    tar_name = os.path.join(dst, tar_name)
+    if os.path.exists(tar_name):
+        number = random.randint(1, 100)
+        tar_name_2 = f"{date_name}_{settings['experiment']}_{settings['file_metadata']}_{number}.tar"
+        print(f"Warning: {os.path.basename(tar_name)} exists, saving as {os.path.basename(tar_name_2)} ")
+        tar_name = os.path.join(dst, tar_name_2)
+
+    print(f"Merging temporary files")
+
+    with tarfile.open(tar_name, 'w') as final_tar:
+        for temp_tar_path in temp_tar_files:
+            with tarfile.open(temp_tar_path, 'r') as temp_tar:
+                for member in temp_tar.getmembers():
+                    file_obj = temp_tar.extractfile(member)
+                    final_tar.addfile(member, file_obj)
+            os.remove(temp_tar_path)
+
+    # Delete the temp folder
+    shutil.rmtree(temp_dir)
+    print(f"\nSaved {total_images} images to {tar_name}")
+
+    return tar_name
+
+def generate_dataset_v1(src, file_metadata=None, experiment='TSG101_screen', sample=None):
 
     from .utils import initiate_counter, add_images_to_tar
 
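The hunk above replaces the positional-argument generate_dataset with a settings-dict API (the old signature is kept as generate_dataset_v1). A minimal usage sketch, assuming only the keys the diff shows being read ('src', 'file_metadata', 'experiment', 'sample'); the paths and values are placeholders, not part of the package documentation.

# Hypothetical usage sketch of the new settings-dict API; values are placeholders.
from spacr.core import generate_dataset

settings = {
    'src': '/path/to/experiment',     # folder that contains measurements/measurements.db
    'file_metadata': None,            # or a substring filter applied to png_path
    'experiment': 'TSG101_screen',    # used in the name of the output tar
    'sample': 1000,                   # int -> random subsample; anything else -> all paths
}

tar_path = generate_dataset(settings)  # returns the path of the merged tar archive
print(tar_path)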
@@ -974,7 +1073,7 @@ def generate_dataset(src, file_metadata=None, experiment='TSG101_screen', sample
     shutil.rmtree(temp_dir)
     print(f"\nSaved {total_images} images to {tar_name}")
 
-def apply_model_to_tar(tar_path, model_path, file_type='cell_png', image_size=224, batch_size=64, normalize=True, preload='images', n_jobs=10, threshold=0.5, verbose=False):
+def apply_model_to_tar_v1(tar_path, model_path, file_type='cell_png', image_size=224, batch_size=64, normalize=True, preload='images', n_jobs=10, threshold=0.5, verbose=False):
 
     from .io import TarImageDataset
     from .utils import process_vision_results, print_progress
@@ -1044,6 +1143,76 @@ def apply_model_to_tar(tar_path, model_path, file_type='cell_png', image_size=22
     torch.cuda.memory.empty_cache()
     return df
 
+def apply_model_to_tar(settings={}):
+
+    from .io import TarImageDataset
+    from .utils import process_vision_results, print_progress
+
+    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+    if settings['normalize']:
+        transform = transforms.Compose([
+            transforms.ToTensor(),
+            transforms.CenterCrop(size=(settings['image_size'], settings['image_size'])),
+            transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))])
+    else:
+        transform = transforms.Compose([
+            transforms.ToTensor(),
+            transforms.CenterCrop(size=(settings['image_size'], settings['image_size']))])
+
+    if settings['verbose']:
+        print(f"Loading model from {settings['model_path']}")
+        print(f"Loading dataset from {settings['tar_path']}")
+
+    model = torch.load(settings['model_path'])
+
+    dataset = TarImageDataset(settings['tar_path'], transform=transform)
+    data_loader = DataLoader(dataset, batch_size=settings['batch_size'], shuffle=True, num_workers=settings['n_jobs'], pin_memory=True)
+
+    model_name = os.path.splitext(os.path.basename(settings['model_path']))[0]
+    dataset_name = os.path.splitext(os.path.basename(settings['tar_path']))[0]
+    date_name = datetime.date.today().strftime('%y%m%d')
+    dst = os.path.dirname(settings['tar_path'])
+    result_loc = f'{dst}/{date_name}_{dataset_name}_{model_name}_result.csv'
+
+    model.eval()
+    model = model.to(device)
+
+    if settings['verbose']:
+        print(model)
+        print(f'Generated dataset with {len(dataset)} images')
+        print(f'Generating loader from {len(data_loader)} batches')
+        print(f'Results wil be saved in: {result_loc}')
+        print(f'Model is in eval mode')
+        print(f'Model loaded to device')
+
+    prediction_pos_probs = []
+    filenames_list = []
+    time_ls = []
+    gc.collect()
+    with torch.no_grad():
+        for batch_idx, (batch_images, filenames) in enumerate(data_loader, start=1):
+            start = time.time()
+            images = batch_images.to(torch.float).to(device)
+            outputs = model(images)
+            batch_prediction_pos_prob = torch.sigmoid(outputs).cpu().numpy()
+            prediction_pos_probs.extend(batch_prediction_pos_prob.tolist())
+            filenames_list.extend(filenames)
+            stop = time.time()
+            duration = stop - start
+            time_ls.append(duration)
+            files_processed = batch_idx*settings['batch_size']
+            files_to_process = len(data_loader)
+            print_progress(files_processed, files_to_process, n_jobs=settings['n_jobs'], time_ls=time_ls, batch_size=settings['batch_size'], operation_type="Tar dataset")
+
+    data = {'path':filenames_list, 'pred':prediction_pos_probs}
+    df = pd.DataFrame(data, index=None)
+    df = process_vision_results(df, settings['score_threshold'])
+
+    df.to_csv(result_loc, index=True, header=True, mode='w')
+    torch.cuda.empty_cache()
+    torch.cuda.memory.empty_cache()
+    return df
+
 def apply_model(src, model_path, image_size=224, batch_size=64, normalize=True, n_jobs=10):
 
  from .io import NoClassDataset
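The hunk above demotes the old positional-argument version to apply_model_to_tar_v1 and adds a settings-dict apply_model_to_tar. A hedged usage sketch, limited to the keys the new function reads in the diff ('tar_path', 'model_path', 'image_size', 'batch_size', 'normalize', 'n_jobs', 'score_threshold', 'verbose'); paths are placeholders.

# Hypothetical usage sketch; keys taken from the diff above, paths are placeholders.
from spacr.core import apply_model_to_tar

settings = {
    'tar_path': '/path/to/dataset.tar',
    'model_path': '/path/to/model.pth',
    'image_size': 224,
    'batch_size': 64,
    'normalize': True,
    'n_jobs': 10,
    'score_threshold': 0.5,   # replaces the old 'threshold' argument
    'verbose': False,
}

df = apply_model_to_tar(settings)  # writes <date>_<dataset>_<model>_result.csv next to the tar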
@@ -1206,19 +1375,19 @@ def generate_dataset_from_lists(dst, class_data, classes, test_split=0.1):
         for path in train_data:
             start = time.time()
             shutil.copy(path, os.path.join(train_class_dir, os.path.basename(path)))
-            processed_files += 1
             duration = time.time() - start
             time_ls.append(duration)
             print_progress(processed_files, total_files, n_jobs=1, time_ls=None, batch_size=None, operation_type="Copying files for Train dataset")
+            processed_files += 1
 
         # Copy test files
         for path in test_data:
             start = time.time()
             shutil.copy(path, os.path.join(test_class_dir, os.path.basename(path)))
-            processed_files += 1
             duration = time.time() - start
             time_ls.append(duration)
             print_progress(processed_files, total_files, n_jobs=1, time_ls=None, batch_size=None, operation_type="Copying files for Test dataset")
+            processed_files += 1
 
     # Print summary
     for cls in classes:
@@ -1226,9 +1395,9 @@ def generate_dataset_from_lists(dst, class_data, classes, test_split=0.1):
         test_class_dir = os.path.join(dst, f'test/{cls}')
         print(f'Train class {cls}: {len(os.listdir(train_class_dir))}, Test class {cls}: {len(os.listdir(test_class_dir))}')
 
-    return
+    return os.path.join(dst, 'train'), os.path.join(dst, 'test')
 
-def generate_training_dataset(src, mode='annotation', annotation_column='test', annotated_classes=[1,2], classes=['nc','pc'], size=200, test_split=0.1, class_metadata=[['c1'],['c2']], metadata_type_by='col', channel_of_interest=3, custom_measurement=None, tables=None, png_type='cell_png'):
+def generate_training_dataset_v1(src, mode='annotation', annotation_column='test', annotated_classes=[1,2], classes=['nc','pc'], size=200, test_split=0.1, class_metadata=[['c1'],['c2']], metadata_type_by='col', channel_of_interest=3, custom_measurement=None, tables=None, png_type='cell_png'):
 
     from .io import _read_and_merge_data, _read_db
     from .utils import get_paths_from_db, annotate_conditions
@@ -1329,6 +1498,110 @@ def generate_training_dataset(src, mode='annotation', annotation_column='test',
 
     return
 
+def generate_training_dataset(settings):
+
+    from .io import _read_and_merge_data, _read_db
+    from .utils import get_paths_from_db, annotate_conditions
+    from .settings import set_generate_training_dataset_defaults
+
+    settings = set_generate_training_dataset_defaults(settings)
+
+    db_path = os.path.join(settings['src'], 'measurements','measurements.db')
+    dst = os.path.join(settings['src'], 'datasets', 'training')
+
+    if os.path.exists(dst):
+        for i in range(1, 1000):
+            dst = os.path.join(settings['src'], 'datasets', f'training_{i}')
+            if not os.path.exists(dst):
+                print(f'Creating new directory for training: {dst}')
+                break
+
+    if settings['dataset_mode'] == 'annotation':
+        class_paths_ls_2 = []
+        class_paths_ls = training_dataset_from_annotation(db_path, dst, settings['annotation_column'], annotated_classes=settings['annotated_classes'])
+        for class_paths in class_paths_ls:
+            class_paths_temp = random.sample(class_paths, settings['size'])
+            class_paths_ls_2.append(class_paths_temp)
+        class_paths_ls = class_paths_ls_2
+
+    elif settings['dataset_mode'] == 'metadata':
+        class_paths_ls = []
+        class_len_ls = []
+        [df] = _read_db(db_loc=db_path, tables=['png_list'])
+        df['metadata_based_class'] = pd.NA
+        for i, class_ in enumerate(settings['classes']):
+            ls = settings['class_metadata'][i]
+            df.loc[df[settings['metadata_type_by']].isin(ls), 'metadata_based_class'] = class_
+
+        for class_ in settings['classes']:
+            if settings['size'] == None:
+                c_s = []
+                for c in settings['classes']:
+                    c_s_t_df = df[df['metadata_based_class'] == c]
+                    c_s.append(len(c_s_t_df))
+                    print(f'Found {len(c_s_t_df)} images for class {c}')
+                size = min(c_s)
+                print(f'Using the smallest class size: {size}')
+
+            class_temp_df = df[df['metadata_based_class'] == class_]
+            class_len_ls.append(len(class_temp_df))
+            print(f'Found {len(class_temp_df)} images for class {class_}')
+            class_paths_temp = random.sample(class_temp_df['png_path'].tolist(), settings['size'])
+            class_paths_ls.append(class_paths_temp)
+
+    elif settings['dataset_mode'] == 'recruitment':
+        class_paths_ls = []
+        if not isinstance(settings['tables'], list):
+            tables = ['cell', 'nucleus', 'pathogen','cytoplasm']
+
+        df, _ = _read_and_merge_data(locs=[db_path],
+                                     tables=tables,
+                                     verbose=False,
+                                     include_multinucleated=True,
+                                     include_multiinfected=True,
+                                     include_noninfected=True)
+
+        print('length df 1', len(df))
+
+        df = annotate_conditions(df, cells=['HeLa'], cell_loc=None, pathogens=['pathogen'], pathogen_loc=None, treatments=settings['classes'], treatment_loc=settings['class_metadata'], types = settings['metadata_type_by'])
+        print('length df 2', len(df))
+        [png_list_df] = _read_db(db_loc=db_path, tables=['png_list'])
+
+        if settings['custom_measurement'] != None:
+
+            if not isinstance(settings['custom_measurement'], list):
+                print(f'custom_measurement should be a list, add [ measurement_1, measurement_2 ] or [ measurement ]')
+                return
+
+            if isinstance(settings['custom_measurement'], list):
+                if len(settings['custom_measurement']) == 2:
+                    print(f"Classes will be defined by the Q1 and Q3 quantiles of recruitment ({settings['custom_measurement'][0]}/{settings['custom_measurement'][1]})")
+                    df['recruitment'] = df[f"{settings['custom_measurement'][0]}"]/df[f"{settings['custom_measurement'][1]}"]
+                if len(settings['custom_measurement']) == 1:
+                    print(f"Classes will be defined by the Q1 and Q3 quantiles of recruitment ({settings['custom_measurement'][0]})")
+                    df['recruitment'] = df[f"{settings['custom_measurement'][0]}"]
+        else:
+            print(f"Classes will be defined by the Q1 and Q3 quantiles of recruitment (pathogen/cytoplasm for channel {settings['channel_of_interest']})")
+            df['recruitment'] = df[f"pathogen_channel_{settings['channel_of_interest']}_mean_intensity"]/df[f"cytoplasm_channel_{settings['channel_of_interest']}_mean_intensity"]
+
+        q25 = df['recruitment'].quantile(0.25)
+        q75 = df['recruitment'].quantile(0.75)
+        df_lower = df[df['recruitment'] <= q25]
+        df_upper = df[df['recruitment'] >= q75]
+
+        class_paths_lower = get_paths_from_db(df=df_lower, png_df=png_list_df, image_type=settings['png_type'])
+
+        class_paths_lower = random.sample(class_paths_lower['png_path'].tolist(), settings['size'])
+        class_paths_ls.append(class_paths_lower)
+
+        class_paths_upper = get_paths_from_db(df=df_upper, png_df=png_list_df, image_type=settings['png_type'])
+        class_paths_upper = random.sample(class_paths_upper['png_path'].tolist(), settings['size'])
+        class_paths_ls.append(class_paths_upper)
+
+    train_class_dir, test_class_dir = generate_dataset_from_lists(dst, class_data=class_paths_ls, classes=settings['classes'], test_split=settings['test_split'])
+
+    return train_class_dir, test_class_dir
+
 def generate_loaders(src, train_mode='erm', mode='train', image_size=224, batch_size=32, classes=['nc','pc'], n_jobs=None, validation_split=0.0, max_show=2, pin_memory=False, normalize=False, channels=[1, 2, 3], augment=False, verbose=False):
 
  """
@@ -1729,20 +2002,43 @@ def preprocess_generate_masks(src, settings={}):
 
     if settings['preprocess']:
         settings, src = preprocess_img_data(settings)
-
+
+    files_to_process = 3
+    files_processed = 0
     if settings['masks']:
         mask_src = os.path.join(src, 'norm_channel_stack')
         if settings['cell_channel'] != None:
+            time_ls=[]
             if check_mask_folder(src, 'cell_mask_stack'):
+                start = time.time()
                 generate_cellpose_masks(mask_src, settings, 'cell')
+                stop = time.time()
+                duration = (stop - start)
+                time_ls.append(duration)
+                files_processed += 1
+                print_progress(files_processed, files_to_process, n_jobs=1, time_ls=time_ls, batch_size=None, operation_type=f'cell_mask_gen')
 
         if settings['nucleus_channel'] != None:
+            time_ls=[]
             if check_mask_folder(src, 'nucleus_mask_stack'):
+                start = time.time()
                 generate_cellpose_masks(mask_src, settings, 'nucleus')
+                stop = time.time()
+                duration = (stop - start)
+                time_ls.append(duration)
+                files_processed += 1
+                print_progress(files_processed, files_to_process, n_jobs=1, time_ls=time_ls, batch_size=None, operation_type=f'nucleus_mask_gen')
 
         if settings['pathogen_channel'] != None:
+            time_ls=[]
             if check_mask_folder(src, 'pathogen_mask_stack'):
+                start = time.time()
                 generate_cellpose_masks(mask_src, settings, 'pathogen')
+                stop = time.time()
+                duration = (stop - start)
+                time_ls.append(duration)
+                files_processed += 1
+                print_progress(files_processed, files_to_process, n_jobs=1, time_ls=time_ls, batch_size=None, operation_type=f'pathogen_mask_gen')
 
         #if settings['organelle'] != None:
         #    if check_mask_folder(src, 'organelle_mask_stack'):
@@ -2012,7 +2308,6 @@ def generate_cellpose_masks(src, settings, object_type):
     average_sizes = []
     time_ls = []
 
-    files_to_process = len(paths)
     for file_index, path in enumerate(paths):
         name = os.path.basename(path)
         name, ext = os.path.splitext(name)
@@ -2050,7 +2345,6 @@ def generate_cellpose_masks(src, settings, object_type):
                 print(f'Cut batch at indecies: {timelapse_frame_limits}, New batch_size: {batch_size} ')
 
         for i in range(0, stack.shape[0], batch_size):
-            start = time.time()
             mask_stack = []
             if stack.shape[3] == 1:
                 batch = stack[i: i+batch_size, :, :, [0,0]].astype(stack.dtype)
@@ -2072,14 +2366,6 @@ def generate_cellpose_masks(src, settings, object_type):
                 save_path = os.path.join(movie_path, f'timelapse_{object_type}_{name}.mp4')
                 _npz_to_movie(batch, batch_filenames, save_path, fps=2)
 
-            stop = time.time()
-            duration = (stop - start)
-            time_ls.append(duration)
-            files_processed = (file_index+1)*len(batch_filenames)
-            files_processed = len(paths)*batch.shape[0]
-            print('file_index', file_index, 'len(paths)', len(paths), 'batch.shape[0]', batch.shape[0])
-            print_progress(files_processed, files_to_process, n_jobs=1, time_ls=time_ls, batch_size=batch.shape[0], operation_type=f'{object_type}_mask_gen')
-
             output = model.eval(x=batch,
                                 batch_size=cellpose_batch_size,
                                 normalize=False,
@@ -2484,7 +2770,6 @@ def ml_analysis(df, channel_of_interest=3, location_column='col', positive_contr
     df_metadata = df[[location_column]].copy()
     df, features = filter_dataframe_features(df, channel_of_interest, exclude, remove_low_variance_features, remove_highly_correlated_features, verbose)
 
-
     if verbose:
         print(f'Found {len(features)} numerical features in the dataframe')
         print(f'Features used in training: {features}')
@@ -2629,7 +2914,6 @@ def check_index(df, elements=5, split_char='_'):
             print(idx)
         raise ValueError(f"Found {len(problematic_indices)} problematic indices that do not split into {elements} parts.")
 
-#def plate_heatmap(src, model_type='xgboost', variable='predictions', grouping='mean', min_max='allq', cmap='viridis', channel_of_interest=3, min_count=25, n_estimators=100, col_to_compare='col', pos='c2', neg='c1', exclude=None, n_repeats=10, clean=True, nr_to_plot=20, verbose=False, n_jobs=-1):
 def generate_ml_scores(src, settings):
 
     from .io import _read_and_merge_data
@@ -2667,7 +2951,7 @@ def generate_ml_scores(src, settings):
                                settings['top_features'],
                                settings['n_estimators'],
                                settings['test_size'],
-                               settings['model_type'],
+                               settings['model_type_ml'],
                                settings['n_jobs'],
                                settings['remove_low_variance_features'],
                                settings['remove_highly_correlated_features'],
@@ -2688,7 +2972,7 @@ def generate_ml_scores(src, settings):
                              min_count=settings['minimum_cell_count'],
                              verbose=settings['verbose'])
 
-    data_path, permutation_path, feature_importance_path, model_metricks_path, permutation_fig_path, feature_importance_fig_path, shap_fig_path, plate_heatmap_path, settings_csv = get_ml_results_paths(src, settings['model_type'], settings['channel_of_interest'])
+    data_path, permutation_path, feature_importance_path, model_metricks_path, permutation_fig_path, feature_importance_fig_path, shap_fig_path, plate_heatmap_path, settings_csv = get_ml_results_paths(src, settings['model_type_ml'], settings['channel_of_interest'])
     df, permutation_df, feature_importance_df, _, _, _, _, _, metrics_df = output
 
  settings_df.to_csv(settings_csv, index=False)
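The two hunks above switch generate_ml_scores from the settings key 'model_type' to 'model_type_ml', so callers must rename that key; the rest of the settings dict is unchanged. A minimal sketch of the rename only ('xgboost' is the illustrative value taken from the commented-out default in this diff; all other required keys are omitted).

# Sketch of the renamed key only; other settings required by generate_ml_scores are omitted.
settings = {
    # 'model_type': 'xgboost',     # key read by spacr 0.2.46
    'model_type_ml': 'xgboost',    # key read by spacr 0.2.56
}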
@@ -2845,6 +3129,7 @@ def generate_image_umap(settings={}):
         settings['plot_outlines'] = False
         settings['smooth_lines'] = False
 
+    print(f'Generating Image UMAP ...')
     settings_df = pd.DataFrame(list(settings.items()), columns=['Key', 'Value'])
     settings_dir = os.path.join(settings['src'][0],'settings')
     settings_csv = os.path.join(settings_dir,'embedding_settings.csv')