spacr 0.4.15__py3-none-any.whl → 0.4.60__py3-none-any.whl

This diff shows the changes between two publicly released versions of this package, as they appear in the supported public registries. It is provided for informational purposes only.
spacr/submodules.py CHANGED
@@ -1,14 +1,21 @@
-
-
-
 import seaborn as sns
-import os, random, sqlite3, re, shap
+import os, random, sqlite3, re, shap, string, time
 import pandas as pd
 import numpy as np
-import cellpose
+
 from skimage.measure import regionprops, label
+from skimage.transform import resize as sk_resize, rotate
+from skimage.exposure import rescale_intensity
+
+import cellpose
 from cellpose import models as cp_models
+from cellpose import io as cp_io
 from cellpose import train as train_cp
+from cellpose.metrics import aggregated_jaccard_index
+from cellpose.metrics import average_precision
+
 from IPython.display import display
 from sklearn.ensemble import RandomForestClassifier
 from sklearn.inspection import permutation_importance
@@ -17,10 +24,548 @@ from scipy.stats import chi2_contingency, pearsonr
 from scipy.spatial.distance import cosine
 
 from sklearn.metrics import mean_absolute_error
-
+from skimage.measure import regionprops, label as sklabel
 import matplotlib.pyplot as plt
 from natsort import natsorted
 
+import torch
+from torch.utils.data import Dataset
+from spacr.settings import get_train_cellpose_default_settings
+from spacr.utils import save_settings, invert_image
+
+class CellposeLazyDataset(Dataset):
+    def __init__(self, image_files, label_files, settings, randomize=True, augment=False):
+        combined = list(zip(image_files, label_files))
+        if randomize:
+            random.shuffle(combined)
+        self.image_files, self.label_files = zip(*combined)
+        self.normalize = settings['normalize']
+        self.percentiles = settings.get('percentiles', [2, 99])
+        self.target_size = settings['target_size']
+        self.augment = augment
+
+    def __len__(self):
+        return len(self.image_files) * (8 if self.augment else 1)
+
+    def apply_augmentation(self, image, label, aug_idx):
+        if aug_idx == 1:
+            return rotate(image, 90, resize=False, preserve_range=True), rotate(label, 90, resize=False, preserve_range=True)
+        elif aug_idx == 2:
+            return rotate(image, 180, resize=False, preserve_range=True), rotate(label, 180, resize=False, preserve_range=True)
+        elif aug_idx == 3:
+            return rotate(image, 270, resize=False, preserve_range=True), rotate(label, 270, resize=False, preserve_range=True)
+        elif aug_idx == 4:
+            return np.fliplr(image), np.fliplr(label)
+        elif aug_idx == 5:
+            return np.flipud(image), np.flipud(label)
+        elif aug_idx == 6:
+            return np.fliplr(rotate(image, 90, resize=False, preserve_range=True)), np.fliplr(rotate(label, 90, resize=False, preserve_range=True))
+        elif aug_idx == 7:
+            return np.flipud(rotate(image, 90, resize=False, preserve_range=True)), np.flipud(rotate(label, 90, resize=False, preserve_range=True))
+        return image, label
+
+    def __getitem__(self, idx):
+        base_idx = idx // 8 if self.augment else idx
+        aug_idx = idx % 8 if self.augment else 0
+
+        image = cp_io.imread(self.image_files[base_idx])
+        label = cp_io.imread(self.label_files[base_idx])
+
+        if image.ndim == 3:
+            image = image.mean(axis=-1)
+
+        if image.max() > 1:
+            image = image / image.max()
+
+        if self.normalize:
+            lower_p, upper_p = np.percentile(image, self.percentiles)
+            image = rescale_intensity(image, in_range=(lower_p, upper_p), out_range=(0, 1))
+
+        image, label = self.apply_augmentation(image, label, aug_idx)
+
+        image_shape = (self.target_size, self.target_size)
+        image = sk_resize(image, image_shape, preserve_range=True, anti_aliasing=True).astype(np.float32)
+        label = sk_resize(label, image_shape, order=0, preserve_range=True, anti_aliasing=False).astype(np.uint8)
+
+        return image, label
+
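# A minimal usage sketch for the new CellposeLazyDataset (the directory
# layout and settings values here are illustrative; the keys mirror those
# read in __init__).
import os
from spacr.submodules import CellposeLazyDataset

settings = {'normalize': True, 'percentiles': [2, 99], 'target_size': 512}
img_dir, mask_dir = '/path/to/train/images', '/path/to/train/masks'   # hypothetical paths
names = sorted(f for f in set(os.listdir(img_dir)) & set(os.listdir(mask_dir)) if f.endswith('.tif'))
image_files = [os.path.join(img_dir, f) for f in names]
label_files = [os.path.join(mask_dir, f) for f in names]

dataset = CellposeLazyDataset(image_files, label_files, settings, randomize=True, augment=True)
image, label = dataset[0]   # float32 image and uint8 mask, both target_size x target_size
print(len(dataset))         # 8x the number of files when augment=True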
+def train_cellpose(settings):
+
+    from spacr.settings import get_train_cellpose_default_settings
+    from spacr.utils import save_settings
+
+    settings = get_train_cellpose_default_settings(settings)
+    img_src = os.path.join(settings['src'], 'train', 'images')
+    mask_src = os.path.join(settings['src'], 'train', 'masks')
+    target_size = settings['target_size']
+
+    model_name = f"{settings['model_name']}_cyto_e{settings['n_epochs']}_X{target_size}_Y{target_size}.CP_model"
+    model_save_path = os.path.join(settings['src'], 'models', 'cellpose_model')
+    os.makedirs(model_save_path, exist_ok=True)
+
+    save_settings(settings, name=model_name)
+
+    model = cp_models.CellposeModel(gpu=True, model_type='cyto', diam_mean=30, pretrained_model='cyto')
+    cp_channels = [0, 0]
+
+    #train_image_files = sorted([os.path.join(img_src, f) for f in os.listdir(img_src) if f.endswith('.tif')])
+    #train_label_files = sorted([os.path.join(mask_src, f) for f in os.listdir(mask_src) if f.endswith('.tif')])
+
+    image_filenames = set(f for f in os.listdir(img_src) if f.endswith('.tif'))
+    label_filenames = set(f for f in os.listdir(mask_src) if f.endswith('.tif'))
+
+    # Only keep files that are present in both folders
+    matched_filenames = sorted(image_filenames & label_filenames)
+
+    train_image_files = [os.path.join(img_src, f) for f in matched_filenames]
+    train_label_files = [os.path.join(mask_src, f) for f in matched_filenames]
+
+    train_dataset = CellposeLazyDataset(train_image_files, train_label_files, settings, randomize=True, augment=settings['augment'])
+
+    n_aug = 8 if settings['augment'] else 1
+    max_base_images = len(train_dataset) // n_aug if settings['augment'] else len(train_dataset)
+    n_base = min(settings['batch_size'], max_base_images)
+
+    unique_base_indices = list(range(max_base_images))
+    random.shuffle(unique_base_indices)
+    selected_indices = unique_base_indices[:n_base]
+
+    images, labels = [], []
+    for idx in selected_indices:
+        for aug_idx in range(n_aug):
+            i = idx * n_aug + aug_idx if settings['augment'] else idx
+            img, lbl = train_dataset[i]
+            images.append(img)
+            labels.append(lbl)
+    try:
+        plot_cellpose_batch(images, labels)
+    except Exception:
+        print("Could not plot the batch images")
+
+    print(f"Training model with {len(images)} images per batch for {settings['n_epochs']} epochs")
+
+    train_cp.train_seg(model.net,
+                       train_data=images,
+                       train_labels=labels,
+                       channels=cp_channels,
+                       save_path=model_save_path,
+                       n_epochs=settings['n_epochs'],
+                       batch_size=settings['batch_size'],
+                       learning_rate=settings['learning_rate'],
+                       weight_decay=settings['weight_decay'],
+                       model_name=model_name,
+                       save_every=max(1, (settings['n_epochs'] // 10)),
+                       rescale=False)
+
+    print(f"Model saved at: {model_save_path}/{model_name}")
+
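# A minimal sketch of a train_cellpose call, assuming the layout the function
# expects (src/train/images and src/train/masks with matching .tif names).
# The values are illustrative; keys not set here fall back to
# get_train_cellpose_default_settings.
from spacr.submodules import train_cellpose

settings = {
    'src': '/path/to/dataset',    # hypothetical dataset root
    'model_name': 'my_cells',     # illustrative name
    'n_epochs': 100,
    'target_size': 512,
    'batch_size': 8,
    'learning_rate': 0.01,        # illustrative hyperparameters
    'weight_decay': 1e-4,
    'normalize': True,
    'percentiles': [2, 99],
    'augment': True,
}
train_cellpose(settings)          # saves the model under src/models/cellpose_model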
+def test_cellpose_model(settings):
+
+    from spacr.utils import save_settings, print_progress
+    from .settings import get_default_test_cellpose_model_settings
+
+    def plot_cellpose_results(i, j, results_dir, img, lbl, pred, flow):
+        from spacr.plot import generate_mask_random_cmap
+        fig, axs = plt.subplots(1, 5, figsize=(16, 4), gridspec_kw={'wspace': 0.1, 'hspace': 0.1})
+        cmap_lbl = generate_mask_random_cmap(lbl)
+        cmap_pred = generate_mask_random_cmap(pred)
+
+        axs[0].imshow(img, cmap='gray')
+        axs[0].set_title('Image')
+        axs[0].axis('off')
+
+        axs[1].imshow(lbl, cmap=cmap_lbl, interpolation='nearest')
+        axs[1].set_title('True Mask')
+        axs[1].axis('off')
+
+        axs[2].imshow(pred, cmap=cmap_pred, interpolation='nearest')
+        axs[2].set_title('Predicted Mask')
+        axs[2].axis('off')
+
+        axs[3].imshow(flow[2], cmap='gray')
+        axs[3].set_title('Cell Probability')
+        axs[3].axis('off')
+
+        axs[4].imshow(flow[0], cmap='gray')
+        axs[4].set_title('Flows')
+        axs[4].axis('off')
+
+        save_path = os.path.join(results_dir, f"cellpose_result_{i+j:03d}.png")
+        plt.savefig(save_path, dpi=200, bbox_inches='tight')
+        plt.show()
+        plt.close(fig)
+
+    settings = get_default_test_cellpose_model_settings(settings)
+
+    save_settings(settings, name='test_cellpose_model')
+    test_image_folder = os.path.join(settings['src'], 'test', 'images')
+    test_label_folder = os.path.join(settings['src'], 'test', 'masks')
+    results_dir = os.path.join(settings['src'], 'results')
+    os.makedirs(results_dir, exist_ok=True)
+
+    print(f"Results will be saved in: {results_dir}")
+
+    image_filenames = set(f for f in os.listdir(test_image_folder) if f.endswith('.tif'))
+    label_filenames = set(f for f in os.listdir(test_label_folder) if f.endswith('.tif'))
+
+    # Only keep files that are present in both folders
+    matched_filenames = sorted(image_filenames & label_filenames)
+
+    test_image_files = [os.path.join(test_image_folder, f) for f in matched_filenames]
+    test_label_files = [os.path.join(test_label_folder, f) for f in matched_filenames]
+
+    print(f"Found {len(test_image_files)} images and {len(test_label_files)} masks")
+
+    test_dataset = CellposeLazyDataset(test_image_files, test_label_files, settings, randomize=False, augment=False)
+
+    model = cp_models.CellposeModel(gpu=True, pretrained_model=settings['model_path'])
+
+    batch_size = settings['batch_size']
+    scores = []
+    names = []
+    time_ls = []
+
+    files_to_process = len(test_image_files)
+
+    n_objects_true_ls = []
+    n_objects_pred_ls = []
+    mean_area_true_ls = []
+    mean_area_pred_ls = []
+    tp_ls, fp_ls, fn_ls = [], [], []
+    precision_ls, recall_ls, f1_ls, accuracy_ls = [], [], [], []
+
+    for i in range(0, len(test_dataset), batch_size):
+        start = time.time()
+        batch = [test_dataset[j] for j in range(i, min(i + batch_size, len(test_dataset)))]
+        images, labels = zip(*batch)
+
+        masks_pred, flows, _ = model.eval(x=list(images),
+                                          channels=[0, 0],
+                                          normalize=False,
+                                          diameter=30,
+                                          flow_threshold=settings['FT'],
+                                          cellprob_threshold=settings['CP_probability'],
+                                          rescale=None,
+                                          resample=True,
+                                          interp=True,
+                                          anisotropy=None,
+                                          min_size=5,
+                                          augment=True,
+                                          tile=True,
+                                          tile_overlap=0.2,
+                                          bsize=224)
+
+        for j, (img, lbl, pred, flow) in enumerate(zip(images, labels, masks_pred, flows)):
+            score = float(aggregated_jaccard_index([lbl], [pred]))
+            fname = os.path.basename(test_label_files[i + j])
+            scores.append(score)
+            names.append(fname)
+
+            # Label masks
+            lbl_lab = label(lbl)
+            pred_lab = label(pred)
+
+            # Count objects
+            n_true = lbl_lab.max()
+            n_pred = pred_lab.max()
+            n_objects_true_ls.append(n_true)
+            n_objects_pred_ls.append(n_pred)
+
+            # Mean object size (area)
+            area_true = [p.area for p in regionprops(lbl_lab)]
+            area_pred = [p.area for p in regionprops(pred_lab)]
+
+            mean_area_true = np.mean(area_true) if area_true else 0
+            mean_area_pred = np.mean(area_pred) if area_pred else 0
+            mean_area_true_ls.append(mean_area_true)
+            mean_area_pred_ls.append(mean_area_pred)
+
+            # Compute object-level TP, FP, FN
+            ap, tp, fp, fn = average_precision([lbl], [pred], threshold=[0.5])
+            tp, fp, fn = int(tp[0, 0]), int(fp[0, 0]), int(fn[0, 0])
+            tp_ls.append(tp)
+            fp_ls.append(fp)
+            fn_ls.append(fn)
+
+            # Precision, Recall, F1, Accuracy
+            prec = tp / (tp + fp) if (tp + fp) > 0 else 0
+            rec = tp / (tp + fn) if (tp + fn) > 0 else 0
+            f1 = 2 * prec * rec / (prec + rec) if (prec + rec) > 0 else 0
+            acc = tp / (tp + fp + fn) if (tp + fp + fn) > 0 else 0
+
+            precision_ls.append(prec)
+            recall_ls.append(rec)
+            f1_ls.append(f1)
+            accuracy_ls.append(acc)
+
+            if settings['save']:
+                plot_cellpose_results(i, j, results_dir, img, lbl, pred, flow)
+
+        stop = time.time()
+        duration = stop - start
+        files_processed = min(i + batch_size, len(test_dataset))
+        time_ls.append(duration)
+        print_progress(files_processed, files_to_process, n_jobs=1, time_ls=time_ls, batch_size=batch_size, operation_type="test custom cellpose model")
+
+    df_results = pd.DataFrame({
+        'label_image': names,
+        'Jaccard': scores,
+        'n_objects_true': n_objects_true_ls,
+        'n_objects_pred': n_objects_pred_ls,
+        'mean_area_true': mean_area_true_ls,
+        'mean_area_pred': mean_area_pred_ls,
+        'TP': tp_ls,
+        'FP': fp_ls,
+        'FN': fn_ls,
+        'Precision': precision_ls,
+        'Recall': recall_ls,
+        'F1': f1_ls,
+        'Accuracy': accuracy_ls
+    })
+
+    df_results['n_error'] = abs(df_results['n_objects_pred'] - df_results['n_objects_true'])
+
+    print(f"Average true objects/image: {df_results['n_objects_true'].mean():.2f}")
+    print(f"Average predicted objects/image: {df_results['n_objects_pred'].mean():.2f}")
+    print(f"Mean object area (true): {df_results['mean_area_true'].mean():.2f} px")
+    print(f"Mean object area (pred): {df_results['mean_area_pred'].mean():.2f} px")
+    print(f"Average Jaccard score: {df_results['Jaccard'].mean():.4f}")
+
+    print(f"Average Precision: {df_results['Precision'].mean():.3f}")
+    print(f"Average Recall: {df_results['Recall'].mean():.3f}")
+    print(f"Average F1-score: {df_results['F1'].mean():.3f}")
+    print(f"Average Accuracy: {df_results['Accuracy'].mean():.3f}")
+
+    display(df_results)
+
+    if settings['save']:
+        df_results.to_csv(os.path.join(results_dir, 'test_results.csv'), index=False)
+
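# A worked example of the object-level metrics computed in
# test_cellpose_model above, for a hypothetical image with 9 matched
# objects (TP), 3 spurious predictions (FP), and 1 missed object (FN).
tp, fp, fn = 9, 3, 1
prec = tp / (tp + fp)                # 9 / 12 = 0.75
rec = tp / (tp + fn)                 # 9 / 10 = 0.90
f1 = 2 * prec * rec / (prec + rec)   # ~0.82
acc = tp / (tp + fp + fn)            # 9 / 13 ~ 0.69 (object-level, IoU-style)
print(prec, rec, f1, acc)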
+def apply_cellpose_model(settings):
+
+    from .settings import get_default_apply_cellpose_model_settings
+    from spacr.utils import save_settings, print_progress
+
+    def plot_cellpose_result(i, j, results_dir, img, pred, flow):
+
+        from .plot import generate_mask_random_cmap
+
+        fig, axs = plt.subplots(1, 4, figsize=(16, 4), gridspec_kw={'wspace': 0.1, 'hspace': 0.1})
+        cmap_pred = generate_mask_random_cmap(pred)
+
+        axs[0].imshow(img, cmap='gray')
+        axs[0].set_title('Image')
+        axs[0].axis('off')
+
+        axs[1].imshow(pred, cmap=cmap_pred, interpolation='nearest')
+        axs[1].set_title('Predicted Mask')
+        axs[1].axis('off')
+
+        axs[2].imshow(flow[2], cmap='gray')
+        axs[2].set_title('Cell Probability')
+        axs[2].axis('off')
+
+        axs[3].imshow(flow[0], cmap='gray')
+        axs[3].set_title('Flows')
+        axs[3].axis('off')
+
+        save_path = os.path.join(results_dir, f"cellpose_result_{i + j:03d}.png")
+        plt.savefig(save_path, dpi=200, bbox_inches='tight')
+        plt.show()
+        plt.close(fig)
+
+    settings = get_default_apply_cellpose_model_settings(settings)
+    save_settings(settings, name='apply_cellpose_model')
+
+    image_folder = os.path.join(settings['src'])
+    results_dir = os.path.join(settings['src'], 'results')
+    os.makedirs(results_dir, exist_ok=True)
+    print(f"Results will be saved in: {results_dir}")
+
+    image_files = sorted([os.path.join(image_folder, f) for f in os.listdir(image_folder) if f.endswith('.tif')])
+    print(f"Found {len(image_files)} images")
+
+    dummy_labels = [image_files[0]] * len(image_files)
+    dataset = CellposeLazyDataset(image_files, dummy_labels, settings, randomize=False, augment=False)
+
+    model = cp_models.CellposeModel(gpu=True, pretrained_model=settings['model_path'])
+    batch_size = settings['batch_size']
+    measurements = []
+
+    files_to_process = len(image_files)
+    time_ls = []
+
+    for i in range(0, len(dataset), batch_size):
+        start = time.time()
+        batch = [dataset[j] for j in range(i, min(i + batch_size, len(dataset)))]
+        images, _ = zip(*batch)
+
+        masks_pred, flows, _ = model.eval(x=list(images),
+                                          channels=[0, 0],
+                                          normalize=False,
+                                          diameter=30,
+                                          flow_threshold=settings['FT'],
+                                          cellprob_threshold=settings['CP_probability'],
+                                          rescale=None,
+                                          resample=True,
+                                          interp=True,
+                                          anisotropy=None,
+                                          min_size=5,
+                                          augment=True,
+                                          tile=True,
+                                          tile_overlap=0.2,
+                                          bsize=224)
+
+        for j, (img, pred, flow) in enumerate(zip(images, masks_pred, flows)):
+            fname = os.path.basename(image_files[i + j])
+
+            if settings.get('circularize', False):
+                # Zero out predictions outside the largest centered circle
+                h, w = pred.shape
+                Y, X = np.ogrid[:h, :w]
+                center_x, center_y = w / 2, h / 2
+                radius = min(center_x, center_y)
+                circular_mask = (X - center_x)**2 + (Y - center_y)**2 <= radius**2
+                pred = pred * circular_mask
+
+            if settings['save']:
+                plot_cellpose_result(i, j, results_dir, img, pred, flow)
+
+            props = regionprops(sklabel(pred))
+            for k, prop in enumerate(props):
+                measurements.append({
+                    'image': fname,
+                    'object_id': k + 1,
+                    'area': prop.area
+                })
+
+        stop = time.time()
+        duration = stop - start
+        files_processed = min(i + batch_size, len(dataset))
+        time_ls.append(duration)
+        print_progress(files_processed, files_to_process, n_jobs=1, time_ls=time_ls, batch_size=batch_size, operation_type="apply custom cellpose model")
+
+    # Write the collected measurements once all batches are processed
+    df_measurements = pd.DataFrame(measurements)
+    df_measurements.to_csv(os.path.join(results_dir, 'measurements.csv'), index=False)
+    print("Saved object counts and areas to measurements.csv")
+
+    df_summary = df_measurements.groupby('image').agg(
+        object_count=('object_id', 'count'),
+        average_area=('area', 'mean')
+    ).reset_index()
+    df_summary.to_csv(os.path.join(results_dir, 'summary.csv'), index=False)
+    print("Saved object count and average area to summary.csv")
+
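# A minimal sketch of applying a trained model with apply_cellpose_model; the
# paths are illustrative, and the keys mirror those the function and the
# dataset read (defaults come from get_default_apply_cellpose_model_settings).
from spacr.submodules import apply_cellpose_model

settings = {
    'src': '/path/to/images',                     # folder of .tif images (hypothetical)
    'model_path': '/path/to/my_cells.CP_model',   # model trained with train_cellpose
    'batch_size': 8,
    'FT': 0.4,                  # flow_threshold (illustrative)
    'CP_probability': 0.0,      # cellprob_threshold (illustrative)
    'normalize': True,
    'percentiles': [2, 99],
    'target_size': 512,
    'circularize': False,
    'save': True,
}
apply_cellpose_model(settings)  # writes measurements.csv and summary.csv under src/results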
+def plot_cellpose_batch(images, labels):
+    from spacr.plot import generate_mask_random_cmap
+
+    cmap_lbl = generate_mask_random_cmap(labels)
+    batch_size = len(images)
+    fig, axs = plt.subplots(2, batch_size, figsize=(4 * batch_size, 8))
+    for i in range(batch_size):
+        axs[0, i].imshow(images[i], cmap='gray')
+        axs[0, i].set_title(f'Image {i+1}')
+        axs[0, i].axis('off')
+        axs[1, i].imshow(labels[i], cmap=cmap_lbl, interpolation='nearest')
+        axs[1, i].set_title(f'Label {i+1}')
+        axs[1, i].axis('off')
+    plt.show()
+
+def analyze_percent_positive(settings):
+    from spacr.io import _read_and_merge_data
+    from spacr.utils import save_settings
+    from .settings import default_settings_analyze_percent_positive
+
+    settings = default_settings_analyze_percent_positive(settings)
+
+    def translate_well_in_df(csv_loc):
+        # Load and extract metadata
+        df = pd.read_csv(csv_loc)
+        df[['plateID', 'well']] = df['Renamed TIFF'].str.replace('.tif', '', regex=False).str.split('_', expand=True)[[0, 1]]
+        df['plate_well'] = df['plateID'] + '_' + df['well']
+
+        # Retain one row per plate_well
+        df_2 = df.drop_duplicates(subset='plate_well').copy()
+
+        # Translate well to row and column
+        df_2['rowID'] = 'r' + df_2['well'].str[0].map(lambda x: str(string.ascii_uppercase.index(x) + 1))
+        df_2['column_name'] = 'c' + df_2['well'].str[1:].astype(int).astype(str)
+
+        # Optional: add prcf ID (plate_row_column_field)
+        df_2['fieldID'] = 'f1'  # default, or extract from the filename if needed
+        df_2['prc'] = 'p' + df_2['plateID'].str.extract(r'(\d+)')[0] + '_' + df_2['rowID'] + '_' + df_2['column_name']
+
+        return df_2
+
+    def annotate_and_summarize(df, value_col, condition_col, well_col, threshold, annotation_col='annotation'):
+        """
+        Annotate and summarize a DataFrame based on a threshold.
+
+        Parameters:
+        - df: pandas.DataFrame
+        - value_col: str, column name to apply the threshold on
+        - condition_col: str, column name for the experimental condition
+        - well_col: str, column name for wells
+        - threshold: float, threshold value for annotation
+        - annotation_col: str, name of the new annotation column
+
+        Returns:
+        - df: annotated DataFrame
+        - summary_df: DataFrame with counts and fractions per condition and well
+        """
+        # Annotate
+        df[annotation_col] = np.where(df[value_col] > threshold, 'above', 'below')
+
+        # Count per condition and well
+        count_df = df.groupby([condition_col, well_col, annotation_col]).size().unstack(fill_value=0)
+
+        # Calculate total and fractions
+        count_df['total'] = count_df.sum(axis=1)
+        count_df['fraction_above'] = count_df.get('above', 0) / count_df['total']
+        count_df['fraction_below'] = count_df.get('below', 0) / count_df['total']
+
+        return df, count_df.reset_index()
+
+    save_settings(settings, name='analyze_percent_positive', show=False)
+
+    df, _ = _read_and_merge_data(locs=[settings['src'] + '/measurements/measurements.db'],
+                                 tables=settings['tables'],
+                                 verbose=True,
+                                 nuclei_limit=None,
+                                 pathogen_limit=None)
+
+    df['condition'] = 'none'
+
+    if settings['filter_1'] is not None:
+        df = df[df[settings['filter_1'][0]] > settings['filter_1'][1]]
+
+    condition_col = 'condition'
+    well_col = 'prc'
+
+    df, count_df = annotate_and_summarize(df, settings['value_col'], condition_col, well_col, settings['threshold'], annotation_col='annotation')
+    count_df[['plateID', 'rowID', 'column_name']] = count_df['prc'].str.split('_', expand=True)
+
+    csv_loc = os.path.join(settings['src'], 'rename_log.csv')
+    csv_out_loc = os.path.join(settings['src'], 'result.csv')
+    translate_df = translate_well_in_df(csv_loc)
+
+    merged = pd.merge(count_df, translate_df, on=['rowID', 'column_name'], how='inner')
+
+    merged = merged[['plateID_y', 'well', 'plate_well', 'fieldID', 'rowID', 'column_name', 'prc_x', 'Original File', 'Renamed TIFF', 'above', 'below', 'fraction_above', 'fraction_below']]
+    merged[[f'part{i}' for i in range(merged['Original File'].str.count('_').max() + 1)]] = merged['Original File'].str.split('_', expand=True)
+    merged.to_csv(csv_out_loc, index=False)
+    display(merged)
+    return merged
+
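# A worked example of the well-name translation in translate_well_in_df:
# 'B3' maps to rowID 'r2' (B is the 2nd row letter) and column 'c3'.
import string

well = 'B3'
rowID = 'r' + str(string.ascii_uppercase.index(well[0]) + 1)   # 'r2'
column_name = 'c' + str(int(well[1:]))                         # 'c3'
print(rowID, column_name)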
 def analyze_recruitment(settings):
     """
     Analyze recruitment data by grouping the DataFrame by well coordinates and plotting controls and recruitment data.
@@ -198,147 +743,6 @@ def analyze_plaques(settings):
 
     print(f"Analysis completed and saved to database '{db_name}'.")
 
-def train_cellpose(settings):
-
-    from .io import _load_normalized_images_and_labels, _load_images_and_labels
-    from .settings import get_train_cellpose_default_settings
-    from .utils import save_settings
-
-    settings = get_train_cellpose_default_settings(settings)
-
-    img_src = settings['img_src']
-    mask_src = os.path.join(img_src, 'masks')
-    test_img_src = settings['test_img_src']
-    test_mask_src = settings['test_mask_src']
-
-    if settings['resize']:
-        target_height = settings['width_height'][1]
-        target_width = settings['width_height'][0]
-
-    if settings['test']:
-        test_img_src = os.path.join(os.path.dirname(settings['img_src']), 'test')
-        test_mask_src = os.path.join(settings['test_img_src'], 'mask')
-
-    test_images, test_masks, test_image_names, test_mask_names = None, None, None, None
-    print(settings)
-
-    if settings['from_scratch']:
-        model_name = f"scratch_{settings['model_name']}_{settings['model_type']}_e{settings['n_epochs']}_X{target_width}_Y{target_height}.CP_model"
-    else:
-        if settings['resize']:
-            model_name = f"{settings['model_name']}_{settings['model_type']}_e{settings['n_epochs']}_X{target_width}_Y{target_height}.CP_model"
-        else:
-            model_name = f"{settings['model_name']}_{settings['model_type']}_e{settings['n_epochs']}.CP_model"
-
-    model_save_path = os.path.join(settings['mask_src'], 'models', 'cellpose_model')
-    print(model_save_path)
-    os.makedirs(model_save_path, exist_ok=True)
-
-    save_settings(settings, name=model_name)
-
-    if settings['from_scratch']:
-        model = cp_models.CellposeModel(gpu=True, model_type=settings['model_type'], diam_mean=settings['diameter'], pretrained_model=None)
-    else:
-        model = cp_models.CellposeModel(gpu=True, model_type=settings['model_type'])
-
-    if settings['normalize']:
-
-        image_files = [os.path.join(img_src, f) for f in os.listdir(img_src) if f.endswith('.tif')]
-        label_files = [os.path.join(mask_src, f) for f in os.listdir(mask_src) if f.endswith('.tif')]
-        images, masks, image_names, mask_names, orig_dims = _load_normalized_images_and_labels(image_files,
-                                                                                               label_files,
-                                                                                               settings['channels'],
-                                                                                               settings['percentiles'],
-                                                                                               settings['invert'],
-                                                                                               settings['verbose'],
-                                                                                               settings['remove_background'],
-                                                                                               settings['background'],
-                                                                                               settings['Signal_to_noise'],
-                                                                                               settings['target_height'],
-                                                                                               settings['target_width'])
-        images = [np.squeeze(img) if img.shape[-1] == 1 else img for img in images]
-
-        if settings['test']:
-            test_image_files = [os.path.join(test_img_src, f) for f in os.listdir(test_img_src) if f.endswith('.tif')]
-            test_label_files = [os.path.join(test_mask_src, f) for f in os.listdir(test_mask_src) if f.endswith('.tif')]
-            test_images, test_masks, test_image_names, test_mask_names = _load_normalized_images_and_labels(test_image_files,
-                                                                                                            test_label_files,
-                                                                                                            settings['channels'],
-                                                                                                            settings['percentiles'],
-                                                                                                            settings['invert'],
-                                                                                                            settings['verbose'],
-                                                                                                            settings['remove_background'],
-                                                                                                            settings['background'],
-                                                                                                            settings['Signal_to_noise'],
-                                                                                                            settings['target_height'],
-                                                                                                            settings['target_width'])
-            test_images = [np.squeeze(img) if img.shape[-1] == 1 else img for img in test_images]
-
-    else:
-        images, masks, image_names, mask_names = _load_images_and_labels(img_src, mask_src, settings['invert'])
-        images = [np.squeeze(img) if img.shape[-1] == 1 else img for img in images]
-
-        if settings['test']:
-            test_images, test_masks, test_image_names, test_mask_names = _load_images_and_labels(test_img_src,
-                                                                                                 test_mask_src,
-                                                                                                 settings['invert'])
-
-            test_images = [np.squeeze(img) if img.shape[-1] == 1 else img for img in test_images]
-
-    #if resize:
-    #    images, masks = resize_images_and_labels(images, masks, target_height, target_width, show_example=True)
-
-    if settings['model_type'] == 'cyto':
-        cp_channels = [0,1]
-    if settings['model_type'] == 'cyto2':
-        cp_channels = [0,2]
-    if settings['model_type'] == 'nucleus':
-        cp_channels = [0,0]
-    if settings['grayscale']:
-        cp_channels = [0,0]
-        images = [np.squeeze(img) if img.ndim == 3 and 1 in img.shape else img for img in images]
-
-    masks = [np.squeeze(mask) if mask.ndim == 3 and 1 in mask.shape else mask for mask in masks]
-
-    print(f'image shape: {images[0].shape}, image type: images[0].shape mask shape: {masks[0].shape}, image type: masks[0].shape')
-    save_every = int(settings['n_epochs']/10)
-    if save_every < 10:
-        save_every = settings['n_epochs']
-
-    train_cp.train_seg(model.net,
-                       train_data=images,
-                       train_labels=masks,
-                       train_files=image_names,
-                       train_labels_files=mask_names,
-                       train_probs=None,
-                       test_data=test_images,
-                       test_labels=test_masks,
-                       test_files=test_image_names,
-                       test_labels_files=test_mask_names,
-                       test_probs=None,
-                       load_files=True,
-                       batch_size=settings['batch_size'],
-                       learning_rate=settings['learning_rate'],
-                       n_epochs=settings['n_epochs'],
-                       weight_decay=settings['weight_decay'],
-                       momentum=0.9,
-                       SGD=False,
-                       channels=cp_channels,
-                       channel_axis=None,
-                       normalize=False,
-                       compute_flows=False,
-                       save_path=model_save_path,
-                       save_every=save_every,
-                       nimg_per_epoch=None,
-                       nimg_test_per_epoch=None,
-                       rescale=settings['rescale'],
-                       #scale_range=None,
-                       #bsize=224,
-                       min_train_masks=1,
-                       model_name=settings['model_name'])
-
-    return print(f"Model saved at: {model_save_path}/{model_name}")
-
 def count_phenotypes(settings):
     from .io import _read_db
 
350
754
  unique_values_count = df[settings['annotation_column']].nunique(dropna=True)
351
755
  print(f"Unique values in {settings['annotation_column']} (excluding NaN): {unique_values_count}")
352
756
 
353
- # Count unique values in 'value' column, grouped by 'plate', 'row_name', 'column'
354
- grouped_unique_count = df.groupby(['plate', 'row_name', 'column'])[settings['annotation_column']].nunique(dropna=True).reset_index(name='unique_count')
757
+ # Count unique values in 'value' column, grouped by 'plateID', 'rowID', 'columnID'
758
+ grouped_unique_count = df.groupby(['plateID', 'rowID', 'columnID'])[settings['annotation_column']].nunique(dropna=True).reset_index(name='unique_count')
355
759
  display(grouped_unique_count)
356
760
 
357
761
  save_path = os.path.join(settings['src'], 'phenotype_counts.csv')
358
762
 
359
763
  # Group by plate, row, and column, then count the occurrences of each unique value
360
- grouped_counts = df.groupby(['plate', 'row_name', 'column', 'value']).size().reset_index(name='count')
764
+ grouped_counts = df.groupby(['plateID', 'rowID', 'columnID', 'value']).size().reset_index(name='count')
361
765
 
362
766
  # Pivot the DataFrame so that unique values are columns and their counts are in the rows
363
- pivot_df = grouped_counts.pivot_table(index=['plate', 'row_name', 'column'], columns='value', values='count', fill_value=0)
767
+ pivot_df = grouped_counts.pivot_table(index=['plateID', 'rowID', 'columnID'], columns='value', values='count', fill_value=0)
364
768
 
365
769
  # Flatten the multi-level columns
366
770
  pivot_df.columns = [f"value_{int(col)}" for col in pivot_df.columns]
@@ -382,20 +786,20 @@ def count_phenotypes(settings):
 def compare_reads_to_scores(reads_csv, scores_csv, empirical_dict={'r1':(90,10),'r2':(90,10),'r3':(80,20),'r4':(80,20),'r5':(70,30),'r6':(70,30),'r7':(60,40),'r8':(60,40),'r9':(50,50),'r10':(50,50),'r11':(40,60),'r12':(40,60),'r13':(30,70),'r14':(30,70),'r15':(20,80),'r16':(20,80)},
                             pc_grna='TGGT1_220950_1', nc_grna='TGGT1_233460_4',
                             y_columns=['class_1_fraction', 'TGGT1_220950_1_fraction', 'nc_fraction'],
-                            column='column', value='c3', plate=None, save_paths=None):
+                            column='columnID', value='c3', plate=None, save_paths=None):
 
     def calculate_well_score_fractions(df, class_columns='cv_predictions'):
-        if all(col in df.columns for col in ['plate', 'row_name', 'column']):
-            df['prc'] = df['plate'] + '_' + df['row_name'] + '_' + df['column']
+        if all(col in df.columns for col in ['plateID', 'rowID', 'columnID']):
+            df['prc'] = df['plateID'] + '_' + df['rowID'] + '_' + df['columnID']
         else:
-            raise ValueError("Cannot find 'plate', 'row_name', or 'column' in df.columns")
-        prc_summary = df.groupby(['plate', 'row_name', 'column', 'prc']).size().reset_index(name='total_rows')
-        well_counts = (df.groupby(['plate', 'row_name', 'column', 'prc', class_columns])
+            raise ValueError("Cannot find 'plateID', 'rowID', or 'columnID' in df.columns")
+        prc_summary = df.groupby(['plateID', 'rowID', 'columnID', 'prc']).size().reset_index(name='total_rows')
+        well_counts = (df.groupby(['plateID', 'rowID', 'columnID', 'prc', class_columns])
                        .size()
                        .unstack(fill_value=0)
                        .reset_index()
                        .rename(columns={0: 'class_0', 1: 'class_1'}))
-        summary_df = pd.merge(prc_summary, well_counts, on=['plate', 'row_name', 'column', 'prc'], how='left')
+        summary_df = pd.merge(prc_summary, well_counts, on=['plateID', 'rowID', 'columnID', 'prc'], how='left')
         summary_df['class_0_fraction'] = summary_df['class_0'] / summary_df['total_rows']
         summary_df['class_1_fraction'] = summary_df['class_1'] / summary_df['total_rows']
         return summary_df
@@ -490,8 +894,8 @@ def compare_reads_to_scores(reads_csv, scores_csv, empirical_dict={'r1':(90,10),
         return result
 
     def calculate_well_read_fraction(df, count_column='count'):
-        if all(col in df.columns for col in ['plate', 'row_name', 'column']):
-            df['prc'] = df['plate'] + '_' + df['row_name'] + '_' + df['column']
+        if all(col in df.columns for col in ['plateID', 'rowID', 'columnID']):
+            df['prc'] = df['plateID'] + '_' + df['rowID'] + '_' + df['columnID']
         else:
             raise ValueError("Cannot find plate, row or column in df.columns")
         grouped_df = df.groupby('prc')[count_column].sum().reset_index()
@@ -507,21 +911,17 @@ def compare_reads_to_scores(reads_csv, scores_csv, empirical_dict={'r1':(90,10),
     for i, reads_csv_temp in enumerate(reads_csv):
         reads_df_temp = pd.read_csv(reads_csv_temp)
         scores_df_temp = pd.read_csv(scores_csv[i])
-        reads_df_temp['plate'] = f"plate{i+1}"
-        scores_df_temp['plate'] = f"plate{i+1}"
+        reads_df_temp['plateID'] = f"plate{i+1}"
+        scores_df_temp['plateID'] = f"plate{i+1}"
 
+        if 'column' in reads_df_temp.columns:
+            reads_df_temp = reads_df_temp.rename(columns={'column': 'columnID'})
         if 'column_name' in reads_df_temp.columns:
-            reads_df_temp = reads_df_temp.rename(columns={'column_name': 'column'})
-        if 'column_name' in reads_df_temp.columns:
-            reads_df_temp = reads_df_temp.rename(columns={'column_name': 'column'})
-        if 'column_name' in scores_df_temp.columns:
-            scores_df_temp = scores_df_temp.rename(columns={'column_name': 'column'})
-        if 'column_name' in scores_df_temp.columns:
-            scores_df_temp = scores_df_temp.rename(columns={'column_name': 'column'})
-        if 'row_name' in reads_df_temp.columns:
-            reads_df_temp = reads_df_temp.rename(columns={'row_name': 'row_name'})
+            reads_df_temp = reads_df_temp.rename(columns={'column_name': 'columnID'})
+        if 'row' in reads_df_temp.columns:
+            reads_df_temp = reads_df_temp.rename(columns={'row': 'rowID'})
         if 'row_name' in scores_df_temp.columns:
-            scores_df_temp = scores_df_temp.rename(columns={'row_name': 'row_name'})
+            scores_df_temp = scores_df_temp.rename(columns={'row_name': 'rowID'})
 
         reads_ls.append(reads_df_temp)
         scores_ls.append(scores_df_temp)
@@ -535,8 +935,8 @@ def compare_reads_to_scores(reads_csv, scores_csv, empirical_dict={'r1':(90,10),
         reads_df = pd.read_csv(reads_csv)
         scores_df = pd.read_csv(scores_csv)
         if plate != None:
-            reads_df['plate'] = plate
-            scores_df['plate'] = plate
+            reads_df['plateID'] = plate
+            scores_df['plateID'] = plate
 
     reads_df = calculate_well_read_fraction(reads_df)
     scores_df = calculate_well_score_fractions(scores_df)
@@ -548,7 +948,7 @@ def compare_reads_to_scores(reads_csv, scores_csv, empirical_dict={'r1':(90,10),
 
     df_emp = pd.DataFrame([(key, val[0], val[1], val[0] / (val[0] + val[1]), val[1] / (val[0] + val[1])) for key, val in empirical_dict.items()], columns=['key', 'value1', 'value2', 'pc_fraction', 'nc_fraction'])
 
-    df = pd.merge(df, df_emp, left_on='row_name', right_on='key')
+    df = pd.merge(df, df_emp, left_on='rowID', right_on='key')
 
     if any in y_columns not in df.columns:
         print(f"columns in dataframe:")
@@ -698,11 +1098,17 @@ def interperate_vision_model(settings={}):
     # Clean and align columns for merging
     df['object_label'] = df['object_label'].str.replace('o', '')
 
-    if 'row_name' not in scores_df.columns:
-        scores_df['row_name'] = scores_df['row']
+    if 'rowID' not in scores_df.columns:
+        if 'row' in scores_df.columns:
+            scores_df['rowID'] = scores_df['row']
+        if 'row_name' in scores_df.columns:
+            scores_df['rowID'] = scores_df['row_name']
 
-    if 'column_name' not in scores_df.columns:
-        scores_df['column_name'] = scores_df['col']
+    if 'columnID' not in scores_df.columns:
+        if 'column_name' in scores_df.columns:
+            scores_df['columnID'] = scores_df['column_name']
+        if 'column' in scores_df.columns:
+            scores_df['columnID'] = scores_df['column']
 
     if 'object_label' not in scores_df.columns:
         scores_df['object_label'] = scores_df['object']
@@ -714,14 +1120,14 @@ def interperate_vision_model(settings={}):
     scores_df['object_label'] = scores_df['object'].astype(str)
 
     # Ensure all join columns have the same data type in both DataFrames
-    df[['plate', 'row_name', 'column_name', 'field', 'object_label']] = df[['plate', 'row_name', 'column_name', 'field', 'object_label']].astype(str)
-    scores_df[['plate', 'row_name', 'column_name', 'field', 'object_label']] = scores_df[['plate', 'row_name', 'column_name', 'field', 'object_label']].astype(str)
+    df[['plateID', 'rowID', 'column_name', 'fieldID', 'object_label']] = df[['plateID', 'rowID', 'column_name', 'fieldID', 'object_label']].astype(str)
+    scores_df[['plateID', 'rowID', 'column_name', 'fieldID', 'object_label']] = scores_df[['plateID', 'rowID', 'column_name', 'fieldID', 'object_label']].astype(str)
 
     # Select only the necessary columns from scores_df for merging
-    scores_df = scores_df[['plate', 'row_name', 'column_name', 'field', 'object_label', settings['score_column']]]
+    scores_df = scores_df[['plateID', 'rowID', 'column_name', 'fieldID', 'object_label', settings['score_column']]]
 
     # Now merge DataFrames
-    merged_df = pd.merge(df, scores_df, on=['plate', 'row_name', 'column_name', 'field', 'object_label'], how='inner')
+    merged_df = pd.merge(df, scores_df, on=['plateID', 'rowID', 'column_name', 'fieldID', 'object_label'], how='inner')
 
     # Separate numerical features and the score column
     X = merged_df.select_dtypes(include='number').drop(columns=[settings['score_column']])
@@ -997,8 +1403,8 @@ def analyze_endodyogeny(settings):
     output['data'] = df
 
 
-    if settings['level'] == 'plate':
-        prc_column = 'plate'
+    if settings['level'] == 'plateID':
+        prc_column = 'plateID'
     else:
        prc_column = 'prc'
 
@@ -1144,28 +1550,28 @@ def generate_score_heatmap(settings):
     def group_cv_score(csv, plate=1, column='c3', data_column='pred'):
 
         df = pd.read_csv(csv)
-        if 'col' in df.columns:
-            df = df[df['col']==column]
+        if 'columnID' in df.columns:
+            df = df[df['columnID']==column]
         elif 'column' in df.columns:
-            df['col'] = df['column']
-            df = df[df['col']==column]
+            df['columnID'] = df['column']
+            df = df[df['columnID']==column]
         if not plate is None:
-            df['plate'] = f"plate{plate}"
-        grouped_df = df.groupby(['plate', 'row', 'col'])[data_column].mean().reset_index()
-        grouped_df['prc'] = grouped_df['plate'].astype(str) + '_' + grouped_df['row'].astype(str) + '_' + grouped_df['col'].astype(str)
+            df['plateID'] = f"plate{plate}"
+        grouped_df = df.groupby(['plateID', 'rowID', 'columnID'])[data_column].mean().reset_index()
+        grouped_df['prc'] = grouped_df['plateID'].astype(str) + '_' + grouped_df['rowID'].astype(str) + '_' + grouped_df['columnID'].astype(str)
         return grouped_df
 
     def calculate_fraction_mixed_condition(csv, plate=1, column='c3', control_sgrnas = ['TGGT1_220950_1', 'TGGT1_233460_4']):
         df = pd.read_csv(csv)
         df = df[df['column_name']==column]
         if plate not in df.columns:
-            df['plate'] = f"plate{plate}"
+            df['plateID'] = f"plate{plate}"
         df = df[df['grna_name'].str.match(f'^{control_sgrnas[0]}$|^{control_sgrnas[1]}$')]
-        grouped_df = df.groupby(['plate', 'row_name', 'column_name'])['count'].sum().reset_index()
+        grouped_df = df.groupby(['plateID', 'rowID', 'column_name'])['count'].sum().reset_index()
         grouped_df = grouped_df.rename(columns={'count': 'total_count'})
-        merged_df = pd.merge(df, grouped_df, on=['plate', 'row_name', 'column_name'])
+        merged_df = pd.merge(df, grouped_df, on=['plateID', 'rowID', 'column_name'])
         merged_df['fraction'] = merged_df['count'] / merged_df['total_count']
-        merged_df['prc'] = merged_df['plate'].astype(str) + '_' + merged_df['row_name'].astype(str) + '_' + merged_df['column_name'].astype(str)
+        merged_df['prc'] = merged_df['plateID'].astype(str) + '_' + merged_df['rowID'].astype(str) + '_' + merged_df['column_name'].astype(str)
         return merged_df
 
     def plot_multi_channel_heatmap(df, column='c3', cmap='coolwarm'):
@@ -1177,17 +1583,17 @@ def generate_score_heatmap(settings):
         - column: Column to filter by (default is 'c3').
         """
         # Extract row number and convert to integer for sorting
-        df['row_num'] = df['row'].str.extract(r'(\d+)').astype(int)
+        df['row_num'] = df['rowID'].str.extract(r'(\d+)').astype(int)
 
         # Filter and sort by plate, row, and column
-        df = df[df['col'] == column]
-        df = df.sort_values(by=['plate', 'row_num', 'col'])
+        df = df[df['columnID'] == column]
+        df = df.sort_values(by=['plateID', 'row_num', 'columnID'])
 
         # Drop temporary 'row_num' column after sorting
         df = df.drop('row_num', axis=1)
 
         # Create a new column combining plate, row, and column for the index
-        df['plate_row_col'] = df['plate'] + '-' + df['row'] + '-' + df['col']
+        df['plate_row_col'] = df['plateID'] + '-' + df['rowID'] + '-' + df['columnID']
 
         # Set 'plate_row_col' as the index
         df.set_index('plate_row_col', inplace=True)
@@ -1244,11 +1650,11 @@ def generate_score_heatmap(settings):
         # Loop through all collected CSV files and process them
         for csv_file in ls:
             df = pd.read_csv(csv_file)  # Read CSV into DataFrame
-            df = df[df['col']==column]
+            df = df[df['columnID']==column]
             if not plate is None:
-                df['plate'] = f"plate{plate}"
-            # Group the data by 'plate', 'row', and 'col'
-            grouped_df = df.groupby(['plate', 'row', 'col'])[data_column].mean().reset_index()
+                df['plateID'] = f"plate{plate}"
+            # Group the data by 'plateID', 'rowID', and 'columnID'
+            grouped_df = df.groupby(['plateID', 'rowID', 'columnID'])[data_column].mean().reset_index()
             # Use the CSV filename to create a new column name
             folder_name = os.path.dirname(csv_file).replace(".csv", "")
             new_column_name = os.path.basename(f"{folder_name}_{data_column}")
@@ -1259,8 +1665,8 @@ def generate_score_heatmap(settings):
             if combined_df is None:
                 combined_df = grouped_df
             else:
-                combined_df = pd.merge(combined_df, grouped_df, on=['plate', 'row', 'col'], how='outer')
-        combined_df['prc'] = combined_df['plate'].astype(str) + '_' + combined_df['row'].astype(str) + '_' + combined_df['col'].astype(str)
+                combined_df = pd.merge(combined_df, grouped_df, on=['plateID', 'rowID', 'columnID'], how='outer')
+        combined_df['prc'] = combined_df['plateID'].astype(str) + '_' + combined_df['rowID'].astype(str) + '_' + combined_df['columnID'].astype(str)
         return combined_df
 
     def calculate_mae(df):
@@ -1282,16 +1688,16 @@ def generate_score_heatmap(settings):
         mae_df = pd.DataFrame(mae_data)
         return mae_df
 
-    result_df = combine_classification_scores(settings['folders'], settings['csv_name'], settings['data_column'], settings['plate'], settings['column'], )
-    df = calculate_fraction_mixed_condition(settings['csv'], settings['plate'], settings['column'], settings['control_sgrnas'])
+    result_df = combine_classification_scores(settings['folders'], settings['csv_name'], settings['data_column'], settings['plateID'], settings['columnID'])
+    df = calculate_fraction_mixed_condition(settings['csv'], settings['plateID'], settings['columnID'], settings['control_sgrnas'])
     df = df[df['grna_name']==settings['fraction_grna']]
     fraction_df = df[['fraction', 'prc']]
     merged_df = pd.merge(fraction_df, result_df, on=['prc'])
-    cv_df = group_cv_score(settings['cv_csv'], settings['plate'], settings['column'], settings['data_column_cv'])
+    cv_df = group_cv_score(settings['cv_csv'], settings['plateID'], settings['columnID'], settings['data_column_cv'])
     cv_df = cv_df[[settings['data_column_cv'], 'prc']]
     merged_df = pd.merge(merged_df, cv_df, on=['prc'])
 
-    fig = plot_multi_channel_heatmap(merged_df, settings['column'], settings['cmap'])
+    fig = plot_multi_channel_heatmap(merged_df, settings['columnID'], settings['cmap'])
     if 'row_number' in merged_df.columns:
         merged_df = merged_df.drop('row_num', axis=1)
     mae_df = calculate_mae(merged_df)
@@ -1299,9 +1705,9 @@ def generate_score_heatmap(settings):
         mae_df = mae_df.drop('row_num', axis=1)
 
     if not settings['dst'] is None:
-        mae_dst = os.path.join(settings['dst'], f"mae_scores_comparison_plate_{settings['plate']}.csv")
-        merged_dst = os.path.join(settings['dst'], f"scores_comparison_plate_{settings['plate']}_data.csv")
-        heatmap_save = os.path.join(settings['dst'], f"scores_comparison_plate_{settings['plate']}.pdf")
+        mae_dst = os.path.join(settings['dst'], f"mae_scores_comparison_plate_{settings['plateID']}.csv")
+        merged_dst = os.path.join(settings['dst'], f"scores_comparison_plate_{settings['plateID']}_data.csv")
+        heatmap_save = os.path.join(settings['dst'], f"scores_comparison_plate_{settings['plateID']}.pdf")
         mae_df.to_csv(mae_dst, index=False)
         merged_df.to_csv(merged_dst, index=False)
         fig.savefig(heatmap_save, format='pdf', dpi=600, bbox_inches='tight')