spacr 0.4.15__py3-none-any.whl → 0.4.60__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- spacr/core.py +52 -9
- spacr/deep_spacr.py +2 -3
- spacr/gui_core.py +247 -41
- spacr/gui_elements.py +133 -2
- spacr/gui_utils.py +17 -15
- spacr/io.py +540 -55
- spacr/ml.py +141 -258
- spacr/plot.py +76 -34
- spacr/sequencing.py +73 -38
- spacr/settings.py +136 -128
- spacr/submodules.py +619 -213
- spacr/timelapse.py +25 -25
- spacr/toxo.py +23 -23
- spacr/utils.py +162 -89
- {spacr-0.4.15.dist-info → spacr-0.4.60.dist-info}/METADATA +2 -1
- {spacr-0.4.15.dist-info → spacr-0.4.60.dist-info}/RECORD +20 -20
- {spacr-0.4.15.dist-info → spacr-0.4.60.dist-info}/LICENSE +0 -0
- {spacr-0.4.15.dist-info → spacr-0.4.60.dist-info}/WHEEL +0 -0
- {spacr-0.4.15.dist-info → spacr-0.4.60.dist-info}/entry_points.txt +0 -0
- {spacr-0.4.15.dist-info → spacr-0.4.60.dist-info}/top_level.txt +0 -0
spacr/submodules.py
CHANGED
@@ -1,14 +1,21 @@
-
-
-
 import seaborn as sns
-import os, random, sqlite3, re, shap
+import os, random, sqlite3, re, shap, string, time
 import pandas as pd
 import numpy as np
-
+
 from skimage.measure import regionprops, label
+from skimage.transform import resize as sk_resize, rotate
+from skimage.exposure import rescale_intensity
+
+import cellpose
+from cellpose import models as cp_models
+from cellpose import train as train_cp
 from cellpose import models as cp_models
+from cellpose import io as cp_io
 from cellpose import train as train_cp
+from cellpose.metrics import aggregated_jaccard_index
+from cellpose.metrics import average_precision
+
 from IPython.display import display
 from sklearn.ensemble import RandomForestClassifier
 from sklearn.inspection import permutation_importance
@@ -17,10 +24,548 @@ from scipy.stats import chi2_contingency, pearsonr
 from scipy.spatial.distance import cosine
 
 from sklearn.metrics import mean_absolute_error
-
+from skimage.measure import regionprops, label as sklabel
 import matplotlib.pyplot as plt
 from natsort import natsorted
 
+import torch
+from torch.utils.data import Dataset
+from spacr.settings import get_train_cellpose_default_settings
+from spacr.utils import save_settings, invert_image
+
+class CellposeLazyDataset(Dataset):
+    def __init__(self, image_files, label_files, settings, randomize=True, augment=False):
+        combined = list(zip(image_files, label_files))
+        if randomize:
+            random.shuffle(combined)
+        self.image_files, self.label_files = zip(*combined)
+        self.normalize = settings['normalize']
+        self.percentiles = settings.get('percentiles', [2, 99])
+        self.target_size = settings['target_size']
+        self.augment = augment
+
+    def __len__(self):
+        return len(self.image_files) * (8 if self.augment else 1)
+
+    def apply_augmentation(self, image, label, aug_idx):
+        if aug_idx == 1:
+            return rotate(image, 90, resize=False, preserve_range=True), rotate(label, 90, resize=False, preserve_range=True)
+        elif aug_idx == 2:
+            return rotate(image, 180, resize=False, preserve_range=True), rotate(label, 180, resize=False, preserve_range=True)
+        elif aug_idx == 3:
+            return rotate(image, 270, resize=False, preserve_range=True), rotate(label, 270, resize=False, preserve_range=True)
+        elif aug_idx == 4:
+            return np.fliplr(image), np.fliplr(label)
+        elif aug_idx == 5:
+            return np.flipud(image), np.flipud(label)
+        elif aug_idx == 6:
+            return np.fliplr(rotate(image, 90, resize=False, preserve_range=True)), np.fliplr(rotate(label, 90, resize=False, preserve_range=True))
+        elif aug_idx == 7:
+            return np.flipud(rotate(image, 90, resize=False, preserve_range=True)), np.flipud(rotate(label, 90, resize=False, preserve_range=True))
+        return image, label
+
+    def __getitem__(self, idx):
+        base_idx = idx // 8 if self.augment else idx
+        aug_idx = idx % 8 if self.augment else 0
+
+        image = cp_io.imread(self.image_files[base_idx])
+        label = cp_io.imread(self.label_files[base_idx])
+
+        if image.ndim == 3:
+            image = image.mean(axis=-1)
+
+        if image.max() > 1:
+            image = image / image.max()
+
+        if self.normalize:
+            lower_p, upper_p = np.percentile(image, self.percentiles)
+            image = rescale_intensity(image, in_range=(lower_p, upper_p), out_range=(0, 1))
+
+        image, label = self.apply_augmentation(image, label, aug_idx)
+
+        image_shape = (self.target_size, self.target_size)
+        image = sk_resize(image, image_shape, preserve_range=True, anti_aliasing=True).astype(np.float32)
+        label = sk_resize(label, image_shape, order=0, preserve_range=True, anti_aliasing=False).astype(np.uint8)
+
+        return image, label
+
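A minimal usage sketch for the new dataset class (illustrative, not part of the diff). It assumes a settings dict carrying the keys read in __init__ ('normalize', 'percentiles', 'target_size') and matching .tif image/mask pairs; with augment=True each base image expands to eight samples (identity, three rotations, two flips, two flip+rotate combinations):

# Illustrative only; file paths and settings values are hypothetical.
settings = {'normalize': True, 'percentiles': [2, 99], 'target_size': 512}
image_files = ['train/images/img_001.tif']
label_files = ['train/masks/img_001.tif']

ds = CellposeLazyDataset(image_files, label_files, settings,
                         randomize=False, augment=True)
print(len(ds))    # 8 == 1 base image * 8 augmentations
img, lbl = ds[3]  # base_idx = 3 // 8 = 0, aug_idx = 3 % 8 = 3 (270-degree rotation)
print(img.shape, img.dtype, lbl.shape, lbl.dtype)  # (512, 512) float32, (512, 512) uint8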
+def train_cellpose(settings):
+
+    from spacr.settings import get_train_cellpose_default_settings
+    from spacr.utils import save_settings
+
+    settings = get_train_cellpose_default_settings(settings)
+    img_src = os.path.join(settings['src'], 'train', 'images')
+    mask_src = os.path.join(settings['src'], 'train', 'masks')
+    target_size = settings['target_size']
+
+    model_name = f"{settings['model_name']}_cyto_e{settings['n_epochs']}_X{target_size}_Y{target_size}.CP_model"
+    model_save_path = os.path.join(settings['src'], 'models', 'cellpose_model')
+    os.makedirs(model_save_path, exist_ok=True)
+
+    save_settings(settings, name=model_name)
+
+    model = cp_models.CellposeModel(gpu=True, model_type='cyto', diam_mean=30, pretrained_model='cyto')
+    cp_channels = [0, 0]
+
+    #train_image_files = sorted([os.path.join(img_src, f) for f in os.listdir(img_src) if f.endswith('.tif')])
+    #train_label_files = sorted([os.path.join(mask_src, f) for f in os.listdir(mask_src) if f.endswith('.tif')])
+
+    image_filenames = set(f for f in os.listdir(img_src) if f.endswith('.tif'))
+    label_filenames = set(f for f in os.listdir(mask_src) if f.endswith('.tif'))
+
+    # Only keep files that are present in both folders
+    matched_filenames = sorted(image_filenames & label_filenames)
+
+    train_image_files = [os.path.join(img_src, f) for f in matched_filenames]
+    train_label_files = [os.path.join(mask_src, f) for f in matched_filenames]
+
+    train_dataset = CellposeLazyDataset(train_image_files, train_label_files, settings, randomize=True, augment=settings['augment'])
+
+    n_aug = 8 if settings['augment'] else 1
+    max_base_images = len(train_dataset) // n_aug if settings['augment'] else len(train_dataset)
+    n_base = min(settings['batch_size'], max_base_images)
+
+    unique_base_indices = list(range(max_base_images))
+    random.shuffle(unique_base_indices)
+    selected_indices = unique_base_indices[:n_base]
+
+    images, labels = [], []
+    for idx in selected_indices:
+        for aug_idx in range(n_aug):
+            i = idx * n_aug + aug_idx if settings['augment'] else idx
+            img, lbl = train_dataset[i]
+            images.append(img)
+            labels.append(lbl)
+    try:
+        plot_cellpose_batch(images, labels)
+    except:
+        print(f"could not print batch images")
+
+    print(f"Training model with {len(images)} ber patch for {settings['n_epochs']} Epochs")
+
+    train_cp.train_seg(model.net,
+                       train_data=images,
+                       train_labels=labels,
+                       channels=cp_channels,
+                       save_path=model_save_path,
+                       n_epochs=settings['n_epochs'],
+                       batch_size=settings['batch_size'],
+                       learning_rate=settings['learning_rate'],
+                       weight_decay=settings['weight_decay'],
+                       model_name=model_name,
+                       save_every=max(1, (settings['n_epochs'] // 10)),
+                       rescale=False)
+
+    print(f"Model saved at: {model_save_path}/{model_name}")
+
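A sketch of the directory layout and call the rewritten trainer expects (src/train/images and src/train/masks holding identically named .tif files); the settings keys are those read above, the values are hypothetical and would otherwise be filled in by get_train_cellpose_default_settings:

# Illustrative only; paths and values are hypothetical.
settings = {
    'src': '/data/experiment1',   # expects train/images and train/masks inside
    'model_name': 'my_model',
    'n_epochs': 100,
    'target_size': 512,
    'batch_size': 8,
    'learning_rate': 0.2,
    'weight_decay': 1e-5,
    'normalize': True,
    'percentiles': [2, 99],
    'augment': False,
}
train_cellpose(settings)
# -> /data/experiment1/models/cellpose_model/my_model_cyto_e100_X512_Y512.CP_model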
+def test_cellpose_model(settings):
+
+    from spacr.utils import save_settings, print_progress
+    from .settings import get_default_test_cellpose_model_settings
+
+    def plot_cellpose_resilts(i, j, results_dir, img, lbl, pred, flow):
+        from spacr.plot import generate_mask_random_cmap
+        fig, axs = plt.subplots(1, 5, figsize=(16, 4), gridspec_kw={'wspace': 0.1, 'hspace': 0.1})
+        cmap_lbl = generate_mask_random_cmap(lbl)
+        cmap_pred = generate_mask_random_cmap(pred)
+
+        axs[0].imshow(img, cmap='gray')
+        axs[0].set_title('Image')
+        axs[0].axis('off')
+
+        axs[1].imshow(lbl, cmap=cmap_lbl, interpolation='nearest')
+        axs[1].set_title('True Mask')
+        axs[1].axis('off')
+
+        axs[2].imshow(pred, cmap=cmap_pred, interpolation='nearest')
+        axs[2].set_title('Predicted Mask')
+        axs[2].axis('off')
+
+        axs[3].imshow(flow[2], cmap='gray')
+        axs[3].set_title('Cell Probability')
+        axs[3].axis('off')
+
+        axs[4].imshow(flow[0], cmap='gray')
+        axs[4].set_title('Flows')
+        axs[4].axis('off')
+
+        save_path = os.path.join(results_dir, f"cellpose_result_{i+j:03d}.png")
+        plt.savefig(save_path, dpi=200, bbox_inches='tight')
+        plt.show()
+        plt.close(fig)
+
+
+    settings = get_default_test_cellpose_model_settings(settings)
+
+    save_settings(settings, name='test_cellpose_model')
+    test_image_folder = os.path.join(settings['src'], 'test', 'images')
+    test_label_folder = os.path.join(settings['src'], 'test', 'masks')
+    results_dir = os.path.join(settings['src'], 'results')
+    os.makedirs(results_dir, exist_ok=True)
+
+    print(f"Results will be saved in: {results_dir}")
+
+    image_filenames = set(f for f in os.listdir(test_image_folder) if f.endswith('.tif'))
+    label_filenames = set(f for f in os.listdir(test_label_folder) if f.endswith('.tif'))
+
+    # Only keep files that are present in both folders
+    matched_filenames = sorted(image_filenames & label_filenames)
+
+    test_image_files = [os.path.join(test_image_folder, f) for f in matched_filenames]
+    test_label_files = [os.path.join(test_label_folder, f) for f in matched_filenames]
+
+    print(f"Found {len(test_image_files)} images and {len(test_label_files)} masks")
+
+    test_dataset = CellposeLazyDataset(test_image_files, test_label_files, settings, randomize=False, augment=False)
+
+    model = cp_models.CellposeModel(gpu=True, pretrained_model=settings['model_path'])
+
+    batch_size = settings['batch_size']
+    scores = []
+    names = []
+    time_ls = []
+
+    files_to_process = len(test_image_folder)
+
+    for i in range(0, len(test_dataset), batch_size):
+        start = time.time()
+        batch = [test_dataset[j] for j in range(i, min(i + batch_size, len(test_dataset)))]
+        images, labels = zip(*batch)
+
+        masks_pred, flows, _ = model.eval(x=list(images),
+                                          channels=[0, 0],
+                                          normalize=False,
+                                          diameter=30,
+                                          flow_threshold=settings['FT'],
+                                          cellprob_threshold=settings['CP_probability'],
+                                          rescale=None,
+                                          resample=True,
+                                          interp=True,
+                                          anisotropy=None,
+                                          min_size=5,
+                                          augment=True,
+                                          tile=True,
+                                          tile_overlap=0.2,
+                                          bsize=224)
+
+        n_objects_true_ls = []
+        n_objects_pred_ls = []
+        mean_area_true_ls = []
+        mean_area_pred_ls = []
+        tp_ls, fp_ls, fn_ls = [], [], []
+        precision_ls, recall_ls, f1_ls, accuracy_ls = [], [], [], []
+
+        for j, (img, lbl, pred, flow) in enumerate(zip(images, labels, masks_pred, flows)):
+            score = float(aggregated_jaccard_index([lbl], [pred]))
+            fname = os.path.basename(test_label_files[i + j])
+            scores.append(score)
+            names.append(fname)
+
+            # Label masks
+            lbl_lab = label(lbl)
+            pred_lab = label(pred)
+
+            # Count objects
+            n_true = lbl_lab.max()
+            n_pred = pred_lab.max()
+            n_objects_true_ls.append(n_true)
+            n_objects_pred_ls.append(n_pred)
+
+            # Mean object size (area)
+            area_true = [p.area for p in regionprops(lbl_lab)]
+            area_pred = [p.area for p in regionprops(pred_lab)]
+
+            mean_area_true = np.mean(area_true) if area_true else 0
+            mean_area_pred = np.mean(area_pred) if area_pred else 0
+            mean_area_true_ls.append(mean_area_true)
+            mean_area_pred_ls.append(mean_area_pred)
+
+            # Compute object-level TP, FP, FN
+            ap, tp, fp, fn = average_precision([lbl], [pred], threshold=[0.5])
+            tp, fp, fn = int(tp[0, 0]), int(fp[0, 0]), int(fn[0, 0])
+            tp_ls.append(tp)
+            fp_ls.append(fp)
+            fn_ls.append(fn)
+
+            # Precision, Recall, F1, Accuracy
+            prec = tp / (tp + fp) if (tp + fp) > 0 else 0
+            rec = tp / (tp + fn) if (tp + fn) > 0 else 0
+            f1 = 2 * prec * rec / (prec + rec) if (prec + rec) > 0 else 0
+            acc = tp / (tp + fp + fn) if (tp + fp + fn) > 0 else 0
+
+            precision_ls.append(prec)
+            recall_ls.append(rec)
+            f1_ls.append(f1)
+            accuracy_ls.append(acc)
+
+            if settings['save']:
+                plot_cellpose_resilts(i, j, results_dir, img, lbl, pred, flow)
+
+        if settings['save']:
+            plot_cellpose_resilts(i,j,results_dir, img, lbl, pred, flow)
+
+        stop = time.time()
+        duration = stop-start
+        files_processed = (i+1) * batch_size
+        time_ls.append(duration)
+        print_progress(files_processed, files_to_process, n_jobs=1, time_ls=None, batch_size=batch_size, operation_type="test custom cellpose model")
+
+    df_results = pd.DataFrame({
+        'label_image': names,
+        'Jaccard': scores,
+        'n_objects_true': n_objects_true_ls,
+        'n_objects_pred': n_objects_pred_ls,
+        'mean_area_true': mean_area_true_ls,
+        'mean_area_pred': mean_area_pred_ls,
+        'TP': tp_ls,
+        'FP': fp_ls,
+        'FN': fn_ls,
+        'Precision': precision_ls,
+        'Recall': recall_ls,
+        'F1': f1_ls,
+        'Accuracy': accuracy_ls
+    })
+
+    df_results['n_error'] = abs(df_results['n_objects_pred'] - df_results['n_objects_true'])
+
+    print(f"Average true objects/image: {df_results['n_objects_true'].mean():.2f}")
+    print(f"Average predicted objects/image: {df_results['n_objects_pred'].mean():.2f}")
+    print(f"Mean object area (true): {df_results['mean_area_true'].mean():.2f} px")
+    print(f"Mean object area (pred): {df_results['mean_area_pred'].mean():.2f} px")
+    print(f"Average Jaccard score: {df_results['Jaccard'].mean():.4f}")
+
+    print(f"Average Precision: {df_results['Precision'].mean():.3f}")
+    print(f"Average Recall: {df_results['Recall'].mean():.3f}")
+    print(f"Average F1-score: {df_results['F1'].mean():.3f}")
+    print(f"Average Accuracy: {df_results['Accuracy'].mean():.3f}")
+
+    display(df_results)
+
+    if settings['save']:
+        df_results.to_csv(os.path.join(results_dir, 'test_results.csv'), index=False)
+
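The per-image metrics above come from matched object counts at IoU 0.5: cellpose.metrics.average_precision returns tp/fp/fn arrays indexed by [image, threshold] (hence tp[0, 0]), and the code then computes precision = tp/(tp+fp), recall = tp/(tp+fn), F1 as their harmonic mean, and an "Accuracy" that is the Jaccard-style tp/(tp+fp+fn). A self-contained sketch with made-up counts:

# Worked example with hypothetical counts: 18 matched objects,
# 2 spurious predictions, 4 missed ground-truth objects.
tp, fp, fn = 18, 2, 4
prec = tp / (tp + fp)               # 18/20 = 0.900
rec = tp / (tp + fn)                # 18/22 ~= 0.818
f1 = 2 * prec * rec / (prec + rec)  # ~0.857
acc = tp / (tp + fp + fn)           # 18/24 = 0.750
print(f"P={prec:.3f} R={rec:.3f} F1={f1:.3f} Acc={acc:.3f}")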
+def apply_cellpose_model(settings):
+
+    from .settings import get_default_apply_cellpose_model_settings
+    from spacr.utils import save_settings, print_progress
+
+    def plot_cellpose_result(i, j, results_dir, img, pred, flow):
+
+        from .plot import generate_mask_random_cmap
+
+        fig, axs = plt.subplots(1, 4, figsize=(16, 4), gridspec_kw={'wspace': 0.1, 'hspace': 0.1})
+        cmap_pred = generate_mask_random_cmap(pred)
+
+        axs[0].imshow(img, cmap='gray')
+        axs[0].set_title('Image')
+        axs[0].axis('off')
+
+        axs[1].imshow(pred, cmap=cmap_pred, interpolation='nearest')
+        axs[1].set_title('Predicted Mask')
+        axs[1].axis('off')
+
+        axs[2].imshow(flow[2], cmap='gray')
+        axs[2].set_title('Cell Probability')
+        axs[2].axis('off')
+
+        axs[3].imshow(flow[0], cmap='gray')
+        axs[3].set_title('Flows')
+        axs[3].axis('off')
+
+        save_path = os.path.join(results_dir, f"cellpose_result_{i + j:03d}.png")
+        plt.savefig(save_path, dpi=200, bbox_inches='tight')
+        plt.show()
+        plt.close(fig)
+
+
+    settings = get_default_apply_cellpose_model_settings(settings)
+    save_settings(settings, name='apply_cellpose_model')
+
+    image_folder = os.path.join(settings['src'])
+    results_dir = os.path.join(settings['src'], 'results')
+    os.makedirs(results_dir, exist_ok=True)
+    print(f"Results will be saved in: {results_dir}")
+
+    image_files = sorted([os.path.join(image_folder, f) for f in os.listdir(image_folder) if f.endswith('.tif')])
+    print(f"Found {len(image_files)} images")
+
+    dummy_labels = [image_files[0]] * len(image_files)
+    dataset = CellposeLazyDataset(image_files, dummy_labels, settings, randomize=False, augment=False)
+
+    model = cp_models.CellposeModel(gpu=True, pretrained_model=settings['model_path'])
+    batch_size = settings['batch_size']
+    measurements = []
+
+    files_to_process = len(image_files)
+    time_ls = []
+
+    for i in range(0, len(dataset), batch_size):
+        start = time.time()
+        batch = [dataset[j] for j in range(i, min(i + batch_size, len(dataset)))]
+        images, _ = zip(*batch)
+
+        X = list(images)
+
+        print(settings['CP_probability'])
+        masks_pred, flows, _ = model.eval(x=list(images),
+                                          channels=[0, 0],
+                                          normalize=False,
+                                          diameter=30,
+                                          flow_threshold=settings['FT'],
+                                          cellprob_threshold=settings['CP_probability'],
+                                          rescale=None,
+                                          resample=True,
+                                          interp=True,
+                                          anisotropy=None,
+                                          min_size=5,
+                                          augment=True,
+                                          tile=True,
+                                          tile_overlap=0.2,
+                                          bsize=224)
+
+        for j, (img, pred, flow) in enumerate(zip(images, masks_pred, flows)):
+            fname = os.path.basename(image_files[i + j])
+
+            if settings.get('circularize', False):
+                h, w = pred.shape
+                Y, X = np.ogrid[:h, :w]
+                center_x, center_y = w / 2, h / 2
+                radius = min(center_x, center_y)
+                circular_mask = (X - center_x)**2 + (Y - center_y)**2 <= radius**2
+                pred = pred * circular_mask
+
+            if settings['save']:
+                plot_cellpose_result(i, j, results_dir, img, pred, flow)
+
+            props = regionprops(sklabel(pred))
+            for k, prop in enumerate(props):
+                measurements.append({
+                    'image': fname,
+                    'object_id': k + 1,
+                    'area': prop.area
+                })
+
+        stop = time.time()
+        duration = stop-start
+        files_processed = (i+1) * batch_size
+        time_ls.append(duration)
+        print_progress(files_processed, files_to_process, n_jobs=1, time_ls=None, batch_size=batch_size, operation_type="apply custom cellpose model")
+
+
+        # Write after each batch
+        df_measurements = pd.DataFrame(measurements)
+        df_measurements.to_csv(os.path.join(results_dir, 'measurements.csv'), index=False)
+        print("Saved object counts and areas to measurements.csv")
+
+        df_summary = df_measurements.groupby('image').agg(
+            object_count=('object_id', 'count'),
+            average_area=('area', 'mean')
+        ).reset_index()
+        df_summary.to_csv(os.path.join(results_dir, 'summary.csv'), index=False)
+        print("Saved object count and average area to summary.csv")
+
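A sketch of running the new inference entry point on a folder of .tif images; the path, model file, and threshold values are illustrative ('FT' is the flow threshold, 'CP_probability' the cell-probability cutoff), with defaults otherwise supplied by get_default_apply_cellpose_model_settings:

# Illustrative only; paths and values are hypothetical.
settings = {
    'src': '/data/experiment1/new_images',  # folder of .tif images
    'model_path': '/data/experiment1/models/cellpose_model/my_model_cyto_e100_X512_Y512.CP_model',
    'batch_size': 8,
    'FT': 0.4,               # flow threshold
    'CP_probability': 0.0,   # cell probability threshold
    'target_size': 512,
    'normalize': True,
    'save': False,           # True also writes per-image result PNGs
    'circularize': False,    # True zeroes predictions outside an inscribed circle
}
apply_cellpose_model(settings)
# -> results/measurements.csv (per object) and results/summary.csv (per image)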
+def plot_cellpose_batch(images, labels):
+    from spacr.plot import generate_mask_random_cmap
+
+    cmap_lbl = generate_mask_random_cmap(labels)
+    batch_size = len(images)
+    fig, axs = plt.subplots(2, batch_size, figsize=(4 * batch_size, 8))
+    for i in range(batch_size):
+        axs[0, i].imshow(images[i], cmap='gray')
+        axs[0, i].set_title(f'Image {i+1}')
+        axs[0, i].axis('off')
+        axs[1, i].imshow(labels[i], cmap=cmap_lbl, interpolation='nearest')
+        axs[1, i].set_title(f'Label {i+1}')
+        axs[1, i].axis('off')
+    plt.show()
+
+def analyze_percent_positive(settings):
+    from spacr.io import _read_and_merge_data
+    from spacr.utils import save_settings
+    from .settings import default_settings_analyze_percent_positive
+
+    settings = default_settings_analyze_percent_positive(settings)
+
+    def translate_well_in_df(csv_loc):
+        # Load and extract metadata
+        df = pd.read_csv(csv_loc)
+        df[['plateID', 'well']] = df['Renamed TIFF'].str.replace('.tif', '', regex=False).str.split('_', expand=True)[[0, 1]]
+        df['plate_well'] = df['plateID'] + '_' + df['well']
+
+        # Retain one row per plate_well
+        df_2 = df.drop_duplicates(subset='plate_well').copy()
+
+        # Translate well to row and column
+        df_2['rowID'] = 'r' + df_2['well'].str[0].map(lambda x: str(string.ascii_uppercase.index(x) + 1))
+        df_2['column_name'] = 'c' + df_2['well'].str[1:].astype(int).astype(str)
+
+        # Optional: add prcf ID (plate_row_column_field)
+        df_2['fieldID'] = 'f1' # default or extract from filename if needed
+        df_2['prc'] = 'p' + df_2['plateID'].str.extract(r'(\d+)')[0] + '_' + df_2['rowID'] + '_' + df_2['column_name']
+
+        return df_2
+
+    def annotate_and_summarize(df, value_col, condition_col, well_col, threshold, annotation_col='annotation'):
+        """
+        Annotate and summarize a DataFrame based on a threshold.
+
+        Parameters:
+        - df: pandas.DataFrame
+        - value_col: str, column name to apply threshold on
+        - condition_col: str, column name for experimental condition
+        - well_col: str, column name for wells
+        - threshold: float, threshold value for annotation
+        - annotation_col: str, name of the new annotation column
+
+        Returns:
+        - df: annotated DataFrame
+        - summary_df: DataFrame with counts and fractions per condition and well
+        """
+        # Annotate
+        df[annotation_col] = np.where(df[value_col] > threshold, 'above', 'below')
+
+        # Count per condition and well
+        count_df = df.groupby([condition_col, well_col, annotation_col]).size().unstack(fill_value=0)
+
+        # Calculate total and fractions
+        count_df['total'] = count_df.sum(axis=1)
+        count_df['fraction_above'] = count_df.get('above', 0) / count_df['total']
+        count_df['fraction_below'] = count_df.get('below', 0) / count_df['total']
+
+        return df, count_df.reset_index()
+
+    save_settings(settings, name='analyze_percent_positive', show=False)
+
+    df, _ = _read_and_merge_data(locs=[settings['src']+'/measurements/measurements.db'],
+                                 tables=settings['tables'],
+                                 verbose=True,
+                                 nuclei_limit=None,
+                                 pathogen_limit=None)
+
+    df['condition'] = 'none'
+
+    if not settings['filter_1'] is None:
+        df = df[df[settings['filter_1'][0]]>settings['filter_1'][1]]
+
+    condition_col = 'condition'
+    well_col = 'prc'
+
+    df, count_df = annotate_and_summarize(df, settings['value_col'], condition_col, well_col, settings['threshold'], annotation_col='annotation')
+    count_df[['plateID', 'rowID', 'column_name']] = count_df['prc'].str.split('_', expand=True)
+
+    csv_loc = os.path.join(settings['src'], 'rename_log.csv')
+    csv_out_loc = os.path.join(settings['src'], 'result.csv')
+    translate_df = translate_well_in_df(csv_loc)
+
+    merged = pd.merge(count_df, translate_df, on=['rowID', 'column_name'], how='inner')
+
+    merged = merged[['plate_y', 'well', 'plate_well','fieldID','rowID','column_name','prc_x','Original File','Renamed TIFF','above','below','fraction_above','fraction_below']]
+    merged[[f'part{i}' for i in range(merged['Original File'].str.count('_').max() + 1)]] = merged['Original File'].str.split('_', expand=True)
+    merged.to_csv(csv_out_loc, index=False)
+    display(merged)
+    return merged
+
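The well translation above maps a well name such as 'A3' to the numeric IDs used elsewhere in the module: the row letter's alphabet position becomes the rowID ('A' -> 'r1'), the remaining digits become the column ('3' -> 'c3'), and the plate number is prefixed to form the prc key. A toy check (values hypothetical):

import string

well = 'A3'  # hypothetical well parsed from a 'Renamed TIFF' name like plate2_A3.tif
row_id = 'r' + str(string.ascii_uppercase.index(well[0]) + 1)  # 'r1'
column_name = 'c' + str(int(well[1:]))                         # 'c3'
prc = 'p2' + '_' + row_id + '_' + column_name                  # 'p2_r1_c3'
print(row_id, column_name, prc)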
 def analyze_recruitment(settings):
     """
     Analyze recruitment data by grouping the DataFrame by well coordinates and plotting controls and recruitment data.
@@ -198,147 +743,6 @@ def analyze_plaques(settings):
 
     print(f"Analysis completed and saved to database '{db_name}'.")
 
-def train_cellpose(settings):
-
-    from .io import _load_normalized_images_and_labels, _load_images_and_labels
-    from .settings import get_train_cellpose_default_settings
-    from .utils import save_settings
-
-    settings = get_train_cellpose_default_settings(settings)
-
-    img_src = settings['img_src']
-    mask_src = os.path.join(img_src, 'masks')
-    test_img_src = settings['test_img_src']
-    test_mask_src = settings['test_mask_src']
-
-    if settings['resize']:
-        target_height = settings['width_height'][1]
-        target_width = settings['width_height'][0]
-
-    if settings['test']:
-        test_img_src = os.path.join(os.path.dirname(settings['img_src']), 'test')
-        test_mask_src = os.path.join(settings['test_img_src'], 'mask')
-
-    test_images, test_masks, test_image_names, test_mask_names = None,None,None,None
-    print(settings)
-
-    if settings['from_scratch']:
-        model_name=f"scratch_{settings['model_name']}_{settings['model_type']}_e{settings['n_epochs']}_X{target_width}_Y{target_height}.CP_model"
-    else:
-        if settings['resize']:
-            model_name=f"{settings['model_name']}_{settings['model_type']}_e{settings['n_epochs']}_X{target_width}_Y{target_height}.CP_model"
-        else:
-            model_name=f"{settings['model_name']}_{settings['model_type']}_e{settings['n_epochs']}.CP_model"
-
-    model_save_path = os.path.join(settings['mask_src'], 'models', 'cellpose_model')
-    print(model_save_path)
-    os.makedirs(model_save_path, exist_ok=True)
-
-    save_settings(settings, name=model_name)
-
-    if settings['from_scratch']:
-        model = cp_models.CellposeModel(gpu=True, model_type=settings['model_type'], diam_mean=settings['diameter'], pretrained_model=None)
-    else:
-        model = cp_models.CellposeModel(gpu=True, model_type=settings['model_type'])
-
-    if settings['normalize']:
-
-        image_files = [os.path.join(img_src, f) for f in os.listdir(img_src) if f.endswith('.tif')]
-        label_files = [os.path.join(mask_src, f) for f in os.listdir(mask_src) if f.endswith('.tif')]
-        images, masks, image_names, mask_names, orig_dims = _load_normalized_images_and_labels(image_files,
-                                                                                               label_files,
-                                                                                               settings['channels'],
-                                                                                               settings['percentiles'],
-                                                                                               settings['invert'],
-                                                                                               settings['verbose'],
-                                                                                               settings['remove_background'],
-                                                                                               settings['background'],
-                                                                                               settings['Signal_to_noise'],
-                                                                                               settings['target_height'],
-                                                                                               settings['target_width'])
-        images = [np.squeeze(img) if img.shape[-1] == 1 else img for img in images]
-
-        if settings['test']:
-            test_image_files = [os.path.join(test_img_src, f) for f in os.listdir(test_img_src) if f.endswith('.tif')]
-            test_label_files = [os.path.join(test_mask_src, f) for f in os.listdir(test_mask_src) if f.endswith('.tif')]
-            test_images, test_masks, test_image_names, test_mask_names = _load_normalized_images_and_labels(test_image_files,
-                                                                                                            test_label_files,
-                                                                                                            settings['channels'],
-                                                                                                            settings['percentiles'],
-                                                                                                            settings['invert'],
-                                                                                                            settings['verbose'],
-                                                                                                            settings['remove_background'],
-                                                                                                            settings['background'],
-                                                                                                            settings['Signal_to_noise'],
-                                                                                                            settings['target_height'],
-                                                                                                            settings['target_width'])
-            test_images = [np.squeeze(img) if img.shape[-1] == 1 else img for img in test_images]
-
-    else:
-        images, masks, image_names, mask_names = _load_images_and_labels(img_src, mask_src, settings['invert'])
-        images = [np.squeeze(img) if img.shape[-1] == 1 else img for img in images]
-
-        if settings['test']:
-            test_images, test_masks, test_image_names, test_mask_names = _load_images_and_labels(test_img_src,
-                                                                                                 test_mask_src,
-                                                                                                 settings['invert'])
-
-            test_images = [np.squeeze(img) if img.shape[-1] == 1 else img for img in test_images]
-
-    #if resize:
-    #    images, masks = resize_images_and_labels(images, masks, target_height, target_width, show_example=True)
-
-    if settings['model_type'] == 'cyto':
-        cp_channels = [0,1]
-    if settings['model_type'] == 'cyto2':
-        cp_channels = [0,2]
-    if settings['model_type'] == 'nucleus':
-        cp_channels = [0,0]
-    if settings['grayscale']:
-        cp_channels = [0,0]
-        images = [np.squeeze(img) if img.ndim == 3 and 1 in img.shape else img for img in images]
-
-    masks = [np.squeeze(mask) if mask.ndim == 3 and 1 in mask.shape else mask for mask in masks]
-
-    print(f'image shape: {images[0].shape}, image type: images[0].shape mask shape: {masks[0].shape}, image type: masks[0].shape')
-    save_every = int(settings['n_epochs']/10)
-    if save_every < 10:
-        save_every = settings['n_epochs']
-
-    train_cp.train_seg(model.net,
-                       train_data=images,
-                       train_labels=masks,
-                       train_files=image_names,
-                       train_labels_files=mask_names,
-                       train_probs=None,
-                       test_data=test_images,
-                       test_labels=test_masks,
-                       test_files=test_image_names,
-                       test_labels_files=test_mask_names,
-                       test_probs=None,
-                       load_files=True,
-                       batch_size=settings['batch_size'],
-                       learning_rate=settings['learning_rate'],
-                       n_epochs=settings['n_epochs'],
-                       weight_decay=settings['weight_decay'],
-                       momentum=0.9,
-                       SGD=False,
-                       channels=cp_channels,
-                       channel_axis=None,
-                       normalize=False,
-                       compute_flows=False,
-                       save_path=model_save_path,
-                       save_every=save_every,
-                       nimg_per_epoch=None,
-                       nimg_test_per_epoch=None,
-                       rescale=settings['rescale'],
-                       #scale_range=None,
-                       #bsize=224,
-                       min_train_masks=1,
-                       model_name=settings['model_name'])
-
-    return print(f"Model saved at: {model_save_path}/{model_name}")
-
 def count_phenotypes(settings):
     from .io import _read_db
 
@@ -350,17 +754,17 @@ def count_phenotypes(settings):
     unique_values_count = df[settings['annotation_column']].nunique(dropna=True)
     print(f"Unique values in {settings['annotation_column']} (excluding NaN): {unique_values_count}")
 
-    # Count unique values in 'value' column, grouped by '
-    grouped_unique_count = df.groupby(['
+    # Count unique values in 'value' column, grouped by 'plateID', 'rowID', 'columnID'
+    grouped_unique_count = df.groupby(['plateID', 'rowID', 'columnID'])[settings['annotation_column']].nunique(dropna=True).reset_index(name='unique_count')
     display(grouped_unique_count)
 
     save_path = os.path.join(settings['src'], 'phenotype_counts.csv')
 
     # Group by plate, row, and column, then count the occurrences of each unique value
-    grouped_counts = df.groupby(['
+    grouped_counts = df.groupby(['plateID', 'rowID', 'columnID', 'value']).size().reset_index(name='count')
 
     # Pivot the DataFrame so that unique values are columns and their counts are in the rows
-    pivot_df = grouped_counts.pivot_table(index=['
+    pivot_df = grouped_counts.pivot_table(index=['plateID', 'rowID', 'columnID'], columns='value', values='count', fill_value=0)
 
     # Flatten the multi-level columns
     pivot_df.columns = [f"value_{int(col)}" for col in pivot_df.columns]
@@ -382,20 +786,20 @@ def count_phenotypes(settings):
 def compare_reads_to_scores(reads_csv, scores_csv, empirical_dict={'r1':(90,10),'r2':(90,10),'r3':(80,20),'r4':(80,20),'r5':(70,30),'r6':(70,30),'r7':(60,40),'r8':(60,40),'r9':(50,50),'r10':(50,50),'r11':(40,60),'r12':(40,60),'r13':(30,70),'r14':(30,70),'r15':(20,80),'r16':(20,80)},
                             pc_grna='TGGT1_220950_1', nc_grna='TGGT1_233460_4',
                             y_columns=['class_1_fraction', 'TGGT1_220950_1_fraction', 'nc_fraction'],
-                            column='
+                            column='columnID', value='c3', plate=None, save_paths=None):
 
     def calculate_well_score_fractions(df, class_columns='cv_predictions'):
-        if all(col in df.columns for col in ['
-            df['prc'] = df['
+        if all(col in df.columns for col in ['plateID', 'rowID', 'columnID']):
+            df['prc'] = df['plateID'] + '_' + df['rowID'] + '_' + df['columnID']
         else:
-            raise ValueError("Cannot find '
-        prc_summary = df.groupby(['
-        well_counts = (df.groupby(['
+            raise ValueError("Cannot find 'plateID', 'rowID', or 'columnID' in df.columns")
+        prc_summary = df.groupby(['plateID', 'rowID', 'columnID', 'prc']).size().reset_index(name='total_rows')
+        well_counts = (df.groupby(['plateID', 'rowID', 'columnID', 'prc', class_columns])
                        .size()
                        .unstack(fill_value=0)
                        .reset_index()
                       .rename(columns={0: 'class_0', 1: 'class_1'}))
-        summary_df = pd.merge(prc_summary, well_counts, on=['
+        summary_df = pd.merge(prc_summary, well_counts, on=['plateID', 'rowID', 'columnID', 'prc'], how='left')
         summary_df['class_0_fraction'] = summary_df['class_0'] / summary_df['total_rows']
         summary_df['class_1_fraction'] = summary_df['class_1'] / summary_df['total_rows']
         return summary_df
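calculate_well_score_fractions builds the prc well key from the new ID columns and turns per-object class predictions into per-well fractions via groupby/unstack. A toy sketch of the same pattern, with hypothetical data and a 0/1 prediction column:

import pandas as pd

# Hypothetical per-object scores for two wells.
df = pd.DataFrame({
    'plateID':  ['plate1'] * 4,
    'rowID':    ['r1', 'r1', 'r2', 'r2'],
    'columnID': ['c3'] * 4,
    'cv_predictions': [1, 0, 1, 1],
})
df['prc'] = df['plateID'] + '_' + df['rowID'] + '_' + df['columnID']
counts = (df.groupby(['prc', 'cv_predictions']).size().unstack(fill_value=0)
            .rename(columns={0: 'class_0', 1: 'class_1'}))
counts['class_1_fraction'] = counts['class_1'] / counts.sum(axis=1)
print(counts)  # plate1_r1_c3 -> 0.5, plate1_r2_c3 -> 1.0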
@@ -490,8 +894,8 @@ def compare_reads_to_scores(reads_csv, scores_csv, empirical_dict={'r1':(90,10),
         return result
 
     def calculate_well_read_fraction(df, count_column='count'):
-        if all(col in df.columns for col in ['
-            df['prc'] = df['
+        if all(col in df.columns for col in ['plateID', 'rowID', 'columnID']):
+            df['prc'] = df['plateID'] + '_' + df['rowID'] + '_' + df['columnID']
         else:
             raise ValueError("Cannot find plate, row or column in df.columns")
         grouped_df = df.groupby('prc')[count_column].sum().reset_index()
@@ -507,21 +911,17 @@ def compare_reads_to_scores(reads_csv, scores_csv, empirical_dict={'r1':(90,10),
         for i, reads_csv_temp in enumerate(reads_csv):
             reads_df_temp = pd.read_csv(reads_csv_temp)
             scores_df_temp = pd.read_csv(scores_csv[i])
-            reads_df_temp['
-            scores_df_temp['
+            reads_df_temp['plateID'] = f"plate{i+1}"
+            scores_df_temp['plateID'] = f"plate{i+1}"
 
+            if 'column' in reads_df_temp.columns:
+                reads_df_temp = reads_df_temp.rename(columns={'column': 'columnID'})
             if 'column_name' in reads_df_temp.columns:
-                reads_df_temp = reads_df_temp.rename(columns={'column_name': '
-            if '
-                reads_df_temp = reads_df_temp.rename(columns={'
-            if 'column_name' in scores_df_temp.columns:
-                scores_df_temp = scores_df_temp.rename(columns={'column_name': 'column'})
-            if 'column_name' in scores_df_temp.columns:
-                scores_df_temp = scores_df_temp.rename(columns={'column_name': 'column'})
-            if 'row_name' in reads_df_temp.columns:
-                reads_df_temp = reads_df_temp.rename(columns={'row_name': 'row_name'})
+                reads_df_temp = reads_df_temp.rename(columns={'column_name': 'columnID'})
+            if 'row' in reads_df_temp.columns:
+                reads_df_temp = reads_df_temp.rename(columns={'row_name': 'rowID'})
             if 'row_name' in scores_df_temp.columns:
-                scores_df_temp = scores_df_temp.rename(columns={'row_name': '
+                scores_df_temp = scores_df_temp.rename(columns={'row_name': 'rowID'})
 
             reads_ls.append(reads_df_temp)
             scores_ls.append(scores_df_temp)
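The rename chains above migrate the legacy 'column'/'column_name'/'row'/'row_name' headers to the new 'columnID'/'rowID' scheme. A compact equivalent, shown only as an illustration (the helper name is hypothetical, not part of spacr):

def standardize_well_columns(df):
    # Hypothetical helper mirroring the renames applied to the reads/scores frames above;
    # assumes at most one legacy variant of each ID column is present per frame.
    legacy_to_new = {'column': 'columnID', 'column_name': 'columnID',
                     'row': 'rowID', 'row_name': 'rowID'}
    return df.rename(columns={old: new for old, new in legacy_to_new.items()
                              if old in df.columns})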
@@ -535,8 +935,8 @@ def compare_reads_to_scores(reads_csv, scores_csv, empirical_dict={'r1':(90,10),
         reads_df = pd.read_csv(reads_csv)
         scores_df = pd.read_csv(scores_csv)
         if plate != None:
-            reads_df['
-            scores_df['
+            reads_df['plateID'] = plate
+            scores_df['plateID'] = plate
 
     reads_df = calculate_well_read_fraction(reads_df)
     scores_df = calculate_well_score_fractions(scores_df)
@@ -548,7 +948,7 @@ def compare_reads_to_scores(reads_csv, scores_csv, empirical_dict={'r1':(90,10),
 
     df_emp = pd.DataFrame([(key, val[0], val[1], val[0] / (val[0] + val[1]), val[1] / (val[0] + val[1])) for key, val in empirical_dict.items()],columns=['key', 'value1', 'value2', 'pc_fraction', 'nc_fraction'])
 
-    df = pd.merge(df, df_emp, left_on='
+    df = pd.merge(df, df_emp, left_on='rowID', right_on='key')
 
     if any in y_columns not in df.columns:
         print(f"columns in dataframe:")
@@ -698,11 +1098,17 @@ def interperate_vision_model(settings={}):
     # Clean and align columns for merging
     df['object_label'] = df['object_label'].str.replace('o', '')
 
-    if '
-
+    if 'rowID' not in scores_df.columns:
+        if 'row' in scores_df.columns:
+            scores_df['rowID'] = scores_df['row']
+        if 'row_name' in scores_df.columns:
+            scores_df['rowID'] = scores_df['row_name']
 
-    if '
-
+    if 'columnID' not in scores_df.columns:
+        if 'column_name' in scores_df.columns:
+            scores_df['columnID'] = scores_df['column_name']
+        if 'column' in scores_df.columns:
+            scores_df['columnID'] = scores_df['column']
 
     if 'object_label' not in scores_df.columns:
         scores_df['object_label'] = scores_df['object']
@@ -714,14 +1120,14 @@ def interperate_vision_model(settings={}):
         scores_df['object_label'] = scores_df['object'].astype(str)
 
     # Ensure all join columns have the same data type in both DataFrames
-    df[['
-    scores_df[['
+    df[['plateID', 'rowID', 'column_name', 'fieldID', 'object_label']] = df[['plateID', 'rowID', 'column_name', 'fieldID', 'object_label']].astype(str)
+    scores_df[['plateID', 'rowID', 'column_name', 'fieldID', 'object_label']] = scores_df[['plateID', 'rowID', 'column_name', 'fieldID', 'object_label']].astype(str)
 
     # Select only the necessary columns from scores_df for merging
-    scores_df = scores_df[['
+    scores_df = scores_df[['plateID', 'rowID', 'column_name', 'fieldID', 'object_label', settings['score_column']]]
 
     # Now merge DataFrames
-    merged_df = pd.merge(df, scores_df, on=['
+    merged_df = pd.merge(df, scores_df, on=['plateID', 'rowID', 'column_name', 'fieldID', 'object_label'], how='inner')
 
     # Separate numerical features and the score column
     X = merged_df.select_dtypes(include='number').drop(columns=[settings['score_column']])
@@ -997,8 +1403,8 @@ def analyze_endodyogeny(settings):
     output['data'] = df
 
 
-    if settings['level'] == '
-        prc_column = '
+    if settings['level'] == 'plateID':
+        prc_column = 'plateID'
     else:
         prc_column = 'prc'
 
@@ -1144,28 +1550,28 @@ def generate_score_heatmap(settings):
     def group_cv_score(csv, plate=1, column='c3', data_column='pred'):
 
         df = pd.read_csv(csv)
-        if '
-            df = df[df['
+        if 'columnID' in df.columns:
+            df = df[df['columnID']==column]
         elif 'column' in df.columns:
-            df['
-            df = df[df['
+            df['columnID'] = df['column']
+            df = df[df['columnID']==column]
         if not plate is None:
-            df['
-        grouped_df = df.groupby(['
-        grouped_df['prc'] = grouped_df['
+            df['plateID'] = f"plate{plate}"
+        grouped_df = df.groupby(['plateID', 'rowID', 'columnID'])[data_column].mean().reset_index()
+        grouped_df['prc'] = grouped_df['plateID'].astype(str) + '_' + grouped_df['rowID'].astype(str) + '_' + grouped_df['columnID'].astype(str)
         return grouped_df
 
     def calculate_fraction_mixed_condition(csv, plate=1, column='c3', control_sgrnas = ['TGGT1_220950_1', 'TGGT1_233460_4']):
         df = pd.read_csv(csv)
         df = df[df['column_name']==column]
         if plate not in df.columns:
-            df['
+            df['plateID'] = f"plate{plate}"
         df = df[df['grna_name'].str.match(f'^{control_sgrnas[0]}$|^{control_sgrnas[1]}$')]
-        grouped_df = df.groupby(['
+        grouped_df = df.groupby(['plateID', 'rowID', 'columnID'])['count'].sum().reset_index()
         grouped_df = grouped_df.rename(columns={'count': 'total_count'})
-        merged_df = pd.merge(df, grouped_df, on=['
+        merged_df = pd.merge(df, grouped_df, on=['plateID', 'rowID', 'column_name'])
         merged_df['fraction'] = merged_df['count'] / merged_df['total_count']
-        merged_df['prc'] = merged_df['
+        merged_df['prc'] = merged_df['plateID'].astype(str) + '_' + merged_df['rowID'].astype(str) + '_' + merged_df['column_name'].astype(str)
         return merged_df
 
     def plot_multi_channel_heatmap(df, column='c3', cmap='coolwarm'):
@@ -1177,17 +1583,17 @@ def generate_score_heatmap(settings):
         - column: Column to filter by (default is 'c3').
         """
         # Extract row number and convert to integer for sorting
-        df['row_num'] = df['
+        df['row_num'] = df['rowID'].str.extract(r'(\d+)').astype(int)
 
         # Filter and sort by plate, row, and column
-        df = df[df['
-        df = df.sort_values(by=['
+        df = df[df['columnID'] == column]
+        df = df.sort_values(by=['plateID', 'row_num', 'columnID'])
 
         # Drop temporary 'row_num' column after sorting
         df = df.drop('row_num', axis=1)
 
         # Create a new column combining plate, row, and column for the index
-        df['plate_row_col'] = df['
+        df['plate_row_col'] = df['plateID'] + '-' + df['rowID'] + '-' + df['columnID']
 
         # Set 'plate_row_col' as the index
         df.set_index('plate_row_col', inplace=True)
@@ -1244,11 +1650,11 @@ def generate_score_heatmap(settings):
         # Loop through all collected CSV files and process them
         for csv_file in ls:
             df = pd.read_csv(csv_file) # Read CSV into DataFrame
-            df = df[df['
+            df = df[df['columnID']==column]
             if not plate is None:
-                df['
-            # Group the data by '
-            grouped_df = df.groupby(['
+                df['plateID'] = f"plate{plate}"
+            # Group the data by 'plateID', 'rowID', and 'columnID'
+            grouped_df = df.groupby(['plateID', 'rowID', 'columnID'])[data_column].mean().reset_index()
             # Use the CSV filename to create a new column name
             folder_name = os.path.dirname(csv_file).replace(".csv", "")
             new_column_name = os.path.basename(f"{folder_name}_{data_column}")
@@ -1259,8 +1665,8 @@ def generate_score_heatmap(settings):
             if combined_df is None:
                 combined_df = grouped_df
             else:
-                combined_df = pd.merge(combined_df, grouped_df, on=['
-        combined_df['prc'] = combined_df['
+                combined_df = pd.merge(combined_df, grouped_df, on=['plateID', 'rowID', 'columnID'], how='outer')
+        combined_df['prc'] = combined_df['plateID'].astype(str) + '_' + combined_df['rowID'].astype(str) + '_' + combined_df['columnID'].astype(str)
         return combined_df
 
     def calculate_mae(df):
@@ -1282,16 +1688,16 @@ def generate_score_heatmap(settings):
         mae_df = pd.DataFrame(mae_data)
         return mae_df
 
-    result_df = combine_classification_scores(settings['folders'], settings['csv_name'], settings['data_column'], settings['
-    df = calculate_fraction_mixed_condition(settings['csv'], settings['
+    result_df = combine_classification_scores(settings['folders'], settings['csv_name'], settings['data_column'], settings['plateID'], settings['columnID'], )
+    df = calculate_fraction_mixed_condition(settings['csv'], settings['plateID'], settings['columnID'], settings['control_sgrnas'])
     df = df[df['grna_name']==settings['fraction_grna']]
     fraction_df = df[['fraction', 'prc']]
     merged_df = pd.merge(fraction_df, result_df, on=['prc'])
-    cv_df = group_cv_score(settings['cv_csv'], settings['
+    cv_df = group_cv_score(settings['cv_csv'], settings['plateID'], settings['columnID'], settings['data_column_cv'])
     cv_df = cv_df[[settings['data_column_cv'], 'prc']]
     merged_df = pd.merge(merged_df, cv_df, on=['prc'])
 
-    fig = plot_multi_channel_heatmap(merged_df, settings['
+    fig = plot_multi_channel_heatmap(merged_df, settings['columnID'], settings['cmap'])
     if 'row_number' in merged_df.columns:
         merged_df = merged_df.drop('row_num', axis=1)
     mae_df = calculate_mae(merged_df)
@@ -1299,9 +1705,9 @@ def generate_score_heatmap(settings):
         mae_df = mae_df.drop('row_num', axis=1)
 
     if not settings['dst'] is None:
-        mae_dst = os.path.join(settings['dst'], f"mae_scores_comparison_plate_{settings['
-        merged_dst = os.path.join(settings['dst'], f"scores_comparison_plate_{settings['
-        heatmap_save = os.path.join(settings['dst'], f"scores_comparison_plate_{settings['
+        mae_dst = os.path.join(settings['dst'], f"mae_scores_comparison_plate_{settings['plateID']}.csv")
+        merged_dst = os.path.join(settings['dst'], f"scores_comparison_plate_{settings['plateID']}_data.csv")
+        heatmap_save = os.path.join(settings['dst'], f"scores_comparison_plate_{settings['plateID']}.pdf")
         mae_df.to_csv(mae_dst, index=False)
         merged_df.to_csv(merged_dst, index=False)
         fig.savefig(heatmap_save, format='pdf', dpi=600, bbox_inches='tight')