spacr 0.3.46__py3-none-any.whl → 0.3.50__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- spacr/chat_bot.py +31 -0
- spacr/gui_elements.py +33 -7
- spacr/ml.py +478 -76
- spacr/plot.py +488 -47
- spacr/sequencing.py +122 -1
- spacr/settings.py +2 -1
- spacr/toxo.py +266 -147
- spacr/utils.py +27 -4
- {spacr-0.3.46.dist-info → spacr-0.3.50.dist-info}/METADATA +2 -1
- {spacr-0.3.46.dist-info → spacr-0.3.50.dist-info}/RECORD +14 -13
- {spacr-0.3.46.dist-info → spacr-0.3.50.dist-info}/LICENSE +0 -0
- {spacr-0.3.46.dist-info → spacr-0.3.50.dist-info}/WHEEL +0 -0
- {spacr-0.3.46.dist-info → spacr-0.3.50.dist-info}/entry_points.txt +0 -0
- {spacr-0.3.46.dist-info → spacr-0.3.50.dist-info}/top_level.txt +0 -0
spacr/plot.py
CHANGED
@@ -32,7 +32,340 @@ import matplotlib.patches as patches
|
|
32
32
|
from collections import defaultdict
|
33
33
|
from matplotlib.gridspec import GridSpec
|
34
34
|
|
35
|
-
|
35
|
+
#filter_dict={'cell':[(0,100000), (0, 65000)],'nucleus':[(3000,100000), (1500, 65000)],'pathogen':[(500,100000), (0, 65000)]}
|
36
|
+
def plot_image_mask_overlay(
    file,
    channels,
    cell_channel,
    nucleus_channel,
    pathogen_channel,
    figuresize=10,
    percentiles=(2, 98),
    thickness=3,
    save_pdf=True,
    mode='outlines',
    export_tiffs=False,
    all_on_all=False,
    all_outlines=False,
    filter_dict=None
):
    """Plot the channels of a merged ``.npy`` stack with object masks overlaid.

    The stack is loaded with ``np.load(file)``. Intensity planes are selected
    with ``stack[..., channels]``; label masks are read from the trailing
    planes along axis 2 (pathogen last, nucleus before it, cell before that,
    counting only the object types whose channel is not ``None``).

    Args:
        file: Path to the ``.npy`` stack.
        channels: Indices of the intensity planes to display.
        cell_channel: Intensity plane associated with the cell mask, or
            ``None`` if there is no cell mask.
        nucleus_channel: Same, for the nucleus mask.
        pathogen_channel: Same, for the pathogen mask.
        figuresize: Figure height; the width is ``4 * figuresize``.
        percentiles: ``(low, high)`` percentiles used to rescale each channel
            to the 0-1 range before display.
        thickness: Contour thickness passed to ``cv2.drawContours``.
        save_pdf: If true, save the figure as a PDF under
            ``<parent of file's folder>/results/overlay``.
        mode: ``'outlines'`` draws contours; any other value overlays
            randomly coloured filled masks.
        export_tiffs: If true, write every plane of the stack as a 16-bit
            TIFF under ``<parent of file's folder>/results/<name>/tiff``.
        all_on_all: If true, draw every mask on every displayed channel.
        all_outlines: If true (and ``all_on_all`` is false), draw every mask
            on channels that have no mask of their own.
        filter_dict: Optional mapping with keys ``'cell'``, ``'nucleus'`` and
            ``'pathogen'``; each value is
            ``[(min_area, max_area), (min_intensity, max_intensity)]``.
            Objects outside either range are removed from the mask.

    Returns:
        The matplotlib figure produced by the plot.
    """

    def random_color_cmap(n_labels, seed=None):
        """Return a colormap of ``n_labels`` random colours preceded by black."""
        # A private RandomState produces the same numbers as
        # np.random.seed(seed) followed by np.random.rand, but leaves the
        # global generator used by the rest of the program untouched.
        rng = np.random if seed is None else np.random.RandomState(seed)
        rand_colors = rng.rand(n_labels, 3)
        rand_colors = np.vstack([[0, 0, 0], rand_colors])  # background stays black
        return ListedColormap(rand_colors)

    def _plot_merged_plot(
        image,
        outlines,
        outline_colors,
        figuresize,
        thickness,
        percentiles,
        mode='outlines',
        all_on_all=False,
        all_outlines=False,
        channels=None,
        cell_channel=None,
        nucleus_channel=None,
        pathogen_channel=None,
        cell_outlines=None,
        nucleus_outlines=None,
        pathogen_outlines=None,
        save_pdf=True
    ):
        """Draw one panel per channel plus a panel of all objects filled in."""

        def _generate_colored_mask(mask, cmap):
            """Map a label mask to RGBA; alpha is 1 on objects, 0 elsewhere."""
            mask_norm = mask / (mask.max() + 1e-5)
            colored_mask = cmap(mask_norm)
            colored_mask[..., 3] = np.where(mask > 0, 1, 0)
            return colored_mask

        def _overlay_mask(image, mask):
            """Alpha-composite an RGBA mask onto an RGB image, clipped to 0-1."""
            alpha = mask[..., 3:]
            return np.clip(image * (1 - alpha) + mask[..., :3] * alpha, 0, 1)

        def _normalize_image(image, percentiles):
            """Rescale an image to 0-1 between the given percentiles."""
            v_min, v_max = np.percentile(image, percentiles)
            return np.clip((image - v_min) / (v_max - v_min + 1e-5), 0, 1)

        def _generate_contours(mask):
            """Return the external contours of a binary mask (OpenCV)."""
            contours, _ = cv2.findContours(
                mask.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
            )
            return contours

        def _apply_contours(image, mask, color, thickness):
            """Draw the contour of every non-zero label of ``mask`` on ``image``."""
            rgb = mpl.colors.to_rgb(color)
            for label in np.unique(mask):
                if label == 0:
                    continue  # background
                label_mask = (mask == label).astype(np.uint8)
                cv2.drawContours(image, _generate_contours(label_mask), -1, rgb, thickness)
            return image

        def _draw_objects(channel_rgb, label_mask, color):
            """Overlay one label mask as contours or as randomly coloured fills."""
            if mode == 'outlines':
                return _apply_contours(channel_rgb, label_mask, color, thickness)
            cmap = random_color_cmap(int(label_mask.max() + 1), random.randint(0, 100))
            return _overlay_mask(channel_rgb, _generate_colored_mask(label_mask, cmap))

        num_channels = image.shape[-1]
        fig, ax = plt.subplots(1, num_channels + 1, figsize=(4 * figuresize, figuresize))
        # plt.subplots returns a bare Axes when only one panel is requested.
        ax = np.atleast_1d(ax)

        # Channels that have a mask of their own.
        channels_with_outlines = [
            ch for ch in (cell_channel, nucleus_channel, pathogen_channel) if ch is not None
        ]

        for v in range(num_channels):
            channel_rgb = np.dstack([_normalize_image(image[..., v], percentiles)] * 3)
            current_channel = channels[v]

            if all_on_all:
                for outline, color in zip(outlines, outline_colors):
                    channel_rgb = _draw_objects(channel_rgb, outline, color)
            elif current_channel in channels_with_outlines:
                # Only the mask belonging to this channel, drawn in magenta.
                if current_channel == cell_channel and cell_outlines is not None:
                    outline = cell_outlines
                elif current_channel == nucleus_channel and nucleus_outlines is not None:
                    outline = nucleus_outlines
                elif current_channel == pathogen_channel and pathogen_outlines is not None:
                    outline = pathogen_outlines
                else:
                    outline = None
                if outline is not None:
                    channel_rgb = _draw_objects(channel_rgb, outline, '#FF00FF')
            elif all_outlines:
                # Use outline_colors so each object type keeps the colour it
                # has in the all_on_all view; a fixed colour list here would
                # not follow the order in which the masks were collected.
                for outline, color in zip(outlines, outline_colors):
                    channel_rgb = _draw_objects(channel_rgb, outline, color)

            ax[v].imshow(channel_rgb)
            ax[v].set_title(f'Image - Channel {current_channel}')

        # Last panel: all objects filled with random colours.
        if outlines:
            combined_mask = np.zeros_like(outlines[0])
            for outline in outlines:
                combined_mask = np.maximum(combined_mask, outline)
        else:
            # No object channel was requested: show an empty panel.
            combined_mask = np.zeros(image.shape[:2])

        cmap = random_color_cmap(int(combined_mask.max() + 1), random.randint(0, 100))
        mask = _generate_colored_mask(combined_mask, cmap)
        blank_image = np.zeros((*combined_mask.shape, 3))
        ax[-1].imshow(_overlay_mask(blank_image, mask))
        ax[-1].set_title('Combined Objects Image')

        plt.tight_layout()

        if save_pdf:
            pdf_dir = os.path.join(
                os.path.dirname(os.path.dirname(file)), 'results', 'overlay'
            )
            os.makedirs(pdf_dir, exist_ok=True)
            pdf_name = os.path.splitext(os.path.basename(file))[0] + '.pdf'
            fig.savefig(os.path.join(pdf_dir, pdf_name), format='pdf')

        plt.show()
        return fig

    def _save_channels_as_tiff(stack, save_dir, filename):
        """Save each plane of the stack as a grayscale 16-bit TIFF."""
        os.makedirs(save_dir, exist_ok=True)
        for i in range(stack.shape[-1]):
            tiff_path = os.path.join(save_dir, f"{filename}_channel_{i}.tiff")
            tiff.imwrite(tiff_path, stack[..., i].astype(np.uint16), photometric='minisblack')
            print(f"Saved {tiff_path}")

    def _filter_object(mask, intensity_image, min_max_area=(0, 10000000), min_max_intensity=(0, 65000), type_='object'):
        """
        Filter objects in a mask based on their area (size) and mean intensity.

        Args:
            mask (ndarray): The input label mask (0 is background).
            intensity_image (ndarray): Intensity image with the same shape as ``mask``.
            min_max_area (tuple): ``(min_area, max_area)``, inclusive, in pixels.
            min_max_intensity (tuple): ``(min_intensity, max_intensity)``, inclusive,
                applied to the mean intensity of each object.
            type_ (str): Name used in the printed summary.

        Returns:
            ndarray: The filtered mask, with the dtype of the input mask.
        """
        original_dtype = mask.dtype
        mask_int = mask.astype(np.int64)
        flat_labels = mask_int.ravel()
        flat_intensity = intensity_image.astype(np.float64).ravel()

        # One pass over the image: per-label pixel counts and intensity sums,
        # instead of rescanning the whole image once per label.
        labels, inverse, areas = np.unique(flat_labels, return_inverse=True, return_counts=True)
        mean_intensities = np.bincount(inverse, weights=flat_intensity) / areas

        is_object = labels != 0  # exclude background
        keep = (
            is_object
            & (areas >= min_max_area[0]) & (areas <= min_max_area[1])
            & (mean_intensities >= min_max_intensity[0])
            & (mean_intensities <= min_max_intensity[1])
        )

        num_objects_before = int(is_object.sum())
        num_objects_after = int(keep.sum())
        avg_area_before = areas[is_object].mean() if num_objects_before > 0 else 0
        avg_intensity_before = mean_intensities[is_object].mean() if num_objects_before > 0 else 0
        avg_area_after = areas[keep].mean() if num_objects_after > 0 else 0
        avg_intensity_after = mean_intensities[keep].mean() if num_objects_after > 0 else 0

        print(f"Before filtering {type_}: {num_objects_before} objects")
        print(f"Average area {type_}: {avg_area_before:.2f} pixels, Average intensity: {avg_intensity_before:.2f}")
        print(f"After filtering {type_}: {num_objects_after} objects")
        print(f"Average area {type_}: {avg_area_after:.2f} pixels, Average intensity: {avg_intensity_after:.2f}")

        mask_filtered = np.where(keep[inverse], flat_labels, 0).reshape(mask_int.shape)
        return mask_filtered.astype(original_dtype)

    stack = np.load(file)
    base_name = os.path.splitext(os.path.basename(file))[0]

    if export_tiffs:
        save_dir = os.path.join(
            os.path.dirname(os.path.dirname(file)), 'results', base_name, 'tiff'
        )
        _save_channels_as_tiff(stack, save_dir, base_name)

    # Convert integer stacks to float so percentile normalisation works on floats.
    if stack.dtype in (np.uint16, np.uint8):
        stack = stack.astype(np.float32)

    image = stack[..., channels]

    def _load_mask(mask_dim, intensity_channel, key):
        """Read one mask plane and, if requested, filter its objects."""
        mask = np.take(stack, mask_dim, axis=2)
        if filter_dict is not None:
            intensity = np.take(stack, intensity_channel, axis=2)
            mask = _filter_object(mask, intensity, filter_dict[key][0], filter_dict[key][1], type_=key)
        return mask

    outlines = []
    outline_colors = []
    cell_outlines = None
    nucleus_outlines = None
    pathogen_outlines = None

    if pathogen_channel is not None:
        pathogen_outlines = _load_mask(-1, pathogen_channel, 'pathogen')
        outlines.append(pathogen_outlines)
        outline_colors.append('green')

    if nucleus_channel is not None:
        nucleus_mask_dim = -2 if pathogen_channel is not None else -1
        nucleus_outlines = _load_mask(nucleus_mask_dim, nucleus_channel, 'nucleus')
        outlines.append(nucleus_outlines)
        outline_colors.append('blue')

    if cell_channel is not None:
        # The cell mask sits before the nucleus and pathogen masks that are present.
        cell_mask_dim = -1 - (nucleus_channel is not None) - (pathogen_channel is not None)
        cell_outlines = _load_mask(cell_mask_dim, cell_channel, 'cell')
        outlines.append(cell_outlines)
        outline_colors.append('red')

    fig = _plot_merged_plot(
        image=image,
        outlines=outlines,
        outline_colors=outline_colors,
        figuresize=figuresize,
        thickness=thickness,
        percentiles=percentiles,
        mode=mode,
        all_on_all=all_on_all,
        all_outlines=all_outlines,
        channels=channels,
        cell_channel=cell_channel,
        nucleus_channel=nucleus_channel,
        pathogen_channel=pathogen_channel,
        cell_outlines=cell_outlines,
        nucleus_outlines=nucleus_outlines,
        pathogen_outlines=pathogen_outlines,
        save_pdf=save_pdf
    )

    return fig
|
367
|
+
|
368
|
+
def plot_image_mask_overlay_v1(file, channels, cell_channel, nucleus_channel, pathogen_channel, figuresize=10, percentiles=(2,98), thickness=3, save_pdf=True, mode='outlines', export_tiffs=False):
|
36
369
|
"""Plot image and mask overlays."""
|
37
370
|
|
38
371
|
def _plot_merged_plot(image, outlines, outline_colors, figuresize, thickness, percentiles, mode='outlines'):
|
@@ -1398,7 +1731,7 @@ def _plot_histograms_and_stats(df):
|
|
1398
1731
|
print('-'*40)
|
1399
1732
|
|
1400
1733
|
# Plot the histogram
|
1401
|
-
plt.figure(figsize=(10,
|
1734
|
+
plt.figure(figsize=(10,10))
|
1402
1735
|
plt.hist(subset['pred'], bins=30, edgecolor='black')
|
1403
1736
|
plt.axvline(mean_pred, color='red', linestyle='dashed', linewidth=1, label=f"Mean = {mean_pred:.2f}")
|
1404
1737
|
plt.title(f'Histogram for pred - Condition: {condition}')
|
@@ -1455,12 +1788,16 @@ def _reg_v_plot(df, grouping, variable, plate_number):
|
|
1455
1788
|
plt.show()
|
1456
1789
|
|
1457
1790
|
def generate_plate_heatmap(df, plate_number, variable, grouping, min_max, min_count):
|
1791
|
+
|
1792
|
+
if not isinstance(min_count, (int, float)):
|
1793
|
+
min_count = 0
|
1794
|
+
|
1458
1795
|
df = df.copy() # Work on a copy to avoid SettingWithCopyWarning
|
1459
1796
|
df['plate'], df['row'], df['col'] = zip(*df['prc'].str.split('_'))
|
1460
1797
|
|
1461
1798
|
# Filtering the dataframe based on the plate_number
|
1462
1799
|
df = df[df['plate'] == plate_number].copy() # Create another copy after filtering
|
1463
|
-
|
1800
|
+
|
1464
1801
|
# Ensure proper ordering
|
1465
1802
|
row_order = [f'r{i}' for i in range(1, 17)]
|
1466
1803
|
col_order = [f'c{i}' for i in range(1, 28)] # Exclude c15 as per your earlier code
|
@@ -1496,7 +1833,6 @@ def generate_plate_heatmap(df, plate_number, variable, grouping, min_max, min_co
|
|
1496
1833
|
min_max = np.quantile(plate_map.values, [min_max[0], min_max[1]])
|
1497
1834
|
if isinstance(min_max[0], (int)) and isinstance(min_max[1], (int)):
|
1498
1835
|
min_max = [min_max[0], min_max[1]]
|
1499
|
-
|
1500
1836
|
return plate_map, min_max
|
1501
1837
|
|
1502
1838
|
def plot_plates(df, variable, grouping, min_max, cmap, min_count=0, verbose=True, dst=None):
|
@@ -1516,10 +1852,14 @@ def plot_plates(df, variable, grouping, min_max, cmap, min_count=0, verbose=True
|
|
1516
1852
|
plt.subplots_adjust(wspace=0.1, hspace=0.4)
|
1517
1853
|
|
1518
1854
|
if not dst is None:
|
1519
|
-
|
1520
|
-
|
1521
|
-
|
1522
|
-
|
1855
|
+
for i in range(0,1000):
|
1856
|
+
filename = os.path.join(dst, f'plate_heatmap_{i}.pdf')
|
1857
|
+
if os.path.exists(filename):
|
1858
|
+
continue
|
1859
|
+
else:
|
1860
|
+
fig.savefig(filename, format='pdf')
|
1861
|
+
print(f'Saved heatmap to {filename}')
|
1862
|
+
break
|
1523
1863
|
if verbose:
|
1524
1864
|
plt.show()
|
1525
1865
|
return fig
|
@@ -1886,22 +2226,77 @@ def volcano_plot(coef_df, filename='volcano_plot.pdf'):
|
|
1886
2226
|
print(f'Saved Volcano plot: {filename}')
|
1887
2227
|
plt.show()
|
1888
2228
|
|
1889
|
-
def plot_histogram(df,
|
2229
|
+
def plot_histogram(df, column, dst=None):
    """Plot a histogram of one DataFrame column and optionally save it as a PDF.

    Args:
        df: DataFrame holding the data.
        column: Name of the column to plot; also used in the title, the
            x-axis label and the output file name.
        dst: Optional output folder. When given, the figure is written to
            ``<dst>/<column>_histogram.pdf``; the folder is created if needed.
    """
    bar_color = (0/255, 155/255, 155/255)
    plt.figure(figsize=(10, 10))
    sns.histplot(df[column], kde=False, color=bar_color, edgecolor=None, alpha=0.6)
    plt.title(f'Histogram of {column}')
    plt.xlabel(column)
    plt.ylabel('Frequency')

    if dst is not None:
        # savefig does not create missing folders.
        os.makedirs(dst, exist_ok=True)
        filename = os.path.join(dst, f'{column}_histogram.pdf')
        plt.savefig(filename, format='pdf')
        print(f'Saved histogram to {filename}')

    plt.show()
|
1903
2244
|
|
1904
|
-
def plot_lorenz_curves(csv_files, remove_keys=['TGGT1_220950_1', 'TGGT1_233460_4']):
|
2245
|
+
def plot_lorenz_curves(csv_files, name_column='grna_name', value_column='count', remove_keys=('TGGT1_220950_1', 'TGGT1_233460_4'), x_lim=(0.0, 1), y_lim=(0, 1), save=True):
    """Plot one Lorenz curve per CSV file plus a curve for all files combined.

    Args:
        csv_files: Paths of the CSV files to read; must not be empty.
        name_column: Column holding the row names matched against ``remove_keys``.
        value_column: Column holding the values the curves are built from.
        remove_keys: Names to drop from every file before plotting; ``None``
            or an empty sequence keeps every row.
        x_lim: x-axis limits, or ``None`` to leave them automatic.
        y_lim: y-axis limits, or ``None`` to leave them automatic.
        save: If true, save the figure as ``lorenz_curve.pdf`` in a
            ``results`` folder next to the first CSV file.

    Raises:
        ValueError: If ``csv_files`` is empty.
    """
    if len(csv_files) == 0:
        raise ValueError("csv_files must contain at least one file.")

    def lorenz_curve(data):
        """Return the cumulative share of the sorted values, starting at 0."""
        cumulative = np.cumsum(np.sort(data))
        # Empty input or an all-zero total has no meaningful shares; return a
        # flat curve rather than indexing past the end or dividing by zero.
        if cumulative.size == 0 or cumulative[-1] == 0:
            return np.zeros(cumulative.size + 1)
        return np.insert(cumulative / cumulative[-1], 0, 0)

    per_file_values = []

    plt.figure(figsize=(10, 10))

    for idx, csv_file in enumerate(csv_files):
        df = pd.read_csv(csv_file)
        if remove_keys:
            df = df[~df[name_column].isin(list(remove_keys))]

        values = df[value_column].values
        per_file_values.append(values)

        lorenz = lorenz_curve(values)
        plt.plot(np.linspace(0, 1, len(lorenz)), lorenz, label=f"plate {idx+1}")

    # Curve for the values of all files pooled together.
    combined_lorenz = lorenz_curve(np.concatenate(per_file_values))
    plt.plot(np.linspace(0, 1, len(combined_lorenz)), combined_lorenz, label="Combined", linestyle='--', color='black')

    if x_lim is not None:
        plt.xlim(x_lim)

    if y_lim is not None:
        plt.ylim(y_lim)

    plt.title('Lorenz Curves')
    plt.xlabel('Cumulative Share of Individuals')
    plt.ylabel('Cumulative Share of Value')
    plt.legend()
    plt.grid(False)

    if save:
        save_path = os.path.join(os.path.dirname(csv_files[0]), 'results')
        os.makedirs(save_path, exist_ok=True)
        save_file_path = os.path.join(save_path, 'lorenz_curve.pdf')
        plt.savefig(save_file_path, format='pdf', bbox_inches='tight')
        print(f"Saved Lorenz Curve: {save_file_path}")
    plt.show()
|
2298
|
+
|
2299
|
+
def plot_lorenz_curves_v1(csv_files, remove_keys=['TGGT1_220950_1', 'TGGT1_233460_4']):
|
1905
2300
|
|
1906
2301
|
def lorenz_curve(data):
|
1907
2302
|
"""Calculate Lorenz curve."""
|
@@ -2358,22 +2753,33 @@ class spacrGraph:
|
|
2358
2753
|
return filtered_df
|
2359
2754
|
|
2360
2755
|
def perform_normality_tests(self):
|
2361
|
-
"""Perform normality tests for each group and
|
2756
|
+
"""Perform normality tests for each group and data column."""
|
2362
2757
|
unique_groups = self.df[self.grouping_column].unique()
|
2363
2758
|
normality_results = []
|
2364
2759
|
|
2365
2760
|
for column in self.data_column:
|
2366
|
-
# Iterate over each group and its corresponding data
|
2367
2761
|
for group in unique_groups:
|
2368
|
-
data = self.df.loc[self.df[self.grouping_column] == group, column]
|
2762
|
+
data = self.df.loc[self.df[self.grouping_column] == group, column].dropna()
|
2369
2763
|
n_samples = len(data)
|
2370
2764
|
|
2765
|
+
if n_samples < 3:
|
2766
|
+
# Skip test if there aren't enough data points
|
2767
|
+
print(f"Skipping normality test for group '{group}' on column '{column}' - Not enough data.")
|
2768
|
+
normality_results.append({
|
2769
|
+
'Comparison': f'Normality test for {group} on {column}',
|
2770
|
+
'Test Statistic': None,
|
2771
|
+
'p-value': None,
|
2772
|
+
'Test Name': 'Skipped',
|
2773
|
+
'Column': column,
|
2774
|
+
'n': n_samples
|
2775
|
+
})
|
2776
|
+
continue
|
2777
|
+
|
2778
|
+
# Choose the appropriate normality test based on the sample size
|
2371
2779
|
if n_samples >= 8:
|
2372
|
-
# Use D'Agostino-Pearson test for larger samples
|
2373
2780
|
stat, p_value = normaltest(data)
|
2374
2781
|
test_name = "D'Agostino-Pearson test"
|
2375
2782
|
else:
|
2376
|
-
# Use Shapiro-Wilk test for smaller samples
|
2377
2783
|
stat, p_value = shapiro(data)
|
2378
2784
|
test_name = "Shapiro-Wilk test"
|
2379
2785
|
|
@@ -2384,11 +2790,11 @@ class spacrGraph:
|
|
2384
2790
|
'p-value': p_value,
|
2385
2791
|
'Test Name': test_name,
|
2386
2792
|
'Column': column,
|
2387
|
-
'n': n_samples
|
2793
|
+
'n': n_samples
|
2388
2794
|
})
|
2389
2795
|
|
2390
2796
|
# Check if all groups are normally distributed (p > 0.05)
|
2391
|
-
normal_p_values = [result['p-value'] for result in normality_results if result['Column'] == column]
|
2797
|
+
normal_p_values = [result['p-value'] for result in normality_results if result['Column'] == column and result['p-value'] is not None]
|
2392
2798
|
is_normal = all(p > 0.05 for p in normal_p_values)
|
2393
2799
|
|
2394
2800
|
return is_normal, normality_results
|
@@ -2733,7 +3139,7 @@ class spacrGraph:
|
|
2733
3139
|
hue = None
|
2734
3140
|
|
2735
3141
|
# Create the jitter plot
|
2736
|
-
sns.stripplot(data=self.df_melted,x=x_axis_column,y='Value',hue=self.hue, palette=self.sns_palette, dodge=self.jitter_bar_dodge, jitter=self.bar_width, ax=ax,alpha=0.6)
|
3142
|
+
sns.stripplot(data=self.df_melted,x=x_axis_column,y='Value',hue=self.hue, palette=self.sns_palette, dodge=self.jitter_bar_dodge, jitter=self.bar_width, ax=ax, alpha=0.6, size=16)
|
2737
3143
|
|
2738
3144
|
# Adjust legend and labels
|
2739
3145
|
ax.set_xlabel(self.grouping_column)
|
@@ -2754,6 +3160,12 @@ class spacrGraph:
|
|
2754
3160
|
# Ensure epoch is used on the x-axis and accuracy on the y-axis
|
2755
3161
|
x_axis_column = self.data_column[0]
|
2756
3162
|
y_axis_column = self.data_column[1]
|
3163
|
+
|
3164
|
+
if self.log_y:
|
3165
|
+
self.df[y_axis_column] = np.log10(self.df[y_axis_column])
|
3166
|
+
|
3167
|
+
if self.log_x:
|
3168
|
+
self.df[x_axis_column] = np.log10(self.df[x_axis_column])
|
2757
3169
|
|
2758
3170
|
# Set hue to the grouping column to get one line per group
|
2759
3171
|
hue = self.grouping_column
|
@@ -2771,11 +3183,6 @@ class spacrGraph:
|
|
2771
3183
|
ax.set_xlabel(f"{x_axis_column}")
|
2772
3184
|
ax.set_ylabel(f"{y_axis_column}")
|
2773
3185
|
|
2774
|
-
if self.log_y:
|
2775
|
-
ax.set_yscale('log')
|
2776
|
-
if self.log_x:
|
2777
|
-
ax.set_xscale('log')
|
2778
|
-
|
2779
3186
|
def _create_line_with_std_area(self, ax):
|
2780
3187
|
"""Helper method to create a line graph with shaded area representing standard deviation."""
|
2781
3188
|
|
@@ -2784,15 +3191,22 @@ class spacrGraph:
|
|
2784
3191
|
y_axis_column_mean = f"mean_{y_axis_column}"
|
2785
3192
|
y_axis_column_std = f"std_{y_axis_column_mean}"
|
2786
3193
|
|
3194
|
+
if self.log_y:
|
3195
|
+
self.df[y_axis_column] = np.log10(self.df[y_axis_column])
|
3196
|
+
|
3197
|
+
if self.log_x:
|
3198
|
+
self.df[x_axis_column] = np.log10(self.df[x_axis_column])
|
3199
|
+
|
2787
3200
|
# Pivot the DataFrame to get mean and std for each epoch across plates
|
2788
3201
|
summary_df = self.df.pivot_table(index=x_axis_column,values=y_axis_column,aggfunc=['mean', 'std']).reset_index()
|
2789
3202
|
|
2790
3203
|
# Flatten MultiIndex columns (result of pivoting)
|
2791
3204
|
summary_df.columns = [x_axis_column, y_axis_column_mean, y_axis_column_std]
|
2792
|
-
|
3205
|
+
|
2793
3206
|
# Plot the mean accuracy as a line
|
2794
3207
|
sns.lineplot(data=summary_df,x=x_axis_column,y=y_axis_column_mean,ax=ax,marker='o',linewidth=1,markersize=0,color='blue',label=y_axis_column_mean)
|
2795
3208
|
|
3209
|
+
|
2796
3210
|
# Fill the area representing the standard deviation
|
2797
3211
|
ax.fill_between(summary_df[x_axis_column],summary_df[y_axis_column_mean] - summary_df[y_axis_column_std],summary_df[y_axis_column_mean] + summary_df[y_axis_column_std],color='blue', alpha=0.1 )
|
2798
3212
|
|
@@ -2800,11 +3214,6 @@ class spacrGraph:
|
|
2800
3214
|
ax.set_xlabel(f"{x_axis_column}")
|
2801
3215
|
ax.set_ylabel(f"{y_axis_column}")
|
2802
3216
|
|
2803
|
-
if self.log_y:
|
2804
|
-
ax.set_yscale('log')
|
2805
|
-
if self.log_x:
|
2806
|
-
ax.set_xscale('log')
|
2807
|
-
|
2808
3217
|
def _create_box_plot(self, ax):
|
2809
3218
|
"""Helper method to create a box plot with consistent spacing."""
|
2810
3219
|
# Combine grouping column and data column if needed
|
@@ -2969,23 +3378,29 @@ def plot_data_from_db(settings):
|
|
2969
3378
|
df (pd.DataFrame): The extracted table as a DataFrame.
|
2970
3379
|
"""
|
2971
3380
|
|
3381
|
+
|
3382
|
+
|
2972
3383
|
if isinstance(settings['src'], str):
|
2973
3384
|
srcs = [settings['src']]
|
2974
3385
|
elif isinstance(settings['src'], list):
|
2975
3386
|
srcs = settings['src']
|
2976
|
-
if isinstance(settings['database'], str):
|
2977
|
-
settings['database'] = [settings['database'] for _ in range(len(srcs))]
|
2978
3387
|
else:
|
2979
3388
|
raise ValueError("src must be a string or a list of strings.")
|
2980
3389
|
|
3390
|
+
if isinstance(settings['database'], str):
|
3391
|
+
settings['database'] = [settings['database'] for _ in range(len(srcs))]
|
3392
|
+
|
3393
|
+
settings['dst'] = os.path.join(srcs[0], 'results')
|
3394
|
+
|
2981
3395
|
save_settings(settings, name=f"{settings['graph_name']}_plot_settings_db", show=True)
|
2982
3396
|
|
2983
3397
|
dfs = []
|
2984
3398
|
for i, src in enumerate(srcs):
|
2985
3399
|
|
2986
3400
|
db_loc = os.path.join(src, 'measurements', settings['database'][i])
|
2987
|
-
|
3401
|
+
print(f"Database: {db_loc}")
|
2988
3402
|
if settings['table_names'] in ['saliency_image_correlations']:
|
3403
|
+
print(f"Database table: {settings['table_names']}")
|
2989
3404
|
[df1] = _read_db(db_loc, tables=[settings['table_names']])
|
2990
3405
|
else:
|
2991
3406
|
df1, _ = _read_and_merge_data(locs=[db_loc],
|
@@ -3006,8 +3421,9 @@ def plot_data_from_db(settings):
|
|
3006
3421
|
|
3007
3422
|
df = pd.concat(dfs, axis=0)
|
3008
3423
|
df['prc'] = df['plate'].astype(str) + '_' + df['row'].astype(str) + '_' + df['col'].astype(str)
|
3009
|
-
df['recruitment'] = df['pathogen_channel_1_mean_intensity'] / df['cytoplasm_channel_1_mean_intensity']
|
3010
|
-
df['recruitment'] = df['pathogen_channel_1_mean_intensity'] / df['cytoplasm_channel_1_mean_intensity']
|
3424
|
+
#df['recruitment'] = df['pathogen_channel_1_mean_intensity'] / df['cytoplasm_channel_1_mean_intensity']
|
3425
|
+
#df['recruitment'] = df['pathogen_channel_1_mean_intensity'] / df['cytoplasm_channel_1_mean_intensity']
|
3426
|
+
df['class'] = df['png_path'].apply(lambda x: 'class_1' if 'class_1' in x else ('class_0' if 'class_0' in x else None))
|
3011
3427
|
|
3012
3428
|
if settings['cell_plate_metadata'] != None:
|
3013
3429
|
df = df.dropna(subset='host_cell')
|
@@ -3021,7 +3437,7 @@ def plot_data_from_db(settings):
|
|
3021
3437
|
df = df.dropna(subset=settings['data_column'])
|
3022
3438
|
df = df.dropna(subset=settings['grouping_column'])
|
3023
3439
|
|
3024
|
-
|
3440
|
+
|
3025
3441
|
src = srcs[0]
|
3026
3442
|
dst = os.path.join(src, 'results', settings['graph_name'])
|
3027
3443
|
os.makedirs(dst, exist_ok=True)
|
@@ -3082,9 +3498,13 @@ def plot_data_from_csv(settings):
|
|
3082
3498
|
dft = pd.read_csv(src)
|
3083
3499
|
if 'plate' not in dft.columns:
|
3084
3500
|
dft['plate'] = f"plate{i+1}"
|
3501
|
+
dft['common'] = 'spacr'
|
3085
3502
|
dfs.append(dft)
|
3086
3503
|
|
3087
3504
|
df = pd.concat(dfs, axis=0)
|
3505
|
+
|
3506
|
+
display(df)
|
3507
|
+
|
3088
3508
|
df = df.dropna(subset=settings['data_column'])
|
3089
3509
|
df = df.dropna(subset=settings['grouping_column'])
|
3090
3510
|
src = srcs[0]
|
@@ -3131,23 +3551,39 @@ def plot_region(settings):
|
|
3131
3551
|
print(f"Saved {path}")
|
3132
3552
|
|
3133
3553
|
from .io import _read_db
|
3554
|
+
from .utils import correct_paths
|
3134
3555
|
fov_path = os.path.join(settings['src'], 'merged', settings['name'])
|
3135
3556
|
name = os.path.splitext(settings['name'])[0]
|
3136
3557
|
|
3137
3558
|
db_path = os.path.join(settings['src'], 'measurements', 'measurements.db')
|
3138
3559
|
paths_df = _read_db(db_path, tables=['png_list'])[0]
|
3560
|
+
paths_df, _ = correct_paths(df=paths_df, base_path=settings['src'], folder='data')
|
3139
3561
|
paths_df = paths_df[paths_df['png_path'].str.contains(name, na=False)]
|
3140
3562
|
|
3141
3563
|
activation_mode = f"{settings['activation_mode']}_list"
|
3142
3564
|
activation_db_path = os.path.join(settings['src'], 'measurements', settings['activation_db'])
|
3143
3565
|
activation_paths_df = _read_db(activation_db_path, tables=[activation_mode])[0]
|
3566
|
+
activation_db = os.path.splitext(settings['activation_db'])[0]
|
3567
|
+
base_path=os.path.join(settings['src'], 'datasets',activation_db)
|
3568
|
+
activation_paths_df, _ = correct_paths(df=activation_paths_df, base_path=base_path, folder=settings['activation_mode'])
|
3144
3569
|
activation_paths_df = activation_paths_df[activation_paths_df['png_path'].str.contains(name, na=False)]
|
3145
3570
|
|
3146
3571
|
png_paths = _sort_paths_by_basename(paths_df['png_path'].tolist())
|
3147
3572
|
activation_paths = _sort_paths_by_basename(activation_paths_df['png_path'].tolist())
|
3148
3573
|
|
3149
|
-
|
3150
|
-
|
3574
|
+
|
3575
|
+
if activation_paths:
|
3576
|
+
fig_3 = plot_image_grid(image_paths=activation_paths, percentiles=settings['percentiles'])
|
3577
|
+
else:
|
3578
|
+
fig_3 = None
|
3579
|
+
print(f"Could not find any cropped PNGs")
|
3580
|
+
if png_paths:
|
3581
|
+
fig_2 = plot_image_grid(image_paths=png_paths, percentiles=settings['percentiles'])
|
3582
|
+
else:
|
3583
|
+
fig_2 = None
|
3584
|
+
print(f"Could not find any activation maps")
|
3585
|
+
|
3586
|
+
print('fov_path', fov_path)
|
3151
3587
|
fig_1 = plot_image_mask_overlay(file=fov_path,
|
3152
3588
|
channels=settings['channels'],
|
3153
3589
|
cell_channel=settings['cell_channel'],
|
@@ -3156,14 +3592,18 @@ def plot_region(settings):
|
|
3156
3592
|
figuresize=10,
|
3157
3593
|
percentiles=settings['percentiles'],
|
3158
3594
|
thickness=3,
|
3159
|
-
save_pdf=
|
3595
|
+
save_pdf=True,
|
3160
3596
|
mode=settings['mode'],
|
3161
3597
|
export_tiffs=settings['export_tiffs'])
|
3162
3598
|
|
3163
3599
|
dst = os.path.join(settings['src'], 'results', name)
|
3164
|
-
|
3165
|
-
|
3166
|
-
|
3600
|
+
|
3601
|
+
if not fig_1 == None:
|
3602
|
+
save_figure_as_pdf(fig_1, os.path.join(dst, f"{name}_mask_overlay.pdf"))
|
3603
|
+
if not fig_2 == None:
|
3604
|
+
save_figure_as_pdf(fig_2, os.path.join(dst, f"{name}_png_grid.pdf"))
|
3605
|
+
if not fig_3 == None:
|
3606
|
+
save_figure_as_pdf(fig_3, os.path.join(dst, f"{name}_activation_grid.pdf"))
|
3167
3607
|
|
3168
3608
|
return fig_1, fig_2, fig_3
|
3169
3609
|
|
@@ -3327,4 +3767,5 @@ def overlay_masks_on_images(img_folder, normalize=True, resize=True, save=False,
|
|
3327
3767
|
plt.imshow(blended)
|
3328
3768
|
plt.title(f"Overlay: {filename}")
|
3329
3769
|
plt.axis('off')
|
3330
|
-
plt.show()
|
3770
|
+
plt.show()
|
3771
|
+
|