spacr 0.3.47__py3-none-any.whl → 0.3.50__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- spacr/chat_bot.py +31 -0
- spacr/gui_elements.py +33 -7
- spacr/ml.py +453 -141
- spacr/plot.py +460 -29
- spacr/sequencing.py +5 -2
- spacr/settings.py +1 -1
- spacr/toxo.py +267 -158
- spacr/utils.py +12 -4
- {spacr-0.3.47.dist-info → spacr-0.3.50.dist-info}/METADATA +2 -1
- {spacr-0.3.47.dist-info → spacr-0.3.50.dist-info}/RECORD +14 -13
- {spacr-0.3.47.dist-info → spacr-0.3.50.dist-info}/LICENSE +0 -0
- {spacr-0.3.47.dist-info → spacr-0.3.50.dist-info}/WHEEL +0 -0
- {spacr-0.3.47.dist-info → spacr-0.3.50.dist-info}/entry_points.txt +0 -0
- {spacr-0.3.47.dist-info → spacr-0.3.50.dist-info}/top_level.txt +0 -0
spacr/plot.py
CHANGED
@@ -32,7 +32,340 @@ import matplotlib.patches as patches
|
|
32
32
|
from collections import defaultdict
|
33
33
|
from matplotlib.gridspec import GridSpec
|
34
34
|
|
35
|
-
|
35
|
+
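# Example filter_dict, one entry per object type as [(min_area, max_area), (min_mean_intensity, max_mean_intensity)]: filter_dict={'cell':[(0,100000), (0, 65000)],'nucleus':[(3000,100000), (1500, 65000)],'pathogen':[(500,100000), (0, 65000)]}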
|
36
|
+
def plot_image_mask_overlay(
    file,
    channels,
    cell_channel,
    nucleus_channel,
    pathogen_channel,
    figuresize=10,
    percentiles=(2, 98),
    thickness=3,
    save_pdf=True,
    mode='outlines',
    export_tiffs=False,
    all_on_all=False,
    all_outlines=False,
    filter_dict=None
):
    """Plot the image channels of a merged ``.npy`` stack with object masks overlaid.

    The stack is loaded with ``np.load`` and indexed along its last axis
    (axis 2). The object masks are read from the trailing planes of the stack:
    the pathogen mask is the last plane, the nucleus mask the one before it and
    the cell mask the one before that; object types whose channel is ``None``
    are skipped and do not occupy a plane.

    Args:
        file: Path to the ``.npy`` stack.
        channels: Indices of the image channels to display (used as
            ``stack[..., channels]`` and for the panel titles).
        cell_channel: Image channel associated with the cell mask, or ``None``
            if there is no cell mask.
        nucleus_channel: Image channel associated with the nucleus mask, or
            ``None`` if there is no nucleus mask.
        pathogen_channel: Image channel associated with the pathogen mask, or
            ``None`` if there is no pathogen mask.
        figuresize: Figure height; the figure width is four times this value.
        percentiles: ``(low, high)`` percentiles used to rescale every channel
            to the 0-1 range for display.
        thickness: Contour line thickness passed to ``cv2.drawContours``.
        save_pdf: If true, save the figure as
            ``<parent of file's folder>/results/overlay/<file name>.pdf``.
        mode: ``'outlines'`` draws object contours; any other value fills the
            objects with random colours instead.
        export_tiffs: If true, write every plane of the stack as a 16-bit TIFF
            to ``<parent of file's folder>/results/<file stem>/tiff``.
        all_on_all: If true, draw every object type on every channel.
        all_outlines: If true (and ``all_on_all`` is false), draw every object
            type on the channels that have no object type of their own.
        filter_dict: Optional dict keyed by ``'cell'``, ``'nucleus'`` and
            ``'pathogen'``; each value is
            ``[(min_area, max_area), (min_intensity, max_intensity)]``.
            Objects outside either range are removed from the mask before
            plotting. Object types without an entry are left unfiltered.

    Returns:
        The matplotlib figure holding one panel per channel plus a final panel
        showing all objects filled with random colours.
    """

    def random_color_cmap(n_labels, seed=None):
        """Generates a random color map for a given number of labels."""
        # A private RandomState yields the same colours as seeding the global
        # generator did, without reseeding NumPy's global RNG for the caller.
        rng = np.random.RandomState(seed) if seed is not None else np.random
        rand_colors = rng.rand(n_labels, 3)
        rand_colors = np.vstack([[0, 0, 0], rand_colors])  # Ensure background is black
        return ListedColormap(rand_colors)

    def _plot_merged_plot(
        image,
        outlines,
        outline_colors,
        figuresize,
        thickness,
        percentiles,
        mode='outlines',
        all_on_all=False,
        all_outlines=False,
        channels=None,
        cell_channel=None,
        nucleus_channel=None,
        pathogen_channel=None,
        cell_outlines=None,
        nucleus_outlines=None,
        pathogen_outlines=None,
        save_pdf=True
    ):
        """Plot the merged plot with overlay, image channels, and masks."""

        def _generate_colored_mask(mask, cmap):
            """Generate a colored mask using the given colormap."""
            mask_norm = mask / (mask.max() + 1e-5)  # Normalize labels into [0, 1)
            colored_mask = cmap(mask_norm)
            colored_mask[..., 3] = np.where(mask > 0, 1, 0)  # Opaque objects, transparent background
            return colored_mask

        def _overlay_mask(image, mask):
            """Overlay the colored mask onto the original image."""
            alpha = mask[..., 3:]
            return np.clip(image * (1 - alpha) + mask[..., :3] * alpha, 0, 1)

        def _normalize_image(image, percentiles):
            """Normalize the image based on given percentiles."""
            v_min, v_max = np.percentile(image, percentiles)
            return np.clip((image - v_min) / (v_max - v_min + 1e-5), 0, 1)

        def _generate_contours(mask):
            """Generate contours from the mask using OpenCV."""
            contours, _ = cv2.findContours(
                mask.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
            )
            return contours

        def _apply_contours(image, mask, color, thickness):
            """Draw the contour of every labelled object in `mask` onto `image`."""
            rgb = mpl.colors.to_rgb(color)
            for label in np.unique(mask):
                if label == 0:
                    continue  # Skip background
                label_mask = (mask == label).astype(np.uint8)
                cv2.drawContours(image, _generate_contours(label_mask), -1, rgb, thickness)
            return image

        def _draw_objects(rgb_image, objects, color):
            """Draw one object mask as contours or as a random-colour fill, depending on `mode`."""
            if mode == 'outlines':
                return _apply_contours(rgb_image, objects, color, thickness)
            cmap = random_color_cmap(int(objects.max() + 1), random.randint(0, 100))
            return _overlay_mask(rgb_image, _generate_colored_mask(objects, cmap))

        num_channels = image.shape[-1]
        fig, ax = plt.subplots(1, num_channels + 1, figsize=(4 * figuresize, figuresize))

        # Channels that have an object type of their own
        channels_with_outlines = [
            ch for ch in (cell_channel, nucleus_channel, pathogen_channel) if ch is not None
        ]

        for v in range(num_channels):
            channel_image_normalized = _normalize_image(image[..., v], percentiles)
            channel_image_rgb = np.dstack([channel_image_normalized] * 3)

            current_channel = channels[v]

            if all_on_all:
                # Apply all outlines to all channels
                for outline, color in zip(outlines, outline_colors):
                    channel_image_rgb = _draw_objects(channel_image_rgb, outline, color)
            elif current_channel in channels_with_outlines:
                # Apply only the relevant outline to each channel
                outline = None
                if current_channel == cell_channel and cell_outlines is not None:
                    outline = cell_outlines
                elif current_channel == nucleus_channel and nucleus_outlines is not None:
                    outline = nucleus_outlines
                elif current_channel == pathogen_channel and pathogen_outlines is not None:
                    outline = pathogen_outlines

                if outline is not None:
                    # Use magenta color when all_on_all=False
                    channel_image_rgb = _draw_objects(channel_image_rgb, outline, '#FF00FF')
            elif all_outlines:
                # Channel without associated outlines: draw every object type.
                # Colours come from outline_colors so each object type keeps the
                # colour it was registered with, whatever subset of masks exists.
                for outline, color in zip(outlines, outline_colors):
                    channel_image_rgb = _draw_objects(channel_image_rgb, outline, color)

            ax[v].imshow(channel_image_rgb)
            ax[v].set_title(f'Image - Channel {current_channel}')

        # Create an image combining all objects filled with colors
        if outlines:
            combined_mask = np.zeros_like(outlines[0])
            for outline in outlines:
                combined_mask = np.maximum(combined_mask, outline)
        else:
            # No object masks were requested: show an empty (black) panel
            combined_mask = np.zeros(image.shape[:-1])

        cmap = random_color_cmap(int(combined_mask.max() + 1), random.randint(0, 100))
        mask = _generate_colored_mask(combined_mask, cmap)
        blank_image = np.zeros((*combined_mask.shape, 3))
        filled_image = _overlay_mask(blank_image, mask)

        ax[-1].imshow(filled_image)
        ax[-1].set_title('Combined Objects Image')

        plt.tight_layout()

        # Save the figure as a PDF
        if save_pdf:
            pdf_dir = os.path.join(
                os.path.dirname(os.path.dirname(file)), 'results', 'overlay'
            )
            os.makedirs(pdf_dir, exist_ok=True)
            pdf_path = os.path.join(
                pdf_dir, os.path.basename(file).replace('.npy', '.pdf')
            )
            fig.savefig(pdf_path, format='pdf')

        plt.show()
        return fig

    def _save_channels_as_tiff(stack, save_dir, filename):
        """Save each channel in the stack as a grayscale TIFF."""
        os.makedirs(save_dir, exist_ok=True)
        for i in range(stack.shape[-1]):
            channel = stack[..., i]
            tiff_path = os.path.join(save_dir, f"{filename}_channel_{i}.tiff")
            tiff.imwrite(tiff_path, channel.astype(np.uint16), photometric='minisblack')
            print(f"Saved {tiff_path}")

    def _filter_object(mask, intensity_image, min_max_area=(0, 10000000), min_max_intensity=(0, 65000), type_='object'):
        """
        Filter objects in a mask based on their area (size) and mean intensity.

        Args:
            mask (ndarray): The input label mask (0 is background).
            intensity_image (ndarray): The corresponding intensity image, same shape as the mask.
            min_max_area (tuple): A tuple (min_area, max_area) specifying the minimum and maximum area thresholds.
            min_max_intensity (tuple): A tuple (min_intensity, max_intensity) specifying the minimum and maximum intensity thresholds.
            type_ (str): Object type name, used only in the printed summary.

        Returns:
            ndarray: The filtered mask, with the dtype of the input mask.
        """
        original_dtype = mask.dtype
        mask_int = mask.astype(np.int64)
        intensity_image = intensity_image.astype(np.float64)

        # One pass over the image: `inverse` maps every pixel to the index of
        # its label, so per-label pixel counts and intensity sums are obtained
        # without rescanning the whole image once per object.
        labels, inverse, counts = np.unique(mask_int, return_inverse=True, return_counts=True)
        inverse = inverse.ravel()
        intensity_sums = np.bincount(inverse, weights=intensity_image.ravel(), minlength=labels.size)

        is_object = labels != 0  # Exclude background
        areas = counts[is_object]
        mean_intensities = intensity_sums[is_object] / areas
        num_objects_before = int(is_object.sum())

        # An object is kept only if it meets both area and intensity criteria
        keep = (
            (areas >= min_max_area[0]) & (areas <= min_max_area[1])
            & (mean_intensities >= min_max_intensity[0]) & (mean_intensities <= min_max_intensity[1])
        )
        num_objects_after = int(keep.sum())

        # Compute average area and intensity before and after filtering
        avg_area_before = areas.mean() if num_objects_before > 0 else 0
        avg_intensity_before = mean_intensities.mean() if num_objects_before > 0 else 0
        avg_area_after = areas[keep].mean() if num_objects_after > 0 else 0
        avg_intensity_after = mean_intensities[keep].mean() if num_objects_after > 0 else 0
        print(f"Before filtering {type_}: {num_objects_before} objects")
        print(f"Average area {type_}: {avg_area_before:.2f} pixels, Average intensity: {avg_intensity_before:.2f}")
        print(f"After filtering {type_}: {num_objects_after} objects")
        print(f"Average area {type_}: {avg_area_after:.2f} pixels, Average intensity: {avg_intensity_after:.2f}")

        # Lookup table: kept labels map to themselves, everything else to 0
        kept_labels = np.zeros_like(labels)
        kept_labels[is_object] = np.where(keep, labels[is_object], 0)
        mask_filtered = kept_labels[inverse].reshape(mask_int.shape)
        return mask_filtered.astype(original_dtype)

    stack = np.load(file)

    if export_tiffs:
        filename = os.path.splitext(os.path.basename(file))[0]
        save_dir = os.path.join(
            os.path.dirname(os.path.dirname(file)),
            'results',
            filename,
            'tiff'
        )
        _save_channels_as_tiff(stack, save_dir, filename)

    # Convert to float for normalization and ensure correct handling of arrays
    if stack.dtype in (np.uint16, np.uint8):
        stack = stack.astype(np.float32)

    image = stack[..., channels]
    outlines = []
    outline_colors = []
    object_outlines = {'pathogen': None, 'nucleus': None, 'cell': None}

    # Masks sit in the trailing planes of the stack; walk backwards from the
    # last plane, consuming one plane per object type that is present.
    mask_dim = -1
    for type_, object_channel, color in (
        ('pathogen', pathogen_channel, 'green'),
        ('nucleus', nucleus_channel, 'blue'),
        ('cell', cell_channel, 'red'),
    ):
        if object_channel is None:
            continue
        object_mask = np.take(stack, mask_dim, axis=2)
        mask_dim -= 1

        # Object types without an entry in filter_dict are left unfiltered
        limits = filter_dict.get(type_) if filter_dict is not None else None
        if limits is not None:
            intensity = np.take(stack, object_channel, axis=2)
            object_mask = _filter_object(object_mask, intensity, limits[0], limits[1], type_=type_)

        object_outlines[type_] = object_mask
        outlines.append(object_mask)
        outline_colors.append(color)

    fig = _plot_merged_plot(
        image=image,
        outlines=outlines,
        outline_colors=outline_colors,
        figuresize=figuresize,
        thickness=thickness,
        percentiles=percentiles,  # Pass percentiles to the plotting function
        mode=mode,
        all_on_all=all_on_all,
        all_outlines=all_outlines,
        channels=channels,
        cell_channel=cell_channel,
        nucleus_channel=nucleus_channel,
        pathogen_channel=pathogen_channel,
        cell_outlines=object_outlines['cell'],
        nucleus_outlines=object_outlines['nucleus'],
        pathogen_outlines=object_outlines['pathogen'],
        save_pdf=save_pdf
    )

    return fig
|
367
|
+
|
368
|
+
def plot_image_mask_overlay_v1(file, channels, cell_channel, nucleus_channel, pathogen_channel, figuresize=10, percentiles=(2,98), thickness=3, save_pdf=True, mode='outlines', export_tiffs=False):
|
36
369
|
"""Plot image and mask overlays."""
|
37
370
|
|
38
371
|
def _plot_merged_plot(image, outlines, outline_colors, figuresize, thickness, percentiles, mode='outlines'):
|
@@ -1398,7 +1731,7 @@ def _plot_histograms_and_stats(df):
|
|
1398
1731
|
print('-'*40)
|
1399
1732
|
|
1400
1733
|
# Plot the histogram
|
1401
|
-
plt.figure(figsize=(10,
|
1734
|
+
plt.figure(figsize=(10,10))
|
1402
1735
|
plt.hist(subset['pred'], bins=30, edgecolor='black')
|
1403
1736
|
plt.axvline(mean_pred, color='red', linestyle='dashed', linewidth=1, label=f"Mean = {mean_pred:.2f}")
|
1404
1737
|
plt.title(f'Histogram for pred - Condition: {condition}')
|
@@ -1455,12 +1788,16 @@ def _reg_v_plot(df, grouping, variable, plate_number):
|
|
1455
1788
|
plt.show()
|
1456
1789
|
|
1457
1790
|
def generate_plate_heatmap(df, plate_number, variable, grouping, min_max, min_count):
|
1791
|
+
|
1792
|
+
if not isinstance(min_count, (int, float)):
|
1793
|
+
min_count = 0
|
1794
|
+
|
1458
1795
|
df = df.copy() # Work on a copy to avoid SettingWithCopyWarning
|
1459
1796
|
df['plate'], df['row'], df['col'] = zip(*df['prc'].str.split('_'))
|
1460
1797
|
|
1461
1798
|
# Filtering the dataframe based on the plate_number
|
1462
1799
|
df = df[df['plate'] == plate_number].copy() # Create another copy after filtering
|
1463
|
-
|
1800
|
+
|
1464
1801
|
# Ensure proper ordering
|
1465
1802
|
row_order = [f'r{i}' for i in range(1, 17)]
|
1466
1803
|
col_order = [f'c{i}' for i in range(1, 28)] # Exclude c15 as per your earlier code
|
@@ -1496,7 +1833,6 @@ def generate_plate_heatmap(df, plate_number, variable, grouping, min_max, min_co
|
|
1496
1833
|
min_max = np.quantile(plate_map.values, [min_max[0], min_max[1]])
|
1497
1834
|
if isinstance(min_max[0], (int)) and isinstance(min_max[1], (int)):
|
1498
1835
|
min_max = [min_max[0], min_max[1]]
|
1499
|
-
|
1500
1836
|
return plate_map, min_max
|
1501
1837
|
|
1502
1838
|
def plot_plates(df, variable, grouping, min_max, cmap, min_count=0, verbose=True, dst=None):
|
@@ -1516,10 +1852,14 @@ def plot_plates(df, variable, grouping, min_max, cmap, min_count=0, verbose=True
|
|
1516
1852
|
plt.subplots_adjust(wspace=0.1, hspace=0.4)
|
1517
1853
|
|
1518
1854
|
if not dst is None:
|
1519
|
-
|
1520
|
-
|
1521
|
-
|
1522
|
-
|
1855
|
+
for i in range(0,1000):
|
1856
|
+
filename = os.path.join(dst, f'plate_heatmap_{i}.pdf')
|
1857
|
+
if os.path.exists(filename):
|
1858
|
+
continue
|
1859
|
+
else:
|
1860
|
+
fig.savefig(filename, format='pdf')
|
1861
|
+
print(f'Saved heatmap to {filename}')
|
1862
|
+
break
|
1523
1863
|
if verbose:
|
1524
1864
|
plt.show()
|
1525
1865
|
return fig
|
@@ -1886,22 +2226,77 @@ def volcano_plot(coef_df, filename='volcano_plot.pdf'):
|
|
1886
2226
|
print(f'Saved Volcano plot: {filename}')
|
1887
2227
|
plt.show()
|
1888
2228
|
|
1889
|
-
def plot_histogram(df,
|
2229
|
+
def plot_histogram(df, column, dst=None):
    """Plot a histogram of one DataFrame column and optionally save it as a PDF.

    Args:
        df: DataFrame holding the data.
        column: Name of the column to plot; also used in the title, the x-axis
            label and the output file name.
        dst: Destination folder. When given, the figure is saved there as
            ``<column>_histogram.pdf``; the folder is created if missing.
    """
    bar_color = (0/255, 155/255, 155/255)
    plt.figure(figsize=(10, 10))
    sns.histplot(df[column], kde=False, color=bar_color, edgecolor=None, alpha=0.6)
    plt.title(f'Histogram of {column}')
    plt.xlabel(column)
    plt.ylabel('Frequency')

    if dst is not None:
        # Create the folder first so savefig does not fail on a fresh results path
        os.makedirs(dst, exist_ok=True)
        filename = os.path.join(dst, f'{column}_histogram.pdf')
        plt.savefig(filename, format='pdf')
        print(f'Saved histogram to {filename}')

    plt.show()
|
1903
2244
|
|
1904
|
-
def plot_lorenz_curves(csv_files, remove_keys=['TGGT1_220950_1', 'TGGT1_233460_4']):
|
2245
|
+
def plot_lorenz_curves(csv_files, name_column='grna_name', value_column='count', remove_keys=['TGGT1_220950_1', 'TGGT1_233460_4'], x_lim=[0.0,1], y_lim=[0,1], save=True):
    """Plot one Lorenz curve per CSV file plus a curve for all files combined.

    Args:
        csv_files: Sequence of CSV paths; file ``i`` is labelled ``plate i+1``.
        name_column: Column holding the row names matched against ``remove_keys``.
        value_column: Column holding the values the curves are computed from.
        remove_keys: Names whose rows are dropped before computing the curves
            (``None`` or empty to keep everything). The default list is only
            read, never mutated.
        x_lim: x-axis limits, or ``None`` to leave them automatic.
        y_lim: y-axis limits, or ``None`` to leave them automatic.
        save: If true, save the figure as ``results/lorenz_curve.pdf`` next to
            the first CSV file.

    Raises:
        ValueError: If ``csv_files`` is empty.
    """
    if len(csv_files) == 0:
        raise ValueError("csv_files must contain at least one CSV file")

    def lorenz_curve(data):
        """Calculate Lorenz curve."""
        cumulative_data = np.cumsum(np.sort(data))
        if cumulative_data.size == 0:
            # No values left (e.g. every row was removed): only the origin
            return np.zeros(1)
        shares = cumulative_data / cumulative_data[-1]
        return np.insert(shares, 0, 0)

    combined_data = []

    plt.figure(figsize=(10, 10))

    for idx, csv_file in enumerate(csv_files):
        df = pd.read_csv(csv_file)
        if remove_keys is not None and len(remove_keys) > 0:
            df = df[~df[name_column].isin(list(remove_keys))]

        values = df[value_column].values
        combined_data.extend(values)

        lorenz = lorenz_curve(values)
        name = f"plate {idx+1}"
        plt.plot(np.linspace(0, 1, len(lorenz)), lorenz, label=name)

    # Plot combined Lorenz curve
    combined_lorenz = lorenz_curve(np.array(combined_data))
    plt.plot(np.linspace(0, 1, len(combined_lorenz)), combined_lorenz, label="Combined", linestyle='--', color='black')

    # Identity checks: `!= None` is evaluated element-wise for array-like limits
    if x_lim is not None:
        plt.xlim(x_lim)

    if y_lim is not None:
        plt.ylim(y_lim)

    plt.title('Lorenz Curves')
    plt.xlabel('Cumulative Share of Individuals')
    plt.ylabel('Cumulative Share of Value')
    plt.legend()
    plt.grid(False)

    if save:
        save_path = os.path.join(os.path.dirname(csv_files[0]), 'results')
        os.makedirs(save_path, exist_ok=True)
        save_file_path = os.path.join(save_path, 'lorenz_curve.pdf')
        plt.savefig(save_file_path, format='pdf', bbox_inches='tight')
        print(f"Saved Lorenz Curve: {save_file_path}")
    plt.show()
|
2298
|
+
|
2299
|
+
def plot_lorenz_curves_v1(csv_files, remove_keys=['TGGT1_220950_1', 'TGGT1_233460_4']):
|
1905
2300
|
|
1906
2301
|
def lorenz_curve(data):
|
1907
2302
|
"""Calculate Lorenz curve."""
|
@@ -2358,22 +2753,33 @@ class spacrGraph:
|
|
2358
2753
|
return filtered_df
|
2359
2754
|
|
2360
2755
|
def perform_normality_tests(self):
|
2361
|
-
"""Perform normality tests for each group and
|
2756
|
+
"""Perform normality tests for each group and data column."""
|
2362
2757
|
unique_groups = self.df[self.grouping_column].unique()
|
2363
2758
|
normality_results = []
|
2364
2759
|
|
2365
2760
|
for column in self.data_column:
|
2366
|
-
# Iterate over each group and its corresponding data
|
2367
2761
|
for group in unique_groups:
|
2368
|
-
data = self.df.loc[self.df[self.grouping_column] == group, column]
|
2762
|
+
data = self.df.loc[self.df[self.grouping_column] == group, column].dropna()
|
2369
2763
|
n_samples = len(data)
|
2370
2764
|
|
2765
|
+
if n_samples < 3:
|
2766
|
+
# Skip test if there aren't enough data points
|
2767
|
+
print(f"Skipping normality test for group '{group}' on column '{column}' - Not enough data.")
|
2768
|
+
normality_results.append({
|
2769
|
+
'Comparison': f'Normality test for {group} on {column}',
|
2770
|
+
'Test Statistic': None,
|
2771
|
+
'p-value': None,
|
2772
|
+
'Test Name': 'Skipped',
|
2773
|
+
'Column': column,
|
2774
|
+
'n': n_samples
|
2775
|
+
})
|
2776
|
+
continue
|
2777
|
+
|
2778
|
+
# Choose the appropriate normality test based on the sample size
|
2371
2779
|
if n_samples >= 8:
|
2372
|
-
# Use D'Agostino-Pearson test for larger samples
|
2373
2780
|
stat, p_value = normaltest(data)
|
2374
2781
|
test_name = "D'Agostino-Pearson test"
|
2375
2782
|
else:
|
2376
|
-
# Use Shapiro-Wilk test for smaller samples
|
2377
2783
|
stat, p_value = shapiro(data)
|
2378
2784
|
test_name = "Shapiro-Wilk test"
|
2379
2785
|
|
@@ -2384,11 +2790,11 @@ class spacrGraph:
|
|
2384
2790
|
'p-value': p_value,
|
2385
2791
|
'Test Name': test_name,
|
2386
2792
|
'Column': column,
|
2387
|
-
'n': n_samples
|
2793
|
+
'n': n_samples
|
2388
2794
|
})
|
2389
2795
|
|
2390
2796
|
# Check if all groups are normally distributed (p > 0.05)
|
2391
|
-
normal_p_values = [result['p-value'] for result in normality_results if result['Column'] == column]
|
2797
|
+
normal_p_values = [result['p-value'] for result in normality_results if result['Column'] == column and result['p-value'] is not None]
|
2392
2798
|
is_normal = all(p > 0.05 for p in normal_p_values)
|
2393
2799
|
|
2394
2800
|
return is_normal, normality_results
|
@@ -3092,9 +3498,13 @@ def plot_data_from_csv(settings):
|
|
3092
3498
|
dft = pd.read_csv(src)
|
3093
3499
|
if 'plate' not in dft.columns:
|
3094
3500
|
dft['plate'] = f"plate{i+1}"
|
3501
|
+
dft['common'] = 'spacr'
|
3095
3502
|
dfs.append(dft)
|
3096
3503
|
|
3097
3504
|
df = pd.concat(dfs, axis=0)
|
3505
|
+
|
3506
|
+
display(df)
|
3507
|
+
|
3098
3508
|
df = df.dropna(subset=settings['data_column'])
|
3099
3509
|
df = df.dropna(subset=settings['grouping_column'])
|
3100
3510
|
src = srcs[0]
|
@@ -3141,23 +3551,39 @@ def plot_region(settings):
|
|
3141
3551
|
print(f"Saved {path}")
|
3142
3552
|
|
3143
3553
|
from .io import _read_db
|
3554
|
+
from .utils import correct_paths
|
3144
3555
|
fov_path = os.path.join(settings['src'], 'merged', settings['name'])
|
3145
3556
|
name = os.path.splitext(settings['name'])[0]
|
3146
3557
|
|
3147
3558
|
db_path = os.path.join(settings['src'], 'measurements', 'measurements.db')
|
3148
3559
|
paths_df = _read_db(db_path, tables=['png_list'])[0]
|
3560
|
+
paths_df, _ = correct_paths(df=paths_df, base_path=settings['src'], folder='data')
|
3149
3561
|
paths_df = paths_df[paths_df['png_path'].str.contains(name, na=False)]
|
3150
3562
|
|
3151
3563
|
activation_mode = f"{settings['activation_mode']}_list"
|
3152
3564
|
activation_db_path = os.path.join(settings['src'], 'measurements', settings['activation_db'])
|
3153
3565
|
activation_paths_df = _read_db(activation_db_path, tables=[activation_mode])[0]
|
3566
|
+
activation_db = os.path.splitext(settings['activation_db'])[0]
|
3567
|
+
base_path=os.path.join(settings['src'], 'datasets',activation_db)
|
3568
|
+
activation_paths_df, _ = correct_paths(df=activation_paths_df, base_path=base_path, folder=settings['activation_mode'])
|
3154
3569
|
activation_paths_df = activation_paths_df[activation_paths_df['png_path'].str.contains(name, na=False)]
|
3155
3570
|
|
3156
3571
|
png_paths = _sort_paths_by_basename(paths_df['png_path'].tolist())
|
3157
3572
|
activation_paths = _sort_paths_by_basename(activation_paths_df['png_path'].tolist())
|
3158
3573
|
|
3159
|
-
|
3160
|
-
|
3574
|
+
|
3575
|
+
if activation_paths:
|
3576
|
+
fig_3 = plot_image_grid(image_paths=activation_paths, percentiles=settings['percentiles'])
|
3577
|
+
else:
|
3578
|
+
fig_3 = None
|
3579
|
+
print(f"Could not find any cropped PNGs")
|
3580
|
+
if png_paths:
|
3581
|
+
fig_2 = plot_image_grid(image_paths=png_paths, percentiles=settings['percentiles'])
|
3582
|
+
else:
|
3583
|
+
fig_2 = None
|
3584
|
+
print(f"Could not find any activation maps")
|
3585
|
+
|
3586
|
+
print('fov_path', fov_path)
|
3161
3587
|
fig_1 = plot_image_mask_overlay(file=fov_path,
|
3162
3588
|
channels=settings['channels'],
|
3163
3589
|
cell_channel=settings['cell_channel'],
|
@@ -3166,14 +3592,18 @@ def plot_region(settings):
|
|
3166
3592
|
figuresize=10,
|
3167
3593
|
percentiles=settings['percentiles'],
|
3168
3594
|
thickness=3,
|
3169
|
-
save_pdf=
|
3595
|
+
save_pdf=True,
|
3170
3596
|
mode=settings['mode'],
|
3171
3597
|
export_tiffs=settings['export_tiffs'])
|
3172
3598
|
|
3173
3599
|
dst = os.path.join(settings['src'], 'results', name)
|
3174
|
-
|
3175
|
-
|
3176
|
-
|
3600
|
+
|
3601
|
+
if not fig_1 == None:
|
3602
|
+
save_figure_as_pdf(fig_1, os.path.join(dst, f"{name}_mask_overlay.pdf"))
|
3603
|
+
if not fig_2 == None:
|
3604
|
+
save_figure_as_pdf(fig_2, os.path.join(dst, f"{name}_png_grid.pdf"))
|
3605
|
+
if not fig_3 == None:
|
3606
|
+
save_figure_as_pdf(fig_3, os.path.join(dst, f"{name}_activation_grid.pdf"))
|
3177
3607
|
|
3178
3608
|
return fig_1, fig_2, fig_3
|
3179
3609
|
|
@@ -3337,4 +3767,5 @@ def overlay_masks_on_images(img_folder, normalize=True, resize=True, save=False,
|
|
3337
3767
|
plt.imshow(blended)
|
3338
3768
|
plt.title(f"Overlay: {filename}")
|
3339
3769
|
plt.axis('off')
|
3340
|
-
plt.show()
|
3770
|
+
plt.show()
|
3771
|
+
|
spacr/sequencing.py
CHANGED
@@ -493,7 +493,7 @@ def graph_sequencing_stats(settings):
|
|
493
493
|
|
494
494
|
def _plot_density(df, dependent_variable, dst=None):
|
495
495
|
"""Plot a density plot of the dependent variable."""
|
496
|
-
plt.figure(figsize=(10,
|
496
|
+
plt.figure(figsize=(10, 10))
|
497
497
|
sns.kdeplot(df[dependent_variable], fill=True, alpha=0.6)
|
498
498
|
plt.title(f'Density Plot of {dependent_variable}')
|
499
499
|
plt.xlabel(dependent_variable)
|
@@ -548,6 +548,9 @@ def graph_sequencing_stats(settings):
|
|
548
548
|
label=f'Closest Threshold ({closest_threshold["fraction_threshold"]:.4f})')
|
549
549
|
plt.axhline(y=target_unique_count, color='black', linestyle='--',
|
550
550
|
label=f'Target Unique Count ({target_unique_count})')
|
551
|
+
|
552
|
+
plt.xlim(0,0.1)
|
553
|
+
plt.ylim(0,20)
|
551
554
|
|
552
555
|
if dst is not None:
|
553
556
|
fig_path = os.path.join(dst, 'results')
|
@@ -594,7 +597,7 @@ def graph_sequencing_stats(settings):
|
|
594
597
|
df = pd.merge(df, unique_counts, on=['plate', 'row', 'column'], how='left')
|
595
598
|
|
596
599
|
print(f"unique_count mean: {unique_count_mean} std: {unique_count_std}")
|
597
|
-
|
600
|
+
display(df)
|
598
601
|
#_plot_density(df, dependent_variable='unique_counts')
|
599
602
|
plot_plates(df=df, variable='unique_counts', grouping='mean', min_max='allq', cmap='viridis',min_count=0, verbose=True, dst=dst)
|
600
603
|
|
spacr/settings.py
CHANGED
@@ -549,7 +549,7 @@ def get_perform_regression_default_settings(settings):
|
|
549
549
|
settings.setdefault('filter_column','column')
|
550
550
|
settings.setdefault('plate','plate1')
|
551
551
|
settings.setdefault('class_1_threshold',None)
|
552
|
-
settings.setdefault('metadata_files',['/home/carruthers/Documents/
|
552
|
+
settings.setdefault('metadata_files',['/home/carruthers/Documents/TGGT1_Summary.csv','/home/carruthers/Documents/TGME49_Summary.csv'])
|
553
553
|
settings.setdefault('volcano','gene')
|
554
554
|
settings.setdefault('toxo', True)
|
555
555
|
|