spacr 0.3.46__py3-none-any.whl → 0.3.50__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
spacr/plot.py CHANGED
@@ -32,7 +32,340 @@ import matplotlib.patches as patches
32
32
  from collections import defaultdict
33
33
  from matplotlib.gridspec import GridSpec
34
34
 
35
- def plot_image_mask_overlay(file, channels, cell_channel, nucleus_channel, pathogen_channel, figuresize=10, percentiles=(2,98), thickness=3, save_pdf=True, mode='outlines', export_tiffs=False):
35
+ #filter_dict={'cell':[(0,100000), (0, 65000)],'nucleus':[(3000,100000), (1500, 65000)],'pathogen':[(500,100000), (0, 65000)]}
36
def plot_image_mask_overlay(
    file,
    channels,
    cell_channel,
    nucleus_channel,
    pathogen_channel,
    figuresize=10,
    percentiles=(2, 98),
    thickness=3,
    save_pdf=True,
    mode='outlines',
    export_tiffs=False,
    all_on_all=False,
    all_outlines=False,
    filter_dict=None
):
    """Plot the selected channels of a ``.npy`` stack with object masks overlaid.

    The stack is loaded with ``np.load``. Object masks are read from the last
    planes of the stack: pathogen from the last plane, nucleus from the plane
    before it (or the last plane when no pathogen channel is given), and cell
    from the plane before those. One panel is drawn per entry in ``channels``
    plus a final panel showing all objects filled with random colours.

    Args:
        file (str): Path to the ``.npy`` stack.
        channels (list): Indices of the image planes to display.
        cell_channel (int or None): Channel associated with the cell mask, or
            None when the stack has no cell mask.
        nucleus_channel (int or None): Channel associated with the nucleus
            mask, or None when the stack has no nucleus mask.
        pathogen_channel (int or None): Channel associated with the pathogen
            mask, or None when the stack has no pathogen mask.
        figuresize (int): Figure height; the width is four times this value.
        percentiles (tuple): Lower/upper percentiles used to normalise each
            displayed channel.
        thickness (int): Contour thickness passed to ``cv2.drawContours``.
        save_pdf (bool): If True, save the figure as a PDF under
            ``<parent of file's folder>/results/overlay``.
        mode (str): ``'outlines'`` draws contours; any other value overlays
            randomly coloured filled masks.
        export_tiffs (bool): If True, write every plane of the stack as a
            16-bit grayscale TIFF before plotting.
        all_on_all (bool): If True, draw every mask on every channel.
        all_outlines (bool): If True, draw every mask on channels that have no
            mask of their own (ignored when ``all_on_all`` is True).
        filter_dict (dict or None): Optional per-object filters of the form
            ``{'cell': [(min_area, max_area), (min_intensity, max_intensity)],
            'nucleus': [...], 'pathogen': [...]}``. An entry is required for
            every object type whose channel is not None.

    Returns:
        matplotlib.figure.Figure: The figure that was drawn.
    """

    def random_color_cmap(n_labels, seed=None):
        """Build a random colormap with ``n_labels`` colours plus a black background."""
        # A private RandomState yields the same colours as seeding the global
        # generator, without resetting NumPy's global random state for callers.
        rng = np.random.RandomState(seed)
        rand_colors = rng.rand(n_labels, 3)
        rand_colors = np.vstack([[0, 0, 0], rand_colors])  # Ensure background is black
        return ListedColormap(rand_colors)

    def _generate_colored_mask(mask, cmap):
        """Map a label mask to RGBA; background pixels become fully transparent."""
        mask_norm = mask / (mask.max() + 1e-5)  # Normalize mask
        colored_mask = cmap(mask_norm)
        colored_mask[..., 3] = np.where(mask > 0, 1, 0)  # Alpha channel
        return colored_mask

    def _overlay_mask(image, mask):
        """Alpha-blend an RGBA mask onto an RGB image, clipped to [0, 1]."""
        alpha = mask[..., 3:]
        return np.clip(image * (1 - alpha) + mask[..., :3] * alpha, 0, 1)

    def _normalize_image(image, percentiles):
        """Rescale the image to [0, 1] between the given percentiles."""
        v_min, v_max = np.percentile(image, percentiles)
        return np.clip((image - v_min) / (v_max - v_min + 1e-5), 0, 1)

    def _generate_contours(mask):
        """Return the external contours of a binary mask using OpenCV."""
        contours, _ = cv2.findContours(
            mask.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
        )
        return contours

    def _apply_contours(image, mask, color, thickness):
        """Draw the contour of every labelled object in ``mask`` onto ``image``."""
        rgb = mpl.colors.to_rgb(color)
        for label in np.unique(mask):
            if label == 0:
                continue  # Skip background
            label_mask = (mask == label).astype(np.uint8)
            cv2.drawContours(image, _generate_contours(label_mask), -1, rgb, thickness)
        return image

    def _draw_objects(channel_rgb, outline, color, mode, thickness):
        """Draw one mask on a channel, as contours or as a filled random-colour overlay."""
        if mode == 'outlines':
            return _apply_contours(channel_rgb, outline, color, thickness)
        cmap = random_color_cmap(int(outline.max() + 1), random.randint(0, 100))
        return _overlay_mask(channel_rgb, _generate_colored_mask(outline, cmap))

    def _plot_merged_plot(
        image,
        outlines,
        outline_colors,
        figuresize,
        thickness,
        percentiles,
        mode='outlines',
        all_on_all=False,
        all_outlines=False,
        channels=None,
        cell_channel=None,
        nucleus_channel=None,
        pathogen_channel=None,
        cell_outlines=None,
        nucleus_outlines=None,
        pathogen_outlines=None,
        save_pdf=True
    ):
        """Plot the merged plot with overlay, image channels, and masks."""
        num_channels = image.shape[-1]
        fig, ax = plt.subplots(1, num_channels + 1, figsize=(4 * figuresize, figuresize))

        # Channels that have a mask of their own
        channels_with_outlines = [
            ch for ch in (cell_channel, nucleus_channel, pathogen_channel) if ch is not None
        ]

        for v in range(num_channels):
            channel_image_normalized = _normalize_image(image[..., v], percentiles)
            channel_image_rgb = np.dstack([channel_image_normalized] * 3)
            current_channel = channels[v]

            if all_on_all:
                # Every mask on every channel, each in its own colour
                to_draw = list(zip(outlines, outline_colors))
            elif current_channel in channels_with_outlines:
                # Only the mask that belongs to this channel, drawn in magenta
                outline = None
                if current_channel == cell_channel and cell_outlines is not None:
                    outline = cell_outlines
                elif current_channel == nucleus_channel and nucleus_outlines is not None:
                    outline = nucleus_outlines
                elif current_channel == pathogen_channel and pathogen_outlines is not None:
                    outline = pathogen_outlines
                to_draw = [(outline, '#FF00FF')] if outline is not None else []
            elif all_outlines:
                # Channel without a mask of its own: draw every mask. Colours
                # come from outline_colors so that each object type keeps the
                # same colour as in the all_on_all view regardless of which
                # masks are present.
                to_draw = list(zip(outlines, outline_colors))
            else:
                to_draw = []

            for outline, color in to_draw:
                channel_image_rgb = _draw_objects(
                    channel_image_rgb, outline, color, mode, thickness
                )

            ax[v].imshow(channel_image_rgb)
            ax[v].set_title(f'Image - Channel {current_channel}')

        # Create an image combining all objects filled with colors
        if outlines:
            combined_mask = np.zeros_like(outlines[0])
            for outline in outlines:
                combined_mask = np.maximum(combined_mask, outline)
        else:
            # No masks requested: show an empty (black) combined panel
            combined_mask = np.zeros(image.shape[:-1])

        cmap = random_color_cmap(int(combined_mask.max() + 1), random.randint(0, 100))
        mask = _generate_colored_mask(combined_mask, cmap)
        blank_image = np.zeros((*combined_mask.shape, 3))
        filled_image = _overlay_mask(blank_image, mask)

        ax[-1].imshow(filled_image)
        ax[-1].set_title('Combined Objects Image')

        plt.tight_layout()

        # Save the figure as a PDF
        if save_pdf:
            pdf_dir = os.path.join(
                os.path.dirname(os.path.dirname(file)), 'results', 'overlay'
            )
            os.makedirs(pdf_dir, exist_ok=True)
            pdf_path = os.path.join(
                pdf_dir, os.path.basename(file).replace('.npy', '.pdf')
            )
            fig.savefig(pdf_path, format='pdf')

        plt.show()
        return fig

    def _save_channels_as_tiff(stack, save_dir, filename):
        """Save each channel in the stack as a grayscale TIFF."""
        os.makedirs(save_dir, exist_ok=True)
        for i in range(stack.shape[-1]):
            tiff_path = os.path.join(save_dir, f"{filename}_channel_{i}.tiff")
            tiff.imwrite(tiff_path, stack[..., i].astype(np.uint16), photometric='minisblack')
            print(f"Saved {tiff_path}")

    def _filter_object(mask, intensity_image, min_max_area=(0, 10000000), min_max_intensity=(0, 65000), type_='object'):
        """
        Filter objects in a mask based on their area (size) and mean intensity.

        Args:
            mask (ndarray): The input label mask (0 is background).
            intensity_image (ndarray): The corresponding intensity image.
            min_max_area (tuple): (min_area, max_area), both inclusive.
            min_max_intensity (tuple): (min_intensity, max_intensity), both inclusive.
            type_ (str): Object name used in the printed summary.

        Returns:
            ndarray: The mask with rejected objects set to 0, in the input dtype.
        """
        original_dtype = mask.dtype
        mask_int = mask.astype(np.int64)
        intensity_image = intensity_image.astype(np.float64)

        unique_labels = np.unique(mask_int)
        unique_labels = unique_labels[unique_labels != 0]  # Exclude background
        num_objects_before = len(unique_labels)

        areas = []
        mean_intensities = []
        keep = []
        for label in unique_labels:
            label_mask = (mask_int == label)
            area = np.sum(label_mask)
            mean_intensity = np.mean(intensity_image[label_mask])
            areas.append(area)
            mean_intensities.append(mean_intensity)
            # An object is kept only if it meets both area and intensity criteria
            keep.append(
                bool(min_max_area[0] <= area <= min_max_area[1])
                and bool(min_max_intensity[0] <= mean_intensity <= min_max_intensity[1])
            )

        areas = np.array(areas)
        mean_intensities = np.array(mean_intensities)
        keep = np.array(keep, dtype=bool)
        labels_to_keep = unique_labels[keep]
        num_objects_after = len(labels_to_keep)

        # Averages default to 0 when there is nothing to average
        avg_area_before = areas.mean() if num_objects_before > 0 else 0
        avg_intensity_before = mean_intensities.mean() if num_objects_before > 0 else 0
        avg_area_after = areas[keep].mean() if num_objects_after > 0 else 0
        avg_intensity_after = mean_intensities[keep].mean() if num_objects_after > 0 else 0

        print(f"Before filtering {type_}: {num_objects_before} objects")
        print(f"Average area {type_}: {avg_area_before:.2f} pixels, Average intensity: {avg_intensity_before:.2f}")
        print(f"After filtering {type_}: {num_objects_after} objects")
        print(f"Average area {type_}: {avg_area_after:.2f} pixels, Average intensity: {avg_intensity_after:.2f}")

        mask_filtered = np.where(np.isin(mask_int, labels_to_keep), mask_int, 0)
        return mask_filtered.astype(original_dtype)

    def _extract_objects(stack, mask_dim, intensity_channel, key):
        """Read one mask plane from the stack and apply the optional filter for ``key``."""
        objects = np.take(stack, mask_dim, axis=2)
        if filter_dict is not None:
            intensity = np.take(stack, intensity_channel, axis=2)
            objects = _filter_object(
                objects, intensity, filter_dict[key][0], filter_dict[key][1], type_=key
            )
        return objects

    stack = np.load(file)

    if export_tiffs:
        filename = os.path.splitext(os.path.basename(file))[0]
        save_dir = os.path.join(
            os.path.dirname(os.path.dirname(file)),
            'results',
            filename,
            'tiff'
        )
        _save_channels_as_tiff(stack, save_dir, filename)

    # Convert to float for normalization and ensure correct handling of arrays
    if stack.dtype in (np.uint16, np.uint8):
        stack = stack.astype(np.float32)

    image = stack[..., channels]
    outlines = []
    outline_colors = []

    cell_outlines = None
    nucleus_outlines = None
    pathogen_outlines = None

    has_pathogen = pathogen_channel is not None
    has_nucleus = nucleus_channel is not None

    if has_pathogen:
        pathogen_outlines = _extract_objects(stack, -1, pathogen_channel, 'pathogen')
        outlines.append(pathogen_outlines)
        outline_colors.append('green')

    if has_nucleus:
        # The nucleus mask sits just before the pathogen mask when both exist
        nucleus_mask_dim = -2 if has_pathogen else -1
        nucleus_outlines = _extract_objects(stack, nucleus_mask_dim, nucleus_channel, 'nucleus')
        outlines.append(nucleus_outlines)
        outline_colors.append('blue')

    if cell_channel is not None:
        # The cell mask precedes whichever of the nucleus/pathogen masks exist
        cell_mask_dim = -1 - int(has_nucleus) - int(has_pathogen)
        cell_outlines = _extract_objects(stack, cell_mask_dim, cell_channel, 'cell')
        outlines.append(cell_outlines)
        outline_colors.append('red')

    fig = _plot_merged_plot(
        image=image,
        outlines=outlines,
        outline_colors=outline_colors,
        figuresize=figuresize,
        thickness=thickness,
        percentiles=percentiles,
        mode=mode,
        all_on_all=all_on_all,
        all_outlines=all_outlines,
        channels=channels,
        cell_channel=cell_channel,
        nucleus_channel=nucleus_channel,
        pathogen_channel=pathogen_channel,
        cell_outlines=cell_outlines,
        nucleus_outlines=nucleus_outlines,
        pathogen_outlines=pathogen_outlines,
        save_pdf=save_pdf
    )

    return fig
367
+
368
+ def plot_image_mask_overlay_v1(file, channels, cell_channel, nucleus_channel, pathogen_channel, figuresize=10, percentiles=(2,98), thickness=3, save_pdf=True, mode='outlines', export_tiffs=False):
36
369
  """Plot image and mask overlays."""
37
370
 
38
371
  def _plot_merged_plot(image, outlines, outline_colors, figuresize, thickness, percentiles, mode='outlines'):
@@ -1398,7 +1731,7 @@ def _plot_histograms_and_stats(df):
1398
1731
  print('-'*40)
1399
1732
 
1400
1733
  # Plot the histogram
1401
- plt.figure(figsize=(10,6))
1734
+ plt.figure(figsize=(10,10))
1402
1735
  plt.hist(subset['pred'], bins=30, edgecolor='black')
1403
1736
  plt.axvline(mean_pred, color='red', linestyle='dashed', linewidth=1, label=f"Mean = {mean_pred:.2f}")
1404
1737
  plt.title(f'Histogram for pred - Condition: {condition}')
@@ -1455,12 +1788,16 @@ def _reg_v_plot(df, grouping, variable, plate_number):
1455
1788
  plt.show()
1456
1789
 
1457
1790
  def generate_plate_heatmap(df, plate_number, variable, grouping, min_max, min_count):
1791
+
1792
+ if not isinstance(min_count, (int, float)):
1793
+ min_count = 0
1794
+
1458
1795
  df = df.copy() # Work on a copy to avoid SettingWithCopyWarning
1459
1796
  df['plate'], df['row'], df['col'] = zip(*df['prc'].str.split('_'))
1460
1797
 
1461
1798
  # Filtering the dataframe based on the plate_number
1462
1799
  df = df[df['plate'] == plate_number].copy() # Create another copy after filtering
1463
-
1800
+
1464
1801
  # Ensure proper ordering
1465
1802
  row_order = [f'r{i}' for i in range(1, 17)]
1466
1803
  col_order = [f'c{i}' for i in range(1, 28)] # Exclude c15 as per your earlier code
@@ -1496,7 +1833,6 @@ def generate_plate_heatmap(df, plate_number, variable, grouping, min_max, min_co
1496
1833
  min_max = np.quantile(plate_map.values, [min_max[0], min_max[1]])
1497
1834
  if isinstance(min_max[0], (int)) and isinstance(min_max[1], (int)):
1498
1835
  min_max = [min_max[0], min_max[1]]
1499
-
1500
1836
  return plate_map, min_max
1501
1837
 
1502
1838
  def plot_plates(df, variable, grouping, min_max, cmap, min_count=0, verbose=True, dst=None):
@@ -1516,10 +1852,14 @@ def plot_plates(df, variable, grouping, min_max, cmap, min_count=0, verbose=True
1516
1852
  plt.subplots_adjust(wspace=0.1, hspace=0.4)
1517
1853
 
1518
1854
  if not dst is None:
1519
- filename = os.path.join(dst, 'plate_heatmap.pdf')
1520
- fig.savefig(filename, format='pdf')
1521
- print(f'Saved heatmap to {filename}')
1522
-
1855
+ for i in range(0,1000):
1856
+ filename = os.path.join(dst, f'plate_heatmap_{i}.pdf')
1857
+ if os.path.exists(filename):
1858
+ continue
1859
+ else:
1860
+ fig.savefig(filename, format='pdf')
1861
+ print(f'Saved heatmap to {filename}')
1862
+ break
1523
1863
  if verbose:
1524
1864
  plt.show()
1525
1865
  return fig
@@ -1886,22 +2226,77 @@ def volcano_plot(coef_df, filename='volcano_plot.pdf'):
1886
2226
  print(f'Saved Volcano plot: {filename}')
1887
2227
  plt.show()
1888
2228
 
1889
- def plot_histogram(df, dependent_variable, dst=None):
2229
def plot_histogram(df, column, dst=None):
    """Plot a histogram of one DataFrame column and optionally save it as a PDF.

    Args:
        df (pd.DataFrame): Table containing ``column``.
        column (str): Name of the column to plot. Also used in the title, the
            x-axis label and the output file name.
        dst (str, optional): Directory in which ``<column>_histogram.pdf`` is
            written. It is created if it does not exist. If None, the figure
            is only shown, not saved.
    """
    bar_color = (0/255, 155/255, 155/255)
    plt.figure(figsize=(10, 10))
    sns.histplot(df[column], kde=False, color=bar_color, edgecolor=None, alpha=0.6)
    plt.title(f'Histogram of {column}')
    plt.xlabel(column)
    plt.ylabel('Frequency')

    if dst is not None:
        # savefig does not create missing directories, so make sure dst exists
        os.makedirs(dst, exist_ok=True)
        filename = os.path.join(dst, f'{column}_histogram.pdf')
        plt.savefig(filename, format='pdf')
        print(f'Saved histogram to {filename}')

    plt.show()
1903
2244
 
1904
- def plot_lorenz_curves(csv_files, remove_keys=['TGGT1_220950_1', 'TGGT1_233460_4']):
2245
def plot_lorenz_curves(csv_files, name_column='grna_name', value_column='count', remove_keys=('TGGT1_220950_1', 'TGGT1_233460_4'), x_lim=(0.0, 1), y_lim=(0, 1), save=True):
    """Plot one Lorenz curve per CSV file plus a curve for all files combined.

    Args:
        csv_files (list): Paths of the CSV files to read. Each file is
            labelled ``plate <n>`` in the legend, in the order given.
        name_column (str): Column holding the row names matched against
            ``remove_keys``.
        value_column (str): Column holding the values the curve is built from.
        remove_keys (iterable or None): Names whose rows are dropped before
            the curve is computed. None keeps all rows.
        x_lim (sequence or None): x-axis limits; None leaves them automatic.
        y_lim (sequence or None): y-axis limits; None leaves them automatic.
        save (bool): If True, save the figure as ``lorenz_curve.pdf`` in a
            ``results`` folder next to the first CSV file.
    """

    def _lorenz(values):
        """Return the Lorenz curve of ``values`` with a leading 0, or None if undefined."""
        cumulative = np.cumsum(np.sort(np.asarray(values)))
        # The curve is undefined without data or when the values sum to zero
        if cumulative.size == 0 or cumulative[-1] == 0:
            return None
        return np.insert(cumulative / cumulative[-1], 0, 0)

    per_file_values = []

    plt.figure(figsize=(10, 10))

    for idx, csv_file in enumerate(csv_files):
        df = pd.read_csv(csv_file)
        if remove_keys is not None:
            df = df[~df[name_column].isin(list(remove_keys))]

        values = df[value_column].values
        per_file_values.append(values)

        curve = _lorenz(values)
        if curve is None:
            print(f"Skipping plate {idx+1} ({csv_file}): no non-zero values to plot.")
            continue
        plt.plot(np.linspace(0, 1, len(curve)), curve, label=f"plate {idx+1}")

    # Plot combined Lorenz curve
    combined_values = np.concatenate(per_file_values) if per_file_values else np.array([])
    combined_lorenz = _lorenz(combined_values)
    if combined_lorenz is not None:
        plt.plot(np.linspace(0, 1, len(combined_lorenz)), combined_lorenz, label="Combined", linestyle='--', color='black')

    # Identity checks: "!= None" is evaluated element-wise for array limits
    if x_lim is not None:
        plt.xlim(x_lim)

    if y_lim is not None:
        plt.ylim(y_lim)

    plt.title('Lorenz Curves')
    plt.xlabel('Cumulative Share of Individuals')
    plt.ylabel('Cumulative Share of Value')
    plt.legend()
    plt.grid(False)

    if save:
        save_path = os.path.join(os.path.dirname(csv_files[0]), 'results')
        os.makedirs(save_path, exist_ok=True)
        save_file_path = os.path.join(save_path, 'lorenz_curve.pdf')
        plt.savefig(save_file_path, format='pdf', bbox_inches='tight')
        print(f"Saved Lorenz Curve: {save_file_path}")
    plt.show()
2298
+
2299
+ def plot_lorenz_curves_v1(csv_files, remove_keys=['TGGT1_220950_1', 'TGGT1_233460_4']):
1905
2300
 
1906
2301
  def lorenz_curve(data):
1907
2302
  """Calculate Lorenz curve."""
@@ -2358,22 +2753,33 @@ class spacrGraph:
2358
2753
  return filtered_df
2359
2754
 
2360
2755
  def perform_normality_tests(self):
2361
- """Perform normality tests for each group and each data column."""
2756
+ """Perform normality tests for each group and data column."""
2362
2757
  unique_groups = self.df[self.grouping_column].unique()
2363
2758
  normality_results = []
2364
2759
 
2365
2760
  for column in self.data_column:
2366
- # Iterate over each group and its corresponding data
2367
2761
  for group in unique_groups:
2368
- data = self.df.loc[self.df[self.grouping_column] == group, column]
2762
+ data = self.df.loc[self.df[self.grouping_column] == group, column].dropna()
2369
2763
  n_samples = len(data)
2370
2764
 
2765
+ if n_samples < 3:
2766
+ # Skip test if there aren't enough data points
2767
+ print(f"Skipping normality test for group '{group}' on column '{column}' - Not enough data.")
2768
+ normality_results.append({
2769
+ 'Comparison': f'Normality test for {group} on {column}',
2770
+ 'Test Statistic': None,
2771
+ 'p-value': None,
2772
+ 'Test Name': 'Skipped',
2773
+ 'Column': column,
2774
+ 'n': n_samples
2775
+ })
2776
+ continue
2777
+
2778
+ # Choose the appropriate normality test based on the sample size
2371
2779
  if n_samples >= 8:
2372
- # Use D'Agostino-Pearson test for larger samples
2373
2780
  stat, p_value = normaltest(data)
2374
2781
  test_name = "D'Agostino-Pearson test"
2375
2782
  else:
2376
- # Use Shapiro-Wilk test for smaller samples
2377
2783
  stat, p_value = shapiro(data)
2378
2784
  test_name = "Shapiro-Wilk test"
2379
2785
 
@@ -2384,11 +2790,11 @@ class spacrGraph:
2384
2790
  'p-value': p_value,
2385
2791
  'Test Name': test_name,
2386
2792
  'Column': column,
2387
- 'n': n_samples # Sample size
2793
+ 'n': n_samples
2388
2794
  })
2389
2795
 
2390
2796
  # Check if all groups are normally distributed (p > 0.05)
2391
- normal_p_values = [result['p-value'] for result in normality_results if result['Column'] == column]
2797
+ normal_p_values = [result['p-value'] for result in normality_results if result['Column'] == column and result['p-value'] is not None]
2392
2798
  is_normal = all(p > 0.05 for p in normal_p_values)
2393
2799
 
2394
2800
  return is_normal, normality_results
@@ -2733,7 +3139,7 @@ class spacrGraph:
2733
3139
  hue = None
2734
3140
 
2735
3141
  # Create the jitter plot
2736
- sns.stripplot(data=self.df_melted,x=x_axis_column,y='Value',hue=self.hue, palette=self.sns_palette, dodge=self.jitter_bar_dodge, jitter=self.bar_width, ax=ax,alpha=0.6)
3142
+ sns.stripplot(data=self.df_melted,x=x_axis_column,y='Value',hue=self.hue, palette=self.sns_palette, dodge=self.jitter_bar_dodge, jitter=self.bar_width, ax=ax, alpha=0.6, size=16)
2737
3143
 
2738
3144
  # Adjust legend and labels
2739
3145
  ax.set_xlabel(self.grouping_column)
@@ -2754,6 +3160,12 @@ class spacrGraph:
2754
3160
  # Ensure epoch is used on the x-axis and accuracy on the y-axis
2755
3161
  x_axis_column = self.data_column[0]
2756
3162
  y_axis_column = self.data_column[1]
3163
+
3164
+ if self.log_y:
3165
+ self.df[y_axis_column] = np.log10(self.df[y_axis_column])
3166
+
3167
+ if self.log_x:
3168
+ self.df[x_axis_column] = np.log10(self.df[x_axis_column])
2757
3169
 
2758
3170
  # Set hue to the grouping column to get one line per group
2759
3171
  hue = self.grouping_column
@@ -2771,11 +3183,6 @@ class spacrGraph:
2771
3183
  ax.set_xlabel(f"{x_axis_column}")
2772
3184
  ax.set_ylabel(f"{y_axis_column}")
2773
3185
 
2774
- if self.log_y:
2775
- ax.set_yscale('log')
2776
- if self.log_x:
2777
- ax.set_xscale('log')
2778
-
2779
3186
  def _create_line_with_std_area(self, ax):
2780
3187
  """Helper method to create a line graph with shaded area representing standard deviation."""
2781
3188
 
@@ -2784,15 +3191,22 @@ class spacrGraph:
2784
3191
  y_axis_column_mean = f"mean_{y_axis_column}"
2785
3192
  y_axis_column_std = f"std_{y_axis_column_mean}"
2786
3193
 
3194
+ if self.log_y:
3195
+ self.df[y_axis_column] = np.log10(self.df[y_axis_column])
3196
+
3197
+ if self.log_x:
3198
+ self.df[x_axis_column] = np.log10(self.df[x_axis_column])
3199
+
2787
3200
  # Pivot the DataFrame to get mean and std for each epoch across plates
2788
3201
  summary_df = self.df.pivot_table(index=x_axis_column,values=y_axis_column,aggfunc=['mean', 'std']).reset_index()
2789
3202
 
2790
3203
  # Flatten MultiIndex columns (result of pivoting)
2791
3204
  summary_df.columns = [x_axis_column, y_axis_column_mean, y_axis_column_std]
2792
-
3205
+
2793
3206
  # Plot the mean accuracy as a line
2794
3207
  sns.lineplot(data=summary_df,x=x_axis_column,y=y_axis_column_mean,ax=ax,marker='o',linewidth=1,markersize=0,color='blue',label=y_axis_column_mean)
2795
3208
 
3209
+
2796
3210
  # Fill the area representing the standard deviation
2797
3211
  ax.fill_between(summary_df[x_axis_column],summary_df[y_axis_column_mean] - summary_df[y_axis_column_std],summary_df[y_axis_column_mean] + summary_df[y_axis_column_std],color='blue', alpha=0.1 )
2798
3212
 
@@ -2800,11 +3214,6 @@ class spacrGraph:
2800
3214
  ax.set_xlabel(f"{x_axis_column}")
2801
3215
  ax.set_ylabel(f"{y_axis_column}")
2802
3216
 
2803
- if self.log_y:
2804
- ax.set_yscale('log')
2805
- if self.log_x:
2806
- ax.set_xscale('log')
2807
-
2808
3217
  def _create_box_plot(self, ax):
2809
3218
  """Helper method to create a box plot with consistent spacing."""
2810
3219
  # Combine grouping column and data column if needed
@@ -2969,23 +3378,29 @@ def plot_data_from_db(settings):
2969
3378
  df (pd.DataFrame): The extracted table as a DataFrame.
2970
3379
  """
2971
3380
 
3381
+
3382
+
2972
3383
  if isinstance(settings['src'], str):
2973
3384
  srcs = [settings['src']]
2974
3385
  elif isinstance(settings['src'], list):
2975
3386
  srcs = settings['src']
2976
- if isinstance(settings['database'], str):
2977
- settings['database'] = [settings['database'] for _ in range(len(srcs))]
2978
3387
  else:
2979
3388
  raise ValueError("src must be a string or a list of strings.")
2980
3389
 
3390
+ if isinstance(settings['database'], str):
3391
+ settings['database'] = [settings['database'] for _ in range(len(srcs))]
3392
+
3393
+ settings['dst'] = os.path.join(srcs[0], 'results')
3394
+
2981
3395
  save_settings(settings, name=f"{settings['graph_name']}_plot_settings_db", show=True)
2982
3396
 
2983
3397
  dfs = []
2984
3398
  for i, src in enumerate(srcs):
2985
3399
 
2986
3400
  db_loc = os.path.join(src, 'measurements', settings['database'][i])
2987
-
3401
+ print(f"Database: {db_loc}")
2988
3402
  if settings['table_names'] in ['saliency_image_correlations']:
3403
+ print(f"Database table: {settings['table_names']}")
2989
3404
  [df1] = _read_db(db_loc, tables=[settings['table_names']])
2990
3405
  else:
2991
3406
  df1, _ = _read_and_merge_data(locs=[db_loc],
@@ -3006,8 +3421,9 @@ def plot_data_from_db(settings):
3006
3421
 
3007
3422
  df = pd.concat(dfs, axis=0)
3008
3423
  df['prc'] = df['plate'].astype(str) + '_' + df['row'].astype(str) + '_' + df['col'].astype(str)
3009
- df['recruitment'] = df['pathogen_channel_1_mean_intensity'] / df['cytoplasm_channel_1_mean_intensity']
3010
- df['recruitment'] = df['pathogen_channel_1_mean_intensity'] / df['cytoplasm_channel_1_mean_intensity']
3424
+ #df['recruitment'] = df['pathogen_channel_1_mean_intensity'] / df['cytoplasm_channel_1_mean_intensity']
3425
+ #df['recruitment'] = df['pathogen_channel_1_mean_intensity'] / df['cytoplasm_channel_1_mean_intensity']
3426
+ df['class'] = df['png_path'].apply(lambda x: 'class_1' if 'class_1' in x else ('class_0' if 'class_0' in x else None))
3011
3427
 
3012
3428
  if settings['cell_plate_metadata'] != None:
3013
3429
  df = df.dropna(subset='host_cell')
@@ -3021,7 +3437,7 @@ def plot_data_from_db(settings):
3021
3437
  df = df.dropna(subset=settings['data_column'])
3022
3438
  df = df.dropna(subset=settings['grouping_column'])
3023
3439
 
3024
- #df['class'] = df['png_path'].apply(lambda x: 'class_1' if 'class_1' in x else ('class_0' if 'class_0' in x else None))
3440
+
3025
3441
  src = srcs[0]
3026
3442
  dst = os.path.join(src, 'results', settings['graph_name'])
3027
3443
  os.makedirs(dst, exist_ok=True)
@@ -3082,9 +3498,13 @@ def plot_data_from_csv(settings):
3082
3498
  dft = pd.read_csv(src)
3083
3499
  if 'plate' not in dft.columns:
3084
3500
  dft['plate'] = f"plate{i+1}"
3501
+ dft['common'] = 'spacr'
3085
3502
  dfs.append(dft)
3086
3503
 
3087
3504
  df = pd.concat(dfs, axis=0)
3505
+
3506
+ display(df)
3507
+
3088
3508
  df = df.dropna(subset=settings['data_column'])
3089
3509
  df = df.dropna(subset=settings['grouping_column'])
3090
3510
  src = srcs[0]
@@ -3131,23 +3551,39 @@ def plot_region(settings):
3131
3551
  print(f"Saved {path}")
3132
3552
 
3133
3553
  from .io import _read_db
3554
+ from .utils import correct_paths
3134
3555
  fov_path = os.path.join(settings['src'], 'merged', settings['name'])
3135
3556
  name = os.path.splitext(settings['name'])[0]
3136
3557
 
3137
3558
  db_path = os.path.join(settings['src'], 'measurements', 'measurements.db')
3138
3559
  paths_df = _read_db(db_path, tables=['png_list'])[0]
3560
+ paths_df, _ = correct_paths(df=paths_df, base_path=settings['src'], folder='data')
3139
3561
  paths_df = paths_df[paths_df['png_path'].str.contains(name, na=False)]
3140
3562
 
3141
3563
  activation_mode = f"{settings['activation_mode']}_list"
3142
3564
  activation_db_path = os.path.join(settings['src'], 'measurements', settings['activation_db'])
3143
3565
  activation_paths_df = _read_db(activation_db_path, tables=[activation_mode])[0]
3566
+ activation_db = os.path.splitext(settings['activation_db'])[0]
3567
+ base_path=os.path.join(settings['src'], 'datasets',activation_db)
3568
+ activation_paths_df, _ = correct_paths(df=activation_paths_df, base_path=base_path, folder=settings['activation_mode'])
3144
3569
  activation_paths_df = activation_paths_df[activation_paths_df['png_path'].str.contains(name, na=False)]
3145
3570
 
3146
3571
  png_paths = _sort_paths_by_basename(paths_df['png_path'].tolist())
3147
3572
  activation_paths = _sort_paths_by_basename(activation_paths_df['png_path'].tolist())
3148
3573
 
3149
- fig_3 = plot_image_grid(image_paths=activation_paths, percentiles=settings['percentiles'])
3150
- fig_2 = plot_image_grid(image_paths=png_paths, percentiles=settings['percentiles'])
3574
+
3575
+ if activation_paths:
3576
+ fig_3 = plot_image_grid(image_paths=activation_paths, percentiles=settings['percentiles'])
3577
+ else:
3578
+ fig_3 = None
3579
+ print(f"Could not find any cropped PNGs")
3580
+ if png_paths:
3581
+ fig_2 = plot_image_grid(image_paths=png_paths, percentiles=settings['percentiles'])
3582
+ else:
3583
+ fig_2 = None
3584
+ print(f"Could not find any activation maps")
3585
+
3586
+ print('fov_path', fov_path)
3151
3587
  fig_1 = plot_image_mask_overlay(file=fov_path,
3152
3588
  channels=settings['channels'],
3153
3589
  cell_channel=settings['cell_channel'],
@@ -3156,14 +3592,18 @@ def plot_region(settings):
3156
3592
  figuresize=10,
3157
3593
  percentiles=settings['percentiles'],
3158
3594
  thickness=3,
3159
- save_pdf=False,
3595
+ save_pdf=True,
3160
3596
  mode=settings['mode'],
3161
3597
  export_tiffs=settings['export_tiffs'])
3162
3598
 
3163
3599
  dst = os.path.join(settings['src'], 'results', name)
3164
- save_figure_as_pdf(fig_1, os.path.join(dst, f"{name}_mask_overlay.pdf"))
3165
- save_figure_as_pdf(fig_2, os.path.join(dst, f"{name}_png_grid.pdf"))
3166
- save_figure_as_pdf(fig_3, os.path.join(dst, f"{name}_activation_grid.pdf"))
3600
+
3601
+ if not fig_1 == None:
3602
+ save_figure_as_pdf(fig_1, os.path.join(dst, f"{name}_mask_overlay.pdf"))
3603
+ if not fig_2 == None:
3604
+ save_figure_as_pdf(fig_2, os.path.join(dst, f"{name}_png_grid.pdf"))
3605
+ if not fig_3 == None:
3606
+ save_figure_as_pdf(fig_3, os.path.join(dst, f"{name}_activation_grid.pdf"))
3167
3607
 
3168
3608
  return fig_1, fig_2, fig_3
3169
3609
 
@@ -3327,4 +3767,5 @@ def overlay_masks_on_images(img_folder, normalize=True, resize=True, save=False,
3327
3767
  plt.imshow(blended)
3328
3768
  plt.title(f"Overlay: {filename}")
3329
3769
  plt.axis('off')
3330
- plt.show()
3770
+ plt.show()
3771
+