spacr 0.3.47__py3-none-any.whl → 0.3.50__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
spacr/plot.py CHANGED
@@ -32,7 +32,340 @@ import matplotlib.patches as patches
32
32
  from collections import defaultdict
33
33
  from matplotlib.gridspec import GridSpec
34
34
 
35
- def plot_image_mask_overlay(file, channels, cell_channel, nucleus_channel, pathogen_channel, figuresize=10, percentiles=(2,98), thickness=3, save_pdf=True, mode='outlines', export_tiffs=False):
35
+ #filter_dict={'cell':[(0,100000), (0, 65000)],'nucleus':[(3000,100000), (1500, 65000)],'pathogen':[(500,100000), (0, 65000)]}
36
def plot_image_mask_overlay(
    file,
    channels,
    cell_channel,
    nucleus_channel,
    pathogen_channel,
    figuresize=10,
    percentiles=(2, 98),
    thickness=3,
    save_pdf=True,
    mode='outlines',
    export_tiffs=False,
    all_on_all=False,
    all_outlines=False,
    filter_dict=None
):
    """Plot the image channels of a stacked ``.npy`` file with object masks overlaid.

    The array loaded from ``file`` is indexed on its last axis. Image planes
    are selected with ``channels``; label masks are read from the END of the
    last axis, one plane per object type whose channel argument is not None,
    in the order cell, nucleus, pathogen (pathogen is the last plane).

    One panel is drawn per entry of ``channels`` plus a final panel showing all
    objects filled with random colours.

    Args:
        file: Path to the ``.npy`` stack.
        channels: Indices of the image planes to display.
        cell_channel: Image plane associated with the cell mask, or None when
            the stack has no cell mask.
        nucleus_channel: Same, for the nucleus mask.
        pathogen_channel: Same, for the pathogen mask.
        figuresize: Figure height; the figure width is ``4 * figuresize``.
        percentiles: ``(low, high)`` percentiles used to rescale each channel
            to the range [0, 1] for display.
        thickness: Contour thickness passed to ``cv2.drawContours``.
        save_pdf: When true, write the figure to
            ``<grandparent of file>/results/overlay/<file stem>.pdf``.
        mode: ``'outlines'`` draws object contours; any other value overlays
            filled, randomly coloured objects instead.
        export_tiffs: When true, write every plane of the stack as a uint16
            TIFF under ``<grandparent of file>/results/<file stem>/tiff``.
        all_on_all: Draw every object type on every channel (pathogen green,
            nucleus blue, cell red).
        all_outlines: When ``all_on_all`` is false, draw every object type on
            the channels that have no mask of their own.
        filter_dict: Optional mapping from ``'cell'`` / ``'nucleus'`` /
            ``'pathogen'`` to ``[(min_area, max_area), (min_intensity,
            max_intensity)]``, e.g.
            ``{'cell': [(0, 100000), (0, 65000)]}``. Objects outside either
            range are removed from the mask. Object types without an entry
            are left unfiltered.

    Returns:
        The matplotlib figure. ``plt.show()`` is called before returning.
    """

    def random_color_cmap(n_labels, seed=None):
        """Return a ListedColormap of ``n_labels`` random colours after black."""
        # A private RandomState yields the same colours as seeding the global
        # generator with ``seed`` but leaves NumPy's global random state alone.
        rng = np.random.RandomState(seed)
        colors = np.vstack([[0, 0, 0], rng.rand(n_labels, 3)])  # background stays black
        return ListedColormap(colors)

    def _normalize_image(plane):
        """Rescale ``plane`` to [0, 1] between the requested percentiles."""
        v_min, v_max = np.percentile(plane, percentiles)
        return np.clip((plane - v_min) / (v_max - v_min + 1e-5), 0, 1)

    def _generate_colored_mask(label_mask, cmap):
        """Map a label mask to RGBA; alpha is 1 on objects and 0 on background."""
        colored = cmap(label_mask / (label_mask.max() + 1e-5))
        colored[..., 3] = np.where(label_mask > 0, 1, 0)
        return colored

    def _overlay_mask(rgb, rgba_mask):
        """Alpha-composite an RGBA mask over an RGB image."""
        alpha = rgba_mask[..., 3:]
        return np.clip(rgb * (1 - alpha) + rgba_mask[..., :3] * alpha, 0, 1)

    def _apply_contours(rgb, label_mask, color):
        """Draw the external contour of every labelled object onto ``rgb``."""
        rgb_color = mpl.colors.to_rgb(color)
        for label in np.unique(label_mask):
            if label == 0:
                continue  # background
            contours, _ = cv2.findContours(
                (label_mask == label).astype(np.uint8),
                cv2.RETR_EXTERNAL,
                cv2.CHAIN_APPROX_SIMPLE,
            )
            cv2.drawContours(rgb, contours, -1, rgb_color, thickness)
        return rgb

    def _annotate(rgb, label_mask, color):
        """Add one object mask to ``rgb`` as contours or as a filled overlay."""
        if mode == 'outlines':
            return _apply_contours(rgb, label_mask, color)
        cmap = random_color_cmap(int(label_mask.max() + 1), random.randint(0, 100))
        return _overlay_mask(rgb, _generate_colored_mask(label_mask, cmap))

    def _plot_merged_plot(image, outlines, outline_colors, object_masks):
        """Build the figure: one panel per channel plus the combined-objects panel."""
        num_channels = image.shape[-1]
        # squeeze=False keeps ``axes`` indexable even when there is one panel.
        fig, axes = plt.subplots(
            1, num_channels + 1, figsize=(4 * figuresize, figuresize), squeeze=False
        )
        axes = axes[0]

        # Checked in this order, so a channel shared by two object types shows
        # the first match (cell, then nucleus, then pathogen).
        channel_to_mask = [
            (cell_channel, object_masks['cell']),
            (nucleus_channel, object_masks['nucleus']),
            (pathogen_channel, object_masks['pathogen']),
        ]

        for v in range(num_channels):
            normalized = _normalize_image(image[..., v])
            channel_rgb = np.dstack([normalized] * 3)
            current_channel = channels[v]

            own_mask = None
            for obj_channel, obj_mask in channel_to_mask:
                if obj_channel is not None and obj_mask is not None and obj_channel == current_channel:
                    own_mask = obj_mask
                    break

            if all_on_all:
                for outline, color in zip(outlines, outline_colors):
                    channel_rgb = _annotate(channel_rgb, outline, color)
            elif own_mask is not None:
                # A channel with its own mask shows only that mask, in magenta.
                channel_rgb = _annotate(channel_rgb, own_mask, '#FF00FF')
            elif all_outlines:
                # Pair each mask with the colour registered for it so the colours
                # agree with ``all_on_all`` whatever subset of masks is present.
                for outline, color in zip(outlines, outline_colors):
                    channel_rgb = _annotate(channel_rgb, outline, color)

            axes[v].imshow(channel_rgb)
            axes[v].set_title(f'Image - Channel {current_channel}')

        # Final panel: every object filled with a random colour.
        if outlines:
            combined_mask = np.zeros_like(outlines[0])
            for outline in outlines:
                combined_mask = np.maximum(combined_mask, outline)
        else:
            # No masks requested: show an empty panel instead of failing.
            combined_mask = np.zeros(image.shape[:2])

        cmap = random_color_cmap(int(combined_mask.max() + 1), random.randint(0, 100))
        colored = _generate_colored_mask(combined_mask, cmap)
        filled_image = _overlay_mask(np.zeros((*combined_mask.shape, 3)), colored)

        axes[-1].imshow(filled_image)
        axes[-1].set_title('Combined Objects Image')

        plt.tight_layout()

        if save_pdf:
            pdf_dir = os.path.join(
                os.path.dirname(os.path.dirname(file)), 'results', 'overlay'
            )
            os.makedirs(pdf_dir, exist_ok=True)
            stem = os.path.splitext(os.path.basename(file))[0]
            fig.savefig(os.path.join(pdf_dir, stem + '.pdf'), format='pdf')

        plt.show()
        return fig

    def _save_channels_as_tiff(stack, save_dir, filename):
        """Save each plane of the stack as a grayscale uint16 TIFF."""
        os.makedirs(save_dir, exist_ok=True)
        for i in range(stack.shape[-1]):
            tiff_path = os.path.join(save_dir, f"{filename}_channel_{i}.tiff")
            tiff.imwrite(tiff_path, stack[..., i].astype(np.uint16), photometric='minisblack')
            print(f"Saved {tiff_path}")

    def _filter_object(mask, intensity_image, min_max_area=(0, 10000000), min_max_intensity=(0, 65000), type_='object'):
        """Remove objects whose area or mean intensity falls outside the ranges.

        Args:
            mask (ndarray): Label mask; 0 is background.
            intensity_image (ndarray): Intensity image with the same shape as ``mask``.
            min_max_area (tuple): Inclusive ``(min_area, max_area)`` in pixels.
            min_max_intensity (tuple): Inclusive ``(min, max)`` mean intensity.
            type_ (str): Object name used in the printed summary.

        Returns:
            ndarray: The filtered mask, with the dtype of ``mask``.
        """
        original_dtype = mask.dtype
        mask_int = mask.astype(np.int64)
        intensity = intensity_image.astype(np.float64)

        # One pass over the image gives per-label pixel counts and intensity
        # sums, instead of rescanning the whole image once per label.
        labels, inverse, counts = np.unique(mask_int, return_inverse=True, return_counts=True)
        sums = np.bincount(inverse.ravel(), weights=intensity.ravel(), minlength=labels.size)
        means = sums / counts

        is_object = labels != 0  # exclude background
        labels = labels[is_object]
        areas = counts[is_object]
        mean_intensities = means[is_object]

        keep = (
            (areas >= min_max_area[0]) & (areas <= min_max_area[1])
            & (mean_intensities >= min_max_intensity[0])
            & (mean_intensities <= min_max_intensity[1])
        )

        num_objects_before = labels.size
        num_objects_after = int(keep.sum())
        avg_area_before = areas.mean() if num_objects_before > 0 else 0
        avg_intensity_before = mean_intensities.mean() if num_objects_before > 0 else 0
        avg_area_after = areas[keep].mean() if num_objects_after > 0 else 0
        avg_intensity_after = mean_intensities[keep].mean() if num_objects_after > 0 else 0
        print(f"Before filtering {type_}: {num_objects_before} objects")
        print(f"Average area {type_}: {avg_area_before:.2f} pixels, Average intensity: {avg_intensity_before:.2f}")
        print(f"After filtering {type_}: {num_objects_after} objects")
        print(f"Average area {type_}: {avg_area_after:.2f} pixels, Average intensity: {avg_intensity_after:.2f}")

        mask_filtered = np.where(np.isin(mask_int, labels[keep]), mask_int, 0)
        return mask_filtered.astype(original_dtype)

    stack = np.load(file)

    if export_tiffs:
        stem = os.path.splitext(os.path.basename(file))[0]
        save_dir = os.path.join(
            os.path.dirname(os.path.dirname(file)), 'results', stem, 'tiff'
        )
        _save_channels_as_tiff(stack, save_dir, stem)

    # Convert integer stacks to float so the percentile normalization works.
    if stack.dtype in (np.uint16, np.uint8):
        stack = stack.astype(np.float32)

    image = stack[..., channels]
    outlines = []
    outline_colors = []
    object_masks = {'cell': None, 'nucleus': None, 'pathogen': None}

    # Masks occupy the trailing planes. Walking pathogen -> nucleus -> cell and
    # stepping back one plane per mask that is present gives pathogen=-1,
    # nucleus=-2 (or -1 without pathogen), cell=-3/-2/-1.
    mask_plane = -1
    for type_, obj_channel, color in (
        ('pathogen', pathogen_channel, 'green'),
        ('nucleus', nucleus_channel, 'blue'),
        ('cell', cell_channel, 'red'),
    ):
        if obj_channel is None:
            continue
        obj_mask = np.take(stack, mask_plane, axis=2)
        mask_plane -= 1

        limits = filter_dict.get(type_) if filter_dict is not None else None
        if limits is not None:
            obj_intensity = np.take(stack, obj_channel, axis=2)
            obj_mask = _filter_object(obj_mask, obj_intensity, limits[0], limits[1], type_=type_)

        object_masks[type_] = obj_mask
        outlines.append(obj_mask)
        outline_colors.append(color)

    fig = _plot_merged_plot(image, outlines, outline_colors, object_masks)

    return fig
367
+
368
+ def plot_image_mask_overlay_v1(file, channels, cell_channel, nucleus_channel, pathogen_channel, figuresize=10, percentiles=(2,98), thickness=3, save_pdf=True, mode='outlines', export_tiffs=False):
36
369
  """Plot image and mask overlays."""
37
370
 
38
371
  def _plot_merged_plot(image, outlines, outline_colors, figuresize, thickness, percentiles, mode='outlines'):
@@ -1398,7 +1731,7 @@ def _plot_histograms_and_stats(df):
1398
1731
  print('-'*40)
1399
1732
 
1400
1733
  # Plot the histogram
1401
- plt.figure(figsize=(10,6))
1734
+ plt.figure(figsize=(10,10))
1402
1735
  plt.hist(subset['pred'], bins=30, edgecolor='black')
1403
1736
  plt.axvline(mean_pred, color='red', linestyle='dashed', linewidth=1, label=f"Mean = {mean_pred:.2f}")
1404
1737
  plt.title(f'Histogram for pred - Condition: {condition}')
@@ -1455,12 +1788,16 @@ def _reg_v_plot(df, grouping, variable, plate_number):
1455
1788
  plt.show()
1456
1789
 
1457
1790
  def generate_plate_heatmap(df, plate_number, variable, grouping, min_max, min_count):
1791
+
1792
+ if not isinstance(min_count, (int, float)):
1793
+ min_count = 0
1794
+
1458
1795
  df = df.copy() # Work on a copy to avoid SettingWithCopyWarning
1459
1796
  df['plate'], df['row'], df['col'] = zip(*df['prc'].str.split('_'))
1460
1797
 
1461
1798
  # Filtering the dataframe based on the plate_number
1462
1799
  df = df[df['plate'] == plate_number].copy() # Create another copy after filtering
1463
-
1800
+
1464
1801
  # Ensure proper ordering
1465
1802
  row_order = [f'r{i}' for i in range(1, 17)]
1466
1803
  col_order = [f'c{i}' for i in range(1, 28)] # Exclude c15 as per your earlier code
@@ -1496,7 +1833,6 @@ def generate_plate_heatmap(df, plate_number, variable, grouping, min_max, min_co
1496
1833
  min_max = np.quantile(plate_map.values, [min_max[0], min_max[1]])
1497
1834
  if isinstance(min_max[0], (int)) and isinstance(min_max[1], (int)):
1498
1835
  min_max = [min_max[0], min_max[1]]
1499
-
1500
1836
  return plate_map, min_max
1501
1837
 
1502
1838
  def plot_plates(df, variable, grouping, min_max, cmap, min_count=0, verbose=True, dst=None):
@@ -1516,10 +1852,14 @@ def plot_plates(df, variable, grouping, min_max, cmap, min_count=0, verbose=True
1516
1852
  plt.subplots_adjust(wspace=0.1, hspace=0.4)
1517
1853
 
1518
1854
  if not dst is None:
1519
- filename = os.path.join(dst, 'plate_heatmap.pdf')
1520
- fig.savefig(filename, format='pdf')
1521
- print(f'Saved heatmap to {filename}')
1522
-
1855
+ for i in range(0,1000):
1856
+ filename = os.path.join(dst, f'plate_heatmap_{i}.pdf')
1857
+ if os.path.exists(filename):
1858
+ continue
1859
+ else:
1860
+ fig.savefig(filename, format='pdf')
1861
+ print(f'Saved heatmap to {filename}')
1862
+ break
1523
1863
  if verbose:
1524
1864
  plt.show()
1525
1865
  return fig
@@ -1886,22 +2226,77 @@ def volcano_plot(coef_df, filename='volcano_plot.pdf'):
1886
2226
  print(f'Saved Volcano plot: {filename}')
1887
2227
  plt.show()
1888
2228
 
1889
def plot_histogram(df, column, dst=None):
    """Plot a histogram of ``df[column]`` and optionally save it as a PDF.

    Args:
        df: DataFrame holding the data.
        column: Name of the column to plot; also used for the title, the
            x-axis label and the output file name.
        dst: Directory in which ``<column>_histogram.pdf`` is written. The
            directory is created when missing. Nothing is saved when None.
    """
    bar_color = (0/255, 155/255, 155/255)
    plt.figure(figsize=(10, 10))
    sns.histplot(df[column], kde=False, color=bar_color, edgecolor=None, alpha=0.6)
    plt.title(f'Histogram of {column}')
    plt.xlabel(column)
    plt.ylabel('Frequency')

    if dst is not None:
        # savefig does not create missing directories.
        os.makedirs(dst, exist_ok=True)
        # A path separator inside the column name would otherwise point the
        # output at a sub-directory that does not exist.
        safe_column = str(column)
        for separator in (os.sep, os.altsep):
            if separator:
                safe_column = safe_column.replace(separator, '_')
        filename = os.path.join(dst, f'{safe_column}_histogram.pdf')
        plt.savefig(filename, format='pdf')
        print(f'Saved histogram to {filename}')

    plt.show()
1903
2244
 
1904
- def plot_lorenz_curves(csv_files, remove_keys=['TGGT1_220950_1', 'TGGT1_233460_4']):
2245
def plot_lorenz_curves(csv_files, name_column='grna_name', value_column='count', remove_keys=('TGGT1_220950_1', 'TGGT1_233460_4'), x_lim=(0.0, 1), y_lim=(0, 1), save=True):
    """Plot one Lorenz curve per CSV file plus a curve for all files combined.

    Args:
        csv_files: Paths of the CSV files; file ``i`` is labelled ``plate i+1``.
        name_column: Column holding the row names matched against ``remove_keys``.
        value_column: Column holding the values the curves are computed from.
        remove_keys: Names whose rows are dropped before plotting. None or an
            empty sequence keeps every row.
        x_lim: x-axis limits, or None to leave them automatic.
        y_lim: y-axis limits, or None to leave them automatic.
        save: When true, write ``lorenz_curve.pdf`` into a ``results`` folder
            next to the first CSV file.
    """

    def _lorenz(values):
        """Return cumulative value shares of the sorted data, starting at 0."""
        cumulative = np.cumsum(np.sort(values))
        if cumulative[-1] == 0:
            # All values are zero: dividing would give NaN; every row holds an
            # equal (empty) share, which is the line of equality.
            return np.linspace(0, 1, len(cumulative) + 1)
        return np.insert(cumulative / cumulative[-1], 0, 0)

    if len(csv_files) == 0:
        print("No CSV files provided; nothing to plot.")
        return

    excluded = [] if remove_keys is None else list(remove_keys)
    per_file_values = []

    plt.figure(figsize=(10, 10))

    for idx, csv_file in enumerate(csv_files):
        df = pd.read_csv(csv_file)
        if excluded:
            df = df[~df[name_column].isin(excluded)]

        values = df[value_column].values
        if len(values) == 0:
            # A Lorenz curve is undefined without data; skip instead of crashing.
            print(f"No rows left in {csv_file} after filtering; skipping.")
            continue
        per_file_values.append(values)

        curve = _lorenz(values)
        plt.plot(np.linspace(0, 1, len(curve)), curve, label=f"plate {idx+1}")

    # Combined Lorenz curve over the rows of every file.
    if per_file_values:
        combined_curve = _lorenz(np.concatenate(per_file_values))
        plt.plot(np.linspace(0, 1, len(combined_curve)), combined_curve, label="Combined", linestyle='--', color='black')

    if x_lim is not None:
        plt.xlim(x_lim)

    if y_lim is not None:
        plt.ylim(y_lim)

    plt.title('Lorenz Curves')
    plt.xlabel('Cumulative Share of Individuals')
    plt.ylabel('Cumulative Share of Value')
    plt.legend()
    plt.grid(False)

    if save:
        save_path = os.path.join(os.path.dirname(csv_files[0]), 'results')
        os.makedirs(save_path, exist_ok=True)
        save_file_path = os.path.join(save_path, 'lorenz_curve.pdf')
        plt.savefig(save_file_path, format='pdf', bbox_inches='tight')
        print(f"Saved Lorenz Curve: {save_file_path}")
    plt.show()
2298
+
2299
+ def plot_lorenz_curves_v1(csv_files, remove_keys=['TGGT1_220950_1', 'TGGT1_233460_4']):
1905
2300
 
1906
2301
  def lorenz_curve(data):
1907
2302
  """Calculate Lorenz curve."""
@@ -2358,22 +2753,33 @@ class spacrGraph:
2358
2753
  return filtered_df
2359
2754
 
2360
2755
  def perform_normality_tests(self):
2361
- """Perform normality tests for each group and each data column."""
2756
+ """Perform normality tests for each group and data column."""
2362
2757
  unique_groups = self.df[self.grouping_column].unique()
2363
2758
  normality_results = []
2364
2759
 
2365
2760
  for column in self.data_column:
2366
- # Iterate over each group and its corresponding data
2367
2761
  for group in unique_groups:
2368
- data = self.df.loc[self.df[self.grouping_column] == group, column]
2762
+ data = self.df.loc[self.df[self.grouping_column] == group, column].dropna()
2369
2763
  n_samples = len(data)
2370
2764
 
2765
+ if n_samples < 3:
2766
+ # Skip test if there aren't enough data points
2767
+ print(f"Skipping normality test for group '{group}' on column '{column}' - Not enough data.")
2768
+ normality_results.append({
2769
+ 'Comparison': f'Normality test for {group} on {column}',
2770
+ 'Test Statistic': None,
2771
+ 'p-value': None,
2772
+ 'Test Name': 'Skipped',
2773
+ 'Column': column,
2774
+ 'n': n_samples
2775
+ })
2776
+ continue
2777
+
2778
+ # Choose the appropriate normality test based on the sample size
2371
2779
  if n_samples >= 8:
2372
- # Use D'Agostino-Pearson test for larger samples
2373
2780
  stat, p_value = normaltest(data)
2374
2781
  test_name = "D'Agostino-Pearson test"
2375
2782
  else:
2376
- # Use Shapiro-Wilk test for smaller samples
2377
2783
  stat, p_value = shapiro(data)
2378
2784
  test_name = "Shapiro-Wilk test"
2379
2785
 
@@ -2384,11 +2790,11 @@ class spacrGraph:
2384
2790
  'p-value': p_value,
2385
2791
  'Test Name': test_name,
2386
2792
  'Column': column,
2387
- 'n': n_samples # Sample size
2793
+ 'n': n_samples
2388
2794
  })
2389
2795
 
2390
2796
  # Check if all groups are normally distributed (p > 0.05)
2391
- normal_p_values = [result['p-value'] for result in normality_results if result['Column'] == column]
2797
+ normal_p_values = [result['p-value'] for result in normality_results if result['Column'] == column and result['p-value'] is not None]
2392
2798
  is_normal = all(p > 0.05 for p in normal_p_values)
2393
2799
 
2394
2800
  return is_normal, normality_results
@@ -3092,9 +3498,13 @@ def plot_data_from_csv(settings):
3092
3498
  dft = pd.read_csv(src)
3093
3499
  if 'plate' not in dft.columns:
3094
3500
  dft['plate'] = f"plate{i+1}"
3501
+ dft['common'] = 'spacr'
3095
3502
  dfs.append(dft)
3096
3503
 
3097
3504
  df = pd.concat(dfs, axis=0)
3505
+
3506
+ display(df)
3507
+
3098
3508
  df = df.dropna(subset=settings['data_column'])
3099
3509
  df = df.dropna(subset=settings['grouping_column'])
3100
3510
  src = srcs[0]
@@ -3141,23 +3551,39 @@ def plot_region(settings):
3141
3551
  print(f"Saved {path}")
3142
3552
 
3143
3553
  from .io import _read_db
3554
+ from .utils import correct_paths
3144
3555
  fov_path = os.path.join(settings['src'], 'merged', settings['name'])
3145
3556
  name = os.path.splitext(settings['name'])[0]
3146
3557
 
3147
3558
  db_path = os.path.join(settings['src'], 'measurements', 'measurements.db')
3148
3559
  paths_df = _read_db(db_path, tables=['png_list'])[0]
3560
+ paths_df, _ = correct_paths(df=paths_df, base_path=settings['src'], folder='data')
3149
3561
  paths_df = paths_df[paths_df['png_path'].str.contains(name, na=False)]
3150
3562
 
3151
3563
  activation_mode = f"{settings['activation_mode']}_list"
3152
3564
  activation_db_path = os.path.join(settings['src'], 'measurements', settings['activation_db'])
3153
3565
  activation_paths_df = _read_db(activation_db_path, tables=[activation_mode])[0]
3566
+ activation_db = os.path.splitext(settings['activation_db'])[0]
3567
+ base_path=os.path.join(settings['src'], 'datasets',activation_db)
3568
+ activation_paths_df, _ = correct_paths(df=activation_paths_df, base_path=base_path, folder=settings['activation_mode'])
3154
3569
  activation_paths_df = activation_paths_df[activation_paths_df['png_path'].str.contains(name, na=False)]
3155
3570
 
3156
3571
  png_paths = _sort_paths_by_basename(paths_df['png_path'].tolist())
3157
3572
  activation_paths = _sort_paths_by_basename(activation_paths_df['png_path'].tolist())
3158
3573
 
3159
- fig_3 = plot_image_grid(image_paths=activation_paths, percentiles=settings['percentiles'])
3160
- fig_2 = plot_image_grid(image_paths=png_paths, percentiles=settings['percentiles'])
3574
+
3575
+ if activation_paths:
3576
+ fig_3 = plot_image_grid(image_paths=activation_paths, percentiles=settings['percentiles'])
3577
+ else:
3578
+ fig_3 = None
3579
+ print(f"Could not find any cropped PNGs")
3580
+ if png_paths:
3581
+ fig_2 = plot_image_grid(image_paths=png_paths, percentiles=settings['percentiles'])
3582
+ else:
3583
+ fig_2 = None
3584
+ print(f"Could not find any activation maps")
3585
+
3586
+ print('fov_path', fov_path)
3161
3587
  fig_1 = plot_image_mask_overlay(file=fov_path,
3162
3588
  channels=settings['channels'],
3163
3589
  cell_channel=settings['cell_channel'],
@@ -3166,14 +3592,18 @@ def plot_region(settings):
3166
3592
  figuresize=10,
3167
3593
  percentiles=settings['percentiles'],
3168
3594
  thickness=3,
3169
- save_pdf=False,
3595
+ save_pdf=True,
3170
3596
  mode=settings['mode'],
3171
3597
  export_tiffs=settings['export_tiffs'])
3172
3598
 
3173
3599
  dst = os.path.join(settings['src'], 'results', name)
3174
- save_figure_as_pdf(fig_1, os.path.join(dst, f"{name}_mask_overlay.pdf"))
3175
- save_figure_as_pdf(fig_2, os.path.join(dst, f"{name}_png_grid.pdf"))
3176
- save_figure_as_pdf(fig_3, os.path.join(dst, f"{name}_activation_grid.pdf"))
3600
+
3601
+ if not fig_1 == None:
3602
+ save_figure_as_pdf(fig_1, os.path.join(dst, f"{name}_mask_overlay.pdf"))
3603
+ if not fig_2 == None:
3604
+ save_figure_as_pdf(fig_2, os.path.join(dst, f"{name}_png_grid.pdf"))
3605
+ if not fig_3 == None:
3606
+ save_figure_as_pdf(fig_3, os.path.join(dst, f"{name}_activation_grid.pdf"))
3177
3607
 
3178
3608
  return fig_1, fig_2, fig_3
3179
3609
 
@@ -3337,4 +3767,5 @@ def overlay_masks_on_images(img_folder, normalize=True, resize=True, save=False,
3337
3767
  plt.imshow(blended)
3338
3768
  plt.title(f"Overlay: {filename}")
3339
3769
  plt.axis('off')
3340
- plt.show()
3770
+ plt.show()
3771
+
spacr/sequencing.py CHANGED
@@ -493,7 +493,7 @@ def graph_sequencing_stats(settings):
493
493
 
494
494
  def _plot_density(df, dependent_variable, dst=None):
495
495
  """Plot a density plot of the dependent variable."""
496
- plt.figure(figsize=(10, 6))
496
+ plt.figure(figsize=(10, 10))
497
497
  sns.kdeplot(df[dependent_variable], fill=True, alpha=0.6)
498
498
  plt.title(f'Density Plot of {dependent_variable}')
499
499
  plt.xlabel(dependent_variable)
@@ -548,6 +548,9 @@ def graph_sequencing_stats(settings):
548
548
  label=f'Closest Threshold ({closest_threshold["fraction_threshold"]:.4f})')
549
549
  plt.axhline(y=target_unique_count, color='black', linestyle='--',
550
550
  label=f'Target Unique Count ({target_unique_count})')
551
+
552
+ plt.xlim(0,0.1)
553
+ plt.ylim(0,20)
551
554
 
552
555
  if dst is not None:
553
556
  fig_path = os.path.join(dst, 'results')
@@ -594,7 +597,7 @@ def graph_sequencing_stats(settings):
594
597
  df = pd.merge(df, unique_counts, on=['plate', 'row', 'column'], how='left')
595
598
 
596
599
  print(f"unique_count mean: {unique_count_mean} std: {unique_count_std}")
597
-
600
+ display(df)
598
601
  #_plot_density(df, dependent_variable='unique_counts')
599
602
  plot_plates(df=df, variable='unique_counts', grouping='mean', min_max='allq', cmap='viridis',min_count=0, verbose=True, dst=dst)
600
603
 
spacr/settings.py CHANGED
@@ -549,7 +549,7 @@ def get_perform_regression_default_settings(settings):
549
549
  settings.setdefault('filter_column','column')
550
550
  settings.setdefault('plate','plate1')
551
551
  settings.setdefault('class_1_threshold',None)
552
- settings.setdefault('metadata_files',['/home/carruthers/Documents/TGME49_Summary.csv','/home/carruthers/Documents/TGGT1_Summary.csv'])
552
+ settings.setdefault('metadata_files',['/home/carruthers/Documents/TGGT1_Summary.csv','/home/carruthers/Documents/TGME49_Summary.csv'])
553
553
  settings.setdefault('volcano','gene')
554
554
  settings.setdefault('toxo', True)
555
555