spacr 0.0.71__tar.gz → 0.0.81__tar.gz

This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (54)
  1. {spacr-0.0.71/spacr.egg-info → spacr-0.0.81}/PKG-INFO +2 -1
  2. {spacr-0.0.71 → spacr-0.0.81}/setup.py +3 -2
  3. {spacr-0.0.71 → spacr-0.0.81}/spacr/__init__.py +4 -1
  4. spacr-0.0.81/spacr/__main__.py +6 -0
  5. {spacr-0.0.71 → spacr-0.0.81}/spacr/annotate_app.py +74 -58
  6. {spacr-0.0.71 → spacr-0.0.81}/spacr/core.py +7 -214
  7. {spacr-0.0.71 → spacr-0.0.81}/spacr/io.py +0 -66
  8. {spacr-0.0.71 → spacr-0.0.81}/spacr/measure.py +46 -59
  9. {spacr-0.0.71 → spacr-0.0.81}/spacr/plot.py +117 -81
  10. {spacr-0.0.71 → spacr-0.0.81}/spacr/sequencing.py +508 -491
  11. {spacr-0.0.71 → spacr-0.0.81}/spacr/utils.py +469 -182
  12. {spacr-0.0.71 → spacr-0.0.81/spacr.egg-info}/PKG-INFO +2 -1
  13. {spacr-0.0.71 → spacr-0.0.81}/spacr.egg-info/requires.txt +1 -0
  14. spacr-0.0.71/spacr/__main__.py +0 -13
  15. {spacr-0.0.71 → spacr-0.0.81}/LICENSE +0 -0
  16. {spacr-0.0.71 → spacr-0.0.81}/MANIFEST.in +0 -0
  17. {spacr-0.0.71 → spacr-0.0.81}/README.rst +0 -0
  18. {spacr-0.0.71 → spacr-0.0.81}/setup.cfg +0 -0
  19. {spacr-0.0.71 → spacr-0.0.81}/spacr/chris.py +0 -0
  20. {spacr-0.0.71 → spacr-0.0.81}/spacr/deep_spacr.py +0 -0
  21. {spacr-0.0.71 → spacr-0.0.81}/spacr/graph_learning.py +0 -0
  22. {spacr-0.0.71 → spacr-0.0.81}/spacr/gui.py +0 -0
  23. {spacr-0.0.71 → spacr-0.0.81}/spacr/gui_2.py +0 -0
  24. {spacr-0.0.71 → spacr-0.0.81}/spacr/gui_classify_app.py +0 -0
  25. {spacr-0.0.71 → spacr-0.0.81}/spacr/gui_mask_app.py +0 -0
  26. {spacr-0.0.71 → spacr-0.0.81}/spacr/gui_measure_app.py +0 -0
  27. {spacr-0.0.71 → spacr-0.0.81}/spacr/gui_sim_app.py +0 -0
  28. {spacr-0.0.71 → spacr-0.0.81}/spacr/gui_utils.py +0 -0
  29. {spacr-0.0.71 → spacr-0.0.81}/spacr/logger.py +0 -0
  30. {spacr-0.0.71 → spacr-0.0.81}/spacr/mask_app.py +0 -0
  31. {spacr-0.0.71 → spacr-0.0.81}/spacr/models/cp/toxo_pv_lumen.CP_model +0 -0
  32. {spacr-0.0.71 → spacr-0.0.81}/spacr/sim.py +0 -0
  33. {spacr-0.0.71 → spacr-0.0.81}/spacr/timelapse.py +0 -0
  34. {spacr-0.0.71 → spacr-0.0.81}/spacr/version.py +0 -0
  35. {spacr-0.0.71 → spacr-0.0.81}/spacr.egg-info/SOURCES.txt +0 -0
  36. {spacr-0.0.71 → spacr-0.0.81}/spacr.egg-info/dependency_links.txt +0 -0
  37. {spacr-0.0.71 → spacr-0.0.81}/spacr.egg-info/entry_points.txt +0 -0
  38. {spacr-0.0.71 → spacr-0.0.81}/spacr.egg-info/top_level.txt +0 -0
  39. {spacr-0.0.71 → spacr-0.0.81}/tests/test_annotate_app.py +0 -0
  40. {spacr-0.0.71 → spacr-0.0.81}/tests/test_core.py +0 -0
  41. {spacr-0.0.71 → spacr-0.0.81}/tests/test_gui_classify_app.py +0 -0
  42. {spacr-0.0.71 → spacr-0.0.81}/tests/test_gui_mask_app.py +0 -0
  43. {spacr-0.0.71 → spacr-0.0.81}/tests/test_gui_measure_app.py +0 -0
  44. {spacr-0.0.71 → spacr-0.0.81}/tests/test_gui_sim_app.py +0 -0
  45. {spacr-0.0.71 → spacr-0.0.81}/tests/test_gui_utils.py +0 -0
  46. {spacr-0.0.71 → spacr-0.0.81}/tests/test_io.py +0 -0
  47. {spacr-0.0.71 → spacr-0.0.81}/tests/test_mask_app.py +0 -0
  48. {spacr-0.0.71 → spacr-0.0.81}/tests/test_measure.py +0 -0
  49. {spacr-0.0.71 → spacr-0.0.81}/tests/test_plot.py +0 -0
  50. {spacr-0.0.71 → spacr-0.0.81}/tests/test_sim.py +0 -0
  51. {spacr-0.0.71 → spacr-0.0.81}/tests/test_timelapse.py +0 -0
  52. {spacr-0.0.71 → spacr-0.0.81}/tests/test_train.py +0 -0
  53. {spacr-0.0.71 → spacr-0.0.81}/tests/test_umap.py +0 -0
  54. {spacr-0.0.71 → spacr-0.0.81}/tests/test_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: spacr
3
- Version: 0.0.71
3
+ Version: 0.0.81
4
4
  Summary: Spatial phenotype analysis of crisp screens (SpaCr)
5
5
  Home-page: https://github.com/EinarOlafsson/spacr
6
6
  Author: Einar Birnir Olafsson
@@ -38,6 +38,7 @@ Requires-Dist: PyWavelets<2.0,>=1.6.0
38
38
  Requires-Dist: torchcam<1.0,>=0.4.0
39
39
  Requires-Dist: ttf_opensans>=2020.10.30
40
40
  Requires-Dist: customtkinter<6.0,>=5.2.2
41
+ Requires-Dist: biopython<2.0,>=1.80
41
42
  Requires-Dist: lxml<6.0,>=5.1.0
42
43
  Provides-Extra: dev
43
44
  Requires-Dist: pytest>=3.9; extra == "dev"
@@ -50,13 +50,14 @@ dependencies = [
50
50
  'PyWavelets>=1.6.0,<2.0',
51
51
  'torchcam>=0.4.0,<1.0',
52
52
  'ttf_opensans>=2020.10.30',
53
- 'customtkinter>=5.2.2,<6.0',
53
+ 'customtkinter>=5.2.2,<6.0',
54
+ 'biopython>=1.80,<2.0',
54
55
  'lxml>=5.1.0,<6.0'
55
56
  ]
56
57
 
57
58
  setup(
58
59
  name="spacr",
59
- version="0.0.71",
60
+ version="0.0.81",
60
61
  author="Einar Birnir Olafsson",
61
62
  author_email="olafsson@med.umich.com",
62
63
  description="Spatial phenotype analysis of crisp screens (SpaCr)",
@@ -8,16 +8,18 @@ from . import utils
8
8
  from . import plot
9
9
  from . import measure
10
10
  from . import sim
11
+ from . import sequencing
11
12
  from . import timelapse
12
13
  from . import deep_spacr
13
- from . import mask_app
14
14
  from . import annotate_app
15
15
  from . import gui_utils
16
+ from . import mask_app
16
17
  from . import gui_mask_app
17
18
  from . import gui_measure_app
18
19
  from . import gui_classify_app
19
20
  from . import logger
20
21
 
22
+
21
23
  __all__ = [
22
24
  "core",
23
25
  "io",
@@ -25,6 +27,7 @@ __all__ = [
25
27
  "plot",
26
28
  "measure",
27
29
  "sim",
30
+ "sequencing"
28
31
  "timelapse",
29
32
  "deep_spacr",
30
33
  "annotate_app",
@@ -0,0 +1,6 @@
1
+ """
2
+ Copyright © 2024 Something
3
+ """
4
+
5
+ if __name__ == "__main__":
6
+ main()
@@ -10,13 +10,16 @@ from IPython.display import display, HTML
10
10
  import tkinter as tk
11
11
  from tkinter import ttk
12
12
  from ttkthemes import ThemedTk
13
+ from skimage.exposure import rescale_intensity
14
+ import cv2
15
+ import matplotlib.pyplot as plt
13
16
 
14
17
  from .logger import log_function_call
15
18
 
16
19
  from .gui_utils import ScrollableFrame, set_default_font, set_dark_style, create_dark_mode, style_text_boxes, create_menu_bar
17
20
 
18
21
  class ImageApp:
19
- def __init__(self, root, db_path, src, image_type=None, channels=None, grid_rows=None, grid_cols=None, image_size=(200, 200), annotation_column='annotate'):
22
+ def __init__(self, root, db_path, src, image_type=None, channels=None, grid_rows=None, grid_cols=None, image_size=(200, 200), annotation_column='annotate', normalize=False, percentiles=(1,99)):
20
23
  """
21
24
  Initializes an instance of the ImageApp class.
22
25
 
@@ -30,6 +33,7 @@ class ImageApp:
30
33
  - grid_cols (int): The number of columns in the image grid.
31
34
  - image_size (tuple): The size of the displayed images.
32
35
  - annotation_column (str): The column name for image annotations in the database.
36
+ - normalize (bool): Whether to normalize images to their 2nd and 98th percentiles. Defaults to False.
33
37
  """
34
38
  self.root = root
35
39
  self.db_path = db_path
@@ -41,6 +45,8 @@ class ImageApp:
41
45
  self.annotation_column = annotation_column
42
46
  self.image_type = image_type
43
47
  self.channels = channels
48
+ self.normalize = normalize
49
+ self.percentiles = percentiles
44
50
  self.images = {}
45
51
  self.pending_updates = {}
46
52
  self.labels = []
@@ -119,47 +125,80 @@ class ImageApp:
119
125
  label.bind('<Button-3>', self.get_on_image_click(path, label, img))
120
126
 
121
127
  self.root.update()
122
-
128
+
129
+ def load_single_image(self, path_annotation_tuple):
130
+ """
131
+ Loads a single image from the given path and annotation tuple.
132
+
133
+ Args:
134
+ path_annotation_tuple (tuple): A tuple containing the image path and its annotation.
135
+
136
+ Returns:
137
+ img (PIL.Image.Image): The loaded image.
138
+ annotation: The annotation associated with the image.
139
+ """
140
+ path, annotation = path_annotation_tuple
141
+ img = Image.open(path)
142
+ img = self.normalize_image(img, self.normalize, self.percentiles)
143
+ img = img.convert('RGB')
144
+ img = self.filter_channels(img)
145
+ img = img.resize(self.image_size)
146
+ return img, annotation
147
+
123
148
  @staticmethod
124
- def normalize_image(img):
149
+ def normalize_image(img, normalize=False, percentiles=(1, 99)):
125
150
  """
126
- Normalize the pixel values of an image to the range [0, 255].
151
+ Normalize the pixel values of an image based on the 2nd and 98th percentiles or the image min and max values,
152
+ and ensure the image is exported as 8-bit.
127
153
 
128
154
  Parameters:
129
155
  - img: PIL.Image.Image. The input image to be normalized.
156
+ - normalize: bool. Whether to normalize based on the 2nd and 98th percentiles.
157
+ - percentiles: tuple. The percentiles to use for normalization.
130
158
 
131
159
  Returns:
132
- - PIL.Image.Image. The normalized image.
160
+ - PIL.Image.Image. The normalized and 8-bit converted image.
133
161
  """
134
162
  img_array = np.array(img)
135
- img_array = ((img_array - img_array.min()) * (1/(img_array.max() - img_array.min()) * 255)).astype('uint8')
136
- return Image.fromarray(img_array)
137
163
 
164
+ if normalize:
165
+ if img_array.ndim == 2: # Grayscale image
166
+ p2, p98 = np.percentile(img_array, percentiles)
167
+ img_array = rescale_intensity(img_array, in_range=(p2, p98), out_range=(0, 255))
168
+ else: # Color image or multi-channel image
169
+ for channel in range(img_array.shape[2]):
170
+ p2, p98 = np.percentile(img_array[:, :, channel], percentiles)
171
+ img_array[:, :, channel] = rescale_intensity(img_array[:, :, channel], in_range=(p2, p98), out_range=(0, 255))
172
+
173
+ img_array = np.clip(img_array, 0, 255).astype('uint8')
174
+
175
+ return Image.fromarray(img_array)
176
+
138
177
  def add_colored_border(self, img, border_width, border_color):
139
- """
140
- Adds a colored border to an image.
141
-
142
- Args:
143
- img (PIL.Image.Image): The input image.
144
- border_width (int): The width of the border in pixels.
145
- border_color (str): The color of the border in RGB format.
146
-
147
- Returns:
148
- PIL.Image.Image: The image with the colored border.
149
- """
150
- top_border = Image.new('RGB', (img.width, border_width), color=border_color)
151
- bottom_border = Image.new('RGB', (img.width, border_width), color=border_color)
152
- left_border = Image.new('RGB', (border_width, img.height), color=border_color)
153
- right_border = Image.new('RGB', (border_width, img.height), color=border_color)
154
-
155
- bordered_img = Image.new('RGB', (img.width + 2 * border_width, img.height + 2 * border_width), color='white')
156
- bordered_img.paste(top_border, (border_width, 0))
157
- bordered_img.paste(bottom_border, (border_width, img.height + border_width))
158
- bordered_img.paste(left_border, (0, border_width))
159
- bordered_img.paste(right_border, (img.width + border_width, border_width))
160
- bordered_img.paste(img, (border_width, border_width))
161
-
162
- return bordered_img
178
+ """
179
+ Adds a colored border to an image.
180
+
181
+ Args:
182
+ img (PIL.Image.Image): The input image.
183
+ border_width (int): The width of the border in pixels.
184
+ border_color (str): The color of the border in RGB format.
185
+
186
+ Returns:
187
+ PIL.Image.Image: The image with the colored border.
188
+ """
189
+ top_border = Image.new('RGB', (img.width, border_width), color=border_color)
190
+ bottom_border = Image.new('RGB', (img.width, border_width), color=border_color)
191
+ left_border = Image.new('RGB', (border_width, img.height), color=border_color)
192
+ right_border = Image.new('RGB', (border_width, img.height), color=border_color)
193
+
194
+ bordered_img = Image.new('RGB', (img.width + 2 * border_width, img.height + 2 * border_width), color='white')
195
+ bordered_img.paste(top_border, (border_width, 0))
196
+ bordered_img.paste(bottom_border, (border_width, img.height + border_width))
197
+ bordered_img.paste(left_border, (0, border_width))
198
+ bordered_img.paste(right_border, (img.width + border_width, border_width))
199
+ bordered_img.paste(img, (border_width, border_width))
200
+
201
+ return bordered_img
163
202
 
164
203
  def filter_channels(self, img):
165
204
  """
@@ -187,26 +226,6 @@ class ImageApp:
187
226
 
188
227
  return Image.merge("RGB", (r, g, b))
189
228
 
190
- def load_single_image(self, path_annotation_tuple):
191
- """
192
- Loads a single image from the given path and annotation tuple.
193
-
194
- Args:
195
- path_annotation_tuple (tuple): A tuple containing the image path and its annotation.
196
-
197
- Returns:
198
- img (PIL.Image.Image): The loaded image.
199
- annotation: The annotation associated with the image.
200
- """
201
- path, annotation = path_annotation_tuple
202
- img = Image.open(path)
203
- if img.mode == "I":
204
- img = self.normalize_image(img)
205
- img = img.convert('RGB')
206
- img = self.filter_channels(img)
207
- img = img.resize(self.image_size)
208
- return img, annotation
209
-
210
229
  def get_on_image_click(self, path, label, img):
211
230
  """
212
231
  Returns a callback function that handles the click event on an image.
@@ -242,7 +261,7 @@ class ImageApp:
242
261
  self.root.update()
243
262
 
244
263
  return on_image_click
245
-
264
+
246
265
  @staticmethod
247
266
  def update_html(text):
248
267
  display(HTML(f"""
@@ -347,7 +366,7 @@ class ImageApp:
347
366
  self.root.destroy()
348
367
  print(f'Quit application')
349
368
 
350
- def annotate(src, image_type=None, channels=None, geom="1000x1100", img_size=(200, 200), rows=5, columns=5, annotation_column='annotate'):
369
+ def annotate(src, image_type=None, channels=None, geom="1000x1100", img_size=(200, 200), rows=5, columns=5, annotation_column='annotate', normalize=False, percentiles=(1,99)):
351
370
  """
352
371
  Annotates images in a database using a graphical user interface.
353
372
 
@@ -361,11 +380,9 @@ def annotate(src, image_type=None, channels=None, geom="1000x1100", img_size=(20
361
380
  rows (int, optional): The number of rows in the image grid. Defaults to 5.
362
381
  columns (int, optional): The number of columns in the image grid. Defaults to 5.
363
382
  annotation_column (str, optional): The name of the annotation column in the database table. Defaults to 'annotate'.
383
+ normalize (bool, optional): Whether to normalize images to their 2nd and 98th percentiles. Defaults to False.
364
384
  """
365
385
  db = os.path.join(src, 'measurements/measurements.db')
366
- #print('src', src)
367
- #print('db', db)
368
-
369
386
  conn = sqlite3.connect(db)
370
387
  c = conn.cursor()
371
388
  c.execute('PRAGMA table_info(png_list)')
@@ -377,7 +394,7 @@ def annotate(src, image_type=None, channels=None, geom="1000x1100", img_size=(20
377
394
 
378
395
  root = tk.Tk()
379
396
  root.geometry(geom)
380
- app = ImageApp(root, db, src, image_type=image_type, channels=channels, image_size=img_size, grid_rows=rows, grid_cols=columns, annotation_column=annotation_column)
397
+ app = ImageApp(root, db, src, image_type=image_type, channels=channels, image_size=img_size, grid_rows=rows, grid_cols=columns, annotation_column=annotation_column, normalize=normalize, percentiles=percentiles)
381
398
  next_button = tk.Button(root, text="Next", command=app.next_page)
382
399
  next_button.grid(row=app.grid_rows, column=app.grid_cols - 1)
383
400
  back_button = tk.Button(root, text="Back", command=app.previous_page)
@@ -388,7 +405,6 @@ def annotate(src, image_type=None, channels=None, geom="1000x1100", img_size=(20
388
405
  app.load_images()
389
406
  root.mainloop()
390
407
 
391
-
392
408
  def check_for_duplicates(db):
393
409
  """
394
410
  Check for duplicates in the given SQLite database.
@@ -15,12 +15,8 @@ from multiprocessing import Pool, cpu_count, Value, Lock
15
15
  import seaborn as sns
16
16
 
17
17
  from skimage.measure import regionprops, label
18
- from skimage.morphology import square
19
18
  from skimage.transform import resize as resizescikit
20
- from collections import defaultdict
21
- from torch.utils.data import DataLoader, random_split
22
- from sklearn.cluster import KMeans
23
- from sklearn.decomposition import PCA
19
+ from torch.utils.data import DataLoader
24
20
 
25
21
  from skimage import measure
26
22
  from sklearn.model_selection import train_test_split
@@ -30,7 +26,6 @@ from sklearn.inspection import permutation_importance
30
26
  from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
31
27
  from sklearn.preprocessing import StandardScaler
32
28
 
33
- from scipy.ndimage import binary_dilation
34
29
  from scipy.spatial.distance import cosine, euclidean, mahalanobis, cityblock, minkowski, chebyshev, hamming, jaccard, braycurtis
35
30
 
36
31
  import torchvision.transforms as transforms
@@ -40,7 +35,6 @@ import shap
40
35
  import matplotlib.pyplot as plt
41
36
  import matplotlib
42
37
  matplotlib.use('Agg')
43
- #import matplotlib.pyplot as plt
44
38
 
45
39
  from .logger import log_function_call
46
40
 
@@ -1637,216 +1631,14 @@ def analyze_recruitment(src, metadata_settings, advanced_settings):
1637
1631
  cells,wells = _results_to_csv(src, df, df_well)
1638
1632
  return [cells,wells]
1639
1633
 
1640
- def _merge_cells_based_on_parasite_overlap(parasite_mask, cell_mask, nuclei_mask, overlap_threshold=5, perimeter_threshold=30):
1641
- """
1642
- Merge cells in cell_mask if a parasite in parasite_mask overlaps with more than one cell,
1643
- and if cells share more than a specified perimeter percentage.
1644
-
1645
- Args:
1646
- parasite_mask (ndarray): Mask of parasites.
1647
- cell_mask (ndarray): Mask of cells.
1648
- nuclei_mask (ndarray): Mask of nuclei.
1649
- overlap_threshold (float): The percentage threshold for merging cells based on parasite overlap.
1650
- perimeter_threshold (float): The percentage threshold for merging cells based on shared perimeter.
1651
-
1652
- Returns:
1653
- ndarray: The modified cell mask (cell_mask) with unique labels.
1654
- """
1655
- labeled_cells = label(cell_mask)
1656
- labeled_parasites = label(parasite_mask)
1657
- labeled_nuclei = label(nuclei_mask)
1658
- num_parasites = np.max(labeled_parasites)
1659
- num_cells = np.max(labeled_cells)
1660
- num_nuclei = np.max(labeled_nuclei)
1661
-
1662
- # Merge cells based on parasite overlap
1663
- for parasite_id in range(1, num_parasites + 1):
1664
- current_parasite_mask = labeled_parasites == parasite_id
1665
- overlapping_cell_labels = np.unique(labeled_cells[current_parasite_mask])
1666
- overlapping_cell_labels = overlapping_cell_labels[overlapping_cell_labels != 0]
1667
- if len(overlapping_cell_labels) > 1:
1668
- # Calculate the overlap percentages
1669
- overlap_percentages = [
1670
- np.sum(current_parasite_mask & (labeled_cells == cell_label)) / np.sum(current_parasite_mask) * 100
1671
- for cell_label in overlapping_cell_labels
1672
- ]
1673
- # Merge cells if overlap percentage is above the threshold
1674
- for cell_label, overlap_percentage in zip(overlapping_cell_labels, overlap_percentages):
1675
- if overlap_percentage > overlap_threshold:
1676
- first_label = overlapping_cell_labels[0]
1677
- for other_label in overlapping_cell_labels[1:]:
1678
- if other_label != first_label:
1679
- cell_mask[cell_mask == other_label] = first_label
1680
-
1681
- # Merge cells based on nucleus overlap
1682
- for nucleus_id in range(1, num_nuclei + 1):
1683
- current_nucleus_mask = labeled_nuclei == nucleus_id
1684
- overlapping_cell_labels = np.unique(labeled_cells[current_nucleus_mask])
1685
- overlapping_cell_labels = overlapping_cell_labels[overlapping_cell_labels != 0]
1686
- if len(overlapping_cell_labels) > 1:
1687
- # Calculate the overlap percentages
1688
- overlap_percentages = [
1689
- np.sum(current_nucleus_mask & (labeled_cells == cell_label)) / np.sum(current_nucleus_mask) * 100
1690
- for cell_label in overlapping_cell_labels
1691
- ]
1692
- # Merge cells if overlap percentage is above the threshold for each cell
1693
- if all(overlap_percentage > overlap_threshold for overlap_percentage in overlap_percentages):
1694
- first_label = overlapping_cell_labels[0]
1695
- for other_label in overlapping_cell_labels[1:]:
1696
- if other_label != first_label:
1697
- cell_mask[cell_mask == other_label] = first_label
1698
-
1699
- # Check for cells without nuclei and merge based on shared perimeter
1700
- labeled_cells = label(cell_mask) # Re-label after merging based on overlap
1701
- cell_regions = regionprops(labeled_cells)
1702
- for region in cell_regions:
1703
- cell_label = region.label
1704
- cell_mask_binary = labeled_cells == cell_label
1705
- overlapping_nuclei = np.unique(nuclei_mask[cell_mask_binary])
1706
- overlapping_nuclei = overlapping_nuclei[overlapping_nuclei != 0]
1707
-
1708
- if len(overlapping_nuclei) == 0:
1709
- # Cell does not overlap with any nucleus
1710
- perimeter = region.perimeter
1711
- # Dilate the cell to find neighbors
1712
- dilated_cell = binary_dilation(cell_mask_binary, structure=square(3))
1713
- neighbor_cells = np.unique(labeled_cells[dilated_cell])
1714
- neighbor_cells = neighbor_cells[(neighbor_cells != 0) & (neighbor_cells != cell_label)]
1715
- # Calculate shared border length with neighboring cells
1716
- shared_borders = [
1717
- np.sum((labeled_cells == neighbor_label) & dilated_cell) for neighbor_label in neighbor_cells
1718
- ]
1719
- shared_border_percentages = [shared_border / perimeter * 100 for shared_border in shared_borders]
1720
- # Merge with the neighbor cell with the largest shared border percentage above the threshold
1721
- if shared_borders:
1722
- max_shared_border_index = np.argmax(shared_border_percentages)
1723
- max_shared_border_percentage = shared_border_percentages[max_shared_border_index]
1724
- if max_shared_border_percentage > perimeter_threshold:
1725
- cell_mask[labeled_cells == cell_label] = neighbor_cells[max_shared_border_index]
1726
-
1727
- # Relabel the merged cell mask
1728
- relabeled_cell_mask, _ = label(cell_mask, return_num=True)
1729
- return relabeled_cell_mask
1730
-
1731
- def adjust_cell_masks(parasite_folder, cell_folder, nuclei_folder, overlap_threshold=5, perimeter_threshold=30):
1732
- """
1733
- Process all npy files in the given folders. Merge and relabel cells in cell masks
1734
- based on parasite overlap and cell perimeter sharing conditions.
1735
-
1736
- Args:
1737
- parasite_folder (str): Path to the folder containing parasite masks.
1738
- cell_folder (str): Path to the folder containing cell masks.
1739
- nuclei_folder (str): Path to the folder containing nuclei masks.
1740
- overlap_threshold (float): The percentage threshold for merging cells based on parasite overlap.
1741
- perimeter_threshold (float): The percentage threshold for merging cells based on shared perimeter.
1742
- """
1743
-
1744
- parasite_files = sorted([f for f in os.listdir(parasite_folder) if f.endswith('.npy')])
1745
- cell_files = sorted([f for f in os.listdir(cell_folder) if f.endswith('.npy')])
1746
- nuclei_files = sorted([f for f in os.listdir(nuclei_folder) if f.endswith('.npy')])
1747
-
1748
- # Ensure there are matching files in all folders
1749
- if not (len(parasite_files) == len(cell_files) == len(nuclei_files)):
1750
- raise ValueError("The number of files in the folders do not match.")
1751
-
1752
- # Match files by name
1753
- for file_name in parasite_files:
1754
- parasite_path = os.path.join(parasite_folder, file_name)
1755
- cell_path = os.path.join(cell_folder, file_name)
1756
- nuclei_path = os.path.join(nuclei_folder, file_name)
1757
- # Check if the corresponding cell and nuclei mask files exist
1758
- if not (os.path.exists(cell_path) and os.path.exists(nuclei_path)):
1759
- raise ValueError(f"Corresponding cell or nuclei mask file for {file_name} not found.")
1760
- # Load the masks
1761
- parasite_mask = np.load(parasite_path)
1762
- cell_mask = np.load(cell_path)
1763
- nuclei_mask = np.load(nuclei_path)
1764
- # Merge and relabel cells
1765
- merged_cell_mask = _merge_cells_based_on_parasite_overlap(parasite_mask, cell_mask, nuclei_mask, overlap_threshold, perimeter_threshold)
1766
- # Overwrite the original cell mask file with the merged result
1767
- np.save(cell_path, merged_cell_mask)
1768
-
1769
- def process_masks(mask_folder, image_folder, channel, batch_size=50, n_clusters=2, plot=False):
1770
-
1771
- def read_files_in_batches(folder, batch_size=50):
1772
- files = [f for f in os.listdir(folder) if f.endswith('.npy')]
1773
- files.sort() # Sort to ensure matching order
1774
- for i in range(0, len(files), batch_size):
1775
- yield files[i:i + batch_size]
1776
-
1777
- def measure_morphology_and_intensity(mask, image):
1778
- properties = measure.regionprops(mask, intensity_image=image)
1779
- properties_list = [{'area': p.area, 'mean_intensity': p.mean_intensity, 'perimeter': p.perimeter, 'eccentricity': p.eccentricity} for p in properties]
1780
- return properties_list
1781
-
1782
- def cluster_objects(properties, n_clusters=2):
1783
- data = np.array([[p['area'], p['mean_intensity'], p['perimeter'], p['eccentricity']] for p in properties])
1784
- kmeans = KMeans(n_clusters=n_clusters, random_state=0).fit(data)
1785
- return kmeans
1786
-
1787
- def remove_objects_not_in_largest_cluster(mask, labels, largest_cluster_label):
1788
- cleaned_mask = np.zeros_like(mask)
1789
- for region in measure.regionprops(mask):
1790
- if labels[region.label - 1] == largest_cluster_label:
1791
- cleaned_mask[mask == region.label] = region.label
1792
- return cleaned_mask
1793
-
1794
- def plot_clusters(properties, labels):
1795
- data = np.array([[p['area'], p['mean_intensity'], p['perimeter'], p['eccentricity']] for p in properties])
1796
- pca = PCA(n_components=2)
1797
- data_2d = pca.fit_transform(data)
1798
- plt.scatter(data_2d[:, 0], data_2d[:, 1], c=labels, cmap='viridis')
1799
- plt.xlabel('PCA Component 1')
1800
- plt.ylabel('PCA Component 2')
1801
- plt.title('Object Clustering')
1802
- plt.show()
1803
-
1804
- all_properties = []
1805
-
1806
- # Step 1: Accumulate properties over all files
1807
- for batch in read_files_in_batches(mask_folder, batch_size):
1808
- mask_files = [os.path.join(mask_folder, file) for file in batch]
1809
- image_files = [os.path.join(image_folder, file) for file in batch]
1810
-
1811
- masks = [np.load(file) for file in mask_files]
1812
- images = [np.load(file)[:, :, channel] for file in image_files]
1813
-
1814
- for i, mask in enumerate(masks):
1815
- image = images[i]
1816
- # Measure morphology and intensity
1817
- properties = measure_morphology_and_intensity(mask, image)
1818
- all_properties.extend(properties)
1819
-
1820
- # Step 2: Perform clustering on accumulated properties
1821
- kmeans = cluster_objects(all_properties, n_clusters)
1822
- labels = kmeans.labels_
1823
-
1824
- if plot:
1825
- # Step 3: Plot clusters using PCA
1826
- plot_clusters(all_properties, labels)
1827
-
1828
- # Step 4: Remove objects not in the largest cluster and overwrite files in batches
1829
- label_index = 0
1830
- for batch in read_files_in_batches(mask_folder, batch_size):
1831
- mask_files = [os.path.join(mask_folder, file) for file in batch]
1832
- masks = [np.load(file) for file in mask_files]
1833
-
1834
- for i, mask in enumerate(masks):
1835
- batch_properties = measure_morphology_and_intensity(mask, mask)
1836
- batch_labels = labels[label_index:label_index + len(batch_properties)]
1837
- largest_cluster_label = np.bincount(batch_labels).argmax()
1838
- cleaned_mask = remove_objects_not_in_largest_cluster(mask, batch_labels, largest_cluster_label)
1839
- np.save(mask_files[i], cleaned_mask)
1840
- label_index += len(batch_properties)
1841
-
1842
1634
  def preprocess_generate_masks(src, settings={}):
1843
1635
 
1844
1636
  from .io import preprocess_img_data, _load_and_concatenate_arrays
1845
1637
  from .plot import plot_merged, plot_arrays
1846
1638
  from .utils import _pivot_counts_table, set_default_settings_preprocess_generate_masks, set_default_plot_merge_settings, check_mask_folder
1847
-
1639
+ from .utils import adjust_cell_masks, _merge_cells_based_on_parasite_overlap, process_masks
1640
+
1848
1641
  settings = set_default_settings_preprocess_generate_masks(src, settings)
1849
-
1850
1642
  settings_df = pd.DataFrame(list(settings.items()), columns=['Key', 'Value'])
1851
1643
  settings_csv = os.path.join(src,'settings','preprocess_generate_masks_settings.csv')
1852
1644
  os.makedirs(os.path.join(src,'settings'), exist_ok=True)
@@ -1907,7 +1699,8 @@ def preprocess_generate_masks(src, settings={}):
1907
1699
 
1908
1700
  adjust_cell_masks(parasite_folder, cell_folder, nuclei_folder, overlap_threshold=5, perimeter_threshold=30)
1909
1701
  stop = time.time()
1910
- print(f'Cell mask adjustment: {stop-start} seconds')
1702
+ adjust_time = (stop-start)/60
1703
+ print(f'Cell mask adjustment: {adjust_time} min.')
1911
1704
 
1912
1705
  if os.path.exists(os.path.join(src,'measurements')):
1913
1706
  _pivot_counts_table(db_path=os.path.join(src,'measurements', 'measurements.db'))
@@ -2583,6 +2376,7 @@ def generate_cellpose_masks(src, settings, object_type):
2583
2376
  if settings['save']:
2584
2377
  for mask_index, mask in enumerate(mask_stack):
2585
2378
  output_filename = os.path.join(output_folder, batch_filenames[mask_index])
2379
+ mask = mask.astype(np.uint16)
2586
2380
  np.save(output_filename, mask)
2587
2381
  mask_stack = []
2588
2382
  batch_filenames = []
@@ -3147,8 +2941,7 @@ def generate_image_umap(settings={}):
3147
2941
  """
3148
2942
 
3149
2943
  from .io import _read_and_join_tables
3150
- from .utils import get_db_paths, preprocess_data, reduction_and_clustering, remove_noise, generate_colors, correct_paths, plot_embedding, plot_clusters_grid, get_umap_image_settings
3151
- from .alpha import cluster_feature_analysis, generate_umap_from_images
2944
+ from .utils import get_db_paths, preprocess_data, reduction_and_clustering, remove_noise, generate_colors, correct_paths, plot_embedding, plot_clusters_grid, get_umap_image_settings, cluster_feature_analysis, generate_umap_from_images
3152
2945
 
3153
2946
  settings = get_umap_image_settings(settings)
3154
2947
 
@@ -1041,72 +1041,6 @@ def _normalize_img_batch(stack, backgrounds, remove_backgrounds, lower_percentil
1041
1041
 
1042
1042
  return normalized_stack.astype(save_dtype)
1043
1043
 
1044
- def _normalize_img_batch_v1(stack, backgrounds, remove_backgrounds, lower_percentile, save_dtype, signal_to_noise, signal_thresholds):
1045
- """
1046
- Normalize the stack of images.
1047
-
1048
- Args:
1049
- stack (numpy.ndarray): The stack of images to normalize.
1050
- backgrounds (list): Background values for each channel.
1051
- remove_backgrounds (list): Whether to remove background values for each channel.
1052
- lower_percentile (int): Lower percentile value for normalization.
1053
- save_dtype (numpy.dtype): Data type for saving the normalized stack.
1054
- signal_to_noise (list): Signal-to-noise ratio thresholds for each channel.
1055
- signal_thresholds (list): Signal thresholds for each channel.
1056
-
1057
- Returns:
1058
- numpy.ndarray: The normalized stack.
1059
- """
1060
- normalized_stack = np.zeros_like(stack, dtype=np.float32)
1061
- time_ls = []
1062
-
1063
- for chan_index, channel in enumerate(range(stack.shape[-1])):
1064
- single_channel = stack[:, :, :, channel]
1065
- background = backgrounds[chan_index]
1066
- signal_threshold = signal_thresholds[chan_index]
1067
- remove_background = remove_backgrounds[chan_index]
1068
- signal_2_noise = signal_to_noise[chan_index]
1069
- print(f'chan_index:{chan_index} background:{background} signal_threshold:{signal_threshold} remove_background:{remove_background} signal_2_noise:{signal_2_noise}')
1070
-
1071
- if remove_background:
1072
- single_channel[single_channel < background] = 0
1073
-
1074
- non_zero_single_channel = single_channel[single_channel != 0]
1075
- global_lower = np.percentile(non_zero_single_channel, lower_percentile)
1076
- for upper_p in np.linspace(98, 99.5, num=20).tolist():
1077
- global_upper = np.percentile(non_zero_single_channel, upper_p)
1078
- if global_upper >= signal_threshold:
1079
- break
1080
-
1081
- arr_2d_normalized = np.zeros_like(single_channel, dtype=single_channel.dtype)
1082
- signal_to_noise_ratio_ls = []
1083
- for array_index in range(single_channel.shape[0]):
1084
- start = time.time()
1085
- arr_2d = single_channel[array_index, :, :]
1086
- non_zero_arr_2d = arr_2d[arr_2d != 0]
1087
- if non_zero_arr_2d.size > 0:
1088
- lower, upper = np.percentile(non_zero_arr_2d, (lower_percentile, upper_p))
1089
- signal_to_noise_ratio = upper / lower
1090
- else:
1091
- signal_to_noise_ratio = 0
1092
- signal_to_noise_ratio_ls.append(signal_to_noise_ratio)
1093
- average_stnr = np.mean(signal_to_noise_ratio_ls) if len(signal_to_noise_ratio_ls) > 0 else 0
1094
-
1095
- if signal_to_noise_ratio > signal_2_noise:
1096
- arr_2d_rescaled = exposure.rescale_intensity(arr_2d, in_range=(lower, upper), out_range=(0, 1))
1097
- arr_2d_normalized[array_index, :, :] = arr_2d_rescaled
1098
- else:
1099
- arr_2d_normalized[array_index, :, :] = arr_2d
1100
- stop = time.time()
1101
- duration = (stop - start) * single_channel.shape[0]
1102
- time_ls.append(duration)
1103
- average_time = np.mean(time_ls) if len(time_ls) > 0 else 0
1104
- print(f'Progress: channels:{chan_index}/{stack.shape[-1] - 1}, arrays:{array_index + 1}/{single_channel.shape[0]}, Signal:{upper:.1f}, noise:{lower:.1f}, Signal-to-noise:{average_stnr:.1f}, Time/channel:{average_time:.2f}sec')
1105
-
1106
- normalized_stack[:, :, :, channel] = arr_2d_normalized
1107
-
1108
- return normalized_stack.astype(save_dtype)
1109
-
1110
1044
  def _get_lists_for_normalization(settings):
1111
1045
  """
1112
1046
  Get lists for normalization based on the provided settings.