spacr 0.0.70__py3-none-any.whl → 0.0.80__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- spacr/__init__.py +4 -1
- spacr/__main__.py +0 -7
- spacr/annotate_app.py +75 -61
- spacr/core.py +39 -246
- spacr/foldseek.py +6 -6
- spacr/get_alfafold_structures.py +3 -3
- spacr/io.py +53 -116
- spacr/measure.py +46 -59
- spacr/plot.py +117 -81
- spacr/sequencing.py +508 -491
- spacr/sim.py +24 -29
- spacr/utils.py +487 -260
- {spacr-0.0.70.dist-info → spacr-0.0.80.dist-info}/METADATA +10 -8
- spacr-0.0.80.dist-info/RECORD +36 -0
- spacr/graph_learning_lap.py +0 -84
- spacr/train.py +0 -667
- spacr/umap.py +0 -0
- spacr-0.0.70.dist-info/RECORD +0 -39
- {spacr-0.0.70.dist-info → spacr-0.0.80.dist-info}/LICENSE +0 -0
- {spacr-0.0.70.dist-info → spacr-0.0.80.dist-info}/WHEEL +0 -0
- {spacr-0.0.70.dist-info → spacr-0.0.80.dist-info}/entry_points.txt +0 -0
- {spacr-0.0.70.dist-info → spacr-0.0.80.dist-info}/top_level.txt +0 -0
spacr/__init__.py
CHANGED
@@ -8,16 +8,18 @@ from . import utils
|
|
8
8
|
from . import plot
|
9
9
|
from . import measure
|
10
10
|
from . import sim
|
11
|
+
from . import sequencing
|
11
12
|
from . import timelapse
|
12
13
|
from . import deep_spacr
|
13
|
-
from . import mask_app
|
14
14
|
from . import annotate_app
|
15
15
|
from . import gui_utils
|
16
|
+
from . import mask_app
|
16
17
|
from . import gui_mask_app
|
17
18
|
from . import gui_measure_app
|
18
19
|
from . import gui_classify_app
|
19
20
|
from . import logger
|
20
21
|
|
22
|
+
|
21
23
|
__all__ = [
|
22
24
|
"core",
|
23
25
|
"io",
|
@@ -25,6 +27,7 @@ __all__ = [
|
|
25
27
|
"plot",
|
26
28
|
"measure",
|
27
29
|
"sim",
|
30
|
+
"sequencing"
|
28
31
|
"timelapse",
|
29
32
|
"deep_spacr",
|
30
33
|
"annotate_app",
|
spacr/__main__.py
CHANGED
@@ -2,12 +2,5 @@
|
|
2
2
|
Copyright © 2024 Something
|
3
3
|
"""
|
4
4
|
|
5
|
-
import sys, os, glob, pathlib, time
|
6
|
-
import numpy as np
|
7
|
-
from natsort import natsorted
|
8
|
-
from tqdm import tqdm
|
9
|
-
#from spacr import utils, io, version, timelapse, plot, core, mask_app, annotate_app
|
10
|
-
import logging
|
11
|
-
|
12
5
|
if __name__ == "__main__":
|
13
6
|
main()
|
spacr/annotate_app.py
CHANGED
@@ -10,13 +10,16 @@ from IPython.display import display, HTML
|
|
10
10
|
import tkinter as tk
|
11
11
|
from tkinter import ttk
|
12
12
|
from ttkthemes import ThemedTk
|
13
|
+
from skimage.exposure import rescale_intensity
|
14
|
+
import cv2
|
15
|
+
import matplotlib.pyplot as plt
|
13
16
|
|
14
17
|
from .logger import log_function_call
|
15
18
|
|
16
19
|
from .gui_utils import ScrollableFrame, set_default_font, set_dark_style, create_dark_mode, style_text_boxes, create_menu_bar
|
17
20
|
|
18
21
|
class ImageApp:
|
19
|
-
def __init__(self, root, db_path, src, image_type=None, channels=None, grid_rows=None, grid_cols=None, image_size=(200, 200), annotation_column='annotate'):
|
22
|
+
def __init__(self, root, db_path, src, image_type=None, channels=None, grid_rows=None, grid_cols=None, image_size=(200, 200), annotation_column='annotate', normalize=False, percentiles=(1,99)):
|
20
23
|
"""
|
21
24
|
Initializes an instance of the ImageApp class.
|
22
25
|
|
@@ -30,6 +33,7 @@ class ImageApp:
|
|
30
33
|
- grid_cols (int): The number of columns in the image grid.
|
31
34
|
- image_size (tuple): The size of the displayed images.
|
32
35
|
- annotation_column (str): The column name for image annotations in the database.
|
36
|
+
- normalize (bool): Whether to rescale image intensities to the percentile range given by `percentiles` (1st and 99th by default). Defaults to False.
|
33
37
|
"""
|
34
38
|
self.root = root
|
35
39
|
self.db_path = db_path
|
@@ -41,6 +45,8 @@ class ImageApp:
|
|
41
45
|
self.annotation_column = annotation_column
|
42
46
|
self.image_type = image_type
|
43
47
|
self.channels = channels
|
48
|
+
self.normalize = normalize
|
49
|
+
self.percentiles = percentiles
|
44
50
|
self.images = {}
|
45
51
|
self.pending_updates = {}
|
46
52
|
self.labels = []
|
@@ -119,49 +125,80 @@ class ImageApp:
|
|
119
125
|
label.bind('<Button-3>', self.get_on_image_click(path, label, img))
|
120
126
|
|
121
127
|
self.root.update()
|
122
|
-
|
128
|
+
|
129
|
+
def load_single_image(self, path_annotation_tuple):
|
130
|
+
"""
|
131
|
+
Loads a single image from the given path and annotation tuple.
|
132
|
+
|
133
|
+
Args:
|
134
|
+
path_annotation_tuple (tuple): A tuple containing the image path and its annotation.
|
135
|
+
|
136
|
+
Returns:
|
137
|
+
img (PIL.Image.Image): The loaded image.
|
138
|
+
annotation: The annotation associated with the image.
|
139
|
+
"""
|
140
|
+
path, annotation = path_annotation_tuple
|
141
|
+
img = Image.open(path)
|
142
|
+
img = self.normalize_image(img, self.normalize, self.percentiles)
|
143
|
+
img = img.convert('RGB')
|
144
|
+
img = self.filter_channels(img)
|
145
|
+
img = img.resize(self.image_size)
|
146
|
+
return img, annotation
|
147
|
+
|
123
148
|
@staticmethod
|
124
|
-
def normalize_image(img):
|
149
|
+
def normalize_image(img, normalize=False, percentiles=(1, 99)):
|
125
150
|
"""
|
126
|
-
Normalize the pixel values of an image
|
151
|
+
Optionally rescale the pixel values of an image to the given percentile range (per channel for multi-channel images), clip them to [0, 255],
|
152
|
+
and ensure the image is exported as 8-bit.
|
127
153
|
|
128
154
|
Parameters:
|
129
|
-
- img: PIL.Image.Image
|
130
|
-
|
155
|
+
- img: PIL.Image.Image. The input image to be normalized.
|
156
|
+
- normalize: bool. Whether to rescale intensities to the range spanned by `percentiles`; if False, values are only clipped to [0, 255].
|
157
|
+
- percentiles: tuple. The percentiles to use for normalization.
|
131
158
|
|
132
159
|
Returns:
|
133
|
-
- PIL.Image.Image
|
134
|
-
The normalized image.
|
160
|
+
- PIL.Image.Image. The normalized and 8-bit converted image.
|
135
161
|
"""
|
136
162
|
img_array = np.array(img)
|
137
|
-
img_array = ((img_array - img_array.min()) * (1/(img_array.max() - img_array.min()) * 255)).astype('uint8')
|
138
|
-
return Image.fromarray(img_array)
|
139
163
|
|
164
|
+
if normalize:
|
165
|
+
if img_array.ndim == 2: # Grayscale image
|
166
|
+
p2, p98 = np.percentile(img_array, percentiles)
|
167
|
+
img_array = rescale_intensity(img_array, in_range=(p2, p98), out_range=(0, 255))
|
168
|
+
else: # Color image or multi-channel image
|
169
|
+
for channel in range(img_array.shape[2]):
|
170
|
+
p2, p98 = np.percentile(img_array[:, :, channel], percentiles)
|
171
|
+
img_array[:, :, channel] = rescale_intensity(img_array[:, :, channel], in_range=(p2, p98), out_range=(0, 255))
|
172
|
+
|
173
|
+
img_array = np.clip(img_array, 0, 255).astype('uint8')
|
174
|
+
|
175
|
+
return Image.fromarray(img_array)
|
176
|
+
|
140
177
|
def add_colored_border(self, img, border_width, border_color):
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
178
|
+
"""
|
179
|
+
Adds a colored border to an image.
|
180
|
+
|
181
|
+
Args:
|
182
|
+
img (PIL.Image.Image): The input image.
|
183
|
+
border_width (int): The width of the border in pixels.
|
184
|
+
border_color (str): The color of the border in RGB format.
|
185
|
+
|
186
|
+
Returns:
|
187
|
+
PIL.Image.Image: The image with the colored border.
|
188
|
+
"""
|
189
|
+
top_border = Image.new('RGB', (img.width, border_width), color=border_color)
|
190
|
+
bottom_border = Image.new('RGB', (img.width, border_width), color=border_color)
|
191
|
+
left_border = Image.new('RGB', (border_width, img.height), color=border_color)
|
192
|
+
right_border = Image.new('RGB', (border_width, img.height), color=border_color)
|
193
|
+
|
194
|
+
bordered_img = Image.new('RGB', (img.width + 2 * border_width, img.height + 2 * border_width), color='white')
|
195
|
+
bordered_img.paste(top_border, (border_width, 0))
|
196
|
+
bordered_img.paste(bottom_border, (border_width, img.height + border_width))
|
197
|
+
bordered_img.paste(left_border, (0, border_width))
|
198
|
+
bordered_img.paste(right_border, (img.width + border_width, border_width))
|
199
|
+
bordered_img.paste(img, (border_width, border_width))
|
200
|
+
|
201
|
+
return bordered_img
|
165
202
|
|
166
203
|
def filter_channels(self, img):
|
167
204
|
"""
|
@@ -189,26 +226,6 @@ class ImageApp:
|
|
189
226
|
|
190
227
|
return Image.merge("RGB", (r, g, b))
|
191
228
|
|
192
|
-
def load_single_image(self, path_annotation_tuple):
|
193
|
-
"""
|
194
|
-
Loads a single image from the given path and annotation tuple.
|
195
|
-
|
196
|
-
Args:
|
197
|
-
path_annotation_tuple (tuple): A tuple containing the image path and its annotation.
|
198
|
-
|
199
|
-
Returns:
|
200
|
-
img (PIL.Image.Image): The loaded image.
|
201
|
-
annotation: The annotation associated with the image.
|
202
|
-
"""
|
203
|
-
path, annotation = path_annotation_tuple
|
204
|
-
img = Image.open(path)
|
205
|
-
if img.mode == "I":
|
206
|
-
img = self.normalize_image(img)
|
207
|
-
img = img.convert('RGB')
|
208
|
-
img = self.filter_channels(img)
|
209
|
-
img = img.resize(self.image_size)
|
210
|
-
return img, annotation
|
211
|
-
|
212
229
|
def get_on_image_click(self, path, label, img):
|
213
230
|
"""
|
214
231
|
Returns a callback function that handles the click event on an image.
|
@@ -244,7 +261,7 @@ class ImageApp:
|
|
244
261
|
self.root.update()
|
245
262
|
|
246
263
|
return on_image_click
|
247
|
-
|
264
|
+
|
248
265
|
@staticmethod
|
249
266
|
def update_html(text):
|
250
267
|
display(HTML(f"""
|
@@ -349,7 +366,7 @@ class ImageApp:
|
|
349
366
|
self.root.destroy()
|
350
367
|
print(f'Quit application')
|
351
368
|
|
352
|
-
def annotate(src, image_type=None, channels=None, geom="1000x1100", img_size=(200, 200), rows=5, columns=5, annotation_column='annotate'):
|
369
|
+
def annotate(src, image_type=None, channels=None, geom="1000x1100", img_size=(200, 200), rows=5, columns=5, annotation_column='annotate', normalize=False, percentiles=(1,99)):
|
353
370
|
"""
|
354
371
|
Annotates images in a database using a graphical user interface.
|
355
372
|
|
@@ -363,11 +380,9 @@ def annotate(src, image_type=None, channels=None, geom="1000x1100", img_size=(20
|
|
363
380
|
rows (int, optional): The number of rows in the image grid. Defaults to 5.
|
364
381
|
columns (int, optional): The number of columns in the image grid. Defaults to 5.
|
365
382
|
annotation_column (str, optional): The name of the annotation column in the database table. Defaults to 'annotate'.
|
383
|
+
normalize (bool, optional): Whether to rescale image intensities to the percentile range given by `percentiles` (1st and 99th by default). Defaults to False.
|
366
384
|
"""
|
367
385
|
db = os.path.join(src, 'measurements/measurements.db')
|
368
|
-
#print('src', src)
|
369
|
-
#print('db', db)
|
370
|
-
|
371
386
|
conn = sqlite3.connect(db)
|
372
387
|
c = conn.cursor()
|
373
388
|
c.execute('PRAGMA table_info(png_list)')
|
@@ -379,7 +394,7 @@ def annotate(src, image_type=None, channels=None, geom="1000x1100", img_size=(20
|
|
379
394
|
|
380
395
|
root = tk.Tk()
|
381
396
|
root.geometry(geom)
|
382
|
-
app = ImageApp(root, db, src, image_type=image_type, channels=channels, image_size=img_size, grid_rows=rows, grid_cols=columns, annotation_column=annotation_column)
|
397
|
+
app = ImageApp(root, db, src, image_type=image_type, channels=channels, image_size=img_size, grid_rows=rows, grid_cols=columns, annotation_column=annotation_column, normalize=normalize, percentiles=percentiles)
|
383
398
|
next_button = tk.Button(root, text="Next", command=app.next_page)
|
384
399
|
next_button.grid(row=app.grid_rows, column=app.grid_cols - 1)
|
385
400
|
back_button = tk.Button(root, text="Back", command=app.previous_page)
|
@@ -390,7 +405,6 @@ def annotate(src, image_type=None, channels=None, geom="1000x1100", img_size=(20
|
|
390
405
|
app.load_images()
|
391
406
|
root.mainloop()
|
392
407
|
|
393
|
-
|
394
408
|
def check_for_duplicates(db):
|
395
409
|
"""
|
396
410
|
Check for duplicates in the given SQLite database.
|
spacr/core.py
CHANGED
@@ -15,12 +15,8 @@ from multiprocessing import Pool, cpu_count, Value, Lock
|
|
15
15
|
import seaborn as sns
|
16
16
|
|
17
17
|
from skimage.measure import regionprops, label
|
18
|
-
from skimage.morphology import square
|
19
18
|
from skimage.transform import resize as resizescikit
|
20
|
-
from
|
21
|
-
from torch.utils.data import DataLoader, random_split
|
22
|
-
from sklearn.cluster import KMeans
|
23
|
-
from sklearn.decomposition import PCA
|
19
|
+
from torch.utils.data import DataLoader
|
24
20
|
|
25
21
|
from skimage import measure
|
26
22
|
from sklearn.model_selection import train_test_split
|
@@ -30,7 +26,6 @@ from sklearn.inspection import permutation_importance
|
|
30
26
|
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
|
31
27
|
from sklearn.preprocessing import StandardScaler
|
32
28
|
|
33
|
-
from scipy.ndimage import binary_dilation
|
34
29
|
from scipy.spatial.distance import cosine, euclidean, mahalanobis, cityblock, minkowski, chebyshev, hamming, jaccard, braycurtis
|
35
30
|
|
36
31
|
import torchvision.transforms as transforms
|
@@ -40,7 +35,6 @@ import shap
|
|
40
35
|
import matplotlib.pyplot as plt
|
41
36
|
import matplotlib
|
42
37
|
matplotlib.use('Agg')
|
43
|
-
#import matplotlib.pyplot as plt
|
44
38
|
|
45
39
|
from .logger import log_function_call
|
46
40
|
|
@@ -1637,216 +1631,14 @@ def analyze_recruitment(src, metadata_settings, advanced_settings):
|
|
1637
1631
|
cells,wells = _results_to_csv(src, df, df_well)
|
1638
1632
|
return [cells,wells]
|
1639
1633
|
|
1640
|
-
def _merge_cells_based_on_parasite_overlap(parasite_mask, cell_mask, nuclei_mask, overlap_threshold=5, perimeter_threshold=30):
|
1641
|
-
"""
|
1642
|
-
Merge cells in cell_mask if a parasite in parasite_mask overlaps with more than one cell,
|
1643
|
-
and if cells share more than a specified perimeter percentage.
|
1644
|
-
|
1645
|
-
Args:
|
1646
|
-
parasite_mask (ndarray): Mask of parasites.
|
1647
|
-
cell_mask (ndarray): Mask of cells.
|
1648
|
-
nuclei_mask (ndarray): Mask of nuclei.
|
1649
|
-
overlap_threshold (float): The percentage threshold for merging cells based on parasite overlap.
|
1650
|
-
perimeter_threshold (float): The percentage threshold for merging cells based on shared perimeter.
|
1651
|
-
|
1652
|
-
Returns:
|
1653
|
-
ndarray: The modified cell mask (cell_mask) with unique labels.
|
1654
|
-
"""
|
1655
|
-
labeled_cells = label(cell_mask)
|
1656
|
-
labeled_parasites = label(parasite_mask)
|
1657
|
-
labeled_nuclei = label(nuclei_mask)
|
1658
|
-
num_parasites = np.max(labeled_parasites)
|
1659
|
-
num_cells = np.max(labeled_cells)
|
1660
|
-
num_nuclei = np.max(labeled_nuclei)
|
1661
|
-
|
1662
|
-
# Merge cells based on parasite overlap
|
1663
|
-
for parasite_id in range(1, num_parasites + 1):
|
1664
|
-
current_parasite_mask = labeled_parasites == parasite_id
|
1665
|
-
overlapping_cell_labels = np.unique(labeled_cells[current_parasite_mask])
|
1666
|
-
overlapping_cell_labels = overlapping_cell_labels[overlapping_cell_labels != 0]
|
1667
|
-
if len(overlapping_cell_labels) > 1:
|
1668
|
-
# Calculate the overlap percentages
|
1669
|
-
overlap_percentages = [
|
1670
|
-
np.sum(current_parasite_mask & (labeled_cells == cell_label)) / np.sum(current_parasite_mask) * 100
|
1671
|
-
for cell_label in overlapping_cell_labels
|
1672
|
-
]
|
1673
|
-
# Merge cells if overlap percentage is above the threshold
|
1674
|
-
for cell_label, overlap_percentage in zip(overlapping_cell_labels, overlap_percentages):
|
1675
|
-
if overlap_percentage > overlap_threshold:
|
1676
|
-
first_label = overlapping_cell_labels[0]
|
1677
|
-
for other_label in overlapping_cell_labels[1:]:
|
1678
|
-
if other_label != first_label:
|
1679
|
-
cell_mask[cell_mask == other_label] = first_label
|
1680
|
-
|
1681
|
-
# Merge cells based on nucleus overlap
|
1682
|
-
for nucleus_id in range(1, num_nuclei + 1):
|
1683
|
-
current_nucleus_mask = labeled_nuclei == nucleus_id
|
1684
|
-
overlapping_cell_labels = np.unique(labeled_cells[current_nucleus_mask])
|
1685
|
-
overlapping_cell_labels = overlapping_cell_labels[overlapping_cell_labels != 0]
|
1686
|
-
if len(overlapping_cell_labels) > 1:
|
1687
|
-
# Calculate the overlap percentages
|
1688
|
-
overlap_percentages = [
|
1689
|
-
np.sum(current_nucleus_mask & (labeled_cells == cell_label)) / np.sum(current_nucleus_mask) * 100
|
1690
|
-
for cell_label in overlapping_cell_labels
|
1691
|
-
]
|
1692
|
-
# Merge cells if overlap percentage is above the threshold for each cell
|
1693
|
-
if all(overlap_percentage > overlap_threshold for overlap_percentage in overlap_percentages):
|
1694
|
-
first_label = overlapping_cell_labels[0]
|
1695
|
-
for other_label in overlapping_cell_labels[1:]:
|
1696
|
-
if other_label != first_label:
|
1697
|
-
cell_mask[cell_mask == other_label] = first_label
|
1698
|
-
|
1699
|
-
# Check for cells without nuclei and merge based on shared perimeter
|
1700
|
-
labeled_cells = label(cell_mask) # Re-label after merging based on overlap
|
1701
|
-
cell_regions = regionprops(labeled_cells)
|
1702
|
-
for region in cell_regions:
|
1703
|
-
cell_label = region.label
|
1704
|
-
cell_mask_binary = labeled_cells == cell_label
|
1705
|
-
overlapping_nuclei = np.unique(nuclei_mask[cell_mask_binary])
|
1706
|
-
overlapping_nuclei = overlapping_nuclei[overlapping_nuclei != 0]
|
1707
|
-
|
1708
|
-
if len(overlapping_nuclei) == 0:
|
1709
|
-
# Cell does not overlap with any nucleus
|
1710
|
-
perimeter = region.perimeter
|
1711
|
-
# Dilate the cell to find neighbors
|
1712
|
-
dilated_cell = binary_dilation(cell_mask_binary, structure=square(3))
|
1713
|
-
neighbor_cells = np.unique(labeled_cells[dilated_cell])
|
1714
|
-
neighbor_cells = neighbor_cells[(neighbor_cells != 0) & (neighbor_cells != cell_label)]
|
1715
|
-
# Calculate shared border length with neighboring cells
|
1716
|
-
shared_borders = [
|
1717
|
-
np.sum((labeled_cells == neighbor_label) & dilated_cell) for neighbor_label in neighbor_cells
|
1718
|
-
]
|
1719
|
-
shared_border_percentages = [shared_border / perimeter * 100 for shared_border in shared_borders]
|
1720
|
-
# Merge with the neighbor cell with the largest shared border percentage above the threshold
|
1721
|
-
if shared_borders:
|
1722
|
-
max_shared_border_index = np.argmax(shared_border_percentages)
|
1723
|
-
max_shared_border_percentage = shared_border_percentages[max_shared_border_index]
|
1724
|
-
if max_shared_border_percentage > perimeter_threshold:
|
1725
|
-
cell_mask[labeled_cells == cell_label] = neighbor_cells[max_shared_border_index]
|
1726
|
-
|
1727
|
-
# Relabel the merged cell mask
|
1728
|
-
relabeled_cell_mask, _ = label(cell_mask, return_num=True)
|
1729
|
-
return relabeled_cell_mask
|
1730
|
-
|
1731
|
-
def adjust_cell_masks(parasite_folder, cell_folder, nuclei_folder, overlap_threshold=5, perimeter_threshold=30):
|
1732
|
-
"""
|
1733
|
-
Process all npy files in the given folders. Merge and relabel cells in cell masks
|
1734
|
-
based on parasite overlap and cell perimeter sharing conditions.
|
1735
|
-
|
1736
|
-
Args:
|
1737
|
-
parasite_folder (str): Path to the folder containing parasite masks.
|
1738
|
-
cell_folder (str): Path to the folder containing cell masks.
|
1739
|
-
nuclei_folder (str): Path to the folder containing nuclei masks.
|
1740
|
-
overlap_threshold (float): The percentage threshold for merging cells based on parasite overlap.
|
1741
|
-
perimeter_threshold (float): The percentage threshold for merging cells based on shared perimeter.
|
1742
|
-
"""
|
1743
|
-
|
1744
|
-
parasite_files = sorted([f for f in os.listdir(parasite_folder) if f.endswith('.npy')])
|
1745
|
-
cell_files = sorted([f for f in os.listdir(cell_folder) if f.endswith('.npy')])
|
1746
|
-
nuclei_files = sorted([f for f in os.listdir(nuclei_folder) if f.endswith('.npy')])
|
1747
|
-
|
1748
|
-
# Ensure there are matching files in all folders
|
1749
|
-
if not (len(parasite_files) == len(cell_files) == len(nuclei_files)):
|
1750
|
-
raise ValueError("The number of files in the folders do not match.")
|
1751
|
-
|
1752
|
-
# Match files by name
|
1753
|
-
for file_name in parasite_files:
|
1754
|
-
parasite_path = os.path.join(parasite_folder, file_name)
|
1755
|
-
cell_path = os.path.join(cell_folder, file_name)
|
1756
|
-
nuclei_path = os.path.join(nuclei_folder, file_name)
|
1757
|
-
# Check if the corresponding cell and nuclei mask files exist
|
1758
|
-
if not (os.path.exists(cell_path) and os.path.exists(nuclei_path)):
|
1759
|
-
raise ValueError(f"Corresponding cell or nuclei mask file for {file_name} not found.")
|
1760
|
-
# Load the masks
|
1761
|
-
parasite_mask = np.load(parasite_path)
|
1762
|
-
cell_mask = np.load(cell_path)
|
1763
|
-
nuclei_mask = np.load(nuclei_path)
|
1764
|
-
# Merge and relabel cells
|
1765
|
-
merged_cell_mask = _merge_cells_based_on_parasite_overlap(parasite_mask, cell_mask, nuclei_mask, overlap_threshold, perimeter_threshold)
|
1766
|
-
# Overwrite the original cell mask file with the merged result
|
1767
|
-
np.save(cell_path, merged_cell_mask)
|
1768
|
-
|
1769
|
-
def process_masks(mask_folder, image_folder, channel, batch_size=50, n_clusters=2, plot=False):
|
1770
|
-
|
1771
|
-
def read_files_in_batches(folder, batch_size=50):
|
1772
|
-
files = [f for f in os.listdir(folder) if f.endswith('.npy')]
|
1773
|
-
files.sort() # Sort to ensure matching order
|
1774
|
-
for i in range(0, len(files), batch_size):
|
1775
|
-
yield files[i:i + batch_size]
|
1776
|
-
|
1777
|
-
def measure_morphology_and_intensity(mask, image):
|
1778
|
-
properties = measure.regionprops(mask, intensity_image=image)
|
1779
|
-
properties_list = [{'area': p.area, 'mean_intensity': p.mean_intensity, 'perimeter': p.perimeter, 'eccentricity': p.eccentricity} for p in properties]
|
1780
|
-
return properties_list
|
1781
|
-
|
1782
|
-
def cluster_objects(properties, n_clusters=2):
|
1783
|
-
data = np.array([[p['area'], p['mean_intensity'], p['perimeter'], p['eccentricity']] for p in properties])
|
1784
|
-
kmeans = KMeans(n_clusters=n_clusters, random_state=0).fit(data)
|
1785
|
-
return kmeans
|
1786
|
-
|
1787
|
-
def remove_objects_not_in_largest_cluster(mask, labels, largest_cluster_label):
|
1788
|
-
cleaned_mask = np.zeros_like(mask)
|
1789
|
-
for region in measure.regionprops(mask):
|
1790
|
-
if labels[region.label - 1] == largest_cluster_label:
|
1791
|
-
cleaned_mask[mask == region.label] = region.label
|
1792
|
-
return cleaned_mask
|
1793
|
-
|
1794
|
-
def plot_clusters(properties, labels):
|
1795
|
-
data = np.array([[p['area'], p['mean_intensity'], p['perimeter'], p['eccentricity']] for p in properties])
|
1796
|
-
pca = PCA(n_components=2)
|
1797
|
-
data_2d = pca.fit_transform(data)
|
1798
|
-
plt.scatter(data_2d[:, 0], data_2d[:, 1], c=labels, cmap='viridis')
|
1799
|
-
plt.xlabel('PCA Component 1')
|
1800
|
-
plt.ylabel('PCA Component 2')
|
1801
|
-
plt.title('Object Clustering')
|
1802
|
-
plt.show()
|
1803
|
-
|
1804
|
-
all_properties = []
|
1805
|
-
|
1806
|
-
# Step 1: Accumulate properties over all files
|
1807
|
-
for batch in read_files_in_batches(mask_folder, batch_size):
|
1808
|
-
mask_files = [os.path.join(mask_folder, file) for file in batch]
|
1809
|
-
image_files = [os.path.join(image_folder, file) for file in batch]
|
1810
|
-
|
1811
|
-
masks = [np.load(file) for file in mask_files]
|
1812
|
-
images = [np.load(file)[:, :, channel] for file in image_files]
|
1813
|
-
|
1814
|
-
for i, mask in enumerate(masks):
|
1815
|
-
image = images[i]
|
1816
|
-
# Measure morphology and intensity
|
1817
|
-
properties = measure_morphology_and_intensity(mask, image)
|
1818
|
-
all_properties.extend(properties)
|
1819
|
-
|
1820
|
-
# Step 2: Perform clustering on accumulated properties
|
1821
|
-
kmeans = cluster_objects(all_properties, n_clusters)
|
1822
|
-
labels = kmeans.labels_
|
1823
|
-
|
1824
|
-
if plot:
|
1825
|
-
# Step 3: Plot clusters using PCA
|
1826
|
-
plot_clusters(all_properties, labels)
|
1827
|
-
|
1828
|
-
# Step 4: Remove objects not in the largest cluster and overwrite files in batches
|
1829
|
-
label_index = 0
|
1830
|
-
for batch in read_files_in_batches(mask_folder, batch_size):
|
1831
|
-
mask_files = [os.path.join(mask_folder, file) for file in batch]
|
1832
|
-
masks = [np.load(file) for file in mask_files]
|
1833
|
-
|
1834
|
-
for i, mask in enumerate(masks):
|
1835
|
-
batch_properties = measure_morphology_and_intensity(mask, mask)
|
1836
|
-
batch_labels = labels[label_index:label_index + len(batch_properties)]
|
1837
|
-
largest_cluster_label = np.bincount(batch_labels).argmax()
|
1838
|
-
cleaned_mask = remove_objects_not_in_largest_cluster(mask, batch_labels, largest_cluster_label)
|
1839
|
-
np.save(mask_files[i], cleaned_mask)
|
1840
|
-
label_index += len(batch_properties)
|
1841
|
-
|
1842
1634
|
def preprocess_generate_masks(src, settings={}):
|
1843
1635
|
|
1844
1636
|
from .io import preprocess_img_data, _load_and_concatenate_arrays
|
1845
1637
|
from .plot import plot_merged, plot_arrays
|
1846
1638
|
from .utils import _pivot_counts_table, set_default_settings_preprocess_generate_masks, set_default_plot_merge_settings, check_mask_folder
|
1847
|
-
|
1639
|
+
from .utils import adjust_cell_masks, _merge_cells_based_on_parasite_overlap, process_masks
|
1640
|
+
|
1848
1641
|
settings = set_default_settings_preprocess_generate_masks(src, settings)
|
1849
|
-
|
1850
1642
|
settings_df = pd.DataFrame(list(settings.items()), columns=['Key', 'Value'])
|
1851
1643
|
settings_csv = os.path.join(src,'settings','preprocess_generate_masks_settings.csv')
|
1852
1644
|
os.makedirs(os.path.join(src,'settings'), exist_ok=True)
|
@@ -1907,7 +1699,8 @@ def preprocess_generate_masks(src, settings={}):
|
|
1907
1699
|
|
1908
1700
|
adjust_cell_masks(parasite_folder, cell_folder, nuclei_folder, overlap_threshold=5, perimeter_threshold=30)
|
1909
1701
|
stop = time.time()
|
1910
|
-
|
1702
|
+
adjust_time = (stop-start)/60
|
1703
|
+
print(f'Cell mask adjustment: {adjust_time} min.')
|
1911
1704
|
|
1912
1705
|
if os.path.exists(os.path.join(src,'measurements')):
|
1913
1706
|
_pivot_counts_table(db_path=os.path.join(src,'measurements', 'measurements.db'))
|
@@ -2583,6 +2376,7 @@ def generate_cellpose_masks(src, settings, object_type):
|
|
2583
2376
|
if settings['save']:
|
2584
2377
|
for mask_index, mask in enumerate(mask_stack):
|
2585
2378
|
output_filename = os.path.join(output_folder, batch_filenames[mask_index])
|
2379
|
+
mask = mask.astype(np.uint16)
|
2586
2380
|
np.save(output_filename, mask)
|
2587
2381
|
mask_stack = []
|
2588
2382
|
batch_filenames = []
|
@@ -3118,37 +2912,36 @@ def generate_image_umap(settings={}):
|
|
3118
2912
|
|
3119
2913
|
Parameters:
|
3120
2914
|
settings (dict): Dictionary containing the following keys:
|
3121
|
-
|
3122
|
-
|
3123
|
-
|
3124
|
-
|
3125
|
-
|
3126
|
-
|
3127
|
-
|
3128
|
-
|
3129
|
-
|
3130
|
-
|
3131
|
-
|
3132
|
-
|
3133
|
-
|
3134
|
-
|
3135
|
-
|
3136
|
-
|
3137
|
-
|
3138
|
-
|
3139
|
-
|
3140
|
-
|
3141
|
-
|
3142
|
-
|
3143
|
-
|
2915
|
+
src (str): Source directory containing the data.
|
2916
|
+
row_limit (int): Limit the number of rows to process.
|
2917
|
+
tables (list): List of table names to read from the database.
|
2918
|
+
visualize (str): Visualization type.
|
2919
|
+
image_nr (int): Number of images to display.
|
2920
|
+
dot_size (int): Size of dots in the scatter plot.
|
2921
|
+
n_neighbors (int): Number of neighbors for UMAP.
|
2922
|
+
figuresize (int): Size of the figure.
|
2923
|
+
black_background (bool): Whether to use a black background.
|
2924
|
+
remove_image_canvas (bool): Whether to remove the image canvas.
|
2925
|
+
plot_outlines (bool): Whether to plot outlines.
|
2926
|
+
plot_points (bool): Whether to plot points.
|
2927
|
+
smooth_lines (bool): Whether to smooth lines.
|
2928
|
+
verbose (bool): Whether to print verbose output.
|
2929
|
+
embedding_by_controls (bool): Whether to use embedding from controls.
|
2930
|
+
col_to_compare (str): Column to compare for control-based embedding.
|
2931
|
+
pos (str): Positive control value.
|
2932
|
+
neg (str): Negative control value.
|
2933
|
+
clustering (str): Clustering method ('DBSCAN' or 'KMeans').
|
2934
|
+
exclude (list): List of columns to exclude from the analysis.
|
2935
|
+
plot_images (bool): Whether to plot images.
|
2936
|
+
reduction_method (str): Dimensionality reduction method ('UMAP' or 'tSNE').
|
2937
|
+
save_figure (bool): Whether to save the figure as a PDF.
|
3144
2938
|
|
3145
2939
|
Returns:
|
3146
2940
|
pd.DataFrame: DataFrame with the original data and an additional column 'cluster' containing the cluster identity.
|
3147
2941
|
"""
|
3148
2942
|
|
3149
2943
|
from .io import _read_and_join_tables
|
3150
|
-
from .utils import get_db_paths, preprocess_data, reduction_and_clustering, remove_noise, generate_colors, correct_paths, plot_embedding, plot_clusters_grid, get_umap_image_settings
|
3151
|
-
from .alpha import cluster_feature_analysis, generate_umap_from_images
|
2944
|
+
from .utils import get_db_paths, preprocess_data, reduction_and_clustering, remove_noise, generate_colors, correct_paths, plot_embedding, plot_clusters_grid, get_umap_image_settings, cluster_feature_analysis, generate_umap_from_images
|
3152
2945
|
|
3153
2946
|
settings = get_umap_image_settings(settings)
|
3154
2947
|
|
@@ -3311,15 +3104,15 @@ def reducer_hyperparameter_search(settings={}, reduction_params=None, dbscan_par
|
|
3311
3104
|
|
3312
3105
|
Parameters:
|
3313
3106
|
settings (dict): Dictionary containing the following keys:
|
3314
|
-
|
3315
|
-
|
3316
|
-
|
3317
|
-
|
3318
|
-
|
3319
|
-
|
3320
|
-
|
3321
|
-
|
3322
|
-
|
3107
|
+
src (str): Source directory containing the data.
|
3108
|
+
row_limit (int): Limit the number of rows to process.
|
3109
|
+
tables (list): List of table names to read from the database.
|
3110
|
+
filter_by (str): Column to filter the data.
|
3111
|
+
sample_size (int): Number of samples to use for the hyperparameter search.
|
3112
|
+
remove_highly_correlated (bool): Whether to remove highly correlated columns.
|
3113
|
+
log_data (bool): Whether to log-transform the data.
|
3114
|
+
verbose (bool): Whether to print verbose output.
|
3115
|
+
reduction_method (str): Dimensionality reduction method ('UMAP' or 'tSNE').
|
3323
3116
|
reduction_params (list): List of dictionaries containing hyperparameters to test for the reduction method.
|
3324
3117
|
dbscan_params (list): List of dictionaries containing DBSCAN hyperparameters to test.
|
3325
3118
|
kmeans_params (list): List of dictionaries containing KMeans hyperparameters to test.
|
spacr/foldseek.py
CHANGED
@@ -762,18 +762,18 @@ def analyze_results(foldseek_csv_path, base_dir):
|
|
762
762
|
#display(functional_data_df)
|
763
763
|
|
764
764
|
# Set up directories
|
765
|
-
structure_fldr_path = "/home/carruthers/Downloads/ME49_proteome/cif"
|
766
|
-
base_dir='/home/carruthers/foldseek/me49'
|
765
|
+
#structure_fldr_path = "/home/carruthers/Downloads/ME49_proteome/cif"
|
766
|
+
#base_dir='/home/carruthers/foldseek/me49'
|
767
767
|
|
768
|
-
align_to_database(structure_fldr_path, base_dir, cores=25)
|
768
|
+
#align_to_database(structure_fldr_path, base_dir, cores=25)
|
769
769
|
#foldseek_csv_path = f'{base_dir}/results/pdb/aln_tmscore.csv'
|
770
770
|
#analyze_results(foldseek_csv_path, base_dir)
|
771
771
|
|
772
772
|
# Set up directories
|
773
|
-
structure_fldr_path = "/home/carruthers/Downloads/GT1_proteome/cif"
|
774
|
-
base_dir='/home/carruthers/foldseek/gt1'
|
773
|
+
#structure_fldr_path = "/home/carruthers/Downloads/GT1_proteome/cif"
|
774
|
+
#base_dir='/home/carruthers/foldseek/gt1'
|
775
775
|
|
776
|
-
align_to_database(structure_fldr_path, base_dir, cores=25)
|
776
|
+
#align_to_database(structure_fldr_path, base_dir, cores=25)
|
777
777
|
#foldseek_csv_path = f'{base_dir}/results/pdb/aln_tmscore.csv'
|
778
778
|
#analyze_results(foldseek_csv_path, base_dir)
|
779
779
|
|
spacr/get_alfafold_structures.py
CHANGED
@@ -67,6 +67,6 @@ def download_alphafold_structures(tsv_location, dst, version="4"):
|
|
67
67
|
print(f"Failed download entries saved to: {os.path.join(dst, 'failed_downloads.csv')}")
|
68
68
|
|
69
69
|
# Example usage:
|
70
|
-
tsv_location = '/home/carruthers/Downloads/GT1_proteome/GT1_proteins_uniprot.tsv' # Replace with the path to your TSV file containing a list of UniProt entries
|
71
|
-
dst_folder = '/home/carruthers/Downloads/GT1_proteome' # Replace with your destination folder
|
72
|
-
download_alphafold_structures(tsv_location, dst_folder)
|
70
|
+
#tsv_location = '/home/carruthers/Downloads/GT1_proteome/GT1_proteins_uniprot.tsv' # Replace with the path to your TSV file containing a list of UniProt entries
|
71
|
+
#dst_folder = '/home/carruthers/Downloads/GT1_proteome' # Replace with your destination folder
|
72
|
+
#download_alphafold_structures(tsv_location, dst_folder)
|