spacr 0.0.35__py3-none-any.whl → 0.0.61__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- spacr/__init__.py +2 -2
- spacr/__main__.py +0 -2
- spacr/alpha.py +514 -2
- spacr/annotate_app.py +113 -117
- spacr/core.py +864 -728
- spacr/deep_spacr.py +696 -0
- spacr/foldseek.py +2 -16
- spacr/graph_learning.py +297 -253
- spacr/gui.py +9 -8
- spacr/gui_2.py +90 -0
- spacr/gui_classify_app.py +7 -8
- spacr/gui_mask_app.py +13 -13
- spacr/gui_measure_app.py +8 -10
- spacr/gui_utils.py +134 -35
- spacr/io.py +311 -467
- spacr/mask_app.py +110 -6
- spacr/measure.py +19 -5
- spacr/models/cp/toxo_pv_lumen.CP_model +0 -0
- spacr/old_code.py +70 -2
- spacr/plot.py +23 -6
- spacr/sequencing.py +1130 -0
- spacr/sim.py +0 -42
- spacr/timelapse.py +0 -1
- spacr/train.py +172 -13
- spacr/umap.py +0 -689
- spacr/utils.py +1322 -75
- {spacr-0.0.35.dist-info → spacr-0.0.61.dist-info}/METADATA +14 -29
- spacr-0.0.61.dist-info/RECORD +39 -0
- {spacr-0.0.35.dist-info → spacr-0.0.61.dist-info}/entry_points.txt +1 -0
- spacr-0.0.35.dist-info/RECORD +0 -35
- {spacr-0.0.35.dist-info → spacr-0.0.61.dist-info}/LICENSE +0 -0
- {spacr-0.0.35.dist-info → spacr-0.0.61.dist-info}/WHEEL +0 -0
- {spacr-0.0.35.dist-info → spacr-0.0.61.dist-info}/top_level.txt +0 -0
spacr/umap.py
CHANGED
@@ -1,689 +0,0 @@
|
|
1
|
-
import umap
|
2
|
-
import random
|
3
|
-
import sqlite3
|
4
|
-
import numpy as np
|
5
|
-
import pandas as pd
|
6
|
-
from PIL import Image
|
7
|
-
import matplotlib.pyplot as plt
|
8
|
-
from sklearn.cluster import DBSCAN
|
9
|
-
from sklearn.preprocessing import StandardScaler
|
10
|
-
from matplotlib.offsetbox import OffsetImage, AnnotationBbox
|
11
|
-
from numba import NumbaDeprecationWarning, NumbaPendingDeprecationWarning
|
12
|
-
from matplotlib.offsetbox import OffsetImage, AnnotationBbox
|
13
|
-
from scipy.spatial import ConvexHull
|
14
|
-
from scipy.interpolate import splprep, splev
|
15
|
-
from IPython.display import display
|
16
|
-
|
17
|
-
from .logger import log_function_call
|
18
|
-
|
19
|
-
# Create a function to check if images overlap
|
20
|
-
def check_overlap(current_position, other_positions, threshold):
    """Return True if any position lies closer than `threshold` to `current_position`.

    Distances are Euclidean norms of the coordinate differences. An empty
    `other_positions` yields False.
    """
    return any(
        np.linalg.norm(np.array(current_position) - np.array(placed)) < threshold
        for placed in other_positions
    )
|
26
|
-
|
27
|
-
def remove_highly_correlated_columns(df, threshold):
    """Drop every column whose absolute correlation with an earlier column exceeds `threshold`.

    Only the strict upper triangle of the absolute correlation matrix is
    inspected, so for each correlated pair the later column is removed and the
    earlier one is kept. `threshold` is compared directly against |r|, i.e. it
    is expected on the 0-1 scale.
    """
    abs_corr = df.corr().abs()
    # Mask that keeps only entries strictly above the diagonal.
    above_diagonal = np.triu(np.ones(abs_corr.shape), k=1).astype(bool)
    upper = abs_corr.where(above_diagonal)

    redundant = []
    for name in upper.columns:
        if any(upper[name] > threshold):
            redundant.append(name)
    return df.drop(columns=redundant)
|
32
|
-
|
33
|
-
def hyperparameter_search(db_path, tables, filter_by=None, sample_size=None, umap_params=None, dbscan_params=None, pointsize=2, save=False, remove_highly_correlated=False, log_data=False, verbose=True):
    """Plot a grid of UMAP embeddings clustered by DBSCAN for each parameter combination.

    Rows of the figure correspond to entries of `umap_params`, columns to
    entries of `dbscan_params`.

    Args:
        db_path: Path to the SQLite measurement database.
        tables: Names of the tables to read; they are concatenated column-wise.
        filter_by: If 'morphology', keep columns whose name does not contain
            'channel'; any other string keeps columns containing that string;
            None keeps every column.
        sample_size: If given, number of rows randomly sampled before analysis
            (capped at the number of available rows).
        umap_params: Non-empty list of keyword dicts passed to `umap.UMAP`.
        dbscan_params: Non-empty list of keyword dicts passed to `DBSCAN`.
        pointsize: Marker size of the scatter points.
        save: If True, write 'hyperparameter_search.png' instead of showing the figure.
        remove_highly_correlated: If True, drop columns with |r| > 0.95 to an earlier column.
        log_data: If True, apply log(x + 1e-6) to the numeric data.
        verbose: If True, print/display intermediate tables.

    Raises:
        ValueError: If `umap_params` or `dbscan_params` is missing or empty.
    """
    if not umap_params or not dbscan_params:
        raise ValueError('umap_params and dbscan_params must be non-empty lists of keyword dictionaries')

    # Read the tables; the connection is released even if a query fails.
    conn = sqlite3.connect(db_path)
    try:
        # Table names cannot be bound as SQL parameters; they must come from a trusted caller.
        dfs = [pd.read_sql_query(f"SELECT * FROM {table_name}", conn) for table_name in tables]
    finally:
        conn.close()

    # Concatenate the DataFrames along the columns (axis=1)
    df = pd.concat(dfs, axis=1)

    if verbose:
        print(df.columns)
        display(df)

    # Filter the DataFrame if filter_by is specified
    if filter_by is not None:
        if filter_by != 'morphology':
            cols_to_include = [col for col in df.columns if filter_by in str(col)]
        else:
            cols_to_include = [col for col in df.columns if 'channel' not in str(col)]
        df = df[cols_to_include]

    if sample_size is not None:
        # Cap the sample so small tables do not make DataFrame.sample raise.
        df = df.sample(n=min(sample_size, len(df)))

    # Remove non-numerical data
    numeric_data = df.select_dtypes(include=['number'])

    # Correlations live in [-1, 1]; the threshold has to be on that scale to have any effect.
    if remove_highly_correlated:
        numeric_data = remove_highly_correlated_columns(df=numeric_data, threshold=0.95)

    if verbose:
        print(f'Columns included in UMAP')
        print(numeric_data.columns.tolist())
        display(numeric_data)

    # Log transform data
    if log_data:
        numeric_data = np.log(numeric_data + 1e-6)

    # Fill NaN values with columns mean
    numeric_data = numeric_data.fillna(numeric_data.mean())

    # Scale the numeric data
    scaler = StandardScaler(copy=True, with_mean=True, with_std=True)
    numeric_data = scaler.fit_transform(numeric_data)

    if verbose:
        print(numeric_data)

    # squeeze=False keeps axs two-dimensional even for a single row or column.
    fig, axs = plt.subplots(len(umap_params), len(dbscan_params), figsize=(20, 20), squeeze=False)

    for i, umap_param in enumerate(umap_params):
        # The embedding does not depend on the DBSCAN settings: fit it once per row.
        embedding = umap.UMAP(**umap_param).fit_transform(numeric_data)

        for j, dbscan_param in enumerate(dbscan_params):
            ax = axs[i, j]
            labels = DBSCAN(**dbscan_param).fit(embedding).labels_

            # One scatter call per label so that each cluster gets a legend entry.
            for label in np.unique(labels):
                members = labels == label
                ax.scatter(embedding[members, 0], embedding[members, 1],
                           s=pointsize, label=f"Cluster {label}")

            ax.set_title(f'UMAP {umap_param}\nDBSCAN {dbscan_param}')
            ax.legend()

    plt.tight_layout()
    if save:
        plt.savefig('hyperparameter_search.png')
    else:
        plt.show()
    return
|
121
|
-
|
122
|
-
# Create a function to check if images overlap
|
123
|
-
def check_overlap(current_position, other_positions, threshold):
    """Tell whether `current_position` is within `threshold` of any other position.

    Returns True at the first position whose Euclidean distance to
    `current_position` is below `threshold`, otherwise False.
    """
    for candidate in other_positions:
        offset = np.array(current_position) - np.array(candidate)
        if np.linalg.norm(offset) < threshold:
            return True
    return False
|
129
|
-
|
130
|
-
# Define a function to try random positions around a given point
|
131
|
-
def find_non_overlapping_position(x, y, image_positions, threshold, max_attempts=100, offset_range=10):
    """Search for a position near (x, y) that does not overlap existing images.

    Up to `max_attempts` candidates are drawn by adding independent uniform
    offsets in [-offset_range, offset_range] to x and y; the first candidate
    for which `check_overlap` reports no overlap is returned.

    Args:
        x, y: Coordinates of the preferred position.
        image_positions: Positions already occupied.
        threshold: Minimum allowed distance to any occupied position.
        max_attempts: Maximum number of random candidates to try.
        offset_range: Half-width of the uniform offset applied to each coordinate.

    Returns:
        Tuple (new_x, new_y), or the original (x, y) if no free position was found.
    """
    for _ in range(max_attempts):
        new_x = x + random.uniform(-offset_range, offset_range)
        new_y = y + random.uniform(-offset_range, offset_range)
        if not check_overlap((new_x, new_y), image_positions, threshold):
            return new_x, new_y
    return x, y  # Return the original position if no suitable position found
|
143
|
-
|
144
|
-
|
145
|
-
def smooth_hull_lines(cluster_data):
    """Return x and y coordinates of a spline traced through the convex hull of the points.

    The hull vertices are closed into a loop (first vertex repeated at the
    end), interpolated with `splprep` (s=0.0) and sampled at 100 evenly spaced
    parameter values between 0 and 1.
    """
    hull = ConvexHull(cluster_data)

    # Hull corner points, with the first one appended again to close the outline.
    ring = hull.points[hull.vertices]
    ring = np.concatenate([ring, ring[:1]], axis=0)

    spline, _ = splprep(ring.T, u=None, s=0.0)
    sample_at = np.linspace(0, 1, 100)
    xs, ys = splev(sample_at, spline)
    return xs, ys
|
161
|
-
|
162
|
-
def _image_with_alpha(img_array):
    """Convert a PIL image to a float RGBA array in which zero-valued pixels are transparent."""
    if img_array.mode in ['L', 'I']:  # Grayscale image
        img_data = np.array(img_array)
        peak = np.max(img_data)
        if peak > 0:  # An all-zero image would otherwise divide by zero.
            img_data = img_data / peak  # Normalize to [0, 1]
        alpha_channel = (img_data > 0).astype(float)
        img_data_rgb = np.stack([img_data] * 3, axis=-1)  # Convert to RGB
        return np.dstack([img_data_rgb, alpha_channel])
    if img_array.mode == 'RGB':
        img_data = np.array(img_array) / 255.0  # Normalize to [0, 1]
        alpha_channel = (np.sum(img_data, axis=-1) > 0).astype(float)  # Non-black pixels
        return np.dstack([img_data, alpha_channel])
    raise ValueError(f"Unsupported image mode: {img_array.mode}")


def _place_images(ax, embedding, image_paths, indices, img_zoom, remove_image_canvas):
    """Draw the images selected by `indices` on `ax` at their embedding coordinates."""
    for index in indices:
        x, y = embedding[index]
        # The context manager releases the file handle once the pixels are copied.
        with Image.open(image_paths[index]) as img_array:
            if remove_image_canvas:
                imagebox = OffsetImage(_image_with_alpha(img_array), zoom=img_zoom)
            else:
                imagebox = OffsetImage(np.array(img_array), zoom=img_zoom, cmap='gray')
        ax.add_artist(AnnotationBbox(imagebox, (x, y), frameon=False))


def generate_image_umap(db_paths, tables=['cell'], visualize='cell', image_nr=100, dot_size=50, n_neighbors=30, min_dist=0.1, metric='cosine', eps=0.5, min_samples=5, filter_by=None, img_zoom=0.3, plot_by_cluster=False, plot_cluster_grids=False, remove_cluster_noise=False, figuresize=20, remove_highly_correlated=True, log_data=False, black_background=False, remove_image_canvas=False, plot_outlines=False, plot_points=True, smooth_lines=False, row_limit=None, verbose=False):
    """Embed object measurements with UMAP, cluster them with DBSCAN and overlay object images.

    Args:
        db_paths: List of SQLite database paths (a single path string is also accepted).
        tables: Measurement tables to read; each must be one of 'cell',
            'cytoplasm', 'nucleus' or 'parasite'. Tables are concatenated column-wise.
        visualize: Object type whose images are shown; rows of `png_list` are
            kept only if `png_path` contains '<visualize>_png'. None disables
            that filter and the image overlay.
        image_nr: Number of images drawn (overall, or per cluster when
            `plot_by_cluster` is True).
        dot_size: Marker size of the scatter points.
        n_neighbors, min_dist, metric: Passed to `umap.UMAP`.
        eps, min_samples: Passed to `DBSCAN`.
        filter_by: If 'morphology', keep columns whose name does not contain
            'channel'; any other string keeps columns containing it; None keeps all.
        img_zoom: Zoom factor of the overlaid images.
        plot_by_cluster: If True, sample images per cluster instead of globally.
        plot_cluster_grids: If True, additionally plot image grids per cluster.
        remove_cluster_noise: If True, discard points labelled -1 by DBSCAN.
        figuresize: Edge length of the square figure; the font size is derived from it.
        remove_highly_correlated: If True, drop columns with |r| > 0.95 to an earlier column.
        log_data: If True, apply log(x + 1e-6) to the numeric data.
        black_background: Selects a black or white matplotlib colour scheme
            (set through the global `plt.rcParams`).
        remove_image_canvas: If True, zero-valued image pixels are made transparent.
        plot_outlines: If True, draw the convex hull of each cluster.
        plot_points: If False, scatter points are drawn fully transparent.
        smooth_lines: If True, hull outlines are spline-smoothed.
        row_limit: If given, number of measurement rows sampled per database.
        verbose: If True, print/display intermediate tables.

    Returns:
        None. Figures are shown with `plt.show()`.

    Raises:
        ValueError: If `tables` contains an unsupported table name.
    """
    from .annotate_app import check_for_duplicates

    if isinstance(db_paths, str):
        db_paths = [db_paths]
    if not isinstance(db_paths, list):
        print(f'Warning: Variable db_paths is not a list. db_paths:{db_paths}')
        return

    # Column that identifies the object in each supported table.
    object_columns = {'cell': 'object_label',
                      'cytoplasm': 'object_label',
                      'nucleus': 'cell_id',
                      'parasite': 'cell_id'}
    unknown_tables = [table for table in tables if table not in object_columns]
    if unknown_tables:
        raise ValueError(f'Unsupported table(s): {unknown_tables}')

    all_df = pd.DataFrame()
    for db_path in db_paths:
        check_for_duplicates(db_path)
        if verbose:
            print(f'database:{db_path}')
        conn = sqlite3.connect(db_path)
        try:
            c = conn.cursor()
            df = pd.DataFrame()
            for table in tables:
                object_name = object_columns[table]
                print(f'{table}:{object_name}')

                # Fetch all data
                c.execute(f'SELECT * FROM {table}')
                data = c.fetchall()
                columns_info = c.execute(f'PRAGMA table_info({table})').fetchall()
                column_names = [col_info[1] for col_info in columns_info]

                # Create a DataFrame from the data
                df_temp = pd.DataFrame(data, columns=column_names)
                df_temp = df_temp.dropna(subset=[object_name])
                df_temp = df_temp.assign(object_label=lambda x: 'o' + x[object_name].astype(int).astype(str))

                if verbose:
                    display(df_temp)

                # NOTE(review): 'prfco' looks like a typo for 'prcfo'; harmless because the
                # assign below overwrites any existing 'prcfo' column - confirm.
                if 'prfco' in df_temp.columns:
                    df_temp = df_temp.drop(columns=['prfco'])

                df_temp = df_temp.assign(prcfo = lambda x: x['plate'] + '_' + x['row'] + '_' +x['col']+ '_' +x['field']+ '_' +x['object_label'])
                df_temp = df_temp.drop(columns=[object_name])
                # NOTE(review): tables are joined by row position, not by key - verify
                # that rows of different tables really correspond.
                df = pd.concat([df, df_temp], axis=1)

            # Remove duplicate columns
            df = df.loc[:, ~df.columns.duplicated(keep='first')]

            if row_limit is not None:
                # Cap the sample so small tables do not make DataFrame.sample raise.
                df = df.sample(n=min(row_limit, len(df)), replace=False, random_state=1)

            # Fetch image paths
            c.execute(f'SELECT * FROM png_list')
            data = c.fetchall()
            columns_info = c.execute(f'PRAGMA table_info(png_list)').fetchall()
            column_names = [col_info[1] for col_info in columns_info]
        finally:
            conn.close()

        column_names_keep = ['png_path', 'prcfo']
        image_paths_df = pd.DataFrame(data, columns=column_names)
        image_paths_df = image_paths_df.loc[:, image_paths_df.columns.isin(column_names_keep)]

        if visualize is not None:
            object_visualize = visualize + '_png'
            image_paths_df = image_paths_df[image_paths_df['png_path'].str.contains(object_visualize)]

        image_paths_df.set_index('prcfo', inplace=True)
        df.set_index('prcfo', inplace=True)
        df = image_paths_df.merge(df, left_index=True, right_index=True)

        if verbose:
            display(df)

        all_df = pd.concat([all_df, df], axis=0)

    # One image path per row of all_df, i.e. per embedded point.
    image_paths = all_df['png_path'].to_list()

    if verbose:
        display(all_df)

    # Filter the combined table (the one that is actually embedded) if filter_by is specified
    if filter_by is not None:
        if filter_by != 'morphology':
            cols_to_include = [col for col in all_df.columns if filter_by in str(col)]
        else:
            cols_to_include = [col for col in all_df.columns if 'channel' not in str(col)]
        all_df = all_df[cols_to_include]

    # Remove non-numerical data
    numeric_data = all_df.select_dtypes(include=['number'])

    # Correlations live in [-1, 1]; the threshold has to be on that scale to have any effect.
    if remove_highly_correlated:
        numeric_data = remove_highly_correlated_columns(df=numeric_data, threshold=0.95)

    if verbose:
        print(f'Columns included in UMAP')
        print(numeric_data.columns.tolist())
        display(numeric_data)

    # Log transform data
    if log_data:
        numeric_data = np.log(numeric_data + 1e-6)

    # Fill NaN values with columns mean
    numeric_data = numeric_data.fillna(numeric_data.mean())

    # Scale the numeric data
    scaler = StandardScaler(copy=True, with_mean=True, with_std=True)
    numeric_data = scaler.fit_transform(numeric_data)

    if verbose:
        print(numeric_data)

    # Perform UMAP analysis
    reducer = umap.UMAP(n_neighbors=n_neighbors,
                        n_components=2,
                        metric=metric,
                        output_metric='euclidean',
                        n_epochs=10,
                        learning_rate=0.1,
                        init='spectral',
                        min_dist=min_dist,
                        spread=1.0,
                        low_memory=False,
                        set_op_mix_ratio=1.0,
                        local_connectivity=1.0,
                        repulsion_strength=1.0,
                        negative_sample_rate=5,
                        transform_queue_size=4.0,
                        a=None,
                        b=None,
                        random_state=None,
                        metric_kwds=None,
                        angular_rp_forest=False,
                        target_n_neighbors=-1,
                        target_metric='categorical',
                        target_metric_kwds=None,
                        target_weight=0.5,
                        transform_seed=42,
                        verbose=False)

    embedding = reducer.fit_transform(numeric_data)

    clustering = DBSCAN(eps=eps,
                        min_samples=min_samples,
                        metric='euclidean',
                        metric_params=None,
                        algorithm='auto',
                        leaf_size=30,
                        p=None,
                        n_jobs=None).fit(embedding)

    labels = clustering.labels_

    if remove_cluster_noise:
        non_noise_indices = labels != -1
        embedding = embedding[non_noise_indices]
        labels = labels[non_noise_indices]
        # Keep the image list aligned with the rows that remain in the embedding.
        image_paths = [path for path, keep in zip(image_paths, non_noise_indices) if keep]

    # Create random RGBA colors (components in [0, 1], alpha fixed to 1)
    unique_labels = np.unique(labels)
    num_clusters = len(unique_labels[unique_labels != 0])
    random_colors = np.random.rand(num_clusters + 1, 4)
    random_colors[:, 3] = 1  # Set alpha channel

    # Set specific colors for the first four clusters
    specific_colors = [
        [155/255, 55/255, 155/255, 1],
        [55/255, 155/255, 155/255, 1],
        [55/255, 155/255, 255/255, 1],
        [255/255, 55/255, 155/255, 1]]

    random_colors = np.vstack((specific_colors, random_colors[len(specific_colors):]))

    # Black is prepended for the noise label, which sorts first when present.
    if remove_cluster_noise == False:
        random_colors = np.vstack(([0, 0, 0, 1], random_colors))

    colors = [tuple(color) for color in random_colors]

    # Get cluster centers to place the labels
    cluster_centers = [np.mean(embedding[labels == cluster_label], axis=0) for cluster_label in unique_labels]

    # Create mapping from cluster labels to color indices
    label_to_color_index = {label: index for index, label in enumerate(unique_labels)}

    # Generate matplotlib figure (colour scheme is set on the global rcParams)
    background, foreground = ('black', 'white') if black_background else ('white', 'black')
    plt.rcParams['figure.facecolor'] = background
    plt.rcParams['axes.facecolor'] = background
    plt.rcParams['text.color'] = foreground
    plt.rcParams['xtick.color'] = foreground
    plt.rcParams['ytick.color'] = foreground
    plt.rcParams['axes.labelcolor'] = foreground

    fig, ax = plt.subplots(1, 1, figsize=(figuresize, figuresize))
    fontsize = int(figuresize*0.75)
    handles = []

    # Plot all points in the embedding
    for cluster_label, color, center in zip(unique_labels, colors, cluster_centers):
        cluster_data = embedding[labels == cluster_label]

        # A convex hull needs more than 2 points; it is only computed when it is drawn.
        if plot_outlines and cluster_data.shape[0] > 2:
            if smooth_lines:
                x_smooth, y_smooth = smooth_hull_lines(cluster_data)
                plt.plot(x_smooth, y_smooth, color=color, linewidth=2)
            else:
                hull = ConvexHull(cluster_data)
                for simplex in hull.simplices:
                    plt.plot(hull.points[simplex, 0], hull.points[simplex, 1], color=color, linewidth=4)

        # Invisible points are still drawn so that the legend handle exists.
        point_alpha = 0.5 if plot_points else 0
        scatter = ax.scatter(cluster_data[:, 0], cluster_data[:, 1], s=dot_size, c=[color], alpha=point_alpha, label=f'Cluster {cluster_label if cluster_label != -1 else "Noise"}')
        handles.append(scatter)

        # Annotate the cluster center with the cluster label
        if cluster_label != -1:  # Skip noise labeled as -1
            ax.text(center[0], center[1], str(cluster_label), fontsize=12, ha='center', va='center')

    # Create a dictionary to track indices for each cluster
    cluster_indices = {label: np.where(labels == label)[0] for label in unique_labels if label != -1}

    if visualize is not None:
        if not plot_by_cluster:
            # Plot images replacing random points; never request more samples than points.
            indices = random.sample(range(len(embedding)), min(image_nr, len(embedding)))
            _place_images(ax, embedding, image_paths, indices, img_zoom, remove_image_canvas)
        else:
            # Plot images replacing random points for each cluster
            for cluster_label in unique_labels:
                if cluster_label == -1:  # Skip noise labeled as -1
                    continue

                indices = cluster_indices.get(cluster_label, [])
                # NOTE(review): clusters with 2..image_nr members show a single image -
                # confirm whether all members were meant to be shown.
                if len(indices) > image_nr:
                    indices = random.sample(list(indices), image_nr)
                elif len(indices) > 1:
                    indices = random.sample(list(indices), 1)

                _place_images(ax, embedding, image_paths, indices, img_zoom, remove_image_canvas)

    plt.legend(handles=handles, loc='best', fontsize=fontsize)
    plt.xlabel('UMAP Dimension 1', fontsize=fontsize)
    plt.ylabel('UMAP Dimension 2', fontsize=fontsize)
    plt.tick_params(axis='both', which='major', labelsize=fontsize)
    plt.show()

    if plot_cluster_grids:
        # Dictionary to keep track of images for each cluster
        cluster_images = {label: [] for label in unique_labels if label != -1}
        if not cluster_images:
            print('No clusters found; skipping cluster grids.')
            return

        # Collect images for each cluster (a fresh random draw, independent of the overlay above)
        for cluster_label, indices in cluster_indices.items():
            if len(indices) > image_nr:
                indices = random.sample(list(indices), image_nr)
            elif len(indices) > 1:
                indices = random.sample(list(indices), 1)

            for index in indices:
                with Image.open(image_paths[index]) as img_array:
                    cluster_images[cluster_label].append(np.array(img_array))

        if len(cluster_images) > 1:
            # One axes per cluster, each filled with the cluster colour and a grid of inset images.
            num_clusters = len(cluster_images)
            grid_fig, grid_axes = plt.subplots(1, num_clusters, figsize=(figuresize * num_clusters, figuresize), gridspec_kw={'wspace': 0.2, 'hspace': 0})

            for cluster_label, axes in zip(cluster_images.keys(), grid_axes):
                images = cluster_images[cluster_label]
                num_images = len(images)
                grid_size = int(np.ceil(np.sqrt(num_images)))  # Rows and columns of the grid
                image_size = 0.9 / grid_size  # Adjusting this value will control the whitespace
                whitespace = (1 - grid_size * image_size) / (grid_size + 1)

                color = colors[label_to_color_index[cluster_label]]  # Retrieve the color for this cluster

                # Fill the entire axes with the cluster color
                axes.add_patch(plt.Rectangle((0, 0), 1, 1, transform=axes.transAxes, color=color[:3]))

                axes.set_title(f'Cluster {cluster_label}', fontsize=fontsize*3)
                axes.axis('off')

                for i, img in enumerate(images):
                    row = i // grid_size
                    col = i % grid_size
                    x_pos = (col + 1) * whitespace + col * image_size
                    y_pos = 1 - ((row + 1) * whitespace + (row + 1) * image_size)
                    ax_img = axes.inset_axes([x_pos, y_pos, image_size, image_size], transform=axes.transAxes)
                    ax_img.imshow(img, cmap='gray', aspect='auto')
                    ax_img.axis('off')
                    ax_img.set_aspect('equal')  # Ensure that the aspect ratio is equal
                    ax_img.set_facecolor(color[:3])  # Set the inset axes background color
            plt.show()
        else:
            # Single cluster: a plain square grid of subplots.
            cluster_label = list(cluster_images.keys())[0]
            images = cluster_images[cluster_label]
            num_images = len(images)
            grid_size = int(np.ceil(np.sqrt(num_images)))  # Rows and columns of the grid

            fig, axes = plt.subplots(grid_size, grid_size, figsize=(figuresize, figuresize))

            if grid_size == 1:
                # Special case for one image: plt.subplots returns a single Axes
                axes.imshow(images[0], cmap='gray', aspect='auto')
                axes.axis('off')
            else:
                for i, grid_ax in enumerate(axes.flat):
                    if i < num_images:
                        grid_ax.imshow(images[i], cmap='gray', aspect='auto')
                        grid_ax.set_aspect('equal')  # Ensure that the aspect ratio is equal
                    # Axes are hidden for images and for unused grid cells alike.
                    grid_ax.axis('off')

            plt.suptitle(f'Cluster {cluster_label}', fontsize=fontsize*3, y=0.95)  # Adjust the y-position
            plt.tight_layout(rect=[0, 0.03, 1, 0.95])  # Adjust layout to make room for the title
            plt.show()
    return
|
589
|
-
|
590
|
-
# Example analyses. They are guarded so that importing this module does not
# start long-running computations on machine-specific database paths.
if __name__ == "__main__":
    db_paths = ['/mnt/data/CellVoyager/20x/tsg101/crispr_screen/all/measurements/measurements.db']

    generate_image_umap(db_paths=db_paths,
                        row_limit=1000,
                        tables=['cytoplasm'],
                        visualize='cell',
                        image_nr=36,
                        dot_size=50,
                        n_neighbors=1000,
                        min_dist=0.1,
                        metric='euclidean',
                        eps=0.5,
                        min_samples=1000,
                        filter_by='channel_0',
                        img_zoom=0.3,
                        plot_by_cluster=True,
                        plot_cluster_grids=True,
                        remove_cluster_noise=True,
                        remove_highly_correlated=True,
                        log_data=True,
                        figuresize=60,
                        black_background=False,
                        remove_image_canvas=False,
                        plot_outlines=False,
                        plot_points=True,
                        smooth_lines=False,
                        verbose=True)

    generate_image_umap(db_paths=db_paths,
                        tables=['cytoplasm'],
                        visualize='cytoplasm',
                        image_nr=36,
                        dot_size=50,
                        n_neighbors=1000,
                        min_dist=0.1,
                        metric='euclidean',
                        eps=0.5,
                        min_samples=1000,
                        filter_by='channel_0',
                        img_zoom=0.3,
                        plot_by_cluster=True,
                        plot_cluster_grids=True,
                        remove_cluster_noise=True,
                        remove_highly_correlated=True,
                        log_data=True,
                        figuresize=60,
                        black_background=False,
                        remove_image_canvas=False,
                        plot_outlines=False,
                        plot_points=True,
                        smooth_lines=False,
                        verbose=False)

    db_path = '/mnt/data/CellVoyager/63x/mack/CRCR2P2_20230721_162734/PECCU/measurements/measurements.db'
    db = db_path
    channels = ['channel_0', 'channel_1', 'channel_2', 'channel_3', None]

    for channel in channels:
        # generate_image_umap checks that db_paths is a list, so the single path is wrapped.
        generate_image_umap([db],
                            tables=['cell', 'cytoplasm', 'nucleus'],
                            image_nr=36,
                            dot_size=50,
                            n_neighbors=50,
                            min_dist=0.1,
                            metric='euclidean',
                            eps=0.3,
                            min_samples=100,
                            filter_by=channel,
                            img_zoom=0.2,
                            plot_by_cluster=True,
                            plot_cluster_grids=True,
                            remove_cluster_noise=True,
                            remove_highly_correlated=True,
                            log_data=True,
                            figuresize=60,
                            verbose=False)

    #db_path = '/mnt/data/CellVoyager/63x/mack/CRCR2P2_20230721_162734/PECCU/measurements/measurements.db'
    db_path = '/mnt/data/CellVoyager/20x/tsg101/crispr_screen/all/measurements/measurements.db'
    tables = ['cell', 'cytoplasm', 'parasite']

    # UMAP hyperparameters
    umap_params = [{'n_neighbors': 20, 'min_dist': 0.01, 'metric': 'euclidean'},
                   {'n_neighbors': 40, 'min_dist': 0.1, 'metric': 'euclidean'}]

    # DBSCAN hyperparameters
    dbscan_params = [{'eps': 0.3, 'min_samples': 100},
                     {'eps': 0.3, 'min_samples': 100}]

    hyperparameter_search(db_path,
                          tables=tables,
                          filter_by='channel_0',
                          sample_size=5000,
                          umap_params=umap_params,
                          dbscan_params=dbscan_params,
                          remove_highly_correlated=True,
                          log_data=True,
                          pointsize=2,
                          verbose=False)
|