spacr 0.0.35__py3-none-any.whl → 0.0.61__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
spacr/umap.py CHANGED
@@ -1,689 +0,0 @@
1
- import umap
2
- import random
3
- import sqlite3
4
- import numpy as np
5
- import pandas as pd
6
- from PIL import Image
7
- import matplotlib.pyplot as plt
8
- from sklearn.cluster import DBSCAN
9
- from sklearn.preprocessing import StandardScaler
10
- from matplotlib.offsetbox import OffsetImage, AnnotationBbox
11
- from numba import NumbaDeprecationWarning, NumbaPendingDeprecationWarning
12
- from matplotlib.offsetbox import OffsetImage, AnnotationBbox
13
- from scipy.spatial import ConvexHull
14
- from scipy.interpolate import splprep, splev
15
- from IPython.display import display
16
-
17
- from .logger import log_function_call
18
-
19
- # Create a function to check if images overlap
20
def check_overlap(current_position, other_positions, threshold):
    """Report whether a position lies within ``threshold`` of any other position.

    Args:
        current_position: Coordinates of the candidate point.
        other_positions: Iterable of coordinates that are already occupied.
        threshold: Euclidean distance below which two points count as
            overlapping (strict comparison).

    Returns:
        True if at least one occupied position is closer than ``threshold``,
        otherwise False (including when ``other_positions`` is empty).
    """
    candidate = np.array(current_position)
    return any(
        np.linalg.norm(candidate - np.array(occupied)) < threshold
        for occupied in other_positions
    )
26
-
27
def remove_highly_correlated_columns(df, threshold):
    """Drop columns that are highly correlated with an earlier column.

    For every pair of columns the absolute correlation (``DataFrame.corr``)
    is computed; a column is dropped when its correlation with any column
    to its left exceeds the threshold.

    Args:
        df: DataFrame of numeric columns.
        threshold: Correlation cut-off. Values up to 1 are used as a
            fraction (e.g. ``0.95``). Values above 1 are interpreted as a
            percentage (e.g. ``95`` means ``0.95``), because an absolute
            correlation can never exceed 1 and such a threshold would
            otherwise silently disable the filter.

    Returns:
        A new DataFrame without the highly correlated columns.
    """
    if threshold > 1:
        # Callers in this module pass 95, meaning 95 %.
        threshold = threshold / 100.0

    corr_matrix = df.corr().abs()
    # Keep only the strict upper triangle so each pair is examined once and
    # a column is never compared with itself.
    upper_mask = np.triu(np.ones(corr_matrix.shape, dtype=bool), k=1)
    upper_tri = corr_matrix.where(upper_mask)
    to_drop = [column for column in upper_tri.columns if any(upper_tri[column] > threshold)]
    return df.drop(to_drop, axis=1)
32
-
33
def hyperparameter_search(db_path, tables, filter_by=None, sample_size=None, umap_params=None, dbscan_params=None, pointsize=2, save=False, remove_highly_correlated=False, log_data=False, verbose=True):
    """Plot a grid of UMAP embeddings clustered with DBSCAN.

    Each row of the grid corresponds to one entry of ``umap_params`` and
    each column to one entry of ``dbscan_params``.

    Args:
        db_path: Path to the SQLite database.
        tables: Names of the tables to read. They are interpolated into the
            SQL text, so they must come from a trusted source.
        filter_by: If given, keep only columns whose name contains this
            string; the special value ``'morphology'`` keeps columns whose
            name does not contain ``'channel'``.
        sample_size: If given, randomly sample at most this many rows.
        umap_params: Non-empty list of keyword dictionaries for ``umap.UMAP``.
        dbscan_params: Non-empty list of keyword dictionaries for ``DBSCAN``.
        pointsize: Marker size of the scatter points.
        save: If True, save the figure as ``hyperparameter_search.png``;
            otherwise show it.
        remove_highly_correlated: If True, drop columns whose absolute
            correlation with an earlier column exceeds 0.95.
        log_data: If True, log-transform the data (after adding 1e-6).
        verbose: If True, print and display intermediate data.

    Raises:
        ValueError: If ``umap_params`` or ``dbscan_params`` is missing or empty.
    """
    if not umap_params or not dbscan_params:
        raise ValueError('umap_params and dbscan_params must each be a non-empty list of keyword dictionaries')

    # Read everything up front and always release the connection, even if a
    # query fails.
    conn = sqlite3.connect(db_path)
    try:
        dfs = [pd.read_sql_query(f"SELECT * FROM {table_name}", conn) for table_name in tables]
    finally:
        conn.close()

    # Concatenate the DataFrames along the columns (axis=1)
    df = pd.concat(dfs, axis=1)

    if verbose:
        print(df.columns)
        display(df)

    # Filter the DataFrame if filter_by is specified
    if filter_by is not None:
        if filter_by != 'morphology':
            cols_to_include = [col for col in df.columns if filter_by in str(col)]
        else:
            cols_to_include = [col for col in df.columns if 'channel' not in str(col)]
        df = df[cols_to_include]

    if sample_size is not None:
        # Clamp so that asking for more rows than exist does not raise.
        df = df.sample(n=min(sample_size, len(df)))

    # Remove non-numerical data
    numeric_data = df.select_dtypes(include=['number'])

    # An absolute correlation lies in [0, 1], so the cut-off is a fraction.
    if remove_highly_correlated:
        numeric_data = remove_highly_correlated_columns(df=numeric_data, threshold=0.95)

    if verbose:
        print(f'Columns included in UMAP')
        print(numeric_data.columns.tolist())
        display(numeric_data)

    # Log transform data
    if log_data:
        numeric_data = np.log(numeric_data + 1e-6)

    # Fill NaN values with the column means
    numeric_data = numeric_data.fillna(numeric_data.mean())

    # Scale the numeric data
    scaler = StandardScaler(copy=True, with_mean=True, with_std=True)
    numeric_data = scaler.fit_transform(numeric_data)

    if verbose:
        print(numeric_data)

    # squeeze=False keeps axs two-dimensional even for a single row or
    # column, so axs[i, j] is always valid.
    _, axs = plt.subplots(len(umap_params), len(dbscan_params), figsize=(20, 20), squeeze=False)

    for i, umap_param in enumerate(umap_params):
        # The embedding depends only on the UMAP parameters, so fit it once
        # per row; every DBSCAN setting is then compared on the same embedding.
        embedding = umap.UMAP(**umap_param).fit_transform(numeric_data)

        for j, dbscan_param in enumerate(dbscan_params):
            ax = axs[i, j]
            labels = DBSCAN(**dbscan_param).fit(embedding).labels_

            # One scatter call per label so each cluster gets a legend entry
            for label in np.unique(labels):
                members = labels == label
                ax.scatter(embedding[members, 0], embedding[members, 1],
                           s=pointsize, label=f"Cluster {label}")

            ax.set_title(f'UMAP {umap_param}\nDBSCAN {dbscan_param}')
            ax.legend()

    plt.tight_layout()
    if save:
        plt.savefig('hyperparameter_search.png')
    else:
        plt.show()
    return
121
-
122
- # Create a function to check if images overlap
123
def check_overlap(current_position, other_positions, threshold):
    """Tell whether ``current_position`` is too close to an existing position.

    Args:
        current_position: Coordinates of the point being tested.
        other_positions: Iterable of coordinates to compare against.
        threshold: Minimum allowed Euclidean distance; anything strictly
            closer counts as an overlap.

    Returns:
        True when some entry of ``other_positions`` is closer than
        ``threshold``; False otherwise.
    """
    origin = np.array(current_position)
    too_close = False
    for neighbour in other_positions:
        separation = np.linalg.norm(origin - np.array(neighbour))
        if separation < threshold:
            too_close = True
            break
    return too_close
129
-
130
- # Define a function to try random positions around a given point
131
def find_non_overlapping_position(x, y, image_positions, threshold, max_attempts=100, offset_range=10):
    """Try random positions around a point until one does not overlap.

    Each attempt adds an independent uniform random offset in
    ``[-offset_range, offset_range]`` to ``x`` and to ``y`` and tests the
    result with ``check_overlap``.

    Args:
        x: X coordinate of the starting point.
        y: Y coordinate of the starting point.
        image_positions: Positions that are already occupied.
        threshold: Minimum allowed distance to any occupied position.
        max_attempts: Number of random positions to try.
        offset_range: Half-width of the random offset applied to each
            coordinate (previously fixed at 10).

    Returns:
        A tuple ``(new_x, new_y)`` of the first non-overlapping position, or
        the original ``(x, y)`` if every attempt overlapped.
    """
    attempts = 0
    while attempts < max_attempts:
        new_x = x + random.uniform(-offset_range, offset_range)
        new_y = y + random.uniform(-offset_range, offset_range)
        if not check_overlap((new_x, new_y), image_positions, threshold):
            return new_x, new_y
        attempts += 1
    return x, y  # No suitable position found; fall back to the original
143
-
144
-
145
def smooth_hull_lines(cluster_data):
    """Return a smoothed outline of the convex hull of a set of points.

    The hull vertices are closed into a loop, interpolated with a parametric
    spline (no smoothing, ``s=0.0``) and evaluated at 100 evenly spaced
    parameter values between 0 and 1.

    Args:
        cluster_data: Array of point coordinates accepted by ``ConvexHull``.

    Returns:
        A tuple ``(x, y)`` holding the coordinates of the 100 outline points.
    """
    hull = ConvexHull(cluster_data)

    # Hull corner coordinates, with the first corner repeated to close the loop
    outline = hull.points[hull.vertices]
    closed_outline = np.concatenate([outline, outline[:1]], axis=0)

    # Fit an interpolating spline through the corners and sample it
    spline, _ = splprep(closed_outline.T, u=None, s=0.0)
    sample_params = np.linspace(0, 1, 100)
    x_smooth, y_smooth = splev(sample_params, spline)

    return x_smooth, y_smooth
161
-
162
# Column that identifies an object in each supported measurement table.
_OBJECT_ID_COLUMNS = {
    'cell': 'object_label',
    'cytoplasm': 'object_label',
    'nucleus': 'cell_id',
    'parasite': 'cell_id',
}


def _read_object_tables(conn, tables, verbose=False):
    """Read the measurement tables and join them column-wise into one frame."""
    cursor = conn.cursor()
    df = pd.DataFrame()
    for table in tables:
        # Restricting to known tables also keeps arbitrary text out of the SQL.
        if table not in _OBJECT_ID_COLUMNS:
            raise ValueError(f'Unsupported table: {table!r}. Expected one of {sorted(_OBJECT_ID_COLUMNS)}')
        object_name = _OBJECT_ID_COLUMNS[table]

        print(f'{table}:{object_name}')

        # Fetch all data
        cursor.execute(f'SELECT * FROM {table}')
        data = cursor.fetchall()
        column_names = [description[0] for description in cursor.description]

        df_temp = pd.DataFrame(data, columns=column_names)
        df_temp = df_temp.dropna(subset=[object_name])

        if df_temp[object_name].dtype == float:
            df_temp[object_name] = df_temp[object_name].astype(int)

        df_temp = df_temp.assign(object_label=lambda x: 'o' + x[object_name].astype(int).astype(str))

        if verbose:
            display(df_temp)

        # NOTE(review): 'prfco' looks like a misspelling of 'prcfo'; kept as
        # is because the assign below overwrites any existing 'prcfo' column.
        if 'prfco' in df_temp.columns:
            df_temp = df_temp.drop(columns=['prfco'])

        df_temp = df_temp.assign(prcfo=lambda x: x['plate'] + '_' + x['row'] + '_' + x['col'] + '_' + x['field'] + '_' + x['object_label'])
        df_temp = df_temp.drop(columns=[object_name])
        df = pd.concat([df, df_temp], axis=1)

    # Remove duplicate columns, keeping the first occurrence
    return df.loc[:, ~df.columns.duplicated(keep='first')]


def _read_image_paths(conn, visualize):
    """Read ``png_list`` and return the ``png_path`` column indexed by ``prcfo``."""
    cursor = conn.cursor()
    cursor.execute('SELECT * FROM png_list')
    data = cursor.fetchall()
    column_names = [description[0] for description in cursor.description]

    image_paths_df = pd.DataFrame(data, columns=column_names)
    image_paths_df = image_paths_df.loc[:, image_paths_df.columns.isin(['png_path', 'prcfo'])]

    if visualize is not None:
        # Plain substring match; rows without a path are treated as non-matching.
        matches = image_paths_df['png_path'].str.contains(visualize + '_png', regex=False, na=False)
        image_paths_df = image_paths_df[matches]

    return image_paths_df.set_index('prcfo')


def _cluster_colors(unique_labels):
    """Return one RGBA tuple per label, in the order of ``unique_labels``.

    Noise (label -1) is black, the first four clusters get fixed colours and
    any further cluster gets a random opaque colour.
    """
    preset_colors = [
        (155/255, 55/255, 155/255, 1),
        (55/255, 155/255, 155/255, 1),
        (55/255, 155/255, 255/255, 1),
        (255/255, 55/255, 155/255, 1)]

    colors = []
    clusters_seen = 0
    for label in unique_labels:
        if label == -1:
            colors.append((0, 0, 0, 1))
            continue
        if clusters_seen < len(preset_colors):
            colors.append(preset_colors[clusters_seen])
        else:
            rgba = np.random.rand(4)
            rgba[3] = 1  # Fully opaque
            colors.append(tuple(rgba))
        clusters_seen += 1
    return colors


def _sample_cluster_indices(indices, image_nr):
    """Pick the point indices of one cluster whose images are shown.

    NOTE(review): a cluster with more than one but at most ``image_nr``
    points yields a single random index, as in the original code; confirm
    this is intended rather than "show all".
    """
    if len(indices) > image_nr:
        return random.sample(list(indices), image_nr)
    if len(indices) > 1:
        return random.sample(list(indices), 1)
    return list(indices)


def _image_with_alpha(img_array):
    """Convert a PIL image to an RGBA array whose zero pixels are transparent."""
    if img_array.mode in ['L', 'I']:  # Grayscale image
        img_data = np.array(img_array)
        peak = np.max(img_data)
        # Normalise to [0, 1]; an all-zero image is left as zeros instead of
        # being divided by zero.
        img_data = img_data / peak if peak > 0 else img_data.astype(float)
        alpha_channel = (img_data > 0).astype(float)
        img_data_rgb = np.stack([img_data] * 3, axis=-1)
        return np.dstack([img_data_rgb, alpha_channel])
    if img_array.mode == 'RGB':  # RGB image
        img_data = np.array(img_array) / 255.0  # Normalise to [0, 1]
        alpha_channel = (np.sum(img_data, axis=-1) > 0).astype(float)  # Non-black pixels
        return np.dstack([img_data, alpha_channel])
    raise ValueError(f"Unsupported image mode: {img_array.mode}")


def _add_images_to_axes(ax, embedding, image_paths, indices, img_zoom, remove_image_canvas):
    """Draw the image of each selected point at its embedding coordinates."""
    for index in indices:
        x, y = embedding[index]
        # The context manager closes the file once the pixels are copied.
        with Image.open(image_paths[index]) as img_array:
            if remove_image_canvas:
                imagebox = OffsetImage(_image_with_alpha(img_array), zoom=img_zoom)
            else:
                imagebox = OffsetImage(np.array(img_array), zoom=img_zoom, cmap='gray')
        ax.add_artist(AnnotationBbox(imagebox, (x, y), frameon=False))


def _show_cluster_grids(cluster_indices, image_paths, image_nr, color_by_label, figuresize, fontsize):
    """Show a grid of sample images for every cluster."""
    # Collect the images for each cluster
    cluster_images = {}
    for cluster_label, indices in cluster_indices.items():
        images = []
        for index in _sample_cluster_indices(indices, image_nr):
            with Image.open(image_paths[index]) as img_array:
                images.append(np.array(img_array))
        cluster_images[cluster_label] = images

    num_clusters = len(cluster_images)
    if num_clusters == 0:
        print('No clusters found; skipping cluster grids.')
        return

    if num_clusters > 1:
        _, grid_axes = plt.subplots(1, num_clusters, figsize=(figuresize * num_clusters, figuresize), gridspec_kw={'wspace': 0.2, 'hspace': 0})

        for (cluster_label, images), axes in zip(cluster_images.items(), grid_axes):
            grid_size = max(1, int(np.ceil(np.sqrt(len(images)))))  # Rows and columns
            image_size = 0.9 / grid_size  # Adjusting this value controls the whitespace
            whitespace = (1 - grid_size * image_size) / (grid_size + 1)

            color = color_by_label[cluster_label]

            # Fill the entire axes with the cluster colour
            axes.add_patch(plt.Rectangle((0, 0), 1, 1, transform=axes.transAxes, color=color[:3]))
            axes.set_title(f'Cluster {cluster_label}', fontsize=fontsize*3)
            axes.axis('off')

            for i, img in enumerate(images):
                row = i // grid_size
                col = i % grid_size
                x_pos = (col + 1) * whitespace + col * image_size
                y_pos = 1 - ((row + 1) * whitespace + (row + 1) * image_size)
                ax_img = axes.inset_axes([x_pos, y_pos, image_size, image_size], transform=axes.transAxes)
                ax_img.imshow(img, cmap='gray', aspect='auto')
                ax_img.axis('off')
                ax_img.set_aspect('equal')
                ax_img.set_facecolor(color[:3])
        plt.show()
        return

    # Single cluster: one figure holding a square grid of its images
    cluster_label, images = next(iter(cluster_images.items()))
    num_images = len(images)
    grid_size = max(1, int(np.ceil(np.sqrt(num_images))))

    _, axes = plt.subplots(grid_size, grid_size, figsize=(figuresize, figuresize))

    if grid_size == 1:
        # Special case for one image: plt.subplots returns a single Axes
        if num_images:
            axes.imshow(images[0], cmap='gray', aspect='auto')
        axes.axis('off')
    else:
        for i, grid_ax in enumerate(axes.flat):
            if i < num_images:
                grid_ax.imshow(images[i], cmap='gray', aspect='auto')
                grid_ax.set_aspect('equal')
            grid_ax.axis('off')  # Also hides any remaining empty subplots

    plt.suptitle(f'Cluster {cluster_label}', fontsize=fontsize*3, y=0.95)
    plt.tight_layout(rect=[0, 0.03, 1, 0.95])  # Leave room for the title
    plt.show()


def generate_image_umap(db_paths, tables=['cell'], visualize='cell', image_nr=100, dot_size=50, n_neighbors=30, min_dist=0.1, metric='cosine', eps=0.5, min_samples=5, filter_by=None, img_zoom=0.3, plot_by_cluster=False, plot_cluster_grids=False, remove_cluster_noise=False, figuresize=20, remove_highly_correlated=True, log_data=False, black_background=False, remove_image_canvas=False, plot_outlines=False, plot_points=True, smooth_lines=False, row_limit=None, verbose=False):
    """Embed object measurements with UMAP, cluster them with DBSCAN and plot
    the result with object images drawn on top of the embedding.

    Args:
        db_paths: List of SQLite database paths; a single string path is
            also accepted.
        tables: Measurement tables to read ('cell', 'cytoplasm', 'nucleus',
            'parasite'). The default list is never modified.
        visualize: Object type whose images are drawn; only ``png_list``
            rows whose path contains ``visualize + '_png'`` are kept. If
            None, no images are drawn on the embedding.
        image_nr: Number of images to draw (per cluster when
            ``plot_by_cluster`` is True).
        dot_size: Marker size of the scatter points.
        n_neighbors, min_dist, metric: Passed to ``umap.UMAP``.
        eps, min_samples: Passed to ``DBSCAN``.
        filter_by: If given, keep only feature columns whose name contains
            this string; ``'morphology'`` keeps columns whose name does not
            contain ``'channel'``.
        img_zoom: Zoom factor of the drawn images.
        plot_by_cluster: Sample images per cluster instead of globally.
        plot_cluster_grids: Also show a grid of sample images per cluster.
        remove_cluster_noise: Drop points labelled as noise (-1) by DBSCAN.
        figuresize: Width and height of the figure.
        remove_highly_correlated: Drop columns whose absolute correlation
            with an earlier column exceeds 0.95.
        log_data: Log-transform the data (after adding 1e-6).
        black_background: Use a black instead of a white figure background.
        remove_image_canvas: Make zero-valued image pixels transparent.
        plot_outlines: Draw the convex hull of each cluster.
        plot_points: Draw the scatter points (otherwise fully transparent).
        smooth_lines: Smooth the hull outlines with a spline.
        row_limit: If given, randomly sample at most this many rows per
            database.
        verbose: Print and display intermediate data.

    Raises:
        ValueError: If a table is not supported, no numeric feature column
            is left after filtering, or an image mode is not supported.
    """
    from .annotate_app import check_for_duplicates

    if isinstance(db_paths, str):
        db_paths = [db_paths]
    if not isinstance(db_paths, list):
        print(f'Warning: Variable db_paths is not a list. db_paths:{db_paths}')
        return
    if not db_paths:
        print('Warning: db_paths is empty.')
        return

    frames = []
    for db_path in db_paths:
        check_for_duplicates(db_path)
        if verbose:
            print(f'database:{db_path}')

        # Always release the connection, even if a query fails.
        conn = sqlite3.connect(db_path)
        try:
            df = _read_object_tables(conn, tables, verbose)
            image_paths_df = _read_image_paths(conn, visualize)
        finally:
            conn.close()

        if row_limit is not None:
            # Clamp so that a limit above the row count does not raise.
            df = df.sample(n=min(row_limit, len(df)), replace=False, random_state=1)

        # Attach the image path of every object that has one
        df = image_paths_df.merge(df.set_index('prcfo'), left_index=True, right_index=True)

        if verbose:
            display(df)

        frames.append(df)

    all_df = pd.concat(frames, axis=0)
    image_paths = all_df['png_path'].to_list()

    if verbose:
        display(all_df)

    # Filter the feature columns if filter_by is specified. This must act on
    # the combined frame that the features are taken from.
    feature_df = all_df
    if filter_by is not None:
        if filter_by != 'morphology':
            cols_to_include = [col for col in all_df.columns if filter_by in str(col)]
        else:
            cols_to_include = [col for col in all_df.columns if 'channel' not in str(col)]
        feature_df = all_df[cols_to_include]

    # Remove non-numerical data
    numeric_data = feature_df.select_dtypes(include=['number'])
    if numeric_data.shape[1] == 0:
        raise ValueError(f'No numeric feature columns left after filtering with filter_by={filter_by!r}')

    # An absolute correlation lies in [0, 1], so the cut-off is a fraction.
    if remove_highly_correlated:
        numeric_data = remove_highly_correlated_columns(df=numeric_data, threshold=0.95)

    if verbose:
        print(f'Columns included in UMAP')
        print(numeric_data.columns.tolist())
        display(numeric_data)

    # Log transform data
    if log_data:
        numeric_data = np.log(numeric_data + 1e-6)

    # Fill NaN values with the column means
    numeric_data = numeric_data.fillna(numeric_data.mean())

    # Scale the numeric data
    scaler = StandardScaler(copy=True, with_mean=True, with_std=True)
    numeric_data = scaler.fit_transform(numeric_data)

    if verbose:
        print(numeric_data)

    # Perform UMAP analysis
    reducer = umap.UMAP(n_neighbors=n_neighbors,
                        n_components=2,
                        metric=metric,
                        output_metric='euclidean',
                        n_epochs=10,
                        learning_rate=0.1,
                        init='spectral',
                        min_dist=min_dist,
                        spread=1.0,
                        low_memory=False,
                        set_op_mix_ratio=1.0,
                        local_connectivity=1.0,
                        repulsion_strength=1.0,
                        negative_sample_rate=5,
                        transform_queue_size=4.0,
                        a=None,
                        b=None,
                        random_state=None,
                        metric_kwds=None,
                        angular_rp_forest=False,
                        target_n_neighbors=-1,
                        target_metric='categorical',
                        target_metric_kwds=None,
                        target_weight=0.5,
                        transform_seed=42,
                        verbose=False)

    embedding = reducer.fit_transform(numeric_data)

    clustering = DBSCAN(eps=eps,
                        min_samples=min_samples,
                        metric='euclidean',
                        metric_params=None,
                        algorithm='auto',
                        leaf_size=30,
                        p=None,
                        n_jobs=None).fit(embedding)

    labels = clustering.labels_

    if remove_cluster_noise:
        non_noise = labels != -1
        embedding = embedding[non_noise]
        labels = labels[non_noise]
        # Keep the paths aligned with the embedding rows; otherwise a point
        # would be drawn with the image of a different object.
        image_paths = [path for path, keep in zip(image_paths, non_noise) if keep]

    unique_labels = np.unique(labels)
    colors = _cluster_colors(unique_labels)

    # Cluster centres are used to place the cluster labels
    cluster_centers = [np.mean(embedding[labels == cluster_label], axis=0) for cluster_label in unique_labels]

    # Generate matplotlib figure (these settings modify the global rcParams)
    if black_background:
        plt.rcParams['figure.facecolor'] = 'black'
        plt.rcParams['axes.facecolor'] = 'black'
        plt.rcParams['text.color'] = 'white'
        plt.rcParams['xtick.color'] = 'white'
        plt.rcParams['ytick.color'] = 'white'
        plt.rcParams['axes.labelcolor'] = 'white'
    else:
        plt.rcParams['figure.facecolor'] = 'white'
        plt.rcParams['axes.facecolor'] = 'white'
        plt.rcParams['text.color'] = 'black'
        plt.rcParams['xtick.color'] = 'black'
        plt.rcParams['ytick.color'] = 'black'
        plt.rcParams['axes.labelcolor'] = 'black'

    fig, ax = plt.subplots(1, 1, figsize=(figuresize, figuresize))
    fontsize = int(figuresize*0.75)
    handles = []

    # Plot all points in the embedding
    for cluster_label, color, center in zip(unique_labels, colors, cluster_centers):
        cluster_data = embedding[labels == cluster_label]

        # A hull needs more than two points; only build it when it is drawn.
        if plot_outlines and cluster_data.shape[0] > 2:
            if smooth_lines:
                x_smooth, y_smooth = smooth_hull_lines(cluster_data)
                ax.plot(x_smooth, y_smooth, color=color, linewidth=2)
            else:
                hull = ConvexHull(cluster_data)
                for simplex in hull.simplices:
                    ax.plot(hull.points[simplex, 0], hull.points[simplex, 1], color=color, linewidth=4)

        # Invisible points are still drawn so that the legend entry exists
        scatter = ax.scatter(cluster_data[:, 0], cluster_data[:, 1], s=dot_size, c=[color],
                             alpha=0.5 if plot_points else 0,
                             label=f'Cluster {cluster_label if cluster_label != -1 else "Noise"}')
        handles.append(scatter)

        # Annotate the cluster centre with the cluster label
        if cluster_label != -1:  # Skip noise labelled as -1
            ax.text(center[0], center[1], str(cluster_label), fontsize=12, ha='center', va='center')

    # Point indices of every cluster (noise excluded)
    cluster_indices = {label: np.where(labels == label)[0] for label in unique_labels if label != -1}

    if visualize is not None:
        if plot_by_cluster:
            # Draw images for a sample of points from each cluster
            for indices in cluster_indices.values():
                sampled = _sample_cluster_indices(indices, image_nr)
                _add_images_to_axes(ax, embedding, image_paths, sampled, img_zoom, remove_image_canvas)
        else:
            # Draw images for random points; never ask for more than exist
            sampled = random.sample(range(len(embedding)), min(image_nr, len(embedding)))
            _add_images_to_axes(ax, embedding, image_paths, sampled, img_zoom, remove_image_canvas)

    ax.legend(handles=handles, loc='best', fontsize=fontsize)
    ax.set_xlabel('UMAP Dimension 1', fontsize=fontsize)
    ax.set_ylabel('UMAP Dimension 2', fontsize=fontsize)
    ax.tick_params(axis='both', which='major', labelsize=fontsize)
    plt.show()

    if plot_cluster_grids:
        color_by_label = dict(zip(unique_labels, colors))
        _show_cluster_grids(cluster_indices, image_paths, image_nr, color_by_label, figuresize, fontsize)
    return
589
-
590
def main():
    """Run the example analyses against the hard-coded measurement databases.

    Kept out of module scope so that importing this module does not start
    long-running analyses on paths that may not exist.
    """
    screen_db = '/mnt/data/CellVoyager/20x/tsg101/crispr_screen/all/measurements/measurements.db'
    db_paths = [screen_db]

    generate_image_umap(db_paths=db_paths,
                        row_limit=1000,
                        tables=['cytoplasm'],
                        visualize='cell',
                        image_nr=36,
                        dot_size=50,
                        n_neighbors=1000,
                        min_dist=0.1,
                        metric='euclidean',
                        eps=0.5,
                        min_samples=1000,
                        filter_by='channel_0',
                        img_zoom=0.3,
                        plot_by_cluster=True,
                        plot_cluster_grids=True,
                        remove_cluster_noise=True,
                        remove_highly_correlated=True,
                        log_data=True,
                        figuresize=60,
                        black_background=False,
                        remove_image_canvas=False,
                        plot_outlines=False,
                        plot_points=True,
                        smooth_lines=False,
                        verbose=True)

    generate_image_umap(db_paths=db_paths,
                        tables=['cytoplasm'],
                        visualize='cytoplasm',
                        image_nr=36,
                        dot_size=50,
                        n_neighbors=1000,
                        min_dist=0.1,
                        metric='euclidean',
                        eps=0.5,
                        min_samples=1000,
                        filter_by='channel_0',
                        img_zoom=0.3,
                        plot_by_cluster=True,
                        plot_cluster_grids=True,
                        remove_cluster_noise=True,
                        remove_highly_correlated=True,
                        log_data=True,
                        figuresize=60,
                        black_background=False,
                        remove_image_canvas=False,
                        plot_outlines=False,
                        plot_points=True,
                        smooth_lines=False,
                        verbose=False)

    mack_db = '/mnt/data/CellVoyager/63x/mack/CRCR2P2_20230721_162734/PECCU/measurements/measurements.db'
    channels = ['channel_0', 'channel_1', 'channel_2', 'channel_3', None]

    for channel in channels:
        # generate_image_umap expects a list of database paths.
        generate_image_umap([mack_db],
                            tables=['cell', 'cytoplasm', 'nucleus'],
                            image_nr=36,
                            dot_size=50,
                            n_neighbors=50,
                            min_dist=0.1,
                            metric='euclidean',
                            eps=0.3,
                            min_samples=100,
                            filter_by=channel,
                            img_zoom=0.2,
                            plot_by_cluster=True,
                            plot_cluster_grids=True,
                            remove_cluster_noise=True,
                            remove_highly_correlated=True,
                            log_data=True,
                            figuresize=60,
                            verbose=False)

    tables = ['cell', 'cytoplasm', 'parasite']

    # UMAP hyperparameters
    umap_params = [{'n_neighbors': 20, 'min_dist': 0.01, 'metric': 'euclidean'},
                   {'n_neighbors': 40, 'min_dist': 0.1, 'metric': 'euclidean'}]

    # DBSCAN hyperparameters
    # NOTE(review): both entries are identical, so the two grid columns
    # compare the same DBSCAN setting; confirm whether one should differ.
    dbscan_params = [{'eps': 0.3, 'min_samples': 100},
                     {'eps': 0.3, 'min_samples': 100}]

    hyperparameter_search(screen_db,
                          tables=tables,
                          filter_by='channel_0',
                          sample_size=5000,
                          umap_params=umap_params,
                          dbscan_params=dbscan_params,
                          remove_highly_correlated=True,
                          log_data=True,
                          pointsize=2,
                          verbose=False)


if __name__ == '__main__':
    main()