spacr 0.3.42__py3-none-any.whl → 0.3.45__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- spacr/cellpose.py +63 -73
- spacr/gui_core.py +5 -5
- spacr/gui_utils.py +1 -1
- spacr/io.py +76 -63
- spacr/ml.py +4 -3
- spacr/plot.py +265 -11
- spacr/settings.py +29 -8
- spacr/submodules.py +214 -10
- spacr/toxo.py +5 -116
- spacr/utils.py +103 -6
- {spacr-0.3.42.dist-info → spacr-0.3.45.dist-info}/METADATA +1 -1
- {spacr-0.3.42.dist-info → spacr-0.3.45.dist-info}/RECORD +16 -16
- {spacr-0.3.42.dist-info → spacr-0.3.45.dist-info}/LICENSE +0 -0
- {spacr-0.3.42.dist-info → spacr-0.3.45.dist-info}/WHEEL +0 -0
- {spacr-0.3.42.dist-info → spacr-0.3.45.dist-info}/entry_points.txt +0 -0
- {spacr-0.3.42.dist-info → spacr-0.3.45.dist-info}/top_level.txt +0 -0
spacr/submodules.py
CHANGED
@@ -8,6 +8,9 @@ from cellpose import models as cp_models
|
|
8
8
|
from cellpose import train as train_cp
|
9
9
|
from IPython.display import display
|
10
10
|
|
11
|
+
import matplotlib.pyplot as plt
|
12
|
+
from natsort import natsorted
|
13
|
+
|
11
14
|
def analyze_recruitment(settings={}):
|
12
15
|
"""
|
13
16
|
Analyze recruitment data by grouping the DataFrame by well coordinates and plotting controls and recruitment data.
|
@@ -122,16 +125,39 @@ def analyze_recruitment(settings={}):
|
|
122
125
|
|
123
126
|
return [cells,wells]
|
124
127
|
|
125
|
-
def analyze_plaques(
|
128
|
+
def analyze_plaques(settings):
|
129
|
+
|
130
|
+
from .cellpose import identify_masks_finetune
|
131
|
+
from .settings import get_analyze_plaque_settings
|
132
|
+
from .utils import save_settings, download_models
|
133
|
+
from spacr import __file__ as spacr_path
|
134
|
+
|
135
|
+
download_models()
|
136
|
+
package_dir = os.path.dirname(spacr_path)
|
137
|
+
models_dir = os.path.join(package_dir, 'resources', 'models', 'cp')
|
138
|
+
model_path = os.path.join(models_dir, 'toxo_plaque_cyto_e25000_X1120_Y1120.CP_model')
|
139
|
+
settings['custom_model'] = model_path
|
140
|
+
print('custom_model',settings['custom_model'])
|
141
|
+
|
142
|
+
settings = get_analyze_plaque_settings(settings)
|
143
|
+
save_settings(settings, name='analyze_plaques', show=True)
|
144
|
+
settings['dst'] = os.path.join(settings['src'], 'masks')
|
145
|
+
|
146
|
+
if settings['masks']:
|
147
|
+
identify_masks_finetune(settings)
|
148
|
+
folder = settings['dst']
|
149
|
+
else:
|
150
|
+
folder = settings['dst']
|
151
|
+
|
126
152
|
summary_data = []
|
127
153
|
details_data = []
|
128
154
|
stats_data = []
|
129
155
|
|
130
156
|
for filename in os.listdir(folder):
|
131
157
|
filepath = os.path.join(folder, filename)
|
132
|
-
|
133
|
-
|
134
|
-
|
158
|
+
|
159
|
+
if filepath.endswith('.tif') and os.path.isfile(filepath):
|
160
|
+
print(f"Analyzing: {filepath}")
|
135
161
|
image = cellpose.io.imread(filepath)
|
136
162
|
labeled_image = label(image)
|
137
163
|
regions = regionprops(labeled_image)
|
@@ -214,7 +240,6 @@ def train_cellpose(settings):
|
|
214
240
|
label_files,
|
215
241
|
settings['channels'],
|
216
242
|
settings['percentiles'],
|
217
|
-
settings['circular'],
|
218
243
|
settings['invert'],
|
219
244
|
settings['verbose'],
|
220
245
|
settings['remove_background'],
|
@@ -231,7 +256,6 @@ def train_cellpose(settings):
|
|
231
256
|
test_label_files,
|
232
257
|
settings['channels'],
|
233
258
|
settings['percentiles'],
|
234
|
-
settings['circular'],
|
235
259
|
settings['invert'],
|
236
260
|
settings['verbose'],
|
237
261
|
settings['remove_background'],
|
@@ -242,13 +266,12 @@ def train_cellpose(settings):
|
|
242
266
|
test_images = [np.squeeze(img) if img.shape[-1] == 1 else img for img in test_images]
|
243
267
|
|
244
268
|
else:
|
245
|
-
images, masks, image_names, mask_names = _load_images_and_labels(img_src, mask_src, settings['
|
269
|
+
images, masks, image_names, mask_names = _load_images_and_labels(img_src, mask_src, settings['invert'])
|
246
270
|
images = [np.squeeze(img) if img.shape[-1] == 1 else img for img in images]
|
247
271
|
|
248
272
|
if settings['test']:
|
249
273
|
test_images, test_masks, test_image_names, test_mask_names = _load_images_and_labels(test_img_src,
|
250
274
|
test_mask_src,
|
251
|
-
settings['circular'],
|
252
275
|
settings['invert'])
|
253
276
|
|
254
277
|
test_images = [np.squeeze(img) if img.shape[-1] == 1 else img for img in test_images]
|
@@ -293,7 +316,6 @@ def train_cellpose(settings):
|
|
293
316
|
SGD=False,
|
294
317
|
channels=cp_channels,
|
295
318
|
channel_axis=None,
|
296
|
-
#rgb=False,
|
297
319
|
normalize=False,
|
298
320
|
compute_flows=False,
|
299
321
|
save_path=model_save_path,
|
@@ -346,4 +368,186 @@ def count_phenotypes(settings):
|
|
346
368
|
|
347
369
|
pivot_df.to_csv(output_path)
|
348
370
|
|
349
|
-
return
|
371
|
+
return
|
372
|
+
|
373
|
+
def compare_reads_to_scores(reads_csv, scores_csv, empirical_dict={'r1':(90,10),'r2':(90,10),'r3':(80,20),'r4':(80,20),'r5':(70,30),'r6':(70,30),'r7':(60,40),'r8':(60,40),'r9':(50,50),'r10':(50,50),'r11':(40,60),'r12':(40,60),'r13':(30,70),'r14':(30,70),'r15':(20,80),'r16':(20,80)},
|
374
|
+
pc_grna='TGGT1_220950_1', nc_grna='TGGT1_233460_4',
|
375
|
+
y_columns=['class_1_fraction', 'TGGT1_220950_1_fraction', 'nc_fraction'],
|
376
|
+
column='column', value='c3', plate=None, save_paths=None):
|
377
|
+
|
378
|
+
def calculate_well_score_fractions(df, class_columns='cv_predictions'):
|
379
|
+
if all(col in df.columns for col in ['plate', 'row', 'column']):
|
380
|
+
df['prc'] = df['plate'] + '_' + df['row'] + '_' + df['column']
|
381
|
+
else:
|
382
|
+
raise ValueError("Cannot find 'plate', 'row', or 'column' in df.columns")
|
383
|
+
prc_summary = df.groupby(['plate', 'row', 'column', 'prc']).size().reset_index(name='total_rows')
|
384
|
+
well_counts = (df.groupby(['plate', 'row', 'column', 'prc', class_columns])
|
385
|
+
.size()
|
386
|
+
.unstack(fill_value=0)
|
387
|
+
.reset_index()
|
388
|
+
.rename(columns={0: 'class_0', 1: 'class_1'}))
|
389
|
+
summary_df = pd.merge(prc_summary, well_counts, on=['plate', 'row', 'column', 'prc'], how='left')
|
390
|
+
summary_df['class_0_fraction'] = summary_df['class_0'] / summary_df['total_rows']
|
391
|
+
summary_df['class_1_fraction'] = summary_df['class_1'] / summary_df['total_rows']
|
392
|
+
return summary_df
|
393
|
+
|
394
|
+
def plot_line(df, x_column, y_columns, group_column=None, xlabel=None, ylabel=None,
|
395
|
+
title=None, figsize=(10, 6), save_path=None, theme='deep'):
|
396
|
+
"""
|
397
|
+
Create a line plot that can handle multiple y-columns, each becoming a separate line.
|
398
|
+
"""
|
399
|
+
|
400
|
+
def _set_theme(theme):
|
401
|
+
"""Set the Seaborn theme and reorder colors if necessary."""
|
402
|
+
|
403
|
+
def __set_reordered_theme(theme='deep', order=None, n_colors=100, show_theme=False):
|
404
|
+
"""Set and reorder the Seaborn color palette."""
|
405
|
+
palette = sns.color_palette(theme, n_colors)
|
406
|
+
if order:
|
407
|
+
reordered_palette = [palette[i] for i in order]
|
408
|
+
else:
|
409
|
+
reordered_palette = palette
|
410
|
+
if show_theme:
|
411
|
+
sns.palplot(reordered_palette)
|
412
|
+
plt.show()
|
413
|
+
return reordered_palette
|
414
|
+
|
415
|
+
integer_list = list(range(1, 81))
|
416
|
+
color_order = [7, 9, 4, 0, 3, 6, 2] + integer_list
|
417
|
+
sns_palette = __set_reordered_theme(theme, color_order, 100)
|
418
|
+
return sns_palette
|
419
|
+
|
420
|
+
sns_palette = _set_theme(theme)
|
421
|
+
|
422
|
+
# Sort the DataFrame based on the x_column
|
423
|
+
df = df.loc[natsorted(df.index, key=lambda x: df.loc[x, x_column])]
|
424
|
+
|
425
|
+
fig, ax = plt.subplots(figsize=figsize)
|
426
|
+
|
427
|
+
# Handle multiple y-columns, each as a separate line
|
428
|
+
if isinstance(y_columns, list):
|
429
|
+
for idx, y_col in enumerate(y_columns):
|
430
|
+
sns.lineplot(
|
431
|
+
data=df, x=x_column, y=y_col, ax=ax, label=y_col,
|
432
|
+
color=sns_palette[idx % len(sns_palette)], linewidth=1
|
433
|
+
)
|
434
|
+
else:
|
435
|
+
sns.lineplot(
|
436
|
+
data=df, x=x_column, y=y_columns, hue=group_column, ax=ax,
|
437
|
+
palette=sns_palette, linewidth=2
|
438
|
+
)
|
439
|
+
|
440
|
+
# Set axis labels and title
|
441
|
+
ax.set_xlabel(xlabel if xlabel else x_column)
|
442
|
+
ax.set_ylabel(ylabel if ylabel else 'Value')
|
443
|
+
ax.set_title(title if title else 'Line Plot')
|
444
|
+
|
445
|
+
# Remove top and right spines
|
446
|
+
sns.despine(ax=ax)
|
447
|
+
|
448
|
+
# Ensure legend only appears when needed and place it to the right
|
449
|
+
if group_column or isinstance(y_columns, list):
|
450
|
+
ax.legend(title='Legend', loc='center left', bbox_to_anchor=(1, 0.5))
|
451
|
+
|
452
|
+
plt.tight_layout()
|
453
|
+
|
454
|
+
# Save the plot if a save path is provided
|
455
|
+
if save_path:
|
456
|
+
plt.savefig(save_path, format='pdf', dpi=600, bbox_inches='tight')
|
457
|
+
print(f"Plot saved to {save_path}")
|
458
|
+
|
459
|
+
plt.show()
|
460
|
+
return fig
|
461
|
+
|
462
|
+
def calculate_grna_fraction_ratio(df, grna1='TGGT1_220950_1', grna2='TGGT1_233460_4'):
|
463
|
+
# Filter relevant grna_names within each prc and group them
|
464
|
+
grouped = df[df['grna_name'].isin([grna1, grna2])] \
|
465
|
+
.groupby(['prc', 'grna_name']) \
|
466
|
+
.agg({'fraction': 'sum', 'count': 'sum'}) \
|
467
|
+
.unstack(fill_value=0)
|
468
|
+
grouped.columns = ['_'.join(col).strip() for col in grouped.columns.values]
|
469
|
+
grouped['fraction_ratio'] = grouped[f'fraction_{grna1}'] / grouped[f'fraction_{grna2}']
|
470
|
+
grouped = grouped.assign(
|
471
|
+
fraction_ratio=lambda x: x['fraction_ratio'].replace([float('inf'), -float('inf')], 0)
|
472
|
+
).fillna({'fraction_ratio': 0})
|
473
|
+
grouped = grouped.rename(columns={
|
474
|
+
f'count_{grna1}': f'{grna1}_count',
|
475
|
+
f'count_{grna2}': f'{grna2}_count'
|
476
|
+
})
|
477
|
+
result = grouped.reset_index()[['prc', f'{grna1}_count', f'{grna2}_count', 'fraction_ratio']]
|
478
|
+
result['total_reads'] = result[f'{grna1}_count'] + result[f'{grna2}_count']
|
479
|
+
result[f'{grna1}_fraction'] = result[f'{grna1}_count'] / result['total_reads']
|
480
|
+
result[f'{grna2}_fraction'] = result[f'{grna2}_count'] / result['total_reads']
|
481
|
+
return result
|
482
|
+
|
483
|
+
def calculate_well_read_fraction(df, count_column='count'):
|
484
|
+
if all(col in df.columns for col in ['plate', 'row', 'column']):
|
485
|
+
df['prc'] = df['plate'] + '_' + df['row'] + '_' + df['column']
|
486
|
+
else:
|
487
|
+
raise ValueError("Cannot find plate, row or column in df.columns")
|
488
|
+
grouped_df = df.groupby('prc')[count_column].sum().reset_index()
|
489
|
+
grouped_df = grouped_df.rename(columns={count_column: 'total_counts'})
|
490
|
+
df = pd.merge(df, grouped_df, on='prc')
|
491
|
+
df['fraction'] = df['count'] / df['total_counts']
|
492
|
+
return df
|
493
|
+
|
494
|
+
if isinstance(reads_csv, list):
|
495
|
+
if len(reads_csv) == len(scores_csv):
|
496
|
+
reads_ls = []
|
497
|
+
scores_ls = []
|
498
|
+
for i, reads_csv_temp in enumerate(reads_csv):
|
499
|
+
reads_df_temp = pd.read_csv(reads_csv_temp)
|
500
|
+
scores_df_temp = pd.read_csv(scores_csv[i])
|
501
|
+
reads_df_temp['plate'] = f"plate{i+1}"
|
502
|
+
scores_df_temp['plate'] = f"plate{i+1}"
|
503
|
+
|
504
|
+
if 'col' in reads_df_temp.columns:
|
505
|
+
reads_df_temp = reads_df_temp.rename(columns={'col': 'column'})
|
506
|
+
if 'column_name' in reads_df_temp.columns:
|
507
|
+
reads_df_temp = reads_df_temp.rename(columns={'column_name': 'column'})
|
508
|
+
if 'col' in scores_df_temp.columns:
|
509
|
+
scores_df_temp = scores_df_temp.rename(columns={'col': 'column'})
|
510
|
+
if 'column_name' in scores_df_temp.columns:
|
511
|
+
scores_df_temp = scores_df_temp.rename(columns={'column_name': 'column'})
|
512
|
+
if 'row_name' in reads_df_temp.columns:
|
513
|
+
reads_df_temp = reads_df_temp.rename(columns={'row_name': 'row'})
|
514
|
+
if 'row_name' in scores_df_temp.columns:
|
515
|
+
scores_df_temp = scores_df_temp.rename(columns={'row_name': 'row'})
|
516
|
+
|
517
|
+
reads_ls.append(reads_df_temp)
|
518
|
+
scores_ls.append(scores_df_temp)
|
519
|
+
|
520
|
+
reads_df = pd.concat(reads_ls, axis=0)
|
521
|
+
scores_df = pd.concat(scores_ls, axis=0)
|
522
|
+
print(f"Reads: {len(reads_df)} Scores: {len(scores_df)}")
|
523
|
+
else:
|
524
|
+
print(f"reads_csv and scores_csv must contain the same number of elements if reads_csv is a list")
|
525
|
+
else:
|
526
|
+
reads_df = pd.read_csv(reads_csv)
|
527
|
+
scores_df = pd.read_csv(scores_csv)
|
528
|
+
if plate != None:
|
529
|
+
reads_df['plate'] = plate
|
530
|
+
scores_df['plate'] = plate
|
531
|
+
|
532
|
+
reads_df = calculate_well_read_fraction(reads_df)
|
533
|
+
scores_df = calculate_well_score_fractions(scores_df)
|
534
|
+
reads_col_df = reads_df[reads_df[column]==value]
|
535
|
+
scores_col_df = scores_df[scores_df[column]==value]
|
536
|
+
|
537
|
+
reads_col_df = calculate_grna_fraction_ratio(reads_col_df, grna1=pc_grna, grna2=nc_grna)
|
538
|
+
df = pd.merge(reads_col_df, scores_col_df, on='prc')
|
539
|
+
|
540
|
+
df_emp = pd.DataFrame([(key, val[0], val[1], val[0] / (val[0] + val[1]), val[1] / (val[0] + val[1])) for key, val in empirical_dict.items()],columns=['key', 'value1', 'value2', 'pc_fraction', 'nc_fraction'])
|
541
|
+
|
542
|
+
df = pd.merge(df, df_emp, left_on='row', right_on='key')
|
543
|
+
|
544
|
+
if any in y_columns not in df.columns:
|
545
|
+
print(f"columns in dataframe:")
|
546
|
+
for col in df.columns:
|
547
|
+
print(col)
|
548
|
+
return
|
549
|
+
display(df)
|
550
|
+
fig_1 = plot_line(df, x_column = 'pc_fraction', y_columns=y_columns, group_column=None, xlabel=None, ylabel='Fraction', title=None, figsize=(10, 6), save_path=save_paths[0])
|
551
|
+
fig_2 = plot_line(df, x_column = 'nc_fraction', y_columns=y_columns, group_column=None, xlabel=None, ylabel='Fraction', title=None, figsize=(10, 6), save_path=save_paths[1])
|
552
|
+
|
553
|
+
return [fig_1, fig_2]
|
spacr/toxo.py
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
import os
|
1
2
|
import matplotlib.pyplot as plt
|
2
3
|
import seaborn as sns
|
3
4
|
import numpy as np
|
@@ -6,125 +7,13 @@ import pandas as pd
|
|
6
7
|
from scipy.stats import fisher_exact
|
7
8
|
from IPython.display import display
|
8
9
|
|
9
|
-
def
|
10
|
-
"""
|
11
|
-
Create a volcano plot with the ability to control the shape of points based on a categorical column,
|
12
|
-
color points based on a string list, annotate specific points based on p-value and coefficient thresholds,
|
13
|
-
and control the size of points.
|
14
|
-
|
15
|
-
Parameters:
|
16
|
-
- data_path: Path to the data CSV file.
|
17
|
-
- metadata_path: Path to the metadata CSV file.
|
18
|
-
- metadata_column: Column name in the metadata to control point shapes.
|
19
|
-
- string_list: List of strings to color points differently if present in 'coefficient' names.
|
20
|
-
- point_size: Fixed value to control the size of points.
|
21
|
-
- figsize: Width of the plot (height is half the width).
|
22
|
-
"""
|
23
|
-
|
24
|
-
|
25
|
-
filename = 'volcano_plot.pdf'
|
26
|
-
|
27
|
-
# Load the data
|
28
|
-
|
29
|
-
if isinstance(data_path, pd.DataFrame):
|
30
|
-
data = data_path
|
31
|
-
else:
|
32
|
-
data = pd.read_csv(data_path)
|
33
|
-
data['variable'] = data['feature'].str.extract(r'\[(.*?)\]')
|
34
|
-
data['variable'].fillna(data['feature'], inplace=True)
|
35
|
-
split_columns = data['variable'].str.split('_', expand=True)
|
36
|
-
data['gene_nr'] = split_columns[0]
|
37
|
-
|
38
|
-
# Load metadata
|
39
|
-
if isinstance(metadata_path, pd.DataFrame):
|
40
|
-
metadata = metadata_path
|
41
|
-
else:
|
42
|
-
metadata = pd.read_csv(metadata_path)
|
43
|
-
|
44
|
-
metadata['gene_nr'] = metadata['gene_nr'].astype(str)
|
45
|
-
data['gene_nr'] = data['gene_nr'].astype(str)
|
46
|
-
|
47
|
-
|
48
|
-
# Merge data and metadata on 'gene_nr'
|
49
|
-
merged_data = pd.merge(data, metadata[['gene_nr', 'tagm_location']], on='gene_nr', how='left')
|
50
|
-
|
51
|
-
merged_data.loc[merged_data['gene_nr'].str.startswith('4'), metadata_column] = 'GT1_gene'
|
52
|
-
merged_data.loc[merged_data['gene_nr'] == 'Intercept', metadata_column] = 'Intercept'
|
53
|
-
|
54
|
-
# Create the volcano plot
|
55
|
-
figsize_2 = figsize / 2
|
56
|
-
plt.figure(figsize=(figsize_2, figsize))
|
57
|
-
|
58
|
-
palette = {
|
59
|
-
'pc': 'red',
|
60
|
-
'nc': 'green',
|
61
|
-
'control': 'black',
|
62
|
-
'other': 'gray'
|
63
|
-
}
|
64
|
-
|
65
|
-
merged_data['condition'] = pd.Categorical(
|
66
|
-
merged_data['condition'],
|
67
|
-
categories=['pc', 'nc', 'control', 'other'],
|
68
|
-
ordered=True
|
69
|
-
)
|
70
|
-
|
71
|
-
display(merged_data)
|
72
|
-
|
73
|
-
# Create the scatter plot with fixed point size
|
74
|
-
sns.scatterplot(
|
75
|
-
data=merged_data,
|
76
|
-
x='coefficient',
|
77
|
-
y='-log10(p_value)',
|
78
|
-
hue='condition', # Controls color
|
79
|
-
style=metadata_column if metadata_column else None, # Controls point shape
|
80
|
-
s=point_size, # Fixed size for all points
|
81
|
-
palette=palette, # Color palette
|
82
|
-
alpha=1.0 # Transparency
|
83
|
-
)
|
84
|
-
|
85
|
-
# Set the plot title and labels
|
86
|
-
plt.title('Custom Volcano Plot of Coefficients')
|
87
|
-
plt.xlabel('Coefficient')
|
88
|
-
plt.ylabel('-log10(p-value)')
|
89
|
-
|
90
|
-
if threshold > 0:
|
91
|
-
plt.gca().axvline(x=-abs(threshold), linestyle='--', color='black')
|
92
|
-
plt.gca().axvline(x=abs(threshold), linestyle='--', color='black')
|
93
|
-
|
94
|
-
# Horizontal line at p-value threshold (0.05)
|
95
|
-
plt.axhline(y=-np.log10(0.05), color='black', linestyle='--')
|
96
|
-
|
97
|
-
texts = []
|
98
|
-
for i, row in merged_data.iterrows():
|
99
|
-
if row['p_value'] <= 0.05 and abs(row['coefficient']) >= abs(threshold):
|
100
|
-
texts.append(plt.text(
|
101
|
-
row['coefficient'],
|
102
|
-
-np.log10(row['p_value']),
|
103
|
-
row['variable'],
|
104
|
-
fontsize=8
|
105
|
-
))
|
106
|
-
|
107
|
-
# Adjust text positions to avoid overlap
|
108
|
-
adjust_text(texts, arrowprops=dict(arrowstyle='-', color='black'))
|
109
|
-
|
110
|
-
# Move the legend outside the plot
|
111
|
-
plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0.)
|
112
|
-
|
113
|
-
# Save the plot
|
114
|
-
plt.savefig(filename, format='pdf', bbox_inches='tight') # bbox_inches ensures the legend doesn't get cut off
|
115
|
-
print(f'Saved Volcano plot: {filename}')
|
116
|
-
|
117
|
-
# Show the plot
|
118
|
-
plt.show()
|
119
|
-
|
120
|
-
def custom_volcano_plot(data_path, metadata_path, metadata_column='tagm_location', point_size=50, figsize=20, threshold=0, split_axis_lims = [10, None, None, 10]):
|
10
|
+
def custom_volcano_plot(data_path, metadata_path, metadata_column='tagm_location', point_size=50, figsize=20, threshold=0, split_axis_lims = [10, None, None, 10], save_path=None):
|
121
11
|
"""
|
122
12
|
Create a volcano plot with the ability to control the shape of points based on a categorical column,
|
123
13
|
color points based on a condition, annotate specific points based on p-value and coefficient thresholds,
|
124
14
|
and control the size of points.
|
125
15
|
"""
|
126
|
-
|
127
|
-
filename = 'volcano_plot.pdf'
|
16
|
+
volcano_path = save_path
|
128
17
|
|
129
18
|
# Load the data
|
130
19
|
if isinstance(data_path, pd.DataFrame):
|
@@ -297,8 +186,8 @@ def custom_volcano_plot(data_path, metadata_path, metadata_column='tagm_location
|
|
297
186
|
fig.suptitle('Custom Volcano Plot of Coefficients', y=1.02, fontsize=16) # Title above the top plot
|
298
187
|
|
299
188
|
# Save the plot as PDF
|
300
|
-
plt.savefig(
|
301
|
-
print(f'Saved Volcano plot: {
|
189
|
+
plt.savefig(volcano_path, format='pdf', bbox_inches='tight')
|
190
|
+
print(f'Saved Volcano plot: {volcano_path}')
|
302
191
|
|
303
192
|
# Show the plot
|
304
193
|
plt.show()
|
spacr/utils.py
CHANGED
@@ -51,7 +51,8 @@ from scipy.stats import fisher_exact, f_oneway, kruskal
|
|
51
51
|
from scipy.ndimage.filters import gaussian_filter
|
52
52
|
from scipy.spatial import ConvexHull
|
53
53
|
from scipy.interpolate import splprep, splev
|
54
|
-
from scipy
|
54
|
+
from scipy import ndimage
|
55
|
+
from scipy.ndimage import binary_dilation, binary_fill_holes
|
55
56
|
|
56
57
|
from skimage.exposure import rescale_intensity
|
57
58
|
from sklearn.metrics import auc, precision_recall_curve
|
@@ -64,6 +65,7 @@ from sklearn.decomposition import PCA
|
|
64
65
|
from sklearn.ensemble import RandomForestClassifier
|
65
66
|
|
66
67
|
from huggingface_hub import list_repo_files
|
68
|
+
from spacr import __file__ as spacr_path
|
67
69
|
|
68
70
|
import umap.umap_ as umap
|
69
71
|
#import umap
|
@@ -705,6 +707,7 @@ def _update_database_with_merged_info(db_path, df, table='png_list', columns=['p
|
|
705
707
|
conn.close()
|
706
708
|
|
707
709
|
def _generate_representative_images(db_path, cells=['HeLa'], cell_loc=None, pathogens=['rh'], pathogen_loc=None, treatments=['cm'], treatment_loc=None, channel_of_interest=1, compartments = ['pathogen','cytoplasm'], measurement = 'mean_intensity', nr_imgs=16, channel_indices=[0,1,2], um_per_pixel=0.1, scale_bar_length_um=10, plot=False, fontsize=12, show_filename=True, channel_names=None, update_db=True):
|
710
|
+
|
708
711
|
"""
|
709
712
|
Generates representative images based on the provided parameters.
|
710
713
|
|
@@ -4479,6 +4482,7 @@ def cluster_feature_analysis(all_df, cluster_col='cluster'):
|
|
4479
4482
|
return combined_df
|
4480
4483
|
|
4481
4484
|
def _merge_cells_based_on_parasite_overlap(parasite_mask, cell_mask, nuclei_mask, overlap_threshold=5, perimeter_threshold=30):
|
4485
|
+
|
4482
4486
|
"""
|
4483
4487
|
Merge cells in cell_mask if a parasite in parasite_mask overlaps with more than one cell,
|
4484
4488
|
and if cells share more than a specified perimeter percentage.
|
@@ -4606,9 +4610,9 @@ def adjust_cell_masks(parasite_folder, cell_folder, nuclei_folder, overlap_thres
|
|
4606
4610
|
if not (os.path.exists(cell_path) and os.path.exists(nuclei_path)):
|
4607
4611
|
raise ValueError(f"Corresponding cell or nuclei mask file for {file_name} not found.")
|
4608
4612
|
# Load the masks
|
4609
|
-
parasite_mask = np.load(parasite_path)
|
4610
|
-
cell_mask = np.load(cell_path)
|
4611
|
-
nuclei_mask = np.load(nuclei_path)
|
4613
|
+
parasite_mask = np.load(parasite_path, allow_pickle=True)
|
4614
|
+
cell_mask = np.load(cell_path, allow_pickle=True)
|
4615
|
+
nuclei_mask = np.load(nuclei_path, allow_pickle=True)
|
4612
4616
|
# Merge and relabel cells
|
4613
4617
|
merged_cell_mask = _merge_cells_based_on_parasite_overlap(parasite_mask, cell_mask, nuclei_mask, overlap_threshold, perimeter_threshold)
|
4614
4618
|
|
@@ -4963,7 +4967,71 @@ def map_condition(col_value, neg='c1', pos='c2', mix='c3'):
|
|
4963
4967
|
else:
|
4964
4968
|
return 'screen'
|
4965
4969
|
|
4966
|
-
def download_models(repo_id="einarolafsson/models",
|
4970
|
+
def download_models(repo_id="einarolafsson/models", retries=5, delay=5):
|
4971
|
+
"""
|
4972
|
+
Downloads all model files from Hugging Face and stores them in the `resources/models` directory
|
4973
|
+
within the installed `spacr` package.
|
4974
|
+
|
4975
|
+
Args:
|
4976
|
+
repo_id (str): The repository ID on Hugging Face (default is 'einarolafsson/models').
|
4977
|
+
retries (int): Number of retry attempts in case of failure.
|
4978
|
+
delay (int): Delay in seconds between retries.
|
4979
|
+
|
4980
|
+
Returns:
|
4981
|
+
str: The local path to the downloaded models.
|
4982
|
+
"""
|
4983
|
+
# Construct the path to the `resources/models` directory in the installed `spacr` package
|
4984
|
+
package_dir = os.path.dirname(spacr_path)
|
4985
|
+
local_dir = os.path.join(package_dir, 'resources', 'models')
|
4986
|
+
|
4987
|
+
# Create the local directory if it doesn't exist
|
4988
|
+
if not os.path.exists(local_dir):
|
4989
|
+
os.makedirs(local_dir)
|
4990
|
+
elif len(os.listdir(local_dir)) > 0:
|
4991
|
+
print(f"Models already downloaded to: {local_dir}")
|
4992
|
+
return local_dir
|
4993
|
+
|
4994
|
+
attempt = 0
|
4995
|
+
while attempt < retries:
|
4996
|
+
try:
|
4997
|
+
# List all files in the repo
|
4998
|
+
files = list_repo_files(repo_id, repo_type="dataset")
|
4999
|
+
print(f"Files in repository: {files}") # Debugging print to check file list
|
5000
|
+
|
5001
|
+
# Download each file
|
5002
|
+
for file_name in files:
|
5003
|
+
for download_attempt in range(retries):
|
5004
|
+
try:
|
5005
|
+
url = f"https://huggingface.co/datasets/{repo_id}/resolve/main/{file_name}?download=true"
|
5006
|
+
print(f"Downloading file from: {url}") # Debugging
|
5007
|
+
|
5008
|
+
response = requests.get(url, stream=True)
|
5009
|
+
print(f"HTTP response status: {response.status_code}") # Debugging
|
5010
|
+
response.raise_for_status()
|
5011
|
+
|
5012
|
+
# Save the file locally
|
5013
|
+
local_file_path = os.path.join(local_dir, os.path.basename(file_name))
|
5014
|
+
with open(local_file_path, 'wb') as file:
|
5015
|
+
for chunk in response.iter_content(chunk_size=8192):
|
5016
|
+
file.write(chunk)
|
5017
|
+
print(f"Downloaded model file: {file_name} to {local_file_path}")
|
5018
|
+
break # Exit the retry loop if successful
|
5019
|
+
except (requests.HTTPError, requests.Timeout) as e:
|
5020
|
+
print(f"Error downloading {file_name}: {e}. Retrying in {delay} seconds...")
|
5021
|
+
time.sleep(delay)
|
5022
|
+
else:
|
5023
|
+
raise Exception(f"Failed to download {file_name} after multiple attempts.")
|
5024
|
+
|
5025
|
+
return local_dir # Return the directory where models are saved
|
5026
|
+
|
5027
|
+
except (requests.HTTPError, requests.Timeout) as e:
|
5028
|
+
print(f"Error downloading files: {e}. Retrying in {delay} seconds...")
|
5029
|
+
attempt += 1
|
5030
|
+
time.sleep(delay)
|
5031
|
+
|
5032
|
+
raise Exception("Failed to download model files after multiple attempts.")
|
5033
|
+
|
5034
|
+
def download_models_v1(repo_id="einarolafsson/models", local_dir=None, retries=5, delay=5):
|
4967
5035
|
"""
|
4968
5036
|
Downloads all model files from Hugging Face and stores them in the specified local directory.
|
4969
5037
|
|
@@ -5112,4 +5180,33 @@ def add_column_to_database(settings):
|
|
5112
5180
|
conn.commit()
|
5113
5181
|
conn.close()
|
5114
5182
|
|
5115
|
-
print(f"Updated '{new_column_name}' in '{settings['table_name']}' using '{settings['match_column']}'.")
|
5183
|
+
print(f"Updated '{new_column_name}' in '{settings['table_name']}' using '{settings['match_column']}'.")
|
5184
|
+
|
5185
|
+
def fill_holes_in_mask(mask):
|
5186
|
+
"""
|
5187
|
+
Fill holes in each object in the mask while keeping objects separated.
|
5188
|
+
|
5189
|
+
Args:
|
5190
|
+
mask (np.ndarray): A labeled mask where each object has a unique integer value.
|
5191
|
+
|
5192
|
+
Returns:
|
5193
|
+
np.ndarray: A mask with holes filled and original labels preserved.
|
5194
|
+
"""
|
5195
|
+
# Ensure the mask is integer-labeled
|
5196
|
+
labeled_mask, num_features = ndimage.label(mask)
|
5197
|
+
|
5198
|
+
# Create an empty mask to store the result
|
5199
|
+
filled_mask = np.zeros_like(labeled_mask)
|
5200
|
+
|
5201
|
+
# Fill holes for each labeled object independently
|
5202
|
+
for i in range(1, num_features + 1):
|
5203
|
+
# Create a binary mask for the current object
|
5204
|
+
object_mask = (labeled_mask == i)
|
5205
|
+
|
5206
|
+
# Fill holes within this object
|
5207
|
+
filled_object = binary_fill_holes(object_mask)
|
5208
|
+
|
5209
|
+
# Assign the original label back to the filled object
|
5210
|
+
filled_mask[filled_object] = i
|
5211
|
+
|
5212
|
+
return filled_mask
|
@@ -7,27 +7,27 @@ spacr/app_mask.py,sha256=l-dBY8ftzCMdDe6-pXc2Nh_u-idNL9G7UOARiLJBtds,153
|
|
7
7
|
spacr/app_measure.py,sha256=_K7APYIeOKpV6e_LcqabBjvEi7mfq9Fch8175x1x0k8,162
|
8
8
|
spacr/app_sequencing.py,sha256=DjG26jy4cpddnV8WOOAIiExtOe9MleVMY4MFa5uTo5w,157
|
9
9
|
spacr/app_umap.py,sha256=ZWAmf_OsIKbYvolYuWPMYhdlVe-n2CADoJulAizMiEo,153
|
10
|
-
spacr/cellpose.py,sha256=
|
10
|
+
spacr/cellpose.py,sha256=RBHMs2vwXcfkj0xqAULpALyzJYXddSRycgZSzmwI7v0,14755
|
11
11
|
spacr/core.py,sha256=dW9RrAKFLfVsFhX0-kaVMc2T7b47Ky0pTXK-CEVOeWQ,48235
|
12
12
|
spacr/deep_spacr.py,sha256=HdOcNU8cHcE_19nP7_5uTz-ih3E169ffr2Hm--NvMvA,43255
|
13
13
|
spacr/gui.py,sha256=ARyn9Q_g8HoP-cXh1nzMLVFCKqthY4v2u9yORyaQqQE,8230
|
14
|
-
spacr/gui_core.py,sha256=
|
14
|
+
spacr/gui_core.py,sha256=N7R7yvfK_dJhOReM_kW3Ci8Bokhi1OzsxeKqvSGdvV4,41460
|
15
15
|
spacr/gui_elements.py,sha256=w-S1MZdyxt5O3DsNAHNNXy_WGfwBPg0NhwQtCsJeiao,137071
|
16
|
-
spacr/gui_utils.py,sha256=
|
17
|
-
spacr/io.py,sha256=
|
16
|
+
spacr/gui_utils.py,sha256=KDWDWsi7UdZVhXk1ZWGx3ZqJMIxCUm3lGfjrVhbk52s,45463
|
17
|
+
spacr/io.py,sha256=1rIdJ_8dyn7W4D2zXjaOqlgyo_Y5Z7X86aRp4hNYWCU,144194
|
18
18
|
spacr/logger.py,sha256=lJhTqt-_wfAunCPl93xE65Wr9Y1oIHJWaZMjunHUeIw,1538
|
19
19
|
spacr/measure.py,sha256=KdboGXoi85BO5-_6er7932FgjFI7G7tuaQDnWSiEuew,54817
|
20
20
|
spacr/mediar.py,sha256=FwLvbLQW5LQzPgvJZG8Lw7GniA2vbZx6Jv6vIKu7I5c,14743
|
21
|
-
spacr/ml.py,sha256=
|
21
|
+
spacr/ml.py,sha256=bPcKVk1camnOhv8jQglj6EYyipAxxmiB1QJ2Fdo3dEM,50654
|
22
22
|
spacr/openai.py,sha256=5vBZ3Jl2llYcW3oaTEXgdyCB2aJujMUIO5K038z7w_A,1246
|
23
|
-
spacr/plot.py,sha256=
|
23
|
+
spacr/plot.py,sha256=c7PYi4p-ARjHjHCoSn-8ZEXAit0WcTRVxAcAs47tLms,145287
|
24
24
|
spacr/sequencing.py,sha256=t18mgpK6rhWuB1LtFOsPxqgpFXxuUmrD06ecsaVQ0Gw,19655
|
25
|
-
spacr/settings.py,sha256=
|
25
|
+
spacr/settings.py,sha256=3ygnAY6uLtkzFQdK8TMBbWV6zXEX-G_wV19YLyjCBeM,77668
|
26
26
|
spacr/sim.py,sha256=1xKhXimNU3ukzIw-3l9cF3Znc_brW8h20yv8fSTzvss,71173
|
27
|
-
spacr/submodules.py,sha256=
|
27
|
+
spacr/submodules.py,sha256=3C5M4UbI9Ral1MX4PTpucaAaqhL3RADuCOCqaHhMyUg,28048
|
28
28
|
spacr/timelapse.py,sha256=FSYpUtAVy6xc3lwprRYgyDTT9ysUhfRQ4zrP9_h2mvg,39465
|
29
|
-
spacr/toxo.py,sha256=
|
30
|
-
spacr/utils.py,sha256=
|
29
|
+
spacr/toxo.py,sha256=X62hKFcSzFhIxFYlhL2AZb0qNpvtjLs3y1HldReAQEY,12880
|
30
|
+
spacr/utils.py,sha256=K36BxYr4GN956V4S7IkNty2sP4Y265WS7yMzAw8Tqeg,220451
|
31
31
|
spacr/version.py,sha256=axH5tnGwtgSnJHb5IDhiu4Zjk5GhLyAEDRe-rnaoFOA,409
|
32
32
|
spacr/resources/MEDIAR/.gitignore,sha256=Ff1q9Nme14JUd-4Q3jZ65aeQ5X4uttptssVDgBVHYo8,152
|
33
33
|
spacr/resources/MEDIAR/LICENSE,sha256=yEj_TRDLUfDpHDNM0StALXIt6mLqSgaV2hcCwa6_TcY,1065
|
@@ -150,9 +150,9 @@ spacr/resources/icons/umap.png,sha256=dOLF3DeLYy9k0nkUybiZMe1wzHQwLJFRmgccppw-8b
|
|
150
150
|
spacr/resources/images/plate1_E01_T0001F001L01A01Z01C02.tif,sha256=Tl0ZUfZ_AYAbu0up_nO0tPRtF1BxXhWQ3T3pURBCCRo,7958528
|
151
151
|
spacr/resources/images/plate1_E01_T0001F001L01A02Z01C01.tif,sha256=m8N-V71rA1TT4dFlENNg8s0Q0YEXXs8slIn7yObmZJQ,7958528
|
152
152
|
spacr/resources/images/plate1_E01_T0001F001L01A03Z01C03.tif,sha256=Pbhk7xn-KUP6RSIhJsxQcrHFImBm3GEpLkzx7WOc-5M,7958528
|
153
|
-
spacr-0.3.
|
154
|
-
spacr-0.3.
|
155
|
-
spacr-0.3.
|
156
|
-
spacr-0.3.
|
157
|
-
spacr-0.3.
|
158
|
-
spacr-0.3.
|
153
|
+
spacr-0.3.45.dist-info/LICENSE,sha256=SR-2MeGc6SCM1UORJYyarSWY_A-JaOMFDj7ReSs9tRM,1083
|
154
|
+
spacr-0.3.45.dist-info/METADATA,sha256=eUCjysKj_sil9xg7E1ZQzIUGN8mCTYd1uw64MEY7cbo,5949
|
155
|
+
spacr-0.3.45.dist-info/WHEEL,sha256=HiCZjzuy6Dw0hdX5R3LCFPDmFS4BWl8H-8W39XfmgX4,91
|
156
|
+
spacr-0.3.45.dist-info/entry_points.txt,sha256=BMC0ql9aNNpv8lUZ8sgDLQMsqaVnX5L535gEhKUP5ho,296
|
157
|
+
spacr-0.3.45.dist-info/top_level.txt,sha256=GJPU8FgwRXGzKeut6JopsSRY2R8T3i9lDgya42tLInY,6
|
158
|
+
spacr-0.3.45.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|