spacr 0.3.41__py3-none-any.whl → 0.3.43__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- spacr/cellpose.py +0 -2
- spacr/gui_core.py +5 -5
- spacr/gui_utils.py +1 -1
- spacr/io.py +74 -51
- spacr/measure.py +4 -4
- spacr/ml.py +26 -26
- spacr/plot.py +1 -1
- spacr/settings.py +49 -15
- spacr/submodules.py +161 -2
- spacr/toxo.py +26 -5
- spacr/utils.py +69 -4
- {spacr-0.3.41.dist-info → spacr-0.3.43.dist-info}/METADATA +1 -1
- {spacr-0.3.41.dist-info → spacr-0.3.43.dist-info}/RECORD +17 -17
- {spacr-0.3.41.dist-info → spacr-0.3.43.dist-info}/LICENSE +0 -0
- {spacr-0.3.41.dist-info → spacr-0.3.43.dist-info}/WHEEL +0 -0
- {spacr-0.3.41.dist-info → spacr-0.3.43.dist-info}/entry_points.txt +0 -0
- {spacr-0.3.41.dist-info → spacr-0.3.43.dist-info}/top_level.txt +0 -0
spacr/cellpose.py
CHANGED
@@ -86,7 +86,6 @@ def identify_masks_finetune(settings):
     if normalize:
         images, _, image_names, _, orig_dims = _load_normalized_images_and_labels(image_files=image_files, label_files=None, channels=channels, percentiles=percentiles, circular=circular, invert=invert, visualize=verbose, remove_background=remove_background, background=background, Signal_to_noise=Signal_to_noise, target_height=target_height, target_width=target_width)
         images = [np.squeeze(img) if img.shape[-1] == 1 else img for img in images]
-        #orig_dims = [(image.shape[0], image.shape[1]) for image in images]
     else:
         images, _, image_names, _ = _load_images_and_labels(image_files=image_files, label_files=None, circular=circular, invert=invert)
         images = [np.squeeze(img) if img.shape[-1] == 1 else img for img in images]
@@ -126,7 +125,6 @@ def identify_masks_finetune(settings):
     print_progress(files_processed, files_to_process, n_jobs=1, time_ls=time_ls)
     print_progress(files_processed, files_to_process, n_jobs=1, time_ls=time_ls, batch_size=None, operation_type="")

-
     if verbose:
         if resize:
             stack = resizescikit(stack, dims, preserve_range=True, anti_aliasing=False).astype(stack.dtype)
spacr/gui_core.py
CHANGED
@@ -384,8 +384,8 @@ def import_settings(settings_type='mask'):
     from .gui_utils import convert_settings_dict_for_gui, hide_all_settings
     from .settings import generate_fields, set_default_settings_preprocess_generate_masks, get_measure_crop_settings, set_default_train_test_model
     from .settings import set_default_generate_barecode_mapping, set_default_umap_image_settings, get_analyze_recruitment_default_settings
-    from .settings import get_default_generate_activation_map_settings
-
+    from .settings import get_default_generate_activation_map_settings, get_analyze_plaque_settings
+
     def read_settings_from_csv(csv_file_path):
         settings = {}
         with open(csv_file_path, newline='') as csvfile:
@@ -428,7 +428,7 @@ def import_settings(settings_type='mask'):
     elif settings_type == 'activation':
         settings = get_default_generate_activation_map_settings(settings={})
     elif settings_type == 'analyze_plaques':
-        settings = {}
+        settings = get_analyze_plaque_settings(settings={})
     elif settings_type == 'convert':
         settings = {}
     else:
@@ -443,7 +443,7 @@ def setup_settings_panel(vertical_container, settings_type='mask'):
     global vars_dict, scrollable_frame
     from .settings import get_identify_masks_finetune_default_settings, set_default_analyze_screen, set_default_settings_preprocess_generate_masks
     from .settings import get_measure_crop_settings, deep_spacr_defaults, set_default_generate_barecode_mapping, set_default_umap_image_settings
-    from .settings import get_map_barcodes_default_settings, get_analyze_recruitment_default_settings, get_check_cellpose_models_default_settings
+    from .settings import get_map_barcodes_default_settings, get_analyze_recruitment_default_settings, get_check_cellpose_models_default_settings, get_analyze_plaque_settings
     from .settings import generate_fields, get_perform_regression_default_settings, get_train_cellpose_default_settings, get_default_generate_activation_map_settings
     from .gui_utils import convert_settings_dict_for_gui
     from .gui_elements import set_element_size
@@ -490,7 +490,7 @@ def setup_settings_panel(vertical_container, settings_type='mask'):
     elif settings_type == 'activation':
         settings = get_default_generate_activation_map_settings(settings={})
     elif settings_type == 'analyze_plaques':
-        settings = {
+        settings = get_analyze_plaque_settings(settings={})
     elif settings_type == 'convert':
         settings = {'src':'path to images'}
     else:
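The two hunks above extend the same settings_type dispatch in two places. A minimal sketch of that dispatch, reduced to two branches for illustration (the wrapper function name default_settings_for is hypothetical, not part of spacr):

    def default_settings_for(settings_type: str) -> dict:
        # Dispatch mirrors the diff: each settings_type maps to a defaults factory.
        from spacr.settings import get_default_generate_activation_map_settings, get_analyze_plaque_settings
        if settings_type == 'activation':
            return get_default_generate_activation_map_settings(settings={})
        if settings_type == 'analyze_plaques':
            return get_analyze_plaque_settings(settings={})
        return {}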
spacr/gui_utils.py
CHANGED
@@ -380,7 +380,7 @@ def convert_settings_dict_for_gui(settings):
     variables = {}
     special_cases = {
         'metadata_type': ('combo', ['cellvoyager', 'cq1', 'nikon', 'zeis', 'custom'], 'cellvoyager'),
-        'channels': ('combo', ['[0,1,2,3]', '[0,1,2]', '[0,1]', '[0]'], '[0,1,2,3]'),
+        'channels': ('combo', ['[0,1,2,3]', '[0,1,2]', '[0,1]', '[0]', '[0,0]'], '[0,1,2,3]'),
         'train_channels': ('combo', ["['r','g','b']", "['r','g']", "['r','b']", "['g','b']", "['r']", "['g']", "['b']"], "['r','g','b']"),
         'channel_dims': ('combo', ['[0,1,2,3]', '[0,1,2]', '[0,1]', '[0]'], '[0,1,2,3]'),
         'dataset_mode': ('combo', ['annotation', 'metadata', 'recruitment'], 'metadata'),
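The change adds '[0,0]' as a channel option, the Cellpose-style convention for a single grayscale channel duplicated into two slots. The combo values are stored as strings; a sketch of how such a string could be turned back into a Python list (this parsing step is an illustration, not necessarily how spacr does it):

    import ast

    # '[0,0]' is the new option added to the 'channels' combo in the diff.
    channels = ast.literal_eval('[0,0]')
    print(channels)  # [0, 0]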
spacr/io.py
CHANGED
@@ -191,107 +191,130 @@ def _load_images_and_labels(image_files, label_files, circular=False, invert=Fal
     print(f'image shape: {images[0].shape}, image type: images[0].shape mask shape: {labels[0].shape}, image type: labels[0].shape')
     return images, labels, image_names, label_names

-def _load_normalized_images_and_labels(image_files, label_files, channels=None, percentiles=None, circular=False, invert=False, visualize=False, remove_background=False, background=0, Signal_to_noise=10, target_height=None, target_width=None):
+def _load_normalized_images_and_labels(image_files, label_files, channels=None, percentiles=None,
+                                        circular=False, invert=False, visualize=False,
+                                        remove_background=False, background=0, Signal_to_noise=10,
+                                        target_height=None, target_width=None):

-    from .plot import normalize_and_visualize
+    from .plot import normalize_and_visualize, plot_resize
     from .utils import invert_image, apply_mask
+    from skimage.transform import resize as resizescikit
+
+    # Ensure percentiles are valid
+    if isinstance(percentiles, list) and len(percentiles) == 2:
+        try:
+            percentiles = [int(percentiles[0]), int(percentiles[1])]
+        except ValueError:
+            percentiles = None
+    else:
+        percentiles = None

-    signal_thresholds = background*Signal_to_noise
+    signal_thresholds = float(background) * float(Signal_to_noise)
     lower_percentile = 2

-    images = []
-    labels = []
-
+    images, labels, orig_dims = [], [], []
     num_channels = 4
     percentiles_1 = [[] for _ in range(num_channels)]
     percentiles_99 = [[] for _ in range(num_channels)]

     image_names = [os.path.basename(f) for f in image_files]
-
+    image_dir = os.path.dirname(image_files[0])
+
     if label_files is not None:
         label_names = [os.path.basename(f) for f in label_files]
         label_dir = os.path.dirname(label_files[0])
+    else:
+        label_names, label_dir = [], None

-    # Load
-    for i,img_file in enumerate(image_files):
+    # Load, normalize, and resize images
+    for i, img_file in enumerate(image_files):
         image = cellpose.io.imread(img_file)
+        orig_dims.append((image.shape[0], image.shape[1]))
+
         if invert:
             image = invert_image(image)
         if circular:
             image = apply_mask(image, output_value=0)

-        #
+        # Select specific channels if needed
         if channels is not None and image.ndim == 3:
             image = image[..., channels]

         if remove_background:
-            image
-
+            image = np.where(image < background, 0, image)
+
         if image.ndim < 3:
             image = np.expand_dims(image, axis=-1)
-
-
+
+        # Calculate percentiles if not provided
         if percentiles is None:
             for c in range(image.shape[-1]):
                 p1 = np.percentile(image[..., c], lower_percentile)
                 percentiles_1[c].append(p1)
+
+                # Ensure `signal_thresholds` and `p` are floats for comparison
                 for percentile in [98, 99, 99.9, 99.99, 99.999]:
                     p = np.percentile(image[..., c], percentile)
-                    if p > signal_thresholds:
+                    if float(p) > signal_thresholds:
                         percentiles_99[c].append(p)
                         break
-
-
-
-
-
-
-
-
-
-            normalized_images.append(normalized_image)
-            if visualize:
-                normalize_and_visualize(image, normalized_image, title=f"Channel {c+1} Normalized")
-
+
+        # Resize image if required
+        if target_height and target_width:
+            image_shape = (target_height, target_width) if image.ndim == 2 else (target_height, target_width, image.shape[-1])
+            image = resizescikit(image, image_shape, preserve_range=True, anti_aliasing=True).astype(image.dtype)
+
+        images.append(image)
+
+    # Calculate average percentiles if needed
     if percentiles is None:
-        # Calculate average percentiles for normalization
         avg_p1 = [np.mean(p) for p in percentiles_1]
-        avg_p99 = [np.mean(p) if
+        avg_p99 = [np.mean(p) if p else avg_p1[i] for i, p in enumerate(percentiles_99)]

         print(f'Average 1st percentiles: {avg_p1}, Average 99th percentiles: {avg_p99}')

-        normalized_images = [
-
-
-
-                normalized_image[..., c] = rescale_intensity(image[..., c], in_range=(avg_p1[c], avg_p99[c]), out_range=(0, 1))
-            normalized_images.append(normalized_image)
-            if visualize:
-                normalize_and_visualize(image, normalized_image, title=f"Channel {c+1} Normalized")
-
-    if not image_files is None:
-        image_dir = os.path.dirname(image_files[0])
+        normalized_images = [
+            np.stack([rescale_intensity(img[..., c], in_range=(avg_p1[c], avg_p99[c]), out_range=(0, 1))
+                      for c in range(img.shape[-1])], axis=-1) for img in images
+        ]

     else:
-
-
+        normalized_images = [
+            np.stack([rescale_intensity(img[..., c],
+                                        in_range=(np.percentile(img[..., c], percentiles[0]),
+                                                  np.percentile(img[..., c], percentiles[1])),
+                                        out_range=(0, 1)) for c in range(img.shape[-1])], axis=-1)
+            for img in images
+        ]
+
+    # Load and resize labels if provided
     if label_files is not None:
-
-
-
-
-        label_dir = None
+        labels = [resizescikit(cellpose.io.imread(lbl_file),
+                               (target_height, target_width) if target_height and target_width else orig_dims[i],
+                               order=0, preserve_range=True, anti_aliasing=False).astype(np.uint8)
+                  for i, lbl_file in enumerate(label_files)]

     print(f'Loaded and normalized {len(normalized_images)} images and {len(labels)} labels from {image_dir} and {label_dir}')
-
-    return normalized_images, labels, image_names, label_names

-
+    if visualize and images and labels:
+        plot_resize(images, normalized_images, labels, labels)
+
+    return normalized_images, labels, image_names, label_names, orig_dims
+
+def _load_normalized_images_and_labels_v1(image_files, label_files, channels=None, percentiles=None, circular=False, invert=False, visualize=False, remove_background=False, background=0, Signal_to_noise=10, target_height=None, target_width=None):

     from .plot import normalize_and_visualize, plot_resize
     from .utils import invert_image, apply_mask
     from skimage.transform import resize as resizescikit

+    if isinstance(percentiles, list):
+        if len(percentiles) !=2:
+            percentiles = None
+        if not percentiles[0] is int:
+            percentiles = None
+        if not percentiles[1] is int:
+            percentiles = None
+
     signal_thresholds = background * Signal_to_noise
     lower_percentile = 2

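The rewritten loader normalizes each channel with a percentile stretch and can resize to a fixed target while remembering the original dimensions. A standalone sketch of that per-channel logic using only NumPy and scikit-image (the array values below are illustrative, not from spacr):

    import numpy as np
    from skimage.exposure import rescale_intensity
    from skimage.transform import resize

    # Illustrative 2-channel image with a background offset and a bright patch in channel 1.
    rng = np.random.default_rng(0)
    image = rng.normal(loc=100, scale=5, size=(256, 256, 2)).astype(np.float32)
    image[64:96, 64:96, 1] += 500  # synthetic "signal"

    # Per-channel 1st/99th percentile stretch to [0, 1], as the new loader does for each channel.
    p_lo = [np.percentile(image[..., c], 1) for c in range(image.shape[-1])]
    p_hi = [np.percentile(image[..., c], 99) for c in range(image.shape[-1])]
    normalized = np.stack(
        [rescale_intensity(image[..., c], in_range=(p_lo[c], p_hi[c]), out_range=(0, 1))
         for c in range(image.shape[-1])],
        axis=-1,
    )

    # Optional resize to a fixed target; keep the original dimensions so masks can be mapped back later.
    orig_dims = (image.shape[0], image.shape[1])
    resized = resize(normalized, (128, 128, image.shape[-1]), preserve_range=True, anti_aliasing=True)
    print(orig_dims, resized.shape)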
spacr/measure.py
CHANGED
@@ -710,7 +710,7 @@ def _measure_crop_core(index, time_ls, file, settings):
     else:
         cell_mask = np.zeros_like(data[:, :, 0])
         settings['cytoplasm'] = False
-        settings['
+        settings['uninfected'] = True

     if settings['nucleus_mask_dim'] is not None:
         nucleus_mask = data[:, :, settings['nucleus_mask_dim']].astype(data_type)
@@ -762,7 +762,7 @@ def _measure_crop_core(index, time_ls, file, settings):
         cytoplasm_mask = _filter_object(cytoplasm_mask, settings['cytoplasm_min_size'])

     if settings['cell_mask_dim'] is not None:
-        cell_mask, nucleus_mask, pathogen_mask, cytoplasm_mask = _exclude_objects(cell_mask, nucleus_mask, pathogen_mask, cytoplasm_mask,
+        cell_mask, nucleus_mask, pathogen_mask, cytoplasm_mask = _exclude_objects(cell_mask, nucleus_mask, pathogen_mask, cytoplasm_mask, uninfected=settings['uninfected'])

     # Update data with the new masks
     if settings['cell_mask_dim'] is not None:
@@ -979,9 +979,9 @@ def measure_crop(settings):
     #_create_database(source_folder+'/measurements/measurements.db')

     if settings['cell_mask_dim'] is None:
-        settings['
+        settings['uninfected'] = True
     if settings['pathogen_mask_dim'] is None:
-        settings['
+        settings['uninfected'] = True
     if settings['cell_mask_dim'] is not None and settings['pathogen_min_size'] is not None:
         settings['cytoplasm'] = True
     elif settings['cell_mask_dim'] is not None and settings['nucleus_min_size'] is not None:
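The change renames the include-uninfected flag to 'uninfected' and forces it to True whenever the cell or pathogen mask dimension is missing, since infection status cannot be evaluated without both masks. A minimal sketch of that fallback, with the helper name invented for illustration (spacr applies this logic inline in measure_crop rather than through such a helper):

    def resolve_uninfected_flag(settings: dict) -> dict:
        # Without a cell or pathogen mask there is no way to score infection,
        # so uninfected cells must be kept in the analysis.
        if settings.get('cell_mask_dim') is None:
            settings['uninfected'] = True
        if settings.get('pathogen_mask_dim') is None:
            settings['uninfected'] = True
        return settings

    print(resolve_uninfected_flag({'cell_mask_dim': 2, 'pathogen_mask_dim': None}))
    # {'cell_mask_dim': 2, 'pathogen_mask_dim': None, 'uninfected': True}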
spacr/ml.py
CHANGED
@@ -449,26 +449,28 @@ def perform_regression(settings):
             return df, n_gene
         else:
             return df
+
+

     settings = get_perform_regression_default_settings(settings)
     count_data_df, score_data_df = _perform_regression_read_data(settings)
     results_path, results_path_gene, results_path_grna, hits_path, res_folder, csv_path = _perform_regression_set_paths(settings)
     save_settings(settings, name='regression', show=True)
+
+    if isinstance(settings['filter_value'], list):
+        filter_value = settings['filter_value']
+    else:
+        filter_value = []
+    if isinstance(settings['filter_column'], str):
+        filter_column = settings['filter_column']

-    score_data_df = clean_controls(score_data_df, settings['
+    score_data_df = clean_controls(score_data_df, settings['filter_value'], settings['filter_column'])
     print(f"Dependent variable after clean_controls: {len(score_data_df)}")

     dependent_df, dependent_variable = process_scores(score_data_df, settings['dependent_variable'], settings['plate'], settings['min_cell_count'], settings['agg_type'], settings['transform'])
     print(f"Dependent variable after process_scores: {len(dependent_df)}")

-
-
-    if settings['other'] is not None:
-        if isinstance(settings['other'], str):
-            settings['other'] = [settings['other']]
-        filter_value.extend(settings['other'])
-
-    independent_df = process_reads(count_data_df, settings['fraction_threshold'], settings['plate'], filter_column=settings['location_column'], filter_value=filter_value)
+    independent_df = process_reads(count_data_df, settings['fraction_threshold'], settings['plate'], filter_column=filter_column, filter_value=filter_value)
     independent_df, n_grna, n_gene = _count_variable_instances(independent_df, column_1='grna', column_2='gene')

     print(f"Independent variable after process_reads: {len(independent_df)}")
@@ -498,12 +500,14 @@ def perform_regression(settings):
     if settings['controls'] is not None:
         control_coef_df = grna_coef_df[grna_coef_df['grna'].isin(settings['controls'])]
         mean_coef = control_coef_df['coefficient'].mean()
-
-
-
-
-
+
+        if settings['threshold_method'] in ['var','variance']:
+            coef_mes = control_coef_df['coefficient'].var()
+        elif settings['threshold_method'] in ['std', 'standard_deveation']:
+            coef_mes = control_coef_df['coefficient'].std()
+        else:
+            raise ValueError(f"Unsupported threshold method {settings['threshold_method']}. Supported methods: ['var','variance','std','standard_deveation']")
+        reg_threshold = mean_coef + (settings['threshold_multiplier'] * coef_mes)

     coef_df.to_csv(results_path, index=False)
     gene_coef_df.to_csv(results_path_gene, index=False)
@@ -596,7 +600,7 @@ def process_reads(csv_path, fraction_threshold, plate, filter_column=None, filte
     if isinstance(filter_value, str):
         filter_value = [filter_value]

-    if isinstance(filter_column, list):
+    if isinstance(filter_column, list):
         for filter_col in filter_column:
             for value in filter_value:
                 csv_df = csv_df[csv_df[filter_col] != value]
@@ -659,16 +663,12 @@ def check_normality(data, variable_name, verbose=False):
         print(f"Normal distribution: The data for {variable_name} is not normally distributed.")
         return False

-def clean_controls(df,
-    if
-
-
-
-
-        df = df[~df['column'].isin([pc])]
-    if other != None:
-        df = df[~df['column'].isin([other])]
-        print(f'Removed data from {nc, pc, other}')
+def clean_controls(df,values, column):
+    if column in df.columns:
+        if isinstance(values, list):
+            for value in values:
+                df = df[~df[column].isin([value])]
+                print(f'Removed data from {value}')
     return df

def process_scores(df, dependent_variable, plate, min_cell_count=25, agg_type='mean', transform=None, regression_type='ols'):
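The new block derives a hit threshold from the control gRNA coefficients: their mean plus a multiple of either their variance or their standard deviation, selected by 'threshold_method'. A self-contained sketch of that calculation with pandas (the coefficient values are made up for illustration):

    import pandas as pd

    # Stand-in for grna_coef_df filtered to settings['controls'].
    control_coef = pd.Series([0.01, -0.02, 0.03, 0.00, 0.015])
    threshold_method = 'std'      # 'var'/'variance' or 'std'/'standard_deveation' in the diff
    threshold_multiplier = 3

    mean_coef = control_coef.mean()
    if threshold_method in ('var', 'variance'):
        coef_mes = control_coef.var()
    elif threshold_method in ('std', 'standard_deveation'):
        coef_mes = control_coef.std()
    else:
        raise ValueError(f"Unsupported threshold method {threshold_method}")

    # Coefficients beyond this value would be treated as above the control noise band.
    reg_threshold = mean_coef + threshold_multiplier * coef_mes
    print(round(reg_threshold, 4))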
spacr/plot.py
CHANGED
@@ -1521,7 +1521,7 @@ def plot_plates(df, variable, grouping, min_max, cmap, min_count=0, verbose=True
     return fig

def print_mask_and_flows(stack, mask, flows, overlay=False):
-    fig, axs = plt.subplots(1, 3, figsize=(
+    fig, axs = plt.subplots(1, 3, figsize=(12, 4))  # Adjust subplot layout

     if stack.shape[-1] == 1:
         stack = np.squeeze(stack)
spacr/settings.py
CHANGED
@@ -261,7 +261,7 @@ def get_measure_crop_settings(settings={}):
     settings.setdefault('nucleus_mask_dim',None)
     settings.setdefault('pathogen_mask_dim',None)
     settings.setdefault('cytoplasm',False)
-    settings.setdefault('
+    settings.setdefault('uninfected',True)
     settings.setdefault('cell_min_size',0)
     settings.setdefault('nucleus_min_size',0)
     settings.setdefault('pathogen_min_size',0)
@@ -527,26 +527,31 @@ def set_generate_dataset_defaults(settings):
     return settings

def get_perform_regression_default_settings(settings):
-    settings.setdefault('
-    settings.setdefault('
+    settings.setdefault('count_data','list of paths')
+    settings.setdefault('score_data','list of paths')
+    settings.setdefault('positive_control','239740')
+    settings.setdefault('negative_control','233460')
+    settings.setdefault('controls',['000000_1','000000_10','000000_11','000000_12','000000_13','000000_14','000000_15','000000_16','000000_17','000000_18','000000_19','000000_20','000000_21','000000_22','000000_23','000000_24','000000_25','000000_26','000000_27','000000_28','000000_29','000000_3','000000_30','000000_31','000000_32','000000_4','000000_5','000000_6','000000_8','000000_9'])
+    settings.setdefault('fraction_threshold',0.12)
+    settings.setdefault('dependent_variable','pred')
+    settings.setdefault('threshold_method','std')
+    settings.setdefault('threshold_multiplier',3)
     settings.setdefault('transform',None)
     settings.setdefault('agg_type','mean')
     settings.setdefault('min_cell_count',25)
     settings.setdefault('regression_type','ols')
     settings.setdefault('random_row_column_effects',False)
+    settings.setdefault('split_axis_lims','')
+    settings.setdefault('plate','')
+    settings.setdefault('cov_type',None)
     settings.setdefault('alpha',1)
-    settings.setdefault('
-    settings.setdefault('
-    settings.setdefault('nc','c1')
-    settings.setdefault('pc','c2')
-    settings.setdefault('other','c3')
+    settings.setdefault('filter_value',['c1', 'c2', 'c3'])
+    settings.setdefault('filter_column','column')
     settings.setdefault('plate','plate1')
     settings.setdefault('class_1_threshold',None)
-    settings.setdefault('cov_type',None)
     settings.setdefault('metadata_files',['/home/carruthers/Documents/TGME49_Summary.csv','/home/carruthers/Documents/TGGT1_Summary.csv'])
     settings.setdefault('toxo', True)

-
     if settings['regression_type'] == 'quantile':
         print(f"Using alpha as quantile for quantile regression, alpha: {settings['alpha']}")
         settings['agg_type'] = None
@@ -576,6 +581,7 @@ def get_check_cellpose_models_default_settings(settings):
     return settings

def get_identify_masks_finetune_default_settings(settings):
+    settings.setdefault('src', 'path')
     settings.setdefault('model_name', 'cyto')
     settings.setdefault('custom_model', None)
     settings.setdefault('channels', [0,0])
@@ -664,7 +670,7 @@ expected_types = {
    "png_dims": list,
    "normalize_by": str,
    "save_measurements": bool,
-    "
+    "uninfected": bool,
    "dialate_pngs": bool,
    "dialate_png_ratios": list,
    "n_jobs": int,
@@ -685,6 +691,7 @@ expected_types = {
    "filter_min_max": (list, type(None)),
    "channel_dims": list,
    "backgrounds": list,
+    "background": str,
    "outline_thickness": int,
    "outline_color": str,
    "overlay_chans": list,
@@ -893,7 +900,7 @@ expected_types = {
categories = {"Paths":[ "src", "grna", "barcodes", "custom_model_path", "dataset","model_path","grna_csv","row_csv","column_csv"],
              "General": ["metadata_type", "custom_regex", "experiment", "channels", "magnification", "channel_dims", "apply_model_to_dataset", "generate_training_dataset", "train_DL_model", "segmentation_mode"],
              "Cellpose":["from_scratch", "n_epochs", "width_height", "model_name", "custom_model", "resample", "rescale", "CP_prob", "flow_threshold", "percentiles", "circular", "invert", "diameter", "grayscale", "background", "Signal_to_noise", "resize", "target_height", "target_width"],
-              "Cell": ["cell_intensity_range", "cell_size_range", "cell_chann_dim", "cell_channel", "cell_background", "cell_Signal_to_noise", "cell_CP_prob", "cell_FT", "remove_background_cell", "cell_min_size", "cell_mask_dim", "cytoplasm", "cytoplasm_min_size", "
+              "Cell": ["cell_intensity_range", "cell_size_range", "cell_chann_dim", "cell_channel", "cell_background", "cell_Signal_to_noise", "cell_CP_prob", "cell_FT", "remove_background_cell", "cell_min_size", "cell_mask_dim", "cytoplasm", "cytoplasm_min_size", "uninfected", "merge_edge_pathogen_cells", "adjust_cells", "cells", "cell_loc"],
              "Nucleus": ["nucleus_intensity_range", "nucleus_size_range", "nucleus_chann_dim", "nucleus_channel", "nucleus_background", "nucleus_Signal_to_noise", "nucleus_CP_prob", "nucleus_FT", "remove_background_nucleus", "nucleus_min_size", "nucleus_mask_dim", "nucleus_loc"],
              "Pathogen": ["pathogen_intensity_range", "pathogen_size_range", "pathogen_chann_dim", "pathogen_channel", "pathogen_background", "pathogen_Signal_to_noise", "pathogen_CP_prob", "pathogen_FT", "pathogen_model", "remove_background_pathogen", "pathogen_min_size", "pathogen_mask_dim", "pathogens", "pathogen_loc", "pathogen_types", "pathogen_plate_metadata", ],
              "Measurements": ["remove_image_canvas", "remove_highly_correlated", "homogeneity", "homogeneity_distances", "radial_dist", "calculate_correlation", "manders_thresholds", "save_measurements", "tables", "image_nr", "dot_size", "filter_by", "remove_highly_correlated_features", "remove_low_variance_features", "channel_of_interest"],
@@ -904,12 +911,12 @@ categories = {"Paths":[ "src", "grna", "barcodes", "custom_model_path", "dataset
              "Hyperparamiters (Embedding)": ["visualize","n_neighbors","min_dist","metric","resnet_features","reduction_method","embedding_by_controls","col_to_compare","log_data"],
              "Hyperparamiters (Clustering)": ["eps","min_samples","analyze_clusters","clustering","remove_cluster_noise"],
              "Hyperparamiters (Regression)":["cov_type", "class_1_threshold", "plate", "other", "fraction_threshold", "alpha", "random_row_column_effects", "regression_type", "min_cell_count", "agg_type", "transform", "dependent_variable"],
-              "Hyperparamiters (Activation)":["cam_type", "
+              "Hyperparamiters (Activation)":["cam_type", "overlay", "correlation", "target_layer", "normalize_input"],
              "Annotation": ["nc_loc", "pc_loc", "nc", "pc", "cell_plate_metadata","treatment_plate_metadata", "metadata_types", "cell_types", "target","positive_control","negative_control", "location_column", "treatment_loc", "channel_of_interest", "measurement", "treatments", "um_per_pixel", "nr_imgs", "exclude", "exclude_conditions", "mix", "pos", "neg"],
              "Plot": ["plot", "plot_control", "plot_nr", "examples_to_plot", "normalize_plots", "cmap", "figuresize", "plot_cluster_grids", "img_zoom", "row_limit", "color_by", "plot_images", "smooth_lines", "plot_points", "plot_outlines", "black_background", "plot_by_cluster", "heatmap_feature","grouping","min_max","cmap","save_figure"],
              "Test": ["test_mode", "test_images", "random_test", "test_nr", "test", "test_split"],
              "Timelapse": ["timelapse", "fps", "timelapse_displacement", "timelapse_memory", "timelapse_frame_limits", "timelapse_remove_transient", "timelapse_mode", "timelapse_objects", "compartments"],
-              "Advanced": ["shuffle", "target_intensity_min", "cells_per_well", "nuclei_limit", "pathogen_limit", "uninfected", "backgrounds", "schedule", "test_size","exclude","n_repeats","top_features", "model_type_ml", "model_type","minimum_cell_count","n_estimators","preprocess", "remove_background", "normalize", "lower_percentile", "merge_pathogens", "batch_size", "filter", "save", "masks", "verbose", "randomize", "n_jobs"],
+              "Advanced": ["shuffle", "target_intensity_min", "cells_per_well", "nuclei_limit", "pathogen_limit", "uninfected", "background", "backgrounds", "schedule", "test_size","exclude","n_repeats","top_features", "model_type_ml", "model_type","minimum_cell_count","n_estimators","preprocess", "remove_background", "normalize", "lower_percentile", "merge_pathogens", "batch_size", "filter", "save", "masks", "verbose", "randomize", "n_jobs"],
              "Miscellaneous": ["all_to_mip", "pick_slice", "skip_mode", "upscale", "upscale_factor"]
              }

@@ -1083,7 +1090,7 @@ def generate_fields(variables, scrollable_frame):
    "nuclei_limit": "(int) - Whether to include multinucleated cells in the analysis.",
    "pathogen_limit": "(int) - Whether to include multi-infected cells in the analysis.",
    "uninfected": "(bool) - Whether to include non-infected cells in the analysis.",
-    "
+    "uninfected": "(bool) - Whether to include uninfected cells in the analysis.",
    "init_weights": "(bool) - Whether to initialize weights for the model.",
    "src": "(str) - Path to the folder containing the images.",
    "intermedeate_save": "(bool) - Whether to save intermediate results.",
@@ -1362,4 +1369,31 @@ def get_default_generate_activation_map_settings(settings):
     settings.setdefault('manders_thresholds', [15,50, 75])
     settings.setdefault('n_jobs', None)

+    return settings
+
+def get_analyze_plaque_settings(settings):
+    settings.setdefault('src', 'path')
+    settings.setdefault('masks', True)
+    settings.setdefault('model_name', 'plaque')
+    settings.setdefault('custom_model', None)
+    settings.setdefault('channels', [0,0])
+    settings.setdefault('background', 200)
+    settings.setdefault('remove_background', False)
+    settings.setdefault('Signal_to_noise', 10)
+    settings.setdefault('CP_prob', 0)
+    settings.setdefault('diameter', 30)
+    settings.setdefault('batch_size', 50)
+    settings.setdefault('flow_threshold', 0.4)
+    settings.setdefault('save', True)
+    settings.setdefault('verbose', True)
+    settings.setdefault('normalize', True)
+    settings.setdefault('percentiles', None)
+    settings.setdefault('circular', False)
+    settings.setdefault('invert', False)
+    settings.setdefault('resize', True)
+    settings.setdefault('target_height', 1120)
+    settings.setdefault('target_width', 1120)
+    settings.setdefault('rescale', False)
+    settings.setdefault('resample', False)
+    settings.setdefault('grayscale', True)
     return settings
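Like the other defaults factories in settings.py, get_analyze_plaque_settings uses dict.setdefault, so caller-supplied values always win and only missing keys are filled in. A tiny sketch of that pattern, repeating just two of the plaque defaults (the function name fill_plaque_defaults is illustrative, not the spacr API):

    def fill_plaque_defaults(settings: dict) -> dict:
        # setdefault leaves existing keys untouched and only adds missing ones.
        settings.setdefault('model_name', 'plaque')
        settings.setdefault('diameter', 30)
        return settings

    print(fill_plaque_defaults({'diameter': 60}))
    # {'diameter': 60, 'model_name': 'plaque'}  # the explicit diameter is preserved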
spacr/submodules.py
CHANGED
@@ -8,6 +8,9 @@ from cellpose import models as cp_models
from cellpose import train as train_cp
from IPython.display import display

+import matplotlib.pyplot as plt
+from natsort import natsorted
+
def analyze_recruitment(settings={}):
    """
    Analyze recruitment data by grouping the DataFrame by well coordinates and plotting controls and recruitment data.
@@ -122,7 +125,31 @@ def analyze_recruitment(settings={}):

    return [cells,wells]

-def analyze_plaques(
+def analyze_plaques(settings):
+
+    from .cellpose import identify_masks_finetune
+    from .settings import get_analyze_plaque_settings
+    from .utils import save_settings, download_models
+    from spacr import __file__ as spacr_path
+
+    download_models()
+    package_dir = os.path.dirname(spacr_path)
+    models_dir = os.path.join(package_dir, 'resources', 'models', 'cp')
+    model_path = os.path.join(models_dir, 'toxo_plaque_cyto_e25000_X1120_Y1120.CP_model')
+    settings['custom_model'] = model_path
+    print('custom_model',settings['custom_model'])
+
+    settings = get_analyze_plaque_settings(settings)
+    save_settings(settings, name='analyze_plaques', show=True)
+
+    if settings['masks']:
+        settings['dst'] = os.path.join(settings['src'], 'masks')
+        display(settings)
+        identify_masks_finetune(settings)
+        folder = settings['dst']
+    else:
+        folder = settings['src']
+
    summary_data = []
    details_data = []
    stats_data = []
@@ -346,4 +373,136 @@ def count_phenotypes(settings):

    pivot_df.to_csv(output_path)

-    return
+    return
+
+def compare_reads_to_scores(reads_csv, scores_csv, empirical_dict={}, column='column', value='c3', plate='plate1', fraction_threshold=0.05):
+
+    def calculate_well_score_fractions(df, class_columns='cv_predictions'):
+        if all(col in df.columns for col in ['plate', 'row', 'column']):
+            df['prc'] = df['plate'] + '_' + df['row'] + '_' + df['column']
+        else:
+            raise ValueError("Cannot find 'plate', 'row', or 'column' in df.columns")
+        prc_summary = df.groupby(['plate', 'row', 'column', 'prc']).size().reset_index(name='total_rows')
+        well_counts = (df.groupby(['plate', 'row', 'column', 'prc', class_columns])
+                       .size()
+                       .unstack(fill_value=0)
+                       .reset_index()
+                       .rename(columns={0: 'class_0', 1: 'class_1'}))
+        summary_df = pd.merge(prc_summary, well_counts, on=['plate', 'row', 'column', 'prc'], how='left')
+        summary_df['class_0_fraction'] = summary_df['class_0'] / summary_df['total_rows']
+        summary_df['class_1_fraction'] = summary_df['class_1'] / summary_df['total_rows']
+        return summary_df
+
+    def plot_line(df, x_column, y_columns, group_column=None,
+                  xlabel=None, ylabel=None, title=None, figsize=(10, 6),
+                  save_path=None):
+        """
+        Create a line plot that can handle multiple y-columns, each becoming a separate line.
+        """
+        df = df.loc[natsorted(df.index, key=lambda x: df.loc[x, x_column])]
+
+        plt.figure(figsize=figsize)
+
+        if isinstance(y_columns, list):
+            for y_col in y_columns:
+                sns.lineplot(data=df, x=x_column, y=y_col, label=y_col, marker='o')
+        else:
+            sns.lineplot(data=df, x=x_column, y=y_columns, hue=group_column, marker='o')
+        plt.xlabel(xlabel if xlabel else x_column)
+        plt.ylabel(ylabel if ylabel else 'Value')
+        plt.title(title if title else f'Line Plot')
+        if group_column or isinstance(y_columns, list):
+            plt.legend(title='Legend')
+
+        plt.tight_layout()
+
+        if save_path:
+            plt.savefig(save_path, format='png', dpi=300, bbox_inches='tight')
+            print(f"Plot saved to {save_path}")
+        plt.show()
+
+    def calculate_grna_fraction_ratio(df, grna1='TGGT1_220950_1', grna2='TGGT1_233460_4'):
+        # Filter relevant grna_names within each prc and group them
+        grouped = df[df['grna_name'].isin([grna1, grna2])] \
+            .groupby(['prc', 'grna_name']) \
+            .agg({'fraction': 'sum', 'count': 'sum'}) \
+            .unstack(fill_value=0)
+        grouped.columns = ['_'.join(col).strip() for col in grouped.columns.values]
+        grouped['fraction_ratio'] = grouped[f'fraction_{grna1}'] / grouped[f'fraction_{grna2}']
+        grouped = grouped.assign(
+            fraction_ratio=lambda x: x['fraction_ratio'].replace([float('inf'), -float('inf')], 0)
+        ).fillna({'fraction_ratio': 0})
+        grouped = grouped.rename(columns={
+            f'count_{grna1}': f'{grna1}_count',
+            f'count_{grna2}': f'{grna2}_count'
+        })
+        result = grouped.reset_index()[['prc', f'{grna1}_count', f'{grna2}_count', 'fraction_ratio']]
+
+        result['total_reads'] = result[f'{grna1}_count'] + result[f'{grna2}_count']
+
+        result[f'{grna1}_fraction'] = result[f'{grna1}_count'] / result['total_reads']
+        result[f'{grna2}_fraction'] = result[f'{grna2}_count'] / result['total_reads']
+
+        return result
+
+    def calculate_well_read_fraction(df, count_column='count'):
+        if all(col in df.columns for col in ['plate', 'row', 'column']):
+            df['prc'] = df['plate'] + '_' + df['row'] + '_' + df['column']
+        else:
+            raise ValueError("Cannot find plate, row or column in df.columns")
+        grouped_df = df.groupby('prc')[count_column].sum().reset_index()
+        grouped_df = grouped_df.rename(columns={count_column: 'total_counts'})
+        df = pd.merge(df, grouped_df, on='prc')
+        df['fraction'] = df['count'] / df['total_counts']
+        return df
+
+    reads_df = pd.read_csv(reads_csv)
+    scores_df = pd.read_csv(scores_csv)
+
+    if plate != None:
+        reads_df['plate'] = plate
+        scores_df['plate'] = plate
+
+    if 'col' in reads_df.columns:
+        reads_df = reads_df.rename(columns={'col': 'column'})
+    if 'column_name' in reads_df.columns:
+        reads_df = reads_df.rename(columns={'column_name': 'column'})
+    if 'col' in scores_df.columns:
+        scores_df = scores_df.rename(columns={'col': 'column'})
+    if 'column_name' in scores_df.columns:
+        scores_df = scores_df.rename(columns={'column_name': 'column'})
+    if 'row_name' in reads_df.columns:
+        reads_df = reads_df.rename(columns={'row_name': 'row'})
+    if 'row_name' in scores_df.columns:
+        scores_df = scores_df.rename(columns={'row_name': 'row'})
+
+    reads_df = calculate_well_read_fraction(reads_df)
+    scores_df = calculate_well_score_fractions(scores_df)
+    reads_col_df = reads_df[reads_df[column]==value]
+    scores_col_df = scores_df[scores_df[column]==value]
+
+    #reads_col_df = reads_col_df[reads_col_df['fraction'] >= fraction_threshold]
+    reads_col_df = calculate_grna_fraction_ratio(reads_col_df, grna1='TGGT1_220950_1', grna2='TGGT1_233460_4')
+    df = pd.merge(reads_col_df, scores_col_df, on='prc')
+
+
+    # Convert the dictionary to a DataFrame and calculate fractions
+    df_emp = pd.DataFrame(
+        [(key, val[0], val[1], val[0] / (val[0] + val[1]), val[1] / (val[0] + val[1]))
+         for key, val in empirical_dict.items()],
+        columns=['key', 'value1', 'value2', 'fraction1', 'fraction2']
+    )
+
+    df = pd.merge(df, df_emp, left_on='row', right_on='key')
+    display(df)
+    y_columns = ['class_1_fraction', 'TGGT1_220950_1_fraction', 'fraction2']
+
+    plot_line(df, x_column='row', y_columns=y_columns, group_column=None,
+              xlabel=None, ylabel=None, title=None, figsize=(10, 6),
+              save_path=None)
+
+    y_columns = ['class_0_fraction', 'TGGT1_233460_4_fraction', 'fraction1']
+
+    plot_line(df, x_column='row', y_columns=y_columns, group_column=None,
+              xlabel=None, ylabel=None, title=None, figsize=(10, 6),
+              save_path=None)
spacr/toxo.py
CHANGED
@@ -136,6 +136,7 @@ def custom_volcano_plot(data_path, metadata_path, metadata_column='tagm_location
    data['variable'].fillna(data['feature'], inplace=True)
    split_columns = data['variable'].str.split('_', expand=True)
    data['gene_nr'] = split_columns[0]
+    data = data[data['variable'] != 'Intercept']

    # Load metadata
    if isinstance(metadata_path, pd.DataFrame):
@@ -158,11 +159,14 @@ def custom_volcano_plot(data_path, metadata_path, metadata_column='tagm_location
        merged_data['condition'],
        categories=['other','pc', 'nc', 'control'],
        ordered=True)
+
+
+    display(merged_data)

    # Create subplots with a broken y-axis
    figsize_2 = figsize / 2
    fig, (ax1, ax2) = plt.subplots(
-        2, 1, figsize=(
+        2, 1, figsize=(figsize, figsize),
        sharex=True, gridspec_kw={'height_ratios': [1, 3]}
    )

@@ -178,11 +182,12 @@ def custom_volcano_plot(data_path, metadata_path, metadata_column='tagm_location
        data=merged_data,
        x='coefficient',
        y='-log10(p_value)',
-        hue='condition',
-        style=metadata_column if metadata_column else None,
+        hue='condition',  # Keep colors but prevent them from showing in the final legend
+        style=metadata_column if metadata_column else None,  # Shape-based legend
        s=point_size,
        edgecolor='black',
        palette=palette,
+        legend='brief',  # Capture the full legend initially
        alpha=0.8,
        ax=ax2  # Lower plot
    )
@@ -196,6 +201,7 @@ def custom_volcano_plot(data_path, metadata_path, metadata_column='tagm_location
        s=point_size,
        palette=palette,
        edgecolor='black',
+        legend=False,  # Suppress legend for upper plot
        alpha=0.8,
        ax=ax1  # Upper plot
    )
@@ -222,9 +228,24 @@ def custom_volcano_plot(data_path, metadata_path, metadata_column='tagm_location
    ax2.spines['top'].set_visible(False)
    ax1.tick_params(labelbottom=False)

-    ax1.legend_.remove()
    if ax1.get_legend() is not None:
-        ax1.
+        ax1.legend_.remove()
+    ax1.get_legend().remove()  # Extract handles and labels from the legend
+    handles, labels = ax2.get_legend_handles_labels()
+
+    # Identify shape-based legend entries (skip color-based entries)
+    shape_handles = handles[len(set(merged_data['condition'])):]
+    shape_labels = labels[len(set(merged_data['condition'])):]
+
+    # Set the legend with only shape-based entries
+    ax2.legend(
+        shape_handles,
+        shape_labels,
+        bbox_to_anchor=(1.05, 1),
+        loc='upper left',
+        borderaxespad=0.
+    )
+
    ax1.tick_params(axis='x', which='both', bottom=False, top=False, labelbottom=False)

    # Add vertical threshold lines to both plots
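The legend handling above keeps only the style (shape) entries of a seaborn scatterplot that uses both hue and style. A standalone sketch of the same idea on synthetic data; the diff slices the handle list by len(set(condition)), whereas this sketch filters by label membership, which is a slightly different but equivalent-in-spirit approach (behavior can vary by seaborn version):

    import pandas as pd
    import seaborn as sns
    import matplotlib.pyplot as plt

    # Synthetic stand-in for merged_data: 'condition' drives color, 'location' drives shape.
    df = pd.DataFrame({
        'coefficient':     [-0.5, -0.1, 0.0, 0.2, 0.6, 0.9],
        '-log10(p_value)': [3.0, 0.5, 0.2, 1.5, 4.0, 6.0],
        'condition':       ['nc', 'other', 'other', 'pc', 'pc', 'nc'],
        'location':        ['PV', 'PV', 'nucleus', 'nucleus', 'PV', 'nucleus'],
    })

    fig, ax = plt.subplots()
    sns.scatterplot(data=df, x='coefficient', y='-log10(p_value)',
                    hue='condition', style='location', legend='brief', ax=ax)

    # Keep only the shape (style) entries in the legend.
    handles, labels = ax.get_legend_handles_labels()
    keep = [(h, l) for h, l in zip(handles, labels) if l in set(df['location'])]
    if keep:
        ax.legend(*zip(*keep), bbox_to_anchor=(1.05, 1), loc='upper left')
    plt.show()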
spacr/utils.py
CHANGED
@@ -64,6 +64,7 @@ from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier

from huggingface_hub import list_repo_files
+from spacr import __file__ as spacr_path

import umap.umap_ as umap
#import umap
@@ -2912,7 +2913,7 @@ def _relabel_parent_with_child_labels(parent_mask, child_mask):

    return parent_mask_new, child_mask

-def _exclude_objects(cell_mask, nucleus_mask, pathogen_mask, cytoplasm_mask,
+def _exclude_objects(cell_mask, nucleus_mask, pathogen_mask, cytoplasm_mask, uninfected=True):
    """
    Exclude objects from the masks based on certain criteria.

@@ -2921,7 +2922,7 @@ def _exclude_objects(cell_mask, nucleus_mask, pathogen_mask, cytoplasm_mask, inc
        nucleus_mask (ndarray): Mask representing nucleus.
        pathogen_mask (ndarray): Mask representing pathogens.
        cytoplasm_mask (ndarray): Mask representing cytoplasm.
-
+        uninfected (bool, optional): Whether to include uninfected cells. Defaults to True.

    Returns:
        tuple: A tuple containing the filtered cell mask, nucleus mask, pathogen mask, and cytoplasm mask.
@@ -2936,7 +2937,7 @@ def _exclude_objects(cell_mask, nucleus_mask, pathogen_mask, cytoplasm_mask, inc
        has_nucleus = np.any(nucleus_mask[cell_region])
        has_cytoplasm = np.any(cytoplasm_mask[cell_region])
        has_pathogen = np.any(pathogen_mask[cell_region])
-        if
+        if uninfected:
            if has_nucleus and has_cytoplasm:
                filtered_cells[cell_region] = cell_label
        else:
@@ -4963,7 +4964,71 @@ def map_condition(col_value, neg='c1', pos='c2', mix='c3'):
    else:
        return 'screen'

-def download_models(repo_id="einarolafsson/models",
+def download_models(repo_id="einarolafsson/models", retries=5, delay=5):
+    """
+    Downloads all model files from Hugging Face and stores them in the `resources/models` directory
+    within the installed `spacr` package.
+
+    Args:
+        repo_id (str): The repository ID on Hugging Face (default is 'einarolafsson/models').
+        retries (int): Number of retry attempts in case of failure.
+        delay (int): Delay in seconds between retries.
+
+    Returns:
+        str: The local path to the downloaded models.
+    """
+    # Construct the path to the `resources/models` directory in the installed `spacr` package
+    package_dir = os.path.dirname(spacr_path)
+    local_dir = os.path.join(package_dir, 'resources', 'models')
+
+    # Create the local directory if it doesn't exist
+    if not os.path.exists(local_dir):
+        os.makedirs(local_dir)
+    elif len(os.listdir(local_dir)) > 0:
+        print(f"Models already downloaded to: {local_dir}")
+        return local_dir
+
+    attempt = 0
+    while attempt < retries:
+        try:
+            # List all files in the repo
+            files = list_repo_files(repo_id, repo_type="dataset")
+            print(f"Files in repository: {files}")  # Debugging print to check file list
+
+            # Download each file
+            for file_name in files:
+                for download_attempt in range(retries):
+                    try:
+                        url = f"https://huggingface.co/datasets/{repo_id}/resolve/main/{file_name}?download=true"
+                        print(f"Downloading file from: {url}")  # Debugging
+
+                        response = requests.get(url, stream=True)
+                        print(f"HTTP response status: {response.status_code}")  # Debugging
+                        response.raise_for_status()
+
+                        # Save the file locally
+                        local_file_path = os.path.join(local_dir, os.path.basename(file_name))
+                        with open(local_file_path, 'wb') as file:
+                            for chunk in response.iter_content(chunk_size=8192):
+                                file.write(chunk)
+                        print(f"Downloaded model file: {file_name} to {local_file_path}")
+                        break  # Exit the retry loop if successful
+                    except (requests.HTTPError, requests.Timeout) as e:
+                        print(f"Error downloading {file_name}: {e}. Retrying in {delay} seconds...")
+                        time.sleep(delay)
+                else:
+                    raise Exception(f"Failed to download {file_name} after multiple attempts.")
+
+            return local_dir  # Return the directory where models are saved
+
+        except (requests.HTTPError, requests.Timeout) as e:
+            print(f"Error downloading files: {e}. Retrying in {delay} seconds...")
+            attempt += 1
+            time.sleep(delay)
+
+    raise Exception("Failed to download model files after multiple attempts.")
+
+def download_models_v1(repo_id="einarolafsson/models", local_dir=None, retries=5, delay=5):
    """
    Downloads all model files from Hugging Face and stores them in the specified local directory.

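Based on the diff, download_models now derives its destination from the installed package location and returns early if model files are already present, so callers no longer pass a local directory. A minimal call might be:

    from spacr.utils import download_models

    # Fetches all files from the 'einarolafsson/models' Hugging Face dataset into
    # <installed spacr>/resources/models, or returns immediately if that folder is non-empty.
    models_dir = download_models()
    print(models_dir)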
{spacr-0.3.41.dist-info → spacr-0.3.43.dist-info}/RECORD
CHANGED
@@ -7,27 +7,27 @@ spacr/app_mask.py,sha256=l-dBY8ftzCMdDe6-pXc2Nh_u-idNL9G7UOARiLJBtds,153
spacr/app_measure.py,sha256=_K7APYIeOKpV6e_LcqabBjvEi7mfq9Fch8175x1x0k8,162
spacr/app_sequencing.py,sha256=DjG26jy4cpddnV8WOOAIiExtOe9MleVMY4MFa5uTo5w,157
spacr/app_umap.py,sha256=ZWAmf_OsIKbYvolYuWPMYhdlVe-n2CADoJulAizMiEo,153
-spacr/cellpose.py,sha256=
+spacr/cellpose.py,sha256=KxgPAHEs4iLYZA-h_HBYnpSB_rSZKhEBZ6Fs0I9x5E0,13849
spacr/core.py,sha256=dW9RrAKFLfVsFhX0-kaVMc2T7b47Ky0pTXK-CEVOeWQ,48235
spacr/deep_spacr.py,sha256=HdOcNU8cHcE_19nP7_5uTz-ih3E169ffr2Hm--NvMvA,43255
spacr/gui.py,sha256=ARyn9Q_g8HoP-cXh1nzMLVFCKqthY4v2u9yORyaQqQE,8230
-spacr/gui_core.py,sha256=
+spacr/gui_core.py,sha256=N7R7yvfK_dJhOReM_kW3Ci8Bokhi1OzsxeKqvSGdvV4,41460
spacr/gui_elements.py,sha256=w-S1MZdyxt5O3DsNAHNNXy_WGfwBPg0NhwQtCsJeiao,137071
-spacr/gui_utils.py,sha256=
-spacr/io.py,sha256=
+spacr/gui_utils.py,sha256=KDWDWsi7UdZVhXk1ZWGx3ZqJMIxCUm3lGfjrVhbk52s,45463
+spacr/io.py,sha256=ahsUaDwvkCHxGu_uvhgNCGWiJL_-ze291rHHQvdrFXQ,144622
spacr/logger.py,sha256=lJhTqt-_wfAunCPl93xE65Wr9Y1oIHJWaZMjunHUeIw,1538
-spacr/measure.py,sha256=
+spacr/measure.py,sha256=KdboGXoi85BO5-_6er7932FgjFI7G7tuaQDnWSiEuew,54817
spacr/mediar.py,sha256=FwLvbLQW5LQzPgvJZG8Lw7GniA2vbZx6Jv6vIKu7I5c,14743
-spacr/ml.py,sha256=
+spacr/ml.py,sha256=vzuEnbQd96mn7T8h3GRsEDnpWSSpxd3ApGMXTiG6b2o,50507
spacr/openai.py,sha256=5vBZ3Jl2llYcW3oaTEXgdyCB2aJujMUIO5K038z7w_A,1246
-spacr/plot.py,sha256=
+spacr/plot.py,sha256=mqD0XyExAZ_qhnz71bLJo7nTVGod2eN8bJ_9sAV2eN8,135868
spacr/sequencing.py,sha256=t18mgpK6rhWuB1LtFOsPxqgpFXxuUmrD06ecsaVQ0Gw,19655
-spacr/settings.py,sha256=
+spacr/settings.py,sha256=VkCgZ8r30Q3VmTmYCf2_KRX3htqXR80osOjq37vLbwM,77770
spacr/sim.py,sha256=1xKhXimNU3ukzIw-3l9cF3Znc_brW8h20yv8fSTzvss,71173
-spacr/submodules.py,sha256=
+spacr/submodules.py,sha256=QRzojeHMZ2iRskmU5D7Q9iu6U1wPTODRm55r30KLZyY,25653
spacr/timelapse.py,sha256=FSYpUtAVy6xc3lwprRYgyDTT9ysUhfRQ4zrP9_h2mvg,39465
-spacr/toxo.py,sha256=
-spacr/utils.py,sha256=
+spacr/toxo.py,sha256=MVDfkfTl6fhbzg3izLWdtr2arARYIhI1TdScnHtPVqI,16770
+spacr/utils.py,sha256=yDxP8TslqLoKFpKyaCPDOulAitkDBR6MOwPG8FH8mYw,219417
spacr/version.py,sha256=axH5tnGwtgSnJHb5IDhiu4Zjk5GhLyAEDRe-rnaoFOA,409
spacr/resources/MEDIAR/.gitignore,sha256=Ff1q9Nme14JUd-4Q3jZ65aeQ5X4uttptssVDgBVHYo8,152
spacr/resources/MEDIAR/LICENSE,sha256=yEj_TRDLUfDpHDNM0StALXIt6mLqSgaV2hcCwa6_TcY,1065
@@ -150,9 +150,9 @@ spacr/resources/icons/umap.png,sha256=dOLF3DeLYy9k0nkUybiZMe1wzHQwLJFRmgccppw-8b
spacr/resources/images/plate1_E01_T0001F001L01A01Z01C02.tif,sha256=Tl0ZUfZ_AYAbu0up_nO0tPRtF1BxXhWQ3T3pURBCCRo,7958528
spacr/resources/images/plate1_E01_T0001F001L01A02Z01C01.tif,sha256=m8N-V71rA1TT4dFlENNg8s0Q0YEXXs8slIn7yObmZJQ,7958528
spacr/resources/images/plate1_E01_T0001F001L01A03Z01C03.tif,sha256=Pbhk7xn-KUP6RSIhJsxQcrHFImBm3GEpLkzx7WOc-5M,7958528
-spacr-0.3.
-spacr-0.3.
-spacr-0.3.
-spacr-0.3.
-spacr-0.3.
-spacr-0.3.
+spacr-0.3.43.dist-info/LICENSE,sha256=SR-2MeGc6SCM1UORJYyarSWY_A-JaOMFDj7ReSs9tRM,1083
+spacr-0.3.43.dist-info/METADATA,sha256=NBeaa28RVdVWa4lgSnWFKTuqVS_hJzUtg3sdMjgmf40,5949
+spacr-0.3.43.dist-info/WHEEL,sha256=HiCZjzuy6Dw0hdX5R3LCFPDmFS4BWl8H-8W39XfmgX4,91
+spacr-0.3.43.dist-info/entry_points.txt,sha256=BMC0ql9aNNpv8lUZ8sgDLQMsqaVnX5L535gEhKUP5ho,296
+spacr-0.3.43.dist-info/top_level.txt,sha256=GJPU8FgwRXGzKeut6JopsSRY2R8T3i9lDgya42tLInY,6
+spacr-0.3.43.dist-info/RECORD,,
{spacr-0.3.41.dist-info → spacr-0.3.43.dist-info}/LICENSE
File without changes
{spacr-0.3.41.dist-info → spacr-0.3.43.dist-info}/WHEEL
File without changes
{spacr-0.3.41.dist-info → spacr-0.3.43.dist-info}/entry_points.txt
File without changes
{spacr-0.3.41.dist-info → spacr-0.3.43.dist-info}/top_level.txt
File without changes