spacr 0.2.5__py3-none-any.whl → 0.2.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- spacr/__init__.py +1 -11
- spacr/core.py +226 -287
- spacr/deep_spacr.py +248 -269
- spacr/gui.py +41 -19
- spacr/gui_core.py +404 -151
- spacr/gui_elements.py +778 -179
- spacr/gui_utils.py +163 -106
- spacr/io.py +116 -45
- spacr/measure.py +1 -0
- spacr/plot.py +51 -5
- spacr/sequencing.py +477 -587
- spacr/settings.py +211 -66
- spacr/utils.py +34 -14
- {spacr-0.2.5.dist-info → spacr-0.2.8.dist-info}/METADATA +46 -39
- {spacr-0.2.5.dist-info → spacr-0.2.8.dist-info}/RECORD +19 -19
- {spacr-0.2.5.dist-info → spacr-0.2.8.dist-info}/WHEEL +1 -1
- {spacr-0.2.5.dist-info → spacr-0.2.8.dist-info}/LICENSE +0 -0
- {spacr-0.2.5.dist-info → spacr-0.2.8.dist-info}/entry_points.txt +0 -0
- {spacr-0.2.5.dist-info → spacr-0.2.8.dist-info}/top_level.txt +0 -0
spacr/settings.py
CHANGED
@@ -3,8 +3,8 @@ import os, ast
|
|
3
3
|
def set_default_plot_merge_settings():
|
4
4
|
settings = {}
|
5
5
|
settings.setdefault('include_noninfected', True)
|
6
|
-
settings.setdefault('include_multiinfected',
|
7
|
-
settings.setdefault('include_multinucleated',
|
6
|
+
settings.setdefault('include_multiinfected', 10)
|
7
|
+
settings.setdefault('include_multinucleated', 1)
|
8
8
|
settings.setdefault('remove_background', False)
|
9
9
|
settings.setdefault('filter_min_max', None)
|
10
10
|
settings.setdefault('channel_dims', [0,1,2,3])
|
@@ -20,7 +20,7 @@ def set_default_plot_merge_settings():
|
|
20
20
|
settings.setdefault('normalize', True)
|
21
21
|
settings.setdefault('print_object_number', True)
|
22
22
|
settings.setdefault('nr', 1)
|
23
|
-
settings.setdefault('figuresize',
|
23
|
+
settings.setdefault('figuresize', 10)
|
24
24
|
settings.setdefault('cmap', 'inferno')
|
25
25
|
settings.setdefault('verbose', True)
|
26
26
|
return settings
|
@@ -70,7 +70,7 @@ def set_default_settings_preprocess_generate_masks(src, settings={}):
|
|
70
70
|
|
71
71
|
# Plot settings
|
72
72
|
settings.setdefault('plot', False)
|
73
|
-
settings.setdefault('figuresize',
|
73
|
+
settings.setdefault('figuresize', 10)
|
74
74
|
settings.setdefault('cmap', 'inferno')
|
75
75
|
settings.setdefault('normalize', True)
|
76
76
|
settings.setdefault('normalize_plots', True)
|
@@ -116,7 +116,7 @@ def set_default_settings_preprocess_img_data(settings):
|
|
116
116
|
skip_mode = settings.setdefault('skip_mode', False)
|
117
117
|
|
118
118
|
cmap = settings.setdefault('cmap', 'inferno')
|
119
|
-
figuresize = settings.setdefault('figuresize',
|
119
|
+
figuresize = settings.setdefault('figuresize', 10)
|
120
120
|
normalize = settings.setdefault('normalize', True)
|
121
121
|
save_dtype = settings.setdefault('save_dtype', 'uint16')
|
122
122
|
|
@@ -189,7 +189,7 @@ def set_default_umap_image_settings(settings={}):
|
|
189
189
|
settings.setdefault('remove_cluster_noise', True)
|
190
190
|
settings.setdefault('remove_highly_correlated', True)
|
191
191
|
settings.setdefault('log_data', False)
|
192
|
-
settings.setdefault('figuresize',
|
192
|
+
settings.setdefault('figuresize', 10)
|
193
193
|
settings.setdefault('black_background', True)
|
194
194
|
settings.setdefault('remove_image_canvas', False)
|
195
195
|
settings.setdefault('plot_outlines', True)
|
@@ -277,7 +277,7 @@ def get_measure_crop_settings(settings):
|
|
277
277
|
|
278
278
|
def set_default_analyze_screen(settings):
|
279
279
|
settings.setdefault('src', 'path')
|
280
|
-
settings.setdefault('
|
280
|
+
settings.setdefault('model_type_ml','xgboost')
|
281
281
|
settings.setdefault('heatmap_feature','predictions')
|
282
282
|
settings.setdefault('grouping','mean')
|
283
283
|
settings.setdefault('min_max','allq')
|
@@ -314,7 +314,6 @@ def set_default_train_test_model(settings):
|
|
314
314
|
settings.setdefault('batch_size',64)
|
315
315
|
settings.setdefault('epochs',100)
|
316
316
|
settings.setdefault('val_split',0.1)
|
317
|
-
settings.setdefault('train_mode','erm')
|
318
317
|
settings.setdefault('learning_rate',0.001)
|
319
318
|
settings.setdefault('weight_decay',0.00001)
|
320
319
|
settings.setdefault('dropout_rate',0.1)
|
@@ -324,14 +323,90 @@ def set_default_train_test_model(settings):
|
|
324
323
|
settings.setdefault('gradient_accumulation',True)
|
325
324
|
settings.setdefault('gradient_accumulation_steps',4)
|
326
325
|
settings.setdefault('intermedeate_save',True)
|
327
|
-
settings.setdefault('pin_memory',
|
326
|
+
settings.setdefault('pin_memory',False)
|
328
327
|
settings.setdefault('n_jobs',cores)
|
329
|
-
settings.setdefault('
|
328
|
+
settings.setdefault('train_channels',['r','g','b'])
|
330
329
|
settings.setdefault('augment',False)
|
331
330
|
settings.setdefault('verbose',False)
|
332
331
|
return settings
|
333
332
|
|
333
|
+
def set_generate_training_dataset_defaults(settings):
|
334
|
+
|
335
|
+
settings.setdefault('src','path')
|
336
|
+
settings.setdefault('dataset_mode','metadata')
|
337
|
+
settings.setdefault('annotation_column','test')
|
338
|
+
settings.setdefault('annotated_classes',[1,2])
|
339
|
+
settings.setdefault('classes',['nc','pc'])
|
340
|
+
settings.setdefault('size',224)
|
341
|
+
settings.setdefault('test_split',0.1)
|
342
|
+
settings.setdefault('class_metadata',[['c1'],['c2']])
|
343
|
+
settings.setdefault('metadata_type_by','col')
|
344
|
+
settings.setdefault('channel_of_interest',3)
|
345
|
+
settings.setdefault('custom_measurement',None)
|
346
|
+
settings.setdefault('tables',None)
|
347
|
+
settings.setdefault('png_type','cell_png')
|
348
|
+
|
349
|
+
return settings
|
350
|
+
|
351
|
+
def deep_spacr_defaults(settings):
|
352
|
+
|
353
|
+
cores = os.cpu_count()-4
|
354
|
+
|
355
|
+
settings.setdefault('src','path')
|
356
|
+
settings.setdefault('dataset_mode','metadata')
|
357
|
+
settings.setdefault('annotation_column','test')
|
358
|
+
settings.setdefault('annotated_classes',[1,2])
|
359
|
+
settings.setdefault('classes',['nc','pc'])
|
360
|
+
settings.setdefault('size',224)
|
361
|
+
settings.setdefault('test_split',0.1)
|
362
|
+
settings.setdefault('class_metadata',[['c1'],['c2']])
|
363
|
+
settings.setdefault('metadata_type_by','col')
|
364
|
+
settings.setdefault('channel_of_interest',3)
|
365
|
+
settings.setdefault('custom_measurement',None)
|
366
|
+
settings.setdefault('tables',None)
|
367
|
+
settings.setdefault('png_type','cell_png')
|
368
|
+
settings.setdefault('custom_model',False)
|
369
|
+
settings.setdefault('custom_model_path','path')
|
370
|
+
settings.setdefault('train',True)
|
371
|
+
settings.setdefault('test',False)
|
372
|
+
settings.setdefault('model_type','maxvit_t')
|
373
|
+
settings.setdefault('optimizer_type','adamw')
|
374
|
+
settings.setdefault('schedule','reduce_lr_on_plateau') #reduce_lr_on_plateau, step_lr
|
375
|
+
settings.setdefault('loss_type','focal_loss') # binary_cross_entropy_with_logits
|
376
|
+
settings.setdefault('normalize',True)
|
377
|
+
settings.setdefault('image_size',224)
|
378
|
+
settings.setdefault('batch_size',64)
|
379
|
+
settings.setdefault('epochs',100)
|
380
|
+
settings.setdefault('val_split',0.1)
|
381
|
+
settings.setdefault('learning_rate',0.001)
|
382
|
+
settings.setdefault('weight_decay',0.00001)
|
383
|
+
settings.setdefault('dropout_rate',0.1)
|
384
|
+
settings.setdefault('init_weights',True)
|
385
|
+
settings.setdefault('amsgrad',True)
|
386
|
+
settings.setdefault('use_checkpoint',True)
|
387
|
+
settings.setdefault('gradient_accumulation',True)
|
388
|
+
settings.setdefault('gradient_accumulation_steps',4)
|
389
|
+
settings.setdefault('intermedeate_save',True)
|
390
|
+
settings.setdefault('pin_memory',False)
|
391
|
+
settings.setdefault('n_jobs',cores)
|
392
|
+
settings.setdefault('train_channels',['r','g','b'])
|
393
|
+
settings.setdefault('augment',False)
|
394
|
+
settings.setdefault('preload_batches', 3)
|
395
|
+
settings.setdefault('verbose',True)
|
396
|
+
settings.setdefault('apply_model_to_dataset',True)
|
397
|
+
settings.setdefault('file_metadata',None)
|
398
|
+
settings.setdefault('sample',None)
|
399
|
+
settings.setdefault('experiment','exp.')
|
400
|
+
settings.setdefault('score_threshold',0.5)
|
401
|
+
settings.setdefault('tar_path','path')
|
402
|
+
settings.setdefault('model_path','path')
|
403
|
+
settings.setdefault('file_type','cell_png')
|
404
|
+
settings.setdefault('generate_training_dataset', True)
|
405
|
+
settings.setdefault('train_DL_model', True)
|
406
|
+
return settings
|
407
|
+
|
334
408
|
def get_analyze_recruitment_default_settings(settings):
|
409
|
+
settings.setdefault('src','path')
|
335
410
|
settings.setdefault('target','protein')
|
336
411
|
settings.setdefault('cell_types',['HeLa'])
|
337
412
|
settings.setdefault('cell_plate_metadata',None)
|
@@ -351,12 +426,10 @@ def get_analyze_recruitment_default_settings(settings):
|
|
351
426
|
settings.setdefault('plot',True)
|
352
427
|
settings.setdefault('plot_nr',10)
|
353
428
|
settings.setdefault('plot_control',True)
|
354
|
-
settings.setdefault('figuresize',
|
355
|
-
settings.setdefault('remove_background',False)
|
356
|
-
settings.setdefault('backgrounds',100)
|
429
|
+
settings.setdefault('figuresize',10)
|
357
430
|
settings.setdefault('include_noninfected',True)
|
358
|
-
settings.setdefault('include_multiinfected',
|
359
|
-
settings.setdefault('include_multinucleated',
|
431
|
+
settings.setdefault('include_multiinfected',10)
|
432
|
+
settings.setdefault('include_multinucleated',1)
|
360
433
|
settings.setdefault('cells_per_well',0)
|
361
434
|
settings.setdefault('pathogen_size_range',[0,100000])
|
362
435
|
settings.setdefault('nucleus_size_range',[0,100000])
|
@@ -368,6 +441,7 @@ def get_analyze_recruitment_default_settings(settings):
|
|
368
441
|
return settings
|
369
442
|
|
370
443
|
def get_analyze_reads_default_settings(settings):
|
444
|
+
settings.setdefault('src', 'path')
|
371
445
|
settings.setdefault('upstream', 'CTTCTGGTAAATGGGGATGTCAAGTT')
|
372
446
|
settings.setdefault('downstream', 'GTTTAAGAGCTATGCTGGAAACAGCAG') #This is the reverce compliment of the column primer starting from the end #TGCTGTTTAAGAGCTATGCTGGAAACAGCA
|
373
447
|
settings.setdefault('barecode_length_1', 8)
|
@@ -380,7 +454,7 @@ def get_map_barcodes_default_settings(settings):
|
|
380
454
|
settings.setdefault('src', 'path')
|
381
455
|
settings.setdefault('grna', '/home/carruthers/Documents/grna_barcodes.csv')
|
382
456
|
settings.setdefault('barcodes', '/home/carruthers/Documents/SCREEN_BARCODES.csv')
|
383
|
-
settings.setdefault('plate_dict', {'EO1': 'plate1', 'EO2': 'plate2', 'EO3': 'plate3', 'EO4': 'plate4', 'EO5': 'plate5', 'EO6': 'plate6', 'EO7': 'plate7', 'EO8': 'plate8'})
|
457
|
+
settings.setdefault('plate_dict', "{'EO1': 'plate1', 'EO2': 'plate2', 'EO3': 'plate3', 'EO4': 'plate4', 'EO5': 'plate5', 'EO6': 'plate6', 'EO7': 'plate7', 'EO8': 'plate8'}")
|
384
458
|
settings.setdefault('test', False)
|
385
459
|
settings.setdefault('verbose', True)
|
386
460
|
settings.setdefault('pc', 'TGGT1_220950_1')
|
@@ -506,7 +580,7 @@ expected_types = {
|
|
506
580
|
"timelapse": bool,
|
507
581
|
"timelapse_displacement": int,
|
508
582
|
"timelapse_memory": int,
|
509
|
-
"timelapse_frame_limits": list, # This can be a list of lists
|
583
|
+
"timelapse_frame_limits": (list, type(None)), # This can be a list of lists
|
510
584
|
"timelapse_remove_transient": bool,
|
511
585
|
"timelapse_mode": str,
|
512
586
|
"timelapse_objects": list,
|
@@ -533,7 +607,7 @@ expected_types = {
|
|
533
607
|
"save_png": bool,
|
534
608
|
"crop_mode": list,
|
535
609
|
"use_bounding_box": bool,
|
536
|
-
"png_size": list, # This can be a list of lists
|
610
|
+
"png_size": list, # This can be a list of lists
|
537
611
|
"normalize": bool,
|
538
612
|
"png_dims": list,
|
539
613
|
"normalize_by": str,
|
@@ -545,7 +619,7 @@ expected_types = {
|
|
545
619
|
"cells": list,
|
546
620
|
"cell_loc": list,
|
547
621
|
"pathogens": list,
|
548
|
-
"pathogen_loc": (list, list), # This can be a list of lists
|
622
|
+
"pathogen_loc": (list, list), # This can be a list of lists
|
549
623
|
"treatments": list,
|
550
624
|
"treatment_loc": (list, list), # This can be a list of lists
|
551
625
|
"channel_of_interest": int,
|
@@ -553,10 +627,9 @@ expected_types = {
|
|
553
627
|
"measurement": str,
|
554
628
|
"nr_imgs": int,
|
555
629
|
"um_per_pixel": (int, float),
|
556
|
-
# Additional settings based on provided defaults
|
557
630
|
"include_noninfected": bool,
|
558
|
-
"include_multiinfected":
|
559
|
-
"include_multinucleated":
|
631
|
+
"include_multiinfected": int,
|
632
|
+
"include_multinucleated": int,
|
560
633
|
"filter_min_max": (list, type(None)),
|
561
634
|
"channel_dims": list,
|
562
635
|
"backgrounds": list,
|
@@ -650,7 +723,6 @@ expected_types = {
|
|
650
723
|
"image_size": int,
|
651
724
|
"epochs": int,
|
652
725
|
"val_split": float,
|
653
|
-
"train_mode": str,
|
654
726
|
"learning_rate": float,
|
655
727
|
"weight_decay": float,
|
656
728
|
"dropout_rate": float,
|
@@ -665,9 +737,9 @@ expected_types = {
|
|
665
737
|
"augment": bool,
|
666
738
|
"target": str,
|
667
739
|
"cell_types": list,
|
668
|
-
"cell_plate_metadata": (list,
|
740
|
+
"cell_plate_metadata": (list, list),
|
669
741
|
"pathogen_types": list,
|
670
|
-
"pathogen_plate_metadata": (list, list), # This can be a list of lists
|
742
|
+
"pathogen_plate_metadata": (list, list), # This can be a list of lists
|
671
743
|
"treatment_plate_metadata": (list, list), # This can be a list of lists
|
672
744
|
"metadata_types": list,
|
673
745
|
"cell_chann_dim": int,
|
@@ -720,10 +792,71 @@ expected_types = {
|
|
720
792
|
"from_scratch": bool,
|
721
793
|
"width_height": list,
|
722
794
|
"resize": bool,
|
795
|
+
"compression": str,
|
796
|
+
"complevel": int,
|
723
797
|
"gene_weights_csv": str,
|
724
798
|
"fraction_threshold": float,
|
799
|
+
"barcode_mapping":dict,
|
800
|
+
"redunction_method":str,
|
801
|
+
"mix":str,
|
802
|
+
"model_type_ml":str,
|
803
|
+
"exclude_conditions":list,
|
804
|
+
"remove_highly_correlated_features":bool,
|
805
|
+
'barcode_coordinates':list, # This is a list of lists
|
806
|
+
'reverse_complement':bool,
|
807
|
+
'file_type':str,
|
808
|
+
'model_path':str,
|
809
|
+
'tar_path':str,
|
810
|
+
'score_threshold':float,
|
811
|
+
'sample':None,
|
812
|
+
'file_metadata':None,
|
813
|
+
'apply_model_to_dataset':False,
|
814
|
+
"train":bool,
|
815
|
+
"test":bool,
|
816
|
+
'train_channels':list,
|
817
|
+
"optimizer_type":str,
|
818
|
+
"dataset_mode":str,
|
819
|
+
"annotated_classes":list,
|
820
|
+
"annotation_column":str,
|
821
|
+
"apply_model_to_dataset":bool,
|
822
|
+
"metadata_type_by":str,
|
823
|
+
"custom_measurement":str,
|
824
|
+
"custom_model":bool,
|
825
|
+
"size":int,
|
826
|
+
"test_split":float,
|
827
|
+
"class_metadata":list, # This is a list of lists
|
828
|
+
"png_type":str,
|
829
|
+
"custom_model_path":str,
|
830
|
+
"generate_training_dataset":bool,
|
831
|
+
'preload_batches':int,
|
832
|
+
"train_DL_model":bool,
|
725
833
|
}
|
726
834
|
|
835
|
+
categories = {"General": ["src", "metadata_type", "custom_regex", "experiment", "channels", "magnification", "channel_dims", "apply_model_to_dataset", "generate_training_dataset", "train_DL_model"],
|
836
|
+
"Cell": ["cell_intensity_range", "cell_size_range", "cell_chann_dim", "cell_channel", "cell_background", "cell_Signal_to_noise", "cell_CP_prob", "cell_FT", "remove_background_cell", "cell_min_size", "cell_mask_dim", "cytoplasm", "cytoplasm_min_size", "include_uninfected", "merge_edge_pathogen_cells", "adjust_cells"],
|
837
|
+
"Nucleus": ["nucleus_intensity_range", "nucleus_size_range", "nucleus_chann_dim", "nucleus_channel", "nucleus_background", "nucleus_Signal_to_noise", "nucleus_CP_prob", "nucleus_FT", "remove_background_nucleus", "nucleus_min_size", "nucleus_mask_dim", "nucleus_loc"],
|
838
|
+
"Pathogen": ["pathogen_intensity_range", "pathogen_size_range", "pathogen_chann_dim", "pathogen_channel", "pathogen_background", "pathogen_Signal_to_noise", "pathogen_CP_prob", "pathogen_FT", "pathogen_model", "remove_background_pathogen", "pathogen_min_size", "pathogen_mask_dim"],
|
839
|
+
"Timelapse": ["fps", "timelapse_displacement", "timelapse_memory", "timelapse_frame_limits", "timelapse_remove_transient", "timelapse_mode", "timelapse_objects", "compartments"],
|
840
|
+
"Plot": ["plot_control", "plot_nr", "examples_to_plot", "normalize_plots", "normalize", "cmap", "figuresize", "plot_cluster_grids", "img_zoom", "row_limit", "color_by", "plot_images", "smooth_lines", "plot_points", "plot_outlines", "black_background", "plot_by_cluster", "heatmap_feature","grouping","min_max","cmap","save_figure"],
|
841
|
+
"Measurements": ["remove_image_canvas", "remove_highly_correlated", "homogeneity", "homogeneity_distances", "radial_dist", "calculate_correlation", "manders_thresholds", "save_measurements", "tables", "image_nr", "dot_size", "filter_by", "remove_highly_correlated_features", "remove_low_variance_features", "channel_of_interest"],
|
842
|
+
"Paths":["grna", "barcodes", "custom_model_path", "tar_path","model_path"],
|
843
|
+
"Sequencing": ["upstream", "downstream", "barecode_length_1", "barecode_length_2", "chunk_size", "barcode_mapping", "reverse_complement", "barcode_coordinates", "complevel", "compression","plate_dict"],
|
844
|
+
"Embedding": ["visualize","n_neighbors","min_dist","metric","resnet_features","reduction_method","embedding_by_controls","col_to_compare","log_data"],
|
845
|
+
"Clustering": ["eps","min_samples","analyze_clusters","clustering","remove_cluster_noise"],
|
846
|
+
"Object Image": ["save_png", "dialate_pngs", "dialate_png_ratios", "png_size", "png_dims", "save_arrays", "normalize_by", "dialate_png_ratios", "crop_mode", "dialate_pngs", "normalize", "use_bounding_box"],
|
847
|
+
"Annotation": ["nc_loc", "pc_loc", "nc", "pc", "cell_plate_metadata","pathogen_types", "pathogen_plate_metadata", "treatment_plate_metadata", "metadata_types", "cell_types", "target","positive_control","negative_control", "location_column", "treatment_loc", "cells", "cell_loc", "pathogens", "pathogen_loc", "channel_of_interest", "measurement", "treatments", "um_per_pixel", "nr_imgs", "exclude", "exclude_conditions", "mix", "pos", "neg"],
|
848
|
+
"Machine Learning":[],
|
849
|
+
"Deep Learning": ["png_type","score_threshold","file_type", "train_channels", "epochs", "loss_type", "optimizer_type","image_size","val_split","learning_rate","weight_decay","dropout_rate", "init_weights", "train", "classes", "augment"],
|
850
|
+
"Generate Dataset":["preload_batches", "file_metadata","class_metadata", "annotation_column","annotated_classes", "dataset_mode", "metadata_type_by","custom_measurement", "sample", "size"],
|
851
|
+
"Cellpose":["from_scratch", "n_epochs", "width_height", "model_name", "custom_model", "resample", "rescale", "CP_prob", "flow_threshold", "percentiles", "circular", "invert", "diameter", "grayscale", "background", "Signal_to_noise", "resize", "target_height", "target_width"],
|
852
|
+
"Regression":["class_1_threshold", "plate", "other", "fraction_threshold", "alpha", "remove_row_column_effect", "regression_type", "min_cell_count", "agg_type", "transform", "dependent_variable", "gene_weights_csv"],
|
853
|
+
"Miscellaneous": ["all_to_mip", "pick_slice", "skip_mode", "upscale", "upscale_factor"],
|
854
|
+
"Test": ["test_mode", "test_images", "random_test", "test_nr", "test", "test_split"],
|
855
|
+
"Advanced": ["target_intensity_min", "cells_per_well", "include_multinucleated", "include_multiinfected", "include_noninfected", "backgrounds", "plot", "timelapse", "schedule", "test_size","exclude","n_repeats","top_features", "model_type_ml", "model_type","minimum_cell_count","n_estimators","preprocess", "remove_background", "normalize", "lower_percentile", "merge_pathogens", "batch_size", "filter", "save", "masks", "verbose", "randomize", "n_jobs", "amsgrad","use_checkpoint","gradient_accumulation","gradient_accumulation_steps","intermedeate_save","pin_memory"]
|
856
|
+
}
|
857
|
+
|
858
|
+
category_keys = list(categories.keys())
|
859
|
+
|
727
860
|
def check_settings(vars_dict, expected_types, q=None):
|
728
861
|
from .gui_utils import parse_list
|
729
862
|
|
@@ -735,7 +868,7 @@ def check_settings(vars_dict, expected_types, q=None):
|
|
735
868
|
|
736
869
|
for key, (label, widget, var, _) in vars_dict.items():
|
737
870
|
if key not in expected_types:
|
738
|
-
if key not in
|
871
|
+
if key not in category_keys:
|
739
872
|
q.put(f"Key {key} not found in expected types.")
|
740
873
|
continue
|
741
874
|
|
@@ -743,13 +876,15 @@ def check_settings(vars_dict, expected_types, q=None):
|
|
743
876
|
expected_type = expected_types.get(key, str)
|
744
877
|
|
745
878
|
try:
|
746
|
-
if key in ["png_size", "pathogen_plate_metadata", "treatment_plate_metadata"]:
|
879
|
+
if key in ["cell_plate_metadata", "timelapse_frame_limits", "png_size", "pathogen_loc", "treatment_loc", "pathogen_plate_metadata", "treatment_plate_metadata", "barcode_coordinates", "class_metadata"]:
|
747
880
|
parsed_value = ast.literal_eval(value) if value else None
|
748
881
|
if isinstance(parsed_value, list):
|
749
882
|
if all(isinstance(i, list) for i in parsed_value) or all(not isinstance(i, list) for i in parsed_value):
|
750
883
|
settings[key] = parsed_value
|
751
884
|
else:
|
752
885
|
raise ValueError("Invalid format: Mixed list and list of lists")
|
886
|
+
#elif parsed_value == None:
|
887
|
+
# settings[key] = None
|
753
888
|
else:
|
754
889
|
raise ValueError("Invalid format for list or list of lists")
|
755
890
|
elif expected_type == list:
|
@@ -764,6 +899,20 @@ def check_settings(vars_dict, expected_types, q=None):
|
|
764
899
|
settings[key] = float(value) if '.' in value else int(value)
|
765
900
|
elif expected_type == (str, type(None)):
|
766
901
|
settings[key] = str(value) if value else None
|
902
|
+
elif expected_type == dict:
|
903
|
+
try:
|
904
|
+
# Ensure that the value is a string that can be converted to a dictionary
|
905
|
+
if isinstance(value, str):
|
906
|
+
settings[key] = ast.literal_eval(value)
|
907
|
+
else:
|
908
|
+
raise ValueError("Expected a string representation of a dictionary.")
|
909
|
+
|
910
|
+
# Check if the result is actually a dictionary
|
911
|
+
if not isinstance(settings[key], dict):
|
912
|
+
raise ValueError("Value is not a valid dictionary.")
|
913
|
+
except (ValueError, SyntaxError) as e:
|
914
|
+
settings[key] = {}
|
915
|
+
q.put(f"Error: Invalid format for {key}. Expected type: dict. Error: {e}")
|
767
916
|
elif isinstance(expected_type, tuple):
|
768
917
|
for typ in expected_type:
|
769
918
|
try:
|
@@ -814,7 +963,7 @@ def generate_fields(variables, scrollable_frame):
|
|
814
963
|
"cell_Signal_to_noise": "(float) - The signal-to-noise ratio for the cell channel. This will be used to determine the range of intensities to normalize images to for cell segmentation.",
|
815
964
|
"cell_size_range": "(list) - Size range for cell segmentation.",
|
816
965
|
"cell_types": "(list) - Types of cells to include in the analysis.",
|
817
|
-
"cells": "(list) - The cell types to include in the analysis.",
|
966
|
+
"cells": "(list of lists) - The cell types to include in the analysis.",
|
818
967
|
"cells_per_well": "(int) - Number of cells per well.",
|
819
968
|
"channel_dims": "(list) - The dimensions of the image channels.",
|
820
969
|
"channel_of_interest": "(int) - The channel of interest to use for the analysis.",
|
@@ -864,8 +1013,8 @@ def generate_fields(variables, scrollable_frame):
|
|
864
1013
|
"image_nr": "(int) - Number of images to process.",
|
865
1014
|
"image_size": "(int) - Size of the images for training.",
|
866
1015
|
"img_zoom": "(float) - Zoom factor for the images in plots.",
|
867
|
-
"include_multinucleated": "(
|
868
|
-
"include_multiinfected": "(
|
1016
|
+
"include_multinucleated": "(int) - Whether to include multinucleated cells in the analysis.",
|
1017
|
+
"include_multiinfected": "(int) - Whether to include multi-infected cells in the analysis.",
|
869
1018
|
"include_noninfected": "(bool) - Whether to include non-infected cells in the analysis.",
|
870
1019
|
"include_uninfected": "(bool) - Whether to include uninfected cells in the analysis.",
|
871
1020
|
"init_weights": "(bool) - Whether to initialize weights for the model.",
|
@@ -883,7 +1032,7 @@ def generate_fields(variables, scrollable_frame):
|
|
883
1032
|
"metadata_type": "(str) - Type of metadata to expect in the images. This will determine how the images are processed. If 'custom' is selected, you can provide a custom regex pattern to extract metadata from the image names.",
|
884
1033
|
"metadata_types": "(list) - Types of metadata to include in the analysis.",
|
885
1034
|
"merge_edge_pathogen_cells": "(bool) - Whether to merge cells that share pathogen objects.",
|
886
|
-
"merge_pathogens": "(bool) - Whether to merge pathogen objects that share more than 75
|
1035
|
+
"merge_pathogens": "(bool) - Whether to merge pathogen objects that share more than 75 percent of their perimeter.",
|
887
1036
|
"metric": "(str) - Metric to use for UMAP.",
|
888
1037
|
"min_cell_count": "(int) - Minimum number of cells required for analysis.",
|
889
1038
|
"min_dist": "(float) - Minimum distance for UMAP.",
|
@@ -892,6 +1041,7 @@ def generate_fields(variables, scrollable_frame):
|
|
892
1041
|
"mix": "(dict) - Mixing settings for the samples.",
|
893
1042
|
"model_name": "(str) - Name of the Cellpose model.",
|
894
1043
|
"model_type": "(str) - Type of model to use for the analysis.",
|
1044
|
+
"model_type_ml": "(str) - Type of model to use for machine learning.",
|
895
1045
|
"nc": "(str) - Negative control identifier.",
|
896
1046
|
"nc_loc": "(str) - Location of the negative control in the images.",
|
897
1047
|
"negative_control": "(str) - Identifier for the negative control.",
|
@@ -922,12 +1072,7 @@ def generate_fields(variables, scrollable_frame):
|
|
922
1072
|
"pathogen_background": "(float) - The background intensity for the pathogen channel. This will be used to remove background noise.",
|
923
1073
|
"pathogen_chann_dim": "(int) - Dimension of the channel to use for pathogen segmentation.",
|
924
1074
|
"pathogen_channel": "(int) - The channel to use for the pathogen. If None, the pathogen will not be segmented.",
|
925
|
-
"pathogen_intensity_range": "(
|
926
|
-
"pathogen_loc": "(list) - The locations of the pathogen types in the images.",
|
927
|
-
"pathogen_mask_dim": "(int) - The dimension of the array the pathogen mask is saved in.",
|
928
|
-
"pathogen_min_size": "(int) - The minimum size of pathogen objects in pixels^2.",
|
929
|
-
"pathogen_model": "(str) - Model to use for pathogen segmentation.",
|
930
|
-
"pathogen_plate_metadata": "(str) - Metadata for the pathogen plate.",
|
1075
|
+
"pathogen_intensity_range": "(str) - Metadata for the pathogen plate.",
|
931
1076
|
"pathogen_Signal_to_noise": "(float) - The signal-to-noise ratio for the pathogen channel. This will be used to determine the range of intensities to normalize images to for pathogen segmentation.",
|
932
1077
|
"pathogen_size_range": "(list) - Size range for pathogen segmentation.",
|
933
1078
|
"pathogen_types": "(list) - Types of pathogens to include in the analysis.",
|
@@ -993,7 +1138,6 @@ def generate_fields(variables, scrollable_frame):
|
|
993
1138
|
"treatments": "(list) - The treatments to include in the analysis.",
|
994
1139
|
"top_features": "(int) - Top features to include in the analysis.",
|
995
1140
|
"train": "(bool) - Whether to train the model.",
|
996
|
-
"train_mode": "(str) - Mode to use for training the model.",
|
997
1141
|
"transform": "(dict) - Transformation to apply to the data.",
|
998
1142
|
"upscale": "(bool) - Whether to upscale the images.",
|
999
1143
|
"upscale_factor": "(float) - Factor by which to upscale the images.",
|
@@ -1003,6 +1147,20 @@ def generate_fields(variables, scrollable_frame):
|
|
1003
1147
|
"verbose": "(bool) - Whether to print verbose output during processing.",
|
1004
1148
|
"weight_decay": "(float) - Weight decay for regularization.",
|
1005
1149
|
"width_height": "(tuple) - Width and height of the input images.",
|
1150
|
+
"barcode_coordinates": "(list of lists) - Coordinates of the barcodes in the sequence.",
|
1151
|
+
"barcode_mapping": "dict - names and barecode csv files",
|
1152
|
+
"compression": "str - type of compression (e.g. zlib)",
|
1153
|
+
"complevel": "int - level of compression (0-9). Higher is slower and yealds smaller files",
|
1154
|
+
"file_type": "str - type of file to process",
|
1155
|
+
"model_path": "str - path to the model",
|
1156
|
+
"tar_path": "str - path to the tar file with image dataset",
|
1157
|
+
"score_threshold": "float - threshold for classification",
|
1158
|
+
"sample": "str - number of images to sample for tar dataset (including both classes). Default: None",
|
1159
|
+
"file_metadata": "str - string that must be present in image path to be included in the dataset",
|
1160
|
+
"apply_model_to_dataset": "bool - whether to apply model to the dataset",
|
1161
|
+
"train_channels": "list - channels to use for training",
|
1162
|
+
"dataset_mode": "str - How to generate train/test dataset.",
|
1163
|
+
"annotated_classes": "list - list of numbers in annotation column.",
|
1006
1164
|
"um_per_pixel": "(float) - The micrometers per pixel for the images."
|
1007
1165
|
}
|
1008
1166
|
|
@@ -1018,29 +1176,6 @@ def generate_fields(variables, scrollable_frame):
|
|
1018
1176
|
|
1019
1177
|
return vars_dict
|
1020
1178
|
|
1021
|
-
|
1022
|
-
categories = {
|
1023
|
-
"General": ["src", "metadata_type", "custom_regex", "experiment", "channels", "magnification", "channel_dims"],
|
1024
|
-
"Paths":["grna", "barcodes"],
|
1025
|
-
"Regression":["class_1_threshold", "plate", "other", "fraction_threshold", "alpha", "remove_row_column_effect", "regression_type", "min_cell_count", "agg_type", "transform", "dependent_variable", "gene_weights_csv"],
|
1026
|
-
"Cellpose":["from_scratch", "n_epochs", "width_height", "model_name", "custom_model", "resample", "rescale", "CP_prob", "flow_threshold", "percentiles", "circular", "invert", "diameter", "grayscale", "background", "Signal_to_noise", "resize", "target_height", "target_width"],
|
1027
|
-
"Nucleus": ["nucleus_intensity_range", "nucleus_size_range", "nucleus_chann_dim", "nucleus_channel", "nucleus_background", "nucleus_Signal_to_noise", "nucleus_CP_prob", "nucleus_FT", "remove_background_nucleus", "nucleus_min_size", "nucleus_mask_dim", "nucleus_loc"],
|
1028
|
-
"Cell": ["cell_intensity_range", "cell_size_range", "cell_chann_dim", "cell_channel", "cell_background", "cell_Signal_to_noise", "cell_CP_prob", "cell_FT", "remove_background_cell", "cell_min_size", "cell_mask_dim", "cytoplasm", "cytoplasm_min_size", "include_uninfected", "merge_edge_pathogen_cells", "adjust_cells"],
|
1029
|
-
"Pathogen": ["pathogen_intensity_range", "pathogen_size_range", "pathogen_chann_dim", "pathogen_channel", "pathogen_background", "pathogen_Signal_to_noise", "pathogen_CP_prob", "pathogen_FT", "pathogen_model", "remove_background_pathogen", "pathogen_min_size", "pathogen_mask_dim"],
|
1030
|
-
"Timelapse": ["fps", "timelapse_displacement", "timelapse_memory", "timelapse_frame_limits", "timelapse_remove_transient", "timelapse_mode", "timelapse_objects", "compartments"],
|
1031
|
-
"Plot": ["plot_control", "plot_nr", "examples_to_plot", "normalize_plots", "normalize", "cmap", "figuresize", "plot_cluster_grids", "img_zoom", "row_limit", "color_by", "plot_images", "smooth_lines", "plot_points", "plot_outlines", "black_background", "plot_by_cluster", "heatmap_feature","grouping","min_max","cmap","save_figure"],
|
1032
|
-
"Object Image": ["save_png", "dialate_pngs", "dialate_png_ratios", "png_size", "png_dims", "save_arrays", "normalize_by", "dialate_png_ratios", "crop_mode", "dialate_pngs", "normalize", "use_bounding_box"],
|
1033
|
-
"Annotate Data": ["nc_loc", "pc_loc", "nc", "pc", "cell_plate_metadata","pathogen_types", "pathogen_plate_metadata", "treatment_plate_metadata", "metadata_types", "cell_types", "target","positive_control","negative_control", "location_column", "treatment_loc", "cells", "cell_loc", "pathogens", "pathogen_loc", "channel_of_interest", "measurement", "treatments", "um_per_pixel", "nr_imgs", "exclude", "exclude_conditions", "mix", "pos", "neg"],
|
1034
|
-
"Measurements": ["remove_image_canvas", "remove_highly_correlated", "homogeneity", "homogeneity_distances", "radial_dist", "calculate_correlation", "manders_thresholds", "save_measurements", "tables", "image_nr", "dot_size", "filter_by", "remove_highly_correlated_features", "remove_low_variance_features", "channel_of_interest"],
|
1035
|
-
"Advanced": ["plate_dict", "target_intensity_min", "cells_per_well", "include_multinucleated", "include_multiinfected", "include_noninfected", "backgrounds", "plot", "timelapse", "schedule", "test_size","exclude","n_repeats","top_features", "model_type","minimum_cell_count","n_estimators","preprocess", "remove_background", "normalize", "lower_percentile", "merge_pathogens", "batch_size", "filter", "save", "masks", "verbose", "randomize", "n_jobs", "train_mode","amsgrad","use_checkpoint","gradient_accumulation","gradient_accumulation_steps","intermedeate_save","pin_memory","n_jobs","channels","augment"],
|
1036
|
-
"Clustering": ["eps","min_samples","analyze_clusters","clustering","remove_cluster_noise"],
|
1037
|
-
"Embedding": ["visualize","n_neighbors","min_dist","metric","resnet_features","reduction_method","embedding_by_controls","col_to_compare","log_data"],
|
1038
|
-
"Train DL Model": ["epochs", "loss_type", "optimizer_type","image_size","val_split","learning_rate","weight_decay","dropout_rate", "init_weights", "train", "classes"],
|
1039
|
-
"Miscellaneous": ["all_to_mip", "pick_slice", "skip_mode", "upscale", "upscale_factor"],
|
1040
|
-
"Test": ["test_mode", "test_images", "random_test", "test_nr", "test"],
|
1041
|
-
"Sequencing": ["upstream", "downstream", "barecode_length_1", "barecode_length_2", "chunk_size"]
|
1042
|
-
}
|
1043
|
-
|
1044
1179
|
descriptions = {
|
1045
1180
|
'mask': "\n\nHelp:\n- Generate Cells, Nuclei, Pathogens, and Cytoplasm masks from intensity images in src.\n- To ensure that spacr is installed correctly:\n- 1. Downloade the training set (click Download).\n- 2. Import settings (click settings navigate to downloaded dataset settings folder and import preprocess_generate_masks_settings.csv).\n- 3. Run the module.\n- 4. Proceed to the Measure module (click Measure in the menue bar).\n- For further help, click the Help button in the menue bar.",
|
1046
1181
|
|
@@ -1048,8 +1183,6 @@ descriptions = {
|
|
1048
1183
|
|
1049
1184
|
'classify': "Train and Test any Torch Computer vision model. (Requires PNG images from the Measure module). Function: train_test_model from spacr.deep_spacr.\n\nKey Features:\n- Deep Learning Integration: Train and evaluate state-of-the-art Torch models for various classification tasks.\n- Flexible Training: Supports a wide range of Torch models, allowing customization based on specific research needs.\n- Data Requirement: Requires PNG images generated by the Measure module for training and testing.",
|
1050
1185
|
|
1051
|
-
'sequencing': "Find Barcodes and gRNA sequences in FASTQ files. (Requires paired-end FASTQ files, R1 and R2). Function: analyze_reads from spacr.sequencing.\n\nKey Features:\n- Barcode and gRNA Identification: Efficiently detect and extract barcode and gRNA sequences from raw sequencing data.\n- Paired-End Support: Specifically designed to handle paired-end FASTQ files, ensuring accurate sequence alignment and analysis.\n- High Throughput: Capable of processing large sequencing datasets quickly and accurately.",
|
1052
|
-
|
1053
1186
|
'umap': "Generate UMAP or tSNE embeddings and represent points as single cell images. (Requires measurements.db and PNG images from the Measure module). Function: generate_image_umap from spacr.core.\n\nKey Features:\n- Dimensionality Reduction: Employ UMAP or tSNE algorithms to reduce high-dimensional data into two dimensions for visualization.\n- Single Cell Representation: Visualize embedding points as single cell images, providing an intuitive understanding of data clusters.\n- Data Integration: Requires measurements and images generated by the Measure module, ensuring comprehensive data representation.",
|
1054
1187
|
|
1055
1188
|
'train_cellpose': "Train custom Cellpose models for your specific dataset. Function: train_cellpose_model from spacr.core.\n\nKey Features:\n- Custom Model Training: Train Cellpose models on your dataset to improve segmentation accuracy.\n- Data Adaptation: Tailor the model to handle specific types of biological samples more effectively.\n- Advanced Training Options: Supports various training parameters and configurations for optimized performance.",
|
@@ -1060,8 +1193,8 @@ descriptions = {
|
|
1060
1193
|
|
1061
1194
|
'cellpose_all': "Run Cellpose on all images in your dataset and obtain masks and measurements. Function: cellpose_analysis from spacr.cellpose.\n\nKey Features:\n- End-to-End Analysis: Perform both segmentation and measurement extraction in a single step.\n- Efficiency: Process entire datasets with minimal manual intervention.\n- Comprehensive Output: Obtain detailed masks and corresponding measurements for further analysis.",
|
1062
1195
|
|
1063
|
-
'map_barcodes': "Map barcodes
|
1064
|
-
|
1196
|
+
'map_barcodes': "\n\nHelp:\n- 1 .Generate consensus read fastq files from R1 and R2 files.\n- 2. Map barcodes from sequencing data for identification and tracking of samples.\n- 3. Run the module to extract and map barcodes from your FASTQ files in chunks.\n- Prepare your barcode CSV files with the appropriate 'name' and 'sequence' columns.\n- Configure the barcode settings (coordinates and reverse complement flags) according to your experimental setup.\n- For further help, click the Help button in the menu bar.",
|
1197
|
+
|
1065
1198
|
'regression': "Perform regression analysis on your data. Function: regression_tools from spacr.analysis.\n\nKey Features:\n- Statistical Analysis: Conduct various types of regression analysis to identify relationships within your data.\n- Flexible Options: Supports multiple regression models and configurations.\n- Data Insight: Gain deeper insights into your dataset through advanced regression techniques.",
|
1066
1199
|
|
1067
1200
|
'recruitment': "Analyze recruitment data to understand sample recruitment dynamics. Function: recruitment_analysis_tools from spacr.analysis.\n\nKey Features:\n- Recruitment Analysis: Investigate and analyze the recruitment of samples over time or conditions.\n- Visualization: Generate visualizations to represent recruitment trends and patterns.\n- Integration: Utilize data from various sources for a comprehensive recruitment analysis."
|
@@ -1070,7 +1203,7 @@ descriptions = {
|
|
1070
1203
|
def set_annotate_default_settings(settings):
|
1071
1204
|
settings.setdefault('src', 'path')
|
1072
1205
|
settings.setdefault('image_type', 'cell_png')
|
1073
|
-
settings.setdefault('channels',
|
1206
|
+
settings.setdefault('channels', "r,g,b")
|
1074
1207
|
settings.setdefault('img_size', 200)
|
1075
1208
|
settings.setdefault('annotation_column', 'test')
|
1076
1209
|
settings.setdefault('normalize', 'False')
|
@@ -1079,3 +1212,15 @@ def set_annotate_default_settings(settings):
|
|
1079
1212
|
settings.setdefault('threshold', '2')
|
1080
1213
|
return settings
|
1081
1214
|
|
1215
|
+
def set_default_generate_barecode_mapping(settings=None):
    """Fill in default settings for barcode-mapping generation.

    Keys missing from ``settings`` are added in place; keys that are
    already present keep their existing values.

    Args:
        settings: Dictionary of user-supplied settings. When omitted or
            ``None``, a new dictionary is created for this call.

    Returns:
        The dictionary that was passed in (or the newly created one),
        populated with every default listed below.
    """
    # A literal ``{}`` default is evaluated once at definition time and would
    # be shared (and mutated) by every call that omits the argument, leaking
    # one caller's values into the next. Build a fresh dict per call instead.
    if settings is None:
        settings = {}

    settings.setdefault('src', 'path')
    settings.setdefault('chunk_size', 100000)

    # NOTE(review): these defaults are absolute paths on one specific machine;
    # callers are expected to override 'barcode_mapping' with their own files.
    # Each entry presumably reads [csv_path, (start, end) read coordinates,
    # reverse-complement flag] -- confirm against the sequencing module.
    settings.setdefault('barcode_mapping', {
        'row': ['/home/carruthers/Documents/row_barcodes.csv', (80, 88), True],
        'grna': ['/home/carruthers/Documents/grna_barcodes.csv', (34, 55), True],
        'column': ['/home/carruthers/Documents/column_barcodes.csv', (0, 7), False],
    })

    settings.setdefault('n_jobs', None)
    settings.setdefault('compression', 'zlib')
    settings.setdefault('complevel', 5)
    return settings
|