spacr 0.1.6__py3-none-any.whl → 0.1.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
spacr/settings.py CHANGED
@@ -43,7 +43,7 @@ def set_default_settings_preprocess_generate_masks(src, settings={}):
43
43
  settings.setdefault('magnification', 20)
44
44
  settings.setdefault('custom_regex', None)
45
45
  settings.setdefault('metadata_type', 'cellvoyager')
46
- settings.setdefault('workers', os.cpu_count()-4)
46
+ settings.setdefault('n_jobs', os.cpu_count()-4)
47
47
  settings.setdefault('randomize', True)
48
48
  settings.setdefault('verbose', True)
49
49
  settings.setdefault('remove_background_cell', False)
@@ -219,6 +219,7 @@ def set_default_umap_image_settings(settings={}):
219
219
  def get_measure_crop_settings(settings):
220
220
 
221
221
  settings.setdefault('src', 'path')
222
+ settings.setdefault('verbose', False)
222
223
 
223
224
  # Test mode
224
225
  settings.setdefault('test_mode', False)
@@ -253,7 +254,7 @@ def get_measure_crop_settings(settings):
253
254
  settings.setdefault('plot',False)
254
255
  settings.setdefault('plot_filtration',False)
255
256
  settings.setdefault('representative_images', False)
256
- settings.setdefault('max_workers', os.cpu_count()-2)
257
+ settings.setdefault('n_jobs', os.cpu_count()-2)
257
258
 
258
259
  # Object settings
259
260
  settings.setdefault('cell_mask_dim',None)
@@ -282,6 +283,7 @@ def get_measure_crop_settings(settings):
282
283
  settings.setdefault('um_per_pixel', 0.1)
283
284
 
284
285
  if settings['test_mode']:
286
+ settings['verbose'] = True
285
287
  settings['plot'] = True
286
288
  settings['plot_filtration'] = True
287
289
  test_imgs = settings['test_nr']
@@ -290,6 +292,7 @@ def get_measure_crop_settings(settings):
290
292
  return settings
291
293
 
292
294
  def set_default_analyze_screen(settings):
295
+ settings.setdefault('src', 'path')
293
296
  settings.setdefault('model_type','xgboost')
294
297
  settings.setdefault('heatmap_feature','predictions')
295
298
  settings.setdefault('grouping','mean')
@@ -338,7 +341,7 @@ def set_default_train_test_model(settings):
338
341
  settings.setdefault('gradient_accumulation_steps',4)
339
342
  settings.setdefault('intermedeate_save',True)
340
343
  settings.setdefault('pin_memory',True)
341
- settings.setdefault('num_workers',cores)
344
+ settings.setdefault('n_jobs',cores)
342
345
  settings.setdefault('channels',['r','g','b'])
343
346
  settings.setdefault('augment',False)
344
347
  settings.setdefault('verbose',False)
@@ -390,6 +393,7 @@ def get_analyze_reads_default_settings(settings):
390
393
  return settings
391
394
 
392
395
  def get_map_barcodes_default_settings(settings):
396
+ settings.setdefault('src', 'path')
393
397
  settings.setdefault('grna', '/home/carruthers/Documents/grna_barcodes.csv')
394
398
  settings.setdefault('barcodes', '/home/carruthers/Documents/SCREEN_BARCODES.csv')
395
399
  settings.setdefault('plate_dict', {'EO1': 'plate1', 'EO2': 'plate2', 'EO3': 'plate3', 'EO4': 'plate4', 'EO5': 'plate5', 'EO6': 'plate6', 'EO7': 'plate7', 'EO8': 'plate8'})
@@ -488,266 +492,264 @@ def get_identify_masks_finetune_default_settings(settings):
488
492
  return settings
489
493
 
490
494
  q = None
495
# Maps every GUI setting name to the Python type (or tuple of accepted types)
# its widget value is converted to. Each key appears exactly once: earlier
# revisions repeated several keys (n_jobs, batch_size, remove_background,
# target, learning_rate, weight_decay, resize, fraction_threshold) with
# identical values; a dict literal silently keeps only the last of such
# repeats, so removing them leaves the resulting mapping unchanged.
#
# NOTE(review): the ``(list, list)`` entries are redundant spellings of
# ``list``; they are kept verbatim because consumers appear to dispatch on the
# exact tuple value -- confirm before simplifying.
expected_types = {
    "src": str,
    "metadata_type": str,
    "custom_regex": (str, type(None)),
    "experiment": str,
    "channels": list,
    "magnification": int,
    "nucleus_channel": (int, type(None)),
    "nucleus_background": int,
    "nucleus_Signal_to_noise": float,
    "nucleus_CP_prob": float,
    "nucleus_FT": float,
    "cell_channel": (int, type(None)),
    "cell_background": (int, float),
    "cell_Signal_to_noise": (int, float),
    "cell_CP_prob": (int, float),
    "cell_FT": (int, float),
    "pathogen_channel": (int, type(None)),
    "pathogen_background": (int, float),
    "pathogen_Signal_to_noise": (int, float),
    "pathogen_CP_prob": (int, float),
    "pathogen_FT": (int, float),
    "preprocess": bool,
    "masks": bool,
    "examples_to_plot": int,
    "randomize": bool,
    "batch_size": int,
    "timelapse": bool,
    "timelapse_displacement": int,
    "timelapse_memory": int,
    "timelapse_frame_limits": list,  # This can be a list of lists
    "timelapse_remove_transient": bool,
    "timelapse_mode": str,
    "timelapse_objects": list,
    "fps": int,
    "remove_background": bool,
    "lower_percentile": (int, float),
    "merge_pathogens": bool,
    "normalize_plots": bool,
    "all_to_mip": bool,
    "pick_slice": bool,
    "skip_mode": str,
    "save": bool,
    "plot": bool,
    "n_jobs": int,
    "verbose": bool,
    "input_folder": str,
    "cell_mask_dim": int,
    "cell_min_size": int,
    "cytoplasm_min_size": int,
    "nucleus_mask_dim": int,
    "nucleus_min_size": int,
    "pathogen_mask_dim": int,
    "pathogen_min_size": int,
    "save_png": bool,
    "crop_mode": list,
    "use_bounding_box": bool,
    "png_size": list,  # This can be a list of lists
    "normalize": bool,
    "png_dims": list,
    "normalize_by": str,
    "save_measurements": bool,
    "representative_images": bool,
    "plot_filtration": bool,
    "include_uninfected": bool,
    "dialate_pngs": bool,
    "dialate_png_ratios": list,
    "cells": list,
    "cell_loc": list,
    "pathogens": list,
    "pathogen_loc": (list, list),  # This can be a list of lists
    "treatments": list,
    "treatment_loc": (list, list),  # This can be a list of lists
    "channel_of_interest": int,
    "compartments": list,
    "measurement": str,
    "nr_imgs": int,
    "um_per_pixel": (int, float),
    # Additional settings based on provided defaults
    "include_noninfected": bool,
    "include_multiinfected": bool,
    "include_multinucleated": bool,
    "filter_min_max": (list, type(None)),
    "channel_dims": list,
    "backgrounds": list,
    "outline_thickness": int,
    "outline_color": str,
    "overlay_chans": list,
    "overlay": bool,
    "normalization_percentiles": list,
    "print_object_number": bool,
    "nr": int,
    "figuresize": int,
    "cmap": str,
    "test_mode": bool,
    "test_images": int,
    "remove_background_cell": bool,
    "remove_background_nucleus": bool,
    "remove_background_pathogen": bool,
    "pathogen_model": (str, type(None)),
    "filter": bool,
    "upscale": bool,
    "upscale_factor": float,
    "adjust_cells": bool,
    "row_limit": int,
    "tables": list,
    "visualize": str,
    "image_nr": int,
    "dot_size": int,
    "n_neighbors": int,
    "min_dist": float,
    "metric": str,
    "eps": float,
    "min_samples": int,
    "filter_by": str,
    "img_zoom": float,
    "plot_by_cluster": bool,
    "plot_cluster_grids": bool,
    "remove_cluster_noise": bool,
    "remove_highly_correlated": bool,
    "log_data": bool,
    "black_background": bool,
    "remove_image_canvas": bool,
    "plot_outlines": bool,
    "plot_points": bool,
    "smooth_lines": bool,
    "clustering": str,
    "exclude": (str, type(None)),
    "col_to_compare": str,
    "pos": str,
    "neg": str,
    "embedding_by_controls": bool,
    "plot_images": bool,
    "reduction_method": str,
    "save_figure": bool,
    "color_by": (str, type(None)),
    "analyze_clusters": bool,
    "resnet_features": bool,
    "test_nr": int,
    "radial_dist": bool,
    "calculate_correlation": bool,
    "manders_thresholds": list,
    "homogeneity": bool,
    "homogeneity_distances": list,
    "save_arrays": bool,
    "cytoplasm": bool,
    "merge_edge_pathogen_cells": bool,
    "cells_per_well": int,
    "pathogen_size_range": list,
    "nucleus_size_range": list,
    "cell_size_range": list,
    "pathogen_intensity_range": list,
    "nucleus_intensity_range": list,
    "cell_intensity_range": list,
    "target_intensity_min": int,
    "model_type": str,
    "heatmap_feature": str,
    "grouping": str,
    "min_max": str,
    "minimum_cell_count": int,
    "n_estimators": int,
    "test_size": float,
    "location_column": str,
    "positive_control": str,
    "negative_control": str,
    "n_repeats": int,
    "top_features": int,
    "remove_low_variance_features": bool,
    "classes": list,
    "schedule": str,
    "loss_type": str,
    "image_size": int,
    "epochs": int,
    "val_split": float,
    "train_mode": str,
    "learning_rate": float,
    "weight_decay": float,
    "dropout_rate": float,
    "init_weights": bool,
    "amsgrad": bool,
    "use_checkpoint": bool,
    "gradient_accumulation": bool,
    "gradient_accumulation_steps": int,
    "intermedeate_save": bool,
    "pin_memory": bool,
    "augment": bool,
    "target": str,
    "cell_types": list,
    "cell_plate_metadata": (list, type(None)),
    "pathogen_types": list,
    "pathogen_plate_metadata": (list, list),  # This can be a list of lists
    "treatment_plate_metadata": (list, list),  # This can be a list of lists
    "metadata_types": list,
    "cell_chann_dim": int,
    "nucleus_chann_dim": int,
    "pathogen_chann_dim": int,
    "plot_nr": int,
    "plot_control": bool,
    "upstream": str,
    "downstream": str,
    "barecode_length_1": int,
    "barecode_length_2": int,
    "chunk_size": int,
    "grna": str,
    "barcodes": str,
    "plate_dict": dict,
    "pc": str,
    "pc_loc": str,
    "nc": str,
    "nc_loc": str,
    "dependent_variable": str,
    "transform": (str, type(None)),
    "agg_type": str,
    "min_cell_count": int,
    "regression_type": str,
    "remove_row_column_effect": bool,
    "alpha": float,
    "fraction_threshold": float,
    "class_1_threshold": (float, type(None)),
    "CP_prob": float,
    "flow_threshold": float,
    "percentiles": (list, type(None)),
    "circular": bool,
    "invert": bool,
    "diameter": int,
    "grayscale": bool,
    "resize": bool,
    "target_height": (int, type(None)),
    "target_width": (int, type(None)),
    "rescale": bool,
    "resample": bool,
    "model_name": str,
    "Signal_to_noise": int,
    "n_epochs": int,
    "from_scratch": bool,
    "width_height": list,
    "gene_weights_csv": str,
}
491
744
 
492
- def check_settings(vars_dict):
493
- global q
745
+ def check_settings_v1(vars_dict, expected_types,q=None):
494
746
  from .gui_utils import parse_list
495
747
  settings = {}
496
748
  # Define the expected types for each key, including None where applicable
497
- expected_types = {
498
- "src": str,
499
- "metadata_type": str,
500
- "custom_regex": (str, type(None)),
501
- "experiment": str,
502
- "channels": list,
503
- "magnification": int,
504
- "nucleus_channel": (int, type(None)),
505
- "nucleus_background": int,
506
- "nucleus_Signal_to_noise": float,
507
- "nucleus_CP_prob": float,
508
- "nucleus_FT": float,
509
- "cell_channel": (int, type(None)),
510
- "cell_background": (int, float),
511
- "cell_Signal_to_noise": (int, float),
512
- "cell_CP_prob": (int, float),
513
- "cell_FT": (int, float),
514
- "pathogen_channel": (int, type(None)),
515
- "pathogen_background": (int, float),
516
- "pathogen_Signal_to_noise": (int, float),
517
- "pathogen_CP_prob": (int, float),
518
- "pathogen_FT": (int, float),
519
- "preprocess": bool,
520
- "masks": bool,
521
- "examples_to_plot": int,
522
- "randomize": bool,
523
- "batch_size": int,
524
- "timelapse": bool,
525
- "timelapse_displacement": int,
526
- "timelapse_memory": int,
527
- "timelapse_frame_limits": list, # This can be a list of lists
528
- "timelapse_remove_transient": bool,
529
- "timelapse_mode": str,
530
- "timelapse_objects": list,
531
- "fps": int,
532
- "remove_background": bool,
533
- "lower_percentile": (int, float),
534
- "merge_pathogens": bool,
535
- "normalize_plots": bool,
536
- "all_to_mip": bool,
537
- "pick_slice": bool,
538
- "skip_mode": str,
539
- "save": bool,
540
- "plot": bool,
541
- "workers": int,
542
- "verbose": bool,
543
- "input_folder": str,
544
- "cell_mask_dim": int,
545
- "cell_min_size": int,
546
- "cytoplasm_min_size": int,
547
- "nucleus_mask_dim": int,
548
- "nucleus_min_size": int,
549
- "pathogen_mask_dim": int,
550
- "pathogen_min_size": int,
551
- "save_png": bool,
552
- "crop_mode": list,
553
- "use_bounding_box": bool,
554
- "png_size": list, # This can be a list of lists
555
- "normalize": bool,
556
- "png_dims": list,
557
- "normalize_by": str,
558
- "save_measurements": bool,
559
- "representative_images": bool,
560
- "plot_filtration": bool,
561
- "include_uninfected": bool,
562
- "dialate_pngs": bool,
563
- "dialate_png_ratios": list,
564
- "max_workers": int,
565
- "cells": list,
566
- "cell_loc": list,
567
- "pathogens": list,
568
- "pathogen_loc": (list, list), # This can be a list of lists
569
- "treatments": list,
570
- "treatment_loc": (list, list), # This can be a list of lists
571
- "channel_of_interest": int,
572
- "compartments": list,
573
- "measurement": str,
574
- "nr_imgs": int,
575
- "um_per_pixel": (int, float),
576
- # Additional settings based on provided defaults
577
- "include_noninfected": bool,
578
- "include_multiinfected": bool,
579
- "include_multinucleated": bool,
580
- "filter_min_max": (list, type(None)),
581
- "channel_dims": list,
582
- "backgrounds": list,
583
- "outline_thickness": int,
584
- "outline_color": str,
585
- "overlay_chans": list,
586
- "overlay": bool,
587
- "normalization_percentiles": list,
588
- "print_object_number": bool,
589
- "nr": int,
590
- "figuresize": int,
591
- "cmap": str,
592
- "test_mode": bool,
593
- "test_images": int,
594
- "remove_background_cell": bool,
595
- "remove_background_nucleus": bool,
596
- "remove_background_pathogen": bool,
597
- "pathogen_model": (str, type(None)),
598
- "filter": bool,
599
- "upscale": bool,
600
- "upscale_factor": float,
601
- "adjust_cells": bool,
602
- "row_limit": int,
603
- "tables": list,
604
- "visualize": str,
605
- "image_nr": int,
606
- "dot_size": int,
607
- "n_neighbors": int,
608
- "min_dist": float,
609
- "metric": str,
610
- "eps": float,
611
- "min_samples": int,
612
- "filter_by": str,
613
- "img_zoom": float,
614
- "plot_by_cluster": bool,
615
- "plot_cluster_grids": bool,
616
- "remove_cluster_noise": bool,
617
- "remove_highly_correlated": bool,
618
- "log_data": bool,
619
- "black_background": bool,
620
- "remove_image_canvas": bool,
621
- "plot_outlines": bool,
622
- "plot_points": bool,
623
- "smooth_lines": bool,
624
- "clustering": str,
625
- "exclude": (str, type(None)),
626
- "col_to_compare": str,
627
- "pos": str,
628
- "neg": str,
629
- "embedding_by_controls": bool,
630
- "plot_images": bool,
631
- "reduction_method": str,
632
- "save_figure": bool,
633
- "color_by": (str, type(None)),
634
- "analyze_clusters": bool,
635
- "resnet_features": bool,
636
- "test_nr": int,
637
- "radial_dist": bool,
638
- "calculate_correlation": bool,
639
- "manders_thresholds": list,
640
- "homogeneity": bool,
641
- "homogeneity_distances": list,
642
- "save_arrays": bool,
643
- "cytoplasm": bool,
644
- "merge_edge_pathogen_cells": bool,
645
- "cells_per_well": int,
646
- "pathogen_size_range": list,
647
- "nucleus_size_range": list,
648
- "cell_size_range": list,
649
- "pathogen_intensity_range": list,
650
- "nucleus_intensity_range": list,
651
- "cell_intensity_range": list,
652
- "target_intensity_min": int,
653
- "model_type": str,
654
- "heatmap_feature": str,
655
- "grouping": str,
656
- "min_max": str,
657
- "minimum_cell_count": int,
658
- "n_estimators": int,
659
- "test_size": float,
660
- "location_column": str,
661
- "positive_control": str,
662
- "negative_control": str,
663
- "n_repeats": int,
664
- "top_features": int,
665
- "remove_low_variance_features": bool,
666
- "n_jobs": int,
667
- "classes": list,
668
- "schedule": str,
669
- "loss_type": str,
670
- "image_size": int,
671
- "epochs": int,
672
- "val_split": float,
673
- "train_mode": str,
674
- "learning_rate": float,
675
- "weight_decay": float,
676
- "dropout_rate": float,
677
- "init_weights": bool,
678
- "amsgrad": bool,
679
- "use_checkpoint": bool,
680
- "gradient_accumulation": bool,
681
- "gradient_accumulation_steps": int,
682
- "intermedeate_save": bool,
683
- "pin_memory": bool,
684
- "num_workers": int,
685
- "augment": bool,
686
- "target": str,
687
- "cell_types": list,
688
- "cell_plate_metadata": (list, type(None)),
689
- "pathogen_types": list,
690
- "pathogen_plate_metadata": (list, list), # This can be a list of lists
691
- "treatment_plate_metadata": (list, list), # This can be a list of lists
692
- "metadata_types": list,
693
- "cell_chann_dim": int,
694
- "nucleus_chann_dim": int,
695
- "pathogen_chann_dim": int,
696
- "plot_nr": int,
697
- "plot_control": bool,
698
- "remove_background": bool,
699
- "target": str,
700
- "upstream": str,
701
- "downstream": str,
702
- "barecode_length_1": int,
703
- "barecode_length_2": int,
704
- "chunk_size": int,
705
- "grna": str,
706
- "barcodes": str,
707
- "plate_dict": dict,
708
- "pc": str,
709
- "pc_loc": str,
710
- "nc": str,
711
- "nc_loc": str,
712
- "dependent_variable": str,
713
- "transform": (str, type(None)),
714
- "agg_type": str,
715
- "min_cell_count": int,
716
- "regression_type": str,
717
- "remove_row_column_effect": bool,
718
- "alpha": float,
719
- "fraction_threshold": float,
720
- "class_1_threshold": (float, type(None)),
721
- "batch_size": int,
722
- "CP_prob": float,
723
- "flow_threshold": float,
724
- "percentiles": (list, type(None)),
725
- "circular": bool,
726
- "invert": bool,
727
- "diameter": int,
728
- "grayscale": bool,
729
- "resize": bool,
730
- "target_height": (int, type(None)),
731
- "target_width": (int, type(None)),
732
- "rescale": bool,
733
- "resample": bool,
734
- "model_name": str,
735
- "Signal_to_noise": int,
736
- "learning_rate": float,
737
- "weight_decay": float,
738
- "batch_size": int,
739
- "n_epochs": int,
740
- "from_scratch": bool,
741
- "width_height": list,
742
- "resize": bool,
743
- "gene_weights_csv": str,
744
- "fraction_threshold": float,
745
- }
746
749
 
747
750
  for key, (label, widget, var) in vars_dict.items():
748
751
  if key not in expected_types:
749
752
  if key not in ["General","Nucleus","Cell","Pathogen","Timelapse","Plot","Object Image","Annotate Data","Measurements","Advanced","Miscellaneous","Test"]:
750
-
751
753
  q.put(f"Key {key} not found in expected types.")
752
754
  continue
753
755
 
@@ -794,94 +796,291 @@ def check_settings(vars_dict):
794
796
 
795
797
  return settings
796
798
 
799
def _is_blank(value):
    """Return True when a widget value carries no input (None or empty string)."""
    return value is None or value == ""


def _parse_number(value):
    """Convert *value* to int when possible, otherwise to float.

    Values that are already numeric are returned unchanged. Strings are tried
    as int first and then as float, so "7" -> 7, "2.5" -> 2.5 and "1e3" ->
    1000.0. Raises ValueError when neither conversion succeeds.
    """
    if isinstance(value, (int, float)):
        return value
    text = str(value)
    try:
        return int(text)
    except ValueError:
        return float(text)


def check_settings(vars_dict, expected_types, q=None):
    """Convert GUI variable values into a typed settings dictionary.

    Args:
        vars_dict: Mapping of setting name to a ``(label, widget, var)``
            triple; only ``var.get()`` is used here.
        expected_types: Mapping of setting name to the target type, or to a
            tuple of accepted types.
        q: Object with a ``put(message)`` method that receives warnings and
            errors. When omitted, a fresh ``multiprocessing.Queue`` is
            created, which nothing outside this call reads.

    Returns:
        A dict of converted settings, or ``None`` as soon as one value cannot
        be converted (the error is reported through ``q``).
    """
    # Imported locally because a module-level ``import ast`` is not visible
    # from this block.
    import ast

    if q is None:
        from multiprocessing import Queue
        q = Queue()

    # Names that are not in expected_types but must not trigger a warning.
    section_headers = {
        "General", "Nucleus", "Cell", "Pathogen", "Timelapse", "Plot",
        "Object Image", "Annotate Data", "Measurements", "Advanced",
        "Miscellaneous", "Test",
    }
    # Settings parsed as a Python literal that may be a list or list of lists.
    nested_list_keys = {"png_size", "pathogen_plate_metadata", "treatment_plate_metadata"}
    truthy_words = {"true", "1", "t", "y", "yes"}
    none_type = type(None)

    def parse_list_value(raw):
        # Deferred so the package-relative import only runs when a list
        # setting is actually parsed.
        from .gui_utils import parse_list
        return parse_list(raw)

    settings = {}

    for key, (_label, _widget, var) in vars_dict.items():
        if key not in expected_types:
            if key not in section_headers:
                q.put(f"Key {key} not found in expected types.")
            continue

        value = var.get()
        expected_type = expected_types[key]
        # Test for "no input" explicitly so a numeric 0 is not mistaken for
        # an empty field.
        blank = _is_blank(value)

        try:
            if key in nested_list_keys:
                parsed = None if blank else ast.literal_eval(value)
                if not isinstance(parsed, list):
                    raise ValueError("Invalid format for list or list of lists")
                nested = [isinstance(item, list) for item in parsed]
                if any(nested) and not all(nested):
                    raise ValueError("Invalid format: Mixed list and list of lists")
                settings[key] = parsed
            elif expected_type is list:
                settings[key] = None if blank else parse_list_value(value)
            elif expected_type is bool:
                if isinstance(value, bool):
                    settings[key] = value
                else:
                    # str() keeps non-string inputs (e.g. the int 1) from
                    # failing on .lower().
                    settings[key] = str(value).lower() in truthy_words
            elif expected_type is dict:
                # dict("...") cannot parse text; evaluate it as a literal.
                if blank:
                    settings[key] = None
                elif isinstance(value, dict):
                    settings[key] = value
                else:
                    parsed = ast.literal_eval(value)
                    if not isinstance(parsed, dict):
                        raise ValueError("Invalid format for dict")
                    settings[key] = parsed
            elif expected_type == (int, none_type):
                settings[key] = None if blank else int(value)
            elif expected_type == (float, none_type):
                settings[key] = None if blank else float(value)
            elif expected_type == (int, float):
                settings[key] = _parse_number(value)
            elif expected_type == (str, none_type):
                settings[key] = None if blank else str(value)
            elif isinstance(expected_type, tuple):
                # Try each accepted type in order; the first that converts wins.
                for typ in expected_type:
                    try:
                        if blank:
                            converted = None
                        elif typ is list:
                            # list("...") would split the text into single
                            # characters; use the same parser as plain list
                            # settings instead.
                            converted = parse_list_value(value)
                        else:
                            converted = typ(value)
                    except (ValueError, TypeError):
                        continue
                    settings[key] = converted
                    break
                else:
                    raise ValueError(f"could not convert {value!r}")
            else:
                settings[key] = None if blank else expected_type(value)
        except (ValueError, SyntaxError, TypeError) as e:
            if isinstance(expected_type, tuple):
                expected_type_name = ' or '.join(t.__name__ for t in expected_type)
            else:
                expected_type_name = expected_type.__name__
            q.put(f"Error: Invalid format for {key}. Expected type: {expected_type_name}. Error: {e}")
            return None

    return settings
856
+
797
857
  def generate_fields(variables, scrollable_frame):
798
858
  from .gui_utils import create_input_field
799
859
  from .gui_elements import spacrToolTip
800
860
  row = 1
801
861
  vars_dict = {}
802
862
  tooltips = {
803
- "src": "Path to the folder containing the images.",
804
- "metadata_type": "Type of metadata to expect in the images. This will determine how the images are processed. If 'custom' is selected, you can provide a custom regex pattern to extract metadata from the image names.",
805
- "custom_regex": "Custom regex pattern to extract metadata from the image names. This will only be used if 'custom' is selected for 'metadata_type'.",
806
- "experiment": "Name of the experiment. This will be used to name the output files.",
807
- "channels": "List of channels to use for the analysis. The first channel is 0, the second is 1, and so on. For example, [0,1,2] will use channels 0, 1, and 2.",
808
- "magnification": "At what magnification the images were taken. This will be used to determine the size of the objects in the images.",
809
- "nucleus_channel": "The channel to use for the nucleus. If None, the nucleus will not be segmented.",
810
- "nucleus_background": "The background intensity for the nucleus channel. This will be used to remove background noise.",
811
- "nucleus_Signal_to_noise": "The signal-to-noise ratio for the nucleus channel. This will be used to determine the range of intensities to normalize images to for nucleus segmentation.",
812
- "nucleus_CP_prob": "The cellpose probability threshold for the nucleus channel. This will be used to segment the nucleus.",
813
- "nucleus_FT": "The flow threshold for nucleus objects. This will be used in nuclues segmentation.",
814
- "cell_channel": "The channel to use for the cell. If None, the cell will not be segmented.",
815
- "cell_background": "The background intensity for the cell channel. This will be used to remove background noise.",
816
- "cell_Signal_to_noise": "The signal-to-noise ratio for the cell channel. This will be used to determine the range of intensities to normalize images to for cell segmentation.",
817
- "cell_CP_prob": "The cellpose probability threshold for the cell channel. This will be used in cell segmentation.",
818
- "cell_FT": "The flow threshold for cell objects. This will be used to segment the cells.",
819
- "pathogen_channel": "The channel to use for the pathogen. If None, the pathogen will not be segmented.",
820
- "pathogen_background": "The background intensity for the pathogen channel. This will be used to remove background noise.",
821
- "pathogen_Signal_to_noise": "The signal-to-noise ratio for the pathogen channel. This will be used to determine the range of intensities to normalize images to for pathogen segmentation.",
822
- "pathogen_CP_prob": "The cellpose probability threshold for the pathogen channel. This will be used to segment the pathogen.",
823
- "pathogen_FT": "The flow threshold for pathogen objects. This will be used in pathogen segmentation.",
824
- "preprocess": "Whether to preprocess the images before segmentation. This includes background removal and normalization. Set to False only if this step has already been done.",
825
- "masks": "Whether to generate masks for the segmented objects. If True, masks will be generated for the nucleus, cell, and pathogen.",
826
- "examples_to_plot": "The number of images to plot for each segmented object. This will be used to visually inspect the segmentation results and normalization.",
827
- "randomize": "Whether to randomize the order of the images before processing. Recommended to avoid bias in the segmentation.",
828
- "batch_size": "The batch size to use for processing the images. This will determine how many images are processed at once. Images are normalized and segmented in batches. Lower if application runs out of RAM or VRAM.",
829
- "timelapse": "Whether to process the images as a timelapse.",
830
- "timelapse_displacement": "The displacement between frames in the timelapse. This will be used to align the frames before processing.",
831
- "timelapse_memory": "The number of frames to in tandem objects must be present in to be considered the same object in the timelapse.",
832
- "timelapse_frame_limits": "The frame limits to use for the timelapse. This will determine which frames are processed. For example, [5,20] will process frames 5 to 20.",
833
- "timelapse_remove_transient": "Whether to remove transient objects in the timelapse. Transient objects are present in fewer than all frames.",
834
- "timelapse_mode": "The mode to use for processing the timelapse. 'trackpy' uses the trackpy library for tracking objects, while 'btrack' uses the btrack library.",
835
- "timelapse_objects": "The objects to track in the timelapse (cell, nucleus or pathogen). This will determine which objects are tracked over time. If None, all objects will be tracked.",
836
- "fps": "Frames per second of the automatically generated timelapse movies.",
837
- "remove_background": "Whether to remove background noise from the images. This will help improve the quality of the segmentation.",
838
- "lower_percentile": "The lower quantile to use for normalizing the images. This will be used to determine the range of intensities to normalize images to.",
839
- "merge_pathogens": "Whether to merge pathogen objects that share more than 75% of their perimeter.",
840
- "normalize_plots": "Whether to normalize the plots.",
841
- "all_to_mip": "Whether to convert all images to maximum intensity projections before processing.",
842
- "pick_slice": "Whether to pick a single slice from the z-stack images. If False, the maximum intensity projection will be used.",
843
- "skip_mode": "The mode to use for skipping images. This will determine how to handle images that cannot be processed.",
844
- "save": "Whether to save the results to disk.",
845
- "merge_edge_pathogen_cells": "Whether to merge cells that share pathogen objects.",
846
- "plot": "Whether to plot the results.",
847
- "workers": "The number of workers to use for processing the images. This will determine how many images are processed in parallel. Increase to speed up processing.",
848
- "verbose": "Whether to print verbose output during processing.",
849
- "input_folder": "Path to the folder containing the images.",
850
- "cell_mask_dim": "The dimension of the array the cell mask is saved in.",
851
- "cell_min_size": "The minimum size of cell objects in pixels^2.",
852
- "cytoplasm": "Whether to segment the cytoplasm (Cell - Nucleus + Pathogen).",
853
- "cytoplasm_min_size": "The minimum size of cytoplasm objects in pixels^2.",
854
- "nucleus_mask_dim": "The dimension of the array the nucleus mask is saved in.",
855
- "nucleus_min_size": "The minimum size of nucleus objects in pixels^2.",
856
- "pathogen_mask_dim": "The dimension of the array the pathogen mask is saved in.",
857
- "pathogen_min_size": "The minimum size of pathogen objects in pixels^2.",
858
- "save_png": "Whether to save the segmented objects as PNG images.",
859
- "crop_mode": "The mode to use for cropping the images. This will determine which objects are cropped from the images (cell, nucleus, pathogen, cytoplasm).",
860
- "use_bounding_box": "Whether to use the bounding box of the objects for cropping. If False, only the object itself will be cropped.",
861
- "png_size": "The size of the PNG images to save. This will determine the size of the saved images.",
862
- "normalize": "The percentiles to use for normalizing the images. This will be used to determine the range of intensities to normalize images to. If None, no normalization is done.",
863
- "png_dims": "The dimensions of the PNG images to save. This will determine the dimensions of the saved images. Maximum of 3 dimensions e.g. [1,2,3].",
864
- "normalize_by": "Whether to normalize the images by field of view (fov) or by PNG image (png).",
865
- "save_measurements": "Whether to save the measurements to disk.",
866
- "representative_images": "Whether to save representative images of the segmented objects (Not working yet).",
867
- "plot_filtration": "Whether to plot the filtration steps.",
868
- "include_uninfected": "Whether to include uninfected cells in the analysis.",
869
- "dialate_pngs": "Whether to dilate the PNG images before saving.",
870
- "dialate_png_ratios": "The ratios to use for dilating the PNG images. This will determine the amount of dilation applied to the images before cropping.",
871
- "max_workers": "The number of workers to use for processing the images. This will determine how many images are processed in parallel. Increase to speed up processing.",
872
- "cells": "The cell types to include in the analysis.",
873
- "cell_loc": "The locations of the cell types in the images.",
874
- "pathogens": "The pathogen types to include in the analysis.",
875
- "pathogen_loc": "The locations of the pathogen types in the images.",
876
- "treatments": "The treatments to include in the analysis.",
877
- "treatment_loc": "The locations of the treatments in the images.",
878
- "channel_of_interest": "The channel of interest to use for the analysis.",
879
- "compartments": "The compartments to measure in the images.",
880
- "measurement": "The measurement to use for the analysis.",
881
- "nr_imgs": "The number of images to plot.",
882
- "um_per_pixel": "The micrometers per pixel for the images."
863
+ "adjust_cells": "(bool) - Adjust cell parameters for better segmentation.",
864
+ "agg_type": "(str) - Type of aggregation to use for the data.",
865
+ "alpha": "(float) - Alpha parameter for the regression model.",
866
+ "all_to_mip": "(bool) - Whether to convert all images to maximum intensity projections before processing.",
867
+ "amsgrad": "(bool) - Whether to use AMSGrad optimizer.",
868
+ "analyze_clusters": "(bool) - Whether to analyze the resulting clusters.",
869
+ "augment": "(dict) - Data augmentation settings.",
870
+ "background": "(float) - Background intensity for the images.",
871
+ "backgrounds": "(str) - Background settings for the analysis.",
872
+ "barcodes": "(str) - Path to the file containing barcodes.",
873
+ "batch_size": "(int) - The batch size to use for processing the images. This will determine how many images are processed at once. Images are normalized and segmented in batches. Lower if application runs out of RAM or VRAM.",
874
+ "black_background": "(bool) - Whether to use a black background for plots.",
875
+ "calculate_correlation": "(bool) - Whether to calculate correlations between features.",
876
+ "cell_CP_prob": "(float) - The cellpose probability threshold for the cell channel. This will be used in cell segmentation.",
877
+ "cell_FT": "(float) - The flow threshold for cell objects. This will be used to segment the cells.",
878
+ "cell_background": "(float) - The background intensity for the cell channel. This will be used to remove background noise.",
879
+ "cell_chann_dim": "(int) - Dimension of the channel to use for cell segmentation.",
880
+ "cell_channel": "(int) - The channel to use for the cell. If None, the cell will not be segmented.",
881
+ "cell_intensity_range": "(list) - Intensity range for cell segmentation.",
882
+ "cell_loc": "(list) - The locations of the cell types in the images.",
883
+ "cell_mask_dim": "(int) - The dimension of the array the cell mask is saved in.",
884
+ "cell_min_size": "(int) - The minimum size of cell objects in pixels^2.",
885
+ "cell_plate_metadata": "(str) - Metadata for the cell plate.",
886
+ "cell_Signal_to_noise": "(float) - The signal-to-noise ratio for the cell channel. This will be used to determine the range of intensities to normalize images to for cell segmentation.",
887
+ "cell_size_range": "(list) - Size range for cell segmentation.",
888
+ "cell_types": "(list) - Types of cells to include in the analysis.",
889
+ "cells": "(list) - The cell types to include in the analysis.",
890
+ "cells_per_well": "(int) - Number of cells per well.",
891
+ "channel_dims": "(list) - The dimensions of the image channels.",
892
+ "channel_of_interest": "(int) - The channel of interest to use for the analysis.",
893
+ "channels": "(list) - List of channels to use for the analysis. The first channel is 0, the second is 1, and so on. For example, [0,1,2] will use channels 0, 1, and 2.",
894
+ "chunk_size": "(int) - Chunk size for processing the sequencing data.",
895
+ "classes": "(list) - Classes to include in the training.",
896
+ "class_1_threshold": "(float) - Threshold for class 1 classification.",
897
+ "clustering": "(str) - Clustering algorithm to use.",
898
+ "col_to_compare": "(str) - Column to compare in the embeddings.",
899
+ "color_by": "(str) - Coloring scheme for the plots.",
900
+ "compartments": "(list) - The compartments to measure in the images.",
901
+ "CP_prob": "(float) - Cellpose probability threshold for segmentation.",
902
+ "crop_mode": "(str) - Mode to use for cropping images (cell, nucleus, pathogen, cytoplasm).",
903
+ "custom_model": "(str) - Path to a custom Cellpose model.",
904
+ "custom_regex": "(str) - Custom regex pattern to extract metadata from the image names. This will only be used if 'custom' is selected for 'metadata_type'.",
905
+ "cytoplasm": "(bool) - Whether to segment the cytoplasm (Cell - Nucleus + Pathogen).",
906
+ "cytoplasm_min_size": "(int) - The minimum size of cytoplasm objects in pixels^2.",
907
+ "dependent_variable": "(str) - The dependent variable for the regression analysis.",
908
+ "diameter": "(float) - Diameter of the objects to segment.",
909
+ "dialate_png_ratios": "(list) - The ratios to use for dilating the PNG images. This will determine the amount of dilation applied to the images before cropping.",
910
+ "dialate_pngs": "(bool) - Whether to dilate the PNG images before saving.",
911
+ "dot_size": "(int) - Size of dots in scatter plots.",
912
+ "downstream": "(str) - Downstream region for sequencing analysis.",
913
+ "dropout_rate": "(float) - Dropout rate for training.",
914
+ "eps": "(float) - Epsilon parameter for clustering.",
915
+ "epochs": "(int) - Number of epochs for training the deep learning model.",
916
+ "examples_to_plot": "(int) - The number of images to plot for each segmented object. This will be used to visually inspect the segmentation results and normalization.",
917
+ "exclude": "(list) - Conditions to exclude from the analysis.",
918
+ "exclude_conditions": "(list) - Specific conditions to exclude from the analysis.",
919
+ "experiment": "(str) - Name of the experiment. This will be used to name the output files.",
920
+ "figuresize": "(tuple) - Size of the figures to plot.",
921
+ "filter": "(dict) - Filter settings for the analysis.",
922
+ "filter_by": "(str) - Feature to filter the data by.",
923
+ "flow_threshold": "(float) - Flow threshold for segmentation.",
924
+ "fps": "(int) - Frames per second of the automatically generated timelapse movies.",
925
+ "fraction_threshold": "(float) - Threshold for the fraction of cells to consider in the analysis.",
926
+ "from_scratch": "(bool) - Whether to train the Cellpose model from scratch.",
927
+ "gene_weights_csv": "(str) - Path to the CSV file containing gene weights.",
928
+ "gradient_accumulation": "(bool) - Whether to use gradient accumulation.",
929
+ "gradient_accumulation_steps": "(int) - Number of steps for gradient accumulation.",
930
+ "grayscale": "(bool) - Whether to process the images in grayscale.",
931
+ "grna": "(str) - Path to the file containing gRNA sequences.",
932
+ "grouping": "(str) - Grouping variable for plotting.",
933
+ "heatmap_feature": "(str) - Feature to use for generating heatmaps.",
934
+ "homogeneity": "(float) - Measure of homogeneity for the objects.",
935
+ "homogeneity_distances": "(list) - Distances to use for measuring homogeneity.",
936
+ "image_nr": "(int) - Number of images to process.",
937
+ "image_size": "(int) - Size of the images for training.",
938
+ "img_zoom": "(float) - Zoom factor for the images in plots.",
939
+ "include_multinucleated": "(bool) - Whether to include multinucleated cells in the analysis.",
940
+ "include_multiinfected": "(bool) - Whether to include multi-infected cells in the analysis.",
941
+ "include_noninfected": "(bool) - Whether to include non-infected cells in the analysis.",
942
+ "include_uninfected": "(bool) - Whether to include uninfected cells in the analysis.",
943
+ "init_weights": "(bool) - Whether to initialize weights for the model.",
944
+ "input_folder": "(str) - Path to the folder containing the images.",
945
+ "intermedeate_save": "(bool) - Whether to save intermediate results.",
946
+ "invert": "(bool) - Whether to invert the image intensities.",
947
+ "learning_rate": "(float) - Learning rate for training.",
948
+ "location_column": "(str) - Column name for the location information.",
949
+ "log_data": "(bool) - Whether to log-transform the data.",
950
+ "lower_percentile": "(float) - The lower quantile to use for normalizing the images. This will be used to determine the range of intensities to normalize images to.",
951
+ "magnification": "(int) - At what magnification the images were taken. This will be used to determine the size of the objects in the images.",
952
+ "manders_thresholds": "(list) - Thresholds for Manders' coefficients.",
953
+ "mask": "(bool) - Whether to generate masks for the segmented objects. If True, masks will be generated for the nucleus, cell, and pathogen.",
954
+ "measurement": "(str) - The measurement to use for the analysis.",
955
+ "metadata_type": "(str) - Type of metadata to expect in the images. This will determine how the images are processed. If 'custom' is selected, you can provide a custom regex pattern to extract metadata from the image names.",
956
+ "metadata_types": "(list) - Types of metadata to include in the analysis.",
957
+ "merge_edge_pathogen_cells": "(bool) - Whether to merge cells that share pathogen objects.",
958
+ "merge_pathogens": "(bool) - Whether to merge pathogen objects that share more than 75% of their perimeter.",
959
+ "metric": "(str) - Metric to use for UMAP.",
960
+ "min_cell_count": "(int) - Minimum number of cells required for analysis.",
961
+ "min_dist": "(float) - Minimum distance for UMAP.",
962
+ "min_max": "(tuple) - Minimum and maximum values for normalizing plots.",
963
+ "min_samples": "(int) - Minimum number of samples for clustering.",
964
+ "mix": "(dict) - Mixing settings for the samples.",
965
+ "model_name": "(str) - Name of the Cellpose model.",
966
+ "model_type": "(str) - Type of model to use for the analysis.",
967
+ "nc": "(str) - Negative control identifier.",
968
+ "nc_loc": "(str) - Location of the negative control in the images.",
969
+ "negative_control": "(str) - Identifier for the negative control.",
970
+ "n_estimators": "(int) - Number of estimators for the model.",
971
+ "n_epochs": "(int) - Number of epochs for training the Cellpose model.",
972
+ "n_jobs": "(int) - The number of parallel jobs to use for processing the images. This will determine how many images are processed in parallel. Increase to speed up processing.",
973
+ "n_neighbors": "(int) - Number of neighbors for UMAP.",
974
+ "n_repeats": "(int) - Number of repeats for cross-validation.",
975
+ "normalize": "(list) - The percentiles to use for normalizing the images. This will be used to determine the range of intensities to normalize images to. If None, no normalization is done.",
976
+ "normalize_by": "(str) - Whether to normalize the images by field of view (fov) or by PNG image (png).",
977
+ "normalize_plots": "(bool) - Whether to normalize the plots.",
978
+ "nr_imgs": "(int) - The number of images to plot.",
979
+ "nucleus_CP_prob": "(float) - The cellpose probability threshold for the nucleus channel. This will be used to segment the nucleus.",
980
+ "nucleus_FT": "(float) - The flow threshold for nucleus objects. This will be used in nucleus segmentation.",
981
+ "nucleus_background": "(float) - The background intensity for the nucleus channel. This will be used to remove background noise.",
982
+ "nucleus_chann_dim": "(int) - Dimension of the channel to use for nucleus segmentation.",
983
+ "nucleus_channel": "(int) - The channel to use for the nucleus. If None, the nucleus will not be segmented.",
984
+ "nucleus_intensity_range": "(list) - Intensity range for nucleus segmentation.",
985
+ "nucleus_loc": "(str) - Location of the nucleus in the images.",
986
+ "nucleus_mask_dim": "(int) - The dimension of the array the nucleus mask is saved in.",
987
+ "nucleus_min_size": "(int) - The minimum size of nucleus objects in pixels^2.",
988
+ "nucleus_Signal_to_noise": "(float) - The signal-to-noise ratio for the nucleus channel. This will be used to determine the range of intensities to normalize images to for nucleus segmentation.",
989
+ "nucleus_size_range": "(list) - Size range for nucleus segmentation.",
990
+ "optimizer_type": "(str) - Type of optimizer to use.",
991
+ "other": "(dict) - Additional parameters for the regression analysis.",
992
+ "pathogen_CP_prob": "(float) - The cellpose probability threshold for the pathogen channel. This will be used to segment the pathogen.",
993
+ "pathogen_FT": "(float) - The flow threshold for pathogen objects. This will be used in pathogen segmentation.",
994
+ "pathogen_background": "(float) - The background intensity for the pathogen channel. This will be used to remove background noise.",
995
+ "pathogen_chann_dim": "(int) - Dimension of the channel to use for pathogen segmentation.",
996
+ "pathogen_channel": "(int) - The channel to use for the pathogen. If None, the pathogen will not be segmented.",
997
+ "pathogen_intensity_range": "(list) - Intensity range for pathogen segmentation.",
998
+ "pathogen_loc": "(list) - The locations of the pathogen types in the images.",
999
+ "pathogen_mask_dim": "(int) - The dimension of the array the pathogen mask is saved in.",
1000
+ "pathogen_min_size": "(int) - The minimum size of pathogen objects in pixels^2.",
1001
+ "pathogen_model": "(str) - Model to use for pathogen segmentation.",
1002
+ "pathogen_plate_metadata": "(str) - Metadata for the pathogen plate.",
1003
+ "pathogen_Signal_to_noise": "(float) - The signal-to-noise ratio for the pathogen channel. This will be used to determine the range of intensities to normalize images to for pathogen segmentation.",
1004
+ "pathogen_size_range": "(list) - Size range for pathogen segmentation.",
1005
+ "pathogen_types": "(list) - Types of pathogens to include in the analysis.",
1006
+ "pc": "(str) - Positive control identifier.",
1007
+ "pc_loc": "(str) - Location of the positive control in the images.",
1008
+ "percentiles": "(list) - Percentiles to use for normalizing the images.",
1009
+ "pick_slice": "(bool) - Whether to pick a single slice from the z-stack images. If False, the maximum intensity projection will be used.",
1010
+ "pin_memory": "(bool) - Whether to pin memory for the data loader.",
1011
+ "plate": "(str) - Plate identifier for the experiment.",
1012
+ "plate_dict": "(dict) - Dictionary of plate metadata.",
1013
+ "plot": "(bool) - Whether to plot the results.",
1014
+ "plot_by_cluster": "(bool) - Whether to plot images by clusters.",
1015
+ "plot_cluster_grids": "(bool) - Whether to plot grids of clustered images.",
1016
+ "plot_control": "(dict) - Control settings for plotting.",
1017
+ "plot_filtration": "(bool) - Whether to plot the filtration steps.",
1018
+ "plot_images": "(bool) - Whether to plot images.",
1019
+ "plot_nr": "(int) - Number of plots to generate.",
1020
+ "plot_outlines": "(bool) - Whether to plot outlines of segmented objects.",
1021
+ "png_dims": "(list) - The dimensions of the PNG images to save. This will determine the dimensions of the saved images. Maximum of 3 dimensions e.g. [1,2,3].",
1022
+ "png_size": "(int) - The size of the PNG images to save. This will determine the size of the saved images.",
1023
+ "positive_control": "(str) - Identifier for the positive control.",
1024
+ "preprocess": "(bool) - Whether to preprocess the images before segmentation. This includes background removal and normalization. Set to False only if this step has already been done.",
1025
+ "radial_dist": "(list) - Radial distances for measuring features.",
1026
+ "random_test": "(bool) - Whether to randomly select images for testing.",
1027
+ "randomize": "(bool) - Whether to randomize the order of the images before processing. Recommended to avoid bias in the segmentation.",
1028
+ "regression_type": "(str) - Type of regression to perform.",
1029
+ "remove_background": "(bool) - Whether to remove background noise from the images. This will help improve the quality of the segmentation.",
1030
+ "remove_background_cell": "(bool) - Whether to remove background noise from the cell channel.",
1031
+ "remove_background_nucleus": "(bool) - Whether to remove background noise from the nucleus channel.",
1032
+ "remove_background_pathogen": "(bool) - Whether to remove background noise from the pathogen channel.",
1033
+ "remove_cluster_noise": "(bool) - Whether to remove noise from the clusters.",
1034
+ "remove_highly_correlated": "(bool) - Whether to remove highly correlated features.",
1035
+ "remove_highly_correlated_features": "(bool) - Whether to remove highly correlated features from the analysis.",
1036
+ "remove_image_canvas": "(bool) - Whether to remove the image canvas after plotting.",
1037
+ "remove_low_variance_features": "(bool) - Whether to remove low variance features from the analysis.",
1038
+ "remove_row_column_effect": "(bool) - Whether to remove row and column effects from the data.",
1039
+ "representative_images": "(bool) - Whether to save representative images of the segmented objects (Not working yet).",
1040
+ "resize": "(bool) - Whether to resize the images.",
1041
+ "resample": "(bool) - Whether to resample the images during processing.",
1042
+ "rescale": "(float) - Rescaling factor for the images.",
1043
+ "reduction_method": "(str) - Dimensionality reduction method to use.",
1044
+ "resnet_features": "(bool) - Whether to use ResNet features for embedding.",
1045
+ "row_limit": "(int) - Limit on the number of rows to plot.",
1046
+ "save": "(bool) - Whether to save the results to disk.",
1047
+ "save_arrays": "(bool) - Whether to save arrays of segmented objects.",
1048
+ "save_figure": "(bool) - Whether to save the generated figures.",
1049
+ "save_measurements": "(bool) - Whether to save the measurements to disk.",
1050
+ "save_png": "(bool) - Whether to save the segmented objects as PNG images.",
1051
+ "schedule": "(str) - Schedule for processing the data.",
1052
+ "Signal_to_noise": "(float) - Signal-to-noise ratio for the images.",
1053
+ "skip_mode": "(str) - The mode to use for skipping images. This will determine how to handle images that cannot be processed.",
1054
+ "smooth_lines": "(bool) - Whether to smooth lines in the plots.",
1055
+ "src": "(str, path) - Path to source directory.",
1056
+ "target": "(str) - Target variable for the analysis.",
1057
+ "target_height": "(int) - Target height for resizing the images.",
1058
+ "target_intensity_min": "(float) - Minimum intensity for the target objects.",
1059
+ "target_width": "(int) - Target width for resizing the images.",
1060
+ "tables": "(list) - Tables to include in the analysis.",
1061
+ "test": "(bool) - Whether to run the pipeline in test mode.",
1062
+ "test_images": "(list) - List of images to use for testing.",
1063
+ "test_mode": "(bool) - Whether to run the analysis pipeline in test mode.",
1064
+ "test_nr": "(int) - Number of test images.",
1065
+ "test_size": "(float) - Size of the test set.",
1066
+ "treatment_loc": "(list) - The locations of the treatments in the images.",
1067
+ "treatments": "(list) - The treatments to include in the analysis.",
1068
+ "top_features": "(int) - Top features to include in the analysis.",
1069
+ "train": "(bool) - Whether to train the model.",
1070
+ "train_mode": "(str) - Mode to use for training the model.",
1071
+ "transform": "(dict) - Transformation to apply to the data.",
1072
+ "upscale": "(bool) - Whether to upscale the images.",
1073
+ "upscale_factor": "(float) - Factor by which to upscale the images.",
1074
+ "upstream": "(str) - Upstream region for sequencing analysis.",
1075
+ "val_split": "(float) - Validation split ratio.",
1076
+ "visualize": "(bool) - Whether to visualize the embeddings.",
1077
+ "verbose": "(bool) - Whether to print verbose output during processing.",
1078
+ "weight_decay": "(float) - Weight decay for regularization.",
1079
+ "width_height": "(tuple) - Width and height of the input images.",
1080
+ "um_per_pixel": "(float) - The micrometers per pixel for the images."
883
1081
  }
884
1082
 
1083
+
885
1084
  for key, (var_type, options, default_value) in variables.items():
886
1085
  label, widget, var = create_input_field(scrollable_frame.scrollable_frame, key, row, var_type, options, default_value)
887
1086
  vars_dict[key] = (label, widget, var) # Store the label, widget, and variable
@@ -893,20 +1092,65 @@ def generate_fields(variables, scrollable_frame):
893
1092
  return vars_dict
894
1093
 
895
1094
# Maps a category heading to the setting keys grouped under it.
#
# A key may legitimately appear under more than one heading (for example
# "normalize" or "channels"), but each list holds a given key only once so
# that code iterating a category never handles the same setting twice.
# Key spellings (e.g. "intermedeate_save", "barecode_length_1",
# "dialate_pngs") are kept exactly as they are: they are lookup keys, and
# presumably must match the names used by the settings functions -- verify
# against the callers before renaming any of them.
categories = {
    "General": [
        "src", "input_folder", "metadata_type", "custom_regex", "experiment",
        "channels", "magnification", "channel_dims",
    ],
    "Paths": ["grna", "barcodes"],
    "Regression": [
        "class_1_threshold", "plate", "other", "fraction_threshold", "alpha",
        "remove_row_column_effect", "regression_type", "min_cell_count",
        "agg_type", "transform", "dependent_variable", "gene_weights_csv",
    ],
    "Cellpose": [
        "from_scratch", "n_epochs", "width_height", "model_name",
        "custom_model", "resample", "rescale", "CP_prob", "flow_threshold",
        "percentiles", "circular", "invert", "diameter", "grayscale",
        "background", "Signal_to_noise", "resize", "target_height",
        "target_width",
    ],
    "Nucleus": [
        "nucleus_intensity_range", "nucleus_size_range", "nucleus_chann_dim",
        "nucleus_channel", "nucleus_background", "nucleus_Signal_to_noise",
        "nucleus_CP_prob", "nucleus_FT", "remove_background_nucleus",
        "nucleus_min_size", "nucleus_mask_dim", "nucleus_loc",
    ],
    "Cell": [
        "cell_intensity_range", "cell_size_range", "cell_chann_dim",
        "cell_channel", "cell_background", "cell_Signal_to_noise",
        "cell_CP_prob", "cell_FT", "remove_background_cell", "cell_min_size",
        "cell_mask_dim", "cytoplasm", "cytoplasm_min_size",
        "include_uninfected", "merge_edge_pathogen_cells", "adjust_cells",
    ],
    "Pathogen": [
        "pathogen_intensity_range", "pathogen_size_range",
        "pathogen_chann_dim", "pathogen_channel", "pathogen_background",
        "pathogen_Signal_to_noise", "pathogen_CP_prob", "pathogen_FT",
        "pathogen_model", "remove_background_pathogen", "pathogen_min_size",
        "pathogen_mask_dim",
    ],
    "Timelapse": [
        "fps", "timelapse_displacement", "timelapse_memory",
        "timelapse_frame_limits", "timelapse_remove_transient",
        "timelapse_mode", "timelapse_objects", "compartments",
    ],
    "Plot": [
        "plot_control", "plot_nr", "plot_filtration", "examples_to_plot",
        "normalize_plots", "normalize", "cmap", "figuresize",
        "plot_cluster_grids", "img_zoom", "row_limit", "color_by",
        "plot_images", "smooth_lines", "plot_points", "plot_outlines",
        "black_background", "plot_by_cluster", "heatmap_feature", "grouping",
        "min_max", "save_figure",
    ],
    "Object Image": [
        "save_png", "dialate_pngs", "dialate_png_ratios", "png_size",
        "png_dims", "save_arrays", "normalize_by", "crop_mode", "normalize",
        "use_bounding_box",
    ],
    "Annotate Data": [
        "nc_loc", "pc_loc", "nc", "pc", "cell_plate_metadata",
        "pathogen_types", "pathogen_plate_metadata",
        "treatment_plate_metadata", "metadata_types", "cell_types", "target",
        "positive_control", "negative_control", "location_column",
        "treatment_loc", "cells", "cell_loc", "pathogens", "pathogen_loc",
        "channel_of_interest", "measurement", "treatments",
        "representative_images", "um_per_pixel", "nr_imgs", "exclude",
        "exclude_conditions", "mix", "pos", "neg",
    ],
    "Measurements": [
        "remove_image_canvas", "remove_highly_correlated", "homogeneity",
        "homogeneity_distances", "radial_dist", "calculate_correlation",
        "manders_thresholds", "save_measurements", "tables", "image_nr",
        "dot_size", "filter_by", "remove_highly_correlated_features",
        "remove_low_variance_features", "channel_of_interest",
    ],
    "Advanced": [
        "plate_dict", "target_intensity_min", "cells_per_well",
        "include_multinucleated", "include_multiinfected",
        "include_noninfected", "backgrounds", "plot", "timelapse", "schedule",
        "test_size", "exclude", "n_repeats", "top_features", "model_type",
        "minimum_cell_count", "n_estimators", "preprocess",
        "remove_background", "normalize", "lower_percentile",
        "merge_pathogens", "batch_size", "filter", "save", "masks", "verbose",
        "randomize", "n_jobs", "train_mode", "amsgrad", "use_checkpoint",
        "gradient_accumulation", "gradient_accumulation_steps",
        "intermedeate_save", "pin_memory", "channels", "augment",
    ],
    "Clustering": [
        "eps", "min_samples", "analyze_clusters", "clustering",
        "remove_cluster_noise",
    ],
    "Embedding": [
        "visualize", "n_neighbors", "min_dist", "metric", "resnet_features",
        "reduction_method", "embedding_by_controls", "col_to_compare",
        "log_data",
    ],
    "Train DL Model": [
        "epochs", "loss_type", "optimizer_type", "image_size", "val_split",
        "learning_rate", "weight_decay", "dropout_rate", "init_weights",
        "train", "classes",
    ],
    "Miscellaneous": [
        "all_to_mip", "pick_slice", "skip_mode", "upscale", "upscale_factor",
    ],
    "Test": ["test_mode", "test_images", "random_test", "test_nr", "test"],
    "Sequencing": [
        "upstream", "downstream", "barecode_length_1", "barecode_length_2",
        "chunk_size",
    ],
}
1115
+
1116
# Long-form help text for each module, keyed by the module's short name.
# Every entry has the same layout: a summary paragraph, a blank line, a
# "Key Features:" heading and three "- " bullet lines. Each value is written
# as adjacent string literals (joined at compile time) split at the embedded
# newlines so the layout is visible in the source.
descriptions = {
    'mask': (
        "Generate Cellpose masks for Cells, Nuclei, and Pathogens. This module uses: preprocess_generate_masks from spacr.core.\n\n"
        "Key Features:\n"
        "- Automated Mask Generation: Automatically generate accurate masks for various cellular components using Cellpose, a robust deep learning model for cell segmentation.\n"
        "- Versatility: Capable of handling different types of biological samples, including cells, nuclei, and pathogens.\n"
        "- Integration: Directly integrates with other modules, providing the foundational masks required for subsequent analysis."
    ),
    'measure': (
        "Capture Measurements from Cells, Nuclei, Pathogens, and Cytoplasm objects. Generate single object PNG images for one or several objects. (Requires masks from the Mask module). This module uses: measure_crop from spacr.measure.\n\n"
        "Key Features:\n"
        "- Comprehensive Measurement Capture: Obtain detailed measurements for various cellular components, including area, perimeter, intensity, and more.\n"
        "- Image Generation: Create high-resolution PNG images of individual objects, facilitating further analysis and visualization.\n"
        "- Mask Dependency: Requires accurate masks generated by the Mask module to ensure precise measurements."
    ),
    'classify': (
        "Train and Test any Torch Computer vision model. (Requires PNG images from the Measure module). This module uses: train_test_model from spacr.deep_spacr.\n\n"
        "Key Features:\n"
        "- Deep Learning Integration: Train and evaluate state-of-the-art Torch models for various classification tasks.\n"
        "- Flexible Training: Supports a wide range of Torch models, allowing customization based on specific research needs.\n"
        "- Data Requirement: Requires PNG images generated by the Measure module for training and testing."
    ),
    'sequencing': (
        "Find Barcodes and gRNA sequences in FASTQ files. (Requires paired-end FASTQ files, R1 and R2). This module uses: analyze_reads from spacr.sequencing.\n\n"
        "Key Features:\n"
        "- Barcode and gRNA Identification: Efficiently detect and extract barcode and gRNA sequences from raw sequencing data.\n"
        "- Paired-End Support: Specifically designed to handle paired-end FASTQ files, ensuring accurate sequence alignment and analysis.\n"
        "- High Throughput: Capable of processing large sequencing datasets quickly and accurately."
    ),
    'umap': (
        "Generate UMAP or tSNE embeddings and represent points as single cell images. (Requires measurements.db and PNG images from the Measure module). This module uses: generate_image_umap from spacr.core.\n\n"
        "Key Features:\n"
        "- Dimensionality Reduction: Employ UMAP or tSNE algorithms to reduce high-dimensional data into two dimensions for visualization.\n"
        "- Single Cell Representation: Visualize embedding points as single cell images, providing an intuitive understanding of data clusters.\n"
        "- Data Integration: Requires measurements and images generated by the Measure module, ensuring comprehensive data representation."
    ),
    'train_cellpose': (
        "Train custom Cellpose models for your specific dataset. This module uses: train_cellpose_model from spacr.core.\n\n"
        "Key Features:\n"
        "- Custom Model Training: Train Cellpose models on your dataset to improve segmentation accuracy.\n"
        "- Data Adaptation: Tailor the model to handle specific types of biological samples more effectively.\n"
        "- Advanced Training Options: Supports various training parameters and configurations for optimized performance."
    ),
    'ml_analyze': (
        "Perform machine learning analysis on your data. This module uses: ml_analysis_tools from spacr.ml.\n\n"
        "Key Features:\n"
        "- Comprehensive Analysis: Utilize a suite of machine learning tools for data analysis.\n"
        "- Customizable Workflows: Configure and run different ML algorithms based on your research requirements.\n"
        "- Integration: Works seamlessly with other modules to analyze data produced from various steps."
    ),
    'cellpose_masks': (
        "Generate masks using Cellpose for all images in your dataset. This module uses: generate_masks from spacr.cellpose.\n\n"
        "Key Features:\n"
        "- Batch Processing: Generate masks for large sets of images efficiently.\n"
        "- Robust Segmentation: Leverage Cellpose's capabilities for accurate segmentation across diverse samples.\n"
        "- Automation: Automate the mask generation process for streamlined workflows."
    ),
    'cellpose_all': (
        "Run Cellpose on all images in your dataset and obtain masks and measurements. This module uses: cellpose_analysis from spacr.cellpose.\n\n"
        "Key Features:\n"
        "- End-to-End Analysis: Perform both segmentation and measurement extraction in a single step.\n"
        "- Efficiency: Process entire datasets with minimal manual intervention.\n"
        "- Comprehensive Output: Obtain detailed masks and corresponding measurements for further analysis."
    ),
    'map_barcodes': (
        "Map barcodes to your data for identification and tracking. This module uses: barcode_mapping_tools from spacr.sequencing.\n\n"
        "Key Features:\n"
        "- Barcode Integration: Efficiently map and integrate barcode information into your dataset.\n"
        "- Tracking: Enable tracking and identification of samples using barcodes.\n"
        "- Compatibility: Works with sequencing data to ensure accurate mapping and analysis."
    ),
    'regression': (
        "Perform regression analysis on your data. This module uses: regression_tools from spacr.analysis.\n\n"
        "Key Features:\n"
        "- Statistical Analysis: Conduct various types of regression analysis to identify relationships within your data.\n"
        "- Flexible Options: Supports multiple regression models and configurations.\n"
        "- Data Insight: Gain deeper insights into your dataset through advanced regression techniques."
    ),
    'recruitment': (
        "Analyze recruitment data to understand sample recruitment dynamics. This module uses: recruitment_analysis_tools from spacr.analysis.\n\n"
        "Key Features:\n"
        "- Recruitment Analysis: Investigate and analyze the recruitment of samples over time or conditions.\n"
        "- Visualization: Generate visualizations to represent recruitment trends and patterns.\n"
        "- Integration: Utilize data from various sources for a comprehensive recruitment analysis."
    ),
}
1141
+
1142
def set_annotate_default_settings(settings=None):
    """Fill in default values for the annotation settings.

    Keys that are already present in ``settings`` keep their value; only
    missing keys receive a default.

    Args:
        settings (dict | None): Settings to complete. The dict is updated in
            place. ``None`` is treated as an empty dict so the function can
            be called without arguments.

    Returns:
        dict: The dict that was passed in (or a new dict when ``settings`` is
        ``None``), with every default key present.
    """
    if settings is None:
        settings = {}

    settings.setdefault('src', 'path')
    settings.setdefault('image_type', 'cell_png')
    settings.setdefault('channels', 'r,g,b')
    settings.setdefault('geom', "3200x2000")
    settings.setdefault('img_size', [200, 200])
    settings.setdefault('rows', 10)
    settings.setdefault('columns', 18)
    settings.setdefault('annotation_column', 'test')
    # NOTE(review): 'normalize' and 'threshold' default to *strings*. The
    # string 'False' is truthy, so a consumer that tests the value directly
    # would treat it as enabled -- confirm the consumers parse these values.
    settings.setdefault('normalize', 'False')
    settings.setdefault('percentiles', [2, 98])
    settings.setdefault('measurement', 'cytoplasm_channel_3_mean_intensity,pathogen_channel_3_mean_intensity')
    settings.setdefault('threshold', '2')
    return settings
1156
+