spacr 0.2.5__py3-none-any.whl → 0.2.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
spacr/settings.py CHANGED
@@ -3,8 +3,8 @@ import os, ast
 def set_default_plot_merge_settings():
     settings = {}
     settings.setdefault('include_noninfected', True)
-    settings.setdefault('include_multiinfected', True)
-    settings.setdefault('include_multinucleated', True)
+    settings.setdefault('include_multiinfected', 10)
+    settings.setdefault('include_multinucleated', 1)
     settings.setdefault('remove_background', False)
     settings.setdefault('filter_min_max', None)
     settings.setdefault('channel_dims', [0,1,2,3])
@@ -20,7 +20,7 @@ def set_default_plot_merge_settings():
     settings.setdefault('normalize', True)
     settings.setdefault('print_object_number', True)
     settings.setdefault('nr', 1)
-    settings.setdefault('figuresize', 50)
+    settings.setdefault('figuresize', 10)
     settings.setdefault('cmap', 'inferno')
     settings.setdefault('verbose', True)
     return settings
@@ -70,7 +70,7 @@ def set_default_settings_preprocess_generate_masks(src, settings={}):
 
     # Plot settings
     settings.setdefault('plot', False)
-    settings.setdefault('figuresize', 50)
+    settings.setdefault('figuresize', 10)
     settings.setdefault('cmap', 'inferno')
     settings.setdefault('normalize', True)
     settings.setdefault('normalize_plots', True)
@@ -116,7 +116,7 @@ def set_default_settings_preprocess_img_data(settings):
     skip_mode = settings.setdefault('skip_mode', False)
 
     cmap = settings.setdefault('cmap', 'inferno')
-    figuresize = settings.setdefault('figuresize', 50)
+    figuresize = settings.setdefault('figuresize', 10)
     normalize = settings.setdefault('normalize', True)
     save_dtype = settings.setdefault('save_dtype', 'uint16')
 
@@ -189,7 +189,7 @@ def set_default_umap_image_settings(settings={}):
     settings.setdefault('remove_cluster_noise', True)
     settings.setdefault('remove_highly_correlated', True)
     settings.setdefault('log_data', False)
-    settings.setdefault('figuresize', 60)
+    settings.setdefault('figuresize', 10)
     settings.setdefault('black_background', True)
     settings.setdefault('remove_image_canvas', False)
     settings.setdefault('plot_outlines', True)
@@ -277,7 +277,7 @@ def get_measure_crop_settings(settings):
 
 def set_default_analyze_screen(settings):
     settings.setdefault('src', 'path')
-    settings.setdefault('model_type','xgboost')
+    settings.setdefault('model_type_ml','xgboost')
     settings.setdefault('heatmap_feature','predictions')
     settings.setdefault('grouping','mean')
     settings.setdefault('min_max','allq')
@@ -314,7 +314,6 @@ def set_default_train_test_model(settings):
     settings.setdefault('batch_size',64)
     settings.setdefault('epochs',100)
     settings.setdefault('val_split',0.1)
-    settings.setdefault('train_mode','erm')
     settings.setdefault('learning_rate',0.001)
     settings.setdefault('weight_decay',0.00001)
     settings.setdefault('dropout_rate',0.1)
@@ -324,14 +323,90 @@ def set_default_train_test_model(settings):
     settings.setdefault('gradient_accumulation',True)
     settings.setdefault('gradient_accumulation_steps',4)
     settings.setdefault('intermedeate_save',True)
-    settings.setdefault('pin_memory',True)
+    settings.setdefault('pin_memory',False)
     settings.setdefault('n_jobs',cores)
-    settings.setdefault('channels',['r','g','b'])
+    settings.setdefault('train_channels',['r','g','b'])
     settings.setdefault('augment',False)
     settings.setdefault('verbose',False)
     return settings
 
+def set_generate_training_dataset_defaults(settings):
+
+    settings.setdefault('src','path')
+    settings.setdefault('dataset_mode','metadata')
+    settings.setdefault('annotation_column','test')
+    settings.setdefault('annotated_classes',[1,2])
+    settings.setdefault('classes',['nc','pc'])
+    settings.setdefault('size',224)
+    settings.setdefault('test_split',0.1)
+    settings.setdefault('class_metadata',[['c1'],['c2']])
+    settings.setdefault('metadata_type_by','col')
+    settings.setdefault('channel_of_interest',3)
+    settings.setdefault('custom_measurement',None)
+    settings.setdefault('tables',None)
+    settings.setdefault('png_type','cell_png')
+
+    return settings
+
+def deep_spacr_defaults(settings):
+
+    cores = os.cpu_count()-4
+
+    settings.setdefault('src','path')
+    settings.setdefault('dataset_mode','metadata')
+    settings.setdefault('annotation_column','test')
+    settings.setdefault('annotated_classes',[1,2])
+    settings.setdefault('classes',['nc','pc'])
+    settings.setdefault('size',224)
+    settings.setdefault('test_split',0.1)
+    settings.setdefault('class_metadata',[['c1'],['c2']])
+    settings.setdefault('metadata_type_by','col')
+    settings.setdefault('channel_of_interest',3)
+    settings.setdefault('custom_measurement',None)
+    settings.setdefault('tables',None)
+    settings.setdefault('png_type','cell_png')
+    settings.setdefault('custom_model',False)
+    settings.setdefault('custom_model_path','path')
+    settings.setdefault('train',True)
+    settings.setdefault('test',False)
+    settings.setdefault('model_type','maxvit_t')
+    settings.setdefault('optimizer_type','adamw')
+    settings.setdefault('schedule','reduce_lr_on_plateau') #reduce_lr_on_plateau, step_lr
+    settings.setdefault('loss_type','focal_loss') # binary_cross_entropy_with_logits
+    settings.setdefault('normalize',True)
+    settings.setdefault('image_size',224)
+    settings.setdefault('batch_size',64)
+    settings.setdefault('epochs',100)
+    settings.setdefault('val_split',0.1)
+    settings.setdefault('learning_rate',0.001)
+    settings.setdefault('weight_decay',0.00001)
+    settings.setdefault('dropout_rate',0.1)
+    settings.setdefault('init_weights',True)
+    settings.setdefault('amsgrad',True)
+    settings.setdefault('use_checkpoint',True)
+    settings.setdefault('gradient_accumulation',True)
+    settings.setdefault('gradient_accumulation_steps',4)
+    settings.setdefault('intermedeate_save',True)
+    settings.setdefault('pin_memory',False)
+    settings.setdefault('n_jobs',cores)
+    settings.setdefault('train_channels',['r','g','b'])
+    settings.setdefault('augment',False)
+    settings.setdefault('preload_batches', 3)
+    settings.setdefault('verbose',True)
+    settings.setdefault('apply_model_to_dataset',True)
+    settings.setdefault('file_metadata',None)
+    settings.setdefault('sample',None)
+    settings.setdefault('experiment','exp.')
+    settings.setdefault('score_threshold',0.5)
+    settings.setdefault('tar_path','path')
+    settings.setdefault('model_path','path')
+    settings.setdefault('file_type','cell_png')
+    settings.setdefault('generate_training_dataset', True)
+    settings.setdefault('train_DL_model', True)
+    return settings
+
 def get_analyze_recruitment_default_settings(settings):
+    settings.setdefault('src','path')
     settings.setdefault('target','protein')
     settings.setdefault('cell_types',['HeLa'])
     settings.setdefault('cell_plate_metadata',None)
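
The default helpers added above all follow the same fill-in pattern: dict.setdefault only writes a key the caller has not already supplied, so user overrides survive. A minimal usage sketch against the 0.2.8 module (the partial settings dict and its override values are illustrative, not part of the package):

    from spacr.settings import deep_spacr_defaults

    # Caller-supplied overrides (illustrative values).
    user_settings = {'epochs': 25, 'val_split': 0.2}
    settings = deep_spacr_defaults(user_settings)

    print(settings['epochs'])          # 25 -> kept, setdefault never overwrites
    print(settings['batch_size'])      # 64 -> filled in from the defaults above
    print(settings['train_channels'])  # ['r', 'g', 'b']
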
@@ -351,12 +426,10 @@ def get_analyze_recruitment_default_settings(settings):
     settings.setdefault('plot',True)
     settings.setdefault('plot_nr',10)
     settings.setdefault('plot_control',True)
-    settings.setdefault('figuresize',20)
-    settings.setdefault('remove_background',False)
-    settings.setdefault('backgrounds',100)
+    settings.setdefault('figuresize',10)
     settings.setdefault('include_noninfected',True)
-    settings.setdefault('include_multiinfected',True)
-    settings.setdefault('include_multinucleated',True)
+    settings.setdefault('include_multiinfected',10)
+    settings.setdefault('include_multinucleated',1)
     settings.setdefault('cells_per_well',0)
     settings.setdefault('pathogen_size_range',[0,100000])
     settings.setdefault('nucleus_size_range',[0,100000])
@@ -368,6 +441,7 @@ def get_analyze_recruitment_default_settings(settings):
     return settings
 
 def get_analyze_reads_default_settings(settings):
+    settings.setdefault('src', 'path')
     settings.setdefault('upstream', 'CTTCTGGTAAATGGGGATGTCAAGTT')
     settings.setdefault('downstream', 'GTTTAAGAGCTATGCTGGAAACAGCAG') #This is the reverce compliment of the column primer starting from the end #TGCTGTTTAAGAGCTATGCTGGAAACAGCA
     settings.setdefault('barecode_length_1', 8)
@@ -380,7 +454,7 @@ def get_map_barcodes_default_settings(settings):
     settings.setdefault('src', 'path')
     settings.setdefault('grna', '/home/carruthers/Documents/grna_barcodes.csv')
     settings.setdefault('barcodes', '/home/carruthers/Documents/SCREEN_BARCODES.csv')
-    settings.setdefault('plate_dict', {'EO1': 'plate1', 'EO2': 'plate2', 'EO3': 'plate3', 'EO4': 'plate4', 'EO5': 'plate5', 'EO6': 'plate6', 'EO7': 'plate7', 'EO8': 'plate8'})
+    settings.setdefault('plate_dict', "{'EO1': 'plate1', 'EO2': 'plate2', 'EO3': 'plate3', 'EO4': 'plate4', 'EO5': 'plate5', 'EO6': 'plate6', 'EO7': 'plate7', 'EO8': 'plate8'}")
     settings.setdefault('test', False)
     settings.setdefault('verbose', True)
     settings.setdefault('pc', 'TGGT1_220950_1')
@@ -506,7 +580,7 @@ expected_types = {
     "timelapse": bool,
     "timelapse_displacement": int,
     "timelapse_memory": int,
-    "timelapse_frame_limits": list, # This can be a list of lists
+    "timelapse_frame_limits": (list, type(None)), # This can be a list of lists
     "timelapse_remove_transient": bool,
     "timelapse_mode": str,
     "timelapse_objects": list,
@@ -533,7 +607,7 @@ expected_types = {
     "save_png": bool,
     "crop_mode": list,
     "use_bounding_box": bool,
-    "png_size": list, # This can be a list of lists
+    "png_size": list, # This can be a list of lists
     "normalize": bool,
     "png_dims": list,
     "normalize_by": str,
@@ -545,7 +619,7 @@ expected_types = {
     "cells": list,
     "cell_loc": list,
     "pathogens": list,
-    "pathogen_loc": (list, list), # This can be a list of lists
+    "pathogen_loc": (list, list), # This can be a list of lists
     "treatments": list,
     "treatment_loc": (list, list), # This can be a list of lists
     "channel_of_interest": int,
@@ -553,10 +627,9 @@ expected_types = {
     "measurement": str,
     "nr_imgs": int,
     "um_per_pixel": (int, float),
-    # Additional settings based on provided defaults
     "include_noninfected": bool,
-    "include_multiinfected": bool,
-    "include_multinucleated": bool,
+    "include_multiinfected": int,
+    "include_multinucleated": int,
     "filter_min_max": (list, type(None)),
     "channel_dims": list,
     "backgrounds": list,
@@ -650,7 +723,6 @@ expected_types = {
     "image_size": int,
     "epochs": int,
     "val_split": float,
-    "train_mode": str,
     "learning_rate": float,
     "weight_decay": float,
     "dropout_rate": float,
@@ -665,9 +737,9 @@ expected_types = {
     "augment": bool,
     "target": str,
     "cell_types": list,
-    "cell_plate_metadata": (list, type(None)),
+    "cell_plate_metadata": (list, list),
     "pathogen_types": list,
-    "pathogen_plate_metadata": (list, list), # This can be a list of lists
+    "pathogen_plate_metadata": (list, list), # This can be a list of lists
     "treatment_plate_metadata": (list, list), # This can be a list of lists
     "metadata_types": list,
     "cell_chann_dim": int,
@@ -720,10 +792,71 @@ expected_types = {
     "from_scratch": bool,
     "width_height": list,
     "resize": bool,
+    "compression": str,
+    "complevel": int,
     "gene_weights_csv": str,
     "fraction_threshold": float,
+    "barcode_mapping":dict,
+    "redunction_method":str,
+    "mix":str,
+    "model_type_ml":str,
+    "exclude_conditions":list,
+    "remove_highly_correlated_features":bool,
+    'barcode_coordinates':list, # This is a list of lists
+    'reverse_complement':bool,
+    'file_type':str,
+    'model_path':str,
+    'tar_path':str,
+    'score_threshold':float,
+    'sample':None,
+    'file_metadata':None,
+    'apply_model_to_dataset':False,
+    "train":bool,
+    "test":bool,
+    'train_channels':list,
+    "optimizer_type":str,
+    "dataset_mode":str,
+    "annotated_classes":list,
+    "annotation_column":str,
+    "apply_model_to_dataset":bool,
+    "metadata_type_by":str,
+    "custom_measurement":str,
+    "custom_model":bool,
+    "size":int,
+    "test_split":float,
+    "class_metadata":list, # This is a list of lists
+    "png_type":str,
+    "custom_model_path":str,
+    "generate_training_dataset":bool,
+    'preload_batches':int,
+    "train_DL_model":bool,
 }
 
+categories = {"General": ["src", "metadata_type", "custom_regex", "experiment", "channels", "magnification", "channel_dims", "apply_model_to_dataset", "generate_training_dataset", "train_DL_model"],
+              "Cell": ["cell_intensity_range", "cell_size_range", "cell_chann_dim", "cell_channel", "cell_background", "cell_Signal_to_noise", "cell_CP_prob", "cell_FT", "remove_background_cell", "cell_min_size", "cell_mask_dim", "cytoplasm", "cytoplasm_min_size", "include_uninfected", "merge_edge_pathogen_cells", "adjust_cells"],
+              "Nucleus": ["nucleus_intensity_range", "nucleus_size_range", "nucleus_chann_dim", "nucleus_channel", "nucleus_background", "nucleus_Signal_to_noise", "nucleus_CP_prob", "nucleus_FT", "remove_background_nucleus", "nucleus_min_size", "nucleus_mask_dim", "nucleus_loc"],
+              "Pathogen": ["pathogen_intensity_range", "pathogen_size_range", "pathogen_chann_dim", "pathogen_channel", "pathogen_background", "pathogen_Signal_to_noise", "pathogen_CP_prob", "pathogen_FT", "pathogen_model", "remove_background_pathogen", "pathogen_min_size", "pathogen_mask_dim"],
+              "Timelapse": ["fps", "timelapse_displacement", "timelapse_memory", "timelapse_frame_limits", "timelapse_remove_transient", "timelapse_mode", "timelapse_objects", "compartments"],
+              "Plot": ["plot_control", "plot_nr", "examples_to_plot", "normalize_plots", "normalize", "cmap", "figuresize", "plot_cluster_grids", "img_zoom", "row_limit", "color_by", "plot_images", "smooth_lines", "plot_points", "plot_outlines", "black_background", "plot_by_cluster", "heatmap_feature","grouping","min_max","cmap","save_figure"],
+              "Measurements": ["remove_image_canvas", "remove_highly_correlated", "homogeneity", "homogeneity_distances", "radial_dist", "calculate_correlation", "manders_thresholds", "save_measurements", "tables", "image_nr", "dot_size", "filter_by", "remove_highly_correlated_features", "remove_low_variance_features", "channel_of_interest"],
+              "Paths":["grna", "barcodes", "custom_model_path", "tar_path","model_path"],
+              "Sequencing": ["upstream", "downstream", "barecode_length_1", "barecode_length_2", "chunk_size", "barcode_mapping", "reverse_complement", "barcode_coordinates", "complevel", "compression","plate_dict"],
+              "Embedding": ["visualize","n_neighbors","min_dist","metric","resnet_features","reduction_method","embedding_by_controls","col_to_compare","log_data"],
+              "Clustering": ["eps","min_samples","analyze_clusters","clustering","remove_cluster_noise"],
+              "Object Image": ["save_png", "dialate_pngs", "dialate_png_ratios", "png_size", "png_dims", "save_arrays", "normalize_by", "dialate_png_ratios", "crop_mode", "dialate_pngs", "normalize", "use_bounding_box"],
+              "Annotation": ["nc_loc", "pc_loc", "nc", "pc", "cell_plate_metadata","pathogen_types", "pathogen_plate_metadata", "treatment_plate_metadata", "metadata_types", "cell_types", "target","positive_control","negative_control", "location_column", "treatment_loc", "cells", "cell_loc", "pathogens", "pathogen_loc", "channel_of_interest", "measurement", "treatments", "um_per_pixel", "nr_imgs", "exclude", "exclude_conditions", "mix", "pos", "neg"],
+              "Machine Learning":[],
+              "Deep Learning": ["png_type","score_threshold","file_type", "train_channels", "epochs", "loss_type", "optimizer_type","image_size","val_split","learning_rate","weight_decay","dropout_rate", "init_weights", "train", "classes", "augment"],
+              "Generate Dataset":["preload_batches", "file_metadata","class_metadata", "annotation_column","annotated_classes", "dataset_mode", "metadata_type_by","custom_measurement", "sample", "size"],
+              "Cellpose":["from_scratch", "n_epochs", "width_height", "model_name", "custom_model", "resample", "rescale", "CP_prob", "flow_threshold", "percentiles", "circular", "invert", "diameter", "grayscale", "background", "Signal_to_noise", "resize", "target_height", "target_width"],
+              "Regression":["class_1_threshold", "plate", "other", "fraction_threshold", "alpha", "remove_row_column_effect", "regression_type", "min_cell_count", "agg_type", "transform", "dependent_variable", "gene_weights_csv"],
+              "Miscellaneous": ["all_to_mip", "pick_slice", "skip_mode", "upscale", "upscale_factor"],
+              "Test": ["test_mode", "test_images", "random_test", "test_nr", "test", "test_split"],
+              "Advanced": ["target_intensity_min", "cells_per_well", "include_multinucleated", "include_multiinfected", "include_noninfected", "backgrounds", "plot", "timelapse", "schedule", "test_size","exclude","n_repeats","top_features", "model_type_ml", "model_type","minimum_cell_count","n_estimators","preprocess", "remove_background", "normalize", "lower_percentile", "merge_pathogens", "batch_size", "filter", "save", "masks", "verbose", "randomize", "n_jobs", "amsgrad","use_checkpoint","gradient_accumulation","gradient_accumulation_steps","intermedeate_save","pin_memory"]
+              }
+
+category_keys = list(categories.keys())
+
 def check_settings(vars_dict, expected_types, q=None):
     from .gui_utils import parse_list
 
@@ -735,7 +868,7 @@ def check_settings(vars_dict, expected_types, q=None):
 
     for key, (label, widget, var, _) in vars_dict.items():
         if key not in expected_types:
-            if key not in ["General", "Nucleus", "Cell", "Pathogen", "Timelapse", "Plot", "Object Image", "Annotate Data", "Measurements", "Advanced", "Miscellaneous", "Test"]:
+            if key not in category_keys:
                 q.put(f"Key {key} not found in expected types.")
             continue
 
@@ -743,13 +876,15 @@ def check_settings(vars_dict, expected_types, q=None):
         expected_type = expected_types.get(key, str)
 
         try:
-            if key in ["png_size", "pathogen_plate_metadata", "treatment_plate_metadata"]:
+            if key in ["cell_plate_metadata", "timelapse_frame_limits", "png_size", "pathogen_loc", "treatment_loc", "pathogen_plate_metadata", "treatment_plate_metadata", "barcode_coordinates", "class_metadata"]:
                 parsed_value = ast.literal_eval(value) if value else None
                 if isinstance(parsed_value, list):
                     if all(isinstance(i, list) for i in parsed_value) or all(not isinstance(i, list) for i in parsed_value):
                         settings[key] = parsed_value
                     else:
                         raise ValueError("Invalid format: Mixed list and list of lists")
+                #elif parsed_value == None:
+                #    settings[key] = None
                 else:
                     raise ValueError("Invalid format for list or list of lists")
             elif expected_type == list:
@@ -764,6 +899,20 @@ def check_settings(vars_dict, expected_types, q=None):
                 settings[key] = float(value) if '.' in value else int(value)
             elif expected_type == (str, type(None)):
                 settings[key] = str(value) if value else None
+            elif expected_type == dict:
+                try:
+                    # Ensure that the value is a string that can be converted to a dictionary
+                    if isinstance(value, str):
+                        settings[key] = ast.literal_eval(value)
+                    else:
+                        raise ValueError("Expected a string representation of a dictionary.")
+
+                    # Check if the result is actually a dictionary
+                    if not isinstance(settings[key], dict):
+                        raise ValueError("Value is not a valid dictionary.")
+                except (ValueError, SyntaxError) as e:
+                    settings[key] = {}
+                    q.put(f"Error: Invalid format for {key}. Expected type: dict. Error: {e}")
             elif isinstance(expected_type, tuple):
                 for typ in expected_type:
                     try:
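
The dict branch added to check_settings above is what lets string-valued GUI fields, such as the reworked 'plate_dict' default and the new 'barcode_mapping', round-trip back into Python dicts. A standalone sketch of the same ast.literal_eval logic, with illustrative variable names:

    import ast

    value = "{'EO1': 'plate1', 'EO2': 'plate2'}"  # GUI widgets hand settings back as strings

    try:
        parsed = ast.literal_eval(value) if isinstance(value, str) else None
        if not isinstance(parsed, dict):
            raise ValueError("Value is not a valid dictionary.")
    except (ValueError, SyntaxError):
        parsed = {}  # fall back to an empty dict, as check_settings does

    print(parsed['EO1'])  # plate1
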
@@ -814,7 +963,7 @@ def generate_fields(variables, scrollable_frame):
        "cell_Signal_to_noise": "(float) - The signal-to-noise ratio for the cell channel. This will be used to determine the range of intensities to normalize images to for cell segmentation.",
        "cell_size_range": "(list) - Size range for cell segmentation.",
        "cell_types": "(list) - Types of cells to include in the analysis.",
-        "cells": "(list) - The cell types to include in the analysis.",
+        "cells": "(list of lists) - The cell types to include in the analysis.",
        "cells_per_well": "(int) - Number of cells per well.",
        "channel_dims": "(list) - The dimensions of the image channels.",
        "channel_of_interest": "(int) - The channel of interest to use for the analysis.",
@@ -864,8 +1013,8 @@ def generate_fields(variables, scrollable_frame):
        "image_nr": "(int) - Number of images to process.",
        "image_size": "(int) - Size of the images for training.",
        "img_zoom": "(float) - Zoom factor for the images in plots.",
-        "include_multinucleated": "(bool) - Whether to include multinucleated cells in the analysis.",
-        "include_multiinfected": "(bool) - Whether to include multi-infected cells in the analysis.",
+        "include_multinucleated": "(int) - Whether to include multinucleated cells in the analysis.",
+        "include_multiinfected": "(int) - Whether to include multi-infected cells in the analysis.",
        "include_noninfected": "(bool) - Whether to include non-infected cells in the analysis.",
        "include_uninfected": "(bool) - Whether to include uninfected cells in the analysis.",
        "init_weights": "(bool) - Whether to initialize weights for the model.",
@@ -883,7 +1032,7 @@ def generate_fields(variables, scrollable_frame):
        "metadata_type": "(str) - Type of metadata to expect in the images. This will determine how the images are processed. If 'custom' is selected, you can provide a custom regex pattern to extract metadata from the image names.",
        "metadata_types": "(list) - Types of metadata to include in the analysis.",
        "merge_edge_pathogen_cells": "(bool) - Whether to merge cells that share pathogen objects.",
-        "merge_pathogens": "(bool) - Whether to merge pathogen objects that share more than 75% of their perimeter.",
+        "merge_pathogens": "(bool) - Whether to merge pathogen objects that share more than 75 percent of their perimeter.",
        "metric": "(str) - Metric to use for UMAP.",
        "min_cell_count": "(int) - Minimum number of cells required for analysis.",
        "min_dist": "(float) - Minimum distance for UMAP.",
@@ -892,6 +1041,7 @@ def generate_fields(variables, scrollable_frame):
        "mix": "(dict) - Mixing settings for the samples.",
        "model_name": "(str) - Name of the Cellpose model.",
        "model_type": "(str) - Type of model to use for the analysis.",
+        "model_type_ml": "(str) - Type of model to use for machine learning.",
        "nc": "(str) - Negative control identifier.",
        "nc_loc": "(str) - Location of the negative control in the images.",
        "negative_control": "(str) - Identifier for the negative control.",
@@ -922,12 +1072,7 @@ def generate_fields(variables, scrollable_frame):
        "pathogen_background": "(float) - The background intensity for the pathogen channel. This will be used to remove background noise.",
        "pathogen_chann_dim": "(int) - Dimension of the channel to use for pathogen segmentation.",
        "pathogen_channel": "(int) - The channel to use for the pathogen. If None, the pathogen will not be segmented.",
-        "pathogen_intensity_range": "(list) - Intensity range for pathogen segmentation.",
-        "pathogen_loc": "(list) - The locations of the pathogen types in the images.",
-        "pathogen_mask_dim": "(int) - The dimension of the array the pathogen mask is saved in.",
-        "pathogen_min_size": "(int) - The minimum size of pathogen objects in pixels^2.",
-        "pathogen_model": "(str) - Model to use for pathogen segmentation.",
-        "pathogen_plate_metadata": "(str) - Metadata for the pathogen plate.",
+        "pathogen_intensity_range": "(str) - Metadata for the pathogen plate.",
        "pathogen_Signal_to_noise": "(float) - The signal-to-noise ratio for the pathogen channel. This will be used to determine the range of intensities to normalize images to for pathogen segmentation.",
        "pathogen_size_range": "(list) - Size range for pathogen segmentation.",
        "pathogen_types": "(list) - Types of pathogens to include in the analysis.",
@@ -993,7 +1138,6 @@ def generate_fields(variables, scrollable_frame):
        "treatments": "(list) - The treatments to include in the analysis.",
        "top_features": "(int) - Top features to include in the analysis.",
        "train": "(bool) - Whether to train the model.",
-        "train_mode": "(str) - Mode to use for training the model.",
        "transform": "(dict) - Transformation to apply to the data.",
        "upscale": "(bool) - Whether to upscale the images.",
        "upscale_factor": "(float) - Factor by which to upscale the images.",
@@ -1003,6 +1147,20 @@ def generate_fields(variables, scrollable_frame):
        "verbose": "(bool) - Whether to print verbose output during processing.",
        "weight_decay": "(float) - Weight decay for regularization.",
        "width_height": "(tuple) - Width and height of the input images.",
+        "barcode_coordinates": "(list of lists) - Coordinates of the barcodes in the sequence.",
+        "barcode_mapping": "dict - names and barecode csv files",
+        "compression": "str - type of compression (e.g. zlib)",
+        "complevel": "int - level of compression (0-9). Higher is slower and yealds smaller files",
+        "file_type": "str - type of file to process",
+        "model_path": "str - path to the model",
+        "tar_path": "str - path to the tar file with image dataset",
+        "score_threshold": "float - threshold for classification",
+        "sample": "str - number of images to sample for tar dataset (including both classes). Default: None",
+        "file_metadata": "str - string that must be present in image path to be included in the dataset",
+        "apply_model_to_dataset": "bool - whether to apply model to the dataset",
+        "train_channels": "list - channels to use for training",
+        "dataset_mode": "str - How to generate train/test dataset.",
+        "annotated_classes": "list - list of numbers in annotation column.",
        "um_per_pixel": "(float) - The micrometers per pixel for the images."
    }
 
@@ -1018,29 +1176,6 @@ def generate_fields(variables, scrollable_frame):
 
    return vars_dict
 
-
-categories = {
-    "General": ["src", "metadata_type", "custom_regex", "experiment", "channels", "magnification", "channel_dims"],
-    "Paths":["grna", "barcodes"],
-    "Regression":["class_1_threshold", "plate", "other", "fraction_threshold", "alpha", "remove_row_column_effect", "regression_type", "min_cell_count", "agg_type", "transform", "dependent_variable", "gene_weights_csv"],
-    "Cellpose":["from_scratch", "n_epochs", "width_height", "model_name", "custom_model", "resample", "rescale", "CP_prob", "flow_threshold", "percentiles", "circular", "invert", "diameter", "grayscale", "background", "Signal_to_noise", "resize", "target_height", "target_width"],
-    "Nucleus": ["nucleus_intensity_range", "nucleus_size_range", "nucleus_chann_dim", "nucleus_channel", "nucleus_background", "nucleus_Signal_to_noise", "nucleus_CP_prob", "nucleus_FT", "remove_background_nucleus", "nucleus_min_size", "nucleus_mask_dim", "nucleus_loc"],
-    "Cell": ["cell_intensity_range", "cell_size_range", "cell_chann_dim", "cell_channel", "cell_background", "cell_Signal_to_noise", "cell_CP_prob", "cell_FT", "remove_background_cell", "cell_min_size", "cell_mask_dim", "cytoplasm", "cytoplasm_min_size", "include_uninfected", "merge_edge_pathogen_cells", "adjust_cells"],
-    "Pathogen": ["pathogen_intensity_range", "pathogen_size_range", "pathogen_chann_dim", "pathogen_channel", "pathogen_background", "pathogen_Signal_to_noise", "pathogen_CP_prob", "pathogen_FT", "pathogen_model", "remove_background_pathogen", "pathogen_min_size", "pathogen_mask_dim"],
-    "Timelapse": ["fps", "timelapse_displacement", "timelapse_memory", "timelapse_frame_limits", "timelapse_remove_transient", "timelapse_mode", "timelapse_objects", "compartments"],
-    "Plot": ["plot_control", "plot_nr", "examples_to_plot", "normalize_plots", "normalize", "cmap", "figuresize", "plot_cluster_grids", "img_zoom", "row_limit", "color_by", "plot_images", "smooth_lines", "plot_points", "plot_outlines", "black_background", "plot_by_cluster", "heatmap_feature","grouping","min_max","cmap","save_figure"],
-    "Object Image": ["save_png", "dialate_pngs", "dialate_png_ratios", "png_size", "png_dims", "save_arrays", "normalize_by", "dialate_png_ratios", "crop_mode", "dialate_pngs", "normalize", "use_bounding_box"],
-    "Annotate Data": ["nc_loc", "pc_loc", "nc", "pc", "cell_plate_metadata","pathogen_types", "pathogen_plate_metadata", "treatment_plate_metadata", "metadata_types", "cell_types", "target","positive_control","negative_control", "location_column", "treatment_loc", "cells", "cell_loc", "pathogens", "pathogen_loc", "channel_of_interest", "measurement", "treatments", "um_per_pixel", "nr_imgs", "exclude", "exclude_conditions", "mix", "pos", "neg"],
-    "Measurements": ["remove_image_canvas", "remove_highly_correlated", "homogeneity", "homogeneity_distances", "radial_dist", "calculate_correlation", "manders_thresholds", "save_measurements", "tables", "image_nr", "dot_size", "filter_by", "remove_highly_correlated_features", "remove_low_variance_features", "channel_of_interest"],
-    "Advanced": ["plate_dict", "target_intensity_min", "cells_per_well", "include_multinucleated", "include_multiinfected", "include_noninfected", "backgrounds", "plot", "timelapse", "schedule", "test_size","exclude","n_repeats","top_features", "model_type","minimum_cell_count","n_estimators","preprocess", "remove_background", "normalize", "lower_percentile", "merge_pathogens", "batch_size", "filter", "save", "masks", "verbose", "randomize", "n_jobs", "train_mode","amsgrad","use_checkpoint","gradient_accumulation","gradient_accumulation_steps","intermedeate_save","pin_memory","n_jobs","channels","augment"],
-    "Clustering": ["eps","min_samples","analyze_clusters","clustering","remove_cluster_noise"],
-    "Embedding": ["visualize","n_neighbors","min_dist","metric","resnet_features","reduction_method","embedding_by_controls","col_to_compare","log_data"],
-    "Train DL Model": ["epochs", "loss_type", "optimizer_type","image_size","val_split","learning_rate","weight_decay","dropout_rate", "init_weights", "train", "classes"],
-    "Miscellaneous": ["all_to_mip", "pick_slice", "skip_mode", "upscale", "upscale_factor"],
-    "Test": ["test_mode", "test_images", "random_test", "test_nr", "test"],
-    "Sequencing": ["upstream", "downstream", "barecode_length_1", "barecode_length_2", "chunk_size"]
-}
-
 descriptions = {
    'mask': "\n\nHelp:\n- Generate Cells, Nuclei, Pathogens, and Cytoplasm masks from intensity images in src.\n- To ensure that spacr is installed correctly:\n- 1. Downloade the training set (click Download).\n- 2. Import settings (click settings navigate to downloaded dataset settings folder and import preprocess_generate_masks_settings.csv).\n- 3. Run the module.\n- 4. Proceed to the Measure module (click Measure in the menue bar).\n- For further help, click the Help button in the menue bar.",
 
@@ -1048,8 +1183,6 @@ descriptions = {
 
    'classify': "Train and Test any Torch Computer vision model. (Requires PNG images from the Measure module). Function: train_test_model from spacr.deep_spacr.\n\nKey Features:\n- Deep Learning Integration: Train and evaluate state-of-the-art Torch models for various classification tasks.\n- Flexible Training: Supports a wide range of Torch models, allowing customization based on specific research needs.\n- Data Requirement: Requires PNG images generated by the Measure module for training and testing.",
 
-    'sequencing': "Find Barcodes and gRNA sequences in FASTQ files. (Requires paired-end FASTQ files, R1 and R2). Function: analyze_reads from spacr.sequencing.\n\nKey Features:\n- Barcode and gRNA Identification: Efficiently detect and extract barcode and gRNA sequences from raw sequencing data.\n- Paired-End Support: Specifically designed to handle paired-end FASTQ files, ensuring accurate sequence alignment and analysis.\n- High Throughput: Capable of processing large sequencing datasets quickly and accurately.",
-
    'umap': "Generate UMAP or tSNE embeddings and represent points as single cell images. (Requires measurements.db and PNG images from the Measure module). Function: generate_image_umap from spacr.core.\n\nKey Features:\n- Dimensionality Reduction: Employ UMAP or tSNE algorithms to reduce high-dimensional data into two dimensions for visualization.\n- Single Cell Representation: Visualize embedding points as single cell images, providing an intuitive understanding of data clusters.\n- Data Integration: Requires measurements and images generated by the Measure module, ensuring comprehensive data representation.",
 
    'train_cellpose': "Train custom Cellpose models for your specific dataset. Function: train_cellpose_model from spacr.core.\n\nKey Features:\n- Custom Model Training: Train Cellpose models on your dataset to improve segmentation accuracy.\n- Data Adaptation: Tailor the model to handle specific types of biological samples more effectively.\n- Advanced Training Options: Supports various training parameters and configurations for optimized performance.",
@@ -1060,8 +1193,8 @@ descriptions = {
 
    'cellpose_all': "Run Cellpose on all images in your dataset and obtain masks and measurements. Function: cellpose_analysis from spacr.cellpose.\n\nKey Features:\n- End-to-End Analysis: Perform both segmentation and measurement extraction in a single step.\n- Efficiency: Process entire datasets with minimal manual intervention.\n- Comprehensive Output: Obtain detailed masks and corresponding measurements for further analysis.",
 
-    'map_barcodes': "Map barcodes to your data for identification and tracking. Function: barcode_mapping_tools from spacr.sequencing.\n\nKey Features:\n- Barcode Integration: Efficiently map and integrate barcode information into your dataset.\n- Tracking: Enable tracking and identification of samples using barcodes.\n- Compatibility: Works with sequencing data to ensure accurate mapping and analysis.",
-
+    'map_barcodes': "\n\nHelp:\n- 1 .Generate consensus read fastq files from R1 and R2 files.\n- 2. Map barcodes from sequencing data for identification and tracking of samples.\n- 3. Run the module to extract and map barcodes from your FASTQ files in chunks.\n- Prepare your barcode CSV files with the appropriate 'name' and 'sequence' columns.\n- Configure the barcode settings (coordinates and reverse complement flags) according to your experimental setup.\n- For further help, click the Help button in the menu bar.",
+
    'regression': "Perform regression analysis on your data. Function: regression_tools from spacr.analysis.\n\nKey Features:\n- Statistical Analysis: Conduct various types of regression analysis to identify relationships within your data.\n- Flexible Options: Supports multiple regression models and configurations.\n- Data Insight: Gain deeper insights into your dataset through advanced regression techniques.",
 
    'recruitment': "Analyze recruitment data to understand sample recruitment dynamics. Function: recruitment_analysis_tools from spacr.analysis.\n\nKey Features:\n- Recruitment Analysis: Investigate and analyze the recruitment of samples over time or conditions.\n- Visualization: Generate visualizations to represent recruitment trends and patterns.\n- Integration: Utilize data from various sources for a comprehensive recruitment analysis."
@@ -1070,7 +1203,7 @@ descriptions = {
 def set_annotate_default_settings(settings):
     settings.setdefault('src', 'path')
     settings.setdefault('image_type', 'cell_png')
-    settings.setdefault('channels', 'r,g,b')
+    settings.setdefault('channels', "r,g,b")
     settings.setdefault('img_size', 200)
     settings.setdefault('annotation_column', 'test')
     settings.setdefault('normalize', 'False')
@@ -1079,3 +1212,15 @@ def set_annotate_default_settings(settings):
     settings.setdefault('threshold', '2')
     return settings
 
+def set_default_generate_barecode_mapping(settings={}):
+    settings.setdefault('src', 'path')
+    settings.setdefault('chunk_size', 100000)
+
+    settings.setdefault('barcode_mapping', {'row': ['/home/carruthers/Documents/row_barcodes.csv',(80, 88), True],
+                                            'grna': ['/home/carruthers/Documents/grna_barcodes.csv',(34, 55), True],
+                                            'column': ['/home/carruthers/Documents/column_barcodes.csv',(0, 7), False]})
+
+    settings.setdefault('n_jobs', None)
+    settings.setdefault('compression', 'zlib')
+    settings.setdefault('complevel', 5)
+    return settings
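
Each entry in the new 'barcode_mapping' default pairs a barcode CSV with a coordinate tuple and a reverse-complement flag. The code that consumes the mapping lives in spacr.sequencing rather than in this file, so the loop below is only an illustrative reading of the structure; treating the tuple as start/end read positions is an assumption:

    from spacr.settings import set_default_generate_barecode_mapping

    settings = set_default_generate_barecode_mapping({})
    for name, (csv_path, coords, rev_comp) in settings['barcode_mapping'].items():
        start, end = coords  # assumed start/end positions of this barcode in the read
        print(f"{name}: {csv_path} [{start}:{end}] reverse_complement={rev_comp}")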