spacr 0.2.53__py3-none-any.whl → 0.2.61__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
spacr/settings.py CHANGED
@@ -3,8 +3,8 @@ import os, ast
  def set_default_plot_merge_settings():
  settings = {}
  settings.setdefault('include_noninfected', True)
- settings.setdefault('include_multiinfected', True)
- settings.setdefault('include_multinucleated', True)
+ settings.setdefault('include_multiinfected', 10)
+ settings.setdefault('include_multinucleated', 1)
  settings.setdefault('remove_background', False)
  settings.setdefault('filter_min_max', None)
  settings.setdefault('channel_dims', [0,1,2,3])
@@ -20,7 +20,7 @@ def set_default_plot_merge_settings():
  settings.setdefault('normalize', True)
  settings.setdefault('print_object_number', True)
  settings.setdefault('nr', 1)
- settings.setdefault('figuresize', 50)
+ settings.setdefault('figuresize', 10)
  settings.setdefault('cmap', 'inferno')
  settings.setdefault('verbose', True)
  return settings
@@ -70,7 +70,7 @@ def set_default_settings_preprocess_generate_masks(src, settings={}):

  # Plot settings
  settings.setdefault('plot', False)
- settings.setdefault('figuresize', 50)
+ settings.setdefault('figuresize', 10)
  settings.setdefault('cmap', 'inferno')
  settings.setdefault('normalize', True)
  settings.setdefault('normalize_plots', True)
@@ -116,7 +116,7 @@ def set_default_settings_preprocess_img_data(settings):
  skip_mode = settings.setdefault('skip_mode', False)

  cmap = settings.setdefault('cmap', 'inferno')
- figuresize = settings.setdefault('figuresize', 50)
+ figuresize = settings.setdefault('figuresize', 10)
  normalize = settings.setdefault('normalize', True)
  save_dtype = settings.setdefault('save_dtype', 'uint16')

@@ -189,7 +189,7 @@ def set_default_umap_image_settings(settings={}):
  settings.setdefault('remove_cluster_noise', True)
  settings.setdefault('remove_highly_correlated', True)
  settings.setdefault('log_data', False)
- settings.setdefault('figuresize', 60)
+ settings.setdefault('figuresize', 10)
  settings.setdefault('black_background', True)
  settings.setdefault('remove_image_canvas', False)
  settings.setdefault('plot_outlines', True)
@@ -277,7 +277,7 @@ def get_measure_crop_settings(settings):

  def set_default_analyze_screen(settings):
  settings.setdefault('src', 'path')
- settings.setdefault('model_type','xgboost')
+ settings.setdefault('model_type_ml','xgboost')
  settings.setdefault('heatmap_feature','predictions')
  settings.setdefault('grouping','mean')
  settings.setdefault('min_max','allq')
@@ -314,7 +314,6 @@ def set_default_train_test_model(settings):
  settings.setdefault('batch_size',64)
  settings.setdefault('epochs',100)
  settings.setdefault('val_split',0.1)
- settings.setdefault('train_mode','erm')
  settings.setdefault('learning_rate',0.001)
  settings.setdefault('weight_decay',0.00001)
  settings.setdefault('dropout_rate',0.1)
@@ -324,14 +323,90 @@ def set_default_train_test_model(settings):
  settings.setdefault('gradient_accumulation',True)
  settings.setdefault('gradient_accumulation_steps',4)
  settings.setdefault('intermedeate_save',True)
- settings.setdefault('pin_memory',True)
+ settings.setdefault('pin_memory',False)
  settings.setdefault('n_jobs',cores)
- settings.setdefault('channels',['r','g','b'])
+ settings.setdefault('train_channels',['r','g','b'])
  settings.setdefault('augment',False)
  settings.setdefault('verbose',False)
  return settings

+ def set_generate_training_dataset_defaults(settings):
+
+ settings.setdefault('src','path')
+ settings.setdefault('dataset_mode','metadata')
+ settings.setdefault('annotation_column','test')
+ settings.setdefault('annotated_classes',[1,2])
+ settings.setdefault('classes',['nc','pc'])
+ settings.setdefault('size',224)
+ settings.setdefault('test_split',0.1)
+ settings.setdefault('class_metadata',[['c1'],['c2']])
+ settings.setdefault('metadata_type_by','col')
+ settings.setdefault('channel_of_interest',3)
+ settings.setdefault('custom_measurement',None)
+ settings.setdefault('tables',None)
+ settings.setdefault('png_type','cell_png')
+
+ return settings
+
+ def deep_spacr_defaults(settings):
+
+ cores = os.cpu_count()-4
+
+ settings.setdefault('src','path')
+ settings.setdefault('dataset_mode','metadata')
+ settings.setdefault('annotation_column','test')
+ settings.setdefault('annotated_classes',[1,2])
+ settings.setdefault('classes',['nc','pc'])
+ settings.setdefault('size',224)
+ settings.setdefault('test_split',0.1)
+ settings.setdefault('class_metadata',[['c1'],['c2']])
+ settings.setdefault('metadata_type_by','col')
+ settings.setdefault('channel_of_interest',3)
+ settings.setdefault('custom_measurement',None)
+ settings.setdefault('tables',None)
+ settings.setdefault('png_type','cell_png')
+ settings.setdefault('custom_model',False)
+ settings.setdefault('custom_model_path','path')
+ settings.setdefault('train',True)
+ settings.setdefault('test',False)
+ settings.setdefault('model_type','maxvit_t')
+ settings.setdefault('optimizer_type','adamw')
+ settings.setdefault('schedule','reduce_lr_on_plateau') #reduce_lr_on_plateau, step_lr
+ settings.setdefault('loss_type','focal_loss') # binary_cross_entropy_with_logits
+ settings.setdefault('normalize',True)
+ settings.setdefault('image_size',224)
+ settings.setdefault('batch_size',64)
+ settings.setdefault('epochs',100)
+ settings.setdefault('val_split',0.1)
+ settings.setdefault('learning_rate',0.001)
+ settings.setdefault('weight_decay',0.00001)
+ settings.setdefault('dropout_rate',0.1)
+ settings.setdefault('init_weights',True)
+ settings.setdefault('amsgrad',True)
+ settings.setdefault('use_checkpoint',True)
+ settings.setdefault('gradient_accumulation',True)
+ settings.setdefault('gradient_accumulation_steps',4)
+ settings.setdefault('intermedeate_save',True)
+ settings.setdefault('pin_memory',False)
+ settings.setdefault('n_jobs',cores)
+ settings.setdefault('train_channels',['r','g','b'])
+ settings.setdefault('augment',False)
+ settings.setdefault('preload_batches', 3)
+ settings.setdefault('verbose',True)
+ settings.setdefault('apply_model_to_dataset',True)
+ settings.setdefault('file_metadata',None)
+ settings.setdefault('sample',None)
+ settings.setdefault('experiment','exp.')
+ settings.setdefault('score_threshold',0.5)
+ settings.setdefault('tar_path','path')
+ settings.setdefault('model_path','path')
+ settings.setdefault('file_type','cell_png')
+ settings.setdefault('generate_training_dataset', True)
+ settings.setdefault('train_DL_model', True)
+ return settings
+
  def get_analyze_recruitment_default_settings(settings):
+ settings.setdefault('src','path')
  settings.setdefault('target','protein')
  settings.setdefault('cell_types',['HeLa'])
  settings.setdefault('cell_plate_metadata',None)
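Note on the new default helpers above: because they rely on dict.setdefault, they only fill keys the caller has not already set, so user-supplied values always take precedence. A minimal usage sketch (the source path and overrides below are hypothetical):

    from spacr.settings import deep_spacr_defaults

    # Hypothetical user settings; every key not listed here is filled from the defaults above.
    user_settings = {'src': '/data/screen1', 'epochs': 25}
    settings = deep_spacr_defaults(user_settings)

    print(settings['epochs'])          # 25, the user value is kept
    print(settings['model_type'])      # 'maxvit_t', filled from the defaults
    print(settings['train_channels'])  # ['r', 'g', 'b']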
@@ -351,12 +426,10 @@ def get_analyze_recruitment_default_settings(settings):
351
426
  settings.setdefault('plot',True)
352
427
  settings.setdefault('plot_nr',10)
353
428
  settings.setdefault('plot_control',True)
354
- settings.setdefault('figuresize',20)
355
- settings.setdefault('remove_background',False)
356
- settings.setdefault('backgrounds',100)
429
+ settings.setdefault('figuresize',10)
357
430
  settings.setdefault('include_noninfected',True)
358
- settings.setdefault('include_multiinfected',True)
359
- settings.setdefault('include_multinucleated',True)
431
+ settings.setdefault('include_multiinfected',10)
432
+ settings.setdefault('include_multinucleated',1)
360
433
  settings.setdefault('cells_per_well',0)
361
434
  settings.setdefault('pathogen_size_range',[0,100000])
362
435
  settings.setdefault('nucleus_size_range',[0,100000])
@@ -534,7 +607,7 @@ expected_types = {
534
607
  "save_png": bool,
535
608
  "crop_mode": list,
536
609
  "use_bounding_box": bool,
537
- "png_size": list, # This can be a list of lists
610
+ "png_size": list, # This can be a list of lists
538
611
  "normalize": bool,
539
612
  "png_dims": list,
540
613
  "normalize_by": str,
@@ -546,7 +619,7 @@ expected_types = {
546
619
  "cells": list,
547
620
  "cell_loc": list,
548
621
  "pathogens": list,
549
- "pathogen_loc": (list, list), # This can be a list of lists
622
+ "pathogen_loc": (list, list), # This can be a list of lists
550
623
  "treatments": list,
551
624
  "treatment_loc": (list, list), # This can be a list of lists
552
625
  "channel_of_interest": int,
@@ -554,10 +627,9 @@ expected_types = {
554
627
  "measurement": str,
555
628
  "nr_imgs": int,
556
629
  "um_per_pixel": (int, float),
557
- # Additional settings based on provided defaults
558
630
  "include_noninfected": bool,
559
- "include_multiinfected": bool,
560
- "include_multinucleated": bool,
631
+ "include_multiinfected": int,
632
+ "include_multinucleated": int,
561
633
  "filter_min_max": (list, type(None)),
562
634
  "channel_dims": list,
563
635
  "backgrounds": list,
@@ -651,7 +723,6 @@ expected_types = {
651
723
  "image_size": int,
652
724
  "epochs": int,
653
725
  "val_split": float,
654
- "train_mode": str,
655
726
  "learning_rate": float,
656
727
  "weight_decay": float,
657
728
  "dropout_rate": float,
@@ -666,9 +737,9 @@ expected_types = {
666
737
  "augment": bool,
667
738
  "target": str,
668
739
  "cell_types": list,
669
- "cell_plate_metadata": (list, type(None)),
740
+ "cell_plate_metadata": (list, list),
670
741
  "pathogen_types": list,
671
- "pathogen_plate_metadata": (list, list), # This can be a list of lists
742
+ "pathogen_plate_metadata": (list, list), # This can be a list of lists
672
743
  "treatment_plate_metadata": (list, list), # This can be a list of lists
673
744
  "metadata_types": list,
674
745
  "cell_chann_dim": int,
@@ -721,10 +792,71 @@ expected_types = {
721
792
  "from_scratch": bool,
722
793
  "width_height": list,
723
794
  "resize": bool,
795
+ "compression": str,
796
+ "complevel": int,
724
797
  "gene_weights_csv": str,
725
798
  "fraction_threshold": float,
799
+ "barcode_mapping":dict,
800
+ "redunction_method":str,
801
+ "mix":str,
802
+ "model_type_ml":str,
803
+ "exclude_conditions":list,
804
+ "remove_highly_correlated_features":bool,
805
+ 'barcode_coordinates':list, # This is a list of lists
806
+ 'reverse_complement':bool,
807
+ 'file_type':str,
808
+ 'model_path':str,
809
+ 'tar_path':str,
810
+ 'score_threshold':float,
811
+ 'sample':None,
812
+ 'file_metadata':None,
813
+ 'apply_model_to_dataset':False,
814
+ "train":bool,
815
+ "test":bool,
816
+ 'train_channels':list,
817
+ "optimizer_type":str,
818
+ "dataset_mode":str,
819
+ "annotated_classes":list,
820
+ "annotation_column":str,
821
+ "apply_model_to_dataset":bool,
822
+ "metadata_type_by":str,
823
+ "custom_measurement":str,
824
+ "custom_model":bool,
825
+ "size":int,
826
+ "test_split":float,
827
+ "class_metadata":list, # This is a list of lists
828
+ "png_type":str,
829
+ "custom_model_path":str,
830
+ "generate_training_dataset":bool,
831
+ 'preload_batches':int,
832
+ "train_DL_model":bool,
726
833
  }
727
834
 
835
+ categories = {"General": ["src", "metadata_type", "custom_regex", "experiment", "channels", "magnification", "channel_dims", "apply_model_to_dataset", "generate_training_dataset", "train_DL_model"],
836
+ "Cell": ["cell_intensity_range", "cell_size_range", "cell_chann_dim", "cell_channel", "cell_background", "cell_Signal_to_noise", "cell_CP_prob", "cell_FT", "remove_background_cell", "cell_min_size", "cell_mask_dim", "cytoplasm", "cytoplasm_min_size", "include_uninfected", "merge_edge_pathogen_cells", "adjust_cells"],
837
+ "Nucleus": ["nucleus_intensity_range", "nucleus_size_range", "nucleus_chann_dim", "nucleus_channel", "nucleus_background", "nucleus_Signal_to_noise", "nucleus_CP_prob", "nucleus_FT", "remove_background_nucleus", "nucleus_min_size", "nucleus_mask_dim", "nucleus_loc"],
838
+ "Pathogen": ["pathogen_intensity_range", "pathogen_size_range", "pathogen_chann_dim", "pathogen_channel", "pathogen_background", "pathogen_Signal_to_noise", "pathogen_CP_prob", "pathogen_FT", "pathogen_model", "remove_background_pathogen", "pathogen_min_size", "pathogen_mask_dim"],
839
+ "Timelapse": ["fps", "timelapse_displacement", "timelapse_memory", "timelapse_frame_limits", "timelapse_remove_transient", "timelapse_mode", "timelapse_objects", "compartments"],
840
+ "Plot": ["plot_control", "plot_nr", "examples_to_plot", "normalize_plots", "normalize", "cmap", "figuresize", "plot_cluster_grids", "img_zoom", "row_limit", "color_by", "plot_images", "smooth_lines", "plot_points", "plot_outlines", "black_background", "plot_by_cluster", "heatmap_feature","grouping","min_max","cmap","save_figure"],
841
+ "Measurements": ["remove_image_canvas", "remove_highly_correlated", "homogeneity", "homogeneity_distances", "radial_dist", "calculate_correlation", "manders_thresholds", "save_measurements", "tables", "image_nr", "dot_size", "filter_by", "remove_highly_correlated_features", "remove_low_variance_features", "channel_of_interest"],
842
+ "Paths":["grna", "barcodes", "custom_model_path", "tar_path","model_path"],
843
+ "Sequencing": ["upstream", "downstream", "barecode_length_1", "barecode_length_2", "chunk_size", "barcode_mapping", "reverse_complement", "barcode_coordinates", "complevel", "compression","plate_dict"],
844
+ "Embedding": ["visualize","n_neighbors","min_dist","metric","resnet_features","reduction_method","embedding_by_controls","col_to_compare","log_data"],
845
+ "Clustering": ["eps","min_samples","analyze_clusters","clustering","remove_cluster_noise"],
846
+ "Object Image": ["save_png", "dialate_pngs", "dialate_png_ratios", "png_size", "png_dims", "save_arrays", "normalize_by", "dialate_png_ratios", "crop_mode", "dialate_pngs", "normalize", "use_bounding_box"],
847
+ "Annotation": ["nc_loc", "pc_loc", "nc", "pc", "cell_plate_metadata","pathogen_types", "pathogen_plate_metadata", "treatment_plate_metadata", "metadata_types", "cell_types", "target","positive_control","negative_control", "location_column", "treatment_loc", "cells", "cell_loc", "pathogens", "pathogen_loc", "channel_of_interest", "measurement", "treatments", "um_per_pixel", "nr_imgs", "exclude", "exclude_conditions", "mix", "pos", "neg"],
848
+ "Machine Learning":[],
849
+ "Deep Learning": ["png_type","score_threshold","file_type", "train_channels", "epochs", "loss_type", "optimizer_type","image_size","val_split","learning_rate","weight_decay","dropout_rate", "init_weights", "train", "classes", "augment"],
850
+ "Generate Dataset":["preload_batches", "file_metadata","class_metadata", "annotation_column","annotated_classes", "dataset_mode", "metadata_type_by","custom_measurement", "sample", "size"],
851
+ "Cellpose":["from_scratch", "n_epochs", "width_height", "model_name", "custom_model", "resample", "rescale", "CP_prob", "flow_threshold", "percentiles", "circular", "invert", "diameter", "grayscale", "background", "Signal_to_noise", "resize", "target_height", "target_width"],
852
+ "Regression":["class_1_threshold", "plate", "other", "fraction_threshold", "alpha", "remove_row_column_effect", "regression_type", "min_cell_count", "agg_type", "transform", "dependent_variable", "gene_weights_csv"],
853
+ "Miscellaneous": ["all_to_mip", "pick_slice", "skip_mode", "upscale", "upscale_factor"],
854
+ "Test": ["test_mode", "test_images", "random_test", "test_nr", "test", "test_split"],
855
+ "Advanced": ["target_intensity_min", "cells_per_well", "include_multinucleated", "include_multiinfected", "include_noninfected", "backgrounds", "plot", "timelapse", "schedule", "test_size","exclude","n_repeats","top_features", "model_type_ml", "model_type","minimum_cell_count","n_estimators","preprocess", "remove_background", "normalize", "lower_percentile", "merge_pathogens", "batch_size", "filter", "save", "masks", "verbose", "randomize", "n_jobs", "amsgrad","use_checkpoint","gradient_accumulation","gradient_accumulation_steps","intermedeate_save","pin_memory"]
856
+ }
857
+
858
+ category_keys = list(categories.keys())
859
+
728
860
  def check_settings(vars_dict, expected_types, q=None):
729
861
  from .gui_utils import parse_list
730
862
 
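The module-level categories dict added above drives how setting keys are grouped in the GUI, and category_keys is what check_settings uses below to skip category headers. A small sketch of a reverse lookup over that dict (illustrative helper, not spacr's own GUI code):

    from spacr.settings import categories, category_keys

    def category_of(key):
        # Return the first category whose key list contains the setting key.
        for category, keys in categories.items():
            if key in keys:
                return category
        return None

    print(category_keys[:3])               # ['General', 'Cell', 'Nucleus']
    print(category_of('figuresize'))       # 'Plot'
    print(category_of('barcode_mapping'))  # 'Sequencing'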
@@ -736,7 +868,7 @@ def check_settings(vars_dict, expected_types, q=None):

  for key, (label, widget, var, _) in vars_dict.items():
  if key not in expected_types:
- if key not in ["General", "Nucleus", "Cell", "Pathogen", "Timelapse", "Plot", "Object Image", "Annotate Data", "Measurements", "Advanced", "Miscellaneous", "Test", "Paths"]:
+ if key not in category_keys:
  q.put(f"Key {key} not found in expected types.")
  continue

@@ -744,7 +876,7 @@ def check_settings(vars_dict, expected_types, q=None):
  expected_type = expected_types.get(key, str)

  try:
- if key in ["png_size", "pathogen_plate_metadata", "treatment_plate_metadata"]:
+ if key in ["cell_plate_metadata", "timelapse_frame_limits", "png_size", "pathogen_loc", "treatment_loc", "pathogen_plate_metadata", "treatment_plate_metadata", "barcode_coordinates", "class_metadata"]:
  parsed_value = ast.literal_eval(value) if value else None
  if isinstance(parsed_value, list):
  if all(isinstance(i, list) for i in parsed_value) or all(not isinstance(i, list) for i in parsed_value):
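For the keys listed above, check_settings parses the raw widget string with ast.literal_eval and then distinguishes a flat list from a list of lists. A standalone sketch of that parsing step (the example values are illustrative):

    import ast

    # Settings such as 'png_size' or 'class_metadata' may be a list or a list of lists.
    for raw in ("[200,200]", "[[200,200],[400,400]]", "[['c1'],['c2']]"):
        parsed = ast.literal_eval(raw) if raw else None
        if isinstance(parsed, list):
            nested = all(isinstance(i, list) for i in parsed)
            print(parsed, 'list of lists' if nested else 'flat list')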
@@ -829,7 +961,7 @@ def generate_fields(variables, scrollable_frame):
  "cell_Signal_to_noise": "(float) - The signal-to-noise ratio for the cell channel. This will be used to determine the range of intensities to normalize images to for cell segmentation.",
  "cell_size_range": "(list) - Size range for cell segmentation.",
  "cell_types": "(list) - Types of cells to include in the analysis.",
- "cells": "(list) - The cell types to include in the analysis.",
+ "cells": "(list of lists) - The cell types to include in the analysis.",
  "cells_per_well": "(int) - Number of cells per well.",
  "channel_dims": "(list) - The dimensions of the image channels.",
  "channel_of_interest": "(int) - The channel of interest to use for the analysis.",
@@ -879,8 +1011,8 @@ def generate_fields(variables, scrollable_frame):
  "image_nr": "(int) - Number of images to process.",
  "image_size": "(int) - Size of the images for training.",
  "img_zoom": "(float) - Zoom factor for the images in plots.",
- "include_multinucleated": "(bool) - Whether to include multinucleated cells in the analysis.",
- "include_multiinfected": "(bool) - Whether to include multi-infected cells in the analysis.",
+ "include_multinucleated": "(int) - Whether to include multinucleated cells in the analysis.",
+ "include_multiinfected": "(int) - Whether to include multi-infected cells in the analysis.",
  "include_noninfected": "(bool) - Whether to include non-infected cells in the analysis.",
  "include_uninfected": "(bool) - Whether to include uninfected cells in the analysis.",
  "init_weights": "(bool) - Whether to initialize weights for the model.",
@@ -898,7 +1030,7 @@ def generate_fields(variables, scrollable_frame):
  "metadata_type": "(str) - Type of metadata to expect in the images. This will determine how the images are processed. If 'custom' is selected, you can provide a custom regex pattern to extract metadata from the image names.",
  "metadata_types": "(list) - Types of metadata to include in the analysis.",
  "merge_edge_pathogen_cells": "(bool) - Whether to merge cells that share pathogen objects.",
- "merge_pathogens": "(bool) - Whether to merge pathogen objects that share more than 75% of their perimeter.",
+ "merge_pathogens": "(bool) - Whether to merge pathogen objects that share more than 75 percent of their perimeter.",
  "metric": "(str) - Metric to use for UMAP.",
  "min_cell_count": "(int) - Minimum number of cells required for analysis.",
  "min_dist": "(float) - Minimum distance for UMAP.",
@@ -907,6 +1039,7 @@ def generate_fields(variables, scrollable_frame):
  "mix": "(dict) - Mixing settings for the samples.",
  "model_name": "(str) - Name of the Cellpose model.",
  "model_type": "(str) - Type of model to use for the analysis.",
+ "model_type_ml": "(str) - Type of model to use for machine learning.",
  "nc": "(str) - Negative control identifier.",
  "nc_loc": "(str) - Location of the negative control in the images.",
  "negative_control": "(str) - Identifier for the negative control.",
@@ -937,12 +1070,7 @@ def generate_fields(variables, scrollable_frame):
  "pathogen_background": "(float) - The background intensity for the pathogen channel. This will be used to remove background noise.",
  "pathogen_chann_dim": "(int) - Dimension of the channel to use for pathogen segmentation.",
  "pathogen_channel": "(int) - The channel to use for the pathogen. If None, the pathogen will not be segmented.",
- "pathogen_intensity_range": "(list) - Intensity range for pathogen segmentation.",
- "pathogen_loc": "(list) - The locations of the pathogen types in the images.",
- "pathogen_mask_dim": "(int) - The dimension of the array the pathogen mask is saved in.",
- "pathogen_min_size": "(int) - The minimum size of pathogen objects in pixels^2.",
- "pathogen_model": "(str) - Model to use for pathogen segmentation.",
- "pathogen_plate_metadata": "(str) - Metadata for the pathogen plate.",
+ "pathogen_intensity_range": "(str) - Metadata for the pathogen plate.",
  "pathogen_Signal_to_noise": "(float) - The signal-to-noise ratio for the pathogen channel. This will be used to determine the range of intensities to normalize images to for pathogen segmentation.",
  "pathogen_size_range": "(list) - Size range for pathogen segmentation.",
  "pathogen_types": "(list) - Types of pathogens to include in the analysis.",
@@ -1008,7 +1136,6 @@ def generate_fields(variables, scrollable_frame):
  "treatments": "(list) - The treatments to include in the analysis.",
  "top_features": "(int) - Top features to include in the analysis.",
  "train": "(bool) - Whether to train the model.",
- "train_mode": "(str) - Mode to use for training the model.",
  "transform": "(dict) - Transformation to apply to the data.",
  "upscale": "(bool) - Whether to upscale the images.",
  "upscale_factor": "(float) - Factor by which to upscale the images.",
@@ -1018,6 +1145,20 @@ def generate_fields(variables, scrollable_frame):
  "verbose": "(bool) - Whether to print verbose output during processing.",
  "weight_decay": "(float) - Weight decay for regularization.",
  "width_height": "(tuple) - Width and height of the input images.",
+ "barcode_coordinates": "(list of lists) - Coordinates of the barcodes in the sequence.",
+ "barcode_mapping": "dict - names and barecode csv files",
+ "compression": "str - type of compression (e.g. zlib)",
+ "complevel": "int - level of compression (0-9). Higher is slower and yealds smaller files",
+ "file_type": "str - type of file to process",
+ "model_path": "str - path to the model",
+ "tar_path": "str - path to the tar file with image dataset",
+ "score_threshold": "float - threshold for classification",
+ "sample": "str - number of images to sample for tar dataset (including both classes). Default: None",
+ "file_metadata": "str - string that must be present in image path to be included in the dataset",
+ "apply_model_to_dataset": "bool - whether to apply model to the dataset",
+ "train_channels": "list - channels to use for training",
+ "dataset_mode": "str - How to generate train/test dataset.",
+ "annotated_classes": "list - list of numbers in annotation column.",
  "um_per_pixel": "(float) - The micrometers per pixel for the images."
  }

@@ -1033,29 +1174,6 @@ def generate_fields(variables, scrollable_frame):

  return vars_dict

-
- categories = {
- "General": ["src", "metadata_type", "custom_regex", "experiment", "channels", "magnification", "channel_dims"],
- "Paths":["grna", "barcodes"],
- "Regression":["class_1_threshold", "plate", "other", "fraction_threshold", "alpha", "remove_row_column_effect", "regression_type", "min_cell_count", "agg_type", "transform", "dependent_variable", "gene_weights_csv"],
- "Cellpose":["from_scratch", "n_epochs", "width_height", "model_name", "custom_model", "resample", "rescale", "CP_prob", "flow_threshold", "percentiles", "circular", "invert", "diameter", "grayscale", "background", "Signal_to_noise", "resize", "target_height", "target_width"],
- "Nucleus": ["nucleus_intensity_range", "nucleus_size_range", "nucleus_chann_dim", "nucleus_channel", "nucleus_background", "nucleus_Signal_to_noise", "nucleus_CP_prob", "nucleus_FT", "remove_background_nucleus", "nucleus_min_size", "nucleus_mask_dim", "nucleus_loc"],
- "Cell": ["cell_intensity_range", "cell_size_range", "cell_chann_dim", "cell_channel", "cell_background", "cell_Signal_to_noise", "cell_CP_prob", "cell_FT", "remove_background_cell", "cell_min_size", "cell_mask_dim", "cytoplasm", "cytoplasm_min_size", "include_uninfected", "merge_edge_pathogen_cells", "adjust_cells"],
- "Pathogen": ["pathogen_intensity_range", "pathogen_size_range", "pathogen_chann_dim", "pathogen_channel", "pathogen_background", "pathogen_Signal_to_noise", "pathogen_CP_prob", "pathogen_FT", "pathogen_model", "remove_background_pathogen", "pathogen_min_size", "pathogen_mask_dim"],
- "Timelapse": ["fps", "timelapse_displacement", "timelapse_memory", "timelapse_frame_limits", "timelapse_remove_transient", "timelapse_mode", "timelapse_objects", "compartments"],
- "Plot": ["plot_control", "plot_nr", "examples_to_plot", "normalize_plots", "normalize", "cmap", "figuresize", "plot_cluster_grids", "img_zoom", "row_limit", "color_by", "plot_images", "smooth_lines", "plot_points", "plot_outlines", "black_background", "plot_by_cluster", "heatmap_feature","grouping","min_max","cmap","save_figure"],
- "Object Image": ["save_png", "dialate_pngs", "dialate_png_ratios", "png_size", "png_dims", "save_arrays", "normalize_by", "dialate_png_ratios", "crop_mode", "dialate_pngs", "normalize", "use_bounding_box"],
- "Annotate Data": ["nc_loc", "pc_loc", "nc", "pc", "cell_plate_metadata","pathogen_types", "pathogen_plate_metadata", "treatment_plate_metadata", "metadata_types", "cell_types", "target","positive_control","negative_control", "location_column", "treatment_loc", "cells", "cell_loc", "pathogens", "pathogen_loc", "channel_of_interest", "measurement", "treatments", "um_per_pixel", "nr_imgs", "exclude", "exclude_conditions", "mix", "pos", "neg"],
- "Measurements": ["remove_image_canvas", "remove_highly_correlated", "homogeneity", "homogeneity_distances", "radial_dist", "calculate_correlation", "manders_thresholds", "save_measurements", "tables", "image_nr", "dot_size", "filter_by", "remove_highly_correlated_features", "remove_low_variance_features", "channel_of_interest"],
- "Advanced": ["plate_dict", "target_intensity_min", "cells_per_well", "include_multinucleated", "include_multiinfected", "include_noninfected", "backgrounds", "plot", "timelapse", "schedule", "test_size","exclude","n_repeats","top_features", "model_type","minimum_cell_count","n_estimators","preprocess", "remove_background", "normalize", "lower_percentile", "merge_pathogens", "batch_size", "filter", "save", "masks", "verbose", "randomize", "n_jobs", "train_mode","amsgrad","use_checkpoint","gradient_accumulation","gradient_accumulation_steps","intermedeate_save","pin_memory","n_jobs","channels","augment"],
- "Clustering": ["eps","min_samples","analyze_clusters","clustering","remove_cluster_noise"],
- "Embedding": ["visualize","n_neighbors","min_dist","metric","resnet_features","reduction_method","embedding_by_controls","col_to_compare","log_data"],
- "Train DL Model": ["epochs", "loss_type", "optimizer_type","image_size","val_split","learning_rate","weight_decay","dropout_rate", "init_weights", "train", "classes"],
- "Miscellaneous": ["all_to_mip", "pick_slice", "skip_mode", "upscale", "upscale_factor"],
- "Test": ["test_mode", "test_images", "random_test", "test_nr", "test"],
- "Sequencing": ["upstream", "downstream", "barecode_length_1", "barecode_length_2", "chunk_size"]
- }
-
  descriptions = {
  'mask': "\n\nHelp:\n- Generate Cells, Nuclei, Pathogens, and Cytoplasm masks from intensity images in src.\n- To ensure that spacr is installed correctly:\n- 1. Downloade the training set (click Download).\n- 2. Import settings (click settings navigate to downloaded dataset settings folder and import preprocess_generate_masks_settings.csv).\n- 3. Run the module.\n- 4. Proceed to the Measure module (click Measure in the menue bar).\n- For further help, click the Help button in the menue bar.",

@@ -1063,8 +1181,6 @@ descriptions = {

  'classify': "Train and Test any Torch Computer vision model. (Requires PNG images from the Measure module). Function: train_test_model from spacr.deep_spacr.\n\nKey Features:\n- Deep Learning Integration: Train and evaluate state-of-the-art Torch models for various classification tasks.\n- Flexible Training: Supports a wide range of Torch models, allowing customization based on specific research needs.\n- Data Requirement: Requires PNG images generated by the Measure module for training and testing.",

- 'sequencing': "Find Barcodes and gRNA sequences in FASTQ files. (Requires paired-end FASTQ files, R1 and R2). Function: analyze_reads from spacr.sequencing.\n\nKey Features:\n- Barcode and gRNA Identification: Efficiently detect and extract barcode and gRNA sequences from raw sequencing data.\n- Paired-End Support: Specifically designed to handle paired-end FASTQ files, ensuring accurate sequence alignment and analysis.\n- High Throughput: Capable of processing large sequencing datasets quickly and accurately.",
-
  'umap': "Generate UMAP or tSNE embeddings and represent points as single cell images. (Requires measurements.db and PNG images from the Measure module). Function: generate_image_umap from spacr.core.\n\nKey Features:\n- Dimensionality Reduction: Employ UMAP or tSNE algorithms to reduce high-dimensional data into two dimensions for visualization.\n- Single Cell Representation: Visualize embedding points as single cell images, providing an intuitive understanding of data clusters.\n- Data Integration: Requires measurements and images generated by the Measure module, ensuring comprehensive data representation.",

  'train_cellpose': "Train custom Cellpose models for your specific dataset. Function: train_cellpose_model from spacr.core.\n\nKey Features:\n- Custom Model Training: Train Cellpose models on your dataset to improve segmentation accuracy.\n- Data Adaptation: Tailor the model to handle specific types of biological samples more effectively.\n- Advanced Training Options: Supports various training parameters and configurations for optimized performance.",
@@ -1075,8 +1191,8 @@ descriptions = {

  'cellpose_all': "Run Cellpose on all images in your dataset and obtain masks and measurements. Function: cellpose_analysis from spacr.cellpose.\n\nKey Features:\n- End-to-End Analysis: Perform both segmentation and measurement extraction in a single step.\n- Efficiency: Process entire datasets with minimal manual intervention.\n- Comprehensive Output: Obtain detailed masks and corresponding measurements for further analysis.",

- 'map_barcodes': "Map barcodes to your data for identification and tracking. Function: barcode_mapping_tools from spacr.sequencing.\n\nKey Features:\n- Barcode Integration: Efficiently map and integrate barcode information into your dataset.\n- Tracking: Enable tracking and identification of samples using barcodes.\n- Compatibility: Works with sequencing data to ensure accurate mapping and analysis.",
-
+ 'map_barcodes': "\n\nHelp:\n- 1 .Generate consensus read fastq files from R1 and R2 files.\n- 2. Map barcodes from sequencing data for identification and tracking of samples.\n- 3. Run the module to extract and map barcodes from your FASTQ files in chunks.\n- Prepare your barcode CSV files with the appropriate 'name' and 'sequence' columns.\n- Configure the barcode settings (coordinates and reverse complement flags) according to your experimental setup.\n- For further help, click the Help button in the menu bar.",
+
  'regression': "Perform regression analysis on your data. Function: regression_tools from spacr.analysis.\n\nKey Features:\n- Statistical Analysis: Conduct various types of regression analysis to identify relationships within your data.\n- Flexible Options: Supports multiple regression models and configurations.\n- Data Insight: Gain deeper insights into your dataset through advanced regression techniques.",

  'recruitment': "Analyze recruitment data to understand sample recruitment dynamics. Function: recruitment_analysis_tools from spacr.analysis.\n\nKey Features:\n- Recruitment Analysis: Investigate and analyze the recruitment of samples over time or conditions.\n- Visualization: Generate visualizations to represent recruitment trends and patterns.\n- Integration: Utilize data from various sources for a comprehensive recruitment analysis."
@@ -1085,7 +1201,7 @@ descriptions = {
  def set_annotate_default_settings(settings):
  settings.setdefault('src', 'path')
  settings.setdefault('image_type', 'cell_png')
- settings.setdefault('channels', 'r,g,b')
+ settings.setdefault('channels', "'r','g','b'")
  settings.setdefault('img_size', 200)
  settings.setdefault('annotation_column', 'test')
  settings.setdefault('normalize', 'False')
@@ -1094,3 +1210,15 @@ def set_annotate_default_settings(settings):
  settings.setdefault('threshold', '2')
  return settings

+ def set_default_generate_barecode_mapping(settings={}):
+ settings.setdefault('src', 'path')
+ settings.setdefault('chunk_size', 100000)
+
+ settings.setdefault('barcode_mapping', {'row': ['/home/carruthers/Documents/row_barcodes.csv',(80, 88), True],
+ 'grna': ['/home/carruthers/Documents/grna_barcodes.csv',(34, 55), True],
+ 'column': ['/home/carruthers/Documents/column_barcodes.csv',(0, 7), False]})
+
+ settings.setdefault('n_jobs', None)
+ settings.setdefault('compression', 'zlib')
+ settings.setdefault('complevel', 5)
+ return settings
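Judging from the defaults above, each barcode_mapping entry is a list of [barcode CSV path, (start, end) coordinates within the read, reverse-complement flag]. A sketch of overriding those defaults (the paths and coordinates below are hypothetical):

    from spacr.settings import set_default_generate_barecode_mapping

    settings = set_default_generate_barecode_mapping({
        'src': '/data/sequencing_run1',
        'barcode_mapping': {
            'row':    ['/data/barcodes/row_barcodes.csv',    (80, 88), True],
            'grna':   ['/data/barcodes/grna_barcodes.csv',   (34, 55), True],
            'column': ['/data/barcodes/column_barcodes.csv', (0, 7),   False],
        },
    })
    # Keys not supplied above are filled from the defaults.
    print(settings['compression'], settings['complevel'])  # zlib 5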
spacr/utils.py CHANGED
@@ -1,4 +1,4 @@
- import sys, os, re, sqlite3, torch, torchvision, random, string, shutil, cv2, tarfile, glob, psutil, platform
+ import sys, os, re, sqlite3, torch, torchvision, random, string, shutil, cv2, tarfile, glob, psutil, platform, gzip

  import numpy as np
  from cellpose import models as cp_models
@@ -87,6 +87,12 @@ from scipy.stats import f_oneway, kruskal
  from sklearn.cluster import KMeans
  from scipy import stats

+ def save_settings(settings, name='settings'):
+
+ settings_df = pd.DataFrame(list(settings.items()), columns=['Key', 'Value'])
+ settings_csv = os.path.join(settings['src'],'settings',f'{name}.csv')
+ os.makedirs(os.path.join(settings['src'],'settings'), exist_ok=True)
+ settings_df.to_csv(settings_csv, index=False)

  def print_progress(files_processed, files_to_process, n_jobs, time_ls=None, batch_size=None, operation_type=""):
  if isinstance(files_processed, list):
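The new save_settings helper writes the settings dict as a two-column (Key, Value) CSV under <src>/settings/. A minimal usage sketch (the path and file name below are hypothetical):

    from spacr.utils import save_settings

    settings = {'src': '/data/screen1', 'figuresize': 10, 'cmap': 'inferno'}
    save_settings(settings, name='mask_settings')
    # Creates /data/screen1/settings/mask_settings.csv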
@@ -116,10 +122,8 @@ def print_progress(files_processed, files_to_process, n_jobs, time_ls=None, batc
  else:
  average_time_img = average_time / batch_size
  time_info = f'Time/batch: {average_time:.3f}sec, Time/image: {average_time_img:.3f}sec, Time_left: {time_left:.3f} min.'
-
- print(f'Progress: {files_processed}/{files_to_process}, operation_type: {operation_type} {time_info}')
-
-
+ else:
+ print(f'Progress: {files_processed}/{files_to_process}, operation_type: {operation_type} {time_info}')

  def reset_mp():
  current_method = get_start_method()
@@ -1649,7 +1653,7 @@ def split_my_dataset(dataset, split_ratio=0.1):
  val_dataset = Subset(dataset, val_indices)
  return train_dataset, val_dataset

- def classification_metrics(all_labels, prediction_pos_probs, loader_name, loss, epoch):
+ def classification_metrics(all_labels, prediction_pos_probs, loss, epoch):
  """
  Calculate classification metrics for binary classification.

@@ -1698,11 +1702,9 @@ def classification_metrics(all_labels, prediction_pos_probs, loader_name, loss,
  else:
  acc_nc = np.nan
  data_dict = {'accuracy': acc_all, 'neg_accuracy': acc_nc, 'pos_accuracy': acc_pc, 'loss':loss.item(),'prauc':pr_auc, 'optimal_threshold':optimal_threshold}
- data_df = pd.DataFrame(data_dict, index=[str(epoch)+'_'+loader_name])
+ data_df = pd.DataFrame(data_dict, index=[str(epoch)])
  return data_df

-
-
  def compute_irm_penalty(losses, dummy_w, device):
  """
  Computes the Invariant Risk Minimization (IRM) penalty.
@@ -1740,7 +1742,7 @@ def compute_irm_penalty(losses, dummy_w, device):
  # summary(base_model, (channels, height, width))
  # return

- def choose_model(model_type, device, init_weights=True, dropout_rate=0, use_checkpoint=False, channels=3, height=224, width=224, chan_dict=None, num_classes=2):
+ def choose_model(model_type, device, init_weights=True, dropout_rate=0, use_checkpoint=False, channels=3, height=224, width=224, chan_dict=None, num_classes=2, verbose=False):
  """
  Choose a model for classification.

@@ -1772,7 +1774,7 @@ def choose_model(model_type, device, init_weights=True, dropout_rate=0, use_chec
  print(f'Invalid model_type: {model_type}. Compatible model_types: {model_types}')
  return

- print(f'\rModel parameters: Architecture: {model_type} init_weights: {init_weights} dropout_rate: {dropout_rate} use_checkpoint: {use_checkpoint}', end='\r', flush=True)
+ print(f'Model parameters: Architecture: {model_type} init_weights: {init_weights} dropout_rate: {dropout_rate} use_checkpoint: {use_checkpoint}', end='\r', flush=True)

  if model_type == 'custom':

@@ -1783,8 +1785,8 @@ def choose_model(model_type, device, init_weights=True, dropout_rate=0, use_chec
  else:
  print(f'Compatible model_types: {model_types}')
  raise ValueError(f"Invalid model_type: {model_type}")
-
- print(base_model)
+ if verbose:
+ print(base_model)

  return base_model

@@ -4424,3 +4426,10 @@ def correct_masks(src):
  cell_path = os.path.join(src,'norm_channel_stack', 'cell_mask_stack')
  convert_and_relabel_masks(cell_path)
  _load_and_concatenate_arrays(src, [0,1,2,3], 1, 0, 2)
+
+ def count_reads_in_fastq(fastq_file):
+ count = 0
+ with gzip.open(fastq_file, "rt") as f:
+ for _ in f:
+ count += 1
+ return count // 4
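count_reads_in_fastq assumes a gzip-compressed FASTQ and returns the line count divided by four (one record per four lines). Usage sketch (the path below is hypothetical):

    from spacr.utils import count_reads_in_fastq

    n_reads = count_reads_in_fastq('/data/sequencing_run1/sample_R1.fastq.gz')
    print(f'{n_reads} reads')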
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: spacr
- Version: 0.2.53
+ Version: 0.2.61
  Summary: Spatial phenotype analysis of crisp screens (SpaCr)
  Home-page: https://github.com/EinarOlafsson/spacr
  Author: Einar Birnir Olafsson
@@ -44,6 +44,7 @@ Requires-Dist: gputil <2.0,>=1.4.0
  Requires-Dist: gpustat <2.0,>=1.1.1
  Requires-Dist: pyautogui <1.0,>=0.9.54
  Requires-Dist: tables <4.0,>=3.8.0
+ Requires-Dist: rapidfuzz <4.0,>=3.9
  Requires-Dist: huggingface-hub <0.25,>=0.24.0
  Provides-Extra: dev
  Requires-Dist: pytest <3.11,>=3.9 ; extra == 'dev'