spacr 0.2.53__py3-none-any.whl → 0.2.56__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
spacr/settings.py CHANGED
@@ -277,7 +277,7 @@ def get_measure_crop_settings(settings):
277
277
 
278
278
  def set_default_analyze_screen(settings):
279
279
  settings.setdefault('src', 'path')
280
- settings.setdefault('model_type','xgboost')
280
+ settings.setdefault('model_type_ml','xgboost')
281
281
  settings.setdefault('heatmap_feature','predictions')
282
282
  settings.setdefault('grouping','mean')
283
283
  settings.setdefault('min_max','allq')
@@ -326,11 +326,87 @@ def set_default_train_test_model(settings):
326
326
  settings.setdefault('intermedeate_save',True)
327
327
  settings.setdefault('pin_memory',True)
328
328
  settings.setdefault('n_jobs',cores)
329
- settings.setdefault('channels',['r','g','b'])
329
+ settings.setdefault('train_channels',['r','g','b'])
330
330
  settings.setdefault('augment',False)
331
331
  settings.setdefault('verbose',False)
332
332
  return settings
333
333
 
334
+ def set_generate_training_dataset_defaults(settings):
335
+
336
+ settings.setdefault('src','path')
337
+ settings.setdefault('dataset_mode','annotation')
338
+ settings.setdefault('annotation_column','test')
339
+ settings.setdefault('annotated_classes',[1,2])
340
+ settings.setdefault('classes',['nc','pc'])
341
+ settings.setdefault('size',224)
342
+ settings.setdefault('test_split',0.1)
343
+ settings.setdefault('class_metadata',[['c1'],['c2']])
344
+ settings.setdefault('metadata_type_by','col')
345
+ settings.setdefault('channel_of_interest',3)
346
+ settings.setdefault('custom_measurement',None)
347
+ settings.setdefault('tables',None)
348
+ settings.setdefault('png_type','cell_png')
349
+
350
+ return settings
351
+
352
+ def deep_spacr_defaults(settings):
353
+
354
+ cores = os.cpu_count()-2
355
+
356
+ settings.setdefault('src','path')
357
+ settings.setdefault('dataset_mode','annotation')
358
+ settings.setdefault('annotation_column','test')
359
+ settings.setdefault('annotated_classes',[1,2])
360
+ settings.setdefault('classes',['nc','pc'])
361
+ settings.setdefault('size',224)
362
+ settings.setdefault('test_split',0.1)
363
+ settings.setdefault('class_metadata',[['c1'],['c2']])
364
+ settings.setdefault('metadata_type_by','col')
365
+ settings.setdefault('channel_of_interest',3)
366
+ settings.setdefault('custom_measurement',None)
367
+ settings.setdefault('tables',None)
368
+ settings.setdefault('png_type','cell_png')
369
+ settings.setdefault('custom_model',False)
370
+ settings.setdefault('custom_model_path','path')
371
+ settings.setdefault('train',True)
372
+ settings.setdefault('test',False)
373
+ settings.setdefault('model_type','maxvit_t')
374
+ settings.setdefault('optimizer_type','adamw')
375
+ settings.setdefault('schedule','reduce_lr_on_plateau') #reduce_lr_on_plateau, step_lr
376
+ settings.setdefault('loss_type','focal_loss') # binary_cross_entropy_with_logits
377
+ settings.setdefault('normalize',True)
378
+ settings.setdefault('image_size',224)
379
+ settings.setdefault('batch_size',64)
380
+ settings.setdefault('epochs',100)
381
+ settings.setdefault('val_split',0.1)
382
+ settings.setdefault('train_mode','erm')
383
+ settings.setdefault('learning_rate',0.001)
384
+ settings.setdefault('weight_decay',0.00001)
385
+ settings.setdefault('dropout_rate',0.1)
386
+ settings.setdefault('init_weights',True)
387
+ settings.setdefault('amsgrad',True)
388
+ settings.setdefault('use_checkpoint',True)
389
+ settings.setdefault('gradient_accumulation',True)
390
+ settings.setdefault('gradient_accumulation_steps',4)
391
+ settings.setdefault('intermedeate_save',True)
392
+ settings.setdefault('pin_memory',True)
393
+ settings.setdefault('n_jobs',cores)
394
+ settings.setdefault('train_channels',['r','g','b'])
395
+ settings.setdefault('augment',False)
396
+ settings.setdefault('verbose',False)
397
+ settings.setdefault('apply_model_to_dataset',False)
398
+ settings.setdefault('file_metadata',None)
399
+ settings.setdefault('sample',None)
400
+ settings.setdefault('experiment','exp.')
401
+ settings.setdefault('score_threshold',0.5)
402
+ settings.setdefault('tar_path','path')
403
+ settings.setdefault('model_path','path')
404
+ settings.setdefault('file_type','cell_png')
405
+ settings.setdefault('generate_training_dataset', True)
406
+ settings.setdefault('train_DL_model', True)
407
+
408
+ return settings
409
+
334
410
  def get_analyze_recruitment_default_settings(settings):
335
411
  settings.setdefault('target','protein')
336
412
  settings.setdefault('cell_types',['HeLa'])
@@ -534,7 +610,7 @@ expected_types = {
534
610
  "save_png": bool,
535
611
  "crop_mode": list,
536
612
  "use_bounding_box": bool,
537
- "png_size": list, # This can be a list of lists
613
+ "png_size": list, # This can be a list of lists
538
614
  "normalize": bool,
539
615
  "png_dims": list,
540
616
  "normalize_by": str,
@@ -546,7 +622,7 @@ expected_types = {
546
622
  "cells": list,
547
623
  "cell_loc": list,
548
624
  "pathogens": list,
549
- "pathogen_loc": (list, list), # This can be a list of lists
625
+ "pathogen_loc": (list, list), # This can be a list of lists
550
626
  "treatments": list,
551
627
  "treatment_loc": (list, list), # This can be a list of lists
552
628
  "channel_of_interest": int,
@@ -554,7 +630,6 @@ expected_types = {
554
630
  "measurement": str,
555
631
  "nr_imgs": int,
556
632
  "um_per_pixel": (int, float),
557
- # Additional settings based on provided defaults
558
633
  "include_noninfected": bool,
559
634
  "include_multiinfected": bool,
560
635
  "include_multinucleated": bool,
@@ -668,7 +743,7 @@ expected_types = {
668
743
  "cell_types": list,
669
744
  "cell_plate_metadata": (list, type(None)),
670
745
  "pathogen_types": list,
671
- "pathogen_plate_metadata": (list, list), # This can be a list of lists
746
+ "pathogen_plate_metadata": (list, list), # This can be a list of lists
672
747
  "treatment_plate_metadata": (list, list), # This can be a list of lists
673
748
  "metadata_types": list,
674
749
  "cell_chann_dim": int,
@@ -721,10 +796,70 @@ expected_types = {
721
796
  "from_scratch": bool,
722
797
  "width_height": list,
723
798
  "resize": bool,
799
+ "compression": str,
800
+ "complevel": int,
724
801
  "gene_weights_csv": str,
725
802
  "fraction_threshold": float,
803
+ "barcode_mapping":dict,
804
+ "redunction_method":str,
805
+ "mix":str,
806
+ "model_type_ml":str,
807
+ "exclude_conditions":list,
808
+ "remove_highly_correlated_features":bool,
809
+ 'barcode_coordinates':list, # This is a list of lists
810
+ 'reverse_complement':bool,
811
+ 'file_type':str,
812
+ 'model_path':str,
813
+ 'tar_path':str,
814
+ 'score_threshold':float,
815
+ 'sample':None,
816
+ 'file_metadata':None,
817
+ 'apply_model_to_dataset':False,
818
+ "train":bool,
819
+ "test":bool,
820
+ 'train_channels':list,
821
+ "optimizer_type":str,
822
+ "dataset_mode":str,
823
+ "annotated_classes":list,
824
+ "annotation_column":str,
825
+ "apply_model_to_dataset":bool,
826
+ "metadata_type_by":str,
827
+ "custom_measurement":str,
828
+ "custom_model":bool,
829
+ "size":int,
830
+ "test_split":float,
831
+ "class_metadata":list, # This is a list of lists
832
+ "png_type":str,
833
+ "custom_model_path":str,
834
+ "generate_training_dataset":bool,
835
+ "train_DL_model":bool,
726
836
  }
727
837
 
838
+ categories = {"General": ["src", "metadata_type", "custom_regex", "experiment", "channels", "magnification", "channel_dims", "apply_model_to_dataset", "generate_training_dataset", "train_DL_model"],
839
+ "Cell": ["cell_intensity_range", "cell_size_range", "cell_chann_dim", "cell_channel", "cell_background", "cell_Signal_to_noise", "cell_CP_prob", "cell_FT", "remove_background_cell", "cell_min_size", "cell_mask_dim", "cytoplasm", "cytoplasm_min_size", "include_uninfected", "merge_edge_pathogen_cells", "adjust_cells"],
840
+ "Nucleus": ["nucleus_intensity_range", "nucleus_size_range", "nucleus_chann_dim", "nucleus_channel", "nucleus_background", "nucleus_Signal_to_noise", "nucleus_CP_prob", "nucleus_FT", "remove_background_nucleus", "nucleus_min_size", "nucleus_mask_dim", "nucleus_loc"],
841
+ "Pathogen": ["pathogen_intensity_range", "pathogen_size_range", "pathogen_chann_dim", "pathogen_channel", "pathogen_background", "pathogen_Signal_to_noise", "pathogen_CP_prob", "pathogen_FT", "pathogen_model", "remove_background_pathogen", "pathogen_min_size", "pathogen_mask_dim"],
842
+ "Timelapse": ["fps", "timelapse_displacement", "timelapse_memory", "timelapse_frame_limits", "timelapse_remove_transient", "timelapse_mode", "timelapse_objects", "compartments"],
843
+ "Plot": ["plot_control", "plot_nr", "examples_to_plot", "normalize_plots", "normalize", "cmap", "figuresize", "plot_cluster_grids", "img_zoom", "row_limit", "color_by", "plot_images", "smooth_lines", "plot_points", "plot_outlines", "black_background", "plot_by_cluster", "heatmap_feature","grouping","min_max","cmap","save_figure"],
844
+ "Measurements": ["remove_image_canvas", "remove_highly_correlated", "homogeneity", "homogeneity_distances", "radial_dist", "calculate_correlation", "manders_thresholds", "save_measurements", "tables", "image_nr", "dot_size", "filter_by", "remove_highly_correlated_features", "remove_low_variance_features", "channel_of_interest"],
845
+ "Paths":["grna", "barcodes", "custom_model_path", "tar_path","model_path"],
846
+ "Sequencing": ["upstream", "downstream", "barecode_length_1", "barecode_length_2", "chunk_size", "barcode_mapping", "reverse_complement", "barcode_coordinates", "complevel", "compression","plate_dict"],
847
+ "Embedding": ["visualize","n_neighbors","min_dist","metric","resnet_features","reduction_method","embedding_by_controls","col_to_compare","log_data"],
848
+ "Clustering": ["eps","min_samples","analyze_clusters","clustering","remove_cluster_noise"],
849
+ "Object Image": ["save_png", "dialate_pngs", "dialate_png_ratios", "png_size", "png_dims", "save_arrays", "normalize_by", "dialate_png_ratios", "crop_mode", "dialate_pngs", "normalize", "use_bounding_box"],
850
+ "Annotation": ["nc_loc", "pc_loc", "nc", "pc", "cell_plate_metadata","pathogen_types", "pathogen_plate_metadata", "treatment_plate_metadata", "metadata_types", "cell_types", "target","positive_control","negative_control", "location_column", "treatment_loc", "cells", "cell_loc", "pathogens", "pathogen_loc", "channel_of_interest", "measurement", "treatments", "um_per_pixel", "nr_imgs", "exclude", "exclude_conditions", "mix", "pos", "neg"],
851
+ "Machine Learning":[],
852
+ "Deep Learning": ["png_type","score_threshold","file_type", "train_channels", "epochs", "loss_type", "optimizer_type","image_size","val_split","learning_rate","weight_decay","dropout_rate", "init_weights", "train", "classes", "augment"],
853
+ "Generate Dataset":["file_metadata","class_metadata", "annotation_column","annotated_classes", "dataset_mode", "metadata_type_by","custom_measurement", "sample", "size"],
854
+ "Cellpose":["from_scratch", "n_epochs", "width_height", "model_name", "custom_model", "resample", "rescale", "CP_prob", "flow_threshold", "percentiles", "circular", "invert", "diameter", "grayscale", "background", "Signal_to_noise", "resize", "target_height", "target_width"],
855
+ "Regression":["class_1_threshold", "plate", "other", "fraction_threshold", "alpha", "remove_row_column_effect", "regression_type", "min_cell_count", "agg_type", "transform", "dependent_variable", "gene_weights_csv"],
856
+ "Miscellaneous": ["all_to_mip", "pick_slice", "skip_mode", "upscale", "upscale_factor"],
857
+ "Test": ["test_mode", "test_images", "random_test", "test_nr", "test", "test_split"],
858
+ "Advanced": ["target_intensity_min", "cells_per_well", "include_multinucleated", "include_multiinfected", "include_noninfected", "backgrounds", "plot", "timelapse", "schedule", "test_size","exclude","n_repeats","top_features", "model_type_ml", "model_type","minimum_cell_count","n_estimators","preprocess", "remove_background", "normalize", "lower_percentile", "merge_pathogens", "batch_size", "filter", "save", "masks", "verbose", "randomize", "n_jobs", "train_mode","amsgrad","use_checkpoint","gradient_accumulation","gradient_accumulation_steps","intermedeate_save","pin_memory"]
859
+ }
860
+
861
+ category_keys = list(categories.keys())
862
+
728
863
  def check_settings(vars_dict, expected_types, q=None):
729
864
  from .gui_utils import parse_list
730
865
 
@@ -736,7 +871,7 @@ def check_settings(vars_dict, expected_types, q=None):
736
871
 
737
872
  for key, (label, widget, var, _) in vars_dict.items():
738
873
  if key not in expected_types:
739
- if key not in ["General", "Nucleus", "Cell", "Pathogen", "Timelapse", "Plot", "Object Image", "Annotate Data", "Measurements", "Advanced", "Miscellaneous", "Test", "Paths"]:
874
+ if key not in category_keys:
740
875
  q.put(f"Key {key} not found in expected types.")
741
876
  continue
742
877
 
@@ -744,7 +879,7 @@ def check_settings(vars_dict, expected_types, q=None):
744
879
  expected_type = expected_types.get(key, str)
745
880
 
746
881
  try:
747
- if key in ["png_size", "pathogen_plate_metadata", "treatment_plate_metadata"]:
882
+ if key in ["timelapse_frame_limits", "png_size", "pathogen_loc", "treatment_loc", "pathogen_plate_metadata", "treatment_plate_metadata", "barcode_coordinates", "class_metadata"]:
748
883
  parsed_value = ast.literal_eval(value) if value else None
749
884
  if isinstance(parsed_value, list):
750
885
  if all(isinstance(i, list) for i in parsed_value) or all(not isinstance(i, list) for i in parsed_value):
@@ -829,7 +964,7 @@ def generate_fields(variables, scrollable_frame):
829
964
  "cell_Signal_to_noise": "(float) - The signal-to-noise ratio for the cell channel. This will be used to determine the range of intensities to normalize images to for cell segmentation.",
830
965
  "cell_size_range": "(list) - Size range for cell segmentation.",
831
966
  "cell_types": "(list) - Types of cells to include in the analysis.",
832
- "cells": "(list) - The cell types to include in the analysis.",
967
+ "cells": "(list of lists) - The cell types to include in the analysis.",
833
968
  "cells_per_well": "(int) - Number of cells per well.",
834
969
  "channel_dims": "(list) - The dimensions of the image channels.",
835
970
  "channel_of_interest": "(int) - The channel of interest to use for the analysis.",
@@ -898,7 +1033,7 @@ def generate_fields(variables, scrollable_frame):
898
1033
  "metadata_type": "(str) - Type of metadata to expect in the images. This will determine how the images are processed. If 'custom' is selected, you can provide a custom regex pattern to extract metadata from the image names.",
899
1034
  "metadata_types": "(list) - Types of metadata to include in the analysis.",
900
1035
  "merge_edge_pathogen_cells": "(bool) - Whether to merge cells that share pathogen objects.",
901
- "merge_pathogens": "(bool) - Whether to merge pathogen objects that share more than 75% of their perimeter.",
1036
+ "merge_pathogens": "(bool) - Whether to merge pathogen objects that share more than 75 percent of their perimeter.",
902
1037
  "metric": "(str) - Metric to use for UMAP.",
903
1038
  "min_cell_count": "(int) - Minimum number of cells required for analysis.",
904
1039
  "min_dist": "(float) - Minimum distance for UMAP.",
@@ -907,6 +1042,7 @@ def generate_fields(variables, scrollable_frame):
907
1042
  "mix": "(dict) - Mixing settings for the samples.",
908
1043
  "model_name": "(str) - Name of the Cellpose model.",
909
1044
  "model_type": "(str) - Type of model to use for the analysis.",
1045
+ "model_type_ml": "(str) - Type of model to use for machine learning.",
910
1046
  "nc": "(str) - Negative control identifier.",
911
1047
  "nc_loc": "(str) - Location of the negative control in the images.",
912
1048
  "negative_control": "(str) - Identifier for the negative control.",
@@ -937,12 +1073,7 @@ def generate_fields(variables, scrollable_frame):
937
1073
  "pathogen_background": "(float) - The background intensity for the pathogen channel. This will be used to remove background noise.",
938
1074
  "pathogen_chann_dim": "(int) - Dimension of the channel to use for pathogen segmentation.",
939
1075
  "pathogen_channel": "(int) - The channel to use for the pathogen. If None, the pathogen will not be segmented.",
940
- "pathogen_intensity_range": "(list) - Intensity range for pathogen segmentation.",
941
- "pathogen_loc": "(list) - The locations of the pathogen types in the images.",
942
- "pathogen_mask_dim": "(int) - The dimension of the array the pathogen mask is saved in.",
943
- "pathogen_min_size": "(int) - The minimum size of pathogen objects in pixels^2.",
944
- "pathogen_model": "(str) - Model to use for pathogen segmentation.",
945
- "pathogen_plate_metadata": "(str) - Metadata for the pathogen plate.",
1076
+ "pathogen_intensity_range": "(str) - Metadata for the pathogen plate.",
946
1077
  "pathogen_Signal_to_noise": "(float) - The signal-to-noise ratio for the pathogen channel. This will be used to determine the range of intensities to normalize images to for pathogen segmentation.",
947
1078
  "pathogen_size_range": "(list) - Size range for pathogen segmentation.",
948
1079
  "pathogen_types": "(list) - Types of pathogens to include in the analysis.",
@@ -1018,6 +1149,20 @@ def generate_fields(variables, scrollable_frame):
1018
1149
  "verbose": "(bool) - Whether to print verbose output during processing.",
1019
1150
  "weight_decay": "(float) - Weight decay for regularization.",
1020
1151
  "width_height": "(tuple) - Width and height of the input images.",
1152
+ "barcode_coordinates": "(list of lists) - Coordinates of the barcodes in the sequence.",
1153
+ "barcode_mapping": "dict - names and barecode csv files",
1154
+ "compression": "str - type of compression (e.g. zlib)",
1155
+ "complevel": "int - level of compression (0-9). Higher is slower and yealds smaller files",
1156
+ "file_type": "str - type of file to process",
1157
+ "model_path": "str - path to the model",
1158
+ "tar_path": "str - path to the tar file with image dataset",
1159
+ "score_threshold": "float - threshold for classification",
1160
+ "sample": "str - number of images to sample for tar dataset (including both classes). Default: None",
1161
+ "file_metadata": "str - string that must be present in image path to be included in the dataset",
1162
+ "apply_model_to_dataset": "bool - whether to apply model to the dataset",
1163
+ "train_channels": "list - channels to use for training",
1164
+ "dataset_mode": "str - How to generate train/test dataset.",
1165
+ "annotated_classes": "list - list of numbers in annotation column.",
1021
1166
  "um_per_pixel": "(float) - The micrometers per pixel for the images."
1022
1167
  }
1023
1168
 
@@ -1033,29 +1178,6 @@ def generate_fields(variables, scrollable_frame):
1033
1178
 
1034
1179
  return vars_dict
1035
1180
 
1036
-
1037
- categories = {
1038
- "General": ["src", "metadata_type", "custom_regex", "experiment", "channels", "magnification", "channel_dims"],
1039
- "Paths":["grna", "barcodes"],
1040
- "Regression":["class_1_threshold", "plate", "other", "fraction_threshold", "alpha", "remove_row_column_effect", "regression_type", "min_cell_count", "agg_type", "transform", "dependent_variable", "gene_weights_csv"],
1041
- "Cellpose":["from_scratch", "n_epochs", "width_height", "model_name", "custom_model", "resample", "rescale", "CP_prob", "flow_threshold", "percentiles", "circular", "invert", "diameter", "grayscale", "background", "Signal_to_noise", "resize", "target_height", "target_width"],
1042
- "Nucleus": ["nucleus_intensity_range", "nucleus_size_range", "nucleus_chann_dim", "nucleus_channel", "nucleus_background", "nucleus_Signal_to_noise", "nucleus_CP_prob", "nucleus_FT", "remove_background_nucleus", "nucleus_min_size", "nucleus_mask_dim", "nucleus_loc"],
1043
- "Cell": ["cell_intensity_range", "cell_size_range", "cell_chann_dim", "cell_channel", "cell_background", "cell_Signal_to_noise", "cell_CP_prob", "cell_FT", "remove_background_cell", "cell_min_size", "cell_mask_dim", "cytoplasm", "cytoplasm_min_size", "include_uninfected", "merge_edge_pathogen_cells", "adjust_cells"],
1044
- "Pathogen": ["pathogen_intensity_range", "pathogen_size_range", "pathogen_chann_dim", "pathogen_channel", "pathogen_background", "pathogen_Signal_to_noise", "pathogen_CP_prob", "pathogen_FT", "pathogen_model", "remove_background_pathogen", "pathogen_min_size", "pathogen_mask_dim"],
1045
- "Timelapse": ["fps", "timelapse_displacement", "timelapse_memory", "timelapse_frame_limits", "timelapse_remove_transient", "timelapse_mode", "timelapse_objects", "compartments"],
1046
- "Plot": ["plot_control", "plot_nr", "examples_to_plot", "normalize_plots", "normalize", "cmap", "figuresize", "plot_cluster_grids", "img_zoom", "row_limit", "color_by", "plot_images", "smooth_lines", "plot_points", "plot_outlines", "black_background", "plot_by_cluster", "heatmap_feature","grouping","min_max","cmap","save_figure"],
1047
- "Object Image": ["save_png", "dialate_pngs", "dialate_png_ratios", "png_size", "png_dims", "save_arrays", "normalize_by", "dialate_png_ratios", "crop_mode", "dialate_pngs", "normalize", "use_bounding_box"],
1048
- "Annotate Data": ["nc_loc", "pc_loc", "nc", "pc", "cell_plate_metadata","pathogen_types", "pathogen_plate_metadata", "treatment_plate_metadata", "metadata_types", "cell_types", "target","positive_control","negative_control", "location_column", "treatment_loc", "cells", "cell_loc", "pathogens", "pathogen_loc", "channel_of_interest", "measurement", "treatments", "um_per_pixel", "nr_imgs", "exclude", "exclude_conditions", "mix", "pos", "neg"],
1049
- "Measurements": ["remove_image_canvas", "remove_highly_correlated", "homogeneity", "homogeneity_distances", "radial_dist", "calculate_correlation", "manders_thresholds", "save_measurements", "tables", "image_nr", "dot_size", "filter_by", "remove_highly_correlated_features", "remove_low_variance_features", "channel_of_interest"],
1050
- "Advanced": ["plate_dict", "target_intensity_min", "cells_per_well", "include_multinucleated", "include_multiinfected", "include_noninfected", "backgrounds", "plot", "timelapse", "schedule", "test_size","exclude","n_repeats","top_features", "model_type","minimum_cell_count","n_estimators","preprocess", "remove_background", "normalize", "lower_percentile", "merge_pathogens", "batch_size", "filter", "save", "masks", "verbose", "randomize", "n_jobs", "train_mode","amsgrad","use_checkpoint","gradient_accumulation","gradient_accumulation_steps","intermedeate_save","pin_memory","n_jobs","channels","augment"],
1051
- "Clustering": ["eps","min_samples","analyze_clusters","clustering","remove_cluster_noise"],
1052
- "Embedding": ["visualize","n_neighbors","min_dist","metric","resnet_features","reduction_method","embedding_by_controls","col_to_compare","log_data"],
1053
- "Train DL Model": ["epochs", "loss_type", "optimizer_type","image_size","val_split","learning_rate","weight_decay","dropout_rate", "init_weights", "train", "classes"],
1054
- "Miscellaneous": ["all_to_mip", "pick_slice", "skip_mode", "upscale", "upscale_factor"],
1055
- "Test": ["test_mode", "test_images", "random_test", "test_nr", "test"],
1056
- "Sequencing": ["upstream", "downstream", "barecode_length_1", "barecode_length_2", "chunk_size"]
1057
- }
1058
-
1059
1181
  descriptions = {
1060
1182
  'mask': "\n\nHelp:\n- Generate Cells, Nuclei, Pathogens, and Cytoplasm masks from intensity images in src.\n- To ensure that spacr is installed correctly:\n- 1. Downloade the training set (click Download).\n- 2. Import settings (click settings navigate to downloaded dataset settings folder and import preprocess_generate_masks_settings.csv).\n- 3. Run the module.\n- 4. Proceed to the Measure module (click Measure in the menue bar).\n- For further help, click the Help button in the menue bar.",
1061
1183
 
@@ -1063,8 +1185,6 @@ descriptions = {
1063
1185
 
1064
1186
  'classify': "Train and Test any Torch Computer vision model. (Requires PNG images from the Measure module). Function: train_test_model from spacr.deep_spacr.\n\nKey Features:\n- Deep Learning Integration: Train and evaluate state-of-the-art Torch models for various classification tasks.\n- Flexible Training: Supports a wide range of Torch models, allowing customization based on specific research needs.\n- Data Requirement: Requires PNG images generated by the Measure module for training and testing.",
1065
1187
 
1066
- 'sequencing': "Find Barcodes and gRNA sequences in FASTQ files. (Requires paired-end FASTQ files, R1 and R2). Function: analyze_reads from spacr.sequencing.\n\nKey Features:\n- Barcode and gRNA Identification: Efficiently detect and extract barcode and gRNA sequences from raw sequencing data.\n- Paired-End Support: Specifically designed to handle paired-end FASTQ files, ensuring accurate sequence alignment and analysis.\n- High Throughput: Capable of processing large sequencing datasets quickly and accurately.",
1067
-
1068
1188
  'umap': "Generate UMAP or tSNE embeddings and represent points as single cell images. (Requires measurements.db and PNG images from the Measure module). Function: generate_image_umap from spacr.core.\n\nKey Features:\n- Dimensionality Reduction: Employ UMAP or tSNE algorithms to reduce high-dimensional data into two dimensions for visualization.\n- Single Cell Representation: Visualize embedding points as single cell images, providing an intuitive understanding of data clusters.\n- Data Integration: Requires measurements and images generated by the Measure module, ensuring comprehensive data representation.",
1069
1189
 
1070
1190
  'train_cellpose': "Train custom Cellpose models for your specific dataset. Function: train_cellpose_model from spacr.core.\n\nKey Features:\n- Custom Model Training: Train Cellpose models on your dataset to improve segmentation accuracy.\n- Data Adaptation: Tailor the model to handle specific types of biological samples more effectively.\n- Advanced Training Options: Supports various training parameters and configurations for optimized performance.",
@@ -1075,8 +1195,8 @@ descriptions = {
1075
1195
 
1076
1196
  'cellpose_all': "Run Cellpose on all images in your dataset and obtain masks and measurements. Function: cellpose_analysis from spacr.cellpose.\n\nKey Features:\n- End-to-End Analysis: Perform both segmentation and measurement extraction in a single step.\n- Efficiency: Process entire datasets with minimal manual intervention.\n- Comprehensive Output: Obtain detailed masks and corresponding measurements for further analysis.",
1077
1197
 
1078
- 'map_barcodes': "Map barcodes to your data for identification and tracking. Function: barcode_mapping_tools from spacr.sequencing.\n\nKey Features:\n- Barcode Integration: Efficiently map and integrate barcode information into your dataset.\n- Tracking: Enable tracking and identification of samples using barcodes.\n- Compatibility: Works with sequencing data to ensure accurate mapping and analysis.",
1079
-
1198
+ 'map_barcodes': "\n\nHelp:\n- 1 .Generate consensus read fastq files from R1 and R2 files.\n- 2. Map barcodes from sequencing data for identification and tracking of samples.\n- 3. Run the module to extract and map barcodes from your FASTQ files in chunks.\n- Prepare your barcode CSV files with the appropriate 'name' and 'sequence' columns.\n- Configure the barcode settings (coordinates and reverse complement flags) according to your experimental setup.\n- For further help, click the Help button in the menu bar.",
1199
+
1080
1200
  'regression': "Perform regression analysis on your data. Function: regression_tools from spacr.analysis.\n\nKey Features:\n- Statistical Analysis: Conduct various types of regression analysis to identify relationships within your data.\n- Flexible Options: Supports multiple regression models and configurations.\n- Data Insight: Gain deeper insights into your dataset through advanced regression techniques.",
1081
1201
 
1082
1202
  'recruitment': "Analyze recruitment data to understand sample recruitment dynamics. Function: recruitment_analysis_tools from spacr.analysis.\n\nKey Features:\n- Recruitment Analysis: Investigate and analyze the recruitment of samples over time or conditions.\n- Visualization: Generate visualizations to represent recruitment trends and patterns.\n- Integration: Utilize data from various sources for a comprehensive recruitment analysis."
@@ -1085,7 +1205,7 @@ descriptions = {
1085
1205
  def set_annotate_default_settings(settings):
1086
1206
  settings.setdefault('src', 'path')
1087
1207
  settings.setdefault('image_type', 'cell_png')
1088
- settings.setdefault('channels', 'r,g,b')
1208
+ settings.setdefault('channels', "'r','g','b'")
1089
1209
  settings.setdefault('img_size', 200)
1090
1210
  settings.setdefault('annotation_column', 'test')
1091
1211
  settings.setdefault('normalize', 'False')
@@ -1094,3 +1214,15 @@ def set_annotate_default_settings(settings):
1094
1214
  settings.setdefault('threshold', '2')
1095
1215
  return settings
1096
1216
 
1217
+ def set_default_generate_barecode_mapping(settings={}):
1218
+ settings.setdefault('src', 'path')
1219
+ settings.setdefault('chunk_size', 100000)
1220
+
1221
+ settings.setdefault('barcode_mapping', {'row': ['/home/carruthers/Documents/row_barcodes.csv',(80, 88), True],
1222
+ 'grna': ['/home/carruthers/Documents/grna_barcodes.csv',(34, 55), True],
1223
+ 'column': ['/home/carruthers/Documents/column_barcodes.csv',(0, 7), False]})
1224
+
1225
+ settings.setdefault('n_jobs', None)
1226
+ settings.setdefault('compression', 'zlib')
1227
+ settings.setdefault('complevel', 5)
1228
+ return settings
spacr/utils.py CHANGED
@@ -1,4 +1,4 @@
1
- import sys, os, re, sqlite3, torch, torchvision, random, string, shutil, cv2, tarfile, glob, psutil, platform
1
+ import sys, os, re, sqlite3, torch, torchvision, random, string, shutil, cv2, tarfile, glob, psutil, platform, gzip
2
2
 
3
3
  import numpy as np
4
4
  from cellpose import models as cp_models
@@ -88,7 +88,7 @@ from sklearn.cluster import KMeans
88
88
  from scipy import stats
89
89
 
90
90
 
91
- def print_progress(files_processed, files_to_process, n_jobs, time_ls=None, batch_size=None, operation_type=""):
91
+ def print_progress(files_processed, files_to_process, n_jobs, time_ls=None, batch_size=None, operation_type="", metricks=None):
92
92
  if isinstance(files_processed, list):
93
93
  files_processed = len(set(files_processed))
94
94
  if isinstance(files_to_process, list):
@@ -117,9 +117,10 @@ def print_progress(files_processed, files_to_process, n_jobs, time_ls=None, batc
117
117
  average_time_img = average_time / batch_size
118
118
  time_info = f'Time/batch: {average_time:.3f}sec, Time/image: {average_time_img:.3f}sec, Time_left: {time_left:.3f} min.'
119
119
 
120
- print(f'Progress: {files_processed}/{files_to_process}, operation_type: {operation_type} {time_info}')
121
-
122
-
120
+ if metricks is None:
121
+ print(f'Progress: {files_processed}/{files_to_process}, operation_type: {operation_type} {time_info}')
122
+ else:
123
+ print(f'Progress: {files_processed}/{files_to_process}, {metricks}, operation_type: {operation_type} {time_info}')
123
124
 
124
125
  def reset_mp():
125
126
  current_method = get_start_method()
@@ -4424,3 +4425,10 @@ def correct_masks(src):
4424
4425
  cell_path = os.path.join(src,'norm_channel_stack', 'cell_mask_stack')
4425
4426
  convert_and_relabel_masks(cell_path)
4426
4427
  _load_and_concatenate_arrays(src, [0,1,2,3], 1, 0, 2)
4428
+
4429
+ def count_reads_in_fastq(fastq_file):
4430
+ count = 0
4431
+ with gzip.open(fastq_file, "rt") as f:
4432
+ for _ in f:
4433
+ count += 1
4434
+ return count // 4
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: spacr
3
- Version: 0.2.53
3
+ Version: 0.2.56
4
4
  Summary: Spatial phenotype analysis of crisp screens (SpaCr)
5
5
  Home-page: https://github.com/EinarOlafsson/spacr
6
6
  Author: Einar Birnir Olafsson
@@ -44,6 +44,7 @@ Requires-Dist: gputil <2.0,>=1.4.0
44
44
  Requires-Dist: gpustat <2.0,>=1.1.1
45
45
  Requires-Dist: pyautogui <1.0,>=0.9.54
46
46
  Requires-Dist: tables <4.0,>=3.8.0
47
+ Requires-Dist: rapidfuzz <4.0,>=3.9
47
48
  Requires-Dist: huggingface-hub <0.25,>=0.24.0
48
49
  Provides-Extra: dev
49
50
  Requires-Dist: pytest <3.11,>=3.9 ; extra == 'dev'
@@ -8,23 +8,23 @@ spacr/app_measure.py,sha256=_K7APYIeOKpV6e_LcqabBjvEi7mfq9Fch8175x1x0k8,162
8
8
  spacr/app_sequencing.py,sha256=DjG26jy4cpddnV8WOOAIiExtOe9MleVMY4MFa5uTo5w,157
9
9
  spacr/app_umap.py,sha256=ZWAmf_OsIKbYvolYuWPMYhdlVe-n2CADoJulAizMiEo,153
10
10
  spacr/chris.py,sha256=YlBjSgeZaY8HPy6jkrT_ISAnCMAKVfvCxF0I9eAZLFM,2418
11
- spacr/core.py,sha256=rQkZUkzwHl3V_gKqgWTEdoem8kmmVDtlrcfJXT7E5aI,148169
12
- spacr/deep_spacr.py,sha256=ASBsN4JpHp_3S-91JUsB34IWTjTGPYI7jKV2qZnUR5M,37005
11
+ spacr/core.py,sha256=3r32IJlvT6ReN7cgAU2frFkdACQyN48mgUETNzrTrwE,160329
12
+ spacr/deep_spacr.py,sha256=Yge4Tu6k7jjIQhuCAOVZkDS8dQR1cDvYtySfahAp6Lg,40153
13
13
  spacr/graph_learning.py,sha256=1tR-ZxvXE3dBz1Saw7BeVFcrsUFu9OlUZeZVifih9eo,13070
14
- spacr/gui.py,sha256=qgP9TAMy1MwDCedkp_CifcuGhs4stWkX1KaGQpL_ES4,7225
15
- spacr/gui_core.py,sha256=CZ_tfCJniCgE5pUQkW2o7gakeYfpxxQAEB_bKWxTCl0,30844
16
- spacr/gui_elements.py,sha256=6dqMk2TLDTKVwr05hctw-cgRMoz2mACPL1-WK-CWvR8,109119
17
- spacr/gui_utils.py,sha256=wFG5QZvLjpETieY0t4kNSG-vh7pL0WK4MbK-m3fNf4w,27116
18
- spacr/io.py,sha256=UYISLJgwpwyoTxKy1v1wzFQ6cLX77h2rHh4t5fF1_4w,115461
14
+ spacr/gui.py,sha256=dSEQhzZgIOT7SOonbIp07-zXcLaEt6M7sRiIREpDv64,7226
15
+ spacr/gui_core.py,sha256=p2Rx97U92KDM4DW1hfFuYRh_4mQ1aSN_0ieOH0vB2g8,30933
16
+ spacr/gui_elements.py,sha256=grC-_jlyJir3W-GLbnN5zTfq9D7ef-KCt_WnlvKXmpY,111819
17
+ spacr/gui_utils.py,sha256=gWDO-BESGsNR26g9t1xy3BLwSdIDXHSmJ7w34V-A56E,28094
18
+ spacr/io.py,sha256=YVykRy_kgC0LF7oBdJoqtnFoTEGx5mfldboHpZXxe9E,115603
19
19
  spacr/logger.py,sha256=7Zqr3TuuOQLWT32gYr2q1qvv7x0a2JhLANmZcnBXAW8,670
20
20
  spacr/measure.py,sha256=4rmzH_a5Y0s1qALVi6YRut3xpnkJXs5vzeTPCEf3QS8,54871
21
21
  spacr/plot.py,sha256=4o9X76ur2kBe6TtOrbIPfo04iC60OZ1rNJoegBxtLmk,72361
22
- spacr/sequencing.py,sha256=dBZsC_9CvHLoSAfOpVZNDP2D8-hAwAjW-IHcoYBxqN0,87372
23
- spacr/settings.py,sha256=I_YUwesZURknLDO9ML5akW56tGjOYHot8nYLWkdcEhw,62104
22
+ spacr/sequencing.py,sha256=dt4urG5tL85qh2sGEi2F9rtOqBQR-eCfcqOvcAtQIzg,75849
23
+ spacr/settings.py,sha256=Vbm1F5I926t3CJ0E7Y3m9pMIFZz1DzzSKNil6kDChIM,67904
24
24
  spacr/sim.py,sha256=FveaVgBi3eypO2oVB5Dx-v0CC1Ny7UPfXkJiiRRodAk,71212
25
25
  spacr/sim_app.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
26
26
  spacr/timelapse.py,sha256=KMYCgHzf9LTZe-lWl5mvH2EjbKRE6OhpwdY13wEumGc,39504
27
- spacr/utils.py,sha256=TT2gb2nmhNSwkHheaOfpDPXhpE90_6Er3nA77gAbO3U,188225
27
+ spacr/utils.py,sha256=Q1MgPMSf6coNta4mcb6UaWGtnGW1JeruJa7SxtOzvMk,188567
28
28
  spacr/version.py,sha256=axH5tnGwtgSnJHb5IDhiu4Zjk5GhLyAEDRe-rnaoFOA,409
29
29
  spacr/resources/font/open_sans/OFL.txt,sha256=bGMoWBRrE2RcdzDiuYiB8A9OVFlJ0sA2imWwce2DAdo,4484
30
30
  "spacr/resources/font/open_sans/OpenSans-Italic-VariableFont_wdth,wght.ttf",sha256=QSoWv9h46CRX_fdlqFM3O2d3-PF3R1srnb4zUezcLm0,580280
@@ -92,9 +92,9 @@ spacr/resources/icons/umap.png,sha256=dOLF3DeLYy9k0nkUybiZMe1wzHQwLJFRmgccppw-8b
92
92
  spacr/resources/models/cp/toxo_plaque_cyto_e25000_X1120_Y1120.CP_model,sha256=z8BbHWZPRnE9D_BHO0fBREE85c1vkltDs-incs2ytXQ,26566572
93
93
  spacr/resources/models/cp/toxo_plaque_cyto_e25000_X1120_Y1120.CP_model_settings.csv,sha256=fBAGuL_B8ERVdVizO3BHozTDSbZUh1yFzsYK3wkQN68,420
94
94
  spacr/resources/models/cp/toxo_pv_lumen.CP_model,sha256=2y_CindYhmTvVwBH39SNILF3rI3x9SsRn6qrMxHy3l0,26562451
95
- spacr-0.2.53.dist-info/LICENSE,sha256=SR-2MeGc6SCM1UORJYyarSWY_A-JaOMFDj7ReSs9tRM,1083
96
- spacr-0.2.53.dist-info/METADATA,sha256=808nN91pxiTVF7SdjXiVl0yUijPcm_bQU4sLGgwm1Co,5223
97
- spacr-0.2.53.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
98
- spacr-0.2.53.dist-info/entry_points.txt,sha256=BMC0ql9aNNpv8lUZ8sgDLQMsqaVnX5L535gEhKUP5ho,296
99
- spacr-0.2.53.dist-info/top_level.txt,sha256=GJPU8FgwRXGzKeut6JopsSRY2R8T3i9lDgya42tLInY,6
100
- spacr-0.2.53.dist-info/RECORD,,
95
+ spacr-0.2.56.dist-info/LICENSE,sha256=SR-2MeGc6SCM1UORJYyarSWY_A-JaOMFDj7ReSs9tRM,1083
96
+ spacr-0.2.56.dist-info/METADATA,sha256=yXp1WrJwtCMC3ha79ztaaqzfvPa95B2mmVCAm9Uirgk,5259
97
+ spacr-0.2.56.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
98
+ spacr-0.2.56.dist-info/entry_points.txt,sha256=BMC0ql9aNNpv8lUZ8sgDLQMsqaVnX5L535gEhKUP5ho,296
99
+ spacr-0.2.56.dist-info/top_level.txt,sha256=GJPU8FgwRXGzKeut6JopsSRY2R8T3i9lDgya42tLInY,6
100
+ spacr-0.2.56.dist-info/RECORD,,
File without changes