spacr 0.3.1__py3-none-any.whl → 0.3.22__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. spacr/__init__.py +19 -3
  2. spacr/cellpose.py +311 -0
  3. spacr/core.py +245 -2494
  4. spacr/deep_spacr.py +316 -48
  5. spacr/gui.py +1 -0
  6. spacr/gui_core.py +74 -63
  7. spacr/gui_elements.py +110 -5
  8. spacr/gui_utils.py +346 -6
  9. spacr/io.py +680 -141
  10. spacr/logger.py +28 -9
  11. spacr/measure.py +107 -95
  12. spacr/mediar.py +0 -3
  13. spacr/ml.py +1051 -0
  14. spacr/openai.py +37 -0
  15. spacr/plot.py +707 -20
  16. spacr/resources/data/lopit.csv +3833 -0
  17. spacr/resources/data/toxoplasma_metadata.csv +8843 -0
  18. spacr/resources/icons/convert.png +0 -0
  19. spacr/resources/{models/cp/toxo_plaque_cyto_e25000_X1120_Y1120.CP_model → icons/dna_matrix.mp4} +0 -0
  20. spacr/sequencing.py +241 -1311
  21. spacr/settings.py +134 -47
  22. spacr/sim.py +0 -2
  23. spacr/submodules.py +349 -0
  24. spacr/timelapse.py +0 -2
  25. spacr/toxo.py +238 -0
  26. spacr/utils.py +419 -180
  27. {spacr-0.3.1.dist-info → spacr-0.3.22.dist-info}/METADATA +31 -22
  28. {spacr-0.3.1.dist-info → spacr-0.3.22.dist-info}/RECORD +32 -33
  29. spacr/chris.py +0 -50
  30. spacr/graph_learning.py +0 -340
  31. spacr/resources/MEDIAR/.git +0 -1
  32. spacr/resources/MEDIAR_weights/.DS_Store +0 -0
  33. spacr/resources/icons/.DS_Store +0 -0
  34. spacr/resources/icons/spacr_logo_rotation.gif +0 -0
  35. spacr/resources/models/cp/toxo_plaque_cyto_e25000_X1120_Y1120.CP_model_settings.csv +0 -23
  36. spacr/resources/models/cp/toxo_pv_lumen.CP_model +0 -0
  37. spacr/sim_app.py +0 -0
  38. {spacr-0.3.1.dist-info → spacr-0.3.22.dist-info}/LICENSE +0 -0
  39. {spacr-0.3.1.dist-info → spacr-0.3.22.dist-info}/WHEEL +0 -0
  40. {spacr-0.3.1.dist-info → spacr-0.3.22.dist-info}/entry_points.txt +0 -0
  41. {spacr-0.3.1.dist-info → spacr-0.3.22.dist-info}/top_level.txt +0 -0
spacr/settings.py CHANGED
@@ -2,9 +2,9 @@ import os, ast
2
2
 
3
3
  def set_default_plot_merge_settings():
4
4
  settings = {}
5
- settings.setdefault('include_noninfected', True)
6
- settings.setdefault('include_multiinfected', 10)
7
- settings.setdefault('include_multinucleated', 1)
5
+ settings.setdefault('uninfected', True)
6
+ settings.setdefault('pathogen_limit', 10)
7
+ settings.setdefault('nuclei_limit', 1)
8
8
  settings.setdefault('remove_background', False)
9
9
  settings.setdefault('filter_min_max', None)
10
10
  settings.setdefault('channel_dims', [0,1,2,3])
@@ -217,7 +217,7 @@ def set_default_umap_image_settings(settings={}):
217
217
  settings.setdefault('verbose',True)
218
218
  return settings
219
219
 
220
- def get_measure_crop_settings(settings):
220
+ def get_measure_crop_settings(settings={}):
221
221
 
222
222
  settings.setdefault('src', 'path')
223
223
  settings.setdefault('verbose', False)
@@ -246,7 +246,7 @@ def get_measure_crop_settings(settings):
246
246
  settings.setdefault('normalize_by','png')
247
247
  settings.setdefault('crop_mode',['cell'])
248
248
  settings.setdefault('dialate_pngs', False)
249
- settings.setdefault('dialate_png_ratios', [0.2])
249
+ settings.setdefault('dialate_png_ratios', [0.2, 0,2])
250
250
 
251
251
  # Timelapsed settings
252
252
  settings.setdefault('timelapse', False)
@@ -291,6 +291,9 @@ def set_default_analyze_screen(settings):
291
291
  settings.setdefault('positive_control','c2')
292
292
  settings.setdefault('negative_control','c1')
293
293
  settings.setdefault('exclude',None)
294
+ settings.setdefault('nuclei_limit',True)
295
+ settings.setdefault('pathogen_limit',3)
296
+ settings.setdefault('uninfected',True)
294
297
  settings.setdefault('n_repeats',10)
295
298
  settings.setdefault('top_features',30)
296
299
  settings.setdefault('remove_low_variance_features',True)
@@ -345,6 +348,9 @@ def set_generate_training_dataset_defaults(settings):
345
348
  settings.setdefault('channel_of_interest',3)
346
349
  settings.setdefault('custom_measurement',None)
347
350
  settings.setdefault('tables',None)
351
+ settings.setdefault('nuclei_limit',True)
352
+ settings.setdefault('pathogen_limit',True)
353
+ settings.setdefault('uninfected',True)
348
354
  settings.setdefault('png_type','cell_png')
349
355
 
350
356
  return settings
@@ -392,20 +398,50 @@ def deep_spacr_defaults(settings):
392
398
  settings.setdefault('n_jobs',cores)
393
399
  settings.setdefault('train_channels',['r','g','b'])
394
400
  settings.setdefault('augment',False)
395
- settings.setdefault('preload_batches', 3)
396
401
  settings.setdefault('verbose',True)
397
402
  settings.setdefault('apply_model_to_dataset',True)
398
403
  settings.setdefault('file_metadata',None)
399
404
  settings.setdefault('sample',None)
400
405
  settings.setdefault('experiment','exp.')
401
406
  settings.setdefault('score_threshold',0.5)
402
- settings.setdefault('tar_path','path')
407
+ settings.setdefault('dataset','path')
403
408
  settings.setdefault('model_path','path')
404
409
  settings.setdefault('file_type','cell_png')
405
410
  settings.setdefault('generate_training_dataset', True)
406
411
  settings.setdefault('train_DL_model', True)
407
412
  return settings
408
413
 
414
+ def get_train_test_model_settings(settings):
415
+ settings.setdefault('src', 'path')
416
+ settings.setdefault('train', True)
417
+ settings.setdefault('test', False)
418
+ settings.setdefault('custom_model', False)
419
+ settings.setdefault('classes', ['nc','pc'])
420
+ settings.setdefault('train_channels', ['r','g','b'])
421
+ settings.setdefault('model_type', 'maxvit_t')
422
+ settings.setdefault('optimizer_type', 'adamw')
423
+ settings.setdefault('schedule', 'reduce_lr_on_plateau')
424
+ settings.setdefault('loss_type', 'focal_loss')
425
+ settings.setdefault('normalize', True)
426
+ settings.setdefault('image_size', 224)
427
+ settings.setdefault('batch_size', 64)
428
+ settings.setdefault('epochs', 100)
429
+ settings.setdefault('val_split', 0.1)
430
+ settings.setdefault('learning_rate', 0.0001)
431
+ settings.setdefault('weight_decay', 0.00001)
432
+ settings.setdefault('dropout_rate', 0.1)
433
+ settings.setdefault('init_weights', True)
434
+ settings.setdefault('amsgrad', True)
435
+ settings.setdefault('use_checkpoint', True)
436
+ settings.setdefault('gradient_accumulation', True)
437
+ settings.setdefault('gradient_accumulation_steps', 4)
438
+ settings.setdefault('intermedeate_save',True)
439
+ settings.setdefault('pin_memory', True)
440
+ settings.setdefault('n_jobs', 30)
441
+ settings.setdefault('augment', True)
442
+ settings.setdefault('verbose', True)
443
+ return settings
444
+
409
445
  def get_analyze_recruitment_default_settings(settings):
410
446
  settings.setdefault('src','path')
411
447
  settings.setdefault('target','protein')
@@ -425,12 +461,12 @@ def get_analyze_recruitment_default_settings(settings):
425
461
  settings.setdefault('pathogen_mask_dim',6)
426
462
  settings.setdefault('channel_of_interest',2)
427
463
  settings.setdefault('plot',True)
428
- settings.setdefault('plot_nr',10)
464
+ settings.setdefault('plot_nr',3)
429
465
  settings.setdefault('plot_control',True)
430
466
  settings.setdefault('figuresize',10)
431
- settings.setdefault('include_noninfected',True)
432
- settings.setdefault('include_multiinfected',10)
433
- settings.setdefault('include_multinucleated',1)
467
+ settings.setdefault('uninfected',True)
468
+ settings.setdefault('pathogen_limit',10)
469
+ settings.setdefault('nuclei_limit',1)
434
470
  settings.setdefault('cells_per_well',0)
435
471
  settings.setdefault('pathogen_size_range',[0,100000])
436
472
  settings.setdefault('nucleus_size_range',[0,100000])
@@ -481,21 +517,33 @@ def get_train_cellpose_default_settings(settings):
481
517
  settings.setdefault('verbose',True)
482
518
  return settings
483
519
 
520
+ def set_generate_dataset_defaults(settings):
521
+ settings.setdefault('src','path')
522
+ settings.setdefault('file_metadata',None)
523
+ settings.setdefault('experiment','experiment_1')
524
+ settings.setdefault('sample',None)
525
+ return settings
526
+
484
527
  def get_perform_regression_default_settings(settings):
485
- settings.setdefault('gene_weights_csv', '/nas_mnt/carruthers/Einar/mitoscreen/sequencing/combined_reads/EO1_combined/EO1_combined_combination_counts.csv')
528
+ settings.setdefault('highlight','239740')
486
529
  settings.setdefault('dependent_variable','predictions')
487
530
  settings.setdefault('transform',None)
488
531
  settings.setdefault('agg_type','mean')
489
532
  settings.setdefault('min_cell_count',25)
490
533
  settings.setdefault('regression_type','ols')
491
- settings.setdefault('remove_row_column_effect',False)
534
+ settings.setdefault('random_row_column_effects',False)
492
535
  settings.setdefault('alpha',1)
493
536
  settings.setdefault('fraction_threshold',0.1)
537
+ settings.setdefault('location_column','column')
494
538
  settings.setdefault('nc','c1')
495
539
  settings.setdefault('pc','c2')
496
540
  settings.setdefault('other','c3')
497
541
  settings.setdefault('plate','plate1')
498
542
  settings.setdefault('class_1_threshold',None)
543
+ settings.setdefault('cov_type',None)
544
+ settings.setdefault('metadata_files',['/home/carruthers/Documents/TGME49_Summary.csv','/home/carruthers/Documents/TGGT1_Summary.csv'])
545
+ settings.setdefault('toxo', True)
546
+
499
547
 
500
548
  if settings['regression_type'] == 'quantile':
501
549
  print(f"Using alpha as quantile for quantile regression, alpha: {settings['alpha']}")
@@ -552,9 +600,10 @@ def get_identify_masks_finetune_default_settings(settings):
552
600
 
553
601
  q = None
554
602
  expected_types = {
555
- "src": str,
603
+ "src": (str, list),
556
604
  "metadata_type": str,
557
605
  "custom_regex": (str, type(None)),
606
+ "cov_type": (str, type(None)),
558
607
  "experiment": str,
559
608
  "channels": list,
560
609
  "magnification": int,
@@ -628,9 +677,9 @@ expected_types = {
628
677
  "measurement": str,
629
678
  "nr_imgs": int,
630
679
  "um_per_pixel": (int, float),
631
- "include_noninfected": bool,
632
- "include_multiinfected": int,
633
- "include_multinucleated": int,
680
+ "uninfected": bool,
681
+ "pathogen_limit": int,
682
+ "nuclei_limit": int,
634
683
  "filter_min_max": (list, type(None)),
635
684
  "channel_dims": list,
636
685
  "backgrounds": list,
@@ -767,7 +816,7 @@ expected_types = {
767
816
  "agg_type": str,
768
817
  "min_cell_count": int,
769
818
  "regression_type": str,
770
- "remove_row_column_effect": bool,
819
+ "random_row_column_effects": bool,
771
820
  "alpha": float,
772
821
  "fraction_threshold": float,
773
822
  "class_1_threshold": (float, type(None)),
@@ -807,7 +856,7 @@ expected_types = {
807
856
  'reverse_complement':bool,
808
857
  'file_type':str,
809
858
  'model_path':str,
810
- 'tar_path':str,
859
+ 'dataset':str,
811
860
  'score_threshold':float,
812
861
  'sample':None,
813
862
  'file_metadata':None,
@@ -829,34 +878,33 @@ expected_types = {
829
878
  "png_type":str,
830
879
  "custom_model_path":str,
831
880
  "generate_training_dataset":bool,
832
- 'preload_batches':int,
833
881
  "segmentation_mode":str,
834
882
  "train_DL_model":bool,
835
883
  }
836
884
 
837
- categories = {"General": ["src", "metadata_type", "custom_regex", "experiment", "channels", "magnification", "channel_dims", "apply_model_to_dataset", "generate_training_dataset", "train_DL_model", "segmentation_mode"],
838
- "Cell": ["cell_intensity_range", "cell_size_range", "cell_chann_dim", "cell_channel", "cell_background", "cell_Signal_to_noise", "cell_CP_prob", "cell_FT", "remove_background_cell", "cell_min_size", "cell_mask_dim", "cytoplasm", "cytoplasm_min_size", "include_uninfected", "merge_edge_pathogen_cells", "adjust_cells"],
885
+ categories = {"Paths":[ "src", "grna", "barcodes", "custom_model_path", "dataset","model_path","grna_csv","row_csv","column_csv"],
886
+ "General": ["metadata_type", "custom_regex", "experiment", "channels", "magnification", "channel_dims", "apply_model_to_dataset", "generate_training_dataset", "train_DL_model", "segmentation_mode"],
887
+ "Cellpose":["from_scratch", "n_epochs", "width_height", "model_name", "custom_model", "resample", "rescale", "CP_prob", "flow_threshold", "percentiles", "circular", "invert", "diameter", "grayscale", "background", "Signal_to_noise", "resize", "target_height", "target_width"],
888
+ "Cell": ["cell_intensity_range", "cell_size_range", "cell_chann_dim", "cell_channel", "cell_background", "cell_Signal_to_noise", "cell_CP_prob", "cell_FT", "remove_background_cell", "cell_min_size", "cell_mask_dim", "cytoplasm", "cytoplasm_min_size", "include_uninfected", "merge_edge_pathogen_cells", "adjust_cells", "cells", "cell_loc"],
839
889
  "Nucleus": ["nucleus_intensity_range", "nucleus_size_range", "nucleus_chann_dim", "nucleus_channel", "nucleus_background", "nucleus_Signal_to_noise", "nucleus_CP_prob", "nucleus_FT", "remove_background_nucleus", "nucleus_min_size", "nucleus_mask_dim", "nucleus_loc"],
840
- "Pathogen": ["pathogen_intensity_range", "pathogen_size_range", "pathogen_chann_dim", "pathogen_channel", "pathogen_background", "pathogen_Signal_to_noise", "pathogen_CP_prob", "pathogen_FT", "pathogen_model", "remove_background_pathogen", "pathogen_min_size", "pathogen_mask_dim"],
841
- "Timelapse": ["fps", "timelapse_displacement", "timelapse_memory", "timelapse_frame_limits", "timelapse_remove_transient", "timelapse_mode", "timelapse_objects", "compartments"],
842
- "Plot": ["plot_control", "plot_nr", "examples_to_plot", "normalize_plots", "normalize", "cmap", "figuresize", "plot_cluster_grids", "img_zoom", "row_limit", "color_by", "plot_images", "smooth_lines", "plot_points", "plot_outlines", "black_background", "plot_by_cluster", "heatmap_feature","grouping","min_max","cmap","save_figure"],
890
+ "Pathogen": ["pathogen_intensity_range", "pathogen_size_range", "pathogen_chann_dim", "pathogen_channel", "pathogen_background", "pathogen_Signal_to_noise", "pathogen_CP_prob", "pathogen_FT", "pathogen_model", "remove_background_pathogen", "pathogen_min_size", "pathogen_mask_dim", "pathogens", "pathogen_loc", "pathogen_types", "pathogen_plate_metadata", ],
843
891
  "Measurements": ["remove_image_canvas", "remove_highly_correlated", "homogeneity", "homogeneity_distances", "radial_dist", "calculate_correlation", "manders_thresholds", "save_measurements", "tables", "image_nr", "dot_size", "filter_by", "remove_highly_correlated_features", "remove_low_variance_features", "channel_of_interest"],
844
- "Paths":["grna", "barcodes", "custom_model_path", "tar_path","model_path"],
845
- "Sequencing": ["upstream", "downstream", "barecode_length_1", "barecode_length_2", "chunk_size", "barcode_mapping", "reverse_complement", "barcode_coordinates", "complevel", "compression","plate_dict"],
846
- "Embedding": ["visualize","n_neighbors","min_dist","metric","resnet_features","reduction_method","embedding_by_controls","col_to_compare","log_data"],
847
- "Clustering": ["eps","min_samples","analyze_clusters","clustering","remove_cluster_noise"],
848
892
  "Object Image": ["save_png", "dialate_pngs", "dialate_png_ratios", "png_size", "png_dims", "save_arrays", "normalize_by", "dialate_png_ratios", "crop_mode", "dialate_pngs", "normalize", "use_bounding_box"],
849
- "Annotation": ["nc_loc", "pc_loc", "nc", "pc", "cell_plate_metadata","pathogen_types", "pathogen_plate_metadata", "treatment_plate_metadata", "metadata_types", "cell_types", "target","positive_control","negative_control", "location_column", "treatment_loc", "cells", "cell_loc", "pathogens", "pathogen_loc", "channel_of_interest", "measurement", "treatments", "um_per_pixel", "nr_imgs", "exclude", "exclude_conditions", "mix", "pos", "neg"],
850
- "Machine Learning":[],
851
- "Deep Learning": ["png_type","score_threshold","file_type", "train_channels", "epochs", "loss_type", "optimizer_type","image_size","val_split","learning_rate","weight_decay","dropout_rate", "init_weights", "train", "classes", "augment"],
852
- "Generate Dataset":["preload_batches", "file_metadata","class_metadata", "annotation_column","annotated_classes", "dataset_mode", "metadata_type_by","custom_measurement", "sample", "size"],
853
- "Cellpose":["from_scratch", "n_epochs", "width_height", "model_name", "custom_model", "resample", "rescale", "CP_prob", "flow_threshold", "percentiles", "circular", "invert", "diameter", "grayscale", "background", "Signal_to_noise", "resize", "target_height", "target_width"],
854
- "Regression":["class_1_threshold", "plate", "other", "fraction_threshold", "alpha", "remove_row_column_effect", "regression_type", "min_cell_count", "agg_type", "transform", "dependent_variable", "gene_weights_csv"],
855
- "Miscellaneous": ["all_to_mip", "pick_slice", "skip_mode", "upscale", "upscale_factor"],
893
+ "Sequencing": ["signal_direction","mode","comp_level","comp_type","save_h5","expected_end","offset","target_sequence","regex", "highlight"],
894
+ "Generate Dataset":["file_metadata","class_metadata", "annotation_column","annotated_classes", "dataset_mode", "metadata_type_by","custom_measurement", "sample", "size"],
895
+ "Hyperparamiters (Training)": ["png_type", "score_threshold","file_type", "train_channels", "epochs", "loss_type", "optimizer_type","image_size","val_split","learning_rate","weight_decay","dropout_rate", "init_weights", "train", "classes", "augment", "amsgrad","use_checkpoint","gradient_accumulation","gradient_accumulation_steps","intermedeate_save","pin_memory"],
896
+ "Hyperparamiters (Embedding)": ["visualize","n_neighbors","min_dist","metric","resnet_features","reduction_method","embedding_by_controls","col_to_compare","log_data"],
897
+ "Hyperparamiters (Clustering)": ["eps","min_samples","analyze_clusters","clustering","remove_cluster_noise"],
898
+ "Hyperparamiters (Regression)":["cov_type", "class_1_threshold", "plate", "other", "fraction_threshold", "alpha", "random_row_column_effects", "regression_type", "min_cell_count", "agg_type", "transform", "dependent_variable"],
899
+ "Annotation": ["nc_loc", "pc_loc", "nc", "pc", "cell_plate_metadata","treatment_plate_metadata", "metadata_types", "cell_types", "target","positive_control","negative_control", "location_column", "treatment_loc", "channel_of_interest", "measurement", "treatments", "um_per_pixel", "nr_imgs", "exclude", "exclude_conditions", "mix", "pos", "neg"],
900
+ "Plot": ["plot", "plot_control", "plot_nr", "examples_to_plot", "normalize_plots", "cmap", "figuresize", "plot_cluster_grids", "img_zoom", "row_limit", "color_by", "plot_images", "smooth_lines", "plot_points", "plot_outlines", "black_background", "plot_by_cluster", "heatmap_feature","grouping","min_max","cmap","save_figure"],
856
901
  "Test": ["test_mode", "test_images", "random_test", "test_nr", "test", "test_split"],
857
- "Advanced": ["target_intensity_min", "cells_per_well", "include_multinucleated", "include_multiinfected", "include_noninfected", "backgrounds", "plot", "timelapse", "schedule", "test_size","exclude","n_repeats","top_features", "model_type_ml", "model_type","minimum_cell_count","n_estimators","preprocess", "remove_background", "normalize", "lower_percentile", "merge_pathogens", "batch_size", "filter", "save", "masks", "verbose", "randomize", "n_jobs", "amsgrad","use_checkpoint","gradient_accumulation","gradient_accumulation_steps","intermedeate_save","pin_memory"]
902
+ "Timelapse": ["timelapse", "fps", "timelapse_displacement", "timelapse_memory", "timelapse_frame_limits", "timelapse_remove_transient", "timelapse_mode", "timelapse_objects", "compartments"],
903
+ "Advanced": ["target_intensity_min", "cells_per_well", "nuclei_limit", "pathogen_limit", "uninfected", "backgrounds", "schedule", "test_size","exclude","n_repeats","top_features", "model_type_ml", "model_type","minimum_cell_count","n_estimators","preprocess", "remove_background", "normalize", "lower_percentile", "merge_pathogens", "batch_size", "filter", "save", "masks", "verbose", "randomize", "n_jobs"],
904
+ "Miscellaneous": ["all_to_mip", "pick_slice", "skip_mode", "upscale", "upscale_factor"]
858
905
  }
859
906
 
907
+
860
908
  category_keys = list(categories.keys())
861
909
 
862
910
  def check_settings(vars_dict, expected_types, q=None):
@@ -935,7 +983,7 @@ def check_settings(vars_dict, expected_types, q=None):
935
983
 
936
984
  def generate_fields(variables, scrollable_frame):
937
985
  from .gui_utils import create_input_field
938
- from .gui_elements import set_dark_style, spacrToolTip
986
+ from .gui_elements import spacrToolTip
939
987
  row = 1
940
988
  vars_dict = {}
941
989
  tooltips = {
@@ -1015,9 +1063,9 @@ def generate_fields(variables, scrollable_frame):
1015
1063
  "image_nr": "(int) - Number of images to process.",
1016
1064
  "image_size": "(int) - Size of the images for training.",
1017
1065
  "img_zoom": "(float) - Zoom factor for the images in plots.",
1018
- "include_multinucleated": "(int) - Whether to include multinucleated cells in the analysis.",
1019
- "include_multiinfected": "(int) - Whether to include multi-infected cells in the analysis.",
1020
- "include_noninfected": "(bool) - Whether to include non-infected cells in the analysis.",
1066
+ "nuclei_limit": "(int) - Whether to include multinucleated cells in the analysis.",
1067
+ "pathogen_limit": "(int) - Whether to include multi-infected cells in the analysis.",
1068
+ "uninfected": "(bool) - Whether to include non-infected cells in the analysis.",
1021
1069
  "include_uninfected": "(bool) - Whether to include uninfected cells in the analysis.",
1022
1070
  "init_weights": "(bool) - Whether to initialize weights for the model.",
1023
1071
  "src": "(str) - Path to the folder containing the images.",
@@ -1109,7 +1157,7 @@ def generate_fields(variables, scrollable_frame):
1109
1157
  "remove_highly_correlated_features": "(bool) - Whether to remove highly correlated features from the analysis.",
1110
1158
  "remove_image_canvas": "(bool) - Whether to remove the image canvas after plotting.",
1111
1159
  "remove_low_variance_features": "(bool) - Whether to remove low variance features from the analysis.",
1112
- "remove_row_column_effect": "(bool) - Whether to remove row and column effects from the data.",
1160
+ "random_row_column_effects": "(bool) - Whether to remove row and column effects from the data.",
1113
1161
  "resize": "(bool) - Resize factor for the images.",
1114
1162
  "resample": "(bool) - Whether to resample the images during processing.",
1115
1163
  "rescale": "(float) - Rescaling factor for the images.",
@@ -1155,7 +1203,7 @@ def generate_fields(variables, scrollable_frame):
1155
1203
  "complevel": "int - level of compression (0-9). Higher is slower and yealds smaller files",
1156
1204
  "file_type": "str - type of file to process",
1157
1205
  "model_path": "str - path to the model",
1158
- "tar_path": "str - path to the tar file with image dataset",
1206
+ "dataset": "str - file name of the tar file with image dataset",
1159
1207
  "score_threshold": "float - threshold for classification",
1160
1208
  "sample": "str - number of images to sample for tar dataset (including both classes). Default: None",
1161
1209
  "file_metadata": "str - string that must be present in image path to be included in the dataset",
@@ -1163,9 +1211,44 @@ def generate_fields(variables, scrollable_frame):
1163
1211
  "train_channels": "list - channels to use for training",
1164
1212
  "dataset_mode": "str - How to generate train/test dataset.",
1165
1213
  "annotated_classes": "list - list of numbers in annotation column.",
1166
- "um_per_pixel": "(float) - The micrometers per pixel for the images."
1214
+ "um_per_pixel": "(float) - The micrometers per pixel for the images.",
1215
+ "segmentation_model": "(str) - The segmentation model to use, either cellpose or mediar.",
1216
+ "pathogen_model": "(str) - use a custom cellpose model to detect pathogen objects.",
1217
+ "timelapse_displacement": "(int) - Displacement for timelapse tracking.",
1218
+ "timelapse_memory": "(int) - Memory for timelapse tracking.",
1219
+ "timelapse_mode": "(str) - Mode for timelapse tracking, trackpy or btrack.",
1220
+ "timelapse_frame_limits": "(list) - Frame limits for timelapse tracking [start,end].",
1221
+ "timelapse_objects": "(list) - Objects to track in the timelapse, cells, nuclei, or pathogens.",
1222
+ "timelapse_remove_transient": "(bool) - Whether to remove transient objects in the timelapse.",
1223
+ "masks": "(bool) - Whether to generate masks for the segmented objects.",
1224
+ "timelapse": "(bool) - Whether to analyze images as a timelapse.",
1225
+ "pathogen_min_size": "(int) - The minimum size of pathogen objects in pixels^2.",
1226
+ "pathogen_mask_dim": "(int) - The dimension of the array the pathogen mask is saved in.",
1227
+ "use_bounding_box": "(bool) - Whether to use the bounding box for cropping the images.",
1228
+ "plot_points": "(bool) - Whether to plot scatterplot points.",
1229
+ "embedding_by_controls": "(bool) - Use the controlls to greate the embedding, then apply this embedding to all of the data.",
1230
+ "pos": "(str) - Positive control identifier.",
1231
+ "neg": "(str) - Negative control identifier.",
1232
+ "minimum_cell_count": "(int) - Minimum number of cells/well. if number of cells < minimum_cell_count, the well is excluded from the analysis.",
1233
+ "circular": "(bool) - If a circle is to be drawn and corners excluded (e.g. square images of round wells).",
1234
+ "highlight": "(str) - highlight genes/grnas containing this string.",
1235
+ "pathogen_plate_metadata": "(str) - Metadata for the pathogen plate.",
1236
+ "treatment_plate_metadata": "(str) - Metadata for the treatment plate.",
1237
+ "regex": "(str) - Regular expression to use.",
1238
+ "target_sequence": "(str) - The DNA sequence to look for that the consensus sequence will start with directly downstream of the first barcode.",
1239
+ "offset": "(int) - The offset to use for the consensus sequence, e.g. -8 if the barecode is 8 bases before target_sequence.",
1240
+ "expected_end": "(int) - The expected length of the sequence from the start of the first barcode to the end of the last.",
1241
+ "column_csv": "(path) - path to the csv file containing column barcodes.",
1242
+ "row_csv": "(path) - path to the csv file containing row barcodes.",
1243
+ "grna_csv": "(path) - path to the csv file containing gRNA sequences.",
1244
+ "save_h5": "(bool) - Whether to save the results to an HDF5 file. (this generates a large file, if compression is used this can be very time consuming)",
1245
+ "comp_type": "(str) - Compression type for the HDF5 file (e.g. zlib).",
1246
+ "comp_level": "(int) - Compression level for the HDF5 file (0-9). Higher is slower and yields smaller files.",
1247
+ "mode": "(str) - Mode to use for sequence analysis (either single for R1 or R2 fastq files or paired for the combination of R1 and R2).",
1248
+ "signal_direction": "(str) - Direction of fastq file (R1 or R2). only relevent when mode is single.",
1249
+ "custom_model_path": "(str) - Path to the custom model to finetune.",
1167
1250
  }
1168
-
1251
+
1169
1252
  for key, (var_type, options, default_value) in variables.items():
1170
1253
  label, widget, var, frame = create_input_field(scrollable_frame.scrollable_frame, key, row, var_type, options, default_value)
1171
1254
  vars_dict[key] = (label, widget, var, frame) # Store the label, widget, and variable
@@ -1210,12 +1293,13 @@ def set_annotate_default_settings(settings):
1210
1293
  settings.setdefault('annotation_column', 'test')
1211
1294
  settings.setdefault('normalize', 'False')
1212
1295
  settings.setdefault('percentiles', [2, 98])
1213
- settings.setdefault('measurement', 'cytoplasm_channel_3_mean_intensity,pathogen_channel_3_mean_intensity')
1214
- settings.setdefault('threshold', '2')
1296
+ settings.setdefault('measurement', '')#'cytoplasm_channel_3_mean_intensity,pathogen_channel_3_mean_intensity')
1297
+ settings.setdefault('threshold', '')#'2')
1215
1298
  return settings
1216
1299
 
1217
1300
  def set_default_generate_barecode_mapping(settings={}):
1218
1301
  settings.setdefault('src', 'path')
1302
+ settings.setdefault('regex', '^(?P<column>.{8})TGCTG.*TAAAC(?P<grna>.{20,21})AACTT.*AGAAG(?P<row>.{8}).*'),
1219
1303
  settings.setdefault('target_sequence', 'TGCTGTTTCCAGCATAGCTCTTAAAC')
1220
1304
  settings.setdefault('offset_start', -8)
1221
1305
  settings.setdefault('expected_end', 89)
@@ -1227,4 +1311,7 @@ def set_default_generate_barecode_mapping(settings={}):
1227
1311
  settings.setdefault('comp_level', 5)
1228
1312
  settings.setdefault('chunk_size', 100000)
1229
1313
  settings.setdefault('n_jobs', None)
1314
+ settings.setdefault('mode', 'paired')
1315
+ settings.setdefault('single_direction', 'R1')
1316
+ settings.setdefault('test', False)
1230
1317
  return settings
spacr/sim.py CHANGED
@@ -15,8 +15,6 @@ import statsmodels.api as sm
15
15
  from multiprocessing import cpu_count, Pool, Manager
16
16
  from copy import deepcopy
17
17
 
18
- from .logger import log_function_call
19
-
20
18
  warnings.filterwarnings("ignore")
21
19
  warnings.filterwarnings("ignore", category=RuntimeWarning) # Ignore RuntimeWarning
22
20