spacr 0.2.46__py3-none-any.whl → 0.2.56__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. spacr/core.py +306 -21
  2. spacr/deep_spacr.py +101 -41
  3. spacr/gui.py +1 -3
  4. spacr/gui_core.py +78 -65
  5. spacr/gui_elements.py +437 -152
  6. spacr/gui_utils.py +84 -73
  7. spacr/io.py +14 -7
  8. spacr/measure.py +196 -145
  9. spacr/plot.py +2 -42
  10. spacr/resources/font/open_sans/OFL.txt +93 -0
  11. spacr/resources/font/open_sans/OpenSans-Italic-VariableFont_wdth,wght.ttf +0 -0
  12. spacr/resources/font/open_sans/OpenSans-VariableFont_wdth,wght.ttf +0 -0
  13. spacr/resources/font/open_sans/README.txt +100 -0
  14. spacr/resources/font/open_sans/static/OpenSans-Bold.ttf +0 -0
  15. spacr/resources/font/open_sans/static/OpenSans-BoldItalic.ttf +0 -0
  16. spacr/resources/font/open_sans/static/OpenSans-ExtraBold.ttf +0 -0
  17. spacr/resources/font/open_sans/static/OpenSans-ExtraBoldItalic.ttf +0 -0
  18. spacr/resources/font/open_sans/static/OpenSans-Italic.ttf +0 -0
  19. spacr/resources/font/open_sans/static/OpenSans-Light.ttf +0 -0
  20. spacr/resources/font/open_sans/static/OpenSans-LightItalic.ttf +0 -0
  21. spacr/resources/font/open_sans/static/OpenSans-Medium.ttf +0 -0
  22. spacr/resources/font/open_sans/static/OpenSans-MediumItalic.ttf +0 -0
  23. spacr/resources/font/open_sans/static/OpenSans-Regular.ttf +0 -0
  24. spacr/resources/font/open_sans/static/OpenSans-SemiBold.ttf +0 -0
  25. spacr/resources/font/open_sans/static/OpenSans-SemiBoldItalic.ttf +0 -0
  26. spacr/resources/font/open_sans/static/OpenSans_Condensed-Bold.ttf +0 -0
  27. spacr/resources/font/open_sans/static/OpenSans_Condensed-BoldItalic.ttf +0 -0
  28. spacr/resources/font/open_sans/static/OpenSans_Condensed-ExtraBold.ttf +0 -0
  29. spacr/resources/font/open_sans/static/OpenSans_Condensed-ExtraBoldItalic.ttf +0 -0
  30. spacr/resources/font/open_sans/static/OpenSans_Condensed-Italic.ttf +0 -0
  31. spacr/resources/font/open_sans/static/OpenSans_Condensed-Light.ttf +0 -0
  32. spacr/resources/font/open_sans/static/OpenSans_Condensed-LightItalic.ttf +0 -0
  33. spacr/resources/font/open_sans/static/OpenSans_Condensed-Medium.ttf +0 -0
  34. spacr/resources/font/open_sans/static/OpenSans_Condensed-MediumItalic.ttf +0 -0
  35. spacr/resources/font/open_sans/static/OpenSans_Condensed-Regular.ttf +0 -0
  36. spacr/resources/font/open_sans/static/OpenSans_Condensed-SemiBold.ttf +0 -0
  37. spacr/resources/font/open_sans/static/OpenSans_Condensed-SemiBoldItalic.ttf +0 -0
  38. spacr/resources/font/open_sans/static/OpenSans_SemiCondensed-Bold.ttf +0 -0
  39. spacr/resources/font/open_sans/static/OpenSans_SemiCondensed-BoldItalic.ttf +0 -0
  40. spacr/resources/font/open_sans/static/OpenSans_SemiCondensed-ExtraBold.ttf +0 -0
  41. spacr/resources/font/open_sans/static/OpenSans_SemiCondensed-ExtraBoldItalic.ttf +0 -0
  42. spacr/resources/font/open_sans/static/OpenSans_SemiCondensed-Italic.ttf +0 -0
  43. spacr/resources/font/open_sans/static/OpenSans_SemiCondensed-Light.ttf +0 -0
  44. spacr/resources/font/open_sans/static/OpenSans_SemiCondensed-LightItalic.ttf +0 -0
  45. spacr/resources/font/open_sans/static/OpenSans_SemiCondensed-Medium.ttf +0 -0
  46. spacr/resources/font/open_sans/static/OpenSans_SemiCondensed-MediumItalic.ttf +0 -0
  47. spacr/resources/font/open_sans/static/OpenSans_SemiCondensed-Regular.ttf +0 -0
  48. spacr/resources/font/open_sans/static/OpenSans_SemiCondensed-SemiBold.ttf +0 -0
  49. spacr/resources/font/open_sans/static/OpenSans_SemiCondensed-SemiBoldItalic.ttf +0 -0
  50. spacr/sequencing.py +481 -587
  51. spacr/settings.py +197 -122
  52. spacr/utils.py +21 -13
  53. {spacr-0.2.46.dist-info → spacr-0.2.56.dist-info}/METADATA +7 -4
  54. spacr-0.2.56.dist-info/RECORD +100 -0
  55. spacr-0.2.46.dist-info/RECORD +0 -60
  56. {spacr-0.2.46.dist-info → spacr-0.2.56.dist-info}/LICENSE +0 -0
  57. {spacr-0.2.46.dist-info → spacr-0.2.56.dist-info}/WHEEL +0 -0
  58. {spacr-0.2.46.dist-info → spacr-0.2.56.dist-info}/entry_points.txt +0 -0
  59. {spacr-0.2.46.dist-info → spacr-0.2.56.dist-info}/top_level.txt +0 -0
spacr/settings.py CHANGED
@@ -220,6 +220,7 @@ def get_measure_crop_settings(settings):
220
220
 
221
221
  settings.setdefault('src', 'path')
222
222
  settings.setdefault('verbose', False)
223
+ settings.setdefault('experiment', 'exp')
223
224
 
224
225
  # Test mode
225
226
  settings.setdefault('test_mode', False)
@@ -252,8 +253,6 @@ def get_measure_crop_settings(settings):
252
253
 
253
254
  # Operational settings
254
255
  settings.setdefault('plot',False)
255
- settings.setdefault('plot_filtration',False)
256
- settings.setdefault('representative_images', False)
257
256
  settings.setdefault('n_jobs', os.cpu_count()-2)
258
257
 
259
258
  # Object settings
@@ -268,24 +267,9 @@ def get_measure_crop_settings(settings):
268
267
  settings.setdefault('cytoplasm_min_size',0)
269
268
  settings.setdefault('merge_edge_pathogen_cells', True)
270
269
 
271
- # Miscellaneous settings
272
- settings.setdefault('experiment', 'exp')
273
- settings.setdefault('cells', ['HeLa'])
274
- settings.setdefault('cell_loc', None)
275
- settings.setdefault('pathogens', ['ME49Dku80WT', 'ME49Dku80dgra8:GRA8', 'ME49Dku80dgra8', 'ME49Dku80TKO'])
276
- settings.setdefault('pathogen_loc', [['c1', 'c2', 'c3', 'c4', 'c5', 'c6'], ['c7', 'c8', 'c9', 'c10', 'c11', 'c12'], ['c13', 'c14', 'c15', 'c16', 'c17', 'c18'], ['c19', 'c20', 'c21', 'c22', 'c23', 'c24']])
277
- settings.setdefault('treatments', ['BR1', 'BR2', 'BR3'])
278
- settings.setdefault('treatment_loc', [['c1', 'c2', 'c7', 'c8', 'c13', 'c14', 'c19', 'c20'], ['c3', 'c4', 'c9', 'c10', 'c15', 'c16', 'c21', 'c22'], ['c5', 'c6', 'c11', 'c12', 'c17', 'c18', 'c23', 'c24']])
279
- settings.setdefault('channel_of_interest', 2)
280
- settings.setdefault('compartments', ['pathogen', 'cytoplasm'])
281
- settings.setdefault('measurement', 'mean_intensity')
282
- settings.setdefault('nr_imgs', 32)
283
- settings.setdefault('um_per_pixel', 0.1)
284
-
285
270
  if settings['test_mode']:
286
271
  settings['verbose'] = True
287
272
  settings['plot'] = True
288
- settings['plot_filtration'] = True
289
273
  test_imgs = settings['test_nr']
290
274
  print(f'Test mode enabled with {test_imgs} images, plotting set to True')
291
275
 
@@ -293,7 +277,7 @@ def get_measure_crop_settings(settings):
293
277
 
294
278
  def set_default_analyze_screen(settings):
295
279
  settings.setdefault('src', 'path')
296
- settings.setdefault('model_type','xgboost')
280
+ settings.setdefault('model_type_ml','xgboost')
297
281
  settings.setdefault('heatmap_feature','predictions')
298
282
  settings.setdefault('grouping','mean')
299
283
  settings.setdefault('min_max','allq')
@@ -342,11 +326,87 @@ def set_default_train_test_model(settings):
342
326
  settings.setdefault('intermedeate_save',True)
343
327
  settings.setdefault('pin_memory',True)
344
328
  settings.setdefault('n_jobs',cores)
345
- settings.setdefault('channels',['r','g','b'])
329
+ settings.setdefault('train_channels',['r','g','b'])
346
330
  settings.setdefault('augment',False)
347
331
  settings.setdefault('verbose',False)
348
332
  return settings
349
333
 
334
+ def set_generate_training_dataset_defaults(settings):
335
+
336
+ settings.setdefault('src','path')
337
+ settings.setdefault('dataset_mode','annotation')
338
+ settings.setdefault('annotation_column','test')
339
+ settings.setdefault('annotated_classes',[1,2])
340
+ settings.setdefault('classes',['nc','pc'])
341
+ settings.setdefault('size',224)
342
+ settings.setdefault('test_split',0.1)
343
+ settings.setdefault('class_metadata',[['c1'],['c2']])
344
+ settings.setdefault('metadata_type_by','col')
345
+ settings.setdefault('channel_of_interest',3)
346
+ settings.setdefault('custom_measurement',None)
347
+ settings.setdefault('tables',None)
348
+ settings.setdefault('png_type','cell_png')
349
+
350
+ return settings
351
+
352
+ def deep_spacr_defaults(settings):
353
+
354
+ cores = os.cpu_count()-2
355
+
356
+ settings.setdefault('src','path')
357
+ settings.setdefault('dataset_mode','annotation')
358
+ settings.setdefault('annotation_column','test')
359
+ settings.setdefault('annotated_classes',[1,2])
360
+ settings.setdefault('classes',['nc','pc'])
361
+ settings.setdefault('size',224)
362
+ settings.setdefault('test_split',0.1)
363
+ settings.setdefault('class_metadata',[['c1'],['c2']])
364
+ settings.setdefault('metadata_type_by','col')
365
+ settings.setdefault('channel_of_interest',3)
366
+ settings.setdefault('custom_measurement',None)
367
+ settings.setdefault('tables',None)
368
+ settings.setdefault('png_type','cell_png')
369
+ settings.setdefault('custom_model',False)
370
+ settings.setdefault('custom_model_path','path')
371
+ settings.setdefault('train',True)
372
+ settings.setdefault('test',False)
373
+ settings.setdefault('model_type','maxvit_t')
374
+ settings.setdefault('optimizer_type','adamw')
375
+ settings.setdefault('schedule','reduce_lr_on_plateau') #reduce_lr_on_plateau, step_lr
376
+ settings.setdefault('loss_type','focal_loss') # binary_cross_entropy_with_logits
377
+ settings.setdefault('normalize',True)
378
+ settings.setdefault('image_size',224)
379
+ settings.setdefault('batch_size',64)
380
+ settings.setdefault('epochs',100)
381
+ settings.setdefault('val_split',0.1)
382
+ settings.setdefault('train_mode','erm')
383
+ settings.setdefault('learning_rate',0.001)
384
+ settings.setdefault('weight_decay',0.00001)
385
+ settings.setdefault('dropout_rate',0.1)
386
+ settings.setdefault('init_weights',True)
387
+ settings.setdefault('amsgrad',True)
388
+ settings.setdefault('use_checkpoint',True)
389
+ settings.setdefault('gradient_accumulation',True)
390
+ settings.setdefault('gradient_accumulation_steps',4)
391
+ settings.setdefault('intermedeate_save',True)
392
+ settings.setdefault('pin_memory',True)
393
+ settings.setdefault('n_jobs',cores)
394
+ settings.setdefault('train_channels',['r','g','b'])
395
+ settings.setdefault('augment',False)
396
+ settings.setdefault('verbose',False)
397
+ settings.setdefault('apply_model_to_dataset',False)
398
+ settings.setdefault('file_metadata',None)
399
+ settings.setdefault('sample',None)
400
+ settings.setdefault('experiment','exp.')
401
+ settings.setdefault('score_threshold',0.5)
402
+ settings.setdefault('tar_path','path')
403
+ settings.setdefault('model_path','path')
404
+ settings.setdefault('file_type','cell_png')
405
+ settings.setdefault('generate_training_dataset', True)
406
+ settings.setdefault('train_DL_model', True)
407
+
408
+ return settings
409
+
350
410
  def get_analyze_recruitment_default_settings(settings):
351
411
  settings.setdefault('target','protein')
352
412
  settings.setdefault('cell_types',['HeLa'])
@@ -384,6 +444,7 @@ def get_analyze_recruitment_default_settings(settings):
384
444
  return settings
385
445
 
386
446
  def get_analyze_reads_default_settings(settings):
447
+ settings.setdefault('src', 'path')
387
448
  settings.setdefault('upstream', 'CTTCTGGTAAATGGGGATGTCAAGTT')
388
449
  settings.setdefault('downstream', 'GTTTAAGAGCTATGCTGGAAACAGCAG') #This is the reverce compliment of the column primer starting from the end #TGCTGTTTAAGAGCTATGCTGGAAACAGCA
389
450
  settings.setdefault('barecode_length_1', 8)
@@ -396,7 +457,7 @@ def get_map_barcodes_default_settings(settings):
396
457
  settings.setdefault('src', 'path')
397
458
  settings.setdefault('grna', '/home/carruthers/Documents/grna_barcodes.csv')
398
459
  settings.setdefault('barcodes', '/home/carruthers/Documents/SCREEN_BARCODES.csv')
399
- settings.setdefault('plate_dict', {'EO1': 'plate1', 'EO2': 'plate2', 'EO3': 'plate3', 'EO4': 'plate4', 'EO5': 'plate5', 'EO6': 'plate6', 'EO7': 'plate7', 'EO8': 'plate8'})
460
+ settings.setdefault('plate_dict', "{'EO1': 'plate1', 'EO2': 'plate2', 'EO3': 'plate3', 'EO4': 'plate4', 'EO5': 'plate5', 'EO6': 'plate6', 'EO7': 'plate7', 'EO8': 'plate8'}")
400
461
  settings.setdefault('test', False)
401
462
  settings.setdefault('verbose', True)
402
463
  settings.setdefault('pc', 'TGGT1_220950_1')
@@ -549,13 +610,11 @@ expected_types = {
549
610
  "save_png": bool,
550
611
  "crop_mode": list,
551
612
  "use_bounding_box": bool,
552
- "png_size": list, # This can be a list of lists
613
+ "png_size": list, # This can be a list of lists
553
614
  "normalize": bool,
554
615
  "png_dims": list,
555
616
  "normalize_by": str,
556
617
  "save_measurements": bool,
557
- "representative_images": bool,
558
- "plot_filtration": bool,
559
618
  "include_uninfected": bool,
560
619
  "dialate_pngs": bool,
561
620
  "dialate_png_ratios": list,
@@ -563,7 +622,7 @@ expected_types = {
563
622
  "cells": list,
564
623
  "cell_loc": list,
565
624
  "pathogens": list,
566
- "pathogen_loc": (list, list), # This can be a list of lists
625
+ "pathogen_loc": (list, list), # This can be a list of lists
567
626
  "treatments": list,
568
627
  "treatment_loc": (list, list), # This can be a list of lists
569
628
  "channel_of_interest": int,
@@ -571,7 +630,6 @@ expected_types = {
571
630
  "measurement": str,
572
631
  "nr_imgs": int,
573
632
  "um_per_pixel": (int, float),
574
- # Additional settings based on provided defaults
575
633
  "include_noninfected": bool,
576
634
  "include_multiinfected": bool,
577
635
  "include_multinucleated": bool,
@@ -685,7 +743,7 @@ expected_types = {
685
743
  "cell_types": list,
686
744
  "cell_plate_metadata": (list, type(None)),
687
745
  "pathogen_types": list,
688
- "pathogen_plate_metadata": (list, list), # This can be a list of lists
746
+ "pathogen_plate_metadata": (list, list), # This can be a list of lists
689
747
  "treatment_plate_metadata": (list, list), # This can be a list of lists
690
748
  "metadata_types": list,
691
749
  "cell_chann_dim": int,
@@ -738,63 +796,69 @@ expected_types = {
738
796
  "from_scratch": bool,
739
797
  "width_height": list,
740
798
  "resize": bool,
799
+ "compression": str,
800
+ "complevel": int,
741
801
  "gene_weights_csv": str,
742
802
  "fraction_threshold": float,
803
+ "barcode_mapping":dict,
804
+ "redunction_method":str,
805
+ "mix":str,
806
+ "model_type_ml":str,
807
+ "exclude_conditions":list,
808
+ "remove_highly_correlated_features":bool,
809
+ 'barcode_coordinates':list, # This is a list of lists
810
+ 'reverse_complement':bool,
811
+ 'file_type':str,
812
+ 'model_path':str,
813
+ 'tar_path':str,
814
+ 'score_threshold':float,
815
+ 'sample':None,
816
+ 'file_metadata':None,
817
+ 'apply_model_to_dataset':False,
818
+ "train":bool,
819
+ "test":bool,
820
+ 'train_channels':list,
821
+ "optimizer_type":str,
822
+ "dataset_mode":str,
823
+ "annotated_classes":list,
824
+ "annotation_column":str,
825
+ "apply_model_to_dataset":bool,
826
+ "metadata_type_by":str,
827
+ "custom_measurement":str,
828
+ "custom_model":bool,
829
+ "size":int,
830
+ "test_split":float,
831
+ "class_metadata":list, # This is a list of lists
832
+ "png_type":str,
833
+ "custom_model_path":str,
834
+ "generate_training_dataset":bool,
835
+ "train_DL_model":bool,
743
836
  }
744
837
 
745
- def check_settings_v1(vars_dict, expected_types,q=None):
746
- from .gui_utils import parse_list
747
- settings = {}
748
- # Define the expected types for each key, including None where applicable
749
-
750
- for key, (label, widget, var) in vars_dict.items():
751
- if key not in expected_types:
752
- if key not in ["General","Nucleus","Cell","Pathogen","Timelapse","Plot","Object Image","Annotate Data","Measurements","Advanced","Miscellaneous","Test"]:
753
- q.put(f"Key {key} not found in expected types.")
754
- continue
755
-
756
- value = var.get()
757
- expected_type = expected_types.get(key, str)
838
+ categories = {"General": ["src", "metadata_type", "custom_regex", "experiment", "channels", "magnification", "channel_dims", "apply_model_to_dataset", "generate_training_dataset", "train_DL_model"],
839
+ "Cell": ["cell_intensity_range", "cell_size_range", "cell_chann_dim", "cell_channel", "cell_background", "cell_Signal_to_noise", "cell_CP_prob", "cell_FT", "remove_background_cell", "cell_min_size", "cell_mask_dim", "cytoplasm", "cytoplasm_min_size", "include_uninfected", "merge_edge_pathogen_cells", "adjust_cells"],
840
+ "Nucleus": ["nucleus_intensity_range", "nucleus_size_range", "nucleus_chann_dim", "nucleus_channel", "nucleus_background", "nucleus_Signal_to_noise", "nucleus_CP_prob", "nucleus_FT", "remove_background_nucleus", "nucleus_min_size", "nucleus_mask_dim", "nucleus_loc"],
841
+ "Pathogen": ["pathogen_intensity_range", "pathogen_size_range", "pathogen_chann_dim", "pathogen_channel", "pathogen_background", "pathogen_Signal_to_noise", "pathogen_CP_prob", "pathogen_FT", "pathogen_model", "remove_background_pathogen", "pathogen_min_size", "pathogen_mask_dim"],
842
+ "Timelapse": ["fps", "timelapse_displacement", "timelapse_memory", "timelapse_frame_limits", "timelapse_remove_transient", "timelapse_mode", "timelapse_objects", "compartments"],
843
+ "Plot": ["plot_control", "plot_nr", "examples_to_plot", "normalize_plots", "normalize", "cmap", "figuresize", "plot_cluster_grids", "img_zoom", "row_limit", "color_by", "plot_images", "smooth_lines", "plot_points", "plot_outlines", "black_background", "plot_by_cluster", "heatmap_feature","grouping","min_max","cmap","save_figure"],
844
+ "Measurements": ["remove_image_canvas", "remove_highly_correlated", "homogeneity", "homogeneity_distances", "radial_dist", "calculate_correlation", "manders_thresholds", "save_measurements", "tables", "image_nr", "dot_size", "filter_by", "remove_highly_correlated_features", "remove_low_variance_features", "channel_of_interest"],
845
+ "Paths":["grna", "barcodes", "custom_model_path", "tar_path","model_path"],
846
+ "Sequencing": ["upstream", "downstream", "barecode_length_1", "barecode_length_2", "chunk_size", "barcode_mapping", "reverse_complement", "barcode_coordinates", "complevel", "compression","plate_dict"],
847
+ "Embedding": ["visualize","n_neighbors","min_dist","metric","resnet_features","reduction_method","embedding_by_controls","col_to_compare","log_data"],
848
+ "Clustering": ["eps","min_samples","analyze_clusters","clustering","remove_cluster_noise"],
849
+ "Object Image": ["save_png", "dialate_pngs", "dialate_png_ratios", "png_size", "png_dims", "save_arrays", "normalize_by", "dialate_png_ratios", "crop_mode", "dialate_pngs", "normalize", "use_bounding_box"],
850
+ "Annotation": ["nc_loc", "pc_loc", "nc", "pc", "cell_plate_metadata","pathogen_types", "pathogen_plate_metadata", "treatment_plate_metadata", "metadata_types", "cell_types", "target","positive_control","negative_control", "location_column", "treatment_loc", "cells", "cell_loc", "pathogens", "pathogen_loc", "channel_of_interest", "measurement", "treatments", "um_per_pixel", "nr_imgs", "exclude", "exclude_conditions", "mix", "pos", "neg"],
851
+ "Machine Learning":[],
852
+ "Deep Learning": ["png_type","score_threshold","file_type", "train_channels", "epochs", "loss_type", "optimizer_type","image_size","val_split","learning_rate","weight_decay","dropout_rate", "init_weights", "train", "classes", "augment"],
853
+ "Generate Dataset":["file_metadata","class_metadata", "annotation_column","annotated_classes", "dataset_mode", "metadata_type_by","custom_measurement", "sample", "size"],
854
+ "Cellpose":["from_scratch", "n_epochs", "width_height", "model_name", "custom_model", "resample", "rescale", "CP_prob", "flow_threshold", "percentiles", "circular", "invert", "diameter", "grayscale", "background", "Signal_to_noise", "resize", "target_height", "target_width"],
855
+ "Regression":["class_1_threshold", "plate", "other", "fraction_threshold", "alpha", "remove_row_column_effect", "regression_type", "min_cell_count", "agg_type", "transform", "dependent_variable", "gene_weights_csv"],
856
+ "Miscellaneous": ["all_to_mip", "pick_slice", "skip_mode", "upscale", "upscale_factor"],
857
+ "Test": ["test_mode", "test_images", "random_test", "test_nr", "test", "test_split"],
858
+ "Advanced": ["target_intensity_min", "cells_per_well", "include_multinucleated", "include_multiinfected", "include_noninfected", "backgrounds", "plot", "timelapse", "schedule", "test_size","exclude","n_repeats","top_features", "model_type_ml", "model_type","minimum_cell_count","n_estimators","preprocess", "remove_background", "normalize", "lower_percentile", "merge_pathogens", "batch_size", "filter", "save", "masks", "verbose", "randomize", "n_jobs", "train_mode","amsgrad","use_checkpoint","gradient_accumulation","gradient_accumulation_steps","intermedeate_save","pin_memory"]
859
+ }
758
860
 
759
- try:
760
- if key in ["png_size", "pathogen_plate_metadata", "treatment_plate_metadata"]:
761
- parsed_value = ast.literal_eval(value) if value else None
762
- if isinstance(parsed_value, list):
763
- if all(isinstance(i, list) for i in parsed_value) or all(not isinstance(i, list) for i in parsed_value):
764
- settings[key] = parsed_value
765
- else:
766
- raise ValueError("Invalid format: Mixed list and list of lists")
767
- else:
768
- raise ValueError("Invalid format for list or list of lists")
769
- elif expected_type == list:
770
- settings[key] = parse_list(value) if value else None
771
- elif expected_type == bool:
772
- settings[key] = value if isinstance(value, bool) else value.lower() in ['true', '1', 't', 'y', 'yes']
773
- elif expected_type == (int, type(None)):
774
- settings[key] = int(value) if value else None
775
- elif expected_type == (float, type(None)):
776
- settings[key] = float(value) if value else None
777
- elif expected_type == (int, float):
778
- settings[key] = float(value) if '.' in value else int(value)
779
- elif expected_type == (str, type(None)):
780
- settings[key] = str(value) if value else None
781
- elif isinstance(expected_type, tuple):
782
- for typ in expected_type:
783
- try:
784
- settings[key] = typ(value) if value else None
785
- break
786
- except (ValueError, TypeError):
787
- continue
788
- else:
789
- raise ValueError
790
- else:
791
- settings[key] = expected_type(value) if value else None
792
- except (ValueError, SyntaxError):
793
- expected_type_name = ' or '.join([t.__name__ for t in expected_type]) if isinstance(expected_type, tuple) else expected_type.__name__
794
- q.put(f"Error: Invalid format for {key}. Expected type: {expected_type_name}.")
795
- return
796
-
797
- return settings
861
+ category_keys = list(categories.keys())
798
862
 
799
863
  def check_settings(vars_dict, expected_types, q=None):
800
864
  from .gui_utils import parse_list
@@ -805,9 +869,9 @@ def check_settings(vars_dict, expected_types, q=None):
805
869
 
806
870
  settings = {}
807
871
 
808
- for key, (label, widget, var) in vars_dict.items():
872
+ for key, (label, widget, var, _) in vars_dict.items():
809
873
  if key not in expected_types:
810
- if key not in ["General", "Nucleus", "Cell", "Pathogen", "Timelapse", "Plot", "Object Image", "Annotate Data", "Measurements", "Advanced", "Miscellaneous", "Test"]:
874
+ if key not in category_keys:
811
875
  q.put(f"Key {key} not found in expected types.")
812
876
  continue
813
877
 
@@ -815,7 +879,7 @@ def check_settings(vars_dict, expected_types, q=None):
815
879
  expected_type = expected_types.get(key, str)
816
880
 
817
881
  try:
818
- if key in ["png_size", "pathogen_plate_metadata", "treatment_plate_metadata"]:
882
+ if key in ["timelapse_frame_limits", "png_size", "pathogen_loc", "treatment_loc", "pathogen_plate_metadata", "treatment_plate_metadata", "barcode_coordinates", "class_metadata"]:
819
883
  parsed_value = ast.literal_eval(value) if value else None
820
884
  if isinstance(parsed_value, list):
821
885
  if all(isinstance(i, list) for i in parsed_value) or all(not isinstance(i, list) for i in parsed_value):
@@ -836,6 +900,20 @@ def check_settings(vars_dict, expected_types, q=None):
836
900
  settings[key] = float(value) if '.' in value else int(value)
837
901
  elif expected_type == (str, type(None)):
838
902
  settings[key] = str(value) if value else None
903
+ elif expected_type == dict:
904
+ try:
905
+ # Ensure that the value is a string that can be converted to a dictionary
906
+ if isinstance(value, str):
907
+ settings[key] = ast.literal_eval(value)
908
+ else:
909
+ raise ValueError("Expected a string representation of a dictionary.")
910
+
911
+ # Check if the result is actually a dictionary
912
+ if not isinstance(settings[key], dict):
913
+ raise ValueError("Value is not a valid dictionary.")
914
+ except (ValueError, SyntaxError) as e:
915
+ settings[key] = {}
916
+ q.put(f"Error: Invalid format for {key}. Expected type: dict. Error: {e}")
839
917
  elif isinstance(expected_type, tuple):
840
918
  for typ in expected_type:
841
919
  try:
@@ -856,7 +934,7 @@ def check_settings(vars_dict, expected_types, q=None):
856
934
 
857
935
  def generate_fields(variables, scrollable_frame):
858
936
  from .gui_utils import create_input_field
859
- from .gui_elements import spacrToolTip
937
+ from .gui_elements import set_dark_style, spacrToolTip
860
938
  row = 1
861
939
  vars_dict = {}
862
940
  tooltips = {
@@ -886,7 +964,7 @@ def generate_fields(variables, scrollable_frame):
886
964
  "cell_Signal_to_noise": "(float) - The signal-to-noise ratio for the cell channel. This will be used to determine the range of intensities to normalize images to for cell segmentation.",
887
965
  "cell_size_range": "(list) - Size range for cell segmentation.",
888
966
  "cell_types": "(list) - Types of cells to include in the analysis.",
889
- "cells": "(list) - The cell types to include in the analysis.",
967
+ "cells": "(list of lists) - The cell types to include in the analysis.",
890
968
  "cells_per_well": "(int) - Number of cells per well.",
891
969
  "channel_dims": "(list) - The dimensions of the image channels.",
892
970
  "channel_of_interest": "(int) - The channel of interest to use for the analysis.",
@@ -955,7 +1033,7 @@ def generate_fields(variables, scrollable_frame):
955
1033
  "metadata_type": "(str) - Type of metadata to expect in the images. This will determine how the images are processed. If 'custom' is selected, you can provide a custom regex pattern to extract metadata from the image names.",
956
1034
  "metadata_types": "(list) - Types of metadata to include in the analysis.",
957
1035
  "merge_edge_pathogen_cells": "(bool) - Whether to merge cells that share pathogen objects.",
958
- "merge_pathogens": "(bool) - Whether to merge pathogen objects that share more than 75% of their perimeter.",
1036
+ "merge_pathogens": "(bool) - Whether to merge pathogen objects that share more than 75 percent of their perimeter.",
959
1037
  "metric": "(str) - Metric to use for UMAP.",
960
1038
  "min_cell_count": "(int) - Minimum number of cells required for analysis.",
961
1039
  "min_dist": "(float) - Minimum distance for UMAP.",
@@ -964,6 +1042,7 @@ def generate_fields(variables, scrollable_frame):
964
1042
  "mix": "(dict) - Mixing settings for the samples.",
965
1043
  "model_name": "(str) - Name of the Cellpose model.",
966
1044
  "model_type": "(str) - Type of model to use for the analysis.",
1045
+ "model_type_ml": "(str) - Type of model to use for machine learning.",
967
1046
  "nc": "(str) - Negative control identifier.",
968
1047
  "nc_loc": "(str) - Location of the negative control in the images.",
969
1048
  "negative_control": "(str) - Identifier for the negative control.",
@@ -994,12 +1073,7 @@ def generate_fields(variables, scrollable_frame):
994
1073
  "pathogen_background": "(float) - The background intensity for the pathogen channel. This will be used to remove background noise.",
995
1074
  "pathogen_chann_dim": "(int) - Dimension of the channel to use for pathogen segmentation.",
996
1075
  "pathogen_channel": "(int) - The channel to use for the pathogen. If None, the pathogen will not be segmented.",
997
- "pathogen_intensity_range": "(list) - Intensity range for pathogen segmentation.",
998
- "pathogen_loc": "(list) - The locations of the pathogen types in the images.",
999
- "pathogen_mask_dim": "(int) - The dimension of the array the pathogen mask is saved in.",
1000
- "pathogen_min_size": "(int) - The minimum size of pathogen objects in pixels^2.",
1001
- "pathogen_model": "(str) - Model to use for pathogen segmentation.",
1002
- "pathogen_plate_metadata": "(str) - Metadata for the pathogen plate.",
1076
+ "pathogen_intensity_range": "(str) - Metadata for the pathogen plate.",
1003
1077
  "pathogen_Signal_to_noise": "(float) - The signal-to-noise ratio for the pathogen channel. This will be used to determine the range of intensities to normalize images to for pathogen segmentation.",
1004
1078
  "pathogen_size_range": "(list) - Size range for pathogen segmentation.",
1005
1079
  "pathogen_types": "(list) - Types of pathogens to include in the analysis.",
@@ -1014,7 +1088,6 @@ def generate_fields(variables, scrollable_frame):
1014
1088
  "plot_by_cluster": "(bool) - Whether to plot images by clusters.",
1015
1089
  "plot_cluster_grids": "(bool) - Whether to plot grids of clustered images.",
1016
1090
  "plot_control": "(dict) - Control settings for plotting.",
1017
- "plot_filtration": "(bool) - Whether to plot the filtration steps.",
1018
1091
  "plot_images": "(bool) - Whether to plot images.",
1019
1092
  "plot_nr": "(int) - Number of plots to generate.",
1020
1093
  "plot_outlines": "(bool) - Whether to plot outlines of segmented objects.",
@@ -1036,7 +1109,6 @@ def generate_fields(variables, scrollable_frame):
1036
1109
  "remove_image_canvas": "(bool) - Whether to remove the image canvas after plotting.",
1037
1110
  "remove_low_variance_features": "(bool) - Whether to remove low variance features from the analysis.",
1038
1111
  "remove_row_column_effect": "(bool) - Whether to remove row and column effects from the data.",
1039
- "representative_images": "(bool) - Whether to save representative images of the segmented objects (Not working yet).",
1040
1112
  "resize": "(bool) - Resize factor for the images.",
1041
1113
  "resample": "(bool) - Whether to resample the images during processing.",
1042
1114
  "rescale": "(float) - Rescaling factor for the images.",
@@ -1077,42 +1149,35 @@ def generate_fields(variables, scrollable_frame):
1077
1149
  "verbose": "(bool) - Whether to print verbose output during processing.",
1078
1150
  "weight_decay": "(float) - Weight decay for regularization.",
1079
1151
  "width_height": "(tuple) - Width and height of the input images.",
1152
+ "barcode_coordinates": "(list of lists) - Coordinates of the barcodes in the sequence.",
1153
+ "barcode_mapping": "dict - names and barcode csv files",
1154
+ "compression": "str - type of compression (e.g. zlib)",
1155
+ "complevel": "int - level of compression (0-9). Higher is slower and yields smaller files",
1156
+ "file_type": "str - type of file to process",
1157
+ "model_path": "str - path to the model",
1158
+ "tar_path": "str - path to the tar file with image dataset",
1159
+ "score_threshold": "float - threshold for classification",
1160
+ "sample": "str - number of images to sample for tar dataset (including both classes). Default: None",
1161
+ "file_metadata": "str - string that must be present in image path to be included in the dataset",
1162
+ "apply_model_to_dataset": "bool - whether to apply model to the dataset",
1163
+ "train_channels": "list - channels to use for training",
1164
+ "dataset_mode": "str - How to generate train/test dataset.",
1165
+ "annotated_classes": "list - list of numbers in annotation column.",
1080
1166
  "um_per_pixel": "(float) - The micrometers per pixel for the images."
1081
1167
  }
1082
1168
 
1083
-
1084
1169
  for key, (var_type, options, default_value) in variables.items():
1085
- label, widget, var = create_input_field(scrollable_frame.scrollable_frame, key, row, var_type, options, default_value)
1086
- vars_dict[key] = (label, widget, var) # Store the label, widget, and variable
1170
+ label, widget, var, frame = create_input_field(scrollable_frame.scrollable_frame, key, row, var_type, options, default_value)
1171
+ vars_dict[key] = (label, widget, var, frame) # Store the label, widget, and variable
1087
1172
 
1088
1173
  # Add tooltip to the label if it exists in the tooltips dictionary
1089
1174
  if key in tooltips:
1090
1175
  spacrToolTip(label, tooltips[key])
1176
+
1091
1177
  row += 1
1178
+
1092
1179
  return vars_dict
1093
1180
 
1094
- categories = {
1095
- "General": ["src", "metadata_type", "custom_regex", "experiment", "channels", "magnification", "channel_dims"],
1096
- "Paths":["grna", "barcodes"],
1097
- "Regression":["class_1_threshold", "plate", "other", "fraction_threshold", "alpha", "remove_row_column_effect", "regression_type", "min_cell_count", "agg_type", "transform", "dependent_variable", "gene_weights_csv"],
1098
- "Cellpose":["from_scratch", "n_epochs", "width_height", "model_name", "custom_model", "resample", "rescale", "CP_prob", "flow_threshold", "percentiles", "circular", "invert", "diameter", "grayscale", "background", "Signal_to_noise", "resize", "target_height", "target_width"],
1099
- "Nucleus": ["nucleus_intensity_range", "nucleus_size_range", "nucleus_chann_dim", "nucleus_channel", "nucleus_background", "nucleus_Signal_to_noise", "nucleus_CP_prob", "nucleus_FT", "remove_background_nucleus", "nucleus_min_size", "nucleus_mask_dim", "nucleus_loc"],
1100
- "Cell": ["cell_intensity_range", "cell_size_range", "cell_chann_dim", "cell_channel", "cell_background", "cell_Signal_to_noise", "cell_CP_prob", "cell_FT", "remove_background_cell", "cell_min_size", "cell_mask_dim", "cytoplasm", "cytoplasm_min_size", "include_uninfected", "merge_edge_pathogen_cells", "adjust_cells"],
1101
- "Pathogen": ["pathogen_intensity_range", "pathogen_size_range", "pathogen_chann_dim", "pathogen_channel", "pathogen_background", "pathogen_Signal_to_noise", "pathogen_CP_prob", "pathogen_FT", "pathogen_model", "remove_background_pathogen", "pathogen_min_size", "pathogen_mask_dim"],
1102
- "Timelapse": ["fps", "timelapse_displacement", "timelapse_memory", "timelapse_frame_limits", "timelapse_remove_transient", "timelapse_mode", "timelapse_objects", "compartments"],
1103
- "Plot": ["plot_control", "plot_nr", "plot_filtration", "examples_to_plot", "normalize_plots", "normalize", "cmap", "figuresize", "plot_cluster_grids", "img_zoom", "row_limit", "color_by", "plot_images", "smooth_lines", "plot_points", "plot_outlines", "black_background", "plot_by_cluster", "heatmap_feature","grouping","min_max","cmap","save_figure"],
1104
- "Object Image": ["save_png", "dialate_pngs", "dialate_png_ratios", "png_size", "png_dims", "save_arrays", "normalize_by", "dialate_png_ratios", "crop_mode", "dialate_pngs", "normalize", "use_bounding_box"],
1105
- "Annotate Data": ["nc_loc", "pc_loc", "nc", "pc", "cell_plate_metadata","pathogen_types", "pathogen_plate_metadata", "treatment_plate_metadata", "metadata_types", "cell_types", "target","positive_control","negative_control", "location_column", "treatment_loc", "cells", "cell_loc", "pathogens", "pathogen_loc", "channel_of_interest", "measurement", "treatments", "representative_images", "um_per_pixel", "nr_imgs", "exclude", "exclude_conditions", "mix", "pos", "neg"],
1106
- "Measurements": ["remove_image_canvas", "remove_highly_correlated", "homogeneity", "homogeneity_distances", "radial_dist", "calculate_correlation", "manders_thresholds", "save_measurements", "tables", "image_nr", "dot_size", "filter_by", "remove_highly_correlated_features", "remove_low_variance_features", "channel_of_interest"],
1107
- "Advanced": ["plate_dict", "target_intensity_min", "cells_per_well", "include_multinucleated", "include_multiinfected", "include_noninfected", "backgrounds", "plot", "timelapse", "schedule", "test_size","exclude","n_repeats","top_features", "model_type","minimum_cell_count","n_estimators","preprocess", "remove_background", "normalize", "lower_percentile", "merge_pathogens", "batch_size", "filter", "save", "masks", "verbose", "randomize", "n_jobs", "train_mode","amsgrad","use_checkpoint","gradient_accumulation","gradient_accumulation_steps","intermedeate_save","pin_memory","n_jobs","channels","augment"],
1108
- "Clustering": ["eps","min_samples","analyze_clusters","clustering","remove_cluster_noise"],
1109
- "Embedding": ["visualize","n_neighbors","min_dist","metric","resnet_features","reduction_method","embedding_by_controls","col_to_compare","log_data"],
1110
- "Train DL Model": ["epochs", "loss_type", "optimizer_type","image_size","val_split","learning_rate","weight_decay","dropout_rate", "init_weights", "train", "classes"],
1111
- "Miscellaneous": ["all_to_mip", "pick_slice", "skip_mode", "upscale", "upscale_factor"],
1112
- "Test": ["test_mode", "test_images", "random_test", "test_nr", "test"],
1113
- "Sequencing": ["upstream", "downstream", "barecode_length_1", "barecode_length_2", "chunk_size"]
1114
- }
1115
-
1116
1181
  descriptions = {
1117
1182
  'mask': "\n\nHelp:\n- Generate Cells, Nuclei, Pathogens, and Cytoplasm masks from intensity images in src.\n- To ensure that spacr is installed correctly:\n- 1. Download the training set (click Download).\n- 2. Import settings (click settings navigate to downloaded dataset settings folder and import preprocess_generate_masks_settings.csv).\n- 3. Run the module.\n- 4. Proceed to the Measure module (click Measure in the menu bar).\n- For further help, click the Help button in the menu bar.",
1118
1183
 
@@ -1120,8 +1185,6 @@ descriptions = {
1120
1185
 
1121
1186
  'classify': "Train and Test any Torch Computer vision model. (Requires PNG images from the Measure module). Function: train_test_model from spacr.deep_spacr.\n\nKey Features:\n- Deep Learning Integration: Train and evaluate state-of-the-art Torch models for various classification tasks.\n- Flexible Training: Supports a wide range of Torch models, allowing customization based on specific research needs.\n- Data Requirement: Requires PNG images generated by the Measure module for training and testing.",
1122
1187
 
1123
- 'sequencing': "Find Barcodes and gRNA sequences in FASTQ files. (Requires paired-end FASTQ files, R1 and R2). Function: analyze_reads from spacr.sequencing.\n\nKey Features:\n- Barcode and gRNA Identification: Efficiently detect and extract barcode and gRNA sequences from raw sequencing data.\n- Paired-End Support: Specifically designed to handle paired-end FASTQ files, ensuring accurate sequence alignment and analysis.\n- High Throughput: Capable of processing large sequencing datasets quickly and accurately.",
1124
-
1125
1188
  'umap': "Generate UMAP or tSNE embeddings and represent points as single cell images. (Requires measurements.db and PNG images from the Measure module). Function: generate_image_umap from spacr.core.\n\nKey Features:\n- Dimensionality Reduction: Employ UMAP or tSNE algorithms to reduce high-dimensional data into two dimensions for visualization.\n- Single Cell Representation: Visualize embedding points as single cell images, providing an intuitive understanding of data clusters.\n- Data Integration: Requires measurements and images generated by the Measure module, ensuring comprehensive data representation.",
1126
1189
 
1127
1190
  'train_cellpose': "Train custom Cellpose models for your specific dataset. Function: train_cellpose_model from spacr.core.\n\nKey Features:\n- Custom Model Training: Train Cellpose models on your dataset to improve segmentation accuracy.\n- Data Adaptation: Tailor the model to handle specific types of biological samples more effectively.\n- Advanced Training Options: Supports various training parameters and configurations for optimized performance.",
@@ -1132,8 +1195,8 @@ descriptions = {
1132
1195
 
1133
1196
  'cellpose_all': "Run Cellpose on all images in your dataset and obtain masks and measurements. Function: cellpose_analysis from spacr.cellpose.\n\nKey Features:\n- End-to-End Analysis: Perform both segmentation and measurement extraction in a single step.\n- Efficiency: Process entire datasets with minimal manual intervention.\n- Comprehensive Output: Obtain detailed masks and corresponding measurements for further analysis.",
1134
1197
 
1135
- 'map_barcodes': "Map barcodes to your data for identification and tracking. Function: barcode_mapping_tools from spacr.sequencing.\n\nKey Features:\n- Barcode Integration: Efficiently map and integrate barcode information into your dataset.\n- Tracking: Enable tracking and identification of samples using barcodes.\n- Compatibility: Works with sequencing data to ensure accurate mapping and analysis.",
1136
-
1198
+ 'map_barcodes': "\n\nHelp:\n- 1. Generate consensus read fastq files from R1 and R2 files.\n- 2. Map barcodes from sequencing data for identification and tracking of samples.\n- 3. Run the module to extract and map barcodes from your FASTQ files in chunks.\n- Prepare your barcode CSV files with the appropriate 'name' and 'sequence' columns.\n- Configure the barcode settings (coordinates and reverse complement flags) according to your experimental setup.\n- For further help, click the Help button in the menu bar.",
1199
+
1137
1200
  'regression': "Perform regression analysis on your data. Function: regression_tools from spacr.analysis.\n\nKey Features:\n- Statistical Analysis: Conduct various types of regression analysis to identify relationships within your data.\n- Flexible Options: Supports multiple regression models and configurations.\n- Data Insight: Gain deeper insights into your dataset through advanced regression techniques.",
1138
1201
 
1139
1202
  'recruitment': "Analyze recruitment data to understand sample recruitment dynamics. Function: recruitment_analysis_tools from spacr.analysis.\n\nKey Features:\n- Recruitment Analysis: Investigate and analyze the recruitment of samples over time or conditions.\n- Visualization: Generate visualizations to represent recruitment trends and patterns.\n- Integration: Utilize data from various sources for a comprehensive recruitment analysis."
@@ -1142,7 +1205,7 @@ descriptions = {
1142
1205
  def set_annotate_default_settings(settings):
1143
1206
  settings.setdefault('src', 'path')
1144
1207
  settings.setdefault('image_type', 'cell_png')
1145
- settings.setdefault('channels', 'r,g,b')
1208
+ settings.setdefault('channels', "'r','g','b'")
1146
1209
  settings.setdefault('img_size', 200)
1147
1210
  settings.setdefault('annotation_column', 'test')
1148
1211
  settings.setdefault('normalize', 'False')
@@ -1151,3 +1214,15 @@ def set_annotate_default_settings(settings):
1151
1214
  settings.setdefault('threshold', '2')
1152
1215
  return settings
1153
1216
 
1217
def set_default_generate_barecode_mapping(settings=None):
    """Fill in default settings for barcode mapping.

    Keys already present in ``settings`` are left untouched; missing keys are
    added with their default values.

    Args:
        settings (dict | None): Settings to complete in place. When omitted
            (or ``None``) a fresh dict is created for this call, so defaults
            are never shared between calls.

    Returns:
        dict: The completed settings (the same object that was passed in, or
        the newly created dict).
    """
    # A literal ``{}`` default would be created once and then mutated by every
    # call through ``setdefault``; build a new dict per call instead.
    if settings is None:
        settings = {}

    settings.setdefault('src', 'path')
    settings.setdefault('chunk_size', 100000)

    # Each entry is [barcode csv path, (start, end) coordinates, flag].
    # NOTE(review): the default csv paths are machine-specific placeholders and
    # the boolean presumably toggles reverse-complement handling; confirm.
    settings.setdefault('barcode_mapping', {'row': ['/home/carruthers/Documents/row_barcodes.csv', (80, 88), True],
                                            'grna': ['/home/carruthers/Documents/grna_barcodes.csv', (34, 55), True],
                                            'column': ['/home/carruthers/Documents/column_barcodes.csv', (0, 7), False]})

    settings.setdefault('n_jobs', None)
    settings.setdefault('compression', 'zlib')
    settings.setdefault('complevel', 5)
    return settings
spacr/utils.py CHANGED
@@ -1,4 +1,4 @@
1
- import sys, os, re, sqlite3, torch, torchvision, random, string, shutil, cv2, tarfile, glob, psutil, platform, signal
1
+ import sys, os, re, sqlite3, torch, torchvision, random, string, shutil, cv2, tarfile, glob, psutil, platform, gzip
2
2
 
3
3
  import numpy as np
4
4
  from cellpose import models as cp_models
@@ -88,11 +88,11 @@ from sklearn.cluster import KMeans
88
88
  from scipy import stats
89
89
 
90
90
 
91
- def print_progress(files_processed, files_to_process, n_jobs, time_ls=None, batch_size=None, operation_type=""):
91
+ def print_progress(files_processed, files_to_process, n_jobs, time_ls=None, batch_size=None, operation_type="", metricks=None):
92
92
  if isinstance(files_processed, list):
93
- files_processed = len(files_processed)
93
+ files_processed = len(set(files_processed))
94
94
  if isinstance(files_to_process, list):
95
- files_to_process = len(files_to_process)
95
+ files_to_process = len(set(files_to_process))
96
96
  if isinstance(batch_size, list):
97
97
  batch_size = len(batch_size)
98
98
 
@@ -117,9 +117,10 @@ def print_progress(files_processed, files_to_process, n_jobs, time_ls=None, batc
117
117
  average_time_img = average_time / batch_size
118
118
  time_info = f'Time/batch: {average_time:.3f}sec, Time/image: {average_time_img:.3f}sec, Time_left: {time_left:.3f} min.'
119
119
 
120
- print(f'Progress: {files_processed}/{files_to_process}, operation_type: {operation_type} {time_info}')
121
-
122
-
120
+ if metricks is None:
121
+ print(f'Progress: {files_processed}/{files_to_process}, operation_type: {operation_type} {time_info}')
122
+ else:
123
+ print(f'Progress: {files_processed}/{files_to_process}, {metricks}, operation_type: {operation_type} {time_info}')
123
124
 
124
125
  def reset_mp():
125
126
  current_method = get_start_method()
@@ -3628,22 +3629,22 @@ def delete_folder(folder_path):
3628
3629
def measure_test_mode(settings):
    """Point ``settings['src']`` at a small random sample when test mode is on.

    When ``settings['test_mode']`` is truthy and the source folder is not
    already named ``test``, up to ``settings['test_nr']`` randomly chosen files
    are copied from ``settings['src']`` into ``<parent of src>/test/merged``
    (which is deleted and recreated on every call) and ``settings['src']`` is
    replaced by that folder.

    Args:
        settings (dict): Must contain ``'test_mode'``. When it is truthy,
            ``'src'`` is read, and ``'test_nr'`` is read unless the source
            folder is already named ``test``.

    Returns:
        dict: The same ``settings`` object, possibly with ``'src'`` updated.
    """
    if not settings['test_mode']:
        return settings

    source = settings['src']
    if os.path.basename(source) == 'test':
        print(f'Test mode enabled, using source folder {settings["src"]}')
        return settings

    # shutil.copy can only copy regular files, so sub-folders are not eligible.
    candidates = [name for name in os.listdir(source)
                  if os.path.isfile(os.path.join(source, name))]

    # random.sample raises ValueError when asked for more items than exist;
    # cap the request so a small folder is simply used in full.
    sample_size = min(settings['test_nr'], len(candidates))
    random_files = random.sample(candidates, sample_size)

    src = os.path.join(os.path.dirname(source), 'test', 'merged')
    # Start from an empty folder so files from earlier test runs do not linger.
    if os.path.exists(src):
        delete_folder(src)
    os.makedirs(src, exist_ok=True)

    for file in random_files:
        shutil.copy(os.path.join(source, file), os.path.join(src, file))

    settings['src'] = src
    print(f'Changed source folder to {src} for test mode')

    return settings
3649
3650
 
@@ -4424,3 +4425,10 @@ def correct_masks(src):
4424
4425
  cell_path = os.path.join(src,'norm_channel_stack', 'cell_mask_stack')
4425
4426
  convert_and_relabel_masks(cell_path)
4426
4427
  _load_and_concatenate_arrays(src, [0,1,2,3], 1, 0, 2)
4428
+
4429
def count_reads_in_fastq(fastq_file):
    """Count the reads in a FASTQ file, gzip-compressed or plain text.

    The number of lines is divided by four (one FASTQ record spans four
    lines); a trailing incomplete record is ignored by the floor division.

    Args:
        fastq_file (str): Path to the FASTQ file.

    Returns:
        int: Number of complete four-line records in the file.
    """
    # gzip streams start with the magic bytes 0x1f 0x8b; anything else is
    # read as plain text instead of failing inside gzip.
    with open(fastq_file, "rb") as probe:
        is_gzipped = probe.read(2) == b"\x1f\x8b"

    opener = gzip.open if is_gzipped else open
    with opener(fastq_file, "rt") as f:
        line_count = sum(1 for _ in f)
    return line_count // 4