PyPI - spacr - Versions diffs - 0.2.46__py3-none-any.whl → 0.2.56__py3-none-any.whl - Mend

spacr-0.2.46-py3-none-any.whl → spacr-0.2.56-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (59)

spacr/settings.py (changed)

@@ -220,6 +220,7 @@ def get_measure_crop_settings(settings):
     settings.setdefault('src', 'path')
     settings.setdefault('verbose', False)
+    settings.setdefault('experiment', 'exp')
     # Test mode
     settings.setdefault('test_mode', False)
@@ -252,8 +253,6 @@ def get_measure_crop_settings(settings):
     # Operational settings
     settings.setdefault('plot',False)
-    settings.setdefault('plot_filtration',False)
-    settings.setdefault('representative_images', False)
     settings.setdefault('n_jobs', os.cpu_count()-2)
     # Object settings
@@ -268,24 +267,9 @@ def get_measure_crop_settings(settings):
     settings.setdefault('cytoplasm_min_size',0)
     settings.setdefault('merge_edge_pathogen_cells', True)
-    # Miscellaneous settings
-    settings.setdefault('experiment', 'exp')
-    settings.setdefault('cells', ['HeLa'])
-    settings.setdefault('cell_loc', None)
-    settings.setdefault('pathogens', ['ME49Dku80WT', 'ME49Dku80dgra8:GRA8', 'ME49Dku80dgra8', 'ME49Dku80TKO'])
-    settings.setdefault('pathogen_loc', [['c1', 'c2', 'c3', 'c4', 'c5', 'c6'], ['c7', 'c8', 'c9', 'c10', 'c11', 'c12'], ['c13', 'c14', 'c15', 'c16', 'c17', 'c18'], ['c19', 'c20', 'c21', 'c22', 'c23', 'c24']])
-    settings.setdefault('treatments', ['BR1', 'BR2', 'BR3'])
-    settings.setdefault('treatment_loc', [['c1', 'c2', 'c7', 'c8', 'c13', 'c14', 'c19', 'c20'], ['c3', 'c4', 'c9', 'c10', 'c15', 'c16', 'c21', 'c22'], ['c5', 'c6', 'c11', 'c12', 'c17', 'c18', 'c23', 'c24']])
-    settings.setdefault('channel_of_interest', 2)
-    settings.setdefault('compartments', ['pathogen', 'cytoplasm'])
-    settings.setdefault('measurement', 'mean_intensity')
-    settings.setdefault('nr_imgs', 32)
-    settings.setdefault('um_per_pixel', 0.1)
     if settings['test_mode']:
         settings['verbose'] = True
         settings['plot'] = True
-        settings['plot_filtration'] = True
         test_imgs = settings['test_nr']
         print(f'Test mode enabled with {test_imgs} images, plotting set to True')
@@ -293,7 +277,7 @@ def get_measure_crop_settings(settings):
 def set_default_analyze_screen(settings):
     settings.setdefault('src', 'path')
-    settings.setdefault('model_type','xgboost')
+    settings.setdefault('model_type_ml','xgboost')
     settings.setdefault('heatmap_feature','predictions')
     settings.setdefault('grouping','mean')
     settings.setdefault('min_max','allq')
@@ -342,11 +326,87 @@ def set_default_train_test_model(settings):
     settings.setdefault('intermedeate_save',True)
     settings.setdefault('pin_memory',True)
     settings.setdefault('n_jobs',cores)
-    settings.setdefault('channels',['r','g','b'])
+    settings.setdefault('train_channels',['r','g','b'])
     settings.setdefault('augment',False)
     settings.setdefault('verbose',False)
     return settings
+def set_generate_training_dataset_defaults(settings):
+    settings.setdefault('src','path')
+    settings.setdefault('dataset_mode','annotation')
+    settings.setdefault('annotation_column','test')
+    settings.setdefault('annotated_classes',[1,2])
+    settings.setdefault('classes',['nc','pc'])
+    settings.setdefault('size',224)
+    settings.setdefault('test_split',0.1)
+    settings.setdefault('class_metadata',[['c1'],['c2']])
+    settings.setdefault('metadata_type_by','col')
+    settings.setdefault('channel_of_interest',3)
+    settings.setdefault('custom_measurement',None)
+    settings.setdefault('tables',None)
+    settings.setdefault('png_type','cell_png')
+    return settings
+def deep_spacr_defaults(settings):
+    cores = os.cpu_count()-2
+    settings.setdefault('src','path')
+    settings.setdefault('dataset_mode','annotation')
+    settings.setdefault('annotation_column','test')
+    settings.setdefault('annotated_classes',[1,2])
+    settings.setdefault('classes',['nc','pc'])
+    settings.setdefault('size',224)
+    settings.setdefault('test_split',0.1)
+    settings.setdefault('class_metadata',[['c1'],['c2']])
+    settings.setdefault('metadata_type_by','col')
+    settings.setdefault('channel_of_interest',3)
+    settings.setdefault('custom_measurement',None)
+    settings.setdefault('tables',None)
+    settings.setdefault('png_type','cell_png')
+    settings.setdefault('custom_model',False)
+    settings.setdefault('custom_model_path','path')
+    settings.setdefault('train',True)
+    settings.setdefault('test',False)
+    settings.setdefault('model_type','maxvit_t')
+    settings.setdefault('optimizer_type','adamw')
+    settings.setdefault('schedule','reduce_lr_on_plateau') #reduce_lr_on_plateau, step_lr
+    settings.setdefault('loss_type','focal_loss') # binary_cross_entropy_with_logits
+    settings.setdefault('normalize',True)
+    settings.setdefault('image_size',224)
+    settings.setdefault('batch_size',64)
+    settings.setdefault('epochs',100)
+    settings.setdefault('val_split',0.1)
+    settings.setdefault('train_mode','erm')
+    settings.setdefault('learning_rate',0.001)
+    settings.setdefault('weight_decay',0.00001)
+    settings.setdefault('dropout_rate',0.1)
+    settings.setdefault('init_weights',True)
+    settings.setdefault('amsgrad',True)
+    settings.setdefault('use_checkpoint',True)
+    settings.setdefault('gradient_accumulation',True)
+    settings.setdefault('gradient_accumulation_steps',4)
+    settings.setdefault('intermedeate_save',True)
+    settings.setdefault('pin_memory',True)
+    settings.setdefault('n_jobs',cores)
+    settings.setdefault('train_channels',['r','g','b'])
+    settings.setdefault('augment',False)
+    settings.setdefault('verbose',False)
+    settings.setdefault('apply_model_to_dataset',False)
+    settings.setdefault('file_metadata',None)
+    settings.setdefault('sample',None)
+    settings.setdefault('experiment','exp.')
+    settings.setdefault('score_threshold',0.5)
+    settings.setdefault('tar_path','path')
+    settings.setdefault('model_path','path')
+    settings.setdefault('file_type','cell_png')
+    settings.setdefault('generate_training_dataset', True)
+    settings.setdefault('train_DL_model', True)
+    return settings
 def get_analyze_recruitment_default_settings(settings):
     settings.setdefault('target','protein')
     settings.setdefault('cell_types',['HeLa'])
@@ -384,6 +444,7 @@ def get_analyze_recruitment_default_settings(settings):
     return settings
 def get_analyze_reads_default_settings(settings):
+    settings.setdefault('src', 'path')
     settings.setdefault('upstream', 'CTTCTGGTAAATGGGGATGTCAAGTT')
     settings.setdefault('downstream', 'GTTTAAGAGCTATGCTGGAAACAGCAG') #This is the reverce compliment of the column primer starting from the end #TGCTGTTTAAGAGCTATGCTGGAAACAGCA
     settings.setdefault('barecode_length_1', 8)
@@ -396,7 +457,7 @@ def get_map_barcodes_default_settings(settings):
     settings.setdefault('src', 'path')
     settings.setdefault('grna', '/home/carruthers/Documents/grna_barcodes.csv')
     settings.setdefault('barcodes', '/home/carruthers/Documents/SCREEN_BARCODES.csv')
-    settings.setdefault('plate_dict', {'EO1': 'plate1', 'EO2': 'plate2', 'EO3': 'plate3', 'EO4': 'plate4', 'EO5': 'plate5', 'EO6': 'plate6', 'EO7': 'plate7', 'EO8': 'plate8'})
+    settings.setdefault('plate_dict', "{'EO1': 'plate1', 'EO2': 'plate2', 'EO3': 'plate3', 'EO4': 'plate4', 'EO5': 'plate5', 'EO6': 'plate6', 'EO7': 'plate7', 'EO8': 'plate8'}")
     settings.setdefault('test', False)
     settings.setdefault('verbose', True)
     settings.setdefault('pc', 'TGGT1_220950_1')
@@ -549,13 +610,11 @@ expected_types = {
     "save_png": bool,
     "crop_mode": list,
     "use_bounding_box": bool,
-    "png_size": list,  # This can be a list of lists
+    "png_size": list,  # This can be a list of lists
     "normalize": bool,
     "png_dims": list,
     "normalize_by": str,
     "save_measurements": bool,
-    "representative_images": bool,
-    "plot_filtration": bool,
     "include_uninfected": bool,
     "dialate_pngs": bool,
     "dialate_png_ratios": list,
@@ -563,7 +622,7 @@ expected_types = {
     "cells": list,
     "cell_loc": list,
     "pathogens": list,
-    "pathogen_loc": (list, list),  # This can be a list of lists
+    "pathogen_loc": (list, list),  # This can be a list of lists
     "treatments": list,
     "treatment_loc": (list, list),  # This can be a list of lists
     "channel_of_interest": int,
@@ -571,7 +630,6 @@ expected_types = {
     "measurement": str,
     "nr_imgs": int,
     "um_per_pixel": (int, float),
-    # Additional settings based on provided defaults
     "include_noninfected": bool,
     "include_multiinfected": bool,
     "include_multinucleated": bool,
@@ -685,7 +743,7 @@ expected_types = {
     "cell_types": list,
     "cell_plate_metadata": (list, type(None)),
     "pathogen_types": list,
-    "pathogen_plate_metadata": (list, list),  # This can be a list of lists
+    "pathogen_plate_metadata": (list, list),  # This can be a list of lists
     "treatment_plate_metadata": (list, list),  # This can be a list of lists
     "metadata_types": list,
     "cell_chann_dim": int,
@@ -738,63 +796,69 @@ expected_types = {
     "from_scratch": bool,
     "width_height": list,
     "resize": bool,
+    "compression": str,
+    "complevel": int,
     "gene_weights_csv": str,
     "fraction_threshold": float,
+    "barcode_mapping":dict,
+    "redunction_method":str,
+    "mix":str,
+    "model_type_ml":str,
+    "exclude_conditions":list,
+    "remove_highly_correlated_features":bool,
+    'barcode_coordinates':list,  # This is a list of lists
+    'reverse_complement':bool,
+    'file_type':str,
+    'model_path':str,
+    'tar_path':str,
+    'score_threshold':float,
+    'sample':None,
+    'file_metadata':None,
+    'apply_model_to_dataset':False,
+    "train":bool,
+    "test":bool,
+    'train_channels':list,
+    "optimizer_type":str,
+    "dataset_mode":str,
+    "annotated_classes":list,
+    "annotation_column":str,
+    "apply_model_to_dataset":bool,
+    "metadata_type_by":str,
+    "custom_measurement":str,
+    "custom_model":bool,
+    "size":int,
+    "test_split":float,
+    "class_metadata":list, # This is a list of lists
+    "png_type":str,
+    "custom_model_path":str,
+    "generate_training_dataset":bool,
+    "train_DL_model":bool,
 }
-def check_settings_v1(vars_dict, expected_types,q=None):
-    from .gui_utils import parse_list
-    settings = {}
-    # Define the expected types for each key, including None where applicable
-    for key, (label, widget, var) in vars_dict.items():
-        if key not in expected_types:
-            if key not in ["General","Nucleus","Cell","Pathogen","Timelapse","Plot","Object Image","Annotate Data","Measurements","Advanced","Miscellaneous","Test"]:
-                q.put(f"Key {key} not found in expected types.")
-                continue
-        value = var.get()
-        expected_type = expected_types.get(key, str)
+categories = {"General": ["src", "metadata_type", "custom_regex", "experiment", "channels", "magnification", "channel_dims", "apply_model_to_dataset", "generate_training_dataset", "train_DL_model"],
+             "Cell": ["cell_intensity_range", "cell_size_range", "cell_chann_dim", "cell_channel", "cell_background", "cell_Signal_to_noise", "cell_CP_prob", "cell_FT", "remove_background_cell", "cell_min_size", "cell_mask_dim", "cytoplasm", "cytoplasm_min_size", "include_uninfected", "merge_edge_pathogen_cells", "adjust_cells"],
+             "Nucleus": ["nucleus_intensity_range", "nucleus_size_range", "nucleus_chann_dim", "nucleus_channel", "nucleus_background", "nucleus_Signal_to_noise", "nucleus_CP_prob", "nucleus_FT", "remove_background_nucleus", "nucleus_min_size", "nucleus_mask_dim", "nucleus_loc"],
+             "Pathogen": ["pathogen_intensity_range", "pathogen_size_range", "pathogen_chann_dim", "pathogen_channel", "pathogen_background", "pathogen_Signal_to_noise", "pathogen_CP_prob", "pathogen_FT", "pathogen_model", "remove_background_pathogen", "pathogen_min_size", "pathogen_mask_dim"],
+             "Timelapse": ["fps", "timelapse_displacement", "timelapse_memory", "timelapse_frame_limits", "timelapse_remove_transient", "timelapse_mode", "timelapse_objects", "compartments"],
+             "Plot": ["plot_control", "plot_nr", "examples_to_plot", "normalize_plots", "normalize", "cmap", "figuresize", "plot_cluster_grids", "img_zoom", "row_limit", "color_by", "plot_images", "smooth_lines", "plot_points", "plot_outlines", "black_background", "plot_by_cluster", "heatmap_feature","grouping","min_max","cmap","save_figure"],
+             "Measurements": ["remove_image_canvas", "remove_highly_correlated", "homogeneity", "homogeneity_distances", "radial_dist", "calculate_correlation", "manders_thresholds", "save_measurements", "tables", "image_nr", "dot_size", "filter_by", "remove_highly_correlated_features", "remove_low_variance_features", "channel_of_interest"],
+             "Paths":["grna", "barcodes", "custom_model_path", "tar_path","model_path"],
+             "Sequencing": ["upstream", "downstream", "barecode_length_1", "barecode_length_2", "chunk_size", "barcode_mapping", "reverse_complement", "barcode_coordinates", "complevel", "compression","plate_dict"],
+             "Embedding": ["visualize","n_neighbors","min_dist","metric","resnet_features","reduction_method","embedding_by_controls","col_to_compare","log_data"],
+             "Clustering": ["eps","min_samples","analyze_clusters","clustering","remove_cluster_noise"],
+             "Object Image": ["save_png", "dialate_pngs", "dialate_png_ratios", "png_size", "png_dims", "save_arrays", "normalize_by", "dialate_png_ratios", "crop_mode", "dialate_pngs", "normalize", "use_bounding_box"],
+             "Annotation": ["nc_loc", "pc_loc", "nc", "pc", "cell_plate_metadata","pathogen_types", "pathogen_plate_metadata", "treatment_plate_metadata", "metadata_types", "cell_types", "target","positive_control","negative_control", "location_column", "treatment_loc", "cells", "cell_loc", "pathogens", "pathogen_loc", "channel_of_interest", "measurement", "treatments", "um_per_pixel", "nr_imgs", "exclude", "exclude_conditions", "mix", "pos", "neg"],
+             "Machine Learning":[],
+             "Deep Learning": ["png_type","score_threshold","file_type", "train_channels", "epochs", "loss_type", "optimizer_type","image_size","val_split","learning_rate","weight_decay","dropout_rate", "init_weights", "train", "classes", "augment"],
+             "Generate Dataset":["file_metadata","class_metadata", "annotation_column","annotated_classes", "dataset_mode", "metadata_type_by","custom_measurement", "sample", "size"],
+             "Cellpose":["from_scratch", "n_epochs", "width_height", "model_name", "custom_model", "resample", "rescale", "CP_prob", "flow_threshold", "percentiles", "circular", "invert", "diameter", "grayscale", "background", "Signal_to_noise", "resize", "target_height", "target_width"],
+             "Regression":["class_1_threshold", "plate", "other", "fraction_threshold", "alpha", "remove_row_column_effect", "regression_type", "min_cell_count", "agg_type", "transform", "dependent_variable", "gene_weights_csv"],
+             "Miscellaneous": ["all_to_mip", "pick_slice", "skip_mode", "upscale", "upscale_factor"],
+             "Test": ["test_mode", "test_images", "random_test", "test_nr", "test", "test_split"],
+             "Advanced": ["target_intensity_min", "cells_per_well", "include_multinucleated", "include_multiinfected", "include_noninfected", "backgrounds", "plot", "timelapse", "schedule", "test_size","exclude","n_repeats","top_features", "model_type_ml", "model_type","minimum_cell_count","n_estimators","preprocess", "remove_background", "normalize", "lower_percentile", "merge_pathogens", "batch_size", "filter", "save", "masks", "verbose", "randomize", "n_jobs", "train_mode","amsgrad","use_checkpoint","gradient_accumulation","gradient_accumulation_steps","intermedeate_save","pin_memory"]
+             }
-        try:
-            if key in ["png_size", "pathogen_plate_metadata", "treatment_plate_metadata"]:
-                parsed_value = ast.literal_eval(value) if value else None
-                if isinstance(parsed_value, list):
-                    if all(isinstance(i, list) for i in parsed_value) or all(not isinstance(i, list) for i in parsed_value):
-                        settings[key] = parsed_value
-                    else:
-                        raise ValueError("Invalid format: Mixed list and list of lists")
-                else:
-                    raise ValueError("Invalid format for list or list of lists")
-            elif expected_type == list:
-                settings[key] = parse_list(value) if value else None
-            elif expected_type == bool:
-                settings[key] = value if isinstance(value, bool) else value.lower() in ['true', '1', 't', 'y', 'yes']
-            elif expected_type == (int, type(None)):
-                settings[key] = int(value) if value else None
-            elif expected_type == (float, type(None)):
-                settings[key] = float(value) if value else None
-            elif expected_type == (int, float):
-                settings[key] = float(value) if '.' in value else int(value)
-            elif expected_type == (str, type(None)):
-                settings[key] = str(value) if value else None
-            elif isinstance(expected_type, tuple):
-                for typ in expected_type:
-                    try:
-                        settings[key] = typ(value) if value else None
-                        break
-                    except (ValueError, TypeError):
-                        continue
-                else:
-                    raise ValueError
-            else:
-                settings[key] = expected_type(value) if value else None
-        except (ValueError, SyntaxError):
-            expected_type_name = ' or '.join([t.__name__ for t in expected_type]) if isinstance(expected_type, tuple) else expected_type.__name__
-            q.put(f"Error: Invalid format for {key}. Expected type: {expected_type_name}.")
-            return
-    return settings
+category_keys = list(categories.keys())
 def check_settings(vars_dict, expected_types, q=None):
     from .gui_utils import parse_list
@@ -805,9 +869,9 @@ def check_settings(vars_dict, expected_types, q=None):
     settings = {}
-    for key, (label, widget, var) in vars_dict.items():
+    for key, (label, widget, var, _) in vars_dict.items():
         if key not in expected_types:
-            if key not in ["General", "Nucleus", "Cell", "Pathogen", "Timelapse", "Plot", "Object Image", "Annotate Data", "Measurements", "Advanced", "Miscellaneous", "Test"]:
+            if key not in category_keys:
                 q.put(f"Key {key} not found in expected types.")
                 continue
@@ -815,7 +879,7 @@ def check_settings(vars_dict, expected_types, q=None):
         expected_type = expected_types.get(key, str)
         try:
-            if key in ["png_size", "pathogen_plate_metadata", "treatment_plate_metadata"]:
+            if key in ["timelapse_frame_limits", "png_size", "pathogen_loc", "treatment_loc", "pathogen_plate_metadata", "treatment_plate_metadata", "barcode_coordinates", "class_metadata"]:
                 parsed_value = ast.literal_eval(value) if value else None
                 if isinstance(parsed_value, list):
                     if all(isinstance(i, list) for i in parsed_value) or all(not isinstance(i, list) for i in parsed_value):
@@ -836,6 +900,20 @@ def check_settings(vars_dict, expected_types, q=None):
                 settings[key] = float(value) if '.' in value else int(value)
             elif expected_type == (str, type(None)):
                 settings[key] = str(value) if value else None
+            elif expected_type == dict:
+                try:
+                    # Ensure that the value is a string that can be converted to a dictionary
+                    if isinstance(value, str):
+                        settings[key] = ast.literal_eval(value)
+                    else:
+                        raise ValueError("Expected a string representation of a dictionary.")
+                    # Check if the result is actually a dictionary
+                    if not isinstance(settings[key], dict):
+                        raise ValueError("Value is not a valid dictionary.")
+                except (ValueError, SyntaxError) as e:
+                    settings[key] = {}
+                    q.put(f"Error: Invalid format for {key}. Expected type: dict. Error: {e}")
             elif isinstance(expected_type, tuple):
                 for typ in expected_type:
                     try:
@@ -856,7 +934,7 @@ def check_settings(vars_dict, expected_types, q=None):
 def generate_fields(variables, scrollable_frame):
     from .gui_utils import create_input_field
-    from .gui_elements import spacrToolTip
+    from .gui_elements import set_dark_style, spacrToolTip
     row = 1
     vars_dict = {}
     tooltips = {
@@ -886,7 +964,7 @@ def generate_fields(variables, scrollable_frame):
         "cell_Signal_to_noise": "(float) - The signal-to-noise ratio for the cell channel. This will be used to determine the range of intensities to normalize images to for cell segmentation.",
         "cell_size_range": "(list) - Size range for cell segmentation.",
         "cell_types": "(list) - Types of cells to include in the analysis.",
-        "cells": "(list) - The cell types to include in the analysis.",
+        "cells": "(list of lists) - The cell types to include in the analysis.",
         "cells_per_well": "(int) - Number of cells per well.",
         "channel_dims": "(list) - The dimensions of the image channels.",
         "channel_of_interest": "(int) - The channel of interest to use for the analysis.",
@@ -955,7 +1033,7 @@ def generate_fields(variables, scrollable_frame):
         "metadata_type": "(str) - Type of metadata to expect in the images. This will determine how the images are processed. If 'custom' is selected, you can provide a custom regex pattern to extract metadata from the image names.",
         "metadata_types": "(list) - Types of metadata to include in the analysis.",
         "merge_edge_pathogen_cells": "(bool) - Whether to merge cells that share pathogen objects.",
-        "merge_pathogens": "(bool) - Whether to merge pathogen objects that share more than 75% of their perimeter.",
+        "merge_pathogens": "(bool) - Whether to merge pathogen objects that share more than 75 percent of their perimeter.",
         "metric": "(str) - Metric to use for UMAP.",
         "min_cell_count": "(int) - Minimum number of cells required for analysis.",
         "min_dist": "(float) - Minimum distance for UMAP.",
@@ -964,6 +1042,7 @@ def generate_fields(variables, scrollable_frame):
         "mix": "(dict) - Mixing settings for the samples.",
         "model_name": "(str) - Name of the Cellpose model.",
         "model_type": "(str) - Type of model to use for the analysis.",
+        "model_type_ml": "(str) - Type of model to use for machine learning.",
         "nc": "(str) - Negative control identifier.",
         "nc_loc": "(str) - Location of the negative control in the images.",
         "negative_control": "(str) - Identifier for the negative control.",
@@ -994,12 +1073,7 @@ def generate_fields(variables, scrollable_frame):
         "pathogen_background": "(float) - The background intensity for the pathogen channel. This will be used to remove background noise.",
         "pathogen_chann_dim": "(int) - Dimension of the channel to use for pathogen segmentation.",
         "pathogen_channel": "(int) - The channel to use for the pathogen. If None, the pathogen will not be segmented.",
-        "pathogen_intensity_range": "(list) - Intensity range for pathogen segmentation.",
-        "pathogen_loc": "(list) - The locations of the pathogen types in the images.",
-        "pathogen_mask_dim": "(int) - The dimension of the array the pathogen mask is saved in.",
-        "pathogen_min_size": "(int) - The minimum size of pathogen objects in pixels^2.",
-        "pathogen_model": "(str) - Model to use for pathogen segmentation.",
-        "pathogen_plate_metadata": "(str) - Metadata for the pathogen plate.",
+        "pathogen_intensity_range": "(str) - Metadata for the pathogen plate.",
         "pathogen_Signal_to_noise": "(float) - The signal-to-noise ratio for the pathogen channel. This will be used to determine the range of intensities to normalize images to for pathogen segmentation.",
         "pathogen_size_range": "(list) - Size range for pathogen segmentation.",
         "pathogen_types": "(list) - Types of pathogens to include in the analysis.",
@@ -1014,7 +1088,6 @@ def generate_fields(variables, scrollable_frame):
         "plot_by_cluster": "(bool) - Whether to plot images by clusters.",
         "plot_cluster_grids": "(bool) - Whether to plot grids of clustered images.",
         "plot_control": "(dict) - Control settings for plotting.",
-        "plot_filtration": "(bool) - Whether to plot the filtration steps.",
         "plot_images": "(bool) - Whether to plot images.",
         "plot_nr": "(int) - Number of plots to generate.",
         "plot_outlines": "(bool) - Whether to plot outlines of segmented objects.",
@@ -1036,7 +1109,6 @@ def generate_fields(variables, scrollable_frame):
         "remove_image_canvas": "(bool) - Whether to remove the image canvas after plotting.",
         "remove_low_variance_features": "(bool) - Whether to remove low variance features from the analysis.",
         "remove_row_column_effect": "(bool) - Whether to remove row and column effects from the data.",
-        "representative_images": "(bool) - Whether to save representative images of the segmented objects (Not working yet).",
         "resize": "(bool) - Resize factor for the images.",
         "resample": "(bool) - Whether to resample the images during processing.",
         "rescale": "(float) - Rescaling factor for the images.",
@@ -1077,42 +1149,35 @@ def generate_fields(variables, scrollable_frame):
         "verbose": "(bool) - Whether to print verbose output during processing.",
         "weight_decay": "(float) - Weight decay for regularization.",
         "width_height": "(tuple) - Width and height of the input images.",
+        "barcode_coordinates": "(list of lists) - Coordinates of the barcodes in the sequence.",
+        "barcode_mapping": "dict - names and barecode csv files",
+        "compression": "str - type of compression (e.g. zlib)",
+        "complevel": "int - level of compression (0-9). Higher is slower and yealds smaller files",
+        "file_type": "str - type of file to process",
+        "model_path": "str - path to the model",
+        "tar_path": "str - path to the tar file with image dataset",
+        "score_threshold": "float - threshold for classification",
+        "sample": "str - number of images to sample for tar dataset (including both classes). Default: None",
+        "file_metadata": "str - string that must be present in image path to be included in the dataset",
+        "apply_model_to_dataset": "bool - whether to apply model to the dataset",
+        "train_channels": "list - channels to use for training",
+        "dataset_mode": "str - How to generate train/test dataset.",
+        "annotated_classes": "list - list of numbers in annotation column.",
         "um_per_pixel": "(float) - The micrometers per pixel for the images."
     }
     for key, (var_type, options, default_value) in variables.items():
-        label, widget, var = create_input_field(scrollable_frame.scrollable_frame, key, row, var_type, options, default_value)
-        vars_dict[key] = (label, widget, var)  # Store the label, widget, and variable
+        label, widget, var, frame = create_input_field(scrollable_frame.scrollable_frame, key, row, var_type, options, default_value)
+        vars_dict[key] = (label, widget, var, frame)  # Store the label, widget, and variable
         # Add tooltip to the label if it exists in the tooltips dictionary
         if key in tooltips:
             spacrToolTip(label, tooltips[key])
         row += 1
     return vars_dict
-categories = {
-    "General": ["src", "metadata_type", "custom_regex", "experiment", "channels", "magnification", "channel_dims"],
-    "Paths":["grna", "barcodes"],
-    "Regression":["class_1_threshold", "plate", "other", "fraction_threshold", "alpha", "remove_row_column_effect", "regression_type", "min_cell_count", "agg_type", "transform", "dependent_variable", "gene_weights_csv"],
-    "Cellpose":["from_scratch", "n_epochs", "width_height", "model_name", "custom_model", "resample", "rescale", "CP_prob", "flow_threshold", "percentiles", "circular", "invert", "diameter", "grayscale", "background", "Signal_to_noise", "resize", "target_height", "target_width"],
-    "Nucleus": ["nucleus_intensity_range", "nucleus_size_range", "nucleus_chann_dim", "nucleus_channel", "nucleus_background", "nucleus_Signal_to_noise", "nucleus_CP_prob", "nucleus_FT", "remove_background_nucleus", "nucleus_min_size", "nucleus_mask_dim", "nucleus_loc"],
-    "Cell": ["cell_intensity_range", "cell_size_range", "cell_chann_dim", "cell_channel", "cell_background", "cell_Signal_to_noise", "cell_CP_prob", "cell_FT", "remove_background_cell", "cell_min_size", "cell_mask_dim", "cytoplasm", "cytoplasm_min_size", "include_uninfected", "merge_edge_pathogen_cells", "adjust_cells"],
-    "Pathogen": ["pathogen_intensity_range", "pathogen_size_range", "pathogen_chann_dim", "pathogen_channel", "pathogen_background", "pathogen_Signal_to_noise", "pathogen_CP_prob", "pathogen_FT", "pathogen_model", "remove_background_pathogen", "pathogen_min_size", "pathogen_mask_dim"],
-    "Timelapse": ["fps", "timelapse_displacement", "timelapse_memory", "timelapse_frame_limits", "timelapse_remove_transient", "timelapse_mode", "timelapse_objects", "compartments"],
-    "Plot": ["plot_control", "plot_nr", "plot_filtration", "examples_to_plot", "normalize_plots", "normalize", "cmap", "figuresize", "plot_cluster_grids", "img_zoom", "row_limit", "color_by", "plot_images", "smooth_lines", "plot_points", "plot_outlines", "black_background", "plot_by_cluster", "heatmap_feature","grouping","min_max","cmap","save_figure"],
-    "Object Image": ["save_png", "dialate_pngs", "dialate_png_ratios", "png_size", "png_dims", "save_arrays", "normalize_by", "dialate_png_ratios", "crop_mode", "dialate_pngs", "normalize", "use_bounding_box"],
-    "Annotate Data": ["nc_loc", "pc_loc", "nc", "pc", "cell_plate_metadata","pathogen_types", "pathogen_plate_metadata", "treatment_plate_metadata", "metadata_types", "cell_types", "target","positive_control","negative_control", "location_column", "treatment_loc", "cells", "cell_loc", "pathogens", "pathogen_loc", "channel_of_interest", "measurement", "treatments", "representative_images", "um_per_pixel", "nr_imgs", "exclude", "exclude_conditions", "mix", "pos", "neg"],
-    "Measurements": ["remove_image_canvas", "remove_highly_correlated", "homogeneity", "homogeneity_distances", "radial_dist", "calculate_correlation", "manders_thresholds", "save_measurements", "tables", "image_nr", "dot_size", "filter_by", "remove_highly_correlated_features", "remove_low_variance_features", "channel_of_interest"],
-    "Advanced": ["plate_dict", "target_intensity_min", "cells_per_well", "include_multinucleated", "include_multiinfected", "include_noninfected", "backgrounds", "plot", "timelapse", "schedule", "test_size","exclude","n_repeats","top_features", "model_type","minimum_cell_count","n_estimators","preprocess", "remove_background", "normalize", "lower_percentile", "merge_pathogens", "batch_size", "filter", "save", "masks", "verbose", "randomize", "n_jobs", "train_mode","amsgrad","use_checkpoint","gradient_accumulation","gradient_accumulation_steps","intermedeate_save","pin_memory","n_jobs","channels","augment"],
-    "Clustering": ["eps","min_samples","analyze_clusters","clustering","remove_cluster_noise"],
-    "Embedding": ["visualize","n_neighbors","min_dist","metric","resnet_features","reduction_method","embedding_by_controls","col_to_compare","log_data"],
-    "Train DL Model": ["epochs", "loss_type", "optimizer_type","image_size","val_split","learning_rate","weight_decay","dropout_rate", "init_weights", "train", "classes"],
-    "Miscellaneous": ["all_to_mip", "pick_slice", "skip_mode", "upscale", "upscale_factor"],
-    "Test": ["test_mode", "test_images", "random_test", "test_nr", "test"],
-    "Sequencing": ["upstream", "downstream", "barecode_length_1", "barecode_length_2", "chunk_size"]
-}
 descriptions = {
     'mask': "\n\nHelp:\n- Generate Cells, Nuclei, Pathogens, and Cytoplasm masks from intensity images in src.\n- To ensure that spacr is installed correctly:\n- 1. Downloade the training set (click Download).\n- 2. Import settings (click settings navigate to downloaded dataset settings folder and import preprocess_generate_masks_settings.csv).\n- 3. Run the module.\n- 4. Proceed to the Measure module (click Measure in the menue bar).\n- For further help, click the Help button in the menue bar.",
@@ -1120,8 +1185,6 @@ descriptions = {
     'classify': "Train and Test any Torch Computer vision model. (Requires PNG images from the Measure module). Function: train_test_model from spacr.deep_spacr.\n\nKey Features:\n- Deep Learning Integration: Train and evaluate state-of-the-art Torch models for various classification tasks.\n- Flexible Training: Supports a wide range of Torch models, allowing customization based on specific research needs.\n- Data Requirement: Requires PNG images generated by the Measure module for training and testing.",
-    'sequencing': "Find Barcodes and gRNA sequences in FASTQ files. (Requires paired-end FASTQ files, R1 and R2). Function: analyze_reads from spacr.sequencing.\n\nKey Features:\n- Barcode and gRNA Identification: Efficiently detect and extract barcode and gRNA sequences from raw sequencing data.\n- Paired-End Support: Specifically designed to handle paired-end FASTQ files, ensuring accurate sequence alignment and analysis.\n- High Throughput: Capable of processing large sequencing datasets quickly and accurately.",
     'umap': "Generate UMAP or tSNE embeddings and represent points as single cell images. (Requires measurements.db and PNG images from the Measure module). Function: generate_image_umap from spacr.core.\n\nKey Features:\n- Dimensionality Reduction: Employ UMAP or tSNE algorithms to reduce high-dimensional data into two dimensions for visualization.\n- Single Cell Representation: Visualize embedding points as single cell images, providing an intuitive understanding of data clusters.\n- Data Integration: Requires measurements and images generated by the Measure module, ensuring comprehensive data representation.",
     'train_cellpose': "Train custom Cellpose models for your specific dataset. Function: train_cellpose_model from spacr.core.\n\nKey Features:\n- Custom Model Training: Train Cellpose models on your dataset to improve segmentation accuracy.\n- Data Adaptation: Tailor the model to handle specific types of biological samples more effectively.\n- Advanced Training Options: Supports various training parameters and configurations for optimized performance.",
@@ -1132,8 +1195,8 @@ descriptions = {
     'cellpose_all': "Run Cellpose on all images in your dataset and obtain masks and measurements. Function: cellpose_analysis from spacr.cellpose.\n\nKey Features:\n- End-to-End Analysis: Perform both segmentation and measurement extraction in a single step.\n- Efficiency: Process entire datasets with minimal manual intervention.\n- Comprehensive Output: Obtain detailed masks and corresponding measurements for further analysis.",
-    'map_barcodes': "Map barcodes to your data for identification and tracking. Function: barcode_mapping_tools from spacr.sequencing.\n\nKey Features:\n- Barcode Integration: Efficiently map and integrate barcode information into your dataset.\n- Tracking: Enable tracking and identification of samples using barcodes.\n- Compatibility: Works with sequencing data to ensure accurate mapping and analysis.",
+    'map_barcodes': "\n\nHelp:\n- 1 .Generate consensus read fastq files from R1 and R2 files.\n- 2. Map barcodes from sequencing data for identification and tracking of samples.\n- 3. Run the module to extract and map barcodes from your FASTQ files in chunks.\n- Prepare your barcode CSV files with the appropriate 'name' and 'sequence' columns.\n- Configure the barcode settings (coordinates and reverse complement flags) according to your experimental setup.\n- For further help, click the Help button in the menu bar.",
     'regression': "Perform regression analysis on your data. Function: regression_tools from spacr.analysis.\n\nKey Features:\n- Statistical Analysis: Conduct various types of regression analysis to identify relationships within your data.\n- Flexible Options: Supports multiple regression models and configurations.\n- Data Insight: Gain deeper insights into your dataset through advanced regression techniques.",
     'recruitment': "Analyze recruitment data to understand sample recruitment dynamics. Function: recruitment_analysis_tools from spacr.analysis.\n\nKey Features:\n- Recruitment Analysis: Investigate and analyze the recruitment of samples over time or conditions.\n- Visualization: Generate visualizations to represent recruitment trends and patterns.\n- Integration: Utilize data from various sources for a comprehensive recruitment analysis."
@@ -1142,7 +1205,7 @@ descriptions = {
 def set_annotate_default_settings(settings):
     settings.setdefault('src', 'path')
     settings.setdefault('image_type', 'cell_png')
-    settings.setdefault('channels', 'r,g,b')
+    settings.setdefault('channels', "'r','g','b'")
     settings.setdefault('img_size', 200)
     settings.setdefault('annotation_column', 'test')
     settings.setdefault('normalize', 'False')
@@ -1151,3 +1214,15 @@ def set_annotate_default_settings(settings):
     settings.setdefault('threshold', '2')
     return settings
+def set_default_generate_barecode_mapping(settings={}):
+    settings.setdefault('src', 'path')
+    settings.setdefault('chunk_size', 100000)
+    settings.setdefault('barcode_mapping', {'row': ['/home/carruthers/Documents/row_barcodes.csv',(80, 88), True],
+                                            'grna': ['/home/carruthers/Documents/grna_barcodes.csv',(34, 55), True],
+                                            'column': ['/home/carruthers/Documents/column_barcodes.csv',(0, 7), False]})
+    settings.setdefault('n_jobs', None)
+    settings.setdefault('compression', 'zlib')
+    settings.setdefault('complevel', 5)
+    return settings

spacr/utils.py CHANGED Viewed

@@ -1,4 +1,4 @@
-import sys, os, re, sqlite3, torch, torchvision, random, string, shutil, cv2, tarfile, glob, psutil, platform, signal
+import sys, os, re, sqlite3, torch, torchvision, random, string, shutil, cv2, tarfile, glob, psutil, platform, gzip
 import numpy as np
 from cellpose import models as cp_models
@@ -88,11 +88,11 @@ from sklearn.cluster import KMeans
 from scipy import stats
-def print_progress(files_processed, files_to_process, n_jobs, time_ls=None, batch_size=None, operation_type=""):
+def print_progress(files_processed, files_to_process, n_jobs, time_ls=None, batch_size=None, operation_type="", metricks=None):
     if isinstance(files_processed, list):
-        files_processed = len(files_processed)
+        files_processed = len(set(files_processed))
     if isinstance(files_to_process, list):
-        files_to_process = len(files_to_process)
+        files_to_process = len(set(files_to_process))
     if isinstance(batch_size, list):
         batch_size = len(batch_size)
@@ -117,9 +117,10 @@ def print_progress(files_processed, files_to_process, n_jobs, time_ls=None, batc
             average_time_img = average_time / batch_size
             time_info = f'Time/batch: {average_time:.3f}sec, Time/image: {average_time_img:.3f}sec, Time_left: {time_left:.3f} min.'
-    print(f'Progress: {files_processed}/{files_to_process}, operation_type: {operation_type} {time_info}')
+    if metricks is None:
+        print(f'Progress: {files_processed}/{files_to_process}, operation_type: {operation_type} {time_info}')
+    else:
+        print(f'Progress: {files_processed}/{files_to_process}, {metricks}, operation_type: {operation_type} {time_info}')
 def reset_mp():
     current_method = get_start_method()
@@ -3628,22 +3629,22 @@ def delete_folder(folder_path):
 def measure_test_mode(settings):
     if settings['test_mode']:
-        if not os.path.basename(settings['input_folder']) == 'test':
-            all_files = os.listdir(settings['input_folder'])
+        if not os.path.basename(settings['src']) == 'test':
+            all_files = os.listdir(settings['src'])
             random_files = random.sample(all_files, settings['test_nr'])
-            src = os.path.join(os.path.dirname(settings['input_folder']),'test', 'merged')
+            src = os.path.join(os.path.dirname(settings['src']),'test', 'merged')
             if os.path.exists(src):
                 delete_folder(src)
             os.makedirs(src, exist_ok=True)
             for file in random_files:
-                shutil.copy(os.path.join(settings['input_folder'], file), os.path.join(src,file))
+                shutil.copy(os.path.join(settings['src'], file), os.path.join(src,file))
-            settings['input_folder'] = src
+            settings['src'] = src
             print(f'Changed source folder to {src} for test mode')
         else:
-            print(f'Test mode enabled, using source folder {settings["input_folder"]}')
+            print(f'Test mode enabled, using source folder {settings["src"]}')
     return settings
@@ -4424,3 +4425,10 @@ def correct_masks(src):
     cell_path = os.path.join(src,'norm_channel_stack', 'cell_mask_stack')
     convert_and_relabel_masks(cell_path)
     _load_and_concatenate_arrays(src, [0,1,2,3], 1, 0, 2)
+def count_reads_in_fastq(fastq_file):
+    count = 0
+    with gzip.open(fastq_file, "rt") as f:
+        for _ in f:
+            count += 1
+    return count // 4

spacr 0.2.46__py3-none-any.whl → 0.2.56__py3-none-any.whl

spacr 0.2.46__py3-none-any.whl → 0.2.56__py3-none-any.whl