spacr 0.2.53__py3-none-any.whl → 0.2.61__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
spacr/settings.py CHANGED
@@ -3,8 +3,8 @@ import os, ast
  def set_default_plot_merge_settings():
  settings = {}
  settings.setdefault('include_noninfected', True)
- settings.setdefault('include_multiinfected', True)
- settings.setdefault('include_multinucleated', True)
+ settings.setdefault('include_multiinfected', 10)
+ settings.setdefault('include_multinucleated', 1)
  settings.setdefault('remove_background', False)
  settings.setdefault('filter_min_max', None)
  settings.setdefault('channel_dims', [0,1,2,3])
@@ -20,7 +20,7 @@ def set_default_plot_merge_settings():
  settings.setdefault('normalize', True)
  settings.setdefault('print_object_number', True)
  settings.setdefault('nr', 1)
- settings.setdefault('figuresize', 50)
+ settings.setdefault('figuresize', 10)
  settings.setdefault('cmap', 'inferno')
  settings.setdefault('verbose', True)
  return settings
@@ -70,7 +70,7 @@ def set_default_settings_preprocess_generate_masks(src, settings={}):

  # Plot settings
  settings.setdefault('plot', False)
- settings.setdefault('figuresize', 50)
+ settings.setdefault('figuresize', 10)
  settings.setdefault('cmap', 'inferno')
  settings.setdefault('normalize', True)
  settings.setdefault('normalize_plots', True)
@@ -116,7 +116,7 @@ def set_default_settings_preprocess_img_data(settings):
  skip_mode = settings.setdefault('skip_mode', False)

  cmap = settings.setdefault('cmap', 'inferno')
- figuresize = settings.setdefault('figuresize', 50)
+ figuresize = settings.setdefault('figuresize', 10)
  normalize = settings.setdefault('normalize', True)
  save_dtype = settings.setdefault('save_dtype', 'uint16')

@@ -189,7 +189,7 @@ def set_default_umap_image_settings(settings={}):
  settings.setdefault('remove_cluster_noise', True)
  settings.setdefault('remove_highly_correlated', True)
  settings.setdefault('log_data', False)
- settings.setdefault('figuresize', 60)
+ settings.setdefault('figuresize', 10)
  settings.setdefault('black_background', True)
  settings.setdefault('remove_image_canvas', False)
  settings.setdefault('plot_outlines', True)
@@ -277,7 +277,7 @@ def get_measure_crop_settings(settings):

  def set_default_analyze_screen(settings):
  settings.setdefault('src', 'path')
- settings.setdefault('model_type','xgboost')
+ settings.setdefault('model_type_ml','xgboost')
  settings.setdefault('heatmap_feature','predictions')
  settings.setdefault('grouping','mean')
  settings.setdefault('min_max','allq')
@@ -314,7 +314,6 @@ def set_default_train_test_model(settings):
  settings.setdefault('batch_size',64)
  settings.setdefault('epochs',100)
  settings.setdefault('val_split',0.1)
- settings.setdefault('train_mode','erm')
  settings.setdefault('learning_rate',0.001)
  settings.setdefault('weight_decay',0.00001)
  settings.setdefault('dropout_rate',0.1)
@@ -324,14 +323,90 @@ def set_default_train_test_model(settings):
  settings.setdefault('gradient_accumulation',True)
  settings.setdefault('gradient_accumulation_steps',4)
  settings.setdefault('intermedeate_save',True)
- settings.setdefault('pin_memory',True)
+ settings.setdefault('pin_memory',False)
  settings.setdefault('n_jobs',cores)
- settings.setdefault('channels',['r','g','b'])
+ settings.setdefault('train_channels',['r','g','b'])
  settings.setdefault('augment',False)
  settings.setdefault('verbose',False)
  return settings

+ def set_generate_training_dataset_defaults(settings):
+
+ settings.setdefault('src','path')
+ settings.setdefault('dataset_mode','metadata')
+ settings.setdefault('annotation_column','test')
+ settings.setdefault('annotated_classes',[1,2])
+ settings.setdefault('classes',['nc','pc'])
+ settings.setdefault('size',224)
+ settings.setdefault('test_split',0.1)
+ settings.setdefault('class_metadata',[['c1'],['c2']])
+ settings.setdefault('metadata_type_by','col')
+ settings.setdefault('channel_of_interest',3)
+ settings.setdefault('custom_measurement',None)
+ settings.setdefault('tables',None)
+ settings.setdefault('png_type','cell_png')
+
+ return settings
+
+ def deep_spacr_defaults(settings):
+
+ cores = os.cpu_count()-4
+
+ settings.setdefault('src','path')
+ settings.setdefault('dataset_mode','metadata')
+ settings.setdefault('annotation_column','test')
+ settings.setdefault('annotated_classes',[1,2])
+ settings.setdefault('classes',['nc','pc'])
+ settings.setdefault('size',224)
+ settings.setdefault('test_split',0.1)
+ settings.setdefault('class_metadata',[['c1'],['c2']])
+ settings.setdefault('metadata_type_by','col')
+ settings.setdefault('channel_of_interest',3)
+ settings.setdefault('custom_measurement',None)
+ settings.setdefault('tables',None)
+ settings.setdefault('png_type','cell_png')
+ settings.setdefault('custom_model',False)
+ settings.setdefault('custom_model_path','path')
+ settings.setdefault('train',True)
+ settings.setdefault('test',False)
+ settings.setdefault('model_type','maxvit_t')
+ settings.setdefault('optimizer_type','adamw')
+ settings.setdefault('schedule','reduce_lr_on_plateau') #reduce_lr_on_plateau, step_lr
+ settings.setdefault('loss_type','focal_loss') # binary_cross_entropy_with_logits
+ settings.setdefault('normalize',True)
+ settings.setdefault('image_size',224)
+ settings.setdefault('batch_size',64)
+ settings.setdefault('epochs',100)
+ settings.setdefault('val_split',0.1)
+ settings.setdefault('learning_rate',0.001)
+ settings.setdefault('weight_decay',0.00001)
+ settings.setdefault('dropout_rate',0.1)
+ settings.setdefault('init_weights',True)
+ settings.setdefault('amsgrad',True)
+ settings.setdefault('use_checkpoint',True)
+ settings.setdefault('gradient_accumulation',True)
+ settings.setdefault('gradient_accumulation_steps',4)
+ settings.setdefault('intermedeate_save',True)
+ settings.setdefault('pin_memory',False)
+ settings.setdefault('n_jobs',cores)
+ settings.setdefault('train_channels',['r','g','b'])
+ settings.setdefault('augment',False)
+ settings.setdefault('preload_batches', 3)
+ settings.setdefault('verbose',True)
+ settings.setdefault('apply_model_to_dataset',True)
+ settings.setdefault('file_metadata',None)
+ settings.setdefault('sample',None)
+ settings.setdefault('experiment','exp.')
+ settings.setdefault('score_threshold',0.5)
+ settings.setdefault('tar_path','path')
+ settings.setdefault('model_path','path')
+ settings.setdefault('file_type','cell_png')
+ settings.setdefault('generate_training_dataset', True)
+ settings.setdefault('train_DL_model', True)
+ return settings
+
  def get_analyze_recruitment_default_settings(settings):
+ settings.setdefault('src','path')
  settings.setdefault('target','protein')
  settings.setdefault('cell_types',['HeLa'])
  settings.setdefault('cell_plate_metadata',None)
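Note on the new default helpers above: because they rely on dict.setdefault, they only fill keys the caller has not already set, so user-supplied values always take precedence. A minimal usage sketch (the source path and overrides below are hypothetical):

    from spacr.settings import deep_spacr_defaults

    # Hypothetical user settings; every key not listed here is filled from the defaults above.
    user_settings = {'src': '/data/screen1', 'epochs': 25}
    settings = deep_spacr_defaults(user_settings)

    print(settings['epochs'])          # 25, the user value is kept
    print(settings['model_type'])      # 'maxvit_t', filled from the defaults
    print(settings['train_channels'])  # ['r', 'g', 'b']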
@@ -351,12 +426,10 @@ def get_analyze_recruitment_default_settings(settings):
351
426
  settings.setdefault('plot',True)
352
427
  settings.setdefault('plot_nr',10)
353
428
  settings.setdefault('plot_control',True)
354
- settings.setdefault('figuresize',20)
355
- settings.setdefault('remove_background',False)
356
- settings.setdefault('backgrounds',100)
429
+ settings.setdefault('figuresize',10)
357
430
  settings.setdefault('include_noninfected',True)
358
- settings.setdefault('include_multiinfected',True)
359
- settings.setdefault('include_multinucleated',True)
431
+ settings.setdefault('include_multiinfected',10)
432
+ settings.setdefault('include_multinucleated',1)
360
433
  settings.setdefault('cells_per_well',0)
361
434
  settings.setdefault('pathogen_size_range',[0,100000])
362
435
  settings.setdefault('nucleus_size_range',[0,100000])
@@ -534,7 +607,7 @@ expected_types = {
534
607
  "save_png": bool,
535
608
  "crop_mode": list,
536
609
  "use_bounding_box": bool,
537
- "png_size": list, # This can be a list of lists
610
+ "png_size": list, # This can be a list of lists
538
611
  "normalize": bool,
539
612
  "png_dims": list,
540
613
  "normalize_by": str,
@@ -546,7 +619,7 @@ expected_types = {
546
619
  "cells": list,
547
620
  "cell_loc": list,
548
621
  "pathogens": list,
549
- "pathogen_loc": (list, list), # This can be a list of lists
622
+ "pathogen_loc": (list, list), # This can be a list of lists
550
623
  "treatments": list,
551
624
  "treatment_loc": (list, list), # This can be a list of lists
552
625
  "channel_of_interest": int,
@@ -554,10 +627,9 @@ expected_types = {
554
627
  "measurement": str,
555
628
  "nr_imgs": int,
556
629
  "um_per_pixel": (int, float),
557
- # Additional settings based on provided defaults
558
630
  "include_noninfected": bool,
559
- "include_multiinfected": bool,
560
- "include_multinucleated": bool,
631
+ "include_multiinfected": int,
632
+ "include_multinucleated": int,
561
633
  "filter_min_max": (list, type(None)),
562
634
  "channel_dims": list,
563
635
  "backgrounds": list,
@@ -651,7 +723,6 @@ expected_types = {
651
723
  "image_size": int,
652
724
  "epochs": int,
653
725
  "val_split": float,
654
- "train_mode": str,
655
726
  "learning_rate": float,
656
727
  "weight_decay": float,
657
728
  "dropout_rate": float,
@@ -666,9 +737,9 @@ expected_types = {
666
737
  "augment": bool,
667
738
  "target": str,
668
739
  "cell_types": list,
669
- "cell_plate_metadata": (list, type(None)),
740
+ "cell_plate_metadata": (list, list),
670
741
  "pathogen_types": list,
671
- "pathogen_plate_metadata": (list, list), # This can be a list of lists
742
+ "pathogen_plate_metadata": (list, list), # This can be a list of lists
672
743
  "treatment_plate_metadata": (list, list), # This can be a list of lists
673
744
  "metadata_types": list,
674
745
  "cell_chann_dim": int,
@@ -721,10 +792,71 @@ expected_types = {
721
792
  "from_scratch": bool,
722
793
  "width_height": list,
723
794
  "resize": bool,
795
+ "compression": str,
796
+ "complevel": int,
724
797
  "gene_weights_csv": str,
725
798
  "fraction_threshold": float,
799
+ "barcode_mapping":dict,
800
+ "redunction_method":str,
801
+ "mix":str,
802
+ "model_type_ml":str,
803
+ "exclude_conditions":list,
804
+ "remove_highly_correlated_features":bool,
805
+ 'barcode_coordinates':list, # This is a list of lists
806
+ 'reverse_complement':bool,
807
+ 'file_type':str,
808
+ 'model_path':str,
809
+ 'tar_path':str,
810
+ 'score_threshold':float,
811
+ 'sample':None,
812
+ 'file_metadata':None,
813
+ 'apply_model_to_dataset':False,
814
+ "train":bool,
815
+ "test":bool,
816
+ 'train_channels':list,
817
+ "optimizer_type":str,
818
+ "dataset_mode":str,
819
+ "annotated_classes":list,
820
+ "annotation_column":str,
821
+ "apply_model_to_dataset":bool,
822
+ "metadata_type_by":str,
823
+ "custom_measurement":str,
824
+ "custom_model":bool,
825
+ "size":int,
826
+ "test_split":float,
827
+ "class_metadata":list, # This is a list of lists
828
+ "png_type":str,
829
+ "custom_model_path":str,
830
+ "generate_training_dataset":bool,
831
+ 'preload_batches':int,
832
+ "train_DL_model":bool,
726
833
  }
727
834
 
835
+ categories = {"General": ["src", "metadata_type", "custom_regex", "experiment", "channels", "magnification", "channel_dims", "apply_model_to_dataset", "generate_training_dataset", "train_DL_model"],
836
+ "Cell": ["cell_intensity_range", "cell_size_range", "cell_chann_dim", "cell_channel", "cell_background", "cell_Signal_to_noise", "cell_CP_prob", "cell_FT", "remove_background_cell", "cell_min_size", "cell_mask_dim", "cytoplasm", "cytoplasm_min_size", "include_uninfected", "merge_edge_pathogen_cells", "adjust_cells"],
837
+ "Nucleus": ["nucleus_intensity_range", "nucleus_size_range", "nucleus_chann_dim", "nucleus_channel", "nucleus_background", "nucleus_Signal_to_noise", "nucleus_CP_prob", "nucleus_FT", "remove_background_nucleus", "nucleus_min_size", "nucleus_mask_dim", "nucleus_loc"],
838
+ "Pathogen": ["pathogen_intensity_range", "pathogen_size_range", "pathogen_chann_dim", "pathogen_channel", "pathogen_background", "pathogen_Signal_to_noise", "pathogen_CP_prob", "pathogen_FT", "pathogen_model", "remove_background_pathogen", "pathogen_min_size", "pathogen_mask_dim"],
839
+ "Timelapse": ["fps", "timelapse_displacement", "timelapse_memory", "timelapse_frame_limits", "timelapse_remove_transient", "timelapse_mode", "timelapse_objects", "compartments"],
840
+ "Plot": ["plot_control", "plot_nr", "examples_to_plot", "normalize_plots", "normalize", "cmap", "figuresize", "plot_cluster_grids", "img_zoom", "row_limit", "color_by", "plot_images", "smooth_lines", "plot_points", "plot_outlines", "black_background", "plot_by_cluster", "heatmap_feature","grouping","min_max","cmap","save_figure"],
841
+ "Measurements": ["remove_image_canvas", "remove_highly_correlated", "homogeneity", "homogeneity_distances", "radial_dist", "calculate_correlation", "manders_thresholds", "save_measurements", "tables", "image_nr", "dot_size", "filter_by", "remove_highly_correlated_features", "remove_low_variance_features", "channel_of_interest"],
842
+ "Paths":["grna", "barcodes", "custom_model_path", "tar_path","model_path"],
843
+ "Sequencing": ["upstream", "downstream", "barecode_length_1", "barecode_length_2", "chunk_size", "barcode_mapping", "reverse_complement", "barcode_coordinates", "complevel", "compression","plate_dict"],
844
+ "Embedding": ["visualize","n_neighbors","min_dist","metric","resnet_features","reduction_method","embedding_by_controls","col_to_compare","log_data"],
845
+ "Clustering": ["eps","min_samples","analyze_clusters","clustering","remove_cluster_noise"],
846
+ "Object Image": ["save_png", "dialate_pngs", "dialate_png_ratios", "png_size", "png_dims", "save_arrays", "normalize_by", "dialate_png_ratios", "crop_mode", "dialate_pngs", "normalize", "use_bounding_box"],
847
+ "Annotation": ["nc_loc", "pc_loc", "nc", "pc", "cell_plate_metadata","pathogen_types", "pathogen_plate_metadata", "treatment_plate_metadata", "metadata_types", "cell_types", "target","positive_control","negative_control", "location_column", "treatment_loc", "cells", "cell_loc", "pathogens", "pathogen_loc", "channel_of_interest", "measurement", "treatments", "um_per_pixel", "nr_imgs", "exclude", "exclude_conditions", "mix", "pos", "neg"],
848
+ "Machine Learning":[],
849
+ "Deep Learning": ["png_type","score_threshold","file_type", "train_channels", "epochs", "loss_type", "optimizer_type","image_size","val_split","learning_rate","weight_decay","dropout_rate", "init_weights", "train", "classes", "augment"],
850
+ "Generate Dataset":["preload_batches", "file_metadata","class_metadata", "annotation_column","annotated_classes", "dataset_mode", "metadata_type_by","custom_measurement", "sample", "size"],
851
+ "Cellpose":["from_scratch", "n_epochs", "width_height", "model_name", "custom_model", "resample", "rescale", "CP_prob", "flow_threshold", "percentiles", "circular", "invert", "diameter", "grayscale", "background", "Signal_to_noise", "resize", "target_height", "target_width"],
852
+ "Regression":["class_1_threshold", "plate", "other", "fraction_threshold", "alpha", "remove_row_column_effect", "regression_type", "min_cell_count", "agg_type", "transform", "dependent_variable", "gene_weights_csv"],
853
+ "Miscellaneous": ["all_to_mip", "pick_slice", "skip_mode", "upscale", "upscale_factor"],
854
+ "Test": ["test_mode", "test_images", "random_test", "test_nr", "test", "test_split"],
855
+ "Advanced": ["target_intensity_min", "cells_per_well", "include_multinucleated", "include_multiinfected", "include_noninfected", "backgrounds", "plot", "timelapse", "schedule", "test_size","exclude","n_repeats","top_features", "model_type_ml", "model_type","minimum_cell_count","n_estimators","preprocess", "remove_background", "normalize", "lower_percentile", "merge_pathogens", "batch_size", "filter", "save", "masks", "verbose", "randomize", "n_jobs", "amsgrad","use_checkpoint","gradient_accumulation","gradient_accumulation_steps","intermedeate_save","pin_memory"]
856
+ }
857
+
858
+ category_keys = list(categories.keys())
859
+
728
860
  def check_settings(vars_dict, expected_types, q=None):
729
861
  from .gui_utils import parse_list
730
862
 
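The module-level categories dict added above drives how setting keys are grouped in the GUI, and category_keys is what check_settings uses below to skip category headers. A small sketch of a reverse lookup over that dict (illustrative helper, not spacr's own GUI code):

    from spacr.settings import categories, category_keys

    def category_of(key):
        # Return the first category whose key list contains the setting key.
        for category, keys in categories.items():
            if key in keys:
                return category
        return None

    print(category_keys[:3])               # ['General', 'Cell', 'Nucleus']
    print(category_of('figuresize'))       # 'Plot'
    print(category_of('barcode_mapping'))  # 'Sequencing'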
@@ -736,7 +868,7 @@ def check_settings(vars_dict, expected_types, q=None):

  for key, (label, widget, var, _) in vars_dict.items():
  if key not in expected_types:
- if key not in ["General", "Nucleus", "Cell", "Pathogen", "Timelapse", "Plot", "Object Image", "Annotate Data", "Measurements", "Advanced", "Miscellaneous", "Test", "Paths"]:
+ if key not in category_keys:
  q.put(f"Key {key} not found in expected types.")
  continue

@@ -744,7 +876,7 @@ def check_settings(vars_dict, expected_types, q=None):
  expected_type = expected_types.get(key, str)

  try:
- if key in ["png_size", "pathogen_plate_metadata", "treatment_plate_metadata"]:
+ if key in ["cell_plate_metadata", "timelapse_frame_limits", "png_size", "pathogen_loc", "treatment_loc", "pathogen_plate_metadata", "treatment_plate_metadata", "barcode_coordinates", "class_metadata"]:
  parsed_value = ast.literal_eval(value) if value else None
  if isinstance(parsed_value, list):
  if all(isinstance(i, list) for i in parsed_value) or all(not isinstance(i, list) for i in parsed_value):
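For the keys listed above, check_settings parses the raw widget string with ast.literal_eval and then distinguishes a flat list from a list of lists. A standalone sketch of that parsing step (the example values are illustrative):

    import ast

    # Settings such as 'png_size' or 'class_metadata' may be a list or a list of lists.
    for raw in ("[200,200]", "[[200,200],[400,400]]", "[['c1'],['c2']]"):
        parsed = ast.literal_eval(raw) if raw else None
        if isinstance(parsed, list):
            nested = all(isinstance(i, list) for i in parsed)
            print(parsed, 'list of lists' if nested else 'flat list')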
@@ -829,7 +961,7 @@ def generate_fields(variables, scrollable_frame):
  "cell_Signal_to_noise": "(float) - The signal-to-noise ratio for the cell channel. This will be used to determine the range of intensities to normalize images to for cell segmentation.",
  "cell_size_range": "(list) - Size range for cell segmentation.",
  "cell_types": "(list) - Types of cells to include in the analysis.",
- "cells": "(list) - The cell types to include in the analysis.",
+ "cells": "(list of lists) - The cell types to include in the analysis.",
  "cells_per_well": "(int) - Number of cells per well.",
  "channel_dims": "(list) - The dimensions of the image channels.",
  "channel_of_interest": "(int) - The channel of interest to use for the analysis.",
@@ -879,8 +1011,8 @@ def generate_fields(variables, scrollable_frame):
  "image_nr": "(int) - Number of images to process.",
  "image_size": "(int) - Size of the images for training.",
  "img_zoom": "(float) - Zoom factor for the images in plots.",
- "include_multinucleated": "(bool) - Whether to include multinucleated cells in the analysis.",
- "include_multiinfected": "(bool) - Whether to include multi-infected cells in the analysis.",
+ "include_multinucleated": "(int) - Whether to include multinucleated cells in the analysis.",
+ "include_multiinfected": "(int) - Whether to include multi-infected cells in the analysis.",
  "include_noninfected": "(bool) - Whether to include non-infected cells in the analysis.",
  "include_uninfected": "(bool) - Whether to include uninfected cells in the analysis.",
  "init_weights": "(bool) - Whether to initialize weights for the model.",
@@ -898,7 +1030,7 @@ def generate_fields(variables, scrollable_frame):
  "metadata_type": "(str) - Type of metadata to expect in the images. This will determine how the images are processed. If 'custom' is selected, you can provide a custom regex pattern to extract metadata from the image names.",
  "metadata_types": "(list) - Types of metadata to include in the analysis.",
  "merge_edge_pathogen_cells": "(bool) - Whether to merge cells that share pathogen objects.",
- "merge_pathogens": "(bool) - Whether to merge pathogen objects that share more than 75% of their perimeter.",
+ "merge_pathogens": "(bool) - Whether to merge pathogen objects that share more than 75 percent of their perimeter.",
  "metric": "(str) - Metric to use for UMAP.",
  "min_cell_count": "(int) - Minimum number of cells required for analysis.",
  "min_dist": "(float) - Minimum distance for UMAP.",
@@ -907,6 +1039,7 @@ def generate_fields(variables, scrollable_frame):
  "mix": "(dict) - Mixing settings for the samples.",
  "model_name": "(str) - Name of the Cellpose model.",
  "model_type": "(str) - Type of model to use for the analysis.",
+ "model_type_ml": "(str) - Type of model to use for machine learning.",
  "nc": "(str) - Negative control identifier.",
  "nc_loc": "(str) - Location of the negative control in the images.",
  "negative_control": "(str) - Identifier for the negative control.",
@@ -937,12 +1070,7 @@ def generate_fields(variables, scrollable_frame):
  "pathogen_background": "(float) - The background intensity for the pathogen channel. This will be used to remove background noise.",
  "pathogen_chann_dim": "(int) - Dimension of the channel to use for pathogen segmentation.",
  "pathogen_channel": "(int) - The channel to use for the pathogen. If None, the pathogen will not be segmented.",
- "pathogen_intensity_range": "(list) - Intensity range for pathogen segmentation.",
- "pathogen_loc": "(list) - The locations of the pathogen types in the images.",
- "pathogen_mask_dim": "(int) - The dimension of the array the pathogen mask is saved in.",
- "pathogen_min_size": "(int) - The minimum size of pathogen objects in pixels^2.",
- "pathogen_model": "(str) - Model to use for pathogen segmentation.",
- "pathogen_plate_metadata": "(str) - Metadata for the pathogen plate.",
+ "pathogen_intensity_range": "(str) - Metadata for the pathogen plate.",
  "pathogen_Signal_to_noise": "(float) - The signal-to-noise ratio for the pathogen channel. This will be used to determine the range of intensities to normalize images to for pathogen segmentation.",
  "pathogen_size_range": "(list) - Size range for pathogen segmentation.",
  "pathogen_types": "(list) - Types of pathogens to include in the analysis.",
@@ -1008,7 +1136,6 @@ def generate_fields(variables, scrollable_frame):
  "treatments": "(list) - The treatments to include in the analysis.",
  "top_features": "(int) - Top features to include in the analysis.",
  "train": "(bool) - Whether to train the model.",
- "train_mode": "(str) - Mode to use for training the model.",
  "transform": "(dict) - Transformation to apply to the data.",
  "upscale": "(bool) - Whether to upscale the images.",
  "upscale_factor": "(float) - Factor by which to upscale the images.",
@@ -1018,6 +1145,20 @@ def generate_fields(variables, scrollable_frame):
  "verbose": "(bool) - Whether to print verbose output during processing.",
  "weight_decay": "(float) - Weight decay for regularization.",
  "width_height": "(tuple) - Width and height of the input images.",
+ "barcode_coordinates": "(list of lists) - Coordinates of the barcodes in the sequence.",
+ "barcode_mapping": "dict - names and barecode csv files",
+ "compression": "str - type of compression (e.g. zlib)",
+ "complevel": "int - level of compression (0-9). Higher is slower and yealds smaller files",
+ "file_type": "str - type of file to process",
+ "model_path": "str - path to the model",
+ "tar_path": "str - path to the tar file with image dataset",
+ "score_threshold": "float - threshold for classification",
+ "sample": "str - number of images to sample for tar dataset (including both classes). Default: None",
+ "file_metadata": "str - string that must be present in image path to be included in the dataset",
+ "apply_model_to_dataset": "bool - whether to apply model to the dataset",
+ "train_channels": "list - channels to use for training",
+ "dataset_mode": "str - How to generate train/test dataset.",
+ "annotated_classes": "list - list of numbers in annotation column.",
  "um_per_pixel": "(float) - The micrometers per pixel for the images."
  }

@@ -1033,29 +1174,6 @@ def generate_fields(variables, scrollable_frame):

  return vars_dict

-
- categories = {
- "General": ["src", "metadata_type", "custom_regex", "experiment", "channels", "magnification", "channel_dims"],
- "Paths":["grna", "barcodes"],
- "Regression":["class_1_threshold", "plate", "other", "fraction_threshold", "alpha", "remove_row_column_effect", "regression_type", "min_cell_count", "agg_type", "transform", "dependent_variable", "gene_weights_csv"],
- "Cellpose":["from_scratch", "n_epochs", "width_height", "model_name", "custom_model", "resample", "rescale", "CP_prob", "flow_threshold", "percentiles", "circular", "invert", "diameter", "grayscale", "background", "Signal_to_noise", "resize", "target_height", "target_width"],
- "Nucleus": ["nucleus_intensity_range", "nucleus_size_range", "nucleus_chann_dim", "nucleus_channel", "nucleus_background", "nucleus_Signal_to_noise", "nucleus_CP_prob", "nucleus_FT", "remove_background_nucleus", "nucleus_min_size", "nucleus_mask_dim", "nucleus_loc"],
- "Cell": ["cell_intensity_range", "cell_size_range", "cell_chann_dim", "cell_channel", "cell_background", "cell_Signal_to_noise", "cell_CP_prob", "cell_FT", "remove_background_cell", "cell_min_size", "cell_mask_dim", "cytoplasm", "cytoplasm_min_size", "include_uninfected", "merge_edge_pathogen_cells", "adjust_cells"],
- "Pathogen": ["pathogen_intensity_range", "pathogen_size_range", "pathogen_chann_dim", "pathogen_channel", "pathogen_background", "pathogen_Signal_to_noise", "pathogen_CP_prob", "pathogen_FT", "pathogen_model", "remove_background_pathogen", "pathogen_min_size", "pathogen_mask_dim"],
- "Timelapse": ["fps", "timelapse_displacement", "timelapse_memory", "timelapse_frame_limits", "timelapse_remove_transient", "timelapse_mode", "timelapse_objects", "compartments"],
- "Plot": ["plot_control", "plot_nr", "examples_to_plot", "normalize_plots", "normalize", "cmap", "figuresize", "plot_cluster_grids", "img_zoom", "row_limit", "color_by", "plot_images", "smooth_lines", "plot_points", "plot_outlines", "black_background", "plot_by_cluster", "heatmap_feature","grouping","min_max","cmap","save_figure"],
- "Object Image": ["save_png", "dialate_pngs", "dialate_png_ratios", "png_size", "png_dims", "save_arrays", "normalize_by", "dialate_png_ratios", "crop_mode", "dialate_pngs", "normalize", "use_bounding_box"],
- "Annotate Data": ["nc_loc", "pc_loc", "nc", "pc", "cell_plate_metadata","pathogen_types", "pathogen_plate_metadata", "treatment_plate_metadata", "metadata_types", "cell_types", "target","positive_control","negative_control", "location_column", "treatment_loc", "cells", "cell_loc", "pathogens", "pathogen_loc", "channel_of_interest", "measurement", "treatments", "um_per_pixel", "nr_imgs", "exclude", "exclude_conditions", "mix", "pos", "neg"],
- "Measurements": ["remove_image_canvas", "remove_highly_correlated", "homogeneity", "homogeneity_distances", "radial_dist", "calculate_correlation", "manders_thresholds", "save_measurements", "tables", "image_nr", "dot_size", "filter_by", "remove_highly_correlated_features", "remove_low_variance_features", "channel_of_interest"],
- "Advanced": ["plate_dict", "target_intensity_min", "cells_per_well", "include_multinucleated", "include_multiinfected", "include_noninfected", "backgrounds", "plot", "timelapse", "schedule", "test_size","exclude","n_repeats","top_features", "model_type","minimum_cell_count","n_estimators","preprocess", "remove_background", "normalize", "lower_percentile", "merge_pathogens", "batch_size", "filter", "save", "masks", "verbose", "randomize", "n_jobs", "train_mode","amsgrad","use_checkpoint","gradient_accumulation","gradient_accumulation_steps","intermedeate_save","pin_memory","n_jobs","channels","augment"],
- "Clustering": ["eps","min_samples","analyze_clusters","clustering","remove_cluster_noise"],
- "Embedding": ["visualize","n_neighbors","min_dist","metric","resnet_features","reduction_method","embedding_by_controls","col_to_compare","log_data"],
- "Train DL Model": ["epochs", "loss_type", "optimizer_type","image_size","val_split","learning_rate","weight_decay","dropout_rate", "init_weights", "train", "classes"],
- "Miscellaneous": ["all_to_mip", "pick_slice", "skip_mode", "upscale", "upscale_factor"],
- "Test": ["test_mode", "test_images", "random_test", "test_nr", "test"],
- "Sequencing": ["upstream", "downstream", "barecode_length_1", "barecode_length_2", "chunk_size"]
- }
-
  descriptions = {
  'mask': "\n\nHelp:\n- Generate Cells, Nuclei, Pathogens, and Cytoplasm masks from intensity images in src.\n- To ensure that spacr is installed correctly:\n- 1. Downloade the training set (click Download).\n- 2. Import settings (click settings navigate to downloaded dataset settings folder and import preprocess_generate_masks_settings.csv).\n- 3. Run the module.\n- 4. Proceed to the Measure module (click Measure in the menue bar).\n- For further help, click the Help button in the menue bar.",

@@ -1063,8 +1181,6 @@ descriptions = {

  'classify': "Train and Test any Torch Computer vision model. (Requires PNG images from the Measure module). Function: train_test_model from spacr.deep_spacr.\n\nKey Features:\n- Deep Learning Integration: Train and evaluate state-of-the-art Torch models for various classification tasks.\n- Flexible Training: Supports a wide range of Torch models, allowing customization based on specific research needs.\n- Data Requirement: Requires PNG images generated by the Measure module for training and testing.",

- 'sequencing': "Find Barcodes and gRNA sequences in FASTQ files. (Requires paired-end FASTQ files, R1 and R2). Function: analyze_reads from spacr.sequencing.\n\nKey Features:\n- Barcode and gRNA Identification: Efficiently detect and extract barcode and gRNA sequences from raw sequencing data.\n- Paired-End Support: Specifically designed to handle paired-end FASTQ files, ensuring accurate sequence alignment and analysis.\n- High Throughput: Capable of processing large sequencing datasets quickly and accurately.",
-
  'umap': "Generate UMAP or tSNE embeddings and represent points as single cell images. (Requires measurements.db and PNG images from the Measure module). Function: generate_image_umap from spacr.core.\n\nKey Features:\n- Dimensionality Reduction: Employ UMAP or tSNE algorithms to reduce high-dimensional data into two dimensions for visualization.\n- Single Cell Representation: Visualize embedding points as single cell images, providing an intuitive understanding of data clusters.\n- Data Integration: Requires measurements and images generated by the Measure module, ensuring comprehensive data representation.",

  'train_cellpose': "Train custom Cellpose models for your specific dataset. Function: train_cellpose_model from spacr.core.\n\nKey Features:\n- Custom Model Training: Train Cellpose models on your dataset to improve segmentation accuracy.\n- Data Adaptation: Tailor the model to handle specific types of biological samples more effectively.\n- Advanced Training Options: Supports various training parameters and configurations for optimized performance.",
@@ -1075,8 +1191,8 @@ descriptions = {

  'cellpose_all': "Run Cellpose on all images in your dataset and obtain masks and measurements. Function: cellpose_analysis from spacr.cellpose.\n\nKey Features:\n- End-to-End Analysis: Perform both segmentation and measurement extraction in a single step.\n- Efficiency: Process entire datasets with minimal manual intervention.\n- Comprehensive Output: Obtain detailed masks and corresponding measurements for further analysis.",

- 'map_barcodes': "Map barcodes to your data for identification and tracking. Function: barcode_mapping_tools from spacr.sequencing.\n\nKey Features:\n- Barcode Integration: Efficiently map and integrate barcode information into your dataset.\n- Tracking: Enable tracking and identification of samples using barcodes.\n- Compatibility: Works with sequencing data to ensure accurate mapping and analysis.",
-
+ 'map_barcodes': "\n\nHelp:\n- 1 .Generate consensus read fastq files from R1 and R2 files.\n- 2. Map barcodes from sequencing data for identification and tracking of samples.\n- 3. Run the module to extract and map barcodes from your FASTQ files in chunks.\n- Prepare your barcode CSV files with the appropriate 'name' and 'sequence' columns.\n- Configure the barcode settings (coordinates and reverse complement flags) according to your experimental setup.\n- For further help, click the Help button in the menu bar.",
+
  'regression': "Perform regression analysis on your data. Function: regression_tools from spacr.analysis.\n\nKey Features:\n- Statistical Analysis: Conduct various types of regression analysis to identify relationships within your data.\n- Flexible Options: Supports multiple regression models and configurations.\n- Data Insight: Gain deeper insights into your dataset through advanced regression techniques.",

  'recruitment': "Analyze recruitment data to understand sample recruitment dynamics. Function: recruitment_analysis_tools from spacr.analysis.\n\nKey Features:\n- Recruitment Analysis: Investigate and analyze the recruitment of samples over time or conditions.\n- Visualization: Generate visualizations to represent recruitment trends and patterns.\n- Integration: Utilize data from various sources for a comprehensive recruitment analysis."
@@ -1085,7 +1201,7 @@ descriptions = {
  def set_annotate_default_settings(settings):
  settings.setdefault('src', 'path')
  settings.setdefault('image_type', 'cell_png')
- settings.setdefault('channels', 'r,g,b')
+ settings.setdefault('channels', "'r','g','b'")
  settings.setdefault('img_size', 200)
  settings.setdefault('annotation_column', 'test')
  settings.setdefault('normalize', 'False')
@@ -1094,3 +1210,15 @@ def set_annotate_default_settings(settings):
  settings.setdefault('threshold', '2')
  return settings

+ def set_default_generate_barecode_mapping(settings={}):
+ settings.setdefault('src', 'path')
+ settings.setdefault('chunk_size', 100000)
+
+ settings.setdefault('barcode_mapping', {'row': ['/home/carruthers/Documents/row_barcodes.csv',(80, 88), True],
+ 'grna': ['/home/carruthers/Documents/grna_barcodes.csv',(34, 55), True],
+ 'column': ['/home/carruthers/Documents/column_barcodes.csv',(0, 7), False]})
+
+ settings.setdefault('n_jobs', None)
+ settings.setdefault('compression', 'zlib')
+ settings.setdefault('complevel', 5)
+ return settings
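Judging from the defaults above, each barcode_mapping entry is a list of [barcode CSV path, (start, end) coordinates within the read, reverse-complement flag]. A sketch of overriding those defaults (the paths and coordinates below are hypothetical):

    from spacr.settings import set_default_generate_barecode_mapping

    settings = set_default_generate_barecode_mapping({
        'src': '/data/sequencing_run1',
        'barcode_mapping': {
            'row':    ['/data/barcodes/row_barcodes.csv',    (80, 88), True],
            'grna':   ['/data/barcodes/grna_barcodes.csv',   (34, 55), True],
            'column': ['/data/barcodes/column_barcodes.csv', (0, 7),   False],
        },
    })
    # Keys not supplied above are filled from the defaults.
    print(settings['compression'], settings['complevel'])  # zlib 5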
spacr/utils.py CHANGED
@@ -1,4 +1,4 @@
- import sys, os, re, sqlite3, torch, torchvision, random, string, shutil, cv2, tarfile, glob, psutil, platform
+ import sys, os, re, sqlite3, torch, torchvision, random, string, shutil, cv2, tarfile, glob, psutil, platform, gzip

  import numpy as np
  from cellpose import models as cp_models
@@ -87,6 +87,12 @@ from scipy.stats import f_oneway, kruskal
  from sklearn.cluster import KMeans
  from scipy import stats

+ def save_settings(settings, name='settings'):
+
+ settings_df = pd.DataFrame(list(settings.items()), columns=['Key', 'Value'])
+ settings_csv = os.path.join(settings['src'],'settings',f'{name}.csv')
+ os.makedirs(os.path.join(settings['src'],'settings'), exist_ok=True)
+ settings_df.to_csv(settings_csv, index=False)

  def print_progress(files_processed, files_to_process, n_jobs, time_ls=None, batch_size=None, operation_type=""):
  if isinstance(files_processed, list):
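The new save_settings helper writes the settings dict as a two-column (Key, Value) CSV under <src>/settings/. A minimal usage sketch (the path and file name below are hypothetical):

    from spacr.utils import save_settings

    settings = {'src': '/data/screen1', 'figuresize': 10, 'cmap': 'inferno'}
    save_settings(settings, name='mask_settings')
    # Creates /data/screen1/settings/mask_settings.csv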
@@ -116,10 +122,8 @@ def print_progress(files_processed, files_to_process, n_jobs, time_ls=None, batc
  else:
  average_time_img = average_time / batch_size
  time_info = f'Time/batch: {average_time:.3f}sec, Time/image: {average_time_img:.3f}sec, Time_left: {time_left:.3f} min.'
-
- print(f'Progress: {files_processed}/{files_to_process}, operation_type: {operation_type} {time_info}')
-
-
+ else:
+ print(f'Progress: {files_processed}/{files_to_process}, operation_type: {operation_type} {time_info}')

  def reset_mp():
  current_method = get_start_method()
@@ -1649,7 +1653,7 @@ def split_my_dataset(dataset, split_ratio=0.1):
  val_dataset = Subset(dataset, val_indices)
  return train_dataset, val_dataset

- def classification_metrics(all_labels, prediction_pos_probs, loader_name, loss, epoch):
+ def classification_metrics(all_labels, prediction_pos_probs, loss, epoch):
  """
  Calculate classification metrics for binary classification.

@@ -1698,11 +1702,9 @@ def classification_metrics(all_labels, prediction_pos_probs, loader_name, loss,
  else:
  acc_nc = np.nan
  data_dict = {'accuracy': acc_all, 'neg_accuracy': acc_nc, 'pos_accuracy': acc_pc, 'loss':loss.item(),'prauc':pr_auc, 'optimal_threshold':optimal_threshold}
- data_df = pd.DataFrame(data_dict, index=[str(epoch)+'_'+loader_name])
+ data_df = pd.DataFrame(data_dict, index=[str(epoch)])
  return data_df

-
-
  def compute_irm_penalty(losses, dummy_w, device):
  """
  Computes the Invariant Risk Minimization (IRM) penalty.
@@ -1740,7 +1742,7 @@ def compute_irm_penalty(losses, dummy_w, device):
  # summary(base_model, (channels, height, width))
  # return

- def choose_model(model_type, device, init_weights=True, dropout_rate=0, use_checkpoint=False, channels=3, height=224, width=224, chan_dict=None, num_classes=2):
+ def choose_model(model_type, device, init_weights=True, dropout_rate=0, use_checkpoint=False, channels=3, height=224, width=224, chan_dict=None, num_classes=2, verbose=False):
  """
  Choose a model for classification.

@@ -1772,7 +1774,7 @@ def choose_model(model_type, device, init_weights=True, dropout_rate=0, use_chec
  print(f'Invalid model_type: {model_type}. Compatible model_types: {model_types}')
  return

- print(f'\rModel parameters: Architecture: {model_type} init_weights: {init_weights} dropout_rate: {dropout_rate} use_checkpoint: {use_checkpoint}', end='\r', flush=True)
+ print(f'Model parameters: Architecture: {model_type} init_weights: {init_weights} dropout_rate: {dropout_rate} use_checkpoint: {use_checkpoint}', end='\r', flush=True)

  if model_type == 'custom':

@@ -1783,8 +1785,8 @@ def choose_model(model_type, device, init_weights=True, dropout_rate=0, use_chec
  else:
  print(f'Compatible model_types: {model_types}')
  raise ValueError(f"Invalid model_type: {model_type}")
-
- print(base_model)
+ if verbose:
+ print(base_model)

  return base_model

@@ -4424,3 +4426,10 @@ def correct_masks(src):
  cell_path = os.path.join(src,'norm_channel_stack', 'cell_mask_stack')
  convert_and_relabel_masks(cell_path)
  _load_and_concatenate_arrays(src, [0,1,2,3], 1, 0, 2)
+
+ def count_reads_in_fastq(fastq_file):
+ count = 0
+ with gzip.open(fastq_file, "rt") as f:
+ for _ in f:
+ count += 1
+ return count // 4
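count_reads_in_fastq assumes a gzip-compressed FASTQ and returns the line count divided by four (one record per four lines). Usage sketch (the path below is hypothetical):

    from spacr.utils import count_reads_in_fastq

    n_reads = count_reads_in_fastq('/data/sequencing_run1/sample_R1.fastq.gz')
    print(f'{n_reads} reads')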
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: spacr
- Version: 0.2.53
+ Version: 0.2.61
  Summary: Spatial phenotype analysis of crisp screens (SpaCr)
  Home-page: https://github.com/EinarOlafsson/spacr
  Author: Einar Birnir Olafsson
@@ -44,6 +44,7 @@ Requires-Dist: gputil <2.0,>=1.4.0
  Requires-Dist: gpustat <2.0,>=1.1.1
  Requires-Dist: pyautogui <1.0,>=0.9.54
  Requires-Dist: tables <4.0,>=3.8.0
+ Requires-Dist: rapidfuzz <4.0,>=3.9
  Requires-Dist: huggingface-hub <0.25,>=0.24.0
  Provides-Extra: dev
  Requires-Dist: pytest <3.11,>=3.9 ; extra == 'dev'