PyPI - spacr - Versions diffs - 0.4.0__py3-none-any.whl → 0.4.2__py3-none-any.whl - Mend

spacr 0.4.0__py3-none-any.whl → 0.4.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19) hide show

spacr/__init__.py +2 -2
spacr/core.py +14 -3
spacr/deep_spacr.py +2 -95
spacr/gui_core.py +301 -46
spacr/gui_elements.py +131 -0
spacr/gui_utils.py +24 -20
spacr/io.py +312 -8
spacr/measure.py +11 -12
spacr/plot.py +2 -2
spacr/settings.py +157 -49
spacr/sp_stats.py +221 -0
spacr/submodules.py +2 -2
spacr/utils.py +115 -33
{spacr-0.4.0.dist-info → spacr-0.4.2.dist-info}/METADATA +2 -1
{spacr-0.4.0.dist-info → spacr-0.4.2.dist-info}/RECORD +19 -18
{spacr-0.4.0.dist-info → spacr-0.4.2.dist-info}/LICENSE +0 -0
{spacr-0.4.0.dist-info → spacr-0.4.2.dist-info}/WHEEL +0 -0
{spacr-0.4.0.dist-info → spacr-0.4.2.dist-info}/entry_points.txt +0 -0
{spacr-0.4.0.dist-info → spacr-0.4.2.dist-info}/top_level.txt +0 -0

spacr/settings.py CHANGED Viewed

@@ -86,10 +86,10 @@ def set_default_settings_preprocess_generate_masks(settings={}):
     settings.setdefault('fps', 2)
     settings.setdefault('timelapse_displacement', None)
     settings.setdefault('timelapse_memory', 3)
-    settings.setdefault('timelapse_frame_limits', None)
+    settings.setdefault('timelapse_frame_limits', [5,])
     settings.setdefault('timelapse_remove_transient', False)
     settings.setdefault('timelapse_mode', 'trackpy')
-    settings.setdefault('timelapse_objects', 'cells')
+    settings.setdefault('timelapse_objects', None)
     # Misc settings
     settings.setdefault('all_to_mip', False)
@@ -256,7 +256,13 @@ def get_measure_crop_settings(settings={}):
     settings.setdefault('homogeneity', True)
     settings.setdefault('homogeneity_distances', [8,16,32])
-    # Cropping settings
+    # Cropping settings    # Cropping settings
+    settings.setdefault('save_arrays', False)
+    settings.setdefault('save_png',True)
+    settings.setdefault('use_bounding_box',False)
+    settings.setdefault('png_size',[224,224])
+    settings.setdefault('png_dims',[0,1,2])
+    settings.setdefault('normalize',False)    # Cropping settings
     settings.setdefault('save_arrays', False)
     settings.setdefault('save_png',True)
     settings.setdefault('use_bounding_box',False)
@@ -277,9 +283,9 @@ def get_measure_crop_settings(settings={}):
     settings.setdefault('n_jobs', os.cpu_count()-2)
     # Object settings
-    settings.setdefault('cell_mask_dim',None)
-    settings.setdefault('nucleus_mask_dim',None)
-    settings.setdefault('pathogen_mask_dim',None)
+    settings.setdefault('cell_mask_dim',4)
+    settings.setdefault('nucleus_mask_dim',5)
+    settings.setdefault('pathogen_mask_dim',6)
     settings.setdefault('cytoplasm',False)
     settings.setdefault('uninfected',True)
     settings.setdefault('cell_min_size',0)
@@ -473,7 +479,7 @@ def get_train_test_model_settings(settings):
      return settings
 def get_analyze_recruitment_default_settings(settings):
-    settings.setdefault('src','path')
+    settings.setdefault('src', 'path')
     settings.setdefault('target','protein')
     settings.setdefault('cell_types',['HeLa'])
     settings.setdefault('cell_plate_metadata',None)
@@ -672,6 +678,7 @@ expected_types = {
     "timelapse_displacement": int,
     "timelapse_memory": int,
     "timelapse_frame_limits": (list, type(None)),  # This can be a list of lists
+    #"timelapse_frame_limits": (list, type(None)),  # This can be a list of lists
     "timelapse_remove_transient": bool,
     "timelapse_mode": str,
     "timelapse_objects": list,
@@ -944,13 +951,13 @@ expected_types = {
 }
 categories = {"Paths":[ "src", "grna", "barcodes", "custom_model_path", "dataset","model_path","grna_csv","row_csv","column_csv", "metadata_files", "score_data","count_data"],
-             "General": ["metadata_type", "custom_regex", "experiment", "channels", "magnification", "channel_dims", "apply_model_to_dataset", "generate_training_dataset", "train_DL_model", "segmentation_mode", "delete_intermediate"],
+             "General": ["cell_mask_dim", "cytoplasm", "cell_chann_dim", "cell_channel", "nucleus_chann_dim", "nucleus_channel", "nucleus_mask_dim", "pathogen_mask_dim", "pathogen_chann_dim", "pathogen_channel", "test_mode", "plot", "metadata_type", "custom_regex", "experiment", "channels", "magnification", "channel_dims", "apply_model_to_dataset", "generate_training_dataset", "train_DL_model", "segmentation_mode", "delete_intermediate", "uninfected", ],
              "Cellpose":["fill_in","from_scratch", "n_epochs", "width_height", "model_name", "custom_model", "resample", "rescale", "CP_prob", "flow_threshold", "percentiles", "invert", "diameter", "grayscale", "Signal_to_noise", "resize", "target_height", "target_width"],
-             "Cell": ["cell_diamiter","cell_intensity_range", "cell_size_range", "cell_chann_dim", "cell_channel", "cell_background", "cell_Signal_to_noise", "cell_CP_prob", "cell_FT", "remove_background_cell", "cell_min_size", "cell_mask_dim", "cytoplasm", "cytoplasm_min_size", "uninfected", "merge_edge_pathogen_cells", "adjust_cells", "cells", "cell_loc"],
-             "Nucleus": ["nucleus_diamiter","nucleus_intensity_range", "nucleus_size_range", "nucleus_chann_dim", "nucleus_channel", "nucleus_background", "nucleus_Signal_to_noise", "nucleus_CP_prob", "nucleus_FT", "remove_background_nucleus", "nucleus_min_size", "nucleus_mask_dim", "nucleus_loc"],
-             "Pathogen": ["pathogen_diamiter","pathogen_intensity_range", "pathogen_size_range", "pathogen_chann_dim", "pathogen_channel", "pathogen_background", "pathogen_Signal_to_noise", "pathogen_CP_prob", "pathogen_FT", "pathogen_model", "remove_background_pathogen", "pathogen_min_size", "pathogen_mask_dim", "pathogens", "pathogen_loc", "pathogen_types", "pathogen_plate_metadata", ],
+             "Cell": ["cell_diamiter","cell_intensity_range", "cell_size_range", "cell_background", "cell_Signal_to_noise", "cell_CP_prob", "cell_FT", "remove_background_cell", "cell_min_size", "cytoplasm_min_size", "adjust_cells", "cells", "cell_loc"],
+             "Nucleus": ["nucleus_diamiter","nucleus_intensity_range", "nucleus_size_range", "nucleus_background", "nucleus_Signal_to_noise", "nucleus_CP_prob", "nucleus_FT", "remove_background_nucleus", "nucleus_min_size", "nucleus_loc"],
+             "Pathogen": ["pathogen_diamiter","pathogen_intensity_range", "pathogen_size_range", "pathogen_background", "pathogen_Signal_to_noise", "pathogen_CP_prob", "pathogen_FT", "pathogen_model", "remove_background_pathogen", "pathogen_min_size", "pathogens", "pathogen_loc", "pathogen_types", "pathogen_plate_metadata", ],
              "Measurements": ["remove_image_canvas", "remove_highly_correlated", "homogeneity", "homogeneity_distances", "radial_dist", "calculate_correlation", "manders_thresholds", "save_measurements", "tables", "image_nr", "dot_size", "filter_by", "remove_highly_correlated_features", "remove_low_variance_features", "channel_of_interest"],
-             "Object Image": ["save_png", "dialate_pngs", "dialate_png_ratios", "png_size", "png_dims", "save_arrays", "normalize_by", "crop_mode", "normalize", "use_bounding_box"],
+             "Object Image": ["save_png", "dialate_pngs", "dialate_png_ratios", "png_size", "png_dims", "save_arrays", "normalize_by", "crop_mode", "use_bounding_box"],
              "Sequencing": ["outlier_detection","offset_start","chunk_size","single_direction", "signal_direction","mode","comp_level","comp_type","save_h5","expected_end","offset","target_sequence","regex", "highlight"],
              "Generate Dataset":["save_to_db","file_metadata","class_metadata", "annotation_column","annotated_classes", "dataset_mode", "metadata_type_by","custom_measurement", "sample", "size"],
              "Hyperparamiters (Training)": ["png_type", "score_threshold","file_type", "train_channels", "epochs", "loss_type", "optimizer_type","image_size","val_split","learning_rate","weight_decay","dropout_rate", "init_weights", "train", "classes", "augment", "amsgrad","use_checkpoint","gradient_accumulation","gradient_accumulation_steps","intermedeate_save","pin_memory"],
@@ -959,11 +966,10 @@ categories = {"Paths":[ "src", "grna", "barcodes", "custom_model_path", "dataset
              "Hyperparamiters (Regression)":["cross_validation","prune_features","reg_lambda","reg_alpha","cov_type", "class_1_threshold", "plate", "other", "fraction_threshold", "alpha", "random_row_column_effects", "regression_type", "min_cell_count", "agg_type", "transform", "dependent_variable"],
              "Hyperparamiters (Activation)":["cam_type", "overlay", "correlation", "target_layer", "normalize_input"],
              "Annotation": ["filter_column", "filter_value","volcano", "toxo", "controls", "nc_loc", "pc_loc", "nc", "pc", "cell_plate_metadata","treatment_plate_metadata", "metadata_types", "cell_types", "target","positive_control","negative_control", "location_column", "treatment_loc", "channel_of_interest", "measurement", "treatments", "um_per_pixel", "nr_imgs", "exclude", "exclude_conditions", "mix", "pos", "neg"],
-             "Plot": ["plot", "split_axis_lims", "x_lim","log_x","log_y", "plot_control", "plot_nr", "examples_to_plot", "normalize_plots", "cmap", "figuresize", "plot_cluster_grids", "img_zoom", "row_limit", "color_by", "plot_images", "smooth_lines", "plot_points", "plot_outlines", "black_background", "plot_by_cluster", "heatmap_feature","grouping","min_max","cmap","save_figure"],
-             "Test": ["test_mode", "test_images", "random_test", "test_nr", "test", "test_split"],
+             "Plot": ["split_axis_lims", "x_lim","log_x","log_y", "plot_control", "plot_nr", "examples_to_plot", "normalize_plots", "cmap", "figuresize", "plot_cluster_grids", "img_zoom", "row_limit", "color_by", "plot_images", "smooth_lines", "plot_points", "plot_outlines", "black_background", "plot_by_cluster", "heatmap_feature","grouping","min_max","cmap","save_figure"],
              "Timelapse": ["timelapse", "fps", "timelapse_displacement", "timelapse_memory", "timelapse_frame_limits", "timelapse_remove_transient", "timelapse_mode", "timelapse_objects", "compartments"],
-             "Advanced": ["target_unique_count","threshold_multiplier", "threshold_method", "min_n","shuffle", "target_intensity_min", "cells_per_well", "nuclei_limit", "pathogen_limit", "background", "backgrounds", "schedule", "test_size","exclude","n_repeats","top_features", "model_type_ml", "model_type","minimum_cell_count","n_estimators","preprocess", "remove_background", "normalize", "lower_percentile", "merge_pathogens", "batch_size", "filter", "save", "masks", "verbose", "randomize", "n_jobs"],
-             "Miscellaneous": ["all_to_mip", "pick_slice", "skip_mode", "upscale", "upscale_factor"]
+             "Advanced": ["merge_edge_pathogen_cells", "test_images", "random_test", "test_nr", "test", "test_split", "normalize", "target_unique_count","threshold_multiplier", "threshold_method", "min_n","shuffle", "target_intensity_min", "cells_per_well", "nuclei_limit", "pathogen_limit", "background", "backgrounds", "schedule", "test_size","exclude","n_repeats","top_features", "model_type_ml", "model_type","minimum_cell_count","n_estimators","preprocess", "remove_background", "normalize", "lower_percentile", "merge_pathogens", "batch_size", "filter", "save", "masks", "verbose", "randomize", "n_jobs"],
+             "Beta": ["all_to_mip", "pick_slice", "skip_mode", "upscale", "upscale_factor"]
              }
@@ -972,6 +978,127 @@ category_keys = list(categories.keys())
 def check_settings(vars_dict, expected_types, q=None):
     from .gui_utils import parse_list
+    if q is None:
+        from multiprocessing import Queue
+        q = Queue()
+    settings = {}
+    errors = []  # Collect errors instead of stopping at the first one
+    for key, (label, widget, var, _) in vars_dict.items():
+        if key not in expected_types and key not in category_keys:
+            errors.append(f"Warning: Key '{key}' not found in expected types.")
+            continue
+        value = var.get()
+        if value in ['None', '']:
+            value = None
+        expected_type = expected_types.get(key, str)
+        try:
+            if key in ["cell_plate_metadata", "timelapse_frame_limits", "png_size", "png_dims", "pathogen_plate_metadata", "treatment_plate_metadata", "class_metadata", "crop_mode"]:
+                if value is None:
+                    parsed_value = None
+                else:
+                    try:
+                        parsed_value = ast.literal_eval(value)
+                    except (ValueError, SyntaxError):
+                        raise ValueError(f"Expected a list or list of lists but got an invalid format: {value}")
+                if isinstance(parsed_value, list):
+                    if all(isinstance(i, list) for i in parsed_value) or all(not isinstance(i, list) for i in parsed_value):
+                        settings[key] = parsed_value
+                    else:
+                        raise ValueError(f"Invalid format: '{key}' contains mixed types (single values and lists).")
+                else:
+                    raise ValueError(f"Expected a list for '{key}', but got {type(parsed_value).__name__}.")
+            elif expected_type == list:
+                settings[key] = parse_list(value) if value else None
+                if isinstance(settings[key], list) and len(settings[key]) == 1:
+                    settings[key] = settings[key][0]
+            elif expected_type == bool:
+                settings[key] = value.lower() in ['true', '1', 't', 'y', 'yes'] if isinstance(value, str) else bool(value)
+            elif expected_type == (int, type(None)):
+                if value is None or str(value).isdigit():
+                    settings[key] = int(value) if value is not None else None
+                else:
+                    raise ValueError(f"Expected an integer or None for '{key}', but got '{value}'.")
+            elif expected_type == (float, type(None)):
+                if value is None or (isinstance(value, str) and value.replace(".", "", 1).isdigit()):
+                    settings[key] = float(value) if value is not None else None
+                else:
+                    raise ValueError(f"Expected a float or None for '{key}', but got '{value}'.")
+            elif expected_type == (int, float):
+                try:
+                    settings[key] = float(value) if '.' in str(value) else int(value)
+                except ValueError:
+                    raise ValueError(f"Expected an integer or float for '{key}', but got '{value}'.")
+            elif expected_type == (str, type(None)):
+                settings[key] = str(value) if value is not None else None
+            elif expected_type == (str, type(None), list):
+                if isinstance(value, list):
+                    settings[key] = parse_list(value) if value else None
+                elif isinstance(value, str):
+                    settings[key] = str(value)
+                else:
+                    settings[key] = None
+            elif expected_type == dict:
+                try:
+                    if isinstance(value, str):
+                        parsed_dict = ast.literal_eval(value)
+                    else:
+                        raise ValueError("Expected a string representation of a dictionary.")
+                    if not isinstance(parsed_dict, dict):
+                        raise ValueError(f"Expected a dictionary for '{key}', but got {type(parsed_dict).__name__}.")
+                    settings[key] = parsed_dict
+                except (ValueError, SyntaxError) as e:
+                    settings[key] = {}
+                    errors.append(f"Error: Invalid dictionary format for '{key}'. Expected type: dict. Error: {e}")
+            elif isinstance(expected_type, tuple):
+                for typ in expected_type:
+                    try:
+                        settings[key] = typ(value) if value else None
+                        break
+                    except (ValueError, TypeError):
+                        continue
+                else:
+                    raise ValueError(f"Value '{value}' for '{key}' does not match any expected types: {expected_type}.")
+            else:
+                try:
+                    settings[key] = expected_type(value) if value else None
+                except (ValueError, TypeError):
+                    raise ValueError(f"Expected type {expected_type.__name__} for '{key}', but got '{value}'.")
+        except (ValueError, SyntaxError) as e:
+            expected_type_name = ' or '.join([t.__name__ for t in expected_type]) if isinstance(expected_type, tuple) else expected_type.__name__
+            errors.append(f"Error: '{key}' has invalid format. Expected type: {expected_type_name}. Got value: '{value}'. Error: {e}")
+    # Send all collected errors to the queue
+    for error in errors:
+        q.put(error)
+    return settings, errors
+def check_settings_v1(vars_dict, expected_types, q=None):
+    from .gui_utils import parse_list
     if q is None:
         from multiprocessing import Queue
         q = Queue()
@@ -984,22 +1111,26 @@ def check_settings(vars_dict, expected_types, q=None):
                 q.put(f"Key {key} not found in expected types.")
                 continue
-        value = var.get()
-        if value == 'None':
+        value = var.get()
+        if value in ['None', '']:
             value = None
         expected_type = expected_types.get(key, str)
         try:
-            if key in ["cell_plate_metadata", "timelapse_frame_limits", "png_size", "pathogen_loc", "treatment_loc", "pathogen_plate_metadata", "treatment_plate_metadata", "barcode_coordinates", "class_metadata"]:
-                parsed_value = ast.literal_eval(value) if value else None
+            #if key in ["cell_plate_metadata", "timelapse_frame_limits", "png_size", "pathogen_loc", "treatment_loc", "pathogen_plate_metadata", "treatment_plate_metadata", "barcode_coordinates", "class_metadata"]:
+            if key in ["cell_plate_metadata", "timelapse_frame_limits", "png_size", "png_dims", "pathogen_plate_metadata", "treatment_plate_metadata", "class_metadata", "crop_mode"]:
+                if value is None:
+                        parsed_value = None
+                else:
+                    parsed_value = ast.literal_eval(value) if isinstance(value, str) and value.strip() else None
                 if isinstance(parsed_value, list):
                     if all(isinstance(i, list) for i in parsed_value) or all(not isinstance(i, list) for i in parsed_value):
                         settings[key] = parsed_value
                     else:
                         raise ValueError("Invalid format: Mixed list and list of lists")
-                #elif parsed_value == None:
-                #    settings[key] = None
                 else:
                     raise ValueError("Invalid format for list or list of lists")
@@ -1180,30 +1311,7 @@ def generate_fields(variables, scrollable_frame):
         "n_epochs": "(int) - Number of epochs for training the Cellpose model.",
         "n_jobs": "(int) - The number of n_jobs to use for processing the images. This will determine how many images are processed in parallel. Increase to speed up processing.",
         "n_neighbors": "(int) - Number of neighbors for UMAP.",
-        "n_repeats": "(int) - Number of repeats for cross-validation.",
-        "normalize": "(list) - The percentiles to use for normalizing the images. This will be used to determine the range of intensities to normalize images to. If None, no normalization is done.",
-        "normalize_by": "(str) - Whether to normalize the images by field of view (fov) or by PNG image (png).",
-        "normalize_plots": "(bool) - Whether to normalize the plots.",
-        "nr_imgs": "(int) - The number of images to plot.",
-        "nucleus_CP_prob": "(float) - The cellpose probability threshold for the nucleus channel. This will be used to segment the nucleus.",
-        "nucleus_FT": "(float) - The flow threshold for nucleus objects. This will be used in nucleus segmentation.",
-        "nucleus_background": "(float) - The background intensity for the nucleus channel. This will be used to remove background noise.",
-        "nucleus_chann_dim": "(int) - Dimension of the channel to use for nucleus segmentation.",
-        "nucleus_channel": "(int) - The channel to use for the nucleus. If None, the nucleus will not be segmented.",
-        "nucleus_intensity_range": "(list) - Intensity range for nucleus segmentation.",
-        "nucleus_loc": "(str) - Location of the nucleus in the images.",
-        "nucleus_mask_dim": "(int) - The dimension of the array the nucleus mask is saved in.",
-        "nucleus_min_size": "(int) - The minimum size of nucleus objects in pixels^2.",
-        "nucleus_Signal_to_noise": "(float) - The signal-to-noise ratio for the nucleus channel. This will be used to determine the range of intensities to normalize images to for nucleus segmentation.",
-        "nucleus_size_range": "(list) - Size range for nucleus segmentation.",
-        "optimizer_type": "(str) - Type of optimizer to use.",
-        "other": "(dict) - Additional parameters for the regression analysis.",
-        "pathogen_CP_prob": "(float) - The cellpose probability threshold for the pathogen channel. This will be used to segment the pathogen.",
-        "pathogen_FT": "(float) - The flow threshold for pathogen objects. This will be used in pathogen segmentation.",
-        "pathogen_background": "(float) - The background intensity for the pathogen channel. This will be used to remove background noise.",
-        "pathogen_chann_dim": "(int) - Dimension of the channel to use for pathogen segmentation.",
-        "pathogen_channel": "(int) - The channel to use for the pathogen. If None, the pathogen will not be segmented.",
-        "pathogen_intensity_range": "(str) - Metadata for the pathogen plate.",
+        "n_repeats": "(int) - Number of repeats for the pathogen plate.",
         "pathogen_Signal_to_noise": "(float) - The signal-to-noise ratio for the pathogen channel. This will be used to determine the range of intensities to normalize images to for pathogen segmentation.",
         "pathogen_size_range": "(list) - Size range for pathogen segmentation.",
         "pathogen_types": "(list) - Types of pathogens to include in the analysis.",
@@ -1222,7 +1330,7 @@ def generate_fields(variables, scrollable_frame):
         "plot_nr": "(int) - Number of plots to generate.",
         "plot_outlines": "(bool) - Whether to plot outlines of segmented objects.",
         "png_dims": "(list) - The dimensions of the PNG images to save. This will determine the dimensions of the saved images. Maximum of 3 dimensions e.g. [1,2,3].",
-        "png_size": "(int) - The size of the PNG images to save. This will determine the size of the saved images.",
+        "png_size": "(list) - The size of the PNG images to save. This will determine the size of the saved images.",
         "positive_control": "(str) - Identifier for the positive control.",
         "preprocess": "(bool) - Whether to preprocess the images before segmentation. This includes background removal and normalization. Set to False only if this step has already been done.",
         "radial_dist": "(list) - Radial distances for measuring features.",
@@ -1385,8 +1493,8 @@ def set_annotate_default_settings(settings):
     settings.setdefault('normalize', 'False')
     settings.setdefault('normalize_channels', "r,g,b")
     settings.setdefault('percentiles', [2, 98])
-    settings.setdefault('measurement', '')#'cytoplasm_channel_3_mean_intensity,pathogen_channel_3_mean_intensity')
-    settings.setdefault('threshold', '')#'2')
+    settings.setdefault('measurement', '') #'cytoplasm_channel_3_mean_intensity,pathogen_channel_3_mean_intensity')
+    settings.setdefault('threshold', '') #'2')
     return settings
 def set_default_generate_barecode_mapping(settings={}):

spacr/sp_stats.py ADDED Viewed

@@ -0,0 +1,221 @@
+from scipy.stats import shapiro, normaltest, levene, ttest_ind, mannwhitneyu, kruskal, f_oneway
+from statsmodels.stats.multicomp import pairwise_tukeyhsd
+import scikit_posthocs as sp
+import numpy as np
+import pandas as pd
+from scipy.stats import chi2_contingency, fisher_exact
+import itertools
+from statsmodels.stats.multitest import multipletests
+def choose_p_adjust_method(num_groups, num_data_points):
+    """
+    Selects the most appropriate p-value adjustment method based on data characteristics.
+    Parameters:
+    - num_groups: Number of unique groups being compared
+    - num_data_points: Number of data points per group (assuming balanced groups)
+    Returns:
+    - A string representing the recommended p-adjustment method
+    """
+    num_comparisons = (num_groups * (num_groups - 1)) // 2  # Number of pairwise comparisons
+    # Decision logic for choosing the adjustment method
+    if num_comparisons <= 10 and num_data_points > 5:
+        return 'holm'  # Balanced between power and Type I error control
+    elif num_comparisons > 10 and num_data_points <= 5:
+        return 'fdr_bh'  # FDR control for large number of comparisons and small sample size
+    elif num_comparisons <= 10:
+        return 'sidak'  # Less conservative than Bonferroni, good for independent comparisons
+    else:
+        return 'bonferroni'  # Very conservative, use for strict control of Type I errors
+def perform_normality_tests(df, grouping_column, data_columns):
+    """Perform normality tests for each group and data column."""
+    unique_groups = df[grouping_column].unique()
+    normality_results = []
+    for column in data_columns:
+        for group in unique_groups:
+            data = df.loc[df[grouping_column] == group, column].dropna()
+            n_samples = len(data)
+            if n_samples < 3:
+                # Skip test if there aren't enough data points
+                print(f"Skipping normality test for group '{group}' on column '{column}' - Not enough data.")
+                normality_results.append({
+                    'Comparison': f'Normality test for {group} on {column}',
+                    'Test Statistic': None,
+                    'p-value': None,
+                    'Test Name': 'Skipped',
+                    'Column': column,
+                    'n': n_samples
+                })
+                continue
+            # Choose the appropriate normality test based on the sample size
+            if n_samples >= 8:
+                stat, p_value = normaltest(data)
+                test_name = "D'Agostino-Pearson test"
+            else:
+                stat, p_value = shapiro(data)
+                test_name = "Shapiro-Wilk test"
+            normality_results.append({
+                'Comparison': f'Normality test for {group} on {column}',
+                'Test Statistic': stat,
+                'p-value': p_value,
+                'Test Name': test_name,
+                'Column': column,
+                'n': n_samples
+            })
+        # Check if all groups are normally distributed (p > 0.05)
+        normal_p_values = [result['p-value'] for result in normality_results if result['Column'] == column and result['p-value'] is not None]
+        is_normal = all(p > 0.05 for p in normal_p_values)
+    return is_normal, normality_results
+def perform_levene_test(df, grouping_column, data_column):
+    """Perform Levene's test for equal variance."""
+    unique_groups = df[grouping_column].unique()
+    grouped_data = [df.loc[df[grouping_column] == group, data_column].dropna() for group in unique_groups]
+    stat, p_value = levene(*grouped_data)
+    return stat, p_value
+def perform_statistical_tests(df, grouping_column, data_columns, paired=False):
+    """Perform statistical tests for each data column."""
+    unique_groups = df[grouping_column].unique()
+    test_results = []
+    for column in data_columns:
+        grouped_data = [df.loc[df[grouping_column] == group, column].dropna() for group in unique_groups]
+        if len(unique_groups) == 2:  # For two groups
+            if paired:
+                print("Performing paired tests (not implemented in this template).")
+                continue  # Extend as needed
+            else:
+                # Check normality for two groups
+                is_normal, _ = perform_normality_tests(df, grouping_column, [column])
+                if is_normal:
+                    stat, p = ttest_ind(grouped_data[0], grouped_data[1])
+                    test_name = 'T-test'
+                else:
+                    stat, p = mannwhitneyu(grouped_data[0], grouped_data[1])
+                    test_name = 'Mann-Whitney U test'
+        else:
+            # Check normality for multiple groups
+            is_normal, _ = perform_normality_tests(df, grouping_column, [column])
+            if is_normal:
+                stat, p = f_oneway(*grouped_data)
+                test_name = 'One-way ANOVA'
+            else:
+                stat, p = kruskal(*grouped_data)
+                test_name = 'Kruskal-Wallis test'
+        test_results.append({
+            'Column': column,
+            'Test Name': test_name,
+            'Test Statistic': stat,
+            'p-value': p,
+            'Groups': len(unique_groups)
+        })
+    return test_results
+def perform_posthoc_tests(df, grouping_column, data_column, is_normal):
+    """Perform post-hoc tests for multiple groups with both original and adjusted p-values."""
+    unique_groups = df[grouping_column].unique()
+    posthoc_results = []
+    if len(unique_groups) > 2:
+        num_groups = len(unique_groups)
+        num_data_points = len(df[data_column].dropna()) // num_groups  # Assuming roughly equal data points per group
+        p_adjust_method = choose_p_adjust_method(num_groups, num_data_points)
+        if is_normal:
+            # Tukey's HSD automatically adjusts p-values
+            tukey_result = pairwise_tukeyhsd(df[data_column], df[grouping_column], alpha=0.05)
+            for comparison, p_value in zip(tukey_result._results_table.data[1:], tukey_result.pvalues):
+                posthoc_results.append({
+                    'Comparison': f"{comparison[0]} vs {comparison[1]}",
+                    'Original p-value': None,  # Tukey HSD does not provide raw p-values
+                    'Adjusted p-value': p_value,
+                    'Adjusted Method': 'Tukey HSD',
+                    'Test Name': 'Tukey HSD'
+                })
+        else:
+            # Dunn's test with p-value adjustment
+            raw_dunn_result = sp.posthoc_dunn(df, val_col=data_column, group_col=grouping_column, p_adjust=None)
+            adjusted_dunn_result = sp.posthoc_dunn(df, val_col=data_column, group_col=grouping_column, p_adjust=p_adjust_method)
+            for i, group_a in enumerate(adjusted_dunn_result.index):
+                for j, group_b in enumerate(adjusted_dunn_result.columns):
+                    if i < j:  # Only consider unique pairs
+                        posthoc_results.append({
+                            'Comparison': f"{group_a} vs {group_b}",
+                            'Original p-value': raw_dunn_result.iloc[i, j],
+                            'Adjusted p-value': adjusted_dunn_result.iloc[i, j],
+                            'Adjusted Method': p_adjust_method,
+                            'Test Name': "Dunn's Post-hoc"
+                        })
+    return posthoc_results
+def chi_pairwise(raw_counts, verbose=False):
+    """
+    Perform pairwise chi-square or Fisher's exact tests between all unique group pairs
+    and apply p-value correction.
+    Parameters:
+    - raw_counts (DataFrame): Contingency table with group-wise counts.
+    - verbose (bool): Whether to print results for each pair.
+    Returns:
+    - pairwise_df (DataFrame): DataFrame with pairwise test results, including corrected p-values.
+    """
+    pairwise_results = []
+    groups = raw_counts.index.unique()  # Use index from raw_counts for group pairs
+    raw_p_values = []  # Store raw p-values for correction later
+    # Calculate the number of groups and average number of data points per group
+    num_groups = len(groups)
+    num_data_points = raw_counts.sum(axis=1).mean()  # Average total data points per group
+    p_adjust_method = choose_p_adjust_method(num_groups, num_data_points)
+    for group1, group2 in itertools.combinations(groups, 2):
+        contingency_table = raw_counts.loc[[group1, group2]].values
+        if contingency_table.shape[1] == 2:  # Fisher's Exact Test for 2x2 tables
+            oddsratio, p_value = fisher_exact(contingency_table)
+            test_name = "Fisher's Exact Test"
+        else:  # Chi-Square Test for larger tables
+            chi2_stat, p_value, _, _ = chi2_contingency(contingency_table)
+            test_name = 'Pairwise Chi-Square Test'
+        pairwise_results.append({
+            'Group 1': group1,
+            'Group 2': group2,
+            'Test Name': test_name,
+            'p-value': p_value
+        })
+        raw_p_values.append(p_value)
+    # Apply p-value correction
+    corrected_p_values = multipletests(raw_p_values, method=p_adjust_method)[1]
+    # Add corrected p-values to results
+    for i, result in enumerate(pairwise_results):
+        result['p-value_adj'] = corrected_p_values[i]
+    pairwise_df = pd.DataFrame(pairwise_results)
+    pairwise_df['adj'] = p_adjust_method
+    if verbose:
+        # Print pairwise results
+        print("\nPairwise Frequency Analysis Results:")
+        print(pairwise_df.to_string(index=False))
+    return pairwise_df

spacr/submodules.py CHANGED Viewed

@@ -21,7 +21,7 @@ from sklearn.metrics import mean_absolute_error
 import matplotlib.pyplot as plt
 from natsort import natsorted
-def analyze_recruitment(settings={}):
+def analyze_recruitment(settings):
     """
     Analyze recruitment data by grouping the DataFrame by well coordinates and plotting controls and recruitment data.
@@ -1041,7 +1041,7 @@ def analyze_class_proportion(settings):
     from .io import _read_and_merge_data
     from .settings import set_analyze_class_proportion_defaults
     from .plot import plot_plates, plot_proportion_stacked_bars
-    from .stats import perform_normality_tests, perform_levene_test, perform_statistical_tests, perform_posthoc_tests
+    from .sp_stats import perform_normality_tests, perform_levene_test, perform_statistical_tests, perform_posthoc_tests
     settings = set_analyze_class_proportion_defaults(settings)
     save_settings(settings, name='analyze_class_proportion', show=True)

spacr 0.4.0__py3-none-any.whl → 0.4.2__py3-none-any.whl

spacr 0.4.0__py3-none-any.whl → 0.4.2__py3-none-any.whl