spacr 0.4.0__py3-none-any.whl → 0.4.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- spacr/__init__.py +2 -2
- spacr/core.py +14 -3
- spacr/deep_spacr.py +2 -95
- spacr/gui_core.py +301 -46
- spacr/gui_elements.py +131 -0
- spacr/gui_utils.py +24 -20
- spacr/io.py +312 -8
- spacr/measure.py +11 -12
- spacr/plot.py +2 -2
- spacr/settings.py +157 -49
- spacr/sp_stats.py +221 -0
- spacr/submodules.py +2 -2
- spacr/utils.py +115 -33
- {spacr-0.4.0.dist-info → spacr-0.4.2.dist-info}/METADATA +2 -1
- {spacr-0.4.0.dist-info → spacr-0.4.2.dist-info}/RECORD +19 -18
- {spacr-0.4.0.dist-info → spacr-0.4.2.dist-info}/LICENSE +0 -0
- {spacr-0.4.0.dist-info → spacr-0.4.2.dist-info}/WHEEL +0 -0
- {spacr-0.4.0.dist-info → spacr-0.4.2.dist-info}/entry_points.txt +0 -0
- {spacr-0.4.0.dist-info → spacr-0.4.2.dist-info}/top_level.txt +0 -0
spacr/settings.py
CHANGED
@@ -86,10 +86,10 @@ def set_default_settings_preprocess_generate_masks(settings={}):
|
|
86
86
|
settings.setdefault('fps', 2)
|
87
87
|
settings.setdefault('timelapse_displacement', None)
|
88
88
|
settings.setdefault('timelapse_memory', 3)
|
89
|
-
settings.setdefault('timelapse_frame_limits',
|
89
|
+
settings.setdefault('timelapse_frame_limits', [5,])
|
90
90
|
settings.setdefault('timelapse_remove_transient', False)
|
91
91
|
settings.setdefault('timelapse_mode', 'trackpy')
|
92
|
-
settings.setdefault('timelapse_objects',
|
92
|
+
settings.setdefault('timelapse_objects', None)
|
93
93
|
|
94
94
|
# Misc settings
|
95
95
|
settings.setdefault('all_to_mip', False)
|
@@ -256,7 +256,13 @@ def get_measure_crop_settings(settings={}):
|
|
256
256
|
settings.setdefault('homogeneity', True)
|
257
257
|
settings.setdefault('homogeneity_distances', [8,16,32])
|
258
258
|
|
259
|
-
# Cropping settings
|
259
|
+
# Cropping settings # Cropping settings
|
260
|
+
settings.setdefault('save_arrays', False)
|
261
|
+
settings.setdefault('save_png',True)
|
262
|
+
settings.setdefault('use_bounding_box',False)
|
263
|
+
settings.setdefault('png_size',[224,224])
|
264
|
+
settings.setdefault('png_dims',[0,1,2])
|
265
|
+
settings.setdefault('normalize',False) # Cropping settings
|
260
266
|
settings.setdefault('save_arrays', False)
|
261
267
|
settings.setdefault('save_png',True)
|
262
268
|
settings.setdefault('use_bounding_box',False)
|
@@ -277,9 +283,9 @@ def get_measure_crop_settings(settings={}):
|
|
277
283
|
settings.setdefault('n_jobs', os.cpu_count()-2)
|
278
284
|
|
279
285
|
# Object settings
|
280
|
-
settings.setdefault('cell_mask_dim',
|
281
|
-
settings.setdefault('nucleus_mask_dim',
|
282
|
-
settings.setdefault('pathogen_mask_dim',
|
286
|
+
settings.setdefault('cell_mask_dim',4)
|
287
|
+
settings.setdefault('nucleus_mask_dim',5)
|
288
|
+
settings.setdefault('pathogen_mask_dim',6)
|
283
289
|
settings.setdefault('cytoplasm',False)
|
284
290
|
settings.setdefault('uninfected',True)
|
285
291
|
settings.setdefault('cell_min_size',0)
|
@@ -473,7 +479,7 @@ def get_train_test_model_settings(settings):
|
|
473
479
|
return settings
|
474
480
|
|
475
481
|
def get_analyze_recruitment_default_settings(settings):
|
476
|
-
settings.setdefault('src','path')
|
482
|
+
settings.setdefault('src', 'path')
|
477
483
|
settings.setdefault('target','protein')
|
478
484
|
settings.setdefault('cell_types',['HeLa'])
|
479
485
|
settings.setdefault('cell_plate_metadata',None)
|
@@ -672,6 +678,7 @@ expected_types = {
|
|
672
678
|
"timelapse_displacement": int,
|
673
679
|
"timelapse_memory": int,
|
674
680
|
"timelapse_frame_limits": (list, type(None)), # This can be a list of lists
|
681
|
+
#"timelapse_frame_limits": (list, type(None)), # This can be a list of lists
|
675
682
|
"timelapse_remove_transient": bool,
|
676
683
|
"timelapse_mode": str,
|
677
684
|
"timelapse_objects": list,
|
@@ -944,13 +951,13 @@ expected_types = {
|
|
944
951
|
}
|
945
952
|
|
946
953
|
categories = {"Paths":[ "src", "grna", "barcodes", "custom_model_path", "dataset","model_path","grna_csv","row_csv","column_csv", "metadata_files", "score_data","count_data"],
|
947
|
-
"General": ["metadata_type", "custom_regex", "experiment", "channels", "magnification", "channel_dims", "apply_model_to_dataset", "generate_training_dataset", "train_DL_model", "segmentation_mode", "delete_intermediate"],
|
954
|
+
"General": ["cell_mask_dim", "cytoplasm", "cell_chann_dim", "cell_channel", "nucleus_chann_dim", "nucleus_channel", "nucleus_mask_dim", "pathogen_mask_dim", "pathogen_chann_dim", "pathogen_channel", "test_mode", "plot", "metadata_type", "custom_regex", "experiment", "channels", "magnification", "channel_dims", "apply_model_to_dataset", "generate_training_dataset", "train_DL_model", "segmentation_mode", "delete_intermediate", "uninfected", ],
|
948
955
|
"Cellpose":["fill_in","from_scratch", "n_epochs", "width_height", "model_name", "custom_model", "resample", "rescale", "CP_prob", "flow_threshold", "percentiles", "invert", "diameter", "grayscale", "Signal_to_noise", "resize", "target_height", "target_width"],
|
949
|
-
"Cell": ["cell_diamiter","cell_intensity_range", "cell_size_range", "
|
950
|
-
"Nucleus": ["nucleus_diamiter","nucleus_intensity_range", "nucleus_size_range", "
|
951
|
-
"Pathogen": ["pathogen_diamiter","pathogen_intensity_range", "pathogen_size_range", "
|
956
|
+
"Cell": ["cell_diamiter","cell_intensity_range", "cell_size_range", "cell_background", "cell_Signal_to_noise", "cell_CP_prob", "cell_FT", "remove_background_cell", "cell_min_size", "cytoplasm_min_size", "adjust_cells", "cells", "cell_loc"],
|
957
|
+
"Nucleus": ["nucleus_diamiter","nucleus_intensity_range", "nucleus_size_range", "nucleus_background", "nucleus_Signal_to_noise", "nucleus_CP_prob", "nucleus_FT", "remove_background_nucleus", "nucleus_min_size", "nucleus_loc"],
|
958
|
+
"Pathogen": ["pathogen_diamiter","pathogen_intensity_range", "pathogen_size_range", "pathogen_background", "pathogen_Signal_to_noise", "pathogen_CP_prob", "pathogen_FT", "pathogen_model", "remove_background_pathogen", "pathogen_min_size", "pathogens", "pathogen_loc", "pathogen_types", "pathogen_plate_metadata", ],
|
952
959
|
"Measurements": ["remove_image_canvas", "remove_highly_correlated", "homogeneity", "homogeneity_distances", "radial_dist", "calculate_correlation", "manders_thresholds", "save_measurements", "tables", "image_nr", "dot_size", "filter_by", "remove_highly_correlated_features", "remove_low_variance_features", "channel_of_interest"],
|
953
|
-
"Object Image": ["save_png", "dialate_pngs", "dialate_png_ratios", "png_size", "png_dims", "save_arrays", "normalize_by", "crop_mode", "
|
960
|
+
"Object Image": ["save_png", "dialate_pngs", "dialate_png_ratios", "png_size", "png_dims", "save_arrays", "normalize_by", "crop_mode", "use_bounding_box"],
|
954
961
|
"Sequencing": ["outlier_detection","offset_start","chunk_size","single_direction", "signal_direction","mode","comp_level","comp_type","save_h5","expected_end","offset","target_sequence","regex", "highlight"],
|
955
962
|
"Generate Dataset":["save_to_db","file_metadata","class_metadata", "annotation_column","annotated_classes", "dataset_mode", "metadata_type_by","custom_measurement", "sample", "size"],
|
956
963
|
"Hyperparamiters (Training)": ["png_type", "score_threshold","file_type", "train_channels", "epochs", "loss_type", "optimizer_type","image_size","val_split","learning_rate","weight_decay","dropout_rate", "init_weights", "train", "classes", "augment", "amsgrad","use_checkpoint","gradient_accumulation","gradient_accumulation_steps","intermedeate_save","pin_memory"],
|
@@ -959,11 +966,10 @@ categories = {"Paths":[ "src", "grna", "barcodes", "custom_model_path", "dataset
|
|
959
966
|
"Hyperparamiters (Regression)":["cross_validation","prune_features","reg_lambda","reg_alpha","cov_type", "class_1_threshold", "plate", "other", "fraction_threshold", "alpha", "random_row_column_effects", "regression_type", "min_cell_count", "agg_type", "transform", "dependent_variable"],
|
960
967
|
"Hyperparamiters (Activation)":["cam_type", "overlay", "correlation", "target_layer", "normalize_input"],
|
961
968
|
"Annotation": ["filter_column", "filter_value","volcano", "toxo", "controls", "nc_loc", "pc_loc", "nc", "pc", "cell_plate_metadata","treatment_plate_metadata", "metadata_types", "cell_types", "target","positive_control","negative_control", "location_column", "treatment_loc", "channel_of_interest", "measurement", "treatments", "um_per_pixel", "nr_imgs", "exclude", "exclude_conditions", "mix", "pos", "neg"],
|
962
|
-
"Plot": ["
|
963
|
-
"Test": ["test_mode", "test_images", "random_test", "test_nr", "test", "test_split"],
|
969
|
+
"Plot": ["split_axis_lims", "x_lim","log_x","log_y", "plot_control", "plot_nr", "examples_to_plot", "normalize_plots", "cmap", "figuresize", "plot_cluster_grids", "img_zoom", "row_limit", "color_by", "plot_images", "smooth_lines", "plot_points", "plot_outlines", "black_background", "plot_by_cluster", "heatmap_feature","grouping","min_max","cmap","save_figure"],
|
964
970
|
"Timelapse": ["timelapse", "fps", "timelapse_displacement", "timelapse_memory", "timelapse_frame_limits", "timelapse_remove_transient", "timelapse_mode", "timelapse_objects", "compartments"],
|
965
|
-
"Advanced": ["target_unique_count","threshold_multiplier", "threshold_method", "min_n","shuffle", "target_intensity_min", "cells_per_well", "nuclei_limit", "pathogen_limit", "background", "backgrounds", "schedule", "test_size","exclude","n_repeats","top_features", "model_type_ml", "model_type","minimum_cell_count","n_estimators","preprocess", "remove_background", "normalize", "lower_percentile", "merge_pathogens", "batch_size", "filter", "save", "masks", "verbose", "randomize", "n_jobs"],
|
966
|
-
"
|
971
|
+
"Advanced": ["merge_edge_pathogen_cells", "test_images", "random_test", "test_nr", "test", "test_split", "normalize", "target_unique_count","threshold_multiplier", "threshold_method", "min_n","shuffle", "target_intensity_min", "cells_per_well", "nuclei_limit", "pathogen_limit", "background", "backgrounds", "schedule", "test_size","exclude","n_repeats","top_features", "model_type_ml", "model_type","minimum_cell_count","n_estimators","preprocess", "remove_background", "normalize", "lower_percentile", "merge_pathogens", "batch_size", "filter", "save", "masks", "verbose", "randomize", "n_jobs"],
|
972
|
+
"Beta": ["all_to_mip", "pick_slice", "skip_mode", "upscale", "upscale_factor"]
|
967
973
|
}
|
968
974
|
|
969
975
|
|
@@ -972,6 +978,127 @@ category_keys = list(categories.keys())
|
|
972
978
|
def check_settings(vars_dict, expected_types, q=None):
    """Validate GUI field values and convert them to typed settings.

    Each entry of ``vars_dict`` is read through its variable's ``get()``;
    the strings ``'None'`` and ``''`` are treated as ``None``. The value is
    then converted according to ``expected_types[key]`` (``str`` when the key
    has no entry). Problems are collected instead of aborting at the first
    one, so a single bad field never hides the others.

    Parameters:
    - vars_dict: Mapping of setting name -> 4-item sequence whose third item
      exposes ``get()`` (the other three items are not used here).
    - expected_types: Mapping of setting name -> type or tuple of types.
    - q: Object with a ``put`` method that receives every error message.
      A ``multiprocessing.Queue`` is created when omitted.

    Returns:
    - settings: dict of successfully converted values.
    - errors: list of human-readable error/warning strings (also sent to ``q``).

    Note: relies on the module-level names ``ast`` and ``category_keys``.
    """
    import math
    from .gui_utils import parse_list

    if q is None:
        from multiprocessing import Queue
        q = Queue()

    def _finite_float(raw):
        # float() also accepts 'nan'/'inf'; those are not usable settings.
        number = float(raw)
        if not math.isfinite(number):
            raise ValueError(f"non-finite value '{raw}'")
        return number

    settings = {}
    errors = []  # Collect errors instead of stopping at the first one

    for key, (_label, _widget, var, _) in vars_dict.items():
        if key not in expected_types and key not in category_keys:
            errors.append(f"Warning: Key '{key}' not found in expected types.")
            continue

        value = var.get()
        if value in ['None', '']:
            value = None

        expected_type = expected_types.get(key, str)

        try:
            if key in ["cell_plate_metadata", "timelapse_frame_limits", "png_size", "png_dims", "pathogen_plate_metadata", "treatment_plate_metadata", "class_metadata", "crop_mode"]:
                # These keys hold a list or a list of lists written as a Python literal.
                if value is None:
                    parsed_value = None
                else:
                    try:
                        parsed_value = ast.literal_eval(value)
                    except (ValueError, SyntaxError):
                        raise ValueError(f"Expected a list or list of lists but got an invalid format: {value}")

                if isinstance(parsed_value, list):
                    # Either every item is a list or none is; mixing is rejected.
                    if all(isinstance(i, list) for i in parsed_value) or all(not isinstance(i, list) for i in parsed_value):
                        settings[key] = parsed_value
                    else:
                        raise ValueError(f"Invalid format: '{key}' contains mixed types (single values and lists).")
                else:
                    # NOTE(review): an empty/None value also lands here and is reported
                    # as an error rather than stored as None - confirm this is intended.
                    raise ValueError(f"Expected a list for '{key}', but got {type(parsed_value).__name__}.")

            elif expected_type == list:
                settings[key] = parse_list(value) if value else None

                # A one-element list is unwrapped to its single item.
                if isinstance(settings[key], list) and len(settings[key]) == 1:
                    settings[key] = settings[key][0]

            elif expected_type == bool:
                settings[key] = value.lower() in ['true', '1', 't', 'y', 'yes'] if isinstance(value, str) else bool(value)

            elif expected_type == (int, type(None)):
                if value is None:
                    settings[key] = None
                else:
                    # int() instead of str.isdigit() so negative integers are accepted.
                    try:
                        settings[key] = int(str(value).strip())
                    except ValueError:
                        raise ValueError(f"Expected an integer or None for '{key}', but got '{value}'.")

            elif expected_type == (float, type(None)):
                if value is None:
                    settings[key] = None
                else:
                    # float() instead of a digit check so '-0.5' and '1e-4' are accepted.
                    try:
                        settings[key] = _finite_float(value)
                    except (ValueError, TypeError):
                        raise ValueError(f"Expected a float or None for '{key}', but got '{value}'.")

            elif expected_type == (int, float):
                if value is None:
                    # An empty field used to reach int(None) and raise an uncaught
                    # TypeError; store None like the generic branches do.
                    settings[key] = None
                else:
                    try:
                        settings[key] = float(value) if '.' in str(value) else int(value)
                    except (ValueError, TypeError):
                        raise ValueError(f"Expected an integer or float for '{key}', but got '{value}'.")

            elif expected_type == (str, type(None)):
                settings[key] = str(value) if value is not None else None

            elif expected_type == (str, type(None), list):
                if isinstance(value, list):
                    settings[key] = parse_list(value) if value else None
                elif isinstance(value, str):
                    settings[key] = str(value)
                else:
                    settings[key] = None

            elif expected_type == dict:
                try:
                    if isinstance(value, str):
                        parsed_dict = ast.literal_eval(value)
                    else:
                        raise ValueError("Expected a string representation of a dictionary.")

                    if not isinstance(parsed_dict, dict):
                        raise ValueError(f"Expected a dictionary for '{key}', but got {type(parsed_dict).__name__}.")

                    settings[key] = parsed_dict
                except (ValueError, SyntaxError) as e:
                    # A bad dictionary falls back to {} but is still reported.
                    settings[key] = {}
                    errors.append(f"Error: Invalid dictionary format for '{key}'. Expected type: dict. Error: {e}")

            elif isinstance(expected_type, tuple):
                # Try each allowed type in order and keep the first that converts.
                for typ in expected_type:
                    try:
                        settings[key] = typ(value) if value else None
                        break
                    except (ValueError, TypeError):
                        continue
                else:
                    raise ValueError(f"Value '{value}' for '{key}' does not match any expected types: {expected_type}.")

            else:
                try:
                    settings[key] = expected_type(value) if value else None
                except (ValueError, TypeError):
                    raise ValueError(f"Expected type {expected_type.__name__} for '{key}', but got '{value}'.")

        except (ValueError, SyntaxError, TypeError) as e:
            # TypeError is included so one malformed field cannot abort the whole check.
            expected_type_name = ' or '.join([t.__name__ for t in expected_type]) if isinstance(expected_type, tuple) else expected_type.__name__
            errors.append(f"Error: '{key}' has invalid format. Expected type: {expected_type_name}. Got value: '{value}'. Error: {e}")

    # Send all collected errors to the queue
    for error in errors:
        q.put(error)

    return settings, errors
|
1098
|
+
|
1099
|
+
def check_settings_v1(vars_dict, expected_types, q=None):
|
1100
|
+
from .gui_utils import parse_list
|
1101
|
+
|
975
1102
|
if q is None:
|
976
1103
|
from multiprocessing import Queue
|
977
1104
|
q = Queue()
|
@@ -984,22 +1111,26 @@ def check_settings(vars_dict, expected_types, q=None):
|
|
984
1111
|
q.put(f"Key {key} not found in expected types.")
|
985
1112
|
continue
|
986
1113
|
|
987
|
-
value = var.get()
|
988
|
-
if value
|
1114
|
+
value = var.get()
|
1115
|
+
if value in ['None', '']:
|
989
1116
|
value = None
|
990
1117
|
|
991
1118
|
expected_type = expected_types.get(key, str)
|
992
1119
|
|
993
1120
|
try:
|
994
|
-
if key in ["cell_plate_metadata", "timelapse_frame_limits", "png_size", "pathogen_loc", "treatment_loc", "pathogen_plate_metadata", "treatment_plate_metadata", "barcode_coordinates", "class_metadata"]:
|
995
|
-
|
1121
|
+
#if key in ["cell_plate_metadata", "timelapse_frame_limits", "png_size", "pathogen_loc", "treatment_loc", "pathogen_plate_metadata", "treatment_plate_metadata", "barcode_coordinates", "class_metadata"]:
|
1122
|
+
if key in ["cell_plate_metadata", "timelapse_frame_limits", "png_size", "png_dims", "pathogen_plate_metadata", "treatment_plate_metadata", "class_metadata", "crop_mode"]:
|
1123
|
+
|
1124
|
+
if value is None:
|
1125
|
+
parsed_value = None
|
1126
|
+
else:
|
1127
|
+
parsed_value = ast.literal_eval(value) if isinstance(value, str) and value.strip() else None
|
1128
|
+
|
996
1129
|
if isinstance(parsed_value, list):
|
997
1130
|
if all(isinstance(i, list) for i in parsed_value) or all(not isinstance(i, list) for i in parsed_value):
|
998
1131
|
settings[key] = parsed_value
|
999
1132
|
else:
|
1000
1133
|
raise ValueError("Invalid format: Mixed list and list of lists")
|
1001
|
-
#elif parsed_value == None:
|
1002
|
-
# settings[key] = None
|
1003
1134
|
else:
|
1004
1135
|
raise ValueError("Invalid format for list or list of lists")
|
1005
1136
|
|
@@ -1180,30 +1311,7 @@ def generate_fields(variables, scrollable_frame):
|
|
1180
1311
|
"n_epochs": "(int) - Number of epochs for training the Cellpose model.",
|
1181
1312
|
"n_jobs": "(int) - The number of n_jobs to use for processing the images. This will determine how many images are processed in parallel. Increase to speed up processing.",
|
1182
1313
|
"n_neighbors": "(int) - Number of neighbors for UMAP.",
|
1183
|
-
"n_repeats": "(int) - Number of repeats for
|
1184
|
-
"normalize": "(list) - The percentiles to use for normalizing the images. This will be used to determine the range of intensities to normalize images to. If None, no normalization is done.",
|
1185
|
-
"normalize_by": "(str) - Whether to normalize the images by field of view (fov) or by PNG image (png).",
|
1186
|
-
"normalize_plots": "(bool) - Whether to normalize the plots.",
|
1187
|
-
"nr_imgs": "(int) - The number of images to plot.",
|
1188
|
-
"nucleus_CP_prob": "(float) - The cellpose probability threshold for the nucleus channel. This will be used to segment the nucleus.",
|
1189
|
-
"nucleus_FT": "(float) - The flow threshold for nucleus objects. This will be used in nucleus segmentation.",
|
1190
|
-
"nucleus_background": "(float) - The background intensity for the nucleus channel. This will be used to remove background noise.",
|
1191
|
-
"nucleus_chann_dim": "(int) - Dimension of the channel to use for nucleus segmentation.",
|
1192
|
-
"nucleus_channel": "(int) - The channel to use for the nucleus. If None, the nucleus will not be segmented.",
|
1193
|
-
"nucleus_intensity_range": "(list) - Intensity range for nucleus segmentation.",
|
1194
|
-
"nucleus_loc": "(str) - Location of the nucleus in the images.",
|
1195
|
-
"nucleus_mask_dim": "(int) - The dimension of the array the nucleus mask is saved in.",
|
1196
|
-
"nucleus_min_size": "(int) - The minimum size of nucleus objects in pixels^2.",
|
1197
|
-
"nucleus_Signal_to_noise": "(float) - The signal-to-noise ratio for the nucleus channel. This will be used to determine the range of intensities to normalize images to for nucleus segmentation.",
|
1198
|
-
"nucleus_size_range": "(list) - Size range for nucleus segmentation.",
|
1199
|
-
"optimizer_type": "(str) - Type of optimizer to use.",
|
1200
|
-
"other": "(dict) - Additional parameters for the regression analysis.",
|
1201
|
-
"pathogen_CP_prob": "(float) - The cellpose probability threshold for the pathogen channel. This will be used to segment the pathogen.",
|
1202
|
-
"pathogen_FT": "(float) - The flow threshold for pathogen objects. This will be used in pathogen segmentation.",
|
1203
|
-
"pathogen_background": "(float) - The background intensity for the pathogen channel. This will be used to remove background noise.",
|
1204
|
-
"pathogen_chann_dim": "(int) - Dimension of the channel to use for pathogen segmentation.",
|
1205
|
-
"pathogen_channel": "(int) - The channel to use for the pathogen. If None, the pathogen will not be segmented.",
|
1206
|
-
"pathogen_intensity_range": "(str) - Metadata for the pathogen plate.",
|
1314
|
+
"n_repeats": "(int) - Number of repeats for the pathogen plate.",
|
1207
1315
|
"pathogen_Signal_to_noise": "(float) - The signal-to-noise ratio for the pathogen channel. This will be used to determine the range of intensities to normalize images to for pathogen segmentation.",
|
1208
1316
|
"pathogen_size_range": "(list) - Size range for pathogen segmentation.",
|
1209
1317
|
"pathogen_types": "(list) - Types of pathogens to include in the analysis.",
|
@@ -1222,7 +1330,7 @@ def generate_fields(variables, scrollable_frame):
|
|
1222
1330
|
"plot_nr": "(int) - Number of plots to generate.",
|
1223
1331
|
"plot_outlines": "(bool) - Whether to plot outlines of segmented objects.",
|
1224
1332
|
"png_dims": "(list) - The dimensions of the PNG images to save. This will determine the dimensions of the saved images. Maximum of 3 dimensions e.g. [1,2,3].",
|
1225
|
-
"png_size": "(
|
1333
|
+
"png_size": "(list) - The size of the PNG images to save. This will determine the size of the saved images.",
|
1226
1334
|
"positive_control": "(str) - Identifier for the positive control.",
|
1227
1335
|
"preprocess": "(bool) - Whether to preprocess the images before segmentation. This includes background removal and normalization. Set to False only if this step has already been done.",
|
1228
1336
|
"radial_dist": "(list) - Radial distances for measuring features.",
|
@@ -1385,8 +1493,8 @@ def set_annotate_default_settings(settings):
|
|
1385
1493
|
settings.setdefault('normalize', 'False')
|
1386
1494
|
settings.setdefault('normalize_channels', "r,g,b")
|
1387
1495
|
settings.setdefault('percentiles', [2, 98])
|
1388
|
-
settings.setdefault('measurement', '')#'cytoplasm_channel_3_mean_intensity,pathogen_channel_3_mean_intensity')
|
1389
|
-
settings.setdefault('threshold', '')#'2')
|
1496
|
+
settings.setdefault('measurement', '') #'cytoplasm_channel_3_mean_intensity,pathogen_channel_3_mean_intensity')
|
1497
|
+
settings.setdefault('threshold', '') #'2')
|
1390
1498
|
return settings
|
1391
1499
|
|
1392
1500
|
def set_default_generate_barecode_mapping(settings={}):
|
spacr/sp_stats.py
ADDED
@@ -0,0 +1,221 @@
|
|
1
|
+
from scipy.stats import shapiro, normaltest, levene, ttest_ind, mannwhitneyu, kruskal, f_oneway
|
2
|
+
from statsmodels.stats.multicomp import pairwise_tukeyhsd
|
3
|
+
import scikit_posthocs as sp
|
4
|
+
import numpy as np
|
5
|
+
import pandas as pd
|
6
|
+
from scipy.stats import chi2_contingency, fisher_exact
|
7
|
+
import itertools
|
8
|
+
from statsmodels.stats.multitest import multipletests
|
9
|
+
|
10
|
+
|
11
|
+
def choose_p_adjust_method(num_groups, num_data_points):
    """
    Pick a p-value adjustment method from the number of groups and the group size.

    Parameters:
    - num_groups: Number of unique groups being compared
    - num_data_points: Number of data points per group (assuming balanced groups)

    Returns:
    - One of 'holm', 'sidak', 'fdr_bh' or 'bonferroni'
    """
    # Every unordered pair of groups is one comparison: n * (n - 1) / 2.
    pair_count = (num_groups * (num_groups - 1)) // 2

    if pair_count <= 10:
        # Few comparisons: Holm when groups are large enough, Sidak otherwise.
        return 'holm' if num_data_points > 5 else 'sidak'

    # Many comparisons: FDR control for small groups, Bonferroni otherwise.
    return 'fdr_bh' if num_data_points <= 5 else 'bonferroni'
|
33
|
+
|
34
|
+
def perform_normality_tests(df, grouping_column, data_columns):
    """Perform normality tests for each group and data column.

    Every (column, group) pair is tested on its non-missing values: samples
    with fewer than 3 values are recorded as 'Skipped', samples with 8 or
    more values use the D'Agostino-Pearson test (``normaltest``) and the
    remaining ones use the Shapiro-Wilk test (``shapiro``).

    Parameters:
    - df: DataFrame holding the grouping column and the data columns
    - grouping_column: Name of the column whose unique values define the groups
    - data_columns: Iterable of column names to test

    Returns:
    - is_normal: True only if every computed p-value, across all requested
      columns, is above 0.05. Skipped samples are ignored, so the flag is
      also True when nothing could be tested.
    - normality_results: List with one dict per (column, group) pair
    """
    unique_groups = df[grouping_column].unique()
    normality_results = []
    # Start from True so an empty ``data_columns`` returns (True, []) instead
    # of hitting an unbound name; a failing column can only flip it to False.
    is_normal = True

    for column in data_columns:
        column_p_values = []

        for group in unique_groups:
            data = df.loc[df[grouping_column] == group, column].dropna()
            n_samples = len(data)

            if n_samples < 3:
                # Skip test if there aren't enough data points
                print(f"Skipping normality test for group '{group}' on column '{column}' - Not enough data.")
                normality_results.append({
                    'Comparison': f'Normality test for {group} on {column}',
                    'Test Statistic': None,
                    'p-value': None,
                    'Test Name': 'Skipped',
                    'Column': column,
                    'n': n_samples
                })
                continue

            # Choose the appropriate normality test based on the sample size
            if n_samples >= 8:
                stat, p_value = normaltest(data)
                test_name = "D'Agostino-Pearson test"
            else:
                stat, p_value = shapiro(data)
                test_name = "Shapiro-Wilk test"

            normality_results.append({
                'Comparison': f'Normality test for {group} on {column}',
                'Test Statistic': stat,
                'p-value': p_value,
                'Test Name': test_name,
                'Column': column,
                'n': n_samples
            })
            column_p_values.append(p_value)

        # Accumulate over columns; previously only the last column decided the flag.
        if not all(p > 0.05 for p in column_p_values):
            is_normal = False

    return is_normal, normality_results
|
79
|
+
|
80
|
+
|
81
|
+
def perform_levene_test(df, grouping_column, data_column):
    """Run Levene's test for equal variances across the groups of ``df``.

    One sample is built per unique value of ``grouping_column`` from the
    non-missing values of ``data_column``; the samples are passed to
    ``levene`` and its statistic and p-value are returned as a pair.
    """
    group_labels = df[grouping_column]
    samples = []
    for label in group_labels.unique():
        in_group = group_labels == label
        samples.append(df.loc[in_group, data_column].dropna())

    levene_stat, levene_p = levene(*samples)
    return levene_stat, levene_p
|
87
|
+
|
88
|
+
def perform_statistical_tests(df, grouping_column, data_columns, paired=False):
    """Compare the groups of ``df`` on every column in ``data_columns``.

    With exactly two groups a T-test (normal data) or a Mann-Whitney U test
    is used; otherwise a one-way ANOVA (normal data) or a Kruskal-Wallis test.
    Normality is decided per column by ``perform_normality_tests``. Paired
    two-group comparisons are not implemented: a message is printed and the
    column is skipped.

    Returns a list with one result dict per tested column.
    """
    group_labels = df[grouping_column].unique()
    group_count = len(group_labels)
    two_groups = group_count == 2

    # (test function, display name) for normal and non-normal data.
    if two_groups:
        parametric = (ttest_ind, 'T-test')
        nonparametric = (mannwhitneyu, 'Mann-Whitney U test')
    else:
        parametric = (f_oneway, 'One-way ANOVA')
        nonparametric = (kruskal, 'Kruskal-Wallis test')

    collected = []
    for column in data_columns:
        samples = [
            df.loc[df[grouping_column] == label, column].dropna()
            for label in group_labels
        ]

        if two_groups and paired:
            print("Performing paired tests (not implemented in this template).")
            continue  # Extend as needed

        is_normal, _ = perform_normality_tests(df, grouping_column, [column])
        run_test, test_name = parametric if is_normal else nonparametric
        stat, p = run_test(*samples)

        collected.append({
            'Column': column,
            'Test Name': test_name,
            'Test Statistic': stat,
            'p-value': p,
            'Groups': group_count
        })

    return collected
|
127
|
+
|
128
|
+
|
129
|
+
def perform_posthoc_tests(df, grouping_column, data_column, is_normal):
    """Perform post-hoc tests for multiple groups with both original and adjusted p-values.

    Parameters:
    - df: DataFrame holding the grouping column and the data column
    - grouping_column: Name of the column whose unique values define the groups
    - data_column: Name of the measured column
    - is_normal: If truthy, Tukey's HSD is used; otherwise Dunn's test with a
      correction chosen by ``choose_p_adjust_method``

    Returns:
    - List of dicts, one per group pair, with the keys 'Comparison',
      'Original p-value', 'Adjusted p-value', 'Adjusted Method' and
      'Test Name'. Empty when there are two or fewer groups.
    """
    unique_groups = df[grouping_column].unique()
    posthoc_results = []

    # Post-hoc comparisons are only produced for more than two groups.
    if len(unique_groups) <= 2:
        return posthoc_results

    # Use the same NaN-free rows for the group-size estimate and for the tests,
    # so missing measurements cannot leak into the test statistics.
    valid = df.dropna(subset=[data_column])

    num_groups = len(unique_groups)
    num_data_points = len(valid) // num_groups  # Assuming roughly equal data points per group
    p_adjust_method = choose_p_adjust_method(num_groups, num_data_points)

    if is_normal:
        # Tukey's HSD automatically adjusts p-values
        tukey_result = pairwise_tukeyhsd(valid[data_column], valid[grouping_column], alpha=0.05)
        # summary() is the public accessor for the results table; row 0 is the header.
        table_rows = tukey_result.summary().data[1:]
        for comparison, p_value in zip(table_rows, tukey_result.pvalues):
            posthoc_results.append({
                'Comparison': f"{comparison[0]} vs {comparison[1]}",
                'Original p-value': None,  # Tukey HSD does not provide raw p-values
                'Adjusted p-value': p_value,
                'Adjusted Method': 'Tukey HSD',
                'Test Name': 'Tukey HSD'
            })
    else:
        # Dunn's test with p-value adjustment
        raw_dunn_result = sp.posthoc_dunn(valid, val_col=data_column, group_col=grouping_column, p_adjust=None)
        adjusted_dunn_result = sp.posthoc_dunn(valid, val_col=data_column, group_col=grouping_column, p_adjust=p_adjust_method)
        for i, group_a in enumerate(adjusted_dunn_result.index):
            for j, group_b in enumerate(adjusted_dunn_result.columns):
                if i < j:  # Only consider unique pairs
                    posthoc_results.append({
                        'Comparison': f"{group_a} vs {group_b}",
                        'Original p-value': raw_dunn_result.iloc[i, j],
                        'Adjusted p-value': adjusted_dunn_result.iloc[i, j],
                        'Adjusted Method': p_adjust_method,
                        'Test Name': "Dunn's Post-hoc"
                    })

    return posthoc_results
|
166
|
+
|
167
|
+
def chi_pairwise(raw_counts, verbose=False):
    """
    Perform pairwise chi-square or Fisher's exact tests between all unique group pairs
    and apply p-value correction.

    Parameters:
    - raw_counts (DataFrame): Contingency table with group-wise counts; the
      index labels are the groups and each column is one category.
    - verbose (bool): Whether to print the resulting table.

    Returns:
    - pairwise_df (DataFrame): One row per group pair with the columns
      'Group 1', 'Group 2', 'Test Name', 'p-value', 'p-value_adj' and 'adj'
      (the correction method). Empty, with the same columns, when fewer than
      two groups are present.
    """
    result_columns = ['Group 1', 'Group 2', 'Test Name', 'p-value', 'p-value_adj', 'adj']
    groups = raw_counts.index.unique()  # Use index from raw_counts for group pairs
    group_pairs = list(itertools.combinations(groups, 2))

    if not group_pairs:
        # Fewer than two groups: there is nothing to compare or to correct,
        # so the multiple-testing step is skipped entirely.
        pairwise_df = pd.DataFrame(columns=result_columns)
    else:
        # Number of groups and average number of data points per group drive
        # the choice of correction method.
        num_groups = len(groups)
        num_data_points = raw_counts.sum(axis=1).mean()
        p_adjust_method = choose_p_adjust_method(num_groups, num_data_points)

        pairwise_results = []
        raw_p_values = []  # Raw p-values, corrected together after the loop

        for group1, group2 in group_pairs:
            contingency_table = raw_counts.loc[[group1, group2]].values
            if contingency_table.shape[1] == 2:  # Fisher's Exact Test for 2x2 tables
                _, p_value = fisher_exact(contingency_table)
                test_name = "Fisher's Exact Test"
            else:  # Chi-Square Test for larger tables
                # NOTE(review): chi2_contingency raises ValueError when an expected
                # frequency is zero (e.g. a category neither group contains) -
                # confirm callers never pass such tables.
                _, p_value, _, _ = chi2_contingency(contingency_table)
                test_name = 'Pairwise Chi-Square Test'

            pairwise_results.append({
                'Group 1': group1,
                'Group 2': group2,
                'Test Name': test_name,
                'p-value': p_value
            })
            raw_p_values.append(p_value)

        # Apply p-value correction
        corrected_p_values = multipletests(raw_p_values, method=p_adjust_method)[1]

        # Add corrected p-values to results
        for result, corrected_p in zip(pairwise_results, corrected_p_values):
            result['p-value_adj'] = corrected_p

        pairwise_df = pd.DataFrame(pairwise_results)
        pairwise_df['adj'] = p_adjust_method

    if verbose:
        # Print pairwise results
        print("\nPairwise Frequency Analysis Results:")
        print(pairwise_df.to_string(index=False))

    return pairwise_df
|
spacr/submodules.py
CHANGED
@@ -21,7 +21,7 @@ from sklearn.metrics import mean_absolute_error
|
|
21
21
|
import matplotlib.pyplot as plt
|
22
22
|
from natsort import natsorted
|
23
23
|
|
24
|
-
def analyze_recruitment(settings
|
24
|
+
def analyze_recruitment(settings):
|
25
25
|
"""
|
26
26
|
Analyze recruitment data by grouping the DataFrame by well coordinates and plotting controls and recruitment data.
|
27
27
|
|
@@ -1041,7 +1041,7 @@ def analyze_class_proportion(settings):
|
|
1041
1041
|
from .io import _read_and_merge_data
|
1042
1042
|
from .settings import set_analyze_class_proportion_defaults
|
1043
1043
|
from .plot import plot_plates, plot_proportion_stacked_bars
|
1044
|
-
from .
|
1044
|
+
from .sp_stats import perform_normality_tests, perform_levene_test, perform_statistical_tests, perform_posthoc_tests
|
1045
1045
|
|
1046
1046
|
settings = set_analyze_class_proportion_defaults(settings)
|
1047
1047
|
save_settings(settings, name='analyze_class_proportion', show=True)
|