spacr 1.0.9__py3-none-any.whl → 1.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
spacr/sp_stats.py CHANGED
@@ -10,7 +10,7 @@ def choose_p_adjust_method(num_groups, num_data_points):
10
10
  """
11
11
  Selects the most appropriate p-value adjustment method based on data characteristics.
12
12
 
13
- Parameters:
13
+ Args:
14
14
  - num_groups: Number of unique groups being compared
15
15
  - num_data_points: Number of data points per group (assuming balanced groups)
16
16
 
@@ -30,7 +30,25 @@ def choose_p_adjust_method(num_groups, num_data_points):
30
30
  return 'bonferroni' # Very conservative, use for strict control of Type I errors
31
31
 
32
32
  def perform_normality_tests(df, grouping_column, data_columns):
33
- """Perform normality tests for each group and data column."""
33
+ """
34
+ Perform normality tests (Shapiro-Wilk or D'Agostino-Pearson) on grouped data.
35
+
36
+ Args:
37
+ df (pd.DataFrame): Input DataFrame containing the data to test.
38
+ grouping_column (str): Column name to group data by (e.g., condition or treatment).
39
+ data_columns (list of str): List of column names containing numeric data to test for normality.
40
+
41
+ Returns:
42
+ tuple:
43
+ - is_normal (bool): True if all group-column combinations pass the normality test (p > 0.05), else False.
44
+ - normality_results (list of dict): List of results for each group-column combination including:
45
+ - 'Comparison': Description of the test
46
+ - 'Test Statistic': Computed test statistic (None if skipped)
47
+ - 'p-value': P-value of the test (None if skipped)
48
+ - 'Test Name': Name of the test used or 'Skipped'
49
+ - 'Column': The data column tested
50
+ - 'n': Sample size
51
+ """
34
52
  unique_groups = df[grouping_column].unique()
35
53
  normality_results = []
36
54
 
@@ -77,14 +95,40 @@ def perform_normality_tests(df, grouping_column, data_columns):
77
95
 
78
96
 
79
97
  def perform_levene_test(df, grouping_column, data_column):
80
- """Perform Levene's test for equal variance."""
98
+ """
99
+ Perform Levene’s test for equal variances across groups.
100
+
101
+ Args:
102
+ df (pd.DataFrame): The DataFrame containing the data.
103
+ grouping_column (str): The column indicating group membership.
104
+ data_column (str): The column containing the numeric data.
105
+
106
+ Returns:
107
+ tuple: (statistic, p-value) from Levene’s test.
108
+ """
81
109
  unique_groups = df[grouping_column].unique()
82
110
  grouped_data = [df.loc[df[grouping_column] == group, data_column].dropna() for group in unique_groups]
83
111
  stat, p_value = levene(*grouped_data)
84
112
  return stat, p_value
85
113
 
86
114
  def perform_statistical_tests(df, grouping_column, data_columns, paired=False):
87
- """Perform statistical tests for each data column."""
115
+ """
116
+ Perform statistical tests to compare groups for each specified data column.
117
+
118
+ Args:
119
+ df (pd.DataFrame): The DataFrame containing the data.
120
+ grouping_column (str): The column indicating group membership.
121
+ data_columns (list): List of column names to perform tests on.
122
+ paired (bool): Whether to use paired tests (only for two-group comparisons).
123
+
124
+ Returns:
125
+ list of dict: Each dict contains:
126
+ - 'Column': Name of the column tested.
127
+ - 'Test Name': Statistical test used.
128
+ - 'Test Statistic': Test statistic value.
129
+ - 'p-value': P-value of the test.
130
+ - 'Groups': Number of groups compared.
131
+ """
88
132
  unique_groups = df[grouping_column].unique()
89
133
  test_results = []
90
134
 
@@ -125,7 +169,24 @@ def perform_statistical_tests(df, grouping_column, data_columns, paired=False):
125
169
 
126
170
 
127
171
  def perform_posthoc_tests(df, grouping_column, data_column, is_normal):
128
- """Perform post-hoc tests for multiple groups with both original and adjusted p-values."""
172
+ """
173
+ Perform post-hoc pairwise comparisons between groups after a significant overall test (e.g., ANOVA or Kruskal-Wallis).
174
+
175
+ Args:
176
+ df (pd.DataFrame): Input DataFrame containing the data to analyze.
177
+ grouping_column (str): Column name representing group membership.
178
+ data_column (str): Column name with the continuous variable to compare.
179
+ is_normal (bool): Indicator of whether the data meets normality assumptions
180
+ (determines test type: Tukey HSD if True, Dunn's test if False).
181
+
182
+ Returns:
183
+ list of dict: List of dictionaries summarizing pairwise comparisons, each including:
184
+ - 'Comparison': Description of the group pair.
185
+ - 'Original p-value': Raw p-value (None for Tukey HSD).
186
+ - 'Adjusted p-value': Corrected p-value for multiple testing.
187
+ - 'Adjusted Method': Method used for p-value adjustment.
188
+ - 'Test Name': The post-hoc test performed ("Tukey HSD" or "Dunn's Post-hoc").
189
+ """
129
190
  unique_groups = df[grouping_column].unique()
130
191
  posthoc_results = []
131
192
 
@@ -164,16 +225,23 @@ def perform_posthoc_tests(df, grouping_column, data_column, is_normal):
164
225
 
165
226
  def chi_pairwise(raw_counts, verbose=False):
166
227
  """
167
- Perform pairwise chi-square or Fisher's exact tests between all unique group pairs
168
- and apply p-value correction.
228
+ Perform pairwise statistical tests (Chi-Square or Fisher's Exact) on contingency tables
229
+ derived from count data, and apply multiple testing correction.
169
230
 
170
- Parameters:
171
- - raw_counts (DataFrame): Contingency table with group-wise counts.
172
- - verbose (bool): Whether to print results for each pair.
231
+ Args:
232
+ raw_counts (pd.DataFrame): A DataFrame where rows represent groups and columns represent categories.
233
+ The values are raw counts.
234
+ verbose (bool): If True, prints the resulting pairwise test summary.
173
235
 
174
236
  Returns:
175
- - pairwise_df (DataFrame): DataFrame with pairwise test results, including corrected p-values.
176
- """
237
+ pd.DataFrame: A DataFrame with pairwise comparisons including:
238
+ - 'Group 1': First group in the comparison
239
+ - 'Group 2': Second group in the comparison
240
+ - 'Test Name': Type of statistical test used ('Chi-Square' or "Fisher's Exact")
241
+ - 'p-value': Raw p-value for the test
242
+ - 'p-value_adj': Adjusted p-value after multiple testing correction
243
+ - 'adj': Name of the correction method used
244
+ """
177
245
  pairwise_results = []
178
246
  groups = raw_counts.index.unique() # Use index from raw_counts for group pairs
179
247
  raw_p_values = [] # Store raw p-values for correction later
spacr/spacr_cellpose.py CHANGED
@@ -66,7 +66,43 @@ def parse_cellpose4_output(output):
66
66
  raise ValueError(f"Unrecognized Cellpose flows format: type={type(flows)}, len={len(flows) if hasattr(flows,'__len__') else 'unknown'}")
67
67
 
68
68
  def identify_masks_finetune(settings):
69
-
69
+ """
70
+ Generate Cellpose segmentation masks for a batch of images using a pretrained or custom model.
71
+
72
+ This function loads a set of images from the provided source directory, applies optional
73
+ preprocessing (normalization, resizing), and uses a Cellpose model to generate segmentation masks.
74
+ Masks are optionally visualized and saved to disk. The model, channels, and other parameters are
75
+ defined in the `settings` dictionary.
76
+
77
+ Args:
78
+ settings (dict): Dictionary containing configuration parameters. Must include:
79
+ - 'src' (str): Source folder with `.tif` images.
80
+ - 'model_name' (str): Name of Cellpose model to use (e.g., 'cyto2', 'nucleus').
81
+ - 'custom_model' (str or None): Path to custom model file (.pt), if used.
82
+ - 'channels' (list): List of image channel indices to use for segmentation.
83
+ - 'grayscale' (bool): Whether input images are single-channel.
84
+ - 'diameter' (float): Estimated diameter of objects in pixels.
85
+ - 'flow_threshold' (float): Threshold for mask acceptance based on flow prediction.
86
+ - 'CP_prob' (float): Cell probability threshold for segmentation.
87
+ - 'rescale' (float): Rescaling factor.
88
+ - 'resample' (bool): Whether to resample the image during preprocessing.
89
+ - 'normalize' (bool): Whether to normalize pixel intensities.
90
+ - 'percentiles' (list): Lower and upper percentiles for normalization.
91
+ - 'invert' (bool): Whether to invert image intensities.
92
+ - 'remove_background' (bool): Whether to subtract background value.
93
+ - 'background' (list): Background pixel intensity values to subtract per channel.
94
+ - 'Signal_to_noise' (float): Threshold for signal-to-noise filtering.
95
+ - 'resize' (bool): Whether to resize to fixed target dimensions.
96
+ - 'target_height' (int): Height for resizing.
97
+ - 'target_width' (int): Width for resizing.
98
+ - 'batch_size' (int): Number of images to process per batch.
99
+ - 'fill_in' (bool): Whether to fill holes in masks.
100
+ - 'save' (bool): Whether to save the masks to disk.
101
+ - 'verbose' (bool): Whether to print detailed progress and visualization output.
102
+
103
+ Returns:
104
+ None. Masks are optionally saved to the 'masks' subdirectory in the source folder.
105
+ """
70
106
  from .plot import print_mask_and_flows
71
107
  from .utils import resize_images_and_labels, print_progress, save_settings, fill_holes_in_mask
72
108
  from .io import _load_normalized_images_and_labels, _load_images_and_labels
@@ -189,7 +225,35 @@ def identify_masks_finetune(settings):
189
225
  return
190
226
 
191
227
  def generate_masks_from_imgs(src, model, model_name, batch_size, diameter, cellprob_threshold, flow_threshold, grayscale, save, normalize, channels, percentiles, invert, plot, resize, target_height, target_width, remove_background, background, Signal_to_noise, verbose):
192
-
228
+ """
229
+ Apply a Cellpose model to a batch of images and generate segmentation masks.
230
+
231
+ Args:
232
+ src (str): Directory containing input .tif images.
233
+ model (CellposeModel): Initialized Cellpose model.
234
+ model_name (str): Model identifier (e.g., 'cyto2', 'nucleus').
235
+ batch_size (int): Number of images processed in each batch.
236
+ diameter (float): Estimated object diameter in pixels.
237
+ cellprob_threshold (float): Cell probability threshold.
238
+ flow_threshold (float): Flow threshold for mask acceptance.
239
+ grayscale (bool): If True, treat images as single-channel.
240
+ save (bool): Whether to save output masks.
241
+ normalize (bool): Whether to normalize input images.
242
+ channels (list): Channels to use for processing (e.g., [0, 1]).
243
+ percentiles (list): Percentiles for normalization (e.g., [2, 99]).
244
+ invert (bool): If True, invert image intensity.
245
+ plot (bool): If True, display masks and flows.
246
+ resize (bool): Whether to resize images to fixed target dimensions.
247
+ target_height (int): Height after resizing.
248
+ target_width (int): Width after resizing.
249
+ remove_background (bool): Whether to subtract background intensity.
250
+ background (list): Background intensity values for subtraction.
251
+ Signal_to_noise (float): Minimum SNR threshold.
252
+ verbose (bool): If True, print detailed status messages.
253
+
254
+ Returns:
255
+ None. Saves masks to disk if `save=True`.
256
+ """
193
257
  from .io import _load_images_and_labels, _load_normalized_images_and_labels
194
258
  from .utils import resize_images_and_labels, resizescikit, print_progress
195
259
  from .plot import print_mask_and_flows
@@ -264,7 +328,17 @@ def generate_masks_from_imgs(src, model, model_name, batch_size, diameter, cellp
264
328
  cv2.imwrite(output_filename, mask)
265
329
 
266
330
  def check_cellpose_models(settings):
331
+ """
332
+ Evaluate multiple pretrained Cellpose models ('cyto', 'nuclei', 'cyto2', 'cyto3')
333
+ on a given dataset using standardized settings.
267
334
 
335
+ Args:
336
+ settings (dict): Dictionary of parameters controlling input source, model parameters,
337
+ image preprocessing, and save/visualization options.
338
+
339
+ Returns:
340
+ None. Runs `generate_masks_from_imgs()` for each model and displays results.
341
+ """
268
342
  from .settings import get_check_cellpose_models_default_settings
269
343
 
270
344
  settings = get_check_cellpose_models_default_settings(settings)
@@ -286,7 +360,18 @@ def check_cellpose_models(settings):
286
360
  return
287
361
 
288
362
  def save_results_and_figure(src, fig, results):
363
+ """
364
+ Save a results DataFrame and associated figure to disk.
365
+
366
+ Args:
367
+ src (str): Path to the source directory where the 'results' subfolder will be created.
368
+ fig (matplotlib.figure.Figure): The figure object to be saved as a PDF.
369
+ results (pd.DataFrame or dict or list): Results to be saved. If not a DataFrame,
370
+ it will be converted to one.
289
371
 
372
+ Returns:
373
+ None. Writes results to 'results.csv' and the figure to 'model_comparison_plot.pdf'.
374
+ """
290
375
  if not isinstance(results, pd.DataFrame):
291
376
  results = pd.DataFrame(results)
292
377
 
@@ -299,6 +384,22 @@ def save_results_and_figure(src, fig, results):
299
384
  print(f'Saved figure to {fig_path} and results to {results_path}')
300
385
 
301
386
  def compare_mask(args):
387
+ """
388
+ Compare segmentation masks across different directories for a given filename
389
+ using multiple evaluation metrics.
390
+
391
+ Args:
392
+ args (tuple): A tuple containing:
393
+ - src (str): Not used directly, reserved for future use.
394
+ - filename (str): Name of the mask file to compare across directories.
395
+ - dirs (list of str): List of directory paths where mask files are located.
396
+ - conditions (list of str): Labels corresponding to each directory for result naming.
397
+
398
+ Returns:
399
+ dict or None: A dictionary containing comparison metrics (Jaccard index, boundary F1 score,
400
+ and average precision) for all pairwise combinations of masks.
401
+ Returns None if any mask file is missing.
402
+ """
302
403
  src, filename, dirs, conditions = args
303
404
  paths = [os.path.join(d, filename) for d in dirs]
304
405
 
@@ -327,6 +428,18 @@ def compare_mask(args):
327
428
  return file_results
328
429
 
329
430
  def compare_cellpose_masks(src, verbose=False, processes=None, save=True):
431
+ """
432
+ Compare Cellpose segmentation masks across multiple model output folders.
433
+
434
+ Args:
435
+ src (str): Path to the parent directory containing subdirectories for each model condition.
436
+ verbose (bool): If True, visualize each mask comparison using matplotlib.
437
+ processes (int or None): Number of parallel processes to use. If None, uses os.cpu_count().
438
+ save (bool): Whether to save the visualization outputs and results to disk.
439
+
440
+ Returns:
441
+ None. Results are printed, plotted, and optionally saved to disk.
442
+ """
330
443
  from .plot import visualize_cellpose_masks, plot_comparison_results
331
444
  from .io import _read_mask
332
445