spacr 1.0.7__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
spacr/logger.py CHANGED
@@ -4,6 +4,23 @@ import os
4
4
 
5
5
  # Automatically configure logging
6
6
  def configure_logger(log_file_name='spacr.log'):
7
+ """
8
+ Configure the global logging system to log INFO-level messages to a file.
9
+
10
+ This function sets up logging to write messages to a file located in the user's
11
+ home directory. The log format includes timestamp, module name, log level, and message.
12
+
13
+ Args:
14
+ log_file_name (str): Name of the log file. The file will be saved in the user's
15
+ home directory. Default is 'spacr.log'.
16
+
17
+ Example:
18
+ >>> configure_logger()
19
+ # Logs will be saved to ~/spacr.log
20
+
21
+ >>> configure_logger('custom.log')
22
+ # Logs will be saved to ~/custom.log
23
+ """
7
24
  # Determine a safe location for the log file
8
25
  home_dir = os.path.expanduser("~") # Get the user's home directory
9
26
  log_file_path = os.path.join(home_dir, log_file_name) # Save log file in home directory
@@ -23,6 +40,25 @@ logger = logging.getLogger(__name__)
23
40
 
24
41
  # Decorator to log function calls
25
42
  def log_function_call(func):
43
+ """
44
+ Decorator that logs the function call, its arguments, return value, and any exceptions.
45
+
46
+ Logs:
47
+ - Function name and arguments on call
48
+ - Return value on successful completion
49
+ - Exception traceback if an error is raised
50
+
51
+ Args:
52
+ func (Callable): The target function to decorate.
53
+
54
+ Returns:
55
+ Callable: The wrapped function with logging enabled.
56
+
57
+ Example:
58
+ >>> @log_function_call
59
+ ... def multiply(a, b):
60
+ ... return a * b
61
+ """
26
62
  @functools.wraps(func)
27
63
  def wrapper(*args, **kwargs):
28
64
  args_repr = [repr(a) for a in args] # Arguments passed to the function
spacr/measure.py CHANGED
@@ -90,7 +90,7 @@ def _analyze_cytoskeleton(array, mask, channel):
90
90
  """
91
91
  Analyzes and extracts skeleton properties from labeled objects in a masked image based on microtubule staining intensities.
92
92
 
93
- Parameters:
93
+ Args:
94
94
  image : numpy array
95
95
  Intensity image where the microtubules are stained.
96
96
  mask : numpy array
@@ -229,7 +229,7 @@ def _create_dataframe(radial_distributions, object_type):
229
229
  """
230
230
  Create a pandas DataFrame from the given radial distributions.
231
231
 
232
- Parameters:
232
+ Args:
233
233
  - radial_distributions (dict): A dictionary containing the radial distributions.
234
234
  - object_type (str): The type of object.
235
235
 
@@ -249,9 +249,38 @@ def _create_dataframe(radial_distributions, object_type):
249
249
 
250
250
  def _extended_regionprops_table(labels, image, intensity_props):
251
251
  """
252
- Calculate extended region properties table, adding a suite of advanced quantitative features.
252
+ Compute extended region properties for labeled objects in an image.
253
+
254
+ In addition to standard `regionprops_table` features, this function computes
255
+ a comprehensive set of intensity-based statistics for each labeled object,
256
+ including Gini coefficient, entropy, skewness, kurtosis, percentiles, and
257
+ fraction of pixels in the tails of the intensity distribution.
258
+
259
+ Args:
260
+ labels (ndarray): Labeled array of connected components.
261
+ image (ndarray): Intensity image used to compute intensity-based features.
262
+ intensity_props (list): List of region properties to compute using
263
+ `regionprops_table` (e.g., ['label', 'area', 'mean_intensity']).
264
+
265
+ Returns:
266
+ pd.DataFrame: DataFrame with standard and extended intensity features for each region.
267
+
268
+ Extended features include:
269
+ - integrated_intensity: Sum of pixel values.
270
+ - std_intensity: Standard deviation.
271
+ - median_intensity: Median pixel intensity.
272
+ - skew_intensity: Skewness of pixel intensity distribution.
273
+ - kurtosis_intensity: Kurtosis of pixel intensity distribution.
274
+ - mode_intensity: Most frequent pixel intensity.
275
+ - range_intensity: Peak-to-peak range (max - min).
276
+ - iqr_intensity: Interquartile range (75th - 25th percentile).
277
+ - cv_intensity: Coefficient of variation (std / mean).
278
+ - gini_intensity: Gini coefficient of intensity distribution.
279
+ - frac_high90: Fraction of pixels above 90th percentile.
280
+ - frac_low10: Fraction of pixels below 10th percentile.
281
+ - entropy_intensity: Shannon entropy of intensity distribution.
282
+ - percentile_X: Percentile value of pixel intensity for X ∈ {5, 10, 25, 75, 85, 95}.
253
283
  """
254
-
255
284
  def _gini(array):
256
285
  # Compute Gini coefficient (nan safe)
257
286
  array = np.abs(array[~np.isnan(array)])
@@ -343,7 +372,7 @@ def _calculate_homogeneity(label, channel, distances=[2,4,8,16,32,64]):
343
372
  """
344
373
  Calculate the homogeneity values for each region in the label mask.
345
374
 
346
- Parameters:
375
+ Args:
347
376
  - label (ndarray): The label mask containing the regions.
348
377
  - channel (ndarray): The image channel corresponding to the label mask.
349
378
  - distances (list): The distances to calculate the homogeneity for.
@@ -545,7 +574,7 @@ def _estimate_blur(image):
545
574
  """
546
575
  Estimates the blur of an image by computing the variance of its Laplacian.
547
576
 
548
- Parameters:
577
+ Args:
549
578
  image (numpy.ndarray): The input image.
550
579
 
551
580
  Returns:
@@ -565,9 +594,32 @@ def _estimate_blur(image):
565
594
 
566
595
  def _measure_intensity_distance(cell_mask, nucleus_mask, pathogen_mask, channel_arrays, settings):
567
596
  """
568
- Compute Gaussian-smoothed intensity-weighted centroid distances for each cell object.
569
- """
597
+ Measure the distance from the intensity-weighted center of mass (COM) of each cell to the nucleus and pathogen.
598
+
599
+ For each labeled cell in the `cell_mask`, the function computes:
600
+ - A Gaussian-blurred version of each channel in `channel_arrays`
601
+ - The center of mass (COM) of intensity within the cell
602
+ - The Euclidean distance from the COM to the nearest nucleus and pathogen pixels
603
+ The distances are calculated using distance transforms of the inverse masks.
570
604
 
605
+ Args:
606
+ cell_mask (ndarray): 2D array with labeled cells (0 = background, 1..N = cell labels).
607
+ nucleus_mask (ndarray): 2D binary mask (non-zero = nucleus pixels).
608
+ pathogen_mask (ndarray): 2D binary mask (non-zero = pathogen pixels).
609
+ channel_arrays (ndarray): 3D array (H, W, C) of image intensities.
610
+ settings (dict): Dictionary containing configuration. Optional key:
611
+ - 'distance_gaussian_sigma' (float): Sigma for Gaussian blur (default: 1.0).
612
+
613
+ Returns:
614
+ pd.DataFrame: DataFrame with one row per cell and columns:
615
+ - 'label': Cell label
616
+ - 'cell_channel_X_distance_to_nucleus': Distance from cell COM in channel X to nucleus
617
+ - 'cell_channel_X_distance_to_pathogen': Distance from cell COM in channel X to pathogen
618
+
619
+ Notes:
620
+ - If the COM is outside image bounds or undefined, NaNs are returned.
621
+ - Multiple channels are supported and processed independently.
622
+ """
571
623
  sigma = settings.get('distance_gaussian_sigma', 1.0)
572
624
  cell_labels = np.unique(cell_mask)
573
625
  cell_labels = cell_labels[cell_labels > 0]
@@ -797,25 +849,58 @@ def img_list_to_grid(grid, titles=None):
797
849
 
798
850
  #@log_function_call
799
851
  def _measure_crop_core(index, time_ls, file, settings):
800
-
801
852
  """
802
- Measure and crop the images based on specified settings.
803
-
804
- Parameters:
805
- - index: int
806
- The index of the image.
807
- - time_ls: list
808
- The list of time points.
809
- - file: str
810
- The file path of the image.
811
- - settings: dict
812
- The dictionary containing the settings for measurement and cropping.
853
+ Core function for processing a single `.npy` image file containing multichannel image data and object masks.
854
+ Performs filtering, mask refinement, measurement extraction, image cropping, and optional saving of
855
+ processed masks, PNGs, and cropped arrays.
856
+
857
+ Args:
858
+ index (int): Index of the image being processed (used for tracking).
859
+ time_ls (list): List used to record processing duration per file for average timing.
860
+ file (str): Filename of the `.npy` stack to process (must exist in `settings['src']`).
861
+ settings (dict): Dictionary of processing options as defined in `get_measure_crop_settings()`. Key options include:
862
+
863
+ - src (str): Source folder containing `.npy` files.
864
+ - experiment (str): Experiment name for saving to database.
865
+ - verbose (bool): If True, print conversion and processing info.
866
+ - plot (bool): If True, generate matplotlib figures and return them.
867
+ - channels (list[int]): List of channel indices to extract for measurements.
868
+ - cell_mask_dim, nucleus_mask_dim, pathogen_mask_dim (int or None): Indices for masks.
869
+ - cytoplasm (bool): If True, generate and process cytoplasm masks.
870
+ - uninfected (bool): If True, include only uninfected cells.
871
+ - *_min_size (int): Minimum object size in pixels for cell, nucleus, pathogen, cytoplasm.
872
+ - save_measurements (bool): If True, compute and save morphology and intensity metrics to database.
873
+ - radial_dist, manders_thresholds, homogeneity, homogeneity_distances (various): Parameters for advanced measurements.
874
+ - save_png, save_arrays (bool): If True, save PNGs and cropped arrays for each object.
875
+ - png_dims (list[int]): Channel indices to use for PNG image generation.
876
+ - normalize (list[int] or False): Percentiles for normalization or False to skip.
877
+ - normalize_by (str): 'png' or 'fov' for normalization reference.
878
+ - png_size (list or list[list]): Size(s) of cropped PNGs, e.g., [224, 224] or [[224, 224], ...].
879
+ - crop_mode (list[str]): Which objects to crop ('cell', 'nucleus', 'pathogen', 'cytoplasm').
880
+ - dialate_pngs (bool or list[bool]): Whether to dilate cropped region for PNG.
881
+ - dialate_png_ratios (float or list[float]): Dilation radius as fraction of diameter.
882
+ - use_bounding_box (bool): If True, crop bounding box around each object with padding.
883
+ - timelapse (bool): If True, handle relabeling for timelapse data.
884
+ - timelapse_objects (str): Which objects to track over time ('cell', 'nucleus').
885
+ - n_jobs (int): Number of parallel jobs.
886
+ - test_mode (bool): If True, limit number of images and enable plotting.
813
887
 
814
888
  Returns:
815
- - cropped_images: list
816
- A list of cropped images.
889
+ tuple:
890
+ - index (int): Same as input, used for indexing.
891
+ - average_time (float): Average time per file across all calls.
892
+ - cells (int): Number of cells processed (or 0 if error).
893
+ - figs (dict): Dictionary of matplotlib figures, if plotting is enabled.
894
+ Keys are labeled with file names and processing stages.
895
+
896
+ Notes:
897
+ - Saves measurement data into SQLite databases using `_merge_and_save_to_database`.
898
+ - Supports dilation of PNG crops and flexible normalization strategies.
899
+ - Designed for batch use in multiprocessing workflows.
900
+ - All intermediate object masks are optionally filtered by size and merged with overlaps.
901
+ - Relabeling ensures parent-child consistency when necessary (e.g., nucleus within cells).
902
+ - Errors during processing are caught and traceback printed; the image is skipped.
817
903
  """
818
-
819
904
  from .plot import _plot_cropped_arrays
820
905
  from .utils import _merge_overlapping_objects, _filter_object, _relabel_parent_with_child_labels, _exclude_objects, normalize_to_dtype, filepaths_to_database
821
906
  from .utils import _merge_and_save_to_database, _crop_center, _find_bounding_box, _generate_names, _get_percentiles
@@ -1075,17 +1160,60 @@ def _measure_crop_core(index, time_ls, file, settings):
1075
1160
 
1076
1161
  #@log_function_call
1077
1162
  def measure_crop(settings):
1078
-
1079
1163
  """
1080
- Measure the crop of an image based on the provided settings.
1164
+ Main driver function to process `.npy` image stacks in a given folder or list of folders.
1165
+ Applies morphological measurements, generates cropped images (PNGs), and saves object-level
1166
+ metrics and optional visuals using multiprocessing for speed.
1081
1167
 
1082
1168
  Args:
1083
- settings (dict): The settings for measuring the crop.
1169
+ settings (dict): Dictionary of processing parameters. Use `get_measure_crop_settings()` to populate defaults.
1170
+ Key settings include:
1171
+
1172
+ - src (str or list[str]): Path(s) to folder(s) containing `.npy` stacks.
1173
+ - experiment (str): Experiment name to store alongside outputs in the database.
1174
+ - test_mode (bool): If True, limit number of images and enable plotting.
1175
+ - verbose (bool): Print processing info.
1176
+ - channels (list[int]): Channels to use for intensity measurements and PNG generation.
1177
+ - plot (bool): If True, save figures to memory and optionally to disk.
1178
+ - n_jobs (int): Number of parallel processes (defaults to CPU count minus 2).
1179
+ - cell_mask_dim, nucleus_mask_dim, pathogen_mask_dim (int or None): Indices of object masks in the stack.
1180
+ - cytoplasm (bool): If True, derive cytoplasmic mask.
1181
+ - *_min_size (int): Minimum pixel size thresholds for object filtering (one key per object type).
1182
+ - merge_edge_pathogen_cells (bool): If True, merge pathogen/cell masks at edges.
1183
+ - timelapse (bool): If True, enable temporal relabeling and GIF generation.
1184
+ - timelapse_objects (str): Which object type to track temporally ("nucleus" or "cell").
1185
+ - save_measurements (bool): Save morphology and intensity features to SQLite DB.
1186
+ - save_png, save_arrays (bool): Save per-object cropped PNGs and/or subarrays.
1187
+ - png_dims (list[int]): Channel indices to render in PNG.
1188
+ - png_size (list[int] or list[list[int]]): PNG crop size in pixels (width, height).
1189
+ - crop_mode (list[str]): Which objects to crop (e.g., ['cell', 'nucleus']).
1190
+ - normalize (list[int] or False): Percentiles for intensity normalization or False to skip.
1191
+ - normalize_by (str): 'png' or 'fov'—reference frame for normalization.
1192
+ - dialate_pngs (bool or list[bool]): Whether to dilate PNG masks before cropping.
1193
+ - dialate_png_ratios (float or list[float]): Dilation factor relative to object size.
1194
+ - use_bounding_box (bool): Use bounding box rather than minimal crop.
1195
+ - delete_intermediate (bool): If True, delete original input arrays after processing.
1196
+
1197
+ Workflow:
1198
+ - Validates and normalizes input settings.
1199
+ - Applies multiprocessing to process each `.npy` file using `_measure_crop_core()`.
1200
+ - Saves measurement outputs (morphology, intensity) to database.
1201
+ - Generates per-object crops as PNGs or arrays, optionally normalized and resized.
1202
+ - If `timelapse=True`, generates summary mask GIFs across timepoints.
1203
+ - Reports progress and CPU usage throughout execution.
1084
1204
 
1085
1205
  Returns:
1086
- None
1087
- """
1206
+ None. Results are written to disk and/or SQLite DBs. Completion is reported via print statements.
1207
+
1208
+ Raises:
1209
+ ValueError: For invalid or missing keys in `settings`.
1210
+ Warnings are printed to console for most incorrect parameter combinations.
1088
1211
 
1212
+ Notes:
1213
+ - The `settings['src']` directory is expected to contain `.npy` files and typically ends with `/merged`.
1214
+ - Processing uses up to `settings['n_jobs']` CPU cores but reserves 6 cores by default.
1215
+ - Errors during file processing are handled per file; execution continues for remaining files.
1216
+ """
1089
1217
  from .io import _save_settings_to_db
1090
1218
  from .timelapse import _timelapse_masks_to_gif
1091
1219
  from .utils import measure_test_mode, print_progress, delete_intermedeate_files, save_settings, format_path_for_system, normalize_src_path
@@ -1240,6 +1368,34 @@ def process_meassure_crop_results(partial_results, settings):
1240
1368
  result = (index, None, None, None)
1241
1369
 
1242
1370
  def generate_cellpose_train_set(folders, dst, min_objects=5):
1371
+ """
1372
+ Prepares a Cellpose training dataset by extracting images and corresponding masks
1373
+ from one or more processed spaCR folders. Filters objects by minimum count per mask.
1374
+
1375
+ Args:
1376
+ folders (list[str]): List of source directories. Each must contain:
1377
+ - a `masks/` folder with segmentation masks
1378
+ - the corresponding raw images at the top level.
1379
+ dst (str): Destination folder where the training dataset will be saved.
1380
+ Two subfolders will be created: `dst/masks` and `dst/imgs`.
1381
+ min_objects (int): Minimum number of objects (excluding background) required
1382
+ in a mask to be included in the training set.
1383
+
1384
+ Workflow:
1385
+ - Iterates through each folder and its `masks/` subfolder.
1386
+ - For each `.tif` or `.png` mask, counts the number of unique objects.
1387
+ - If `nr_of_objects >= min_objects`, the mask and corresponding image are copied
1388
+ to `dst/masks` and `dst/imgs`, respectively.
1389
+ - Output files are renamed using `experiment_id + '_' + original_filename` to avoid collisions.
1390
+
1391
+ Returns:
1392
+ None. Selected images and masks are copied to the target location.
1393
+
1394
+ Notes:
1395
+ - Skips any unreadable or malformed mask files.
1396
+ - Assumes mask files use 0 for background and positive integers for labeled objects.
1397
+ - This function does not validate image-mask alignment—ensure file naming is consistent.
1398
+ """
1243
1399
  os.makedirs(dst, exist_ok=True)
1244
1400
  os.makedirs(os.path.join(dst,'masks'), exist_ok=True)
1245
1401
  os.makedirs(os.path.join(dst,'imgs'), exist_ok=True)
@@ -1268,6 +1424,28 @@ def generate_cellpose_train_set(folders, dst, min_objects=5):
1268
1424
  print(f"Error copying {path} to {new_mask}: {e}")
1269
1425
 
1270
1426
  def get_object_counts(src):
1427
+ """
1428
+ Reads the object count summary from the SQLite database and returns aggregated statistics.
1429
+
1430
+ Args:
1431
+ src (str): Source directory containing a `measurements/measurements.db` file
1432
+ generated by the `measure_crop()` pipeline.
1433
+
1434
+ Returns:
1435
+ pandas.DataFrame: A summary table with one row per `count_type`, including:
1436
+ - total_object_count: Sum of object counts across all files for that type.
1437
+ - avg_object_count_per_file_name: Mean object count per file.
1438
+
1439
+ Example Output:
1440
+ count_type total_object_count avg_object_count_per_file_name
1441
+ ----------- ------------------- -------------------------------
1442
+ nucleus 10500 87.5
1443
+ cell 10892 90.77
1444
+
1445
+ Notes:
1446
+ - Requires the presence of an `object_counts` table in the database.
1447
+ - Fails with an exception if the table does not exist or the database is missing.
1448
+ """
1271
1449
  database_path = os.path.join(src, 'measurements/measurements.db')
1272
1450
  # Connect to the SQLite database
1273
1451
  conn = sqlite3.connect(database_path)