PyPI - spacr - Versions diffs - 1.0.9__py3-none-any.whl → 1.1.0__py3-none-any.whl - Mend

spacr 1.0.9-py3-none-any.whl → 1.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29) hide show

spacr/app_classify.py +10 -0
spacr/app_mask.py +9 -0
spacr/app_measure.py +9 -0
spacr/app_sequencing.py +9 -0
spacr/core.py +172 -1
spacr/deep_spacr.py +296 -7
spacr/gui.py +68 -0
spacr/gui_core.py +319 -10
spacr/gui_elements.py +772 -13
spacr/gui_utils.py +304 -12
spacr/io.py +887 -71
spacr/logger.py +36 -0
spacr/measure.py +206 -28
spacr/ml.py +606 -142
spacr/plot.py +797 -131
spacr/sequencing.py +363 -8
spacr/settings.py +1158 -38
spacr/sp_stats.py +80 -12
spacr/spacr_cellpose.py +115 -2
spacr/submodules.py +747 -19
spacr/timelapse.py +237 -53
spacr/toxo.py +132 -6
spacr/utils.py +2422 -80
{spacr-1.0.9.dist-info → spacr-1.1.0.dist-info}/METADATA +31 -17
{spacr-1.0.9.dist-info → spacr-1.1.0.dist-info}/RECORD +29 -29
{spacr-1.0.9.dist-info → spacr-1.1.0.dist-info}/LICENSE +0 -0
{spacr-1.0.9.dist-info → spacr-1.1.0.dist-info}/WHEEL +0 -0
{spacr-1.0.9.dist-info → spacr-1.1.0.dist-info}/entry_points.txt +0 -0
{spacr-1.0.9.dist-info → spacr-1.1.0.dist-info}/top_level.txt +0 -0

spacr/app_classify.py CHANGED Viewed

@@ -1,6 +1,16 @@
 from .gui import MainApp
 def start_classify_app():
+    """
+    Launch the spaCR GUI with the Classify application preloaded.
+    This function initializes the main GUI window with "Classify" set as the default active application.
+    It then starts the Tkinter main event loop to display the interface.
+    Typical use case:
+        Called from the command line or another script to directly launch the Classify module of spaCR.
+    """
     app = MainApp(default_app="Classify")
     app.mainloop()

spacr/app_mask.py CHANGED Viewed

@@ -1,6 +1,15 @@
 from .gui import MainApp
 def start_mask_app():
+    """
+    Launch the spaCR GUI with the Mask application preloaded.
+    This function initializes the main GUI window with "Mask" selected as the default active module.
+    It is intended for users who want to directly start the application in mask generation mode.
+    Typical use case:
+        Called from the command line or another script to launch the mask generation workflow of spaCR.
+    """
     app = MainApp(default_app="Mask")
     app.mainloop()

spacr/app_measure.py CHANGED Viewed

@@ -1,6 +1,15 @@
 from .gui import MainApp
 def start_measure_app():
+    """
+    Launch the spaCR GUI with the Measure application preloaded.
+    This function initializes the main GUI window with "Measure" selected as the default active module.
+    It is used to directly open the application in object measurement mode.
+    Typical use case:
+        Called from the command line or another script to start spaCR in measurement mode.
+    """
     app = MainApp(default_app="Measure")
     app.mainloop()

spacr/app_sequencing.py CHANGED Viewed

@@ -1,6 +1,15 @@
 from .gui import MainApp
 def start_seq_app():
+    """
+    Launch the spaCR GUI with the Sequencing application preloaded.
+    This function initializes the main GUI window with "Sequencing" selected as the default active module.
+    It is used to directly open the application in sequencing mode.
+    Typical use case:
+        Called from the command line or another script to start spaCR in sequencing mode.
+    """
     app = MainApp(default_app="Sequencing")
     app.mainloop()

spacr/core.py CHANGED Viewed

@@ -7,6 +7,99 @@ import warnings
 warnings.filterwarnings("ignore", message="3D stack used, but stitch_threshold=0 and do_3D=False, so masks are made per plane only")
 def preprocess_generate_masks(settings):
+    """
+    Preprocess image data and generate Cellpose segmentation masks for cells, nuclei, and pathogens.
+    This function supports preprocessing, metadata conversion, Cellpose-based mask generation, optional
+    mask adjustment, result plotting, and intermediate file cleanup. It handles batch operations and
+    supports advanced timelapse and channel-specific configurations.
+    Args:
+        settings (dict): Dictionary containing the following keys:
+            General settings:
+                - src (str or list): Path(s) to input folders. Required.
+                - denoise (bool): Apply denoising during preprocessing. Default is False.
+                - delete_intermediate (bool): Delete intermediate files after processing. Default is False.
+                - preprocess (bool): Perform preprocessing. Default is True.
+                - masks (bool): Generate masks using Cellpose. Default is True.
+                - save (bool or list of bool): Whether to save outputs per object type. Default is True.
+                - consolidate (bool): Consolidate input folder structure. Default is False.
+                - batch_size (int): Number of files processed per batch. Default is 50.
+                - test_mode (bool): Enable test mode with limited data. Default is False.
+                - test_images (int): Number of test images to use. Default is 10.
+                - magnification (int): Magnification of input data. Default is 20.
+                - custom_regex (str or None): Regex for filename parsing in auto metadata mode.
+                - metadata_type (str): Metadata type; "cellvoyager" or "auto". Default is "cellvoyager".
+                - n_jobs (int): Number of parallel processes. Default is os.cpu_count() - 4.
+                - randomize (bool): Randomize processing order. Default is True.
+                - verbose (bool): Print full settings table. Default is True.
+            Channel background correction:
+                - remove_background_cell (bool): Remove background from cell channel. Default is False.
+                - remove_background_nucleus (bool): Remove background from nucleus channel. Default is False.
+                - remove_background_pathogen (bool): Remove background from pathogen channel. Default is False.
+            Channel diameter and index settings:
+                - cell_diamiter (float or None): Cell diameter estimate for Cellpose.
+                - nucleus_diamiter (float or None): Nucleus diameter estimate for Cellpose.
+                - pathogen_diamiter (float or None): Pathogen diameter estimate for Cellpose.
+                - cell_channel (int or None): Channel index for cell. Default is None.
+                - nucleus_channel (int or None): Channel index for nucleus. Default is None.
+                - pathogen_channel (int or None): Channel index for pathogen. Default is None.
+                - channels (list): List of channel indices to include. Default is [0, 1, 2, 3].
+            Cellpose parameters:
+                - pathogen_background (float): Background intensity for pathogen. Default is 100.
+                - pathogen_Signal_to_noise (float): SNR threshold for pathogen. Default is 10.
+                - pathogen_CP_prob (float): Cellpose probability threshold for pathogen. Default is 0.
+                - cell_background (float): Background intensity for cell. Default is 100.
+                - cell_Signal_to_noise (float): SNR threshold for cell. Default is 10.
+                - cell_CP_prob (float): Cellpose probability threshold for cell. Default is 0.
+                - nucleus_background (float): Background intensity for nucleus. Default is 100.
+                - nucleus_Signal_to_noise (float): SNR threshold for nucleus. Default is 10.
+                - nucleus_CP_prob (float): Cellpose probability threshold for nucleus. Default is 0.
+                - nucleus_FT (float): Intensity scaling factor for nucleus. Default is 1.0.
+                - cell_FT (float): Intensity scaling factor for cell. Default is 1.0.
+                - pathogen_FT (float): Intensity scaling factor for pathogen. Default is 1.0.
+            Plotting settings:
+                - plot (bool): Enable plotting. Default is False.
+                - figuresize (int or float): Figure size for plots. Default is 10.
+                - cmap (str): Colormap used for plotting. Default is "inferno".
+                - normalize (bool): Normalize image intensities before processing. Default is True.
+                - normalize_plots (bool): Normalize intensity for plotting. Default is True.
+                - examples_to_plot (int): Number of examples to plot. Default is 1.
+            Analysis settings:
+                - pathogen_model (str or None): Custom model for pathogen ("toxo_pv_lumen" or "toxo_cyto").
+                - merge_pathogens (bool): Whether to merge multiple pathogen types. Default is False.
+                - filter (bool): Apply percentile filter. Default is False.
+                - lower_percentile (float): Lower percentile for intensity filtering. Default is 2.
+            Timelapse settings:
+                - timelapse (bool): Enable timelapse mode. Default is False.
+                - fps (int): Frames per second for timelapse export. Default is 2.
+                - timelapse_displacement (float or None): Max displacement for object tracking.
+                - timelapse_memory (int): Memory for tracking algorithm. Default is 3.
+                - timelapse_frame_limits (list): Frame limits for tracking. Default is [5].
+                - timelapse_remove_transient (bool): Remove short-lived objects. Default is False.
+                - timelapse_mode (str): Tracking algorithm. Default is "trackpy".
+                - timelapse_objects (str or None): Object type for tracking.
+            Miscellaneous:
+                - all_to_mip (bool): Convert all input to MIP. Default is False.
+                - upscale (bool): Upscale images prior to processing. Default is False.
+                - upscale_factor (float): Upscaling factor. Default is 2.0.
+                - adjust_cells (bool): Adjust cell masks based on nuclei and pathogen. Default is False.
+                - use_sam_cell (bool): Use SAM model for cell segmentation. Default is False.
+                - use_sam_nucleus (bool): Use SAM model for nucleus segmentation. Default is False.
+                - use_sam_pathogen (bool): Use SAM model for pathogen segmentation. Default is False.
+    Returns:
+        None: All outputs (masks, merged arrays, plots, databases) are saved to disk under the source folder(s).
+    """
     from .io import preprocess_img_data, _load_and_concatenate_arrays, convert_to_yokogawa, convert_separate_files_to_yokogawa
     from .plot import plot_image_mask_overlay, plot_arrays
@@ -194,7 +287,85 @@ def preprocess_generate_masks(settings):
     return
 def generate_cellpose_masks(src, settings, object_type):
+    """
+    Generate segmentation masks for a specific object type using Cellpose.
+    This function applies a Cellpose-based segmentation pipeline to images in `.npz` format, using settings
+    for batch size, object type (cell, nucleus, pathogen), filtering, plotting, and timelapse options.
+    Masks are optionally filtered, saved, tracked (for timelapse), and summarized into a SQLite database.
+    Args:
+        src (str): Path to the source folder containing `.npz` files with image stacks.
+        settings (dict): Dictionary of settings used to control preprocessing and segmentation. Includes:
+            General settings:
+                - src (str): Source directory.
+                - denoise (bool): Apply denoising before processing.
+                - delete_intermediate (bool): Remove intermediate files after processing.
+                - preprocess (bool): Enable preprocessing.
+                - masks (bool): Enable mask generation.
+                - save (bool): Save mask outputs.
+                - consolidate (bool): Consolidate image folders.
+                - batch_size (int): Batch size for processing.
+                - test_mode (bool): Enable test mode with limited image count.
+                - test_images (int): Number of test images to process.
+                - magnification (int): Image magnification level.
+                - custom_regex (str or None): Regex pattern for file parsing (metadata_type = 'auto').
+                - metadata_type (str): One of "cellvoyager" or "auto".
+                - n_jobs (int): Number of parallel workers.
+                - randomize (bool): Shuffle file order before processing.
+                - verbose (bool): Print full settings to console.
+            Channel/background/cellpose settings:
+                - remove_background_cell/nucleus/pathogen (bool): Whether to subtract background from channel.
+                - cell_diamiter / nucleus_diamiter / pathogen_diamiter (float or None): Estimated diameter.
+                - cell_channel / nucleus_channel / pathogen_channel (int or None): Channel index.
+                - channels (list): List of channels to include in stack.
+                - <object>_background / <object>_Signal_to_noise / <object>_CP_prob / <object>_FT (float): Per-object (cell, nucleus, pathogen) intensity/Cellpose thresholds and scaling.
+                - pathogen_model (str or None): Custom model for pathogen segmentation (e.g. "toxo_pv_lumen").
+            Plotting:
+                - plot (bool): Plot masks or overlay visualizations.
+                - figuresize (int): Matplotlib figure size.
+                - cmap (str): Colormap to use (e.g. "inferno").
+                - normalize (bool): Normalize input intensities.
+                - normalize_plots (bool): Normalize for plots.
+                - examples_to_plot (int): How many examples to plot.
+            Filtering and merging:
+                - merge_pathogens (bool): Whether to merge pathogen objects.
+                - filter (bool): Apply filtering on masks.
+                - lower_percentile (float): Intensity filter threshold.
+                - merge (bool): Merge adjacent objects if needed.
+            Timelapse:
+                - timelapse (bool): Enable object tracking across timepoints.
+                - timelapse_displacement (float or None): Max tracking displacement.
+                - timelapse_memory (int): Trackpy memory.
+                - timelapse_frame_limits (list): Frames to include in timelapse batch.
+                - timelapse_remove_transient (bool): Remove transient objects.
+                - timelapse_mode (str): One of "trackpy", "btrack", or "iou".
+                - timelapse_objects (list or None): Subset of ['cell', 'nucleus', 'pathogen'] to track.
+            Miscellaneous:
+                - all_to_mip (bool): Convert Z-stacks to max projections.
+                - upscale (bool): Apply upscaling.
+                - upscale_factor (float): Upscaling factor.
+                - adjust_cells (bool): Refine cell masks with nucleus/pathogen.
+                - use_sam_cell/nucleus/pathogen (bool): Use SAM for mask generation.
+        object_type (str): One of 'cell', 'nucleus', or 'pathogen'. Determines which mask to generate.
+    Returns:
+        None. Outputs are saved to disk:
+            - Generated masks are stored in a `*_mask_stack/` folder.
+            - Object counts are written to `measurements/measurements.db`.
+            - Optional overlay plots are saved if enabled.
+            - Optional timelapse movies are saved in `movies/`.
+    Raises:
+        ValueError: If the object_type is missing from the computed channel map, or if invalid tracking settings are provided.
+    """
     from .utils import _masks_to_masks_stack, _filter_cp_masks, _get_cellpose_batch_size, _get_cellpose_channels, _choose_model, all_elements_match, prepare_batch_for_segmentation
     from .io import _create_database, _save_object_counts_to_database, _check_masks, _get_avg_object_size
     from .timelapse import _npz_to_movie, _btrack_track_cells, _trackpy_track_cells

spacr/deep_spacr.py CHANGED Viewed

@@ -16,7 +16,34 @@ from torchvision import transforms
 from torch.utils.data import DataLoader
 def apply_model(src, model_path, image_size=224, batch_size=64, normalize=True, n_jobs=10):
+    """
+    Apply a trained binary classification model to a folder of images.
+    Loads a PyTorch model and applies it to images in the specified folder using batch inference.
+    Supports optional normalization and GPU acceleration. Outputs prediction probabilities and
+    saves results as a CSV file alongside the model.
+    Args:
+        src (str): Path to a folder containing input images (e.g., PNG, JPG).
+        model_path (str): Path to a trained PyTorch model file (.pt or .pth).
+        image_size (int, optional): Size to center-crop input images to. Default is 224.
+        batch_size (int, optional): Number of images to process per batch. Default is 64.
+        normalize (bool, optional): If True, normalize images to [-1, 1] using ImageNet-style transform. Default is True.
+        n_jobs (int, optional): Number of subprocesses to use for data loading. Default is 10.
+    Returns:
+        pandas.DataFrame: A DataFrame with two columns:
+            - "path": Filenames of processed images.
+            - "pred": Model output probabilities (sigmoid of logits).
+    Saves:
+        A CSV file named like <model_path><YYMMDD>_<ext>_test_result.csv, containing the prediction results.
+    Notes:
+        - Uses GPU if available, otherwise runs on CPU.
+        - Assumes model outputs raw logits for binary classification (sigmoid is applied).
+        - The input folder must contain only images readable by `PIL.Image.open`.
+    """
     from .io import NoClassDataset
     from .utils import print_progress
@@ -71,7 +98,31 @@ def apply_model(src, model_path, image_size=224, batch_size=64, normalize=True,
     return df
 def apply_model_to_tar(settings={}):
+    """
+    Apply a trained model to images stored inside a tar archive.
+    Loads a model and applies it to images within a `.tar` archive using batch inference. Results are
+    filtered by a probability threshold and saved to a CSV. Supports GPU acceleration and normalization.
+    Args:
+        settings (dict): Dictionary with the following keys:
+            - tar_path (str): Path to the tar archive with input images.
+            - model_path (str): Path to the trained PyTorch model (.pt/.pth).
+            - image_size (int): Center crop size for input images. Default is 224.
+            - batch_size (int): Batch size for DataLoader. Default is 64.
+            - normalize (bool): Apply normalization to [-1, 1]. Default is True.
+            - n_jobs (int): Number of workers for data loading. Default is system CPU count - 4.
+            - verbose (bool): If True, print progress and model details.
+            - score_threshold (float): Probability threshold for positive classification (used in result filtering).
+    Returns:
+        pandas.DataFrame: DataFrame with:
+            - "path": Filenames inside the tar archive.
+            - "pred": Model prediction scores (sigmoid output).
+    Saves:
+        A CSV file with prediction results to the same directory as the tar file.
+    """
     from .io import TarImageDataset
     from .utils import process_vision_results, print_progress
@@ -172,7 +223,7 @@ def evaluate_model_performance(model, loader, epoch, loss_type):
         """
         Calculate classification metrics for binary classification.
-        Parameters:
+        Args:
         - all_labels (list): List of true labels.
         - prediction_pos_probs (list): List of predicted positive probabilities.
         - loader_name (str): Name of the data loader.
@@ -256,7 +307,27 @@ def evaluate_model_performance(model, loader, epoch, loss_type):
     return data_dict, [prediction_pos_probs, all_labels]
 def test_model_core(model, loader, loader_name, epoch, loss_type):
+    """
+    Evaluate a trained model on a test DataLoader and return performance metrics and predictions.
+    This function evaluates a binary classification model using a specified loss function, computes
+    classification metrics, and logs predictions, targets, and file-level results.
+    Args:
+        model (torch.nn.Module): The trained PyTorch model to evaluate.
+        loader (torch.utils.data.DataLoader): DataLoader providing test data and labels.
+        loader_name (str): Identifier name for the loader (used for logging/debugging).
+        epoch (int): Current epoch number (used for metric tracking).
+        loss_type (str): Type of loss function to use for reporting (e.g., 'bce', 'focal').
+    Returns:
+        tuple:
+            - data_df (pd.DataFrame): DataFrame containing classification metrics for the test set.
+            - prediction_pos_probs (list): List of predicted probabilities for the positive class.
+            - all_labels (list): Ground truth binary labels.
+            - results_df (pd.DataFrame): Per-sample results, including filename, true label, predicted label,
+              and probability for class 1.
+    """
     from .utils import calculate_loss, classification_metrics
     device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
     model.eval()
@@ -612,7 +683,50 @@ def train_model(dst, model_type, train_loaders, epochs=100, learning_rate=0.0001
     return model, model_path
 def generate_activation_map(settings):
+    """
+    Generate activation maps (Grad-CAM or saliency) from a trained model applied to a dataset stored in a tar archive.
+    This function loads a model, computes class activation maps or saliency maps for each input image, and saves the
+    results as images. Optionally, it plots batch-wise grids of maps and stores correlation results and image metadata
+    into an SQL database.
+    Args:
+        settings (dict): Dictionary of parameters controlling activation map generation. Key fields include:
+            Required paths:
+                - dataset (str): Path to the `.tar` archive containing images.
+                - model_path (str): Path to the trained PyTorch model (.pt or .pth).
+            Model and method:
+                - model_type (str): Model architecture used (e.g., 'maxvit').
+                - cam_type (str): One of ['gradcam', 'gradcam_pp', 'saliency_image', 'saliency_channel'].
+                - target_layer (str or None): Name of the target layer for Grad-CAM (optional, required for Grad-CAM variants).
+            Input transforms:
+                - image_size (int): Size to center-crop images to (e.g., 224).
+                - normalize_input (bool): Whether to normalize images to [-1, 1] range.
+                - channels (list): Channel indices to select from input data (e.g., [0,1,2]).
+            Inference:
+                - batch_size (int): Number of images per inference batch.
+                - shuffle (bool): Whether to shuffle image order in DataLoader.
+                - n_jobs (int): Number of parallel DataLoader workers (default is CPU count - 4).
+            Output control:
+                - save (bool): If True, saves individual activation maps to disk.
+                - plot (bool): If True, generates and saves batch-wise PDF grid plots.
+                - overlay (bool): If True, overlays activation maps on input images.
+                - correlation (bool): If True, computes activation correlation features (e.g., Manders').
+            Correlation-specific:
+                - manders_thresholds (list or float): Threshold(s) for calculating Manders' coefficients.
+    Returns:
+        None. The following outputs are saved:
+            - PNG or JPEG activation maps organized by predicted class and well.
+            - PDF files with batch-wise overlay plots if `plot=True`.
+            - Activation image metadata and correlations saved to SQL database if `save=True`.
+    """
     from .utils import SaliencyMapGenerator, GradCAMGenerator, SelectChannels, activation_maps_to_database, activation_correlations_to_database
     from .utils import print_progress, save_settings, calculate_activation_correlations
     from .io import TarImageDataset
@@ -771,7 +885,18 @@ def generate_activation_map(settings):
     print("Activation map generation complete.")
 def visualize_classes(model, dtype, class_names, **kwargs):
+    """
+    Visualize synthetic input images that maximize class activation.
+    Args:
+        model (torch.nn.Module): The trained classification model.
+        dtype (str): Data type or domain tag used for visualization.
+        class_names (list): List of class names (length 2 assumed for binary classification).
+        **kwargs: Additional keyword arguments passed to `class_visualization()`.
+    Returns:
+        None. Displays matplotlib plots of class visualizations.
+    """
     from .utils import class_visualization
     for target_y in range(2):  # Assuming binary classification
@@ -783,7 +908,22 @@ def visualize_classes(model, dtype, class_names, **kwargs):
         plt.show()
 def visualize_integrated_gradients(src, model_path, target_label_idx=0, image_size=224, channels=[1,2,3], normalize=True, save_integrated_grads=False, save_dir='integrated_grads'):
+    """
+    Visualize integrated gradients for PNG images in a directory.
+    Args:
+        src (str): Directory containing `.png` images.
+        model_path (str): Path to the trained PyTorch model.
+        target_label_idx (int): Index of the target class label.
+        image_size (int): Image size after preprocessing (center crop).
+        channels (list): List of channels to extract (1-indexed).
+        normalize (bool): Whether to normalize image input to [-1, 1].
+        save_integrated_grads (bool): Whether to save integrated gradient maps.
+        save_dir (str): Directory to save integrated gradient outputs.
+    Returns:
+        None. Displays overlays and optionally saves saliency maps.
+    """
     from .utils import IntegratedGradients, preprocess_image
     use_cuda = torch.cuda.is_available()
@@ -832,6 +972,15 @@ def visualize_integrated_gradients(src, model_path, target_label_idx=0, image_si
             integrated_grads_image.save(os.path.join(save_dir, f'integrated_grads_{file}'))
 class SmoothGrad:
+    """
+    Compute SmoothGrad saliency maps from a trained model.
+    Args:
+        model (torch.nn.Module): Trained classification model.
+        n_samples (int): Number of noise samples to average.
+        stdev_spread (float): Standard deviation of noise relative to input range.
+    """
     def __init__(self, model, n_samples=50, stdev_spread=0.15):
         self.model = model
         self.n_samples = n_samples
@@ -855,7 +1004,22 @@ class SmoothGrad:
         return avg_gradients.abs()
 def visualize_smooth_grad(src, model_path, target_label_idx, image_size=224, channels=[1,2,3], normalize=True, save_smooth_grad=False, save_dir='smooth_grad'):
+    """
+    Visualize SmoothGrad maps for PNG images in a folder.
+    Args:
+        src (str): Path to directory containing `.png` images.
+        model_path (str): Path to trained PyTorch model file.
+        target_label_idx (int): Index of the class to explain.
+        image_size (int): Size for center cropping during preprocessing.
+        channels (list): Channel indices to extract from images.
+        normalize (bool): Whether to normalize inputs to [-1, 1].
+        save_smooth_grad (bool): If True, saves saliency maps to disk.
+        save_dir (str): Folder where smooth grad maps are saved.
+    Returns:
+        None. Displays overlay figures and optionally saves maps to disk.
+    """
     from .utils import preprocess_image
     use_cuda = torch.cuda.is_available()
@@ -904,6 +1068,78 @@ def visualize_smooth_grad(src, model_path, target_label_idx, image_size=224, cha
             smooth_grad_image.save(os.path.join(save_dir, f'smooth_grad_{file}'))
 def deep_spacr(settings={}):
+    """
+    Run the deep learning-based classification workflow on microscopy data using spaCR.
+    This function handles dataset generation, model training, and inference using a trained model on tar-archived image datasets.
+    Settings are filled using `deep_spacr_defaults`.
+    Args:
+        settings (dict): Dictionary of settings with the following keys:
+            General:
+                - src (str): Path to the input dataset.
+                - dataset (str): Path to a dataset archive.
+                - dataset_mode (str): Dataset generation mode. Typically 'metadata'.
+                - file_type (str): Type of input files (e.g., 'cell_png').
+                - file_metadata (str or None): Path to file-level metadata, if available.
+                - sample (int or None): Limit to N random samples for development/testing.
+                - experiment (str): Experiment name prefix. Default is 'exp.'.
+            Annotation and class mapping:
+                - annotation_column (str): Metadata column containing class annotations.
+                - annotated_classes (list): List of class IDs used for training (e.g., [1, 2]).
+                - classes (list): Class labels (e.g., ['nc', 'pc']).
+                - class_metadata (list of lists): Mapping of classes to metadata terms (e.g., [['c1'], ['c2']]).
+                - metadata_type_by (str): How to interpret metadata structure. Typically 'columnID'.
+            Image processing:
+                - channel_of_interest (int): Channel index to use for classification.
+                - png_type (str): Type of image format (e.g., 'cell_png').
+                - image_size (int): Input size (e.g., 224 for 224x224 crop).
+                - train_channels (list): Channels to use for training (e.g., ['r', 'g', 'b']).
+                - normalize (bool): Whether to normalize input images. Default is True.
+                - augment (bool): Whether to apply data augmentation.
+            Model and training:
+                - model_type (str): Model architecture (e.g., 'maxvit_t').
+                - optimizer_type (str): Optimizer (e.g., 'adamw').
+                - schedule (str): Learning rate scheduler ('reduce_lr_on_plateau' or 'step_lr').
+                - loss_type (str): Loss function ('focal_loss' or 'binary_cross_entropy_with_logits').
+                - dropout_rate (float): Dropout probability.
+                - init_weights (bool): Initialize model with pretrained weights.
+                - amsgrad (bool): Use AMSGrad variant of AdamW optimizer.
+                - use_checkpoint (bool): Enable checkpointing.
+                - intermedeate_save (bool): Save intermediate models during training.
+            Training control:
+                - train (bool): Enable training phase.
+                - test (bool): Enable evaluation on test set.
+                - train_DL_model (bool): Enable deep learning model training.
+                - generate_training_dataset (bool): Enable generation of train/test splits.
+                - test_split (float): Proportion of data used for testing.
+                - val_split (float): Fraction of training set used for validation.
+                - epochs (int): Number of training epochs.
+                - batch_size (int): Batch size for training and inference.
+                - learning_rate (float): Learning rate.
+                - weight_decay (float): L2 regularization strength.
+                - gradient_accumulation (bool): Accumulate gradients over multiple steps.
+                - gradient_accumulation_steps (int): Number of steps per gradient update.
+            Inference:
+                - apply_model_to_dataset (bool): Run prediction on tar dataset.
+                - tar_path (str): Path to tar file for inference input.
+                - model_path (str): Path to trained model file.
+                - score_threshold (float): Probability threshold for binary classification.
+            Execution:
+                - n_jobs (int): Number of parallel workers.
+                - pin_memory (bool): Whether to use pinned memory in DataLoader.
+                - verbose (bool): Print training and evaluation progress.
+    Returns:
+        None. All outputs (trained models, predictions, settings) are saved to disk.
+    """
     from .settings import deep_spacr_defaults
     from .io import generate_training_dataset, generate_dataset
     from .utils import save_settings
@@ -937,7 +1173,26 @@ def deep_spacr(settings={}):
             apply_model_to_tar(settings)
 def model_knowledge_transfer(teacher_paths, student_save_path, data_loader, device='cpu', student_model_name='maxvit_t', pretrained=True, dropout_rate=None, use_checkpoint=False, alpha=0.5, temperature=2.0, lr=1e-4, epochs=10):
+    """
+    Perform knowledge distillation from one or more teacher models to a student model.
+    Args:
+        teacher_paths (list of str): Paths to pretrained teacher model files (.pth).
+        student_save_path (str): Output path for the saved student model.
+        data_loader (torch.utils.data.DataLoader): DataLoader for training data.
+        device (str): Device to use ('cpu' or 'cuda').
+        student_model_name (str): Name of the student model architecture (e.g., 'maxvit_t').
+        pretrained (bool): Whether to initialize the student model with ImageNet weights.
+        dropout_rate (float or None): Dropout rate for the student model.
+        use_checkpoint (bool): Whether to use gradient checkpointing.
+        alpha (float): Weighting factor between cross-entropy and distillation loss.
+        temperature (float): Temperature scaling for soft targets.
+        lr (float): Learning rate for optimizer.
+        epochs (int): Number of training epochs.
+    Returns:
+        TorchModel: The trained student model after knowledge distillation.
+    """
     from .utils import TorchModel
     # Adjust filename to reflect knowledge-distillation if desired
@@ -1041,7 +1296,22 @@ def model_knowledge_transfer(teacher_paths, student_save_path, data_loader, devi
     return student_model
 def model_fusion(model_paths,save_path,device='cpu',model_name='maxvit_t',pretrained=True,dropout_rate=None,use_checkpoint=False,aggregator='mean'):
+    """
+    Fuse multiple trained models by combining their parameters using a specified aggregation method.
+    Args:
+        model_paths (list of str): List of paths to model checkpoints to be fused.
+        save_path (str): Path where the fused model will be saved.
+        device (str): Device to use ('cpu' or 'cuda').
+        model_name (str): Model architecture to use when initializing.
+        pretrained (bool): Whether to initialize with pretrained weights.
+        dropout_rate (float or None): Dropout rate to apply to the model.
+        use_checkpoint (bool): Whether to use gradient checkpointing.
+        aggregator (str): Aggregation strategy to combine weights. One of {'mean', 'geomean', 'median', 'sum', 'max', 'min'}.
+    Returns:
+        TorchModel: The fused model with combined weights.
+    """
     from .utils import TorchModel
     if save_path.endswith('.pth'):
@@ -1141,14 +1411,33 @@ def model_fusion(model_paths,save_path,device='cpu',model_name='maxvit_t',pretra
     return fused_model
 def annotate_filter_vision(settings):
+    """
+    Annotate and filter one or more CSV files with experimental metadata and optionally remove training samples.
+    Args:
+        settings (dict): Configuration dictionary with keys:
+            - 'src' (str or list): Paths to CSV annotation files.
+            - 'cells' (dict): Mapping of cell types to annotation labels.
+            - 'cell_loc' (str): Column name for cell type annotations.
+            - 'pathogens' (dict): Mapping of pathogens to annotation labels.
+            - 'pathogen_loc' (str): Column name for pathogen annotations.
+            - 'treatments' (dict): Mapping of treatments to annotation labels.
+            - 'treatment_loc' (str): Column name for treatment annotations.
+            - 'filter_column' (str or None): Column to filter on.
+            - 'upper_threshold' (float): Upper bound for filtering.
+            - 'lower_threshold' (float): Lower bound for filtering.
+            - 'remove_train' (bool): If True, removes rows present in training folders.
+    Returns:
+        None. Saves filtered and annotated CSVs to disk.
+    """
     from .utils import annotate_conditions, correct_metadata
     def filter_csv_by_png(csv_file):
         """
         Filters a DataFrame by removing rows that match PNG filenames in a folder.
-        Parameters:
+        Args:
             csv_file (str): Path to the CSV file.
         Returns:

spacr 1.0.9__py3-none-any.whl → 1.1.0__py3-none-any.whl

spacr 1.0.9py3-none-any.whl → 1.1.0py3-none-any.whl