PyPI - spacr - Versions diffs - 0.0.70__py3-none-any.whl → 0.0.80__py3-none-any.whl - Mend

spacr 0.0.70__py3-none-any.whl → 0.0.80__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22)

spacr/__init__.py +4 -1
spacr/__main__.py +0 -7
spacr/annotate_app.py +75 -61
spacr/core.py +39 -246
spacr/foldseek.py +6 -6
spacr/get_alfafold_structures.py +3 -3
spacr/io.py +53 -116
spacr/measure.py +46 -59
spacr/plot.py +117 -81
spacr/sequencing.py +508 -491
spacr/sim.py +24 -29
spacr/utils.py +487 -260
{spacr-0.0.70.dist-info → spacr-0.0.80.dist-info}/METADATA +10 -8
spacr-0.0.80.dist-info/RECORD +36 -0
spacr/graph_learning_lap.py +0 -84
spacr/train.py +0 -667
spacr/umap.py +0 -0
spacr-0.0.70.dist-info/RECORD +0 -39
{spacr-0.0.70.dist-info → spacr-0.0.80.dist-info}/LICENSE +0 -0
{spacr-0.0.70.dist-info → spacr-0.0.80.dist-info}/WHEEL +0 -0
{spacr-0.0.70.dist-info → spacr-0.0.80.dist-info}/entry_points.txt +0 -0
{spacr-0.0.70.dist-info → spacr-0.0.80.dist-info}/top_level.txt +0 -0

spacr/__init__.py CHANGED Viewed

@@ -8,16 +8,18 @@ from . import utils
 from . import plot
 from . import measure
 from . import sim
+from . import sequencing
 from . import timelapse
 from . import deep_spacr
-from . import mask_app
 from . import annotate_app
 from . import gui_utils
+from . import mask_app
 from . import gui_mask_app
 from . import gui_measure_app
 from . import gui_classify_app
 from . import logger
 __all__ = [
     "core",
     "io",
@@ -25,6 +27,7 @@ __all__ = [
     "plot",
     "measure",
     "sim",
+    "sequencing"
     "timelapse",
     "deep_spacr",
     "annotate_app",

spacr/__main__.py CHANGED Viewed

@@ -2,12 +2,5 @@
 Copyright © 2024 Something
 """
-import sys, os, glob, pathlib, time
-import numpy as np
-from natsort import natsorted
-from tqdm import tqdm
-#from spacr import utils, io, version, timelapse, plot, core, mask_app, annotate_app
-import logging
 if __name__ == "__main__":
     main()

spacr/annotate_app.py CHANGED Viewed

@@ -10,13 +10,16 @@ from IPython.display import display, HTML
 import tkinter as tk
 from tkinter import ttk
 from ttkthemes import ThemedTk
+from skimage.exposure import rescale_intensity
+import cv2
+import matplotlib.pyplot as plt
 from .logger import log_function_call
 from .gui_utils import ScrollableFrame, set_default_font, set_dark_style, create_dark_mode, style_text_boxes, create_menu_bar
 class ImageApp:
-    def __init__(self, root, db_path, src, image_type=None, channels=None, grid_rows=None, grid_cols=None, image_size=(200, 200), annotation_column='annotate'):
+    def __init__(self, root, db_path, src, image_type=None, channels=None, grid_rows=None, grid_cols=None, image_size=(200, 200), annotation_column='annotate', normalize=False, percentiles=(1,99)):
         """
         Initializes an instance of the ImageApp class.
@@ -30,6 +33,7 @@ class ImageApp:
         - grid_cols (int): The number of columns in the image grid.
         - image_size (tuple): The size of the displayed images.
         - annotation_column (str): The column name for image annotations in the database.
+        - normalize (bool): Whether to normalize images to their 2nd and 98th percentiles. Defaults to False.
         """
         self.root = root
         self.db_path = db_path
@@ -41,6 +45,8 @@ class ImageApp:
         self.annotation_column = annotation_column
         self.image_type = image_type
         self.channels = channels
+        self.normalize = normalize
+        self.percentiles = percentiles
         self.images = {}
         self.pending_updates = {}
         self.labels = []
@@ -119,49 +125,80 @@ class ImageApp:
             label.bind('<Button-3>', self.get_on_image_click(path, label, img))
         self.root.update()
+    def load_single_image(self, path_annotation_tuple):
+        """
+        Loads a single image from the given path and annotation tuple.
+        Args:
+            path_annotation_tuple (tuple): A tuple containing the image path and its annotation.
+        Returns:
+            img (PIL.Image.Image): The loaded image.
+            annotation: The annotation associated with the image.
+        """
+        path, annotation = path_annotation_tuple
+        img = Image.open(path)
+        img = self.normalize_image(img, self.normalize, self.percentiles)
+        img = img.convert('RGB')
+        img = self.filter_channels(img)
+        img = img.resize(self.image_size)
+        return img, annotation
     @staticmethod
-    def normalize_image(img):
+    def normalize_image(img, normalize=False, percentiles=(1, 99)):
         """
-        Normalize the pixel values of an image to the range [0, 255].
+        Normalize the pixel values of an image based on the 2nd and 98th percentiles or the image min and max values,
+        and ensure the image is exported as 8-bit.
         Parameters:
-        - img: PIL.Image.Image
-            The input image to be normalized.
+        - img: PIL.Image.Image. The input image to be normalized.
+        - normalize: bool. Whether to normalize based on the 2nd and 98th percentiles.
+        - percentiles: tuple. The percentiles to use for normalization.
         Returns:
-        - PIL.Image.Image
-            The normalized image.
+        - PIL.Image.Image. The normalized and 8-bit converted image.
         """
         img_array = np.array(img)
-        img_array = ((img_array - img_array.min()) * (1/(img_array.max() - img_array.min()) * 255)).astype('uint8')
-        return Image.fromarray(img_array)
+        if normalize:
+            if img_array.ndim == 2:  # Grayscale image
+                p2, p98 = np.percentile(img_array, percentiles)
+                img_array = rescale_intensity(img_array, in_range=(p2, p98), out_range=(0, 255))
+            else:  # Color image or multi-channel image
+                for channel in range(img_array.shape[2]):
+                    p2, p98 = np.percentile(img_array[:, :, channel], percentiles)
+                    img_array[:, :, channel] = rescale_intensity(img_array[:, :, channel], in_range=(p2, p98), out_range=(0, 255))
+        img_array = np.clip(img_array, 0, 255).astype('uint8')
+        return Image.fromarray(img_array)
     def add_colored_border(self, img, border_width, border_color):
-            """
-            Adds a colored border to an image.
-            Args:
-                img (PIL.Image.Image): The input image.
-                border_width (int): The width of the border in pixels.
-                border_color (str): The color of the border in RGB format.
-            Returns:
-                PIL.Image.Image: The image with the colored border.
-            """
-            top_border = Image.new('RGB', (img.width, border_width), color=border_color)
-            bottom_border = Image.new('RGB', (img.width, border_width), color=border_color)
-            left_border = Image.new('RGB', (border_width, img.height), color=border_color)
-            right_border = Image.new('RGB', (border_width, img.height), color=border_color)
-            bordered_img = Image.new('RGB', (img.width + 2 * border_width, img.height + 2 * border_width), color='white')
-            bordered_img.paste(top_border, (border_width, 0))
-            bordered_img.paste(bottom_border, (border_width, img.height + border_width))
-            bordered_img.paste(left_border, (0, border_width))
-            bordered_img.paste(right_border, (img.width + border_width, border_width))
-            bordered_img.paste(img, (border_width, border_width))
-            return bordered_img
+        """
+        Adds a colored border to an image.
+        Args:
+            img (PIL.Image.Image): The input image.
+            border_width (int): The width of the border in pixels.
+            border_color (str): The color of the border in RGB format.
+        Returns:
+            PIL.Image.Image: The image with the colored border.
+        """
+        top_border = Image.new('RGB', (img.width, border_width), color=border_color)
+        bottom_border = Image.new('RGB', (img.width, border_width), color=border_color)
+        left_border = Image.new('RGB', (border_width, img.height), color=border_color)
+        right_border = Image.new('RGB', (border_width, img.height), color=border_color)
+        bordered_img = Image.new('RGB', (img.width + 2 * border_width, img.height + 2 * border_width), color='white')
+        bordered_img.paste(top_border, (border_width, 0))
+        bordered_img.paste(bottom_border, (border_width, img.height + border_width))
+        bordered_img.paste(left_border, (0, border_width))
+        bordered_img.paste(right_border, (img.width + border_width, border_width))
+        bordered_img.paste(img, (border_width, border_width))
+        return bordered_img
     def filter_channels(self, img):
         """
@@ -189,26 +226,6 @@ class ImageApp:
         return Image.merge("RGB", (r, g, b))
-    def load_single_image(self, path_annotation_tuple):
-            """
-            Loads a single image from the given path and annotation tuple.
-            Args:
-                path_annotation_tuple (tuple): A tuple containing the image path and its annotation.
-            Returns:
-                img (PIL.Image.Image): The loaded image.
-                annotation: The annotation associated with the image.
-            """
-            path, annotation = path_annotation_tuple
-            img = Image.open(path)
-            if img.mode == "I":
-                img = self.normalize_image(img)
-            img = img.convert('RGB')
-            img = self.filter_channels(img)
-            img = img.resize(self.image_size)
-            return img, annotation
     def get_on_image_click(self, path, label, img):
         """
         Returns a callback function that handles the click event on an image.
@@ -244,7 +261,7 @@ class ImageApp:
             self.root.update()
         return on_image_click
     @staticmethod
     def update_html(text):
         display(HTML(f"""
@@ -349,7 +366,7 @@ class ImageApp:
         self.root.destroy()
         print(f'Quit application')
-def annotate(src, image_type=None, channels=None, geom="1000x1100", img_size=(200, 200), rows=5, columns=5, annotation_column='annotate'):
+def annotate(src, image_type=None, channels=None, geom="1000x1100", img_size=(200, 200), rows=5, columns=5, annotation_column='annotate', normalize=False, percentiles=(1,99)):
     """
     Annotates images in a database using a graphical user interface.
@@ -363,11 +380,9 @@ def annotate(src, image_type=None, channels=None, geom="1000x1100", img_size=(20
         rows (int, optional): The number of rows in the image grid. Defaults to 5.
         columns (int, optional): The number of columns in the image grid. Defaults to 5.
         annotation_column (str, optional): The name of the annotation column in the database table. Defaults to 'annotate'.
+        normalize (bool, optional): Whether to normalize images to their 2nd and 98th percentiles. Defaults to False.
     """
     db = os.path.join(src, 'measurements/measurements.db')
-    #print('src', src)
-    #print('db', db)
     conn = sqlite3.connect(db)
     c = conn.cursor()
     c.execute('PRAGMA table_info(png_list)')
@@ -379,7 +394,7 @@ def annotate(src, image_type=None, channels=None, geom="1000x1100", img_size=(20
     root = tk.Tk()
     root.geometry(geom)
-    app = ImageApp(root, db, src, image_type=image_type, channels=channels, image_size=img_size, grid_rows=rows, grid_cols=columns, annotation_column=annotation_column)
+    app = ImageApp(root, db, src, image_type=image_type, channels=channels, image_size=img_size, grid_rows=rows, grid_cols=columns, annotation_column=annotation_column, normalize=normalize, percentiles=percentiles)
     next_button = tk.Button(root, text="Next", command=app.next_page)
     next_button.grid(row=app.grid_rows, column=app.grid_cols - 1)
     back_button = tk.Button(root, text="Back", command=app.previous_page)
@@ -390,7 +405,6 @@ def annotate(src, image_type=None, channels=None, geom="1000x1100", img_size=(20
     app.load_images()
     root.mainloop()
 def check_for_duplicates(db):
     """
     Check for duplicates in the given SQLite database.

spacr/core.py CHANGED Viewed

@@ -15,12 +15,8 @@ from multiprocessing import Pool, cpu_count, Value, Lock
 import seaborn as sns
 from skimage.measure import regionprops, label
-from skimage.morphology import square
 from skimage.transform import resize as resizescikit
-from collections import defaultdict
-from torch.utils.data import DataLoader, random_split
-from sklearn.cluster import KMeans
-from sklearn.decomposition import PCA
+from torch.utils.data import DataLoader
 from skimage import measure
 from sklearn.model_selection import train_test_split
@@ -30,7 +26,6 @@ from sklearn.inspection import permutation_importance
 from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
 from sklearn.preprocessing import StandardScaler
-from scipy.ndimage import binary_dilation
 from scipy.spatial.distance import cosine, euclidean, mahalanobis, cityblock, minkowski, chebyshev, hamming, jaccard, braycurtis
 import torchvision.transforms as transforms
@@ -40,7 +35,6 @@ import shap
 import matplotlib.pyplot as plt
 import matplotlib
 matplotlib.use('Agg')
-#import matplotlib.pyplot as plt
 from .logger import log_function_call
@@ -1637,216 +1631,14 @@ def analyze_recruitment(src, metadata_settings, advanced_settings):
     cells,wells = _results_to_csv(src, df, df_well)
     return [cells,wells]
-def _merge_cells_based_on_parasite_overlap(parasite_mask, cell_mask, nuclei_mask, overlap_threshold=5, perimeter_threshold=30):
-    """
-    Merge cells in cell_mask if a parasite in parasite_mask overlaps with more than one cell,
-    and if cells share more than a specified perimeter percentage.
-    Args:
-        parasite_mask (ndarray): Mask of parasites.
-        cell_mask (ndarray): Mask of cells.
-        nuclei_mask (ndarray): Mask of nuclei.
-        overlap_threshold (float): The percentage threshold for merging cells based on parasite overlap.
-        perimeter_threshold (float): The percentage threshold for merging cells based on shared perimeter.
-    Returns:
-        ndarray: The modified cell mask (cell_mask) with unique labels.
-    """
-    labeled_cells = label(cell_mask)
-    labeled_parasites = label(parasite_mask)
-    labeled_nuclei = label(nuclei_mask)
-    num_parasites = np.max(labeled_parasites)
-    num_cells = np.max(labeled_cells)
-    num_nuclei = np.max(labeled_nuclei)
-    # Merge cells based on parasite overlap
-    for parasite_id in range(1, num_parasites + 1):
-        current_parasite_mask = labeled_parasites == parasite_id
-        overlapping_cell_labels = np.unique(labeled_cells[current_parasite_mask])
-        overlapping_cell_labels = overlapping_cell_labels[overlapping_cell_labels != 0]
-        if len(overlapping_cell_labels) > 1:
-            # Calculate the overlap percentages
-            overlap_percentages = [
-                np.sum(current_parasite_mask & (labeled_cells == cell_label)) / np.sum(current_parasite_mask) * 100
-                for cell_label in overlapping_cell_labels
-            ]
-            # Merge cells if overlap percentage is above the threshold
-            for cell_label, overlap_percentage in zip(overlapping_cell_labels, overlap_percentages):
-                if overlap_percentage > overlap_threshold:
-                    first_label = overlapping_cell_labels[0]
-                    for other_label in overlapping_cell_labels[1:]:
-                        if other_label != first_label:
-                            cell_mask[cell_mask == other_label] = first_label
-    # Merge cells based on nucleus overlap
-    for nucleus_id in range(1, num_nuclei + 1):
-        current_nucleus_mask = labeled_nuclei == nucleus_id
-        overlapping_cell_labels = np.unique(labeled_cells[current_nucleus_mask])
-        overlapping_cell_labels = overlapping_cell_labels[overlapping_cell_labels != 0]
-        if len(overlapping_cell_labels) > 1:
-            # Calculate the overlap percentages
-            overlap_percentages = [
-                np.sum(current_nucleus_mask & (labeled_cells == cell_label)) / np.sum(current_nucleus_mask) * 100
-                for cell_label in overlapping_cell_labels
-            ]
-            # Merge cells if overlap percentage is above the threshold for each cell
-            if all(overlap_percentage > overlap_threshold for overlap_percentage in overlap_percentages):
-                first_label = overlapping_cell_labels[0]
-                for other_label in overlapping_cell_labels[1:]:
-                    if other_label != first_label:
-                        cell_mask[cell_mask == other_label] = first_label
-    # Check for cells without nuclei and merge based on shared perimeter
-    labeled_cells = label(cell_mask)  # Re-label after merging based on overlap
-    cell_regions = regionprops(labeled_cells)
-    for region in cell_regions:
-        cell_label = region.label
-        cell_mask_binary = labeled_cells == cell_label
-        overlapping_nuclei = np.unique(nuclei_mask[cell_mask_binary])
-        overlapping_nuclei = overlapping_nuclei[overlapping_nuclei != 0]
-        if len(overlapping_nuclei) == 0:
-            # Cell does not overlap with any nucleus
-            perimeter = region.perimeter
-            # Dilate the cell to find neighbors
-            dilated_cell = binary_dilation(cell_mask_binary, structure=square(3))
-            neighbor_cells = np.unique(labeled_cells[dilated_cell])
-            neighbor_cells = neighbor_cells[(neighbor_cells != 0) & (neighbor_cells != cell_label)]
-            # Calculate shared border length with neighboring cells
-            shared_borders = [
-                np.sum((labeled_cells == neighbor_label) & dilated_cell) for neighbor_label in neighbor_cells
-            ]
-            shared_border_percentages = [shared_border / perimeter * 100 for shared_border in shared_borders]
-            # Merge with the neighbor cell with the largest shared border percentage above the threshold
-            if shared_borders:
-                max_shared_border_index = np.argmax(shared_border_percentages)
-                max_shared_border_percentage = shared_border_percentages[max_shared_border_index]
-                if max_shared_border_percentage > perimeter_threshold:
-                    cell_mask[labeled_cells == cell_label] = neighbor_cells[max_shared_border_index]
-    # Relabel the merged cell mask
-    relabeled_cell_mask, _ = label(cell_mask, return_num=True)
-    return relabeled_cell_mask
-def adjust_cell_masks(parasite_folder, cell_folder, nuclei_folder, overlap_threshold=5, perimeter_threshold=30):
-    """
-    Process all npy files in the given folders. Merge and relabel cells in cell masks
-    based on parasite overlap and cell perimeter sharing conditions.
-    Args:
-        parasite_folder (str): Path to the folder containing parasite masks.
-        cell_folder (str): Path to the folder containing cell masks.
-        nuclei_folder (str): Path to the folder containing nuclei masks.
-        overlap_threshold (float): The percentage threshold for merging cells based on parasite overlap.
-        perimeter_threshold (float): The percentage threshold for merging cells based on shared perimeter.
-    """
-    parasite_files = sorted([f for f in os.listdir(parasite_folder) if f.endswith('.npy')])
-    cell_files = sorted([f for f in os.listdir(cell_folder) if f.endswith('.npy')])
-    nuclei_files = sorted([f for f in os.listdir(nuclei_folder) if f.endswith('.npy')])
-    # Ensure there are matching files in all folders
-    if not (len(parasite_files) == len(cell_files) == len(nuclei_files)):
-        raise ValueError("The number of files in the folders do not match.")
-    # Match files by name
-    for file_name in parasite_files:
-        parasite_path = os.path.join(parasite_folder, file_name)
-        cell_path = os.path.join(cell_folder, file_name)
-        nuclei_path = os.path.join(nuclei_folder, file_name)
-        # Check if the corresponding cell and nuclei mask files exist
-        if not (os.path.exists(cell_path) and os.path.exists(nuclei_path)):
-            raise ValueError(f"Corresponding cell or nuclei mask file for {file_name} not found.")
-        # Load the masks
-        parasite_mask = np.load(parasite_path)
-        cell_mask = np.load(cell_path)
-        nuclei_mask = np.load(nuclei_path)
-        # Merge and relabel cells
-        merged_cell_mask = _merge_cells_based_on_parasite_overlap(parasite_mask, cell_mask, nuclei_mask, overlap_threshold, perimeter_threshold)
-        # Overwrite the original cell mask file with the merged result
-        np.save(cell_path, merged_cell_mask)
-def process_masks(mask_folder, image_folder, channel, batch_size=50, n_clusters=2, plot=False):
-    def read_files_in_batches(folder, batch_size=50):
-        files = [f for f in os.listdir(folder) if f.endswith('.npy')]
-        files.sort()  # Sort to ensure matching order
-        for i in range(0, len(files), batch_size):
-            yield files[i:i + batch_size]
-    def measure_morphology_and_intensity(mask, image):
-        properties = measure.regionprops(mask, intensity_image=image)
-        properties_list = [{'area': p.area, 'mean_intensity': p.mean_intensity, 'perimeter': p.perimeter, 'eccentricity': p.eccentricity} for p in properties]
-        return properties_list
-    def cluster_objects(properties, n_clusters=2):
-        data = np.array([[p['area'], p['mean_intensity'], p['perimeter'], p['eccentricity']] for p in properties])
-        kmeans = KMeans(n_clusters=n_clusters, random_state=0).fit(data)
-        return kmeans
-    def remove_objects_not_in_largest_cluster(mask, labels, largest_cluster_label):
-        cleaned_mask = np.zeros_like(mask)
-        for region in measure.regionprops(mask):
-            if labels[region.label - 1] == largest_cluster_label:
-                cleaned_mask[mask == region.label] = region.label
-        return cleaned_mask
-    def plot_clusters(properties, labels):
-        data = np.array([[p['area'], p['mean_intensity'], p['perimeter'], p['eccentricity']] for p in properties])
-        pca = PCA(n_components=2)
-        data_2d = pca.fit_transform(data)
-        plt.scatter(data_2d[:, 0], data_2d[:, 1], c=labels, cmap='viridis')
-        plt.xlabel('PCA Component 1')
-        plt.ylabel('PCA Component 2')
-        plt.title('Object Clustering')
-        plt.show()
-    all_properties = []
-    # Step 1: Accumulate properties over all files
-    for batch in read_files_in_batches(mask_folder, batch_size):
-        mask_files = [os.path.join(mask_folder, file) for file in batch]
-        image_files = [os.path.join(image_folder, file) for file in batch]
-        masks = [np.load(file) for file in mask_files]
-        images = [np.load(file)[:, :, channel] for file in image_files]
-        for i, mask in enumerate(masks):
-            image = images[i]
-            # Measure morphology and intensity
-            properties = measure_morphology_and_intensity(mask, image)
-            all_properties.extend(properties)
-    # Step 2: Perform clustering on accumulated properties
-    kmeans = cluster_objects(all_properties, n_clusters)
-    labels = kmeans.labels_
-    if plot:
-        # Step 3: Plot clusters using PCA
-        plot_clusters(all_properties, labels)
-    # Step 4: Remove objects not in the largest cluster and overwrite files in batches
-    label_index = 0
-    for batch in read_files_in_batches(mask_folder, batch_size):
-        mask_files = [os.path.join(mask_folder, file) for file in batch]
-        masks = [np.load(file) for file in mask_files]
-        for i, mask in enumerate(masks):
-            batch_properties = measure_morphology_and_intensity(mask, mask)
-            batch_labels = labels[label_index:label_index + len(batch_properties)]
-            largest_cluster_label = np.bincount(batch_labels).argmax()
-            cleaned_mask = remove_objects_not_in_largest_cluster(mask, batch_labels, largest_cluster_label)
-            np.save(mask_files[i], cleaned_mask)
-            label_index += len(batch_properties)
 def preprocess_generate_masks(src, settings={}):
     from .io import preprocess_img_data, _load_and_concatenate_arrays
     from .plot import plot_merged, plot_arrays
     from .utils import _pivot_counts_table, set_default_settings_preprocess_generate_masks, set_default_plot_merge_settings, check_mask_folder
+    from .utils import adjust_cell_masks, _merge_cells_based_on_parasite_overlap, process_masks
     settings = set_default_settings_preprocess_generate_masks(src, settings)
     settings_df = pd.DataFrame(list(settings.items()), columns=['Key', 'Value'])
     settings_csv = os.path.join(src,'settings','preprocess_generate_masks_settings.csv')
     os.makedirs(os.path.join(src,'settings'), exist_ok=True)
@@ -1907,7 +1699,8 @@ def preprocess_generate_masks(src, settings={}):
                 adjust_cell_masks(parasite_folder, cell_folder, nuclei_folder, overlap_threshold=5, perimeter_threshold=30)
                 stop = time.time()
-                print(f'Cell mask adjustment: {stop-start} seconds')
+                adjust_time = (stop-start)/60
+                print(f'Cell mask adjustment: {adjust_time} min.')
         if os.path.exists(os.path.join(src,'measurements')):
             _pivot_counts_table(db_path=os.path.join(src,'measurements', 'measurements.db'))
@@ -2583,6 +2376,7 @@ def generate_cellpose_masks(src, settings, object_type):
         if settings['save']:
             for mask_index, mask in enumerate(mask_stack):
                 output_filename = os.path.join(output_folder, batch_filenames[mask_index])
+                mask = mask.astype(np.uint16)
                 np.save(output_filename, mask)
             mask_stack = []
             batch_filenames = []
@@ -3118,37 +2912,36 @@ def generate_image_umap(settings={}):
     Parameters:
     settings (dict): Dictionary containing the following keys:
-        src (str): Source directory containing the data.
-        row_limit (int): Limit the number of rows to process.
-        tables (list): List of table names to read from the database.
-        visualize (str): Visualization type.
-        image_nr (int): Number of images to display.
-        dot_size (int): Size of dots in the scatter plot.
-        n_neighbors (int): Number of neighbors for UMAP.
-        figuresize (int): Size of the figure.
-        black_background (bool): Whether to use a black background.
-        remove_image_canvas (bool): Whether to remove the image canvas.
-        plot_outlines (bool): Whether to plot outlines.
-        plot_points (bool): Whether to plot points.
-        smooth_lines (bool): Whether to smooth lines.
-        verbose (bool): Whether to print verbose output.
-        embedding_by_controls (bool): Whether to use embedding from controls.
-        col_to_compare (str): Column to compare for control-based embedding.
-        pos (str): Positive control value.
-        neg (str): Negative control value.
-        clustering (str): Clustering method ('DBSCAN' or 'KMeans').
-        exclude (list): List of columns to exclude from the analysis.
-        plot_images (bool): Whether to plot images.
-        reduction_method (str): Dimensionality reduction method ('UMAP' or 'tSNE').
-        save_figure (bool): Whether to save the figure as a PDF.
+    src (str): Source directory containing the data.
+    row_limit (int): Limit the number of rows to process.
+    tables (list): List of table names to read from the database.
+    visualize (str): Visualization type.
+    image_nr (int): Number of images to display.
+    dot_size (int): Size of dots in the scatter plot.
+    n_neighbors (int): Number of neighbors for UMAP.
+    figuresize (int): Size of the figure.
+    black_background (bool): Whether to use a black background.
+    remove_image_canvas (bool): Whether to remove the image canvas.
+    plot_outlines (bool): Whether to plot outlines.
+    plot_points (bool): Whether to plot points.
+    smooth_lines (bool): Whether to smooth lines.
+    verbose (bool): Whether to print verbose output.
+    embedding_by_controls (bool): Whether to use embedding from controls.
+    col_to_compare (str): Column to compare for control-based embedding.
+    pos (str): Positive control value.
+    neg (str): Negative control value.
+    clustering (str): Clustering method ('DBSCAN' or 'KMeans').
+    exclude (list): List of columns to exclude from the analysis.
+    plot_images (bool): Whether to plot images.
+    reduction_method (str): Dimensionality reduction method ('UMAP' or 'tSNE').
+    save_figure (bool): Whether to save the figure as a PDF.
     Returns:
     pd.DataFrame: DataFrame with the original data and an additional column 'cluster' containing the cluster identity.
     """
     from .io import _read_and_join_tables
-    from .utils import get_db_paths, preprocess_data, reduction_and_clustering, remove_noise, generate_colors, correct_paths, plot_embedding, plot_clusters_grid, get_umap_image_settings
-    from .alpha import cluster_feature_analysis, generate_umap_from_images
+    from .utils import get_db_paths, preprocess_data, reduction_and_clustering, remove_noise, generate_colors, correct_paths, plot_embedding, plot_clusters_grid, get_umap_image_settings, cluster_feature_analysis, generate_umap_from_images
     settings = get_umap_image_settings(settings)
@@ -3311,15 +3104,15 @@ def reducer_hyperparameter_search(settings={}, reduction_params=None, dbscan_par
     Parameters:
     settings (dict): Dictionary containing the following keys:
-        src (str): Source directory containing the data.
-        row_limit (int): Limit the number of rows to process.
-        tables (list): List of table names to read from the database.
-        filter_by (str): Column to filter the data.
-        sample_size (int): Number of samples to use for the hyperparameter search.
-        remove_highly_correlated (bool): Whether to remove highly correlated columns.
-        log_data (bool): Whether to log transform the data.
-        verbose (bool): Whether to print verbose output.
-        reduction_method (str): Dimensionality reduction method ('UMAP' or 'tSNE').
+    src (str): Source directory containing the data.
+    row_limit (int): Limit the number of rows to process.
+    tables (list): List of table names to read from the database.
+    filter_by (str): Column to filter the data.
+    sample_size (int): Number of samples to use for the hyperparameter search.
+    remove_highly_correlated (bool): Whether to remove highly correlated columns.
+    log_data (bool): Whether to log transform the data.
+    verbose (bool): Whether to print verbose output.
+    reduction_method (str): Dimensionality reduction method ('UMAP' or 'tSNE').
     reduction_params (list): List of dictionaries containing hyperparameters to test for the reduction method.
     dbscan_params (list): List of dictionaries containing DBSCAN hyperparameters to test.
     kmeans_params (list): List of dictionaries containing KMeans hyperparameters to test.

spacr/foldseek.py CHANGED Viewed

@@ -762,18 +762,18 @@ def analyze_results(foldseek_csv_path, base_dir):
     #display(functional_data_df)
 # Set up directories
-structure_fldr_path = "/home/carruthers/Downloads/ME49_proteome/cif"
-base_dir='/home/carruthers/foldseek/me49'
+#structure_fldr_path = "/home/carruthers/Downloads/ME49_proteome/cif"
+#base_dir='/home/carruthers/foldseek/me49'
-align_to_database(structure_fldr_path, base_dir, cores=25)
+#align_to_database(structure_fldr_path, base_dir, cores=25)
 #foldseek_csv_path = f'{base_dir}/results/pdb/aln_tmscore.csv'
 #analyze_results(foldseek_csv_path, base_dir)
 # Set up directories
-structure_fldr_path = "/home/carruthers/Downloads/GT1_proteome/cif"
-base_dir='/home/carruthers/foldseek/gt1'
+#structure_fldr_path = "/home/carruthers/Downloads/GT1_proteome/cif"
+#base_dir='/home/carruthers/foldseek/gt1'
-align_to_database(structure_fldr_path, base_dir, cores=25)
+#align_to_database(structure_fldr_path, base_dir, cores=25)
 #foldseek_csv_path = f'{base_dir}/results/pdb/aln_tmscore.csv'
 #analyze_results(foldseek_csv_path, base_dir)

spacr/get_alfafold_structures.py CHANGED Viewed

@@ -67,6 +67,6 @@ def download_alphafold_structures(tsv_location, dst, version="4"):
         print(f"Failed download entries saved to: {os.path.join(dst, 'failed_downloads.csv')}")
 # Example usage:
-tsv_location = '/home/carruthers/Downloads/GT1_proteome/GT1_proteins_uniprot.tsv'  # Replace with the path to your TSV file containing a list of UniProt entries
-dst_folder = '/home/carruthers/Downloads/GT1_proteome'  # Replace with your destination folder
-download_alphafold_structures(tsv_location, dst_folder)
+#tsv_location = '/home/carruthers/Downloads/GT1_proteome/GT1_proteins_uniprot.tsv'  # Replace with the path to your TSV file containing a list of UniProt entries
+#dst_folder = '/home/carruthers/Downloads/GT1_proteome'  # Replace with your destination folder
+#download_alphafold_structures(tsv_location, dst_folder)

spacr 0.0.70__py3-none-any.whl → 0.0.80__py3-none-any.whl

spacr 0.0.70__py3-none-any.whl → 0.0.80__py3-none-any.whl