PyPI - spacr - Versions diffs - 0.2.5__py3-none-any.whl → 0.2.8__py3-none-any.whl - Mend

spacr 0.2.5__py3-none-any.whl → 0.2.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19) hide show

spacr/__init__.py +1 -11
spacr/core.py +226 -287
spacr/deep_spacr.py +248 -269
spacr/gui.py +41 -19
spacr/gui_core.py +404 -151
spacr/gui_elements.py +778 -179
spacr/gui_utils.py +163 -106
spacr/io.py +116 -45
spacr/measure.py +1 -0
spacr/plot.py +51 -5
spacr/sequencing.py +477 -587
spacr/settings.py +211 -66
spacr/utils.py +34 -14
{spacr-0.2.5.dist-info → spacr-0.2.8.dist-info}/METADATA +46 -39
{spacr-0.2.5.dist-info → spacr-0.2.8.dist-info}/RECORD +19 -19
{spacr-0.2.5.dist-info → spacr-0.2.8.dist-info}/WHEEL +1 -1
{spacr-0.2.5.dist-info → spacr-0.2.8.dist-info}/LICENSE +0 -0
{spacr-0.2.5.dist-info → spacr-0.2.8.dist-info}/entry_points.txt +0 -0
{spacr-0.2.5.dist-info → spacr-0.2.8.dist-info}/top_level.txt +0 -0

spacr/io.py CHANGED Viewed

@@ -1,9 +1,9 @@
-import os, re, sqlite3, gc, torch, time, random, shutil, cv2, tarfile, cellpose, glob
+import os, re, sqlite3, gc, torch, time, random, shutil, cv2, tarfile, cellpose, glob, queue
 import numpy as np
 import pandas as pd
 import tifffile
-from PIL import Image
-from collections import defaultdict, Counter
+from PIL import Image, ImageOps
+from collections import defaultdict, Counter, deque
 from pathlib import Path
 from functools import partial
 from matplotlib.animation import FuncAnimation
@@ -17,12 +17,12 @@ import imageio.v2 as imageio2
 import matplotlib.pyplot as plt
 from io import BytesIO
 from IPython.display import display, clear_output
-from multiprocessing import Pool, cpu_count
-from torch.utils.data import Dataset
+from multiprocessing import Pool, cpu_count, Process, Queue
+from torch.utils.data import Dataset, DataLoader
 import matplotlib.pyplot as plt
 from torchvision.transforms import ToTensor
 import seaborn as sns
+import atexit
 from .logger import log_function_call
@@ -444,20 +444,7 @@ class NoClassDataset(Dataset):
         # Return both the image and its filename
         return img, self.filenames[index]
-class MyDataset(Dataset):
-    """
-    A custom dataset class for loading and processing image data.
-    Args:
-        data_dir (str): The directory path where the image data is stored.
-        loader_classes (list): A list of class names for the dataset.
-        transform (callable, optional): A function/transform to apply to the image data. Default is None.
-        shuffle (bool, optional): Whether to shuffle the dataset. Default is True.
-        pin_memory (bool, optional): Whether to pin the loaded images to memory. Default is False.
-        specific_files (list, optional): A list of specific file paths to include in the dataset. Default is None.
-        specific_labels (list, optional): A list of specific labels corresponding to the specific files. Default is None.
-    """
+class spacrDataset(Dataset):
     def __init__(self, data_dir, loader_classes, transform=None, shuffle=True, pin_memory=False, specific_files=None, specific_labels=None):
         self.data_dir = data_dir
         self.classes = loader_classes
@@ -466,7 +453,7 @@ class MyDataset(Dataset):
         self.pin_memory = pin_memory
         self.filenames = []
         self.labels = []
         if specific_files and specific_labels:
             self.filenames = specific_files
             self.labels = specific_labels
@@ -479,33 +466,113 @@ class MyDataset(Dataset):
         if self.shuffle:
             self.shuffle_dataset()
         if self.pin_memory:
-            self.images = [self.load_image(f) for f in self.filenames]
+            # Use multiprocessing to load images in parallel
+            with Pool(processes=cpu_count()) as pool:
+                self.images = pool.map(self.load_image, self.filenames)
+        else:
+            self.images = None
     def load_image(self, img_path):
         img = Image.open(img_path).convert('RGB')
+        img = ImageOps.exif_transpose(img)  # Handle image orientation
         return img
     def __len__(self):
         return len(self.filenames)
     def shuffle_dataset(self):
         combined = list(zip(self.filenames, self.labels))
         random.shuffle(combined)
         self.filenames, self.labels = zip(*combined)
     def get_plate(self, filepath):
-        filename = os.path.basename(filepath)  # Get just the filename from the full path
+        filename = os.path.basename(filepath)
         return filename.split('_')[0]
     def __getitem__(self, index):
+        if self.pin_memory:
+            img = self.images[index]
+        else:
+            img = self.load_image(self.filenames[index])
         label = self.labels[index]
         filename = self.filenames[index]
-        img = self.load_image(filename)
         if self.transform:
             img = self.transform(img)
         return img, label, filename
+class spacrDataLoader(DataLoader):
+    def __init__(self, *args, preload_batches=1, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.preload_batches = preload_batches
+        self.batch_queue = Queue(maxsize=preload_batches)
+        self.process = None
+        self.current_batch_index = 0
+        self._stop_event = False
+        self.pin_memory = kwargs.get('pin_memory', False)
+        atexit.register(self.cleanup)
+    def _preload_next_batches(self):
+        try:
+            for _ in range(self.preload_batches):
+                if self._stop_event:
+                    break
+                batch = next(self._iterator)
+                if self.pin_memory:
+                    batch = self._pin_memory_batch(batch)
+                self.batch_queue.put(batch)
+        except StopIteration:
+            pass
+    def _start_preloading(self):
+        if self.process is None or not self.process.is_alive():
+            self._iterator = iter(super().__iter__())
+            if not self.pin_memory:
+                self.process = Process(target=self._preload_next_batches)
+                self.process.start()
+            else:
+                self._preload_next_batches()  # Directly load if pin_memory is True
+    def _pin_memory_batch(self, batch):
+        if isinstance(batch, (list, tuple)):
+            return [b.pin_memory() if isinstance(b, torch.Tensor) else b for b in batch]
+        elif isinstance(batch, torch.Tensor):
+            return batch.pin_memory()
+        else:
+            return batch
+    def __iter__(self):
+        self._start_preloading()
+        return self
+    def __next__(self):
+        if self.process and not self.process.is_alive() and self.batch_queue.empty():
+            raise StopIteration
+        try:
+            if self.pin_memory:
+                next_batch = self.batch_queue.get(timeout=60)
+            else:
+                next_batch = self.batch_queue.get(timeout=60)
+            self.current_batch_index += 1
+            # Start preloading the next batches
+            if self.batch_queue.qsize() < self.preload_batches:
+                self._start_preloading()
+            return next_batch
+        except queue.Empty:
+            raise StopIteration
+    def cleanup(self):
+        self._stop_event = True
+        if self.process and self.process.is_alive():
+            self.process.terminate()
+            self.process.join()
+    def __del__(self):
+        self.cleanup()
 class NoClassDataset(Dataset):
     def __init__(self, data_dir, transform=None, shuffle=True, load_to_memory=False):
@@ -2292,18 +2359,27 @@ def _save_model(model, model_type, results_df, dst, epoch, epochs, intermedeate_
     def save_model_at_threshold(threshold, epoch, suffix=""):
         percentile = str(threshold * 100)
-        print(f'\rfound: {percentile}% accurate model')#, end='\r', flush=True)
-        torch.save(model, f'{dst}/{model_type}_epoch_{str(epoch)}{suffix}_acc_{percentile}_channels_{channels_str}.pth')
+        print(f'Found: {percentile}% accurate model')
+        model_path = f'{dst}/{model_type}_epoch_{str(epoch)}{suffix}_acc_{percentile}_channels_{channels_str}.pth'
+        torch.save(model, model_path)
+        return model_path
     if epoch % 100 == 0 or epoch == epochs:
-        torch.save(model, f'{dst}/{model_type}_epoch_{str(epoch)}_channels_{channels_str}.pth')
+        model_path = f'{dst}/{model_type}_epoch_{str(epoch)}_channels_{channels_str}.pth'
+        torch.save(model, model_path)
+        return model_path
     for threshold in intermedeate_save:
-        if results_df['neg_accuracy'].dropna().mean() >= threshold and results_df['pos_accuracy'].dropna().mean() >= threshold:
-            save_model_at_threshold(threshold, epoch)
-            break  # Ensure we only save for the highest matching threshold
+        if results_df['neg_accuracy'] >= threshold and results_df['pos_accuracy'] >= threshold:
+            print(f"Nc class accuracy: {results_df['neg_accuracy']} Pc class Accuracy: {results_df['pos_accuracy']}")
+            model_path = save_model_at_threshold(threshold, epoch)
+            break
+        else:
+            model_path = None
+    return model_path
-def _save_progress(dst, results_df, train_metrics_df, epoch, epochs):
+def _save_progress(dst, results_df, result_type='train'):
     """
     Save the progress of the classification model.
@@ -2317,18 +2393,13 @@ def _save_progress(dst, results_df, train_metrics_df, epoch, epochs):
     """
     # Save accuracy, loss, PRAUC
     os.makedirs(dst, exist_ok=True)
-    results_path = os.path.join(dst, 'acc_loss_prauc.csv')
+    results_path = os.path.join(dst, f'{result_type}.csv')
     if not os.path.exists(results_path):
         results_df.to_csv(results_path, index=True, header=True, mode='w')
     else:
         results_df.to_csv(results_path, index=True, header=False, mode='a')
-    training_metrics_path = os.path.join(dst, 'training_metrics.csv')
-    if not os.path.exists(training_metrics_path):
-        train_metrics_df.to_csv(training_metrics_path, index=True, header=True, mode='w')
-    else:
-        train_metrics_df.to_csv(training_metrics_path, index=True, header=False, mode='a')
-    if epoch == epochs:
+    if result_type == 'train':
         read_plot_model_stats(results_path, save=True)
     return

spacr/measure.py CHANGED Viewed

@@ -1060,6 +1060,7 @@ def measure_crop(settings):
     files = [f for f in os.listdir(settings['src']) if f.endswith('.npy')]
     n_jobs = settings['n_jobs']
     print(f'using {n_jobs} cpu cores')
+    print_progress(files_processed=0, files_to_process=len(files), n_jobs=n_jobs, time_ls=[], operation_type='Measure and Crop')
     def job_callback(result):
         completed_jobs.add(result[0])

spacr/plot.py CHANGED Viewed

@@ -1,4 +1,4 @@
-import os,re, random, cv2, glob, time, math
+import os,re, random, cv2, glob, time, math, torch
 import numpy as np
 import pandas as pd
@@ -125,7 +125,7 @@ def plot_image_mask_overlay(file, channels, cell_channel, nucleus_channel, patho
     return
-def plot_masks(batch, masks, flows, cmap='inferno', figuresize=20, nr=1, file_type='.npz', print_object_number=True):
+def plot_masks(batch, masks, flows, cmap='inferno', figuresize=10, nr=1, file_type='.npz', print_object_number=True):
     """
     Plot the masks and flows for a given batch of images.
@@ -476,7 +476,7 @@ def _filter_objects_in_plot(stack, cell_mask_dim, nucleus_mask_dim, pathogen_mas
     return stack
-def plot_arrays(src, figuresize=50, cmap='inferno', nr=1, normalize=True, q1=1, q2=99):
+def plot_arrays(src, figuresize=10, cmap='inferno', nr=1, normalize=True, q1=1, q2=99):
     """
     Plot randomly selected arrays from a given directory.
@@ -870,7 +870,7 @@ def _save_scimg_plot(src, nr_imgs=16, channel_indices=[0,1,2], um_per_pixel=0.1,
     return
-def _plot_cropped_arrays(stack, filename, figuresize=20, cmap='inferno', threshold=500):
+def _plot_cropped_arrays(stack, filename, figuresize=10, cmap='inferno', threshold=500):
     """
     Plot cropped arrays.
@@ -997,7 +997,7 @@ def _display_gif(path):
     with open(path, 'rb') as file:
         display(ipyimage(file.read()))
-def _plot_recruitment(df, df_type, channel_of_interest, target, columns=[], figuresize=50):
+def _plot_recruitment(df, df_type, channel_of_interest, target, columns=[], figuresize=10):
     """
     Plot recruitment data for different conditions and pathogens.
@@ -1186,6 +1186,52 @@ def _imshow(img, labels, nrow=20, color='white', fontsize=12):
         y = row * img_height + 15
         plt.text(x, y, label, color=color, fontsize=fontsize, fontweight='bold')
     return fig
+def _imshow_gpu(img, labels, nrow=20, color='white', fontsize=12):
+    """
+    Display multiple images in a grid with corresponding labels.
+    Args:
+        img (torch.Tensor): A batch of images as a tensor.
+        labels (list): List of labels corresponding to each image.
+        nrow (int, optional): Number of images per row in the grid. Defaults to 20.
+        color (str, optional): Color of the label text. Defaults to 'white'.
+        fontsize (int, optional): Font size of the label text. Defaults to 12.
+    """
+    if img.is_cuda:
+        img = img.cpu()  # Move to CPU if the tensor is on GPU
+    n_images = len(labels)
+    n_col = nrow
+    n_row = int(np.ceil(n_images / n_col))
+    img_height = img.shape[2]  # Height of the image
+    img_width = img.shape[3]   # Width of the image
+    # Prepare the canvas on CPU
+    canvas = torch.zeros((img_height * n_row, img_width * n_col, 3))
+    for i in range(n_row):
+        for j in range(n_col):
+            idx = i * n_col + j
+            if idx < n_images:
+                # Place the image on the canvas
+                canvas[i * img_height:(i + 1) * img_height, j * img_width:(j + 1) * img_width] = img[idx].permute(1, 2, 0)
+    canvas = canvas.numpy()  # Convert to NumPy for plotting
+    fig = plt.figure(figsize=(50, 50))
+    plt.imshow(canvas)
+    plt.axis("off")
+    for i, label in enumerate(labels):
+        row = i // n_col
+        col = i % n_col
+        x = col * img_width + 2
+        y = row * img_height + 15
+        plt.text(x, y, label, color=color, fontsize=fontsize, fontweight='bold')
+    return fig
 def _plot_histograms_and_stats(df):
     conditions = df['condition'].unique()

spacr 0.2.5__py3-none-any.whl → 0.2.8__py3-none-any.whl

spacr 0.2.5__py3-none-any.whl → 0.2.8__py3-none-any.whl