PyPI - spacr - Versions diffs - 0.0.70__py3-none-any.whl → 0.0.80__py3-none-any.whl - Mend

spacr 0.0.70__py3-none-any.whl → 0.0.80__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22) hide show

spacr/__init__.py +4 -1
spacr/__main__.py +0 -7
spacr/annotate_app.py +75 -61
spacr/core.py +39 -246
spacr/foldseek.py +6 -6
spacr/get_alfafold_structures.py +3 -3
spacr/io.py +53 -116
spacr/measure.py +46 -59
spacr/plot.py +117 -81
spacr/sequencing.py +508 -491
spacr/sim.py +24 -29
spacr/utils.py +487 -260
{spacr-0.0.70.dist-info → spacr-0.0.80.dist-info}/METADATA +10 -8
spacr-0.0.80.dist-info/RECORD +36 -0
spacr/graph_learning_lap.py +0 -84
spacr/train.py +0 -667
spacr/umap.py +0 -0
spacr-0.0.70.dist-info/RECORD +0 -39
{spacr-0.0.70.dist-info → spacr-0.0.80.dist-info}/LICENSE +0 -0
{spacr-0.0.70.dist-info → spacr-0.0.80.dist-info}/WHEEL +0 -0
{spacr-0.0.70.dist-info → spacr-0.0.80.dist-info}/entry_points.txt +0 -0
{spacr-0.0.70.dist-info → spacr-0.0.80.dist-info}/top_level.txt +0 -0

spacr/io.py CHANGED Viewed

@@ -255,31 +255,24 @@ class CombinedDataset(Dataset):
 class NoClassDataset(Dataset):
     """
-    A custom dataset class for handling images without class labels.
+    A custom dataset class for handling image data without class labels.
     Args:
-        data_dir (str): The directory path where the images are stored.
-        transform (callable, optional): A function/transform that takes in an PIL image and returns a transformed version. Default is None.
+        data_dir (str): The directory path where the image files are located.
+        transform (callable, optional): A function/transform to apply to the image data. Default is None.
         shuffle (bool, optional): Whether to shuffle the dataset. Default is True.
         load_to_memory (bool, optional): Whether to load all images into memory. Default is False.
     Attributes:
-        data_dir (str): The directory path where the images are stored.
-        transform (callable): A function/transform that takes in an PIL image and returns a transformed version.
+        data_dir (str): The directory path where the image files are located.
+        transform (callable): A function/transform to apply to the image data.
         shuffle (bool): Whether to shuffle the dataset.
         load_to_memory (bool): Whether to load all images into memory.
-        filenames (list): List of file paths for the images.
-        images (list): List of loaded images (if load_to_memory is True).
-    Methods:
-        load_image: Loads an image from the given file path.
-        __len__: Returns the number of images in the dataset.
-        shuffle_dataset: Shuffles the dataset.
-        __getitem__: Retrieves an image and its corresponding file path from the dataset.
+        filenames (list): A list of file paths for the image files.
+        images (list): A list of loaded images (if load_to_memory is True).
     """
-    def _init__(self, data_dir, transform=None, shuffle=True, load_to_memory=False):
+    def __init__(self, data_dir, transform=None, shuffle=True, load_to_memory=False):
         self.data_dir = data_dir
         self.transform = transform
         self.shuffle = shuffle
@@ -289,16 +282,47 @@ class NoClassDataset(Dataset):
             self.shuffle_dataset()
         if self.load_to_memory:
             self.images = [self.load_image(f) for f in self.filenames]
     #@lru_cache(maxsize=None)
     def load_image(self, img_path):
+        """
+        Load an image from the given file path.
+        Args:
+            img_path (str): The file path of the image.
+        Returns:
+            PIL.Image: The loaded image.
+        """
         img = Image.open(img_path).convert('RGB')
         return img
-    def _len__(self):
+    def __len__(self):
+        """
+        Get the total number of images in the dataset.
+        Returns:
+            int: The number of images in the dataset.
+        """
         return len(self.filenames)
     def shuffle_dataset(self):
+        """
+        Shuffle the dataset.
+        """
         if self.shuffle:
             random.shuffle(self.filenames)
-    def _getitem__(self, index):
+    def __getitem__(self, index):
+        """
+        Get the image and its corresponding filename at the given index.
+        Args:
+            index (int): The index of the image in the dataset.
+        Returns:
+            tuple: A tuple containing the image and its filename.
+        """
         if self.load_to_memory:
             img = self.images[index]
         else:
@@ -374,32 +398,7 @@ class MyDataset(Dataset):
         return img, label, filename
 class NoClassDataset(Dataset):
-    """
-    A custom dataset class for handling images without class labels.
-    Args:
-        data_dir (str): The directory path where the images are stored.
-        transform (callable, optional): A function/transform that takes in an PIL image and returns a transformed version. Default is None.
-        shuffle (bool, optional): Whether to shuffle the dataset. Default is True.
-        load_to_memory (bool, optional): Whether to load all images into memory. Default is False.
-    Attributes:
-        data_dir (str): The directory path where the images are stored.
-        transform (callable): A function/transform that takes in an PIL image and returns a transformed version.
-        shuffle (bool): Whether to shuffle the dataset.
-        load_to_memory (bool): Whether to load all images into memory.
-        filenames (list): List of file paths of the images.
-        images (list): List of loaded images (if load_to_memory is True).
-    Methods:
-        load_image: Load an image from the given file path.
-        __len__: Get the length of the dataset.
-        shuffle_dataset: Shuffle the dataset.
-        __getitem__: Get an item (image and its filename) from the dataset.
-    """
-    def _init__(self, data_dir, transform=None, shuffle=True, load_to_memory=False):
+    def __init__(self, data_dir, transform=None, shuffle=True, load_to_memory=False):
         self.data_dir = data_dir
         self.transform = transform
         self.shuffle = shuffle
@@ -409,16 +408,20 @@ class NoClassDataset(Dataset):
             self.shuffle_dataset()
         if self.load_to_memory:
             self.images = [self.load_image(f) for f in self.filenames]
-    #@lru_cache(maxsize=None)
     def load_image(self, img_path):
         img = Image.open(img_path).convert('RGB')
         return img
-    def _len__(self):
+    def __len__(self):
         return len(self.filenames)
     def shuffle_dataset(self):
         if self.shuffle:
             random.shuffle(self.filenames)
-    def _getitem__(self, index):
+    def __getitem__(self, index):
         if self.load_to_memory:
             img = self.images[index]
         else:
@@ -427,8 +430,8 @@ class NoClassDataset(Dataset):
             img = self.transform(img)
         else:
             img = ToTensor()(img)
-        # Return both the image and its filename
         return img, self.filenames[index]
 class TarImageDataset(Dataset):
     def _init__(self, tar_path, transform=None):
@@ -1038,72 +1041,6 @@ def _normalize_img_batch(stack, backgrounds, remove_backgrounds, lower_percentil
     return normalized_stack.astype(save_dtype)
-def _normalize_img_batch_v1(stack, backgrounds, remove_backgrounds, lower_percentile, save_dtype, signal_to_noise, signal_thresholds):
-    """
-    Normalize the stack of images.
-    Args:
-        stack (numpy.ndarray): The stack of images to normalize.
-        backgrounds (list): Background values for each channel.
-        remove_backgrounds (list): Whether to remove background values for each channel.
-        lower_percentile (int): Lower percentile value for normalization.
-        save_dtype (numpy.dtype): Data type for saving the normalized stack.
-        signal_to_noise (list): Signal-to-noise ratio thresholds for each channel.
-        signal_thresholds (list): Signal thresholds for each channel.
-    Returns:
-        numpy.ndarray: The normalized stack.
-    """
-    normalized_stack = np.zeros_like(stack, dtype=np.float32)
-    time_ls = []
-    for chan_index, channel in enumerate(range(stack.shape[-1])):
-        single_channel = stack[:, :, :, channel]
-        background = backgrounds[chan_index]
-        signal_threshold = signal_thresholds[chan_index]
-        remove_background = remove_backgrounds[chan_index]
-        signal_2_noise = signal_to_noise[chan_index]
-        print(f'chan_index:{chan_index} background:{background} signal_threshold:{signal_threshold} remove_background:{remove_background} signal_2_noise:{signal_2_noise}')
-        if remove_background:
-            single_channel[single_channel < background] = 0
-        non_zero_single_channel = single_channel[single_channel != 0]
-        global_lower = np.percentile(non_zero_single_channel, lower_percentile)
-        for upper_p in np.linspace(98, 99.5, num=20).tolist():
-            global_upper = np.percentile(non_zero_single_channel, upper_p)
-            if global_upper >= signal_threshold:
-                break
-        arr_2d_normalized = np.zeros_like(single_channel, dtype=single_channel.dtype)
-        signal_to_noise_ratio_ls = []
-        for array_index in range(single_channel.shape[0]):
-            start = time.time()
-            arr_2d = single_channel[array_index, :, :]
-            non_zero_arr_2d = arr_2d[arr_2d != 0]
-            if non_zero_arr_2d.size > 0:
-                lower, upper = np.percentile(non_zero_arr_2d, (lower_percentile, upper_p))
-                signal_to_noise_ratio = upper / lower
-            else:
-                signal_to_noise_ratio = 0
-            signal_to_noise_ratio_ls.append(signal_to_noise_ratio)
-            average_stnr = np.mean(signal_to_noise_ratio_ls) if len(signal_to_noise_ratio_ls) > 0 else 0
-            if signal_to_noise_ratio > signal_2_noise:
-                arr_2d_rescaled = exposure.rescale_intensity(arr_2d, in_range=(lower, upper), out_range=(0, 1))
-                arr_2d_normalized[array_index, :, :] = arr_2d_rescaled
-            else:
-                arr_2d_normalized[array_index, :, :] = arr_2d
-            stop = time.time()
-            duration = (stop - start) * single_channel.shape[0]
-            time_ls.append(duration)
-            average_time = np.mean(time_ls) if len(time_ls) > 0 else 0
-            print(f'Progress: channels:{chan_index}/{stack.shape[-1] - 1}, arrays:{array_index + 1}/{single_channel.shape[0]}, Signal:{upper:.1f}, noise:{lower:.1f}, Signal-to-noise:{average_stnr:.1f}, Time/channel:{average_time:.2f}sec')
-        normalized_stack[:, :, :, channel] = arr_2d_normalized
-    return normalized_stack.astype(save_dtype)
 def _get_lists_for_normalization(settings):
     """
     Get lists for normalization based on the provided settings.

spacr/measure.py CHANGED Viewed

@@ -626,7 +626,13 @@ def _measure_crop_core(index, time_ls, file, settings):
             _create_database(source_folder+'/measurements/measurements.db')
         if settings['plot_filtration']:
-           _plot_cropped_arrays(data)
+            if len(data.shape) == 3:
+                figuresize = data.shape[2]*10
+            else:
+                figuresize = 10
+            print('')
+            _plot_cropped_arrays(data, file, figuresize)
         channel_arrays = data[:, :, settings['channels']].astype(data_type)
         if settings['cell_mask_dim'] is not None:
@@ -652,7 +658,6 @@ def _measure_crop_core(index, time_ls, file, settings):
                     data[:, :, settings['nucleus_mask_dim']] = nucleus_mask
                     save_folder = settings['input_folder']
                     np.save(os.path.join(save_folder, file), data)
         else:
             nucleus_mask = np.zeros_like(data[:, :, 0])
@@ -703,7 +708,8 @@ def _measure_crop_core(index, time_ls, file, settings):
             data = np.concatenate((data, cytoplasm_mask[:, :, np.newaxis]), axis=2)
         if settings['plot_filtration']:
-            _plot_cropped_arrays(data)
+            _plot_cropped_arrays(data, file, figuresize)
+            #_plot_cropped_arrays(data)
         if settings['save_measurements']:
@@ -792,23 +798,25 @@ def _measure_crop_core(index, time_ls, file, settings):
                         if settings['save_png']:
                             fldr_type = f"{crop_mode}_png/"
                             png_folder = os.path.join(fldr,fldr_type)
                             img_path = os.path.join(png_folder, img_name)
+                            img_paths.append(img_path)
                             png_channels = data[:, :, settings['png_dims']].astype(data_type)
                             if settings['normalize_by'] == 'fov':
-                                percentiles_list = _get_percentiles(png_channels, settings['normalize'][0],q2=settings['normalize'][1])
+                                if not settings['normalize'] is False:
+                                    percentile_list = _get_percentiles(png_channels, settings['normalize'][0], settings['normalize'][1])
                             png_channels = _crop_center(png_channels, region, new_width=width, new_height=height)
                             if isinstance(settings['normalize'], list):
                                 if settings['normalize_by'] == 'png':
-                                    png_channels = normalize_to_dtype(png_channels, q1=settings['normalize'][0],q2=settings['normalize'][1])
+                                    png_channels = normalize_to_dtype(png_channels, settings['normalize'][0], settings['normalize'][1])
                                 if settings['normalize_by'] == 'fov':
-                                    png_channels = normalize_to_dtype(png_channels, q1=settings['normalize'][0],q2=settings['normalize'][1], percentiles=percentiles_list)
+                                    png_channels = normalize_to_dtype(png_channels, settings['normalize'][0], settings['normalize'][1], percentile_list=percentile_list)
+                            else:
+                                png_channels = normalize_to_dtype(png_channels, 0, 100)
                             os.makedirs(png_folder, exist_ok=True)
                             if png_channels.shape[2] == 2:
@@ -818,8 +826,6 @@ def _measure_crop_core(index, time_ls, file, settings):
                             else:
                                 cv2.imwrite(img_path, png_channels)
-                            img_paths.append(img_path)
                             if len(img_paths) == len(objects_in_image):
                                 png_df = pd.DataFrame(img_paths, columns=['png_path'])
@@ -858,7 +864,11 @@ def _measure_crop_core(index, time_ls, file, settings):
                                     traceback.print_exc()
                             if settings['plot']:
-                                _plot_cropped_arrays(png_channels)
+                                if len(png_channels.shape) == 3:
+                                    figuresize = png_channels.shape[2]*10
+                                else:
+                                    figuresize = 10
+                                _plot_cropped_arrays(png_channels, img_name, figuresize, threshold=1)
                         if settings['save_arrays']:
                             row_idx, col_idx = np.where(region)
@@ -867,12 +877,20 @@ def _measure_crop_core(index, time_ls, file, settings):
                             os.makedirs(array_folder, exist_ok=True)
                             np.save(os.path.join(array_folder, img_name), region_array)
                             if settings['plot']:
-                                _plot_cropped_arrays(region_array)
+                                if len(png_channels.shape) == 3:
+                                    figuresize = png_channels.shape[2]*10
+                                else:
+                                    figuresize = 10
+                                _plot_cropped_arrays(png_channels, img_name, figuresize, threshold=1)
                         if not settings['save_arrays'] and not settings['save_png'] and settings['plot']:
                             row_idx, col_idx = np.where(region)
                             region_array = data[row_idx.min():row_idx.max()+1, col_idx.min():col_idx.max()+1, :]
-                            _plot_cropped_arrays(region_array)
+                            if len(png_channels.shape) == 3:
+                                figuresize = png_channels.shape[2]*10
+                            else:
+                                figuresize = 10
+                            _plot_cropped_arrays(png_channels, file, figuresize, threshold=1)
         cells = np.unique(cell_mask)
     except Exception as e:
@@ -899,47 +917,17 @@ def measure_crop(settings):
         None
     """
-    if settings.get('test_mode', False):
-        if not os.basename(settings['src']) == 'test':
-            src = os.path.join(src, 'test')
-            settings['src'] = src
-            print(f'Changed source folder to {src} for test mode')
-        else:
-            print(f'Test mode enabled, using source folder {settings["src"]}')
     from .io import _save_settings_to_db
     from .timelapse import _timelapse_masks_to_gif, _scmovie
     from .plot import _save_scimg_plot
-    from .utils import _list_endpoint_subdirectories, _generate_representative_images
-    #general settings
-    settings['merge_edge_pathogen_cells'] = True
-    settings['radial_dist'] = True
-    settings['calculate_correlation'] = True
-    settings['manders_thresholds'] = [15,85,95]
-    settings['homogeneity'] = True
-    settings['homogeneity_distances'] = [8,16,32]
-    settings['save_arrays'] = False
-    settings['dialate_pngs'] = False
-    settings['dialate_png_ratios'] = [0.2]
-    settings['timelapse'] = False
-    settings['representative_images'] = False
-    settings['timelapse_objects'] = 'cell'
-    settings['max_workers'] = os.cpu_count()-2
-    settings['experiment'] = 'test'
-    settings['cells'] = 'HeLa'
-    settings['cell_loc'] = None
-    settings['pathogens'] = ['ME49Dku80WT', 'ME49Dku80dgra8:GRA8', 'ME49Dku80dgra8', 'ME49Dku80TKO']
-    settings['pathogen_loc'] = [['c1', 'c2', 'c3', 'c4', 'c5', 'c6'], ['c7', 'c8', 'c9', 'c10', 'c11', 'c12'], ['c13', 'c14', 'c15', 'c16', 'c17', 'c18'], ['c19', 'c20', 'c21', 'c22', 'c23', 'c24']]
-    settings['treatments'] = ['BR1', 'BR2', 'BR3']
-    settings['treatment_loc'] = [['c1', 'c2', 'c7', 'c8', 'c13', 'c14', 'c19', 'c20'], ['c3', 'c4', 'c9', 'c10', 'c15', 'c16', 'c21', 'c22'], ['c5', 'c6', 'c11', 'c12', 'c17', 'c18', 'c23', 'c24']]
-    settings['channel_of_interest'] = 2
-    settings['compartments'] = ['pathogen', 'cytoplasm']
-    settings['measurement'] = 'mean_intensity'
-    settings['nr_imgs'] = 32
-    settings['um_per_pixel'] = 0.1
-    settings['center_crop'] = True
+    from .utils import _list_endpoint_subdirectories, _generate_representative_images, get_measure_crop_settings, measure_test_mode
+    settings = get_measure_crop_settings(settings)
+    settings = measure_test_mode(settings)
+    if not os.path.exists(settings['input_folder']):
+        print(f"Error: {settings['input_folder']} does not exist")
+        return
     if settings['cell_mask_dim'] is None:
         settings['include_uninfected'] = True
@@ -951,8 +939,6 @@ def measure_crop(settings):
         settings['cytoplasm'] = True
     else:
         settings['cytoplasm'] = False
-    #settings = {**settings, **annotation_settings, **advanced_settings}
     dirname = os.path.dirname(settings['input_folder'])
     settings_df = pd.DataFrame(list(settings.items()), columns=['Key', 'Value'])
@@ -970,10 +956,11 @@ def measure_crop(settings):
     if isinstance(settings['normalize'], bool) and settings['normalize']:
         print(f'WARNING: to notmalize single object pngs set normalize to a list of 2 integers, e.g. [1,99] (lower and upper percentiles)')
         return
-    if settings['normalize_by'] not in ['png', 'fov']:
-        print("Warning: normalize_by should be either 'png' to notmalize each png to its own percentiles or 'fov' to normalize each png to the fov percentiles ")
-        return
+    if isinstance(settings['normalize'], list) or isinstance(settings['normalize'], bool) and settings['normalize']:
+        if settings['normalize_by'] not in ['png', 'fov']:
+            print("Warning: normalize_by should be either 'png' to notmalize each png to its own percentiles or 'fov' to normalize each png to the fov percentiles ")
+            return
     if not all(isinstance(settings[key], int) or settings[key] is None for key in int_setting_keys):
         print(f"WARNING: {int_setting_keys} must all be integers")

spacr 0.0.70__py3-none-any.whl → 0.0.80__py3-none-any.whl

spacr 0.0.70__py3-none-any.whl → 0.0.80__py3-none-any.whl