PyPI - spacr - Versions diffs - 0.3.52__py3-none-any.whl → 0.3.55__py3-none-any.whl - Mend

spacr 0.3.52__py3-none-any.whl → 0.3.55__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18)

spacr/gui_elements.py +1 -1
spacr/gui_utils.py +0 -111
spacr/io.py +114 -140
spacr/measure.py +10 -11
spacr/ml.py +41 -32
spacr/plot.py +24 -293
spacr/sequencing.py +13 -9
spacr/settings.py +15 -9
spacr/submodules.py +19 -19
spacr/timelapse.py +16 -16
spacr/toxo.py +15 -15
spacr/utils.py +72 -164
{spacr-0.3.52.dist-info → spacr-0.3.55.dist-info}/METADATA +1 -1
{spacr-0.3.52.dist-info → spacr-0.3.55.dist-info}/RECORD +18 -18
{spacr-0.3.52.dist-info → spacr-0.3.55.dist-info}/LICENSE +0 -0
{spacr-0.3.52.dist-info → spacr-0.3.55.dist-info}/WHEEL +0 -0
{spacr-0.3.52.dist-info → spacr-0.3.55.dist-info}/entry_points.txt +0 -0
{spacr-0.3.52.dist-info → spacr-0.3.55.dist-info}/top_level.txt +0 -0

spacr/gui_elements.py CHANGED Viewed

@@ -706,7 +706,7 @@ class spacrProgressBar(ttk.Progressbar):
     def set_label_position(self):
         if self.label and self.progress_label:
-            row_info = self.grid_info().get('row', 0)
+            row_info = self.grid_info().get('row_name', 0)
             col_info = self.grid_info().get('column', 0)
             col_span = self.grid_info().get('columnspan', 1)
             self.progress_label.grid(row=row_info + 1, column=col_info, columnspan=col_span, pady=5, padx=5, sticky='ew')

spacr/gui_utils.py CHANGED Viewed

@@ -106,32 +106,6 @@ def parse_list(value):
     except (ValueError, SyntaxError) as e:
         raise ValueError(f"Invalid format for list: {value}. Error: {e}")
-def parse_list_v1(value):
-    """
-    Parses a string representation of a list and returns the parsed list.
-    Args:
-        value (str): The string representation of the list.
-    Returns:
-        list: The parsed list, which can contain integers, floats, or strings.
-    Raises:
-        ValueError: If the input value is not a valid list format or contains mixed types or unsupported types.
-    """
-    try:
-        parsed_value = ast.literal_eval(value)
-        if isinstance(parsed_value, list):
-            # Check if all elements are homogeneous (either all int, float, or str)
-            if all(isinstance(item, (int, float, str)) for item in parsed_value):
-                return parsed_value
-            else:
-                raise ValueError("List contains mixed types or unsupported types")
-        else:
-            raise ValueError(f"Expected a list but got {type(parsed_value).__name__}")
-    except (ValueError, SyntaxError) as e:
-        raise ValueError(f"Invalid format for list: {value}. Error: {e}")
 # Usage example in your create_input_field function
 def create_input_field(frame, label_text, row, var_type='entry', options=None, default_value=None):
     """
@@ -696,91 +670,6 @@ def ensure_after_tasks(frame):
     if not hasattr(frame, 'after_tasks'):
         frame.after_tasks = []
-def display_gif_in_plot_frame_v1(gif_path, parent_frame):
-    """Display and zoom a GIF to fill the entire parent_frame, maintaining aspect ratio, with lazy resizing and caching."""
-    # Clear parent_frame if it contains any previous widgets
-    for widget in parent_frame.winfo_children():
-        widget.destroy()
-    # Load the GIF
-    gif = Image.open(gif_path)
-    # Get the aspect ratio of the GIF
-    gif_width, gif_height = gif.size
-    gif_aspect_ratio = gif_width / gif_height
-    # Create a label to display the GIF and configure it to fill the parent_frame
-    label = tk.Label(parent_frame, bg="black")
-    label.grid(row=0, column=0, sticky="nsew")  # Expands in all directions (north, south, east, west)
-    # Configure parent_frame to stretch the label to fill available space
-    parent_frame.grid_rowconfigure(0, weight=1)
-    parent_frame.grid_columnconfigure(0, weight=1)
-    # Cache for storing resized frames (lazily filled)
-    resized_frames_cache = {}
-    # Last frame dimensions
-    last_frame_width = 0
-    last_frame_height = 0
-    def resize_and_crop_frame(frame_idx, frame_width, frame_height):
-        """Resize and crop the current frame of the GIF to fit the parent_frame while maintaining the aspect ratio."""
-        # If the frame is already cached at the current size, return it
-        if (frame_idx, frame_width, frame_height) in resized_frames_cache:
-            return resized_frames_cache[(frame_idx, frame_width, frame_height)]
-        # Calculate the scaling factor to zoom in on the GIF
-        scale_factor = max(frame_width / gif_width, frame_height / gif_height)
-        # Calculate new dimensions while maintaining the aspect ratio
-        new_width = int(gif_width * scale_factor)
-        new_height = int(gif_height * scale_factor)
-        # Resize the GIF to fit the frame
-        gif.seek(frame_idx)
-        resized_gif = gif.copy().resize((new_width, new_height), Image.Resampling.LANCZOS)
-        # Calculate the cropping box to center the resized GIF in the frame
-        crop_left = (new_width - frame_width) // 2
-        crop_top = (new_height - frame_height) // 2
-        crop_right = crop_left + frame_width
-        crop_bottom = crop_top + frame_height
-        # Crop the resized GIF to exactly fit the frame
-        cropped_gif = resized_gif.crop((crop_left, crop_top, crop_right, crop_bottom))
-        # Convert the cropped frame to a Tkinter-compatible format
-        frame_image = ImageTk.PhotoImage(cropped_gif)
-        # Cache the resized frame
-        resized_frames_cache[(frame_idx, frame_width, frame_height)] = frame_image
-        return frame_image
-    def update_frame(frame_idx):
-        """Update the GIF frame using lazy resizing and caching."""
-        # Get the current size of the parent_frame
-        frame_width = parent_frame.winfo_width()
-        frame_height = parent_frame.winfo_height()
-        # Only resize if the frame size has changed
-        nonlocal last_frame_width, last_frame_height
-        if frame_width != last_frame_width or frame_height != last_frame_height:
-            last_frame_width, last_frame_height = frame_width, frame_height
-        # Get the resized and cropped frame image
-        frame_image = resize_and_crop_frame(frame_idx, frame_width, frame_height)
-        label.config(image=frame_image)
-        label.image = frame_image  # Keep a reference to avoid garbage collection
-        # Move to the next frame, or loop back to the beginning
-        next_frame_idx = (frame_idx + 1) % gif.n_frames
-        parent_frame.after(gif.info['duration'], update_frame, next_frame_idx)
-    # Start the GIF animation from frame 0
-    update_frame(0)
 def display_gif_in_plot_frame(gif_path, parent_frame):
     """Display and zoom a GIF to fill the entire parent_frame, maintaining aspect ratio, with lazy resizing and caching."""
     # Clear parent_frame if it contains any previous widgets

spacr/io.py CHANGED Viewed

@@ -292,121 +292,6 @@ def _load_normalized_images_and_labels(image_files, label_files, channels=None,
     return normalized_images, labels, image_names, label_names, orig_dims
-def _load_normalized_images_and_labels_v1(image_files, label_files, channels=None, percentiles=None, invert=False, visualize=False, remove_background=False, background=0, Signal_to_noise=10, target_height=None, target_width=None):
-    from .plot import normalize_and_visualize, plot_resize
-    from .utils import invert_image, apply_mask
-    from skimage.transform import resize as resizescikit
-    if isinstance(percentiles, list):
-        if len(percentiles) !=2:
-            percentiles = None
-        if not percentiles[0] is int:
-            percentiles = None
-        if not percentiles[1] is int:
-            percentiles = None
-    signal_thresholds = background * Signal_to_noise
-    lower_percentile = 2
-    images = []
-    labels = []
-    orig_dims = []
-    num_channels = 4
-    percentiles_1 = [[] for _ in range(num_channels)]
-    percentiles_99 = [[] for _ in range(num_channels)]
-    image_names = [os.path.basename(f) for f in image_files]
-    image_dir = os.path.dirname(image_files[0])
-    if label_files is not None:
-        label_names = [os.path.basename(f) for f in label_files]
-        label_dir = os.path.dirname(label_files[0])
-    # Load, normalize, and resize images
-    for i, img_file in enumerate(image_files):
-        image = cellpose.io.imread(img_file)
-        orig_dims.append((image.shape[0], image.shape[1]))
-        if invert:
-            image = invert_image(image)
-        # If specific channels are specified, select them
-        if channels is not None and image.ndim == 3:
-            image = image[..., channels]
-        if remove_background:
-            image[image < background] = 0
-        if image.ndim < 3:
-            image = np.expand_dims(image, axis=-1)
-        if percentiles is None:
-            for c in range(image.shape[-1]):
-                p1 = np.percentile(image[..., c], lower_percentile)
-                percentiles_1[c].append(p1)
-                for percentile in [98, 99, 99.9, 99.99, 99.999]:
-                    p = np.percentile(image[..., c], percentile)
-                    if p > signal_thresholds:
-                        percentiles_99[c].append(p)
-                        break
-        # Resize image
-        if target_height is not None and target_width is not None:
-            if image.ndim == 2:
-                image_shape = (target_height, target_width)
-            elif image.ndim == 3:
-                image_shape = (target_height, target_width, image.shape[-1])
-            image = resizescikit(image, image_shape, preserve_range=True, anti_aliasing=True).astype(image.dtype)
-        images.append(image)
-    if percentiles is None:
-        # Calculate average percentiles for normalization
-        avg_p1 = [np.mean(p) for p in percentiles_1]
-        avg_p99 = [np.mean(p) if len(p) > 0 else np.mean(percentiles_1[i]) for i, p in enumerate(percentiles_99)]
-        print(f'Average 1st percentiles: {avg_p1}, Average 99th percentiles: {avg_p99}')
-        normalized_images = []
-        for image in images:
-            normalized_image = np.zeros_like(image, dtype=np.float32)
-            for c in range(image.shape[-1]):
-                normalized_image[..., c] = rescale_intensity(image[..., c], in_range=(avg_p1[c], avg_p99[c]), out_range=(0, 1))
-            normalized_images.append(normalized_image)
-            if visualize:
-                normalize_and_visualize(image, normalized_image, title=f"Channel {c+1} Normalized")
-    else:
-        normalized_images = []
-        for image in images:
-            normalized_image = np.zeros_like(image, dtype=np.float32)
-            for c in range(image.shape[-1]):
-                low_p = np.percentile(image[..., c], percentiles[0])
-                high_p = np.percentile(image[..., c], percentiles[1])
-                normalized_image[..., c] = rescale_intensity(image[..., c], in_range=(low_p, high_p), out_range=(0, 1))
-            normalized_images.append(normalized_image)
-            if visualize:
-                normalize_and_visualize(image, normalized_image, title=f"Channel {c+1} Normalized")
-    if label_files is not None:
-        for lbl_file in label_files:
-            label = cellpose.io.imread(lbl_file)
-            # Resize label
-            if target_height is not None and target_width is not None:
-                label = resizescikit(label, (target_height, target_width), order=0, preserve_range=True, anti_aliasing=False).astype(label.dtype)
-            labels.append(label)
-    else:
-        label_names = []
-        label_dir = None
-    print(f'Loaded and normalized {len(normalized_images)} images and {len(labels)} labels from {image_dir} and {label_dir}')
-    if visualize and images and labels:
-        plot_resize(images, normalized_images, labels, labels)
-    return normalized_images, labels, image_names, label_names, orig_dims
 class CombineLoaders:
     """
@@ -1875,6 +1760,9 @@ def _read_and_join_tables(db_path, table_names=['cell', 'cytoplasm', 'nucleus',
     Returns:
         pandas.DataFrame: The joined DataFrame containing the data from the specified tables, or None if an error occurs.
     """
+    from .utils import rename_columns_in_db
+    rename_columns_in_db(db_path)
     conn = sqlite3.connect(db_path)
     dataframes = {}
     for table_name in table_names:
@@ -1885,11 +1773,11 @@ def _read_and_join_tables(db_path, table_names=['cell', 'cytoplasm', 'nucleus',
             print(e)
     conn.close()
     if 'png_list' in dataframes:
-        png_list_df = dataframes['png_list'][['cell_id', 'png_path', 'plate', 'row', 'col']].copy()
+        png_list_df = dataframes['png_list'][['cell_id', 'png_path', 'plate', 'row_name', 'column_name']].copy()
         png_list_df['cell_id'] = png_list_df['cell_id'].str[1:].astype(int)
         png_list_df.rename(columns={'cell_id': 'object_label'}, inplace=True)
         if 'cell' in dataframes:
-            join_cols = ['object_label', 'plate', 'row', 'col']
+            join_cols = ['object_label', 'plate', 'row_name', 'column_name']
             dataframes['cell'] = pd.merge(dataframes['cell'], png_list_df, on=join_cols, how='left')
         else:
             print("Cell table not found in database tables.")
@@ -2190,6 +2078,8 @@ def _read_db(db_loc, tables):
     Returns:
     - dfs (list): A list of pandas DataFrames, each containing the data from a table.
     """
+    from .utils import rename_columns_in_db
+    rename_columns_in_db(db_loc)
     conn = sqlite3.connect(db_loc)
     dfs = []
     for table in tables:
@@ -2310,7 +2200,7 @@ def _read_and_merge_data(locs, tables, verbose=False, nuclei_limit=False, pathog
         merged_df = merged_df.merge(pathogens_g_df, left_index=True, right_index=True)
     #Add prc column (plate row column)
-    metadata = metadata.assign(prc = lambda x: x['plate'] + '_' + x['row'] + '_' +x['col'])
+    metadata = metadata.assign(prc = lambda x: x['plate'] + '_' + x['row_name'] + '_' +x['column_name'])
     #Count cells per well
     cells_well = pd.DataFrame(metadata.groupby('prc')['object_label'].nunique())
@@ -2322,7 +2212,7 @@ def _read_and_merge_data(locs, tables, verbose=False, nuclei_limit=False, pathog
     metadata.drop(columns=object_label_cols, inplace=True)
     #Add prcfo column (plate row column field object)
-    metadata = metadata.assign(prcfo = lambda x: x['plate'] + '_' + x['row'] + '_' +x['col']+ '_' +x['field']+ '_' +x['object_label'])
+    metadata = metadata.assign(prcfo = lambda x: x['plate'] + '_' + x['row_name'] + '_' +x['column_name']+ '_' +x['field']+ '_' +x['object_label'])
     metadata.set_index('prcfo', inplace=True)
     merged_df = metadata.merge(merged_df, left_index=True, right_index=True)
@@ -2517,6 +2407,10 @@ def _copy_missclassified(df):
     return
 def _read_db(db_loc, tables):
+    from .utils import rename_columns_in_db
+    rename_columns_in_db(db_loc)
     conn = sqlite3.connect(db_loc) # Create a connection to the database
     dfs = []
     for table in tables:
@@ -2667,7 +2561,7 @@ def _read_and_merge_data(locs, tables, verbose=False, nuclei_limit=False, pathog
             merged_df = merged_df.merge(pathogens_g_df, left_index=True, right_index=True)
     #Add prc column (plate row column)
-    metadata = metadata.assign(prc = lambda x: x['plate'] + '_' + x['row'] + '_' +x['col'])
+    metadata = metadata.assign(prc = lambda x: x['plate'] + '_' + x['row_name'] + '_' +x['column_name'])
     #Count cells per well
     cells_well = pd.DataFrame(metadata.groupby('prc')['object_label'].nunique())
@@ -2679,7 +2573,7 @@ def _read_and_merge_data(locs, tables, verbose=False, nuclei_limit=False, pathog
     metadata.drop(columns=object_label_cols, inplace=True)
     #Add prcfo column (plate row column field object)
-    metadata = metadata.assign(prcfo = lambda x: x['plate'] + '_' + x['row'] + '_' +x['col']+ '_' +x['field']+ '_' +x['object_label'])
+    metadata = metadata.assign(prcfo = lambda x: x['plate'] + '_' + x['row_name'] + '_' +x['column_name']+ '_' +x['field']+ '_' +x['object_label'])
     metadata.set_index('prcfo', inplace=True)
     merged_df = metadata.merge(merged_df, left_index=True, right_index=True)
@@ -3030,8 +2924,7 @@ def generate_loaders(src, mode='train', image_size=224, batch_size=32, classes=[
 def generate_training_dataset(settings):
     # Function to filter png_list_df by prcfo present in df without merging
-    def filter_png_list(db_path, settings):
-        tables = ['cell', 'nucleus', 'pathogen', 'cytoplasm']
+    def filter_png_list(db_path, settings, tables = ['cell', 'nucleus', 'pathogen', 'cytoplasm']):
         df, _ = _read_and_merge_data(locs=[db_path],
                                      tables=tables,
                                      verbose=False,
@@ -3053,9 +2946,8 @@ def generate_training_dataset(settings):
         return size
     # Measurement-based selection logic
-    def measurement_based_selection(settings, db_path):
+    def measurement_based_selection(settings, db_path, tables = ['cell', 'nucleus', 'pathogen', 'cytoplasm']):
         class_paths_ls = []
-        tables = ['cell', 'nucleus', 'pathogen', 'cytoplasm']
         df, _ = _read_and_merge_data(locs=[db_path],
                                      tables=tables,
                                      verbose=False,
@@ -3068,7 +2960,7 @@ def generate_training_dataset(settings):
                                  treatment_loc=settings['class_metadata'])#, types=settings['metadata_type_by'])
         print('length df 2', len(df))
-        png_list_df = filter_png_list(db_path, settings)
+        png_list_df = filter_png_list(db_path, settings, tables=settings['tables'])
         if settings['custom_measurement']:
             if isinstance(settings['custom_measurement'], list):
@@ -3101,8 +2993,8 @@ def generate_training_dataset(settings):
     # Metadata-based selection logic
     def metadata_based_selection(db_path, settings):
         class_paths_ls = []
-        df = filter_png_list(db_path, settings)
+        df = filter_png_list(db_path, settings, tables=settings['tables'])
         df['metadata_based_class'] = pd.NA
         for i, class_ in enumerate(settings['classes']):
             ls = settings['class_metadata'][i]
@@ -3126,10 +3018,10 @@ def generate_training_dataset(settings):
     def annotation_based_selection(db_path, dst, settings):
         class_paths_ls = training_dataset_from_annotation(db_path, dst, settings['annotation_column'], annotated_classes=settings['annotated_classes'])
-        size = get_smallest_class_size(class_paths_ls, settings, 'annotation')
-        for i, class_paths in enumerate(class_paths_ls):
-            if len(class_paths) > size:
-                class_paths_ls[i] = random.sample(class_paths, size)
+        #size = get_smallest_class_size(class_paths_ls, settings, 'annotation')
+        #for i, class_paths in enumerate(class_paths_ls):
+        #    if len(class_paths) > size:
+        #        class_paths_ls[i] = random.sample(class_paths, size)
         return class_paths_ls
@@ -3137,6 +3029,13 @@ def generate_training_dataset(settings):
     from .utils import get_paths_from_db, annotate_conditions, save_settings
     from .settings import set_generate_training_dataset_defaults
+    if 'nucleus' not in settings['tables']:
+        settings['nuclei_limit'] = False
+    if 'pathogen' not in settings['tables']:
+        settings['pathogen_limit'] = 0
+        settings['uninfected'] = True
     # Set default settings and save
     settings = set_generate_training_dataset_defaults(settings)
     save_settings(settings, 'cv_dataset', show=True)
@@ -3145,6 +3044,7 @@ def generate_training_dataset(settings):
     if isinstance(settings['src'], str):
         src = [settings['src']]
+        settings['src'] = src
     for i, src in enumerate(settings['src']):
         db_path = os.path.join(src, 'measurements', 'measurements.db')
@@ -3170,7 +3070,7 @@ def generate_training_dataset(settings):
             class_paths_ls = metadata_based_selection(db_path, settings)
         elif settings['dataset_mode'] == 'measurement':
-            class_paths_ls = measurement_based_selection(settings, db_path)
+            class_paths_ls = measurement_based_selection(settings, db_path, tables=settings['tables'])
         if class_path_list is None:
             class_path_list = [[] for _ in range(len(class_paths_ls))]
@@ -3180,22 +3080,72 @@ def generate_training_dataset(settings):
             class_path_list[idx].extend(class_paths_ls[idx])
     # Generate and return training and testing directories
+    print('class_path_list',len(class_path_list))
     train_class_dir, test_class_dir = generate_dataset_from_lists(dst, class_data=class_path_list, classes=settings['classes'], test_split=settings['test_split'])
     return train_class_dir, test_class_dir
 def training_dataset_from_annotation(db_path, dst, annotation_column='test', annotated_classes=(1, 2)):
     all_paths = []
     # Connect to the database and retrieve the image paths and annotations
     print(f'Reading DataBase: {db_path}')
     with sqlite3.connect(db_path) as conn:
         cursor = conn.cursor()
-        # Prepare the query with parameterized placeholders for annotated_classes
-        placeholders = ','.join('?' * len(annotated_classes))
-        query = f"SELECT png_path, {annotation_column} FROM png_list WHERE {annotation_column} IN ({placeholders})"
-        cursor.execute(query, annotated_classes)
+        # Retrieve all paths and annotations from the database
+        query = f"SELECT png_path, {annotation_column} FROM png_list"
+        cursor.execute(query)
+        while True:
+            rows = cursor.fetchmany(1000)
+            if not rows:
+                break
+            for row in rows:
+                all_paths.append(row)
+    print('Total paths retrieved:', len(all_paths))
+    # Filter paths based on annotated_classes
+    class_paths = []
+    for class_ in annotated_classes:
+        class_paths_temp = [path for path, annotation in all_paths if annotation == class_]
+        class_paths.append(class_paths_temp)
+        print(f'Found {len(class_paths_temp)} images in class {class_}')
+    # If only one class is provided, create an alternative list by sampling paths from all_paths that are not in the annotated class
+    if len(annotated_classes) == 1:
+        target_class = annotated_classes[0]
+        count_target_class = len(class_paths[0])
+        print(f'Annotated class: {target_class} with {count_target_class} images')
+        # Filter all_paths to exclude paths that belong to the target class
+        alt_class_paths = [path for path, annotation in all_paths if annotation != target_class]
+        print('Alternative paths available:', len(alt_class_paths))
+        # Randomly sample an equal number of images for the second class
+        sampled_alt_class_paths = random.sample(alt_class_paths, min(count_target_class, len(alt_class_paths)))
+        print(f'Sampled {len(sampled_alt_class_paths)} alternative images for balancing')
+        # Append this list as the second class
+        class_paths.append(sampled_alt_class_paths)
+    print(f'Generated a list of lists from annotation of {len(class_paths)} classes')
+    for i, ls in enumerate(class_paths):
+        print(f'Class {i}: {len(ls)} images')
+    return class_paths
+def training_dataset_from_annotation_v2(db_path, dst, annotation_column='test', annotated_classes=(1, 2)):
+    all_paths = []
+    # Connect to the database and retrieve the image paths and annotations
+    print(f'Reading DataBase: {db_path}')
+    with sqlite3.connect(db_path) as conn:
+        cursor = conn.cursor()
+        # Retrieve all paths and annotations from the database
+        query = f"SELECT png_path, {annotation_column} FROM png_list"
+        cursor.execute(query)
         while True:
             rows = cursor.fetchmany(1000)
             if not rows:
@@ -3203,13 +3153,36 @@ def training_dataset_from_annotation(db_path, dst, annotation_column='test', ann
             for row in rows:
                 all_paths.append(row)
-    # Filter paths based on annotation
+    print('Total paths retrieved:', len(all_paths))
+    # Filter paths based on annotated_classes
     class_paths = []
     for class_ in annotated_classes:
         class_paths_temp = [path for path, annotation in all_paths if annotation == class_]
         class_paths.append(class_paths_temp)
+        print(f'Found {len(class_paths_temp)} images in class {class_}')
+    # If only one class is provided, create an alternative list by sampling paths from all_paths that are not in the annotated class
+    if len(annotated_classes) == 1:
+        target_class = annotated_classes[0]
+        count_target_class = len(class_paths[0])
+        print(f'Annotated class: {target_class} with {count_target_class} images')
+        # Filter all_paths to exclude paths that belong to the target class
+        alt_class_paths = [path for path, annotation in all_paths if annotation != target_class]
+        print('Alternative paths available:', len(alt_class_paths))
+        # Randomly sample an equal number of images for the second class
+        sampled_alt_class_paths = random.sample(alt_class_paths, min(count_target_class, len(alt_class_paths)))
+        print(f'Sampled {len(sampled_alt_class_paths)} alternative images for balancing')
+        # Append this list as the second class
+        class_paths.append(sampled_alt_class_paths)
     print(f'Generated a list of lists from annotation of {len(class_paths)} classes')
+    for i, ls in enumerate(class_paths):
+        print(f'Class {i}: {len(ls)} images')
     return class_paths
 def generate_dataset_from_lists(dst, class_data, classes, test_split=0.1):
@@ -3228,8 +3201,9 @@ def generate_dataset_from_lists(dst, class_data, classes, test_split=0.1):
         test_class_dir = os.path.join(dst, f'test/{cls}')
         os.makedirs(train_class_dir, exist_ok=True)
         os.makedirs(test_class_dir, exist_ok=True)
         # Split the data
+        print('data',len(data), test_split)
         train_data, test_data = train_test_split(data, test_size=test_split, shuffle=True, random_state=42)
         # Copy train files

spacr/measure.py CHANGED Viewed

@@ -16,6 +16,7 @@ from skimage.util import img_as_bool
 import matplotlib.pyplot as plt
 from math import ceil, sqrt
 def get_components(cell_mask, nucleus_mask, pathogen_mask):
     """
     Get the components (nucleus and pathogens) for each cell in the given masks.
@@ -761,12 +762,10 @@ def _measure_crop_core(index, time_ls, file, settings):
         if settings['cytoplasm_min_size'] is not None and settings['cytoplasm_min_size'] != 0:
             cytoplasm_mask = _filter_object(cytoplasm_mask, settings['cytoplasm_min_size'])
-        if settings['cell_mask_dim'] is not None:
+        if settings['cell_mask_dim'] is not None and settings['nucleus_mask_dim'] is not None and settings['pathogen_mask_dim'] is not None:
             cell_mask, nucleus_mask, pathogen_mask, cytoplasm_mask = _exclude_objects(cell_mask, nucleus_mask, pathogen_mask, cytoplasm_mask, uninfected=settings['uninfected'])
-        # Update data with the new masks
-        if settings['cell_mask_dim'] is not None:
             data[:, :, settings['cell_mask_dim']] = cell_mask.astype(data_type)
         if settings['nucleus_mask_dim'] is not None:
             data[:, :, settings['nucleus_mask_dim']] = nucleus_mask.astype(data_type)
         if settings['pathogen_mask_dim'] is not None:
@@ -779,7 +778,6 @@ def _measure_crop_core(index, time_ls, file, settings):
             figs[f'{file_name}__after_filtration'] = fig
         if settings['save_measurements']:
             cell_df, nucleus_df, pathogen_df, cytoplasm_df = _morphological_measurements(cell_mask, nucleus_mask, pathogen_mask, cytoplasm_mask, settings)
             #if settings['skeleton']:
@@ -789,7 +787,6 @@ def _measure_crop_core(index, time_ls, file, settings):
             cell_intensity_df, nucleus_intensity_df, pathogen_intensity_df, cytoplasm_intensity_df = _intensity_measurements(cell_mask, nucleus_mask, pathogen_mask, cytoplasm_mask, channel_arrays, settings, sizes=[1, 2, 3, 4, 5], periphery=True, outside=True)
             if settings['cell_mask_dim'] is not None:
                 cell_merged_df = _merge_and_save_to_database(cell_df, cell_intensity_df, 'cell', source_folder, file_name, settings['experiment'], settings['timelapse'])
             if settings['nucleus_mask_dim'] is not None:
                 nucleus_merged_df = _merge_and_save_to_database(nucleus_df, nucleus_intensity_df, 'nucleus', source_folder, file_name, settings['experiment'], settings['timelapse'])
@@ -800,7 +797,6 @@ def _measure_crop_core(index, time_ls, file, settings):
                 cytoplasm_merged_df = _merge_and_save_to_database(cytoplasm_df, cytoplasm_intensity_df, 'cytoplasm', source_folder, file_name, settings['experiment'], settings['timelapse'])
         if settings['save_png'] or settings['save_arrays'] or settings['plot']:
             if isinstance(settings['dialate_pngs'], bool):
                 dialate_pngs = [settings['dialate_pngs'], settings['dialate_pngs'], settings['dialate_pngs']]
             if isinstance(settings['dialate_pngs'], list):
@@ -825,13 +821,15 @@ def _measure_crop_core(index, time_ls, file, settings):
                 if len(crop_ls) != len(size_ls):
                     print(f"Setting: size_ls: {settings['png_size']} should be a list of integers, or a list of lists of integers if crop_ls: {settings['crop_mode']} has multiple elements")
                 for crop_idx, crop_mode in enumerate(crop_ls):
                     width, height = size_ls[crop_idx]
                     if crop_mode == 'cell':
                         crop_mask = cell_mask.copy()
                         dialate_png = dialate_pngs[crop_idx]
                         dialate_png_ratio = dialate_png_ratios[crop_idx]
                     elif crop_mode == 'nucleus':
                         crop_mask = nucleus_mask.copy()
                         dialate_png = dialate_pngs[crop_idx]
@@ -852,7 +850,7 @@ def _measure_crop_core(index, time_ls, file, settings):
                     for _id in objects_in_image:
-                        region = (crop_mask == _id)  # This creates a boolean mask for the region of interest
+                        region = (crop_mask == _id)
                         # Use the boolean mask to filter the cell_mask and then find unique IDs
                         region_cell_ids = np.atleast_1d(np.unique(cell_mask[region]))
@@ -947,7 +945,7 @@ def measure_crop(settings):
     from .io import _save_settings_to_db
     from .timelapse import _timelapse_masks_to_gif
-    from .utils import measure_test_mode, print_progress
+    from .utils import measure_test_mode, print_progress, save_settings
     from .settings import get_measure_crop_settings
     if not isinstance(settings['src'], (str, list)):
@@ -1032,9 +1030,10 @@ def measure_crop(settings):
                 settings['crop_mode'] = [settings['crop_mode']]
                 settings['crop_mode'] = [str(crop_mode) for crop_mode in settings['crop_mode']]
                 print(f"Converted crop_mode to list: {settings['crop_mode']}")
-                return
             _save_settings_to_db(settings)
+            #save_settings(settings, name='measure_crop', show=True)
             files = [f for f in os.listdir(settings['src']) if f.endswith('.npy')]
             n_jobs = settings['n_jobs']
             print(f'using {n_jobs} cpu cores')

spacr 0.3.52__py3-none-any.whl → 0.3.55__py3-none-any.whl

spacr 0.3.52__py3-none-any.whl → 0.3.55__py3-none-any.whl