spacr 0.3.80__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to a supported public registry. It is provided for informational purposes only.
spacr/gui_core.py CHANGED
@@ -4,6 +4,7 @@ from tkinter import ttk
 from tkinter import filedialog
 from multiprocessing import Process, Value, Queue, set_start_method
 from tkinter import ttk
+import matplotlib
 from matplotlib.figure import Figure
 from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
 import numpy as np
@@ -323,52 +324,48 @@ def show_next_figure():
         index_control.set(figure_index)
         index_control.set_to(len(figures) - 1)
         display_figure(fig)
-
+
 def process_fig_queue():
     global canvas, fig_queue, canvas_widget, parent_frame, uppdate_frequency, figures, figure_index, index_control
     from .gui_elements import standardize_figure
 
-    #print("process_fig_queue called", flush=True)
     try:
-        got_new_figure = False
         while not fig_queue.empty():
             fig = fig_queue.get_nowait()
-            #print("Got a figure from fig_queue", flush=True)
-
             if fig is None:
-                print("Warning: Retrieved a None figure from fig_queue.", flush=True)
+                print("Warning: Retrieved a None figure from fig_queue.")
                 continue
 
             # Standardize the figure appearance before adding it
             fig = standardize_figure(fig)
             figures.append(fig)
 
+            # OPTIONAL: Cap the size of the figures deque at 100
+            MAX_FIGURES = 100
+            while len(figures) > MAX_FIGURES:
+                # Discard the oldest figure
+                old_fig = figures.popleft()
+                # If needed, you could also close the figure to free memory:
+                matplotlib.pyplot.close(old_fig)
+
             # Update slider maximum
             index_control.set_to(len(figures) - 1)
-            #print("New maximum slider value after adding a figure:", index_control.to, flush=True)
 
             # If no figure has been displayed yet
             if figure_index == -1:
                 figure_index = 0
                 display_figure(figures[figure_index])
                 index_control.set(figure_index)
-                #print("Displayed the first figure and set slider value to 0", flush=True)
-
-        #got_new_figure = True
-
-        #if not got_new_figure:
-            # No new figures this time
-            #print("No new figures found in the queue this iteration.", flush=True)
 
     except Exception as e:
-        print("Exception in process_fig_queue:", e, flush=True)
+        print("Exception in process_fig_queue:", e)
         traceback.print_exc()
 
     finally:
         # Schedule process_fig_queue() to run again
         after_id = canvas_widget.after(uppdate_frequency, process_fig_queue)
         parent_frame.after_tasks.append(after_id)
-        #print("process_fig_queue scheduled again", flush=True)
+
 
 def update_figure(value):
     from .gui_elements import standardize_figure
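
The new cap on the `figures` deque is the key change here: without closing evicted figures, each queued plot keeps its canvas alive and the GUI's memory grows without bound. A minimal standalone sketch of the same pattern (names such as `add_figure` are illustrative, not spacr API):

```python
# Bounded figure cache: keep only the most recent figures and close evicted
# ones so matplotlib releases their canvases and buffers.
from collections import deque

import matplotlib
matplotlib.use("Agg")  # headless backend, so the sketch runs anywhere
import matplotlib.pyplot as plt

MAX_FIGURES = 100
figures = deque()

def add_figure(fig):
    """Append a figure, evicting (and closing) the oldest beyond the cap."""
    figures.append(fig)
    while len(figures) > MAX_FIGURES:
        old_fig = figures.popleft()
        plt.close(old_fig)  # frees the figure's resources

for i in range(150):
    fig, ax = plt.subplots()
    ax.plot([0, 1], [0, i])
    add_figure(fig)

print(len(figures))  # 100: only the most recent figures are retained
```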
@@ -513,7 +510,7 @@ def import_settings(settings_type='mask'):
     #vars_dict = hide_all_settings(vars_dict, categories=None)
     csv_settings = read_settings_from_csv(csv_file_path)
     if settings_type == 'mask':
-        settings = set_default_settings_preprocess_generate_masks(src='path', settings={})
+        settings = set_default_settings_preprocess_generate_masks(settings={})
     elif settings_type == 'measure':
         settings = get_measure_crop_settings(settings={})
     elif settings_type == 'classify':
@@ -565,7 +562,7 @@ def setup_settings_panel(vertical_container, settings_type='mask'):
     settings_frame.grid_columnconfigure(0, weight=1)
 
     if settings_type == 'mask':
-        settings = set_default_settings_preprocess_generate_masks(src='path', settings={})
+        settings = set_default_settings_preprocess_generate_masks(settings={})
     elif settings_type == 'measure':
         settings = get_measure_crop_settings(settings={})
     elif settings_type == 'classify':
@@ -881,7 +878,7 @@ def start_process(q=None, fig_queue=None, settings_type='mask'):
             q.put(f"Error: {e}")
             return
 
-    if thread_control.get("run_thread") is not None:
+    if isinstance(thread_control, dict) and thread_control.get("run_thread") is not None:
         initiate_abort()
 
     stop_requested = Value('i', 0)
@@ -987,6 +984,66 @@ def main_thread_update_function(root, q, fig_queue, canvas_widget):
         print(f"Error updating GUI canvas: {e}")
     finally:
         root.after(uppdate_frequency, lambda: main_thread_update_function(root, q, fig_queue, canvas_widget))
+
+def cleanup_previous_instance():
+    """
+    Cleans up resources from the previous application instance.
+    """
+    global parent_frame, usage_bars, figures, figure_index, thread_control, canvas, q, fig_queue
+
+    # 1. Cancel all pending `after` tasks while parent_frame is still available
+    if parent_frame is not None:
+        parent_window = parent_frame.winfo_toplevel()
+        if hasattr(parent_window, 'after_tasks'):
+            for after_id in parent_window.after_tasks:
+                parent_window.after_cancel(after_id)
+            parent_window.after_tasks = []
+
+    # 2. Destroy all widgets in the parent frame
+    if parent_frame is not None:
+        for widget in parent_frame.winfo_children():
+            try:
+                widget.destroy()
+            except Exception as e:
+                print(f"Error destroying widget: {e}")
+        parent_frame.update_idletasks()
+        parent_frame = None
+
+    # 3. Clear global queues
+    if q is not None:
+        while not q.empty():
+            q.get()
+        q = None
+
+    if fig_queue is not None:
+        while not fig_queue.empty():
+            fig_queue.get()
+        fig_queue = None
+
+    # 4. Stop and reset global thread control
+    if thread_control is not None:
+        thread_control['stop'] = True
+        #thread_control = None
+
+    # 5. Reset usage bars, figures, and indices
+    usage_bars = []
+    figures = deque()
+    figure_index = -1
+
+    # 6. Clear canvas or other visualizations
+    if canvas is not None:
+        try:
+            if hasattr(canvas, 'figure'):  # Check if it's a FigureCanvasTkAgg
+                canvas.figure.clear()  # Clear the Matplotlib figure
+                canvas.get_tk_widget().destroy()  # Destroy the Tkinter widget
+            else:
+                # Assume it's a standard Tkinter Canvas
+                canvas.delete("all")
+        except Exception as e:
+            print(f"Error clearing canvas: {e}")
+        canvas = None
+
+    print("Previous instance cleaned up successfully.")
 
 def initiate_root(parent, settings_type='mask'):
     """
@@ -1002,7 +1059,11 @@ def initiate_root(parent, settings_type='mask'):
 
     global q, fig_queue, thread_control, parent_frame, scrollable_frame, button_frame, vars_dict, canvas, canvas_widget, button_scrollable_frame, progress_bar, uppdate_frequency, figures, figure_index, index_control, usage_bars
 
-    from .gui_utils import setup_frame, get_screen_dimensions
+    # Clean up any previous instance
+    cleanup_previous_instance()
+
+    from .gui_utils import setup_frame
+    from .gui_elements import create_menu_bar
     from .settings import descriptions
     #from .openai import Chatbot
@@ -1065,6 +1126,7 @@ def initiate_root(parent, settings_type='mask'):
 
     process_console_queue()
     process_fig_queue()
+    create_menu_bar(parent)
     after_id = parent_window.after(uppdate_frequency, lambda: main_thread_update_function(parent_window, q, fig_queue, canvas_widget))
     parent_window.after_tasks.append(after_id)
 
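`initiate_root` now calls `cleanup_previous_instance()` before building a new UI. The ordering constraint that function must respect — cancel pending `after` callbacks before destroying the widgets they reference, or Tk fires callbacks against dead widgets — can be shown in a small self-contained Tk sketch (widget names and timings here are illustrative, not spacr's):

```python
import tkinter as tk

def build(root):
    """Build a frame that schedules a repeating after-callback on itself."""
    frame = tk.Frame(root)
    frame.pack()
    frame.after_tasks = []  # track ids so they can be cancelled later

    def tick():
        after_id = frame.after(100, tick)
        frame.after_tasks.append(after_id)
    tick()
    return frame

def cleanup(frame):
    """Cancel pending callbacks first, then destroy widgets."""
    for after_id in frame.after_tasks:
        frame.after_cancel(after_id)  # must happen while the widget exists
    frame.after_tasks.clear()
    for widget in frame.winfo_children():
        widget.destroy()
    frame.destroy()

root = tk.Tk()
frame = build(root)
root.after(500, lambda: (cleanup(frame), root.destroy()))
root.mainloop()
```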
spacr/gui_elements.py CHANGED
@@ -7,6 +7,7 @@ from tkinter import font
 from queue import Queue
 from tkinter import Label, Frame, Button
 import numpy as np
+import pandas as pd
 from PIL import Image, ImageOps, ImageTk, ImageDraw, ImageFont, ImageEnhance
 from concurrent.futures import ThreadPoolExecutor
 from skimage.exposure import rescale_intensity
@@ -17,10 +18,28 @@ from skimage.draw import polygon, line
 from skimage.transform import resize
 from scipy.ndimage import binary_fill_holes, label
 from tkinter import ttk, scrolledtext
-from skimage.color import rgb2gray
+from sklearn.model_selection import train_test_split
+from xgboost import XGBClassifier
+from sklearn.metrics import classification_report, confusion_matrix
 
 fig = None
 
+def restart_gui_app(root):
+    """
+    Restarts the GUI application by destroying the current instance
+    and launching a fresh one.
+    """
+    try:
+        # Destroy the current root window
+        root.destroy()
+
+        # Import and launch a new instance of the application
+        from spacr.gui import gui_app
+        new_root = tk.Tk()  # Create a fresh Tkinter root instance
+        gui_app()
+    except Exception as e:
+        print(f"Error restarting GUI application: {e}")
+
 def create_menu_bar(root):
     from .gui import initiate_root
     gui_apps = {
@@ -56,6 +75,7 @@ def create_menu_bar(root):
 
     # Add a separator and an exit option
     app_menu.add_separator()
+    #app_menu.add_command(label="Home", command=lambda: restart_gui_app(root))
     app_menu.add_command(label="Help", command=lambda: webbrowser.open("https://spacr.readthedocs.io/en/latest/?badge=latest"))
     app_menu.add_command(label="Exit", command=root.quit)
 
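The commented-out Home entry would wire `restart_gui_app` into this menu. A self-contained sketch of the underlying destroy-and-relaunch pattern (`build_app` is a hypothetical stand-in for `spacr.gui.gui_app`):

```python
import tkinter as tk

def build_app():
    """Build a root window with a menu that can restart the app."""
    root = tk.Tk()
    menubar = tk.Menu(root)
    app_menu = tk.Menu(menubar, tearoff=0)
    app_menu.add_command(label="Restart", command=lambda: restart(root))
    app_menu.add_command(label="Exit", command=root.quit)
    menubar.add_cascade(label="App", menu=app_menu)
    root.config(menu=menubar)
    return root

def restart(root):
    # Tear down the current instance, then launch a fresh one — the same
    # order restart_gui_app uses (destroy first, then rebuild).
    root.destroy()
    build_app().mainloop()

build_app().mainloop()
```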
@@ -2201,7 +2221,8 @@ class AnnotateApp:
             self.image_size = (image_size, image_size)
         else:
             raise ValueError("Invalid image size")
-
+
+        self.orig_annotation_columns = annotation_column
         self.annotation_column = annotation_column
         self.image_type = image_type
         self.channels = channels
@@ -2258,6 +2279,12 @@ class AnnotateApp:
 
         self.exit_button = Button(self.button_frame, text="Exit", command=self.shutdown, bg=self.bg_color, fg=self.fg_color, highlightbackground=self.fg_color, highlightcolor=self.fg_color, highlightthickness=1)
         self.exit_button.pack(side="right", padx=5)
+
+        self.train_button = Button(self.button_frame, text="Train & Classify (beta)", command=self.train_and_classify, bg=self.bg_color, fg=self.fg_color, highlightbackground=self.fg_color, highlightcolor=self.fg_color, highlightthickness=1)
+        self.train_button.pack(side="right", padx=5)
+
+        self.orig_button = Button(self.button_frame, text="orig.", command=self.swich_back_annotation_column, bg=self.bg_color, fg=self.fg_color, highlightbackground=self.fg_color, highlightcolor=self.fg_color, highlightthickness=1)
+        self.orig_button.pack(side="right", padx=5)
 
         # Calculate grid rows and columns based on the root window size and image size
         self.calculate_grid_dimensions()
@@ -2280,7 +2307,12 @@ class AnnotateApp:
             self.grid_frame.grid_rowconfigure(row, weight=1)
         for col in range(self.grid_cols):
             self.grid_frame.grid_columnconfigure(col, weight=1)
-
+
+    def swich_back_annotation_column(self):
+        self.annotation_column = self.orig_annotation_columns
+        self.prefilter_paths_annotations()
+        self.update_display()
+
     def calculate_grid_dimensions(self):
         window_width = self.root.winfo_width()
         window_height = self.root.winfo_height()
@@ -2603,6 +2635,163 @@ class AnnotateApp:
             print(f'Quit application')
         else:
             print('Waiting for pending updates to finish before quitting')
+
+    def train_and_classify(self):
+        """
+        1) Merge data from the relevant DB tables (including png_list).
+        2) Collect manual annotations from png_list.<annotation_column> => 'manual_annotation'.
+           - 1 => class=1, 2 => class=0 (for training).
+        3) If only one class is present, randomly sample unannotated images as the other class.
+        4) Train an XGBoost model.
+        5) Classify *all* rows -> fill XGboost_score (prob of class=1) & XGboost_annotation (1 or 2 if high confidence).
+        6) Write those columns back to sqlite, so every row in png_list has a score (and possibly an annotation).
+        7) Refresh the UI (prefilter_paths_annotations + load_images).
+        """
+
+        # Optionally, update your GUI status label
+        self.update_gui_text("Merging data...")
+
+        from spacr.io import _read_and_merge_data  # Adapt to your actual import
+
+        # (1) Merge data
+        merged_df, obj_df_ls = _read_and_merge_data(
+            locs=[self.db_path],
+            tables=['cell', 'cytoplasm', 'nucleus', 'pathogen', 'png_list'],
+            verbose=False
+        )
+
+        # (2) Load manual annotations from the DB
+        conn = sqlite3.connect(self.db_path)
+        c = conn.cursor()
+        c.execute(f"SELECT png_path, {self.annotation_column} FROM png_list WHERE {self.annotation_column} IS NOT NULL")
+        annotated_rows = c.fetchall()  # e.g. [(png_path, 1 or 2), ...]
+        conn.close()
+
+        # dict {png_path -> 1 or 2}
+        annot_dict = dict(annotated_rows)
+
+        # Add 'manual_annotation' to merged_df
+        merged_df['manual_annotation'] = merged_df['png_path'].map(annot_dict)
+
+        # Subset with manual labels
+        annotated_df = merged_df.dropna(subset=['manual_annotation']).copy()
+        # Convert "2" => "0" for binary classification
+        annotated_df['manual_annotation'] = annotated_df['manual_annotation'].replace({2: 0}).astype(int)
+
+        # (3) Handle single-class scenario
+        class_counts = annotated_df['manual_annotation'].value_counts()
+        if len(class_counts) == 1:
+            single_class = class_counts.index[0]  # 0 or 1
+            needed = class_counts.iloc[0]
+            other_class = 1 if single_class == 0 else 0
+
+            unannotated_df_all = merged_df[merged_df['manual_annotation'].isna()].copy()
+            if len(unannotated_df_all) == 0:
+                print("No unannotated rows to sample for the other class. Cannot proceed.")
+                self.update_gui_text("Not enough data to train (no second class).")
+                return
+
+            sample_size = min(needed, len(unannotated_df_all))
+            artificially_labeled = unannotated_df_all.sample(n=sample_size, replace=False).copy()
+            artificially_labeled['manual_annotation'] = other_class
+
+            annotated_df = pd.concat([annotated_df, artificially_labeled], ignore_index=True)
+            print(f"Only one class was present => randomly labeled {sample_size} unannotated rows as {other_class}.")
+
+        if len(annotated_df) < 2:
+            print("Not enough annotated data to train (need at least 2).")
+            self.update_gui_text("Not enough data to train.")
+            return
+
+        # (4) Train XGBoost
+        self.update_gui_text("Training XGBoost model...")
+
+        # Identify numeric columns
+        ignore_cols = {'png_path', 'manual_annotation'}
+        feature_cols = [
+            col for col in annotated_df.columns
+            if col not in ignore_cols
+            and (annotated_df[col].dtype == float or annotated_df[col].dtype == int)
+        ]
+
+        X_data = annotated_df[feature_cols].fillna(0).values
+        y_data = annotated_df['manual_annotation'].values
+
+        # standard train/test split
+        X_train, X_test, y_train, y_test = train_test_split(
+            X_data, y_data, test_size=0.1, random_state=42
+        )
+        model = XGBClassifier(use_label_encoder=False, eval_metric='logloss')
+        model.fit(X_train, y_train)
+
+        # Evaluate
+        preds = model.predict(X_test)
+        print("=== Classification Report ===")
+        print(classification_report(y_test, preds))
+        print("=== Confusion Matrix ===")
+        print(confusion_matrix(y_test, preds))
+
+        # (5) Classify ALL rows
+        all_df = merged_df.copy()
+        X_all = all_df[feature_cols].fillna(0).values
+        probs_all = model.predict_proba(X_all)[:, 1]
+        # Probability => XGboost_score
+        all_df['XGboost_score'] = probs_all
+
+        # Decide XGboost_annotation
+        def get_annotation_from_prob(prob):
+            if prob > 0.9:
+                return 1  # class=1
+            elif prob < 0.1:
+                return 0  # class=0
+            return None  # uncertain
+
+        xgb_anno_col = [get_annotation_from_prob(p) for p in probs_all]
+        # Convert 0 => 2 if your DB uses "2" for the negative class
+        xgb_anno_col = [2 if x == 0 else x for x in xgb_anno_col]
+
+        all_df['XGboost_annotation'] = xgb_anno_col
+
+        # (6) Write results back to png_list
+        self.update_gui_text("Updating the database with XGBoost predictions...")
+        conn = sqlite3.connect(self.db_path)
+        c = conn.cursor()
+        # Ensure columns exist
+        try:
+            c.execute("ALTER TABLE png_list ADD COLUMN XGboost_annotation INTEGER")
+        except sqlite3.OperationalError:
+            pass
+        try:
+            c.execute("ALTER TABLE png_list ADD COLUMN XGboost_score FLOAT")
+        except sqlite3.OperationalError:
+            pass
+
+        # Update each row
+        for idx, row in all_df.iterrows():
+            score_val = float(row['XGboost_score'])
+            anno_val = row['XGboost_annotation']
+            the_path = row['png_path']
+            if pd.isna(the_path):
+                continue  # skip if no path
+
+            if pd.isna(anno_val):
+                # We set annotation=NULL but do set the score
+                c.execute("""
+                    UPDATE png_list
+                    SET XGboost_annotation = NULL,
+                        XGboost_score = ?
+                    WHERE png_path = ?
+                """, (score_val, the_path))
+            else:
+                # numeric annotation + numeric score
+                c.execute("""
+                    UPDATE png_list
+                    SET XGboost_annotation = ?,
+                        XGboost_score = ?
+                    WHERE png_path = ?
+                """, (int(anno_val), score_val, the_path))
+
+        conn.commit()
+        conn.close()
+
+        # (7) Switch to the XGBoost annotations and refresh the UI
+        self.annotation_column = 'XGboost_annotation'
+        self.prefilter_paths_annotations()
+        self.load_images()
 
 def standardize_figure(fig):
     from .gui_elements import set_dark_style
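
The heart of `train_and_classify` is a train-then-pseudo-label recipe: fit a classifier on the manual 1/2 annotations, score every row, and persist only high-confidence predictions as annotations. A toy, self-contained run of the same recipe on synthetic features (the 0.9/0.1 thresholds and the 1/2 label encoding follow the method above; the data and dimensions are made up for illustration):

```python
import numpy as np
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier

rng = np.random.default_rng(42)
X = rng.normal(size=(200, 5))
y = (X[:, 0] + 0.5 * X[:, 1] > 0).astype(int)  # stand-in for manual labels

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
model = XGBClassifier(eval_metric='logloss')
model.fit(X_train, y_train)

probs = model.predict_proba(X)[:, 1]  # probability of class 1 for every row

def to_db_annotation(prob):
    """Map a probability to the DB's 1/2 encoding; None means 'uncertain'."""
    if prob > 0.9:
        return 1   # confident class 1
    if prob < 0.1:
        return 2   # confident class 0, stored as 2 in the DB
    return None

annotations = [to_db_annotation(p) for p in probs]
print(sum(a is not None for a in annotations), "of", len(annotations), "rows auto-annotated")
```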
spacr/gui_utils.py CHANGED
@@ -482,7 +482,7 @@ def run_function_gui(settings_type, settings, q, fig_queue, stop_requested):
 
     if settings_type == 'mask':
         function = preprocess_generate_masks
-        imports = 2
+        imports = 1
     elif settings_type == 'measure':
         function = measure_crop
         imports = 1
spacr/io.py CHANGED
@@ -1773,7 +1773,7 @@ def _read_and_join_tables(db_path, table_names=['cell', 'cytoplasm', 'nucleus',
             print(e)
     conn.close()
     if 'png_list' in dataframes:
-        png_list_df = dataframes['png_list'][['cell_id', 'png_path', 'plate', 'row_name', 'column_name']].copy()
+        png_list_df = dataframes['png_list'][['cell_id', 'png_path', 'plate', 'row_name', 'column_name', 'field']].copy()
         png_list_df['cell_id'] = png_list_df['cell_id'].str[1:].astype(int)
         png_list_df.rename(columns={'cell_id': 'object_label'}, inplace=True)
         if 'cell' in dataframes:
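
The added 'field' column matters because downstream merges key every object on a `prcfo` (plate_row_column_field_object) id, as the merge code below shows. A toy illustration with hypothetical values:

```python
import pandas as pd

# Hypothetical png_list rows; the column set mirrors the selection above.
png_list_df = pd.DataFrame({
    'cell_id': ['o1', 'o2'],
    'png_path': ['/a.png', '/b.png'],
    'plate': ['p1', 'p1'],
    'row_name': ['r1', 'r1'],
    'column_name': ['c1', 'c2'],
    'field': ['f1', 'f1'],
})

# Build the prcfo key; without 'field' this id cannot be constructed.
png_list_df['prcfo'] = (
    png_list_df['plate'] + '_' + png_list_df['row_name'] + '_'
    + png_list_df['column_name'] + '_' + png_list_df['field'] + '_'
    + png_list_df['cell_id']
)
print(png_list_df['prcfo'].tolist())  # ['p1_r1_c1_f1_o1', 'p1_r1_c2_f1_o2']
```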
@@ -2275,175 +2275,6 @@ def _read_db(db_loc, tables):
         dfs.append(df)
     conn.close()  # Close the connection
     return dfs
-
-def _read_and_merge_data_v1(locs, tables, verbose=False, nuclei_limit=False, pathogen_limit=False):
-
-    from .utils import _split_data
-
-    # Extract plate DataFrames
-    all_dfs = []
-    for loc in locs:
-        db_dfs = _read_db(loc, tables)
-        all_dfs.append(db_dfs)
-
-    # Extract tables from DataFrames and concatenate rows
-    for i, dfs in enumerate(all_dfs):
-        if 'cell' in tables:
-            cell = dfs[0]
-            if verbose:
-                print(f'plate: {i+1} cells:{len(cell)}')
-            # see pathogens logic, copy logic to other tables #here
-        if 'nucleus' in tables:
-            nucleus = dfs[1]
-            if verbose:
-                print(f'plate: {i+1} nucleus:{len(nucleus)} ')
-
-        if 'pathogen' in tables:
-            if len(tables) == 1:
-                pathogen = dfs[0]
-                print(len(pathogen))
-            else:
-                pathogen = dfs[2]
-            if verbose:
-                print(f'plate: {i+1} pathogens:{len(pathogen)}')
-
-        if 'cytoplasm' in tables:
-            if not 'pathogen' in tables:
-                cytoplasm = dfs[2]
-            else:
-                cytoplasm = dfs[3]
-            if verbose:
-                print(f'plate: {i+1} cytoplasms: {len(cytoplasm)}')
-
-        if i > 0:
-            if 'cell' in tables:
-                cells = pd.concat([cells, cell], axis=0)
-            if 'nucleus' in tables:
-                nucleus = pd.concat([nucleus, nucleus], axis=0)
-            if 'pathogen' in tables:
-                pathogens = pd.concat([pathogens, pathogen], axis=0)
-            if 'cytoplasm' in tables:
-                cytoplasms = pd.concat([cytoplasms, cytoplasm], axis=0)
-        else:
-            if 'cell' in tables:
-                cells = cell.copy()
-            if 'nucleus' in tables:
-                nucleus = nucleus.copy()
-            if 'pathogen' in tables:
-                pathogens = pathogen.copy()
-            if 'cytoplasm' in tables:
-                cytoplasms = cytoplasm.copy()
-
-    # Add an 'o' in front of all object and cell labels to convert them to strings
-    if 'cell' in tables:
-        cells = cells.assign(object_label=lambda x: 'o' + x['object_label'].astype(int).astype(str))
-        cells = cells.assign(prcfo=lambda x: x['prcf'] + '_' + x['object_label'])
-        cells_g_df, metadata = _split_data(cells, 'prcfo', 'object_label')
-        merged_df = cells_g_df.copy()
-        if verbose:
-            print(f'cells: {len(cells)}')
-            print(f'cells grouped: {len(cells_g_df)}')
-
-    if 'cytoplasm' in tables:
-        cytoplasms = cytoplasms.assign(object_label=lambda x: 'o' + x['object_label'].astype(int).astype(str))
-        cytoplasms = cytoplasms.assign(prcfo=lambda x: x['prcf'] + '_' + x['object_label'])
-        cytoplasms_g_df, _ = _split_data(cytoplasms, 'prcfo', 'object_label')
-        merged_df = cells_g_df.merge(cytoplasms_g_df, left_index=True, right_index=True)
-        if verbose:
-            print(f'cytoplasms: {len(cytoplasms)}')
-            print(f'cytoplasms grouped: {len(cytoplasms_g_df)}')
-
-    if 'nucleus' in tables:
-        if not 'cell' in tables:
-            cells_g_df = pd.DataFrame()
-        nucleus = nucleus.dropna(subset=['cell_id'])
-        nucleus = nucleus.assign(object_label=lambda x: 'o' + x['object_label'].astype(int).astype(str))
-        nucleus = nucleus.assign(cell_id=lambda x: 'o' + x['cell_id'].astype(int).astype(str))
-        nucleus = nucleus.assign(prcfo=lambda x: x['prcf'] + '_' + x['cell_id'])
-        nucleus['nucleus_prcfo_count'] = nucleus.groupby('prcfo')['prcfo'].transform('count')
-        if nuclei_limit == False:
-            nucleus = nucleus[nucleus['nucleus_prcfo_count'] == 1]
-        nucleus_g_df, _ = _split_data(nucleus, 'prcfo', 'cell_id')
-        if verbose:
-            print(f'nucleus: {len(nucleus)}')
-            print(f'nucleus grouped: {len(nucleus_g_df)}')
-        if 'cytoplasm' in tables:
-            merged_df = merged_df.merge(nucleus_g_df, left_index=True, right_index=True)
-        else:
-            merged_df = cells_g_df.merge(nucleus_g_df, left_index=True, right_index=True)
-
-    if 'pathogen' in tables:
-        if not 'cell' in tables:
-            cells_g_df = pd.DataFrame()
-            merged_df = []
-        try:
-            pathogens = pathogens.dropna(subset=['cell_id'])
-
-        except:
-            pathogens['cell_id'] = pathogens['object_label']
-            pathogens = pathogens.dropna(subset=['cell_id'])
-
-        pathogens = pathogens.assign(object_label=lambda x: 'o' + x['object_label'].astype(int).astype(str))
-        pathogens = pathogens.assign(cell_id=lambda x: 'o' + x['cell_id'].astype(int).astype(str))
-        pathogens = pathogens.assign(prcfo=lambda x: x['prcf'] + '_' + x['cell_id'])
-        pathogens['pathogen_prcfo_count'] = pathogens.groupby('prcfo')['prcfo'].transform('count')
-
-        if isinstance(pathogen_limit, bool):
-            if pathogen_limit == False:
-                pathogens = pathogens[pathogens['pathogen_prcfo_count'] <= 1]
-                print(f"after multiinfected Bool: {len(pathogens)}")
-        if isinstance(pathogen_limit, float):
-            pathogen_limit = int(pathogen_limit)
-        if isinstance(pathogen_limit, int):
-            pathogens = pathogens[pathogens['pathogen_prcfo_count'] <= pathogen_limit]
-            print(f"after multiinfected Float: {len(pathogens)}")
-        if not 'cell' in tables:
-            pathogens_g_df, metadata = _split_data(pathogens, 'prcfo', 'cell_id')
-        else:
-            pathogens_g_df, _ = _split_data(pathogens, 'prcfo', 'cell_id')
-
-        if verbose:
-            print(f'pathogens: {len(pathogens)}')
-            print(f'pathogens grouped: {len(pathogens_g_df)}')
-
-        if len(merged_df) == 0:
-            merged_df = pathogens_g_df
-        else:
-            merged_df = merged_df.merge(pathogens_g_df, left_index=True, right_index=True)
-
-    # Add prc column (plate row column)
-    metadata = metadata.assign(prc=lambda x: x['plate'] + '_' + x['row_name'] + '_' + x['column_name'])
-
-    # Count cells per well
-    cells_well = pd.DataFrame(metadata.groupby('prc')['object_label'].nunique())
-
-    cells_well.reset_index(inplace=True)
-    cells_well.rename(columns={'object_label': 'cells_per_well'}, inplace=True)
-    metadata = pd.merge(metadata, cells_well, on='prc', how='inner', suffixes=('', '_drop_col'))
-    object_label_cols = [col for col in metadata.columns if '_drop_col' in col]
-    metadata.drop(columns=object_label_cols, inplace=True)
-
-    # Add prcfo column (plate row column field object)
-    metadata = metadata.assign(prcfo=lambda x: x['plate'] + '_' + x['row_name'] + '_' + x['column_name'] + '_' + x['field'] + '_' + x['object_label'])
-    metadata.set_index('prcfo', inplace=True)
-
-    merged_df = metadata.merge(merged_df, left_index=True, right_index=True)
-
-    merged_df = merged_df.dropna(axis=1)
-    if verbose:
-        print(f'Generated dataframe with: {len(merged_df.columns)} columns and {len(merged_df)} rows')
-
-    obj_df_ls = []
-    if 'cell' in tables:
-        obj_df_ls.append(cells)
-    if 'cytoplasm' in tables:
-        obj_df_ls.append(cytoplasms)
-    if 'nucleus' in tables:
-        obj_df_ls.append(nucleus)
-    if 'pathogen' in tables:
-        obj_df_ls.append(pathogens)
-
-    return merged_df, obj_df_ls
 
 def _read_and_merge_data(locs, tables, verbose=False, nuclei_limit=10, pathogen_limit=10, change_plate=False):
     from .io import _read_db
@@ -2929,6 +2760,7 @@ def generate_training_dataset(settings):
     def get_smallest_class_size(df, settings, dataset_mode):
         if dataset_mode == 'metadata':
             sizes = [len(df[df['condition'] == c]) for c in settings['class_metadata']]
+            #sizes = [len(df[df['condition'].isin(class_list)]) for class_list in settings['class_metadata']]
             print(f'Class sizes: {sizes}')
         elif dataset_mode == 'annotation':
             sizes = [len(class_paths) for class_paths in df]
@@ -2997,16 +2829,12 @@ def generate_training_dataset(settings):
         df = df.dropna(subset=['condition'])
 
         display(df)
-
-        #df['metadata_based_class'] = pd.NA
-        #for i, class_ in enumerate(settings['classes']):
-        #    ls = settings['class_metadata'][i]
-        #    df.loc[df[settings['metadata_type_by']].isin(ls), 'metadata_based_class'] = class_
 
         size = get_smallest_class_size(df, settings, 'metadata')
 
         for class_ in settings['class_metadata']:
             class_temp_df = df[df['condition'] == class_]
+            #class_temp_df = df[df['condition'].isin(class_)]
             print(f'Found {len(class_temp_df)} images for class {class_}')
             class_paths_temp = class_temp_df['png_path'].tolist()
@@ -3033,6 +2861,8 @@ def generate_training_dataset(settings):
     from .io import _read_and_merge_data, _read_db
     from .utils import get_paths_from_db, annotate_conditions, save_settings
     from .settings import set_generate_training_dataset_defaults
+
+    settings = set_generate_training_dataset_defaults(settings)
 
     if 'nucleus' not in settings['tables']:
         settings['nuclei_limit'] = False
@@ -3041,7 +2871,6 @@ def generate_training_dataset(settings):
         settings['pathogen_limit'] = 0
 
     # Set default settings and save
-    settings = set_generate_training_dataset_defaults(settings)
     save_settings(settings, 'cv_dataset', show=True)
 
     class_path_list = None
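
Moving `set_generate_training_dataset_defaults(settings)` above the `settings['tables']` checks matters because reading a key before defaults are applied raises `KeyError` on a partial settings dict. A minimal sketch of the pattern (`set_defaults` is a hypothetical stand-in):

```python
def set_defaults(settings):
    """Fill in any missing keys; values here are illustrative only."""
    defaults = {'tables': ['cell', 'nucleus'], 'nuclei_limit': 10, 'pathogen_limit': 10}
    for key, value in defaults.items():
        settings.setdefault(key, value)
    return settings

def generate(settings):
    settings = set_defaults(settings)          # must run first...
    if 'nucleus' not in settings['tables']:    # ...so this lookup cannot KeyError
        settings['nuclei_limit'] = False
    return settings

print(generate({}))  # works even with an empty settings dict
```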