PyPI - spacr - Versions diffs - 0.3.72__py3-none-any.whl → 0.3.80__py3-none-any.whl - Mend

spacr-0.3.72-py3-none-any.whl → spacr-0.3.80-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11)

spacr/gui_core.py +138 -37
spacr/gui_elements.py +1 -1
spacr/ml.py +151 -56
spacr/settings.py +13 -1
spacr/utils.py +8 -1
{spacr-0.3.72.dist-info → spacr-0.3.80.dist-info}/METADATA +1 -1
{spacr-0.3.72.dist-info → spacr-0.3.80.dist-info}/RECORD +11 -11
{spacr-0.3.72.dist-info → spacr-0.3.80.dist-info}/LICENSE +0 -0
{spacr-0.3.72.dist-info → spacr-0.3.80.dist-info}/WHEEL +0 -0
{spacr-0.3.72.dist-info → spacr-0.3.80.dist-info}/entry_points.txt +0 -0
{spacr-0.3.72.dist-info → spacr-0.3.80.dist-info}/top_level.txt +0 -0

spacr/gui_core.py CHANGED Viewed

@@ -169,7 +169,7 @@ def display_figure(fig):
             #flash_feedback("right")
             show_next_figure()
-    def zoom(event):
+    def zoom_v1(event):
         nonlocal scale_factor
         zoom_speed = 0.1  # Adjust the zoom speed for smoother experience
@@ -197,6 +197,70 @@ def display_figure(fig):
         # Redraw the figure efficiently
         canvas.draw_idle()
+    def zoom_test(event):
+        if event.num == 4:  # Scroll up
+            print("zoom in")
+        elif event.num == 5: # Scroll down
+            print("zoom out")
+    def zoom_2(event):
+        zoom_speed = 0.1  # Change this to control how fast you zoom
+        # Determine the zoom direction based on the scroll event
+        if event.num == 4 or (hasattr(event, 'delta') and event.delta > 0):  # Scroll up = zoom in
+            factor = 1 - zoom_speed
+        elif event.num == 5 or (hasattr(event, 'delta') and event.delta < 0): # Scroll down = zoom out
+            factor = 1 + zoom_speed
+        else:
+            return  # No recognized scroll direction
+        for ax in canvas.figure.get_axes():
+            xlim = ax.get_xlim()
+            ylim = ax.get_ylim()
+            x_center = (xlim[1] + xlim[0]) / 2
+            y_center = (ylim[1] + ylim[0]) / 2
+            x_range = (xlim[1] - xlim[0]) * factor
+            y_range = (ylim[1] - ylim[0]) * factor
+            # Set the new limits
+            ax.set_xlim([x_center - x_range / 2, x_center + x_range / 2])
+            ax.set_ylim([y_center - y_range / 2, y_center + y_range / 2])
+        # Redraw the figure efficiently
+        canvas.draw_idle()
+    def zoom(event):
+        # Fixed zoom factors (adjust these if you want faster or slower zoom)
+        zoom_in_factor = 0.9   # When zooming in, ranges shrink by 10%
+        zoom_out_factor = 1.1  # When zooming out, ranges increase by 10%
+        # Determine the zoom direction based on the scroll event
+        if event.num == 4 or (hasattr(event, 'delta') and event.delta > 0):  # Scroll up = zoom in
+            factor = zoom_in_factor
+        elif event.num == 5 or (hasattr(event, 'delta') and event.delta < 0): # Scroll down = zoom out
+            factor = zoom_out_factor
+        else:
+            return  # No recognized scroll direction
+        for ax in canvas.figure.get_axes():
+            xlim = ax.get_xlim()
+            ylim = ax.get_ylim()
+            x_center = (xlim[1] + xlim[0]) / 2
+            y_center = (ylim[1] + ylim[0]) / 2
+            x_range = (xlim[1] - xlim[0]) * factor
+            y_range = (ylim[1] - ylim[0]) * factor
+            # Set the new limits
+            ax.set_xlim([x_center - x_range / 2, x_center + x_range / 2])
+            ax.set_ylim([y_center - y_range / 2, y_center + y_range / 2])
+        # Redraw the figure efficiently
+        canvas.draw_idle()
     # Bind events for hover, click interactions, and zoom
@@ -205,19 +269,20 @@ def display_figure(fig):
     canvas_widget.bind("<Button-1>", on_click)
     canvas_widget.bind("<Button-3>", on_right_click)
     # Detect the operating system and bind the appropriate mouse wheel events
     current_os = platform.system()
     if current_os == "Windows":
         canvas_widget.bind("<MouseWheel>", zoom)  # Windows
-    elif current_os == "Darwin":  # macOS
+    elif current_os == "Darwin":
         canvas_widget.bind("<MouseWheel>", zoom)
         canvas_widget.bind("<Button-4>", zoom)  # Scroll up
         canvas_widget.bind("<Button-5>", zoom)  # Scroll down
     elif current_os == "Linux":
         canvas_widget.bind("<Button-4>", zoom)  # Linux Scroll up
         canvas_widget.bind("<Button-5>", zoom)  # Linux Scroll down
+    process_fig_queue()
 def clear_unused_figures():
     global figures, figure_index
@@ -230,71 +295,97 @@ def clear_unused_figures():
     figure_index = min(max(figure_index, 0), len(figures) - 1)
 def show_previous_figure():
-    global figure_index, figures, fig_queue
+    from .gui_elements import standardize_figure
+    global figure_index, figures, fig_queue, index_control
     if figure_index is not None and figure_index > 0:
         figure_index -= 1
+        index_control.set(figure_index)
+        figures[figure_index] = standardize_figure(figures[figure_index])
         display_figure(figures[figure_index])
-        clear_unused_figures()
+        #clear_unused_figures()
 def show_next_figure():
-    global figure_index, figures, fig_queue
+    from .gui_elements import standardize_figure
+    global figure_index, figures, fig_queue, index_control
     if figure_index is not None and figure_index < len(figures) - 1:
         figure_index += 1
+        index_control.set(figure_index)
+        index_control.set_to(len(figures) - 1)
+        figures[figure_index] = standardize_figure(figures[figure_index])
         display_figure(figures[figure_index])
-        clear_unused_figures()
+        #clear_unused_figures()
     elif figure_index == len(figures) - 1 and not fig_queue.empty():
         fig = fig_queue.get_nowait()
         figures.append(fig)
         figure_index += 1
+        index_control.set(figure_index)
+        index_control.set_to(len(figures) - 1)
         display_figure(fig)
 def process_fig_queue():
     global canvas, fig_queue, canvas_widget, parent_frame, uppdate_frequency, figures, figure_index, index_control
     from .gui_elements import standardize_figure
+    #print("process_fig_queue called", flush=True)
     try:
+        got_new_figure = False
         while not fig_queue.empty():
             fig = fig_queue.get_nowait()
+            #print("Got a figure from fig_queue", flush=True)
             if fig is None:
-                print("Warning: Retrieved a None figure from fig_queue.")
-                continue  # Skip processing if the figure is None
+                print("Warning: Retrieved a None figure from fig_queue.", flush=True)
+                continue
-            # Standardize the figure appearance before adding it to the list
+            # Standardize the figure appearance before adding it
             fig = standardize_figure(fig)
             figures.append(fig)
-            # Update the slider range and set the value to the latest figure index
+            # Update slider maximum
             index_control.set_to(len(figures) - 1)
+            #print("New maximum slider value after adding a figure:", index_control.to, flush=True)
+            # If no figure has been displayed yet
             if figure_index == -1:
-                figure_index += 1
+                figure_index = 0
                 display_figure(figures[figure_index])
                 index_control.set(figure_index)
+                #print("Displayed the first figure and set slider value to 0", flush=True)
+            #got_new_figure = True
+        #if not got_new_figure:
+            # No new figures this time
+            #print("No new figures found in the queue this iteration.", flush=True)
     except Exception as e:
+        print("Exception in process_fig_queue:", e, flush=True)
         traceback.print_exc()
     finally:
+        # Schedule process_fig_queue() to run again
         after_id = canvas_widget.after(uppdate_frequency, process_fig_queue)
         parent_frame.after_tasks.append(after_id)
+        #print("process_fig_queue scheduled again", flush=True)
 def update_figure(value):
-    global figure_index, figures
+    from .gui_elements import standardize_figure
+    global figure_index, figures, index_control
     # Convert the value to an integer
     index = int(value)
     # Check if the index is valid
     if 0 <= index < len(figures):
         figure_index = index
+        figures[figure_index] = standardize_figure(figures[figure_index])
         display_figure(figures[figure_index])
-    # Update the index control widget's range and value
-    index_control.set_to(len(figures) - 1)
-    index_control.set(figure_index)
+        index_control.set(figure_index)
+        print("update_figure called with value:", figure_index)
+        index_control.set_to(len(figures) - 1)
 def setup_plot_section(vertical_container, settings_type):
     global canvas, canvas_widget, figures, figure_index, index_control
     from .gui_utils import display_media_in_plot_frame
@@ -305,29 +396,29 @@ def setup_plot_section(vertical_container, settings_type):
     # Initialize deque for storing figures and the current index
     figures = deque()
+    figure_index = -1  # Start with no figure displayed
     # Create a frame for the plot section
     plot_frame = tk.Frame(vertical_container)
     plot_frame.configure(bg=bg)
     vertical_container.add(plot_frame, stretch="always")
-    # Clear the plot_frame (optional, to handle cases where it may already have content)
+    # Clear the plot_frame (optional)
     for widget in plot_frame.winfo_children():
         widget.destroy()
-    # Create a figure and plot
+    # Create a figure and plot (initial figure)
     figure = Figure(figsize=(30, 4), dpi=100)
     plot = figure.add_subplot(111)
     plot.plot([], [])
     plot.axis('off')
     if settings_type == 'map_barcodes':
-        # Load and display GIF
         current_dir = os.path.dirname(__file__)
         resources_path = os.path.join(current_dir, 'resources', 'icons')
         gif_path = os.path.join(resources_path, 'dna_matrix.mp4')
         display_media_in_plot_frame(gif_path, plot_frame)
         canvas = FigureCanvasTkAgg(figure, master=plot_frame)
         canvas.get_tk_widget().configure(cursor='arrow', highlightthickness=0)
         canvas_widget = canvas.get_tk_widget()
@@ -348,10 +439,11 @@ def setup_plot_section(vertical_container, settings_type):
     # Create slider
     control_frame = tk.Frame(plot_frame, height=15*2,  bg=bg)
     control_frame.grid(row=1, column=0, sticky="ew", padx=10, pady=5)
-    control_frame.grid_propagate(False)
+    control_frame.grid_propagate(False)
-    # Pass the update_figure function as the command to spacrSlider
-    index_control = spacrSlider(control_frame, from_=0, to=0, value=0, thickness=2, knob_radius=10, position="center", show_index=True, command=update_figure)
+    index_control = spacrSlider(control_frame, from_=0, to=0, value=0, thickness=2, knob_radius=10,
+                                position="center", show_index=True, command=update_figure)
     index_control.grid(row=0, column=0, sticky="ew")
     control_frame.grid_columnconfigure(0, weight=1)
@@ -359,10 +451,17 @@ def setup_plot_section(vertical_container, settings_type):
     style = ttk.Style(vertical_container)
     _ = set_dark_style(style, containers=containers, widgets=widgets)
+    # Now ensure the first figure is displayed and recognized:
+    figures.append(figure)
+    figure_index = 0
+    display_figure(figures[figure_index])
+    index_control.set_to(len(figures) - 1)   # Slider max = 0 in this case, since there's only one figure
+    index_control.set(figure_index)          # Set slider to 0 to indicate the first figure
     return canvas, canvas_widget
-def set_globals(thread_control_var, q_var, console_output_var, parent_frame_var, vars_dict_var, canvas_var, canvas_widget_var, scrollable_frame_var, fig_queue_var, figures_var, figure_index_var, index_control_var, progress_bar_var, usage_bars_var):
-    global thread_control, q, console_output, parent_frame, vars_dict, canvas, canvas_widget, scrollable_frame, fig_queue, figures, figure_index, progress_bar, usage_bars, index_control
+def set_globals(thread_control_var, q_var, console_output_var, parent_frame_var, vars_dict_var, canvas_var, canvas_widget_var, scrollable_frame_var, fig_queue_var, progress_bar_var, usage_bars_var):
+    global thread_control, q, console_output, parent_frame, vars_dict, canvas, canvas_widget, scrollable_frame, fig_queue, progress_bar, usage_bars
     thread_control = thread_control_var
     q = q_var
     console_output = console_output_var
@@ -372,11 +471,11 @@ def set_globals(thread_control_var, q_var, console_output_var, parent_frame_var,
     canvas_widget = canvas_widget_var
     scrollable_frame = scrollable_frame_var
     fig_queue = fig_queue_var
-    figures = figures_var
-    figure_index = figure_index_var
+    #figures = figures_var
+    #figure_index = figure_index_var
+    #index_control = index_control_var
     progress_bar = progress_bar_var
     usage_bars = usage_bars_var
-    index_control = index_control_var
 def import_settings(settings_type='mask'):
     global vars_dict, scrollable_frame, button_scrollable_frame
@@ -606,6 +705,7 @@ def setup_button_section(horizontal_container, settings_type='mask', run=True, a
         widgets.append(import_button)
         btn_row += 1
+    btn_row += 1
     # Add the batch progress bar
     progress_bar = spacrProgressBar(button_scrollable_frame.scrollable_frame, orient='horizontal', mode='determinate')
     progress_bar.grid(row=btn_row, column=0, columnspan=7, pady=5, padx=5, sticky='ew')
@@ -853,7 +953,8 @@ def process_console_queue():
                     if progress_bar:
                         progress_bar['maximum'] = total_progress
                         progress_bar['value'] = unique_progress_count
+                        #print("Current progress bar value:", progress_bar['value']) # Debugg
                     # Store operation type and additional info
                     if operation_type:
                         progress_bar.operation_type = operation_type
@@ -955,7 +1056,7 @@ def initiate_root(parent, settings_type='mask'):
         else:
             usage_bars = []
-        set_globals(thread_control, q, console_output, parent_frame, vars_dict, canvas, canvas_widget, scrollable_frame, fig_queue, figures, figure_index, index_control, progress_bar, usage_bars)
+        set_globals(thread_control, q, console_output, parent_frame, vars_dict, canvas, canvas_widget, scrollable_frame, fig_queue, progress_bar, usage_bars)
         description_text = descriptions.get(settings_type, "No description available for this module.")
         q.put(f"Console")

spacr/gui_elements.py CHANGED Viewed

@@ -667,7 +667,7 @@ class spacrProgressBar(ttk.Progressbar):
         # Remove any borders and ensure the active color fills the entire space
         self.style.configure(
             "spacr.Horizontal.TProgressbar",
-            troughcolor=self.inactive_color,       # Set the trough to bg color
+            troughcolor=self.inactive_color, # Set the trough to bg color
             background=self.active_color,    # Active part is the active color
             borderwidth=0,                   # Remove border width
             pbarrelief="flat",               # Flat relief for the progress bar

spacr/ml.py CHANGED Viewed

@@ -27,6 +27,9 @@ from sklearn.linear_model import Lasso, Ridge
 from sklearn.preprocessing import FunctionTransformer
 from patsy import dmatrices
+from sklearn.metrics import classification_report, accuracy_score
+from sklearn.model_selection import StratifiedKFold, cross_val_score
+from sklearn.feature_selection import SelectKBest, f_classif
 from sklearn.model_selection import train_test_split
 from sklearn.ensemble import RandomForestClassifier, HistGradientBoostingClassifier
 from sklearn.linear_model import LogisticRegression
@@ -1165,21 +1168,29 @@ def generate_ml_scores(settings):
     settings = set_default_analyze_screen(settings)
-    src = settings['src']
+    srcs = settings['src']
     settings_df = pd.DataFrame(list(settings.items()), columns=['Key', 'Value'])
     display(settings_df)
-    db_loc = [src+'/measurements/measurements.db']
-    tables = ['cell', 'nucleus', 'pathogen','cytoplasm']
-    nuclei_limit, pathogen_limit = settings['nuclei_limit'], settings['pathogen_limit']
-    df, _ = _read_and_merge_data(db_loc,
-                                 tables,
-                                 settings['verbose'],
-                                 nuclei_limit,
-                                 pathogen_limit)
+    if isinstance(srcs, str):
+        srcs = [srcs]
+    df = pd.DataFrame()
+    for idx, src in enumerate(srcs):
+        if idx == 0:
+            src1 = src
+        db_loc = [src+'/measurements/measurements.db']
+        tables = ['cell', 'nucleus', 'pathogen','cytoplasm']
+        dft, _ = _read_and_merge_data(db_loc,
+                                    tables,
+                                    settings['verbose'],
+                                    nuclei_limit=settings['nuclei_limit'],
+                                    pathogen_limit=settings['pathogen_limit'])
+        df = pd.concat([df, dft])
     if settings['annotation_column'] is not None:
@@ -1191,6 +1202,7 @@ def generate_ml_scores(settings):
         annotated_df = png_list_df[['prcfo', settings['annotation_column']]].set_index('prcfo')
         df = annotated_df.merge(df, left_index=True, right_index=True)
         unique_values = df[settings['annotation_column']].dropna().unique()
         if len(unique_values) == 1:
             unannotated_rows = df[df[settings['annotation_column']].isna()].index
             existing_value = unique_values[0]
@@ -1213,8 +1225,8 @@ def generate_ml_scores(settings):
             df[settings['annotation_column']] = df[settings['annotation_column']].apply(str)
     if settings['channel_of_interest'] in [0,1,2,3]:
-        df['recruitment'] = df[f"pathogen_channel_{settings['channel_of_interest']}_mean_intensity"]/df[f"cytoplasm_channel_{settings['channel_of_interest']}_mean_intensity"]
+        if f"pathogen_channel_{settings['channel_of_interest']}_mean_intensity" and f"cytoplasm_channel_{settings['channel_of_interest']}_mean_intensity" in df.columns:
+            df['recruitment'] = df[f"pathogen_channel_{settings['channel_of_interest']}_mean_intensity"]/df[f"cytoplasm_channel_{settings['channel_of_interest']}_mean_intensity"]
     output, figs = ml_analysis(df,
                                settings['channel_of_interest'],
@@ -1224,18 +1236,24 @@ def generate_ml_scores(settings):
                                settings['exclude'],
                                settings['n_repeats'],
                                settings['top_features'],
+                               settings['reg_alpha'],
+                               settings['reg_lambda'],
+                               settings['learning_rate'],
                                settings['n_estimators'],
                                settings['test_size'],
                                settings['model_type_ml'],
                                settings['n_jobs'],
                                settings['remove_low_variance_features'],
                                settings['remove_highly_correlated_features'],
+                               settings['prune_features'],
+                               settings['cross_validation'],
                                settings['verbose'])
     shap_fig = shap_analysis(output[3], output[4], output[5])
     features = output[0].select_dtypes(include=[np.number]).columns.tolist()
+    train_features_df = pd.DataFrame(output[9], columns=['feature'])
     if not settings['heatmap_feature'] in features:
         raise ValueError(f"Variable {settings['heatmap_feature']} not found in the dataframe. Please choose one of the following: {features}")
@@ -1247,15 +1265,16 @@ def generate_ml_scores(settings):
                                 min_count=settings['minimum_cell_count'],
                                 verbose=settings['verbose'])
-    data_path, permutation_path, feature_importance_path, model_metricks_path, permutation_fig_path, feature_importance_fig_path, shap_fig_path, plate_heatmap_path, settings_csv = get_ml_results_paths(src, settings['model_type_ml'], settings['channel_of_interest'])
-    df, permutation_df, feature_importance_df, _, _, _, _, _, metrics_df = output
+    data_path, permutation_path, feature_importance_path, model_metricks_path, permutation_fig_path, feature_importance_fig_path, shap_fig_path, plate_heatmap_path, settings_csv, ml_features = get_ml_results_paths(src1, settings['model_type_ml'], settings['channel_of_interest'])
+    df, permutation_df, feature_importance_df, _, _, _, _, _, metrics_df, _ = output
     settings_df.to_csv(settings_csv, index=False)
     df.to_csv(data_path, mode='w', encoding='utf-8')
     permutation_df.to_csv(permutation_path, mode='w', encoding='utf-8')
     feature_importance_df.to_csv(feature_importance_path, mode='w', encoding='utf-8')
+    train_features_df.to_csv(ml_features, mode='w', encoding='utf-8')
     metrics_df.to_csv(model_metricks_path, mode='w', encoding='utf-8')
     plate_heatmap.savefig(plate_heatmap_path, format='pdf')
     figs[0].savefig(permutation_fig_path, format='pdf')
     figs[1].savefig(feature_importance_fig_path, format='pdf')
@@ -1263,7 +1282,7 @@ def generate_ml_scores(settings):
     if settings['save_to_db']:
         settings['csv_path'] = data_path
-        settings['db_path'] = os.path.join(src, 'measurements', 'measurements.db')
+        settings['db_path'] = os.path.join(src1, 'measurements', 'measurements.db')
         settings['table_name'] = 'png_list'
         settings['update_column'] = 'predictions'
         settings['match_column'] = 'prcfo'
@@ -1271,7 +1290,7 @@ def generate_ml_scores(settings):
     return [output, plate_heatmap]
-def ml_analysis(df, channel_of_interest=3, location_column='column_name', positive_control='c2', negative_control='c1', exclude=None, n_repeats=10, top_features=30, n_estimators=100, test_size=0.2, model_type='xgboost', n_jobs=-1, remove_low_variance_features=True, remove_highly_correlated_features=True, verbose=False):
+def ml_analysis(df, channel_of_interest=3, location_column='column_name', positive_control='c2', negative_control='c1', exclude=None, n_repeats=10, top_features=30, reg_alpha=0.1, reg_lambda=1.0, learning_rate=0.00001, n_estimators=1000, test_size=0.2, model_type='xgboost', n_jobs=-1, remove_low_variance_features=True, remove_highly_correlated_features=True, prune_features=False, cross_validation=False, verbose=False):
     """
     Calculates permutation importance for numerical features in the dataframe,
@@ -1313,7 +1332,8 @@ def ml_analysis(df, channel_of_interest=3, location_column='column_name', positi
     if verbose:
         print(f'Found {len(features)} numerical features in the dataframe')
         print(f'Features used in training: {features}')
+        print(f'Features: {features}')
     df = pd.concat([df, df_metadata[location_column]], axis=1)
     # Subset the dataframe based on specified column values
@@ -1327,14 +1347,26 @@ def ml_analysis(df, channel_of_interest=3, location_column='column_name', positi
     # Combine the subsets for analysis
     combined_df = pd.concat([df1, df2])
     combined_df = combined_df.drop(columns=[location_column])
     if verbose:
         print(f'Found {len(df1)} samples for {negative_control} and {len(df2)} samples for {positive_control}. Total: {len(combined_df)}')
     X = combined_df[features]
     y = combined_df['target']
-    print(X)
-    print(y)
+    if prune_features:
+        before_pruning = len(X.columns)
+        selector = SelectKBest(score_func=f_classif, k=top_features)
+        X_selected = selector.fit_transform(X, y)
+        # Get the selected feature names
+        selected_features = X.columns[selector.get_support()]
+        X = pd.DataFrame(X_selected, columns=selected_features, index=X.index)
+        features = selected_features.tolist()
+        after_pruning = len(X.columns)
+        print(f"Removed {before_pruning - after_pruning} features using SelectKBest")
     # Split the data into training and testing sets
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state)
@@ -1353,12 +1385,102 @@ def ml_analysis(df, channel_of_interest=3, location_column='column_name', positi
     elif model_type == 'gradient_boosting':
         model = HistGradientBoostingClassifier(max_iter=n_estimators, random_state=random_state)  # Supports n_jobs internally
     elif model_type == 'xgboost':
-        model = XGBClassifier(n_estimators=n_estimators, random_state=random_state, nthread=n_jobs, use_label_encoder=False, eval_metric='logloss')
+        model = XGBClassifier(reg_alpha=reg_alpha, reg_lambda=reg_lambda, learning_rate=learning_rate, n_estimators=n_estimators, random_state=random_state, nthread=n_jobs, use_label_encoder=False, eval_metric='logloss')
     else:
         raise ValueError(f"Unsupported model_type: {model_type}")
-    model.fit(X_train, y_train)
+    # Perform k-fold cross-validation
+    if cross_validation:
+        # Cross-validation setup
+        kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=random_state)
+        fold_metrics = []
+        for fold_idx, (train_index, test_index) in enumerate(kfold.split(X, y), start=1):
+            X_train, X_test = X.iloc[train_index], X.iloc[test_index]
+            y_train, y_test = y.iloc[train_index], y.iloc[test_index]
+            # Train the model
+            model.fit(X_train, y_train)
+            # Predict for the current test set
+            predictions_test = model.predict(X_test)
+            combined_df.loc[X_test.index, 'predictions'] = predictions_test
+            # Get prediction probabilities for the test set
+            prediction_probabilities_test = model.predict_proba(X_test)
+            # Find the optimal threshold
+            optimal_threshold = find_optimal_threshold(y_test, prediction_probabilities_test[:, 1])
+            if verbose:
+                print(f'Fold {fold_idx} - Optimal threshold: {optimal_threshold}')
+            # Assign predictions and probabilities to the test set in the DataFrame
+            df.loc[X_test.index, 'predictions'] = predictions_test
+            for i in range(prediction_probabilities_test.shape[1]):
+                df.loc[X_test.index, f'prediction_probability_class_{i}'] = prediction_probabilities_test[:, i]
+            # Evaluate performance for the current fold
+            fold_report = classification_report(y_test, predictions_test, output_dict=True)
+            fold_metrics.append(pd.DataFrame(fold_report).transpose())
+            if verbose:
+                print(f"Fold {fold_idx} Classification Report:")
+                print(classification_report(y_test, predictions_test))
+        # Aggregate metrics across all folds
+        metrics_df = pd.concat(fold_metrics).groupby(level=0).mean()
+        # Re-train on full data (X, y) and then apply to entire df
+        model.fit(X, y)
+        all_predictions = model.predict(df[features])  # Predict on entire df
+        df['predictions'] = all_predictions
+        # Get prediction probabilities for all rows in df
+        prediction_probabilities = model.predict_proba(df[features])
+        for i in range(prediction_probabilities.shape[1]):
+            df[f'prediction_probability_class_{i}'] = prediction_probabilities[:, i]
+        if verbose:
+            print("\nFinal Classification Report on Full Dataset:")
+            print(classification_report(y, all_predictions))
+        # Generate metrics DataFrame
+        final_report_dict = classification_report(y, all_predictions, output_dict=True)
+        metrics_df = pd.DataFrame(final_report_dict).transpose()
+    else:
+        model.fit(X_train, y_train)
+        # Predicting the target variable for the test set
+        predictions_test = model.predict(X_test)
+        combined_df.loc[X_test.index, 'predictions'] = predictions_test
+        # Get prediction probabilities for the test set
+        prediction_probabilities_test = model.predict_proba(X_test)
+        # Find the optimal threshold
+        optimal_threshold = find_optimal_threshold(y_test, prediction_probabilities_test[:, 1])
+        if verbose:
+            print(f'Optimal threshold: {optimal_threshold}')
+        # Predicting the target variable for all other rows in the dataframe
+        X_all = df[features]
+        all_predictions = model.predict(X_all)
+        df['predictions'] = all_predictions
+        # Get prediction probabilities for all rows in the dataframe
+        prediction_probabilities = model.predict_proba(X_all)
+        for i in range(prediction_probabilities.shape[1]):
+            df[f'prediction_probability_class_{i}'] = prediction_probabilities[:, i]
+        if verbose:
+            print("\nClassification Report:")
+            print(classification_report(y_test, predictions_test))
+        report_dict = classification_report(y_test, predictions_test, output_dict=True)
+        metrics_df = pd.DataFrame(report_dict).transpose()
     perm_importance = permutation_importance(model, X_train, y_train, n_repeats=n_repeats, random_state=random_state, n_jobs=n_jobs)
     # Create a DataFrame for permutation importances
@@ -1387,40 +1509,13 @@ def ml_analysis(df, channel_of_interest=3, location_column='column_name', positi
     else:
         feature_importance_df = pd.DataFrame()
-    # Predicting the target variable for the test set
-    predictions_test = model.predict(X_test)
-    combined_df.loc[X_test.index, 'predictions'] = predictions_test
-    # Get prediction probabilities for the test set
-    prediction_probabilities_test = model.predict_proba(X_test)
-    # Find the optimal threshold
-    optimal_threshold = find_optimal_threshold(y_test, prediction_probabilities_test[:, 1])
-    if verbose:
-        print(f'Optimal threshold: {optimal_threshold}')
-    # Predicting the target variable for all other rows in the dataframe
-    X_all = df[features]
-    all_predictions = model.predict(X_all)
-    df['predictions'] = all_predictions
-    # Get prediction probabilities for all rows in the dataframe
-    prediction_probabilities = model.predict_proba(X_all)
-    for i in range(prediction_probabilities.shape[1]):
-        df[f'prediction_probability_class_{i}'] = prediction_probabilities[:, i]
-    if verbose:
-        print("\nClassification Report:")
-        print(classification_report(y_test, predictions_test))
-    report_dict = classification_report(y_test, predictions_test, output_dict=True)
-    metrics_df = pd.DataFrame(report_dict).transpose()
     df = _calculate_similarity(df, features, location_column, positive_control, negative_control)
     df['prcfo'] = df.index.astype(str)
     df[['plate', 'row_name', 'column_name', 'field', 'object']] = df['prcfo'].str.split('_', expand=True)
     df['prc'] = df['plate'] + '_' + df['row_name'] + '_' + df['column_name']
-    return [df, permutation_df, feature_importance_df, model, X_train, X_test, y_train, y_test, metrics_df], [permutation_fig, feature_importance_fig]
+    return [df, permutation_df, feature_importance_df, model, X_train, X_test, y_train, y_test, metrics_df, features], [permutation_fig, feature_importance_fig]
 def shap_analysis(model, X_train, X_test):
@@ -1495,9 +1590,9 @@ def _calculate_similarity(df, features, col_to_compare, val1, val2):
         inv_cov_matrix = np.linalg.inv(cov_matrix + np.eye(cov_matrix.shape[0]) * epsilon)
     # Calculate similarity scores
-    def safe_similarity(func, row, control):
+    def safe_similarity(func, row, control, *args, **kwargs):
         try:
-            return func(row, control)
+            return func(row, control, *args, **kwargs)
         except Exception:
             return np.nan

spacr/settings.py CHANGED Viewed

@@ -283,7 +283,10 @@ def set_default_analyze_screen(settings):
     settings.setdefault('cmap','viridis')
     settings.setdefault('channel_of_interest',3)
     settings.setdefault('minimum_cell_count',25)
-    settings.setdefault('n_estimators',100)
+    settings.setdefault('reg_alpha',0.1)
+    settings.setdefault('reg_lambda',1.0)
+    settings.setdefault('learning_rate',0.001)
+    settings.setdefault('n_estimators',1000)
     settings.setdefault('test_size',0.2)
     settings.setdefault('location_column','column_name')
     settings.setdefault('positive_control','c2')
@@ -296,6 +299,8 @@ def set_default_analyze_screen(settings):
     settings.setdefault('remove_low_variance_features',True)
     settings.setdefault('remove_highly_correlated_features',True)
     settings.setdefault('n_jobs',-1)
+    settings.setdefault('prune_features',False)
+    settings.setdefault('cross_validation',True)
     settings.setdefault('verbose',True)
     return settings
@@ -872,6 +877,13 @@ expected_types = {
     "target_layer":str,
     "save_to_db":bool,
     "test_mode":bool,
+    "test_images":int,
+    "remove_background_cell":bool,
+    "remove_background_nucleus":bool,
+    "remove_background_pathogen":bool,
+    "figuresize":int,
+    "cmap":str,
+    "pathogen_model":str,
     "normalize_input":bool,
 }

spacr/utils.py CHANGED Viewed

@@ -4277,6 +4277,12 @@ def filter_dataframe_features(df, channel_of_interest, exclude=None, remove_low_
     if remove_highly_correlated_features:
         df = remove_highly_correlated_columns(df, threshold=0.95, verbose=verbose)
+    # Remove columns with NaN values
+    before_drop_NaN = len(df.columns)
+    df = df.dropna(axis=1)
+    after_drop_NaN = len(df.columns)
+    print(f"Dropped {before_drop_NaN - after_drop_NaN} columns with NaN values")
     # Select numerical features
     features = df.select_dtypes(include=[np.number]).columns.tolist()
@@ -4759,7 +4765,8 @@ def get_ml_results_paths(src, model_type='xgboost', channel_of_interest=1):
     shap_fig_path = os.path.join(res_fldr, 'shap.pdf')
     plate_heatmap_path = os.path.join(res_fldr, 'plate_heatmap.pdf')
     settings_csv = os.path.join(res_fldr, 'ml_settings.csv')
-    return data_path, permutation_path, feature_importance_path, model_metricks_path, permutation_fig_path, feature_importance_fig_path, shap_fig_path, plate_heatmap_path, settings_csv
+    ml_features = os.path.join(res_fldr, 'ml_features.csv')
+    return data_path, permutation_path, feature_importance_path, model_metricks_path, permutation_fig_path, feature_importance_fig_path, shap_fig_path, plate_heatmap_path, settings_csv, ml_features
 def augment_image(image):
     """

{spacr-0.3.72.dist-info → spacr-0.3.80.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: spacr
-Version: 0.3.72
+Version: 0.3.80
 Summary: Spatial phenotype analysis of crisp screens (SpaCr)
 Home-page: https://github.com/EinarOlafsson/spacr
 Author: Einar Birnir Olafsson

{spacr-0.3.72.dist-info → spacr-0.3.80.dist-info}/RECORD RENAMED Viewed

@@ -12,24 +12,24 @@ spacr/chat_bot.py,sha256=n3Fhqg3qofVXHmh3H9sUcmfYy9MmgRnr48663MVdY9E,1244
 spacr/core.py,sha256=3u2qKmPmTlswvE1uKTF4gi7KQ3sJBHV9No_ysgk7JCU,48487
 spacr/deep_spacr.py,sha256=V3diLyxX-0_F5UxhX_b94ROOvL9eoLvnoUmF3nMBqPQ,43250
 spacr/gui.py,sha256=ARyn9Q_g8HoP-cXh1nzMLVFCKqthY4v2u9yORyaQqQE,8230
-spacr/gui_core.py,sha256=N7R7yvfK_dJhOReM_kW3Ci8Bokhi1OzsxeKqvSGdvV4,41460
-spacr/gui_elements.py,sha256=EKlvEg_4_je7jciEdR3NTgPrcTraowa2e2RUt-xqd6M,138254
+spacr/gui_core.py,sha256=6NKv8ebqC9Zuior4f2-L1By_Pjtt-RPCrEgnRuE9P54,45576
+spacr/gui_elements.py,sha256=I_eSYF1RkAG0zsa-ZiQT0EaaVvUpucULCuWCowO6t4E,138248
 spacr/gui_utils.py,sha256=u9RoIOWpAXFEOnUlLpMQZrc1pWSg6omZsJMIhJdRv_g,41211
 spacr/io.py,sha256=LF6lpphw7GSeuoHQijPykjKNF56wNTFEWFZuDQp3O6Q,145739
 spacr/logger.py,sha256=lJhTqt-_wfAunCPl93xE65Wr9Y1oIHJWaZMjunHUeIw,1538
 spacr/measure.py,sha256=2lK-ZcTxLM-MpXV1oZnucRD9iz5aprwahRKw9IEqshg,55085
 spacr/mediar.py,sha256=FwLvbLQW5LQzPgvJZG8Lw7GniA2vbZx6Jv6vIKu7I5c,14743
-spacr/ml.py,sha256=h0IrXoNnyNzZLPYbtZPFI6c4Qeu1gH8R3iUz_O7-ar0,78114
+spacr/ml.py,sha256=x19S8OsR5omb8e6MU9I99Nz95J_QvM5siyk-zaAU3p8,82866
 spacr/openai.py,sha256=5vBZ3Jl2llYcW3oaTEXgdyCB2aJujMUIO5K038z7w_A,1246
 spacr/plot.py,sha256=gXC7y3uT4sx8KRODeSFWQG_A1CylsuJ5B7HYe_un6so,165177
 spacr/sequencing.py,sha256=ClUfwPPK6rNUbUuiEkzcwakzVyDKKUMv9ricrxT8qQY,25227
-spacr/settings.py,sha256=14PFxw3YK9tUqbaC6BqfbrWk3sN7gyTZAAI8KNy5KBA,80461
+spacr/settings.py,sha256=xTFTD04H8uXRJ5m4Pnr4Znhx0f_FxdgStMPXol3apxM,80888
 spacr/sim.py,sha256=1xKhXimNU3ukzIw-3l9cF3Znc_brW8h20yv8fSTzvss,71173
 spacr/stats.py,sha256=mbhwsyIqt5upsSD346qGjdCw7CFBa0tIS7zHU9e0jNI,9536
 spacr/submodules.py,sha256=SK8YEs850LAx30YAiwap7ecLpp1_p-bci6H-Or0GLoA,55500
 spacr/timelapse.py,sha256=KGfG4L4-QnFfgbF7L6C5wL_3gd_rqr05Foje6RsoTBg,39603
 spacr/toxo.py,sha256=z2nT5aAze3NUIlwnBQcnkARihDwoPfqOgQIVoUluyK0,25087
-spacr/utils.py,sha256=LX2Hu6QC-yG9ZVBiM2dkSN9yytCB0eTTRGfExiZzYzE,221940
+spacr/utils.py,sha256=SiUcctyUETEX_GZ-Nflba5whZiEjJynncaH-xcZPK1k,222242
 spacr/version.py,sha256=axH5tnGwtgSnJHb5IDhiu4Zjk5GhLyAEDRe-rnaoFOA,409
 spacr/resources/MEDIAR/.gitignore,sha256=Ff1q9Nme14JUd-4Q3jZ65aeQ5X4uttptssVDgBVHYo8,152
 spacr/resources/MEDIAR/LICENSE,sha256=yEj_TRDLUfDpHDNM0StALXIt6mLqSgaV2hcCwa6_TcY,1065
@@ -152,9 +152,9 @@ spacr/resources/icons/umap.png,sha256=dOLF3DeLYy9k0nkUybiZMe1wzHQwLJFRmgccppw-8b
 spacr/resources/images/plate1_E01_T0001F001L01A01Z01C02.tif,sha256=Tl0ZUfZ_AYAbu0up_nO0tPRtF1BxXhWQ3T3pURBCCRo,7958528
 spacr/resources/images/plate1_E01_T0001F001L01A02Z01C01.tif,sha256=m8N-V71rA1TT4dFlENNg8s0Q0YEXXs8slIn7yObmZJQ,7958528
 spacr/resources/images/plate1_E01_T0001F001L01A03Z01C03.tif,sha256=Pbhk7xn-KUP6RSIhJsxQcrHFImBm3GEpLkzx7WOc-5M,7958528
-spacr-0.3.72.dist-info/LICENSE,sha256=SR-2MeGc6SCM1UORJYyarSWY_A-JaOMFDj7ReSs9tRM,1083
-spacr-0.3.72.dist-info/METADATA,sha256=Kt166mcmw6Hb0u47_tZVq1EiZuK3Z_aDC0T7jE41dnI,6032
-spacr-0.3.72.dist-info/WHEEL,sha256=HiCZjzuy6Dw0hdX5R3LCFPDmFS4BWl8H-8W39XfmgX4,91
-spacr-0.3.72.dist-info/entry_points.txt,sha256=BMC0ql9aNNpv8lUZ8sgDLQMsqaVnX5L535gEhKUP5ho,296
-spacr-0.3.72.dist-info/top_level.txt,sha256=GJPU8FgwRXGzKeut6JopsSRY2R8T3i9lDgya42tLInY,6
-spacr-0.3.72.dist-info/RECORD,,
+spacr-0.3.80.dist-info/LICENSE,sha256=SR-2MeGc6SCM1UORJYyarSWY_A-JaOMFDj7ReSs9tRM,1083
+spacr-0.3.80.dist-info/METADATA,sha256=Q0YV4N-C8XyUHH8HFW_k9ryAftcU8v9oMxNhgzvU8cA,6032
+spacr-0.3.80.dist-info/WHEEL,sha256=HiCZjzuy6Dw0hdX5R3LCFPDmFS4BWl8H-8W39XfmgX4,91
+spacr-0.3.80.dist-info/entry_points.txt,sha256=BMC0ql9aNNpv8lUZ8sgDLQMsqaVnX5L535gEhKUP5ho,296
+spacr-0.3.80.dist-info/top_level.txt,sha256=GJPU8FgwRXGzKeut6JopsSRY2R8T3i9lDgya42tLInY,6
+spacr-0.3.80.dist-info/RECORD,,

{spacr-0.3.72.dist-info → spacr-0.3.80.dist-info}/LICENSE RENAMED Viewed

File without changes

{spacr-0.3.72.dist-info → spacr-0.3.80.dist-info}/WHEEL RENAMED Viewed

File without changes

{spacr-0.3.72.dist-info → spacr-0.3.80.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{spacr-0.3.72.dist-info → spacr-0.3.80.dist-info}/top_level.txt RENAMED Viewed

File without changes

spacr 0.3.72__py3-none-any.whl → 0.3.80__py3-none-any.whl

spacr 0.3.72py3-none-any.whl → 0.3.80py3-none-any.whl