PyPI - spacr - Versions diffs - 0.3.60__py3-none-any.whl → 0.3.61__py3-none-any.whl - Mend

spacr-0.3.60-py3-none-any.whl → spacr-0.3.61-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

spacr/app_annotate.py +0 -8
spacr/core.py +12 -7
spacr/gui_utils.py +24 -8
spacr/io.py +4 -155
spacr/ml.py +3 -4
spacr/plot.py +82 -23
spacr/settings.py +4 -13
spacr/submodules.py +299 -5
spacr/utils.py +39 -2
{spacr-0.3.60.dist-info → spacr-0.3.61.dist-info}/METADATA +1 -1
{spacr-0.3.60.dist-info → spacr-0.3.61.dist-info}/RECORD +15 -15
{spacr-0.3.60.dist-info → spacr-0.3.61.dist-info}/LICENSE +0 -0
{spacr-0.3.60.dist-info → spacr-0.3.61.dist-info}/WHEEL +0 -0
{spacr-0.3.60.dist-info → spacr-0.3.61.dist-info}/entry_points.txt +0 -0
{spacr-0.3.60.dist-info → spacr-0.3.61.dist-info}/top_level.txt +0 -0

spacr/app_annotate.py CHANGED Viewed

@@ -4,14 +4,6 @@ from .gui import MainApp
 from .gui_elements import set_dark_style, spacrButton
 def convert_to_number(value):
-    """
-    Converts a string value to an integer if possible, otherwise converts to a float.
-    Args:
-        value (str): The string representation of the number.
-    Returns:
-        int or float: The converted number.
-    """
     try:
         return int(value)
     except ValueError:

spacr/core.py CHANGED Viewed

@@ -465,10 +465,8 @@ def generate_image_umap(settings={}):
     display(settings_df)
     db_paths = get_db_paths(settings['src'])
     tables = settings['tables'] + ['png_list']
     all_df = pd.DataFrame()
-    #image_paths = []
     for i,db_path in enumerate(db_paths):
         df = _read_and_join_tables(db_path, table_names=tables)
@@ -476,7 +474,7 @@ def generate_image_umap(settings={}):
         all_df = pd.concat([all_df, df], axis=0)
         #image_paths.extend(image_paths_tmp)
-    all_df['cond'] = all_df['col'].apply(map_condition, neg=settings['neg'], pos=settings['pos'], mix=settings['mix'])
+    all_df['cond'] = all_df['column_name'].apply(map_condition, neg=settings['neg'], pos=settings['pos'], mix=settings['mix'])
     if settings['exclude_conditions']:
         if isinstance(settings['exclude_conditions'], str):
@@ -495,7 +493,10 @@ def generate_image_umap(settings={}):
         # Extract and reset the index for the column to compare
         col_to_compare = all_df[settings['col_to_compare']].reset_index(drop=True)
+        #if settings['only_top_features']:
+        #    column_list = None
         # Preprocess the data to obtain numeric data
         numeric_data = preprocess_data(all_df, settings['filter_by'], settings['remove_highly_correlated'], settings['log_data'], settings['exclude'])
@@ -571,7 +572,11 @@ def generate_image_umap(settings={}):
             print(f'Saved {reduction_method} embedding to {embedding_path} and grid to {grid_path}')
     # Add cluster labels to the dataframe
-    all_df['cluster'] = labels
+    if len(labels) > 0:
+        all_df['cluster'] = labels
+    else:
+        all_df['cluster'] = 1  # Assign a default cluster label
+        print("No clusters found. Consider reducing 'min_samples' or increasing 'eps' for DBSCAN.")
     # Save the results to a CSV file
     results_dir = os.path.join(settings['src'][0], 'results')
@@ -653,7 +658,7 @@ def reducer_hyperparameter_search(settings={}, reduction_params=None, dbscan_par
         df = _read_and_join_tables(db_path, table_names=tables)
         all_df = pd.concat([all_df, df], axis=0)
-    all_df['cond'] = all_df['col'].apply(map_condition, neg=settings['neg'], pos=settings['pos'], mix=settings['mix'])
+    all_df['cond'] = all_df['column_name'].apply(map_condition, neg=settings['neg'], pos=settings['pos'], mix=settings['mix'])
     if settings['exclude_conditions']:
         if isinstance(settings['exclude_conditions'], str):
@@ -882,7 +887,7 @@ def generate_screen_graphs(settings):
         db_loc = [os.path.join(src, 'measurements', 'measurements.db')]
         # Read and merge data from the database
-        df, _ = _read_and_merge_data(db_loc, settings['tables'], verbose=True, nuclei_limit=settings['nuclei_limit'], pathogen_limit=settings['pathogen_limit'], uninfected=settings['uninfected'])
+        df, _ = _read_and_merge_data(db_loc, settings['tables'], verbose=True, nuclei_limit=settings['nuclei_limit'], pathogen_limit=settings['pathogen_limit'])
         # Annotate the data
         df = annotate_conditions(df, cells=settings['cells'], cell_loc=None, pathogens=settings['controls'], pathogen_loc=settings['controls_loc'], treatments=None, treatment_loc=None)

spacr/gui_utils.py CHANGED Viewed

@@ -225,14 +225,30 @@ def annotate(settings):
     conn.close()
     root = tk.Tk()
-    root.geometry(settings['geom'])
-    app = AnnotateApp(root, db, src, image_type=settings['image_type'], channels=settings['channels'], image_size=settings['img_size'], grid_rows=settings['rows'], grid_cols=settings['columns'], annotation_column=settings['annotation_column'], normalize=settings['normalize'], percentiles=settings['percentiles'], measurement=settings['measurement'], threshold=settings['threshold'], normalize_channels=settings['normalize_channels'])
-    next_button = tk.Button(root, text="Next", command=app.next_page)
-    next_button.grid(row=app.grid_rows, column=app.grid_cols - 1)
-    back_button = tk.Button(root, text="Back", command=app.previous_page)
-    back_button.grid(row=app.grid_rows, column=app.grid_cols - 2)
-    exit_button = tk.Button(root, text="Exit", command=app.shutdown)
-    exit_button.grid(row=app.grid_rows, column=app.grid_cols - 3)
+    root.geometry(f"{root.winfo_screenwidth()}x{root.winfo_screenheight()}")
+    db_path = os.path.join(settings['src'], 'measurements/measurements.db')
+    app = AnnotateApp(root,
+                      db_path=db_path,
+                      src=settings['src'],
+                      image_type=settings['image_type'],
+                      channels=settings['channels'],
+                      image_size=settings['img_size'],
+                      annotation_column=settings['annotation_column'],
+                      normalize=settings['normalize'],
+                      percentiles=settings['percentiles'],
+                      measurement=settings['measurement'],
+                      threshold=settings['threshold'],
+                      normalize_channels=settings['normalize_channels'])
+    #next_button = tk.Button(root, text="Next", command=app.next_page)
+    #next_button.grid(row=app.grid_rows, column=app.grid_cols - 1)
+    #back_button = tk.Button(root, text="Back", command=app.previous_page)
+    #back_button.grid(row=app.grid_rows, column=app.grid_cols - 2)
+    #exit_button = tk.Button(root, text="Exit", command=app.shutdown)
+    #exit_button.grid(row=app.grid_rows, column=app.grid_cols - 3)
     app.load_images()
     root.mainloop()

spacr/io.py CHANGED Viewed

@@ -2089,150 +2089,6 @@ def _read_db(db_loc, tables):
     conn.close()
     return dfs
-def _read_and_merge_data(locs, tables, verbose=False, nuclei_limit=False, pathogen_limit=False, uninfected=False):
-    """
-    Read and merge data from SQLite databases and perform data preprocessing.
-    Parameters:
-    - locs (list): A list of file paths to the SQLite database files.
-    - tables (list): A list of table names to read from the databases.
-    - verbose (bool): Whether to print verbose output. Default is False.
-    - nuclei_limit (bool): Whether to include multinucleated cells. Default is False.
-    - pathogen_limit (bool): Whether to include cells with multiple infections. Default is False.
-    - uninfected (bool): Whether to include non-infected cells. Default is False.
-    Returns:
-    - merged_df (pandas.DataFrame): The merged and preprocessed dataframe.
-    - obj_df_ls (list): A list of pandas DataFrames, each containing the data for a specific object type.
-    """
-    from .utils import _split_data
-    #Extract plate DataFrames
-    all_dfs = []
-    for loc in locs:
-        db_dfs = _read_db(loc, tables)
-        all_dfs.append(db_dfs)
-    #Extract Tables from DataFrames and concatinate rows
-    for i, dfs in enumerate(all_dfs):
-        if 'cell' in tables:
-            cell = dfs[0]
-            print(f'plate: {i+1} cells:{len(cell)}')
-        if 'nucleus' in tables:
-            nucleus = dfs[1]
-            print(f'plate: {i+1} nucleus:{len(nucleus)} ')
-        if 'pathogen' in tables:
-            pathogen = dfs[2]
-            print(f'plate: {i+1} pathogens:{len(pathogen)}')
-        if 'cytoplasm' in tables:
-            if not 'pathogen' in tables:
-                cytoplasm = dfs[2]
-            else:
-                cytoplasm = dfs[3]
-            print(f'plate: {i+1} cytoplasms: {len(cytoplasm)}')
-        if i > 0:
-            if 'cell' in tables:
-                cells = pd.concat([cells, cell], axis = 0)
-            if 'nucleus' in tables:
-                nucleus = pd.concat([nucleus, nucleus], axis = 0)
-            if 'pathogen' in tables:
-                pathogens = pd.concat([pathogens, pathogen], axis = 0)
-            if 'cytoplasm' in tables:
-                cytoplasms = pd.concat([cytoplasms, cytoplasm], axis = 0)
-        else:
-            if 'cell' in tables:
-                cells = cell.copy()
-            if 'nucleus' in tables:
-                nucleus = nucleus.copy()
-            if 'pathogen' in tables:
-                pathogens = pathogen.copy()
-            if 'cytoplasm' in tables:
-                cytoplasms = cytoplasm.copy()
-    #Add an o in front of all object and cell lables to convert them to strings
-    if 'cell' in tables:
-        cells = cells.assign(object_label=lambda x: 'o' + x['object_label'].astype(int).astype(str))
-        cells = cells.assign(prcfo = lambda x: x['prcf'] + '_' + x['object_label'])
-        cells_g_df, metadata = _split_data(cells, 'prcfo', 'object_label')
-        print(f'cells: {len(cells)}')
-        print(f'cells grouped: {len(cells_g_df)}')
-    if 'cytoplasm' in tables:
-        cytoplasms = cytoplasms.assign(object_label=lambda x: 'o' + x['object_label'].astype(int).astype(str))
-        cytoplasms = cytoplasms.assign(prcfo = lambda x: x['prcf'] + '_' + x['object_label'])
-        cytoplasms_g_df, _ = _split_data(cytoplasms, 'prcfo', 'object_label')
-        merged_df = cells_g_df.merge(cytoplasms_g_df, left_index=True, right_index=True)
-        print(f'cytoplasms: {len(cytoplasms)}')
-        print(f'cytoplasms grouped: {len(cytoplasms_g_df)}')
-    if 'nucleus' in tables:
-        nucleus = nucleus.dropna(subset=['cell_id'])
-        nucleus = nucleus.assign(object_label=lambda x: 'o' + x['object_label'].astype(int).astype(str))
-        nucleus = nucleus.assign(cell_id=lambda x: 'o' + x['cell_id'].astype(int).astype(str))
-        nucleus = nucleus.assign(prcfo = lambda x: x['prcf'] + '_' + x['cell_id'])
-        nucleus['nucleus_prcfo_count'] = nucleus.groupby('prcfo')['prcfo'].transform('count')
-        if nuclei_limit == False:
-            #nucleus = nucleus[~nucleus['prcfo'].duplicated()]
-            nucleus = nucleus[nucleus['nucleus_prcfo_count']==1]
-        nucleus_g_df, _ = _split_data(nucleus, 'prcfo', 'cell_id')
-        print(f'nucleus: {len(nucleus)}')
-        print(f'nucleus grouped: {len(nucleus_g_df)}')
-        if 'cytoplasm' in tables:
-            merged_df = merged_df.merge(nucleus_g_df, left_index=True, right_index=True)
-        else:
-            merged_df = cells_g_df.merge(nucleus_g_df, left_index=True, right_index=True)
-    if 'pathogen' in tables:
-        pathogens = pathogens.dropna(subset=['cell_id'])
-        pathogens = pathogens.assign(object_label=lambda x: 'o' + x['object_label'].astype(int).astype(str))
-        pathogens = pathogens.assign(cell_id=lambda x: 'o' + x['cell_id'].astype(int).astype(str))
-        pathogens = pathogens.assign(prcfo = lambda x: x['prcf'] + '_' + x['cell_id'])
-        pathogens['pathogen_prcfo_count'] = pathogens.groupby('prcfo')['prcfo'].transform('count')
-        if uninfected == False:
-            pathogens = pathogens[pathogens['pathogen_prcfo_count']>=1]
-        if pathogen_limit == False:
-            pathogens = pathogens[pathogens['pathogen_prcfo_count']<=1]
-        pathogens_g_df, _ = _split_data(pathogens, 'prcfo', 'cell_id')
-        print(f'pathogens: {len(pathogens)}')
-        print(f'pathogens grouped: {len(pathogens_g_df)}')
-        merged_df = merged_df.merge(pathogens_g_df, left_index=True, right_index=True)
-    #Add prc column (plate row column)
-    metadata = metadata.assign(prc = lambda x: x['plate'] + '_' + x['row_name'] + '_' +x['column_name'])
-    #Count cells per well
-    cells_well = pd.DataFrame(metadata.groupby('prc')['object_label'].nunique())
-    cells_well.reset_index(inplace=True)
-    cells_well.rename(columns={'object_label': 'cells_per_well'}, inplace=True)
-    metadata = pd.merge(metadata, cells_well, on='prc', how='inner', suffixes=('', '_drop_col'))
-    object_label_cols = [col for col in metadata.columns if '_drop_col' in col]
-    metadata.drop(columns=object_label_cols, inplace=True)
-    #Add prcfo column (plate row column field object)
-    metadata = metadata.assign(prcfo = lambda x: x['plate'] + '_' + x['row_name'] + '_' +x['column_name']+ '_' +x['field']+ '_' +x['object_label'])
-    metadata.set_index('prcfo', inplace=True)
-    merged_df = metadata.merge(merged_df, left_index=True, right_index=True)
-    merged_df = merged_df.dropna(axis=1)
-    print(f'Generated dataframe with: {len(merged_df.columns)} columns and {len(merged_df)} rows')
-    obj_df_ls = []
-    if 'cell' in tables:
-        obj_df_ls.append(cells)
-    if 'cytoplasm' in tables:
-        obj_df_ls.append(cytoplasms)
-    if 'nucleus' in tables:
-        obj_df_ls.append(nucleus)
-    if 'pathogen' in tables:
-        obj_df_ls.append(pathogens)
-    return merged_df, obj_df_ls
 def _results_to_csv(src, df, df_well):
     """
     Save the given dataframes as CSV files in the specified directory.
@@ -2420,7 +2276,7 @@ def _read_db(db_loc, tables):
     conn.close() # Close the connection
     return dfs
-def _read_and_merge_data(locs, tables, verbose=False, nuclei_limit=False, pathogen_limit=False, uninfected=False):
+def _read_and_merge_data(locs, tables, verbose=False, nuclei_limit=False, pathogen_limit=False):
     from .utils import _split_data
@@ -2532,11 +2388,6 @@ def _read_and_merge_data(locs, tables, verbose=False, nuclei_limit=False, pathog
         pathogens = pathogens.assign(prcfo = lambda x: x['prcf'] + '_' + x['cell_id'])
         pathogens['pathogen_prcfo_count'] = pathogens.groupby('prcfo')['prcfo'].transform('count')
-        print(f"before noninfected: {len(pathogens)}")
-        if uninfected == False:
-            pathogens = pathogens[pathogens['pathogen_prcfo_count']>=1]
-            print(f"after noninfected: {len(pathogens)}")
         if isinstance(pathogen_limit, bool):
             if pathogen_limit == False:
                 pathogens = pathogens[pathogens['pathogen_prcfo_count']<=1]
@@ -2929,8 +2780,8 @@ def generate_training_dataset(settings):
                                      tables=tables,
                                      verbose=False,
                                      nuclei_limit=settings['nuclei_limit'],
-                                     pathogen_limit=settings['pathogen_limit'],
-                                     uninfected=settings['uninfected'])
+                                     pathogen_limit=settings['pathogen_limit'])
         [png_list_df] = _read_db(db_loc=db_path, tables=['png_list'])
         filtered_png_list_df = png_list_df[png_list_df['prcfo'].isin(df.index)]
         return filtered_png_list_df
@@ -2952,8 +2803,7 @@ def generate_training_dataset(settings):
                                      tables=tables,
                                      verbose=False,
                                      nuclei_limit=settings['nuclei_limit'],
-                                     pathogen_limit=settings['pathogen_limit'],
-                                     uninfected=settings['uninfected'])
+                                     pathogen_limit=settings['pathogen_limit'])
         print('length df 1', len(df))
         df = annotate_conditions(df, cells=['HeLa'], pathogens=['pathogen'], treatments=settings['classes'],
@@ -3034,7 +2884,6 @@ def generate_training_dataset(settings):
     if 'pathogen' not in settings['tables']:
         settings['pathogen_limit'] = 0
-        settings['uninfected'] = True
     # Set default settings and save
     settings = set_generate_training_dataset_defaults(settings)

spacr/ml.py CHANGED Viewed

@@ -1172,15 +1172,14 @@ def generate_ml_scores(settings):
     db_loc = [src+'/measurements/measurements.db']
     tables = ['cell', 'nucleus', 'pathogen','cytoplasm']
-    nuclei_limit, pathogen_limit, uninfected = settings['nuclei_limit'], settings['pathogen_limit'], settings['uninfected']
+    nuclei_limit, pathogen_limit = settings['nuclei_limit'], settings['pathogen_limit']
     df, _ = _read_and_merge_data(db_loc,
                                  tables,
                                  settings['verbose'],
                                  nuclei_limit,
-                                 pathogen_limit,
-                                 uninfected)
+                                 pathogen_limit)
     if settings['annotation_column'] is not None:
         settings['location_column'] = settings['annotation_column']

spacr/plot.py CHANGED Viewed

@@ -909,7 +909,7 @@ def plot_merged(src, settings):
         path = os.path.join(src, file)
         stack = np.load(path)
         print(f'Loaded: {path}')
-        if not settings['uninfected']:
+        if settings['pathogen_limit'] > 0:
             if settings['pathogen_mask_dim'] is not None and settings['cell_mask_dim'] is not None:
                 stack = _remove_noninfected(stack, settings['cell_mask_dim'], settings['nucleus_mask_dim'], settings['pathogen_mask_dim'])
@@ -2198,8 +2198,8 @@ def jitterplot_by_annotation(src, x_column, y_column, plot_title='Jitter Plot',
                                     tables,
                                     verbose=True,
                                     nuclei_limit=True,
-                                    pathogen_limit=True,
-                                    uninfected=True)
+                                    pathogen_limit=True)
         paths_df = _read_db(loc, tables=['png_list'])
         merged_df = pd.merge(df, paths_df[0], on='prcfo', how='left')
         return merged_df
@@ -2435,7 +2435,9 @@ class spacrGraph:
         self.df = df
         self.grouping_column = grouping_column
+        self.order = sorted(df[self.grouping_column].unique().tolist())
         self.data_column = data_column if isinstance(data_column, list) else [data_column]
         self.graph_type = graph_type
         self.summary_func = summary_func
         self.order = order
@@ -2909,9 +2911,11 @@ class spacrGraph:
         ax.set_xlim(-0.5, num_groups - 0.5)
         # Set ticks to match the group labels in your DataFrame
-        group_labels = self.df[self.grouping_column].unique()
-        ax.set_xticks(range(len(group_labels)))
-        ax.set_xticklabels(group_labels, rotation=45, ha='right')
+        #group_labels = self.df[self.grouping_column].unique()
+        #group_labels = self.order
+        #ax.set_xticks(range(len(group_labels)))
+        #ax.set_xticklabels(group_labels, rotation=45, ha='right')
+        plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
         # Customize elements based on the graph type
         if graph_type == 'bar':
@@ -2943,6 +2947,66 @@ class spacrGraph:
         # Redraw the figure to apply changes
         ax.figure.canvas.draw()
+    def _standerdize_figure_format_v1(self, ax, num_groups, graph_type):
+        """
+        Adjusts the figure layout (size, bar width, jitter, and spacing) based on the number of groups.
+        """
+        if graph_type in ['line', 'line_std']:
+            print("Skipping layout adjustment for line graphs.")
+            return  # Skip layout adjustment for line graphs
+        correction_factor = 4
+        # Set figure size to ensure it remains square with a minimum size
+        fig_size = max(6, num_groups * 2) / correction_factor
+        ax.figure.set_size_inches(fig_size, fig_size)
+        # Configure layout based on the number of groups
+        bar_width = min(0.8, 1.5 / num_groups) / correction_factor
+        jitter_amount = min(0.1, 0.2 / num_groups) / correction_factor
+        jitter_size = max(50 / num_groups, 200)
+        # Adjust x-axis limits to fit the specified order of groups
+        ax.set_xlim(-0.5, len(self.order) - 0.5)  # Use `self.order` length to ensure alignment
+        # Use `self.order` as the x-tick labels to maintain consistent ordering
+        ax.set_xticks(range(len(self.order)))
+        #ax.set_xticklabels(self.order, rotation=45, ha='right')
+        plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
+        # Customize elements based on the graph type
+        if graph_type == 'bar':
+            # Adjust bars' width and position
+            for bar in ax.patches:
+                bar.set_width(bar_width)
+                bar.set_x(bar.get_x() - bar_width / 2)
+        elif graph_type in ['jitter', 'jitter_bar', 'jitter_box']:
+            # Adjust jitter points' position and size
+            for coll in ax.collections:
+                offsets = coll.get_offsets()
+                offsets[:, 0] += jitter_amount  # Shift jitter points slightly
+                coll.set_offsets(offsets)
+                coll.set_sizes([jitter_size] * len(offsets))  # Adjust point size dynamically
+        elif graph_type in ['box', 'violin']:
+            # Adjust box width for consistent spacing
+            for artist in ax.artists:
+                artist.set_width(bar_width)
+        # Adjust legend and axis labels
+        ax.tick_params(axis='x', labelsize=max(10, 15 - num_groups // 2))
+        ax.tick_params(axis='y', labelsize=max(10, 15 - num_groups // 2))
+        # Adjust legend placement and size
+        if ax.get_legend():
+            ax.get_legend().set_bbox_to_anchor((1.05, 1))
+            ax.get_legend().prop.set_size(max(8, 12 - num_groups // 3))
+        # Redraw the figure to apply changes
+        ax.figure.canvas.draw()
     def _create_bar_plot(self, ax):
         """Helper method to create a bar plot with consistent bar thickness and centered error bars."""
@@ -2959,7 +3023,7 @@ class spacrGraph:
         summary_df = self.df_melted.groupby([x_axis_column]).agg(mean=('Value', 'mean'),std=('Value', 'std'),sem=('Value', 'sem')).reset_index()
         error_bars = summary_df[self.error_bar_type] if self.error_bar_type in ['std', 'sem'] else None
-        sns.barplot(data=self.df_melted, x=x_axis_column, y='Value', hue=self.hue, palette=self.sns_palette, ax=ax, dodge=self.jitter_bar_dodge, ci=None)
+        sns.barplot(data=self.df_melted, x=x_axis_column, y='Value', hue=self.hue, palette=self.sns_palette, ax=ax, dodge=self.jitter_bar_dodge, ci=None, order=self.order)
         # Adjust the bar width manually
         if len(self.data_column) > 1:
@@ -2999,7 +3063,7 @@ class spacrGraph:
             hue = None
         # Create the jitter plot
-        sns.stripplot(data=self.df_melted,x=x_axis_column,y='Value',hue=self.hue, palette=self.sns_palette, dodge=self.jitter_bar_dodge, jitter=self.bar_width, ax=ax, alpha=0.6, size=16)
+        sns.stripplot(data=self.df_melted,x=x_axis_column,y='Value',hue=self.hue, palette=self.sns_palette, dodge=self.jitter_bar_dodge, jitter=self.bar_width, ax=ax, alpha=0.6, size=16, order=self.order)
         # Adjust legend and labels
         ax.set_xlabel(self.grouping_column)
@@ -3088,7 +3152,7 @@ class spacrGraph:
             hue = None
         # Create the box plot
-        sns.boxplot(data=self.df_melted,x=x_axis_column,y='Value',hue=self.hue,palette=self.sns_palette,ax=ax)
+        sns.boxplot(data=self.df_melted,x=x_axis_column,y='Value',hue=self.hue,palette=self.sns_palette,ax=ax, order=self.order)
         # Adjust legend and labels
         ax.set_xlabel(self.grouping_column)
@@ -3117,7 +3181,7 @@ class spacrGraph:
             hue = None
         # Create the violin plot
-        sns.violinplot(data=self.df_melted,x=x_axis_column,y='Value', hue=self.hue,palette=self.sns_palette,ax=ax)
+        sns.violinplot(data=self.df_melted,x=x_axis_column,y='Value', hue=self.hue,palette=self.sns_palette,ax=ax, order=self.order)
         # Adjust legend and labels
         ax.set_xlabel(self.grouping_column)
@@ -3148,8 +3212,8 @@ class spacrGraph:
         summary_df = self.df_melted.groupby([x_axis_column]).agg(mean=('Value', 'mean'),std=('Value', 'std'),sem=('Value', 'sem')).reset_index()
         error_bars = summary_df[self.error_bar_type] if self.error_bar_type in ['std', 'sem'] else None
-        sns.barplot(data=self.df_melted, x=x_axis_column, y='Value', hue=self.hue, palette=self.sns_palette, ax=ax, dodge=self.jitter_bar_dodge, ci=None)
-        sns.stripplot(data=self.df_melted,x=x_axis_column,y='Value',hue=self.hue, palette=self.sns_palette, dodge=self.jitter_bar_dodge, jitter=self.bar_width, ax=ax,alpha=0.6, edgecolor='white',linewidth=1, size=16)
+        sns.barplot(data=self.df_melted, x=x_axis_column, y='Value', hue=self.hue, palette=self.sns_palette, ax=ax, dodge=self.jitter_bar_dodge, ci=None, order=self.order)
+        sns.stripplot(data=self.df_melted,x=x_axis_column,y='Value',hue=self.hue, palette=self.sns_palette, dodge=self.jitter_bar_dodge, jitter=self.bar_width, ax=ax,alpha=0.6, edgecolor='white',linewidth=1, size=16, order=self.order)
         # Adjust the bar width manually
         if len(self.data_column) > 1:
@@ -3189,8 +3253,8 @@ class spacrGraph:
             hue = None
         # Create the box plot
-        sns.boxplot(data=self.df_melted,x=x_axis_column,y='Value',hue=self.hue,palette=self.sns_palette,ax=ax)
-        sns.stripplot(data=self.df_melted,x=x_axis_column,y='Value',hue=self.hue, palette=self.sns_palette, dodge=self.jitter_bar_dodge, jitter=self.bar_width, ax=ax,alpha=0.6, edgecolor='white',linewidth=1, size=12)
+        sns.boxplot(data=self.df_melted,x=x_axis_column,y='Value',hue=self.hue,palette=self.sns_palette,ax=ax, order=self.order)
+        sns.stripplot(data=self.df_melted,x=x_axis_column,y='Value',hue=self.hue, palette=self.sns_palette, dodge=self.jitter_bar_dodge, jitter=self.bar_width, ax=ax,alpha=0.6, edgecolor='white',linewidth=1, size=12, order=self.order)
         # Adjust legend and labels
         ax.set_xlabel(self.grouping_column)
@@ -3264,12 +3328,11 @@ def plot_data_from_db(settings):
             [df1] = _read_db(db_loc, tables=[settings['table_names']])
         else:
             df1, _ = _read_and_merge_data(locs=[db_loc],
-                                    tables = ['cell', 'nucleus', 'pathogen','cytoplasm'],
+                                    tables = settings['tables'],
                                     verbose=settings['verbose'],
                                     nuclei_limit=settings['nuclei_limit'],
-                                    pathogen_limit=settings['pathogen_limit'],
-                                    uninfected=settings['uninfected'])
+                                    pathogen_limit=settings['pathogen_limit'])
         dft = annotate_conditions(df1,
                                 cells=settings['cell_types'],
                                 cell_loc=settings['cell_plate_metadata'],
@@ -3281,10 +3344,7 @@ def plot_data_from_db(settings):
     df = pd.concat(dfs, axis=0)
     df['prc'] = df['plate'].astype(str) + '_' + df['row_name'].astype(str) + '_' + df['column_name'].astype(str)
-    #df['recruitment'] = df['pathogen_channel_1_mean_intensity'] / df['cytoplasm_channel_1_mean_intensity']
-    #df['recruitment'] = df['pathogen_channel_1_mean_intensity'] / df['cytoplasm_channel_1_mean_intensity']
-    df['class'] = df['png_path'].apply(lambda x: 'class_1' if 'class_1' in x else ('class_0' if 'class_0' in x else None))
     if settings['cell_plate_metadata'] !=  None:
         df = df.dropna(subset='host_cell')
@@ -3297,7 +3357,6 @@ def plot_data_from_db(settings):
     df = df.dropna(subset=settings['data_column'])
     df = df.dropna(subset=settings['grouping_column'])
     src = srcs[0]
     dst = os.path.join(src, 'results', settings['graph_name'])
     os.makedirs(dst, exist_ok=True)

spacr/settings.py CHANGED Viewed

@@ -2,7 +2,6 @@ import os, ast
 def set_default_plot_merge_settings():
     settings = {}
-    settings.setdefault('uninfected', True)
     settings.setdefault('pathogen_limit', 10)
     settings.setdefault('nuclei_limit', 1)
     settings.setdefault('remove_background', False)
@@ -181,8 +180,8 @@ def set_default_umap_image_settings(settings={}):
     settings.setdefault('n_neighbors', 1000)
     settings.setdefault('min_dist', 0.1)
     settings.setdefault('metric', 'euclidean')
-    settings.setdefault('eps', 0.5)
-    settings.setdefault('min_samples', 1000)
+    settings.setdefault('eps', 0.9)
+    settings.setdefault('min_samples', 100)
     settings.setdefault('filter_by', 'channel_0')
     settings.setdefault('img_zoom', 0.5)
     settings.setdefault('plot_by_cluster', True)
@@ -201,16 +200,13 @@ def set_default_umap_image_settings(settings={}):
     settings.setdefault('col_to_compare', 'column_name')
     settings.setdefault('pos', 'c1')
     settings.setdefault('neg', 'c2')
+    settings.setdefault('mix', 'c3')
     settings.setdefault('embedding_by_controls', False)
     settings.setdefault('plot_images', True)
     settings.setdefault('reduction_method','umap')
     settings.setdefault('save_figure', False)
     settings.setdefault('n_jobs', -1)
     settings.setdefault('color_by', None)
-    settings.setdefault('neg', 'c1')
-    settings.setdefault('pos', 'c2')
-    settings.setdefault('mix', 'c3')
-    settings.setdefault('mix', 'c3')
     settings.setdefault('exclude_conditions', None)
     settings.setdefault('analyze_clusters', False)
     settings.setdefault('resnet_features', False)
@@ -295,7 +291,6 @@ def set_default_analyze_screen(settings):
     settings.setdefault('exclude',None)
     settings.setdefault('nuclei_limit',True)
     settings.setdefault('pathogen_limit',3)
-    settings.setdefault('uninfected',True)
     settings.setdefault('n_repeats',10)
     settings.setdefault('top_features',30)
     settings.setdefault('remove_low_variance_features',True)
@@ -353,7 +348,6 @@ def set_generate_training_dataset_defaults(settings):
     settings.setdefault('tables',None)
     settings.setdefault('nuclei_limit',True)
     settings.setdefault('pathogen_limit',True)
-    settings.setdefault('uninfected',True)
     settings.setdefault('png_type','cell_png')
     return settings
@@ -467,7 +461,6 @@ def get_analyze_recruitment_default_settings(settings):
     settings.setdefault('plot_nr',3)
     settings.setdefault('plot_control',True)
     settings.setdefault('figuresize',10)
-    settings.setdefault('uninfected',True)
     settings.setdefault('pathogen_limit',10)
     settings.setdefault('nuclei_limit',1)
     settings.setdefault('cells_per_well',0)
@@ -691,7 +684,6 @@ expected_types = {
     "measurement": str,
     "nr_imgs": int,
     "um_per_pixel": (int, float),
-    "uninfected": bool,
     "pathogen_limit": int,
     "nuclei_limit": int,
     "filter_min_max": (list, type(None)),
@@ -898,7 +890,7 @@ categories = {"Paths":[ "src", "grna", "barcodes", "custom_model_path", "dataset
              "Plot": ["plot", "plot_control", "plot_nr", "examples_to_plot", "normalize_plots", "cmap", "figuresize", "plot_cluster_grids", "img_zoom", "row_limit", "color_by", "plot_images", "smooth_lines", "plot_points", "plot_outlines", "black_background", "plot_by_cluster", "heatmap_feature","grouping","min_max","cmap","save_figure"],
              "Test": ["test_mode", "test_images", "random_test", "test_nr", "test", "test_split"],
              "Timelapse": ["timelapse", "fps", "timelapse_displacement", "timelapse_memory", "timelapse_frame_limits", "timelapse_remove_transient", "timelapse_mode", "timelapse_objects", "compartments"],
-             "Advanced": ["shuffle", "target_intensity_min", "cells_per_well", "nuclei_limit", "pathogen_limit", "uninfected", "background", "backgrounds", "schedule", "test_size","exclude","n_repeats","top_features", "model_type_ml", "model_type","minimum_cell_count","n_estimators","preprocess", "remove_background", "normalize", "lower_percentile", "merge_pathogens", "batch_size", "filter", "save", "masks", "verbose", "randomize", "n_jobs"],
+             "Advanced": ["shuffle", "target_intensity_min", "cells_per_well", "nuclei_limit", "pathogen_limit", "background", "backgrounds", "schedule", "test_size","exclude","n_repeats","top_features", "model_type_ml", "model_type","minimum_cell_count","n_estimators","preprocess", "remove_background", "normalize", "lower_percentile", "merge_pathogens", "batch_size", "filter", "save", "masks", "verbose", "randomize", "n_jobs"],
              "Miscellaneous": ["all_to_mip", "pick_slice", "skip_mode", "upscale", "upscale_factor"]
              }
@@ -1080,7 +1072,6 @@ def generate_fields(variables, scrollable_frame):
         "img_zoom": "(float) - Zoom factor for the images in plots.",
         "nuclei_limit": "(int) - Whether to include multinucleated cells in the analysis.",
         "pathogen_limit": "(int) - Whether to include multi-infected cells in the analysis.",
-        "uninfected": "(bool) - Whether to include non-infected cells in the analysis.",
         "uninfected": "(bool) - Whether to include uninfected cells in the analysis.",
         "init_weights": "(bool) - Whether to initialize weights for the model.",
         "src": "(str) - Path to the folder containing the images.",

spacr/submodules.py CHANGED Viewed

@@ -1,5 +1,5 @@
 import seaborn as sns
-import os, random, sqlite3
+import os, random, sqlite3, re, shap
 import pandas as pd
 import numpy as np
 import cellpose
@@ -7,6 +7,9 @@ from skimage.measure import regionprops, label
 from cellpose import models as cp_models
 from cellpose import train as train_cp
 from IPython.display import display
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.inspection import permutation_importance
+from math import pi
 import matplotlib.pyplot as plt
 from natsort import natsorted
@@ -43,9 +46,8 @@ def analyze_recruitment(settings={}):
                                  tables=['cell', 'nucleus', 'pathogen','cytoplasm'],
                                  verbose=True,
                                  nuclei_limit=settings['nuclei_limit'],
-                                 pathogen_limit=settings['pathogen_limit'],
-                                 uninfected=settings['uninfected'])
+                                 pathogen_limit=settings['pathogen_limit'])
     df = annotate_conditions(df,
                              cells=settings['cell_types'],
                              cell_loc=settings['cell_plate_metadata'],
@@ -550,4 +552,296 @@ def compare_reads_to_scores(reads_csv, scores_csv, empirical_dict={'r1':(90,10),
     fig_1 = plot_line(df, x_column = 'pc_fraction', y_columns=y_columns, group_column=None, xlabel=None, ylabel='Fraction', title=None, figsize=(10, 6), save_path=save_paths[0])
     fig_2 = plot_line(df, x_column = 'nc_fraction', y_columns=y_columns, group_column=None, xlabel=None, ylabel='Fraction', title=None, figsize=(10, 6), save_path=save_paths[1])
-    return [fig_1, fig_2]
+    return [fig_1, fig_2]
+def interperate_vision_model(settings={}):
+    """
+    Relate per-object measurements to a score column and rank the features.
+
+    Measurements are read from ``<src>/measurements/measurements.db``, extended
+    with between-compartment ratio columns and inner-joined to the scores CSV on
+    plate / row_name / column_name / field / object_label. A
+    RandomForestClassifier is fitted on the numeric columns and, depending on
+    the flags in ``settings``, feature importance, permutation importance and
+    SHAP values are computed and plotted.
+
+    Args:
+        settings (dict): Keys read here are 'src', 'tables', 'nuclei_limit',
+            'pathogen_limit', 'scores', 'score_column', 'feature_importance',
+            'permutation_importance', 'shap', 'shap_sample', 'top_features',
+            'include_all', 'channels', 'n_jobs' and 'save'. Keys are accessed
+            by index, so a required key that is missing raises KeyError.
+
+    Returns:
+        dict: DataFrames for the requested analyses, keyed by
+        'feature_importance', 'feature_importance_compartment',
+        'feature_importance_channel', 'permutation_importance' and 'shap'.
+        When ``settings['save']`` is true each one is also written to
+        ``<src>/results/<key>.csv``.
+    """
+    from .io import _read_and_merge_data
+    def generate_comparison_columns(df, compartments=['cell', 'nucleus', 'pathogen', 'cytoplasm']):
+        """
+        Add ratio columns between same-named features of different compartments.
+
+        ``df`` is modified in place and also returned, together with a dict that
+        maps each source column to the matching columns found in the other
+        compartments.
+        """
+        def safe_ratio(numerator, denominator):
+            # Build a fresh Series and assign it, instead of calling
+            # replace/fillna with inplace=True on ``df[col]``: the chained
+            # in-place form does not update ``df`` under pandas Copy-on-Write.
+            # Division by zero (inf) and 0/0 (NaN) both end up as 0.
+            return (numerator / denominator).replace([np.inf, -np.inf], np.nan).fillna(0)
+        comparison_dict = {}
+        # Columns grouped by the compartment prefix they start with
+        compartment_columns = {comp: [col for col in df.columns if col.startswith(comp)] for comp in compartments}
+        for comp0, comp0_columns in compartment_columns.items():
+            for comp0_col in comp0_columns:
+                related_cols = []
+                # NOTE(review): str.replace removes every occurrence of the
+                # compartment name, not only the leading prefix - confirm that
+                # no feature name repeats its compartment.
+                base_col_name = comp0_col.replace(comp0, '')
+                for prefix in compartment_columns:
+                    if prefix == comp0:  # Skip same-compartment comparisons
+                        continue
+                    related_col = prefix + base_col_name
+                    if related_col in df.columns:
+                        related_cols.append(related_col)
+                        # Column name format: <other>_<this><feature>
+                        df[f"{prefix}_{comp0}{base_col_name}"] = safe_ratio(df[related_col], df[comp0_col])
+                if related_cols:
+                    comparison_dict[comp0_col] = related_cols
+                    # All pairwise ratios between the matching columns
+                    for i, rel_col_1 in enumerate(related_cols):
+                        for rel_col_2 in related_cols[i + 1:]:
+                            comp1, comp2 = rel_col_1.split('_')[0], rel_col_2.split('_')[0]
+                            df[f"{comp1}_{comp2}{base_col_name}"] = safe_ratio(df[rel_col_1], df[rel_col_2])
+        return df, comparison_dict
+    def group_feature_class(df, feature_groups=['cell', 'cytoplasm', 'nucleus', 'pathogen'], name='compartment', include_all=False):
+        """
+        Label each feature with its group(s) and sum importance per label.
+
+        Adds column ``name`` to ``df`` in place and returns a new DataFrame with
+        columns ``name`` and ``<name>_importance_sum``; with ``include_all`` an
+        extra 'all' row holds the grand total.
+        """
+        def find_feature_class(feature, compartments):
+            # Each group is used as a regex pattern; several hits are joined with '-'
+            matches = [compartment for compartment in compartments if re.search(compartment, feature)]
+            return '-'.join(matches) if matches else None
+        df[name] = df['feature'].apply(lambda x: find_feature_class(x, feature_groups))
+        if name == 'channel':
+            # Features that match no channel are treated as morphology features.
+            # Assigned back rather than filled in place (see safe_ratio above).
+            df['channel'] = df['channel'].fillna('morphology')
+        importance_sum = df.groupby(name)['importance'].sum().reset_index(name=f'{name}_importance_sum')
+        if include_all:
+            total_importance = importance_sum[f'{name}_importance_sum'].sum()
+            importance_sum = pd.concat(
+                [importance_sum,
+                 pd.DataFrame(
+                     [{name: 'all', f'{name}_importance_sum': total_importance}])]
+                , ignore_index=True)
+        return importance_sum
+    def create_extended_radar_plot(values, labels, title):
+        """Draw a filled radar chart of ``values`` with one spoke per label."""
+        values = list(values) + [values[0]]  # Close the loop for radar chart
+        angles = [n / float(len(labels)) * 2 * pi for n in range(len(labels))]
+        angles += angles[:1]
+        fig, ax = plt.subplots(figsize=(8, 8), subplot_kw=dict(polar=True))
+        ax.plot(angles, values, linewidth=2, linestyle='solid')
+        ax.fill(angles, values, alpha=0.25)
+        ax.set_xticks(angles[:-1])
+        ax.set_xticklabels(labels, fontsize=10, rotation=45, ha='right')
+        plt.title(title, pad=20)
+        plt.show()
+    def extract_compartment_channel(feature_name):
+        """Return ``(compartment, channel)`` parsed from a feature name."""
+        # Compartment is the text before the first underscore
+        compartment = feature_name.split('_')[0]
+        if compartment == 'cells':
+            compartment = 'cell'
+        # Channels are detected by substring; several hits are joined with ' + '
+        channels = [f'channel_{i}' for i in range(4) if f'channel_{i}' in feature_name]
+        channel = ' + '.join(channels) if channels else 'morphology'
+        return (compartment, channel)
+    def read_and_preprocess_data(settings):
+        """Load measurements and scores, join them, and return ``(X, y, merged_df)``."""
+        join_columns = ['plate', 'row_name', 'column_name', 'field', 'object_label']
+        df, _ = _read_and_merge_data(
+            locs=[settings['src']+'/measurements/measurements.db'],
+            tables=settings['tables'],
+            verbose=True,
+            nuclei_limit=settings['nuclei_limit'],
+            pathogen_limit=settings['pathogen_limit']
+        )
+        df, _dict = generate_comparison_columns(df, compartments=['cell', 'nucleus', 'pathogen', 'cytoplasm'])
+        print(f"Expanded dataframe to {len(df.columns)} columns with relative features")
+        scores_df = pd.read_csv(settings['scores'])
+        # Fall back to the short column names only when the join column is
+        # absent, so a CSV that already carries 'object_label' (and no 'object')
+        # no longer fails.
+        for target, source in (('row_name', 'row'), ('column_name', 'col'), ('object_label', 'object')):
+            if target not in scores_df.columns:
+                scores_df[target] = scores_df[source]
+        # Remove the 'o' prefix from 'object_label' in df
+        df['object_label'] = df['object_label'].str.replace('o', '')
+        # Both sides of the join must have the same dtype
+        df[join_columns] = df[join_columns].astype(str)
+        scores_df[join_columns] = scores_df[join_columns].astype(str)
+        # Keep only the join keys and the score
+        scores_df = scores_df[join_columns + [settings['score_column']]]
+        merged_df = pd.merge(df, scores_df, on=join_columns, how='inner')
+        # errors='ignore': a non-numeric score column is already absent from
+        # the numeric selection and must not raise here
+        X = merged_df.select_dtypes(include='number').drop(columns=[settings['score_column']], errors='ignore')
+        y = merged_df[settings['score_column']]
+        return X, y, merged_df
+    X, y, merged_df = read_and_preprocess_data(settings)
+    output = {}
+    # Permutation importance needs the fitted forest and SHAP needs the
+    # importance ranking, so fit once whenever any of the three is requested.
+    if settings['feature_importance'] or settings['permutation_importance'] or settings['shap']:
+        model = RandomForestClassifier(random_state=42, n_jobs=settings['n_jobs'])
+        model.fit(X, y)
+        feature_importance_df = pd.DataFrame({'feature': X.columns, 'importance': model.feature_importances_})
+        feature_importance_df = feature_importance_df.sort_values(by='importance', ascending=False)
+    # Step 1: Feature Importance using Random Forest
+    if settings['feature_importance']:
+        print("Feature Importance ...")
+        top_feature_importance_df = feature_importance_df.head(settings['top_features'])
+        plt.figure(figsize=(10, 6))
+        plt.barh(top_feature_importance_df['feature'], top_feature_importance_df['importance'])
+        plt.xlabel('Importance')
+        plt.title(f"Top {settings['top_features']} Features - Feature Importance")
+        plt.gca().invert_yaxis()
+        plt.show()
+        output['feature_importance'] = feature_importance_df
+        # group_feature_class also adds 'compartment' / 'channel' columns to
+        # feature_importance_df itself
+        fi_compartment_df = group_feature_class(feature_importance_df, feature_groups=settings['tables'], name='compartment', include_all=settings['include_all'])
+        fi_channel_df = group_feature_class(feature_importance_df, feature_groups=settings['channels'], name='channel', include_all=settings['include_all'])
+        output['feature_importance_compartment'] = fi_compartment_df
+        output['feature_importance_channel'] = fi_channel_df
+    # Step 2: Permutation Importance
+    if settings['permutation_importance']:
+        print("Permutation Importance ...")
+        perm_importance = permutation_importance(model, X, y, n_repeats=10, random_state=42, n_jobs=settings['n_jobs'])
+        perm_importance_df = pd.DataFrame({'feature': X.columns, 'importance': perm_importance.importances_mean})
+        perm_importance_df = perm_importance_df.sort_values(by='importance', ascending=False)
+        top_perm_importance_df = perm_importance_df.head(settings['top_features'])
+        plt.figure(figsize=(10, 6))
+        plt.barh(top_perm_importance_df['feature'], top_perm_importance_df['importance'])
+        plt.xlabel('Importance')
+        plt.title(f"Top {settings['top_features']} Features - Permutation Importance")
+        plt.gca().invert_yaxis()
+        plt.show()
+        output['permutation_importance'] = perm_importance_df
+    # Step 3: SHAP Analysis
+    if settings['shap']:
+        print("SHAP Analysis ...")
+        # Refit on the top N features (ranked by forest importance) only
+        top_features = feature_importance_df.head(settings['top_features'])['feature']
+        X_top = X[top_features]
+        shap_model = RandomForestClassifier(random_state=42, n_jobs=settings['n_jobs'])
+        shap_model.fit(X_top, y)
+        if settings['shap_sample']:
+            # About 1% of the rows to speed up SHAP, but never an empty sample
+            n_rows = len(X_top)
+            X_sample = X_top.sample(min(max(1, n_rows // 100), n_rows), random_state=42)
+        else:
+            X_sample = X_top
+        explainer = shap.Explainer(shap_model.predict, X_sample)
+        # NOTE(review): the permutation explainer is understood to require
+        # max_evals >= 2 * n_features + 1 - confirm against the SHAP docs
+        shap_values = explainer(X_sample, max_evals=max(1500, 2 * X_sample.shape[1] + 1))
+        shap.summary_plot(shap_values, X_sample, max_display=settings['top_features'])
+        shap_df = pd.DataFrame(shap_values.values, columns=X_sample.columns)
+        # Replace feature names with (compartment, channel) column labels
+        shap_df.columns = pd.MultiIndex.from_tuples(
+            [extract_compartment_channel(feat) for feat in shap_df.columns],
+            names=['compartment', 'channel']
+        )
+        # Mean |SHAP| per compartment / channel. Grouping the transposed frame
+        # avoids the deprecated groupby(axis=1).
+        abs_shap_by_feature = shap_df.abs().T
+        compartment_mean = abs_shap_by_feature.groupby(level='compartment').mean().mean(axis=1)
+        channel_mean = abs_shap_by_feature.groupby(level='channel').mean().mean(axis=1)
+        # Summed importance for each pair of compartments and of channels
+        combined_compartment = {}
+        for i, comp1 in enumerate(compartment_mean.index):
+            for comp2 in compartment_mean.index[i+1:]:
+                combined_compartment[f"{comp1} + {comp2}"] = shap_df.loc[:, (comp1, slice(None))].abs().mean().mean() + \
+                                                              shap_df.loc[:, (comp2, slice(None))].abs().mean().mean()
+        combined_channel = {}
+        for i, chan1 in enumerate(channel_mean.index):
+            for chan2 in channel_mean.index[i+1:]:
+                combined_channel[f"{chan1} + {chan2}"] = shap_df.loc[:, (slice(None), chan1)].abs().mean().mean() + \
+                                                          shap_df.loc[:, (slice(None), chan2)].abs().mean().mean()
+        # Values and labels for the radar charts (plots currently disabled)
+        all_compartment_importance = list(compartment_mean.values) + list(combined_compartment.values())
+        all_compartment_labels = list(compartment_mean.index) + list(combined_compartment.keys())
+        all_channel_importance = list(channel_mean.values) + list(combined_channel.values())
+        all_channel_labels = list(channel_mean.index) + list(combined_channel.keys())
+        #create_extended_radar_plot(all_compartment_importance, all_compartment_labels, "SHAP Importance by Compartment (Individual and Combined)")
+        #create_extended_radar_plot(all_channel_importance, all_channel_labels, "SHAP Importance by Channel (Individual and Combined)")
+        output['shap'] = shap_df
+    if settings['save']:
+        dst = os.path.join(settings['src'], 'results')
+        os.makedirs(dst, exist_ok=True)
+        for key, df in output.items():
+            save_path = os.path.join(dst, f"{key}.csv")
+            df.to_csv(save_path)
+            print(f"Saved {save_path}")
+    return output

spacr/utils.py CHANGED Viewed

@@ -4052,7 +4052,7 @@ def measure_test_mode(settings):
     return settings
-def preprocess_data(df, filter_by, remove_highly_correlated, log_data, exclude):
+def preprocess_data(df, filter_by, remove_highly_correlated, log_data, exclude, column_list=False):
     """
     Preprocesses the given dataframe by applying filtering, removing highly correlated columns,
     applying log transformation, filling NaN values, and scaling the numeric data.
@@ -4076,7 +4076,10 @@ def preprocess_data(df, filter_by, remove_highly_correlated, log_data, exclude):
     # Apply filtering based on the `filter_by` parameter
     if filter_by is not None:
         df, _ = filter_dataframe_features(df, channel_of_interest=filter_by, exclude=exclude)
+    if column_list:
+        df = df[column_list]
     # Select numerical features
     numeric_data = df.select_dtypes(include=['number'])
@@ -4181,6 +4184,7 @@ def filter_dataframe_features(df, channel_of_interest, exclude=None, remove_low_
     if verbose:
         print("Columns to remove:", count_and_id_columns)
     df = df.drop(columns=count_and_id_columns)
     if not channel_of_interest is None:
@@ -4189,6 +4193,9 @@ def filter_dataframe_features(df, channel_of_interest, exclude=None, remove_low_
         if isinstance(channel_of_interest, list):
             feature_strings = [f"channel_{channel}" for channel in channel_of_interest]
+        elif isinstance(channel_of_interest, str):
+            feature_strings = [channel_of_interest]
         elif isinstance(channel_of_interest, int):
             feature_string = f"channel_{channel_of_interest}"
             feature_strings = [feature_string]
@@ -5164,3 +5171,33 @@ def rename_columns_in_db(db_path):
     # After closing the 'with' block, run VACUUM outside of any transaction
     with sqlite3.connect(db_path) as conn:
         conn.execute("VACUUM;")
+def group_feature_class(df, feature_groups=['cell', 'cytoplasm', 'nucleus', 'pathogen'], name='compartment'):
+    """
+    Label every row of ``df`` with the feature group(s) its name matches.
+
+    Args:
+        df (pd.DataFrame): Must contain 'feature' and 'importance' columns.
+            Column ``name`` is added to it in place.
+        feature_groups (list of str): Patterns searched (as regular
+            expressions) in each feature name.
+        name (str): Name of the label column to create. When it is 'channel',
+            features that match no group are labelled 'morphology'.
+
+    Returns:
+        pd.DataFrame: The input ``df`` with the added label column.
+    """
+    # Local import so the helper does not depend on a module-level `import re`
+    import re
+    def find_feature_class(feature, compartments):
+        # Several matching groups are joined with '-'; no match gives None
+        matches = [compartment for compartment in compartments if re.search(compartment, feature)]
+        return '-'.join(matches) if matches else None
+    df[name] = df['feature'].apply(lambda x: find_feature_class(x, feature_groups))
+    if name == 'channel':
+        # Assign the result back: a chained fillna(..., inplace=True) on
+        # df['channel'] does not update df under pandas Copy-on-Write
+        df['channel'] = df['channel'].fillna('morphology')
+    # Summed importance per label plus an 'all' row with the grand total.
+    # NOTE(review): this summary is built but never returned - the function
+    # returns the labelled input frame; confirm which result callers expect.
+    importance_sum = df.groupby(name)['importance'].sum().reset_index(name=f'{name}_importance_sum')
+    total_importance = importance_sum[f'{name}_importance_sum'].sum()
+    importance_sum = pd.concat(
+        [importance_sum,
+         pd.DataFrame(
+             [{name: 'all', f'{name}_importance_sum': total_importance}])]
+        , ignore_index=True)
+    return df

{spacr-0.3.60.dist-info → spacr-0.3.61.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: spacr
-Version: 0.3.60
+Version: 0.3.61
 Summary: Spatial phenotype analysis of crisp screens (SpaCr)
 Home-page: https://github.com/EinarOlafsson/spacr
 Author: Einar Birnir Olafsson

{spacr-0.3.60.dist-info → spacr-0.3.61.dist-info}/RECORD RENAMED Viewed

@@ -1,6 +1,6 @@
 spacr/__init__.py,sha256=CZtAdU5etLcb9dVmz-4Y7Hjhw3ubjMzfjG0L5ybyFVA,1592
 spacr/__main__.py,sha256=bkAJJD2kjIqOP-u1kLvct9jQQCeUXzlEjdgitwi1Lm8,75
-spacr/app_annotate.py,sha256=zGmAJplDOckhaUZijkHgbFH9LJNbd6TolU2hamplOBc,2769
+spacr/app_annotate.py,sha256=W9eLPa_LZIvXsXx_-0iDFEU938LBDvRy6prXo0qF4KQ,2533
 spacr/app_classify.py,sha256=urTP_wlZ58hSyM5a19slYlBxN0PdC-9-ga0hvq8CGWc,165
 spacr/app_make_masks.py,sha256=pqDhRpluiHZz-kPX2Zh_KbYe4TsU43qYBa_7f-rsjpw,1694
 spacr/app_mask.py,sha256=l-dBY8ftzCMdDe6-pXc2Nh_u-idNL9G7UOARiLJBtds,153
@@ -9,26 +9,26 @@ spacr/app_sequencing.py,sha256=DjG26jy4cpddnV8WOOAIiExtOe9MleVMY4MFa5uTo5w,157
 spacr/app_umap.py,sha256=ZWAmf_OsIKbYvolYuWPMYhdlVe-n2CADoJulAizMiEo,153
 spacr/cellpose.py,sha256=RBHMs2vwXcfkj0xqAULpALyzJYXddSRycgZSzmwI7v0,14755
 spacr/chat_bot.py,sha256=n3Fhqg3qofVXHmh3H9sUcmfYy9MmgRnr48663MVdY9E,1244
-spacr/core.py,sha256=dW9RrAKFLfVsFhX0-kaVMc2T7b47Ky0pTXK-CEVOeWQ,48235
+spacr/core.py,sha256=3u2qKmPmTlswvE1uKTF4gi7KQ3sJBHV9No_ysgk7JCU,48487
 spacr/deep_spacr.py,sha256=HdOcNU8cHcE_19nP7_5uTz-ih3E169ffr2Hm--NvMvA,43255
 spacr/gui.py,sha256=ARyn9Q_g8HoP-cXh1nzMLVFCKqthY4v2u9yORyaQqQE,8230
 spacr/gui_core.py,sha256=N7R7yvfK_dJhOReM_kW3Ci8Bokhi1OzsxeKqvSGdvV4,41460
 spacr/gui_elements.py,sha256=EKlvEg_4_je7jciEdR3NTgPrcTraowa2e2RUt-xqd6M,138254
-spacr/gui_utils.py,sha256=Ud6hRRPhombKjeGUhlleEr9I75SNnFj8UD11yKfp9Wo,40860
-spacr/io.py,sha256=VHs6h8o0gBEyKxfdNqEhpzjQXPrj7UGG47DwHeUyUDw,143390
+spacr/gui_utils.py,sha256=u9RoIOWpAXFEOnUlLpMQZrc1pWSg6omZsJMIhJdRv_g,41211
+spacr/io.py,sha256=p-ky3yjtoSSvdsktPXVy_dx8dHgMeWqUZOtOwwfrk2o,136108
 spacr/logger.py,sha256=lJhTqt-_wfAunCPl93xE65Wr9Y1oIHJWaZMjunHUeIw,1538
 spacr/measure.py,sha256=2lK-ZcTxLM-MpXV1oZnucRD9iz5aprwahRKw9IEqshg,55085
 spacr/mediar.py,sha256=FwLvbLQW5LQzPgvJZG8Lw7GniA2vbZx6Jv6vIKu7I5c,14743
-spacr/ml.py,sha256=aberLbvUM9F6uNpEOFHzn8_w-fiW0sDG3jVb6TDxakI,68275
+spacr/ml.py,sha256=aLDeeaAl0d4-RP1CzFHPqz5br2HrFbJhvPexEm9lvSI,68198
 spacr/openai.py,sha256=5vBZ3Jl2llYcW3oaTEXgdyCB2aJujMUIO5K038z7w_A,1246
-spacr/plot.py,sha256=Y5_VuRHNsIH7iezK8kWXHg9fwh5sW3S34ncIFshbBco,157893
+spacr/plot.py,sha256=zITe54dzQRz-gk_ZT0qJyARuUWJivIBKW8V4rjUH8SE,160320
 spacr/sequencing.py,sha256=ClUfwPPK6rNUbUuiEkzcwakzVyDKKUMv9ricrxT8qQY,25227
-spacr/settings.py,sha256=6_GB1QQw_w_4yq8dH-Ypc4rJw__Cgs6g_BnR9bIjdZI,77669
+spacr/settings.py,sha256=zANLspVmllDZeYjQWIfrHN3VkVgicnYGTduv30MmQ18,77257
 spacr/sim.py,sha256=1xKhXimNU3ukzIw-3l9cF3Znc_brW8h20yv8fSTzvss,71173
-spacr/submodules.py,sha256=dn-QSKX6ZqyyEr8_v69jVGpB-wd3KbaMRacIA8DXONU,28155
+spacr/submodules.py,sha256=Xq4gjvooHN8S7cTk5PIAkd7XD2c7CMVqNpeo8GCvtHc,42489
 spacr/timelapse.py,sha256=KGfG4L4-QnFfgbF7L6C5wL_3gd_rqr05Foje6RsoTBg,39603
 spacr/toxo.py,sha256=z2nT5aAze3NUIlwnBQcnkARihDwoPfqOgQIVoUluyK0,25087
-spacr/utils.py,sha256=5XGA0aPray3DzCAgwJjPRlsaxsuSRJyTTTZ7rNDTRTg,219202
+spacr/utils.py,sha256=tqIKiSc30xEX0IlfSpoctFJQDVnGHDAX7l1VakRCBuY,220601
 spacr/version.py,sha256=axH5tnGwtgSnJHb5IDhiu4Zjk5GhLyAEDRe-rnaoFOA,409
 spacr/resources/MEDIAR/.gitignore,sha256=Ff1q9Nme14JUd-4Q3jZ65aeQ5X4uttptssVDgBVHYo8,152
 spacr/resources/MEDIAR/LICENSE,sha256=yEj_TRDLUfDpHDNM0StALXIt6mLqSgaV2hcCwa6_TcY,1065
@@ -151,9 +151,9 @@ spacr/resources/icons/umap.png,sha256=dOLF3DeLYy9k0nkUybiZMe1wzHQwLJFRmgccppw-8b
 spacr/resources/images/plate1_E01_T0001F001L01A01Z01C02.tif,sha256=Tl0ZUfZ_AYAbu0up_nO0tPRtF1BxXhWQ3T3pURBCCRo,7958528
 spacr/resources/images/plate1_E01_T0001F001L01A02Z01C01.tif,sha256=m8N-V71rA1TT4dFlENNg8s0Q0YEXXs8slIn7yObmZJQ,7958528
 spacr/resources/images/plate1_E01_T0001F001L01A03Z01C03.tif,sha256=Pbhk7xn-KUP6RSIhJsxQcrHFImBm3GEpLkzx7WOc-5M,7958528
-spacr-0.3.60.dist-info/LICENSE,sha256=SR-2MeGc6SCM1UORJYyarSWY_A-JaOMFDj7ReSs9tRM,1083
-spacr-0.3.60.dist-info/METADATA,sha256=UF63-vN6-XEslhGhnotkQz6JanIajbV56bKcSEaEIjE,6032
-spacr-0.3.60.dist-info/WHEEL,sha256=HiCZjzuy6Dw0hdX5R3LCFPDmFS4BWl8H-8W39XfmgX4,91
-spacr-0.3.60.dist-info/entry_points.txt,sha256=BMC0ql9aNNpv8lUZ8sgDLQMsqaVnX5L535gEhKUP5ho,296
-spacr-0.3.60.dist-info/top_level.txt,sha256=GJPU8FgwRXGzKeut6JopsSRY2R8T3i9lDgya42tLInY,6
-spacr-0.3.60.dist-info/RECORD,,
+spacr-0.3.61.dist-info/LICENSE,sha256=SR-2MeGc6SCM1UORJYyarSWY_A-JaOMFDj7ReSs9tRM,1083
+spacr-0.3.61.dist-info/METADATA,sha256=2jlzT9lkaXx01IWlYMYrpf24p48qDHvrRLZm-YUUl-0,6032
+spacr-0.3.61.dist-info/WHEEL,sha256=HiCZjzuy6Dw0hdX5R3LCFPDmFS4BWl8H-8W39XfmgX4,91
+spacr-0.3.61.dist-info/entry_points.txt,sha256=BMC0ql9aNNpv8lUZ8sgDLQMsqaVnX5L535gEhKUP5ho,296
+spacr-0.3.61.dist-info/top_level.txt,sha256=GJPU8FgwRXGzKeut6JopsSRY2R8T3i9lDgya42tLInY,6
+spacr-0.3.61.dist-info/RECORD,,

{spacr-0.3.60.dist-info → spacr-0.3.61.dist-info}/LICENSE RENAMED Viewed

File without changes

{spacr-0.3.60.dist-info → spacr-0.3.61.dist-info}/WHEEL RENAMED Viewed

File without changes

{spacr-0.3.60.dist-info → spacr-0.3.61.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{spacr-0.3.60.dist-info → spacr-0.3.61.dist-info}/top_level.txt RENAMED Viewed

File without changes

spacr 0.3.60__py3-none-any.whl → 0.3.61__py3-none-any.whl

spacr 0.3.60py3-none-any.whl → 0.3.61py3-none-any.whl