PyPI - spacr - Versions diffs - 0.3.46__py3-none-any.whl → 0.3.47__py3-none-any.whl - Mend

spacr 0.3.46__py3-none-any.whl → 0.3.47__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

spacr/ml.py +94 -4
spacr/plot.py +28 -18
spacr/sequencing.py +119 -1
spacr/settings.py +1 -0
spacr/toxo.py +51 -41
spacr/utils.py +16 -1
{spacr-0.3.46.dist-info → spacr-0.3.47.dist-info}/METADATA +1 -1
{spacr-0.3.46.dist-info → spacr-0.3.47.dist-info}/RECORD +12 -12
{spacr-0.3.46.dist-info → spacr-0.3.47.dist-info}/LICENSE +0 -0
{spacr-0.3.46.dist-info → spacr-0.3.47.dist-info}/WHEEL +0 -0
{spacr-0.3.46.dist-info → spacr-0.3.47.dist-info}/entry_points.txt +0 -0
{spacr-0.3.46.dist-info → spacr-0.3.47.dist-info}/top_level.txt +0 -0

spacr/ml.py CHANGED Viewed

@@ -343,12 +343,79 @@ def regression(df, csv_path, dependent_variable='predictions', regression_type=N
     return model, coef_df
-def perform_regression(settings):
+def graph_cell_count_threshold(settings):
+    from .utils import correct_metadata_column_names
+    def _line_plot(df, x, y, log_x=False, log_y=False, title=""):
+        fig, ax = plt.subplots(figsize=(10, 6))
+        ax.plot(df[x], df[y], linestyle='-', color=(0, 0.6, 0.6), label=f"{y}")
+        ax.set_xlabel(x)
+        ax.set_ylabel(y)
+        ax.set_title(title)
+        ax.legend()
+        if log_x:
+            ax.set_xscale('log')
+        if log_y:
+            ax.set_yscale('log')
+        plt.show()
+    if isinstance(settings['score_data'], str):
+        settings['score_data'] = [settings['score_data']]
+    dfs = []
+    for i, score_data in enumerate(settings['score_data']):
+        df = pd.read_csv(score_data)
+        df = correct_metadata_column_names(df)
+        df['plate'] = f'plate{i+1}'
+        df['prc'] = df['plate'] + '_' + df['row'].astype(str) + '_' + df['column'].astype(str)
+        dfs.append(df)
+    df = pd.concat(dfs, axis=0)
+    # Compute the number of cells (or scores) per well
+    cell_counts = df.groupby('prc').size().reset_index(name='cell_count')
+    # Merge the cell counts back into the original DataFrame
+    df = df.merge(cell_counts, on='prc')
+    # Generate a range of thresholds
+    thresholds = np.arange(1, df['cell_count'].max() + 1)
+    results = []
+    # Iterate over thresholds and compute score mean and variance
+    for threshold in thresholds:
+        filtered_df = df[df['cell_count'] >= threshold]
+        score_mean = filtered_df.groupby('prc')[settings['score_column']].mean().mean()
+        score_variance = filtered_df.groupby('prc')[settings['score_column']].mean().var()
+        results.append((threshold, score_mean, score_variance))
+    results_df = pd.DataFrame(results, columns=['cell_count_threshold', 'score_mean', 'score_variance'])
+    if results_df.empty:
+        raise ValueError("No valid results were found. Check your data and thresholds.")
+    closest_threshold = results_df['score_variance'].diff().abs().argmin()
+    optimal_threshold = results_df.iloc[closest_threshold]
+    print(f"Optimal Threshold: {optimal_threshold['cell_count_threshold']}")
+    print(f"Score Mean at Optimal Threshold: {optimal_threshold['score_mean']}")
+    print(f"Score Variance at Optimal Threshold: {optimal_threshold['score_variance']}")
+    _line_plot(results_df, x='cell_count_threshold', y='score_mean',
+               title='Mean Well Score vs. Cell Count Threshold')
+    _line_plot(results_df, x='cell_count_threshold', y='score_variance',
+               title='Score Variance vs. Cell Count Threshold')
+    return optimal_threshold['cell_count_threshold']
+def perform_regression(settings):
     from .plot import plot_plates
     from .utils import merge_regression_res_with_metadata, save_settings
     from .settings import get_perform_regression_default_settings
     from .toxo import go_term_enrichment_by_column, custom_volcano_plot
+    from .sequencing import graph_sequencing_stats
     def _perform_regression_read_data(settings):
@@ -468,9 +535,15 @@ def perform_regression(settings):
     score_data_df = clean_controls(score_data_df, settings['filter_value'], settings['filter_column'])
     print(f"Dependent variable after clean_controls: {len(score_data_df)}")
+    if settings['min_cell_count'] is None:
+        settings['min_cell_count'] = graph_cell_count_threshold(settings)
     dependent_df, dependent_variable = process_scores(score_data_df, settings['dependent_variable'], settings['plate'], settings['min_cell_count'], settings['agg_type'], settings['transform'])
     print(f"Dependent variable after process_scores: {len(dependent_df)}")
+    if settings['fraction_threshold'] is None:
+        settings['fraction_threshold'] = graph_sequencing_stats(settings)
     independent_df = process_reads(count_data_df, settings['fraction_threshold'], settings['plate'], filter_column=filter_column, filter_value=filter_value)
     independent_df, n_grna, n_gene = _count_variable_instances(independent_df, column_1='grna', column_2='gene')
@@ -499,8 +572,12 @@ def perform_regression(settings):
     grna_coef_df = grna_coef_df.dropna(subset=['n_grna'])
     if settings['controls'] is not None:
         control_coef_df = grna_coef_df[grna_coef_df['grna'].isin(settings['controls'])]
         mean_coef = control_coef_df['coefficient'].mean()
+        significant_c = control_coef_df[control_coef_df['p_value']<= 0.05]
+        mean_coef_c = significant_c['coefficient'].mean()
+        print(mean_coef, mean_coef_c)
         if settings['threshold_method'] in ['var','variance']:
             coef_mes = control_coef_df['coefficient'].var()
@@ -508,6 +585,7 @@ def perform_regression(settings):
             coef_mes = control_coef_df['coefficient'].std()
         else:
             raise ValueError(f"Unsupported threshold method {settings['threshold_method']}. Supported methods: ['var','variance','std','standard_deveation']")
         reg_threshold = mean_coef + (settings['threshold_multiplier'] * coef_mes)
     coef_df.to_csv(results_path, index=False)
@@ -531,6 +609,12 @@ def perform_regression(settings):
     significant.to_csv(hits_path, index=False)
+    significant_grna_filtered = significant[significant['n_grna'] > settings['min_n']]
+    significant_gene_filtered = significant[significant['n_gene'] > settings['min_n']]
+    significant_filtered = pd.concat([significant_grna_filtered, significant_gene_filtered])
+    filtered_hit_path = os.path.join(os.path.dirname(hits_path), 'results_significant_filtered.csv')
+    significant_filtered.to_csv(filtered_hit_path, index=False)
     if isinstance(settings['metadata_files'], str):
         settings['metadata_files'] = [settings['metadata_files']]
@@ -549,9 +633,15 @@ def perform_regression(settings):
         base_dir = os.path.dirname(os.path.abspath(__file__))
         metadata_path = os.path.join(base_dir, 'resources', 'data', 'lopit.csv')
-        custom_volcano_plot(data_path, metadata_path, metadata_column='tagm_location', point_size=200, figsize=20, threshold=reg_threshold, split_axis_lims=settings['split_axis_lims'], save_path=volcano_path)
-        #custom_volcano_plot(data_path_gene, metadata_path, metadata_column='tagm_location', point_size=50, figsize=20, threshold=reg_threshold)
-        #custom_volcano_plot(data_path_grna, metadata_path, metadata_column='tagm_location', point_size=50, figsize=20, threshold=reg_threshold)
+        if settings['volcano'] == 'all':
+            print('all')
+            custom_volcano_plot(data_path, metadata_path, metadata_column='tagm_location', point_size=600, figsize=20, threshold=reg_threshold, split_axis_lims=settings['split_axis_lims'], save_path=volcano_path, x_lim=settings['x_lim'])
+        elif settings['volcano'] == 'gene':
+            print('gene')
+            custom_volcano_plot(data_path_gene, metadata_path, metadata_column='tagm_location', point_size=600, figsize=20, threshold=reg_threshold, split_axis_lims=settings['split_axis_lims'], save_path=volcano_path, x_lim=settings['x_lim'])
+        elif settings['volcano'] == 'grna':
+            print('grna')
+            custom_volcano_plot(data_path_grna, metadata_path, metadata_column='tagm_location', point_size=600, figsize=20, threshold=reg_threshold, split_axis_lims=settings['split_axis_lims'], save_path=volcano_path, x_lim=settings['x_lim'])
         #if len(significant) > 2:
         #    metadata_path = os.path.join(base_dir, 'resources', 'data', 'toxoplasma_metadata.csv')

spacr/plot.py CHANGED Viewed

@@ -2733,7 +2733,7 @@ class spacrGraph:
             hue = None
         # Create the jitter plot
-        sns.stripplot(data=self.df_melted,x=x_axis_column,y='Value',hue=self.hue, palette=self.sns_palette, dodge=self.jitter_bar_dodge, jitter=self.bar_width, ax=ax,alpha=0.6)
+        sns.stripplot(data=self.df_melted,x=x_axis_column,y='Value',hue=self.hue, palette=self.sns_palette, dodge=self.jitter_bar_dodge, jitter=self.bar_width, ax=ax, alpha=0.6, size=16)
         # Adjust legend and labels
         ax.set_xlabel(self.grouping_column)
@@ -2754,6 +2754,12 @@ class spacrGraph:
         # Ensure epoch is used on the x-axis and accuracy on the y-axis
         x_axis_column = self.data_column[0]
         y_axis_column = self.data_column[1]
+        if self.log_y:
+            self.df[y_axis_column] = np.log10(self.df[y_axis_column])
+        if self.log_x:
+            self.df[x_axis_column] = np.log10(self.df[x_axis_column])
         # Set hue to the grouping column to get one line per group
         hue = self.grouping_column
@@ -2771,11 +2777,6 @@ class spacrGraph:
         ax.set_xlabel(f"{x_axis_column}")
         ax.set_ylabel(f"{y_axis_column}")
-        if self.log_y:
-            ax.set_yscale('log')
-        if self.log_x:
-            ax.set_xscale('log')
     def _create_line_with_std_area(self, ax):
         """Helper method to create a line graph with shaded area representing standard deviation."""
@@ -2784,15 +2785,22 @@ class spacrGraph:
         y_axis_column_mean = f"mean_{y_axis_column}"
         y_axis_column_std = f"std_{y_axis_column_mean}"
+        if self.log_y:
+            self.df[y_axis_column] = np.log10(self.df[y_axis_column])
+        if self.log_x:
+            self.df[x_axis_column] = np.log10(self.df[x_axis_column])
         # Pivot the DataFrame to get mean and std for each epoch across plates
         summary_df = self.df.pivot_table(index=x_axis_column,values=y_axis_column,aggfunc=['mean', 'std']).reset_index()
         # Flatten MultiIndex columns (result of pivoting)
         summary_df.columns = [x_axis_column, y_axis_column_mean, y_axis_column_std]
         # Plot the mean accuracy as a line
         sns.lineplot(data=summary_df,x=x_axis_column,y=y_axis_column_mean,ax=ax,marker='o',linewidth=1,markersize=0,color='blue',label=y_axis_column_mean)
         # Fill the area representing the standard deviation
         ax.fill_between(summary_df[x_axis_column],summary_df[y_axis_column_mean] - summary_df[y_axis_column_std],summary_df[y_axis_column_mean] + summary_df[y_axis_column_std],color='blue',  alpha=0.1 )
@@ -2800,11 +2808,6 @@ class spacrGraph:
         ax.set_xlabel(f"{x_axis_column}")
         ax.set_ylabel(f"{y_axis_column}")
-        if self.log_y:
-            ax.set_yscale('log')
-        if self.log_x:
-            ax.set_xscale('log')
     def _create_box_plot(self, ax):
         """Helper method to create a box plot with consistent spacing."""
         # Combine grouping column and data column if needed
@@ -2969,23 +2972,29 @@ def plot_data_from_db(settings):
         df (pd.DataFrame): The extracted table as a DataFrame.
     """
     if isinstance(settings['src'], str):
         srcs = [settings['src']]
     elif isinstance(settings['src'], list):
         srcs = settings['src']
-        if isinstance(settings['database'], str):
-            settings['database'] = [settings['database'] for _ in range(len(srcs))]
     else:
         raise ValueError("src must be a string or a list of strings.")
+    if isinstance(settings['database'], str):
+        settings['database'] = [settings['database'] for _ in range(len(srcs))]
+    settings['dst'] = os.path.join(srcs[0], 'results')
     save_settings(settings, name=f"{settings['graph_name']}_plot_settings_db", show=True)
     dfs = []
     for i, src in enumerate(srcs):
         db_loc = os.path.join(src, 'measurements', settings['database'][i])
+        print(f"Database: {db_loc}")
         if settings['table_names'] in ['saliency_image_correlations']:
+            print(f"Database table: {settings['table_names']}")
             [df1] = _read_db(db_loc, tables=[settings['table_names']])
         else:
             df1, _ = _read_and_merge_data(locs=[db_loc],
@@ -3006,8 +3015,9 @@ def plot_data_from_db(settings):
     df = pd.concat(dfs, axis=0)
     df['prc'] = df['plate'].astype(str) + '_' + df['row'].astype(str) + '_' + df['col'].astype(str)
-    df['recruitment'] = df['pathogen_channel_1_mean_intensity'] / df['cytoplasm_channel_1_mean_intensity']
-    df['recruitment'] = df['pathogen_channel_1_mean_intensity'] / df['cytoplasm_channel_1_mean_intensity']
+    #df['recruitment'] = df['pathogen_channel_1_mean_intensity'] / df['cytoplasm_channel_1_mean_intensity']
+    #df['recruitment'] = df['pathogen_channel_1_mean_intensity'] / df['cytoplasm_channel_1_mean_intensity']
+    df['class'] = df['png_path'].apply(lambda x: 'class_1' if 'class_1' in x else ('class_0' if 'class_0' in x else None))
     if settings['cell_plate_metadata'] !=  None:
         df = df.dropna(subset='host_cell')
@@ -3021,7 +3031,7 @@ def plot_data_from_db(settings):
     df = df.dropna(subset=settings['data_column'])
     df = df.dropna(subset=settings['grouping_column'])
-    #df['class'] = df['png_path'].apply(lambda x: 'class_1' if 'class_1' in x else ('class_0' if 'class_0' in x else None))
     src = srcs[0]
     dst = os.path.join(src, 'results', settings['graph_name'])
     os.makedirs(dst, exist_ok=True)

spacr/sequencing.py CHANGED Viewed

@@ -2,6 +2,11 @@ import os, gzip, re, time, gzip
 import pandas as pd
 from multiprocessing import Pool, cpu_count, Queue, Process
 from Bio.Seq import Seq
+import matplotlib.pyplot as plt
+import seaborn as sns
+import numpy as np
+from .plot import plot_plates
+from IPython.display import display
 # Function to map sequences to names (same as your original)
 def map_sequences_to_names(csv_file, sequences, rc):
@@ -480,4 +485,117 @@ def barecodes_reverse_complement(csv_file):
     # Save the DataFrame with the reverse complement sequences
     df.to_csv(new_filename, index=False)
-    print(f"Reverse complement file saved as {new_filename}")
+    print(f"Reverse complement file saved as {new_filename}")
+def graph_sequencing_stats(settings):
+    from .utils import correct_metadata_column_names
+    def _plot_density(df, dependent_variable, dst=None):
+        """Plot a density plot of the dependent variable."""
+        plt.figure(figsize=(10, 6))
+        sns.kdeplot(df[dependent_variable], fill=True, alpha=0.6)
+        plt.title(f'Density Plot of {dependent_variable}')
+        plt.xlabel(dependent_variable)
+        plt.ylabel('Density')
+        if dst is not None:
+            filename = os.path.join(dst, 'dependent_variable_density.pdf')
+            plt.savefig(filename, format='pdf')
+            print(f'Saved density plot to {filename}')
+        plt.show()
+    def find_and_visualize_fraction_threshold(df, target_unique_count=5, log_x=False, log_y=False, dst=None):
+        """
+        Find the fraction threshold where the recalculated unique count matches the target value,
+        and visualize the relationship between fraction thresholds and unique counts.
+        """
+        def _line_plot(df, x='fraction_threshold', y='unique_count', log_x=False, log_y=False):
+            if x not in df.columns or y not in df.columns:
+                raise ValueError(f"Columns '{x}' and/or '{y}' not found in the DataFrame.")
+            fig, ax = plt.subplots(figsize=(10, 10))
+            ax.plot(df[x], df[y], linestyle='-', color=(0 / 255, 155 / 255, 155 / 255), label=f"{y}")
+            ax.set_xlabel(x)
+            ax.set_ylabel(y)
+            ax.set_title(f'{y} vs {x}')
+            ax.legend()
+            if log_x:
+                ax.set_xscale('log')
+            if log_y:
+                ax.set_yscale('log')
+            fig.tight_layout()
+            return fig, ax
+        fraction_thresholds = np.linspace(0.001, 0.99, 1000)
+        results = []
+        # Iterate through the fraction thresholds
+        for threshold in fraction_thresholds:
+            filtered_df = df[df['fraction'] >= threshold]
+            unique_count = filtered_df.groupby(['plate', 'row', 'column'])['grna'].nunique().mean()
+            results.append((threshold, unique_count))
+        results_df = pd.DataFrame(results, columns=['fraction_threshold', 'unique_count'])
+        closest_index = (results_df['unique_count'] - target_unique_count).abs().argmin()
+        closest_threshold = results_df.iloc[closest_index]
+        print(f"Closest Fraction Threshold: {closest_threshold['fraction_threshold']}")
+        print(f"Unique Count at Threshold: {closest_threshold['unique_count']}")
+        fig, ax = _line_plot(df=results_df, x='fraction_threshold', y='unique_count', log_x=log_x, log_y=log_y)
+        plt.axvline(x=closest_threshold['fraction_threshold'], color='black', linestyle='--',
+                    label=f'Closest Threshold ({closest_threshold["fraction_threshold"]:.4f})')
+        plt.axhline(y=target_unique_count, color='black', linestyle='--',
+                    label=f'Target Unique Count ({target_unique_count})')
+        if dst is not None:
+            fig_path = os.path.join(dst, 'results')
+            os.makedirs(fig_path, exist_ok=True)
+            fig_file_path = os.path.join(fig_path, 'fraction_threshold.pdf')
+            fig.savefig(fig_file_path, format='pdf', dpi=600, bbox_inches='tight')
+            print(f"Saved {fig_file_path}")
+        plt.show()
+        return closest_threshold['fraction_threshold']
+    if isinstance(settings['count_data'], str):
+        settings['count_data'] = [settings['count_data']]
+    dfs = []
+    for i, count_data in enumerate(settings['count_data']):
+        df = pd.read_csv(count_data)
+        df['plate'] = f'plate{i+1}'
+        df['prc'] = df['plate'].astype(str) + '_' + df['row_name'].astype(str) + '_' + df['column_name'].astype(str)
+        df['total_count'] = df.groupby(['prc'])['count'].transform('sum')
+        df['fraction'] = df['count'] / df['total_count']
+        dfs.append(df)
+    df = pd.concat(dfs, axis=0)
+    df = correct_metadata_column_names(df)
+    for c in settings['control_wells']:
+        df = df[df[settings['filter_column']] != c]
+    dst = os.path.dirname(settings['count_data'][0])
+    closest_threshold = find_and_visualize_fraction_threshold(df, settings['target_unique_count'], log_x=settings['log_x'], log_y=settings['log_y'], dst=dst)
+    # Apply the closest threshold to the DataFrame
+    df = df[df['fraction'] >= closest_threshold]
+    # Group by 'plate', 'row', 'column' and compute unique counts of 'grna'
+    unique_counts = df.groupby(['plate', 'row', 'column'])['grna'].nunique().reset_index(name='unique_counts')
+    unique_count_mean = df.groupby(['plate', 'row', 'column'])['grna'].nunique().mean()
+    unique_count_std = df.groupby(['plate', 'row', 'column'])['grna'].nunique().std()
+    # Merge the unique counts back into the original DataFrame
+    df = pd.merge(df, unique_counts, on=['plate', 'row', 'column'], how='left')
+    print(f"unique_count mean: {unique_count_mean} std: {unique_count_std}")
+    #_plot_density(df, dependent_variable='unique_counts')
+    plot_plates(df=df, variable='unique_counts', grouping='mean', min_max='allq', cmap='viridis',min_count=0, verbose=True, dst=dst)
+    return closest_threshold

spacr/settings.py CHANGED Viewed

@@ -550,6 +550,7 @@ def get_perform_regression_default_settings(settings):
     settings.setdefault('plate','plate1')
     settings.setdefault('class_1_threshold',None)
     settings.setdefault('metadata_files',['/home/carruthers/Documents/TGME49_Summary.csv','/home/carruthers/Documents/TGGT1_Summary.csv'])
+    settings.setdefault('volcano','gene')
     settings.setdefault('toxo', True)
     if settings['regression_type'] == 'quantile':

spacr/toxo.py CHANGED Viewed

@@ -6,15 +6,17 @@ from adjustText import adjust_text
 import pandas as pd
 from scipy.stats import fisher_exact
 from IPython.display import display
+from matplotlib.legend import Legend
-def custom_volcano_plot(data_path, metadata_path, metadata_column='tagm_location', point_size=50, figsize=20, threshold=0, split_axis_lims = [10, None, None, 10], save_path=None):
+def custom_volcano_plot(data_path, metadata_path, metadata_column='tagm_location', point_size=50, figsize=20, threshold=0, split_axis_lims = [10, None, None, 10], save_path=None, x_lim=[-0.5, 0.5]):
     """
     Create a volcano plot with the ability to control the shape of points based on a categorical column,
     color points based on a condition, annotate specific points based on p-value and coefficient thresholds,
     and control the size of points.
     """
     volcano_path = save_path
+    padd = 30
+    fontsize = 18
     # Load the data
     if isinstance(data_path, pd.DataFrame):
         data = data_path
@@ -42,15 +44,13 @@ def custom_volcano_plot(data_path, metadata_path, metadata_column='tagm_location
     merged_data.loc[merged_data['gene_nr'].str.startswith('4'), metadata_column] = 'GT1_gene'
     merged_data.loc[merged_data['gene_nr'] == 'Intercept', metadata_column] = 'Intercept'
     merged_data.loc[merged_data['condition'] == 'control', metadata_column] = 'control'
+    merged_data[metadata_column].fillna('unknown', inplace=True)
     # Categorize condition for coloring
     merged_data['condition'] = pd.Categorical(
         merged_data['condition'],
         categories=['other','pc', 'nc', 'control'],
         ordered=True)
-    display(merged_data)
     # Create subplots with a broken y-axis
     figsize_2 = figsize / 2
@@ -65,19 +65,19 @@ def custom_volcano_plot(data_path, metadata_path, metadata_column='tagm_location
         'nc': 'green',
         'control': 'white',
         'other': 'gray'}
-    # Scatter plot on both axes
+    # Scatter plot on both axes with legend completely disabled
     sns.scatterplot(
         data=merged_data,
         x='coefficient',
         y='-log10(p_value)',
-        hue='condition',  # Keep colors but prevent them from showing in the final legend
-        style=metadata_column if metadata_column else None,  # Shape-based legend
+        hue='condition',
+        style=metadata_column if metadata_column else None,
         s=point_size,
-        edgecolor='black',
+        edgecolor='black',
         palette=palette,
-        legend='brief',  # Capture the full legend initially
-        alpha=0.8,
+        legend=False,  # Disable automatic legend
+        alpha=0.6,
         ax=ax2  # Lower plot
     )
@@ -88,13 +88,41 @@ def custom_volcano_plot(data_path, metadata_path, metadata_column='tagm_location
         hue='condition',
         style=metadata_column if metadata_column else None,
         s=point_size,
+        edgecolor='black',
         palette=palette,
-        edgecolor='black',
-        legend=False,  # Suppress legend for upper plot
-        alpha=0.8,
+        legend=False,  # No legend on the upper plot
+        alpha=0.6,
         ax=ax1  # Upper plot
     )
+    # Ensure no previous legends on ax1 or ax2
+    if ax1.get_legend() is not None:
+        ax1.get_legend().remove()
+    if ax2.get_legend() is not None:
+        ax2.get_legend().remove()
+    # Manually gather handles and labels from ax2 after plotting
+    handles, labels = ax2.get_legend_handles_labels()
+    # Debug: Print the captured handles and labels for verification
+    print(f"Handles: {handles}")
+    print(f"Labels: {labels}")
+    # Identify shape-based legend entries (skip color-based entries)
+    n_color_entries = len(set(merged_data['condition']))
+    shape_handles = handles[n_color_entries:]
+    shape_labels = labels[n_color_entries:]
+    # Create and add the legend with shape-based entries
+    legend = Legend(
+        ax2, shape_handles, shape_labels,
+        bbox_to_anchor=(1.05, 1), loc='upper left',
+        handletextpad=2.0, labelspacing=1.5, borderaxespad=1.0,
+        markerscale=2.0, prop={'size': 14}
+    )
+    ax2.add_artist(legend)
     if isinstance(split_axis_lims, list):
         if len(split_axis_lims) == 4:
             ylim_min_ax1 = split_axis_lims[0]
@@ -113,28 +141,15 @@ def custom_volcano_plot(data_path, metadata_path, metadata_column='tagm_location
     # Set axis limits and hide unnecessary parts
     ax1.set_ylim(ylim_min_ax1, ylim_max_ax1)
     ax2.set_ylim(0, ylim_max_ax2)
+    if x_lim != None:
+        ax1.set_xlim(x_lim)
+        ax2.set_xlim(x_lim)
     ax1.spines['bottom'].set_visible(False)
     ax2.spines['top'].set_visible(False)
     ax1.tick_params(labelbottom=False)
-    if ax1.get_legend() is not None:
-        ax1.legend_.remove()
-        ax1.get_legend().remove()    # Extract handles and labels from the legend
-    handles, labels = ax2.get_legend_handles_labels()
-    # Identify shape-based legend entries (skip color-based entries)
-    shape_handles = handles[len(set(merged_data['condition'])):]
-    shape_labels = labels[len(set(merged_data['condition'])):]
-    # Set the legend with only shape-based entries
-    ax2.legend(
-        shape_handles,
-        shape_labels,
-        bbox_to_anchor=(1.05, 1),
-        loc='upper left',
-        borderaxespad=0.
-    )
     ax1.tick_params(axis='x', which='both', bottom=False, top=False, labelbottom=False)
     # Add vertical threshold lines to both plots
@@ -152,18 +167,13 @@ def custom_volcano_plot(data_path, metadata_path, metadata_column='tagm_location
     for i, row in merged_data.iterrows():
         if row['p_value'] <= 0.05 and abs(row['coefficient']) >= abs(threshold):
-            # Select the appropriate axis for the annotation
-            #ax = ax1 if row['-log10(p_value)'] > 10 else ax2
             ax = ax1 if row['-log10(p_value)'] >= ax1.get_ylim()[0] else ax2
             # Create the annotation on the selected axis
             text = ax.text(
                 row['coefficient'],
                 -np.log10(row['p_value']),
                 row['variable'],
-                fontsize=8,
+                fontsize=fontsize,
                 ha='center',
                 va='bottom',
             )
@@ -175,8 +185,8 @@ def custom_volcano_plot(data_path, metadata_path, metadata_column='tagm_location
                 texts_ax2.append(text)
     # Adjust text positions to avoid overlap for both axes
-    adjust_text(texts_ax1, arrowprops=dict(arrowstyle='-', color='black'), ax=ax1)
-    adjust_text(texts_ax2, arrowprops=dict(arrowstyle='-', color='black'), ax=ax2)
+    adjust_text(texts_ax1, arrowprops=dict(arrowstyle='-', color='black'), ax=ax1, expand_points=(padd, padd), fontsize=fontsize)
+    adjust_text(texts_ax2, arrowprops=dict(arrowstyle='-', color='black'), ax=ax2, expand_points=(padd, padd), fontsize=fontsize)
     # Move the legend outside the lower plot
     ax2.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0.)

spacr/utils.py CHANGED Viewed

@@ -5209,4 +5209,19 @@ def fill_holes_in_mask(mask):
         # Assign the original label back to the filled object
         filled_mask[filled_object] = i
-    return filled_mask
+    return filled_mask
+def correct_metadata_column_names(df):
+    if 'plate_name' in df.columns:
+        df = df.rename(columns={'plate_name': 'plate'})
+    if 'column_name' in df.columns:
+        df = df.rename(columns={'column_name': 'column'})
+    if 'col' in df.columns:
+        df = df.rename(columns={'col': 'column'})
+    if 'row_name' in df.columns:
+        df = df.rename(columns={'row_name': 'row'})
+    if 'grna_name' in df.columns:
+        df = df.rename(columns={'grna_name': 'grna'})
+    if 'plate_row' in df.columns:
+        df[['plate', 'row']] = df['plate_row'].str.split('_', expand=True)
+    return df

{spacr-0.3.46.dist-info → spacr-0.3.47.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: spacr
-Version: 0.3.46
+Version: 0.3.47
 Summary: Spatial phenotype analysis of crisp screens (SpaCr)
 Home-page: https://github.com/EinarOlafsson/spacr
 Author: Einar Birnir Olafsson

{spacr-0.3.46.dist-info → spacr-0.3.47.dist-info}/RECORD RENAMED Viewed

@@ -18,16 +18,16 @@ spacr/io.py,sha256=1rIdJ_8dyn7W4D2zXjaOqlgyo_Y5Z7X86aRp4hNYWCU,144194
 spacr/logger.py,sha256=lJhTqt-_wfAunCPl93xE65Wr9Y1oIHJWaZMjunHUeIw,1538
 spacr/measure.py,sha256=KdboGXoi85BO5-_6er7932FgjFI7G7tuaQDnWSiEuew,54817
 spacr/mediar.py,sha256=FwLvbLQW5LQzPgvJZG8Lw7GniA2vbZx6Jv6vIKu7I5c,14743
-spacr/ml.py,sha256=bPcKVk1camnOhv8jQglj6EYyipAxxmiB1QJ2Fdo3dEM,50654
+spacr/ml.py,sha256=Mkxl4n3OvNsVix8bbPQ09-HTgP3YTzQOESso4_exKLs,54634
 spacr/openai.py,sha256=5vBZ3Jl2llYcW3oaTEXgdyCB2aJujMUIO5K038z7w_A,1246
-spacr/plot.py,sha256=r4kbrMA8iQ317f0lvIDj4wJDIDwDXXYHEgGtFJrO3-k,145387
-spacr/sequencing.py,sha256=t18mgpK6rhWuB1LtFOsPxqgpFXxuUmrD06ecsaVQ0Gw,19655
-spacr/settings.py,sha256=3ygnAY6uLtkzFQdK8TMBbWV6zXEX-G_wV19YLyjCBeM,77668
+spacr/plot.py,sha256=M04Cbv1n_FHxO0Qg3VNu_IQXRGt7lb-sgWzh9jjL4rI,145733
+spacr/sequencing.py,sha256=aP2QUfb9wpeJRZFLWFwxq1o4EtVpSvyVohOcm3Wvrq0,24965
+spacr/settings.py,sha256=XEpo9sQXmQ3-sdRNmcsss6q0j7ZAvoAFO-_D8ecgYQc,77710
 spacr/sim.py,sha256=1xKhXimNU3ukzIw-3l9cF3Znc_brW8h20yv8fSTzvss,71173
 spacr/submodules.py,sha256=3C5M4UbI9Ral1MX4PTpucaAaqhL3RADuCOCqaHhMyUg,28048
 spacr/timelapse.py,sha256=FSYpUtAVy6xc3lwprRYgyDTT9ysUhfRQ4zrP9_h2mvg,39465
-spacr/toxo.py,sha256=X62hKFcSzFhIxFYlhL2AZb0qNpvtjLs3y1HldReAQEY,12880
-spacr/utils.py,sha256=K36BxYr4GN956V4S7IkNty2sP4Y265WS7yMzAw8Tqeg,220451
+spacr/toxo.py,sha256=RjAqI2sCcYYr-eiLPGnyJUn96zR_ATENuyiM2CZT408,13358
+spacr/utils.py,sha256=zkgUP_w_w9HJe4000KhVmpwO2gELoeIvdYNrXlRAzG8,221050
 spacr/version.py,sha256=axH5tnGwtgSnJHb5IDhiu4Zjk5GhLyAEDRe-rnaoFOA,409
 spacr/resources/MEDIAR/.gitignore,sha256=Ff1q9Nme14JUd-4Q3jZ65aeQ5X4uttptssVDgBVHYo8,152
 spacr/resources/MEDIAR/LICENSE,sha256=yEj_TRDLUfDpHDNM0StALXIt6mLqSgaV2hcCwa6_TcY,1065
@@ -150,9 +150,9 @@ spacr/resources/icons/umap.png,sha256=dOLF3DeLYy9k0nkUybiZMe1wzHQwLJFRmgccppw-8b
 spacr/resources/images/plate1_E01_T0001F001L01A01Z01C02.tif,sha256=Tl0ZUfZ_AYAbu0up_nO0tPRtF1BxXhWQ3T3pURBCCRo,7958528
 spacr/resources/images/plate1_E01_T0001F001L01A02Z01C01.tif,sha256=m8N-V71rA1TT4dFlENNg8s0Q0YEXXs8slIn7yObmZJQ,7958528
 spacr/resources/images/plate1_E01_T0001F001L01A03Z01C03.tif,sha256=Pbhk7xn-KUP6RSIhJsxQcrHFImBm3GEpLkzx7WOc-5M,7958528
-spacr-0.3.46.dist-info/LICENSE,sha256=SR-2MeGc6SCM1UORJYyarSWY_A-JaOMFDj7ReSs9tRM,1083
-spacr-0.3.46.dist-info/METADATA,sha256=rDVd_7S8qknwKjW3gzWpaC4FvKLLArfmA3xqGlby088,5949
-spacr-0.3.46.dist-info/WHEEL,sha256=HiCZjzuy6Dw0hdX5R3LCFPDmFS4BWl8H-8W39XfmgX4,91
-spacr-0.3.46.dist-info/entry_points.txt,sha256=BMC0ql9aNNpv8lUZ8sgDLQMsqaVnX5L535gEhKUP5ho,296
-spacr-0.3.46.dist-info/top_level.txt,sha256=GJPU8FgwRXGzKeut6JopsSRY2R8T3i9lDgya42tLInY,6
-spacr-0.3.46.dist-info/RECORD,,
+spacr-0.3.47.dist-info/LICENSE,sha256=SR-2MeGc6SCM1UORJYyarSWY_A-JaOMFDj7ReSs9tRM,1083
+spacr-0.3.47.dist-info/METADATA,sha256=NEQNKKM40sYjqLkD1M-R4eOVTPSAQJ4zDD_5juTElbk,5949
+spacr-0.3.47.dist-info/WHEEL,sha256=HiCZjzuy6Dw0hdX5R3LCFPDmFS4BWl8H-8W39XfmgX4,91
+spacr-0.3.47.dist-info/entry_points.txt,sha256=BMC0ql9aNNpv8lUZ8sgDLQMsqaVnX5L535gEhKUP5ho,296
+spacr-0.3.47.dist-info/top_level.txt,sha256=GJPU8FgwRXGzKeut6JopsSRY2R8T3i9lDgya42tLInY,6
+spacr-0.3.47.dist-info/RECORD,,

{spacr-0.3.46.dist-info → spacr-0.3.47.dist-info}/LICENSE RENAMED Viewed

File without changes

{spacr-0.3.46.dist-info → spacr-0.3.47.dist-info}/WHEEL RENAMED Viewed

File without changes

{spacr-0.3.46.dist-info → spacr-0.3.47.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{spacr-0.3.46.dist-info → spacr-0.3.47.dist-info}/top_level.txt RENAMED Viewed

File without changes

spacr 0.3.46__py3-none-any.whl → 0.3.47__py3-none-any.whl

spacr 0.3.46__py3-none-any.whl → 0.3.47__py3-none-any.whl