PyPI - spacr - Versions diffs - 0.3.46__py3-none-any.whl → 0.3.50__py3-none-any.whl - Mend

spacr 0.3.46__py3-none-any.whl → 0.3.50__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

spacr/chat_bot.py +31 -0
spacr/gui_elements.py +33 -7
spacr/ml.py +478 -76
spacr/plot.py +488 -47
spacr/sequencing.py +122 -1
spacr/settings.py +2 -1
spacr/toxo.py +266 -147
spacr/utils.py +27 -4
{spacr-0.3.46.dist-info → spacr-0.3.50.dist-info}/METADATA +2 -1
{spacr-0.3.46.dist-info → spacr-0.3.50.dist-info}/RECORD +14 -13
{spacr-0.3.46.dist-info → spacr-0.3.50.dist-info}/LICENSE +0 -0
{spacr-0.3.46.dist-info → spacr-0.3.50.dist-info}/WHEEL +0 -0
{spacr-0.3.46.dist-info → spacr-0.3.50.dist-info}/entry_points.txt +0 -0
{spacr-0.3.46.dist-info → spacr-0.3.50.dist-info}/top_level.txt +0 -0

spacr/sequencing.py CHANGED Viewed

@@ -2,6 +2,11 @@ import os, gzip, re, time, gzip
 import pandas as pd
 from multiprocessing import Pool, cpu_count, Queue, Process
 from Bio.Seq import Seq
+import matplotlib.pyplot as plt
+import seaborn as sns
+import numpy as np
+from .plot import plot_plates
+from IPython.display import display
 # Function to map sequences to names (same as your original)
 def map_sequences_to_names(csv_file, sequences, rc):
@@ -480,4 +485,120 @@ def barecodes_reverse_complement(csv_file):
     # Save the DataFrame with the reverse complement sequences
     df.to_csv(new_filename, index=False)
-    print(f"Reverse complement file saved as {new_filename}")
+    print(f"Reverse complement file saved as {new_filename}")
+def graph_sequencing_stats(settings):
+    """
+    Choose the read-fraction threshold that gives a target number of unique gRNAs per well.
+
+    Loads one or more count CSV files, computes each row's share of the total
+    count of its plate/row/column group, sweeps fraction thresholds to find the
+    one whose mean unique-gRNA count is closest to the target, filters the data
+    with that threshold and passes the per-group unique counts to plot_plates.
+
+    Args:
+        settings (dict): Keys read by this function:
+            'count_data' (str or list): path(s) to the count CSV file(s); a str
+                is replaced inside `settings` by a one-element list.
+            'control_wells' (iterable): values removed from 'filter_column'.
+            'filter_column' (str): column compared against 'control_wells'.
+            'target_unique_count': desired mean number of unique gRNAs per group.
+            'log_x', 'log_y': whether to log-scale the threshold plot axes.
+
+    Returns:
+        The 'fraction_threshold' value whose mean unique count is closest to
+        settings['target_unique_count'].
+    """
+    from .utils import correct_metadata_column_names
+    # NOTE(review): _plot_density is currently unused; its only call below is commented out.
+    def _plot_density(df, dependent_variable, dst=None):
+        """Plot a density plot of the dependent variable."""
+        plt.figure(figsize=(10, 10))
+        sns.kdeplot(df[dependent_variable], fill=True, alpha=0.6)
+        plt.title(f'Density Plot of {dependent_variable}')
+        plt.xlabel(dependent_variable)
+        plt.ylabel('Density')
+        if dst is not None:
+            filename = os.path.join(dst, 'dependent_variable_density.pdf')
+            plt.savefig(filename, format='pdf')
+            print(f'Saved density plot to {filename}')
+        plt.show()
+    def find_and_visualize_fraction_threshold(df, target_unique_count=5, log_x=False, log_y=False, dst=None):
+        """
+        Find the fraction threshold where the recalculated unique count matches the target value,
+        and visualize the relationship between fraction thresholds and unique counts.
+
+        Expects `df` to contain the columns 'fraction', 'plate', 'row', 'column' and 'grna'.
+        When `dst` is given, the figure is saved as <dst>/results/fraction_threshold.pdf.
+        Returns the selected fraction threshold.
+        """
+        def _line_plot(df, x='fraction_threshold', y='unique_count', log_x=False, log_y=False):
+            # Draw y against x as a single line and return the figure and axes.
+            if x not in df.columns or y not in df.columns:
+                raise ValueError(f"Columns '{x}' and/or '{y}' not found in the DataFrame.")
+            fig, ax = plt.subplots(figsize=(10, 10))
+            ax.plot(df[x], df[y], linestyle='-', color=(0 / 255, 155 / 255, 155 / 255), label=f"{y}")
+            ax.set_xlabel(x)
+            ax.set_ylabel(y)
+            ax.set_title(f'{y} vs {x}')
+            ax.legend()
+            if log_x:
+                ax.set_xscale('log')
+            if log_y:
+                ax.set_yscale('log')
+            fig.tight_layout()
+            return fig, ax
+        # 1000 evenly spaced candidate thresholds between 0.001 and 0.99
+        fraction_thresholds = np.linspace(0.001, 0.99, 1000)
+        results = []
+        # Iterate through the fraction thresholds
+        for threshold in fraction_thresholds:
+            filtered_df = df[df['fraction'] >= threshold]
+            # Mean, over plate/row/column groups, of the number of distinct gRNAs left
+            unique_count = filtered_df.groupby(['plate', 'row', 'column'])['grna'].nunique().mean()
+            results.append((threshold, unique_count))
+        results_df = pd.DataFrame(results, columns=['fraction_threshold', 'unique_count'])
+        # Position of the row whose unique count is nearest to the target
+        closest_index = (results_df['unique_count'] - target_unique_count).abs().argmin()
+        closest_threshold = results_df.iloc[closest_index]
+        print(f"Closest Fraction Threshold: {closest_threshold['fraction_threshold']}")
+        print(f"Unique Count at Threshold: {closest_threshold['unique_count']}")
+        fig, ax = _line_plot(df=results_df, x='fraction_threshold', y='unique_count', log_x=log_x, log_y=log_y)
+        # NOTE(review): these guide lines are added after ax.legend() was called in
+        # _line_plot, so their labels presumably do not appear in the legend - confirm.
+        plt.axvline(x=closest_threshold['fraction_threshold'], color='black', linestyle='--',
+                    label=f'Closest Threshold ({closest_threshold["fraction_threshold"]:.4f})')
+        plt.axhline(y=target_unique_count, color='black', linestyle='--',
+                    label=f'Target Unique Count ({target_unique_count})')
+        # Hard-coded view window: only thresholds up to 0.1 and counts up to 20 are shown
+        plt.xlim(0,0.1)
+        plt.ylim(0,20)
+        if dst is not None:
+            fig_path = os.path.join(dst, 'results')
+            os.makedirs(fig_path, exist_ok=True)
+            fig_file_path = os.path.join(fig_path, 'fraction_threshold.pdf')
+            fig.savefig(fig_file_path, format='pdf', dpi=600, bbox_inches='tight')
+            print(f"Saved {fig_file_path}")
+        plt.show()
+        return closest_threshold['fraction_threshold']
+    # Accept a single path as well as a list of paths (this rewrites the caller's settings entry)
+    if isinstance(settings['count_data'], str):
+        settings['count_data'] = [settings['count_data']]
+    dfs = []
+    for i, count_data in enumerate(settings['count_data']):
+        df = pd.read_csv(count_data)
+        # Plates are labelled by the position of their file in the list, starting at plate1
+        df['plate'] = f'plate{i+1}'
+        # 'prc' joins plate, row_name and column_name into one grouping key
+        df['prc'] = df['plate'].astype(str) + '_' + df['row_name'].astype(str) + '_' + df['column_name'].astype(str)
+        df['total_count'] = df.groupby(['prc'])['count'].transform('sum')
+        # Share of the group's total count contributed by this row
+        df['fraction'] = df['count'] / df['total_count']
+        dfs.append(df)
+    df = pd.concat(dfs, axis=0)
+    df = correct_metadata_column_names(df)
+    # Drop every row whose filter column equals one of the control values
+    for c in settings['control_wells']:
+        df = df[df[settings['filter_column']] != c]
+    # Figures are written relative to the directory of the first count file
+    dst = os.path.dirname(settings['count_data'][0])
+    closest_threshold = find_and_visualize_fraction_threshold(df, settings['target_unique_count'], log_x=settings['log_x'], log_y=settings['log_y'], dst=dst)
+    # Apply the closest threshold to the DataFrame
+    df = df[df['fraction'] >= closest_threshold]
+    # Group by 'plate', 'row', 'column' and compute unique counts of 'grna'
+    unique_counts = df.groupby(['plate', 'row', 'column'])['grna'].nunique().reset_index(name='unique_counts')
+    unique_count_mean = df.groupby(['plate', 'row', 'column'])['grna'].nunique().mean()
+    unique_count_std = df.groupby(['plate', 'row', 'column'])['grna'].nunique().std()
+    # Merge the unique counts back into the original DataFrame
+    df = pd.merge(df, unique_counts, on=['plate', 'row', 'column'], how='left')
+    print(f"unique_count mean: {unique_count_mean} std: {unique_count_std}")
+    display(df)
+    #_plot_density(df, dependent_variable='unique_counts')
+    plot_plates(df=df, variable='unique_counts', grouping='mean', min_max='allq', cmap='viridis',min_count=0, verbose=True, dst=dst)
+    return closest_threshold

spacr/settings.py CHANGED Viewed

@@ -549,7 +549,8 @@ def get_perform_regression_default_settings(settings):
     settings.setdefault('filter_column','column')
     settings.setdefault('plate','plate1')
     settings.setdefault('class_1_threshold',None)
-    settings.setdefault('metadata_files',['/home/carruthers/Documents/TGME49_Summary.csv','/home/carruthers/Documents/TGGT1_Summary.csv'])
+    settings.setdefault('metadata_files',['/home/carruthers/Documents/TGGT1_Summary.csv','/home/carruthers/Documents/TGME49_Summary.csv'])
+    settings.setdefault('volcano','gene')
     settings.setdefault('toxo', True)
     if settings['regression_type'] == 'quantile':

spacr/toxo.py CHANGED Viewed

@@ -6,25 +6,53 @@ from adjustText import adjust_text
 import pandas as pd
 from scipy.stats import fisher_exact
 from IPython.display import display
+from matplotlib.legend import Legend
+from matplotlib.transforms import Bbox
+from brokenaxes import brokenaxes
-def custom_volcano_plot(data_path, metadata_path, metadata_column='tagm_location', point_size=50, figsize=20, threshold=0, split_axis_lims = [10, None, None, 10], save_path=None):
-    """
-    Create a volcano plot with the ability to control the shape of points based on a categorical column,
-    color points based on a condition, annotate specific points based on p-value and coefficient thresholds,
-    and control the size of points.
-    """
-    volcano_path = save_path
-    # Load the data
+from matplotlib.gridspec import GridSpec
+def custom_volcano_plot(data_path, metadata_path, metadata_column='tagm_location',
+                        point_size=50, figsize=20, threshold=0,
+                        save_path=None, x_lim=[-0.5, 0.5], y_lims=[[0, 6], [9, 15]]):
+    markers = [
+        'o',  # Circle
+        'X',  # X-shaped marker
+        '^',  # Upward triangle
+        's',  # Square
+        'v',  # Downward triangle
+        'P',  # Plus-filled pentagon
+        '*',  # Star
+        '+',  # Plus
+        'x',  # Cross
+        '.',  # Point
+        ',',  # Pixel
+        'd',  # Diamond
+        'D',  # Thin diamond
+        'h',  # Hexagon 1
+        'H',  # Hexagon 2
+        'p',  # Pentagon
+        '|',  # Vertical line
+        '_',  # Horizontal line
+    ]
+    plt.rcParams.update({'font.size': 14})
+    # Load data
     if isinstance(data_path, pd.DataFrame):
         data = data_path
     else:
         data = pd.read_csv(data_path)
+    fontsize = 18
+    plt.rcParams.update({'font.size': fontsize})
     data['variable'] = data['feature'].str.extract(r'\[(.*?)\]')
     data['variable'].fillna(data['feature'], inplace=True)
-    split_columns = data['variable'].str.split('_', expand=True)
-    data['gene_nr'] = split_columns[0]
+    data['gene_nr'] = data['variable'].str.split('_').str[0]
     data = data[data['variable'] != 'Intercept']
     # Load metadata
@@ -32,165 +60,110 @@ def custom_volcano_plot(data_path, metadata_path, metadata_column='tagm_location
         metadata = metadata_path
     else:
         metadata = pd.read_csv(metadata_path)
     metadata['gene_nr'] = metadata['gene_nr'].astype(str)
     data['gene_nr'] = data['gene_nr'].astype(str)
-    # Merge data and metadata on 'gene_nr'
-    merged_data = pd.merge(data, metadata[['gene_nr', 'tagm_location']], on='gene_nr', how='left')
+    merged_data = pd.merge(data, metadata[['gene_nr', metadata_column]], on='gene_nr', how='left')
+    merged_data[metadata_column].fillna('unknown', inplace=True)
-    merged_data.loc[merged_data['gene_nr'].str.startswith('4'), metadata_column] = 'GT1_gene'
-    merged_data.loc[merged_data['gene_nr'] == 'Intercept', metadata_column] = 'Intercept'
-    merged_data.loc[merged_data['condition'] == 'control', metadata_column] = 'control'
+    # Define palette and markers
+    palette = {'pc': 'red', 'nc': 'green', 'control': 'white', 'other': 'gray'}
+    marker_dict = {val: marker for val, marker in zip(
+        merged_data[metadata_column].unique(), markers)}
-    # Categorize condition for coloring
-    merged_data['condition'] = pd.Categorical(
-        merged_data['condition'],
-        categories=['other','pc', 'nc', 'control'],
-        ordered=True)
+    # Create the figure with custom spacing
+    fig = plt.figure(figsize=(figsize,figsize))
+    gs = GridSpec(2, 1, height_ratios=[1, 3], hspace=0.05)
-    display(merged_data)
+    ax_upper = fig.add_subplot(gs[0])
+    ax_lower = fig.add_subplot(gs[1], sharex=ax_upper)
-    # Create subplots with a broken y-axis
-    figsize_2 = figsize / 2
-    fig, (ax1, ax2) = plt.subplots(
-        2, 1, figsize=(figsize, figsize),
-        sharex=True, gridspec_kw={'height_ratios': [1, 3]}
-    )
+    # Hide x-axis labels on the upper plot
+    ax_upper.tick_params(axis='x', which='both', bottom=False, labelbottom=False)
-    # Define color palette
-    palette = {
-        'pc': 'red',
-        'nc': 'green',
-        'control': 'white',
-        'other': 'gray'}
+    hit_list = []
     # Scatter plot on both axes
-    sns.scatterplot(
-        data=merged_data,
-        x='coefficient',
-        y='-log10(p_value)',
-        hue='condition',  # Keep colors but prevent them from showing in the final legend
-        style=metadata_column if metadata_column else None,  # Shape-based legend
-        s=point_size,
-        edgecolor='black',
-        palette=palette,
-        legend='brief',  # Capture the full legend initially
-        alpha=0.8,
-        ax=ax2  # Lower plot
-    )
+    for _, row in merged_data.iterrows():
+        y_val = -np.log10(row['p_value'])
+        ax = ax_upper if y_val > y_lims[1][0] else ax_lower
-    sns.scatterplot(
-        data=merged_data[merged_data['-log10(p_value)'] > 10],
-        x='coefficient',
-        y='-log10(p_value)',
-        hue='condition',
-        style=metadata_column if metadata_column else None,
-        s=point_size,
-        palette=palette,
-        edgecolor='black',
-        legend=False,  # Suppress legend for upper plot
-        alpha=0.8,
-        ax=ax1  # Upper plot
-    )
-    if isinstance(split_axis_lims, list):
-        if len(split_axis_lims) == 4:
-            ylim_min_ax1 = split_axis_lims[0]
-            if split_axis_lims[1] is None:
-                ylim_max_ax1 = merged_data['-log10(p_value)'].max() + 5
-            else:
-                ylim_max_ax1 = split_axis_lims[1]
-            ylim_min_ax2 = split_axis_lims[2]
-            ylim_max_ax2 = split_axis_lims[3]
-        else:
-            ylim_min_ax1 = None
-            ylim_max_ax1 = merged_data['-log10(p_value)'].max() + 5
-            ylim_min_ax2 = 0
-            ylim_max_ax2 = None
-    # Set axis limits and hide unnecessary parts
-    ax1.set_ylim(ylim_min_ax1, ylim_max_ax1)
-    ax2.set_ylim(0, ylim_max_ax2)
-    ax1.spines['bottom'].set_visible(False)
-    ax2.spines['top'].set_visible(False)
-    ax1.tick_params(labelbottom=False)
-    if ax1.get_legend() is not None:
-        ax1.legend_.remove()
-        ax1.get_legend().remove()    # Extract handles and labels from the legend
-    handles, labels = ax2.get_legend_handles_labels()
-    # Identify shape-based legend entries (skip color-based entries)
-    shape_handles = handles[len(set(merged_data['condition'])):]
-    shape_labels = labels[len(set(merged_data['condition'])):]
-    # Set the legend with only shape-based entries
-    ax2.legend(
-        shape_handles,
-        shape_labels,
-        bbox_to_anchor=(1.05, 1),
-        loc='upper left',
-        borderaxespad=0.
-    )
-    ax1.tick_params(axis='x', which='both', bottom=False, top=False, labelbottom=False)
-    # Add vertical threshold lines to both plots
-    if threshold > 0:
-        for ax in (ax1, ax2):
-            ax.axvline(x=-abs(threshold), linestyle='--', color='black')
-            ax.axvline(x=abs(threshold), linestyle='--', color='black')
+        ax.scatter(
+            row['coefficient'], y_val,
+            color=palette.get(row['condition'], 'gray'),
+            marker=marker_dict.get(row[metadata_column], 'o'),
+            s=point_size, edgecolor='black', alpha=0.6
+        )
-    # Add a horizontal line at p-value threshold (0.05)
-    ax2.axhline(y=-np.log10(0.05), color='black', linestyle='--')
-    # Annotate significant points on both axes
-    texts_ax1 = []
-    texts_ax2 = []
-    for i, row in merged_data.iterrows():
         if row['p_value'] <= 0.05 and abs(row['coefficient']) >= abs(threshold):
-            # Select the appropriate axis for the annotation
-            #ax = ax1 if row['-log10(p_value)'] > 10 else ax2
+            hit_list.append(row['variable'])
-            ax = ax1 if row['-log10(p_value)'] >= ax1.get_ylim()[0] else ax2
+    # Set axis limits
+    ax_upper.set_ylim(y_lims[1])
+    ax_lower.set_ylim(y_lims[0])
+    ax_lower.set_xlim(x_lim)
+    ax_lower.spines['top'].set_visible(False)
+    ax_upper.spines['top'].set_visible(False)
+    ax_upper.spines['bottom'].set_visible(False)
-            # Create the annotation on the selected axis
-            text = ax.text(
-                row['coefficient'],
-                -np.log10(row['p_value']),
-                row['variable'],
-                fontsize=8,
-                ha='center',
-                va='bottom',
-            )
+    # Set x-axis and y-axis titles
+    ax_lower.set_xlabel('Coefficient')  # X-axis title on the lower graph
+    ax_lower.set_ylabel('-log10(p-value)')  # Y-axis title on the lower graph
+    ax_upper.set_ylabel('-log10(p-value)')  # Y-axis title on the upper graph
+    for ax in [ax_upper, ax_lower]:
+        ax.spines['right'].set_visible(False)
-            # Store the text annotation in the correct list
-            if ax == ax1:
-                texts_ax1.append(text)
-            else:
-                texts_ax2.append(text)
+    # Add threshold lines to both axes
+    for ax in [ax_upper, ax_lower]:
+        ax.axvline(x=-abs(threshold), linestyle='--', color='black')
+        ax.axvline(x=abs(threshold), linestyle='--', color='black')
-    # Adjust text positions to avoid overlap for both axes
-    adjust_text(texts_ax1, arrowprops=dict(arrowstyle='-', color='black'), ax=ax1)
-    adjust_text(texts_ax2, arrowprops=dict(arrowstyle='-', color='black'), ax=ax2)
+    ax_lower.axhline(y=-np.log10(0.05), linestyle='--', color='black')
-    # Move the legend outside the lower plot
-    ax2.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0.)
+    # Annotate significant points
+    texts_upper, texts_lower = [], []  # Collect text annotations separately
-    # Adjust the spacing between subplots and move the title
-    plt.subplots_adjust(hspace=0.05)
-    fig.suptitle('Custom Volcano Plot of Coefficients', y=1.02, fontsize=16)  # Title above the top plot
+    for _, row in merged_data.iterrows():
+        y_val = -np.log10(row['p_value'])
+        if row['p_value'] > 0.05 or abs(row['coefficient']) < abs(threshold):
+            continue
-    # Save the plot as PDF
-    plt.savefig(volcano_path, format='pdf', bbox_inches='tight')
-    print(f'Saved Volcano plot: {volcano_path}')
+        ax = ax_upper if y_val > y_lims[1][0] else ax_lower
+        text = ax.text(row['coefficient'], y_val, row['variable'],
+                       fontsize=fontsize, ha='center', va='bottom')
-    # Show the plot
+        if ax == ax_upper:
+            texts_upper.append(text)
+        else:
+            texts_lower.append(text)
+    # Adjust text positions to avoid overlap
+    adjust_text(texts_upper, ax=ax_upper, arrowprops=dict(arrowstyle='-', color='black'))
+    adjust_text(texts_lower, ax=ax_lower, arrowprops=dict(arrowstyle='-', color='black'))
+    # Add a single legend on the lower axis
+    handles = [plt.Line2D([0], [0], marker=m, color='w', markerfacecolor='gray', markersize=10)
+               for m in marker_dict.values()]
+    labels = marker_dict.keys()
+    ax_lower.legend(handles,
+                    labels,
+                    bbox_to_anchor=(1.05, 1),
+                    loc='upper left',
+                    borderaxespad=0.25,
+                    labelspacing=2,
+                    handletextpad=0.25,
+                    markerscale=2,
+                    prop={'size': fontsize})
+    # Save and show the plot
+    if save_path:
+        plt.savefig(save_path, format='pdf', bbox_inches='tight')
     plt.show()
+    return hit_list
 def go_term_enrichment_by_column(significant_df, metadata_path, go_term_columns=['Computed GO Processes', 'Curated GO Components', 'Curated GO Functions', 'Curated GO Processes']):
     """
@@ -331,4 +304,150 @@ def go_term_enrichment_by_column(significant_df, metadata_path, go_term_columns=
     # Show the combined plot
     plt.tight_layout()
+    plt.show()
+def plot_gene_phenotypes(data, gene_list, x_column='Gene ID', data_column='T.gondii GT1 CRISPR Phenotype - Mean Phenotype',error_column='T.gondii GT1 CRISPR Phenotype - Standard Error', save_path=None):
+    """
+    Plot a line graph for the mean phenotype with standard error shading and highlighted genes.
+
+    Rows are ranked by `data_column` (ascending); the rank is the x-axis. Rows whose
+    `data_column` or `error_column` cannot be parsed as a number are dropped.
+    The input DataFrame is not modified.
+
+    Args:
+        data (pd.DataFrame): The input DataFrame containing gene data.
+        gene_list (list): A list of gene names to highlight on the plot.
+        x_column (str): Column holding the gene identifiers.
+        data_column (str): Column holding the mean phenotype values.
+        error_column (str): Column holding the standard error of the phenotype.
+        save_path (str): Optional. If provided, the plot is saved to this path as a PDF.
+    """
+    # Reduce 'PREFIX_ID...' style names to the field after the first underscore
+    def extract_gene_id(gene):
+        if isinstance(gene, str) and '_' in gene:
+            return gene.split('_')[1]
+        return str(gene)
+    # Work on a copy: the original wrote the coerced values and the helper
+    # columns straight into the caller's DataFrame. Plain column assignment
+    # (instead of .loc[:, col] = ...) also guarantees a numeric dtype.
+    data = data.copy()
+    for numeric_column in (data_column, error_column):
+        data[numeric_column] = pd.to_numeric(data[numeric_column], errors='coerce')
+    data['x'] = data[x_column].apply(extract_gene_id)
+    data = data.dropna(subset=[data_column, error_column])
+    # Sort by the data_column and assign ranks
+    data = data.sort_values(by=data_column).reset_index(drop=True)
+    data['rank'] = range(1, len(data) + 1)
+    # Prepare the x, y, and error values for plotting
+    x = data['rank']
+    y = data[data_column]
+    yerr = data[error_column]
+    # Create the plot
+    plt.figure(figsize=(10, 10))
+    # Plot the mean phenotype with standard error shading
+    plt.plot(x, y, label='Mean Phenotype', color=(0/255, 155/255, 155/255), linewidth=2)
+    plt.fill_between(
+        x, y - yerr, y + yerr,
+        color=(0/255, 155/255, 155/255), alpha=0.1, label='Standard Error'
+    )
+    # Prepare for adjustText
+    texts = []  # Store text objects for adjustment
+    # Highlight the genes in the gene_list
+    for gene in gene_list:
+        gene_id = extract_gene_id(gene)
+        gene_data = data[data['x'] == gene_id]
+        if not gene_data.empty:
+            # Scatter the highlighted points in purple and add labels for adjustment
+            plt.scatter(
+                gene_data['rank'],
+                gene_data[data_column],
+                color=(155/255, 55/255, 155/255),
+                s=200,
+                alpha=0.6,
+                label=f'Highlighted Gene: {gene}',
+                zorder=3  # Ensure the points are on top
+            )
+            # Add the text label next to the first matching row of the highlighted gene
+            texts.append(
+                plt.text(
+                    gene_data['rank'].values[0],
+                    gene_data[data_column].values[0],
+                    gene,
+                    fontsize=18,
+                    ha='right'
+                )
+            )
+    # Adjust text to avoid overlap with lines drawn from points to text
+    adjust_text(texts, arrowprops=dict(arrowstyle='-', color='gray'))
+    # Label the plot
+    plt.xlabel('Rank')
+    plt.ylabel('Mean Phenotype')
+    #plt.xticks(rotation=90)  # Rotate x-axis labels for readability
+    plt.legend().remove()  # Remove the legend if not needed
+    plt.tight_layout()
+    # Save the plot if a path is provided
+    if save_path:
+        plt.savefig(save_path, format='pdf', dpi=600, bbox_inches='tight')
+        print(f"Figure saved to {save_path}")
+    plt.show()
+def plot_gene_heatmaps(data, gene_list, columns, x_column='Gene ID', normalize=False, save_path=None):
+    """
+    Generate a viridis heatmap with the specified columns and genes.
+
+    The input DataFrame is not modified. Entries of `gene_list` are reduced to
+    their gene ID the same way as the values of `x_column`, so both
+    'PREFIX_ID' style names and bare IDs select a gene.
+
+    Args:
+        data (pd.DataFrame): The input DataFrame containing gene data.
+        gene_list (list): A list of genes to include in the heatmap.
+        columns (list): A list of column names to visualize as heatmaps.
+        x_column (str): Column holding the gene identifiers.
+        normalize (bool): If True, normalize the values for each gene between 0 and 1.
+            A gene whose values are all equal yields NaN (division by zero range).
+        save_path (str): Optional. If provided, the plot will be saved to this path.
+    """
+    # Reduce 'PREFIX_ID...' style names to the field after the first underscore
+    def extract_gene_id(gene):
+        if isinstance(gene, str) and '_' in gene:
+            return gene.split('_')[1]
+        return str(gene)
+    # Copy so the helper column 'x' is not added to the caller's DataFrame
+    data = data.copy()
+    data['x'] = data[x_column].apply(extract_gene_id)
+    # Column 'x' never contains the prefix, so compare against reduced IDs;
+    # comparing against the raw gene_list silently dropped prefixed names.
+    wanted_ids = {extract_gene_id(gene) for gene in gene_list}
+    # Filter the data to only include the specified genes
+    filtered_data = data[data['x'].isin(wanted_ids)].set_index('x')[columns]
+    # Normalize each gene's values between 0 and 1 if normalize=True
+    if normalize:
+        filtered_data = filtered_data.apply(lambda x: (x - x.min()) / (x.max() - x.min()), axis=1)
+    # Define the figure size dynamically based on the number of genes and columns
+    width = len(columns) * 4
+    height = len(gene_list) * 1
+    # Create the heatmap
+    plt.figure(figsize=(width, height))
+    cmap = sns.color_palette("viridis", as_cmap=True)
+    # Plot the heatmap with genes on the y-axis and columns on the x-axis
+    sns.heatmap(
+        filtered_data,
+        cmap=cmap,
+        cbar=True,
+        annot=False,
+        linewidths=0.5,
+        square=True
+    )
+    # Set the labels
+    plt.xticks(rotation=90, ha='center')  # Rotate x-axis labels for better readability
+    plt.yticks(rotation=0)  # Keep y-axis labels horizontal
+    plt.xlabel('')
+    plt.ylabel('')
+    # Adjust layout to ensure the plot fits well
+    plt.tight_layout()
+    # Save the plot if a path is provided
+    if save_path:
+        plt.savefig(save_path, format='pdf', dpi=600, bbox_inches='tight')
+        print(f"Figure saved to {save_path}")
     plt.show()

spacr/utils.py CHANGED Viewed

@@ -4067,7 +4067,7 @@ def generate_path_list_from_db(db_path, file_metadata):
     return all_paths
-def correct_paths(df, base_path):
+def correct_paths(df, base_path, folder='data'):
     if isinstance(df, pd.DataFrame):
@@ -4083,9 +4083,9 @@ def correct_paths(df, base_path):
     adjusted_image_paths = []
     for path in image_paths:
         if base_path not in path:
-            parts = path.split('/data/')
+            parts = path.split(f'/{folder}/')
             if len(parts) > 1:
-                new_path = os.path.join(base_path, 'data', parts[1])
+                new_path = os.path.join(base_path, f'{folder}', parts[1])
                 adjusted_image_paths.append(new_path)
             else:
                 adjusted_image_paths.append(path)
@@ -5209,4 +5209,27 @@ def fill_holes_in_mask(mask):
         # Assign the original label back to the filled object
         filled_mask[filled_object] = i
-    return filled_mask
+    return filled_mask
+def correct_metadata_column_names(df):
+    """
+    Rename alternative metadata column names to 'plate', 'column', 'row' and 'grna'.
+
+    Each alias is renamed only if it is present. If a 'plate_row' column exists,
+    it is split on '_' and the two parts are stored in 'plate' and 'row'.
+
+    Args:
+        df (pd.DataFrame): Frame whose column names should be corrected.
+
+    Returns:
+        pd.DataFrame: A renamed frame, or the input frame itself when no alias was present.
+    """
+    # (alias, canonical name), applied one after another in this order
+    aliases = (
+        ('plate_name', 'plate'),
+        ('column_name', 'column'),
+        ('col', 'column'),
+        ('row_name', 'row'),
+        ('grna_name', 'grna'),
+    )
+    for old_name, new_name in aliases:
+        if old_name in df.columns:
+            df = df.rename(columns={old_name: new_name})
+    if 'plate_row' in df.columns:
+        plate_and_row = df['plate_row'].str.split('_', expand=True)
+        df[['plate', 'row']] = plate_and_row
+    return df
+def control_filelist(folder, mode='column', values=['01','02']):
+    """
+    List the files in `folder` whose second '_'-separated name field matches `values`.
+
+    In 'column' mode everything after the first character of that field is
+    compared against `values`; in 'row' mode only its first character is.
+    (Presumably the field is a well ID such as 'A01' - confirm against the
+    file naming scheme.) File names without a '_' are skipped.
+
+    Args:
+        folder (str): Directory whose entries are listed.
+        mode (str): 'column' or 'row'.
+        values (list): Strings to match against; only read, never modified.
+
+    Returns:
+        list: Matching file names, in the order returned by os.listdir.
+
+    Raises:
+        ValueError: If `mode` is neither 'column' nor 'row'.
+    """
+    # Compare with ==: `is` tests object identity, which is not guaranteed for
+    # equal strings and raises a SyntaxWarning on literals since Python 3.8.
+    if mode == 'column':
+        def well_part(field):
+            return field[1:]
+    elif mode == 'row':
+        def well_part(field):
+            return field[:1]
+    else:
+        # Previously an unknown mode surfaced as an UnboundLocalError at return
+        raise ValueError(f"mode must be 'column' or 'row', got {mode!r}")
+    filtered_files = []
+    for file in os.listdir(folder):
+        fields = file.split('_')
+        # Names without a second field cannot match; skip instead of raising IndexError
+        if len(fields) > 1 and well_part(fields[1]) in values:
+            filtered_files.append(file)
+    return filtered_files

{spacr-0.3.46.dist-info → spacr-0.3.50.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: spacr
-Version: 0.3.46
+Version: 0.3.50
 Summary: Spatial phenotype analysis of crisp screens (SpaCr)
 Home-page: https://github.com/EinarOlafsson/spacr
 Author: Einar Birnir Olafsson
@@ -66,6 +66,7 @@ Requires-Dist: gdown
 Requires-Dist: IPython<9.0,>=8.18.1
 Requires-Dist: ipykernel
 Requires-Dist: ipywidgets<9.0,>=8.1.2
+Requires-Dist: brokenaxes<1.0,>=0.6.2
 Requires-Dist: huggingface-hub<0.25,>=0.24.0
 Provides-Extra: dev
 Requires-Dist: pytest<3.11,>=3.9; extra == "dev"

spacr 0.3.46__py3-none-any.whl → 0.3.50__py3-none-any.whl

spacr 0.3.46__py3-none-any.whl → 0.3.50__py3-none-any.whl