PyPI - spacr - Versions diffs - 0.3.38__py3-none-any.whl → 0.3.41__py3-none-any.whl - Mend

spacr 0.3.38__py3-none-any.whl → 0.3.41__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12)

spacr/core.py +1 -1
spacr/io.py +20 -13
spacr/ml.py +33 -24
spacr/plot.py +421 -37
spacr/toxo.py +202 -16
spacr/utils.py +4 -2
{spacr-0.3.38.dist-info → spacr-0.3.41.dist-info}/METADATA +1 -1
{spacr-0.3.38.dist-info → spacr-0.3.41.dist-info}/RECORD +12 -12
{spacr-0.3.38.dist-info → spacr-0.3.41.dist-info}/LICENSE +0 -0
{spacr-0.3.38.dist-info → spacr-0.3.41.dist-info}/WHEEL +0 -0
{spacr-0.3.38.dist-info → spacr-0.3.41.dist-info}/entry_points.txt +0 -0
{spacr-0.3.38.dist-info → spacr-0.3.41.dist-info}/top_level.txt +0 -0

spacr/core.py CHANGED Viewed

@@ -143,7 +143,7 @@ def preprocess_generate_masks(src, settings={}):
                                 start = time.time()
                                 if i+1 <= settings['examples_to_plot']:
                                     file_path = os.path.join(merged_src, file)
-                                    plot_image_mask_overlay(file_path, settings['channels'], settings['cell_channel'], settings['nucleus_channel'], settings['pathogen_channel'], figuresize=10, normalize=True, thickness=3, save_pdf=True)
+                                    plot_image_mask_overlay(file_path, settings['channels'], settings['cell_channel'], settings['nucleus_channel'], settings['pathogen_channel'], figuresize=10, percentiles=(1,99), thickness=3, save_pdf=True)
                                     stop = time.time()
                                     duration = stop-start
                                     time_ls.append(duration)

spacr/io.py CHANGED Viewed

@@ -1686,11 +1686,16 @@ def preprocess_img_data(settings):
         print(f'Found {extension_counts[most_common_extension]} {most_common_extension} files')
     else:
         print(f'Could not find any {valid_ext} files in {src} only found {extension_counts[0]}')
-        if os.path.exists(src+'/stack'):
+        if os.path.exists(os.path.join(src,'stack')):
             print('Found existing stack folder.')
-        if os.path.exists(src+'/channel_stack'):
+        if os.path.exists(os.path.join(src,'channel_stack')):
             print('Found existing channel_stack folder.')
-        if os.path.exists(src+'/norm_channel_stack'):
+        if os.path.exists(os.path.join(src,'norm_channel_stack')):
             print('Found existing norm_channel_stack folder. Skipping preprocessing')
             return settings, src
@@ -1713,12 +1718,13 @@ def preprocess_img_data(settings):
         src = _run_test_mode(settings['src'], regex, timelapse, test_images, random_test)
         settings['src'] = src
+    stack_path = os.path.join(src, 'stack')
     if img_format == None:
-        if not os.path.exists(src+'/stack'):
+        if not os.path.exists(stack_path):
             _merge_channels(src, plot=False)
-    if not os.path.exists(src+'/stack'):
+    if not os.path.exists(stack_path):
         try:
             if not img_format == None:
                 if timelapse:
@@ -1727,7 +1733,7 @@ def preprocess_img_data(settings):
                     _rename_and_organize_image_files(src, regex, batch_size, pick_slice, skip_mode, metadata_type, img_format)
                     #Make sure no batches will be of only one image
-                    all_imgs = len(src+'/stack')
+                    all_imgs = len(stack_path)
                     full_batches = all_imgs // batch_size
                     last_batch_size = all_imgs % batch_size
@@ -1738,26 +1744,27 @@ def preprocess_img_data(settings):
                             raise ValueError("Only one batch of size 1 detected. Adjust the batch size.")
                         # If the last batch is of size 1, merge it with the second last batch
                         elif full_batches > 0:
+                            print(f"all images: {all_imgs},  full batch: {full_batches}, last batch: {last_batch_size}")
                             raise ValueError("Last batch of size 1 detected. Adjust the batch size.")
                 _merge_channels(src, plot=False)
                 if timelapse:
-                    _create_movies_from_npy_per_channel(src+'/stack', fps=2)
+                    _create_movies_from_npy_per_channel(stack_path, fps=2)
                 if plot:
                     print(f'plotting {nr} images from {src}/stack')
-                    plot_arrays(src+'/stack', figuresize, cmap, nr=nr, normalize=normalize)
+                    plot_arrays(stack_path, figuresize, cmap, nr=nr, normalize=normalize)
                 if all_to_mip:
-                    _mip_all(src+'/stack')
+                    _mip_all(stack_path)
                     if plot:
                         print(f'plotting {nr} images from {src}/stack')
-                        plot_arrays(src+'/stack', figuresize, cmap, nr=nr, normalize=normalize)
+                        plot_arrays(stack_path, figuresize, cmap, nr=nr, normalize=normalize)
         except Exception as e:
             print(f"Error: {e}")
-    concatenate_and_normalize(src=src+'/stack',
+    concatenate_and_normalize(src=stack_path,
                               channels=mask_channels,
                               save_dtype=np.float32,
                               settings=settings)

spacr/ml.py CHANGED Viewed

@@ -134,7 +134,7 @@ def scale_variables(X, y):
     return X_scaled, y_scaled
-def process_model_coefficients(model, regression_type, X, y, highlight):
+def process_model_coefficients(model, regression_type, X, y, nc, pc, controls):
     """Return DataFrame of model coefficients and p-values."""
     if regression_type in ['ols', 'gls', 'wls', 'rlm', 'glm', 'mixed', 'quantile', 'logit', 'probit', 'poisson']:
         coefs = model.params
@@ -169,8 +169,8 @@ def process_model_coefficients(model, regression_type, X, y, highlight):
         coef_df['p_value'] = np.nan  # Placeholder since sklearn doesn't provide p-values
     coef_df['-log10(p_value)'] = -np.log10(coef_df['p_value'])
-    coef_df['highlight'] = coef_df['feature'].apply(lambda x: highlight in x)
+    coef_df['grna'] = coef_df['feature'].str.extract(r'\[(.*?)\]')[0]
+    coef_df['condition'] = coef_df.apply(lambda row: 'nc' if nc in row['feature'] else 'pc' if pc in row['feature'] else ('control' if row['grna'] in controls else 'other'),axis=1)
     return coef_df[~coef_df['feature'].str.contains('row|column')]
 def prepare_formula(dependent_variable, random_row_column_effects=False):
@@ -284,15 +284,13 @@ def check_and_clean_data(df, dependent_variable):
     df_cleaned['row'] = df['row']
     df_cleaned['column'] = df['column']
-    #display(df_cleaned)
     # Create a new column 'gene_fraction' that sums the fractions by gene within the same well
     df_cleaned['gene_fraction'] = df_cleaned.groupby(['prc', 'gene'])['fraction'].transform('sum')
     print("Data is ready for model fitting.")
     return df_cleaned
-def regression(df, csv_path, dependent_variable='predictions', regression_type=None, alpha=1.0, random_row_column_effects=False, highlight='220950', dst=None, cov_type=None):
+def regression(df, csv_path, dependent_variable='predictions', regression_type=None, alpha=1.0, random_row_column_effects=False, nc='233460', pc='220950', controls=[''], dst=None, cov_type=None, plot=False):
     from .plot import volcano_plot, plot_histogram
     # Generate the volcano filename
@@ -312,9 +310,7 @@ def regression(df, csv_path, dependent_variable='predictions', regression_type=N
     if regression_type is None:
         regression_type = 'ols' if is_normal else 'glm'
-    #display('before check_and_clean_data:',df)
     df = check_and_clean_data(df, dependent_variable)
-    #display('after check_and_clean_data:',df)
     # Handle mixed effects if row/column effect is treated as random
     if random_row_column_effects:
@@ -340,10 +336,10 @@ def regression(df, csv_path, dependent_variable='predictions', regression_type=N
         model = regression_model(X, y, regression_type=regression_type, groups=groups, alpha=alpha, cov_type=cov_type)
         # Process the model coefficients
-        coef_df = process_model_coefficients(model, regression_type, X, y, highlight)
-    # Plot the volcano plot
-    volcano_plot(coef_df, volcano_path)
+        coef_df = process_model_coefficients(model, regression_type, X, y, nc, pc, controls)
+    if plot:
+        volcano_plot(coef_df, volcano_path)
     return model, coef_df
@@ -487,19 +483,28 @@ def perform_regression(settings):
     if settings['transform'] is None:
         _ = plot_plates(score_data_df, variable=dependent_variable, grouping='mean', min_max='allq', cmap='viridis', min_count=settings['min_cell_count'], dst = res_folder)
-    model, coef_df = regression(merged_df, csv_path, dependent_variable, settings['regression_type'], settings['alpha'], settings['random_row_column_effects'], highlight=settings['highlight'], dst=res_folder, cov_type=settings['cov_type'])
+    model, coef_df = regression(merged_df, csv_path, dependent_variable, settings['regression_type'], settings['alpha'], settings['random_row_column_effects'], nc=settings['negative_control'], pc=settings['positive_control'], controls=settings['controls'], dst=res_folder, cov_type=settings['cov_type'])
     coef_df['grna'] = coef_df['feature'].apply(lambda x: re.search(r'grna\[(.*?)\]', x).group(1) if 'grna' in x else None)
     coef_df['gene'] = coef_df['feature'].apply(lambda x: re.search(r'gene\[(.*?)\]', x).group(1) if 'gene' in x else None)
     coef_df = coef_df.merge(n_grna, how='left', on='grna')
     coef_df = coef_df.merge(n_gene, how='left', on='gene')
-    display(coef_df)
     gene_coef_df = coef_df[coef_df['n_gene'] != None]
     grna_coef_df = coef_df[coef_df['n_grna'] != None]
     gene_coef_df = gene_coef_df.dropna(subset=['n_gene'])
     grna_coef_df = grna_coef_df.dropna(subset=['n_grna'])
+    if settings['controls'] is not None:
+        control_coef_df = grna_coef_df[grna_coef_df['grna'].isin(settings['controls'])]
+        mean_coef = control_coef_df['coefficient'].mean()
+        variance_coef = control_coef_df['coefficient'].var()
+        std_coef = control_coef_df['coefficient'].std()
+        reg_threshold = mean_coef + (3 * std_coef)
+    print('coef_df')
+    display(coef_df)
     coef_df.to_csv(results_path, index=False)
     gene_coef_df.to_csv(results_path_gene, index=False)
     grna_coef_df.to_csv(results_path_grna, index=False)
@@ -509,7 +514,10 @@ def perform_regression(settings):
     else:
         significant = coef_df[coef_df['p_value']<= 0.05]
-        #significant = significant[significant['coefficient'] > 0.1]
+        if settings['controls'] is not None:
+            significant_high = significant[significant['coefficient'] >= reg_threshold]
+            significant_low = significant[significant['coefficient'] <= reg_threshold]
+            significant = pd.concat([significant_high, significant_low])
         significant.sort_values(by='coefficient', ascending=False, inplace=True)
         significant = significant[~significant['feature'].str.contains('row|column')]
@@ -530,22 +538,24 @@ def perform_regression(settings):
         grna_merged_df = merge_regression_res_with_metadata(results_path_grna, metadata_file, name=filename)
     if settings['toxo']:
         data_path = merged_df
         data_path_gene = gene_merged_df
         data_path_grna = grna_merged_df
         base_dir = os.path.dirname(os.path.abspath(__file__))
         metadata_path = os.path.join(base_dir, 'resources', 'data', 'lopit.csv')
-        custom_volcano_plot(data_path, metadata_path, metadata_column='tagm_location', string_list=[settings['highlight']], point_size=50, figsize=20)
-        custom_volcano_plot(data_path_gene, metadata_path, metadata_column='tagm_location', string_list=[settings['highlight']], point_size=50, figsize=20)
-        custom_volcano_plot(data_path_grna, metadata_path, metadata_column='tagm_location', string_list=[settings['highlight']], point_size=50, figsize=20)
-        if len(significant) > 2:
-            metadata_path = os.path.join(base_dir, 'resources', 'data', 'toxoplasma_metadata.csv')
-            go_term_enrichment_by_column(significant, metadata_path)
+        custom_volcano_plot(data_path, metadata_path, metadata_column='tagm_location', point_size=200, figsize=20, threshold=reg_threshold, split_axis_lims=settings['split_axis_lims'])
+        #custom_volcano_plot(data_path_gene, metadata_path, metadata_column='tagm_location', point_size=50, figsize=20, threshold=reg_threshold)
+        #custom_volcano_plot(data_path_grna, metadata_path, metadata_column='tagm_location', point_size=50, figsize=20, threshold=reg_threshold)
+        #if len(significant) > 2:
+        #    metadata_path = os.path.join(base_dir, 'resources', 'data', 'toxoplasma_metadata.csv')
+        #    go_term_enrichment_by_column(significant, metadata_path)
     print('Significant Genes')
+    grnas = significant['grna'].unique().tolist()
+    genes = significant['gene'].unique().tolist()
+    print(f"Found p<0.05 coedfficients for {len(grnas)} gRNAs and {len(genes)} genes")
     display(significant)
     output = {'results':coef_df,
@@ -763,7 +773,6 @@ def generate_ml_scores(settings):
             raise ValueError("The 'png_list_df' DataFrame must contain 'prcfo' and 'test' columns.")
         annotated_df = png_list_df[['prcfo', settings['annotation_column']]].set_index('prcfo')
         df = annotated_df.merge(df, left_index=True, right_index=True)
-        #display(df)
         unique_values = df[settings['annotation_column']].dropna().unique()
         if len(unique_values) == 1:
             unannotated_rows = df[df[settings['annotation_column']].isna()].index

spacr/plot.py CHANGED Viewed

@@ -13,10 +13,11 @@ from IPython.display import display
 from skimage.segmentation import find_boundaries
 from skimage import measure
 from skimage.measure import find_contours, label, regionprops
+import tifffile as tiff
 from scipy.stats import normaltest, ttest_ind, mannwhitneyu, f_oneway, kruskal
 from statsmodels.stats.multicomp import pairwise_tukeyhsd
-from scipy.stats import ttest_ind, mannwhitneyu, levene, wilcoxon, kruskal
+from scipy.stats import ttest_ind, mannwhitneyu, levene, wilcoxon, kruskal, normaltest, shapiro
 import itertools
 import pingouin as pg
@@ -25,13 +26,26 @@ from IPython.display import Image as ipyimage
 import matplotlib.patches as patches
 from collections import defaultdict
+from matplotlib.gridspec import GridSpec
-def plot_image_mask_overlay(file, channels, cell_channel, nucleus_channel, pathogen_channel, figuresize=10, normalize=True, thickness=3, save_pdf=True):
+def plot_image_mask_overlay(file, channels, cell_channel, nucleus_channel, pathogen_channel, figuresize=10, percentiles=(2,98), thickness=3, save_pdf=True, mode='outlines', export_tiffs=False):
     """Plot image and mask overlays."""
-    def _plot_merged_plot(image, outlines, outline_colors, figuresize, thickness):
+    def _plot_merged_plot(image, outlines, outline_colors, figuresize, thickness, percentiles, mode='outlines'):
         """Plot the merged plot with overlay, image channels, and masks."""
+        def _generate_colored_mask(mask, alpha):
+            """ Generate a colored mask with transparency using the given colormap. """
+            cmap = generate_mask_random_cmap(mask)
+            rgba_mask = cmap(mask / mask.max())  # Normalize mask and map to colormap (RGBA)
+            rgba_mask[..., 3] = np.where(mask > 0, alpha, 0)  # Apply transparency only where mask is present
+            return rgba_mask
+        def _overlay_mask(image, mask):
+            """Overlay the colored mask onto the original image."""
+            combined = np.clip(image + mask[..., :3] * mask[..., 3:4], 0, 1)  # Ensure pixel values stay in [0, 1]
+            return combined
         def _normalize_image(image, percentiles=(2, 98)):
             """Normalize the image to the given percentiles."""
             v_min, v_max = np.percentile(image, percentiles)
@@ -61,11 +75,15 @@ def plot_image_mask_overlay(file, channels, cell_channel, nucleus_channel, patho
         # Plot each channel with its corresponding outlines
         for v in range(num_channels):
             channel_image = image[..., v]
-            channel_image_normalized = _normalize_image(channel_image)
+            channel_image_normalized = _normalize_image(channel_image, percentiles)
             channel_image_rgb = np.dstack((channel_image_normalized, channel_image_normalized, channel_image_normalized))
             for outline, color in zip(outlines, outline_colors):
-                channel_image_rgb = _apply_contours(channel_image_rgb, outline, color, thickness)
+                if mode == 'outlines':
+                    channel_image_rgb = _apply_contours(channel_image_rgb, outline, color, thickness)
+                else:
+                    mask = _generate_colored_mask(outline, alpha=0.5)
+                    channel_image_rgb = _overlay_mask(channel_image_rgb, mask)
             ax[v].imshow(channel_image_rgb)
             ax[v].set_title(f'Image - Channel {v}')
@@ -75,11 +93,15 @@ def plot_image_mask_overlay(file, channels, cell_channel, nucleus_channel, patho
         rgb_channels = min(3, num_channels)
         for i in range(rgb_channels):
             channel_image = image[..., i]
-            channel_image_normalized = _normalize_image(channel_image)
+            channel_image_normalized = _normalize_image(channel_image, percentiles)
             rgb_image[..., i] = channel_image_normalized
         for outline, color in zip(outlines, outline_colors):
-            rgb_image = _apply_contours(rgb_image, outline, color, thickness)
+            if mode == 'outlines':
+                rgb_image = _apply_contours(rgb_image, outline, color, thickness)
+            else:
+                mask = _generate_colored_mask(outline, alpha=0.5)
+                rgb_image = _overlay_mask(rgb_image, mask)
         ax[-1].imshow(rgb_image)
         ax[-1].set_title('Combined RGB Image')
@@ -96,8 +118,22 @@ def plot_image_mask_overlay(file, channels, cell_channel, nucleus_channel, patho
         plt.show()
         return fig
+    def _save_channels_as_tiff(stack, save_dir, filename):
+        """Save each channel in the stack as a grayscale TIFF."""
+        os.makedirs(save_dir, exist_ok=True)
+        for i in range(stack.shape[-1]):
+            channel = stack[..., i]
+            tiff_path = os.path.join(save_dir, f"{filename}_channel_{i}.tiff")
+            tiff.imwrite(tiff_path, channel, photometric='minisblack')
+            print(f"Saved {tiff_path}")
     stack = np.load(file)
+    if export_tiffs:
+        save_dir = os.path.join(os.path.dirname(os.path.dirname(file)), 'results', os.path.splitext(os.path.basename(file))[0], 'tiff')
+        filename = os.path.splitext(os.path.basename(file))[0]
+        _save_channels_as_tiff(stack, save_dir, filename)
     # Convert to float for normalization and ensure correct handling of both 8-bit and 16-bit arrays
     if stack.dtype == np.uint16:
         stack = stack.astype(np.float32)
@@ -128,7 +164,7 @@ def plot_image_mask_overlay(file, channels, cell_channel, nucleus_channel, patho
         outlines.append(np.take(stack, cell_mask_dim, axis=2))
         outline_colors.append('red')
-    fig = _plot_merged_plot(image=image, outlines=outlines, outline_colors=outline_colors, figuresize=figuresize, thickness=thickness)
+    fig = _plot_merged_plot(image=image, outlines=outlines, outline_colors=outline_colors, figuresize=figuresize, thickness=thickness, percentiles=percentiles, mode=mode)
     return fig
@@ -1691,17 +1727,25 @@ def plot_object_outlines(src, objects=['nucleus','cell','pathogen'], channels=[0
                                overlay=True,
                                max_nr=10,
                                randomize=True)
 def volcano_plot(coef_df, filename='volcano_plot.pdf'):
+    palette = {
+        'pc': 'red',
+        'nc': 'green',
+        'control': 'blue',
+        'other': 'gray'
+    }
     # Create the volcano plot
     plt.figure(figsize=(10, 6))
     sns.scatterplot(
         data=coef_df,
         x='coefficient',
         y='-log10(p_value)',
-        hue='highlight',
-        palette={True: 'red', False: 'blue'}
+        hue='condition',
+        palette=palette
     )
     plt.title('Volcano Plot of Coefficients')
     plt.xlabel('Coefficient')
     plt.ylabel('-log10(p-value)')
@@ -2098,7 +2142,7 @@ class spacrGraph:
     def __init__(self, df, grouping_column, data_column, graph_type='bar', summary_func='mean',
                  order=None, colors=None, output_dir='./output', save=False, y_lim=None,
                  error_bar_type='std', remove_outliers=False, theme='pastel', representation='object',
-                 paired=False, all_to_all=True, compare_group=None):
+                 paired=False, all_to_all=True, compare_group=None, graph_name=None):
         """
         Class for creating grouped plots with optional statistical tests and data preprocessing.
@@ -2121,11 +2165,14 @@ class spacrGraph:
         self.all_to_all = all_to_all
         self.compare_group = compare_group
         self.y_lim = y_lim
+        self.graph_name = graph_name
         self.results_df = pd.DataFrame()
         self.sns_palette = None
         self.fig = None
-        self.results_name = str(self.data_column[0])+'_'+str(self.grouping_column)+'_'+str(self.graph_type)
+        self.results_name = str(self.graph_name)+'_'+str(self.data_column[0])+'_'+str(self.grouping_column)+'_'+str(self.graph_type)
         self._set_theme()
         self.raw_df = self.df.copy()
@@ -2134,10 +2181,10 @@ class spacrGraph:
     def _set_theme(self):
         """Set the Seaborn theme and reorder colors if necessary."""
         integer_list = list(range(1, 81))
-        color_order = [0, 3, 9, 4, 6, 7, 9, 2] + integer_list
+        color_order = [7,9,4,0,3,6,2] + integer_list
         self.sns_palette = self._set_reordered_theme(self.theme, color_order, 100)
-    def _set_reordered_theme(self, theme='muted', order=None, n_colors=100, show_theme=False):
+    def _set_reordered_theme(self, theme='deep', order=None, n_colors=100, show_theme=False):
         """Set and reorder the Seaborn color palette."""
         palette = sns.color_palette(theme, n_colors)
         if order:
@@ -2182,20 +2229,36 @@ class spacrGraph:
         """Perform normality tests for each group and each data column."""
         unique_groups = self.df[self.grouping_column].unique()
         normality_results = []
         for column in self.data_column:
-            grouped_data = [self.df.loc[self.df[self.grouping_column] == group, column] for group in unique_groups]
-            normal_p_values = [normaltest(data).pvalue for data in grouped_data]
-            normal_stats = [normaltest(data).statistic for data in grouped_data]
-            is_normal = all(p > 0.05 for p in normal_p_values)  # Test if all groups are normal
-            for group, stat, p_value in zip(unique_groups, normal_stats, normal_p_values):
+            # Iterate over each group and its corresponding data
+            for group in unique_groups:
+                data = self.df.loc[self.df[self.grouping_column] == group, column]
+                n_samples = len(data)
+                if n_samples >= 8:
+                    # Use D'Agostino-Pearson test for larger samples
+                    stat, p_value = normaltest(data)
+                    test_name = "D'Agostino-Pearson test"
+                else:
+                    # Use Shapiro-Wilk test for smaller samples
+                    stat, p_value = shapiro(data)
+                    test_name = "Shapiro-Wilk test"
+                # Store the result for this group and column
                 normality_results.append({
                     'Comparison': f'Normality test for {group} on {column}',
                     'Test Statistic': stat,
                     'p-value': p_value,
-                    'Test Name': 'Normality test',
+                    'Test Name': test_name,
                     'Column': column,
-                    'n': len(self.df[self.df[self.grouping_column] == group])  # Sample size
+                    'n': n_samples  # Sample size
                 })
+            # Check if all groups are normally distributed (p > 0.05)
+            normal_p_values = [result['p-value'] for result in normality_results if result['Column'] == column]
+            is_normal = all(p > 0.05 for p in normal_p_values)
         return is_normal, normality_results
     def perform_levene_test(self, unique_groups):
@@ -2339,17 +2402,21 @@ class spacrGraph:
                     ax.text(x_pos, y_pos, text, ha='center', va='center', fontsize=12)
         def _get_positions(self, ax):
-            if self.graph_type == 'bar':
+            if self.graph_type in ['bar','jitter_bar']:
                 x_positions = [np.mean(bar.get_paths()[0].vertices[:, 0]) for bar in ax.collections if hasattr(bar, 'get_paths')]
             elif self.graph_type == 'violin':
                 x_positions = [np.mean(violin.get_paths()[0].vertices[:, 0]) for violin in ax.collections if hasattr(violin, 'get_paths')]
-            elif self.graph_type == 'box':
+            elif self.graph_type in ['box', 'jitter_box']:
                 x_positions = list(set(line.get_xdata().mean() for line in ax.lines if line.get_linestyle() == '-'))
             elif self.graph_type == 'jitter':
                 x_positions = [np.mean(collection.get_offsets()[:, 0]) for collection in ax.collections if collection.get_offsets().size > 0]
+            elif self.graph_type in ['line', 'line_std']:
+                x_positions = []
             return x_positions
         def _draw_comparison_lines(ax, x_positions):
@@ -2367,7 +2434,7 @@ class spacrGraph:
                 # Determine significance marker
                 if p_value <= 0.001:
-                    significance = '***'
+                    signiresults_namecance = '***'
                 elif p_value <= 0.01:
                     significance = '**'
                 elif p_value <= 0.05:
@@ -2408,6 +2475,9 @@ class spacrGraph:
         self.fig_width = (num_groups * self.bar_width) + (spacing_between_groups * num_groups)
         self.fig_height = self.fig_width/2
+        if  self.graph_type in ['line','line_std']:
+            self.fig_height, self.fig_width = 10, 10
         if ax is None:
             self.fig, ax = plt.subplots(figsize=(self.fig_height, self.fig_width))
         else:
@@ -2429,6 +2499,14 @@ class spacrGraph:
             self._create_box_plot(ax)
         elif self.graph_type == 'violin':
             self._create_violin_plot(ax)
+        elif self.graph_type == 'jitter_box':
+            self._create_jitter_box_plot(ax)
+        elif self.graph_type == 'jitter_bar':
+            self._create_jitter_bar_plot(ax)
+        elif self.graph_type == 'line':
+            self._create_line_graph(ax)
+        elif self.graph_type == 'line_std':
+            self._create_line_with_std_area(ax)
         else:
             raise ValueError(f"Unknown graph type: {self.graph_type}")
@@ -2441,14 +2519,17 @@ class spacrGraph:
         sns.despine(ax=ax, top=True, right=True)
         ax.legend(loc='center left', bbox_to_anchor=(1, 0.5), title='Data Column') # Move the legend outside the plot
-        ax.set_xlabel('')
+        if not self.graph_type in ['line','line_std']:
+            ax.set_xlabel('')
         x_positions = _get_positions(self, ax)
-        if len(self.data_column) == 1:
+        if len(self.data_column) == 1 and not self.graph_type in ['line','line_std']:
             ax.legend().remove()
             ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha='right')
-        elif len(self.data_column) > 1:
+        elif len(self.data_column) > 1 and not self.graph_type in ['line','line_std']:
             ax.set_xticks([])
             ax.tick_params(bottom=False)
             ax.set_xticklabels([])
@@ -2524,7 +2605,54 @@ class spacrGraph:
         handles, labels = ax.get_legend_handles_labels()
         unique_labels = dict(zip(labels, handles))
         ax.legend(unique_labels.values(), unique_labels.keys(), loc='best')
+    def _create_line_graph(self, ax):
+        """Helper method to create a line graph with one line per group based on epochs and accuracy."""
+        #display(self.df)
+        # Ensure epoch is used on the x-axis and accuracy on the y-axis
+        x_axis_column = self.data_column[0]
+        y_axis_column = self.data_column[1]
+        # Set hue to the grouping column to get one line per group
+        hue = self.grouping_column
+        # Check if the required columns exist in the DataFrame
+        required_columns = [x_axis_column, y_axis_column, self.grouping_column]
+        for col in required_columns:
+            if col not in self.df.columns:
+                raise ValueError(f"Column '{col}' not found in DataFrame.")
+        # Create the line graph with one line per group
+        sns.lineplot(data=self.df,x=x_axis_column,y=y_axis_column,hue=hue,palette=self.sns_palette,ax=ax,marker='o',linewidth=1,markersize=6)
+        # Adjust axis labels
+        ax.set_xlabel(f"{x_axis_column}")
+        ax.set_ylabel(f"{y_axis_column}")
+    def _create_line_with_std_area(self, ax):
+        """Helper method to create a line graph with shaded area representing standard deviation."""
+        x_axis_column = self.data_column[0]
+        y_axis_column = self.data_column[1]
+        y_axis_column_mean = f"mean_{y_axis_column}"
+        y_axis_column_std = f"std_{y_axis_column_mean}"
+        # Pivot the DataFrame to get mean and std for each epoch across plates
+        summary_df = self.df.pivot_table(index=x_axis_column,values=y_axis_column,aggfunc=['mean', 'std']).reset_index()
+        # Flatten MultiIndex columns (result of pivoting)
+        summary_df.columns = [x_axis_column, y_axis_column_mean, y_axis_column_std]
+        # Plot the mean accuracy as a line
+        sns.lineplot(data=summary_df,x=x_axis_column,y=y_axis_column_mean,ax=ax,marker='o',linewidth=1,markersize=0,color='blue',label=y_axis_column_mean)
+        # Fill the area representing the standard deviation
+        ax.fill_between(summary_df[x_axis_column],summary_df[y_axis_column_mean] - summary_df[y_axis_column_std],summary_df[y_axis_column_mean] + summary_df[y_axis_column_std],color='blue',  alpha=0.1 )
+        # Adjust axis labels
+        ax.set_xlabel(f"{x_axis_column}")
+        ax.set_ylabel(f"{y_axis_column}")
     def _create_box_plot(self, ax):
         """Helper method to create a box plot with consistent spacing."""
         # Combine grouping column and data column if needed
@@ -2574,6 +2702,68 @@ class spacrGraph:
         unique_labels = dict(zip(labels, handles))
         ax.legend(unique_labels.values(), unique_labels.keys(), loc='best')
+    def _create_jitter_bar_plot(self, ax):
+        """Helper method to draw mean bars overlaid with the individual data points (strip plot).
+        Reads self.df_melted (columns 'Value', the grouping column and, for several data
+        columns, 'Data Column'), self.hue, self.sns_palette, self.bar_width,
+        self.jitter_bar_dodge and self.error_bar_type. Error bars are only drawn when
+        self.error_bar_type is 'std' or 'sem'.
+        Side effect: with more than one data column a 'Combined Group' column is added to
+        self.df_melted.
+        """
+        # Flatten DataFrame: Combine grouping column and data column into one group if needed
+        if len(self.data_column) > 1:
+            self.df_melted['Combined Group'] = (self.df_melted[self.grouping_column].astype(str) + " - " + self.df_melted['Data Column'].astype(str))
+            x_axis_column = 'Combined Group'
+            ax.set_ylabel('Value')
+        else:
+            x_axis_column = self.grouping_column
+            ax.set_ylabel(self.data_column[0])
+        summary_df = self.df_melted.groupby([x_axis_column]).agg(mean=('Value', 'mean'),std=('Value', 'std'),sem=('Value', 'sem')).reset_index()
+        sns.barplot(data=self.df_melted, x=x_axis_column, y='Value', hue=self.hue, palette=self.sns_palette, ax=ax, dodge=self.jitter_bar_dodge, ci=None)
+        sns.stripplot(data=self.df_melted,x=x_axis_column,y='Value',hue=self.hue, palette=self.sns_palette, dodge=self.jitter_bar_dodge, jitter=self.bar_width, ax=ax,alpha=0.6, edgecolor='white',linewidth=1, size=16)
+        # Adjust the bar width manually
+        if len(self.data_column) > 1:
+            bars = [bar for bar in ax.patches if isinstance(bar, plt.Rectangle)]
+            target_width = self.bar_width * 2
+            for bar in bars:
+                # Remember the centre before resizing: get_x() is the LEFT edge, so the new
+                # left edge must be derived from the old centre to keep the bar under its points
+                center = bar.get_x() + bar.get_width() / 2
+                bar.set_width(target_width)
+                bar.set_x(center - target_width / 2)
+        # Draw error bars centred on the bars; any other error_bar_type means "no error bars"
+        if self.error_bar_type in ('std', 'sem'):
+            bars = [bar for bar in ax.patches if isinstance(bar, plt.Rectangle)]
+            # NOTE(review): bars are paired with summary rows by position; groupby sorts the
+            # groups, so this assumes seaborn draws the bars in the same order - confirm.
+            for bar, (_, row) in zip(bars, summary_df.iterrows()):
+                x_bar = bar.get_x() + bar.get_width() / 2
+                err = row[self.error_bar_type]
+                ax.errorbar(x=x_bar, y=bar.get_height(), yerr=err, fmt='none', c='black', capsize=5, lw=2)
+        # Set legend and labels
+        ax.set_xlabel(self.grouping_column)
+    def _create_jitter_box_plot(self, ax):
+        """Helper method to draw a box plot with the individual data points jittered on top.
+        With more than one data column a 'Combined Group' column (grouping value and
+        data-column name joined by " - ") is added to self.df_melted and used as x axis.
+        """
+        multiple_columns = len(self.data_column) > 1
+        if multiple_columns:
+            melted = self.df_melted
+            melted['Combined Group'] = (melted[self.grouping_column].astype(str) + " - " + melted['Data Column'].astype(str))
+        category_column = 'Combined Group' if multiple_columns else self.grouping_column
+        ax.set_ylabel('Value' if multiple_columns else self.data_column[0])
+        # Arguments shared by the box layer and the point layer
+        shared = dict(data=self.df_melted, x=category_column, y='Value', hue=self.hue, palette=self.sns_palette, ax=ax)
+        sns.boxplot(**shared)
+        sns.stripplot(**shared, dodge=self.jitter_bar_dodge, jitter=self.bar_width, alpha=0.6, edgecolor='white', linewidth=1, size=12)
+        ax.set_xlabel(self.grouping_column)
+        # Both layers register legend entries; keep a single handle per label
+        legend_handles, legend_labels = ax.get_legend_handles_labels()
+        deduplicated = {}
+        for label, handle in zip(legend_labels, legend_handles):
+            deduplicated[label] = handle
+        ax.legend(deduplicated.values(), deduplicated.keys(), loc='best')
     def _save_results(self):
         """Helper method to save the plot and results."""
         os.makedirs(self.output_dir, exist_ok=True)
@@ -2594,14 +2784,14 @@ class spacrGraph:
 def plot_data_from_db(settings):
     from .io import _read_db, _read_and_merge_data
-    from .utils import annotate_conditions
+    from .utils import annotate_conditions, save_settings
     """
     Extracts the specified table from the SQLite database and plots a specified column.
     Args:
         db_path (str): The path to the SQLite database.
         table_names (str): The name of the table to extract.
-        column_name (str): The column to plot from the table.
+        data_column (str): The column to plot from the table.
     Returns:
         df (pd.DataFrame): The extracted table as a DataFrame.
@@ -2616,6 +2806,8 @@ def plot_data_from_db(settings):
     else:
         raise ValueError("src must be a string or a list of strings.")
+    save_settings(settings, name=f"{settings['graph_name']}_plot_settings_db", show=True)
     dfs = []
     for i, src in enumerate(srcs):
@@ -2643,6 +2835,7 @@ def plot_data_from_db(settings):
     df = pd.concat(dfs, axis=0)
     df['prc'] = df['plate'].astype(str) + '_' + df['row'].astype(str) + '_' + df['col'].astype(str)
     df['recruitment'] = df['pathogen_channel_1_mean_intensity'] / df['cytoplasm_channel_1_mean_intensity']
+    df['recruitment'] = df['pathogen_channel_1_mean_intensity'] / df['cytoplasm_channel_1_mean_intensity']
     if settings['cell_plate_metadata'] !=  None:
         df = df.dropna(subset='host_cell')
@@ -2653,24 +2846,91 @@ def plot_data_from_db(settings):
     if settings['treatment_plate_metadata'] !=  None:
         df = df.dropna(subset='treatment')
-    df = df.dropna(subset=settings['column_name'])
+    df = df.dropna(subset=settings['data_column'])
     df = df.dropna(subset=settings['grouping_column'])
+    #df['class'] = df['png_path'].apply(lambda x: 'class_1' if 'class_1' in x else ('class_0' if 'class_0' in x else None))
+    src = srcs[0]
+    dst = os.path.join(src, 'results', settings['graph_name'])
+    os.makedirs(dst, exist_ok=True)
+    spacr_graph = spacrGraph(
+        df=df,                                       # Your DataFrame
+        grouping_column=settings['grouping_column'], # Column for grouping the data (x-axis)
+        data_column=settings['data_column'],         # Column for the data (y-axis)
+        graph_type=settings['graph_type'],           # Type of plot ('bar', 'box', 'violin', 'jitter')
+        graph_name=settings['graph_name'],           # Name of the plot
+        summary_func='mean',                         # Function to summarize data (e.g., 'mean', 'median')
+        colors=None,                                 # Custom colors for the plot (optional)
+        output_dir=dst,                              # Directory to save the plot and results
+        save=settings['save'],                       # Whether to save the plot and results
+        y_lim=settings['y_lim'],                     # Starting point for y-axis (optional)
+        error_bar_type='std',                        # Type of error bar ('std' or 'sem')
+        representation=settings['representation'],
+        theme=settings['theme'],                     # Seaborn color palette theme (e.g., 'pastel', 'muted')
+    )
+    # Create the plot
+    spacr_graph.create_plot()
+    # Get the figure object if needed
+    fig = spacr_graph.get_figure()
+    plt.show()
+    # Optional: Get the results DataFrame containing statistical test results
+    results_df = spacr_graph.get_results()
+    return fig, results_df
+def plot_data_from_csv(settings):
+    from .io import _read_db, _read_and_merge_data
+    from .utils import annotate_conditions, save_settings
+    """
+    Extracts the specified table from the SQLite database and plots a specified column.
+    Args:
+        db_path (str): The path to the SQLite database.
+        table_names (str): The name of the table to extract.
+        data_column (str): The column to plot from the table.
+    Returns:
+        df (pd.DataFrame): The extracted table as a DataFrame.
+    """
+    if isinstance(settings['src'], str):
+        srcs = [settings['src']]
+    elif isinstance(settings['src'], list):
+        srcs = settings['src']
+    else:
+        raise ValueError("src must be a string or a list of strings.")
+    #save_settings(settings, name=f"{settings['graph_name']}_plot_settings_csv", show=True)
+    dfs = []
+    for i, src in enumerate(srcs):
+        dft = pd.read_csv(src)
+        if 'plate' not in dft.columns:
+            dft['plate'] = f"plate{i+1}"
+        dfs.append(dft)
+    df = pd.concat(dfs, axis=0)
     #display(df)
-    #df['class'] = df['png_path'].apply(lambda x: 'class_1' if 'class_1' in x else ('class_0' if 'class_0' in x else None))
+    df = df.dropna(subset=settings['data_column'])
+    df = df.dropna(subset=settings['grouping_column'])
+    src = srcs[0]
+    dst = os.path.join(os.path.dirname(src), 'results', settings['graph_name'])
+    os.makedirs(dst, exist_ok=True)
     spacr_graph = spacrGraph(
         df=df,                                       # Your DataFrame
         grouping_column=settings['grouping_column'], # Column for grouping the data (x-axis)
-        data_column=settings['column_name'],         # Column for the data (y-axis)
+        data_column=settings['data_column'],         # Column for the data (y-axis)
         graph_type=settings['graph_type'],           # Type of plot ('bar', 'box', 'violin', 'jitter')
+        graph_name=settings['graph_name'],           # Name of the plot
         summary_func='mean',                         # Function to summarize data (e.g., 'mean', 'median')
         colors=None,                                 # Custom colors for the plot (optional)
-        output_dir=settings['dst'],                  # Directory to save the plot and results
+        output_dir=dst,                              # Directory to save the plot and results
         save=settings['save'],                       # Whether to save the plot and results
         y_lim=settings['y_lim'],                     # Starting point for y-axis (optional)
         error_bar_type='std',                        # Type of error bar ('std' or 'sem')
@@ -2687,5 +2947,129 @@ def plot_data_from_db(settings):
     # Optional: Get the results DataFrame containing statistical test results
     results_df = spacr_graph.get_results()
-    return fig, results_df
+    return fig, results_df
+def plot_region(settings):
+    """
+    Plot one field of view as a mask overlay plus two image grids, and save all three as PDFs.
+    Args:
+        settings (dict): Read keys are 'src' (experiment folder), 'name' (file name inside
+            <src>/merged), 'activation_mode' (the table '<activation_mode>_list' is read),
+            'activation_db' (database file name inside <src>/measurements), 'percentiles',
+            'channels', 'cell_channel', 'nucleus_channel', 'pathogen_channel', 'mode' and
+            'export_tiffs'.
+    Returns:
+        tuple: (fig_1, fig_2, fig_3) - the mask overlay, the grid of png_list images and the
+        grid of activation images. They are saved to <src>/results/<name without extension>/.
+    """
+    def _sort_paths_by_basename(paths):
+        return sorted(paths, key=lambda path: os.path.basename(path))
+    def save_figure_as_pdf(fig, path):
+        os.makedirs(os.path.dirname(path), exist_ok=True)  # Create directory if it doesn't exist
+        fig.savefig(path, format='pdf', dpi=600, bbox_inches='tight')
+        print(f"Saved {path}")
+    from .io import _read_db
+    fov_path = os.path.join(settings['src'], 'merged', settings['name'])
+    name = os.path.splitext(settings['name'])[0]
+    db_path = os.path.join(settings['src'], 'measurements', 'measurements.db')
+    paths_df = _read_db(db_path, tables=['png_list'])[0]
+    # regex=False: the file stem is matched literally, so '.', '+', '(' etc. in a name are safe
+    paths_df = paths_df[paths_df['png_path'].str.contains(name, na=False, regex=False)]
+    activation_mode = f"{settings['activation_mode']}_list"
+    activation_db_path = os.path.join(settings['src'], 'measurements', settings['activation_db'])
+    activation_paths_df = _read_db(activation_db_path, tables=[activation_mode])[0]
+    activation_paths_df = activation_paths_df[activation_paths_df['png_path'].str.contains(name, na=False, regex=False)]
+    # Sort both lists by file name (not by full path)
+    png_paths = _sort_paths_by_basename(paths_df['png_path'].tolist())
+    activation_paths = _sort_paths_by_basename(activation_paths_df['png_path'].tolist())
+    fig_3 = plot_image_grid(image_paths=activation_paths, percentiles=settings['percentiles'])
+    fig_2 = plot_image_grid(image_paths=png_paths, percentiles=settings['percentiles'])
+    fig_1 = plot_image_mask_overlay(file=fov_path,
+                                  channels=settings['channels'],
+                                  cell_channel=settings['cell_channel'],
+                                  nucleus_channel=settings['nucleus_channel'],
+                                  pathogen_channel=settings['pathogen_channel'],
+                                  figuresize=10,
+                                  percentiles=settings['percentiles'],
+                                  thickness=3,
+                                  save_pdf=False,
+                                  mode=settings['mode'],
+                                  export_tiffs=settings['export_tiffs'])
+    dst = os.path.join(settings['src'], 'results', name)
+    save_figure_as_pdf(fig_1, os.path.join(dst, f"{name}_mask_overlay.pdf"))
+    save_figure_as_pdf(fig_2, os.path.join(dst, f"{name}_png_grid.pdf"))
+    save_figure_as_pdf(fig_3, os.path.join(dst, f"{name}_activation_grid.pdf"))
+    return fig_1, fig_2, fig_3
+def plot_image_grid(image_paths, percentiles):
+    """
+    Plots a square grid of images from a list of image paths.
+    Every image is contrast-stretched to the given percentiles before it is shown.
+    Unused subplots are filled with black, and padding is minimized.
+    Parameters:
+    - image_paths: Non-empty list of paths to images to be displayed.
+    - percentiles: (low, high) percentile pair handed to np.percentile for the normalization.
+    Returns:
+    - fig: The generated matplotlib figure.
+    Raises:
+    - ValueError: If image_paths is empty.
+    """
+    from PIL import Image
+    import matplotlib.pyplot as plt
+    import math
+    import numpy as np
+    def _rescale(values, percentiles):
+        """ Linearly map values so the low/high percentiles land on 0/1, clipped to [0, 1]."""
+        v_min, v_max = np.percentile(values, percentiles)
+        span = v_max - v_min
+        if span == 0:
+            # Constant input: avoid dividing by zero and render it as black
+            return np.zeros(values.shape, dtype=np.float32)
+        return np.clip((values - v_min) / span, 0, 1)
+    def _normalize_image(image, percentiles=(2, 98)):
+        """ Normalize the image to the given percentiles for each channel independently, preserving the input type (either PIL.Image or numpy.ndarray)."""
+        # Check if the input is a PIL image and convert it to a NumPy array
+        is_pil_image = isinstance(image, Image.Image)
+        if is_pil_image:
+            image = np.array(image)
+        # If the image is single-channel, normalize directly
+        if image.ndim == 2:
+            normalized_image = _rescale(image, percentiles)
+        else:
+            # If multi-channel, normalize each channel independently
+            normalized_image = np.zeros_like(image, dtype=np.float32)
+            for c in range(image.shape[-1]):
+                normalized_image[..., c] = _rescale(image[..., c], percentiles)
+        # If the input was a PIL image, convert the result back to PIL format
+        if is_pil_image:
+            # Ensure the image is converted back to 8-bit range (0-255) for PIL
+            normalized_image = (normalized_image * 255).astype(np.uint8)
+            return Image.fromarray(normalized_image)
+        return normalized_image
+    N = len(image_paths)
+    if N == 0:
+        raise ValueError("image_paths must contain at least one image path.")
+    # Calculate the smallest square grid size to fit all images
+    grid_size = math.ceil(math.sqrt(N))
+    # Create the square grid of subplots with a black background.
+    # squeeze=False keeps axs a 2D array even for a 1x1 grid, so flatten() always works.
+    fig, axs = plt.subplots(
+        grid_size, grid_size,
+        figsize=(grid_size * 2, grid_size * 2),
+        facecolor='black',  # Set figure background to black
+        squeeze=False
+    )
+    axs = axs.flatten()
+    for ax, img_path in zip(axs, image_paths):
+        # Load and normalize inside the context manager so the file handle is closed again
+        with Image.open(img_path) as raw_img:
+            img = _normalize_image(raw_img, percentiles)
+        # Display the image
+        ax.imshow(img)
+        ax.axis('off')  # Hide axes
+    # Fill any unused subplots with black
+    for ax in axs[N:]:
+        ax.imshow([[0, 0, 0]], cmap='gray')  # Black square
+        ax.axis('off')  # Hide axes
+    # Adjust layout to minimize white space
+    fig.subplots_adjust(wspace=0, hspace=0, left=0, right=1, top=1, bottom=0)
+    return fig

spacr/toxo.py CHANGED Viewed

@@ -4,8 +4,9 @@ import numpy as np
 from adjustText import adjust_text
 import pandas as pd
 from scipy.stats import fisher_exact
+from IPython.display import display
-def custom_volcano_plot(data_path, metadata_path, metadata_column='tagm_location', string_list=[], point_size=50, figsize=20):
+def custom_volcano_plot_v1(data_path, metadata_path, metadata_column='tagm_location', point_size=50, figsize=20, threshold=0):
     """
     Create a volcano plot with the ability to control the shape of points based on a categorical column,
     color points based on a string list, annotate specific points based on p-value and coefficient thresholds,
@@ -19,7 +20,8 @@ def custom_volcano_plot(data_path, metadata_path, metadata_column='tagm_location
     - point_size: Fixed value to control the size of points.
     - figsize: Width of the plot (height is half the width).
     """
     filename = 'volcano_plot.pdf'
     # Load the data
@@ -42,46 +44,65 @@ def custom_volcano_plot(data_path, metadata_path, metadata_column='tagm_location
     metadata['gene_nr'] = metadata['gene_nr'].astype(str)
     data['gene_nr'] = data['gene_nr'].astype(str)
     # Merge data and metadata on 'gene_nr'
     merged_data = pd.merge(data, metadata[['gene_nr', 'tagm_location']], on='gene_nr', how='left')
-    # Controls handling
-    controls = ['000000', '000001', '000002', '000003', '000004', '000005', '000006', '000007', '000008', '000009', '000010', '000011']
-    merged_data.loc[merged_data['gene_nr'].isin(controls), metadata_column] = 'control'
     merged_data.loc[merged_data['gene_nr'].str.startswith('4'), metadata_column] = 'GT1_gene'
     merged_data.loc[merged_data['gene_nr'] == 'Intercept', metadata_column] = 'Intercept'
-    # Create a 'highlight_color' column based on the string_list
-    merged_data['highlight_color'] = merged_data['gene_nr'].apply(lambda x: 'red' if any(s in x for s in string_list) else 'blue')
     # Create the volcano plot
     figsize_2 = figsize / 2
     plt.figure(figsize=(figsize_2, figsize))
+    palette = {
+        'pc': 'red',
+        'nc': 'green',
+        'control': 'black',
+        'other': 'gray'
+    }
+    merged_data['condition'] = pd.Categorical(
+        merged_data['condition'],
+        categories=['pc', 'nc', 'control', 'other'],
+        ordered=True
+    )
+    display(merged_data)
     # Create the scatter plot with fixed point size
     sns.scatterplot(
         data=merged_data,
         x='coefficient',
         y='-log10(p_value)',
-        hue='highlight_color',
-        style=metadata_column if metadata_column else None,  # Control point shape with metadata_column
+        hue='condition',  # Controls color
+        style=metadata_column if metadata_column else None,  # Controls point shape
         s=point_size,  # Fixed size for all points
-        palette={'red': 'red', 'blue': 'blue'}
+        palette=palette,  # Color palette
+        alpha=1.0  # Transparency
     )
     # Set the plot title and labels
     plt.title('Custom Volcano Plot of Coefficients')
     plt.xlabel('Coefficient')
     plt.ylabel('-log10(p-value)')
+    if threshold > 0:
+        plt.gca().axvline(x=-abs(threshold), linestyle='--', color='black')
+        plt.gca().axvline(x=abs(threshold), linestyle='--', color='black')
     # Horizontal line at p-value threshold (0.05)
-    plt.axhline(y=-np.log10(0.05), color='red', linestyle='--')
+    plt.axhline(y=-np.log10(0.05), color='black', linestyle='--')
-    # Annotate points where p_value <= 0.05 and coefficient >= 0.25
     texts = []
     for i, row in merged_data.iterrows():
-        if row['p_value'] <= 0.05 and row['coefficient'] >= 0.25:
-            texts.append(plt.text(row['coefficient'], -np.log10(row['p_value']), row['gene_nr'], fontsize=9))
+        if row['p_value'] <= 0.05 and abs(row['coefficient']) >= abs(threshold):
+            texts.append(plt.text(
+                row['coefficient'],
+                -np.log10(row['p_value']),
+                row['variable'],
+                fontsize=8
+            ))
     # Adjust text positions to avoid overlap
     adjust_text(texts, arrowprops=dict(arrowstyle='-', color='black'))
@@ -96,6 +117,171 @@ def custom_volcano_plot(data_path, metadata_path, metadata_column='tagm_location
     # Show the plot
     plt.show()
+def custom_volcano_plot(data_path, metadata_path, metadata_column='tagm_location', point_size=50, figsize=20, threshold=0, split_axis_lims = [10, None, None, 10]):
+    """
+    Create a volcano plot with a broken y-axis (upper and lower panel sharing the x-axis).
+    Point shape follows a categorical metadata column, point color follows the 'condition'
+    column, and points with p_value <= 0.05 and |coefficient| >= |threshold| are annotated.
+    The figure is saved as 'volcano_plot.pdf' in the current working directory and shown.
+    Args:
+    - data_path: CSV path or DataFrame with the columns 'feature', 'condition',
+      'coefficient', 'p_value' and '-log10(p_value)'. A DataFrame is copied, not modified.
+    - metadata_path: CSV path or DataFrame with the columns 'gene_nr' and 'tagm_location'.
+    - metadata_column: Column used for the point shape.
+    - point_size: Fixed value to control the size of points.
+    - figsize: Height of the figure (the width is half of it).
+    - threshold: Coefficient cut-off; vertical lines are drawn at +/- threshold when > 0.
+    - split_axis_lims: [upper panel min, upper panel max, lower panel min, lower panel max].
+      None entries (or a value that is not a 4-item list/tuple) fall back to: upper min
+      automatic (points >= 10 are drawn), upper max = largest -log10(p_value) + 5,
+      lower min = 0, lower max automatic.
+    """
+    filename = 'volcano_plot.pdf'
+    # Load the data; DataFrame inputs are copied so the caller's objects stay untouched
+    if isinstance(data_path, pd.DataFrame):
+        data = data_path.copy()
+    else:
+        data = pd.read_csv(data_path)
+    data['variable'] = data['feature'].str.extract(r'\[(.*?)\]')
+    # Assign the result back: in-place fillna on a column selection is not reliable under
+    # pandas copy-on-write
+    data['variable'] = data['variable'].fillna(data['feature'])
+    split_columns = data['variable'].str.split('_', expand=True)
+    data['gene_nr'] = split_columns[0]
+    # Load metadata
+    if isinstance(metadata_path, pd.DataFrame):
+        metadata = metadata_path.copy()
+    else:
+        metadata = pd.read_csv(metadata_path)
+    metadata['gene_nr'] = metadata['gene_nr'].astype(str)
+    data['gene_nr'] = data['gene_nr'].astype(str)
+    # Merge data and metadata on 'gene_nr'
+    # NOTE(review): the merge always takes 'tagm_location', even when metadata_column names
+    # another column - confirm whether metadata_column should be merged instead.
+    merged_data = pd.merge(data, metadata[['gene_nr', 'tagm_location']], on='gene_nr', how='left')
+    merged_data.loc[merged_data['gene_nr'].str.startswith('4'), metadata_column] = 'GT1_gene'
+    merged_data.loc[merged_data['gene_nr'] == 'Intercept', metadata_column] = 'Intercept'
+    merged_data.loc[merged_data['condition'] == 'control', metadata_column] = 'control'
+    # Categorize condition for coloring
+    merged_data['condition'] = pd.Categorical(
+        merged_data['condition'],
+        categories=['other','pc', 'nc', 'control'],
+        ordered=True)
+    # Resolve the y-limits of both panels up front so every later step uses the same values
+    if isinstance(split_axis_lims, (list, tuple)) and len(split_axis_lims) == 4:
+        ylim_min_ax1, ylim_max_ax1, ylim_min_ax2, ylim_max_ax2 = split_axis_lims
+    else:
+        ylim_min_ax1 = ylim_max_ax1 = ylim_min_ax2 = ylim_max_ax2 = None
+    if ylim_max_ax1 is None:
+        ylim_max_ax1 = merged_data['-log10(p_value)'].max() + 5
+    if ylim_min_ax2 is None:
+        ylim_min_ax2 = 0
+    # Points at or above this value belong to the upper panel
+    upper_cutoff = 10 if ylim_min_ax1 is None else ylim_min_ax1
+    # Create subplots with a broken y-axis
+    figsize_2 = figsize / 2
+    fig, (ax1, ax2) = plt.subplots(
+        2, 1, figsize=(figsize_2, figsize),
+        sharex=True, gridspec_kw={'height_ratios': [1, 3]}
+    )
+    # Define color palette
+    palette = {
+        'pc': 'red',
+        'nc': 'green',
+        'control': 'white',
+        'other': 'gray'}
+    # Scatter plot on both axes
+    sns.scatterplot(
+        data=merged_data,
+        x='coefficient',
+        y='-log10(p_value)',
+        hue='condition',
+        style=metadata_column if metadata_column else None,
+        s=point_size,
+        edgecolor='black',
+        palette=palette,
+        alpha=0.8,
+        ax=ax2  # Lower plot
+    )
+    sns.scatterplot(
+        data=merged_data[merged_data['-log10(p_value)'] >= upper_cutoff],
+        x='coefficient',
+        y='-log10(p_value)',
+        hue='condition',
+        style=metadata_column if metadata_column else None,
+        s=point_size,
+        palette=palette,
+        edgecolor='black',
+        alpha=0.8,
+        ax=ax1  # Upper plot
+    )
+    # Set axis limits and hide unnecessary parts
+    ax1.set_ylim(ylim_min_ax1, ylim_max_ax1)
+    ax2.set_ylim(ylim_min_ax2, ylim_max_ax2)
+    ax1.spines['bottom'].set_visible(False)
+    ax2.spines['top'].set_visible(False)
+    # The upper panel has no legend when it holds no points, so only remove an existing one
+    if ax1.get_legend() is not None:
+        ax1.get_legend().remove()
+    ax1.tick_params(axis='x', which='both', bottom=False, top=False, labelbottom=False)
+    # Add vertical threshold lines to both plots
+    if threshold > 0:
+        for ax in (ax1, ax2):
+            ax.axvline(x=-abs(threshold), linestyle='--', color='black')
+            ax.axvline(x=abs(threshold), linestyle='--', color='black')
+    # Add a horizontal line at p-value threshold (0.05)
+    ax2.axhline(y=-np.log10(0.05), color='black', linestyle='--')
+    # Annotate significant points on the panel that displays them
+    texts_ax1 = []
+    texts_ax2 = []
+    upper_floor = ax1.get_ylim()[0]
+    for _, row in merged_data.iterrows():
+        if row['p_value'] <= 0.05 and abs(row['coefficient']) >= abs(threshold):
+            in_upper_panel = row['-log10(p_value)'] >= upper_floor
+            ax = ax1 if in_upper_panel else ax2
+            text = ax.text(
+                row['coefficient'],
+                -np.log10(row['p_value']),
+                row['variable'],
+                fontsize=8,
+                ha='center',
+                va='bottom',
+            )
+            if in_upper_panel:
+                texts_ax1.append(text)
+            else:
+                texts_ax2.append(text)
+    # Adjust text positions to avoid overlap; skip panels without annotations
+    if texts_ax1:
+        adjust_text(texts_ax1, arrowprops=dict(arrowstyle='-', color='black'), ax=ax1)
+    if texts_ax2:
+        adjust_text(texts_ax2, arrowprops=dict(arrowstyle='-', color='black'), ax=ax2)
+    # Move the legend outside the lower plot
+    ax2.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0.)
+    # Adjust the spacing between subplots and move the title
+    fig.subplots_adjust(hspace=0.05)
+    fig.suptitle('Custom Volcano Plot of Coefficients', y=1.02, fontsize=16)  # Title above the top plot
+    # Save the plot as PDF
+    fig.savefig(filename, format='pdf', bbox_inches='tight')
+    print(f'Saved Volcano plot: {filename}')
+    # Show the plot
+    plt.show()
 def go_term_enrichment_by_column(significant_df, metadata_path, go_term_columns=['Computed GO Processes', 'Curated GO Components', 'Curated GO Functions', 'Curated GO Processes']):
     """
     Perform GO term enrichment analysis for each GO term column and generate plots.

spacr/utils.py CHANGED Viewed

@@ -326,6 +326,8 @@ def save_settings(settings, name='settings', show=False):
     if isinstance(settings['src'], list):
         src = settings['src'][0]
+        #if os.path.exists(src):
         name = f"{name}_list"
     else:
         src = settings['src']
@@ -4712,10 +4714,10 @@ def merge_regression_res_with_metadata(results_file, metadata_file, name='_metad
     df_metadata['gene'] = df_metadata['Gene ID'].apply(lambda x: x.split('_')[1] if '_' in x else None)
     # Drop rows where gene extraction failed
-    df_results = df_results.dropna(subset=['gene'])
+    #df_results = df_results.dropna(subset=['gene'])
     # Merge the two dataframes on the gene column
-    merged_df = pd.merge(df_results, df_metadata, on='gene')
+    merged_df = pd.merge(df_results, df_metadata, on='gene', how='left')
     # Generate the new file name
     base, ext = os.path.splitext(results_file)

{spacr-0.3.38.dist-info → spacr-0.3.41.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: spacr
-Version: 0.3.38
+Version: 0.3.41
 Summary: Spatial phenotype analysis of crisp screens (SpaCr)
 Home-page: https://github.com/EinarOlafsson/spacr
 Author: Einar Birnir Olafsson

{spacr-0.3.38.dist-info → spacr-0.3.41.dist-info}/RECORD RENAMED Viewed

@@ -8,26 +8,26 @@ spacr/app_measure.py,sha256=_K7APYIeOKpV6e_LcqabBjvEi7mfq9Fch8175x1x0k8,162
 spacr/app_sequencing.py,sha256=DjG26jy4cpddnV8WOOAIiExtOe9MleVMY4MFa5uTo5w,157
 spacr/app_umap.py,sha256=ZWAmf_OsIKbYvolYuWPMYhdlVe-n2CADoJulAizMiEo,153
 spacr/cellpose.py,sha256=zv4BzhaP2O-mtQ-pUfYvpOyxgn1ke_bDWgdHD5UWm9I,13942
-spacr/core.py,sha256=G_x-w7FRIHNfSOoPaIZPSf_A7mVj7PA7o9HQZ4nIu5o,48231
+spacr/core.py,sha256=dW9RrAKFLfVsFhX0-kaVMc2T7b47Ky0pTXK-CEVOeWQ,48235
 spacr/deep_spacr.py,sha256=HdOcNU8cHcE_19nP7_5uTz-ih3E169ffr2Hm--NvMvA,43255
 spacr/gui.py,sha256=ARyn9Q_g8HoP-cXh1nzMLVFCKqthY4v2u9yORyaQqQE,8230
 spacr/gui_core.py,sha256=LV_HX5zreu3Bye6sQFDbOuk8Dfj4StMoohy6hsrDEXA,41363
 spacr/gui_elements.py,sha256=w-S1MZdyxt5O3DsNAHNNXy_WGfwBPg0NhwQtCsJeiao,137071
 spacr/gui_utils.py,sha256=7e9DsZIuV7-jh97kEf7v1In_cFzlFueV4SGcGYGpTxw,45454
-spacr/io.py,sha256=AARmqn1fMmTgVDwWy8bEYK6SjH-6DZIulgCSPdBTyf0,143370
+spacr/io.py,sha256=LN_gJq_oqjbf8y-lBtLLZtJi8DLbNdyoGEcBYyOjbhQ,143606
 spacr/logger.py,sha256=lJhTqt-_wfAunCPl93xE65Wr9Y1oIHJWaZMjunHUeIw,1538
 spacr/measure.py,sha256=BThn_sALgKrwGKnLOGpT4FyoJeRVoTZoP9SXbCtCMRw,54857
 spacr/mediar.py,sha256=FwLvbLQW5LQzPgvJZG8Lw7GniA2vbZx6Jv6vIKu7I5c,14743
-spacr/ml.py,sha256=ItibDL_q0cKwEsJdwpBtVqfpRQGPXGbb0BX5UB5iH5s,49342
+spacr/ml.py,sha256=e6nUQaiKBPwcDN_aZZKsbZG6qEa5k9B42wtuL8ipv3Q,50287
 spacr/openai.py,sha256=5vBZ3Jl2llYcW3oaTEXgdyCB2aJujMUIO5K038z7w_A,1246
-spacr/plot.py,sha256=W6F2Jaxq7WBnB9G3-7AESdQs6foGeyS70-LZwKgKJv8,118214
+spacr/plot.py,sha256=TDGMwiIHjvk6v94WFlIvemU-6JfEik_GmSez51vyvCc,135869
 spacr/sequencing.py,sha256=t18mgpK6rhWuB1LtFOsPxqgpFXxuUmrD06ecsaVQ0Gw,19655
 spacr/settings.py,sha256=AzP9NGiXI1MqT69bHObxwDSCUk0kdstBVvl1JpcD_-w,75960
 spacr/sim.py,sha256=1xKhXimNU3ukzIw-3l9cF3Znc_brW8h20yv8fSTzvss,71173
 spacr/submodules.py,sha256=AB7s6-cULsaqz-haAaCtXfGEIi8uPZGT4xoCslUJC3Y,18391
 spacr/timelapse.py,sha256=FSYpUtAVy6xc3lwprRYgyDTT9ysUhfRQ4zrP9_h2mvg,39465
-spacr/toxo.py,sha256=us3pQyULtMTyfTq0MWPn4QJTTmQ6BwAJKChNf75jo3I,10082
-spacr/utils.py,sha256=j6qE7aTGu7D82_A68md5b5Vgn8UrW2w2saa6nCbANw8,216373
+spacr/toxo.py,sha256=7dUJe5_HSvDCP16OIXtbYLyshh9LXb2JQ80Vtn-XdPk,15979
+spacr/utils.py,sha256=_8OxwGVCZaMNBiweB4_YOxBkqQX1LR9YstPSIFmeQKA,216420
 spacr/version.py,sha256=axH5tnGwtgSnJHb5IDhiu4Zjk5GhLyAEDRe-rnaoFOA,409
 spacr/resources/MEDIAR/.gitignore,sha256=Ff1q9Nme14JUd-4Q3jZ65aeQ5X4uttptssVDgBVHYo8,152
 spacr/resources/MEDIAR/LICENSE,sha256=yEj_TRDLUfDpHDNM0StALXIt6mLqSgaV2hcCwa6_TcY,1065
@@ -150,9 +150,9 @@ spacr/resources/icons/umap.png,sha256=dOLF3DeLYy9k0nkUybiZMe1wzHQwLJFRmgccppw-8b
 spacr/resources/images/plate1_E01_T0001F001L01A01Z01C02.tif,sha256=Tl0ZUfZ_AYAbu0up_nO0tPRtF1BxXhWQ3T3pURBCCRo,7958528
 spacr/resources/images/plate1_E01_T0001F001L01A02Z01C01.tif,sha256=m8N-V71rA1TT4dFlENNg8s0Q0YEXXs8slIn7yObmZJQ,7958528
 spacr/resources/images/plate1_E01_T0001F001L01A03Z01C03.tif,sha256=Pbhk7xn-KUP6RSIhJsxQcrHFImBm3GEpLkzx7WOc-5M,7958528
-spacr-0.3.38.dist-info/LICENSE,sha256=SR-2MeGc6SCM1UORJYyarSWY_A-JaOMFDj7ReSs9tRM,1083
-spacr-0.3.38.dist-info/METADATA,sha256=IfwGcod8ZUdemPlpbdoCoONBap_IZQCfiL-KURN3KuI,5949
-spacr-0.3.38.dist-info/WHEEL,sha256=HiCZjzuy6Dw0hdX5R3LCFPDmFS4BWl8H-8W39XfmgX4,91
-spacr-0.3.38.dist-info/entry_points.txt,sha256=BMC0ql9aNNpv8lUZ8sgDLQMsqaVnX5L535gEhKUP5ho,296
-spacr-0.3.38.dist-info/top_level.txt,sha256=GJPU8FgwRXGzKeut6JopsSRY2R8T3i9lDgya42tLInY,6
-spacr-0.3.38.dist-info/RECORD,,
+spacr-0.3.41.dist-info/LICENSE,sha256=SR-2MeGc6SCM1UORJYyarSWY_A-JaOMFDj7ReSs9tRM,1083
+spacr-0.3.41.dist-info/METADATA,sha256=zGZO-9iZjljHTjnVg9kAxQJjr2vqpobl7S7ZSQlgxP8,5949
+spacr-0.3.41.dist-info/WHEEL,sha256=HiCZjzuy6Dw0hdX5R3LCFPDmFS4BWl8H-8W39XfmgX4,91
+spacr-0.3.41.dist-info/entry_points.txt,sha256=BMC0ql9aNNpv8lUZ8sgDLQMsqaVnX5L535gEhKUP5ho,296
+spacr-0.3.41.dist-info/top_level.txt,sha256=GJPU8FgwRXGzKeut6JopsSRY2R8T3i9lDgya42tLInY,6
+spacr-0.3.41.dist-info/RECORD,,

{spacr-0.3.38.dist-info → spacr-0.3.41.dist-info}/LICENSE RENAMED Viewed

File without changes

{spacr-0.3.38.dist-info → spacr-0.3.41.dist-info}/WHEEL RENAMED Viewed

File without changes

{spacr-0.3.38.dist-info → spacr-0.3.41.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{spacr-0.3.38.dist-info → spacr-0.3.41.dist-info}/top_level.txt RENAMED Viewed

File without changes

spacr 0.3.38__py3-none-any.whl → 0.3.41__py3-none-any.whl

spacr 0.3.38-py3-none-any.whl → 0.3.41-py3-none-any.whl