spacr 0.3.47__py3-none-any.whl → 0.3.52__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
spacr/plot.py CHANGED
@@ -16,6 +16,7 @@ from skimage import measure
  from skimage.measure import find_contours, label, regionprops
  from skimage.segmentation import mark_boundaries
  from skimage.transform import resize as sk_resize
+ import scikit_posthocs as sp
 
  import tifffile as tiff
 
@@ -32,7 +33,340 @@ import matplotlib.patches as patches
  from collections import defaultdict
  from matplotlib.gridspec import GridSpec
 
- def plot_image_mask_overlay(file, channels, cell_channel, nucleus_channel, pathogen_channel, figuresize=10, percentiles=(2,98), thickness=3, save_pdf=True, mode='outlines', export_tiffs=False):
+ #filter_dict={'cell':[(0,100000), (0, 65000)],'nucleus':[(3000,100000), (1500, 65000)],'pathogen':[(500,100000), (0, 65000)]}
+ def plot_image_mask_overlay(
+     file,
+     channels,
+     cell_channel,
+     nucleus_channel,
+     pathogen_channel,
+     figuresize=10,
+     percentiles=(2, 98),
+     thickness=3,
+     save_pdf=True,
+     mode='outlines',
+     export_tiffs=False,
+     all_on_all=False,
+     all_outlines=False,
+     filter_dict=None
+ ):
+     """Plot image and mask overlays."""
+
+     def random_color_cmap(n_labels, seed=None):
+         """Generates a random color map for a given number of labels."""
+         if seed is not None:
+             np.random.seed(seed)
+         rand_colors = np.random.rand(n_labels, 3)
+         rand_colors = np.vstack([[0, 0, 0], rand_colors]) # Ensure background is black
+         cmap = ListedColormap(rand_colors)
+         return cmap
+
+     def _plot_merged_plot(
+         image,
+         outlines,
+         outline_colors,
+         figuresize,
+         thickness,
+         percentiles,
+         mode='outlines',
+         all_on_all=False,
+         all_outlines=False,
+         channels=None,
+         cell_channel=None,
+         nucleus_channel=None,
+         pathogen_channel=None,
+         cell_outlines=None,
+         nucleus_outlines=None,
+         pathogen_outlines=None,
+         save_pdf=True
+     ):
+         """Plot the merged plot with overlay, image channels, and masks."""
+
+         def _generate_colored_mask(mask, cmap):
+             """Generate a colored mask using the given colormap."""
+             mask_norm = mask / (mask.max() + 1e-5) # Normalize mask
+             colored_mask = cmap(mask_norm)
+             colored_mask[..., 3] = np.where(mask > 0, 1, 0) # Alpha channel
+             return colored_mask
+
+         def _overlay_mask(image, mask):
+             """Overlay the colored mask onto the original image."""
+             combined = np.clip(image * (1 - mask[..., 3:]) + mask[..., :3] * mask[..., 3:], 0, 1)
+             return combined
+
+         def _normalize_image(image, percentiles):
+             """Normalize the image based on given percentiles."""
+             v_min, v_max = np.percentile(image, percentiles)
+             image_normalized = np.clip((image - v_min) / (v_max - v_min + 1e-5), 0, 1)
+             return image_normalized
+
+         def _generate_contours(mask):
+             """Generate contours from the mask using OpenCV."""
+             contours, _ = cv2.findContours(
+                 mask.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
+             )
+             return contours
+
+         def _apply_contours(image, mask, color, thickness):
+             """Apply contours to the image."""
+             unique_labels = np.unique(mask)
+             for label in unique_labels:
+                 if label == 0:
+                     continue # Skip background
+                 label_mask = (mask == label).astype(np.uint8)
+                 contours = _generate_contours(label_mask)
+                 cv2.drawContours(
+                     image, contours, -1, mpl.colors.to_rgb(color), thickness
+                 )
+             return image
+
+         num_channels = image.shape[-1]
+         fig, ax = plt.subplots(1, num_channels + 1, figsize=(4 * figuresize, figuresize))
+
+         # Identify channels without associated outlines
+         channels_with_outlines = []
+         if cell_channel is not None:
+             channels_with_outlines.append(cell_channel)
+         if nucleus_channel is not None:
+             channels_with_outlines.append(nucleus_channel)
+         if pathogen_channel is not None:
+             channels_with_outlines.append(pathogen_channel)
+
+         for v in range(num_channels):
+             channel_image = image[..., v]
+             channel_image_normalized = _normalize_image(channel_image, percentiles)
+             channel_image_rgb = np.dstack([channel_image_normalized] * 3)
+
+             current_channel = channels[v]
+
+             if all_on_all:
+                 # Apply all outlines to all channels
+                 for outline, color in zip(outlines, outline_colors):
+                     if mode == 'outlines':
+                         channel_image_rgb = _apply_contours(
+                             channel_image_rgb, outline, color, thickness
+                         )
+                     else:
+                         cmap = random_color_cmap(int(outline.max() + 1), random.randint(0, 100))
+                         mask = _generate_colored_mask(outline, cmap)
+                         channel_image_rgb = _overlay_mask(channel_image_rgb, mask)
+             elif current_channel in channels_with_outlines:
+                 # Apply only the relevant outline to each channel
+                 outline = None
+                 color = None
+
+                 if current_channel == cell_channel and cell_outlines is not None:
+                     outline = cell_outlines
+                 elif current_channel == nucleus_channel and nucleus_outlines is not None:
+                     outline = nucleus_outlines
+                 elif current_channel == pathogen_channel and pathogen_outlines is not None:
+                     outline = pathogen_outlines
+
+                 if outline is not None:
+                     if mode == 'outlines':
+                         # Use magenta color when all_on_all=False
+                         channel_image_rgb = _apply_contours(
+                             channel_image_rgb, outline, '#FF00FF', thickness
+                         )
+                     else:
+                         cmap = random_color_cmap(int(outline.max() + 1), random.randint(0, 100))
+                         mask = _generate_colored_mask(outline, cmap)
+                         channel_image_rgb = _overlay_mask(channel_image_rgb, mask)
+             else:
+                 # Channel without associated outlines
+                 if all_outlines:
+                     # Apply all outlines with specified colors
+                     for outline, color in zip(outlines, ['blue', 'red', 'green']):
+                         if mode == 'outlines':
+                             channel_image_rgb = _apply_contours(
+                                 channel_image_rgb, outline, color, thickness
+                             )
+                         else:
+                             cmap = random_color_cmap(int(outline.max() + 1), random.randint(0, 100))
+                             mask = _generate_colored_mask(outline, cmap)
+                             channel_image_rgb = _overlay_mask(channel_image_rgb, mask)
+
+             ax[v].imshow(channel_image_rgb)
+             ax[v].set_title(f'Image - Channel {current_channel}')
+
+         # Create an image combining all objects filled with colors
+         combined_mask = np.zeros_like(outlines[0])
+         for outline in outlines:
+             combined_mask = np.maximum(combined_mask, outline)
+
+         cmap = random_color_cmap(int(combined_mask.max() + 1), random.randint(0, 100))
+         mask = _generate_colored_mask(combined_mask, cmap)
+         blank_image = np.zeros((*combined_mask.shape, 3))
+         filled_image = _overlay_mask(blank_image, mask)
+
+         ax[-1].imshow(filled_image)
+         ax[-1].set_title('Combined Objects Image')
+
+         plt.tight_layout()
+
+         # Save the figure as a PDF
+         if save_pdf:
+             pdf_dir = os.path.join(
+                 os.path.dirname(os.path.dirname(file)), 'results', 'overlay'
+             )
+             os.makedirs(pdf_dir, exist_ok=True)
+             pdf_path = os.path.join(
+                 pdf_dir, os.path.basename(file).replace('.npy', '.pdf')
+             )
+             fig.savefig(pdf_path, format='pdf')
+
+         plt.show()
+         return fig
+
+     def _save_channels_as_tiff(stack, save_dir, filename):
+         """Save each channel in the stack as a grayscale TIFF."""
+         os.makedirs(save_dir, exist_ok=True)
+         for i in range(stack.shape[-1]):
+             channel = stack[..., i]
+             tiff_path = os.path.join(save_dir, f"{filename}_channel_{i}.tiff")
+             tiff.imwrite(tiff_path, channel.astype(np.uint16), photometric='minisblack')
+             print(f"Saved {tiff_path}")
+
+     def _filter_object(mask, intensity_image, min_max_area=(0, 10000000), min_max_intensity=(0, 65000), type_='object'):
+         """
+         Filter objects in a mask based on their area (size) and mean intensity.
+
+         Args:
+             mask (ndarray): The input mask.
+             intensity_image (ndarray): The corresponding intensity image.
+             min_max_area (tuple): A tuple (min_area, max_area) specifying the minimum and maximum area thresholds.
+             min_max_intensity (tuple): A tuple (min_intensity, max_intensity) specifying the minimum and maximum intensity thresholds.
+
+         Returns:
+             ndarray: The filtered mask.
+         """
+         original_dtype = mask.dtype
+         mask_int = mask.astype(np.int64)
+         intensity_image = intensity_image.astype(np.float64)
+         # Compute properties for each labeled object
+         unique_labels = np.unique(mask_int)
+         unique_labels = unique_labels[unique_labels != 0] # Exclude background
+         num_objects_before = len(unique_labels)
+
+         # Initialize lists to store area and intensity for each object
+         areas = []
+         mean_intensities = []
+         labels_to_keep = []
+
+         for label in unique_labels:
+             label_mask = (mask_int == label)
+             area = np.sum(label_mask)
+             mean_intensity = np.mean(intensity_image[label_mask])
+
+             areas.append(area)
+             mean_intensities.append(mean_intensity)
+
+             # Check if the object meets both area and intensity criteria
+             if (min_max_area[0] <= area <= min_max_area[1]) and (min_max_intensity[0] <= mean_intensity <= min_max_intensity[1]):
+                 labels_to_keep.append(label)
+
+         # Convert lists to numpy arrays for easier computation
+         areas = np.array(areas)
+         mean_intensities = np.array(mean_intensities)
+         num_objects_after = len(labels_to_keep)
+         # Compute average area and intensity before and after filtering
+         avg_area_before = areas.mean() if num_objects_before > 0 else 0
+         avg_intensity_before = mean_intensities.mean() if num_objects_before > 0 else 0
+         areas_after = areas[np.isin(unique_labels, labels_to_keep)]
+         mean_intensities_after = mean_intensities[np.isin(unique_labels, labels_to_keep)]
+         avg_area_after = areas_after.mean() if num_objects_after > 0 else 0
+         avg_intensity_after = mean_intensities_after.mean() if num_objects_after > 0 else 0
+         print(f"Before filtering {type_}: {num_objects_before} objects")
+         print(f"Average area {type_}: {avg_area_before:.2f} pixels, Average intensity: {avg_intensity_before:.2f}")
+         print(f"After filtering {type_}: {num_objects_after} objects")
+         print(f"Average area {type_}: {avg_area_after:.2f} pixels, Average intensity: {avg_intensity_after:.2f}")
+         mask_filtered = np.zeros_like(mask_int)
+         for label in labels_to_keep:
+             mask_filtered[mask_int == label] = label
+         mask_filtered = mask_filtered.astype(original_dtype)
+         return mask_filtered
+
+     stack = np.load(file)
+
+     if export_tiffs:
+         save_dir = os.path.join(
+             os.path.dirname(os.path.dirname(file)),
+             'results',
+             os.path.splitext(os.path.basename(file))[0],
+             'tiff'
+         )
+         filename = os.path.splitext(os.path.basename(file))[0]
+         _save_channels_as_tiff(stack, save_dir, filename)
+
+     # Convert to float for normalization and ensure correct handling of arrays
+     if stack.dtype in (np.uint16, np.uint8):
+         stack = stack.astype(np.float32)
+
+     image = stack[..., channels]
+     outlines = []
+     outline_colors = []
+
+     # Define variables to hold individual outlines
+     cell_outlines = None
+     nucleus_outlines = None
+     pathogen_outlines = None
+
+     if pathogen_channel is not None:
+         pathogen_mask_dim = -1
+         pathogen_outlines = np.take(stack, pathogen_mask_dim, axis=2)
+         if not filter_dict is None:
+             pathogen_intensity = np.take(stack, pathogen_channel, axis=2)
+             pathogen_outlines = _filter_object(pathogen_outlines, pathogen_intensity, filter_dict['pathogen'][0], filter_dict['pathogen'][1], type_='pathogen')
+
+         outlines.append(pathogen_outlines)
+         outline_colors.append('green')
+
+     if nucleus_channel is not None:
+         nucleus_mask_dim = -2 if pathogen_channel is not None else -1
+         nucleus_outlines = np.take(stack, nucleus_mask_dim, axis=2)
+         if not filter_dict is None:
+             nucleus_intensity = np.take(stack, nucleus_channel, axis=2)
+             nucleus_outlines = _filter_object(nucleus_outlines, nucleus_intensity, filter_dict['nucleus'][0], filter_dict['nucleus'][1], type_='nucleus')
+         outlines.append(nucleus_outlines)
+         outline_colors.append('blue')
+
+     if cell_channel is not None:
+         if nucleus_channel is not None and pathogen_channel is not None:
+             cell_mask_dim = -3
+         elif nucleus_channel is not None or pathogen_channel is not None:
+             cell_mask_dim = -2
+         else:
+             cell_mask_dim = -1
+         cell_outlines = np.take(stack, cell_mask_dim, axis=2)
+         if not filter_dict is None:
+             cell_intensity = np.take(stack, cell_channel, axis=2)
+             cell_outlines = _filter_object(cell_outlines, cell_intensity, filter_dict['cell'][0], filter_dict['cell'][1], type_='cell')
+         outlines.append(cell_outlines)
+         outline_colors.append('red')
+
+     fig = _plot_merged_plot(
+         image=image,
+         outlines=outlines,
+         outline_colors=outline_colors,
+         figuresize=figuresize,
+         thickness=thickness,
+         percentiles=percentiles, # Pass percentiles to the plotting function
+         mode=mode,
+         all_on_all=all_on_all,
+         all_outlines=all_outlines,
+         channels=channels,
+         cell_channel=cell_channel,
+         nucleus_channel=nucleus_channel,
+         pathogen_channel=pathogen_channel,
+         cell_outlines=cell_outlines,
+         nucleus_outlines=nucleus_outlines,
+         pathogen_outlines=pathogen_outlines,
+         save_pdf=save_pdf
+     )
+
+     return fig
+
+ def plot_image_mask_overlay_v1(file, channels, cell_channel, nucleus_channel, pathogen_channel, figuresize=10, percentiles=(2,98), thickness=3, save_pdf=True, mode='outlines', export_tiffs=False):
      """Plot image and mask overlays."""
 
      def _plot_merged_plot(image, outlines, outline_colors, figuresize, thickness, percentiles, mode='outlines'):
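The new `filter_dict` argument carries the same structure as the commented-out example above the rewritten function: for each object type, a pair of `(min, max)` bounds for area and for mean intensity. A minimal usage sketch, assuming a merged `.npy` stack laid out as image channels followed by cell/nucleus/pathogen masks (the path and channel indices here are hypothetical):

```python
# Hypothetical call; the path and channel assignments depend on your dataset layout.
filter_dict = {
    'cell': [(0, 100000), (0, 65000)],          # (min, max) area, (min, max) mean intensity
    'nucleus': [(3000, 100000), (1500, 65000)],
    'pathogen': [(500, 100000), (0, 65000)],
}

fig = plot_image_mask_overlay(
    file='/data/screen1/merged/field_001.npy',  # hypothetical merged stack
    channels=[0, 1, 2],
    cell_channel=0,
    nucleus_channel=1,
    pathogen_channel=2,
    mode='outlines',
    all_on_all=False,     # new: draw each outline only on its matching channel
    all_outlines=False,   # new: also draw all outlines on channels without their own
    filter_dict=filter_dict,
)
```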
@@ -1398,7 +1732,7 @@ def _plot_histograms_and_stats(df):
          print('-'*40)
 
          # Plot the histogram
-         plt.figure(figsize=(10,6))
+         plt.figure(figsize=(10,10))
          plt.hist(subset['pred'], bins=30, edgecolor='black')
          plt.axvline(mean_pred, color='red', linestyle='dashed', linewidth=1, label=f"Mean = {mean_pred:.2f}")
          plt.title(f'Histogram for pred - Condition: {condition}')
@@ -1455,12 +1789,16 @@ def _reg_v_plot(df, grouping, variable, plate_number):
      plt.show()
 
  def generate_plate_heatmap(df, plate_number, variable, grouping, min_max, min_count):
+
+     if not isinstance(min_count, (int, float)):
+         min_count = 0
+
      df = df.copy() # Work on a copy to avoid SettingWithCopyWarning
      df['plate'], df['row'], df['col'] = zip(*df['prc'].str.split('_'))
 
      # Filtering the dataframe based on the plate_number
      df = df[df['plate'] == plate_number].copy() # Create another copy after filtering
-
+
      # Ensure proper ordering
      row_order = [f'r{i}' for i in range(1, 17)]
      col_order = [f'c{i}' for i in range(1, 28)] # Exclude c15 as per your earlier code
@@ -1496,7 +1834,6 @@ def generate_plate_heatmap(df, plate_number, variable, grouping, min_max, min_co
          min_max = np.quantile(plate_map.values, [min_max[0], min_max[1]])
          if isinstance(min_max[0], (int)) and isinstance(min_max[1], (int)):
              min_max = [min_max[0], min_max[1]]
-
      return plate_map, min_max
 
  def plot_plates(df, variable, grouping, min_max, cmap, min_count=0, verbose=True, dst=None):
@@ -1516,10 +1853,14 @@ def plot_plates(df, variable, grouping, min_max, cmap, min_count=0, verbose=True
      plt.subplots_adjust(wspace=0.1, hspace=0.4)
 
      if not dst is None:
-         filename = os.path.join(dst, 'plate_heatmap.pdf')
-         fig.savefig(filename, format='pdf')
-         print(f'Saved heatmap to {filename}')
-
+         for i in range(0,1000):
+             filename = os.path.join(dst, f'plate_heatmap_{i}.pdf')
+             if os.path.exists(filename):
+                 continue
+             else:
+                 fig.savefig(filename, format='pdf')
+                 print(f'Saved heatmap to {filename}')
+                 break
      if verbose:
          plt.show()
      return fig
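The rewritten save block probes `plate_heatmap_0.pdf`, `plate_heatmap_1.pdf`, and so on, writing to the first name that does not exist, so repeated runs no longer overwrite earlier heatmaps. The same pattern as a standalone helper (a sketch, not part of spacr):

```python
import os

def next_free_path(dst, stem='plate_heatmap', ext='.pdf', limit=1000):
    """Return the first '{stem}_{i}{ext}' under dst that does not exist yet."""
    for i in range(limit):
        candidate = os.path.join(dst, f'{stem}_{i}{ext}')
        if not os.path.exists(candidate):
            return candidate
    raise FileExistsError(f'All {limit} candidate names are taken in {dst}')
```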
@@ -1886,22 +2227,77 @@ def volcano_plot(coef_df, filename='volcano_plot.pdf'):
      print(f'Saved Volcano plot: {filename}')
      plt.show()
 
- def plot_histogram(df, dependent_variable, dst=None):
+ def plot_histogram(df, column, dst=None):
      # Plot histogram of the dependent variable
-     plt.figure(figsize=(10, 6))
-     sns.histplot(df[dependent_variable], kde=True)
-     plt.title(f'Histogram of {dependent_variable}')
-     plt.xlabel(dependent_variable)
+     bar_color = (0/255, 155/255, 155/255)
+     plt.figure(figsize=(10, 10))
+     sns.histplot(df[column], kde=False, color=bar_color, edgecolor=None, alpha=0.6)
+     plt.title(f'Histogram of {column}')
+     plt.xlabel(column)
      plt.ylabel('Frequency')
 
      if not dst is None:
-         filename = os.path.join(dst, 'dependent_variable_histogram.pdf')
+         filename = os.path.join(dst, f'{column}_histogram.pdf')
          plt.savefig(filename, format='pdf')
          print(f'Saved histogram to {filename}')
 
      plt.show()
 
- def plot_lorenz_curves(csv_files, remove_keys=['TGGT1_220950_1', 'TGGT1_233460_4']):
+ def plot_lorenz_curves(csv_files, name_column='grna_name', value_column='count', remove_keys=['TGGT1_220950_1', 'TGGT1_233460_4'], x_lim=[0.0,1], y_lim=[0,1], save=True):
+
+     def lorenz_curve(data):
+         """Calculate Lorenz curve."""
+         sorted_data = np.sort(data)
+         cumulative_data = np.cumsum(sorted_data)
+         lorenz_curve = cumulative_data / cumulative_data[-1]
+         lorenz_curve = np.insert(lorenz_curve, 0, 0)
+         return lorenz_curve
+
+     combined_data = []
+
+     plt.figure(figsize=(10, 10))
+
+     for idx, csv_file in enumerate(csv_files):
+         if idx == 1:
+             save_fldr = os.path.dirname(csv_file)
+             save_path = os.path.join(save_fldr, 'lorenz_curve.pdf')
+
+         df = pd.read_csv(csv_file)
+         for remove in remove_keys:
+             df = df[df[name_column] != remove]
+
+         values = df[value_column].values
+         combined_data.extend(values)
+
+         lorenz = lorenz_curve(values)
+         name = f"plate {idx+1}"
+         plt.plot(np.linspace(0, 1, len(lorenz)), lorenz, label=name)
+
+     # Plot combined Lorenz curve
+     combined_lorenz = lorenz_curve(np.array(combined_data))
+     plt.plot(np.linspace(0, 1, len(combined_lorenz)), combined_lorenz, label="Combined", linestyle='--', color='black')
+
+     if x_lim != None:
+         plt.xlim(x_lim)
+
+     if y_lim != None:
+         plt.ylim(y_lim)
+
+     plt.title('Lorenz Curves')
+     plt.xlabel('Cumulative Share of Individuals')
+     plt.ylabel('Cumulative Share of Value')
+     plt.legend()
+     plt.grid(False)
+
+     if save:
+         save_path = os.path.join(os.path.dirname(csv_files[0]), 'results')
+         os.makedirs(save_path, exist_ok=True)
+         save_file_path = os.path.join(save_path, 'lorenz_curve.pdf')
+         plt.savefig(save_file_path, format='pdf', bbox_inches='tight')
+         print(f"Saved Lorenz Curve: {save_file_path}")
+     plt.show()
+
+ def plot_lorenz_curves_v1(csv_files, remove_keys=['TGGT1_220950_1', 'TGGT1_233460_4']):
 
      def lorenz_curve(data):
          """Calculate Lorenz curve."""
@@ -2358,22 +2754,33 @@ class spacrGraph:
          return filtered_df
 
      def perform_normality_tests(self):
-         """Perform normality tests for each group and each data column."""
+         """Perform normality tests for each group and data column."""
          unique_groups = self.df[self.grouping_column].unique()
          normality_results = []
 
          for column in self.data_column:
-             # Iterate over each group and its corresponding data
              for group in unique_groups:
-                 data = self.df.loc[self.df[self.grouping_column] == group, column]
+                 data = self.df.loc[self.df[self.grouping_column] == group, column].dropna()
                  n_samples = len(data)
 
+                 if n_samples < 3:
+                     # Skip test if there aren't enough data points
+                     print(f"Skipping normality test for group '{group}' on column '{column}' - Not enough data.")
+                     normality_results.append({
+                         'Comparison': f'Normality test for {group} on {column}',
+                         'Test Statistic': None,
+                         'p-value': None,
+                         'Test Name': 'Skipped',
+                         'Column': column,
+                         'n': n_samples
+                     })
+                     continue
+
+                 # Choose the appropriate normality test based on the sample size
                  if n_samples >= 8:
-                     # Use D'Agostino-Pearson test for larger samples
                      stat, p_value = normaltest(data)
                      test_name = "D'Agostino-Pearson test"
                  else:
-                     # Use Shapiro-Wilk test for smaller samples
                      stat, p_value = shapiro(data)
                      test_name = "Shapiro-Wilk test"
 
@@ -2384,11 +2791,11 @@ class spacrGraph:
                      'p-value': p_value,
                      'Test Name': test_name,
                      'Column': column,
-                     'n': n_samples # Sample size
+                     'n': n_samples
                  })
 
              # Check if all groups are normally distributed (p > 0.05)
-             normal_p_values = [result['p-value'] for result in normality_results if result['Column'] == column]
+             normal_p_values = [result['p-value'] for result in normality_results if result['Column'] == column and result['p-value'] is not None]
              is_normal = all(p > 0.05 for p in normal_p_values)
 
          return is_normal, normality_results
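The `n_samples >= 8` cutoff matches a SciPy constraint: `scipy.stats.normaltest` builds on `skewtest`, which raises a ValueError for fewer than 8 observations, and `shapiro` itself needs at least 3. A standalone sketch of the same dispatch:

```python
import numpy as np
from scipy.stats import normaltest, shapiro

def pick_normality_test(data):
    """Mirror the sample-size-based test selection used above (a sketch)."""
    data = np.asarray(data)
    if len(data) < 3:
        return None, None, 'Skipped'              # too few points for any test
    if len(data) >= 8:
        stat, p = normaltest(data)                # D'Agostino-Pearson, needs n >= 8
        return stat, p, "D'Agostino-Pearson test"
    stat, p = shapiro(data)                       # Shapiro-Wilk for small samples
    return stat, p, 'Shapiro-Wilk test'

print(pick_normality_test(np.random.default_rng(1).normal(size=30)))
```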
@@ -2438,9 +2845,13 @@ class spacrGraph:
                             len(self.df[self.df[self.grouping_column] == unique_groups[1]])})
 
          return test_results
-
+
      def perform_posthoc_tests(self, is_normal, unique_groups):
          """Perform post-hoc tests for multiple groups based on all_to_all flag."""
+
+         from .utils import choose_p_adjust_method
+
+         posthoc_results = []
          if is_normal and len(unique_groups) > 2 and self.all_to_all:
              tukey_result = pairwise_tukeyhsd(self.df[self.data_column], self.df[self.grouping_column], alpha=0.05)
              posthoc_results = []
@@ -2456,22 +2867,40 @@ class spacrGraph:
                      'n_object': len(raw_data1) + len(raw_data2),
                      'n_well': len(self.df[self.df[self.grouping_column] == comparison[0]]) + len(self.df[self.df[self.grouping_column] == comparison[1]])})
              return posthoc_results
-
-         elif len(unique_groups) > 2 and not self.all_to_all and self.compare_group:
-             dunn_result = pg.pairwise_tests(data=self.df, dv=self.data_column, between=self.grouping_column, padjust='bonf', test='dunn')
-             posthoc_results = []
-             for idx, row in dunn_result.iterrows():
-                 if row['A'] == self.compare_group or row['B'] == self.compare_group:
-                     posthoc_results.append({
-                         'Comparison': f"{row['A']} vs {row['B']}",
-                         'Test Statistic': row['T'], # Test statistic from Dunn's test
-                         'p-value': row['p-val'],
-                         'Test Name': 'Dunn’s Post-hoc',
-                         'n_object': None,
-                         'n_well': None})
-
+
+         elif len(unique_groups) > 2 and self.all_to_all:
+             print('performing_dunns')
+
+             # Prepare data for Dunn's test in long format
+             long_data = self.df[[self.data_column[0], self.grouping_column]].dropna()
+
+             p_adjust_method = choose_p_adjust_method(num_groups=len(long_data[self.grouping_column].unique()),num_data_points=len(long_data) // len(long_data[self.grouping_column].unique()))
+
+             # Perform Dunn's test with Bonferroni correction
+             dunn_result = sp.posthoc_dunn(
+                 long_data,
+                 val_col=self.data_column[0],
+                 group_col=self.grouping_column,
+                 p_adjust=p_adjust_method
+             )
+
+             for group_a, group_b in zip(*np.triu_indices_from(dunn_result, k=1)):
+                 raw_data1 = self.raw_df[self.raw_df[self.grouping_column] == dunn_result.index[group_a]][self.data_column]
+                 raw_data2 = self.raw_df[self.raw_df[self.grouping_column] == dunn_result.columns[group_b]][self.data_column]
+
+                 posthoc_results.append({
+                     'Comparison': f"{dunn_result.index[group_a]} vs {dunn_result.columns[group_b]}",
+                     'Test Statistic': None, # Dunn's test does not return a specific test statistic
+                     'p-value': dunn_result.iloc[group_a, group_b], # Extract the p-value from the matrix
+                     'Test Name': "Dunn's Post-hoc",
+                     'p_adjust_method': p_adjust_method,
+                     'n_object': len(raw_data1) + len(raw_data2), # Total objects
+                     'n_well': len(self.df[self.df[self.grouping_column] == dunn_result.index[group_a]]) +
+                               len(self.df[self.grouping_column] == dunn_result.columns[group_b])})
+
              return posthoc_results
-         return []
+
+         return posthoc_results
 
      def create_plot(self, ax=None):
          """Create and display the plot based on the chosen graph type."""
@@ -2507,7 +2936,43 @@ class spacrGraph:
          transposed_table = list(map(list, zip(*table_data)))
          return row_labels, transposed_table
 
-     def _place_symbols(row_labels, transposed_table, x_positions, ax):
+
+     def _place_symbols(row_labels, transposed_table, x_positions, ax):
+         """
+         Places symbols and row labels aligned under the bars or jitter points on the graph.
+
+         Parameters:
+         - row_labels: List of row titles to be displayed along the y-axis.
+         - transposed_table: Data to be placed under each bar/jitter as symbols.
+         - x_positions: X-axis positions for each group to align the symbols.
+         - ax: The matplotlib Axes object where the plot is drawn.
+         """
+         # Get plot dimensions and adjust for different plot sizes
+         y_axis_min = ax.get_ylim()[0] # Minimum y-axis value (usually 0)
+         symbol_start_y = y_axis_min - 0.05 * (ax.get_ylim()[1] - y_axis_min) # Adjust a bit below the x-axis
+
+         # Calculate spacing for the table rows (adjust as needed)
+         y_spacing = 0.04 # Adjust this for better spacing between rows
+
+         # Determine the leftmost x-position for row labels (align with the y-axis)
+         label_x_pos = ax.get_xlim()[0] - 0.3 # Adjust offset from the y-axis
+
+         # Place row labels vertically aligned with symbols
+         for row_idx, title in enumerate(row_labels):
+             y_pos = symbol_start_y - (row_idx * y_spacing) # Calculate vertical position for each label
+             ax.text(label_x_pos, y_pos, title, ha='right', va='center', fontsize=12, fontweight='regular')
+
+         # Place symbols under each bar or jitter point based on x-positions
+         for idx, (x_pos, column_data) in enumerate(zip(x_positions, transposed_table)):
+             for row_idx, text in enumerate(column_data):
+                 y_pos = symbol_start_y - (row_idx * y_spacing) # Adjust vertical spacing for symbols
+                 ax.text(x_pos, y_pos, text, ha='center', va='center', fontsize=12, fontweight='regular')
+
+         # Redraw to apply changes
+         ax.figure.canvas.draw()
+
+
+     def _place_symbols_v1(row_labels, transposed_table, x_positions, ax):
 
          # Get the bottom of the y-axis (y=0) in data coordinates and convert to display coordinates
          y_axis_min = ax.get_ylim()[0] # Minimum y-axis value (usually 0)
@@ -2642,6 +3107,10 @@ class spacrGraph:
          else:
              raise ValueError(f"Unknown graph type: {self.graph_type}")
 
+         if len(self.data_column) == 1:
+             num_groups = len(self.df[self.grouping_column].unique())
+             self._standerdize_figure_format(ax=ax, num_groups=num_groups, graph_type=self.graph_type)
+
          # Set y-axis start
          if isinstance(self.y_lim, list):
              if len(self.y_lim) == 2:
@@ -2676,7 +3145,73 @@ class spacrGraph:
          if self.save:
              self._save_results()
 
-         ax.margins(x=0.12)
+         ax.margins(x=0.12)
+
+     def _standerdize_figure_format(self, ax, num_groups, graph_type):
+         """
+         Adjusts the figure layout (size, bar width, jitter, and spacing) based on the number of groups.
+
+         Parameters:
+         - ax: The matplotlib Axes object.
+         - num_groups: Number of unique groups.
+         - graph_type: The type of graph (e.g., 'bar', 'jitter', 'box', etc.).
+
+         Returns:
+         - None. Modifies the figure and Axes in place.
+         """
+         if graph_type in ['line', 'line_std']:
+             print("Skipping layout adjustment for line graphs.")
+             return # Skip layout adjustment for line graphs
+
+         correction_factor = 4
+
+         # Set figure size to ensure it remains square with a minimum size
+         fig_size = max(6, num_groups * 2) / correction_factor
+         ax.figure.set_size_inches(fig_size, fig_size)
+
+         # Configure layout based on the number of groups
+         bar_width = min(0.8, 1.5 / num_groups) / correction_factor
+         jitter_amount = min(0.1, 0.2 / num_groups) / correction_factor
+         jitter_size = max(50 / num_groups, 200)
+
+         # Adjust axis limits to ensure bars are centered with respect to group labels
+         ax.set_xlim(-0.5, num_groups - 0.5)
+
+         # Set ticks to match the group labels in your DataFrame
+         group_labels = self.df[self.grouping_column].unique()
+         ax.set_xticks(range(len(group_labels)))
+         ax.set_xticklabels(group_labels, rotation=45, ha='right')
+
+         # Customize elements based on the graph type
+         if graph_type == 'bar':
+             # Adjust bars' width and position
+             for bar in ax.patches:
+                 bar.set_width(bar_width)
+                 bar.set_x(bar.get_x() - bar_width / 2)
+
+         elif graph_type in ['jitter', 'jitter_bar', 'jitter_box']:
+             # Adjust jitter points' position and size
+             for coll in ax.collections:
+                 offsets = coll.get_offsets()
+                 offsets[:, 0] += jitter_amount # Shift jitter points slightly
+                 coll.set_offsets(offsets)
+                 coll.set_sizes([jitter_size] * len(offsets)) # Adjust point size dynamically
+
+         elif graph_type in ['box', 'violin']:
+             # Adjust box width for consistent spacing
+             for artist in ax.artists:
+                 artist.set_width(bar_width)
+
+         # Adjust legend and axis labels
+         ax.tick_params(axis='x', labelsize=max(10, 15 - num_groups // 2))
+         ax.tick_params(axis='y', labelsize=max(10, 15 - num_groups // 2))
+
+         if ax.get_legend():
+             ax.get_legend().set_bbox_to_anchor((1.05, 1)) #loc='upper left',borderaxespad=0.
+             ax.get_legend().prop.set_size(max(8, 12 - num_groups // 3))
+
+         # Redraw the figure to apply changes
+         ax.figure.canvas.draw()
 
      def _create_bar_plot(self, ax):
          """Helper method to create a bar plot with consistent bar thickness and centered error bars."""
@@ -2895,11 +3430,11 @@ class spacrGraph:
              bar.set_x(bar.get_x() - target_width / 2)
 
          # Adjust error bars alignment with bars
-         bars = [bar for bar in ax.patches if isinstance(bar, plt.Rectangle)]
-         for bar, (_, row) in zip(bars, summary_df.iterrows()):
-             x_bar = bar.get_x() + bar.get_width() / 2
-             err = row[self.error_bar_type]
-             ax.errorbar(x=x_bar, y=bar.get_height(), yerr=err, fmt='none', c='black', capsize=5, lw=2)
+         #bars = [bar for bar in ax.patches if isinstance(bar, plt.Rectangle)]
+         #for bar, (_, row) in zip(bars, summary_df.iterrows()):
+         #    x_bar = bar.get_x() + bar.get_width() / 2
+         #    err = row[self.error_bar_type]
+         #    ax.errorbar(x=x_bar, y=bar.get_height(), yerr=err, fmt='none', c='black', capsize=5, lw=2)
 
          # Set legend and labels
          ax.set_xlabel(self.grouping_column)
@@ -3092,9 +3627,13 @@ def plot_data_from_csv(settings):
          dft = pd.read_csv(src)
          if 'plate' not in dft.columns:
              dft['plate'] = f"plate{i+1}"
+         dft['common'] = 'spacr'
          dfs.append(dft)
 
      df = pd.concat(dfs, axis=0)
+
+     display(df)
+
      df = df.dropna(subset=settings['data_column'])
      df = df.dropna(subset=settings['grouping_column'])
      src = srcs[0]
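Note that `display` is the IPython rich-display function: it is available as a builtin inside Jupyter but undefined in a plain interpreter. If the module needs to run outside a notebook, a guarded import is the usual workaround (a sketch, not what the package does):

```python
try:
    from IPython.display import display  # rich DataFrame rendering in notebooks
except ImportError:
    display = print  # plain-text fallback outside IPython
```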
@@ -3141,23 +3680,39 @@ def plot_region(settings):
          print(f"Saved {path}")
 
      from .io import _read_db
+     from .utils import correct_paths
      fov_path = os.path.join(settings['src'], 'merged', settings['name'])
      name = os.path.splitext(settings['name'])[0]
 
      db_path = os.path.join(settings['src'], 'measurements', 'measurements.db')
      paths_df = _read_db(db_path, tables=['png_list'])[0]
+     paths_df, _ = correct_paths(df=paths_df, base_path=settings['src'], folder='data')
      paths_df = paths_df[paths_df['png_path'].str.contains(name, na=False)]
 
      activation_mode = f"{settings['activation_mode']}_list"
      activation_db_path = os.path.join(settings['src'], 'measurements', settings['activation_db'])
      activation_paths_df = _read_db(activation_db_path, tables=[activation_mode])[0]
+     activation_db = os.path.splitext(settings['activation_db'])[0]
+     base_path=os.path.join(settings['src'], 'datasets',activation_db)
+     activation_paths_df, _ = correct_paths(df=activation_paths_df, base_path=base_path, folder=settings['activation_mode'])
      activation_paths_df = activation_paths_df[activation_paths_df['png_path'].str.contains(name, na=False)]
 
      png_paths = _sort_paths_by_basename(paths_df['png_path'].tolist())
      activation_paths = _sort_paths_by_basename(activation_paths_df['png_path'].tolist())
 
-     fig_3 = plot_image_grid(image_paths=activation_paths, percentiles=settings['percentiles'])
-     fig_2 = plot_image_grid(image_paths=png_paths, percentiles=settings['percentiles'])
+
+     if activation_paths:
+         fig_3 = plot_image_grid(image_paths=activation_paths, percentiles=settings['percentiles'])
+     else:
+         fig_3 = None
+         print(f"Could not find any cropped PNGs")
+     if png_paths:
+         fig_2 = plot_image_grid(image_paths=png_paths, percentiles=settings['percentiles'])
+     else:
+         fig_2 = None
+         print(f"Could not find any activation maps")
+
+     print('fov_path', fov_path)
      fig_1 = plot_image_mask_overlay(file=fov_path,
                                      channels=settings['channels'],
                                      cell_channel=settings['cell_channel'],
@@ -3166,14 +3721,18 @@ def plot_region(settings):
                                      figuresize=10,
                                      percentiles=settings['percentiles'],
                                      thickness=3,
-                                     save_pdf=False,
+                                     save_pdf=True,
                                      mode=settings['mode'],
                                      export_tiffs=settings['export_tiffs'])
 
      dst = os.path.join(settings['src'], 'results', name)
-     save_figure_as_pdf(fig_1, os.path.join(dst, f"{name}_mask_overlay.pdf"))
-     save_figure_as_pdf(fig_2, os.path.join(dst, f"{name}_png_grid.pdf"))
-     save_figure_as_pdf(fig_3, os.path.join(dst, f"{name}_activation_grid.pdf"))
+
+     if not fig_1 == None:
+         save_figure_as_pdf(fig_1, os.path.join(dst, f"{name}_mask_overlay.pdf"))
+     if not fig_2 == None:
+         save_figure_as_pdf(fig_2, os.path.join(dst, f"{name}_png_grid.pdf"))
+     if not fig_3 == None:
+         save_figure_as_pdf(fig_3, os.path.join(dst, f"{name}_activation_grid.pdf"))
 
      return fig_1, fig_2, fig_3
 
@@ -3337,4 +3896,5 @@ def overlay_masks_on_images(img_folder, normalize=True, resize=True, save=False,
          plt.imshow(blended)
          plt.title(f"Overlay: {filename}")
          plt.axis('off')
-         plt.show()
+         plt.show()
+