spacr 0.3.50__py3-none-any.whl → 0.3.52__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- spacr/gui_utils.py +11 -12
- spacr/measure.py +4 -1
- spacr/plot.py +152 -23
- spacr/settings.py +14 -30
- spacr/toxo.py +180 -1
- spacr/utils.py +24 -1
- {spacr-0.3.50.dist-info → spacr-0.3.52.dist-info}/METADATA +2 -1
- {spacr-0.3.50.dist-info → spacr-0.3.52.dist-info}/RECORD +12 -12
- {spacr-0.3.50.dist-info → spacr-0.3.52.dist-info}/LICENSE +0 -0
- {spacr-0.3.50.dist-info → spacr-0.3.52.dist-info}/WHEEL +0 -0
- {spacr-0.3.50.dist-info → spacr-0.3.52.dist-info}/entry_points.txt +0 -0
- {spacr-0.3.50.dist-info → spacr-0.3.52.dist-info}/top_level.txt +0 -0
spacr/gui_utils.py
CHANGED
@@ -76,8 +76,8 @@ def load_app(root, app_name, app_func):
         root.current_app_exit_func()
     else:
         proceed_with_app(root, app_name, app_func)
-
-def
+
+def parse_list(value):
     """
     Parses a string representation of a list and returns the parsed list.

@@ -85,7 +85,7 @@ def parse_list_v1(value):
     value (str): The string representation of the list.

     Returns:
-    list: The parsed list.
+    list: The parsed list, which can contain integers, floats, or strings.

     Raises:
     ValueError: If the input value is not a valid list format or contains mixed types or unsupported types.
@@ -93,21 +93,20 @@ def parse_list_v1(value):
    try:
        parsed_value = ast.literal_eval(value)
        if isinstance(parsed_value, list):
-            # Check if
-            if all(isinstance(item, int) for item in parsed_value):
-                return parsed_value
-            elif all(isinstance(item, str) for item in parsed_value):
-                return parsed_value
-            elif all(isinstance(item, float) for item in parsed_value):
+            # Check if all elements are homogeneous (either all int, float, or str)
+            if all(isinstance(item, (int, float, str)) for item in parsed_value):
                return parsed_value
            else:
                raise ValueError("List contains mixed types or unsupported types")
+        elif isinstance(parsed_value, tuple):
+            # Convert tuple to list if it's a single-element tuple
+            return list(parsed_value) if len(parsed_value) > 1 else [parsed_value[0]]
        else:
            raise ValueError(f"Expected a list but got {type(parsed_value).__name__}")
    except (ValueError, SyntaxError) as e:
        raise ValueError(f"Invalid format for list: {value}. Error: {e}")
-
-def
+
+def parse_list_v1(value):
    """
    Parses a string representation of a list and returns the parsed list.

@@ -391,7 +390,7 @@ def convert_settings_dict_for_gui(settings):
        'nucleus_chann_dim': ('combo', chans, None),
        'pathogen_mask_dim': ('combo', chans, None),
        'pathogen_chann_dim': ('combo', chans, None),
-        'crop_mode': ('combo', ['cell', 'nucleus', 'pathogen', '
+        'crop_mode': ('combo', [['cell'], ['nucleus'], ['pathogen'], ['cell', 'nucleus'], ['cell', 'pathogen'], ['nucleus', 'pathogen'], ['cell', 'nucleus', 'pathogen']], ['cell']),
        'magnification': ('combo', [20, 40, 60], 20),
        'nucleus_channel': ('combo', chans_v2, None),
        'cell_channel': ('combo', chans_v2, None),
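As a quick illustration (not part of the diff), the reworked parse_list above accepts any list whose elements are each an int, float, or str, and now also unwraps tuples; a minimal sketch of the expected behavior, assuming the function is imported from spacr.gui_utils:

from spacr.gui_utils import parse_list  # the function shown in the hunk above

print(parse_list("[1, 2, 3]"))            # [1, 2, 3]
print(parse_list("['cell', 'nucleus']"))  # ['cell', 'nucleus']
print(parse_list("(0.2,)"))               # [0.2]  (single-element tuple becomes a one-item list)
parse_list("[1, None]")                   # raises ValueError: mixed or unsupported types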
spacr/measure.py
CHANGED
@@ -1028,7 +1028,10 @@ def measure_crop(settings):
        return

    if not isinstance(settings['crop_mode'], list):
-        print(f"WARNING: crop_mode should be a list with at least one element e.g. ['cell'] or ['cell','nucleus'] or [None]")
+        print(f"WARNING: crop_mode should be a list with at least one element e.g. ['cell'] or ['cell','nucleus'] or [None] got: {settings['crop_mode']}")
+        settings['crop_mode'] = [settings['crop_mode']]
+        settings['crop_mode'] = [str(crop_mode) for crop_mode in settings['crop_mode']]
+        print(f"Converted crop_mode to list: {settings['crop_mode']}")
        return

    _save_settings_to_db(settings)
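Sketched outside the diff, the net effect of the added lines is that a crop_mode supplied as a bare string is wrapped in a list and string-cast before measure_crop returns with the warning:

# standalone illustration of the coercion above (not a spacr API)
crop_mode = 'cell'                      # a bare string instead of ['cell']
if not isinstance(crop_mode, list):
    crop_mode = [crop_mode]             # -> ['cell']
    crop_mode = [str(m) for m in crop_mode]
print(crop_mode)                        # ['cell']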
spacr/plot.py
CHANGED
@@ -16,6 +16,7 @@ from skimage import measure
 from skimage.measure import find_contours, label, regionprops
 from skimage.segmentation import mark_boundaries
 from skimage.transform import resize as sk_resize
+import scikit_posthocs as sp

 import tifffile as tiff

@@ -2844,9 +2845,13 @@ class spacrGraph:
                                  len(self.df[self.df[self.grouping_column] == unique_groups[1]])})

        return test_results
-
+
    def perform_posthoc_tests(self, is_normal, unique_groups):
        """Perform post-hoc tests for multiple groups based on all_to_all flag."""
+
+        from .utils import choose_p_adjust_method
+
+        posthoc_results = []
        if is_normal and len(unique_groups) > 2 and self.all_to_all:
            tukey_result = pairwise_tukeyhsd(self.df[self.data_column], self.df[self.grouping_column], alpha=0.05)
            posthoc_results = []
@@ -2862,22 +2867,40 @@ class spacrGraph:
                                        'n_object': len(raw_data1) + len(raw_data2),
                                        'n_well': len(self.df[self.df[self.grouping_column] == comparison[0]]) + len(self.df[self.df[self.grouping_column] == comparison[1]])})
            return posthoc_results
-
-        elif len(unique_groups) > 2 and
-
-
-            for
-
-
-
-
-
-
-
-
-
+
+        elif len(unique_groups) > 2 and self.all_to_all:
+            print('performing_dunns')
+
+            # Prepare data for Dunn's test in long format
+            long_data = self.df[[self.data_column[0], self.grouping_column]].dropna()
+
+            p_adjust_method = choose_p_adjust_method(num_groups=len(long_data[self.grouping_column].unique()),num_data_points=len(long_data) // len(long_data[self.grouping_column].unique()))
+
+            # Perform Dunn's test with Bonferroni correction
+            dunn_result = sp.posthoc_dunn(
+                long_data,
+                val_col=self.data_column[0],
+                group_col=self.grouping_column,
+                p_adjust=p_adjust_method
+            )
+
+            for group_a, group_b in zip(*np.triu_indices_from(dunn_result, k=1)):
+                raw_data1 = self.raw_df[self.raw_df[self.grouping_column] == dunn_result.index[group_a]][self.data_column]
+                raw_data2 = self.raw_df[self.raw_df[self.grouping_column] == dunn_result.columns[group_b]][self.data_column]
+
+                posthoc_results.append({
+                    'Comparison': f"{dunn_result.index[group_a]} vs {dunn_result.columns[group_b]}",
+                    'Test Statistic': None,  # Dunn's test does not return a specific test statistic
+                    'p-value': dunn_result.iloc[group_a, group_b],  # Extract the p-value from the matrix
+                    'Test Name': "Dunn's Post-hoc",
+                    'p_adjust_method': p_adjust_method,
+                    'n_object': len(raw_data1) + len(raw_data2),  # Total objects
+                    'n_well': len(self.df[self.df[self.grouping_column] == dunn_result.index[group_a]]) +
+                              len(self.df[self.grouping_column] == dunn_result.columns[group_b])})
+
            return posthoc_results
-
+
+        return posthoc_results

    def create_plot(self, ax=None):
        """Create and display the plot based on the chosen graph type."""
@@ -2913,7 +2936,43 @@ class spacrGraph:
        transposed_table = list(map(list, zip(*table_data)))
        return row_labels, transposed_table

-
+
+    def _place_symbols(row_labels, transposed_table, x_positions, ax):
+        """
+        Places symbols and row labels aligned under the bars or jitter points on the graph.
+
+        Parameters:
+        - row_labels: List of row titles to be displayed along the y-axis.
+        - transposed_table: Data to be placed under each bar/jitter as symbols.
+        - x_positions: X-axis positions for each group to align the symbols.
+        - ax: The matplotlib Axes object where the plot is drawn.
+        """
+        # Get plot dimensions and adjust for different plot sizes
+        y_axis_min = ax.get_ylim()[0]  # Minimum y-axis value (usually 0)
+        symbol_start_y = y_axis_min - 0.05 * (ax.get_ylim()[1] - y_axis_min)  # Adjust a bit below the x-axis
+
+        # Calculate spacing for the table rows (adjust as needed)
+        y_spacing = 0.04  # Adjust this for better spacing between rows
+
+        # Determine the leftmost x-position for row labels (align with the y-axis)
+        label_x_pos = ax.get_xlim()[0] - 0.3  # Adjust offset from the y-axis
+
+        # Place row labels vertically aligned with symbols
+        for row_idx, title in enumerate(row_labels):
+            y_pos = symbol_start_y - (row_idx * y_spacing)  # Calculate vertical position for each label
+            ax.text(label_x_pos, y_pos, title, ha='right', va='center', fontsize=12, fontweight='regular')
+
+        # Place symbols under each bar or jitter point based on x-positions
+        for idx, (x_pos, column_data) in enumerate(zip(x_positions, transposed_table)):
+            for row_idx, text in enumerate(column_data):
+                y_pos = symbol_start_y - (row_idx * y_spacing)  # Adjust vertical spacing for symbols
+                ax.text(x_pos, y_pos, text, ha='center', va='center', fontsize=12, fontweight='regular')
+
+        # Redraw to apply changes
+        ax.figure.canvas.draw()
+
+
+    def _place_symbols_v1(row_labels, transposed_table, x_positions, ax):

        # Get the bottom of the y-axis (y=0) in data coordinates and convert to display coordinates
        y_axis_min = ax.get_ylim()[0]  # Minimum y-axis value (usually 0)
@@ -3048,6 +3107,10 @@ class spacrGraph:
        else:
            raise ValueError(f"Unknown graph type: {self.graph_type}")

+        if len(self.data_column) == 1:
+            num_groups = len(self.df[self.grouping_column].unique())
+            self._standerdize_figure_format(ax=ax, num_groups=num_groups, graph_type=self.graph_type)
+
        # Set y-axis start
        if isinstance(self.y_lim, list):
            if len(self.y_lim) == 2:
@@ -3082,7 +3145,73 @@ class spacrGraph:
        if self.save:
            self._save_results()

-        ax.margins(x=0.12)
+        ax.margins(x=0.12)
+
+    def _standerdize_figure_format(self, ax, num_groups, graph_type):
+        """
+        Adjusts the figure layout (size, bar width, jitter, and spacing) based on the number of groups.
+
+        Parameters:
+        - ax: The matplotlib Axes object.
+        - num_groups: Number of unique groups.
+        - graph_type: The type of graph (e.g., 'bar', 'jitter', 'box', etc.).
+
+        Returns:
+        - None. Modifies the figure and Axes in place.
+        """
+        if graph_type in ['line', 'line_std']:
+            print("Skipping layout adjustment for line graphs.")
+            return  # Skip layout adjustment for line graphs
+
+        correction_factor = 4
+
+        # Set figure size to ensure it remains square with a minimum size
+        fig_size = max(6, num_groups * 2) / correction_factor
+        ax.figure.set_size_inches(fig_size, fig_size)
+
+        # Configure layout based on the number of groups
+        bar_width = min(0.8, 1.5 / num_groups) / correction_factor
+        jitter_amount = min(0.1, 0.2 / num_groups) / correction_factor
+        jitter_size = max(50 / num_groups, 200)
+
+        # Adjust axis limits to ensure bars are centered with respect to group labels
+        ax.set_xlim(-0.5, num_groups - 0.5)
+
+        # Set ticks to match the group labels in your DataFrame
+        group_labels = self.df[self.grouping_column].unique()
+        ax.set_xticks(range(len(group_labels)))
+        ax.set_xticklabels(group_labels, rotation=45, ha='right')
+
+        # Customize elements based on the graph type
+        if graph_type == 'bar':
+            # Adjust bars' width and position
+            for bar in ax.patches:
+                bar.set_width(bar_width)
+                bar.set_x(bar.get_x() - bar_width / 2)
+
+        elif graph_type in ['jitter', 'jitter_bar', 'jitter_box']:
+            # Adjust jitter points' position and size
+            for coll in ax.collections:
+                offsets = coll.get_offsets()
+                offsets[:, 0] += jitter_amount  # Shift jitter points slightly
+                coll.set_offsets(offsets)
+                coll.set_sizes([jitter_size] * len(offsets))  # Adjust point size dynamically
+
+        elif graph_type in ['box', 'violin']:
+            # Adjust box width for consistent spacing
+            for artist in ax.artists:
+                artist.set_width(bar_width)
+
+        # Adjust legend and axis labels
+        ax.tick_params(axis='x', labelsize=max(10, 15 - num_groups // 2))
+        ax.tick_params(axis='y', labelsize=max(10, 15 - num_groups // 2))
+
+        if ax.get_legend():
+            ax.get_legend().set_bbox_to_anchor((1.05, 1))  #loc='upper left',borderaxespad=0.
+            ax.get_legend().prop.set_size(max(8, 12 - num_groups // 3))
+
+        # Redraw the figure to apply changes
+        ax.figure.canvas.draw()

    def _create_bar_plot(self, ax):
        """Helper method to create a bar plot with consistent bar thickness and centered error bars."""
@@ -3301,11 +3430,11 @@ class spacrGraph:
            bar.set_x(bar.get_x() - target_width / 2)

        # Adjust error bars alignment with bars
-        bars = [bar for bar in ax.patches if isinstance(bar, plt.Rectangle)]
-        for bar, (_, row) in zip(bars, summary_df.iterrows()):
-
-
-
+        #bars = [bar for bar in ax.patches if isinstance(bar, plt.Rectangle)]
+        #for bar, (_, row) in zip(bars, summary_df.iterrows()):
+        #    x_bar = bar.get_x() + bar.get_width() / 2
+        #    err = row[self.error_bar_type]
+        #    ax.errorbar(x=x_bar, y=bar.get_height(), yerr=err, fmt='none', c='black', capsize=5, lw=2)

        # Set legend and labels
        ax.set_xlabel(self.grouping_column)
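For context on the new scikit-posthocs dependency, posthoc_dunn returns a symmetric groups-by-groups DataFrame of adjusted p-values, which is what the np.triu_indices_from loop above walks. A minimal, self-contained sketch with synthetic data (not spacr code):

import pandas as pd
import scikit_posthocs as sp

# synthetic long-format data: one value column, one grouping column
df = pd.DataFrame({
    'value': [1.1, 1.3, 0.9, 2.4, 2.1, 2.6, 3.0, 3.2, 2.9],
    'group': ['a'] * 3 + ['b'] * 3 + ['c'] * 3,
})

# pairwise Dunn's test; each cell holds the adjusted p-value for one group pair
pvals = sp.posthoc_dunn(df, val_col='value', group_col='group', p_adjust='holm')
print(pvals)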
spacr/settings.py
CHANGED
@@ -246,7 +246,7 @@ def get_measure_crop_settings(settings={}):
    settings.setdefault('normalize_by','png')
    settings.setdefault('crop_mode',['cell'])
    settings.setdefault('dialate_pngs', False)
-    settings.setdefault('dialate_png_ratios', [0.2
+    settings.setdefault('dialate_png_ratios', [0.2])

    # Timelapsed settings
    settings.setdefault('timelapse', False)
@@ -697,16 +697,6 @@ expected_types = {
    "overlay_chans": list,
    "overlay": bool,
    "normalization_percentiles": list,
-    "print_object_number": bool,
-    "nr": int,
-    "figuresize": int,
-    "cmap": str,
-    "test_mode": bool,
-    "test_images": int,
-    "remove_background_cell": bool,
-    "remove_background_nucleus": bool,
-    "remove_background_pathogen": bool,
-    "pathogen_model": (str, type(None)),
    "filter": bool,
    "fill_in":bool,
    "upscale": bool,
@@ -825,18 +815,6 @@ expected_types = {
    "transform": (str, type(None)),
    "agg_type": str,
    "min_cell_count": int,
-    "regression_type": str,
-    "random_row_column_effects": bool,
-    "alpha": float,
-    "fraction_threshold": float,
-    "class_1_threshold": (float, type(None)),
-    "batch_size": int,
-    "CP_prob": float,
-    "flow_threshold": float,
-    "percentiles": (list, type(None)),
-    "invert": bool,
-    "diameter": int,
-    "grayscale": bool,
    "resize": bool,
    "target_height": (int, type(None)),
    "target_width": (int, type(None)),
@@ -881,9 +859,6 @@ expected_types = {
    "metadata_type_by":str,
    "custom_measurement":str,
    "custom_model":bool,
-    "size":int,
-    "test_split":float,
-    "class_metadata":list, # This is a list of lists
    "png_type":str,
    "custom_model_path":str,
    "generate_training_dataset":bool,
@@ -894,6 +869,7 @@ expected_types = {
    "correlate":bool,
    "target_layer":str,
    "save_to_db":bool,
+    "test_mode":bool,
    "normalize_input":bool,
 }

@@ -904,7 +880,7 @@ categories = {"Paths":[ "src", "grna", "barcodes", "custom_model_path", "dataset
    "Nucleus": ["nucleus_intensity_range", "nucleus_size_range", "nucleus_chann_dim", "nucleus_channel", "nucleus_background", "nucleus_Signal_to_noise", "nucleus_CP_prob", "nucleus_FT", "remove_background_nucleus", "nucleus_min_size", "nucleus_mask_dim", "nucleus_loc"],
    "Pathogen": ["pathogen_intensity_range", "pathogen_size_range", "pathogen_chann_dim", "pathogen_channel", "pathogen_background", "pathogen_Signal_to_noise", "pathogen_CP_prob", "pathogen_FT", "pathogen_model", "remove_background_pathogen", "pathogen_min_size", "pathogen_mask_dim", "pathogens", "pathogen_loc", "pathogen_types", "pathogen_plate_metadata", ],
    "Measurements": ["remove_image_canvas", "remove_highly_correlated", "homogeneity", "homogeneity_distances", "radial_dist", "calculate_correlation", "manders_thresholds", "save_measurements", "tables", "image_nr", "dot_size", "filter_by", "remove_highly_correlated_features", "remove_low_variance_features", "channel_of_interest"],
-    "Object Image": ["save_png", "dialate_pngs", "dialate_png_ratios", "png_size", "png_dims", "save_arrays", "normalize_by", "crop_mode", "
+    "Object Image": ["save_png", "dialate_pngs", "dialate_png_ratios", "png_size", "png_dims", "save_arrays", "normalize_by", "crop_mode", "normalize", "use_bounding_box"],
    "Sequencing": ["signal_direction","mode","comp_level","comp_type","save_h5","expected_end","offset","target_sequence","regex", "highlight"],
    "Generate Dataset":["save_to_db","file_metadata","class_metadata", "annotation_column","annotated_classes", "dataset_mode", "metadata_type_by","custom_measurement", "sample", "size"],
    "Hyperparamiters (Training)": ["png_type", "score_threshold","file_type", "train_channels", "epochs", "loss_type", "optimizer_type","image_size","val_split","learning_rate","weight_decay","dropout_rate", "init_weights", "train", "classes", "augment", "amsgrad","use_checkpoint","gradient_accumulation","gradient_accumulation_steps","intermedeate_save","pin_memory"],
@@ -939,6 +915,9 @@ def check_settings(vars_dict, expected_types, q=None):
            continue

        value = var.get()
+        if value == 'None':
+            value = None
+
        expected_type = expected_types.get(key, str)

        try:
@@ -953,14 +932,19 @@ def check_settings(vars_dict, expected_types, q=None):
                    # settings[key] = None
                else:
                    raise ValueError("Invalid format for list or list of lists")
+
            elif expected_type == list:
                settings[key] = parse_list(value) if value else None
+
+                if isinstance(settings[key], list) and len(settings[key]) == 1:
+                    settings[key] = settings[key][0]
+
            elif expected_type == bool:
                settings[key] = value if isinstance(value, bool) else value.lower() in ['true', '1', 't', 'y', 'yes']
            elif expected_type == (int, type(None)):
-                settings[key] = int(value) if value else None
+                settings[key] = settings[key] = int(value) if isinstance(value, int) or str(value).isdigit() else None
            elif expected_type == (float, type(None)):
-                settings[key] = float(value) if value else None
+                settings[key] = float(value) if isinstance(value, float) or (isinstance(value, str) and value.replace(".", "", 1).isdigit()) else None
            elif expected_type == (int, float):
                settings[key] = float(value) if '.' in value else int(value)
            elif expected_type == (str, type(None)):
@@ -1000,7 +984,7 @@ def check_settings(vars_dict, expected_types, q=None):
                settings[key] = expected_type(value) if value else None
        except (ValueError, SyntaxError) as e:
            expected_type_name = ' or '.join([t.__name__ for t in expected_type]) if isinstance(expected_type, tuple) else expected_type.__name__
-            q.put(f"Error: Invalid format for {key}. Expected type: {expected_type_name}. Error: {e}")
+            q.put(f"Error: Invalid format for {key}. Expected type: {expected_type_name}. Error: {e}, Value entered: {value}")
            return

    return settings
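A standalone sketch (not spacr code) of how the revised numeric branches in check_settings treat GUI strings: digit strings become numbers, while anything else, including the literal 'None' handled earlier, falls back to None:

def coerce_optional_int(value):
    # mirrors the new (int, type(None)) branch shown above
    return int(value) if isinstance(value, int) or str(value).isdigit() else None

def coerce_optional_float(value):
    # mirrors the new (float, type(None)) branch shown above
    return float(value) if isinstance(value, float) or (isinstance(value, str) and value.replace(".", "", 1).isdigit()) else None

print(coerce_optional_int("12"))      # 12
print(coerce_optional_int("None"))    # None
print(coerce_optional_float("0.25"))  # 0.25
print(coerce_optional_float(""))      # None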
spacr/toxo.py
CHANGED
@@ -10,6 +10,17 @@ from matplotlib.legend import Legend
 from matplotlib.transforms import Bbox
 from brokenaxes import brokenaxes

+import os
+import pandas as pd
+import seaborn as sns
+import matplotlib.pyplot as plt
+from scipy.spatial.distance import cosine
+from scipy.stats import pearsonr
+import pandas as pd
+import matplotlib.pyplot as plt
+import seaborn as sns
+from sklearn.metrics import mean_absolute_error
+

 from matplotlib.gridspec import GridSpec

@@ -450,4 +461,172 @@ def plot_gene_heatmaps(data, gene_list, columns, x_column='Gene ID', normalize=F
        plt.savefig(save_path, format='pdf', dpi=600, bbox_inches='tight')
        print(f"Figure saved to {save_path}")

-    plt.show()
+    plt.show()
+
+def generate_score_heatmap(settings):
+
+    def group_cv_score(csv, plate=1, column='c3', data_column='pred'):
+
+        df = pd.read_csv(csv)
+        if 'col' in df.columns:
+            df = df[df['col']==column]
+        elif 'column' in df.columns:
+            df['col'] = df['column']
+            df = df[df['col']==column]
+        if not plate is None:
+            df['plate'] = f"plate{plate}"
+        grouped_df = df.groupby(['plate', 'row', 'col'])[data_column].mean().reset_index()
+        grouped_df['prc'] = grouped_df['plate'].astype(str) + '_' + grouped_df['row'].astype(str) + '_' + grouped_df['col'].astype(str)
+        return grouped_df
+
+    def calculate_fraction_mixed_condition(csv, plate=1, column='c3', control_sgrnas = ['TGGT1_220950_1', 'TGGT1_233460_4']):
+        df = pd.read_csv(csv)
+        df = df[df['column_name']==column]
+        if plate not in df.columns:
+            df['plate'] = f"plate{plate}"
+        df = df[df['grna_name'].str.match(f'^{control_sgrnas[0]}$|^{control_sgrnas[1]}$')]
+        grouped_df = df.groupby(['plate', 'row_name', 'column_name'])['count'].sum().reset_index()
+        grouped_df = grouped_df.rename(columns={'count': 'total_count'})
+        merged_df = pd.merge(df, grouped_df, on=['plate', 'row_name', 'column_name'])
+        merged_df['fraction'] = merged_df['count'] / merged_df['total_count']
+        merged_df['prc'] = merged_df['plate'].astype(str) + '_' + merged_df['row_name'].astype(str) + '_' + merged_df['column_name'].astype(str)
+        return merged_df
+
+    def plot_multi_channel_heatmap(df, column='c3'):
+        """
+        Plot a heatmap with multiple channels as columns.
+
+        Parameters:
+        - df: DataFrame with scores for different channels.
+        - column: Column to filter by (default is 'c3').
+        """
+        # Extract row number and convert to integer for sorting
+        df['row_num'] = df['row'].str.extract(r'(\d+)').astype(int)
+
+        # Filter and sort by plate, row, and column
+        df = df[df['col'] == column]
+        df = df.sort_values(by=['plate', 'row_num', 'col'])
+
+        # Drop temporary 'row_num' column after sorting
+        df = df.drop('row_num', axis=1)
+
+        # Create a new column combining plate, row, and column for the index
+        df['plate_row_col'] = df['plate'] + '-' + df['row'] + '-' + df['col']
+
+        # Set 'plate_row_col' as the index
+        df.set_index('plate_row_col', inplace=True)
+
+        # Extract only numeric data for the heatmap
+        heatmap_data = df.select_dtypes(include=[float, int])
+
+        # Plot heatmap with square boxes, no annotations, and 'viridis' colormap
+        plt.figure(figsize=(12, 8))
+        sns.heatmap(
+            heatmap_data,
+            cmap="viridis",
+            cbar=True,
+            square=True,
+            annot=False
+        )
+
+        plt.title("Heatmap of Prediction Scores for All Channels")
+        plt.xlabel("Channels")
+        plt.ylabel("Plate-Row-Column")
+        plt.tight_layout()
+
+        # Save the figure object and return it
+        fig = plt.gcf()
+        plt.show()
+
+        return fig
+
+
+    def combine_classification_scores(folders, csv_name, data_column, plate=1, column='c3'):
+        # Ensure `folders` is a list
+        if isinstance(folders, str):
+            folders = [folders]
+
+        ls = []  # Initialize ls to store found CSV file paths
+
+        # Iterate over the provided folders
+        for folder in folders:
+            sub_folders = os.listdir(folder)  # Get sub-folder list
+            for sub_folder in sub_folders:  # Iterate through sub-folders
+                path = os.path.join(folder, sub_folder)  # Join the full path
+
+                if os.path.isdir(path):  # Check if it's a directory
+                    csv = os.path.join(path, csv_name)  # Join path to the CSV file
+                    if os.path.exists(csv):  # If CSV exists, add to list
+                        ls.append(csv)
+                    else:
+                        print(f'No such file: {csv}')
+
+        # Initialize combined DataFrame
+        combined_df = None
+        print(f'Found {len(ls)} CSV files')
+
+        # Loop through all collected CSV files and process them
+        for csv_file in ls:
+            df = pd.read_csv(csv_file)  # Read CSV into DataFrame
+            df = df[df['col']==column]
+            if not plate is None:
+                df['plate'] = f"plate{plate}"
+            # Group the data by 'plate', 'row', and 'col'
+            grouped_df = df.groupby(['plate', 'row', 'col'])[data_column].mean().reset_index()
+            # Use the CSV filename to create a new column name
+            folder_name = os.path.dirname(csv_file).replace(".csv", "")
+            new_column_name = os.path.basename(f"{folder_name}_{data_column}")
+            print(new_column_name)
+            grouped_df = grouped_df.rename(columns={data_column: new_column_name})
+
+            # Merge into the combined DataFrame
+            if combined_df is None:
+                combined_df = grouped_df
+            else:
+                combined_df = pd.merge(combined_df, grouped_df, on=['plate', 'row', 'col'], how='outer')
+        combined_df['prc'] = combined_df['plate'].astype(str) + '_' + combined_df['row'].astype(str) + '_' + combined_df['col'].astype(str)
+        return combined_df
+
+    def calculate_mae(df):
+        """
+        Calculate the MAE between each channel's predictions and the fraction column for all rows.
+        """
+        # Extract numeric columns excluding 'fraction' and 'prc'
+        channels = df.drop(columns=['fraction', 'prc']).select_dtypes(include=[float, int])
+
+        mae_data = []
+
+        # Compute MAE for each channel with 'fraction' for all rows
+        for column in channels.columns:
+            for index, row in df.iterrows():
+                mae = mean_absolute_error([row['fraction']], [row[column]])
+                mae_data.append({'Channel': column, 'MAE': mae, 'Row': row['prc']})
+
+        # Convert the list of dictionaries to a DataFrame
+        mae_df = pd.DataFrame(mae_data)
+        return mae_df
+
+    result_df = combine_classification_scores(settings['folders'], settings['csv_name'], settings['data_column'], settings['plate'], settings['column'], )
+    df = calculate_fraction_mixed_condition(settings['csv'], settings['plate'], settings['column'], settings['control_sgrnas'])
+    df = df[df['grna_name']==settings['fraction_grna']]
+    fraction_df = df[['fraction', 'prc']]
+    merged_df = pd.merge(fraction_df, result_df, on=['prc'])
+    cv_df = group_cv_score(settings['cv_csv'], settings['plate'], settings['column'], settings['data_column_cv'])
+    cv_df = cv_df[[settings['data_column_cv'], 'prc']]
+    merged_df = pd.merge(merged_df, cv_df, on=['prc'])
+
+    fig = plot_multi_channel_heatmap(merged_df, settings['column'])
+    if 'row_number' in merged_df.columns:
+        merged_df = merged_df.drop('row_num', axis=1)
+    mae_df = calculate_mae(merged_df)
+    if 'row_number' in mae_df.columns:
+        mae_df = mae_df.drop('row_num', axis=1)
+
+    if not settings['dst'] is None:
+        mae_dst = os.path.join(settings['dst'], f"mae_scores_comparison_plate_{settings['plate']}.csv")
+        merged_dst = os.path.join(settings['dst'], f"scores_comparison_plate_{settings['plate']}_data.csv")
+        heatmap_save = os.path.join(settings['dst'], f"scores_comparison_plate_{settings['plate']}.pdf")
+        mae_df.to_csv(mae_dst, index=False)
+        merged_df.to_csv(merged_dst, index=False)
+        fig.savefig(heatmap_save, format='pdf', dpi=600, bbox_inches='tight')
+    return merged_df
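The new generate_score_heatmap is driven entirely by a settings dict; the keys below are the ones read in the function body above, while the paths, file names, and the fraction_grna value are placeholders to be replaced with real data:

from spacr.toxo import generate_score_heatmap

settings = {
    'folders': ['/path/to/classification_runs'],   # placeholder: folder(s) whose sub-folders contain csv_name
    'csv_name': 'results.csv',                     # placeholder: per-run CSV file name
    'data_column': 'pred',                         # prediction-score column to average per well
    'plate': 1,
    'column': 'c3',
    'csv': '/path/to/grna_counts.csv',             # placeholder: CSV with grna_name and count per well
    'control_sgrnas': ['TGGT1_220950_1', 'TGGT1_233460_4'],
    'fraction_grna': 'TGGT1_233460_4',             # assumed: one of the control sgRNAs
    'cv_csv': '/path/to/cv_scores.csv',            # placeholder: cross-validation score CSV
    'data_column_cv': 'pred',
    'dst': None,                                   # set to a directory to also save the CSVs and the PDF heatmap
}

merged = generate_score_heatmap(settings)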
spacr/utils.py
CHANGED
@@ -5232,4 +5232,27 @@ def control_filelist(folder, mode='column', values=['01','02']):
    filtered_files = [file for file in files if file.split('_')[1][1:] in values]
    if mode is 'row':
        filtered_files = [file for file in files if file.split('_')[1][:1] in values]
-    return filtered_files
+    return filtered_files
+
+def choose_p_adjust_method(num_groups, num_data_points):
+    """
+    Selects the most appropriate p-value adjustment method based on data characteristics.
+
+    Parameters:
+    - num_groups: Number of unique groups being compared
+    - num_data_points: Number of data points per group (assuming balanced groups)
+
+    Returns:
+    - A string representing the recommended p-adjustment method
+    """
+    num_comparisons = (num_groups * (num_groups - 1)) // 2  # Number of pairwise comparisons
+
+    # Decision logic for choosing the adjustment method
+    if num_comparisons <= 10 and num_data_points > 5:
+        return 'holm'  # Balanced between power and Type I error control
+    elif num_comparisons > 10 and num_data_points <= 5:
+        return 'fdr_bh'  # FDR control for large number of comparisons and small sample size
+    elif num_comparisons <= 10:
+        return 'sidak'  # Less conservative than Bonferroni, good for independent comparisons
+    else:
+        return 'bonferroni'  # Very conservative, use for strict control of Type I errors
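Given the thresholds above, the returned method depends only on the number of pairwise comparisons and the per-group sample size, for example:

from spacr.utils import choose_p_adjust_method

print(choose_p_adjust_method(num_groups=4, num_data_points=8))    # 6 comparisons, >5 points per group -> 'holm'
print(choose_p_adjust_method(num_groups=12, num_data_points=3))   # 66 comparisons, <=5 points         -> 'fdr_bh'
print(choose_p_adjust_method(num_groups=4, num_data_points=3))    # 6 comparisons, <=5 points          -> 'sidak'
print(choose_p_adjust_method(num_groups=12, num_data_points=10))  # 66 comparisons, >5 points          -> 'bonferroni'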
{spacr-0.3.50.dist-info → spacr-0.3.52.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: spacr
-Version: 0.3.50
+Version: 0.3.52
 Summary: Spatial phenotype analysis of crisp screens (SpaCr)
 Home-page: https://github.com/EinarOlafsson/spacr
 Author: Einar Birnir Olafsson
@@ -16,6 +16,7 @@ Requires-Dist: scipy<2.0,>=1.12.0
 Requires-Dist: cellpose<4.0,>=3.0.6
 Requires-Dist: scikit-image<1.0,>=0.22.0
 Requires-Dist: scikit-learn<2.0,>=1.4.1
+Requires-Dist: scikit-posthocs<0.20,>=0.10.0
 Requires-Dist: mahotas<2.0,>=1.4.13
 Requires-Dist: btrack<1.0,>=0.6.5
 Requires-Dist: trackpy<1.0,>=0.6.2
{spacr-0.3.50.dist-info → spacr-0.3.52.dist-info}/RECORD
CHANGED
@@ -14,21 +14,21 @@ spacr/deep_spacr.py,sha256=HdOcNU8cHcE_19nP7_5uTz-ih3E169ffr2Hm--NvMvA,43255
 spacr/gui.py,sha256=ARyn9Q_g8HoP-cXh1nzMLVFCKqthY4v2u9yORyaQqQE,8230
 spacr/gui_core.py,sha256=N7R7yvfK_dJhOReM_kW3Ci8Bokhi1OzsxeKqvSGdvV4,41460
 spacr/gui_elements.py,sha256=Dr9KEek41LggJ2z2zfh28a7w86sZXg1jzF388rF2BT4,138249
-spacr/gui_utils.py,sha256=
+spacr/gui_utils.py,sha256=aOVI2G71pObolEjuSgAKr86XJZtN98uqqBfCbucMhRQ,45560
 spacr/io.py,sha256=1rIdJ_8dyn7W4D2zXjaOqlgyo_Y5Z7X86aRp4hNYWCU,144194
 spacr/logger.py,sha256=lJhTqt-_wfAunCPl93xE65Wr9Y1oIHJWaZMjunHUeIw,1538
-spacr/measure.py,sha256=
+spacr/measure.py,sha256=euywVbBXRllht-frnxy9QHV76i8HkPPjm3dJw46kcz0,55085
 spacr/mediar.py,sha256=FwLvbLQW5LQzPgvJZG8Lw7GniA2vbZx6Jv6vIKu7I5c,14743
 spacr/ml.py,sha256=Oykp3drBxZrcwrWQh2n6Xt1OzZER6pSIiaR-W0GO2_E,67353
 spacr/openai.py,sha256=5vBZ3Jl2llYcW3oaTEXgdyCB2aJujMUIO5K038z7w_A,1246
-spacr/plot.py,sha256=
+spacr/plot.py,sha256=B4nxp7NFyM6MmtarW9zbxydeEvVIMgPqHEElkCIycpA,169566
 spacr/sequencing.py,sha256=HDpF_C3hRd-fk6ZENPmI3vgYoom3HIvaeIIZWLhaIAY,25037
-spacr/settings.py,sha256=
+spacr/settings.py,sha256=2Er3G6ApcemIEhyQCsF8zmVdBW8KiKf4nkSismu03s8,77358
 spacr/sim.py,sha256=1xKhXimNU3ukzIw-3l9cF3Znc_brW8h20yv8fSTzvss,71173
 spacr/submodules.py,sha256=3C5M4UbI9Ral1MX4PTpucaAaqhL3RADuCOCqaHhMyUg,28048
 spacr/timelapse.py,sha256=FSYpUtAVy6xc3lwprRYgyDTT9ysUhfRQ4zrP9_h2mvg,39465
-spacr/toxo.py,sha256=
-spacr/utils.py,sha256=
+spacr/toxo.py,sha256=bcH5Ug6xtf3BBxvOFc8e4Jc5R3f6-C46oHhgiSoYl3U,24935
+spacr/utils.py,sha256=clrjlUOhY-LQH0bTIiNJ9VMBEU9qlRbTvoU7-yNLzsg,222475
 spacr/version.py,sha256=axH5tnGwtgSnJHb5IDhiu4Zjk5GhLyAEDRe-rnaoFOA,409
 spacr/resources/MEDIAR/.gitignore,sha256=Ff1q9Nme14JUd-4Q3jZ65aeQ5X4uttptssVDgBVHYo8,152
 spacr/resources/MEDIAR/LICENSE,sha256=yEj_TRDLUfDpHDNM0StALXIt6mLqSgaV2hcCwa6_TcY,1065
@@ -151,9 +151,9 @@ spacr/resources/icons/umap.png,sha256=dOLF3DeLYy9k0nkUybiZMe1wzHQwLJFRmgccppw-8b
 spacr/resources/images/plate1_E01_T0001F001L01A01Z01C02.tif,sha256=Tl0ZUfZ_AYAbu0up_nO0tPRtF1BxXhWQ3T3pURBCCRo,7958528
 spacr/resources/images/plate1_E01_T0001F001L01A02Z01C01.tif,sha256=m8N-V71rA1TT4dFlENNg8s0Q0YEXXs8slIn7yObmZJQ,7958528
 spacr/resources/images/plate1_E01_T0001F001L01A03Z01C03.tif,sha256=Pbhk7xn-KUP6RSIhJsxQcrHFImBm3GEpLkzx7WOc-5M,7958528
-spacr-0.3.
-spacr-0.3.
-spacr-0.3.
-spacr-0.3.
-spacr-0.3.
-spacr-0.3.
+spacr-0.3.52.dist-info/LICENSE,sha256=SR-2MeGc6SCM1UORJYyarSWY_A-JaOMFDj7ReSs9tRM,1083
+spacr-0.3.52.dist-info/METADATA,sha256=I0OCX9zCPvfgnnGS2JYaV-vdNvirglSfO6IzX68s8uI,6032
+spacr-0.3.52.dist-info/WHEEL,sha256=HiCZjzuy6Dw0hdX5R3LCFPDmFS4BWl8H-8W39XfmgX4,91
+spacr-0.3.52.dist-info/entry_points.txt,sha256=BMC0ql9aNNpv8lUZ8sgDLQMsqaVnX5L535gEhKUP5ho,296
+spacr-0.3.52.dist-info/top_level.txt,sha256=GJPU8FgwRXGzKeut6JopsSRY2R8T3i9lDgya42tLInY,6
+spacr-0.3.52.dist-info/RECORD,,
{spacr-0.3.50.dist-info → spacr-0.3.52.dist-info}/LICENSE
File without changes
{spacr-0.3.50.dist-info → spacr-0.3.52.dist-info}/WHEEL
File without changes
{spacr-0.3.50.dist-info → spacr-0.3.52.dist-info}/entry_points.txt
File without changes
{spacr-0.3.50.dist-info → spacr-0.3.52.dist-info}/top_level.txt
File without changes