spacr 0.3.52__py3-none-any.whl → 0.3.55__py3-none-any.whl

This diff compares the contents of two publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions as they appear in the public registry.
spacr/ml.py CHANGED
@@ -155,10 +155,6 @@ def process_model_coefficients(model, regression_type, X, y, nc, pc, controls):
     coef_df['condition'] = coef_df.apply(lambda row: 'nc' if nc in row['feature'] else 'pc' if pc in row['feature'] else ('control' if row['grna'] in controls else 'other'),axis=1)
     return coef_df[~coef_df['feature'].str.contains('row|column')]
 
-
-
-
-
 def check_distribution(y):
     """Check the type of distribution to recommend a model."""
     if np.all((y == 0) | (y == 1)):
@@ -288,7 +284,7 @@ def check_and_clean_data(df, dependent_variable):
     df = handle_missing_values(df, ['fraction', dependent_variable])
 
     # Step 2: Ensure grna, gene, plate, row, column, and prc are categorical types
-    df = ensure_valid_types(df, ['grna', 'gene', 'plate', 'row', 'column', 'prc'])
+    df = ensure_valid_types(df, ['grna', 'gene', 'plate', 'row_name', 'column', 'prc'])
 
     # Step 3: Check for multicollinearity in fraction and the dependent variable
     df_cleaned = check_collinearity(df, ['fraction', dependent_variable])
@@ -298,7 +294,7 @@ def check_and_clean_data(df, dependent_variable):
     df_cleaned['grna'] = df['grna']
     df_cleaned['prc'] = df['prc']
     df_cleaned['plate'] = df['plate']
-    df_cleaned['row'] = df['row']
+    df_cleaned['row_name'] = df['row_name']
     df_cleaned['column'] = df['column']
 
     # Create a new column 'gene_fraction' that sums the fractions by gene within the same well
@@ -337,7 +333,7 @@ def minimum_cell_simulation(settings, num_repeats=10, sample_size=100, tolerance
         df = pd.read_csv(score_data)
         df = correct_metadata_column_names(df)
         df['plate'] = f'plate{i + 1}'
-        df['prc'] = df['plate'] + '_' + df['row'].astype(str) + '_' + df['column'].astype(str)
+        df['prc'] = df['plate'] + '_' + df['row_name'].astype(str) + '_' + df['column'].astype(str)
         dfs.append(df)
 
     df = pd.concat(dfs, axis=0)
@@ -706,18 +702,16 @@ def perform_regression(settings):
     def _perform_regression_read_data(settings):
 
         if isinstance(settings['score_data'], list) and isinstance(settings['count_data'], list):
-            settings['plate'] = None
             if len(settings['score_data']) == 1:
-                settings['score_data'] = settings['score_data'][0]
-            if len(settings['count_data']) == 1:
-                settings['count_data'] = settings['count_data'][0]
+                count_data_df = pd.read_csv(settings['count_data'][0])
+                score_data_df = pd.read_csv(settings['score_data'][0])
             else:
                 count_data_df = pd.DataFrame()
                 for i, count_data in enumerate(settings['count_data']):
                     df = pd.read_csv(count_data)
                     df['plate_name'] = f'plate{i+1}'
                     if 'column' in df.columns:
-                        df['col'] = df['column']
+                        df['column_name'] = df['column']
                     count_data_df = pd.concat([count_data_df, df])
                 print('Count data:', len(count_data_df))
 
@@ -726,7 +720,7 @@ def perform_regression(settings):
                     df = pd.read_csv(score_data)
                     df['plate_name'] = f'plate{i+1}'
                     if 'column' in df.columns:
-                        df['col'] = df['column']
+                        df['column_name'] = df['column']
                     score_data_df = pd.concat([score_data_df, df])
                 print('Score data:', len(score_data_df))
         else:
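Note: the two hunks above change the well-column alias written while loading per-plate CSVs from 'col' to 'column_name'. A minimal standalone sketch of the resulting pattern, with toy DataFrames standing in for the CSV files spacr actually reads:

    import pandas as pd

    # Hypothetical per-plate score tables in place of pd.read_csv(...) results.
    plates = [
        pd.DataFrame({'column': ['c1', 'c2'], 'score': [0.1, 0.9]}),
        pd.DataFrame({'column': ['c1', 'c3'], 'score': [0.4, 0.7]}),
    ]

    score_data_df = pd.DataFrame()
    for i, df in enumerate(plates):
        df = df.copy()
        df['plate_name'] = f'plate{i+1}'
        if 'column' in df.columns:
            df['column_name'] = df['column']  # keep both aliases, as in the diff
        score_data_df = pd.concat([score_data_df, df])

    print('Score data:', len(score_data_df))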
@@ -806,9 +800,23 @@ def perform_regression(settings):
             return df, n_gene
         else:
             return df
-
+
     settings = get_perform_regression_default_settings(settings)
     count_data_df, score_data_df = _perform_regression_read_data(settings)
+
+    if "row_name" in count_data_df.columns:
+        num_parts = len(count_data_df['row_name'].iloc[0].split('_'))
+        if num_parts == 2:
+            split = count_data_df['row_name'].str.split('_', expand=True)
+            count_data_df['row_name'] = split[1]
+
+    if "prc" in score_data_df.columns:
+        num_parts = len(score_data_df['prc'].iloc[0].split('_'))
+        if num_parts == 3:
+            split = score_data_df['prc'].str.split('_', expand=True)
+            score_data_df['plate'] = settings['plate']
+            score_data_df['prc'] = score_data_df['plate'] + '_' + split[1] + '_' + split[2]
+
     results_path, results_path_gene, results_path_grna, hits_path, res_folder, csv_path = _perform_regression_set_paths(settings)
     save_settings(settings, name='regression', show=True)
 
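Note: the block added above normalizes stale keys after loading. count_data_df['row_name'] values like 'plate1_r1' are reduced to the row token, and a three-part score 'prc' (plate_row_column) is re-keyed onto settings['plate']. A self-contained sketch of the 'prc' re-keying (sample values are illustrative):

    import pandas as pd

    score_data_df = pd.DataFrame({'prc': ['plate1_r1_c1', 'plate1_r2_c3']})
    settings = {'plate': 'plate2'}  # hypothetical replacement plate label

    num_parts = len(score_data_df['prc'].iloc[0].split('_'))
    if num_parts == 3:
        split = score_data_df['prc'].str.split('_', expand=True)
        score_data_df['plate'] = settings['plate']
        score_data_df['prc'] = score_data_df['plate'] + '_' + split[1] + '_' + split[2]

    print(score_data_df['prc'].tolist())  # ['plate2_r1_c1', 'plate2_r2_c3']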
@@ -849,7 +857,7 @@ def perform_regression(settings):
     merged_df.to_csv(data_path, index=False)
     print(f"Saved regression data to {data_path}")
 
-    merged_df[['plate', 'row', 'column']] = merged_df['prc'].str.split('_', expand=True)
+    merged_df[['plate', 'row_name', 'column']] = merged_df['prc'].str.split('_', expand=True)
 
     _ = plot_plates(merged_df, variable=orig_dv, grouping='mean', min_max='allq', cmap='viridis', min_count=None, dst=res_folder)
 
@@ -857,6 +865,7 @@ def perform_regression(settings):
 
     coef_df['grna'] = coef_df['feature'].apply(lambda x: re.search(r'grna\[(.*?)\]', x).group(1) if 'grna' in x else None)
     coef_df['gene'] = coef_df['feature'].apply(lambda x: re.search(r'gene\[(.*?)\]', x).group(1) if 'gene' in x else None)
+
     coef_df = coef_df.merge(n_grna, how='left', on='grna')
     coef_df = coef_df.merge(n_gene, how='left', on='gene')
 
@@ -903,7 +912,6 @@ def perform_regression(settings):
     save_summary_to_file(model, file_path=f'{res_folder}/mode_summary.csv')
 
     significant.to_csv(hits_path, index=False)
-
     significant_grna_filtered = significant[significant['n_grna'] > settings['min_n']]
     significant_gene_filtered = significant[significant['n_gene'] > settings['min_n']]
     significant_filtered = pd.concat([significant_grna_filtered, significant_gene_filtered])
@@ -928,8 +936,6 @@ def perform_regression(settings):
     base_dir = os.path.dirname(os.path.abspath(__file__))
     metadata_path = os.path.join(base_dir, 'resources', 'data', 'lopit.csv')
 
-    display(data_path)
-
    if settings['volcano'] == 'all':
        print('all')
        gene_list = custom_volcano_plot(data_path, metadata_path, metadata_column='tagm_location', point_size=600, figsize=20, threshold=reg_threshold, save_path=volcano_path, x_lim=settings['x_lim'],y_lims=settings['y_lims'])
@@ -982,14 +988,14 @@ def process_reads(csv_path, fraction_threshold, plate, filter_column=None, filte
         csv_df = csv_df.rename(columns={'plate_name': 'plate'})
     if 'column_name' in csv_df.columns:
         csv_df = csv_df.rename(columns={'column_name': 'column'})
-    if 'col' in csv_df.columns:
-        csv_df = csv_df.rename(columns={'col': 'column'})
+    if 'column_name' in csv_df.columns:
+        csv_df = csv_df.rename(columns={'column_name': 'column'})
     if 'row_name' in csv_df.columns:
-        csv_df = csv_df.rename(columns={'row_name': 'row'})
+        csv_df = csv_df.rename(columns={'row_name': 'row_name'})
     if 'grna_name' in csv_df.columns:
         csv_df = csv_df.rename(columns={'grna_name': 'grna'})
     if 'plate_row' in csv_df.columns:
-        csv_df[['plate', 'row']] = csv_df['plate_row'].str.split('_', expand=True)
+        csv_df[['plate', 'row_name']] = csv_df['plate_row'].str.split('_', expand=True)
 
     if not 'plate' in csv_df.columns:
         if not plate is None:
@@ -1009,11 +1015,11 @@ def process_reads(csv_path, fraction_threshold, plate, filter_column=None, filte
                 csv_df = csv_df[csv_df[filter_col] != value]
 
     # Ensure the necessary columns are present
-    if not all(col in csv_df.columns for col in ['row','column','grna','count']):
-        raise ValueError("The CSV file must contain 'grna', 'count', 'row', and 'column' columns.")
+    if not all(col in csv_df.columns for col in ['row_name','column','grna','count']):
+        raise ValueError("The CSV file must contain 'grna', 'count', 'row_name', and 'column' columns.")
 
     # Create the prc column
-    csv_df['prc'] = csv_df['plate'] + '_' + csv_df['row'] + '_' + csv_df['column']
+    csv_df['prc'] = csv_df['plate'] + '_' + csv_df['row_name'] + '_' + csv_df['column']
 
     # Group by prc and calculate the sum of counts
     grouped_df = csv_df.groupby('prc')['count'].sum().reset_index()
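Note: with the required columns renamed to include 'row_name', process_reads builds the plate_row_column well key and sums reads per well. A sketch of those two steps on toy data:

    import pandas as pd

    csv_df = pd.DataFrame({
        'plate': ['p1', 'p1', 'p1'],
        'row_name': ['r1', 'r1', 'r2'],
        'column': ['c1', 'c1', 'c2'],
        'grna': ['g1', 'g2', 'g1'],
        'count': [10, 30, 5],
    })

    # Well key, then total reads per well.
    csv_df['prc'] = csv_df['plate'] + '_' + csv_df['row_name'] + '_' + csv_df['column']
    grouped_df = csv_df.groupby('prc')['count'].sum().reset_index()
    print(grouped_df)  # p1_r1_c1 -> 40, p1_r2_c2 -> 5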
@@ -1075,7 +1081,7 @@ def clean_controls(df,values, column):
     return df
 
 def process_scores(df, dependent_variable, plate, min_cell_count=25, agg_type='mean', transform=None, regression_type='ols'):
-
+
     if 'plate_name' in df.columns:
         df.drop(columns=['plate'], inplace=True)
         df = df.rename(columns={'plate_name': 'plate'})
@@ -1083,11 +1089,14 @@ def process_scores(df, dependent_variable, plate, min_cell_count=25, agg_type='m
     if plate is not None:
         df['plate'] = plate
 
-    if 'col' not in df.columns:
-        df['col'] = df['column']
+    if 'column_name' not in df.columns:
+        df['column_name'] = df['column']
 
-    df['prc'] = df['plate'].astype(str) + '_' + df['row'].astype(str) + '_' + df['col'].astype(str)
+    df['prc'] = df['plate'].astype(str) + '_' + df['row_name'].astype(str) + '_' + df['column_name'].astype(str)
 
+    display(df)
+
+
     df = df[['prc', dependent_variable]]
 
     # Group by prc and calculate the mean and count of the dependent_variable
@@ -1257,7 +1266,7 @@ def generate_ml_scores(settings):
 
     return [output, plate_heatmap]
 
-def ml_analysis(df, channel_of_interest=3, location_column='col', positive_control='c2', negative_control='c1', exclude=None, n_repeats=10, top_features=30, n_estimators=100, test_size=0.2, model_type='xgboost', n_jobs=-1, remove_low_variance_features=True, remove_highly_correlated_features=True, verbose=False):
+def ml_analysis(df, channel_of_interest=3, location_column='column_name', positive_control='c2', negative_control='c1', exclude=None, n_repeats=10, top_features=30, n_estimators=100, test_size=0.2, model_type='xgboost', n_jobs=-1, remove_low_variance_features=True, remove_highly_correlated_features=True, verbose=False):
 
     """
     Calculates permutation importance for numerical features in the dataframe,
@@ -1403,8 +1412,8 @@ def ml_analysis(df, channel_of_interest=3, location_column='col', positive_contr
     df = _calculate_similarity(df, features, location_column, positive_control, negative_control)
 
     df['prcfo'] = df.index.astype(str)
-    df[['plate', 'row', 'col', 'field', 'object']] = df['prcfo'].str.split('_', expand=True)
-    df['prc'] = df['plate'] + '_' + df['row'] + '_' + df['col']
+    df[['plate', 'row_name', 'column_name', 'field', 'object']] = df['prcfo'].str.split('_', expand=True)
+    df['prc'] = df['plate'] + '_' + df['row_name'] + '_' + df['column_name']
 
     return [df, permutation_df, feature_importance_df, model, X_train, X_test, y_train, y_test, metrics_df], [permutation_fig, feature_importance_fig]
 
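Note: ml_analysis now unpacks its 'prcfo' index (plate_row_column_field_object) into the renamed 'row_name' and 'column_name' columns. A sketch of the five-way split, with made-up index values:

    import pandas as pd

    df = pd.DataFrame({'score': [0.2, 0.8]},
                      index=['plate1_r1_c1_f1_o1', 'plate1_r2_c2_f3_o7'])

    df['prcfo'] = df.index.astype(str)
    df[['plate', 'row_name', 'column_name', 'field', 'object']] = df['prcfo'].str.split('_', expand=True)
    df['prc'] = df['plate'] + '_' + df['row_name'] + '_' + df['column_name']
    print(df[['prc', 'field', 'object']])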
spacr/plot.py CHANGED
@@ -366,146 +366,6 @@ def plot_image_mask_overlay(
 
     return fig
 
-def plot_image_mask_overlay_v1(file, channels, cell_channel, nucleus_channel, pathogen_channel, figuresize=10, percentiles=(2,98), thickness=3, save_pdf=True, mode='outlines', export_tiffs=False):
-    """Plot image and mask overlays."""
-
-    def _plot_merged_plot(image, outlines, outline_colors, figuresize, thickness, percentiles, mode='outlines'):
-        """Plot the merged plot with overlay, image channels, and masks."""
-
-        def _generate_colored_mask(mask, alpha):
-            """ Generate a colored mask with transparency using the given colormap. """
-            cmap = generate_mask_random_cmap(mask)
-            rgba_mask = cmap(mask / mask.max()) # Normalize mask and map to colormap (RGBA)
-            rgba_mask[..., 3] = np.where(mask > 0, alpha, 0) # Apply transparency only where mask is present
-            return rgba_mask
-
-        def _overlay_mask(image, mask):
-            """Overlay the colored mask onto the original image."""
-            combined = np.clip(image + mask[..., :3] * mask[..., 3:4], 0, 1) # Ensure pixel values stay in [0, 1]
-            return combined
-
-        def _normalize_image(image, percentiles=(2, 98)):
-            """Normalize the image to the given percentiles."""
-            v_min, v_max = np.percentile(image, percentiles)
-            image_normalized = np.clip((image - v_min) / (v_max - v_min), 0, 1)
-            return image_normalized
-
-        def _generate_contours(mask):
-            """Generate contours for the given mask using OpenCV."""
-            contours, _ = cv2.findContours(mask.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
-            return contours
-
-        def _apply_contours(image, mask, color, thickness):
-            """Apply the contours to the RGB image for each unique label."""
-            unique_labels = np.unique(mask)
-            for label in unique_labels:
-                if label == 0:
-                    continue # Skip background
-                label_mask = np.where(mask == label, 1, 0).astype(np.uint8)
-                contours = _generate_contours(label_mask)
-                for contour in contours:
-                    cv2.drawContours(image, [contour], -1, mpl.colors.to_rgb(color), thickness)
-            return image
-
-        num_channels = image.shape[-1]
-        fig, ax = plt.subplots(1, num_channels + 1, figsize=(4 * figuresize, figuresize))
-
-        # Plot each channel with its corresponding outlines
-        for v in range(num_channels):
-            channel_image = image[..., v]
-            channel_image_normalized = _normalize_image(channel_image, percentiles)
-            channel_image_rgb = np.dstack((channel_image_normalized, channel_image_normalized, channel_image_normalized))
-
-            for outline, color in zip(outlines, outline_colors):
-                if mode == 'outlines':
-                    channel_image_rgb = _apply_contours(channel_image_rgb, outline, color, thickness)
-                else:
-                    mask = _generate_colored_mask(outline, alpha=0.5)
-                    channel_image_rgb = _overlay_mask(channel_image_rgb, mask)
-
-            ax[v].imshow(channel_image_rgb)
-            ax[v].set_title(f'Image - Channel {v}')
-
-        # Plot the combined RGB image with all outlines
-        rgb_image = np.zeros((*image.shape[:2], 3), dtype=float)
-        rgb_channels = min(3, num_channels)
-        for i in range(rgb_channels):
-            channel_image = image[..., i]
-            channel_image_normalized = _normalize_image(channel_image, percentiles)
-            rgb_image[..., i] = channel_image_normalized
-
-        for outline, color in zip(outlines, outline_colors):
-            if mode == 'outlines':
-                rgb_image = _apply_contours(rgb_image, outline, color, thickness)
-            else:
-                mask = _generate_colored_mask(outline, alpha=0.5)
-                rgb_image = _overlay_mask(rgb_image, mask)
-
-        ax[-1].imshow(rgb_image)
-        ax[-1].set_title('Combined RGB Image')
-
-        plt.tight_layout()
-
-        # Save the figure as a PDF
-        if save_pdf:
-            pdf_dir = os.path.join(os.path.dirname(os.path.dirname(file)), 'results', 'overlay')
-            os.makedirs(pdf_dir, exist_ok=True)
-            pdf_path = os.path.join(pdf_dir, os.path.basename(file).replace('.npy', '.pdf'))
-            fig.savefig(pdf_path, format='pdf')
-
-        plt.show()
-        return fig
-
-    def _save_channels_as_tiff(stack, save_dir, filename):
-        """Save each channel in the stack as a grayscale TIFF."""
-        os.makedirs(save_dir, exist_ok=True)
-        for i in range(stack.shape[-1]):
-            channel = stack[..., i]
-            tiff_path = os.path.join(save_dir, f"{filename}_channel_{i}.tiff")
-            tiff.imwrite(tiff_path, channel, photometric='minisblack')
-            print(f"Saved {tiff_path}")
-
-    stack = np.load(file)
-
-    if export_tiffs:
-        save_dir = os.path.join(os.path.dirname(os.path.dirname(file)), 'results', os.path.splitext(os.path.basename(file))[0], 'tiff')
-        filename = os.path.splitext(os.path.basename(file))[0]
-        _save_channels_as_tiff(stack, save_dir, filename)
-
-    # Convert to float for normalization and ensure correct handling of both 8-bit and 16-bit arrays
-    if stack.dtype == np.uint16:
-        stack = stack.astype(np.float32)
-    elif stack.dtype == np.uint8:
-        stack = stack.astype(np.float32)
-
-    image = stack[..., channels]
-    outlines = []
-    outline_colors = []
-
-    if pathogen_channel is not None:
-        pathogen_mask_dim = -1 # last dimension
-        outlines.append(np.take(stack, pathogen_mask_dim, axis=2))
-        outline_colors.append('blue')
-
-    if nucleus_channel is not None:
-        nucleus_mask_dim = -2 if pathogen_channel is not None else -1
-        outlines.append(np.take(stack, nucleus_mask_dim, axis=2))
-        outline_colors.append('green')
-
-    if cell_channel is not None:
-        if nucleus_channel is not None and pathogen_channel is not None:
-            cell_mask_dim = -3
-        elif nucleus_channel is not None or pathogen_channel is not None:
-            cell_mask_dim = -2
-        else:
-            cell_mask_dim = -1
-        outlines.append(np.take(stack, cell_mask_dim, axis=2))
-        outline_colors.append('red')
-
-    fig = _plot_merged_plot(image=image, outlines=outlines, outline_colors=outline_colors, figuresize=figuresize, thickness=thickness, percentiles=percentiles, mode=mode)
-
-    return fig
-
 def plot_masks(batch, masks, flows, cmap='inferno', figuresize=10, nr=1, file_type='.npz', print_object_number=True):
     """
     Plot the masks and flows for a given batch of images.
@@ -1793,25 +1653,40 @@ def generate_plate_heatmap(df, plate_number, variable, grouping, min_max, min_co
     if not isinstance(min_count, (int, float)):
         min_count = 0
 
-    df = df.copy() # Work on a copy to avoid SettingWithCopyWarning
-    df['plate'], df['row'], df['col'] = zip(*df['prc'].str.split('_'))
+    # Check the number of parts in 'prc'
+    num_parts = len(df['prc'].iloc[0].split('_'))
+    if num_parts == 4:
+        split = df['prc'].str.split('_', expand=True)
+        df['row_name'] = split[2]
+        df['prc'] = f"{plate_number}" + '_' + split[2] + '_' + split[3]
+
+    # Construct 'prc' based on 'plate', 'row_name', and 'column' columns
+    #df['prc'] = df['plate'].astype(str) + '_' + df['row_name'].astype(str) + '_' + df['column'].astype(str)
+
+    if 'column_name' not in df.columns:
+        if 'column' in df.columns:
+            df['column_name'] = df['column']
+    if 'column_name' in df.columns:
+        df['column_name'] = df['column_name']
+
+    df['plate'], df['row_name'], df['column_name'] = zip(*df['prc'].str.split('_'))
 
     # Filtering the dataframe based on the plate_number
     df = df[df['plate'] == plate_number].copy() # Create another copy after filtering
-
+
     # Ensure proper ordering
     row_order = [f'r{i}' for i in range(1, 17)]
     col_order = [f'c{i}' for i in range(1, 28)] # Exclude c15 as per your earlier code
 
-    df['row'] = pd.Categorical(df['row'], categories=row_order, ordered=True)
-    df['col'] = pd.Categorical(df['col'], categories=col_order, ordered=True)
-    df['count'] = df.groupby(['row', 'col'])['row'].transform('count')
+    df['row_name'] = pd.Categorical(df['row_name'], categories=row_order, ordered=True)
+    df['column_name'] = pd.Categorical(df['column_name'], categories=col_order, ordered=True)
+    df['count'] = df.groupby(['row_name', 'column_name'])['row_name'].transform('count')
 
     if min_count > 0:
         df = df[df['count'] >= min_count]
 
     # Explicitly set observed=True to avoid FutureWarning
-    grouped = df.groupby(['row', 'col'], observed=True) # Group by row and column
+    grouped = df.groupby(['row_name', 'column_name'], observed=True) # Group by row and column
 
     if grouping == 'mean':
         plate = grouped[variable].mean().reset_index()
@@ -1823,7 +1698,7 @@ def generate_plate_heatmap(df, plate_number, variable, grouping, min_max, min_co
     else:
         raise ValueError(f"Unsupported grouping: {grouping}")
 
-    plate_map = pd.pivot_table(plate, values=variable, index='row', columns='col').fillna(0)
+    plate_map = pd.pivot_table(plate, values=variable, index='row_name', columns='column_name').fillna(0)
 
     if min_max == 'all':
         min_max = [plate_map.min().min(), plate_map.max().max()]
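Note: after the per-well grouping, generate_plate_heatmap pivots the aggregated values into a row-by-column plate map keyed on the renamed columns. A sketch with toy data (pd.pivot_table averages duplicates by default, and unmeasured wells become 0):

    import pandas as pd

    plate = pd.DataFrame({
        'row_name': ['r1', 'r1', 'r2'],
        'column_name': ['c1', 'c2', 'c1'],
        'value': [0.1, 0.5, 0.9],
    })

    plate_map = pd.pivot_table(plate, values='value', index='row_name', columns='column_name').fillna(0)
    print(plate_map)  # 2x2 grid; the empty r2/c2 well is 0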
@@ -1965,81 +1840,6 @@ def print_mask_and_flows(stack, mask, flows, overlay=True, max_size=1000, thickn
 
     fig.tight_layout()
     plt.show()
-
-def print_mask_and_flows_v1(stack, mask, flows, overlay=False, max_size=1000):
-    """
-    Display the original image, mask, and flow with optional resizing for large images.
-
-    Args:
-        stack (np.array): Original image or stack.
-        mask (np.array): Mask image.
-        flows (list): List of flow images.
-        overlay (bool): Whether to overlay the mask on the original image.
-        max_size (int): Maximum allowed size for any dimension of the images.
-    """
-
-    def resize_if_needed(image, max_size):
-        """Resize image if any dimension exceeds max_size while maintaining aspect ratio."""
-        if max(image.shape[:2]) > max_size:
-            scale = max_size / max(image.shape[:2])
-            new_shape = (int(image.shape[0] * scale), int(image.shape[1] * scale))
-            if image.ndim == 3:
-                new_shape += (image.shape[2],)
-            return skimage.transform.resize(image, new_shape, preserve_range=True, anti_aliasing=True).astype(image.dtype)
-        return image
-
-    # Resize if necessary
-    stack = resize_if_needed(stack, max_size)
-    mask = resize_if_needed(mask, max_size)
-    flows = [resize_if_needed(flow, max_size) for flow in flows]
-
-    fig, axs = plt.subplots(1, 3, figsize=(12, 4)) # Adjust subplot layout
-
-    if stack.shape[-1] == 1:
-        stack = np.squeeze(stack)
-
-    # Display original image or its first channel
-    if stack.ndim == 2:
-        axs[0].imshow(stack, cmap='gray')
-    elif stack.ndim == 3:
-        axs[0].imshow(stack)
-    else:
-        raise ValueError("Unexpected stack dimensionality.")
-
-    axs[0].set_title('Original Image')
-    axs[0].axis('off')
-
-
-    # Overlay mask on original image if overlay is True
-    if overlay:
-        mask_cmap = generate_mask_random_cmap(mask) # Generate random colormap for mask
-        mask_overlay = np.ma.masked_where(mask == 0, mask) # Mask background
-        outlines = find_boundaries(mask, mode='thick') # Find mask outlines
-
-        if stack.ndim == 2 or stack.ndim == 3:
-            axs[1].imshow(stack, cmap='gray' if stack.ndim == 2 else None)
-            axs[1].imshow(mask_overlay, cmap=mask_cmap, alpha=0.5) # Overlay mask
-            axs[1].contour(outlines, colors='r', linewidths=2) # Add red outlines with thickness 2
-    else:
-        axs[1].imshow(mask, cmap='gray')
-
-    axs[1].set_title('Mask with Overlay' if overlay else 'Mask')
-    axs[1].axis('off')
-
-    # Display flow image or its first channel
-    if flows and isinstance(flows, list) and flows[0].ndim in [2, 3]:
-        flow_image = flows[0]
-        if flow_image.ndim == 3:
-            flow_image = flow_image[:, :, 0] # Use first channel for 3D
-        axs[2].imshow(flow_image, cmap='jet')
-    else:
-        raise ValueError("Unexpected flow dimensionality or structure.")
-
-    axs[2].set_title('Flows')
-    axs[2].axis('off')
-
-    fig.tight_layout()
-    plt.show()
 
 def plot_resize(images, resized_images, labels, resized_labels):
     # Display an example image and label before and after resizing
@@ -2297,48 +2097,6 @@ def plot_lorenz_curves(csv_files, name_column='grna_name', value_column='count',
     print(f"Saved Lorenz Curve: {save_file_path}")
     plt.show()
 
-def plot_lorenz_curves_v1(csv_files, remove_keys=['TGGT1_220950_1', 'TGGT1_233460_4']):
-
-    def lorenz_curve(data):
-        """Calculate Lorenz curve."""
-        sorted_data = np.sort(data)
-        cumulative_data = np.cumsum(sorted_data)
-        lorenz_curve = cumulative_data / cumulative_data[-1]
-        lorenz_curve = np.insert(lorenz_curve, 0, 0)
-        return lorenz_curve
-
-    combined_data = []
-
-    plt.figure(figsize=(10, 6))
-
-    for idx, csv_file in enumerate(csv_files):
-        if idx == 1:
-            save_fldr = os.path.dirname(csv_file)
-            save_path = os.path.join(save_fldr, 'lorenz_curve.pdf')
-
-        df = pd.read_csv(csv_file)
-        for remove in remove_keys:
-            df = df[df['key'] != remove]
-
-        values = df['value'].values
-        combined_data.extend(values)
-
-        lorenz = lorenz_curve(values)
-        name = os.path.basename(csv_file)[:3]
-        plt.plot(np.linspace(0, 1, len(lorenz)), lorenz, label=name)
-
-    # Plot combined Lorenz curve
-    combined_lorenz = lorenz_curve(np.array(combined_data))
-    plt.plot(np.linspace(0, 1, len(combined_lorenz)), combined_lorenz, label="Combined Lorenz Curve", linestyle='--', color='black')
-
-    plt.title('Lorenz Curves')
-    plt.xlabel('Cumulative Share of Individuals')
-    plt.ylabel('Cumulative Share of Value')
-    plt.legend()
-    plt.grid(False)
-    plt.savefig(save_path)
-    plt.show()
-
 def plot_permutation(permutation_df):
     num_features = len(permutation_df)
     fig_height = max(8, num_features * 0.3) # Set a minimum height of 8 and adjust height based on number of features
@@ -2970,33 +2728,6 @@ class spacrGraph:
 
         # Redraw to apply changes
         ax.figure.canvas.draw()
-
-
-    def _place_symbols_v1(row_labels, transposed_table, x_positions, ax):
-
-        # Get the bottom of the y-axis (y=0) in data coordinates and convert to display coordinates
-        y_axis_min = ax.get_ylim()[0] # Minimum y-axis value (usually 0)
-        symbol_start_y = ax.transData.transform((0, y_axis_min))[1] - 30 # Slightly below the x-axis line
-
-        # Convert to figure coordinates
-        symbol_start_y_fig = ax.transAxes.inverted().transform((0, symbol_start_y))[1]
-
-        # Calculate y-spacing for the table rows (adjust as needed)
-        y_spacing = 0.02 # Control vertical spacing between elements
-
-        # X-coordinate for the row labels at the y-axis and x-axis intersection
-        label_x_pos = ax.get_xlim()[0] - 0.5 # Slightly offset from the y-axis
-
-        # Place the row titles at the y-axis intersection
-        for row_idx, title in enumerate(row_labels):
-            y_pos = symbol_start_y_fig - (row_idx * y_spacing) # Align with row index
-            ax.text(label_x_pos, y_pos, title, ha='right', va='center', fontsize=12, fontweight='regular')
-
-        # Place the symbols under each bar
-        for idx, (x_pos, column_data) in enumerate(zip(x_positions, transposed_table)):
-            for row_idx, text in enumerate(column_data):
-                y_pos = symbol_start_y_fig - (row_idx * y_spacing)
-                ax.text(x_pos, y_pos, text, ha='center', va='center', fontsize=12)
 
     def _get_positions(self, ax):
         if self.graph_type in ['bar','jitter_bar']:
@@ -3549,7 +3280,7 @@ def plot_data_from_db(settings):
         dfs.append(dft)
 
     df = pd.concat(dfs, axis=0)
-    df['prc'] = df['plate'].astype(str) + '_' + df['row'].astype(str) + '_' + df['col'].astype(str)
+    df['prc'] = df['plate'].astype(str) + '_' + df['row_name'].astype(str) + '_' + df['column_name'].astype(str)
     #df['recruitment'] = df['pathogen_channel_1_mean_intensity'] / df['cytoplasm_channel_1_mean_intensity']
     #df['recruitment'] = df['pathogen_channel_1_mean_intensity'] / df['cytoplasm_channel_1_mean_intensity']
     df['class'] = df['png_path'].apply(lambda x: 'class_1' if 'class_1' in x else ('class_0' if 'class_0' in x else None))
spacr/sequencing.py CHANGED
@@ -125,7 +125,7 @@ def process_chunk(chunk_data):
             consensus_sequences.append(consensus_seq)
             column_sequence = match.group('column')
             grna_sequence = match.group('grna')
-            row_sequence = match.group('row')
+            row_sequence = match.group('row_name')
             columns.append(column_sequence)
             grnas.append(grna_sequence)
             rows.append(row_sequence)
@@ -176,7 +176,7 @@ def process_chunk(chunk_data):
             consensus_sequences.append(consensus_seq)
             column_sequence = match.group('column')
             grna_sequence = match.group('grna')
-            row_sequence = match.group('row')
+            row_sequence = match.group('row_name')
             columns.append(column_sequence)
             grnas.append(grna_sequence)
             rows.append(row_sequence)
@@ -532,7 +532,7 @@ def graph_sequencing_stats(settings):
     # Iterate through the fraction thresholds
     for threshold in fraction_thresholds:
         filtered_df = df[df['fraction'] >= threshold]
-        unique_count = filtered_df.groupby(['plate', 'row', 'column'])['grna'].nunique().mean()
+        unique_count = filtered_df.groupby(['plate', 'row_name', 'column'])['grna'].nunique().mean()
         results.append((threshold, unique_count))
 
     results_df = pd.DataFrame(results, columns=['fraction_threshold', 'unique_count'])
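Note: the sweep above recomputes, for each candidate fraction threshold, the mean number of distinct gRNAs surviving per well. A self-contained sketch with toy read fractions:

    import pandas as pd

    df = pd.DataFrame({
        'plate': ['p1'] * 4,
        'row_name': ['r1', 'r1', 'r1', 'r2'],
        'column': ['c1', 'c1', 'c1', 'c1'],
        'grna': ['g1', 'g2', 'g3', 'g1'],
        'fraction': [0.7, 0.2, 0.1, 1.0],
    })

    results = []
    for threshold in [0.05, 0.15, 0.5]:
        filtered_df = df[df['fraction'] >= threshold]
        unique_count = filtered_df.groupby(['plate', 'row_name', 'column'])['grna'].nunique().mean()
        results.append((threshold, unique_count))

    print(pd.DataFrame(results, columns=['fraction_threshold', 'unique_count']))  # 2.0, 1.5, 1.0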
@@ -588,17 +588,21 @@ def graph_sequencing_stats(settings):
     # Apply the closest threshold to the DataFrame
     df = df[df['fraction'] >= closest_threshold]
 
-    # Group by 'plate', 'row', 'column' and compute unique counts of 'grna'
-    unique_counts = df.groupby(['plate', 'row', 'column'])['grna'].nunique().reset_index(name='unique_counts')
-    unique_count_mean = df.groupby(['plate', 'row', 'column'])['grna'].nunique().mean()
-    unique_count_std = df.groupby(['plate', 'row', 'column'])['grna'].nunique().std()
+    # Group by 'plate', 'row_name', 'column' and compute unique counts of 'grna'
+    unique_counts = df.groupby(['plate', 'row_name', 'column'])['grna'].nunique().reset_index(name='unique_counts')
+    unique_count_mean = df.groupby(['plate', 'row_name', 'column'])['grna'].nunique().mean()
+    unique_count_std = df.groupby(['plate', 'row_name', 'column'])['grna'].nunique().std()
 
     # Merge the unique counts back into the original DataFrame
-    df = pd.merge(df, unique_counts, on=['plate', 'row', 'column'], how='left')
+    df = pd.merge(df, unique_counts, on=['plate', 'row_name', 'column'], how='left')
 
     print(f"unique_count mean: {unique_count_mean} std: {unique_count_std}")
-    display(df)
     #_plot_density(df, dependent_variable='unique_counts')
+
+    has_underscore = df['row_name'].str.contains('_').any()
+    if has_underscore:
+        df['row_name'] = df['row_name'].apply(lambda x: x.split('_')[1])
+
     plot_plates(df=df, variable='unique_counts', grouping='mean', min_max='allq', cmap='viridis',min_count=0, verbose=True, dst=dst)
 
     return closest_threshold
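Note: the block added before plot_plates strips any residual 'plate_row' prefix from row_name so wells plot under plain row tokens. Equivalent standalone logic (sample values are illustrative):

    import pandas as pd

    df = pd.DataFrame({'row_name': ['plate1_r1', 'plate1_r2']})

    # If any row_name still embeds a plate prefix, keep only the part after '_'.
    if df['row_name'].str.contains('_').any():
        df['row_name'] = df['row_name'].apply(lambda x: x.split('_')[1])

    print(df['row_name'].tolist())  # ['r1', 'r2']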