spacr 0.3.52__py3-none-any.whl → 0.3.55__py3-none-any.whl

This diff compares the contents of two publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions as they appear in the public registry.
spacr/ml.py CHANGED
@@ -155,10 +155,6 @@ def process_model_coefficients(model, regression_type, X, y, nc, pc, controls):
     coef_df['condition'] = coef_df.apply(lambda row: 'nc' if nc in row['feature'] else 'pc' if pc in row['feature'] else ('control' if row['grna'] in controls else 'other'),axis=1)
     return coef_df[~coef_df['feature'].str.contains('row|column')]
 
-
-
-
-
 def check_distribution(y):
     """Check the type of distribution to recommend a model."""
     if np.all((y == 0) | (y == 1)):
@@ -288,7 +284,7 @@ def check_and_clean_data(df, dependent_variable):
     df = handle_missing_values(df, ['fraction', dependent_variable])
 
     # Step 2: Ensure grna, gene, plate, row, column, and prc are categorical types
-    df = ensure_valid_types(df, ['grna', 'gene', 'plate', 'row', 'column', 'prc'])
+    df = ensure_valid_types(df, ['grna', 'gene', 'plate', 'row_name', 'column', 'prc'])
 
     # Step 3: Check for multicollinearity in fraction and the dependent variable
     df_cleaned = check_collinearity(df, ['fraction', dependent_variable])
@@ -298,7 +294,7 @@ def check_and_clean_data(df, dependent_variable):
     df_cleaned['grna'] = df['grna']
     df_cleaned['prc'] = df['prc']
     df_cleaned['plate'] = df['plate']
-    df_cleaned['row'] = df['row']
+    df_cleaned['row_name'] = df['row_name']
     df_cleaned['column'] = df['column']
 
     # Create a new column 'gene_fraction' that sums the fractions by gene within the same well
@@ -337,7 +333,7 @@ def minimum_cell_simulation(settings, num_repeats=10, sample_size=100, tolerance
         df = pd.read_csv(score_data)
         df = correct_metadata_column_names(df)
         df['plate'] = f'plate{i + 1}'
-        df['prc'] = df['plate'] + '_' + df['row'].astype(str) + '_' + df['column'].astype(str)
+        df['prc'] = df['plate'] + '_' + df['row_name'].astype(str) + '_' + df['column'].astype(str)
         dfs.append(df)
 
     df = pd.concat(dfs, axis=0)
@@ -706,18 +702,16 @@ def perform_regression(settings):
     def _perform_regression_read_data(settings):
 
         if isinstance(settings['score_data'], list) and isinstance(settings['count_data'], list):
-            settings['plate'] = None
             if len(settings['score_data']) == 1:
-                settings['score_data'] = settings['score_data'][0]
-            if len(settings['count_data']) == 1:
-                settings['count_data'] = settings['count_data'][0]
+                count_data_df = pd.read_csv(settings['count_data'][0])
+                score_data_df = pd.read_csv(settings['score_data'][0])
             else:
                 count_data_df = pd.DataFrame()
                 for i, count_data in enumerate(settings['count_data']):
                     df = pd.read_csv(count_data)
                     df['plate_name'] = f'plate{i+1}'
                     if 'column' in df.columns:
-                        df['col'] = df['column']
+                        df['column_name'] = df['column']
                     count_data_df = pd.concat([count_data_df, df])
                 print('Count data:', len(count_data_df))
 
@@ -726,7 +720,7 @@ def perform_regression(settings):
                     df = pd.read_csv(score_data)
                     df['plate_name'] = f'plate{i+1}'
                     if 'column' in df.columns:
-                        df['col'] = df['column']
+                        df['column_name'] = df['column']
                     score_data_df = pd.concat([score_data_df, df])
                 print('Score data:', len(score_data_df))
         else:
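Note: the two hunks above change the well-column alias written while loading per-plate CSVs from 'col' to 'column_name'. A minimal standalone sketch of the resulting pattern, with toy DataFrames standing in for the CSV files spacr actually reads:

    import pandas as pd

    # Hypothetical per-plate score tables in place of pd.read_csv(...) results.
    plates = [
        pd.DataFrame({'column': ['c1', 'c2'], 'score': [0.1, 0.9]}),
        pd.DataFrame({'column': ['c1', 'c3'], 'score': [0.4, 0.7]}),
    ]

    score_data_df = pd.DataFrame()
    for i, df in enumerate(plates):
        df = df.copy()
        df['plate_name'] = f'plate{i+1}'
        if 'column' in df.columns:
            df['column_name'] = df['column']  # keep both aliases, as in the diff
        score_data_df = pd.concat([score_data_df, df])

    print('Score data:', len(score_data_df))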
@@ -806,9 +800,23 @@ def perform_regression(settings):
             return df, n_gene
         else:
             return df
-
+
     settings = get_perform_regression_default_settings(settings)
     count_data_df, score_data_df = _perform_regression_read_data(settings)
+
+    if "row_name" in count_data_df.columns:
+        num_parts = len(count_data_df['row_name'].iloc[0].split('_'))
+        if num_parts == 2:
+            split = count_data_df['row_name'].str.split('_', expand=True)
+            count_data_df['row_name'] = split[1]
+
+    if "prc" in score_data_df.columns:
+        num_parts = len(score_data_df['prc'].iloc[0].split('_'))
+        if num_parts == 3:
+            split = score_data_df['prc'].str.split('_', expand=True)
+            score_data_df['plate'] = settings['plate']
+            score_data_df['prc'] = score_data_df['plate'] + '_' + split[1] + '_' + split[2]
+
     results_path, results_path_gene, results_path_grna, hits_path, res_folder, csv_path = _perform_regression_set_paths(settings)
     save_settings(settings, name='regression', show=True)
 
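Note: the block added above normalizes stale keys after loading. count_data_df['row_name'] values like 'plate1_r1' are reduced to the row token, and a three-part score 'prc' (plate_row_column) is re-keyed onto settings['plate']. A self-contained sketch of the 'prc' re-keying (sample values are illustrative):

    import pandas as pd

    score_data_df = pd.DataFrame({'prc': ['plate1_r1_c1', 'plate1_r2_c3']})
    settings = {'plate': 'plate2'}  # hypothetical replacement plate label

    num_parts = len(score_data_df['prc'].iloc[0].split('_'))
    if num_parts == 3:
        split = score_data_df['prc'].str.split('_', expand=True)
        score_data_df['plate'] = settings['plate']
        score_data_df['prc'] = score_data_df['plate'] + '_' + split[1] + '_' + split[2]

    print(score_data_df['prc'].tolist())  # ['plate2_r1_c1', 'plate2_r2_c3']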
@@ -849,7 +857,7 @@ def perform_regression(settings):
     merged_df.to_csv(data_path, index=False)
     print(f"Saved regression data to {data_path}")
 
-    merged_df[['plate', 'row', 'column']] = merged_df['prc'].str.split('_', expand=True)
+    merged_df[['plate', 'row_name', 'column']] = merged_df['prc'].str.split('_', expand=True)
 
     _ = plot_plates(merged_df, variable=orig_dv, grouping='mean', min_max='allq', cmap='viridis', min_count=None, dst=res_folder)
 
@@ -857,6 +865,7 @@ def perform_regression(settings):
 
     coef_df['grna'] = coef_df['feature'].apply(lambda x: re.search(r'grna\[(.*?)\]', x).group(1) if 'grna' in x else None)
     coef_df['gene'] = coef_df['feature'].apply(lambda x: re.search(r'gene\[(.*?)\]', x).group(1) if 'gene' in x else None)
+
     coef_df = coef_df.merge(n_grna, how='left', on='grna')
     coef_df = coef_df.merge(n_gene, how='left', on='gene')
 
@@ -903,7 +912,6 @@ def perform_regression(settings):
     save_summary_to_file(model, file_path=f'{res_folder}/mode_summary.csv')
 
     significant.to_csv(hits_path, index=False)
-
     significant_grna_filtered = significant[significant['n_grna'] > settings['min_n']]
     significant_gene_filtered = significant[significant['n_gene'] > settings['min_n']]
     significant_filtered = pd.concat([significant_grna_filtered, significant_gene_filtered])
@@ -928,8 +936,6 @@ def perform_regression(settings):
     base_dir = os.path.dirname(os.path.abspath(__file__))
     metadata_path = os.path.join(base_dir, 'resources', 'data', 'lopit.csv')
 
-    display(data_path)
-
    if settings['volcano'] == 'all':
        print('all')
        gene_list = custom_volcano_plot(data_path, metadata_path, metadata_column='tagm_location', point_size=600, figsize=20, threshold=reg_threshold, save_path=volcano_path, x_lim=settings['x_lim'],y_lims=settings['y_lims'])
@@ -982,14 +988,14 @@ def process_reads(csv_path, fraction_threshold, plate, filter_column=None, filte
         csv_df = csv_df.rename(columns={'plate_name': 'plate'})
     if 'column_name' in csv_df.columns:
         csv_df = csv_df.rename(columns={'column_name': 'column'})
-    if 'col' in csv_df.columns:
-        csv_df = csv_df.rename(columns={'col': 'column'})
+    if 'column_name' in csv_df.columns:
+        csv_df = csv_df.rename(columns={'column_name': 'column'})
     if 'row_name' in csv_df.columns:
-        csv_df = csv_df.rename(columns={'row_name': 'row'})
+        csv_df = csv_df.rename(columns={'row_name': 'row_name'})
     if 'grna_name' in csv_df.columns:
         csv_df = csv_df.rename(columns={'grna_name': 'grna'})
     if 'plate_row' in csv_df.columns:
-        csv_df[['plate', 'row']] = csv_df['plate_row'].str.split('_', expand=True)
+        csv_df[['plate', 'row_name']] = csv_df['plate_row'].str.split('_', expand=True)
 
     if not 'plate' in csv_df.columns:
         if not plate is None:
@@ -1009,11 +1015,11 @@ def process_reads(csv_path, fraction_threshold, plate, filter_column=None, filte
                 csv_df = csv_df[csv_df[filter_col] != value]
 
     # Ensure the necessary columns are present
-    if not all(col in csv_df.columns for col in ['row','column','grna','count']):
-        raise ValueError("The CSV file must contain 'grna', 'count', 'row', and 'column' columns.")
+    if not all(col in csv_df.columns for col in ['row_name','column','grna','count']):
+        raise ValueError("The CSV file must contain 'grna', 'count', 'row_name', and 'column' columns.")
 
     # Create the prc column
-    csv_df['prc'] = csv_df['plate'] + '_' + csv_df['row'] + '_' + csv_df['column']
+    csv_df['prc'] = csv_df['plate'] + '_' + csv_df['row_name'] + '_' + csv_df['column']
 
     # Group by prc and calculate the sum of counts
     grouped_df = csv_df.groupby('prc')['count'].sum().reset_index()
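Note: with the required columns renamed to include 'row_name', process_reads builds the plate_row_column well key and sums reads per well. A sketch of those two steps on toy data:

    import pandas as pd

    csv_df = pd.DataFrame({
        'plate': ['p1', 'p1', 'p1'],
        'row_name': ['r1', 'r1', 'r2'],
        'column': ['c1', 'c1', 'c2'],
        'grna': ['g1', 'g2', 'g1'],
        'count': [10, 30, 5],
    })

    # Well key, then total reads per well.
    csv_df['prc'] = csv_df['plate'] + '_' + csv_df['row_name'] + '_' + csv_df['column']
    grouped_df = csv_df.groupby('prc')['count'].sum().reset_index()
    print(grouped_df)  # p1_r1_c1 -> 40, p1_r2_c2 -> 5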
@@ -1075,7 +1081,7 @@ def clean_controls(df,values, column):
     return df
 
 def process_scores(df, dependent_variable, plate, min_cell_count=25, agg_type='mean', transform=None, regression_type='ols'):
-
+
     if 'plate_name' in df.columns:
         df.drop(columns=['plate'], inplace=True)
         df = df.rename(columns={'plate_name': 'plate'})
@@ -1083,11 +1089,14 @@ def process_scores(df, dependent_variable, plate, min_cell_count=25, agg_type='m
     if plate is not None:
         df['plate'] = plate
 
-    if 'col' not in df.columns:
-        df['col'] = df['column']
+    if 'column_name' not in df.columns:
+        df['column_name'] = df['column']
 
-    df['prc'] = df['plate'].astype(str) + '_' + df['row'].astype(str) + '_' + df['col'].astype(str)
+    df['prc'] = df['plate'].astype(str) + '_' + df['row_name'].astype(str) + '_' + df['column_name'].astype(str)
 
+    display(df)
+
+
     df = df[['prc', dependent_variable]]
 
     # Group by prc and calculate the mean and count of the dependent_variable
@@ -1257,7 +1266,7 @@ def generate_ml_scores(settings):
 
     return [output, plate_heatmap]
 
-def ml_analysis(df, channel_of_interest=3, location_column='col', positive_control='c2', negative_control='c1', exclude=None, n_repeats=10, top_features=30, n_estimators=100, test_size=0.2, model_type='xgboost', n_jobs=-1, remove_low_variance_features=True, remove_highly_correlated_features=True, verbose=False):
+def ml_analysis(df, channel_of_interest=3, location_column='column_name', positive_control='c2', negative_control='c1', exclude=None, n_repeats=10, top_features=30, n_estimators=100, test_size=0.2, model_type='xgboost', n_jobs=-1, remove_low_variance_features=True, remove_highly_correlated_features=True, verbose=False):
 
     """
     Calculates permutation importance for numerical features in the dataframe,
@@ -1403,8 +1412,8 @@ def ml_analysis(df, channel_of_interest=3, location_column='col', positive_contr
     df = _calculate_similarity(df, features, location_column, positive_control, negative_control)
 
     df['prcfo'] = df.index.astype(str)
-    df[['plate', 'row', 'col', 'field', 'object']] = df['prcfo'].str.split('_', expand=True)
-    df['prc'] = df['plate'] + '_' + df['row'] + '_' + df['col']
+    df[['plate', 'row_name', 'column_name', 'field', 'object']] = df['prcfo'].str.split('_', expand=True)
+    df['prc'] = df['plate'] + '_' + df['row_name'] + '_' + df['column_name']
 
     return [df, permutation_df, feature_importance_df, model, X_train, X_test, y_train, y_test, metrics_df], [permutation_fig, feature_importance_fig]
 
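Note: ml_analysis now unpacks its 'prcfo' index (plate_row_column_field_object) into the renamed 'row_name' and 'column_name' columns. A sketch of the five-way split, with made-up index values:

    import pandas as pd

    df = pd.DataFrame({'score': [0.2, 0.8]},
                      index=['plate1_r1_c1_f1_o1', 'plate1_r2_c2_f3_o7'])

    df['prcfo'] = df.index.astype(str)
    df[['plate', 'row_name', 'column_name', 'field', 'object']] = df['prcfo'].str.split('_', expand=True)
    df['prc'] = df['plate'] + '_' + df['row_name'] + '_' + df['column_name']
    print(df[['prc', 'field', 'object']])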
spacr/plot.py CHANGED
@@ -366,146 +366,6 @@ def plot_image_mask_overlay(
 
     return fig
 
-def plot_image_mask_overlay_v1(file, channels, cell_channel, nucleus_channel, pathogen_channel, figuresize=10, percentiles=(2,98), thickness=3, save_pdf=True, mode='outlines', export_tiffs=False):
-    """Plot image and mask overlays."""
-
-    def _plot_merged_plot(image, outlines, outline_colors, figuresize, thickness, percentiles, mode='outlines'):
-        """Plot the merged plot with overlay, image channels, and masks."""
-
-        def _generate_colored_mask(mask, alpha):
-            """ Generate a colored mask with transparency using the given colormap. """
-            cmap = generate_mask_random_cmap(mask)
-            rgba_mask = cmap(mask / mask.max()) # Normalize mask and map to colormap (RGBA)
-            rgba_mask[..., 3] = np.where(mask > 0, alpha, 0) # Apply transparency only where mask is present
-            return rgba_mask
-
-        def _overlay_mask(image, mask):
-            """Overlay the colored mask onto the original image."""
-            combined = np.clip(image + mask[..., :3] * mask[..., 3:4], 0, 1) # Ensure pixel values stay in [0, 1]
-            return combined
-
-        def _normalize_image(image, percentiles=(2, 98)):
-            """Normalize the image to the given percentiles."""
-            v_min, v_max = np.percentile(image, percentiles)
-            image_normalized = np.clip((image - v_min) / (v_max - v_min), 0, 1)
-            return image_normalized
-
-        def _generate_contours(mask):
-            """Generate contours for the given mask using OpenCV."""
-            contours, _ = cv2.findContours(mask.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
-            return contours
-
-        def _apply_contours(image, mask, color, thickness):
-            """Apply the contours to the RGB image for each unique label."""
-            unique_labels = np.unique(mask)
-            for label in unique_labels:
-                if label == 0:
-                    continue # Skip background
-                label_mask = np.where(mask == label, 1, 0).astype(np.uint8)
-                contours = _generate_contours(label_mask)
-                for contour in contours:
-                    cv2.drawContours(image, [contour], -1, mpl.colors.to_rgb(color), thickness)
-            return image
-
-        num_channels = image.shape[-1]
-        fig, ax = plt.subplots(1, num_channels + 1, figsize=(4 * figuresize, figuresize))
-
-        # Plot each channel with its corresponding outlines
-        for v in range(num_channels):
-            channel_image = image[..., v]
-            channel_image_normalized = _normalize_image(channel_image, percentiles)
-            channel_image_rgb = np.dstack((channel_image_normalized, channel_image_normalized, channel_image_normalized))
-
-            for outline, color in zip(outlines, outline_colors):
-                if mode == 'outlines':
-                    channel_image_rgb = _apply_contours(channel_image_rgb, outline, color, thickness)
-                else:
-                    mask = _generate_colored_mask(outline, alpha=0.5)
-                    channel_image_rgb = _overlay_mask(channel_image_rgb, mask)
-
-            ax[v].imshow(channel_image_rgb)
-            ax[v].set_title(f'Image - Channel {v}')
-
-        # Plot the combined RGB image with all outlines
-        rgb_image = np.zeros((*image.shape[:2], 3), dtype=float)
-        rgb_channels = min(3, num_channels)
-        for i in range(rgb_channels):
-            channel_image = image[..., i]
-            channel_image_normalized = _normalize_image(channel_image, percentiles)
-            rgb_image[..., i] = channel_image_normalized
-
-        for outline, color in zip(outlines, outline_colors):
-            if mode == 'outlines':
-                rgb_image = _apply_contours(rgb_image, outline, color, thickness)
-            else:
-                mask = _generate_colored_mask(outline, alpha=0.5)
-                rgb_image = _overlay_mask(rgb_image, mask)
-
-        ax[-1].imshow(rgb_image)
-        ax[-1].set_title('Combined RGB Image')
-
-        plt.tight_layout()
-
-        # Save the figure as a PDF
-        if save_pdf:
-            pdf_dir = os.path.join(os.path.dirname(os.path.dirname(file)), 'results', 'overlay')
-            os.makedirs(pdf_dir, exist_ok=True)
-            pdf_path = os.path.join(pdf_dir, os.path.basename(file).replace('.npy', '.pdf'))
-            fig.savefig(pdf_path, format='pdf')
-
-        plt.show()
-        return fig
-
-    def _save_channels_as_tiff(stack, save_dir, filename):
-        """Save each channel in the stack as a grayscale TIFF."""
-        os.makedirs(save_dir, exist_ok=True)
-        for i in range(stack.shape[-1]):
-            channel = stack[..., i]
-            tiff_path = os.path.join(save_dir, f"{filename}_channel_{i}.tiff")
-            tiff.imwrite(tiff_path, channel, photometric='minisblack')
-            print(f"Saved {tiff_path}")
-
-    stack = np.load(file)
-
-    if export_tiffs:
-        save_dir = os.path.join(os.path.dirname(os.path.dirname(file)), 'results', os.path.splitext(os.path.basename(file))[0], 'tiff')
-        filename = os.path.splitext(os.path.basename(file))[0]
-        _save_channels_as_tiff(stack, save_dir, filename)
-
-    # Convert to float for normalization and ensure correct handling of both 8-bit and 16-bit arrays
-    if stack.dtype == np.uint16:
-        stack = stack.astype(np.float32)
-    elif stack.dtype == np.uint8:
-        stack = stack.astype(np.float32)
-
-    image = stack[..., channels]
-    outlines = []
-    outline_colors = []
-
-    if pathogen_channel is not None:
-        pathogen_mask_dim = -1 # last dimension
-        outlines.append(np.take(stack, pathogen_mask_dim, axis=2))
-        outline_colors.append('blue')
-
-    if nucleus_channel is not None:
-        nucleus_mask_dim = -2 if pathogen_channel is not None else -1
-        outlines.append(np.take(stack, nucleus_mask_dim, axis=2))
-        outline_colors.append('green')
-
-    if cell_channel is not None:
-        if nucleus_channel is not None and pathogen_channel is not None:
-            cell_mask_dim = -3
-        elif nucleus_channel is not None or pathogen_channel is not None:
-            cell_mask_dim = -2
-        else:
-            cell_mask_dim = -1
-        outlines.append(np.take(stack, cell_mask_dim, axis=2))
-        outline_colors.append('red')
-
-    fig = _plot_merged_plot(image=image, outlines=outlines, outline_colors=outline_colors, figuresize=figuresize, thickness=thickness, percentiles=percentiles, mode=mode)
-
-    return fig
-
 def plot_masks(batch, masks, flows, cmap='inferno', figuresize=10, nr=1, file_type='.npz', print_object_number=True):
     """
     Plot the masks and flows for a given batch of images.
@@ -1793,25 +1653,40 @@ def generate_plate_heatmap(df, plate_number, variable, grouping, min_max, min_co
     if not isinstance(min_count, (int, float)):
         min_count = 0
 
-    df = df.copy() # Work on a copy to avoid SettingWithCopyWarning
-    df['plate'], df['row'], df['col'] = zip(*df['prc'].str.split('_'))
+    # Check the number of parts in 'prc'
+    num_parts = len(df['prc'].iloc[0].split('_'))
+    if num_parts == 4:
+        split = df['prc'].str.split('_', expand=True)
+        df['row_name'] = split[2]
+        df['prc'] = f"{plate_number}" + '_' + split[2] + '_' + split[3]
+
+    # Construct 'prc' based on 'plate', 'row_name', and 'column' columns
+    #df['prc'] = df['plate'].astype(str) + '_' + df['row_name'].astype(str) + '_' + df['column'].astype(str)
+
+    if 'column_name' not in df.columns:
+        if 'column' in df.columns:
+            df['column_name'] = df['column']
+    if 'column_name' in df.columns:
+        df['column_name'] = df['column_name']
+
+    df['plate'], df['row_name'], df['column_name'] = zip(*df['prc'].str.split('_'))
 
     # Filtering the dataframe based on the plate_number
     df = df[df['plate'] == plate_number].copy() # Create another copy after filtering
-
+
     # Ensure proper ordering
     row_order = [f'r{i}' for i in range(1, 17)]
     col_order = [f'c{i}' for i in range(1, 28)] # Exclude c15 as per your earlier code
 
-    df['row'] = pd.Categorical(df['row'], categories=row_order, ordered=True)
-    df['col'] = pd.Categorical(df['col'], categories=col_order, ordered=True)
-    df['count'] = df.groupby(['row', 'col'])['row'].transform('count')
+    df['row_name'] = pd.Categorical(df['row_name'], categories=row_order, ordered=True)
+    df['column_name'] = pd.Categorical(df['column_name'], categories=col_order, ordered=True)
+    df['count'] = df.groupby(['row_name', 'column_name'])['row_name'].transform('count')
 
     if min_count > 0:
         df = df[df['count'] >= min_count]
 
     # Explicitly set observed=True to avoid FutureWarning
-    grouped = df.groupby(['row', 'col'], observed=True) # Group by row and column
+    grouped = df.groupby(['row_name', 'column_name'], observed=True) # Group by row and column
 
     if grouping == 'mean':
         plate = grouped[variable].mean().reset_index()
@@ -1823,7 +1698,7 @@ def generate_plate_heatmap(df, plate_number, variable, grouping, min_max, min_co
     else:
         raise ValueError(f"Unsupported grouping: {grouping}")
 
-    plate_map = pd.pivot_table(plate, values=variable, index='row', columns='col').fillna(0)
+    plate_map = pd.pivot_table(plate, values=variable, index='row_name', columns='column_name').fillna(0)
 
     if min_max == 'all':
         min_max = [plate_map.min().min(), plate_map.max().max()]
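Note: after the per-well grouping, generate_plate_heatmap pivots the aggregated values into a row-by-column plate map keyed on the renamed columns. A sketch with toy data (pd.pivot_table averages duplicates by default, and unmeasured wells become 0):

    import pandas as pd

    plate = pd.DataFrame({
        'row_name': ['r1', 'r1', 'r2'],
        'column_name': ['c1', 'c2', 'c1'],
        'value': [0.1, 0.5, 0.9],
    })

    plate_map = pd.pivot_table(plate, values='value', index='row_name', columns='column_name').fillna(0)
    print(plate_map)  # 2x2 grid; the empty r2/c2 well is 0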
@@ -1965,81 +1840,6 @@ def print_mask_and_flows(stack, mask, flows, overlay=True, max_size=1000, thickn
 
     fig.tight_layout()
     plt.show()
-
-def print_mask_and_flows_v1(stack, mask, flows, overlay=False, max_size=1000):
-    """
-    Display the original image, mask, and flow with optional resizing for large images.
-
-    Args:
-        stack (np.array): Original image or stack.
-        mask (np.array): Mask image.
-        flows (list): List of flow images.
-        overlay (bool): Whether to overlay the mask on the original image.
-        max_size (int): Maximum allowed size for any dimension of the images.
-    """
-
-    def resize_if_needed(image, max_size):
-        """Resize image if any dimension exceeds max_size while maintaining aspect ratio."""
-        if max(image.shape[:2]) > max_size:
-            scale = max_size / max(image.shape[:2])
-            new_shape = (int(image.shape[0] * scale), int(image.shape[1] * scale))
-            if image.ndim == 3:
-                new_shape += (image.shape[2],)
-            return skimage.transform.resize(image, new_shape, preserve_range=True, anti_aliasing=True).astype(image.dtype)
-        return image
-
-    # Resize if necessary
-    stack = resize_if_needed(stack, max_size)
-    mask = resize_if_needed(mask, max_size)
-    flows = [resize_if_needed(flow, max_size) for flow in flows]
-
-    fig, axs = plt.subplots(1, 3, figsize=(12, 4)) # Adjust subplot layout
-
-    if stack.shape[-1] == 1:
-        stack = np.squeeze(stack)
-
-    # Display original image or its first channel
-    if stack.ndim == 2:
-        axs[0].imshow(stack, cmap='gray')
-    elif stack.ndim == 3:
-        axs[0].imshow(stack)
-    else:
-        raise ValueError("Unexpected stack dimensionality.")
-
-    axs[0].set_title('Original Image')
-    axs[0].axis('off')
-
-
-    # Overlay mask on original image if overlay is True
-    if overlay:
-        mask_cmap = generate_mask_random_cmap(mask) # Generate random colormap for mask
-        mask_overlay = np.ma.masked_where(mask == 0, mask) # Mask background
-        outlines = find_boundaries(mask, mode='thick') # Find mask outlines
-
-        if stack.ndim == 2 or stack.ndim == 3:
-            axs[1].imshow(stack, cmap='gray' if stack.ndim == 2 else None)
-            axs[1].imshow(mask_overlay, cmap=mask_cmap, alpha=0.5) # Overlay mask
-            axs[1].contour(outlines, colors='r', linewidths=2) # Add red outlines with thickness 2
-    else:
-        axs[1].imshow(mask, cmap='gray')
-
-    axs[1].set_title('Mask with Overlay' if overlay else 'Mask')
-    axs[1].axis('off')
-
-    # Display flow image or its first channel
-    if flows and isinstance(flows, list) and flows[0].ndim in [2, 3]:
-        flow_image = flows[0]
-        if flow_image.ndim == 3:
-            flow_image = flow_image[:, :, 0] # Use first channel for 3D
-        axs[2].imshow(flow_image, cmap='jet')
-    else:
-        raise ValueError("Unexpected flow dimensionality or structure.")
-
-    axs[2].set_title('Flows')
-    axs[2].axis('off')
-
-    fig.tight_layout()
-    plt.show()
 
 def plot_resize(images, resized_images, labels, resized_labels):
     # Display an example image and label before and after resizing
@@ -2297,48 +2097,6 @@ def plot_lorenz_curves(csv_files, name_column='grna_name', value_column='count',
     print(f"Saved Lorenz Curve: {save_file_path}")
     plt.show()
 
-def plot_lorenz_curves_v1(csv_files, remove_keys=['TGGT1_220950_1', 'TGGT1_233460_4']):
-
-    def lorenz_curve(data):
-        """Calculate Lorenz curve."""
-        sorted_data = np.sort(data)
-        cumulative_data = np.cumsum(sorted_data)
-        lorenz_curve = cumulative_data / cumulative_data[-1]
-        lorenz_curve = np.insert(lorenz_curve, 0, 0)
-        return lorenz_curve
-
-    combined_data = []
-
-    plt.figure(figsize=(10, 6))
-
-    for idx, csv_file in enumerate(csv_files):
-        if idx == 1:
-            save_fldr = os.path.dirname(csv_file)
-            save_path = os.path.join(save_fldr, 'lorenz_curve.pdf')
-
-        df = pd.read_csv(csv_file)
-        for remove in remove_keys:
-            df = df[df['key'] != remove]
-
-        values = df['value'].values
-        combined_data.extend(values)
-
-        lorenz = lorenz_curve(values)
-        name = os.path.basename(csv_file)[:3]
-        plt.plot(np.linspace(0, 1, len(lorenz)), lorenz, label=name)
-
-    # Plot combined Lorenz curve
-    combined_lorenz = lorenz_curve(np.array(combined_data))
-    plt.plot(np.linspace(0, 1, len(combined_lorenz)), combined_lorenz, label="Combined Lorenz Curve", linestyle='--', color='black')
-
-    plt.title('Lorenz Curves')
-    plt.xlabel('Cumulative Share of Individuals')
-    plt.ylabel('Cumulative Share of Value')
-    plt.legend()
-    plt.grid(False)
-    plt.savefig(save_path)
-    plt.show()
-
 def plot_permutation(permutation_df):
     num_features = len(permutation_df)
     fig_height = max(8, num_features * 0.3) # Set a minimum height of 8 and adjust height based on number of features
@@ -2970,33 +2728,6 @@ class spacrGraph:
 
         # Redraw to apply changes
         ax.figure.canvas.draw()
-
-
-    def _place_symbols_v1(row_labels, transposed_table, x_positions, ax):
-
-        # Get the bottom of the y-axis (y=0) in data coordinates and convert to display coordinates
-        y_axis_min = ax.get_ylim()[0] # Minimum y-axis value (usually 0)
-        symbol_start_y = ax.transData.transform((0, y_axis_min))[1] - 30 # Slightly below the x-axis line
-
-        # Convert to figure coordinates
-        symbol_start_y_fig = ax.transAxes.inverted().transform((0, symbol_start_y))[1]
-
-        # Calculate y-spacing for the table rows (adjust as needed)
-        y_spacing = 0.02 # Control vertical spacing between elements
-
-        # X-coordinate for the row labels at the y-axis and x-axis intersection
-        label_x_pos = ax.get_xlim()[0] - 0.5 # Slightly offset from the y-axis
-
-        # Place the row titles at the y-axis intersection
-        for row_idx, title in enumerate(row_labels):
-            y_pos = symbol_start_y_fig - (row_idx * y_spacing) # Align with row index
-            ax.text(label_x_pos, y_pos, title, ha='right', va='center', fontsize=12, fontweight='regular')
-
-        # Place the symbols under each bar
-        for idx, (x_pos, column_data) in enumerate(zip(x_positions, transposed_table)):
-            for row_idx, text in enumerate(column_data):
-                y_pos = symbol_start_y_fig - (row_idx * y_spacing)
-                ax.text(x_pos, y_pos, text, ha='center', va='center', fontsize=12)
 
     def _get_positions(self, ax):
         if self.graph_type in ['bar','jitter_bar']:
@@ -3549,7 +3280,7 @@ def plot_data_from_db(settings):
         dfs.append(dft)
 
     df = pd.concat(dfs, axis=0)
-    df['prc'] = df['plate'].astype(str) + '_' + df['row'].astype(str) + '_' + df['col'].astype(str)
+    df['prc'] = df['plate'].astype(str) + '_' + df['row_name'].astype(str) + '_' + df['column_name'].astype(str)
     #df['recruitment'] = df['pathogen_channel_1_mean_intensity'] / df['cytoplasm_channel_1_mean_intensity']
     #df['recruitment'] = df['pathogen_channel_1_mean_intensity'] / df['cytoplasm_channel_1_mean_intensity']
     df['class'] = df['png_path'].apply(lambda x: 'class_1' if 'class_1' in x else ('class_0' if 'class_0' in x else None))
spacr/sequencing.py CHANGED
@@ -125,7 +125,7 @@ def process_chunk(chunk_data):
             consensus_sequences.append(consensus_seq)
             column_sequence = match.group('column')
             grna_sequence = match.group('grna')
-            row_sequence = match.group('row')
+            row_sequence = match.group('row_name')
             columns.append(column_sequence)
             grnas.append(grna_sequence)
             rows.append(row_sequence)
@@ -176,7 +176,7 @@ def process_chunk(chunk_data):
             consensus_sequences.append(consensus_seq)
             column_sequence = match.group('column')
             grna_sequence = match.group('grna')
-            row_sequence = match.group('row')
+            row_sequence = match.group('row_name')
             columns.append(column_sequence)
             grnas.append(grna_sequence)
             rows.append(row_sequence)
@@ -532,7 +532,7 @@ def graph_sequencing_stats(settings):
     # Iterate through the fraction thresholds
     for threshold in fraction_thresholds:
         filtered_df = df[df['fraction'] >= threshold]
-        unique_count = filtered_df.groupby(['plate', 'row', 'column'])['grna'].nunique().mean()
+        unique_count = filtered_df.groupby(['plate', 'row_name', 'column'])['grna'].nunique().mean()
         results.append((threshold, unique_count))
 
     results_df = pd.DataFrame(results, columns=['fraction_threshold', 'unique_count'])
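Note: the sweep above recomputes, for each candidate fraction threshold, the mean number of distinct gRNAs surviving per well. A self-contained sketch with toy read fractions:

    import pandas as pd

    df = pd.DataFrame({
        'plate': ['p1'] * 4,
        'row_name': ['r1', 'r1', 'r1', 'r2'],
        'column': ['c1', 'c1', 'c1', 'c1'],
        'grna': ['g1', 'g2', 'g3', 'g1'],
        'fraction': [0.7, 0.2, 0.1, 1.0],
    })

    results = []
    for threshold in [0.05, 0.15, 0.5]:
        filtered_df = df[df['fraction'] >= threshold]
        unique_count = filtered_df.groupby(['plate', 'row_name', 'column'])['grna'].nunique().mean()
        results.append((threshold, unique_count))

    print(pd.DataFrame(results, columns=['fraction_threshold', 'unique_count']))  # 2.0, 1.5, 1.0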
@@ -588,17 +588,21 @@ def graph_sequencing_stats(settings):
     # Apply the closest threshold to the DataFrame
     df = df[df['fraction'] >= closest_threshold]
 
-    # Group by 'plate', 'row', 'column' and compute unique counts of 'grna'
-    unique_counts = df.groupby(['plate', 'row', 'column'])['grna'].nunique().reset_index(name='unique_counts')
-    unique_count_mean = df.groupby(['plate', 'row', 'column'])['grna'].nunique().mean()
-    unique_count_std = df.groupby(['plate', 'row', 'column'])['grna'].nunique().std()
+    # Group by 'plate', 'row_name', 'column' and compute unique counts of 'grna'
+    unique_counts = df.groupby(['plate', 'row_name', 'column'])['grna'].nunique().reset_index(name='unique_counts')
+    unique_count_mean = df.groupby(['plate', 'row_name', 'column'])['grna'].nunique().mean()
+    unique_count_std = df.groupby(['plate', 'row_name', 'column'])['grna'].nunique().std()
 
     # Merge the unique counts back into the original DataFrame
-    df = pd.merge(df, unique_counts, on=['plate', 'row', 'column'], how='left')
+    df = pd.merge(df, unique_counts, on=['plate', 'row_name', 'column'], how='left')
 
     print(f"unique_count mean: {unique_count_mean} std: {unique_count_std}")
-    display(df)
     #_plot_density(df, dependent_variable='unique_counts')
+
+    has_underscore = df['row_name'].str.contains('_').any()
+    if has_underscore:
+        df['row_name'] = df['row_name'].apply(lambda x: x.split('_')[1])
+
     plot_plates(df=df, variable='unique_counts', grouping='mean', min_max='allq', cmap='viridis',min_count=0, verbose=True, dst=dst)
 
     return closest_threshold
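Note: the block added before plot_plates strips any residual 'plate_row' prefix from row_name so wells plot under plain row tokens. Equivalent standalone logic (sample values are illustrative):

    import pandas as pd

    df = pd.DataFrame({'row_name': ['plate1_r1', 'plate1_r2']})

    # If any row_name still embeds a plate prefix, keep only the part after '_'.
    if df['row_name'].str.contains('_').any():
        df['row_name'] = df['row_name'].apply(lambda x: x.split('_')[1])

    print(df['row_name'].tolist())  # ['r1', 'r2']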