spacr 0.3.52__py3-none-any.whl → 0.3.60__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- spacr/gui_elements.py +1 -1
- spacr/gui_utils.py +0 -111
- spacr/io.py +114 -140
- spacr/measure.py +10 -11
- spacr/ml.py +55 -41
- spacr/plot.py +24 -293
- spacr/sequencing.py +13 -9
- spacr/settings.py +15 -9
- spacr/submodules.py +19 -19
- spacr/timelapse.py +16 -16
- spacr/toxo.py +15 -15
- spacr/utils.py +72 -164
- {spacr-0.3.52.dist-info → spacr-0.3.60.dist-info}/METADATA +1 -1
- {spacr-0.3.52.dist-info → spacr-0.3.60.dist-info}/RECORD +18 -18
- {spacr-0.3.52.dist-info → spacr-0.3.60.dist-info}/LICENSE +0 -0
- {spacr-0.3.52.dist-info → spacr-0.3.60.dist-info}/WHEEL +0 -0
- {spacr-0.3.52.dist-info → spacr-0.3.60.dist-info}/entry_points.txt +0 -0
- {spacr-0.3.52.dist-info → spacr-0.3.60.dist-info}/top_level.txt +0 -0
spacr/ml.py
CHANGED
@@ -155,10 +155,6 @@ def process_model_coefficients(model, regression_type, X, y, nc, pc, controls):
     coef_df['condition'] = coef_df.apply(lambda row: 'nc' if nc in row['feature'] else 'pc' if pc in row['feature'] else ('control' if row['grna'] in controls else 'other'),axis=1)
     return coef_df[~coef_df['feature'].str.contains('row|column')]
 
-
-
-
-
 def check_distribution(y):
     """Check the type of distribution to recommend a model."""
     if np.all((y == 0) | (y == 1)):
@@ -195,18 +191,18 @@ def prepare_formula(dependent_variable, random_row_column_effects=False):
     if random_row_column_effects:
         # Random effects for row and column + gene weighted by gene_fraction + grna weighted by fraction
         return f'{dependent_variable} ~ fraction:grna + gene_fraction:gene'
-    return f'{dependent_variable} ~ fraction:grna + gene_fraction:gene + row + column'
+    return f'{dependent_variable} ~ fraction:grna + gene_fraction:gene + row_name + column_name'
 
 def fit_mixed_model(df, formula, dst):
     from .plot import plot_histogram
 
-    """Fit the mixed model with plate, row, and column as random effects and return results."""
+    """Fit the mixed model with plate, row_name, and column_name as random effects and return results."""
     # Specify random effects for plate, row, and column
     model = smf.mixedlm(formula,
                         data=df,
                         groups=df['plate'],
-                        re_formula="1 + row + column",
-                        vc_formula={"row": "0 + row", "column": "0 + column"})
+                        re_formula="1 + row_name + column_name",
+                        vc_formula={"row_name": "0 + row_name", "column_name": "0 + column_name"})
 
     mixed_model = model.fit()
 
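The hunk above only renames the columns feeding the random-effects structure. For readers unfamiliar with the statsmodels call pattern, here is a minimal, self-contained sketch on fabricated data (not spacr's code; it uses a simpler variance-components formulation than the full `re_formula` shown above):

```python
import numpy as np
import pandas as pd
import statsmodels.formula.api as smf

# Fabricated 4-plate, 4x8-well layout purely for illustration.
rng = np.random.default_rng(0)
grid = [(p, f'r{r}', f'c{c}') for p in ('p1', 'p2', 'p3', 'p4')
        for r in range(1, 5) for c in range(1, 9)]
df = pd.DataFrame(grid, columns=['plate', 'row_name', 'column_name'])
df['fraction'] = rng.uniform(size=len(df))
df['score'] = rng.normal(size=len(df))

# Random intercept per plate, plus row/column variance components.
model = smf.mixedlm('score ~ fraction', data=df, groups=df['plate'],
                    re_formula='1',
                    vc_formula={'row_name': '0 + C(row_name)',
                                'column_name': '0 + C(column_name)'})
print(model.fit().summary())
```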
@@ -288,7 +284,7 @@ def check_and_clean_data(df, dependent_variable):
     df = handle_missing_values(df, ['fraction', dependent_variable])
 
     # Step 2: Ensure grna, gene, plate, row, column, and prc are categorical types
-    df = ensure_valid_types(df, ['grna', 'gene', 'plate', 'row', 'column', 'prc'])
+    df = ensure_valid_types(df, ['grna', 'gene', 'plate', 'row_name', 'column_name', 'prc'])
 
     # Step 3: Check for multicollinearity in fraction and the dependent variable
     df_cleaned = check_collinearity(df, ['fraction', dependent_variable])
@@ -298,8 +294,8 @@ def check_and_clean_data(df, dependent_variable):
     df_cleaned['grna'] = df['grna']
     df_cleaned['prc'] = df['prc']
     df_cleaned['plate'] = df['plate']
-    df_cleaned['row'] = df['row']
-    df_cleaned['column'] = df['column']
+    df_cleaned['row_name'] = df['row_name']
+    df_cleaned['column_name'] = df['column']
 
     # Create a new column 'gene_fraction' that sums the fractions by gene within the same well
     df_cleaned['gene_fraction'] = df_cleaned.groupby(['prc', 'gene'])['fraction'].transform('sum')
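The `gene_fraction` line above uses pandas' split-apply-combine `transform`, which broadcasts a per-group sum back onto every row. A fabricated example of the pattern:

```python
import pandas as pd

df = pd.DataFrame({'prc':      ['p1_r1_c1', 'p1_r1_c1', 'p1_r1_c1', 'p1_r1_c2'],
                   'gene':     ['geneA', 'geneA', 'geneB', 'geneA'],
                   'fraction': [0.2, 0.3, 0.5, 1.0]})
# Each row receives the summed fraction of its (well, gene) group.
df['gene_fraction'] = df.groupby(['prc', 'gene'])['fraction'].transform('sum')
print(df['gene_fraction'].tolist())  # [0.5, 0.5, 0.5, 1.0]
```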
@@ -337,7 +333,7 @@ def minimum_cell_simulation(settings, num_repeats=10, sample_size=100, tolerance
         df = pd.read_csv(score_data)
         df = correct_metadata_column_names(df)
         df['plate'] = f'plate{i + 1}'
-        df['prc'] = df['plate'] + '_' + df['row'].astype(str) + '_' + df['column'].astype(str)
+        df['prc'] = df['plate'] + '_' + df['row_name'].astype(str) + '_' + df['column'].astype(str)
         dfs.append(df)
 
     df = pd.concat(dfs, axis=0)
@@ -635,9 +631,9 @@ def regression(df, csv_path, dependent_variable='predictions', regression_type=N
                random_row_column_effects=False, nc='233460', pc='220950', controls=[''],
                dst=None, cov_type=None, plot=False):
 
-    from
-    from
-
+    from .plot import volcano_plot, plot_histogram
+    #from .ml import create_volcano_filename, check_and_clean_data, prepare_formula, scale_variables
+
     # Generate the volcano filename
     volcano_path = create_volcano_filename(csv_path, regression_type, alpha, dst)
 
@@ -706,18 +702,16 @@ def perform_regression(settings):
     def _perform_regression_read_data(settings):
 
         if isinstance(settings['score_data'], list) and isinstance(settings['count_data'], list):
-            settings['plate'] = None
             if len(settings['score_data']) == 1:
-
-
-                settings['count_data'] = settings['count_data'][0]
+                count_data_df = pd.read_csv(settings['count_data'][0])
+                score_data_df = pd.read_csv(settings['score_data'][0])
             else:
                 count_data_df = pd.DataFrame()
                 for i, count_data in enumerate(settings['count_data']):
                     df = pd.read_csv(count_data)
                     df['plate_name'] = f'plate{i+1}'
                     if 'column' in df.columns:
-                        df['col'] = df['column']
+                        df['column_name'] = df['column']
                     count_data_df = pd.concat([count_data_df, df])
                 print('Count data:', len(count_data_df))
@@ -726,7 +720,7 @@ def perform_regression(settings):
                 df = pd.read_csv(score_data)
                 df['plate_name'] = f'plate{i+1}'
                 if 'column' in df.columns:
-                    df['col'] = df['column']
+                    df['column_name'] = df['column']
                 score_data_df = pd.concat([score_data_df, df])
             print('Score data:', len(score_data_df))
         else:
@@ -806,9 +800,23 @@ def perform_regression(settings):
             return df, n_gene
         else:
             return df
-
+
     settings = get_perform_regression_default_settings(settings)
     count_data_df, score_data_df = _perform_regression_read_data(settings)
+
+    if "row_name" in count_data_df.columns:
+        num_parts = len(count_data_df['row_name'].iloc[0].split('_'))
+        if num_parts == 2:
+            split = count_data_df['row_name'].str.split('_', expand=True)
+            count_data_df['row_name'] = split[1]
+
+    if "prc" in score_data_df.columns:
+        num_parts = len(score_data_df['prc'].iloc[0].split('_'))
+        if num_parts == 3:
+            split = score_data_df['prc'].str.split('_', expand=True)
+            score_data_df['plate'] = settings['plate']
+            score_data_df['prc'] = score_data_df['plate'] + '_' + split[1] + '_' + split[2]
+
     results_path, results_path_gene, results_path_grna, hits_path, res_folder, csv_path = _perform_regression_set_paths(settings)
     save_settings(settings, name='regression', show=True)
 
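The new block normalizes `row_name` and `prc` values that arrive with a plate prefix baked in. The mechanism is pandas' `str.split(..., expand=True)`; a fabricated illustration (the column values are invented):

```python
import pandas as pd

score = pd.DataFrame({'prc': ['plateX_r1_c1', 'plateX_r2_c3']})
split = score['prc'].str.split('_', expand=True)  # frame with columns 0, 1, 2
score['plate'] = 'plate1'                         # e.g. settings['plate']
score['prc'] = score['plate'] + '_' + split[1] + '_' + split[2]
print(score['prc'].tolist())  # ['plate1_r1_c1', 'plate1_r2_c3']
```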
@@ -849,7 +857,7 @@ def perform_regression(settings):
     merged_df.to_csv(data_path, index=False)
     print(f"Saved regression data to {data_path}")
 
-    merged_df[['plate', 'row', 'column']] = merged_df['prc'].str.split('_', expand=True)
+    merged_df[['plate', 'row_name', 'column']] = merged_df['prc'].str.split('_', expand=True)
 
     _ = plot_plates(merged_df, variable=orig_dv, grouping='mean', min_max='allq', cmap='viridis', min_count=None, dst=res_folder)
 
@@ -857,6 +865,7 @@ def perform_regression(settings):
 
     coef_df['grna'] = coef_df['feature'].apply(lambda x: re.search(r'grna\[(.*?)\]', x).group(1) if 'grna' in x else None)
     coef_df['gene'] = coef_df['feature'].apply(lambda x: re.search(r'gene\[(.*?)\]', x).group(1) if 'gene' in x else None)
+
     coef_df = coef_df.merge(n_grna, how='left', on='grna')
     coef_df = coef_df.merge(n_gene, how='left', on='gene')
 
@@ -903,7 +912,6 @@ def perform_regression(settings):
     save_summary_to_file(model, file_path=f'{res_folder}/mode_summary.csv')
 
     significant.to_csv(hits_path, index=False)
-
     significant_grna_filtered = significant[significant['n_grna'] > settings['min_n']]
     significant_gene_filtered = significant[significant['n_gene'] > settings['min_n']]
     significant_filtered = pd.concat([significant_grna_filtered, significant_gene_filtered])
@@ -928,8 +936,6 @@ def perform_regression(settings):
     base_dir = os.path.dirname(os.path.abspath(__file__))
     metadata_path = os.path.join(base_dir, 'resources', 'data', 'lopit.csv')
 
-    display(data_path)
-
     if settings['volcano'] == 'all':
         print('all')
         gene_list = custom_volcano_plot(data_path, metadata_path, metadata_column='tagm_location', point_size=600, figsize=20, threshold=reg_threshold, save_path=volcano_path, x_lim=settings['x_lim'],y_lims=settings['y_lims'])
@@ -982,14 +988,14 @@ def process_reads(csv_path, fraction_threshold, plate, filter_column=None, filte
         csv_df = csv_df.rename(columns={'plate_name': 'plate'})
     if 'column_name' in csv_df.columns:
         csv_df = csv_df.rename(columns={'column_name': 'column'})
-    if 'col' in csv_df.columns:
-        csv_df = csv_df.rename(columns={'col': 'column'})
+    if 'column_name' in csv_df.columns:
+        csv_df = csv_df.rename(columns={'column_name': 'column'})
     if 'row_name' in csv_df.columns:
-        csv_df = csv_df.rename(columns={'row_name': 'row'})
+        csv_df = csv_df.rename(columns={'row_name': 'row_name'})
     if 'grna_name' in csv_df.columns:
         csv_df = csv_df.rename(columns={'grna_name': 'grna'})
     if 'plate_row' in csv_df.columns:
-        csv_df[['plate', 'row']] = csv_df['plate_row'].str.split('_', expand=True)
+        csv_df[['plate', 'row_name']] = csv_df['plate_row'].str.split('_', expand=True)
 
     if not 'plate' in csv_df.columns:
         if not plate is None:
@@ -1009,11 +1015,11 @@ def process_reads(csv_path, fraction_threshold, plate, filter_column=None, filte
             csv_df = csv_df[csv_df[filter_col] != value]
 
     # Ensure the necessary columns are present
-    if not all(col in csv_df.columns for col in ['row','column','grna','count']):
-        raise ValueError("The CSV file must contain 'grna', 'count', 'row', and 'column' columns.")
+    if not all(col in csv_df.columns for col in ['row_name','column','grna','count']):
+        raise ValueError("The CSV file must contain 'grna', 'count', 'row_name', and 'column' columns.")
 
     # Create the prc column
-    csv_df['prc'] = csv_df['plate'] + '_' + csv_df['row'] + '_' + csv_df['column']
+    csv_df['prc'] = csv_df['plate'] + '_' + csv_df['row_name'] + '_' + csv_df['column']
 
     # Group by prc and calculate the sum of counts
     grouped_df = csv_df.groupby('prc')['count'].sum().reset_index()
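`process_reads` builds a plate_row_column key and totals counts per well; the `fraction_threshold` parameter suggests per-gRNA read fractions are derived from those totals downstream. A fabricated sketch of that bookkeeping (the fraction step is an assumption, not a quote of spacr's code):

```python
import pandas as pd

csv_df = pd.DataFrame({'plate':    ['p1', 'p1', 'p1'],
                       'row_name': ['r1', 'r1', 'r2'],
                       'column':   ['c1', 'c1', 'c1'],
                       'grna':     ['g1', 'g2', 'g1'],
                       'count':    [90, 10, 40]})
csv_df['prc'] = csv_df['plate'] + '_' + csv_df['row_name'] + '_' + csv_df['column']
totals = csv_df.groupby('prc')['count'].sum().rename('total')
csv_df = csv_df.merge(totals, on='prc')
csv_df['fraction'] = csv_df['count'] / csv_df['total']  # g1 in p1_r1_c1 -> 0.9
```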
@@ -1075,19 +1081,27 @@ def clean_controls(df,values, column):
     return df
 
 def process_scores(df, dependent_variable, plate, min_cell_count=25, agg_type='mean', transform=None, regression_type='ols'):
-
+
     if 'plate_name' in df.columns:
         df.drop(columns=['plate'], inplace=True)
         df = df.rename(columns={'plate_name': 'plate'})
-
+
+    if 'row' in df.columns:
+        df = df.rename(columns={'row': 'row_name'})
+    if 'col' in df.columns:
+        df = df.rename(columns={'row': 'column_name'})
+
     if plate is not None:
         df['plate'] = plate
 
-    if 'col' not in df.columns:
-        df['col'] = df['column']
+    if 'column_name' not in df.columns:
+        df['column_name'] = df['column']
 
-    df['prc'] = df['plate'].astype(str) + '_' + df['row'].astype(str) + '_' + df['col'].astype(str)
+    df['prc'] = df['plate'].astype(str) + '_' + df['row_name'].astype(str) + '_' + df['column_name'].astype(str)
 
+    display(df)
+
+
     df = df[['prc', dependent_variable]]
 
     # Group by prc and calculate the mean and count of the dependent_variable
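`process_scores` then reduces the per-cell table to per-well aggregates gated by `min_cell_count`. A fabricated sketch of that reduction (simplified; the real function also supports other `agg_type` values and transforms):

```python
import pandas as pd

df = pd.DataFrame({'prc':         ['p1_r1_c1'] * 30 + ['p1_r1_c2'] * 5,
                   'predictions': [0.8] * 30 + [0.1] * 5})
agg = df.groupby('prc')['predictions'].agg(['mean', 'count']).reset_index()
agg = agg[agg['count'] >= 25]  # drop wells below min_cell_count
print(agg)                     # only p1_r1_c1 survives
```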
@@ -1257,7 +1271,7 @@ def generate_ml_scores(settings):
 
     return [output, plate_heatmap]
 
-def ml_analysis(df, channel_of_interest=3, location_column='col', positive_control='c2', negative_control='c1', exclude=None, n_repeats=10, top_features=30, n_estimators=100, test_size=0.2, model_type='xgboost', n_jobs=-1, remove_low_variance_features=True, remove_highly_correlated_features=True, verbose=False):
+def ml_analysis(df, channel_of_interest=3, location_column='column_name', positive_control='c2', negative_control='c1', exclude=None, n_repeats=10, top_features=30, n_estimators=100, test_size=0.2, model_type='xgboost', n_jobs=-1, remove_low_variance_features=True, remove_highly_correlated_features=True, verbose=False):
 
     """
     Calculates permutation importance for numerical features in the dataframe,
@@ -1403,8 +1417,8 @@ def ml_analysis(df, channel_of_interest=3, location_column='col', positive_contr
     df = _calculate_similarity(df, features, location_column, positive_control, negative_control)
 
     df['prcfo'] = df.index.astype(str)
-    df[['plate', 'row', 'col', 'field', 'object']] = df['prcfo'].str.split('_', expand=True)
-    df['prc'] = df['plate'] + '_' + df['row'] + '_' + df['col']
+    df[['plate', 'row_name', 'column_name', 'field', 'object']] = df['prcfo'].str.split('_', expand=True)
+    df['prc'] = df['plate'] + '_' + df['row_name'] + '_' + df['column_name']
 
     return [df, permutation_df, feature_importance_df, model, X_train, X_test, y_train, y_test, metrics_df], [permutation_fig, feature_importance_fig]
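`ml_analysis`'s docstring says it computes permutation importance; a minimal scikit-learn sketch of that idea on fabricated data (spacr's own implementation differs in detail and also supports xgboost):

```python
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.inspection import permutation_importance
from sklearn.model_selection import train_test_split

rng = np.random.default_rng(0)
X = rng.normal(size=(200, 5))
y = (X[:, 0] + 0.1 * rng.normal(size=200) > 0).astype(int)  # only feature 0 matters

X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.2, random_state=0)
model = RandomForestClassifier(n_estimators=100, random_state=0).fit(X_tr, y_tr)
result = permutation_importance(model, X_te, y_te, n_repeats=10, random_state=0)
print(result.importances_mean)  # feature 0 should dominate
```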
spacr/plot.py
CHANGED
@@ -366,146 +366,6 @@ def plot_image_mask_overlay(
 
     return fig
 
-def plot_image_mask_overlay_v1(file, channels, cell_channel, nucleus_channel, pathogen_channel, figuresize=10, percentiles=(2,98), thickness=3, save_pdf=True, mode='outlines', export_tiffs=False):
-    """Plot image and mask overlays."""
-
-    def _plot_merged_plot(image, outlines, outline_colors, figuresize, thickness, percentiles, mode='outlines'):
-        """Plot the merged plot with overlay, image channels, and masks."""
-
-        def _generate_colored_mask(mask, alpha):
-            """ Generate a colored mask with transparency using the given colormap. """
-            cmap = generate_mask_random_cmap(mask)
-            rgba_mask = cmap(mask / mask.max()) # Normalize mask and map to colormap (RGBA)
-            rgba_mask[..., 3] = np.where(mask > 0, alpha, 0) # Apply transparency only where mask is present
-            return rgba_mask
-
-        def _overlay_mask(image, mask):
-            """Overlay the colored mask onto the original image."""
-            combined = np.clip(image + mask[..., :3] * mask[..., 3:4], 0, 1) # Ensure pixel values stay in [0, 1]
-            return combined
-
-        def _normalize_image(image, percentiles=(2, 98)):
-            """Normalize the image to the given percentiles."""
-            v_min, v_max = np.percentile(image, percentiles)
-            image_normalized = np.clip((image - v_min) / (v_max - v_min), 0, 1)
-            return image_normalized
-
-        def _generate_contours(mask):
-            """Generate contours for the given mask using OpenCV."""
-            contours, _ = cv2.findContours(mask.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
-            return contours
-
-        def _apply_contours(image, mask, color, thickness):
-            """Apply the contours to the RGB image for each unique label."""
-            unique_labels = np.unique(mask)
-            for label in unique_labels:
-                if label == 0:
-                    continue # Skip background
-                label_mask = np.where(mask == label, 1, 0).astype(np.uint8)
-                contours = _generate_contours(label_mask)
-                for contour in contours:
-                    cv2.drawContours(image, [contour], -1, mpl.colors.to_rgb(color), thickness)
-            return image
-
-        num_channels = image.shape[-1]
-        fig, ax = plt.subplots(1, num_channels + 1, figsize=(4 * figuresize, figuresize))
-
-        # Plot each channel with its corresponding outlines
-        for v in range(num_channels):
-            channel_image = image[..., v]
-            channel_image_normalized = _normalize_image(channel_image, percentiles)
-            channel_image_rgb = np.dstack((channel_image_normalized, channel_image_normalized, channel_image_normalized))
-
-            for outline, color in zip(outlines, outline_colors):
-                if mode == 'outlines':
-                    channel_image_rgb = _apply_contours(channel_image_rgb, outline, color, thickness)
-                else:
-                    mask = _generate_colored_mask(outline, alpha=0.5)
-                    channel_image_rgb = _overlay_mask(channel_image_rgb, mask)
-
-            ax[v].imshow(channel_image_rgb)
-            ax[v].set_title(f'Image - Channel {v}')
-
-        # Plot the combined RGB image with all outlines
-        rgb_image = np.zeros((*image.shape[:2], 3), dtype=float)
-        rgb_channels = min(3, num_channels)
-        for i in range(rgb_channels):
-            channel_image = image[..., i]
-            channel_image_normalized = _normalize_image(channel_image, percentiles)
-            rgb_image[..., i] = channel_image_normalized
-
-        for outline, color in zip(outlines, outline_colors):
-            if mode == 'outlines':
-                rgb_image = _apply_contours(rgb_image, outline, color, thickness)
-            else:
-                mask = _generate_colored_mask(outline, alpha=0.5)
-                rgb_image = _overlay_mask(rgb_image, mask)
-
-        ax[-1].imshow(rgb_image)
-        ax[-1].set_title('Combined RGB Image')
-
-        plt.tight_layout()
-
-        # Save the figure as a PDF
-        if save_pdf:
-            pdf_dir = os.path.join(os.path.dirname(os.path.dirname(file)), 'results', 'overlay')
-            os.makedirs(pdf_dir, exist_ok=True)
-            pdf_path = os.path.join(pdf_dir, os.path.basename(file).replace('.npy', '.pdf'))
-            fig.savefig(pdf_path, format='pdf')
-
-        plt.show()
-        return fig
-
-    def _save_channels_as_tiff(stack, save_dir, filename):
-        """Save each channel in the stack as a grayscale TIFF."""
-        os.makedirs(save_dir, exist_ok=True)
-        for i in range(stack.shape[-1]):
-            channel = stack[..., i]
-            tiff_path = os.path.join(save_dir, f"{filename}_channel_{i}.tiff")
-            tiff.imwrite(tiff_path, channel, photometric='minisblack')
-            print(f"Saved {tiff_path}")
-
-    stack = np.load(file)
-
-    if export_tiffs:
-        save_dir = os.path.join(os.path.dirname(os.path.dirname(file)), 'results', os.path.splitext(os.path.basename(file))[0], 'tiff')
-        filename = os.path.splitext(os.path.basename(file))[0]
-        _save_channels_as_tiff(stack, save_dir, filename)
-
-    # Convert to float for normalization and ensure correct handling of both 8-bit and 16-bit arrays
-    if stack.dtype == np.uint16:
-        stack = stack.astype(np.float32)
-    elif stack.dtype == np.uint8:
-        stack = stack.astype(np.float32)
-
-    image = stack[..., channels]
-    outlines = []
-    outline_colors = []
-
-    if pathogen_channel is not None:
-        pathogen_mask_dim = -1 # last dimension
-        outlines.append(np.take(stack, pathogen_mask_dim, axis=2))
-        outline_colors.append('blue')
-
-    if nucleus_channel is not None:
-        nucleus_mask_dim = -2 if pathogen_channel is not None else -1
-        outlines.append(np.take(stack, nucleus_mask_dim, axis=2))
-        outline_colors.append('green')
-
-    if cell_channel is not None:
-        if nucleus_channel is not None and pathogen_channel is not None:
-            cell_mask_dim = -3
-        elif nucleus_channel is not None or pathogen_channel is not None:
-            cell_mask_dim = -2
-        else:
-            cell_mask_dim = -1
-        outlines.append(np.take(stack, cell_mask_dim, axis=2))
-        outline_colors.append('red')
-
-    fig = _plot_merged_plot(image=image, outlines=outlines, outline_colors=outline_colors, figuresize=figuresize, thickness=thickness, percentiles=percentiles, mode=mode)
-
-    return fig
-
 def plot_masks(batch, masks, flows, cmap='inferno', figuresize=10, nr=1, file_type='.npz', print_object_number=True):
     """
     Plot the masks and flows for a given batch of images.
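The removed v1 helper drew per-label OpenCV contours over image channels; the surviving `plot_image_mask_overlay` covers that use case. For reference, here is the core contour-overlay pattern in isolation (a fabricated sketch, not spacr's code):

```python
import cv2
import numpy as np

def outline_labels(image_rgb, mask, color=(1.0, 0.0, 0.0), thickness=2):
    """Draw a contour around every nonzero label in `mask` onto a float RGB image."""
    out = image_rgb.copy()
    for label in np.unique(mask):
        if label == 0:
            continue  # background
        binary = (mask == label).astype(np.uint8)
        contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        cv2.drawContours(out, contours, -1, color, thickness)
    return out

image = np.zeros((64, 64, 3), dtype=np.float32)
labels = np.zeros((64, 64), dtype=np.int32)
labels[10:30, 10:30] = 1
overlaid = outline_labels(image, labels)
```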
@@ -1793,25 +1653,40 @@ def generate_plate_heatmap(df, plate_number, variable, grouping, min_max, min_co
     if not isinstance(min_count, (int, float)):
         min_count = 0
 
-
-
+    # Check the number of parts in 'prc'
+    num_parts = len(df['prc'].iloc[0].split('_'))
+    if num_parts == 4:
+        split = df['prc'].str.split('_', expand=True)
+        df['row_name'] = split[2]
+        df['prc'] = f"{plate_number}" + '_' + split[2] + '_' + split[3]
+
+    # Construct 'prc' based on 'plate', 'row_name', and 'column' columns
+    #df['prc'] = df['plate'].astype(str) + '_' + df['row_name'].astype(str) + '_' + df['column'].astype(str)
+
+    if 'column_name' not in df.columns:
+        if 'column' in df.columns:
+            df['column_name'] = df['column']
+    if 'column_name' in df.columns:
+        df['column_name'] = df['column_name']
+
+    df['plate'], df['row_name'], df['column_name'] = zip(*df['prc'].str.split('_'))
 
     # Filtering the dataframe based on the plate_number
     df = df[df['plate'] == plate_number].copy() # Create another copy after filtering
-
+
     # Ensure proper ordering
     row_order = [f'r{i}' for i in range(1, 17)]
     col_order = [f'c{i}' for i in range(1, 28)] # Exclude c15 as per your earlier code
 
-    df['row'] = pd.Categorical(df['row'], categories=row_order, ordered=True)
-    df['column'] = pd.Categorical(df['column'], categories=col_order, ordered=True)
-    df['count'] = df.groupby(['row', 'column'])['row'].transform('count')
+    df['row_name'] = pd.Categorical(df['row_name'], categories=row_order, ordered=True)
+    df['column_name'] = pd.Categorical(df['column_name'], categories=col_order, ordered=True)
+    df['count'] = df.groupby(['row_name', 'column_name'])['row_name'].transform('count')
 
     if min_count > 0:
         df = df[df['count'] >= min_count]
 
     # Explicitly set observed=True to avoid FutureWarning
-    grouped = df.groupby(['row', 'column'], observed=True) # Group by row and column
+    grouped = df.groupby(['row_name', 'column_name'], observed=True) # Group by row and column
 
     if grouping == 'mean':
         plate = grouped[variable].mean().reset_index()
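The rewritten heatmap code orders wells with pandas ordered `Categorical`s so that r10 sorts after r2 (a plain string sort would not), and passes `observed=True` so empty categories stay out of the groupby. A fabricated example:

```python
import pandas as pd

df = pd.DataFrame({'row_name': ['r10', 'r2', 'r1'], 'val': [3, 2, 1]})
row_order = [f'r{i}' for i in range(1, 17)]
df['row_name'] = pd.Categorical(df['row_name'], categories=row_order, ordered=True)
print(df.sort_values('row_name')['row_name'].tolist())      # ['r1', 'r2', 'r10']
print(df.groupby('row_name', observed=True)['val'].mean())  # no empty rows r3..r16
```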
@@ -1823,7 +1698,7 @@ def generate_plate_heatmap(df, plate_number, variable, grouping, min_max, min_co
     else:
         raise ValueError(f"Unsupported grouping: {grouping}")
 
-    plate_map = pd.pivot_table(plate, values=variable, index='row', columns='column').fillna(0)
+    plate_map = pd.pivot_table(plate, values=variable, index='row_name', columns='column_name').fillna(0)
 
     if min_max == 'all':
         min_max = [plate_map.min().min(), plate_map.max().max()]
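The pivot step turns the long-form per-well aggregate into a rows-by-columns plate map; a fabricated example of the same call:

```python
import pandas as pd

plate = pd.DataFrame({'row_name':    ['r1', 'r1', 'r2'],
                      'column_name': ['c1', 'c2', 'c1'],
                      'score':       [0.1, 0.4, 0.7]})
plate_map = pd.pivot_table(plate, values='score',
                           index='row_name', columns='column_name').fillna(0)
print(plate_map)  # 2x2 grid; the missing r2/c2 well becomes 0
```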
@@ -1965,81 +1840,6 @@ def print_mask_and_flows(stack, mask, flows, overlay=True, max_size=1000, thickn
 
     fig.tight_layout()
     plt.show()
-
-def print_mask_and_flows_v1(stack, mask, flows, overlay=False, max_size=1000):
-    """
-    Display the original image, mask, and flow with optional resizing for large images.
-
-    Args:
-        stack (np.array): Original image or stack.
-        mask (np.array): Mask image.
-        flows (list): List of flow images.
-        overlay (bool): Whether to overlay the mask on the original image.
-        max_size (int): Maximum allowed size for any dimension of the images.
-    """
-
-    def resize_if_needed(image, max_size):
-        """Resize image if any dimension exceeds max_size while maintaining aspect ratio."""
-        if max(image.shape[:2]) > max_size:
-            scale = max_size / max(image.shape[:2])
-            new_shape = (int(image.shape[0] * scale), int(image.shape[1] * scale))
-            if image.ndim == 3:
-                new_shape += (image.shape[2],)
-            return skimage.transform.resize(image, new_shape, preserve_range=True, anti_aliasing=True).astype(image.dtype)
-        return image
-
-    # Resize if necessary
-    stack = resize_if_needed(stack, max_size)
-    mask = resize_if_needed(mask, max_size)
-    flows = [resize_if_needed(flow, max_size) for flow in flows]
-
-    fig, axs = plt.subplots(1, 3, figsize=(12, 4)) # Adjust subplot layout
-
-    if stack.shape[-1] == 1:
-        stack = np.squeeze(stack)
-
-    # Display original image or its first channel
-    if stack.ndim == 2:
-        axs[0].imshow(stack, cmap='gray')
-    elif stack.ndim == 3:
-        axs[0].imshow(stack)
-    else:
-        raise ValueError("Unexpected stack dimensionality.")
-
-    axs[0].set_title('Original Image')
-    axs[0].axis('off')
-
-
-    # Overlay mask on original image if overlay is True
-    if overlay:
-        mask_cmap = generate_mask_random_cmap(mask) # Generate random colormap for mask
-        mask_overlay = np.ma.masked_where(mask == 0, mask) # Mask background
-        outlines = find_boundaries(mask, mode='thick') # Find mask outlines
-
-        if stack.ndim == 2 or stack.ndim == 3:
-            axs[1].imshow(stack, cmap='gray' if stack.ndim == 2 else None)
-            axs[1].imshow(mask_overlay, cmap=mask_cmap, alpha=0.5) # Overlay mask
-            axs[1].contour(outlines, colors='r', linewidths=2) # Add red outlines with thickness 2
-    else:
-        axs[1].imshow(mask, cmap='gray')
-
-    axs[1].set_title('Mask with Overlay' if overlay else 'Mask')
-    axs[1].axis('off')
-
-    # Display flow image or its first channel
-    if flows and isinstance(flows, list) and flows[0].ndim in [2, 3]:
-        flow_image = flows[0]
-        if flow_image.ndim == 3:
-            flow_image = flow_image[:, :, 0] # Use first channel for 3D
-        axs[2].imshow(flow_image, cmap='jet')
-    else:
-        raise ValueError("Unexpected flow dimensionality or structure.")
-
-    axs[2].set_title('Flows')
-    axs[2].axis('off')
-
-    fig.tight_layout()
-    plt.show()
 
 def plot_resize(images, resized_images, labels, resized_labels):
     # Display an example image and label before and after resizing
@@ -2297,48 +2097,6 @@ def plot_lorenz_curves(csv_files, name_column='grna_name', value_column='count',
     print(f"Saved Lorenz Curve: {save_file_path}")
     plt.show()
 
-def plot_lorenz_curves_v1(csv_files, remove_keys=['TGGT1_220950_1', 'TGGT1_233460_4']):
-
-    def lorenz_curve(data):
-        """Calculate Lorenz curve."""
-        sorted_data = np.sort(data)
-        cumulative_data = np.cumsum(sorted_data)
-        lorenz_curve = cumulative_data / cumulative_data[-1]
-        lorenz_curve = np.insert(lorenz_curve, 0, 0)
-        return lorenz_curve
-
-    combined_data = []
-
-    plt.figure(figsize=(10, 6))
-
-    for idx, csv_file in enumerate(csv_files):
-        if idx == 1:
-            save_fldr = os.path.dirname(csv_file)
-            save_path = os.path.join(save_fldr, 'lorenz_curve.pdf')
-
-        df = pd.read_csv(csv_file)
-        for remove in remove_keys:
-            df = df[df['key'] != remove]
-
-        values = df['value'].values
-        combined_data.extend(values)
-
-        lorenz = lorenz_curve(values)
-        name = os.path.basename(csv_file)[:3]
-        plt.plot(np.linspace(0, 1, len(lorenz)), lorenz, label=name)
-
-    # Plot combined Lorenz curve
-    combined_lorenz = lorenz_curve(np.array(combined_data))
-    plt.plot(np.linspace(0, 1, len(combined_lorenz)), combined_lorenz, label="Combined Lorenz Curve", linestyle='--', color='black')
-
-    plt.title('Lorenz Curves')
-    plt.xlabel('Cumulative Share of Individuals')
-    plt.ylabel('Cumulative Share of Value')
-    plt.legend()
-    plt.grid(False)
-    plt.savefig(save_path)
-    plt.show()
-
 def plot_permutation(permutation_df):
     num_features = len(permutation_df)
     fig_height = max(8, num_features * 0.3) # Set a minimum height of 8 and adjust height based on number of features
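The removed v1 helper computed Lorenz curves of gRNA read counts; the surviving `plot_lorenz_curves` keeps that math. For reference, here is the curve plus the Gini coefficient it implies, on fabricated counts:

```python
import numpy as np

def lorenz_curve(data):
    """Cumulative share of total, ascending; starts at 0 and ends at 1."""
    cumulative = np.cumsum(np.sort(data))
    return np.insert(cumulative / cumulative[-1], 0, 0)

counts = np.array([1, 1, 2, 5, 50])  # a few gRNAs dominate the reads
curve = lorenz_curve(counts)
# Gini = 1 - 2 * (area under the Lorenz curve), via the trapezoidal rule.
dx = 1 / (len(curve) - 1)
area = ((curve[:-1] + curve[1:]) / 2).sum() * dx
print(round(1 - 2 * area, 2))  # 0.69 -> highly unequal distribution
```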
@@ -2970,33 +2728,6 @@ class spacrGraph:
 
         # Redraw to apply changes
        ax.figure.canvas.draw()
-
-
-    def _place_symbols_v1(row_labels, transposed_table, x_positions, ax):
-
-        # Get the bottom of the y-axis (y=0) in data coordinates and convert to display coordinates
-        y_axis_min = ax.get_ylim()[0] # Minimum y-axis value (usually 0)
-        symbol_start_y = ax.transData.transform((0, y_axis_min))[1] - 30 # Slightly below the x-axis line
-
-        # Convert to figure coordinates
-        symbol_start_y_fig = ax.transAxes.inverted().transform((0, symbol_start_y))[1]
-
-        # Calculate y-spacing for the table rows (adjust as needed)
-        y_spacing = 0.02 # Control vertical spacing between elements
-
-        # X-coordinate for the row labels at the y-axis and x-axis intersection
-        label_x_pos = ax.get_xlim()[0] - 0.5 # Slightly offset from the y-axis
-
-        # Place the row titles at the y-axis intersection
-        for row_idx, title in enumerate(row_labels):
-            y_pos = symbol_start_y_fig - (row_idx * y_spacing) # Align with row index
-            ax.text(label_x_pos, y_pos, title, ha='right', va='center', fontsize=12, fontweight='regular')
-
-        # Place the symbols under each bar
-        for idx, (x_pos, column_data) in enumerate(zip(x_positions, transposed_table)):
-            for row_idx, text in enumerate(column_data):
-                y_pos = symbol_start_y_fig - (row_idx * y_spacing)
-                ax.text(x_pos, y_pos, text, ha='center', va='center', fontsize=12)
 
     def _get_positions(self, ax):
         if self.graph_type in ['bar','jitter_bar']:
@@ -3549,7 +3280,7 @@ def plot_data_from_db(settings):
         dfs.append(dft)
 
     df = pd.concat(dfs, axis=0)
-    df['prc'] = df['plate'].astype(str) + '_' + df['row'].astype(str) + '_' + df['col'].astype(str)
+    df['prc'] = df['plate'].astype(str) + '_' + df['row_name'].astype(str) + '_' + df['column_name'].astype(str)
     #df['recruitment'] = df['pathogen_channel_1_mean_intensity'] / df['cytoplasm_channel_1_mean_intensity']
     #df['recruitment'] = df['pathogen_channel_1_mean_intensity'] / df['cytoplasm_channel_1_mean_intensity']
     df['class'] = df['png_path'].apply(lambda x: 'class_1' if 'class_1' in x else ('class_0' if 'class_0' in x else None))